arm64: add support for I8X16 ICmp

Copyright (c) 2020, Arm Limited.
Joey Gouly
2020-06-02 16:58:09 +01:00
parent 1acbad089a
commit 90a421193f
7 changed files with 274 additions and 24 deletions


@@ -7,7 +7,7 @@ use crate::ir::Inst as IRInst;
 use crate::ir::{InstructionData, Opcode, TrapCode};
 use crate::machinst::lower::*;
 use crate::machinst::*;
-use crate::CodegenResult;
+use crate::{CodegenError, CodegenResult};
 use crate::isa::aarch64::abi::*;
 use crate::isa::aarch64::inst::*;
@@ -96,6 +96,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 rn: va.to_reg(),
                 rm: vb.to_reg(),
                 alu_op,
+                ty: I64,
             });
             ctx.emit(Inst::MovFromVec64 {
                 rd,
@@ -127,6 +128,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 rn: va.to_reg(),
                 rm: vb.to_reg(),
                 alu_op,
+                ty: I64,
             });
             ctx.emit(Inst::MovFromVec64 {
                 rd,
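
Both hunks above thread a new ty field through the existing scalar uses of
Inst::VecRRR, so the emitter can derive the vector arrangement from the
operand type (still I64 here) rather than assuming 64-bit lanes. As a rough
sketch of the naming involved (a hypothetical helper, not the actual
Cranelift encoder), an AArch64 arrangement is the lane count followed by a
lane-width letter:

    // Sketch only: AArch64 arrangement suffixes are a lane count plus a
    // width letter (b/h/s/d). I8X16 maps to "16b", the form CMEQ and
    // friends use for sixteen 8-bit lanes.
    fn arrangement(lane_bits: u32, lanes: u32) -> String {
        let width = match lane_bits {
            8 => 'b',
            16 => 'h',
            32 => 's',
            64 => 'd',
            _ => panic!("unsupported lane width"),
        };
        format!("{}{}", lanes, width) // arrangement(8, 16) == "16b"
    }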
@@ -1152,12 +1154,66 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 (false, true) => NarrowValueMode::SignExtend64,
                 (false, false) => NarrowValueMode::ZeroExtend64,
             };
-            let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
-            let rn = input_to_reg(ctx, inputs[0], narrow_mode);
-            let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
-            let rd = output_to_reg(ctx, outputs[0]);
-            ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
-            ctx.emit(Inst::CondSet { cond, rd });
+            if ty_bits(ty) < 128 {
+                let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
+                let rn = input_to_reg(ctx, inputs[0], narrow_mode);
+                let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
+                let rd = output_to_reg(ctx, outputs[0]);
+                ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
+                ctx.emit(Inst::CondSet { cond, rd });
+            } else {
+                if ty != I8X16 {
+                    return Err(CodegenError::Unsupported(format!(
+                        "unsupported simd type: {:?}",
+                        ty
+                    )));
+                }
+
+                let mut rn = input_to_reg(ctx, inputs[0], narrow_mode);
+                let mut rm = input_to_reg(ctx, inputs[1], narrow_mode);
+                let rd = output_to_reg(ctx, outputs[0]);
+
+                // 'Less than' operations are implemented by swapping
+                // the order of operands and using the 'greater than'
+                // instructions.
+                // 'Not equal' is implemented with 'equal' and inverting
+                // the result.
+                let (alu_op, swap) = match cond {
+                    Cond::Eq => (VecALUOp::Cmeq, false),
+                    Cond::Ne => (VecALUOp::Cmeq, false),
+                    Cond::Ge => (VecALUOp::Cmge, false),
+                    Cond::Gt => (VecALUOp::Cmgt, false),
+                    Cond::Le => (VecALUOp::Cmge, true),
+                    Cond::Lt => (VecALUOp::Cmgt, true),
+                    Cond::Hs => (VecALUOp::Cmhs, false),
+                    Cond::Hi => (VecALUOp::Cmhi, false),
+                    Cond::Ls => (VecALUOp::Cmhs, true),
+                    Cond::Lo => (VecALUOp::Cmhi, true),
+                    _ => unreachable!(),
+                };
+
+                if swap {
+                    std::mem::swap(&mut rn, &mut rm);
+                }
+
+                ctx.emit(Inst::VecRRR {
+                    alu_op,
+                    rd,
+                    rn,
+                    rm,
+                    ty,
+                });
+
+                if cond == Cond::Ne {
+                    ctx.emit(Inst::VecMisc {
+                        op: VecMisc2::Not,
+                        rd,
+                        rn: rd.to_reg(),
+                        ty: I8X16,
+                    });
+                }
+            }
         }
         Opcode::Fcmp => {
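
The mapping above covers two gaps in the AArch64 SIMD compare set: there is
no register-register CMLT/CMLE and no CMNE, so the 'less than' conditions
reuse CMGT/CMGE with swapped operands, and Ne runs CMEQ and then inverts the
lane mask with NOT. A minimal single-lane model of those two identities
(plain Rust for illustration, not backend code):

    // Each compare yields an all-ones (0xff) or all-zeros lane mask,
    // mirroring the SIMD semantics on one i8 lane.
    fn cmgt(a: i8, b: i8) -> u8 { if a > b { 0xff } else { 0x00 } }
    fn cmeq(a: i8, b: i8) -> u8 { if a == b { 0xff } else { 0x00 } }

    // 'Less than' is 'greater than' with the operands swapped.
    fn cmlt(a: i8, b: i8) -> u8 { cmgt(b, a) }
    // 'Not equal' is 'equal' followed by inverting the mask.
    fn cmne(a: i8, b: i8) -> u8 { !cmeq(a, b) }

    fn main() {
        assert_eq!(cmlt(1, 2), 0xff);
        assert_eq!(cmlt(2, 1), 0x00);
        assert_eq!(cmne(3, 3), 0x00);
        assert_eq!(cmne(3, 4), 0xff);
    }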
@@ -1350,6 +1406,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             lower_constant_f128(ctx, rd, value);
         }
+        Opcode::RawBitcast => {
+            let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+            let rd = output_to_reg(ctx, outputs[0]);
+            let ty = ctx.input_ty(insn, 0);
+            ctx.emit(Inst::gen_move(rd, rm, ty));
+        }
         Opcode::Shuffle
         | Opcode::Vsplit
         | Opcode::Vconcat
@@ -1359,7 +1422,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::Splat
         | Opcode::Insertlane
         | Opcode::Extractlane
-        | Opcode::RawBitcast
         | Opcode::ScalarToVector
         | Opcode::Swizzle
         | Opcode::Uload8x8
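
The last two hunks give RawBitcast a real lowering and drop it from the
unsupported-opcode list: a raw bitcast only reinterprets a value's bits
under a new type, so a register-to-register move (Inst::gen_move) is all
the instruction selection needs. A small host-side illustration of that
semantics (ordinary Rust, unrelated to the backend):

    // Reinterpreting four i8 lanes as one i32 changes the type, not the
    // bit pattern, which is why the lowering is just a move.
    fn main() {
        let lanes: [i8; 4] = [-1, 0, -1, 0]; // bytes ff 00 ff 00
        let bits = i32::from_le_bytes(lanes.map(|b| b as u8));
        assert_eq!(bits as u32, 0x00ff_00ff);
    }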