Merge pull request #1892 from akirilov-arm/simd_fp_cmp
AArch64: Implement SIMD floating-point comparisons
This commit is contained in:
2
build.rs
2
build.rs
@@ -184,6 +184,8 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
|||||||
("simd", "simd_align") => return false,
|
("simd", "simd_align") => return false,
|
||||||
("simd", "simd_bitwise") => return false,
|
("simd", "simd_bitwise") => return false,
|
||||||
("simd", "simd_boolean") => return false,
|
("simd", "simd_boolean") => return false,
|
||||||
|
("simd", "simd_f32x4_cmp") => return false,
|
||||||
|
("simd", "simd_f64x2_cmp") => return false,
|
||||||
("simd", "simd_i8x16_cmp") => return false,
|
("simd", "simd_i8x16_cmp") => return false,
|
||||||
("simd", "simd_i16x8_cmp") => return false,
|
("simd", "simd_i16x8_cmp") => return false,
|
||||||
("simd", "simd_i32x4_cmp") => return false,
|
("simd", "simd_i32x4_cmp") => return false,
|
||||||
|
|||||||
@@ -1279,6 +1279,11 @@ impl MachInstEmit for Inst {
|
|||||||
I32X4 => 0b10,
|
I32X4 => 0b10,
|
||||||
_ => 0,
|
_ => 0,
|
||||||
};
|
};
|
||||||
|
let enc_size_for_fcmp = match ty {
|
||||||
|
F32X4 => 0b0,
|
||||||
|
F64X2 => 0b1,
|
||||||
|
_ => 0,
|
||||||
|
};
|
||||||
|
|
||||||
let (top11, bit15_10) = match alu_op {
|
let (top11, bit15_10) = match alu_op {
|
||||||
VecALUOp::SQAddScalar => {
|
VecALUOp::SQAddScalar => {
|
||||||
@@ -1302,6 +1307,9 @@ impl MachInstEmit for Inst {
|
|||||||
VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size << 1, 0b001101),
|
VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size << 1, 0b001101),
|
||||||
VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size << 1, 0b001101),
|
VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size << 1, 0b001101),
|
||||||
VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size << 1, 0b001111),
|
VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size << 1, 0b001111),
|
||||||
|
VecALUOp::Fcmeq => (0b010_01110_00_1 | enc_size_for_fcmp << 1, 0b111001),
|
||||||
|
VecALUOp::Fcmgt => (0b011_01110_10_1 | enc_size_for_fcmp << 1, 0b111001),
|
||||||
|
VecALUOp::Fcmge => (0b011_01110_00_1 | enc_size_for_fcmp << 1, 0b111001),
|
||||||
// The following logical instructions operate on bytes, so are not encoded differently
|
// The following logical instructions operate on bytes, so are not encoded differently
|
||||||
// for the different vector types.
|
// for the different vector types.
|
||||||
VecALUOp::And => {
|
VecALUOp::And => {
|
||||||
|
|||||||
@@ -2209,6 +2209,42 @@ fn test_aarch64_binemit() {
|
|||||||
"cmhs v8.4s, v2.4s, v15.4s",
|
"cmhs v8.4s, v2.4s, v15.4s",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Fcmeq,
|
||||||
|
rd: writable_vreg(28),
|
||||||
|
rn: vreg(12),
|
||||||
|
rm: vreg(4),
|
||||||
|
ty: F32X4,
|
||||||
|
},
|
||||||
|
"9CE5244E",
|
||||||
|
"fcmeq v28.4s, v12.4s, v4.4s",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Fcmgt,
|
||||||
|
rd: writable_vreg(3),
|
||||||
|
rn: vreg(16),
|
||||||
|
rm: vreg(31),
|
||||||
|
ty: F64X2,
|
||||||
|
},
|
||||||
|
"03E6FF6E",
|
||||||
|
"fcmgt v3.2d, v16.2d, v31.2d",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Fcmge,
|
||||||
|
rd: writable_vreg(18),
|
||||||
|
rn: vreg(23),
|
||||||
|
rm: vreg(0),
|
||||||
|
ty: F64X2,
|
||||||
|
},
|
||||||
|
"F2E6606E",
|
||||||
|
"fcmge v18.2d, v23.2d, v0.2d",
|
||||||
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::VecRRR {
|
Inst::VecRRR {
|
||||||
alu_op: VecALUOp::And,
|
alu_op: VecALUOp::And,
|
||||||
|
|||||||
@@ -225,6 +225,12 @@ pub enum VecALUOp {
|
|||||||
Cmhs,
|
Cmhs,
|
||||||
/// Compare unsigned higher or same
|
/// Compare unsigned higher or same
|
||||||
Cmhi,
|
Cmhi,
|
||||||
|
/// Floating-point compare equal
|
||||||
|
Fcmeq,
|
||||||
|
/// Floating-point compare greater than
|
||||||
|
Fcmgt,
|
||||||
|
/// Floating-point compare greater than or equal
|
||||||
|
Fcmge,
|
||||||
/// Bitwise and
|
/// Bitwise and
|
||||||
And,
|
And,
|
||||||
/// Bitwise bit clear
|
/// Bitwise bit clear
|
||||||
@@ -2085,7 +2091,9 @@ impl MachInst for Inst {
|
|||||||
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
|
I8 | I16 | I32 | I64 | B1 | B8 | B16 | B32 | B64 => Ok(RegClass::I64),
|
||||||
F32 | F64 => Ok(RegClass::V128),
|
F32 | F64 => Ok(RegClass::V128),
|
||||||
IFLAGS | FFLAGS => Ok(RegClass::I64),
|
IFLAGS | FFLAGS => Ok(RegClass::I64),
|
||||||
B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 => Ok(RegClass::V128),
|
B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 | F32X4 | F64X2 => {
|
||||||
|
Ok(RegClass::V128)
|
||||||
|
}
|
||||||
_ => Err(CodegenError::Unsupported(format!(
|
_ => Err(CodegenError::Unsupported(format!(
|
||||||
"Unexpected SSA-value type: {}",
|
"Unexpected SSA-value type: {}",
|
||||||
ty
|
ty
|
||||||
@@ -2720,6 +2728,9 @@ impl ShowWithRRU for Inst {
|
|||||||
VecALUOp::Cmgt => ("cmgt", true, ty),
|
VecALUOp::Cmgt => ("cmgt", true, ty),
|
||||||
VecALUOp::Cmhs => ("cmhs", true, ty),
|
VecALUOp::Cmhs => ("cmhs", true, ty),
|
||||||
VecALUOp::Cmhi => ("cmhi", true, ty),
|
VecALUOp::Cmhi => ("cmhi", true, ty),
|
||||||
|
VecALUOp::Fcmeq => ("fcmeq", true, ty),
|
||||||
|
VecALUOp::Fcmgt => ("fcmgt", true, ty),
|
||||||
|
VecALUOp::Fcmge => ("fcmge", true, ty),
|
||||||
VecALUOp::And => ("and", true, I8X16),
|
VecALUOp::And => ("and", true, I8X16),
|
||||||
VecALUOp::Bic => ("bic", true, I8X16),
|
VecALUOp::Bic => ("bic", true, I8X16),
|
||||||
VecALUOp::Orr => ("orr", true, I8X16),
|
VecALUOp::Orr => ("orr", true, I8X16),
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ use crate::ir::Inst as IRInst;
|
|||||||
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
|
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
|
||||||
use crate::machinst::lower::*;
|
use crate::machinst::lower::*;
|
||||||
use crate::machinst::*;
|
use crate::machinst::*;
|
||||||
use crate::CodegenResult;
|
use crate::{CodegenError, CodegenResult};
|
||||||
|
|
||||||
use crate::isa::aarch64::inst::*;
|
use crate::isa::aarch64::inst::*;
|
||||||
use crate::isa::aarch64::AArch64Backend;
|
use crate::isa::aarch64::AArch64Backend;
|
||||||
@@ -726,6 +726,77 @@ pub(crate) fn lower_fp_condcode(cc: FloatCC) -> Cond {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
|
||||||
|
ctx: &mut C,
|
||||||
|
rd: Writable<Reg>,
|
||||||
|
mut rn: Reg,
|
||||||
|
mut rm: Reg,
|
||||||
|
ty: Type,
|
||||||
|
cond: Cond,
|
||||||
|
) -> CodegenResult<()> {
|
||||||
|
match ty {
|
||||||
|
F32X4 | F64X2 | I8X16 | I16X8 | I32X4 => {}
|
||||||
|
_ => {
|
||||||
|
return Err(CodegenError::Unsupported(format!(
|
||||||
|
"unsupported SIMD type: {:?}",
|
||||||
|
ty
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let is_float = match ty {
|
||||||
|
F32X4 | F64X2 => true,
|
||||||
|
_ => false,
|
||||||
|
};
|
||||||
|
// 'Less than' operations are implemented by swapping
|
||||||
|
// the order of operands and using the 'greater than'
|
||||||
|
// instructions.
|
||||||
|
// 'Not equal' is implemented with 'equal' and inverting
|
||||||
|
// the result.
|
||||||
|
let (alu_op, swap) = match (is_float, cond) {
|
||||||
|
(false, Cond::Eq) => (VecALUOp::Cmeq, false),
|
||||||
|
(false, Cond::Ne) => (VecALUOp::Cmeq, false),
|
||||||
|
(false, Cond::Ge) => (VecALUOp::Cmge, false),
|
||||||
|
(false, Cond::Gt) => (VecALUOp::Cmgt, false),
|
||||||
|
(false, Cond::Le) => (VecALUOp::Cmge, true),
|
||||||
|
(false, Cond::Lt) => (VecALUOp::Cmgt, true),
|
||||||
|
(false, Cond::Hs) => (VecALUOp::Cmhs, false),
|
||||||
|
(false, Cond::Hi) => (VecALUOp::Cmhi, false),
|
||||||
|
(false, Cond::Ls) => (VecALUOp::Cmhs, true),
|
||||||
|
(false, Cond::Lo) => (VecALUOp::Cmhi, true),
|
||||||
|
(true, Cond::Eq) => (VecALUOp::Fcmeq, false),
|
||||||
|
(true, Cond::Ne) => (VecALUOp::Fcmeq, false),
|
||||||
|
(true, Cond::Mi) => (VecALUOp::Fcmgt, true),
|
||||||
|
(true, Cond::Ls) => (VecALUOp::Fcmge, true),
|
||||||
|
(true, Cond::Ge) => (VecALUOp::Fcmge, false),
|
||||||
|
(true, Cond::Gt) => (VecALUOp::Fcmgt, false),
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
if swap {
|
||||||
|
std::mem::swap(&mut rn, &mut rm);
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.emit(Inst::VecRRR {
|
||||||
|
alu_op,
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
ty,
|
||||||
|
});
|
||||||
|
|
||||||
|
if cond == Cond::Ne {
|
||||||
|
ctx.emit(Inst::VecMisc {
|
||||||
|
op: VecMisc2::Not,
|
||||||
|
rd,
|
||||||
|
rn: rd.to_reg(),
|
||||||
|
ty: I8X16,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
/// Determines whether this condcode interprets inputs as signed or
|
/// Determines whether this condcode interprets inputs as signed or
|
||||||
/// unsigned. See the documentation for the `icmp` instruction in
|
/// unsigned. See the documentation for the `icmp` instruction in
|
||||||
/// cranelift-codegen/meta/src/shared/instructions.rs for further insights
|
/// cranelift-codegen/meta/src/shared/instructions.rs for further insights
|
||||||
@@ -762,6 +833,7 @@ pub fn ty_bits(ty: Type) -> usize {
|
|||||||
IFLAGS | FFLAGS => 32,
|
IFLAGS | FFLAGS => 32,
|
||||||
B8X8 | I8X8 | B16X4 | I16X4 | B32X2 | I32X2 => 64,
|
B8X8 | I8X8 | B16X4 | I16X4 | B32X2 | I32X2 => 64,
|
||||||
B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 => 128,
|
B8X16 | I8X16 | B16X8 | I16X8 | B32X4 | I32X4 | B64X2 | I64X2 => 128,
|
||||||
|
F32X4 | F64X2 => 128,
|
||||||
_ => panic!("ty_bits() on unknown type: {:?}", ty),
|
_ => panic!("ty_bits() on unknown type: {:?}", ty),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ use crate::ir::Inst as IRInst;
|
|||||||
use crate::ir::{InstructionData, Opcode, TrapCode};
|
use crate::ir::{InstructionData, Opcode, TrapCode};
|
||||||
use crate::machinst::lower::*;
|
use crate::machinst::lower::*;
|
||||||
use crate::machinst::*;
|
use crate::machinst::*;
|
||||||
use crate::{CodegenError, CodegenResult};
|
use crate::CodegenResult;
|
||||||
|
|
||||||
use crate::isa::aarch64::abi::*;
|
use crate::isa::aarch64::abi::*;
|
||||||
use crate::isa::aarch64::inst::*;
|
use crate::isa::aarch64::inst::*;
|
||||||
@@ -1234,6 +1234,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let condcode = inst_condcode(ctx.data(insn)).unwrap();
|
let condcode = inst_condcode(ctx.data(insn)).unwrap();
|
||||||
let cond = lower_condcode(condcode);
|
let cond = lower_condcode(condcode);
|
||||||
let is_signed = condcode_is_signed(condcode);
|
let is_signed = condcode_is_signed(condcode);
|
||||||
|
let rd = output_to_reg(ctx, outputs[0]);
|
||||||
let ty = ctx.input_ty(insn, 0);
|
let ty = ctx.input_ty(insn, 0);
|
||||||
let bits = ty_bits(ty);
|
let bits = ty_bits(ty);
|
||||||
let narrow_mode = match (bits <= 32, is_signed) {
|
let narrow_mode = match (bits <= 32, is_signed) {
|
||||||
@@ -1242,68 +1243,16 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
(false, true) => NarrowValueMode::SignExtend64,
|
(false, true) => NarrowValueMode::SignExtend64,
|
||||||
(false, false) => NarrowValueMode::ZeroExtend64,
|
(false, false) => NarrowValueMode::ZeroExtend64,
|
||||||
};
|
};
|
||||||
|
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||||
|
|
||||||
if ty_bits(ty) < 128 {
|
if ty_bits(ty) < 128 {
|
||||||
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
|
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
|
||||||
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
|
||||||
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
|
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
|
||||||
let rd = output_to_reg(ctx, outputs[0]);
|
|
||||||
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
|
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
|
||||||
ctx.emit(Inst::CondSet { cond, rd });
|
ctx.emit(Inst::CondSet { cond, rd });
|
||||||
} else {
|
} else {
|
||||||
match ty {
|
let rm = input_to_reg(ctx, inputs[1], narrow_mode);
|
||||||
I8X16 | I16X8 | I32X4 => {}
|
lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
|
||||||
_ => {
|
|
||||||
return Err(CodegenError::Unsupported(format!(
|
|
||||||
"unsupported simd type: {:?}",
|
|
||||||
ty
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
|
||||||
let mut rm = input_to_reg(ctx, inputs[1], narrow_mode);
|
|
||||||
let rd = output_to_reg(ctx, outputs[0]);
|
|
||||||
|
|
||||||
// 'Less than' operations are implemented by swapping
|
|
||||||
// the order of operands and using the 'greater than'
|
|
||||||
// instructions.
|
|
||||||
// 'Not equal' is implemented with 'equal' and inverting
|
|
||||||
// the result.
|
|
||||||
let (alu_op, swap) = match cond {
|
|
||||||
Cond::Eq => (VecALUOp::Cmeq, false),
|
|
||||||
Cond::Ne => (VecALUOp::Cmeq, false),
|
|
||||||
Cond::Ge => (VecALUOp::Cmge, false),
|
|
||||||
Cond::Gt => (VecALUOp::Cmgt, false),
|
|
||||||
Cond::Le => (VecALUOp::Cmge, true),
|
|
||||||
Cond::Lt => (VecALUOp::Cmgt, true),
|
|
||||||
Cond::Hs => (VecALUOp::Cmhs, false),
|
|
||||||
Cond::Hi => (VecALUOp::Cmhi, false),
|
|
||||||
Cond::Ls => (VecALUOp::Cmhs, true),
|
|
||||||
Cond::Lo => (VecALUOp::Cmhi, true),
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
|
|
||||||
if swap {
|
|
||||||
std::mem::swap(&mut rn, &mut rm);
|
|
||||||
}
|
|
||||||
|
|
||||||
ctx.emit(Inst::VecRRR {
|
|
||||||
alu_op,
|
|
||||||
rd,
|
|
||||||
rn,
|
|
||||||
rm,
|
|
||||||
ty,
|
|
||||||
});
|
|
||||||
|
|
||||||
if cond == Cond::Ne {
|
|
||||||
ctx.emit(Inst::VecMisc {
|
|
||||||
op: VecMisc2::Not,
|
|
||||||
rd,
|
|
||||||
rn: rd.to_reg(),
|
|
||||||
ty: I8X16,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1314,6 +1263,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
let rn = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
|
let rm = input_to_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||||
let rd = output_to_reg(ctx, outputs[0]);
|
let rd = output_to_reg(ctx, outputs[0]);
|
||||||
|
|
||||||
|
if ty_bits(ty) < 128 {
|
||||||
match ty_bits(ty) {
|
match ty_bits(ty) {
|
||||||
32 => {
|
32 => {
|
||||||
ctx.emit(Inst::FpuCmp32 { rn, rm });
|
ctx.emit(Inst::FpuCmp32 { rn, rm });
|
||||||
@@ -1324,6 +1275,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
_ => panic!("Bad float size"),
|
_ => panic!("Bad float size"),
|
||||||
}
|
}
|
||||||
ctx.emit(Inst::CondSet { cond, rd });
|
ctx.emit(Inst::CondSet { cond, rd });
|
||||||
|
} else {
|
||||||
|
lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::JumpTableEntry | Opcode::JumpTableBase => {
|
Opcode::JumpTableEntry | Opcode::JumpTableBase => {
|
||||||
|
|||||||
Reference in New Issue
Block a user