arm64: add support for I8X16 ICmp
Copyright (c) 2020, Arm Limited.
This commit is contained in:
1
build.rs
1
build.rs
@@ -180,6 +180,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
|||||||
},
|
},
|
||||||
"Cranelift" => match (testsuite, testname) {
|
"Cranelift" => match (testsuite, testname) {
|
||||||
("simd", "simd_store") => return false,
|
("simd", "simd_store") => return false,
|
||||||
|
("simd", "simd_i8x16_cmp") => return false,
|
||||||
// Most simd tests are known to fail on aarch64 for now, it's going
|
// Most simd tests are known to fail on aarch64 for now, it's going
|
||||||
// to be a big chunk of work to implement them all there!
|
// to be a big chunk of work to implement them all there!
|
||||||
("simd", _) if target.contains("aarch64") => return true,
|
("simd", _) if target.contains("aarch64") => return true,
|
||||||
|
|||||||
@@ -342,6 +342,12 @@ fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
|
|||||||
(top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
|
(top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn enc_vec_rr_misc(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
|
||||||
|
debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
|
||||||
|
let bits = 0b0_1_1_01110_00_10000_00000_10_00000_00000;
|
||||||
|
bits | bits_12_16 << 12 | machreg_to_vec(rn) << 5 | machreg_to_vec(rd.to_reg())
|
||||||
|
}
|
||||||
|
|
||||||
/// State carried between emissions of a sequence of instructions.
|
/// State carried between emissions of a sequence of instructions.
|
||||||
#[derive(Default, Clone, Debug)]
|
#[derive(Default, Clone, Debug)]
|
||||||
pub struct EmitState {
|
pub struct EmitState {
|
||||||
@@ -1002,6 +1008,15 @@ impl MachInstEmit for Inst {
|
|||||||
};
|
};
|
||||||
sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
|
sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
|
||||||
}
|
}
|
||||||
|
&Inst::VecMisc { op, rd, rn, ty } => {
|
||||||
|
let bits_12_16 = match op {
|
||||||
|
VecMisc2::Not => {
|
||||||
|
debug_assert_eq!(I8X16, ty);
|
||||||
|
0b00101
|
||||||
|
}
|
||||||
|
};
|
||||||
|
sink.put4(enc_vec_rr_misc(bits_12_16, rd, rn));
|
||||||
|
}
|
||||||
&Inst::FpuCmp32 { rn, rm } => {
|
&Inst::FpuCmp32 { rn, rm } => {
|
||||||
sink.put4(enc_fcmp(InstSize::Size32, rn, rm));
|
sink.put4(enc_fcmp(InstSize::Size32, rn, rm));
|
||||||
}
|
}
|
||||||
@@ -1125,12 +1140,40 @@ impl MachInstEmit for Inst {
|
|||||||
| machreg_to_gpr(rd.to_reg()),
|
| machreg_to_gpr(rd.to_reg()),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
&Inst::VecRRR { rd, rn, rm, alu_op } => {
|
&Inst::VecRRR {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
alu_op,
|
||||||
|
ty,
|
||||||
|
} => {
|
||||||
|
let enc_size_for_cmp = match ty {
|
||||||
|
I8X16 => 0b00,
|
||||||
|
_ => 0,
|
||||||
|
};
|
||||||
|
|
||||||
let (top11, bit15_10) = match alu_op {
|
let (top11, bit15_10) = match alu_op {
|
||||||
VecALUOp::SQAddScalar => (0b010_11110_11_1, 0b000011),
|
VecALUOp::SQAddScalar => {
|
||||||
VecALUOp::SQSubScalar => (0b010_11110_11_1, 0b001011),
|
debug_assert_eq!(I64, ty);
|
||||||
VecALUOp::UQAddScalar => (0b011_11110_11_1, 0b000011),
|
(0b010_11110_11_1, 0b000011)
|
||||||
VecALUOp::UQSubScalar => (0b011_11110_11_1, 0b001011),
|
}
|
||||||
|
VecALUOp::SQSubScalar => {
|
||||||
|
debug_assert_eq!(I64, ty);
|
||||||
|
(0b010_11110_11_1, 0b001011)
|
||||||
|
}
|
||||||
|
VecALUOp::UQAddScalar => {
|
||||||
|
debug_assert_eq!(I64, ty);
|
||||||
|
(0b011_11110_11_1, 0b000011)
|
||||||
|
}
|
||||||
|
VecALUOp::UQSubScalar => {
|
||||||
|
debug_assert_eq!(I64, ty);
|
||||||
|
(0b011_11110_11_1, 0b001011)
|
||||||
|
}
|
||||||
|
VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b100011),
|
||||||
|
VecALUOp::Cmge => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
|
||||||
|
VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
|
||||||
|
VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
|
||||||
|
VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
|
||||||
};
|
};
|
||||||
sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
|
sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1802,6 +1802,7 @@ fn test_aarch64_binemit() {
|
|||||||
rn: vreg(22),
|
rn: vreg(22),
|
||||||
rm: vreg(23),
|
rm: vreg(23),
|
||||||
alu_op: VecALUOp::UQAddScalar,
|
alu_op: VecALUOp::UQAddScalar,
|
||||||
|
ty: I64,
|
||||||
},
|
},
|
||||||
"D50EF77E",
|
"D50EF77E",
|
||||||
"uqadd d21, d22, d23",
|
"uqadd d21, d22, d23",
|
||||||
@@ -1812,6 +1813,7 @@ fn test_aarch64_binemit() {
|
|||||||
rn: vreg(22),
|
rn: vreg(22),
|
||||||
rm: vreg(23),
|
rm: vreg(23),
|
||||||
alu_op: VecALUOp::SQAddScalar,
|
alu_op: VecALUOp::SQAddScalar,
|
||||||
|
ty: I64,
|
||||||
},
|
},
|
||||||
"D50EF75E",
|
"D50EF75E",
|
||||||
"sqadd d21, d22, d23",
|
"sqadd d21, d22, d23",
|
||||||
@@ -1822,6 +1824,7 @@ fn test_aarch64_binemit() {
|
|||||||
rn: vreg(22),
|
rn: vreg(22),
|
||||||
rm: vreg(23),
|
rm: vreg(23),
|
||||||
alu_op: VecALUOp::UQSubScalar,
|
alu_op: VecALUOp::UQSubScalar,
|
||||||
|
ty: I64,
|
||||||
},
|
},
|
||||||
"D52EF77E",
|
"D52EF77E",
|
||||||
"uqsub d21, d22, d23",
|
"uqsub d21, d22, d23",
|
||||||
@@ -1832,10 +1835,83 @@ fn test_aarch64_binemit() {
|
|||||||
rn: vreg(22),
|
rn: vreg(22),
|
||||||
rm: vreg(23),
|
rm: vreg(23),
|
||||||
alu_op: VecALUOp::SQSubScalar,
|
alu_op: VecALUOp::SQSubScalar,
|
||||||
|
ty: I64,
|
||||||
},
|
},
|
||||||
"D52EF75E",
|
"D52EF75E",
|
||||||
"sqsub d21, d22, d23",
|
"sqsub d21, d22, d23",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Cmeq,
|
||||||
|
rd: writable_vreg(3),
|
||||||
|
rn: vreg(23),
|
||||||
|
rm: vreg(24),
|
||||||
|
ty: I8X16,
|
||||||
|
},
|
||||||
|
"E38E386E",
|
||||||
|
"cmeq v3.16b, v23.16b, v24.16b",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Cmgt,
|
||||||
|
rd: writable_vreg(3),
|
||||||
|
rn: vreg(23),
|
||||||
|
rm: vreg(24),
|
||||||
|
ty: I8X16,
|
||||||
|
},
|
||||||
|
"E336384E",
|
||||||
|
"cmgt v3.16b, v23.16b, v24.16b",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Cmge,
|
||||||
|
rd: writable_vreg(23),
|
||||||
|
rn: vreg(9),
|
||||||
|
rm: vreg(12),
|
||||||
|
ty: I8X16,
|
||||||
|
},
|
||||||
|
"373D2C4E",
|
||||||
|
"cmge v23.16b, v9.16b, v12.16b",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Cmhi,
|
||||||
|
rd: writable_vreg(5),
|
||||||
|
rn: vreg(1),
|
||||||
|
rm: vreg(1),
|
||||||
|
ty: I8X16,
|
||||||
|
},
|
||||||
|
"2534216E",
|
||||||
|
"cmhi v5.16b, v1.16b, v1.16b",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecRRR {
|
||||||
|
alu_op: VecALUOp::Cmhs,
|
||||||
|
rd: writable_vreg(8),
|
||||||
|
rn: vreg(2),
|
||||||
|
rm: vreg(15),
|
||||||
|
ty: I8X16,
|
||||||
|
},
|
||||||
|
"483C2F6E",
|
||||||
|
"cmhs v8.16b, v2.16b, v15.16b",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecMisc {
|
||||||
|
op: VecMisc2::Not,
|
||||||
|
rd: writable_vreg(2),
|
||||||
|
rn: vreg(1),
|
||||||
|
ty: I8X16,
|
||||||
|
},
|
||||||
|
"2258206E",
|
||||||
|
"mvn v2.16b, v1.16b",
|
||||||
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::Extend {
|
Inst::Extend {
|
||||||
rd: writable_xreg(1),
|
rd: writable_xreg(1),
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
|
|
||||||
use crate::binemit::CodeOffset;
|
use crate::binemit::CodeOffset;
|
||||||
use crate::ir::types::{
|
use crate::ir::types::{
|
||||||
B1, B16, B32, B64, B8, F32, F32X2, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS,
|
B1, B16, B32, B64, B8, B8X16, F32, F32X2, F64, FFLAGS, I128, I16, I32, I64, I8, I8X16, IFLAGS,
|
||||||
};
|
};
|
||||||
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
|
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
|
||||||
use crate::machinst::*;
|
use crate::machinst::*;
|
||||||
@@ -197,6 +197,23 @@ pub enum VecALUOp {
|
|||||||
SQSubScalar,
|
SQSubScalar,
|
||||||
/// Unsigned saturating subtract
|
/// Unsigned saturating subtract
|
||||||
UQSubScalar,
|
UQSubScalar,
|
||||||
|
/// Compare bitwise equal
|
||||||
|
Cmeq,
|
||||||
|
/// Compare signed greater than or equal
|
||||||
|
Cmge,
|
||||||
|
/// Compare signed greater than
|
||||||
|
Cmgt,
|
||||||
|
/// Compare unsigned higher or same
|
||||||
|
Cmhs,
|
||||||
|
/// Compare unsigned higher
|
||||||
|
Cmhi,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A Vector miscellaneous operation with two registers.
|
||||||
|
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||||
|
pub enum VecMisc2 {
|
||||||
|
/// Bitwise NOT.
|
||||||
|
Not,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An operation on the bits of a register. This can be paired with several instruction formats
|
/// An operation on the bits of a register. This can be paired with several instruction formats
|
||||||
@@ -626,6 +643,15 @@ pub enum Inst {
|
|||||||
rd: Writable<Reg>,
|
rd: Writable<Reg>,
|
||||||
rn: Reg,
|
rn: Reg,
|
||||||
rm: Reg,
|
rm: Reg,
|
||||||
|
ty: Type,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// Vector two register miscellaneous instruction.
|
||||||
|
VecMisc {
|
||||||
|
op: VecMisc2,
|
||||||
|
rd: Writable<Reg>,
|
||||||
|
rn: Reg,
|
||||||
|
ty: Type,
|
||||||
},
|
},
|
||||||
|
|
||||||
/// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
|
/// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
|
||||||
@@ -1096,6 +1122,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
collector.add_use(rm);
|
collector.add_use(rm);
|
||||||
collector.add_use(ra);
|
collector.add_use(ra);
|
||||||
}
|
}
|
||||||
|
&Inst::VecMisc { rd, rn, .. } => {
|
||||||
|
collector.add_def(rd);
|
||||||
|
collector.add_use(rn);
|
||||||
|
}
|
||||||
&Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
|
&Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
|
||||||
collector.add_use(rn);
|
collector.add_use(rn);
|
||||||
collector.add_use(rm);
|
collector.add_use(rm);
|
||||||
@@ -1567,6 +1597,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
map_use(mapper, rm);
|
map_use(mapper, rm);
|
||||||
map_use(mapper, ra);
|
map_use(mapper, ra);
|
||||||
}
|
}
|
||||||
|
&mut Inst::VecMisc {
|
||||||
|
ref mut rd,
|
||||||
|
ref mut rn,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
map_def(mapper, rd);
|
||||||
|
map_use(mapper, rn);
|
||||||
|
}
|
||||||
&mut Inst::FpuCmp32 {
|
&mut Inst::FpuCmp32 {
|
||||||
ref mut rn,
|
ref mut rn,
|
||||||
ref mut rm,
|
ref mut rm,
|
||||||
@@ -1909,6 +1947,7 @@ impl MachInst for Inst {
|
|||||||
F32 | F64 => Ok(RegClass::V128),
|
F32 | F64 => Ok(RegClass::V128),
|
||||||
IFLAGS | FFLAGS => Ok(RegClass::I64),
|
IFLAGS | FFLAGS => Ok(RegClass::I64),
|
||||||
I8X16 => Ok(RegClass::V128),
|
I8X16 => Ok(RegClass::V128),
|
||||||
|
B8X16 => Ok(RegClass::V128),
|
||||||
_ => Err(CodegenError::Unsupported(format!(
|
_ => Err(CodegenError::Unsupported(format!(
|
||||||
"Unexpected SSA-value type: {}",
|
"Unexpected SSA-value type: {}",
|
||||||
ty
|
ty
|
||||||
@@ -2482,18 +2521,45 @@ impl ShowWithRRU for Inst {
|
|||||||
let rn = rn.show_rru(mb_rru);
|
let rn = rn.show_rru(mb_rru);
|
||||||
format!("mov {}, {}.d[0]", rd, rn)
|
format!("mov {}, {}.d[0]", rd, rn)
|
||||||
}
|
}
|
||||||
&Inst::VecRRR { rd, rn, rm, alu_op } => {
|
&Inst::VecRRR {
|
||||||
let op = match alu_op {
|
rd,
|
||||||
VecALUOp::SQAddScalar => "sqadd",
|
rn,
|
||||||
VecALUOp::UQAddScalar => "uqadd",
|
rm,
|
||||||
VecALUOp::SQSubScalar => "sqsub",
|
alu_op,
|
||||||
VecALUOp::UQSubScalar => "uqsub",
|
ty,
|
||||||
|
} => {
|
||||||
|
let (op, vector) = match alu_op {
|
||||||
|
VecALUOp::SQAddScalar => ("sqadd", false),
|
||||||
|
VecALUOp::UQAddScalar => ("uqadd", false),
|
||||||
|
VecALUOp::SQSubScalar => ("sqsub", false),
|
||||||
|
VecALUOp::UQSubScalar => ("uqsub", false),
|
||||||
|
VecALUOp::Cmeq => ("cmeq", true),
|
||||||
|
VecALUOp::Cmge => ("cmge", true),
|
||||||
|
VecALUOp::Cmgt => ("cmgt", true),
|
||||||
|
VecALUOp::Cmhs => ("cmhs", true),
|
||||||
|
VecALUOp::Cmhi => ("cmhi", true),
|
||||||
};
|
};
|
||||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru);
|
|
||||||
let rn = show_vreg_scalar(rn, mb_rru);
|
let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>, Type) -> String = if vector {
|
||||||
let rm = show_vreg_scalar(rm, mb_rru);
|
|reg, mb_rru, ty| show_vreg_vector(reg, mb_rru, ty)
|
||||||
|
} else {
|
||||||
|
|reg, mb_rru, _ty| show_vreg_scalar(reg, mb_rru)
|
||||||
|
};
|
||||||
|
|
||||||
|
let rd = show_vreg_fn(rd.to_reg(), mb_rru, ty);
|
||||||
|
let rn = show_vreg_fn(rn, mb_rru, ty);
|
||||||
|
let rm = show_vreg_fn(rm, mb_rru, ty);
|
||||||
format!("{} {}, {}, {}", op, rd, rn, rm)
|
format!("{} {}, {}, {}", op, rd, rn, rm)
|
||||||
}
|
}
|
||||||
|
&Inst::VecMisc { op, rd, rn, ty } => {
|
||||||
|
let op = match op {
|
||||||
|
VecMisc2::Not => "mvn",
|
||||||
|
};
|
||||||
|
|
||||||
|
let rd = show_vreg_vector(rd.to_reg(), mb_rru, ty);
|
||||||
|
let rn = show_vreg_vector(rn, mb_rru, ty);
|
||||||
|
format!("{} {}, {}", op, rd, rn)
|
||||||
|
}
|
||||||
&Inst::MovToNZCV { rn } => {
|
&Inst::MovToNZCV { rn } => {
|
||||||
let rn = rn.show_rru(mb_rru);
|
let rn = rn.show_rru(mb_rru);
|
||||||
format!("msr nzcv, {}", rn)
|
format!("msr nzcv, {}", rn)
|
||||||
|
|||||||
@@ -319,6 +319,7 @@ pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) ->
|
|||||||
let mut s = reg.show_rru(mb_rru);
|
let mut s = reg.show_rru(mb_rru);
|
||||||
|
|
||||||
match ty {
|
match ty {
|
||||||
|
I8X16 => s.push_str(".16b"),
|
||||||
F32X2 => s.push_str(".2s"),
|
F32X2 => s.push_str(".2s"),
|
||||||
_ => unimplemented!(),
|
_ => unimplemented!(),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -277,6 +277,7 @@ pub(crate) fn input_to_reg<C: LowerCtx<I = Inst>>(
|
|||||||
tmp.to_reg()
|
tmp.to_reg()
|
||||||
}
|
}
|
||||||
(_, 64) => in_reg,
|
(_, 64) => in_reg,
|
||||||
|
(_, 128) => in_reg,
|
||||||
|
|
||||||
_ => panic!(
|
_ => panic!(
|
||||||
"Unsupported input width: input ty {} bits {} mode {:?}",
|
"Unsupported input width: input ty {} bits {} mode {:?}",
|
||||||
@@ -712,7 +713,7 @@ pub fn ty_bits(ty: Type) -> usize {
|
|||||||
B64 | I64 | F64 => 64,
|
B64 | I64 | F64 => 64,
|
||||||
B128 | I128 => 128,
|
B128 | I128 => 128,
|
||||||
IFLAGS | FFLAGS => 32,
|
IFLAGS | FFLAGS => 32,
|
||||||
I8X16 => 128,
|
I8X16 | B8X16 => 128,
|
||||||
_ => panic!("ty_bits() on unknown type: {:?}", ty),
|
_ => panic!("ty_bits() on unknown type: {:?}", ty),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ use crate::ir::Inst as IRInst;
|
|||||||
use crate::ir::{InstructionData, Opcode, TrapCode};
|
use crate::ir::{InstructionData, Opcode, TrapCode};
|
||||||
use crate::machinst::lower::*;
|
use crate::machinst::lower::*;
|
||||||
use crate::machinst::*;
|
use crate::machinst::*;
|
||||||
use crate::CodegenResult;
|
use crate::{CodegenError, CodegenResult};
|
||||||
|
|
||||||
use crate::isa::aarch64::abi::*;
|
use crate::isa::aarch64::abi::*;
|
||||||
use crate::isa::aarch64::inst::*;
|
use crate::isa::aarch64::inst::*;
|
||||||
@@ -96,6 +96,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
rn: va.to_reg(),
|
rn: va.to_reg(),
|
||||||
rm: vb.to_reg(),
|
rm: vb.to_reg(),
|
||||||
alu_op,
|
alu_op,
|
||||||
|
ty: I64,
|
||||||
});
|
});
|
||||||
ctx.emit(Inst::MovFromVec64 {
|
ctx.emit(Inst::MovFromVec64 {
|
||||||
rd,
|
rd,
|
||||||
@@ -127,6 +128,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
rn: va.to_reg(),
|
rn: va.to_reg(),
|
||||||
rm: vb.to_reg(),
|
rm: vb.to_reg(),
|
||||||
alu_op,
|
alu_op,
|
||||||
|
ty: I64,
|
||||||
});
|
});
|
||||||
ctx.emit(Inst::MovFromVec64 {
|
ctx.emit(Inst::MovFromVec64 {
|
||||||
rd,
|
rd,
|
||||||
@@ -1152,12 +1154,66 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
(false, true) => NarrowValueMode::SignExtend64,
|
(false, true) => NarrowValueMode::SignExtend64,
|
||||||
(false, false) => NarrowValueMode::ZeroExtend64,
|
(false, false) => NarrowValueMode::ZeroExtend64,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if ty_bits(ty) < 128 {
|
||||||
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
|
let alu_op = choose_32_64(ty, ALUOp::SubS32, ALUOp::SubS64);
|
||||||
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
let rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||||
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
|
let rm = input_to_rse_imm12(ctx, inputs[1], narrow_mode);
|
||||||
let rd = output_to_reg(ctx, outputs[0]);
|
let rd = output_to_reg(ctx, outputs[0]);
|
||||||
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
|
ctx.emit(alu_inst_imm12(alu_op, writable_zero_reg(), rn, rm));
|
||||||
ctx.emit(Inst::CondSet { cond, rd });
|
ctx.emit(Inst::CondSet { cond, rd });
|
||||||
|
} else {
|
||||||
|
if ty != I8X16 {
|
||||||
|
return Err(CodegenError::Unsupported(format!(
|
||||||
|
"unsupported simd type: {:?}",
|
||||||
|
ty
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut rn = input_to_reg(ctx, inputs[0], narrow_mode);
|
||||||
|
let mut rm = input_to_reg(ctx, inputs[1], narrow_mode);
|
||||||
|
let rd = output_to_reg(ctx, outputs[0]);
|
||||||
|
|
||||||
|
// 'Less than' operations are implemented by swapping
|
||||||
|
// the order of operands and using the 'greater than'
|
||||||
|
// instructions.
|
||||||
|
// 'Not equal' is implemented with 'equal' and inverting
|
||||||
|
// the result.
|
||||||
|
let (alu_op, swap) = match cond {
|
||||||
|
Cond::Eq => (VecALUOp::Cmeq, false),
|
||||||
|
Cond::Ne => (VecALUOp::Cmeq, false),
|
||||||
|
Cond::Ge => (VecALUOp::Cmge, false),
|
||||||
|
Cond::Gt => (VecALUOp::Cmgt, false),
|
||||||
|
Cond::Le => (VecALUOp::Cmge, true),
|
||||||
|
Cond::Lt => (VecALUOp::Cmgt, true),
|
||||||
|
Cond::Hs => (VecALUOp::Cmhs, false),
|
||||||
|
Cond::Hi => (VecALUOp::Cmhi, false),
|
||||||
|
Cond::Ls => (VecALUOp::Cmhs, true),
|
||||||
|
Cond::Lo => (VecALUOp::Cmhi, true),
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
if swap {
|
||||||
|
std::mem::swap(&mut rn, &mut rm);
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx.emit(Inst::VecRRR {
|
||||||
|
alu_op,
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
ty,
|
||||||
|
});
|
||||||
|
|
||||||
|
if cond == Cond::Ne {
|
||||||
|
ctx.emit(Inst::VecMisc {
|
||||||
|
op: VecMisc2::Not,
|
||||||
|
rd,
|
||||||
|
rn: rd.to_reg(),
|
||||||
|
ty: I8X16,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Fcmp => {
|
Opcode::Fcmp => {
|
||||||
@@ -1350,6 +1406,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
lower_constant_f128(ctx, rd, value);
|
lower_constant_f128(ctx, rd, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::RawBitcast => {
|
||||||
|
let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
|
let rd = output_to_reg(ctx, outputs[0]);
|
||||||
|
let ty = ctx.input_ty(insn, 0);
|
||||||
|
ctx.emit(Inst::gen_move(rd, rm, ty));
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::Shuffle
|
Opcode::Shuffle
|
||||||
| Opcode::Vsplit
|
| Opcode::Vsplit
|
||||||
| Opcode::Vconcat
|
| Opcode::Vconcat
|
||||||
@@ -1359,7 +1422,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
| Opcode::Splat
|
| Opcode::Splat
|
||||||
| Opcode::Insertlane
|
| Opcode::Insertlane
|
||||||
| Opcode::Extractlane
|
| Opcode::Extractlane
|
||||||
| Opcode::RawBitcast
|
|
||||||
| Opcode::ScalarToVector
|
| Opcode::ScalarToVector
|
||||||
| Opcode::Swizzle
|
| Opcode::Swizzle
|
||||||
| Opcode::Uload8x8
|
| Opcode::Uload8x8
|
||||||
|
|||||||
Reference in New Issue
Block a user