Merge pull request #1890 from jgouly/simd-bool
arm64: Implement AllTrue and AnyTrue
build.rs
@@ -182,6 +182,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
         "Cranelift" => match (testsuite, testname) {
             ("simd", "simd_address") => return false,
             ("simd", "simd_bitwise") => return false,
+            ("simd", "simd_boolean") => return false,
             ("simd", "simd_i8x16_cmp") => return false,
             ("simd", "simd_i16x8_cmp") => return false,
             ("simd", "simd_i32x4_cmp") => return false,
@@ -361,6 +361,20 @@ fn enc_vec_rr_misc(bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
     bits | bits_12_16 << 12 | machreg_to_vec(rn) << 5 | machreg_to_vec(rd.to_reg())
 }

+fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+    debug_assert_eq!(q & 0b1, q);
+    debug_assert_eq!(u & 0b1, u);
+    debug_assert_eq!(size & 0b11, size);
+    debug_assert_eq!(opcode & 0b11111, opcode);
+    0b0_0_0_01110_00_11000_0_0000_10_00000_00000
+        | q << 30
+        | u << 29
+        | size << 22
+        | opcode << 12
+        | machreg_to_vec(rn) << 5
+        | machreg_to_vec(rd.to_reg())
+}
+
 /// State carried between emissions of a sequence of instructions.
 #[derive(Default, Clone, Debug)]
 pub struct EmitState {
@@ -1061,6 +1075,18 @@ impl MachInstEmit for Inst {
                 };
                 sink.put4(enc_vec_rr_misc(bits_12_16, rd, rn));
             }
+            &Inst::VecLanes { op, rd, rn, ty } => {
+                let (q, size) = match ty {
+                    I8X16 => (0b1, 0b00),
+                    I16X8 => (0b1, 0b01),
+                    I32X4 => (0b1, 0b10),
+                    _ => unreachable!(),
+                };
+                let (u, opcode) = match op {
+                    VecLanesOp::Uminv => (0b1, 0b11010),
+                };
+                sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
+            }
             &Inst::FpuCmp32 { rn, rm } => {
                 sink.put4(enc_fcmp(InstSize::Size32, rn, rm));
             }
@@ -1247,7 +1273,7 @@ impl MachInstEmit for Inst {
                 alu_op,
                 ty,
             } => {
-                let enc_size_for_cmp = match ty {
+                let enc_size = match ty {
                     I8X16 => 0b00,
                     I16X8 => 0b01,
                     I32X4 => 0b10,
@@ -1271,12 +1297,12 @@ impl MachInstEmit for Inst {
                         debug_assert_eq!(I64, ty);
                         (0b011_11110_11_1, 0b001011)
                     }
-                    VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b100011),
-                    VecALUOp::Cmge => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
-                    VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
-                    VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001101),
-                    VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size_for_cmp << 1, 0b001111),
-                    // The following instructions operate on bytes, so are not encoded differently
+                    VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size << 1, 0b100011),
+                    VecALUOp::Cmge => (0b010_01110_00_1 | enc_size << 1, 0b001111),
+                    VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size << 1, 0b001101),
+                    VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size << 1, 0b001101),
+                    VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size << 1, 0b001111),
+                    // The following logical instructions operate on bytes, so are not encoded differently
                     // for the different vector types.
                     VecALUOp::And => {
                         debug_assert_eq!(128, ty_bits(ty));
@@ -1298,6 +1324,7 @@ impl MachInstEmit for Inst {
                         debug_assert_eq!(128, ty_bits(ty));
                         (0b011_01110_01_1, 0b000111)
                     }
+                    VecALUOp::Umaxp => (0b011_01110_00_1 | enc_size << 1, 0b101001),
                 };
                 sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
             }
@@ -2269,6 +2269,42 @@ fn test_aarch64_binemit() {
         "bsl v8.16b, v9.16b, v1.16b",
     ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Umaxp,
+            rd: writable_vreg(8),
+            rn: vreg(12),
+            rm: vreg(1),
+            ty: I8X16,
+        },
+        "88A5216E",
+        "umaxp v8.16b, v12.16b, v1.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Umaxp,
+            rd: writable_vreg(1),
+            rn: vreg(6),
+            rm: vreg(1),
+            ty: I16X8,
+        },
+        "C1A4616E",
+        "umaxp v1.8h, v6.8h, v1.8h",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Umaxp,
+            rd: writable_vreg(1),
+            rn: vreg(20),
+            rm: vreg(16),
+            ty: I32X4,
+        },
+        "81A6B06E",
+        "umaxp v1.4s, v20.4s, v16.4s",
+    ));
+
     insns.push((
         Inst::VecMisc {
             op: VecMisc2::Not,
@@ -2280,6 +2316,39 @@ fn test_aarch64_binemit() {
         "mvn v2.16b, v1.16b",
     ));

+    insns.push((
+        Inst::VecLanes {
+            op: VecLanesOp::Uminv,
+            rd: writable_vreg(2),
+            rn: vreg(1),
+            ty: I8X16,
+        },
+        "22A8316E",
+        "uminv b2, v1.16b",
+    ));
+
+    insns.push((
+        Inst::VecLanes {
+            op: VecLanesOp::Uminv,
+            rd: writable_vreg(3),
+            rn: vreg(11),
+            ty: I16X8,
+        },
+        "63A9716E",
+        "uminv h3, v11.8h",
+    ));
+
+    insns.push((
+        Inst::VecLanes {
+            op: VecLanesOp::Uminv,
+            rd: writable_vreg(18),
+            rn: vreg(4),
+            ty: I32X4,
+        },
+        "92A8B16E",
+        "uminv s18, v4.4s",
+    ));
+
     insns.push((
         Inst::Extend {
             rd: writable_xreg(1),
@@ -304,6 +304,14 @@ impl Imm12 {
         }
     }

+    /// Create a zero immediate of this format.
+    pub fn zero() -> Self {
+        Imm12 {
+            bits: 0,
+            shift12: false,
+        }
+    }
+
     /// Bits for 2-bit "shift" field in e.g. AddI.
     pub fn shift_bits(&self) -> u32 {
         if self.shift12 {
@@ -235,6 +235,8 @@ pub enum VecALUOp {
     Eor,
     /// Bitwise select
     Bsl,
+    /// Unsigned maximum pairwise
+    Umaxp,
 }

 /// A Vector miscellaneous operation with two registers.
@@ -244,6 +246,13 @@ pub enum VecMisc2 {
     Not,
 }

+/// An operation across the lanes of vectors.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecLanesOp {
+    /// Unsigned minimum across a vector
+    Uminv,
+}
+
 /// An operation on the bits of a register. This can be paired with several instruction formats
 /// below (see `Inst`) in any combination.
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
@@ -743,6 +752,14 @@ pub enum Inst {
         ty: Type,
     },

+    /// Vector instruction across lanes.
+    VecLanes {
+        op: VecLanesOp,
+        rd: Writable<Reg>,
+        rn: Reg,
+        ty: Type,
+    },
+
     /// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
     MovToNZCV {
         rn: Reg,
@@ -1214,6 +1231,11 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
            collector.add_def(rd);
            collector.add_use(rn);
        }
+
+        &Inst::VecLanes { rd, rn, .. } => {
+            collector.add_def(rd);
+            collector.add_use(rn);
+        }
        &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
            collector.add_use(rn);
            collector.add_use(rm);
@@ -1708,6 +1730,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            map_def(mapper, rd);
            map_use(mapper, rn);
        }
+        &mut Inst::VecLanes {
+            ref mut rd,
+            ref mut rn,
+            ..
+        } => {
+            map_def(mapper, rd);
+            map_use(mapper, rn);
+        }
        &mut Inst::FpuCmp32 {
            ref mut rn,
            ref mut rm,
@@ -2482,7 +2512,7 @@ impl ShowWithRRU for Inst {
                let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>) -> String = if vector {
                    |reg, mb_rru| show_vreg_vector(reg, mb_rru, F32X2)
                } else {
-                    show_vreg_scalar
+                    |reg, mb_rru| show_vreg_scalar(reg, mb_rru, F64)
                };
                let rd = show_vreg_fn(rd.to_reg(), mb_rru);
                let rn = show_vreg_fn(rn, mb_rru);
@@ -2695,12 +2725,13 @@ impl ShowWithRRU for Inst {
                    VecALUOp::Orr => ("orr", true, I8X16),
                    VecALUOp::Eor => ("eor", true, I8X16),
                    VecALUOp::Bsl => ("bsl", true, I8X16),
+                    VecALUOp::Umaxp => ("umaxp", true, ty),
                };

                let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>, Type) -> String = if vector {
                    |reg, mb_rru, ty| show_vreg_vector(reg, mb_rru, ty)
                } else {
-                    |reg, mb_rru, _ty| show_vreg_scalar(reg, mb_rru)
+                    |reg, mb_rru, _ty| show_vreg_scalar(reg, mb_rru, I64)
                };

                let rd = show_vreg_fn(rd.to_reg(), mb_rru, ty);
@@ -2722,6 +2753,15 @@ impl ShowWithRRU for Inst {
                let rn = show_vreg_vector(rn, mb_rru, ty);
                format!("{} {}, {}", op, rd, rn)
            }
+            &Inst::VecLanes { op, rd, rn, ty } => {
+                let op = match op {
+                    VecLanesOp::Uminv => "uminv",
+                };
+
+                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ty);
+                let rn = show_vreg_vector(rn, mb_rru, ty);
+                format!("{} {}, {}", op, rd, rn)
+            }
            &Inst::MovToNZCV { rn } => {
                let rn = rn.show_rru(mb_rru);
                format!("msr nzcv, {}", rn)
@@ -292,7 +292,7 @@ pub fn show_freg_sized(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: InstSiz
 }

 /// Show a vector register used in a scalar context.
-pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>) -> String {
+pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> String {
     let mut s = reg.show_rru(mb_rru);
     if reg.get_class() != RegClass::V128 {
         // We can't do any better.
@@ -302,7 +302,14 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>) -> String {
     if reg.is_real() {
         // Change (eg) "v0" into "d0".
         if reg.get_class() == RegClass::V128 && s.starts_with("v") {
-            s.replace_range(0..1, "d");
+            let replacement = match ty {
+                I64 | F64 => "d",
+                I8X16 => "b",
+                I16X8 => "h",
+                I32X4 => "s",
+                _ => unimplemented!(),
+            };
+            s.replace_range(0..1, replacement);
         }
     } else {
         // Add a "d" suffix to RegClass::V128 vregs.
@@ -1540,12 +1540,58 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            ctx.emit(inst);
        }

+        Opcode::VanyTrue | Opcode::VallTrue => {
+            let rd = output_to_reg(ctx, outputs[0]);
+            let rm = input_to_reg(ctx, inputs[0], NarrowValueMode::None);
+            let tmp = ctx.alloc_tmp(RegClass::V128, ty.unwrap());
+
+            // This operation is implemented by using umaxp or uminv to
+            // create a scalar value, which is then compared against zero.
+            //
+            // umaxp vn.16b, vm.16, vm.16 / uminv bn, vm.16b
+            // mov xm, vn.d[0]
+            // cmp xm, #0
+            // cset xm, ne
+
+            let input_ty = ctx.input_ty(insn, 0);
+            if op == Opcode::VanyTrue {
+                ctx.emit(Inst::VecRRR {
+                    alu_op: VecALUOp::Umaxp,
+                    rd: tmp,
+                    rn: rm,
+                    rm: rm,
+                    ty: input_ty,
+                });
+            } else {
+                ctx.emit(Inst::VecLanes {
+                    op: VecLanesOp::Uminv,
+                    rd: tmp,
+                    rn: rm,
+                    ty: input_ty,
+                });
+            };
+
+            ctx.emit(Inst::MovFromVec {
+                rd,
+                rn: tmp.to_reg(),
+                idx: 0,
+                ty: I64,
+            });
+
+            ctx.emit(Inst::AluRRImm12 {
+                alu_op: ALUOp::SubS64,
+                rd: writable_zero_reg(),
+                rn: rd.to_reg(),
+                imm12: Imm12::zero(),
+            });
+
+            ctx.emit(Inst::CSet { rd, cond: Cond::Ne });
+        }
+
        Opcode::Shuffle
        | Opcode::Vsplit
        | Opcode::Vconcat
        | Opcode::Vselect
-        | Opcode::VanyTrue
-        | Opcode::VallTrue
        | Opcode::Insertlane
        | Opcode::ScalarToVector
        | Opcode::Swizzle