arm64: Implement SIMD i64x2 multiply
Copyright (c) 2020, Arm Limited.

build.rs

@@ -211,6 +211,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
         ("simd", "simd_bitwise") => return false,
         ("simd", "simd_bit_shift") => return false,
         ("simd", "simd_boolean") => return false,
+        ("simd", "simd_const") => return false,
         ("simd", "simd_f32x4") => return false,
         ("simd", "simd_f32x4_arith") => return false,
         ("simd", "simd_f32x4_cmp") => return false,
@@ -228,6 +229,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
         ("simd", "simd_i32x4_arith") => return false,
         ("simd", "simd_i32x4_arith2") => return false,
         ("simd", "simd_i32x4_cmp") => return false,
+        ("simd", "simd_i64x2_arith") => return false,
         ("simd", "simd_lane") => return false,
         ("simd", "simd_load_extend") => return false,
         ("simd", "simd_load_splat") => return false,
@@ -647,6 +647,30 @@ impl VectorSize {
             VectorSize::Size64x2 => ScalarSize::Size64,
         }
     }
+
+    pub fn is_128bits(&self) -> bool {
+        match self {
+            VectorSize::Size8x8 => false,
+            VectorSize::Size8x16 => true,
+            VectorSize::Size16x4 => false,
+            VectorSize::Size16x8 => true,
+            VectorSize::Size32x2 => false,
+            VectorSize::Size32x4 => true,
+            VectorSize::Size64x2 => true,
+        }
+    }
+
+    pub fn widen(&self) -> VectorSize {
+        match self {
+            VectorSize::Size8x8 => VectorSize::Size16x8,
+            VectorSize::Size8x16 => VectorSize::Size16x8,
+            VectorSize::Size16x4 => VectorSize::Size32x4,
+            VectorSize::Size16x8 => VectorSize::Size32x4,
+            VectorSize::Size32x2 => VectorSize::Size64x2,
+            VectorSize::Size32x4 => VectorSize::Size64x2,
+            VectorSize::Size64x2 => unreachable!(),
+        }
+    }
 }

 //=============================================================================
@@ -352,12 +352,12 @@ fn enc_fround(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
     (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg())
 }

-fn enc_vec_rr_misc(u: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
-    debug_assert_eq!(u & 0b1, u);
+fn enc_vec_rr_misc(qu: u32, size: u32, bits_12_16: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
+    debug_assert_eq!(qu & 0b11, qu);
     debug_assert_eq!(size & 0b11, size);
     debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16);
-    let bits = 0b0_1_0_01110_00_10000_00000_10_00000_00000;
-    bits | u << 29
+    let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
+    bits | qu << 29
         | size << 22
         | bits_12_16 << 12
         | machreg_to_vec(rn) << 5
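Since `enc_vec_rr_misc` now packs the `q` (vector length) and `u` bits into a single `qu` argument, a quick way to sanity-check the bit layout is to model it standalone and compare against the test vectors added later in this commit (the hex strings in the tests are in little-endian byte order). This is an illustrative sketch, not part of the patch; plain register numbers stand in for `machreg_to_vec`:

// Standalone model of the `enc_vec_rr_misc` bit layout above.
fn enc_model(qu: u32, size: u32, bits_12_16: u32, rd: u32, rn: u32) -> u32 {
    let bits = 0b0_00_01110_00_10000_00000_10_00000_00000;
    bits | qu << 29 | size << 22 | bits_12_16 << 12 | rn << 5 | rd
}

fn main() {
    // rev64 v1.4s, v10.4s: u = 0, bits_12_16 = 0b00000, 32-bit lanes (size = 0b10),
    // 128-bit operation, so qu = (1 << 1) | 0. The test below encodes this as "4109A04E".
    assert_eq!(enc_model(0b10, 0b10, 0b00000, 1, 10), 0x4EA00941);
    // xtn v22.2s, v8.2d: the VecMiscNarrow emission passes qu = 0 (64-bit form),
    // size = 0b10, bits_12_16 = 0b10010. The test below encodes this as "1629A10E".
    assert_eq!(enc_model(0b00, 0b10, 0b10010, 22, 8), 0x0EA12916);
}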
@@ -1367,13 +1367,14 @@ impl MachInstEmit for Inst {
                 sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
             }
             &Inst::VecMisc { op, rd, rn, size } => {
-                let enc_size = match size {
-                    VectorSize::Size8x16 => 0b00,
-                    VectorSize::Size16x8 => 0b01,
-                    VectorSize::Size32x4 => 0b10,
-                    VectorSize::Size64x2 => 0b11,
-                    _ => unimplemented!(),
+                let enc_size = match size.lane_size() {
+                    ScalarSize::Size8 => 0b00,
+                    ScalarSize::Size16 => 0b01,
+                    ScalarSize::Size32 => 0b10,
+                    ScalarSize::Size64 => 0b11,
+                    _ => unreachable!(),
                 };
+                let q = if size.is_128bits() { 1 } else { 0 };
                 let (u, bits_12_16, size) = match op {
                     VecMisc2::Not => (0b1, 0b00101, 0b00),
                     VecMisc2::Neg => (0b1, 0b01011, enc_size),
@@ -1390,8 +1391,17 @@ impl MachInstEmit for Inst {
                         debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
                         (0b1, 0b11111, enc_size)
                     }
+                    VecMisc2::Rev64 => {
+                        debug_assert_ne!(VectorSize::Size64x2, size);
+                        (0b0, 0b00000, enc_size)
+                    }
+                    VecMisc2::Shll => {
+                        debug_assert_ne!(VectorSize::Size64x2, size);
+                        debug_assert!(!size.is_128bits());
+                        (0b1, 0b10011, enc_size)
+                    }
                 };
-                sink.put4(enc_vec_rr_misc(u, size, bits_12_16, rd, rn));
+                sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
             }
             &Inst::VecLanes { op, rd, rn, size } => {
                 let (q, size) = match size {
@@ -1651,6 +1661,17 @@ impl MachInstEmit for Inst {
                         | machreg_to_vec(rd.to_reg()),
                 );
             }
+            &Inst::VecMiscNarrow { op, rd, rn, size } => {
+                debug_assert!(!size.is_128bits());
+                let size = match size.widen() {
+                    VectorSize::Size64x2 => 0b10,
+                    _ => unimplemented!(),
+                };
+                let (u, bits_12_16) = match op {
+                    VecMiscNarrowOp::Xtn => (0b0, 0b10010),
+                };
+                sink.put4(enc_vec_rr_misc(u, size, bits_12_16, rd, rn));
+            }
             &Inst::VecMovElement {
                 rd,
                 rn,
@@ -1685,12 +1706,12 @@ impl MachInstEmit for Inst {
                 alu_op,
                 size,
             } => {
-                let enc_size = match size {
-                    VectorSize::Size8x16 => 0b00,
-                    VectorSize::Size16x8 => 0b01,
-                    VectorSize::Size32x4 => 0b10,
-                    VectorSize::Size64x2 => 0b11,
-                    _ => 0,
+                let enc_size = match size.lane_size() {
+                    ScalarSize::Size8 => 0b00,
+                    ScalarSize::Size16 => 0b01,
+                    ScalarSize::Size32 => 0b10,
+                    ScalarSize::Size64 => 0b11,
+                    _ => unreachable!(),
                 };
                 let is_float = match alu_op {
                     VecALUOp::Fcmeq
@@ -1751,6 +1772,11 @@ impl MachInstEmit for Inst {
                     VecALUOp::Fmax => (0b010_01110_00_1, 0b111101),
                     VecALUOp::Fmin => (0b010_01110_10_1, 0b111101),
                     VecALUOp::Fmul => (0b011_01110_00_1, 0b110111),
+                    VecALUOp::Addp => (0b010_01110_00_1 | enc_size << 1, 0b101111),
+                    VecALUOp::Umlal => {
+                        debug_assert!(!size.is_128bits());
+                        (0b001_01110_00_1 | enc_size << 1, 0b100000)
+                    }
                 };
                 let top11 = if is_float {
                     top11 | enc_float_size << 1
@@ -2082,6 +2082,17 @@ fn test_aarch64_binemit() {
         "mov v31.s[1], v16.s[0]",
     ));

+    insns.push((
+        Inst::VecMiscNarrow {
+            op: VecMiscNarrowOp::Xtn,
+            rd: writable_vreg(22),
+            rn: vreg(8),
+            size: VectorSize::Size32x2,
+        },
+        "1629A10E",
+        "xtn v22.2s, v8.2d",
+    ));
+
     insns.push((
         Inst::VecRRR {
             alu_op: VecALUOp::Sqadd,
@@ -3066,6 +3077,53 @@ fn test_aarch64_binemit() {
         "fmul v2.2d, v0.2d, v5.2d",
     ));

+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Addp,
+            rd: writable_vreg(16),
+            rn: vreg(12),
+            rm: vreg(1),
+            size: VectorSize::Size8x16,
+        },
+        "90BD214E",
+        "addp v16.16b, v12.16b, v1.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Addp,
+            rd: writable_vreg(8),
+            rn: vreg(12),
+            rm: vreg(14),
+            size: VectorSize::Size32x4,
+        },
+        "88BDAE4E",
+        "addp v8.4s, v12.4s, v14.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Umlal,
+            rd: writable_vreg(9),
+            rn: vreg(20),
+            rm: vreg(17),
+            size: VectorSize::Size32x2,
+        },
+        "8982B12E",
+        "umlal v9.2d, v20.2s, v17.2s",
+    ));
+
+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Not,
+            rd: writable_vreg(20),
+            rn: vreg(17),
+            size: VectorSize::Size8x8,
+        },
+        "345A202E",
+        "mvn v20.8b, v17.8b",
+    ));
+
     insns.push((
         Inst::VecMisc {
             op: VecMisc2::Not,
@@ -3077,6 +3135,17 @@ fn test_aarch64_binemit() {
         "mvn v2.16b, v1.16b",
     ));

+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Neg,
+            rd: writable_vreg(3),
+            rn: vreg(7),
+            size: VectorSize::Size8x8,
+        },
+        "E3B8202E",
+        "neg v3.8b, v7.8b",
+    ));
+
     insns.push((
         Inst::VecMisc {
             op: VecMisc2::Neg,
@@ -3121,6 +3190,17 @@ fn test_aarch64_binemit() {
         "neg v10.2d, v8.2d",
     ));

+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Abs,
+            rd: writable_vreg(3),
+            rn: vreg(1),
+            size: VectorSize::Size8x8,
+        },
+        "23B8200E",
+        "abs v3.8b, v1.8b",
+    ));
+
     insns.push((
         Inst::VecMisc {
             op: VecMisc2::Abs,
@@ -3198,6 +3278,50 @@ fn test_aarch64_binemit() {
         "fsqrt v7.2d, v18.2d",
     ));

+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Rev64,
+            rd: writable_vreg(1),
+            rn: vreg(10),
+            size: VectorSize::Size32x4,
+        },
+        "4109A04E",
+        "rev64 v1.4s, v10.4s",
+    ));
+
+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Shll,
+            rd: writable_vreg(12),
+            rn: vreg(5),
+            size: VectorSize::Size8x8,
+        },
+        "AC38212E",
+        "shll v12.8h, v5.8b, #8",
+    ));
+
+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Shll,
+            rd: writable_vreg(9),
+            rn: vreg(1),
+            size: VectorSize::Size16x4,
+        },
+        "2938612E",
+        "shll v9.4s, v1.4h, #16",
+    ));
+
+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Shll,
+            rd: writable_vreg(1),
+            rn: vreg(10),
+            size: VectorSize::Size32x2,
+        },
+        "4139A12E",
+        "shll v1.2d, v10.2s, #32",
+    ));
+
     insns.push((
         Inst::VecLanes {
             op: VecLanesOp::Uminv,
@@ -283,6 +283,10 @@ pub enum VecALUOp {
     Fmin,
     /// Floating-point multiply
     Fmul,
+    /// Add pairwise
+    Addp,
+    /// Unsigned multiply add long
+    Umlal,
 }
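Two notes on the semantics of the new ops, since the i64x2 lowering below leans on them: `addp` sums adjacent lane pairs across the concatenation of the two sources, and `umlal` widens each source lane, multiplies, and accumulates into the double-width destination lanes (which is why `rd` is treated as a modified register rather than a pure definition in the register-usage code below). A rough scalar sketch of the lane semantics, with invented helper names:

// addp on four 32-bit lanes: pairwise sums over the concatenation rm:rn.
fn addp_4s(rn: [u32; 4], rm: [u32; 4]) -> [u32; 4] {
    [
        rn[0].wrapping_add(rn[1]),
        rn[2].wrapping_add(rn[3]),
        rm[0].wrapping_add(rm[1]),
        rm[2].wrapping_add(rm[3]),
    ]
}

// umlal (.2s sources into a .2d destination): widen, multiply, accumulate.
// The widened product of two u32 values cannot overflow a u64.
fn umlal_2d(rd: [u64; 2], rn: [u32; 2], rm: [u32; 2]) -> [u64; 2] {
    [
        rd[0].wrapping_add(rn[0] as u64 * rm[0] as u64),
        rd[1].wrapping_add(rn[1] as u64 * rm[1] as u64),
    ]
}

fn main() {
    assert_eq!(addp_4s([1, 2, 3, 4], [5, 6, 7, 8]), [3, 7, 11, 15]);
    assert_eq!(umlal_2d([10, 20], [3, 4], [5, 6]), [25, 44]);
}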
@@ -300,6 +304,17 @@ pub enum VecMisc2 {
     Fneg,
     /// Floating-point square root
     Fsqrt,
+    /// Reverse elements in 64-bit doublewords
+    Rev64,
+    /// Shift left long (by element size)
+    Shll,
+}
+
+/// A Vector narrowing operation with two registers.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum VecMiscNarrowOp {
+    /// Extract Narrow
+    Xtn,
 }

 /// An operation across the lanes of vectors.
@@ -880,6 +895,14 @@ pub enum Inst {
         size: VectorSize,
     },

+    /// Vector narrowing operation.
+    VecMiscNarrow {
+        op: VecMiscNarrowOp,
+        rd: Writable<Reg>,
+        rn: Reg,
+        size: VectorSize,
+    },
+
     /// A vector ALU op.
     VecRRR {
         alu_op: VecALUOp,
@@ -1605,10 +1628,14 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
            collector.add_mod(rd);
            collector.add_use(rn);
        }
+        &Inst::VecMiscNarrow { rd, rn, .. } => {
+            collector.add_def(rd);
+            collector.add_use(rn);
+        }
        &Inst::VecRRR {
            alu_op, rd, rn, rm, ..
        } => {
-            if alu_op == VecALUOp::Bsl {
+            if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Umlal {
                collector.add_mod(rd);
            } else {
                collector.add_def(rd);
@@ -2270,6 +2297,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            map_mod(mapper, rd);
            map_use(mapper, rn);
        }
+        &mut Inst::VecMiscNarrow {
+            ref mut rd,
+            ref mut rn,
+            ..
+        } => {
+            map_def(mapper, rd);
+            map_use(mapper, rn);
+        }
        &mut Inst::VecRRR {
            alu_op,
            ref mut rd,
@@ -2277,7 +2312,7 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
            ref mut rm,
            ..
        } => {
-            if alu_op == VecALUOp::Bsl {
+            if alu_op == VecALUOp::Bsl || alu_op == VecALUOp::Umlal {
                map_mod(mapper, rd);
            } else {
                map_def(mapper, rd);
@@ -3144,6 +3179,14 @@ impl Inst {
                let rn = show_vreg_element(rn, mb_rru, idx2, size);
                format!("mov {}, {}", rd, rn)
            }
+            &Inst::VecMiscNarrow { op, rd, rn, size } => {
+                let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+                let rn = show_vreg_vector(rn, mb_rru, size.widen());
+                let op = match op {
+                    VecMiscNarrowOp::Xtn => "xtn",
+                };
+                format!("{} {}, {}", op, rd, rn)
+            }
            &Inst::VecRRR {
                rd,
                rn,
@@ -3186,25 +3229,51 @@ impl Inst {
                    VecALUOp::Fmax => ("fmax", size),
                    VecALUOp::Fmin => ("fmin", size),
                    VecALUOp::Fmul => ("fmul", size),
+                    VecALUOp::Addp => ("addp", size),
+                    VecALUOp::Umlal => ("umlal", size),
                };
-                let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+                let rd_size = if alu_op == VecALUOp::Umlal {
+                    size.widen()
+                } else {
+                    size
+                };
+                let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
                let rn = show_vreg_vector(rn, mb_rru, size);
                let rm = show_vreg_vector(rm, mb_rru, size);
                format!("{} {}, {}, {}", op, rd, rn, rm)
            }
            &Inst::VecMisc { op, rd, rn, size } => {
+                let is_shll = op == VecMisc2::Shll;
+                let suffix = match (is_shll, size) {
+                    (true, VectorSize::Size8x8) => ", #8",
+                    (true, VectorSize::Size16x4) => ", #16",
+                    (true, VectorSize::Size32x2) => ", #32",
+                    _ => "",
+                };
+
                let (op, size) = match op {
-                    VecMisc2::Not => ("mvn", VectorSize::Size8x16),
+                    VecMisc2::Not => (
+                        "mvn",
+                        if size.is_128bits() {
+                            VectorSize::Size8x16
+                        } else {
+                            VectorSize::Size8x8
+                        },
+                    ),
                    VecMisc2::Neg => ("neg", size),
                    VecMisc2::Abs => ("abs", size),
                    VecMisc2::Fabs => ("fabs", size),
                    VecMisc2::Fneg => ("fneg", size),
                    VecMisc2::Fsqrt => ("fsqrt", size),
+                    VecMisc2::Rev64 => ("rev64", size),
+                    VecMisc2::Shll => ("shll", size),
                };
-                let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
+                let rd_size = if is_shll { size.widen() } else { size };
+
+                let rd = show_vreg_vector(rd.to_reg(), mb_rru, rd_size);
                let rn = show_vreg_vector(rn, mb_rru, size);
-                format!("{} {}, {}", op, rd, rn)
+                format!("{} {}, {}{}", op, rd, rn, suffix)
            }
            &Inst::VecLanes { op, rd, rn, size } => {
                let op = match op {
@@ -210,6 +210,110 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                    rm,
                    ra: zero_reg(),
                });
+            } else {
+                if ty == I64X2 {
+                    let tmp1 = ctx.alloc_tmp(RegClass::V128, I64X2);
+                    let tmp2 = ctx.alloc_tmp(RegClass::V128, I64X2);
+
+                    // This I64X2 multiplication is performed with several 32-bit
+                    // operations.
+
+                    // 64-bit numbers x and y can be represented as:
+                    //   x = a + 2^32(b)
+                    //   y = c + 2^32(d)
+
+                    // A 64-bit multiplication is:
+                    //   x * y = ac + 2^32(ad + bc) + 2^64(bd)
+                    // note: `2^64(bd)` can be ignored, the value is too large to fit in
+                    // 64 bits.
+
+                    // This sequence implements an I64X2 multiply, where the registers
+                    // `rn` and `rm` are split up into 32-bit components:
+                    //   rn = |d|c|b|a|
+                    //   rm = |h|g|f|e|
+                    //
+                    //   rn * rm = |cg + 2^32(ch + dg)|ae + 2^32(af + be)|
+                    //
+                    // The sequence is:
+                    //   rev64 rd.4s, rm.4s
+                    //   mul rd.4s, rd.4s, rn.4s
+                    //   xtn tmp1.2s, rn.2d
+                    //   addp rd.4s, rd.4s, rd.4s
+                    //   xtn tmp2.2s, rm.2d
+                    //   shll rd.2d, rd.2s, #32
+                    //   umlal rd.2d, tmp2.2s, tmp1.2s
+
+                    // Reverse the 32-bit elements in the 64-bit words.
+                    //   rd = |g|h|e|f|
+                    ctx.emit(Inst::VecMisc {
+                        op: VecMisc2::Rev64,
+                        rd,
+                        rn: rm,
+                        size: VectorSize::Size32x4,
+                    });
+
+                    // Calculate the high half components.
+                    //   rd = |dg|ch|be|af|
+                    //
+                    // Note that this 32-bit multiply of the high half
+                    // discards the bits that would overflow, same as
+                    // if 64-bit operations were used. Also the Shll
+                    // below would shift out the overflow bits anyway.
+                    ctx.emit(Inst::VecRRR {
+                        alu_op: VecALUOp::Mul,
+                        rd,
+                        rn: rd.to_reg(),
+                        rm: rn,
+                        size: VectorSize::Size32x4,
+                    });
+
+                    // Extract the low half components of rn.
+                    //   tmp1 = |c|a|
+                    ctx.emit(Inst::VecMiscNarrow {
+                        op: VecMiscNarrowOp::Xtn,
+                        rd: tmp1,
+                        rn,
+                        size: VectorSize::Size32x2,
+                    });
+
+                    // Sum the respective high half components.
+                    //   rd = |dg+ch|be+af||dg+ch|be+af|
+                    ctx.emit(Inst::VecRRR {
+                        alu_op: VecALUOp::Addp,
+                        rd: rd,
+                        rn: rd.to_reg(),
+                        rm: rd.to_reg(),
+                        size: VectorSize::Size32x4,
+                    });
+
+                    // Extract the low half components of rm.
+                    //   tmp2 = |g|e|
+                    ctx.emit(Inst::VecMiscNarrow {
+                        op: VecMiscNarrowOp::Xtn,
+                        rd: tmp2,
+                        rn: rm,
+                        size: VectorSize::Size32x2,
+                    });
+
+                    // Shift the high half components, into the high half.
+                    //   rd = |dg+ch << 32|be+af << 32|
+                    ctx.emit(Inst::VecMisc {
+                        op: VecMisc2::Shll,
+                        rd,
+                        rn: rd.to_reg(),
+                        size: VectorSize::Size32x2,
+                    });
+
+                    // Multiply the low components together, and accumulate with the high
+                    // half.
+                    //   rd = |rd[1] + cg|rd[0] + ae|
+                    ctx.emit(Inst::VecRRR {
+                        alu_op: VecALUOp::Umlal,
+                        rd,
+                        rn: tmp2.to_reg(),
+                        rm: tmp1.to_reg(),
+                        size: VectorSize::Size32x2,
+                    });
                } else {
                    ctx.emit(Inst::VecRRR {
                        alu_op: VecALUOp::Mul,
@@ -220,6 +324,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                    });
                }
            }
+        }

        Opcode::Umulhi | Opcode::Smulhi => {
            let rd = get_output_reg(ctx, outputs[0]);
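As a cross-check on the decomposition documented in the comments above, here is a scalar model of a single i64x2 lane (plain Rust, names invented for illustration). It applies the same truncations as the vector sequence: the 32-bit `mul` for the cross products, `addp` to sum them, `shll #32` to position the sum, and `umlal` for the widened low product, and it agrees with an ordinary wrapping 64-bit multiply:

// Scalar model of one i64x2 lane: split x and y into 32-bit halves,
// compute ac + 2^32(ad + bc), and drop the 2^64(bd) term exactly as the
// vector sequence does. Illustrative only, not part of the patch.
fn i64x2_mul_lane_model(x: u64, y: u64) -> u64 {
    let (a, b) = (x as u32, (x >> 32) as u32); // x = a + 2^32 * b
    let (c, d) = (y as u32, (y >> 32) as u32); // y = c + 2^32 * d

    // mul .4s on the rev64'd operand: the two cross products, truncated to 32 bits.
    let ad = a.wrapping_mul(d);
    let bc = b.wrapping_mul(c);
    // addp .4s: sum the cross products.
    let hi = ad.wrapping_add(bc);
    // shll #32: move the sum into the high half of the 64-bit lane.
    let acc = (hi as u64) << 32;
    // umlal: widen a and c, multiply, and accumulate into the lane.
    acc.wrapping_add(a as u64 * c as u64)
}

fn main() {
    let cases = [
        (0u64, 0u64),
        (3, 5),
        (u64::MAX, 2),
        (0xdead_beef_cafe_f00d, 0x0123_4567_89ab_cdef),
    ];
    for &(x, y) in &cases {
        assert_eq!(i64x2_mul_lane_model(x, y), x.wrapping_mul(y));
    }
}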