AArch64: Introduce an enum to specify vector instruction operand sizes
Copyright (c) 2020, Arm Limited.
This commit is contained in:
@@ -3,6 +3,7 @@
|
||||
// Some variants are never constructed, but we still want them as options in the future.
|
||||
#![allow(dead_code)]
|
||||
|
||||
use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8};
|
||||
use crate::ir::Type;
|
||||
use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::aarch64::lower::ty_bits;
|
||||
@@ -587,3 +588,55 @@ impl ScalarSize {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Describes the size of a vector operand.
///
/// Variant names follow the pattern `Size<lane bits>x<lane count>`; for
/// example `Size16x8` is a vector of eight 16-bit lanes.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum VectorSize {
    /// Eight lanes of 8 bits each.
    Size8x8,
    /// Sixteen lanes of 8 bits each.
    Size8x16,
    /// Four lanes of 16 bits each.
    Size16x4,
    /// Eight lanes of 16 bits each.
    Size16x8,
    /// Two lanes of 32 bits each.
    Size32x2,
    /// Four lanes of 32 bits each.
    Size32x4,
    /// Two lanes of 64 bits each.
    Size64x2,
}
|
||||
|
||||
impl VectorSize {
|
||||
/// Convert from a type into a vector operand size.
|
||||
pub fn from_ty(ty: Type) -> VectorSize {
|
||||
match ty {
|
||||
F32X2 => VectorSize::Size32x2,
|
||||
F32X4 => VectorSize::Size32x4,
|
||||
F64X2 => VectorSize::Size64x2,
|
||||
I8X8 => VectorSize::Size8x8,
|
||||
I8X16 => VectorSize::Size8x16,
|
||||
I16X4 => VectorSize::Size16x4,
|
||||
I16X8 => VectorSize::Size16x8,
|
||||
I32X2 => VectorSize::Size32x2,
|
||||
I32X4 => VectorSize::Size32x4,
|
||||
I64X2 => VectorSize::Size64x2,
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the integer operand size that corresponds to a lane of a vector with a certain size.
|
||||
pub fn operand_size(&self) -> OperandSize {
|
||||
match self {
|
||||
VectorSize::Size64x2 => OperandSize::Size64,
|
||||
_ => OperandSize::Size32,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the scalar operand size that corresponds to a lane of a vector with a certain size.
|
||||
pub fn lane_size(&self) -> ScalarSize {
|
||||
match self {
|
||||
VectorSize::Size8x8 => ScalarSize::Size8,
|
||||
VectorSize::Size8x16 => ScalarSize::Size8,
|
||||
VectorSize::Size16x4 => ScalarSize::Size16,
|
||||
VectorSize::Size16x8 => ScalarSize::Size16,
|
||||
VectorSize::Size32x2 => ScalarSize::Size32,
|
||||
VectorSize::Size32x4 => ScalarSize::Size32,
|
||||
VectorSize::Size64x2 => ScalarSize::Size64,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1007,7 +1007,7 @@ impl MachInstEmit for Inst {
|
||||
sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
|
||||
}
|
||||
&Inst::FpuMoveFromVec { rd, rn, idx, size } => {
|
||||
let (imm5, shift, mask) = match size {
|
||||
let (imm5, shift, mask) = match size.lane_size() {
|
||||
ScalarSize::Size32 => (0b00100, 3, 0b011),
|
||||
ScalarSize::Size64 => (0b01000, 4, 0b001),
|
||||
_ => unimplemented!(),
|
||||
@@ -1048,6 +1048,10 @@ impl MachInstEmit for Inst {
|
||||
FPUOp2::Max64 => 0b000_11110_01_1_00000_010010,
|
||||
FPUOp2::Min32 => 0b000_11110_00_1_00000_010110,
|
||||
FPUOp2::Min64 => 0b000_11110_01_1_00000_010110,
|
||||
FPUOp2::Sqadd64 => 0b010_11110_11_1_00000_000011,
|
||||
FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011,
|
||||
FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011,
|
||||
FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011,
|
||||
};
|
||||
sink.put4(enc_fpurrr(top22, rd, rn, rm));
|
||||
}
|
||||
@@ -1102,31 +1106,25 @@ impl MachInstEmit for Inst {
|
||||
};
|
||||
sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
|
||||
}
|
||||
&Inst::VecMisc { op, rd, rn, ty } => {
|
||||
let enc_size = match ty {
|
||||
I8X16 => 0b00,
|
||||
I16X8 => 0b01,
|
||||
I32X4 => 0b10,
|
||||
I64X2 => 0b11,
|
||||
_ => 0,
|
||||
&Inst::VecMisc { op, rd, rn, size } => {
|
||||
let enc_size = match size {
|
||||
VectorSize::Size8x16 => 0b00,
|
||||
VectorSize::Size16x8 => 0b01,
|
||||
VectorSize::Size32x4 => 0b10,
|
||||
VectorSize::Size64x2 => 0b11,
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
let (bits_12_16, size) = match op {
|
||||
VecMisc2::Not => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b00101, 0b00)
|
||||
}
|
||||
VecMisc2::Neg => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b01011, enc_size)
|
||||
}
|
||||
VecMisc2::Not => (0b00101, 0b00),
|
||||
VecMisc2::Neg => (0b01011, enc_size),
|
||||
};
|
||||
sink.put4(enc_vec_rr_misc(size, bits_12_16, rd, rn));
|
||||
}
|
||||
&Inst::VecLanes { op, rd, rn, ty } => {
|
||||
let (q, size) = match ty {
|
||||
I8X16 => (0b1, 0b00),
|
||||
I16X8 => (0b1, 0b01),
|
||||
I32X4 => (0b1, 0b10),
|
||||
&Inst::VecLanes { op, rd, rn, size } => {
|
||||
let (q, size) = match size {
|
||||
VectorSize::Size8x16 => (0b1, 0b00),
|
||||
VectorSize::Size16x8 => (0b1, 0b01),
|
||||
VectorSize::Size32x4 => (0b1, 0b10),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let (u, opcode) = match op {
|
||||
@@ -1250,12 +1248,12 @@ impl MachInstEmit for Inst {
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
);
|
||||
}
|
||||
&Inst::MovFromVec { rd, rn, idx, ty } => {
|
||||
let (q, imm5, shift, mask) = match ty {
|
||||
I8 => (0b0, 0b00001, 1, 0b1111),
|
||||
I16 => (0b0, 0b00010, 2, 0b0111),
|
||||
I32 => (0b0, 0b00100, 3, 0b0011),
|
||||
I64 => (0b1, 0b01000, 4, 0b0001),
|
||||
&Inst::MovFromVec { rd, rn, idx, size } => {
|
||||
let (q, imm5, shift, mask) = match size {
|
||||
VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111),
|
||||
VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111),
|
||||
VectorSize::Size32x4 => (0b0, 0b00100, 3, 0b0011),
|
||||
VectorSize::Size64x2 => (0b1, 0b01000, 4, 0b0001),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
debug_assert_eq!(idx & mask, idx);
|
||||
@@ -1268,12 +1266,12 @@ impl MachInstEmit for Inst {
|
||||
| machreg_to_gpr(rd.to_reg()),
|
||||
);
|
||||
}
|
||||
&Inst::VecDup { rd, rn, ty } => {
|
||||
let imm5 = match ty {
|
||||
I8 => 0b00001,
|
||||
I16 => 0b00010,
|
||||
I32 => 0b00100,
|
||||
I64 => 0b01000,
|
||||
&Inst::VecDup { rd, rn, size } => {
|
||||
let imm5 = match size {
|
||||
VectorSize::Size8x16 => 0b00001,
|
||||
VectorSize::Size16x8 => 0b00010,
|
||||
VectorSize::Size32x4 => 0b00100,
|
||||
VectorSize::Size64x2 => 0b01000,
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
sink.put4(
|
||||
@@ -1283,10 +1281,10 @@ impl MachInstEmit for Inst {
|
||||
| machreg_to_vec(rd.to_reg()),
|
||||
);
|
||||
}
|
||||
&Inst::VecDupFromFpu { rd, rn, ty } => {
|
||||
let imm5 = match ty {
|
||||
F32 => 0b00100,
|
||||
F64 => 0b01000,
|
||||
&Inst::VecDupFromFpu { rd, rn, size } => {
|
||||
let imm5 = match size {
|
||||
VectorSize::Size32x4 => 0b00100,
|
||||
VectorSize::Size64x2 => 0b01000,
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
sink.put4(
|
||||
@@ -1318,41 +1316,25 @@ impl MachInstEmit for Inst {
|
||||
rn,
|
||||
rm,
|
||||
alu_op,
|
||||
ty,
|
||||
size,
|
||||
} => {
|
||||
let enc_size = match ty {
|
||||
I8X16 => 0b00,
|
||||
I16X8 => 0b01,
|
||||
I32X4 => 0b10,
|
||||
I64X2 => 0b11,
|
||||
let enc_size = match size {
|
||||
VectorSize::Size8x16 => 0b00,
|
||||
VectorSize::Size16x8 => 0b01,
|
||||
VectorSize::Size32x4 => 0b10,
|
||||
VectorSize::Size64x2 => 0b11,
|
||||
_ => 0,
|
||||
};
|
||||
let enc_size_for_fcmp = match ty {
|
||||
F32X4 => 0b0,
|
||||
F64X2 => 0b1,
|
||||
let enc_size_for_fcmp = match size {
|
||||
VectorSize::Size32x4 => 0b0,
|
||||
VectorSize::Size64x2 => 0b1,
|
||||
_ => 0,
|
||||
};
|
||||
|
||||
let (top11, bit15_10) = match alu_op {
|
||||
VecALUOp::SQAddScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b010_11110_11_1, 0b000011)
|
||||
}
|
||||
VecALUOp::Sqadd => (0b010_01110_00_1 | enc_size << 1, 0b000011),
|
||||
VecALUOp::SQSubScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b010_11110_11_1, 0b001011)
|
||||
}
|
||||
VecALUOp::Sqsub => (0b010_01110_00_1 | enc_size << 1, 0b001011),
|
||||
VecALUOp::UQAddScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b011_11110_11_1, 0b000011)
|
||||
}
|
||||
VecALUOp::Uqadd => (0b011_01110_00_1 | enc_size << 1, 0b000011),
|
||||
VecALUOp::UQSubScalar => {
|
||||
debug_assert_eq!(I64, ty);
|
||||
(0b011_11110_11_1, 0b001011)
|
||||
}
|
||||
VecALUOp::Uqsub => (0b011_01110_00_1 | enc_size << 1, 0b001011),
|
||||
VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size << 1, 0b100011),
|
||||
VecALUOp::Cmge => (0b010_01110_00_1 | enc_size << 1, 0b001111),
|
||||
@@ -1364,31 +1346,16 @@ impl MachInstEmit for Inst {
|
||||
VecALUOp::Fcmge => (0b011_01110_00_1 | enc_size_for_fcmp << 1, 0b111001),
|
||||
// The following logical instructions operate on bytes, so are not encoded differently
|
||||
// for the different vector types.
|
||||
VecALUOp::And => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b010_01110_00_1, 0b000111)
|
||||
}
|
||||
VecALUOp::Bic => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b010_01110_01_1, 0b000111)
|
||||
}
|
||||
VecALUOp::Orr => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b010_01110_10_1, 0b000111)
|
||||
}
|
||||
VecALUOp::Eor => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b011_01110_00_1, 0b000111)
|
||||
}
|
||||
VecALUOp::Bsl => {
|
||||
debug_assert_eq!(128, ty_bits(ty));
|
||||
(0b011_01110_01_1, 0b000111)
|
||||
}
|
||||
VecALUOp::And => (0b010_01110_00_1, 0b000111),
|
||||
VecALUOp::Bic => (0b010_01110_01_1, 0b000111),
|
||||
VecALUOp::Orr => (0b010_01110_10_1, 0b000111),
|
||||
VecALUOp::Eor => (0b011_01110_00_1, 0b000111),
|
||||
VecALUOp::Bsl => (0b011_01110_01_1, 0b000111),
|
||||
VecALUOp::Umaxp => (0b011_01110_00_1 | enc_size << 1, 0b101001),
|
||||
VecALUOp::Add => (0b010_01110_00_1 | enc_size << 1, 0b100001),
|
||||
VecALUOp::Sub => (0b011_01110_00_1 | enc_size << 1, 0b100001),
|
||||
VecALUOp::Mul => {
|
||||
debug_assert_ne!(I64X2, ty);
|
||||
debug_assert_ne!(size, VectorSize::Size64x2);
|
||||
(0b010_01110_00_1 | enc_size << 1, 0b100111)
|
||||
}
|
||||
VecALUOp::Sshl => (0b010_01110_00_1 | enc_size << 1, 0b010001),
|
||||
|
||||
@@ -1841,7 +1841,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_xreg(3),
|
||||
rn: vreg(27),
|
||||
idx: 14,
|
||||
ty: I8,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"633F1D0E",
|
||||
"umov w3, v27.b[14]",
|
||||
@@ -1851,7 +1851,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_xreg(24),
|
||||
rn: vreg(5),
|
||||
idx: 3,
|
||||
ty: I16,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"B83C0E0E",
|
||||
"umov w24, v5.h[3]",
|
||||
@@ -1861,7 +1861,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_xreg(12),
|
||||
rn: vreg(17),
|
||||
idx: 1,
|
||||
ty: I32,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"2C3E0C0E",
|
||||
"mov w12, v17.s[1]",
|
||||
@@ -1871,7 +1871,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_xreg(21),
|
||||
rn: vreg(20),
|
||||
idx: 0,
|
||||
ty: I64,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"953E084E",
|
||||
"mov x21, v20.d[0]",
|
||||
@@ -1900,7 +1900,7 @@ fn test_aarch64_binemit() {
|
||||
Inst::VecDup {
|
||||
rd: writable_vreg(25),
|
||||
rn: xreg(7),
|
||||
ty: I8,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"F90C014E",
|
||||
"dup v25.16b, w7",
|
||||
@@ -1909,7 +1909,7 @@ fn test_aarch64_binemit() {
|
||||
Inst::VecDup {
|
||||
rd: writable_vreg(2),
|
||||
rn: xreg(23),
|
||||
ty: I16,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"E20E024E",
|
||||
"dup v2.8h, w23",
|
||||
@@ -1918,7 +1918,7 @@ fn test_aarch64_binemit() {
|
||||
Inst::VecDup {
|
||||
rd: writable_vreg(0),
|
||||
rn: xreg(28),
|
||||
ty: I32,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"800F044E",
|
||||
"dup v0.4s, w28",
|
||||
@@ -1927,7 +1927,7 @@ fn test_aarch64_binemit() {
|
||||
Inst::VecDup {
|
||||
rd: writable_vreg(31),
|
||||
rn: xreg(5),
|
||||
ty: I64,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"BF0C084E",
|
||||
"dup v31.2d, x5",
|
||||
@@ -1936,7 +1936,7 @@ fn test_aarch64_binemit() {
|
||||
Inst::VecDupFromFpu {
|
||||
rd: writable_vreg(14),
|
||||
rn: vreg(19),
|
||||
ty: F32,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"6E06044E",
|
||||
"dup v14.4s, v19.s[0]",
|
||||
@@ -1945,7 +1945,7 @@ fn test_aarch64_binemit() {
|
||||
Inst::VecDupFromFpu {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(10),
|
||||
ty: F64,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"5205084E",
|
||||
"dup v18.2d, v10.d[0]",
|
||||
@@ -2004,50 +2004,6 @@ fn test_aarch64_binemit() {
|
||||
"5CA4202F",
|
||||
"uxtl v28.2d, v2.2s",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
rd: writable_vreg(21),
|
||||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::UQAddScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D50EF77E",
|
||||
"uqadd d21, d22, d23",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
rd: writable_vreg(21),
|
||||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::SQAddScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D50EF75E",
|
||||
"sqadd d21, d22, d23",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
rd: writable_vreg(21),
|
||||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::UQSubScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D52EF77E",
|
||||
"uqsub d21, d22, d23",
|
||||
));
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
rd: writable_vreg(21),
|
||||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
alu_op: VecALUOp::SQSubScalar,
|
||||
ty: I64,
|
||||
},
|
||||
"D52EF75E",
|
||||
"sqsub d21, d22, d23",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
@@ -2055,7 +2011,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(2),
|
||||
rm: vreg(8),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"410C284E",
|
||||
"sqadd v1.16b, v2.16b, v8.16b",
|
||||
@@ -2067,7 +2023,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(12),
|
||||
rm: vreg(28),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"810D7C4E",
|
||||
"sqadd v1.8h, v12.8h, v28.8h",
|
||||
@@ -2079,7 +2035,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(12),
|
||||
rn: vreg(2),
|
||||
rm: vreg(6),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"4C0CA64E",
|
||||
"sqadd v12.4s, v2.4s, v6.4s",
|
||||
@@ -2091,7 +2047,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(20),
|
||||
rn: vreg(7),
|
||||
rm: vreg(13),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"F40CED4E",
|
||||
"sqadd v20.2d, v7.2d, v13.2d",
|
||||
@@ -2103,7 +2059,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(2),
|
||||
rm: vreg(8),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"412C284E",
|
||||
"sqsub v1.16b, v2.16b, v8.16b",
|
||||
@@ -2115,7 +2071,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(12),
|
||||
rm: vreg(28),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"812D7C4E",
|
||||
"sqsub v1.8h, v12.8h, v28.8h",
|
||||
@@ -2127,7 +2083,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(12),
|
||||
rn: vreg(2),
|
||||
rm: vreg(6),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"4C2CA64E",
|
||||
"sqsub v12.4s, v2.4s, v6.4s",
|
||||
@@ -2139,7 +2095,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(20),
|
||||
rn: vreg(7),
|
||||
rm: vreg(13),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"F42CED4E",
|
||||
"sqsub v20.2d, v7.2d, v13.2d",
|
||||
@@ -2151,7 +2107,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(2),
|
||||
rm: vreg(8),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"410C286E",
|
||||
"uqadd v1.16b, v2.16b, v8.16b",
|
||||
@@ -2163,7 +2119,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(12),
|
||||
rm: vreg(28),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"810D7C6E",
|
||||
"uqadd v1.8h, v12.8h, v28.8h",
|
||||
@@ -2175,7 +2131,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(12),
|
||||
rn: vreg(2),
|
||||
rm: vreg(6),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"4C0CA66E",
|
||||
"uqadd v12.4s, v2.4s, v6.4s",
|
||||
@@ -2187,7 +2143,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(20),
|
||||
rn: vreg(7),
|
||||
rm: vreg(13),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"F40CED6E",
|
||||
"uqadd v20.2d, v7.2d, v13.2d",
|
||||
@@ -2199,7 +2155,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(2),
|
||||
rm: vreg(8),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"412C286E",
|
||||
"uqsub v1.16b, v2.16b, v8.16b",
|
||||
@@ -2211,7 +2167,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(12),
|
||||
rm: vreg(28),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"812D7C6E",
|
||||
"uqsub v1.8h, v12.8h, v28.8h",
|
||||
@@ -2223,7 +2179,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(12),
|
||||
rn: vreg(2),
|
||||
rm: vreg(6),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"4C2CA66E",
|
||||
"uqsub v12.4s, v2.4s, v6.4s",
|
||||
@@ -2235,7 +2191,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(20),
|
||||
rn: vreg(7),
|
||||
rm: vreg(13),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"F42CED6E",
|
||||
"uqsub v20.2d, v7.2d, v13.2d",
|
||||
@@ -2247,7 +2203,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"E38E386E",
|
||||
"cmeq v3.16b, v23.16b, v24.16b",
|
||||
@@ -2259,7 +2215,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"E336384E",
|
||||
"cmgt v3.16b, v23.16b, v24.16b",
|
||||
@@ -2271,7 +2227,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(23),
|
||||
rn: vreg(9),
|
||||
rm: vreg(12),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"373D2C4E",
|
||||
"cmge v23.16b, v9.16b, v12.16b",
|
||||
@@ -2283,7 +2239,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(5),
|
||||
rn: vreg(1),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"2534216E",
|
||||
"cmhi v5.16b, v1.16b, v1.16b",
|
||||
@@ -2295,7 +2251,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(2),
|
||||
rm: vreg(15),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"483C2F6E",
|
||||
"cmhs v8.16b, v2.16b, v15.16b",
|
||||
@@ -2307,7 +2263,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"E38E786E",
|
||||
"cmeq v3.8h, v23.8h, v24.8h",
|
||||
@@ -2319,7 +2275,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"E336784E",
|
||||
"cmgt v3.8h, v23.8h, v24.8h",
|
||||
@@ -2331,7 +2287,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(23),
|
||||
rn: vreg(9),
|
||||
rm: vreg(12),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"373D6C4E",
|
||||
"cmge v23.8h, v9.8h, v12.8h",
|
||||
@@ -2343,7 +2299,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(5),
|
||||
rn: vreg(1),
|
||||
rm: vreg(1),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"2534616E",
|
||||
"cmhi v5.8h, v1.8h, v1.8h",
|
||||
@@ -2355,7 +2311,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(2),
|
||||
rm: vreg(15),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"483C6F6E",
|
||||
"cmhs v8.8h, v2.8h, v15.8h",
|
||||
@@ -2367,7 +2323,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"E38EB86E",
|
||||
"cmeq v3.4s, v23.4s, v24.4s",
|
||||
@@ -2379,7 +2335,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(23),
|
||||
rm: vreg(24),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"E336B84E",
|
||||
"cmgt v3.4s, v23.4s, v24.4s",
|
||||
@@ -2391,7 +2347,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(23),
|
||||
rn: vreg(9),
|
||||
rm: vreg(12),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"373DAC4E",
|
||||
"cmge v23.4s, v9.4s, v12.4s",
|
||||
@@ -2403,7 +2359,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(5),
|
||||
rn: vreg(1),
|
||||
rm: vreg(1),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"2534A16E",
|
||||
"cmhi v5.4s, v1.4s, v1.4s",
|
||||
@@ -2415,7 +2371,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(2),
|
||||
rm: vreg(15),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"483CAF6E",
|
||||
"cmhs v8.4s, v2.4s, v15.4s",
|
||||
@@ -2427,7 +2383,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(28),
|
||||
rn: vreg(12),
|
||||
rm: vreg(4),
|
||||
ty: F32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"9CE5244E",
|
||||
"fcmeq v28.4s, v12.4s, v4.4s",
|
||||
@@ -2439,7 +2395,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(16),
|
||||
rm: vreg(31),
|
||||
ty: F64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"03E6FF6E",
|
||||
"fcmgt v3.2d, v16.2d, v31.2d",
|
||||
@@ -2451,7 +2407,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(23),
|
||||
rm: vreg(0),
|
||||
ty: F64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"F2E6606E",
|
||||
"fcmge v18.2d, v23.2d, v0.2d",
|
||||
@@ -2463,7 +2419,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(20),
|
||||
rn: vreg(19),
|
||||
rm: vreg(18),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"741E324E",
|
||||
"and v20.16b, v19.16b, v18.16b",
|
||||
@@ -2475,7 +2431,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(11),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"681D614E",
|
||||
"bic v8.16b, v11.16b, v1.16b",
|
||||
@@ -2487,7 +2443,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(15),
|
||||
rn: vreg(2),
|
||||
rm: vreg(12),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"4F1CAC4E",
|
||||
"orr v15.16b, v2.16b, v12.16b",
|
||||
@@ -2499,7 +2455,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(3),
|
||||
rm: vreg(22),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"721C366E",
|
||||
"eor v18.16b, v3.16b, v22.16b",
|
||||
@@ -2511,7 +2467,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(9),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"281D616E",
|
||||
"bsl v8.16b, v9.16b, v1.16b",
|
||||
@@ -2523,7 +2479,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(12),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"88A5216E",
|
||||
"umaxp v8.16b, v12.16b, v1.16b",
|
||||
@@ -2535,7 +2491,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(6),
|
||||
rm: vreg(1),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"C1A4616E",
|
||||
"umaxp v1.8h, v6.8h, v1.8h",
|
||||
@@ -2547,7 +2503,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(20),
|
||||
rm: vreg(16),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"81A6B06E",
|
||||
"umaxp v1.4s, v20.4s, v16.4s",
|
||||
@@ -2559,7 +2515,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(5),
|
||||
rn: vreg(1),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"2584214E",
|
||||
"add v5.16b, v1.16b, v1.16b",
|
||||
@@ -2571,7 +2527,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(7),
|
||||
rn: vreg(13),
|
||||
rm: vreg(2),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"A785624E",
|
||||
"add v7.8h, v13.8h, v2.8h",
|
||||
@@ -2583,7 +2539,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(9),
|
||||
rm: vreg(6),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"3285A64E",
|
||||
"add v18.4s, v9.4s, v6.4s",
|
||||
@@ -2595,7 +2551,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(3),
|
||||
rm: vreg(2),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"6184E24E",
|
||||
"add v1.2d, v3.2d, v2.2d",
|
||||
@@ -2607,7 +2563,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(5),
|
||||
rn: vreg(1),
|
||||
rm: vreg(1),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"2584216E",
|
||||
"sub v5.16b, v1.16b, v1.16b",
|
||||
@@ -2619,7 +2575,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(7),
|
||||
rn: vreg(13),
|
||||
rm: vreg(2),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"A785626E",
|
||||
"sub v7.8h, v13.8h, v2.8h",
|
||||
@@ -2631,7 +2587,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(9),
|
||||
rm: vreg(6),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"3285A66E",
|
||||
"sub v18.4s, v9.4s, v6.4s",
|
||||
@@ -2643,7 +2599,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(0),
|
||||
rm: vreg(8),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"1284E86E",
|
||||
"sub v18.2d, v0.2d, v8.2d",
|
||||
@@ -2655,7 +2611,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(25),
|
||||
rn: vreg(9),
|
||||
rm: vreg(8),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"399D284E",
|
||||
"mul v25.16b, v9.16b, v8.16b",
|
||||
@@ -2667,7 +2623,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(30),
|
||||
rn: vreg(30),
|
||||
rm: vreg(12),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"DE9F6C4E",
|
||||
"mul v30.8h, v30.8h, v12.8h",
|
||||
@@ -2679,7 +2635,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(18),
|
||||
rm: vreg(18),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"529EB24E",
|
||||
"mul v18.4s, v18.4s, v18.4s",
|
||||
@@ -2691,7 +2647,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(18),
|
||||
rm: vreg(18),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"5246326E",
|
||||
"ushl v18.16b, v18.16b, v18.16b",
|
||||
@@ -2703,7 +2659,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(18),
|
||||
rm: vreg(18),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"5246726E",
|
||||
"ushl v18.8h, v18.8h, v18.8h",
|
||||
@@ -2715,7 +2671,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(1),
|
||||
rm: vreg(21),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"3244B56E",
|
||||
"ushl v18.4s, v1.4s, v21.4s",
|
||||
@@ -2727,7 +2683,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(5),
|
||||
rn: vreg(7),
|
||||
rm: vreg(19),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"E544F36E",
|
||||
"ushl v5.2d, v7.2d, v19.2d",
|
||||
@@ -2739,7 +2695,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(18),
|
||||
rm: vreg(18),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"5246324E",
|
||||
"sshl v18.16b, v18.16b, v18.16b",
|
||||
@@ -2751,7 +2707,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(30),
|
||||
rn: vreg(1),
|
||||
rm: vreg(29),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"3E447D4E",
|
||||
"sshl v30.8h, v1.8h, v29.8h",
|
||||
@@ -2763,7 +2719,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(22),
|
||||
rm: vreg(21),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"C846B54E",
|
||||
"sshl v8.4s, v22.4s, v21.4s",
|
||||
@@ -2775,7 +2731,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(22),
|
||||
rm: vreg(2),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"C846E24E",
|
||||
"sshl v8.2d, v22.2d, v2.2d",
|
||||
@@ -2786,7 +2742,7 @@ fn test_aarch64_binemit() {
|
||||
op: VecMisc2::Not,
|
||||
rd: writable_vreg(2),
|
||||
rn: vreg(1),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"2258206E",
|
||||
"mvn v2.16b, v1.16b",
|
||||
@@ -2797,7 +2753,7 @@ fn test_aarch64_binemit() {
|
||||
op: VecMisc2::Neg,
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(12),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"88B9206E",
|
||||
"neg v8.16b, v12.16b",
|
||||
@@ -2808,7 +2764,7 @@ fn test_aarch64_binemit() {
|
||||
op: VecMisc2::Neg,
|
||||
rd: writable_vreg(0),
|
||||
rn: vreg(31),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"E0BB606E",
|
||||
"neg v0.8h, v31.8h",
|
||||
@@ -2819,7 +2775,7 @@ fn test_aarch64_binemit() {
|
||||
op: VecMisc2::Neg,
|
||||
rd: writable_vreg(2),
|
||||
rn: vreg(3),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"62B8A06E",
|
||||
"neg v2.4s, v3.4s",
|
||||
@@ -2830,7 +2786,7 @@ fn test_aarch64_binemit() {
|
||||
op: VecMisc2::Neg,
|
||||
rd: writable_vreg(10),
|
||||
rn: vreg(8),
|
||||
ty: I64X2,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"0AB9E06E",
|
||||
"neg v10.2d, v8.2d",
|
||||
@@ -2841,7 +2797,7 @@ fn test_aarch64_binemit() {
|
||||
op: VecLanesOp::Uminv,
|
||||
rd: writable_vreg(2),
|
||||
rn: vreg(1),
|
||||
ty: I8X16,
|
||||
size: VectorSize::Size8x16,
|
||||
},
|
||||
"22A8316E",
|
||||
"uminv b2, v1.16b",
|
||||
@@ -2852,7 +2808,7 @@ fn test_aarch64_binemit() {
|
||||
op: VecLanesOp::Uminv,
|
||||
rd: writable_vreg(3),
|
||||
rn: vreg(11),
|
||||
ty: I16X8,
|
||||
size: VectorSize::Size16x8,
|
||||
},
|
||||
"63A9716E",
|
||||
"uminv h3, v11.8h",
|
||||
@@ -2863,7 +2819,7 @@ fn test_aarch64_binemit() {
|
||||
op: VecLanesOp::Uminv,
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(4),
|
||||
ty: I32X4,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"92A8B16E",
|
||||
"uminv s18, v4.4s",
|
||||
@@ -3214,7 +3170,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(1),
|
||||
rn: vreg(30),
|
||||
idx: 2,
|
||||
size: ScalarSize::Size32,
|
||||
size: VectorSize::Size32x4,
|
||||
},
|
||||
"C107145E",
|
||||
"mov s1, v30.s[2]",
|
||||
@@ -3225,7 +3181,7 @@ fn test_aarch64_binemit() {
|
||||
rd: writable_vreg(23),
|
||||
rn: vreg(11),
|
||||
idx: 0,
|
||||
size: ScalarSize::Size64,
|
||||
size: VectorSize::Size64x2,
|
||||
},
|
||||
"7705085E",
|
||||
"mov d23, v11.d[0]",
|
||||
@@ -3443,6 +3399,50 @@ fn test_aarch64_binemit() {
|
||||
"fmin d15, d30, d31",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRR {
|
||||
fpu_op: FPUOp2::Uqadd64,
|
||||
rd: writable_vreg(21),
|
||||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
},
|
||||
"D50EF77E",
|
||||
"uqadd d21, d22, d23",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRR {
|
||||
fpu_op: FPUOp2::Sqadd64,
|
||||
rd: writable_vreg(21),
|
||||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
},
|
||||
"D50EF75E",
|
||||
"sqadd d21, d22, d23",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRR {
|
||||
fpu_op: FPUOp2::Uqsub64,
|
||||
rd: writable_vreg(21),
|
||||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
},
|
||||
"D52EF77E",
|
||||
"uqsub d21, d22, d23",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRR {
|
||||
fpu_op: FPUOp2::Sqsub64,
|
||||
rd: writable_vreg(21),
|
||||
rn: vreg(22),
|
||||
rm: vreg(23),
|
||||
},
|
||||
"D52EF75E",
|
||||
"sqsub d21, d22, d23",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::FpuRRRR {
|
||||
fpu_op: FPUOp3::MAdd32,
|
||||
|
||||
@@ -5,8 +5,8 @@
|
||||
|
||||
use crate::binemit::CodeOffset;
|
||||
use crate::ir::types::{
|
||||
B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X2, F32X4, F64, F64X2, FFLAGS, I16,
|
||||
I16X4, I16X8, I32, I32X2, I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS, R32, R64,
|
||||
B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X4, F64, F64X2, FFLAGS, I16, I16X8,
|
||||
I32, I32X4, I64, I64X2, I8, I8X16, IFLAGS, R32, R64,
|
||||
};
|
||||
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
|
||||
use crate::machinst::*;
|
||||
@@ -125,6 +125,14 @@ pub enum FPUOp2 {
|
||||
Max64,
|
||||
Min32,
|
||||
Min64,
|
||||
/// Signed saturating add
|
||||
Sqadd64,
|
||||
/// Unsigned saturating add
|
||||
Uqadd64,
|
||||
/// Signed saturating subtract
|
||||
Sqsub64,
|
||||
/// Unsigned saturating subtract
|
||||
Uqsub64,
|
||||
}
|
||||
|
||||
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
|
||||
@@ -208,16 +216,12 @@ pub enum VecExtendOp {
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub enum VecALUOp {
|
||||
/// Signed saturating add
|
||||
SQAddScalar,
|
||||
Sqadd,
|
||||
/// Unsigned saturating add
|
||||
UQAddScalar,
|
||||
Uqadd,
|
||||
/// Signed saturating subtract
|
||||
SQSubScalar,
|
||||
Sqsub,
|
||||
/// Unsigned saturating subtract
|
||||
UQSubScalar,
|
||||
Uqsub,
|
||||
/// Compare bitwise equal
|
||||
Cmeq,
|
||||
@@ -590,7 +594,7 @@ pub enum Inst {
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
idx: u8,
|
||||
size: ScalarSize,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// 1-op FPU instruction.
|
||||
@@ -734,21 +738,21 @@ pub enum Inst {
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
idx: u8,
|
||||
ty: Type,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Duplicate general-purpose register to vector.
|
||||
VecDup {
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
ty: Type,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Duplicate scalar to vector.
|
||||
VecDupFromFpu {
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
ty: Type,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Vector extend.
|
||||
@@ -764,7 +768,7 @@ pub enum Inst {
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
rm: Reg,
|
||||
ty: Type,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Vector two register miscellaneous instruction.
|
||||
@@ -772,7 +776,7 @@ pub enum Inst {
|
||||
op: VecMisc2,
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
ty: Type,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Vector instruction across lanes.
|
||||
@@ -780,7 +784,7 @@ pub enum Inst {
|
||||
op: VecLanesOp,
|
||||
rd: Writable<Reg>,
|
||||
rn: Reg,
|
||||
ty: Type,
|
||||
size: VectorSize,
|
||||
},
|
||||
|
||||
/// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
|
||||
@@ -2504,13 +2508,8 @@ impl Inst {
|
||||
format!("mov {}.16b, {}.16b", rd, rn)
|
||||
}
|
||||
&Inst::FpuMoveFromVec { rd, rn, idx, size } => {
|
||||
let vector_type = match size {
|
||||
ScalarSize::Size32 => F32,
|
||||
ScalarSize::Size64 => F64,
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
|
||||
let rn = show_vreg_element(rn, mb_rru, idx, vector_type);
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
|
||||
let rn = show_vreg_element(rn, mb_rru, idx, size);
|
||||
format!("mov {}, {}", rd, rn)
|
||||
}
|
||||
&Inst::FpuRR { fpu_op, rd, rn } => {
|
||||
@@ -2542,6 +2541,10 @@ impl Inst {
|
||||
FPUOp2::Max64 => ("fmax", ScalarSize::Size64),
|
||||
FPUOp2::Min32 => ("fmin", ScalarSize::Size32),
|
||||
FPUOp2::Min64 => ("fmin", ScalarSize::Size64),
|
||||
FPUOp2::Sqadd64 => ("sqadd", ScalarSize::Size64),
|
||||
FPUOp2::Uqadd64 => ("uqadd", ScalarSize::Size64),
|
||||
FPUOp2::Sqsub64 => ("sqsub", ScalarSize::Size64),
|
||||
FPUOp2::Uqsub64 => ("uqsub", ScalarSize::Size64),
|
||||
};
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
|
||||
let rn = show_vreg_scalar(rn, mb_rru, size);
|
||||
@@ -2557,7 +2560,7 @@ impl Inst {
|
||||
};
|
||||
|
||||
let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>) -> String = if vector {
|
||||
|reg, mb_rru| show_vreg_vector(reg, mb_rru, F32X2)
|
||||
|reg, mb_rru| show_vreg_vector(reg, mb_rru, VectorSize::Size32x2)
|
||||
} else {
|
||||
|reg, mb_rru| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64)
|
||||
};
|
||||
@@ -2706,45 +2709,36 @@ impl Inst {
|
||||
let rn = rn.show_rru(mb_rru);
|
||||
format!("mov {}.d[0], {}", rd, rn)
|
||||
}
|
||||
&Inst::MovFromVec { rd, rn, idx, ty } => {
|
||||
let op = match ty {
|
||||
I32 | I64 => "mov",
|
||||
_ => "umov",
|
||||
&Inst::MovFromVec { rd, rn, idx, size } => {
|
||||
let op = match size {
|
||||
VectorSize::Size8x16 => "umov",
|
||||
VectorSize::Size16x8 => "umov",
|
||||
VectorSize::Size32x4 => "mov",
|
||||
VectorSize::Size64x2 => "mov",
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::from_ty(ty));
|
||||
let rn = show_vreg_element(rn, mb_rru, idx, ty);
|
||||
let rd = show_ireg_sized(rd.to_reg(), mb_rru, size.operand_size());
|
||||
let rn = show_vreg_element(rn, mb_rru, idx, size);
|
||||
format!("{} {}, {}", op, rd, rn)
|
||||
}
|
||||
&Inst::VecDup { rd, rn, ty } => {
|
||||
let vector_type = match ty {
|
||||
I8 => I8X16,
|
||||
I16 => I16X8,
|
||||
I32 => I32X4,
|
||||
I64 => I64X2,
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, vector_type);
|
||||
let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_ty(ty));
|
||||
&Inst::VecDup { rd, rn, size } => {
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
|
||||
let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
|
||||
format!("dup {}, {}", rd, rn)
|
||||
}
|
||||
&Inst::VecDupFromFpu { rd, rn, ty } => {
|
||||
let vector_type = match ty {
|
||||
F32 => F32X4,
|
||||
F64 => F64X2,
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, vector_type);
|
||||
let rn = show_vreg_element(rn, mb_rru, 0, ty);
|
||||
&Inst::VecDupFromFpu { rd, rn, size } => {
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
|
||||
let rn = show_vreg_element(rn, mb_rru, 0, size);
|
||||
format!("dup {}, {}", rd, rn)
|
||||
}
|
||||
&Inst::VecExtend { t, rd, rn } => {
|
||||
let (op, dest, src) = match t {
|
||||
VecExtendOp::Sxtl8 => ("sxtl", I16X8, I8X8),
|
||||
VecExtendOp::Sxtl16 => ("sxtl", I32X4, I16X4),
|
||||
VecExtendOp::Sxtl32 => ("sxtl", I64X2, I32X2),
|
||||
VecExtendOp::Uxtl8 => ("uxtl", I16X8, I8X8),
|
||||
VecExtendOp::Uxtl16 => ("uxtl", I32X4, I16X4),
|
||||
VecExtendOp::Uxtl32 => ("uxtl", I64X2, I32X2),
|
||||
VecExtendOp::Sxtl8 => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8),
|
||||
VecExtendOp::Sxtl16 => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4),
|
||||
VecExtendOp::Sxtl32 => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2),
|
||||
VecExtendOp::Uxtl8 => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8),
|
||||
VecExtendOp::Uxtl16 => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4),
|
||||
VecExtendOp::Uxtl32 => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2),
|
||||
};
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
|
||||
let rn = show_vreg_vector(rn, mb_rru, src);
|
||||
@@ -2755,72 +2749,54 @@ impl Inst {
|
||||
rn,
|
||||
rm,
|
||||
alu_op,
|
||||
ty,
|
||||
size,
|
||||
} => {
|
||||
let (op, vector, ty) = match alu_op {
|
||||
VecALUOp::SQAddScalar => ("sqadd", false, ty),
|
||||
VecALUOp::Sqadd => ("sqadd", true, ty),
|
||||
VecALUOp::UQAddScalar => ("uqadd", false, ty),
|
||||
VecALUOp::Uqadd => ("uqadd", true, ty),
|
||||
VecALUOp::SQSubScalar => ("sqsub", false, ty),
|
||||
VecALUOp::Sqsub => ("sqsub", true, ty),
|
||||
VecALUOp::UQSubScalar => ("uqsub", false, ty),
|
||||
VecALUOp::Uqsub => ("uqsub", true, ty),
|
||||
VecALUOp::Cmeq => ("cmeq", true, ty),
|
||||
VecALUOp::Cmge => ("cmge", true, ty),
|
||||
VecALUOp::Cmgt => ("cmgt", true, ty),
|
||||
VecALUOp::Cmhs => ("cmhs", true, ty),
|
||||
VecALUOp::Cmhi => ("cmhi", true, ty),
|
||||
VecALUOp::Fcmeq => ("fcmeq", true, ty),
|
||||
VecALUOp::Fcmgt => ("fcmgt", true, ty),
|
||||
VecALUOp::Fcmge => ("fcmge", true, ty),
|
||||
VecALUOp::And => ("and", true, I8X16),
|
||||
VecALUOp::Bic => ("bic", true, I8X16),
|
||||
VecALUOp::Orr => ("orr", true, I8X16),
|
||||
VecALUOp::Eor => ("eor", true, I8X16),
|
||||
VecALUOp::Bsl => ("bsl", true, I8X16),
|
||||
VecALUOp::Umaxp => ("umaxp", true, ty),
|
||||
VecALUOp::Add => ("add", true, ty),
|
||||
VecALUOp::Sub => ("sub", true, ty),
|
||||
VecALUOp::Mul => ("mul", true, ty),
|
||||
VecALUOp::Sshl => ("sshl", true, ty),
|
||||
VecALUOp::Ushl => ("ushl", true, ty),
|
||||
let (op, size) = match alu_op {
|
||||
VecALUOp::Sqadd => ("sqadd", size),
|
||||
VecALUOp::Uqadd => ("uqadd", size),
|
||||
VecALUOp::Sqsub => ("sqsub", size),
|
||||
VecALUOp::Uqsub => ("uqsub", size),
|
||||
VecALUOp::Cmeq => ("cmeq", size),
|
||||
VecALUOp::Cmge => ("cmge", size),
|
||||
VecALUOp::Cmgt => ("cmgt", size),
|
||||
VecALUOp::Cmhs => ("cmhs", size),
|
||||
VecALUOp::Cmhi => ("cmhi", size),
|
||||
VecALUOp::Fcmeq => ("fcmeq", size),
|
||||
VecALUOp::Fcmgt => ("fcmgt", size),
|
||||
VecALUOp::Fcmge => ("fcmge", size),
|
||||
VecALUOp::And => ("and", VectorSize::Size8x16),
|
||||
VecALUOp::Bic => ("bic", VectorSize::Size8x16),
|
||||
VecALUOp::Orr => ("orr", VectorSize::Size8x16),
|
||||
VecALUOp::Eor => ("eor", VectorSize::Size8x16),
|
||||
VecALUOp::Bsl => ("bsl", VectorSize::Size8x16),
|
||||
VecALUOp::Umaxp => ("umaxp", size),
|
||||
VecALUOp::Add => ("add", size),
|
||||
VecALUOp::Sub => ("sub", size),
|
||||
VecALUOp::Mul => ("mul", size),
|
||||
VecALUOp::Sshl => ("sshl", size),
|
||||
VecALUOp::Ushl => ("ushl", size),
|
||||
};
|
||||
|
||||
let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>, Type) -> String = if vector {
|
||||
|reg, mb_rru, ty| show_vreg_vector(reg, mb_rru, ty)
|
||||
} else {
|
||||
|reg, mb_rru, _ty| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64)
|
||||
};
|
||||
|
||||
let rd = show_vreg_fn(rd.to_reg(), mb_rru, ty);
|
||||
let rn = show_vreg_fn(rn, mb_rru, ty);
|
||||
let rm = show_vreg_fn(rm, mb_rru, ty);
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
|
||||
let rn = show_vreg_vector(rn, mb_rru, size);
|
||||
let rm = show_vreg_vector(rm, mb_rru, size);
|
||||
format!("{} {}, {}, {}", op, rd, rn, rm)
|
||||
}
|
||||
&Inst::VecMisc { op, rd, rn, ty } => {
|
||||
let (op, ty) = match op {
|
||||
VecMisc2::Not => ("mvn", I8X16),
|
||||
VecMisc2::Neg => ("neg", ty),
|
||||
&Inst::VecMisc { op, rd, rn, size } => {
|
||||
let (op, size) = match op {
|
||||
VecMisc2::Not => ("mvn", VectorSize::Size8x16),
|
||||
VecMisc2::Neg => ("neg", size),
|
||||
};
|
||||
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, ty);
|
||||
let rn = show_vreg_vector(rn, mb_rru, ty);
|
||||
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
|
||||
let rn = show_vreg_vector(rn, mb_rru, size);
|
||||
format!("{} {}, {}", op, rd, rn)
|
||||
}
|
||||
&Inst::VecLanes { op, rd, rn, ty } => {
|
||||
&Inst::VecLanes { op, rd, rn, size } => {
|
||||
let op = match op {
|
||||
VecLanesOp::Uminv => "uminv",
|
||||
};
|
||||
let size = match ty {
|
||||
I8X16 => ScalarSize::Size8,
|
||||
I16X8 => ScalarSize::Size16,
|
||||
I32X4 => ScalarSize::Size32,
|
||||
_ => unimplemented!(),
|
||||
};
|
||||
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
|
||||
let rn = show_vreg_vector(rn, mb_rru, ty);
|
||||
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
|
||||
let rn = show_vreg_vector(rn, mb_rru, size);
|
||||
format!("{} {}, {}", op, rd, rn)
|
||||
}
|
||||
&Inst::MovToNZCV { rn } => {
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
//! AArch64 ISA definitions: registers.
|
||||
|
||||
use crate::ir::types::*;
|
||||
use crate::isa::aarch64::inst::OperandSize;
|
||||
use crate::isa::aarch64::inst::ScalarSize;
|
||||
use crate::isa::aarch64::inst::VectorSize;
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
|
||||
@@ -307,40 +307,42 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: Scalar
|
||||
}
|
||||
|
||||
/// Show a vector register.
|
||||
pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> String {
|
||||
pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: VectorSize) -> String {
|
||||
assert_eq!(RegClass::V128, reg.get_class());
|
||||
let mut s = reg.show_rru(mb_rru);
|
||||
|
||||
match ty {
|
||||
F32X2 => s.push_str(".2s"),
|
||||
F32X4 => s.push_str(".4s"),
|
||||
F64X2 => s.push_str(".2d"),
|
||||
I8X8 => s.push_str(".8b"),
|
||||
I8X16 => s.push_str(".16b"),
|
||||
I16X4 => s.push_str(".4h"),
|
||||
I16X8 => s.push_str(".8h"),
|
||||
I32X2 => s.push_str(".2s"),
|
||||
I32X4 => s.push_str(".4s"),
|
||||
I64X2 => s.push_str(".2d"),
|
||||
_ => unimplemented!(),
|
||||
}
|
||||
let suffix = match size {
|
||||
VectorSize::Size8x8 => ".8b",
|
||||
VectorSize::Size8x16 => ".16b",
|
||||
VectorSize::Size16x4 => ".4h",
|
||||
VectorSize::Size16x8 => ".8h",
|
||||
VectorSize::Size32x2 => ".2s",
|
||||
VectorSize::Size32x4 => ".4s",
|
||||
VectorSize::Size64x2 => ".2d",
|
||||
};
|
||||
|
||||
s.push_str(suffix);
|
||||
s
|
||||
}
|
||||
|
||||
/// Show an indexed vector element.
|
||||
pub fn show_vreg_element(reg: Reg, mb_rru: Option<&RealRegUniverse>, idx: u8, ty: Type) -> String {
|
||||
pub fn show_vreg_element(
|
||||
reg: Reg,
|
||||
mb_rru: Option<&RealRegUniverse>,
|
||||
idx: u8,
|
||||
size: VectorSize,
|
||||
) -> String {
|
||||
assert_eq!(RegClass::V128, reg.get_class());
|
||||
let mut s = reg.show_rru(mb_rru);
|
||||
|
||||
let suffix = match ty {
|
||||
I8 => "b",
|
||||
I16 => "h",
|
||||
I32 => "s",
|
||||
I64 => "d",
|
||||
F32 => "s",
|
||||
F64 => "d",
|
||||
_ => unimplemented!(),
|
||||
let suffix = match size {
|
||||
VectorSize::Size8x8 => "b",
|
||||
VectorSize::Size8x16 => "b",
|
||||
VectorSize::Size16x4 => "h",
|
||||
VectorSize::Size16x8 => "h",
|
||||
VectorSize::Size32x2 => "s",
|
||||
VectorSize::Size32x4 => "s",
|
||||
VectorSize::Size64x2 => "d",
|
||||
};
|
||||
|
||||
s.push_str(&format!(".{}[{}]", suffix, idx));
|
||||
|
||||
@@ -14,7 +14,7 @@ use crate::ir::Inst as IRInst;
|
||||
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
|
||||
use crate::machinst::lower::*;
|
||||
use crate::machinst::*;
|
||||
use crate::{CodegenError, CodegenResult};
|
||||
use crate::CodegenResult;
|
||||
|
||||
use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::aarch64::AArch64Backend;
|
||||
@@ -736,20 +736,11 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
|
||||
ty: Type,
|
||||
cond: Cond,
|
||||
) -> CodegenResult<()> {
|
||||
match ty {
|
||||
F32X4 | F64X2 | I8X16 | I16X8 | I32X4 => {}
|
||||
_ => {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"unsupported SIMD type: {:?}",
|
||||
ty
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
let is_float = match ty {
|
||||
F32X4 | F64X2 => true,
|
||||
_ => false,
|
||||
};
|
||||
let size = VectorSize::from_ty(ty);
|
||||
// 'Less than' operations are implemented by swapping
|
||||
// the order of operands and using the 'greater than'
|
||||
// instructions.
|
||||
@@ -784,7 +775,7 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
size,
|
||||
});
|
||||
|
||||
if cond == Cond::Ne {
|
||||
@@ -792,7 +783,7 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
|
||||
op: VecMisc2::Not,
|
||||
rd,
|
||||
rn: rd.to_reg(),
|
||||
ty: I8X16,
|
||||
size,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -70,7 +70,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rn,
|
||||
rm,
|
||||
alu_op: VecALUOp::Add,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -89,13 +89,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rn,
|
||||
rm,
|
||||
alu_op: VecALUOp::Sub,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
Opcode::UaddSat | Opcode::SaddSat | Opcode::UsubSat | Opcode::SsubSat => {
|
||||
// We use the vector instruction set's saturating adds (UQADD /
|
||||
// SQADD), which require vector registers.
|
||||
// We use the scalar SIMD & FP saturating additions and subtractions
|
||||
// (SQADD / UQADD / SQSUB / UQSUB), which require scalar FP registers.
|
||||
let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat;
|
||||
let ty = ty.unwrap();
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
@@ -105,11 +105,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
} else {
|
||||
NarrowValueMode::ZeroExtend64
|
||||
};
|
||||
let alu_op = match op {
|
||||
Opcode::UaddSat => VecALUOp::UQAddScalar,
|
||||
Opcode::SaddSat => VecALUOp::SQAddScalar,
|
||||
Opcode::UsubSat => VecALUOp::UQSubScalar,
|
||||
Opcode::SsubSat => VecALUOp::SQSubScalar,
|
||||
let fpu_op = match op {
|
||||
Opcode::UaddSat => FPUOp2::Uqadd64,
|
||||
Opcode::SaddSat => FPUOp2::Sqadd64,
|
||||
Opcode::UsubSat => FPUOp2::Uqsub64,
|
||||
Opcode::SsubSat => FPUOp2::Sqsub64,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let va = ctx.alloc_tmp(RegClass::V128, I128);
|
||||
@@ -118,18 +118,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
|
||||
ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
|
||||
ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
|
||||
ctx.emit(Inst::VecRRR {
|
||||
ctx.emit(Inst::FpuRRR {
|
||||
fpu_op,
|
||||
rd: va,
|
||||
rn: va.to_reg(),
|
||||
rm: vb.to_reg(),
|
||||
alu_op,
|
||||
ty: I64,
|
||||
});
|
||||
ctx.emit(Inst::MovFromVec {
|
||||
rd,
|
||||
rn: va.to_reg(),
|
||||
idx: 0,
|
||||
ty: I64,
|
||||
size: VectorSize::Size64x2,
|
||||
});
|
||||
} else {
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
@@ -148,7 +147,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rn,
|
||||
rm,
|
||||
alu_op,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -167,7 +166,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
op: VecMisc2::Neg,
|
||||
rd,
|
||||
rn,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -192,7 +191,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -422,7 +421,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
op: VecMisc2::Not,
|
||||
rd,
|
||||
rn: rm,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -466,7 +465,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -495,7 +494,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
ctx.emit(alu_inst_immshift(alu_op, rd, rn, rm));
|
||||
} else {
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
|
||||
let size = VectorSize::from_ty(ty);
|
||||
let (alu_op, is_right_shift) = match op {
|
||||
Opcode::Ishl => (VecALUOp::Sshl, false),
|
||||
Opcode::Ushr => (VecALUOp::Ushl, true),
|
||||
@@ -514,18 +513,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
put_input_in_reg(ctx, inputs[1], NarrowValueMode::None)
|
||||
};
|
||||
|
||||
ctx.emit(Inst::VecDup {
|
||||
rd,
|
||||
rn: rm,
|
||||
ty: ty.lane_type(),
|
||||
});
|
||||
ctx.emit(Inst::VecDup { rd, rn: rm, size });
|
||||
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn,
|
||||
rm: rd.to_reg(),
|
||||
ty,
|
||||
size,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -1167,7 +1162,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
ty,
|
||||
size: VectorSize::from_ty(ty),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -1297,7 +1292,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rd,
|
||||
rn,
|
||||
idx: 0,
|
||||
ty: I64,
|
||||
size: VectorSize::Size64x2,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -1557,15 +1552,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
let idx = *imm;
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let size = VectorSize::from_ty(ctx.input_ty(insn, 0));
|
||||
let ty = ty.unwrap();
|
||||
|
||||
if ty_is_int(ty) {
|
||||
ctx.emit(Inst::MovFromVec { rd, rn, idx, ty });
|
||||
ctx.emit(Inst::MovFromVec { rd, rn, idx, size });
|
||||
// Plain moves are faster on some processors.
|
||||
} else if idx == 0 {
|
||||
ctx.emit(Inst::gen_move(rd, rn, ty));
|
||||
} else {
|
||||
let size = ScalarSize::from_ty(ty);
|
||||
ctx.emit(Inst::FpuMoveFromVec { rd, rn, idx, size });
|
||||
}
|
||||
} else {
|
||||
@@ -1576,11 +1571,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
Opcode::Splat => {
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let ty = ctx.input_ty(insn, 0);
|
||||
let inst = if ty_is_int(ty) {
|
||||
Inst::VecDup { rd, rn, ty }
|
||||
let input_ty = ctx.input_ty(insn, 0);
|
||||
let size = VectorSize::from_ty(ty.unwrap());
|
||||
let inst = if ty_is_int(input_ty) {
|
||||
Inst::VecDup { rd, rn, size }
|
||||
} else {
|
||||
Inst::VecDupFromFpu { rd, rn, ty }
|
||||
Inst::VecDupFromFpu { rd, rn, size }
|
||||
};
|
||||
ctx.emit(inst);
|
||||
}
|
||||
@@ -1598,21 +1594,22 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
// cmp xm, #0
|
||||
// cset xm, ne
|
||||
|
||||
let input_ty = ctx.input_ty(insn, 0);
|
||||
let size = VectorSize::from_ty(ctx.input_ty(insn, 0));
|
||||
|
||||
if op == Opcode::VanyTrue {
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::Umaxp,
|
||||
rd: tmp,
|
||||
rn: rm,
|
||||
rm: rm,
|
||||
ty: input_ty,
|
||||
size,
|
||||
});
|
||||
} else {
|
||||
ctx.emit(Inst::VecLanes {
|
||||
op: VecLanesOp::Uminv,
|
||||
rd: tmp,
|
||||
rn: rm,
|
||||
ty: input_ty,
|
||||
size,
|
||||
});
|
||||
};
|
||||
|
||||
@@ -1620,7 +1617,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
rd,
|
||||
rn: tmp.to_reg(),
|
||||
idx: 0,
|
||||
ty: I64,
|
||||
size: VectorSize::Size64x2,
|
||||
});
|
||||
|
||||
ctx.emit(Inst::AluRRImm12 {
|
||||
|
||||
Reference in New Issue
Block a user