AArch64: Introduce an enum to specify vector instruction operand sizes

Copyright (c) 2020, Arm Limited.
This commit is contained in:
Anton Kirilov
2020-06-19 01:00:47 +01:00
parent bc1e960b9e
commit 95b0b05af2
7 changed files with 374 additions and 388 deletions

View File

@@ -3,6 +3,7 @@
// Some variants are never constructed, but we still want them as options in the future.
#![allow(dead_code)]
use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8};
use crate::ir::Type;
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::lower::ty_bits;
@@ -587,3 +588,55 @@ impl ScalarSize {
}
}
}
/// Type used to communicate the size of a vector operand.
///
/// Variant names follow the `Size<lane-bits>x<lane-count>` pattern, matching
/// the AArch64 SIMD arrangement specifiers (.8b, .16b, .4h, .8h, .2s, .4s, .2d).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum VectorSize {
/// 8-bit lanes, 8 lanes (64-bit vector, `.8b`).
Size8x8,
/// 8-bit lanes, 16 lanes (128-bit vector, `.16b`).
Size8x16,
/// 16-bit lanes, 4 lanes (64-bit vector, `.4h`).
Size16x4,
/// 16-bit lanes, 8 lanes (128-bit vector, `.8h`).
Size16x8,
/// 32-bit lanes, 2 lanes (64-bit vector, `.2s`).
Size32x2,
/// 32-bit lanes, 4 lanes (128-bit vector, `.4s`).
Size32x4,
/// 64-bit lanes, 2 lanes (128-bit vector, `.2d`).
Size64x2,
}
impl VectorSize {
/// Convert from a type into a vector operand size.
pub fn from_ty(ty: Type) -> VectorSize {
match ty {
F32X2 => VectorSize::Size32x2,
F32X4 => VectorSize::Size32x4,
F64X2 => VectorSize::Size64x2,
I8X8 => VectorSize::Size8x8,
I8X16 => VectorSize::Size8x16,
I16X4 => VectorSize::Size16x4,
I16X8 => VectorSize::Size16x8,
I32X2 => VectorSize::Size32x2,
I32X4 => VectorSize::Size32x4,
I64X2 => VectorSize::Size64x2,
_ => unimplemented!(),
}
}
/// Get the integer operand size that corresponds to a lane of a vector with a certain size.
pub fn operand_size(&self) -> OperandSize {
match self {
VectorSize::Size64x2 => OperandSize::Size64,
_ => OperandSize::Size32,
}
}
/// Get the scalar operand size that corresponds to a lane of a vector with a certain size.
pub fn lane_size(&self) -> ScalarSize {
match self {
VectorSize::Size8x8 => ScalarSize::Size8,
VectorSize::Size8x16 => ScalarSize::Size8,
VectorSize::Size16x4 => ScalarSize::Size16,
VectorSize::Size16x8 => ScalarSize::Size16,
VectorSize::Size32x2 => ScalarSize::Size32,
VectorSize::Size32x4 => ScalarSize::Size32,
VectorSize::Size64x2 => ScalarSize::Size64,
}
}
}

View File

@@ -1007,7 +1007,7 @@ impl MachInstEmit for Inst {
sink.put4(enc_vecmov(/* 16b = */ true, rd, rn));
}
&Inst::FpuMoveFromVec { rd, rn, idx, size } => {
let (imm5, shift, mask) = match size {
let (imm5, shift, mask) = match size.lane_size() {
ScalarSize::Size32 => (0b00100, 3, 0b011),
ScalarSize::Size64 => (0b01000, 4, 0b001),
_ => unimplemented!(),
@@ -1048,6 +1048,10 @@ impl MachInstEmit for Inst {
FPUOp2::Max64 => 0b000_11110_01_1_00000_010010,
FPUOp2::Min32 => 0b000_11110_00_1_00000_010110,
FPUOp2::Min64 => 0b000_11110_01_1_00000_010110,
FPUOp2::Sqadd64 => 0b010_11110_11_1_00000_000011,
FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011,
FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011,
FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011,
};
sink.put4(enc_fpurrr(top22, rd, rn, rm));
}
@@ -1102,31 +1106,25 @@ impl MachInstEmit for Inst {
};
sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
}
&Inst::VecMisc { op, rd, rn, ty } => {
let enc_size = match ty {
I8X16 => 0b00,
I16X8 => 0b01,
I32X4 => 0b10,
I64X2 => 0b11,
_ => 0,
&Inst::VecMisc { op, rd, rn, size } => {
let enc_size = match size {
VectorSize::Size8x16 => 0b00,
VectorSize::Size16x8 => 0b01,
VectorSize::Size32x4 => 0b10,
VectorSize::Size64x2 => 0b11,
_ => unimplemented!(),
};
let (bits_12_16, size) = match op {
VecMisc2::Not => {
debug_assert_eq!(128, ty_bits(ty));
(0b00101, 0b00)
}
VecMisc2::Neg => {
debug_assert_eq!(128, ty_bits(ty));
(0b01011, enc_size)
}
VecMisc2::Not => (0b00101, 0b00),
VecMisc2::Neg => (0b01011, enc_size),
};
sink.put4(enc_vec_rr_misc(size, bits_12_16, rd, rn));
}
&Inst::VecLanes { op, rd, rn, ty } => {
let (q, size) = match ty {
I8X16 => (0b1, 0b00),
I16X8 => (0b1, 0b01),
I32X4 => (0b1, 0b10),
&Inst::VecLanes { op, rd, rn, size } => {
let (q, size) = match size {
VectorSize::Size8x16 => (0b1, 0b00),
VectorSize::Size16x8 => (0b1, 0b01),
VectorSize::Size32x4 => (0b1, 0b10),
_ => unreachable!(),
};
let (u, opcode) = match op {
@@ -1250,12 +1248,12 @@ impl MachInstEmit for Inst {
| machreg_to_vec(rd.to_reg()),
);
}
&Inst::MovFromVec { rd, rn, idx, ty } => {
let (q, imm5, shift, mask) = match ty {
I8 => (0b0, 0b00001, 1, 0b1111),
I16 => (0b0, 0b00010, 2, 0b0111),
I32 => (0b0, 0b00100, 3, 0b0011),
I64 => (0b1, 0b01000, 4, 0b0001),
&Inst::MovFromVec { rd, rn, idx, size } => {
let (q, imm5, shift, mask) = match size {
VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111),
VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111),
VectorSize::Size32x4 => (0b0, 0b00100, 3, 0b0011),
VectorSize::Size64x2 => (0b1, 0b01000, 4, 0b0001),
_ => unreachable!(),
};
debug_assert_eq!(idx & mask, idx);
@@ -1268,12 +1266,12 @@ impl MachInstEmit for Inst {
| machreg_to_gpr(rd.to_reg()),
);
}
&Inst::VecDup { rd, rn, ty } => {
let imm5 = match ty {
I8 => 0b00001,
I16 => 0b00010,
I32 => 0b00100,
I64 => 0b01000,
&Inst::VecDup { rd, rn, size } => {
let imm5 = match size {
VectorSize::Size8x16 => 0b00001,
VectorSize::Size16x8 => 0b00010,
VectorSize::Size32x4 => 0b00100,
VectorSize::Size64x2 => 0b01000,
_ => unimplemented!(),
};
sink.put4(
@@ -1283,10 +1281,10 @@ impl MachInstEmit for Inst {
| machreg_to_vec(rd.to_reg()),
);
}
&Inst::VecDupFromFpu { rd, rn, ty } => {
let imm5 = match ty {
F32 => 0b00100,
F64 => 0b01000,
&Inst::VecDupFromFpu { rd, rn, size } => {
let imm5 = match size {
VectorSize::Size32x4 => 0b00100,
VectorSize::Size64x2 => 0b01000,
_ => unimplemented!(),
};
sink.put4(
@@ -1318,41 +1316,25 @@ impl MachInstEmit for Inst {
rn,
rm,
alu_op,
ty,
size,
} => {
let enc_size = match ty {
I8X16 => 0b00,
I16X8 => 0b01,
I32X4 => 0b10,
I64X2 => 0b11,
let enc_size = match size {
VectorSize::Size8x16 => 0b00,
VectorSize::Size16x8 => 0b01,
VectorSize::Size32x4 => 0b10,
VectorSize::Size64x2 => 0b11,
_ => 0,
};
let enc_size_for_fcmp = match ty {
F32X4 => 0b0,
F64X2 => 0b1,
let enc_size_for_fcmp = match size {
VectorSize::Size32x4 => 0b0,
VectorSize::Size64x2 => 0b1,
_ => 0,
};
let (top11, bit15_10) = match alu_op {
VecALUOp::SQAddScalar => {
debug_assert_eq!(I64, ty);
(0b010_11110_11_1, 0b000011)
}
VecALUOp::Sqadd => (0b010_01110_00_1 | enc_size << 1, 0b000011),
VecALUOp::SQSubScalar => {
debug_assert_eq!(I64, ty);
(0b010_11110_11_1, 0b001011)
}
VecALUOp::Sqsub => (0b010_01110_00_1 | enc_size << 1, 0b001011),
VecALUOp::UQAddScalar => {
debug_assert_eq!(I64, ty);
(0b011_11110_11_1, 0b000011)
}
VecALUOp::Uqadd => (0b011_01110_00_1 | enc_size << 1, 0b000011),
VecALUOp::UQSubScalar => {
debug_assert_eq!(I64, ty);
(0b011_11110_11_1, 0b001011)
}
VecALUOp::Uqsub => (0b011_01110_00_1 | enc_size << 1, 0b001011),
VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size << 1, 0b100011),
VecALUOp::Cmge => (0b010_01110_00_1 | enc_size << 1, 0b001111),
@@ -1364,31 +1346,16 @@ impl MachInstEmit for Inst {
VecALUOp::Fcmge => (0b011_01110_00_1 | enc_size_for_fcmp << 1, 0b111001),
// The following logical instructions operate on bytes, so are not encoded differently
// for the different vector types.
VecALUOp::And => {
debug_assert_eq!(128, ty_bits(ty));
(0b010_01110_00_1, 0b000111)
}
VecALUOp::Bic => {
debug_assert_eq!(128, ty_bits(ty));
(0b010_01110_01_1, 0b000111)
}
VecALUOp::Orr => {
debug_assert_eq!(128, ty_bits(ty));
(0b010_01110_10_1, 0b000111)
}
VecALUOp::Eor => {
debug_assert_eq!(128, ty_bits(ty));
(0b011_01110_00_1, 0b000111)
}
VecALUOp::Bsl => {
debug_assert_eq!(128, ty_bits(ty));
(0b011_01110_01_1, 0b000111)
}
VecALUOp::And => (0b010_01110_00_1, 0b000111),
VecALUOp::Bic => (0b010_01110_01_1, 0b000111),
VecALUOp::Orr => (0b010_01110_10_1, 0b000111),
VecALUOp::Eor => (0b011_01110_00_1, 0b000111),
VecALUOp::Bsl => (0b011_01110_01_1, 0b000111),
VecALUOp::Umaxp => (0b011_01110_00_1 | enc_size << 1, 0b101001),
VecALUOp::Add => (0b010_01110_00_1 | enc_size << 1, 0b100001),
VecALUOp::Sub => (0b011_01110_00_1 | enc_size << 1, 0b100001),
VecALUOp::Mul => {
debug_assert_ne!(I64X2, ty);
debug_assert_ne!(size, VectorSize::Size64x2);
(0b010_01110_00_1 | enc_size << 1, 0b100111)
}
VecALUOp::Sshl => (0b010_01110_00_1 | enc_size << 1, 0b010001),

View File

@@ -1841,7 +1841,7 @@ fn test_aarch64_binemit() {
rd: writable_xreg(3),
rn: vreg(27),
idx: 14,
ty: I8,
size: VectorSize::Size8x16,
},
"633F1D0E",
"umov w3, v27.b[14]",
@@ -1851,7 +1851,7 @@ fn test_aarch64_binemit() {
rd: writable_xreg(24),
rn: vreg(5),
idx: 3,
ty: I16,
size: VectorSize::Size16x8,
},
"B83C0E0E",
"umov w24, v5.h[3]",
@@ -1861,7 +1861,7 @@ fn test_aarch64_binemit() {
rd: writable_xreg(12),
rn: vreg(17),
idx: 1,
ty: I32,
size: VectorSize::Size32x4,
},
"2C3E0C0E",
"mov w12, v17.s[1]",
@@ -1871,7 +1871,7 @@ fn test_aarch64_binemit() {
rd: writable_xreg(21),
rn: vreg(20),
idx: 0,
ty: I64,
size: VectorSize::Size64x2,
},
"953E084E",
"mov x21, v20.d[0]",
@@ -1900,7 +1900,7 @@ fn test_aarch64_binemit() {
Inst::VecDup {
rd: writable_vreg(25),
rn: xreg(7),
ty: I8,
size: VectorSize::Size8x16,
},
"F90C014E",
"dup v25.16b, w7",
@@ -1909,7 +1909,7 @@ fn test_aarch64_binemit() {
Inst::VecDup {
rd: writable_vreg(2),
rn: xreg(23),
ty: I16,
size: VectorSize::Size16x8,
},
"E20E024E",
"dup v2.8h, w23",
@@ -1918,7 +1918,7 @@ fn test_aarch64_binemit() {
Inst::VecDup {
rd: writable_vreg(0),
rn: xreg(28),
ty: I32,
size: VectorSize::Size32x4,
},
"800F044E",
"dup v0.4s, w28",
@@ -1927,7 +1927,7 @@ fn test_aarch64_binemit() {
Inst::VecDup {
rd: writable_vreg(31),
rn: xreg(5),
ty: I64,
size: VectorSize::Size64x2,
},
"BF0C084E",
"dup v31.2d, x5",
@@ -1936,7 +1936,7 @@ fn test_aarch64_binemit() {
Inst::VecDupFromFpu {
rd: writable_vreg(14),
rn: vreg(19),
ty: F32,
size: VectorSize::Size32x4,
},
"6E06044E",
"dup v14.4s, v19.s[0]",
@@ -1945,7 +1945,7 @@ fn test_aarch64_binemit() {
Inst::VecDupFromFpu {
rd: writable_vreg(18),
rn: vreg(10),
ty: F64,
size: VectorSize::Size64x2,
},
"5205084E",
"dup v18.2d, v10.d[0]",
@@ -2004,50 +2004,6 @@ fn test_aarch64_binemit() {
"5CA4202F",
"uxtl v28.2d, v2.2s",
));
insns.push((
Inst::VecRRR {
rd: writable_vreg(21),
rn: vreg(22),
rm: vreg(23),
alu_op: VecALUOp::UQAddScalar,
ty: I64,
},
"D50EF77E",
"uqadd d21, d22, d23",
));
insns.push((
Inst::VecRRR {
rd: writable_vreg(21),
rn: vreg(22),
rm: vreg(23),
alu_op: VecALUOp::SQAddScalar,
ty: I64,
},
"D50EF75E",
"sqadd d21, d22, d23",
));
insns.push((
Inst::VecRRR {
rd: writable_vreg(21),
rn: vreg(22),
rm: vreg(23),
alu_op: VecALUOp::UQSubScalar,
ty: I64,
},
"D52EF77E",
"uqsub d21, d22, d23",
));
insns.push((
Inst::VecRRR {
rd: writable_vreg(21),
rn: vreg(22),
rm: vreg(23),
alu_op: VecALUOp::SQSubScalar,
ty: I64,
},
"D52EF75E",
"sqsub d21, d22, d23",
));
insns.push((
Inst::VecRRR {
@@ -2055,7 +2011,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(1),
rn: vreg(2),
rm: vreg(8),
ty: I8X16,
size: VectorSize::Size8x16,
},
"410C284E",
"sqadd v1.16b, v2.16b, v8.16b",
@@ -2067,7 +2023,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(1),
rn: vreg(12),
rm: vreg(28),
ty: I16X8,
size: VectorSize::Size16x8,
},
"810D7C4E",
"sqadd v1.8h, v12.8h, v28.8h",
@@ -2079,7 +2035,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(12),
rn: vreg(2),
rm: vreg(6),
ty: I32X4,
size: VectorSize::Size32x4,
},
"4C0CA64E",
"sqadd v12.4s, v2.4s, v6.4s",
@@ -2091,7 +2047,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(20),
rn: vreg(7),
rm: vreg(13),
ty: I64X2,
size: VectorSize::Size64x2,
},
"F40CED4E",
"sqadd v20.2d, v7.2d, v13.2d",
@@ -2103,7 +2059,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(1),
rn: vreg(2),
rm: vreg(8),
ty: I8X16,
size: VectorSize::Size8x16,
},
"412C284E",
"sqsub v1.16b, v2.16b, v8.16b",
@@ -2115,7 +2071,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(1),
rn: vreg(12),
rm: vreg(28),
ty: I16X8,
size: VectorSize::Size16x8,
},
"812D7C4E",
"sqsub v1.8h, v12.8h, v28.8h",
@@ -2127,7 +2083,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(12),
rn: vreg(2),
rm: vreg(6),
ty: I32X4,
size: VectorSize::Size32x4,
},
"4C2CA64E",
"sqsub v12.4s, v2.4s, v6.4s",
@@ -2139,7 +2095,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(20),
rn: vreg(7),
rm: vreg(13),
ty: I64X2,
size: VectorSize::Size64x2,
},
"F42CED4E",
"sqsub v20.2d, v7.2d, v13.2d",
@@ -2151,7 +2107,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(1),
rn: vreg(2),
rm: vreg(8),
ty: I8X16,
size: VectorSize::Size8x16,
},
"410C286E",
"uqadd v1.16b, v2.16b, v8.16b",
@@ -2163,7 +2119,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(1),
rn: vreg(12),
rm: vreg(28),
ty: I16X8,
size: VectorSize::Size16x8,
},
"810D7C6E",
"uqadd v1.8h, v12.8h, v28.8h",
@@ -2175,7 +2131,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(12),
rn: vreg(2),
rm: vreg(6),
ty: I32X4,
size: VectorSize::Size32x4,
},
"4C0CA66E",
"uqadd v12.4s, v2.4s, v6.4s",
@@ -2187,7 +2143,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(20),
rn: vreg(7),
rm: vreg(13),
ty: I64X2,
size: VectorSize::Size64x2,
},
"F40CED6E",
"uqadd v20.2d, v7.2d, v13.2d",
@@ -2199,7 +2155,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(1),
rn: vreg(2),
rm: vreg(8),
ty: I8X16,
size: VectorSize::Size8x16,
},
"412C286E",
"uqsub v1.16b, v2.16b, v8.16b",
@@ -2211,7 +2167,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(1),
rn: vreg(12),
rm: vreg(28),
ty: I16X8,
size: VectorSize::Size16x8,
},
"812D7C6E",
"uqsub v1.8h, v12.8h, v28.8h",
@@ -2223,7 +2179,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(12),
rn: vreg(2),
rm: vreg(6),
ty: I32X4,
size: VectorSize::Size32x4,
},
"4C2CA66E",
"uqsub v12.4s, v2.4s, v6.4s",
@@ -2235,7 +2191,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(20),
rn: vreg(7),
rm: vreg(13),
ty: I64X2,
size: VectorSize::Size64x2,
},
"F42CED6E",
"uqsub v20.2d, v7.2d, v13.2d",
@@ -2247,7 +2203,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(3),
rn: vreg(23),
rm: vreg(24),
ty: I8X16,
size: VectorSize::Size8x16,
},
"E38E386E",
"cmeq v3.16b, v23.16b, v24.16b",
@@ -2259,7 +2215,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(3),
rn: vreg(23),
rm: vreg(24),
ty: I8X16,
size: VectorSize::Size8x16,
},
"E336384E",
"cmgt v3.16b, v23.16b, v24.16b",
@@ -2271,7 +2227,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(23),
rn: vreg(9),
rm: vreg(12),
ty: I8X16,
size: VectorSize::Size8x16,
},
"373D2C4E",
"cmge v23.16b, v9.16b, v12.16b",
@@ -2283,7 +2239,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(5),
rn: vreg(1),
rm: vreg(1),
ty: I8X16,
size: VectorSize::Size8x16,
},
"2534216E",
"cmhi v5.16b, v1.16b, v1.16b",
@@ -2295,7 +2251,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(8),
rn: vreg(2),
rm: vreg(15),
ty: I8X16,
size: VectorSize::Size8x16,
},
"483C2F6E",
"cmhs v8.16b, v2.16b, v15.16b",
@@ -2307,7 +2263,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(3),
rn: vreg(23),
rm: vreg(24),
ty: I16X8,
size: VectorSize::Size16x8,
},
"E38E786E",
"cmeq v3.8h, v23.8h, v24.8h",
@@ -2319,7 +2275,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(3),
rn: vreg(23),
rm: vreg(24),
ty: I16X8,
size: VectorSize::Size16x8,
},
"E336784E",
"cmgt v3.8h, v23.8h, v24.8h",
@@ -2331,7 +2287,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(23),
rn: vreg(9),
rm: vreg(12),
ty: I16X8,
size: VectorSize::Size16x8,
},
"373D6C4E",
"cmge v23.8h, v9.8h, v12.8h",
@@ -2343,7 +2299,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(5),
rn: vreg(1),
rm: vreg(1),
ty: I16X8,
size: VectorSize::Size16x8,
},
"2534616E",
"cmhi v5.8h, v1.8h, v1.8h",
@@ -2355,7 +2311,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(8),
rn: vreg(2),
rm: vreg(15),
ty: I16X8,
size: VectorSize::Size16x8,
},
"483C6F6E",
"cmhs v8.8h, v2.8h, v15.8h",
@@ -2367,7 +2323,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(3),
rn: vreg(23),
rm: vreg(24),
ty: I32X4,
size: VectorSize::Size32x4,
},
"E38EB86E",
"cmeq v3.4s, v23.4s, v24.4s",
@@ -2379,7 +2335,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(3),
rn: vreg(23),
rm: vreg(24),
ty: I32X4,
size: VectorSize::Size32x4,
},
"E336B84E",
"cmgt v3.4s, v23.4s, v24.4s",
@@ -2391,7 +2347,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(23),
rn: vreg(9),
rm: vreg(12),
ty: I32X4,
size: VectorSize::Size32x4,
},
"373DAC4E",
"cmge v23.4s, v9.4s, v12.4s",
@@ -2403,7 +2359,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(5),
rn: vreg(1),
rm: vreg(1),
ty: I32X4,
size: VectorSize::Size32x4,
},
"2534A16E",
"cmhi v5.4s, v1.4s, v1.4s",
@@ -2415,7 +2371,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(8),
rn: vreg(2),
rm: vreg(15),
ty: I32X4,
size: VectorSize::Size32x4,
},
"483CAF6E",
"cmhs v8.4s, v2.4s, v15.4s",
@@ -2427,7 +2383,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(28),
rn: vreg(12),
rm: vreg(4),
ty: F32X4,
size: VectorSize::Size32x4,
},
"9CE5244E",
"fcmeq v28.4s, v12.4s, v4.4s",
@@ -2439,7 +2395,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(3),
rn: vreg(16),
rm: vreg(31),
ty: F64X2,
size: VectorSize::Size64x2,
},
"03E6FF6E",
"fcmgt v3.2d, v16.2d, v31.2d",
@@ -2451,7 +2407,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(18),
rn: vreg(23),
rm: vreg(0),
ty: F64X2,
size: VectorSize::Size64x2,
},
"F2E6606E",
"fcmge v18.2d, v23.2d, v0.2d",
@@ -2463,7 +2419,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(20),
rn: vreg(19),
rm: vreg(18),
ty: I32X4,
size: VectorSize::Size32x4,
},
"741E324E",
"and v20.16b, v19.16b, v18.16b",
@@ -2475,7 +2431,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(8),
rn: vreg(11),
rm: vreg(1),
ty: I8X16,
size: VectorSize::Size8x16,
},
"681D614E",
"bic v8.16b, v11.16b, v1.16b",
@@ -2487,7 +2443,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(15),
rn: vreg(2),
rm: vreg(12),
ty: I16X8,
size: VectorSize::Size16x8,
},
"4F1CAC4E",
"orr v15.16b, v2.16b, v12.16b",
@@ -2499,7 +2455,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(18),
rn: vreg(3),
rm: vreg(22),
ty: I8X16,
size: VectorSize::Size8x16,
},
"721C366E",
"eor v18.16b, v3.16b, v22.16b",
@@ -2511,7 +2467,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(8),
rn: vreg(9),
rm: vreg(1),
ty: I8X16,
size: VectorSize::Size8x16,
},
"281D616E",
"bsl v8.16b, v9.16b, v1.16b",
@@ -2523,7 +2479,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(8),
rn: vreg(12),
rm: vreg(1),
ty: I8X16,
size: VectorSize::Size8x16,
},
"88A5216E",
"umaxp v8.16b, v12.16b, v1.16b",
@@ -2535,7 +2491,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(1),
rn: vreg(6),
rm: vreg(1),
ty: I16X8,
size: VectorSize::Size16x8,
},
"C1A4616E",
"umaxp v1.8h, v6.8h, v1.8h",
@@ -2547,7 +2503,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(1),
rn: vreg(20),
rm: vreg(16),
ty: I32X4,
size: VectorSize::Size32x4,
},
"81A6B06E",
"umaxp v1.4s, v20.4s, v16.4s",
@@ -2559,7 +2515,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(5),
rn: vreg(1),
rm: vreg(1),
ty: I8X16,
size: VectorSize::Size8x16,
},
"2584214E",
"add v5.16b, v1.16b, v1.16b",
@@ -2571,7 +2527,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(7),
rn: vreg(13),
rm: vreg(2),
ty: I16X8,
size: VectorSize::Size16x8,
},
"A785624E",
"add v7.8h, v13.8h, v2.8h",
@@ -2583,7 +2539,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(18),
rn: vreg(9),
rm: vreg(6),
ty: I32X4,
size: VectorSize::Size32x4,
},
"3285A64E",
"add v18.4s, v9.4s, v6.4s",
@@ -2595,7 +2551,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(1),
rn: vreg(3),
rm: vreg(2),
ty: I64X2,
size: VectorSize::Size64x2,
},
"6184E24E",
"add v1.2d, v3.2d, v2.2d",
@@ -2607,7 +2563,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(5),
rn: vreg(1),
rm: vreg(1),
ty: I8X16,
size: VectorSize::Size8x16,
},
"2584216E",
"sub v5.16b, v1.16b, v1.16b",
@@ -2619,7 +2575,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(7),
rn: vreg(13),
rm: vreg(2),
ty: I16X8,
size: VectorSize::Size16x8,
},
"A785626E",
"sub v7.8h, v13.8h, v2.8h",
@@ -2631,7 +2587,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(18),
rn: vreg(9),
rm: vreg(6),
ty: I32X4,
size: VectorSize::Size32x4,
},
"3285A66E",
"sub v18.4s, v9.4s, v6.4s",
@@ -2643,7 +2599,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(18),
rn: vreg(0),
rm: vreg(8),
ty: I64X2,
size: VectorSize::Size64x2,
},
"1284E86E",
"sub v18.2d, v0.2d, v8.2d",
@@ -2655,7 +2611,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(25),
rn: vreg(9),
rm: vreg(8),
ty: I8X16,
size: VectorSize::Size8x16,
},
"399D284E",
"mul v25.16b, v9.16b, v8.16b",
@@ -2667,7 +2623,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(30),
rn: vreg(30),
rm: vreg(12),
ty: I16X8,
size: VectorSize::Size16x8,
},
"DE9F6C4E",
"mul v30.8h, v30.8h, v12.8h",
@@ -2679,7 +2635,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(18),
rn: vreg(18),
rm: vreg(18),
ty: I32X4,
size: VectorSize::Size32x4,
},
"529EB24E",
"mul v18.4s, v18.4s, v18.4s",
@@ -2691,7 +2647,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(18),
rn: vreg(18),
rm: vreg(18),
ty: I8X16,
size: VectorSize::Size8x16,
},
"5246326E",
"ushl v18.16b, v18.16b, v18.16b",
@@ -2703,7 +2659,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(18),
rn: vreg(18),
rm: vreg(18),
ty: I16X8,
size: VectorSize::Size16x8,
},
"5246726E",
"ushl v18.8h, v18.8h, v18.8h",
@@ -2715,7 +2671,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(18),
rn: vreg(1),
rm: vreg(21),
ty: I32X4,
size: VectorSize::Size32x4,
},
"3244B56E",
"ushl v18.4s, v1.4s, v21.4s",
@@ -2727,7 +2683,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(5),
rn: vreg(7),
rm: vreg(19),
ty: I64X2,
size: VectorSize::Size64x2,
},
"E544F36E",
"ushl v5.2d, v7.2d, v19.2d",
@@ -2739,7 +2695,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(18),
rn: vreg(18),
rm: vreg(18),
ty: I8X16,
size: VectorSize::Size8x16,
},
"5246324E",
"sshl v18.16b, v18.16b, v18.16b",
@@ -2751,7 +2707,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(30),
rn: vreg(1),
rm: vreg(29),
ty: I16X8,
size: VectorSize::Size16x8,
},
"3E447D4E",
"sshl v30.8h, v1.8h, v29.8h",
@@ -2763,7 +2719,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(8),
rn: vreg(22),
rm: vreg(21),
ty: I32X4,
size: VectorSize::Size32x4,
},
"C846B54E",
"sshl v8.4s, v22.4s, v21.4s",
@@ -2775,7 +2731,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(8),
rn: vreg(22),
rm: vreg(2),
ty: I64X2,
size: VectorSize::Size64x2,
},
"C846E24E",
"sshl v8.2d, v22.2d, v2.2d",
@@ -2786,7 +2742,7 @@ fn test_aarch64_binemit() {
op: VecMisc2::Not,
rd: writable_vreg(2),
rn: vreg(1),
ty: I32X4,
size: VectorSize::Size32x4,
},
"2258206E",
"mvn v2.16b, v1.16b",
@@ -2797,7 +2753,7 @@ fn test_aarch64_binemit() {
op: VecMisc2::Neg,
rd: writable_vreg(8),
rn: vreg(12),
ty: I8X16,
size: VectorSize::Size8x16,
},
"88B9206E",
"neg v8.16b, v12.16b",
@@ -2808,7 +2764,7 @@ fn test_aarch64_binemit() {
op: VecMisc2::Neg,
rd: writable_vreg(0),
rn: vreg(31),
ty: I16X8,
size: VectorSize::Size16x8,
},
"E0BB606E",
"neg v0.8h, v31.8h",
@@ -2819,7 +2775,7 @@ fn test_aarch64_binemit() {
op: VecMisc2::Neg,
rd: writable_vreg(2),
rn: vreg(3),
ty: I32X4,
size: VectorSize::Size32x4,
},
"62B8A06E",
"neg v2.4s, v3.4s",
@@ -2830,7 +2786,7 @@ fn test_aarch64_binemit() {
op: VecMisc2::Neg,
rd: writable_vreg(10),
rn: vreg(8),
ty: I64X2,
size: VectorSize::Size64x2,
},
"0AB9E06E",
"neg v10.2d, v8.2d",
@@ -2841,7 +2797,7 @@ fn test_aarch64_binemit() {
op: VecLanesOp::Uminv,
rd: writable_vreg(2),
rn: vreg(1),
ty: I8X16,
size: VectorSize::Size8x16,
},
"22A8316E",
"uminv b2, v1.16b",
@@ -2852,7 +2808,7 @@ fn test_aarch64_binemit() {
op: VecLanesOp::Uminv,
rd: writable_vreg(3),
rn: vreg(11),
ty: I16X8,
size: VectorSize::Size16x8,
},
"63A9716E",
"uminv h3, v11.8h",
@@ -2863,7 +2819,7 @@ fn test_aarch64_binemit() {
op: VecLanesOp::Uminv,
rd: writable_vreg(18),
rn: vreg(4),
ty: I32X4,
size: VectorSize::Size32x4,
},
"92A8B16E",
"uminv s18, v4.4s",
@@ -3214,7 +3170,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(1),
rn: vreg(30),
idx: 2,
size: ScalarSize::Size32,
size: VectorSize::Size32x4,
},
"C107145E",
"mov s1, v30.s[2]",
@@ -3225,7 +3181,7 @@ fn test_aarch64_binemit() {
rd: writable_vreg(23),
rn: vreg(11),
idx: 0,
size: ScalarSize::Size64,
size: VectorSize::Size64x2,
},
"7705085E",
"mov d23, v11.d[0]",
@@ -3443,6 +3399,50 @@ fn test_aarch64_binemit() {
"fmin d15, d30, d31",
));
insns.push((
Inst::FpuRRR {
fpu_op: FPUOp2::Uqadd64,
rd: writable_vreg(21),
rn: vreg(22),
rm: vreg(23),
},
"D50EF77E",
"uqadd d21, d22, d23",
));
insns.push((
Inst::FpuRRR {
fpu_op: FPUOp2::Sqadd64,
rd: writable_vreg(21),
rn: vreg(22),
rm: vreg(23),
},
"D50EF75E",
"sqadd d21, d22, d23",
));
insns.push((
Inst::FpuRRR {
fpu_op: FPUOp2::Uqsub64,
rd: writable_vreg(21),
rn: vreg(22),
rm: vreg(23),
},
"D52EF77E",
"uqsub d21, d22, d23",
));
insns.push((
Inst::FpuRRR {
fpu_op: FPUOp2::Sqsub64,
rd: writable_vreg(21),
rn: vreg(22),
rm: vreg(23),
},
"D52EF75E",
"sqsub d21, d22, d23",
));
insns.push((
Inst::FpuRRRR {
fpu_op: FPUOp3::MAdd32,

View File

@@ -5,8 +5,8 @@
use crate::binemit::CodeOffset;
use crate::ir::types::{
B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X2, F32X4, F64, F64X2, FFLAGS, I16,
I16X4, I16X8, I32, I32X2, I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS, R32, R64,
B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X4, F64, F64X2, FFLAGS, I16, I16X8,
I32, I32X4, I64, I64X2, I8, I8X16, IFLAGS, R32, R64,
};
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
use crate::machinst::*;
@@ -125,6 +125,14 @@ pub enum FPUOp2 {
Max64,
Min32,
Min64,
/// Signed saturating add
Sqadd64,
/// Unsigned saturating add
Uqadd64,
/// Signed saturating subtract
Sqsub64,
/// Unsigned saturating subtract
Uqsub64,
}
/// A floating-point unit (FPU) operation with two args, a register and an immediate.
@@ -208,16 +216,12 @@ pub enum VecExtendOp {
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum VecALUOp {
/// Signed saturating add
SQAddScalar,
Sqadd,
/// Unsigned saturating add
UQAddScalar,
Uqadd,
/// Signed saturating subtract
SQSubScalar,
Sqsub,
/// Unsigned saturating subtract
UQSubScalar,
Uqsub,
/// Compare bitwise equal
Cmeq,
@@ -590,7 +594,7 @@ pub enum Inst {
rd: Writable<Reg>,
rn: Reg,
idx: u8,
size: ScalarSize,
size: VectorSize,
},
/// 1-op FPU instruction.
@@ -734,21 +738,21 @@ pub enum Inst {
rd: Writable<Reg>,
rn: Reg,
idx: u8,
ty: Type,
size: VectorSize,
},
/// Duplicate general-purpose register to vector.
VecDup {
rd: Writable<Reg>,
rn: Reg,
ty: Type,
size: VectorSize,
},
/// Duplicate scalar to vector.
VecDupFromFpu {
rd: Writable<Reg>,
rn: Reg,
ty: Type,
size: VectorSize,
},
/// Vector extend.
@@ -764,7 +768,7 @@ pub enum Inst {
rd: Writable<Reg>,
rn: Reg,
rm: Reg,
ty: Type,
size: VectorSize,
},
/// Vector two register miscellaneous instruction.
@@ -772,7 +776,7 @@ pub enum Inst {
op: VecMisc2,
rd: Writable<Reg>,
rn: Reg,
ty: Type,
size: VectorSize,
},
/// Vector instruction across lanes.
@@ -780,7 +784,7 @@ pub enum Inst {
op: VecLanesOp,
rd: Writable<Reg>,
rn: Reg,
ty: Type,
size: VectorSize,
},
/// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
@@ -2504,13 +2508,8 @@ impl Inst {
format!("mov {}.16b, {}.16b", rd, rn)
}
&Inst::FpuMoveFromVec { rd, rn, idx, size } => {
let vector_type = match size {
ScalarSize::Size32 => F32,
ScalarSize::Size64 => F64,
_ => unimplemented!(),
};
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
let rn = show_vreg_element(rn, mb_rru, idx, vector_type);
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
let rn = show_vreg_element(rn, mb_rru, idx, size);
format!("mov {}, {}", rd, rn)
}
&Inst::FpuRR { fpu_op, rd, rn } => {
@@ -2542,6 +2541,10 @@ impl Inst {
FPUOp2::Max64 => ("fmax", ScalarSize::Size64),
FPUOp2::Min32 => ("fmin", ScalarSize::Size32),
FPUOp2::Min64 => ("fmin", ScalarSize::Size64),
FPUOp2::Sqadd64 => ("sqadd", ScalarSize::Size64),
FPUOp2::Uqadd64 => ("uqadd", ScalarSize::Size64),
FPUOp2::Sqsub64 => ("sqsub", ScalarSize::Size64),
FPUOp2::Uqsub64 => ("uqsub", ScalarSize::Size64),
};
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
let rn = show_vreg_scalar(rn, mb_rru, size);
@@ -2557,7 +2560,7 @@ impl Inst {
};
let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>) -> String = if vector {
|reg, mb_rru| show_vreg_vector(reg, mb_rru, F32X2)
|reg, mb_rru| show_vreg_vector(reg, mb_rru, VectorSize::Size32x2)
} else {
|reg, mb_rru| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64)
};
@@ -2706,45 +2709,36 @@ impl Inst {
let rn = rn.show_rru(mb_rru);
format!("mov {}.d[0], {}", rd, rn)
}
&Inst::MovFromVec { rd, rn, idx, ty } => {
let op = match ty {
I32 | I64 => "mov",
_ => "umov",
&Inst::MovFromVec { rd, rn, idx, size } => {
let op = match size {
VectorSize::Size8x16 => "umov",
VectorSize::Size16x8 => "umov",
VectorSize::Size32x4 => "mov",
VectorSize::Size64x2 => "mov",
_ => unimplemented!(),
};
let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::from_ty(ty));
let rn = show_vreg_element(rn, mb_rru, idx, ty);
let rd = show_ireg_sized(rd.to_reg(), mb_rru, size.operand_size());
let rn = show_vreg_element(rn, mb_rru, idx, size);
format!("{} {}, {}", op, rd, rn)
}
&Inst::VecDup { rd, rn, ty } => {
let vector_type = match ty {
I8 => I8X16,
I16 => I16X8,
I32 => I32X4,
I64 => I64X2,
_ => unimplemented!(),
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, vector_type);
let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_ty(ty));
&Inst::VecDup { rd, rn, size } => {
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
format!("dup {}, {}", rd, rn)
}
&Inst::VecDupFromFpu { rd, rn, ty } => {
let vector_type = match ty {
F32 => F32X4,
F64 => F64X2,
_ => unimplemented!(),
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, vector_type);
let rn = show_vreg_element(rn, mb_rru, 0, ty);
&Inst::VecDupFromFpu { rd, rn, size } => {
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
let rn = show_vreg_element(rn, mb_rru, 0, size);
format!("dup {}, {}", rd, rn)
}
&Inst::VecExtend { t, rd, rn } => {
let (op, dest, src) = match t {
VecExtendOp::Sxtl8 => ("sxtl", I16X8, I8X8),
VecExtendOp::Sxtl16 => ("sxtl", I32X4, I16X4),
VecExtendOp::Sxtl32 => ("sxtl", I64X2, I32X2),
VecExtendOp::Uxtl8 => ("uxtl", I16X8, I8X8),
VecExtendOp::Uxtl16 => ("uxtl", I32X4, I16X4),
VecExtendOp::Uxtl32 => ("uxtl", I64X2, I32X2),
VecExtendOp::Sxtl8 => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8),
VecExtendOp::Sxtl16 => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4),
VecExtendOp::Sxtl32 => ("sxtl", VectorSize::Size64x2, VectorSize::Size32x2),
VecExtendOp::Uxtl8 => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8),
VecExtendOp::Uxtl16 => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4),
VecExtendOp::Uxtl32 => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2),
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest);
let rn = show_vreg_vector(rn, mb_rru, src);
@@ -2755,72 +2749,54 @@ impl Inst {
rn,
rm,
alu_op,
ty,
size,
} => {
let (op, vector, ty) = match alu_op {
VecALUOp::SQAddScalar => ("sqadd", false, ty),
VecALUOp::Sqadd => ("sqadd", true, ty),
VecALUOp::UQAddScalar => ("uqadd", false, ty),
VecALUOp::Uqadd => ("uqadd", true, ty),
VecALUOp::SQSubScalar => ("sqsub", false, ty),
VecALUOp::Sqsub => ("sqsub", true, ty),
VecALUOp::UQSubScalar => ("uqsub", false, ty),
VecALUOp::Uqsub => ("uqsub", true, ty),
VecALUOp::Cmeq => ("cmeq", true, ty),
VecALUOp::Cmge => ("cmge", true, ty),
VecALUOp::Cmgt => ("cmgt", true, ty),
VecALUOp::Cmhs => ("cmhs", true, ty),
VecALUOp::Cmhi => ("cmhi", true, ty),
VecALUOp::Fcmeq => ("fcmeq", true, ty),
VecALUOp::Fcmgt => ("fcmgt", true, ty),
VecALUOp::Fcmge => ("fcmge", true, ty),
VecALUOp::And => ("and", true, I8X16),
VecALUOp::Bic => ("bic", true, I8X16),
VecALUOp::Orr => ("orr", true, I8X16),
VecALUOp::Eor => ("eor", true, I8X16),
VecALUOp::Bsl => ("bsl", true, I8X16),
VecALUOp::Umaxp => ("umaxp", true, ty),
VecALUOp::Add => ("add", true, ty),
VecALUOp::Sub => ("sub", true, ty),
VecALUOp::Mul => ("mul", true, ty),
VecALUOp::Sshl => ("sshl", true, ty),
VecALUOp::Ushl => ("ushl", true, ty),
let (op, size) = match alu_op {
VecALUOp::Sqadd => ("sqadd", size),
VecALUOp::Uqadd => ("uqadd", size),
VecALUOp::Sqsub => ("sqsub", size),
VecALUOp::Uqsub => ("uqsub", size),
VecALUOp::Cmeq => ("cmeq", size),
VecALUOp::Cmge => ("cmge", size),
VecALUOp::Cmgt => ("cmgt", size),
VecALUOp::Cmhs => ("cmhs", size),
VecALUOp::Cmhi => ("cmhi", size),
VecALUOp::Fcmeq => ("fcmeq", size),
VecALUOp::Fcmgt => ("fcmgt", size),
VecALUOp::Fcmge => ("fcmge", size),
VecALUOp::And => ("and", VectorSize::Size8x16),
VecALUOp::Bic => ("bic", VectorSize::Size8x16),
VecALUOp::Orr => ("orr", VectorSize::Size8x16),
VecALUOp::Eor => ("eor", VectorSize::Size8x16),
VecALUOp::Bsl => ("bsl", VectorSize::Size8x16),
VecALUOp::Umaxp => ("umaxp", size),
VecALUOp::Add => ("add", size),
VecALUOp::Sub => ("sub", size),
VecALUOp::Mul => ("mul", size),
VecALUOp::Sshl => ("sshl", size),
VecALUOp::Ushl => ("ushl", size),
};
let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>, Type) -> String = if vector {
|reg, mb_rru, ty| show_vreg_vector(reg, mb_rru, ty)
} else {
|reg, mb_rru, _ty| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64)
};
let rd = show_vreg_fn(rd.to_reg(), mb_rru, ty);
let rn = show_vreg_fn(rn, mb_rru, ty);
let rm = show_vreg_fn(rm, mb_rru, ty);
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
let rn = show_vreg_vector(rn, mb_rru, size);
let rm = show_vreg_vector(rm, mb_rru, size);
format!("{} {}, {}, {}", op, rd, rn, rm)
}
&Inst::VecMisc { op, rd, rn, ty } => {
let (op, ty) = match op {
VecMisc2::Not => ("mvn", I8X16),
VecMisc2::Neg => ("neg", ty),
&Inst::VecMisc { op, rd, rn, size } => {
let (op, size) = match op {
VecMisc2::Not => ("mvn", VectorSize::Size8x16),
VecMisc2::Neg => ("neg", size),
};
let rd = show_vreg_vector(rd.to_reg(), mb_rru, ty);
let rn = show_vreg_vector(rn, mb_rru, ty);
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
let rn = show_vreg_vector(rn, mb_rru, size);
format!("{} {}, {}", op, rd, rn)
}
&Inst::VecLanes { op, rd, rn, ty } => {
&Inst::VecLanes { op, rd, rn, size } => {
let op = match op {
VecLanesOp::Uminv => "uminv",
};
let size = match ty {
I8X16 => ScalarSize::Size8,
I16X8 => ScalarSize::Size16,
I32X4 => ScalarSize::Size32,
_ => unimplemented!(),
};
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
let rn = show_vreg_vector(rn, mb_rru, ty);
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size());
let rn = show_vreg_vector(rn, mb_rru, size);
format!("{} {}, {}", op, rd, rn)
}
&Inst::MovToNZCV { rn } => {

View File

@@ -1,8 +1,8 @@
//! AArch64 ISA definitions: registers.
use crate::ir::types::*;
use crate::isa::aarch64::inst::OperandSize;
use crate::isa::aarch64::inst::ScalarSize;
use crate::isa::aarch64::inst::VectorSize;
use crate::machinst::*;
use crate::settings;
@@ -307,40 +307,42 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: Scalar
}
/// Show a vector register.
pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> String {
pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: VectorSize) -> String {
assert_eq!(RegClass::V128, reg.get_class());
let mut s = reg.show_rru(mb_rru);
match ty {
F32X2 => s.push_str(".2s"),
F32X4 => s.push_str(".4s"),
F64X2 => s.push_str(".2d"),
I8X8 => s.push_str(".8b"),
I8X16 => s.push_str(".16b"),
I16X4 => s.push_str(".4h"),
I16X8 => s.push_str(".8h"),
I32X2 => s.push_str(".2s"),
I32X4 => s.push_str(".4s"),
I64X2 => s.push_str(".2d"),
_ => unimplemented!(),
}
let suffix = match size {
VectorSize::Size8x8 => ".8b",
VectorSize::Size8x16 => ".16b",
VectorSize::Size16x4 => ".4h",
VectorSize::Size16x8 => ".8h",
VectorSize::Size32x2 => ".2s",
VectorSize::Size32x4 => ".4s",
VectorSize::Size64x2 => ".2d",
};
s.push_str(suffix);
s
}
/// Show an indexed vector element.
pub fn show_vreg_element(reg: Reg, mb_rru: Option<&RealRegUniverse>, idx: u8, ty: Type) -> String {
pub fn show_vreg_element(
reg: Reg,
mb_rru: Option<&RealRegUniverse>,
idx: u8,
size: VectorSize,
) -> String {
assert_eq!(RegClass::V128, reg.get_class());
let mut s = reg.show_rru(mb_rru);
let suffix = match ty {
I8 => "b",
I16 => "h",
I32 => "s",
I64 => "d",
F32 => "s",
F64 => "d",
_ => unimplemented!(),
let suffix = match size {
VectorSize::Size8x8 => "b",
VectorSize::Size8x16 => "b",
VectorSize::Size16x4 => "h",
VectorSize::Size16x8 => "h",
VectorSize::Size32x2 => "s",
VectorSize::Size32x4 => "s",
VectorSize::Size64x2 => "d",
};
s.push_str(&format!(".{}[{}]", suffix, idx));

View File

@@ -14,7 +14,7 @@ use crate::ir::Inst as IRInst;
use crate::ir::{InstructionData, Opcode, TrapCode, Type};
use crate::machinst::lower::*;
use crate::machinst::*;
use crate::{CodegenError, CodegenResult};
use crate::CodegenResult;
use crate::isa::aarch64::inst::*;
use crate::isa::aarch64::AArch64Backend;
@@ -736,20 +736,11 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
ty: Type,
cond: Cond,
) -> CodegenResult<()> {
match ty {
F32X4 | F64X2 | I8X16 | I16X8 | I32X4 => {}
_ => {
return Err(CodegenError::Unsupported(format!(
"unsupported SIMD type: {:?}",
ty
)));
}
};
let is_float = match ty {
F32X4 | F64X2 => true,
_ => false,
};
let size = VectorSize::from_ty(ty);
// 'Less than' operations are implemented by swapping
// the order of operands and using the 'greater than'
// instructions.
@@ -784,7 +775,7 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
rd,
rn,
rm,
ty,
size,
});
if cond == Cond::Ne {
@@ -792,7 +783,7 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
op: VecMisc2::Not,
rd,
rn: rd.to_reg(),
ty: I8X16,
size,
});
}

View File

@@ -70,7 +70,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rn,
rm,
alu_op: VecALUOp::Add,
ty,
size: VectorSize::from_ty(ty),
});
}
}
@@ -89,13 +89,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rn,
rm,
alu_op: VecALUOp::Sub,
ty,
size: VectorSize::from_ty(ty),
});
}
}
Opcode::UaddSat | Opcode::SaddSat | Opcode::UsubSat | Opcode::SsubSat => {
// We use the vector instruction set's saturating adds (UQADD /
// SQADD), which require vector registers.
// We use the scalar SIMD & FP saturating additions and subtractions
// (SQADD / UQADD / SQSUB / UQSUB), which require scalar FP registers.
let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat;
let ty = ty.unwrap();
let rd = get_output_reg(ctx, outputs[0]);
@@ -105,11 +105,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
} else {
NarrowValueMode::ZeroExtend64
};
let alu_op = match op {
Opcode::UaddSat => VecALUOp::UQAddScalar,
Opcode::SaddSat => VecALUOp::SQAddScalar,
Opcode::UsubSat => VecALUOp::UQSubScalar,
Opcode::SsubSat => VecALUOp::SQSubScalar,
let fpu_op = match op {
Opcode::UaddSat => FPUOp2::Uqadd64,
Opcode::SaddSat => FPUOp2::Sqadd64,
Opcode::UsubSat => FPUOp2::Uqsub64,
Opcode::SsubSat => FPUOp2::Sqsub64,
_ => unreachable!(),
};
let va = ctx.alloc_tmp(RegClass::V128, I128);
@@ -118,18 +118,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
ctx.emit(Inst::VecRRR {
ctx.emit(Inst::FpuRRR {
fpu_op,
rd: va,
rn: va.to_reg(),
rm: vb.to_reg(),
alu_op,
ty: I64,
});
ctx.emit(Inst::MovFromVec {
rd,
rn: va.to_reg(),
idx: 0,
ty: I64,
size: VectorSize::Size64x2,
});
} else {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
@@ -148,7 +147,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rn,
rm,
alu_op,
ty,
size: VectorSize::from_ty(ty),
});
}
}
@@ -167,7 +166,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
op: VecMisc2::Neg,
rd,
rn,
ty,
size: VectorSize::from_ty(ty),
});
}
}
@@ -192,7 +191,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rd,
rn,
rm,
ty,
size: VectorSize::from_ty(ty),
});
}
}
@@ -422,7 +421,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
op: VecMisc2::Not,
rd,
rn: rm,
ty,
size: VectorSize::from_ty(ty),
});
}
}
@@ -466,7 +465,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rd,
rn,
rm,
ty,
size: VectorSize::from_ty(ty),
});
}
}
@@ -495,7 +494,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
ctx.emit(alu_inst_immshift(alu_op, rd, rn, rm));
} else {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let size = VectorSize::from_ty(ty);
let (alu_op, is_right_shift) = match op {
Opcode::Ishl => (VecALUOp::Sshl, false),
Opcode::Ushr => (VecALUOp::Ushl, true),
@@ -514,18 +513,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
put_input_in_reg(ctx, inputs[1], NarrowValueMode::None)
};
ctx.emit(Inst::VecDup {
rd,
rn: rm,
ty: ty.lane_type(),
});
ctx.emit(Inst::VecDup { rd, rn: rm, size });
ctx.emit(Inst::VecRRR {
alu_op,
rd,
rn,
rm: rd.to_reg(),
ty,
size,
});
}
}
@@ -1167,7 +1162,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rd,
rn,
rm,
ty,
size: VectorSize::from_ty(ty),
});
}
}
@@ -1297,7 +1292,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rd,
rn,
idx: 0,
ty: I64,
size: VectorSize::Size64x2,
});
}
}
@@ -1557,15 +1552,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
let idx = *imm;
let rd = get_output_reg(ctx, outputs[0]);
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let size = VectorSize::from_ty(ctx.input_ty(insn, 0));
let ty = ty.unwrap();
if ty_is_int(ty) {
ctx.emit(Inst::MovFromVec { rd, rn, idx, ty });
ctx.emit(Inst::MovFromVec { rd, rn, idx, size });
// Plain moves are faster on some processors.
} else if idx == 0 {
ctx.emit(Inst::gen_move(rd, rn, ty));
} else {
let size = ScalarSize::from_ty(ty);
ctx.emit(Inst::FpuMoveFromVec { rd, rn, idx, size });
}
} else {
@@ -1576,11 +1571,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
Opcode::Splat => {
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
let rd = get_output_reg(ctx, outputs[0]);
let ty = ctx.input_ty(insn, 0);
let inst = if ty_is_int(ty) {
Inst::VecDup { rd, rn, ty }
let input_ty = ctx.input_ty(insn, 0);
let size = VectorSize::from_ty(ty.unwrap());
let inst = if ty_is_int(input_ty) {
Inst::VecDup { rd, rn, size }
} else {
Inst::VecDupFromFpu { rd, rn, ty }
Inst::VecDupFromFpu { rd, rn, size }
};
ctx.emit(inst);
}
@@ -1598,21 +1594,22 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// cmp xm, #0
// cset xm, ne
let input_ty = ctx.input_ty(insn, 0);
let size = VectorSize::from_ty(ctx.input_ty(insn, 0));
if op == Opcode::VanyTrue {
ctx.emit(Inst::VecRRR {
alu_op: VecALUOp::Umaxp,
rd: tmp,
rn: rm,
rm: rm,
ty: input_ty,
size,
});
} else {
ctx.emit(Inst::VecLanes {
op: VecLanesOp::Uminv,
rd: tmp,
rn: rm,
ty: input_ty,
size,
});
};
@@ -1620,7 +1617,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
rd,
rn: tmp.to_reg(),
idx: 0,
ty: I64,
size: VectorSize::Size64x2,
});
ctx.emit(Inst::AluRRImm12 {