Merge pull request #2067 from akirilov-arm/simd_lane
Enable the spec::simd::simd_lane test for AArch64
This commit is contained in:
1
build.rs
1
build.rs
@@ -228,6 +228,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
|||||||
("simd", "simd_i32x4_arith") => return false,
|
("simd", "simd_i32x4_arith") => return false,
|
||||||
("simd", "simd_i32x4_arith2") => return false,
|
("simd", "simd_i32x4_arith2") => return false,
|
||||||
("simd", "simd_i32x4_cmp") => return false,
|
("simd", "simd_i32x4_cmp") => return false,
|
||||||
|
("simd", "simd_lane") => return false,
|
||||||
("simd", "simd_load_extend") => return false,
|
("simd", "simd_load_extend") => return false,
|
||||||
("simd", "simd_load_splat") => return false,
|
("simd", "simd_load_splat") => return false,
|
||||||
("simd", "simd_store") => return false,
|
("simd", "simd_store") => return false,
|
||||||
|
|||||||
@@ -378,6 +378,16 @@ fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable<Reg>, rn:
|
|||||||
| machreg_to_vec(rd.to_reg())
|
| machreg_to_vec(rd.to_reg())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Assembles the 32-bit instruction word for a vector table lookup.
///
/// The result is a fixed base bit pattern OR-ed with the operand fields:
/// * `rm`'s vector register number (via `machreg_to_vec`), shifted to bit 16;
/// * `len`, shifted to bit 13 - it must fit in two bits, which is checked by
///   the `debug_assert_eq!` (i.e. only in builds with debug assertions);
/// * `is_extension` as a single bit at position 12;
/// * `rn`'s vector register number, shifted to bit 5;
/// * `rd`'s vector register number, unshifted, in the lowest bits.
///
/// `<<` binds tighter than `|` in Rust, so the unparenthesized `| len << 13`
/// term means `| (len << 13)`.
///
/// The emitter call sites shown with this function pass `len = 0b00` for the
/// single-register-table form (`Inst::VecTbl`) and `len = 0b01` for the
/// two-register-table form (`Inst::VecTbl2`); `is_extension` selects TBX
/// rather than TBL.
fn enc_tbl(is_extension: bool, len: u32, rd: Writable<Reg>, rn: Reg, rm: Reg) -> u32 {
|
||||||
|
debug_assert_eq!(len & 0b11, len);
|
||||||
|
0b0_1_001110_000_00000_0_00_0_00_00000_00000
|
||||||
|
| (machreg_to_vec(rm) << 16)
|
||||||
|
| len << 13
|
||||||
|
| (is_extension as u32) << 12
|
||||||
|
| (machreg_to_vec(rn) << 5)
|
||||||
|
| machreg_to_vec(rd.to_reg())
|
||||||
|
}
|
||||||
|
|
||||||
fn enc_dmb_ish() -> u32 {
|
fn enc_dmb_ish() -> u32 {
|
||||||
0xD5033BBF
|
0xD5033BBF
|
||||||
}
|
}
|
||||||
@@ -1396,6 +1406,24 @@ impl MachInstEmit for Inst {
|
|||||||
};
|
};
|
||||||
sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
|
sink.put4(enc_vec_lanes(q, u, size, opcode, rd, rn));
|
||||||
}
|
}
|
||||||
|
&Inst::VecTbl {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
is_extension,
|
||||||
|
} => {
|
||||||
|
sink.put4(enc_tbl(is_extension, 0b00, rd, rn, rm));
|
||||||
|
}
|
||||||
|
&Inst::VecTbl2 {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rn2,
|
||||||
|
rm,
|
||||||
|
is_extension,
|
||||||
|
} => {
|
||||||
|
assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
|
||||||
|
sink.put4(enc_tbl(is_extension, 0b01, rd, rn, rm));
|
||||||
|
}
|
||||||
&Inst::FpuCmp32 { rn, rm } => {
|
&Inst::FpuCmp32 { rn, rm } => {
|
||||||
sink.put4(enc_fcmp(ScalarSize::Size32, rn, rm));
|
sink.put4(enc_fcmp(ScalarSize::Size32, rn, rm));
|
||||||
}
|
}
|
||||||
@@ -1505,9 +1533,26 @@ impl MachInstEmit for Inst {
|
|||||||
};
|
};
|
||||||
sink.put4(enc_fround(top22, rd, rn));
|
sink.put4(enc_fround(top22, rd, rn));
|
||||||
}
|
}
|
||||||
&Inst::MovToVec64 { rd, rn } => {
|
&Inst::MovToFpu { rd, rn } => {
|
||||||
sink.put4(
|
sink.put4(
|
||||||
0b010_01110000_01000_0_0011_1_00000_00000
|
0b100_11110_01_1_00_111_000000_00000_00000
|
||||||
|
| (machreg_to_gpr(rn) << 5)
|
||||||
|
| machreg_to_vec(rd.to_reg()),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
&Inst::MovToVec { rd, rn, idx, size } => {
|
||||||
|
let (imm5, shift) = match size.lane_size() {
|
||||||
|
ScalarSize::Size8 => (0b00001, 1),
|
||||||
|
ScalarSize::Size16 => (0b00010, 2),
|
||||||
|
ScalarSize::Size32 => (0b00100, 3),
|
||||||
|
ScalarSize::Size64 => (0b01000, 4),
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
debug_assert_eq!(idx & (0b11111 >> shift), idx);
|
||||||
|
let imm5 = imm5 | ((idx as u32) << shift);
|
||||||
|
sink.put4(
|
||||||
|
0b010_01110000_00000_0_0011_1_00000_00000
|
||||||
|
| (imm5 << 16)
|
||||||
| (machreg_to_gpr(rn) << 5)
|
| (machreg_to_gpr(rn) << 5)
|
||||||
| machreg_to_vec(rd.to_reg()),
|
| machreg_to_vec(rd.to_reg()),
|
||||||
);
|
);
|
||||||
@@ -1607,6 +1652,33 @@ impl MachInstEmit for Inst {
|
|||||||
| machreg_to_vec(rd.to_reg()),
|
| machreg_to_vec(rd.to_reg()),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
&Inst::VecMovElement {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
idx1,
|
||||||
|
idx2,
|
||||||
|
size,
|
||||||
|
} => {
|
||||||
|
let (imm5, shift) = match size.lane_size() {
|
||||||
|
ScalarSize::Size8 => (0b00001, 1),
|
||||||
|
ScalarSize::Size16 => (0b00010, 2),
|
||||||
|
ScalarSize::Size32 => (0b00100, 3),
|
||||||
|
ScalarSize::Size64 => (0b01000, 4),
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
let mask = 0b11111 >> shift;
|
||||||
|
debug_assert_eq!(idx1 & mask, idx1);
|
||||||
|
debug_assert_eq!(idx2 & mask, idx2);
|
||||||
|
let imm4 = (idx2 as u32) << (shift - 1);
|
||||||
|
let imm5 = imm5 | ((idx1 as u32) << shift);
|
||||||
|
sink.put4(
|
||||||
|
0b011_01110000_00000_0_0000_1_00000_00000
|
||||||
|
| (imm5 << 16)
|
||||||
|
| (imm4 << 11)
|
||||||
|
| (machreg_to_vec(rn) << 5)
|
||||||
|
| machreg_to_vec(rd.to_reg()),
|
||||||
|
);
|
||||||
|
}
|
||||||
&Inst::VecRRR {
|
&Inst::VecRRR {
|
||||||
rd,
|
rd,
|
||||||
rn,
|
rn,
|
||||||
|
|||||||
@@ -1829,9 +1829,29 @@ fn test_aarch64_binemit() {
|
|||||||
"ccmp w3, #30, #NZCV, gt",
|
"ccmp w3, #30, #NZCV, gt",
|
||||||
));
|
));
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::MovToVec64 {
|
Inst::MovToFpu {
|
||||||
|
rd: writable_vreg(31),
|
||||||
|
rn: xreg(0),
|
||||||
|
},
|
||||||
|
"1F00679E",
|
||||||
|
"fmov d31, x0",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::MovToVec {
|
||||||
|
rd: writable_vreg(0),
|
||||||
|
rn: xreg(0),
|
||||||
|
idx: 7,
|
||||||
|
size: VectorSize::Size8x8,
|
||||||
|
},
|
||||||
|
"001C0F4E",
|
||||||
|
"mov v0.b[7], w0",
|
||||||
|
));
|
||||||
|
insns.push((
|
||||||
|
Inst::MovToVec {
|
||||||
rd: writable_vreg(20),
|
rd: writable_vreg(20),
|
||||||
rn: xreg(21),
|
rn: xreg(21),
|
||||||
|
idx: 0,
|
||||||
|
size: VectorSize::Size64x2,
|
||||||
},
|
},
|
||||||
"B41E084E",
|
"B41E084E",
|
||||||
"mov v20.d[0], x21",
|
"mov v20.d[0], x21",
|
||||||
@@ -2041,6 +2061,30 @@ fn test_aarch64_binemit() {
|
|||||||
"uxtl v28.2d, v2.2s",
|
"uxtl v28.2d, v2.2s",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecMovElement {
|
||||||
|
rd: writable_vreg(0),
|
||||||
|
rn: vreg(31),
|
||||||
|
idx1: 7,
|
||||||
|
idx2: 7,
|
||||||
|
size: VectorSize::Size16x8,
|
||||||
|
},
|
||||||
|
"E0771E6E",
|
||||||
|
"mov v0.h[7], v31.h[7]",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecMovElement {
|
||||||
|
rd: writable_vreg(31),
|
||||||
|
rn: vreg(16),
|
||||||
|
idx1: 1,
|
||||||
|
idx2: 0,
|
||||||
|
size: VectorSize::Size32x2,
|
||||||
|
},
|
||||||
|
"1F060C6E",
|
||||||
|
"mov v31.s[1], v16.s[0]",
|
||||||
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::VecRRR {
|
Inst::VecRRR {
|
||||||
alu_op: VecALUOp::Sqadd,
|
alu_op: VecALUOp::Sqadd,
|
||||||
@@ -3190,6 +3234,52 @@ fn test_aarch64_binemit() {
|
|||||||
"uminv s18, v4.4s",
|
"uminv s18, v4.4s",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecTbl {
|
||||||
|
rd: writable_vreg(0),
|
||||||
|
rn: vreg(31),
|
||||||
|
rm: vreg(16),
|
||||||
|
is_extension: false,
|
||||||
|
},
|
||||||
|
"E003104E",
|
||||||
|
"tbl v0.16b, { v31.16b }, v16.16b",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecTbl {
|
||||||
|
rd: writable_vreg(4),
|
||||||
|
rn: vreg(12),
|
||||||
|
rm: vreg(23),
|
||||||
|
is_extension: true,
|
||||||
|
},
|
||||||
|
"8411174E",
|
||||||
|
"tbx v4.16b, { v12.16b }, v23.16b",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecTbl2 {
|
||||||
|
rd: writable_vreg(16),
|
||||||
|
rn: vreg(31),
|
||||||
|
rn2: vreg(0),
|
||||||
|
rm: vreg(26),
|
||||||
|
is_extension: false,
|
||||||
|
},
|
||||||
|
"F0231A4E",
|
||||||
|
"tbl v16.16b, { v31.16b, v0.16b }, v26.16b",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecTbl2 {
|
||||||
|
rd: writable_vreg(3),
|
||||||
|
rn: vreg(11),
|
||||||
|
rn2: vreg(12),
|
||||||
|
rm: vreg(19),
|
||||||
|
is_extension: true,
|
||||||
|
},
|
||||||
|
"6331134E",
|
||||||
|
"tbx v3.16b, { v11.16b, v12.16b }, v19.16b",
|
||||||
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::Extend {
|
Inst::Extend {
|
||||||
rd: writable_xreg(1),
|
rd: writable_xreg(1),
|
||||||
|
|||||||
@@ -819,12 +819,20 @@ pub enum Inst {
|
|||||||
rn: Reg,
|
rn: Reg,
|
||||||
},
|
},
|
||||||
|
|
||||||
/// Move to a vector register from a GPR.
|
/// Move from a GPR to a scalar FP register.
|
||||||
MovToVec64 {
|
MovToFpu {
|
||||||
rd: Writable<Reg>,
|
rd: Writable<Reg>,
|
||||||
rn: Reg,
|
rn: Reg,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// Move to a vector element from a GPR.
|
||||||
|
MovToVec {
|
||||||
|
rd: Writable<Reg>,
|
||||||
|
rn: Reg,
|
||||||
|
idx: u8,
|
||||||
|
size: VectorSize,
|
||||||
|
},
|
||||||
|
|
||||||
/// Unsigned move from a vector element to a GPR.
|
/// Unsigned move from a vector element to a GPR.
|
||||||
MovFromVec {
|
MovFromVec {
|
||||||
rd: Writable<Reg>,
|
rd: Writable<Reg>,
|
||||||
@@ -863,6 +871,15 @@ pub enum Inst {
|
|||||||
rn: Reg,
|
rn: Reg,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// Move vector element to another vector element.
|
||||||
|
VecMovElement {
|
||||||
|
rd: Writable<Reg>,
|
||||||
|
rn: Reg,
|
||||||
|
idx1: u8,
|
||||||
|
idx2: u8,
|
||||||
|
size: VectorSize,
|
||||||
|
},
|
||||||
|
|
||||||
/// A vector ALU op.
|
/// A vector ALU op.
|
||||||
VecRRR {
|
VecRRR {
|
||||||
alu_op: VecALUOp,
|
alu_op: VecALUOp,
|
||||||
@@ -888,6 +905,32 @@ pub enum Inst {
|
|||||||
size: VectorSize,
|
size: VectorSize,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// Table vector lookup - single register table. The table consists of 8-bit elements and is
|
||||||
|
/// stored in `rn`, while `rm` contains 8-bit element indices. `is_extension` specifies whether
|
||||||
|
/// to emit a TBX or a TBL instruction, i.e. whether to leave the elements in the destination
|
||||||
|
/// vector that correspond to out-of-range indices (greater than 15) unmodified or to set them
|
||||||
|
/// to 0.
|
||||||
|
VecTbl {
|
||||||
|
rd: Writable<Reg>,
|
||||||
|
rn: Reg,
|
||||||
|
rm: Reg,
|
||||||
|
is_extension: bool,
|
||||||
|
},
|
||||||
|
|
||||||
|
/// Table vector lookup - two register table. The table consists of 8-bit elements and is
|
||||||
|
/// stored in `rn` and `rn2`, while `rm` contains 8-bit element indices. `is_extension`
|
||||||
|
/// specifies whether to emit a TBX or a TBL instruction, i.e. whether to leave the elements in
|
||||||
|
/// the destination vector that correspond to out-of-range indices (greater than 31) unmodified
|
||||||
|
/// or to set them to 0. The table registers `rn` and `rn2` must have consecutive numbers
|
||||||
|
/// modulo 32, that is v31 and v0 (in that order) are consecutive registers.
|
||||||
|
VecTbl2 {
|
||||||
|
rd: Writable<Reg>,
|
||||||
|
rn: Reg,
|
||||||
|
rn2: Reg,
|
||||||
|
rm: Reg,
|
||||||
|
is_extension: bool,
|
||||||
|
},
|
||||||
|
|
||||||
/// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
|
/// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
|
||||||
MovToNZCV {
|
MovToNZCV {
|
||||||
rn: Reg,
|
rn: Reg,
|
||||||
@@ -1377,6 +1420,39 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
collector.add_def(rd);
|
collector.add_def(rd);
|
||||||
collector.add_use(rn);
|
collector.add_use(rn);
|
||||||
}
|
}
|
||||||
|
&Inst::VecTbl {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
is_extension,
|
||||||
|
} => {
|
||||||
|
collector.add_use(rn);
|
||||||
|
collector.add_use(rm);
|
||||||
|
|
||||||
|
if is_extension {
|
||||||
|
collector.add_mod(rd);
|
||||||
|
} else {
|
||||||
|
collector.add_def(rd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
&Inst::VecTbl2 {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rn2,
|
||||||
|
rm,
|
||||||
|
is_extension,
|
||||||
|
} => {
|
||||||
|
collector.add_use(rn);
|
||||||
|
collector.add_use(rn2);
|
||||||
|
collector.add_use(rm);
|
||||||
|
|
||||||
|
if is_extension {
|
||||||
|
collector.add_mod(rd);
|
||||||
|
} else {
|
||||||
|
collector.add_def(rd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
&Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
|
&Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
|
||||||
collector.add_use(rn);
|
collector.add_use(rn);
|
||||||
collector.add_use(rm);
|
collector.add_use(rm);
|
||||||
@@ -1427,10 +1503,14 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
collector.add_def(rd);
|
collector.add_def(rd);
|
||||||
collector.add_use(rn);
|
collector.add_use(rn);
|
||||||
}
|
}
|
||||||
&Inst::MovToVec64 { rd, rn } => {
|
&Inst::MovToFpu { rd, rn } => {
|
||||||
collector.add_def(rd);
|
collector.add_def(rd);
|
||||||
collector.add_use(rn);
|
collector.add_use(rn);
|
||||||
}
|
}
|
||||||
|
&Inst::MovToVec { rd, rn, .. } => {
|
||||||
|
collector.add_mod(rd);
|
||||||
|
collector.add_use(rn);
|
||||||
|
}
|
||||||
&Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => {
|
&Inst::MovFromVec { rd, rn, .. } | &Inst::MovFromVecSigned { rd, rn, .. } => {
|
||||||
collector.add_def(rd);
|
collector.add_def(rd);
|
||||||
collector.add_use(rn);
|
collector.add_use(rn);
|
||||||
@@ -1447,6 +1527,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
collector.add_def(rd);
|
collector.add_def(rd);
|
||||||
collector.add_use(rn);
|
collector.add_use(rn);
|
||||||
}
|
}
|
||||||
|
&Inst::VecMovElement { rd, rn, .. } => {
|
||||||
|
collector.add_mod(rd);
|
||||||
|
collector.add_use(rn);
|
||||||
|
}
|
||||||
&Inst::VecRRR {
|
&Inst::VecRRR {
|
||||||
alu_op, rd, rn, rm, ..
|
alu_op, rd, rn, rm, ..
|
||||||
} => {
|
} => {
|
||||||
@@ -1905,6 +1989,38 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
map_def(mapper, rd);
|
map_def(mapper, rd);
|
||||||
map_use(mapper, rn);
|
map_use(mapper, rn);
|
||||||
}
|
}
|
||||||
|
&mut Inst::VecTbl {
|
||||||
|
ref mut rd,
|
||||||
|
ref mut rn,
|
||||||
|
ref mut rm,
|
||||||
|
is_extension,
|
||||||
|
} => {
|
||||||
|
map_use(mapper, rn);
|
||||||
|
map_use(mapper, rm);
|
||||||
|
|
||||||
|
if is_extension {
|
||||||
|
map_mod(mapper, rd);
|
||||||
|
} else {
|
||||||
|
map_def(mapper, rd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
&mut Inst::VecTbl2 {
|
||||||
|
ref mut rd,
|
||||||
|
ref mut rn,
|
||||||
|
ref mut rn2,
|
||||||
|
ref mut rm,
|
||||||
|
is_extension,
|
||||||
|
} => {
|
||||||
|
map_use(mapper, rn);
|
||||||
|
map_use(mapper, rn2);
|
||||||
|
map_use(mapper, rm);
|
||||||
|
|
||||||
|
if is_extension {
|
||||||
|
map_mod(mapper, rd);
|
||||||
|
} else {
|
||||||
|
map_def(mapper, rd);
|
||||||
|
}
|
||||||
|
}
|
||||||
&mut Inst::FpuCmp32 {
|
&mut Inst::FpuCmp32 {
|
||||||
ref mut rn,
|
ref mut rn,
|
||||||
ref mut rm,
|
ref mut rm,
|
||||||
@@ -2020,13 +2136,21 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
map_def(mapper, rd);
|
map_def(mapper, rd);
|
||||||
map_use(mapper, rn);
|
map_use(mapper, rn);
|
||||||
}
|
}
|
||||||
&mut Inst::MovToVec64 {
|
&mut Inst::MovToFpu {
|
||||||
ref mut rd,
|
ref mut rd,
|
||||||
ref mut rn,
|
ref mut rn,
|
||||||
} => {
|
} => {
|
||||||
map_def(mapper, rd);
|
map_def(mapper, rd);
|
||||||
map_use(mapper, rn);
|
map_use(mapper, rn);
|
||||||
}
|
}
|
||||||
|
&mut Inst::MovToVec {
|
||||||
|
ref mut rd,
|
||||||
|
ref mut rn,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
map_mod(mapper, rd);
|
||||||
|
map_use(mapper, rn);
|
||||||
|
}
|
||||||
&mut Inst::MovFromVec {
|
&mut Inst::MovFromVec {
|
||||||
ref mut rd,
|
ref mut rd,
|
||||||
ref mut rn,
|
ref mut rn,
|
||||||
@@ -2064,6 +2188,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
map_def(mapper, rd);
|
map_def(mapper, rd);
|
||||||
map_use(mapper, rn);
|
map_use(mapper, rn);
|
||||||
}
|
}
|
||||||
|
&mut Inst::VecMovElement {
|
||||||
|
ref mut rd,
|
||||||
|
ref mut rn,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
map_mod(mapper, rd);
|
||||||
|
map_use(mapper, rn);
|
||||||
|
}
|
||||||
&mut Inst::VecRRR {
|
&mut Inst::VecRRR {
|
||||||
alu_op,
|
alu_op,
|
||||||
ref mut rd,
|
ref mut rd,
|
||||||
@@ -2871,10 +3003,15 @@ impl Inst {
|
|||||||
let rn = show_vreg_scalar(rn, mb_rru, size);
|
let rn = show_vreg_scalar(rn, mb_rru, size);
|
||||||
format!("{} {}, {}", inst, rd, rn)
|
format!("{} {}, {}", inst, rd, rn)
|
||||||
}
|
}
|
||||||
&Inst::MovToVec64 { rd, rn } => {
|
&Inst::MovToFpu { rd, rn } => {
|
||||||
let rd = rd.to_reg().show_rru(mb_rru);
|
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, ScalarSize::Size64);
|
||||||
let rn = rn.show_rru(mb_rru);
|
let rn = show_ireg_sized(rn, mb_rru, OperandSize::Size64);
|
||||||
format!("mov {}.d[0], {}", rd, rn)
|
format!("fmov {}, {}", rd, rn)
|
||||||
|
}
|
||||||
|
&Inst::MovToVec { rd, rn, idx, size } => {
|
||||||
|
let rd = show_vreg_element(rd.to_reg(), mb_rru, idx, size);
|
||||||
|
let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
|
||||||
|
format!("mov {}, {}", rd, rn)
|
||||||
}
|
}
|
||||||
&Inst::MovFromVec { rd, rn, idx, size } => {
|
&Inst::MovFromVec { rd, rn, idx, size } => {
|
||||||
let op = match size {
|
let op = match size {
|
||||||
@@ -2922,6 +3059,17 @@ impl Inst {
|
|||||||
let rn = show_vreg_vector(rn, mb_rru, src);
|
let rn = show_vreg_vector(rn, mb_rru, src);
|
||||||
format!("{} {}, {}", op, rd, rn)
|
format!("{} {}, {}", op, rd, rn)
|
||||||
}
|
}
|
||||||
|
&Inst::VecMovElement {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
idx1,
|
||||||
|
idx2,
|
||||||
|
size,
|
||||||
|
} => {
|
||||||
|
let rd = show_vreg_element(rd.to_reg(), mb_rru, idx1, size);
|
||||||
|
let rn = show_vreg_element(rn, mb_rru, idx2, size);
|
||||||
|
format!("mov {}, {}", rd, rn)
|
||||||
|
}
|
||||||
&Inst::VecRRR {
|
&Inst::VecRRR {
|
||||||
rd,
|
rd,
|
||||||
rn,
|
rn,
|
||||||
@@ -2992,6 +3140,32 @@ impl Inst {
|
|||||||
let rn = show_vreg_vector(rn, mb_rru, size);
|
let rn = show_vreg_vector(rn, mb_rru, size);
|
||||||
format!("{} {}, {}", op, rd, rn)
|
format!("{} {}, {}", op, rd, rn)
|
||||||
}
|
}
|
||||||
|
&Inst::VecTbl {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
is_extension,
|
||||||
|
} => {
|
||||||
|
let op = if is_extension { "tbx" } else { "tbl" };
|
||||||
|
let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
|
||||||
|
let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
|
||||||
|
let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
|
||||||
|
format!("{} {}, {{ {} }}, {}", op, rd, rn, rm)
|
||||||
|
}
|
||||||
|
&Inst::VecTbl2 {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rn2,
|
||||||
|
rm,
|
||||||
|
is_extension,
|
||||||
|
} => {
|
||||||
|
let op = if is_extension { "tbx" } else { "tbl" };
|
||||||
|
let rd = show_vreg_vector(rd.to_reg(), mb_rru, VectorSize::Size8x16);
|
||||||
|
let rn = show_vreg_vector(rn, mb_rru, VectorSize::Size8x16);
|
||||||
|
let rn2 = show_vreg_vector(rn2, mb_rru, VectorSize::Size8x16);
|
||||||
|
let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
|
||||||
|
format!("{} {}, {{ {}, {} }}, {}", op, rd, rn, rn2, rm)
|
||||||
|
}
|
||||||
&Inst::MovToNZCV { rn } => {
|
&Inst::MovToNZCV { rn } => {
|
||||||
let rn = rn.show_rru(mb_rru);
|
let rn = rn.show_rru(mb_rru);
|
||||||
format!("msr nzcv, {}", rn)
|
format!("msr nzcv, {}", rn)
|
||||||
|
|||||||
@@ -142,28 +142,26 @@ pub(crate) fn input_to_shiftimm<C: LowerCtx<I = Inst>>(
|
|||||||
input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
|
input_to_const(ctx, input).and_then(ShiftOpShiftImm::maybe_from_shift)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn output_to_const_f128<C: LowerCtx<I = Inst>>(
|
pub(crate) fn const_param_to_u128<C: LowerCtx<I = Inst>>(
|
||||||
ctx: &mut C,
|
ctx: &mut C,
|
||||||
out: InsnOutput,
|
inst: IRInst,
|
||||||
) -> Option<u128> {
|
) -> Option<u128> {
|
||||||
if out.output > 0 {
|
let data = match ctx.data(inst) {
|
||||||
None
|
&InstructionData::Shuffle { mask, .. } => ctx.get_immediate(mask),
|
||||||
} else {
|
|
||||||
let inst_data = ctx.data(out.insn);
|
|
||||||
|
|
||||||
match inst_data {
|
|
||||||
&InstructionData::UnaryConst {
|
&InstructionData::UnaryConst {
|
||||||
opcode: _,
|
constant_handle, ..
|
||||||
constant_handle,
|
} => ctx.get_constant_data(constant_handle),
|
||||||
} => {
|
_ => return None,
|
||||||
|
};
|
||||||
|
let data = data.clone().into_vec();
|
||||||
|
|
||||||
|
if data.len() == 16 {
|
||||||
let mut bytes = [0u8; 16];
|
let mut bytes = [0u8; 16];
|
||||||
let c = ctx.get_constant_data(constant_handle).clone().into_vec();
|
|
||||||
assert_eq!(c.len(), 16);
|
bytes.copy_from_slice(&data);
|
||||||
bytes.copy_from_slice(&c);
|
|
||||||
Some(u128::from_le_bytes(bytes))
|
Some(u128::from_le_bytes(bytes))
|
||||||
}
|
} else {
|
||||||
_ => None,
|
None
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1016,7 +1014,8 @@ pub fn ty_bits(ty: Type) -> usize {
|
|||||||
pub(crate) fn ty_is_int(ty: Type) -> bool {
|
pub(crate) fn ty_is_int(ty: Type) -> bool {
|
||||||
match ty {
|
match ty {
|
||||||
B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 | R32 | R64 => true,
|
B1 | B8 | I8 | B16 | I16 | B32 | I32 | B64 | I64 | R32 | R64 => true,
|
||||||
F32 | F64 | B128 | I128 | I8X8 | I8X16 | I16X4 | I16X8 | I32X2 | I32X4 | I64X2 => false,
|
F32 | F64 | B128 | F32X2 | F32X4 | F64X2 | I128 | I8X8 | I8X16 | I16X4 | I16X8 | I32X2
|
||||||
|
| I32X4 | I64X2 => false,
|
||||||
IFLAGS | FFLAGS => panic!("Unexpected flags type"),
|
IFLAGS | FFLAGS => panic!("Unexpected flags type"),
|
||||||
_ => panic!("ty_is_int() on unknown type: {:?}", ty),
|
_ => panic!("ty_is_int() on unknown type: {:?}", ty),
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -141,8 +141,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
let vb = ctx.alloc_tmp(RegClass::V128, I128);
|
let vb = ctx.alloc_tmp(RegClass::V128, I128);
|
||||||
let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
let ra = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
||||||
let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
|
let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
|
||||||
ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
|
ctx.emit(Inst::MovToFpu { rd: va, rn: ra });
|
||||||
ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
|
ctx.emit(Inst::MovToFpu { rd: vb, rn: rb });
|
||||||
ctx.emit(Inst::FpuRRR {
|
ctx.emit(Inst::FpuRRR {
|
||||||
fpu_op,
|
fpu_op,
|
||||||
rd: va,
|
rd: va,
|
||||||
@@ -1537,7 +1537,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
(false, true) => {
|
(false, true) => {
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
|
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::ZeroExtend64);
|
||||||
ctx.emit(Inst::MovToVec64 { rd, rn });
|
ctx.emit(Inst::MovToFpu { rd, rn });
|
||||||
}
|
}
|
||||||
(true, false) => {
|
(true, false) => {
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
@@ -1789,7 +1789,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Vconst => {
|
Opcode::Vconst => {
|
||||||
let value = output_to_const_f128(ctx, outputs[0]).unwrap();
|
let value = const_param_to_u128(ctx, insn).expect("Invalid immediate bytes");
|
||||||
let rd = get_output_reg(ctx, outputs[0]);
|
let rd = get_output_reg(ctx, outputs[0]);
|
||||||
lower_constant_f128(ctx, rd, value);
|
lower_constant_f128(ctx, rd, value);
|
||||||
}
|
}
|
||||||
@@ -1822,6 +1822,34 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::Insertlane => {
|
||||||
|
let idx = if let InstructionData::TernaryImm8 { imm, .. } = ctx.data(insn) {
|
||||||
|
*imm
|
||||||
|
} else {
|
||||||
|
unreachable!();
|
||||||
|
};
|
||||||
|
let input_ty = ctx.input_ty(insn, 1);
|
||||||
|
let rd = get_output_reg(ctx, outputs[0]);
|
||||||
|
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
|
let rn = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||||
|
let ty = ty.unwrap();
|
||||||
|
let size = VectorSize::from_ty(ty);
|
||||||
|
|
||||||
|
ctx.emit(Inst::gen_move(rd, rm, ty));
|
||||||
|
|
||||||
|
if ty_is_int(input_ty) {
|
||||||
|
ctx.emit(Inst::MovToVec { rd, rn, idx, size });
|
||||||
|
} else {
|
||||||
|
ctx.emit(Inst::VecMovElement {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
idx1: idx,
|
||||||
|
idx2: 0,
|
||||||
|
size,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::Splat => {
|
Opcode::Splat => {
|
||||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
let rd = get_output_reg(ctx, outputs[0]);
|
let rd = get_output_reg(ctx, outputs[0]);
|
||||||
@@ -1885,12 +1913,51 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
normalize_bool_result(ctx, insn, rd);
|
normalize_bool_result(ctx, insn, rd);
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::Shuffle
|
Opcode::Shuffle => {
|
||||||
| Opcode::Vsplit
|
let mask = const_param_to_u128(ctx, insn).expect("Invalid immediate mask bytes");
|
||||||
|
let rd = get_output_reg(ctx, outputs[0]);
|
||||||
|
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
|
let rn2 = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||||
|
// 2 register table vector lookups require consecutive table registers;
|
||||||
|
// we satisfy this constraint by hardcoding the usage of v29 and v30.
|
||||||
|
let temp = writable_vreg(29);
|
||||||
|
let temp2 = writable_vreg(30);
|
||||||
|
let input_ty = ctx.input_ty(insn, 0);
|
||||||
|
assert_eq!(input_ty, ctx.input_ty(insn, 1));
|
||||||
|
// Make sure that both inputs are in virtual registers, since it is
|
||||||
|
// not guaranteed that we can get them safely to the temporaries if
|
||||||
|
// either is in a real register.
|
||||||
|
let rn = ctx.ensure_in_vreg(rn, input_ty);
|
||||||
|
let rn2 = ctx.ensure_in_vreg(rn2, input_ty);
|
||||||
|
|
||||||
|
lower_constant_f128(ctx, rd, mask);
|
||||||
|
ctx.emit(Inst::gen_move(temp, rn, input_ty));
|
||||||
|
ctx.emit(Inst::gen_move(temp2, rn2, input_ty));
|
||||||
|
ctx.emit(Inst::VecTbl2 {
|
||||||
|
rd,
|
||||||
|
rn: temp.to_reg(),
|
||||||
|
rn2: temp2.to_reg(),
|
||||||
|
rm: rd.to_reg(),
|
||||||
|
is_extension: false,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Opcode::Swizzle => {
|
||||||
|
let rd = get_output_reg(ctx, outputs[0]);
|
||||||
|
let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
|
||||||
|
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||||
|
|
||||||
|
ctx.emit(Inst::VecTbl {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
rm,
|
||||||
|
is_extension: false,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Opcode::Vsplit
|
||||||
| Opcode::Vconcat
|
| Opcode::Vconcat
|
||||||
| Opcode::Insertlane
|
|
||||||
| Opcode::ScalarToVector
|
| Opcode::ScalarToVector
|
||||||
| Opcode::Swizzle
|
|
||||||
| Opcode::Uload8x8Complex
|
| Opcode::Uload8x8Complex
|
||||||
| Opcode::Sload8x8Complex
|
| Opcode::Sload8x8Complex
|
||||||
| Opcode::Uload16x4Complex
|
| Opcode::Uload16x4Complex
|
||||||
|
|||||||
@@ -8,8 +8,9 @@ use crate::inst_predicates::{has_side_effect_or_load, is_constant_64bit};
|
|||||||
use crate::ir::instructions::BranchInfo;
|
use crate::ir::instructions::BranchInfo;
|
||||||
use crate::ir::types::I64;
|
use crate::ir::types::I64;
|
||||||
use crate::ir::{
|
use crate::ir::{
|
||||||
ArgumentPurpose, Block, Constant, ConstantData, ExternalName, Function, GlobalValueData, Inst,
|
ArgumentPurpose, Block, Constant, ConstantData, ExternalName, Function, GlobalValueData,
|
||||||
InstructionData, MemFlags, Opcode, Signature, SourceLoc, Type, Value, ValueDef,
|
Immediate, Inst, InstructionData, MemFlags, Opcode, Signature, SourceLoc, Type, Value,
|
||||||
|
ValueDef,
|
||||||
};
|
};
|
||||||
use crate::machinst::{
|
use crate::machinst::{
|
||||||
ABIBody, BlockIndex, BlockLoweringOrder, LoweredBlock, MachLabel, VCode, VCodeBuilder,
|
ABIBody, BlockIndex, BlockLoweringOrder, LoweredBlock, MachLabel, VCode, VCodeBuilder,
|
||||||
@@ -160,6 +161,8 @@ pub trait LowerCtx {
|
|||||||
fn is_reg_needed(&self, ir_inst: Inst, reg: Reg) -> bool;
|
fn is_reg_needed(&self, ir_inst: Inst, reg: Reg) -> bool;
|
||||||
/// Retrieve constant data given a handle.
|
/// Retrieve constant data given a handle.
|
||||||
fn get_constant_data(&self, constant_handle: Constant) -> &ConstantData;
|
fn get_constant_data(&self, constant_handle: Constant) -> &ConstantData;
|
||||||
|
/// Retrieve an immediate given a reference.
|
||||||
|
fn get_immediate(&self, imm: Immediate) -> &ConstantData;
|
||||||
/// Cause the value in `reg` to be in a virtual reg, by copying it into a new virtual reg
|
/// Cause the value in `reg` to be in a virtual reg, by copying it into a new virtual reg
|
||||||
/// if `reg` is a real reg. `ty` describes the type of the value in `reg`.
|
/// if `reg` is a real reg. `ty` describes the type of the value in `reg`.
|
||||||
fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg;
|
fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg;
|
||||||
@@ -997,6 +1000,10 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
|
|||||||
self.f.dfg.constants.get(constant_handle)
|
self.f.dfg.constants.get(constant_handle)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Looks `imm` up in the function's `dfg.immediates` table and returns a
// reference to the stored `ConstantData`.
//
// The lookup result is unwrapped, so this panics if `get` does not yield a
// value for `imm`.
fn get_immediate(&self, imm: Immediate) -> &ConstantData {
|
||||||
|
self.f.dfg.immediates.get(imm).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg {
|
fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg {
|
||||||
if reg.is_virtual() {
|
if reg.is_virtual() {
|
||||||
reg
|
reg
|
||||||
|
|||||||
@@ -9,8 +9,8 @@ block0(v0: i64, v1: i64):
|
|||||||
|
|
||||||
; check: stp fp, lr, [sp, #-16]!
|
; check: stp fp, lr, [sp, #-16]!
|
||||||
; nextln: mov fp, sp
|
; nextln: mov fp, sp
|
||||||
; nextln: mov v0.d[0], x0
|
; nextln: fmov d0, x0
|
||||||
; nextln: mov v1.d[0], x1
|
; nextln: fmov d1, x1
|
||||||
; nextln: uqadd d0, d0, d1
|
; nextln: uqadd d0, d0, d1
|
||||||
; nextln: mov x0, v0.d[0]
|
; nextln: mov x0, v0.d[0]
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
@@ -27,8 +27,8 @@ block0(v0: i8, v1: i8):
|
|||||||
; nextln: mov fp, sp
|
; nextln: mov fp, sp
|
||||||
; nextln: uxtb x0, w0
|
; nextln: uxtb x0, w0
|
||||||
; nextln: uxtb x1, w1
|
; nextln: uxtb x1, w1
|
||||||
; nextln: mov v0.d[0], x0
|
; nextln: fmov d0, x0
|
||||||
; nextln: mov v1.d[0], x1
|
; nextln: fmov d1, x1
|
||||||
; nextln: uqadd d0, d0, d1
|
; nextln: uqadd d0, d0, d1
|
||||||
; nextln: mov x0, v0.d[0]
|
; nextln: mov x0, v0.d[0]
|
||||||
; nextln: mov sp, fp
|
; nextln: mov sp, fp
|
||||||
|
|||||||
Reference in New Issue
Block a user