Introduce the Cranelift IR instruction `LoadSplat`
`LoadSplat` corresponds to WebAssembly's `load*_splat` operations, which were previously represented as a combination of `Load` and `Splat` instructions. However, some architectures, such as Armv8-A, have a single machine instruction equivalent to the Wasm operations. To generate that instruction from the old representation, the backend would have to merge the `Load` and the `Splat`, which is not possible because the load may have side effects. The new IR instruction works around this limitation. The AArch64 backend leverages the new instruction to improve code generation. Copyright (c) 2020, Arm Limited.
This commit is contained in:
@@ -396,6 +396,7 @@ fn define_simd(
|
|||||||
let insertlane = insts.by_name("insertlane");
|
let insertlane = insts.by_name("insertlane");
|
||||||
let ishl = insts.by_name("ishl");
|
let ishl = insts.by_name("ishl");
|
||||||
let ishl_imm = insts.by_name("ishl_imm");
|
let ishl_imm = insts.by_name("ishl_imm");
|
||||||
|
let load_splat = insts.by_name("load_splat");
|
||||||
let raw_bitcast = insts.by_name("raw_bitcast");
|
let raw_bitcast = insts.by_name("raw_bitcast");
|
||||||
let scalar_to_vector = insts.by_name("scalar_to_vector");
|
let scalar_to_vector = insts.by_name("scalar_to_vector");
|
||||||
let splat = insts.by_name("splat");
|
let splat = insts.by_name("splat");
|
||||||
@@ -820,6 +821,7 @@ fn define_simd(
|
|||||||
narrow.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat_vector");
|
narrow.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat_vector");
|
||||||
narrow.custom_legalize(fmin, "expand_minmax_vector");
|
narrow.custom_legalize(fmin, "expand_minmax_vector");
|
||||||
narrow.custom_legalize(fmax, "expand_minmax_vector");
|
narrow.custom_legalize(fmax, "expand_minmax_vector");
|
||||||
|
narrow.custom_legalize(load_splat, "expand_load_splat");
|
||||||
|
|
||||||
narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
|
narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
|
||||||
narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector");
|
narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector");
|
||||||
|
|||||||
@@ -4409,5 +4409,24 @@ pub(crate) fn define(
|
|||||||
.other_side_effects(true),
|
.other_side_effects(true),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let Offset = &Operand::new("Offset", &imm.offset32).with_doc("Byte offset from base address");
|
||||||
|
let a = &Operand::new("a", TxN);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"load_splat",
|
||||||
|
r#"
|
||||||
|
Load an element from memory at ``p + Offset`` and return a vector
|
||||||
|
whose lanes are all set to that element.
|
||||||
|
|
||||||
|
This is equivalent to ``load`` followed by ``splat``.
|
||||||
|
"#,
|
||||||
|
&formats.load,
|
||||||
|
)
|
||||||
|
.operands_in(vec![MemFlags, p, Offset])
|
||||||
|
.operands_out(vec![a])
|
||||||
|
.can_load(true),
|
||||||
|
);
|
||||||
|
|
||||||
ig.build()
|
ig.build()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -680,4 +680,19 @@ impl VectorSize {
|
|||||||
_ => *self,
|
_ => *self,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return the encoding bits that are used by some SIMD instructions
|
||||||
|
/// for a particular operand size.
|
||||||
|
pub fn enc_size(&self) -> (u32, u32) {
|
||||||
|
let q = self.is_128bits() as u32;
|
||||||
|
let size = match self.lane_size() {
|
||||||
|
ScalarSize::Size8 => 0b00,
|
||||||
|
ScalarSize::Size16 => 0b01,
|
||||||
|
ScalarSize::Size32 => 0b10,
|
||||||
|
ScalarSize::Size64 => 0b11,
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
|
||||||
|
(q, size)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -248,6 +248,16 @@ fn enc_ldst_imm19(op_31_24: u32, imm19: u32, rd: Reg) -> u32 {
|
|||||||
(op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)
|
(op_31_24 << 24) | (imm19 << 5) | machreg_to_gpr_or_vec(rd)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn enc_ldst_vec(q: u32, size: u32, rn: Reg, rt: Writable<Reg>) -> u32 {
|
||||||
|
debug_assert_eq!(q & 0b1, q);
|
||||||
|
debug_assert_eq!(size & 0b11, size);
|
||||||
|
0b0_0_0011010_10_00000_110_0_00_00000_00000
|
||||||
|
| q << 30
|
||||||
|
| size << 10
|
||||||
|
| machreg_to_gpr(rn) << 5
|
||||||
|
| machreg_to_vec(rt.to_reg())
|
||||||
|
}
|
||||||
|
|
||||||
fn enc_extend(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
|
fn enc_extend(top22: u32, rd: Writable<Reg>, rn: Reg) -> u32 {
|
||||||
(top22 << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
|
(top22 << 10) | (machreg_to_gpr(rn) << 5) | machreg_to_gpr(rd.to_reg())
|
||||||
}
|
}
|
||||||
@@ -1380,14 +1390,7 @@ impl MachInstEmit for Inst {
|
|||||||
sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
|
sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra));
|
||||||
}
|
}
|
||||||
&Inst::VecMisc { op, rd, rn, size } => {
|
&Inst::VecMisc { op, rd, rn, size } => {
|
||||||
let enc_size = match size.lane_size() {
|
let (q, enc_size) = size.enc_size();
|
||||||
ScalarSize::Size8 => 0b00,
|
|
||||||
ScalarSize::Size16 => 0b01,
|
|
||||||
ScalarSize::Size32 => 0b10,
|
|
||||||
ScalarSize::Size64 => 0b11,
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
let q = if size.is_128bits() { 1 } else { 0 };
|
|
||||||
let (u, bits_12_16, size) = match op {
|
let (u, bits_12_16, size) = match op {
|
||||||
VecMisc2::Not => (0b1, 0b00101, 0b00),
|
VecMisc2::Not => (0b1, 0b00101, 0b00),
|
||||||
VecMisc2::Neg => (0b1, 0b01011, enc_size),
|
VecMisc2::Neg => (0b1, 0b01011, enc_size),
|
||||||
@@ -1756,13 +1759,7 @@ impl MachInstEmit for Inst {
|
|||||||
alu_op,
|
alu_op,
|
||||||
size,
|
size,
|
||||||
} => {
|
} => {
|
||||||
let enc_size = match size.lane_size() {
|
let (q, enc_size) = size.enc_size();
|
||||||
ScalarSize::Size8 => 0b00,
|
|
||||||
ScalarSize::Size16 => 0b01,
|
|
||||||
ScalarSize::Size32 => 0b10,
|
|
||||||
ScalarSize::Size64 => 0b11,
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
let is_float = match alu_op {
|
let is_float = match alu_op {
|
||||||
VecALUOp::Fcmeq
|
VecALUOp::Fcmeq
|
||||||
| VecALUOp::Fcmgt
|
| VecALUOp::Fcmgt
|
||||||
@@ -1776,6 +1773,7 @@ impl MachInstEmit for Inst {
|
|||||||
_ => false,
|
_ => false,
|
||||||
};
|
};
|
||||||
let enc_float_size = match (is_float, size) {
|
let enc_float_size = match (is_float, size) {
|
||||||
|
(true, VectorSize::Size32x2) => 0b0,
|
||||||
(true, VectorSize::Size32x4) => 0b0,
|
(true, VectorSize::Size32x4) => 0b0,
|
||||||
(true, VectorSize::Size64x2) => 0b1,
|
(true, VectorSize::Size64x2) => 0b1,
|
||||||
(true, _) => unimplemented!(),
|
(true, _) => unimplemented!(),
|
||||||
@@ -1783,58 +1781,73 @@ impl MachInstEmit for Inst {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let (top11, bit15_10) = match alu_op {
|
let (top11, bit15_10) = match alu_op {
|
||||||
VecALUOp::Sqadd => (0b010_01110_00_1 | enc_size << 1, 0b000011),
|
VecALUOp::Sqadd => (0b000_01110_00_1 | enc_size << 1, 0b000011),
|
||||||
VecALUOp::Sqsub => (0b010_01110_00_1 | enc_size << 1, 0b001011),
|
VecALUOp::Sqsub => (0b000_01110_00_1 | enc_size << 1, 0b001011),
|
||||||
VecALUOp::Uqadd => (0b011_01110_00_1 | enc_size << 1, 0b000011),
|
VecALUOp::Uqadd => (0b001_01110_00_1 | enc_size << 1, 0b000011),
|
||||||
VecALUOp::Uqsub => (0b011_01110_00_1 | enc_size << 1, 0b001011),
|
VecALUOp::Uqsub => (0b001_01110_00_1 | enc_size << 1, 0b001011),
|
||||||
VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size << 1, 0b100011),
|
VecALUOp::Cmeq => (0b001_01110_00_1 | enc_size << 1, 0b100011),
|
||||||
VecALUOp::Cmge => (0b010_01110_00_1 | enc_size << 1, 0b001111),
|
VecALUOp::Cmge => (0b000_01110_00_1 | enc_size << 1, 0b001111),
|
||||||
VecALUOp::Cmgt => (0b010_01110_00_1 | enc_size << 1, 0b001101),
|
VecALUOp::Cmgt => (0b000_01110_00_1 | enc_size << 1, 0b001101),
|
||||||
VecALUOp::Cmhi => (0b011_01110_00_1 | enc_size << 1, 0b001101),
|
VecALUOp::Cmhi => (0b001_01110_00_1 | enc_size << 1, 0b001101),
|
||||||
VecALUOp::Cmhs => (0b011_01110_00_1 | enc_size << 1, 0b001111),
|
VecALUOp::Cmhs => (0b001_01110_00_1 | enc_size << 1, 0b001111),
|
||||||
VecALUOp::Fcmeq => (0b010_01110_00_1, 0b111001),
|
VecALUOp::Fcmeq => (0b000_01110_00_1, 0b111001),
|
||||||
VecALUOp::Fcmgt => (0b011_01110_10_1, 0b111001),
|
VecALUOp::Fcmgt => (0b001_01110_10_1, 0b111001),
|
||||||
VecALUOp::Fcmge => (0b011_01110_00_1, 0b111001),
|
VecALUOp::Fcmge => (0b001_01110_00_1, 0b111001),
|
||||||
// The following logical instructions operate on bytes, so are not encoded differently
|
// The following logical instructions operate on bytes, so are not encoded differently
|
||||||
// for the different vector types.
|
// for the different vector types.
|
||||||
VecALUOp::And => (0b010_01110_00_1, 0b000111),
|
VecALUOp::And => (0b000_01110_00_1, 0b000111),
|
||||||
VecALUOp::Bic => (0b010_01110_01_1, 0b000111),
|
VecALUOp::Bic => (0b000_01110_01_1, 0b000111),
|
||||||
VecALUOp::Orr => (0b010_01110_10_1, 0b000111),
|
VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
|
||||||
VecALUOp::Eor => (0b011_01110_00_1, 0b000111),
|
VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
|
||||||
VecALUOp::Bsl => (0b011_01110_01_1, 0b000111),
|
VecALUOp::Bsl => (0b001_01110_01_1, 0b000111),
|
||||||
VecALUOp::Umaxp => (0b011_01110_00_1 | enc_size << 1, 0b101001),
|
VecALUOp::Umaxp => (0b001_01110_00_1 | enc_size << 1, 0b101001),
|
||||||
VecALUOp::Add => (0b010_01110_00_1 | enc_size << 1, 0b100001),
|
VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
|
||||||
VecALUOp::Sub => (0b011_01110_00_1 | enc_size << 1, 0b100001),
|
VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
|
||||||
VecALUOp::Mul => {
|
VecALUOp::Mul => {
|
||||||
debug_assert_ne!(size, VectorSize::Size64x2);
|
debug_assert_ne!(size, VectorSize::Size64x2);
|
||||||
(0b010_01110_00_1 | enc_size << 1, 0b100111)
|
(0b000_01110_00_1 | enc_size << 1, 0b100111)
|
||||||
}
|
}
|
||||||
VecALUOp::Sshl => (0b010_01110_00_1 | enc_size << 1, 0b010001),
|
VecALUOp::Sshl => (0b000_01110_00_1 | enc_size << 1, 0b010001),
|
||||||
VecALUOp::Ushl => (0b011_01110_00_1 | enc_size << 1, 0b010001),
|
VecALUOp::Ushl => (0b001_01110_00_1 | enc_size << 1, 0b010001),
|
||||||
VecALUOp::Umin => (0b011_01110_00_1 | enc_size << 1, 0b011011),
|
VecALUOp::Umin => (0b001_01110_00_1 | enc_size << 1, 0b011011),
|
||||||
VecALUOp::Smin => (0b010_01110_00_1 | enc_size << 1, 0b011011),
|
VecALUOp::Smin => (0b000_01110_00_1 | enc_size << 1, 0b011011),
|
||||||
VecALUOp::Umax => (0b011_01110_00_1 | enc_size << 1, 0b011001),
|
VecALUOp::Umax => (0b001_01110_00_1 | enc_size << 1, 0b011001),
|
||||||
VecALUOp::Smax => (0b010_01110_00_1 | enc_size << 1, 0b011001),
|
VecALUOp::Smax => (0b000_01110_00_1 | enc_size << 1, 0b011001),
|
||||||
VecALUOp::Urhadd => (0b011_01110_00_1 | enc_size << 1, 0b000101),
|
VecALUOp::Urhadd => (0b001_01110_00_1 | enc_size << 1, 0b000101),
|
||||||
VecALUOp::Fadd => (0b010_01110_00_1, 0b110101),
|
VecALUOp::Fadd => (0b000_01110_00_1, 0b110101),
|
||||||
VecALUOp::Fsub => (0b010_01110_10_1, 0b110101),
|
VecALUOp::Fsub => (0b000_01110_10_1, 0b110101),
|
||||||
VecALUOp::Fdiv => (0b011_01110_00_1, 0b111111),
|
VecALUOp::Fdiv => (0b001_01110_00_1, 0b111111),
|
||||||
VecALUOp::Fmax => (0b010_01110_00_1, 0b111101),
|
VecALUOp::Fmax => (0b000_01110_00_1, 0b111101),
|
||||||
VecALUOp::Fmin => (0b010_01110_10_1, 0b111101),
|
VecALUOp::Fmin => (0b000_01110_10_1, 0b111101),
|
||||||
VecALUOp::Fmul => (0b011_01110_00_1, 0b110111),
|
VecALUOp::Fmul => (0b001_01110_00_1, 0b110111),
|
||||||
VecALUOp::Addp => (0b010_01110_00_1 | enc_size << 1, 0b101111),
|
VecALUOp::Addp => (0b000_01110_00_1 | enc_size << 1, 0b101111),
|
||||||
VecALUOp::Umlal => {
|
VecALUOp::Umlal => {
|
||||||
debug_assert!(!size.is_128bits());
|
debug_assert!(!size.is_128bits());
|
||||||
(0b001_01110_00_1 | enc_size << 1, 0b100000)
|
(0b001_01110_00_1 | enc_size << 1, 0b100000)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let top11 = if is_float {
|
let top11 = if is_float {
|
||||||
top11 | enc_float_size << 1
|
top11 | (q << 9) | enc_float_size << 1
|
||||||
} else {
|
} else {
|
||||||
top11
|
top11 | (q << 9)
|
||||||
};
|
};
|
||||||
sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
|
sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
|
||||||
}
|
}
|
||||||
|
&Inst::VecLoadReplicate {
|
||||||
|
rd,
|
||||||
|
rn,
|
||||||
|
size,
|
||||||
|
srcloc,
|
||||||
|
} => {
|
||||||
|
let (q, size) = size.enc_size();
|
||||||
|
|
||||||
|
if let Some(srcloc) = srcloc {
|
||||||
|
// Register the offset at which the actual load instruction starts.
|
||||||
|
sink.add_trap(srcloc, TrapCode::HeapOutOfBounds);
|
||||||
|
}
|
||||||
|
|
||||||
|
sink.put4(enc_ldst_vec(q, size, rn, rd));
|
||||||
|
}
|
||||||
&Inst::MovToNZCV { rn } => {
|
&Inst::MovToNZCV { rn } => {
|
||||||
sink.put4(0xd51b4200 | machreg_to_gpr(rn));
|
sink.put4(0xd51b4200 | machreg_to_gpr(rn));
|
||||||
}
|
}
|
||||||
@@ -2119,9 +2132,12 @@ impl MachInstEmit for Inst {
|
|||||||
inst.emit(sink, emit_info, state);
|
inst.emit(sink, emit_info, state);
|
||||||
}
|
}
|
||||||
|
|
||||||
let (reg, offset) = match mem {
|
let (reg, index_reg, offset) = match mem {
|
||||||
AMode::Unscaled(r, simm9) => (r, simm9.value()),
|
AMode::RegExtended(r, idx, extendop) => (r, Some((idx, extendop)), 0),
|
||||||
AMode::UnsignedOffset(r, uimm12scaled) => (r, uimm12scaled.value() as i32),
|
AMode::Unscaled(r, simm9) => (r, None, simm9.value()),
|
||||||
|
AMode::UnsignedOffset(r, uimm12scaled) => {
|
||||||
|
(r, None, uimm12scaled.value() as i32)
|
||||||
|
}
|
||||||
_ => panic!("Unsupported case for LoadAddr: {:?}", mem),
|
_ => panic!("Unsupported case for LoadAddr: {:?}", mem),
|
||||||
};
|
};
|
||||||
let abs_offset = if offset < 0 {
|
let abs_offset = if offset < 0 {
|
||||||
@@ -2135,9 +2151,22 @@ impl MachInstEmit for Inst {
|
|||||||
ALUOp::Add64
|
ALUOp::Add64
|
||||||
};
|
};
|
||||||
|
|
||||||
if offset == 0 {
|
if let Some((idx, extendop)) = index_reg {
|
||||||
let mov = Inst::mov(rd, reg);
|
let add = Inst::AluRRRExtend {
|
||||||
mov.emit(sink, emit_info, state);
|
alu_op: ALUOp::Add64,
|
||||||
|
rd,
|
||||||
|
rn: reg,
|
||||||
|
rm: idx,
|
||||||
|
extendop,
|
||||||
|
};
|
||||||
|
|
||||||
|
add.emit(sink, emit_info, state);
|
||||||
|
} else if offset == 0 {
|
||||||
|
if reg != rd.to_reg() {
|
||||||
|
let mov = Inst::mov(rd, reg);
|
||||||
|
|
||||||
|
mov.emit(sink, emit_info, state);
|
||||||
|
}
|
||||||
} else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
|
} else if let Some(imm12) = Imm12::maybe_from_u64(abs_offset) {
|
||||||
let add = Inst::AluRRImm12 {
|
let add = Inst::AluRRImm12 {
|
||||||
alu_op,
|
alu_op,
|
||||||
|
|||||||
@@ -2533,10 +2533,10 @@ fn test_aarch64_binemit() {
|
|||||||
rd: writable_vreg(28),
|
rd: writable_vreg(28),
|
||||||
rn: vreg(12),
|
rn: vreg(12),
|
||||||
rm: vreg(4),
|
rm: vreg(4),
|
||||||
size: VectorSize::Size32x4,
|
size: VectorSize::Size32x2,
|
||||||
},
|
},
|
||||||
"9CE5244E",
|
"9CE5240E",
|
||||||
"fcmeq v28.4s, v12.4s, v4.4s",
|
"fcmeq v28.2s, v12.2s, v4.2s",
|
||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
@@ -2965,10 +2965,10 @@ fn test_aarch64_binemit() {
|
|||||||
rd: writable_vreg(6),
|
rd: writable_vreg(6),
|
||||||
rn: vreg(9),
|
rn: vreg(9),
|
||||||
rm: vreg(8),
|
rm: vreg(8),
|
||||||
size: VectorSize::Size8x16,
|
size: VectorSize::Size8x8,
|
||||||
},
|
},
|
||||||
"2665286E",
|
"2665282E",
|
||||||
"umax v6.16b, v9.16b, v8.16b",
|
"umax v6.8b, v9.8b, v8.8b",
|
||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
@@ -3507,6 +3507,28 @@ fn test_aarch64_binemit() {
|
|||||||
"tbx v3.16b, { v11.16b, v12.16b }, v19.16b",
|
"tbx v3.16b, { v11.16b, v12.16b }, v19.16b",
|
||||||
));
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecLoadReplicate {
|
||||||
|
rd: writable_vreg(31),
|
||||||
|
rn: xreg(0),
|
||||||
|
srcloc: None,
|
||||||
|
size: VectorSize::Size64x2,
|
||||||
|
},
|
||||||
|
"1FCC404D",
|
||||||
|
"ld1r { v31.2d }, [x0]",
|
||||||
|
));
|
||||||
|
|
||||||
|
insns.push((
|
||||||
|
Inst::VecLoadReplicate {
|
||||||
|
rd: writable_vreg(0),
|
||||||
|
rn: xreg(25),
|
||||||
|
srcloc: None,
|
||||||
|
size: VectorSize::Size8x8,
|
||||||
|
},
|
||||||
|
"20C3400D",
|
||||||
|
"ld1r { v0.8b }, [x25]",
|
||||||
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::Extend {
|
Inst::Extend {
|
||||||
rd: writable_xreg(1),
|
rd: writable_xreg(1),
|
||||||
|
|||||||
@@ -975,6 +975,14 @@ pub enum Inst {
|
|||||||
is_extension: bool,
|
is_extension: bool,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// Load an element and replicate to all lanes of a vector.
|
||||||
|
VecLoadReplicate {
|
||||||
|
rd: Writable<Reg>,
|
||||||
|
rn: Reg,
|
||||||
|
size: VectorSize,
|
||||||
|
srcloc: Option<SourceLoc>,
|
||||||
|
},
|
||||||
|
|
||||||
/// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
|
/// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
|
||||||
MovToNZCV {
|
MovToNZCV {
|
||||||
rn: Reg,
|
rn: Reg,
|
||||||
@@ -1609,7 +1617,10 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
collector.add_def(rd);
|
collector.add_def(rd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
&Inst::VecLoadReplicate { rd, rn, .. } => {
|
||||||
|
collector.add_def(rd);
|
||||||
|
collector.add_use(rn);
|
||||||
|
}
|
||||||
&Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
|
&Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
|
||||||
collector.add_use(rn);
|
collector.add_use(rn);
|
||||||
collector.add_use(rm);
|
collector.add_use(rm);
|
||||||
@@ -1762,8 +1773,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
|
|||||||
&Inst::LoadExtName { rd, .. } => {
|
&Inst::LoadExtName { rd, .. } => {
|
||||||
collector.add_def(rd);
|
collector.add_def(rd);
|
||||||
}
|
}
|
||||||
&Inst::LoadAddr { rd, mem: _ } => {
|
&Inst::LoadAddr { rd, ref mem } => {
|
||||||
collector.add_def(rd);
|
collector.add_def(rd);
|
||||||
|
memarg_regs(mem, collector);
|
||||||
}
|
}
|
||||||
&Inst::VirtualSPOffsetAdj { .. } => {}
|
&Inst::VirtualSPOffsetAdj { .. } => {}
|
||||||
&Inst::EmitIsland { .. } => {}
|
&Inst::EmitIsland { .. } => {}
|
||||||
@@ -2189,6 +2201,14 @@ fn aarch64_map_regs<RUM: RegUsageMapper>(inst: &mut Inst, mapper: &RUM) {
|
|||||||
map_def(mapper, rd);
|
map_def(mapper, rd);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
&mut Inst::VecLoadReplicate {
|
||||||
|
ref mut rd,
|
||||||
|
ref mut rn,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
map_def(mapper, rd);
|
||||||
|
map_use(mapper, rn);
|
||||||
|
}
|
||||||
&mut Inst::FpuCmp32 {
|
&mut Inst::FpuCmp32 {
|
||||||
ref mut rn,
|
ref mut rn,
|
||||||
ref mut rm,
|
ref mut rm,
|
||||||
@@ -3412,6 +3432,12 @@ impl Inst {
|
|||||||
let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
|
let rm = show_vreg_vector(rm, mb_rru, VectorSize::Size8x16);
|
||||||
format!("{} {}, {{ {}, {} }}, {}", op, rd, rn, rn2, rm)
|
format!("{} {}, {{ {}, {} }}, {}", op, rd, rn, rn2, rm)
|
||||||
}
|
}
|
||||||
|
&Inst::VecLoadReplicate { rd, rn, size, .. } => {
|
||||||
|
let rd = show_vreg_vector(rd.to_reg(), mb_rru, size);
|
||||||
|
let rn = rn.show_rru(mb_rru);
|
||||||
|
|
||||||
|
format!("ld1r {{ {} }}, [{}]", rd, rn)
|
||||||
|
}
|
||||||
&Inst::MovToNZCV { rn } => {
|
&Inst::MovToNZCV { rn } => {
|
||||||
let rn = rn.show_rru(mb_rru);
|
let rn = rn.show_rru(mb_rru);
|
||||||
format!("msr nzcv, {}", rn)
|
format!("msr nzcv, {}", rn)
|
||||||
|
|||||||
@@ -1197,6 +1197,29 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::LoadSplat => {
|
||||||
|
let off = ctx.data(insn).load_store_offset().unwrap();
|
||||||
|
let ty = ty.unwrap();
|
||||||
|
let mem = lower_address(ctx, ty.lane_type(), &inputs[..], off);
|
||||||
|
let memflags = ctx.memflags(insn).expect("memory flags");
|
||||||
|
let rd = get_output_reg(ctx, outputs[0]);
|
||||||
|
let size = VectorSize::from_ty(ty);
|
||||||
|
let srcloc = if memflags.notrap() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(ctx.srcloc(insn))
|
||||||
|
};
|
||||||
|
let tmp = ctx.alloc_tmp(RegClass::I64, I64);
|
||||||
|
|
||||||
|
ctx.emit(Inst::LoadAddr { rd: tmp, mem });
|
||||||
|
ctx.emit(Inst::VecLoadReplicate {
|
||||||
|
rd,
|
||||||
|
rn: tmp.to_reg(),
|
||||||
|
size,
|
||||||
|
srcloc,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::Store
|
Opcode::Store
|
||||||
| Opcode::Istore8
|
| Opcode::Istore8
|
||||||
| Opcode::Istore16
|
| Opcode::Istore16
|
||||||
|
|||||||
@@ -1892,3 +1892,31 @@ fn expand_tls_value(
|
|||||||
unreachable!();
|
unreachable!();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn expand_load_splat(
|
||||||
|
inst: ir::Inst,
|
||||||
|
func: &mut ir::Function,
|
||||||
|
_cfg: &mut ControlFlowGraph,
|
||||||
|
_isa: &dyn TargetIsa,
|
||||||
|
) {
|
||||||
|
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||||
|
|
||||||
|
pos.use_srcloc(inst);
|
||||||
|
|
||||||
|
let (ptr, offset, flags) = match pos.func.dfg[inst] {
|
||||||
|
ir::InstructionData::Load {
|
||||||
|
opcode: ir::Opcode::LoadSplat,
|
||||||
|
arg,
|
||||||
|
offset,
|
||||||
|
flags,
|
||||||
|
} => (arg, offset, flags),
|
||||||
|
_ => panic!(
|
||||||
|
"Expected load_splat: {}",
|
||||||
|
pos.func.dfg.display_inst(inst, None)
|
||||||
|
),
|
||||||
|
};
|
||||||
|
let ty = pos.func.dfg.ctrl_typevar(inst);
|
||||||
|
let load = pos.ins().load(ty.lane_type(), flags, ptr, offset);
|
||||||
|
|
||||||
|
pos.func.dfg.replace(inst).splat(ty, load);
|
||||||
|
}
|
||||||
|
|||||||
@@ -1380,19 +1380,17 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
|
|||||||
| Operator::V128Load16Splat { memarg }
|
| Operator::V128Load16Splat { memarg }
|
||||||
| Operator::V128Load32Splat { memarg }
|
| Operator::V128Load32Splat { memarg }
|
||||||
| Operator::V128Load64Splat { memarg } => {
|
| Operator::V128Load64Splat { memarg } => {
|
||||||
// TODO: For spec compliance, this is initially implemented as a combination of `load +
|
let opcode = ir::Opcode::LoadSplat;
|
||||||
// splat` but could be implemented eventually as a single instruction (`load_splat`).
|
let result_ty = type_of(op);
|
||||||
// See https://github.com/bytecodealliance/wasmtime/issues/1175.
|
let (flags, base, offset) = prepare_load(
|
||||||
translate_load(
|
|
||||||
memarg,
|
memarg,
|
||||||
ir::Opcode::Load,
|
mem_op_size(opcode, result_ty.lane_type()),
|
||||||
type_of(op).lane_type(),
|
|
||||||
builder,
|
builder,
|
||||||
state,
|
state,
|
||||||
environ,
|
environ,
|
||||||
)?;
|
)?;
|
||||||
let splatted = builder.ins().splat(type_of(op), state.pop1());
|
let (load, dfg) = builder.ins().Load(opcode, result_ty, flags, offset, base);
|
||||||
state.push1(splatted)
|
state.push1(dfg.first_result(load))
|
||||||
}
|
}
|
||||||
Operator::I8x16ExtractLaneS { lane } | Operator::I16x8ExtractLaneS { lane } => {
|
Operator::I8x16ExtractLaneS { lane } | Operator::I16x8ExtractLaneS { lane } => {
|
||||||
let vector = pop1_with_bitcast(state, type_of(op), builder);
|
let vector = pop1_with_bitcast(state, type_of(op), builder);
|
||||||
@@ -2040,7 +2038,7 @@ fn mem_op_size(opcode: ir::Opcode, ty: Type) -> u32 {
|
|||||||
ir::Opcode::Istore8 | ir::Opcode::Sload8 | ir::Opcode::Uload8 => 1,
|
ir::Opcode::Istore8 | ir::Opcode::Sload8 | ir::Opcode::Uload8 => 1,
|
||||||
ir::Opcode::Istore16 | ir::Opcode::Sload16 | ir::Opcode::Uload16 => 2,
|
ir::Opcode::Istore16 | ir::Opcode::Sload16 | ir::Opcode::Uload16 => 2,
|
||||||
ir::Opcode::Istore32 | ir::Opcode::Sload32 | ir::Opcode::Uload32 => 4,
|
ir::Opcode::Istore32 | ir::Opcode::Sload32 | ir::Opcode::Uload32 => 4,
|
||||||
ir::Opcode::Store | ir::Opcode::Load => ty.bytes(),
|
ir::Opcode::Store | ir::Opcode::Load | ir::Opcode::LoadSplat => ty.bytes(),
|
||||||
_ => panic!("unknown size of mem op for {:?}", opcode),
|
_ => panic!("unknown size of mem op for {:?}", opcode),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user