x64: Add most remaining AVX lowerings (#5819)
* x64: Add most remaining AVX lowerings

  This commit goes through `inst.isle` and adds a corresponding AVX lowering for most SSE lowerings. I opted to skip instructions where the SSE lowering didn't read/modify a register, such as `roundps`. I think that AVX will benefit these instructions when there's load-merging, since AVX doesn't require alignment, but I've deferred that work to a future PR.

  Otherwise, though, in this PR I think all (or almost all) of the 3-operand forms of AVX instructions are supported alongside their SSE counterparts. This should ideally improve codegen slightly by reducing register pressure and removing the need for `movdqa` between registers. I've attempted to ensure that there's at least one codegen test for all the new instructions.

  As a side note, the recent capstone integration into `precise-output` tests helped me catch a number of encoding bugs much earlier than otherwise, so I've found that incredibly useful in tests!

* Move `vpinsr*` instructions to their own variant

  Use true `XmmMem` and `GprMem` types in the instruction as well, to get more type-level safety for what goes where.

* Remove `Inst::produces_const` accessor

  Instead of conditionally defining regalloc and various other operations, add dedicated `MInst` variants for operations which are intended to produce a constant, so that they have clearer interactions with regalloc, printing, and such.

* Fix tests

* Register traps in `MachBuffer` for load-folding ops

  This adds a missing `add_trap` to the encoding of VEX instructions with memory operands to ensure that, if they cause a segfault, there's appropriate metadata for Wasmtime to understand that the instruction could in fact trap. This fixes a fuzz test case found locally where v8 trapped and Wasmtime didn't catch the signal and crashed the fuzzer.
This commit is contained in:
@@ -122,7 +122,8 @@ impl Inst {
|
||||
| Inst::MachOTlsGetAddr { .. }
|
||||
| Inst::CoffTlsGetAddr { .. }
|
||||
| Inst::Unwind { .. }
|
||||
| Inst::DummyUse { .. } => smallvec![],
|
||||
| Inst::DummyUse { .. }
|
||||
| Inst::AluConstOp { .. } => smallvec![],
|
||||
|
||||
Inst::AluRmRVex { op, .. } => op.available_from(),
|
||||
Inst::UnaryRmR { op, .. } => op.available_from(),
|
||||
@@ -136,7 +137,8 @@ impl Inst {
|
||||
| Inst::XmmRmRImm { op, .. }
|
||||
| Inst::XmmToGpr { op, .. }
|
||||
| Inst::XmmUnaryRmRImm { op, .. }
|
||||
| Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()],
|
||||
| Inst::XmmUnaryRmR { op, .. }
|
||||
| Inst::XmmConstOp { op, .. } => smallvec![op.available_from()],
|
||||
|
||||
Inst::XmmUnaryRmREvex { op, .. }
|
||||
| Inst::XmmRmREvex { op, .. }
|
||||
@@ -144,7 +146,9 @@ impl Inst {
|
||||
|
||||
Inst::XmmRmiRVex { op, .. }
|
||||
| Inst::XmmRmRVex3 { op, .. }
|
||||
| Inst::XmmRmRImmVex { op, .. } => op.available_from(),
|
||||
| Inst::XmmRmRImmVex { op, .. }
|
||||
| Inst::XmmRmRBlendVex { op, .. }
|
||||
| Inst::XmmVexPinsr { op, .. } => op.available_from(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -622,40 +626,6 @@ impl Inst {
|
||||
}
|
||||
}
|
||||
|
||||
// Inst helpers.
|
||||
|
||||
impl Inst {
|
||||
/// In certain cases, instructions of this format can act as a definition of an XMM register,
|
||||
/// producing a value that is independent of its initial value.
|
||||
///
|
||||
/// For example, a vector equality comparison (`cmppd` or `cmpps`) that compares a register to
|
||||
/// itself will generate all ones as a result, regardless of its value. From the register
|
||||
/// allocator's point of view, we should (i) record the first register, which is normally a
|
||||
/// mod, as a def instead; and (ii) not record the second register as a use, because it is the
|
||||
/// same as the first register (already handled).
|
||||
fn produces_const(&self) -> bool {
|
||||
match self {
|
||||
Self::AluRmiR { op, src1, src2, .. } => {
|
||||
src2.clone().to_reg_mem_imm().to_reg() == Some(src1.to_reg())
|
||||
&& (*op == AluRmiROpcode::Xor || *op == AluRmiROpcode::Sub)
|
||||
}
|
||||
|
||||
Self::XmmRmR { op, src1, src2, .. } => {
|
||||
src2.clone().to_reg_mem().to_reg() == Some(src1.to_reg())
|
||||
&& (*op == SseOpcode::Xorps
|
||||
|| *op == SseOpcode::Xorpd
|
||||
|| *op == SseOpcode::Pxor
|
||||
|| *op == SseOpcode::Pcmpeqb
|
||||
|| *op == SseOpcode::Pcmpeqw
|
||||
|| *op == SseOpcode::Pcmpeqd
|
||||
|| *op == SseOpcode::Pcmpeqq)
|
||||
}
|
||||
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Instructions: printing
|
||||
|
||||
@@ -705,16 +675,6 @@ impl PrettyPrint for Inst {
|
||||
match self {
|
||||
Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),
|
||||
|
||||
Inst::AluRmiR { size, op, dst, .. } if self.produces_const() => {
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
|
||||
format!(
|
||||
"{} {}, {}, {}",
|
||||
ljustify2(op.to_string(), suffix_lqb(*size)),
|
||||
dst,
|
||||
dst,
|
||||
dst
|
||||
)
|
||||
}
|
||||
Inst::AluRmiR {
|
||||
size,
|
||||
op,
|
||||
@@ -734,6 +694,14 @@ impl PrettyPrint for Inst {
|
||||
dst
|
||||
)
|
||||
}
|
||||
Inst::AluConstOp { op, dst, size } => {
|
||||
let size_bytes = size.to_bytes();
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), size_bytes, allocs);
|
||||
format!(
|
||||
"{} {dst}, {dst}, {dst}",
|
||||
ljustify2(op.to_string(), suffix_lqb(*size)),
|
||||
)
|
||||
}
|
||||
Inst::AluRM {
|
||||
size,
|
||||
op,
|
||||
@@ -945,11 +913,6 @@ impl PrettyPrint for Inst {
|
||||
format!("{} {}, {}", ljustify(op.to_string()), src, dst)
|
||||
}
|
||||
|
||||
Inst::XmmRmR { op, dst, .. } if self.produces_const() => {
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
||||
format!("{} {}, {}, {}", ljustify(op.to_string()), dst, dst, dst)
|
||||
}
|
||||
|
||||
Inst::XmmRmR {
|
||||
op,
|
||||
src1,
|
||||
@@ -963,6 +926,11 @@ impl PrettyPrint for Inst {
|
||||
format!("{} {}, {}, {}", ljustify(op.to_string()), src1, src2, dst)
|
||||
}
|
||||
|
||||
Inst::XmmConstOp { op, dst } => {
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
||||
format!("{} {dst}, {dst}, {dst}", ljustify(op.to_string()))
|
||||
}
|
||||
|
||||
Inst::XmmRmRBlend {
|
||||
op,
|
||||
src1,
|
||||
@@ -1016,13 +984,22 @@ impl PrettyPrint for Inst {
|
||||
let src1 = pretty_print_reg(src1.to_reg(), 8, allocs);
|
||||
let src2 = src2.pretty_print(8, allocs);
|
||||
|
||||
format!(
|
||||
"{} ${imm} {}, {}, {}",
|
||||
ljustify(op.to_string()),
|
||||
src1,
|
||||
src2,
|
||||
dst
|
||||
)
|
||||
format!("{} ${imm} {src1}, {src2}, {dst}", ljustify(op.to_string()))
|
||||
}
|
||||
|
||||
Inst::XmmVexPinsr {
|
||||
op,
|
||||
src1,
|
||||
src2,
|
||||
dst,
|
||||
imm,
|
||||
..
|
||||
} => {
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
||||
let src1 = pretty_print_reg(src1.to_reg(), 8, allocs);
|
||||
let src2 = src2.pretty_print(8, allocs);
|
||||
|
||||
format!("{} ${imm} {src1}, {src2}, {dst}", ljustify(op.to_string()))
|
||||
}
|
||||
|
||||
Inst::XmmRmRVex3 {
|
||||
@@ -1048,6 +1025,22 @@ impl PrettyPrint for Inst {
|
||||
)
|
||||
}
|
||||
|
||||
Inst::XmmRmRBlendVex {
|
||||
op,
|
||||
src1,
|
||||
src2,
|
||||
mask,
|
||||
dst,
|
||||
..
|
||||
} => {
|
||||
let src1 = pretty_print_reg(src1.to_reg(), 8, allocs);
|
||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
||||
let src2 = src2.pretty_print(8, allocs);
|
||||
let mask = pretty_print_reg(mask.to_reg(), 8, allocs);
|
||||
|
||||
format!("{} {src1}, {src2}, {mask}, {dst}", ljustify(op.to_string()))
|
||||
}
|
||||
|
||||
Inst::XmmRmREvex {
|
||||
op,
|
||||
src1,
|
||||
@@ -1109,28 +1102,6 @@ impl PrettyPrint for Inst {
|
||||
)
|
||||
}
|
||||
|
||||
Inst::XmmRmRImm {
|
||||
op, dst, imm, size, ..
|
||||
} if self.produces_const() => {
|
||||
let dst = pretty_print_reg(dst.to_reg(), 8, allocs);
|
||||
format!(
|
||||
"{} ${}, {}, {}, {}",
|
||||
ljustify(format!(
|
||||
"{}{}",
|
||||
op.to_string(),
|
||||
if *size == OperandSize::Size64 {
|
||||
".w"
|
||||
} else {
|
||||
""
|
||||
}
|
||||
)),
|
||||
imm,
|
||||
dst,
|
||||
dst,
|
||||
dst,
|
||||
)
|
||||
}
|
||||
|
||||
Inst::XmmRmRImm {
|
||||
op,
|
||||
src1,
|
||||
@@ -1799,14 +1770,11 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
||||
Inst::AluRmiR {
|
||||
src1, src2, dst, ..
|
||||
} => {
|
||||
if inst.produces_const() {
|
||||
collector.reg_def(dst.to_writable_reg());
|
||||
} else {
|
||||
collector.reg_use(src1.to_reg());
|
||||
collector.reg_reuse_def(dst.to_writable_reg(), 0);
|
||||
src2.get_operands(collector);
|
||||
}
|
||||
collector.reg_use(src1.to_reg());
|
||||
collector.reg_reuse_def(dst.to_writable_reg(), 0);
|
||||
src2.get_operands(collector);
|
||||
}
|
||||
Inst::AluConstOp { dst, .. } => collector.reg_def(dst.to_writable_reg()),
|
||||
Inst::AluRM { src1_dst, src2, .. } => {
|
||||
collector.reg_use(src2.to_reg());
|
||||
src1_dst.get_operands(collector);
|
||||
@@ -1904,13 +1872,9 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
||||
Inst::XmmRmR {
|
||||
src1, src2, dst, ..
|
||||
} => {
|
||||
if inst.produces_const() {
|
||||
collector.reg_def(dst.to_writable_reg());
|
||||
} else {
|
||||
collector.reg_use(src1.to_reg());
|
||||
collector.reg_reuse_def(dst.to_writable_reg(), 0);
|
||||
src2.get_operands(collector);
|
||||
}
|
||||
collector.reg_use(src1.to_reg());
|
||||
collector.reg_reuse_def(dst.to_writable_reg(), 0);
|
||||
src2.get_operands(collector);
|
||||
}
|
||||
Inst::XmmRmRBlend {
|
||||
src1,
|
||||
@@ -1943,6 +1907,13 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
||||
collector.reg_use(src1.to_reg());
|
||||
src2.get_operands(collector);
|
||||
}
|
||||
Inst::XmmVexPinsr {
|
||||
src1, src2, dst, ..
|
||||
} => {
|
||||
collector.reg_def(dst.to_writable_reg());
|
||||
collector.reg_use(src1.to_reg());
|
||||
src2.get_operands(collector);
|
||||
}
|
||||
Inst::XmmRmRVex3 {
|
||||
op,
|
||||
src1,
|
||||
@@ -1966,6 +1937,18 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
||||
collector.reg_use(src2.to_reg());
|
||||
src3.get_operands(collector);
|
||||
}
|
||||
Inst::XmmRmRBlendVex {
|
||||
src1,
|
||||
src2,
|
||||
mask,
|
||||
dst,
|
||||
..
|
||||
} => {
|
||||
collector.reg_def(dst.to_writable_reg());
|
||||
collector.reg_use(src1.to_reg());
|
||||
src2.get_operands(collector);
|
||||
collector.reg_use(mask.to_reg());
|
||||
}
|
||||
Inst::XmmRmREvex {
|
||||
op,
|
||||
src1,
|
||||
@@ -1999,9 +1982,7 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
||||
dst,
|
||||
..
|
||||
} => {
|
||||
if inst.produces_const() {
|
||||
collector.reg_def(*dst);
|
||||
} else if !op.uses_src1() {
|
||||
if !op.uses_src1() {
|
||||
// FIXME: split this instruction into two, so we don't
|
||||
// need this awkward src1-is-only-sometimes-an-arg
|
||||
// behavior.
|
||||
@@ -2013,6 +1994,9 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
||||
src2.get_operands(collector);
|
||||
}
|
||||
}
|
||||
Inst::XmmConstOp { dst, .. } => {
|
||||
collector.reg_def(dst.to_writable_reg());
|
||||
}
|
||||
Inst::XmmUninitializedValue { dst } => collector.reg_def(dst.to_writable_reg()),
|
||||
Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => {
|
||||
collector.reg_use(rhs.to_reg());
|
||||
|
||||
Reference in New Issue
Block a user