x64: Add most remaining AVX lowerings (#5819)

* x64: Add most remaining AVX lowerings

This commit goes through `inst.isle` and adds a corresponding AVX
lowering for most SSE lowerings. I opted to skip instructions where the
SSE lowering doesn't read or modify a register, such as `roundps`. AVX
should still benefit those instructions once load-merging is supported,
since AVX memory operands don't require alignment, but I've deferred
that work to a future PR.

Otherwise, though, this PR should cover all (or almost all) of the
3-operand AVX forms of instructions that have SSE counterparts. This
should improve codegen slightly by reducing register pressure and
removing the need for `movdqa` copies between registers. I've attempted
to ensure that there's at least one codegen test for each of the new
instructions.
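
For example (illustrative only, Intel operand order): computing `a + b`
while keeping both inputs live needs a copy with the destructive
two-operand SSE form, but not with the three-operand VEX form:

```
movdqa xmm0, xmm1        ; SSE: copy `a` first, since paddd overwrites its dst
paddd  xmm0, xmm2        ; xmm0 = a + b

vpaddd xmm0, xmm1, xmm2  ; AVX: xmm0 = a + b, xmm1/xmm2 left untouched
```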

As a side note, the recent capstone integration into `precise-output`
tests helped me catch a number of encoding bugs much earlier than I
otherwise would have, so I've found it incredibly useful in tests!

* Move `vpinsr*` instructions to their own variant

Use true `XmmMem` and `GprMem` types in the instruction as well to get
more type-level safety for what goes where.
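
For reference, a sketch of the new variant's shape (field names are taken
from the hunks below; the exact type definitions are an assumption):

```rust
// Sketch: `src2` is a `GprMem` because `vpinsrb/w/d/q` insert a lane from
// a general-purpose register or memory, never from an XMM register, so
// the dedicated types make that constraint explicit at compile time.
XmmVexPinsr {
    op: AvxOpcode,
    src1: Xmm,
    src2: GprMem,
    dst: WritableXmm,
    imm: u8,
},
```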

* Remove `Inst::produces_const` accessor

Instead of conditionally defining regalloc and various other operations,
add dedicated `MInst` variants for operations that are intended to
produce a constant, giving them clearer interactions with regalloc,
printing, and the like.
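
Concretely, the new variants only define their destination from the
register allocator's perspective, as the operand-collection hunks below
show:

```rust
// An op like `pxor x, x, x` or `pcmpeqd x, x, x` produces its result
// regardless of the register's prior contents, so regalloc sees a pure
// definition and no (false) use of the previous value.
Inst::XmmConstOp { dst, .. } => {
    collector.reg_def(dst.to_writable_reg());
}
```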

* Fix tests

* Register traps in `MachBuffer` for load-folding ops

This adds a missing `add_trap` to the encoding of VEX instructions with
memory operands, ensuring that if they cause a segfault there's
appropriate metadata for Wasmtime to understand that the instruction
could in fact trap. This fixes a fuzz test case found locally where v8
trapped but Wasmtime didn't catch the signal, crashing the fuzzer.
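
A minimal sketch of the shape of the fix (`add_trap` is the `MachBuffer`
method named above; the variable names and the `can_trap` predicate are
illustrative assumptions, not the exact Cranelift API):

```rust
// Before emitting a VEX instruction that folds a load, record trap
// metadata at the current buffer offset so a SIGSEGV at this PC is
// reported as a Wasm trap rather than crashing the process.
if let RegMem::Mem { addr } = &src2 {
    if addr.can_trap() {
        // assumption: `can_trap` reflects the memory operand's MemFlags
        sink.add_trap(TrapCode::HeapOutOfBounds);
    }
}
```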
Commit c26a65a854 (parent ad128b6811) by Alex Crichton,
2023-02-20 09:11:52 -06:00, committed by GitHub.
16 changed files with 4145 additions and 466 deletions.


@@ -122,7 +122,8 @@ impl Inst {
             | Inst::MachOTlsGetAddr { .. }
             | Inst::CoffTlsGetAddr { .. }
             | Inst::Unwind { .. }
-            | Inst::DummyUse { .. } => smallvec![],
+            | Inst::DummyUse { .. }
+            | Inst::AluConstOp { .. } => smallvec![],
             Inst::AluRmRVex { op, .. } => op.available_from(),
             Inst::UnaryRmR { op, .. } => op.available_from(),
@@ -136,7 +137,8 @@ impl Inst {
             | Inst::XmmRmRImm { op, .. }
             | Inst::XmmToGpr { op, .. }
             | Inst::XmmUnaryRmRImm { op, .. }
-            | Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()],
+            | Inst::XmmUnaryRmR { op, .. }
+            | Inst::XmmConstOp { op, .. } => smallvec![op.available_from()],
             Inst::XmmUnaryRmREvex { op, .. }
             | Inst::XmmRmREvex { op, .. }
@@ -144,7 +146,9 @@
             Inst::XmmRmiRVex { op, .. }
             | Inst::XmmRmRVex3 { op, .. }
-            | Inst::XmmRmRImmVex { op, .. } => op.available_from(),
+            | Inst::XmmRmRImmVex { op, .. }
+            | Inst::XmmRmRBlendVex { op, .. }
+            | Inst::XmmVexPinsr { op, .. } => op.available_from(),
         }
     }
 }
@@ -622,40 +626,6 @@ impl Inst {
     }
 }
-// Inst helpers.
-impl Inst {
-    /// In certain cases, instructions of this format can act as a definition of an XMM register,
-    /// producing a value that is independent of its initial value.
-    ///
-    /// For example, a vector equality comparison (`cmppd` or `cmpps`) that compares a register to
-    /// itself will generate all ones as a result, regardless of its value. From the register
-    /// allocator's point of view, we should (i) record the first register, which is normally a
-    /// mod, as a def instead; and (ii) not record the second register as a use, because it is the
-    /// same as the first register (already handled).
-    fn produces_const(&self) -> bool {
-        match self {
-            Self::AluRmiR { op, src1, src2, .. } => {
-                src2.clone().to_reg_mem_imm().to_reg() == Some(src1.to_reg())
-                    && (*op == AluRmiROpcode::Xor || *op == AluRmiROpcode::Sub)
-            }
-            Self::XmmRmR { op, src1, src2, .. } => {
-                src2.clone().to_reg_mem().to_reg() == Some(src1.to_reg())
-                    && (*op == SseOpcode::Xorps
-                        || *op == SseOpcode::Xorpd
-                        || *op == SseOpcode::Pxor
-                        || *op == SseOpcode::Pcmpeqb
-                        || *op == SseOpcode::Pcmpeqw
-                        || *op == SseOpcode::Pcmpeqd
-                        || *op == SseOpcode::Pcmpeqq)
-            }
-            _ => false,
-        }
-    }
-}
 //=============================================================================
 // Instructions: printing
@@ -705,16 +675,6 @@ impl PrettyPrint for Inst {
         match self {
             Inst::Nop { len } => format!("{} len={}", ljustify("nop".to_string()), len),
-            Inst::AluRmiR { size, op, dst, .. } if self.produces_const() => {
-                let dst = pretty_print_reg(dst.to_reg().to_reg(), size.to_bytes(), allocs);
-                format!(
-                    "{} {}, {}, {}",
-                    ljustify2(op.to_string(), suffix_lqb(*size)),
-                    dst,
-                    dst,
-                    dst
-                )
-            }
             Inst::AluRmiR {
                 size,
                 op,
@@ -734,6 +694,14 @@ impl PrettyPrint for Inst {
                     dst
                 )
             }
+            Inst::AluConstOp { op, dst, size } => {
+                let size_bytes = size.to_bytes();
+                let dst = pretty_print_reg(dst.to_reg().to_reg(), size_bytes, allocs);
+                format!(
+                    "{} {dst}, {dst}, {dst}",
+                    ljustify2(op.to_string(), suffix_lqb(*size)),
+                )
+            }
             Inst::AluRM {
                 size,
                 op,
@@ -945,11 +913,6 @@
                 format!("{} {}, {}", ljustify(op.to_string()), src, dst)
             }
-            Inst::XmmRmR { op, dst, .. } if self.produces_const() => {
-                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
-                format!("{} {}, {}, {}", ljustify(op.to_string()), dst, dst, dst)
-            }
             Inst::XmmRmR {
                 op,
                 src1,
@@ -963,6 +926,11 @@
                 format!("{} {}, {}, {}", ljustify(op.to_string()), src1, src2, dst)
             }
+            Inst::XmmConstOp { op, dst } => {
+                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
+                format!("{} {dst}, {dst}, {dst}", ljustify(op.to_string()))
+            }
             Inst::XmmRmRBlend {
                 op,
                 src1,
@@ -1016,13 +984,22 @@
                 let src1 = pretty_print_reg(src1.to_reg(), 8, allocs);
                 let src2 = src2.pretty_print(8, allocs);
-                format!(
-                    "{} ${imm} {}, {}, {}",
-                    ljustify(op.to_string()),
-                    src1,
-                    src2,
-                    dst
-                )
+                format!("{} ${imm} {src1}, {src2}, {dst}", ljustify(op.to_string()))
             }
+            Inst::XmmVexPinsr {
+                op,
+                src1,
+                src2,
+                dst,
+                imm,
+                ..
+            } => {
+                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
+                let src1 = pretty_print_reg(src1.to_reg(), 8, allocs);
+                let src2 = src2.pretty_print(8, allocs);
+                format!("{} ${imm} {src1}, {src2}, {dst}", ljustify(op.to_string()))
+            }
             Inst::XmmRmRVex3 {
@@ -1048,6 +1025,22 @@
                 )
             }
+            Inst::XmmRmRBlendVex {
+                op,
+                src1,
+                src2,
+                mask,
+                dst,
+                ..
+            } => {
+                let src1 = pretty_print_reg(src1.to_reg(), 8, allocs);
+                let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
+                let src2 = src2.pretty_print(8, allocs);
+                let mask = pretty_print_reg(mask.to_reg(), 8, allocs);
+                format!("{} {src1}, {src2}, {mask}, {dst}", ljustify(op.to_string()))
+            }
             Inst::XmmRmREvex {
                 op,
                 src1,
@@ -1109,28 +1102,6 @@
                 )
             }
-            Inst::XmmRmRImm {
-                op, dst, imm, size, ..
-            } if self.produces_const() => {
-                let dst = pretty_print_reg(dst.to_reg(), 8, allocs);
-                format!(
-                    "{} ${}, {}, {}, {}",
-                    ljustify(format!(
-                        "{}{}",
-                        op.to_string(),
-                        if *size == OperandSize::Size64 {
-                            ".w"
-                        } else {
-                            ""
-                        }
-                    )),
-                    imm,
-                    dst,
-                    dst,
-                    dst,
-                )
-            }
             Inst::XmmRmRImm {
                 op,
                 src1,
@@ -1799,14 +1770,11 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
         Inst::AluRmiR {
             src1, src2, dst, ..
         } => {
-            if inst.produces_const() {
-                collector.reg_def(dst.to_writable_reg());
-            } else {
-                collector.reg_use(src1.to_reg());
-                collector.reg_reuse_def(dst.to_writable_reg(), 0);
-                src2.get_operands(collector);
-            }
+            collector.reg_use(src1.to_reg());
+            collector.reg_reuse_def(dst.to_writable_reg(), 0);
+            src2.get_operands(collector);
         }
+        Inst::AluConstOp { dst, .. } => collector.reg_def(dst.to_writable_reg()),
         Inst::AluRM { src1_dst, src2, .. } => {
             collector.reg_use(src2.to_reg());
             src1_dst.get_operands(collector);
@@ -1904,13 +1872,9 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
         Inst::XmmRmR {
             src1, src2, dst, ..
         } => {
-            if inst.produces_const() {
-                collector.reg_def(dst.to_writable_reg());
-            } else {
-                collector.reg_use(src1.to_reg());
-                collector.reg_reuse_def(dst.to_writable_reg(), 0);
-                src2.get_operands(collector);
-            }
+            collector.reg_use(src1.to_reg());
+            collector.reg_reuse_def(dst.to_writable_reg(), 0);
+            src2.get_operands(collector);
         }
         Inst::XmmRmRBlend {
             src1,
@@ -1943,6 +1907,13 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
             collector.reg_use(src1.to_reg());
             src2.get_operands(collector);
         }
+        Inst::XmmVexPinsr {
+            src1, src2, dst, ..
+        } => {
+            collector.reg_def(dst.to_writable_reg());
+            collector.reg_use(src1.to_reg());
+            src2.get_operands(collector);
+        }
         Inst::XmmRmRVex3 {
             op,
             src1,
@@ -1966,6 +1937,18 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
             collector.reg_use(src2.to_reg());
             src3.get_operands(collector);
         }
+        Inst::XmmRmRBlendVex {
+            src1,
+            src2,
+            mask,
+            dst,
+            ..
+        } => {
+            collector.reg_def(dst.to_writable_reg());
+            collector.reg_use(src1.to_reg());
+            src2.get_operands(collector);
+            collector.reg_use(mask.to_reg());
+        }
         Inst::XmmRmREvex {
             op,
             src1,
@@ -1999,9 +1982,7 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
             dst,
             ..
         } => {
-            if inst.produces_const() {
-                collector.reg_def(*dst);
-            } else if !op.uses_src1() {
+            if !op.uses_src1() {
                 // FIXME: split this instruction into two, so we don't
                 // need this awkward src1-is-only-sometimes-an-arg
                 // behavior.
@@ -2013,6 +1994,9 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
                 src2.get_operands(collector);
             }
         }
+        Inst::XmmConstOp { dst, .. } => {
+            collector.reg_def(dst.to_writable_reg());
+        }
         Inst::XmmUninitializedValue { dst } => collector.reg_def(dst.to_writable_reg()),
         Inst::XmmMinMaxSeq { lhs, rhs, dst, .. } => {
             collector.reg_use(rhs.to_reg());