Add MInst.XmmUnaryRmRImm to handle rounding instructions (#4823)

Add a new pseudo-instruction, XmmUnaryRmRImm, to handle instructions like roundss that only use their first register argument for the instruction's result. This has the added benefit of allowing the isle wrappers for those instructions to take an XmmMem argument, allowing for more cases where loads may be merged.
This commit is contained in:
Trevor Elliott
2022-08-31 08:29:32 -07:00
committed by GitHub
parent cf7cb10036
commit fb8b9838fe
4 changed files with 79 additions and 57 deletions

View File

@@ -220,6 +220,16 @@
(src XmmMem)
(dst WritableXmm))
;; XMM (scalar or vector) unary op with immediate: roundss, roundsd, etc.
;;
;; This differs from XMM_RM_R_IMM in that the dst register of
;; XmmUnaryRmRImm is not used in the computation of the instruction dst
;; value and so does not have to be a previously valid value.
(XmmUnaryRmRImm (op SseOpcode)
(src XmmMem)
(imm u8)
(dst WritableXmm))
;; XMM (scalar or vector) unary op that relies on the EVEX prefix.
(XmmUnaryRmREvex (op Avx512Opcode)
(src XmmMem)
@@ -2590,41 +2600,32 @@
lane
size))
;; Helper for constructing `XmmUnaryRmRImm` instructions.
(decl xmm_unary_rm_r_imm (SseOpcode XmmMem u8) Xmm)
(rule (xmm_unary_rm_r_imm op src1 imm)
(let ((dst WritableXmm (temp_writable_xmm))
(_ Unit (emit (MInst.XmmUnaryRmRImm op src1 imm dst))))
dst))
;; Helper for creating `roundss` instructions.
(decl x64_roundss (Xmm RoundImm) Xmm)
(decl x64_roundss (XmmMem RoundImm) Xmm)
(rule (x64_roundss src1 round)
(xmm_rm_r_imm (SseOpcode.Roundss)
src1
src1
(encode_round_imm round)
(OperandSize.Size32)))
(xmm_unary_rm_r_imm (SseOpcode.Roundss) src1 (encode_round_imm round)))
;; Helper for creating `roundsd` instructions.
(decl x64_roundsd (Xmm RoundImm) Xmm)
(decl x64_roundsd (XmmMem RoundImm) Xmm)
(rule (x64_roundsd src1 round)
(xmm_rm_r_imm (SseOpcode.Roundsd)
src1
src1
(encode_round_imm round)
(OperandSize.Size32)))
(xmm_unary_rm_r_imm (SseOpcode.Roundsd) src1 (encode_round_imm round)))
;; Helper for creating `roundps` instructions.
(decl x64_roundps (Xmm RoundImm) Xmm)
(decl x64_roundps (XmmMem RoundImm) Xmm)
(rule (x64_roundps src1 round)
(xmm_rm_r_imm (SseOpcode.Roundps)
src1
src1
(encode_round_imm round)
(OperandSize.Size32)))
(xmm_unary_rm_r_imm (SseOpcode.Roundps) src1 (encode_round_imm round)))
;; Helper for creating `roundpd` instructions.
(decl x64_roundpd (Xmm RoundImm) Xmm)
(decl x64_roundpd (XmmMem RoundImm) Xmm)
(rule (x64_roundpd src1 round)
(xmm_rm_r_imm (SseOpcode.Roundpd)
src1
src1
(encode_round_imm round)
(OperandSize.Size32)))
(xmm_unary_rm_r_imm (SseOpcode.Roundpd) src1 (encode_round_imm round)))
;; Helper for creating `pmaddwd` instructions.
(decl x64_pmaddwd (Xmm XmmMem) Xmm)

View File

@@ -1612,6 +1612,33 @@ pub(crate) fn emit(
};
}
Inst::XmmUnaryRmRImm { op, src, dst, imm } => {
debug_assert!(!op.uses_src1());
let dst = allocs.next(dst.to_reg().to_reg());
let src = src.clone().to_reg_mem().with_allocs(allocs);
let rex = RexFlags::clear_w();
let (prefix, opcode, len) = match op {
SseOpcode::Roundps => (LegacyPrefixes::_66, 0x0F3A08, 3),
SseOpcode::Roundss => (LegacyPrefixes::_66, 0x0F3A0A, 3),
SseOpcode::Roundpd => (LegacyPrefixes::_66, 0x0F3A09, 3),
SseOpcode::Roundsd => (LegacyPrefixes::_66, 0x0F3A0B, 3),
_ => unimplemented!("Opcode {:?} not implemented", op),
};
match src {
RegMem::Reg { reg } => {
emit_std_reg_reg(sink, prefix, opcode, len, dst, reg, rex);
}
RegMem::Mem { addr } => {
let addr = &addr.finalize(state, sink);
// N.B.: bytes_at_end == 1, because of the `imm` byte below.
emit_std_reg_mem(sink, info, prefix, opcode, len, dst, addr, rex, 1);
}
}
sink.put1(*imm);
}
Inst::XmmUnaryRmREvex { op, src, dst } => {
let dst = allocs.next(dst.to_reg().to_reg());
let src = src.clone().to_reg_mem().with_allocs(allocs);
@@ -1975,10 +2002,6 @@ pub(crate) fn emit(
SseOpcode::Pextrw => (LegacyPrefixes::_66, 0x0FC5, 2),
SseOpcode::Pextrd => (LegacyPrefixes::_66, 0x0F3A16, 3),
SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2),
SseOpcode::Roundps => (LegacyPrefixes::_66, 0x0F3A08, 3),
SseOpcode::Roundss => (LegacyPrefixes::_66, 0x0F3A0A, 3),
SseOpcode::Roundpd => (LegacyPrefixes::_66, 0x0F3A09, 3),
SseOpcode::Roundsd => (LegacyPrefixes::_66, 0x0F3A0B, 3),
SseOpcode::Shufps => (LegacyPrefixes::None, 0x0FC6, 2),
_ => unimplemented!("Opcode {:?} not implemented", op),
};

View File

@@ -29,6 +29,17 @@ impl Inst {
}
}
fn xmm_unary_rm_r_imm(op: SseOpcode, src: RegMem, dst: Writable<Reg>, imm: u8) -> Inst {
src.assert_regclass_is(RegClass::Float);
debug_assert!(dst.to_reg().class() == RegClass::Float);
Inst::XmmUnaryRmRImm {
op,
src: XmmMem::new(src).unwrap(),
imm,
dst: WritableXmm::from_writable_reg(dst).unwrap(),
}
}
fn xmm_unary_rm_r_evex(op: Avx512Opcode, src: RegMem, dst: Writable<Reg>) -> Inst {
src.assert_regclass_is(RegClass::Float);
debug_assert!(dst.to_reg().class() == RegClass::Float);
@@ -4611,46 +4622,22 @@ fn test_x64_emit() {
));
insns.push((
Inst::xmm_rm_r_imm(
SseOpcode::Roundps,
RegMem::reg(xmm7),
w_xmm8,
3,
OperandSize::Size32,
),
Inst::xmm_unary_rm_r_imm(SseOpcode::Roundps, RegMem::reg(xmm7), w_xmm8, 3),
"66440F3A08C703",
"roundps $3, %xmm7, %xmm8",
));
insns.push((
Inst::xmm_rm_r_imm(
SseOpcode::Roundpd,
RegMem::reg(xmm10),
w_xmm7,
2,
OperandSize::Size32,
),
Inst::xmm_unary_rm_r_imm(SseOpcode::Roundpd, RegMem::reg(xmm10), w_xmm7, 2),
"66410F3A09FA02",
"roundpd $2, %xmm10, %xmm7",
));
insns.push((
Inst::xmm_rm_r_imm(
SseOpcode::Roundps,
RegMem::reg(xmm4),
w_xmm8,
1,
OperandSize::Size32,
),
Inst::xmm_unary_rm_r_imm(SseOpcode::Roundps, RegMem::reg(xmm4), w_xmm8, 1),
"66440F3A08C401",
"roundps $1, %xmm4, %xmm8",
));
insns.push((
Inst::xmm_rm_r_imm(
SseOpcode::Roundpd,
RegMem::reg(xmm15),
w_xmm15,
0,
OperandSize::Size32,
),
Inst::xmm_unary_rm_r_imm(SseOpcode::Roundpd, RegMem::reg(xmm15), w_xmm15, 0),
"66450F3A09FF00",
"roundpd $0, %xmm15, %xmm15",
));

View File

@@ -129,6 +129,7 @@ impl Inst {
| Inst::XmmRmR { op, .. }
| Inst::XmmRmRImm { op, .. }
| Inst::XmmToGpr { op, .. }
| Inst::XmmUnaryRmRImm { op, .. }
| Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()],
Inst::XmmUnaryRmREvex { op, .. }
@@ -896,6 +897,14 @@ impl PrettyPrint for Inst {
format!("{} {}, {}", ljustify(op.to_string()), src, dst)
}
Inst::XmmUnaryRmRImm {
op, src, dst, imm, ..
} => {
let dst = pretty_print_reg(dst.to_reg().to_reg(), op.src_size(), allocs);
let src = src.pretty_print(op.src_size(), allocs);
format!("{} ${}, {}, {}", ljustify(op.to_string()), imm, src, dst)
}
Inst::XmmUnaryRmREvex { op, src, dst, .. } => {
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
let src = src.pretty_print(8, allocs);
@@ -1702,7 +1711,9 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
collector.reg_def(dst.to_writable_reg());
src.get_operands(collector);
}
Inst::XmmUnaryRmR { src, dst, .. } | Inst::XmmUnaryRmREvex { src, dst, .. } => {
Inst::XmmUnaryRmR { src, dst, .. }
| Inst::XmmUnaryRmREvex { src, dst, .. }
| Inst::XmmUnaryRmRImm { src, dst, .. } => {
collector.reg_def(dst.to_writable_reg());
src.get_operands(collector);
}