Add MInst.XmmUnaryRmRImm to handle rounding instructions (#4823)
Add a new pseudo-instruction, XmmUnaryRmRImm, to handle instructions like roundss whose destination register is not used in computing the instruction's result, and so does not have to hold a previously valid value. This has the added benefit of allowing the ISLE wrappers for those instructions to take an XmmMem argument, which allows loads to be merged in more cases.
This commit is contained in:
@@ -220,6 +220,16 @@
|
|||||||
(src XmmMem)
|
(src XmmMem)
|
||||||
(dst WritableXmm))
|
(dst WritableXmm))
|
||||||
|
|
||||||
|
;; XMM (scalar or vector) unary op with immediate: roundss, roundsd, etc.
|
||||||
|
;;
|
||||||
|
;; This differs from XMM_RM_R_IMM in that the dst register of
|
||||||
|
;; XmmUnaryRmRImm is not used in the computation of the instruction dst
|
||||||
|
;; value and so does not have to be a previously valid value.
|
||||||
|
(XmmUnaryRmRImm (op SseOpcode)
|
||||||
|
(src XmmMem)
|
||||||
|
(imm u8)
|
||||||
|
(dst WritableXmm))
|
||||||
|
|
||||||
;; XMM (scalar or vector) unary op that relies on the EVEX prefix.
|
;; XMM (scalar or vector) unary op that relies on the EVEX prefix.
|
||||||
(XmmUnaryRmREvex (op Avx512Opcode)
|
(XmmUnaryRmREvex (op Avx512Opcode)
|
||||||
(src XmmMem)
|
(src XmmMem)
|
||||||
@@ -2590,41 +2600,32 @@
|
|||||||
lane
|
lane
|
||||||
size))
|
size))
|
||||||
|
|
||||||
|
;; Helper for constructing `XmmUnaryRmRImm` instructions.
|
||||||
|
(decl xmm_unary_rm_r_imm (SseOpcode XmmMem u8) Xmm)
|
||||||
|
(rule (xmm_unary_rm_r_imm op src1 imm)
|
||||||
|
(let ((dst WritableXmm (temp_writable_xmm))
|
||||||
|
(_ Unit (emit (MInst.XmmUnaryRmRImm op src1 imm dst))))
|
||||||
|
dst))
|
||||||
|
|
||||||
;; Helper for creating `roundss` instructions.
|
;; Helper for creating `roundss` instructions.
|
||||||
(decl x64_roundss (Xmm RoundImm) Xmm)
|
(decl x64_roundss (XmmMem RoundImm) Xmm)
|
||||||
(rule (x64_roundss src1 round)
|
(rule (x64_roundss src1 round)
|
||||||
(xmm_rm_r_imm (SseOpcode.Roundss)
|
(xmm_unary_rm_r_imm (SseOpcode.Roundss) src1 (encode_round_imm round)))
|
||||||
src1
|
|
||||||
src1
|
|
||||||
(encode_round_imm round)
|
|
||||||
(OperandSize.Size32)))
|
|
||||||
|
|
||||||
;; Helper for creating `roundsd` instructions.
|
;; Helper for creating `roundsd` instructions.
|
||||||
(decl x64_roundsd (Xmm RoundImm) Xmm)
|
(decl x64_roundsd (XmmMem RoundImm) Xmm)
|
||||||
(rule (x64_roundsd src1 round)
|
(rule (x64_roundsd src1 round)
|
||||||
(xmm_rm_r_imm (SseOpcode.Roundsd)
|
(xmm_unary_rm_r_imm (SseOpcode.Roundsd) src1 (encode_round_imm round)))
|
||||||
src1
|
|
||||||
src1
|
|
||||||
(encode_round_imm round)
|
|
||||||
(OperandSize.Size32)))
|
|
||||||
|
|
||||||
;; Helper for creating `roundps` instructions.
|
;; Helper for creating `roundps` instructions.
|
||||||
(decl x64_roundps (Xmm RoundImm) Xmm)
|
(decl x64_roundps (XmmMem RoundImm) Xmm)
|
||||||
(rule (x64_roundps src1 round)
|
(rule (x64_roundps src1 round)
|
||||||
(xmm_rm_r_imm (SseOpcode.Roundps)
|
(xmm_unary_rm_r_imm (SseOpcode.Roundps) src1 (encode_round_imm round)))
|
||||||
src1
|
|
||||||
src1
|
|
||||||
(encode_round_imm round)
|
|
||||||
(OperandSize.Size32)))
|
|
||||||
|
|
||||||
;; Helper for creating `roundpd` instructions.
|
;; Helper for creating `roundpd` instructions.
|
||||||
(decl x64_roundpd (Xmm RoundImm) Xmm)
|
(decl x64_roundpd (XmmMem RoundImm) Xmm)
|
||||||
(rule (x64_roundpd src1 round)
|
(rule (x64_roundpd src1 round)
|
||||||
(xmm_rm_r_imm (SseOpcode.Roundpd)
|
(xmm_unary_rm_r_imm (SseOpcode.Roundpd) src1 (encode_round_imm round)))
|
||||||
src1
|
|
||||||
src1
|
|
||||||
(encode_round_imm round)
|
|
||||||
(OperandSize.Size32)))
|
|
||||||
|
|
||||||
;; Helper for creating `pmaddwd` instructions.
|
;; Helper for creating `pmaddwd` instructions.
|
||||||
(decl x64_pmaddwd (Xmm XmmMem) Xmm)
|
(decl x64_pmaddwd (Xmm XmmMem) Xmm)
|
||||||
|
|||||||
@@ -1612,6 +1612,33 @@ pub(crate) fn emit(
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Inst::XmmUnaryRmRImm { op, src, dst, imm } => {
|
||||||
|
debug_assert!(!op.uses_src1());
|
||||||
|
|
||||||
|
let dst = allocs.next(dst.to_reg().to_reg());
|
||||||
|
let src = src.clone().to_reg_mem().with_allocs(allocs);
|
||||||
|
let rex = RexFlags::clear_w();
|
||||||
|
|
||||||
|
let (prefix, opcode, len) = match op {
|
||||||
|
SseOpcode::Roundps => (LegacyPrefixes::_66, 0x0F3A08, 3),
|
||||||
|
SseOpcode::Roundss => (LegacyPrefixes::_66, 0x0F3A0A, 3),
|
||||||
|
SseOpcode::Roundpd => (LegacyPrefixes::_66, 0x0F3A09, 3),
|
||||||
|
SseOpcode::Roundsd => (LegacyPrefixes::_66, 0x0F3A0B, 3),
|
||||||
|
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||||
|
};
|
||||||
|
match src {
|
||||||
|
RegMem::Reg { reg } => {
|
||||||
|
emit_std_reg_reg(sink, prefix, opcode, len, dst, reg, rex);
|
||||||
|
}
|
||||||
|
RegMem::Mem { addr } => {
|
||||||
|
let addr = &addr.finalize(state, sink);
|
||||||
|
// N.B.: bytes_at_end == 1, because of the `imm` byte below.
|
||||||
|
emit_std_reg_mem(sink, info, prefix, opcode, len, dst, addr, rex, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sink.put1(*imm);
|
||||||
|
}
|
||||||
|
|
||||||
Inst::XmmUnaryRmREvex { op, src, dst } => {
|
Inst::XmmUnaryRmREvex { op, src, dst } => {
|
||||||
let dst = allocs.next(dst.to_reg().to_reg());
|
let dst = allocs.next(dst.to_reg().to_reg());
|
||||||
let src = src.clone().to_reg_mem().with_allocs(allocs);
|
let src = src.clone().to_reg_mem().with_allocs(allocs);
|
||||||
@@ -1975,10 +2002,6 @@ pub(crate) fn emit(
|
|||||||
SseOpcode::Pextrw => (LegacyPrefixes::_66, 0x0FC5, 2),
|
SseOpcode::Pextrw => (LegacyPrefixes::_66, 0x0FC5, 2),
|
||||||
SseOpcode::Pextrd => (LegacyPrefixes::_66, 0x0F3A16, 3),
|
SseOpcode::Pextrd => (LegacyPrefixes::_66, 0x0F3A16, 3),
|
||||||
SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2),
|
SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2),
|
||||||
SseOpcode::Roundps => (LegacyPrefixes::_66, 0x0F3A08, 3),
|
|
||||||
SseOpcode::Roundss => (LegacyPrefixes::_66, 0x0F3A0A, 3),
|
|
||||||
SseOpcode::Roundpd => (LegacyPrefixes::_66, 0x0F3A09, 3),
|
|
||||||
SseOpcode::Roundsd => (LegacyPrefixes::_66, 0x0F3A0B, 3),
|
|
||||||
SseOpcode::Shufps => (LegacyPrefixes::None, 0x0FC6, 2),
|
SseOpcode::Shufps => (LegacyPrefixes::None, 0x0FC6, 2),
|
||||||
_ => unimplemented!("Opcode {:?} not implemented", op),
|
_ => unimplemented!("Opcode {:?} not implemented", op),
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -29,6 +29,17 @@ impl Inst {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn xmm_unary_rm_r_imm(op: SseOpcode, src: RegMem, dst: Writable<Reg>, imm: u8) -> Inst {
|
||||||
|
src.assert_regclass_is(RegClass::Float);
|
||||||
|
debug_assert!(dst.to_reg().class() == RegClass::Float);
|
||||||
|
Inst::XmmUnaryRmRImm {
|
||||||
|
op,
|
||||||
|
src: XmmMem::new(src).unwrap(),
|
||||||
|
imm,
|
||||||
|
dst: WritableXmm::from_writable_reg(dst).unwrap(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn xmm_unary_rm_r_evex(op: Avx512Opcode, src: RegMem, dst: Writable<Reg>) -> Inst {
|
fn xmm_unary_rm_r_evex(op: Avx512Opcode, src: RegMem, dst: Writable<Reg>) -> Inst {
|
||||||
src.assert_regclass_is(RegClass::Float);
|
src.assert_regclass_is(RegClass::Float);
|
||||||
debug_assert!(dst.to_reg().class() == RegClass::Float);
|
debug_assert!(dst.to_reg().class() == RegClass::Float);
|
||||||
@@ -4611,46 +4622,22 @@ fn test_x64_emit() {
|
|||||||
));
|
));
|
||||||
|
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_rm_r_imm(
|
Inst::xmm_unary_rm_r_imm(SseOpcode::Roundps, RegMem::reg(xmm7), w_xmm8, 3),
|
||||||
SseOpcode::Roundps,
|
|
||||||
RegMem::reg(xmm7),
|
|
||||||
w_xmm8,
|
|
||||||
3,
|
|
||||||
OperandSize::Size32,
|
|
||||||
),
|
|
||||||
"66440F3A08C703",
|
"66440F3A08C703",
|
||||||
"roundps $3, %xmm7, %xmm8",
|
"roundps $3, %xmm7, %xmm8",
|
||||||
));
|
));
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_rm_r_imm(
|
Inst::xmm_unary_rm_r_imm(SseOpcode::Roundpd, RegMem::reg(xmm10), w_xmm7, 2),
|
||||||
SseOpcode::Roundpd,
|
|
||||||
RegMem::reg(xmm10),
|
|
||||||
w_xmm7,
|
|
||||||
2,
|
|
||||||
OperandSize::Size32,
|
|
||||||
),
|
|
||||||
"66410F3A09FA02",
|
"66410F3A09FA02",
|
||||||
"roundpd $2, %xmm10, %xmm7",
|
"roundpd $2, %xmm10, %xmm7",
|
||||||
));
|
));
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_rm_r_imm(
|
Inst::xmm_unary_rm_r_imm(SseOpcode::Roundps, RegMem::reg(xmm4), w_xmm8, 1),
|
||||||
SseOpcode::Roundps,
|
|
||||||
RegMem::reg(xmm4),
|
|
||||||
w_xmm8,
|
|
||||||
1,
|
|
||||||
OperandSize::Size32,
|
|
||||||
),
|
|
||||||
"66440F3A08C401",
|
"66440F3A08C401",
|
||||||
"roundps $1, %xmm4, %xmm8",
|
"roundps $1, %xmm4, %xmm8",
|
||||||
));
|
));
|
||||||
insns.push((
|
insns.push((
|
||||||
Inst::xmm_rm_r_imm(
|
Inst::xmm_unary_rm_r_imm(SseOpcode::Roundpd, RegMem::reg(xmm15), w_xmm15, 0),
|
||||||
SseOpcode::Roundpd,
|
|
||||||
RegMem::reg(xmm15),
|
|
||||||
w_xmm15,
|
|
||||||
0,
|
|
||||||
OperandSize::Size32,
|
|
||||||
),
|
|
||||||
"66450F3A09FF00",
|
"66450F3A09FF00",
|
||||||
"roundpd $0, %xmm15, %xmm15",
|
"roundpd $0, %xmm15, %xmm15",
|
||||||
));
|
));
|
||||||
|
|||||||
@@ -129,6 +129,7 @@ impl Inst {
|
|||||||
| Inst::XmmRmR { op, .. }
|
| Inst::XmmRmR { op, .. }
|
||||||
| Inst::XmmRmRImm { op, .. }
|
| Inst::XmmRmRImm { op, .. }
|
||||||
| Inst::XmmToGpr { op, .. }
|
| Inst::XmmToGpr { op, .. }
|
||||||
|
| Inst::XmmUnaryRmRImm { op, .. }
|
||||||
| Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()],
|
| Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()],
|
||||||
|
|
||||||
Inst::XmmUnaryRmREvex { op, .. }
|
Inst::XmmUnaryRmREvex { op, .. }
|
||||||
@@ -896,6 +897,14 @@ impl PrettyPrint for Inst {
|
|||||||
format!("{} {}, {}", ljustify(op.to_string()), src, dst)
|
format!("{} {}, {}", ljustify(op.to_string()), src, dst)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Inst::XmmUnaryRmRImm {
|
||||||
|
op, src, dst, imm, ..
|
||||||
|
} => {
|
||||||
|
let dst = pretty_print_reg(dst.to_reg().to_reg(), op.src_size(), allocs);
|
||||||
|
let src = src.pretty_print(op.src_size(), allocs);
|
||||||
|
format!("{} ${}, {}, {}", ljustify(op.to_string()), imm, src, dst)
|
||||||
|
}
|
||||||
|
|
||||||
Inst::XmmUnaryRmREvex { op, src, dst, .. } => {
|
Inst::XmmUnaryRmREvex { op, src, dst, .. } => {
|
||||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs);
|
||||||
let src = src.pretty_print(8, allocs);
|
let src = src.pretty_print(8, allocs);
|
||||||
@@ -1702,7 +1711,9 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
|||||||
collector.reg_def(dst.to_writable_reg());
|
collector.reg_def(dst.to_writable_reg());
|
||||||
src.get_operands(collector);
|
src.get_operands(collector);
|
||||||
}
|
}
|
||||||
Inst::XmmUnaryRmR { src, dst, .. } | Inst::XmmUnaryRmREvex { src, dst, .. } => {
|
Inst::XmmUnaryRmR { src, dst, .. }
|
||||||
|
| Inst::XmmUnaryRmREvex { src, dst, .. }
|
||||||
|
| Inst::XmmUnaryRmRImm { src, dst, .. } => {
|
||||||
collector.reg_def(dst.to_writable_reg());
|
collector.reg_def(dst.to_writable_reg());
|
||||||
src.get_operands(collector);
|
src.get_operands(collector);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user