From fb8b9838fec05843c90d1799ba9946c1574b8cdd Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Wed, 31 Aug 2022 08:29:32 -0700 Subject: [PATCH] Add MInst.XmmUnaryRmRImm to handle rounding instructions (#4823) Add a new pseudo-instruction, XmmUnaryRmRImm, to handle instructions like roundss that only use their first register argument for the instruction's result. This has the added benefit of allowing the isle wrappers for those instructions to take an XmmMem argument, allowing for more cases where loads may be merged. --- cranelift/codegen/src/isa/x64/inst.isle | 49 ++++++++++--------- cranelift/codegen/src/isa/x64/inst/emit.rs | 31 ++++++++++-- .../codegen/src/isa/x64/inst/emit_tests.rs | 43 ++++++---------- cranelift/codegen/src/isa/x64/inst/mod.rs | 13 ++++- 4 files changed, 79 insertions(+), 57 deletions(-) diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index ff6bbd1775..25d41ae483 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -220,6 +220,16 @@ (src XmmMem) (dst WritableXmm)) + ;; XMM (scalar or vector) unary op with immediate: roundss, roundsd, etc. + ;; + ;; This differs from XMM_RM_R_IMM in that the dst register of + ;; XmmUnaryRmRImm is not used in the computation of the instruction dst + ;; value and so does not have to be a previously valid value. + (XmmUnaryRmRImm (op SseOpcode) + (src XmmMem) + (imm u8) + (dst WritableXmm)) + ;; XMM (scalar or vector) unary op that relies on the EVEX prefix. (XmmUnaryRmREvex (op Avx512Opcode) (src XmmMem) @@ -2590,41 +2600,32 @@ lane size)) +;; Helper for constructing `XmmUnaryRmRImm` instructions. +(decl xmm_unary_rm_r_imm (SseOpcode XmmMem u8) Xmm) +(rule (xmm_unary_rm_r_imm op src1 imm) + (let ((dst WritableXmm (temp_writable_xmm)) + (_ Unit (emit (MInst.XmmUnaryRmRImm op src1 imm dst)))) + dst)) + ;; Helper for creating `roundss` instructions. -(decl x64_roundss (Xmm RoundImm) Xmm) +(decl x64_roundss (XmmMem RoundImm) Xmm) (rule (x64_roundss src1 round) - (xmm_rm_r_imm (SseOpcode.Roundss) - src1 - src1 - (encode_round_imm round) - (OperandSize.Size32))) + (xmm_unary_rm_r_imm (SseOpcode.Roundss) src1 (encode_round_imm round))) ;; Helper for creating `roundsd` instructions. -(decl x64_roundsd (Xmm RoundImm) Xmm) +(decl x64_roundsd (XmmMem RoundImm) Xmm) (rule (x64_roundsd src1 round) - (xmm_rm_r_imm (SseOpcode.Roundsd) - src1 - src1 - (encode_round_imm round) - (OperandSize.Size32))) + (xmm_unary_rm_r_imm (SseOpcode.Roundsd) src1 (encode_round_imm round))) ;; Helper for creating `roundps` instructions. -(decl x64_roundps (Xmm RoundImm) Xmm) +(decl x64_roundps (XmmMem RoundImm) Xmm) (rule (x64_roundps src1 round) - (xmm_rm_r_imm (SseOpcode.Roundps) - src1 - src1 - (encode_round_imm round) - (OperandSize.Size32))) + (xmm_unary_rm_r_imm (SseOpcode.Roundps) src1 (encode_round_imm round))) ;; Helper for creating `roundpd` instructions. -(decl x64_roundpd (Xmm RoundImm) Xmm) +(decl x64_roundpd (XmmMem RoundImm) Xmm) (rule (x64_roundpd src1 round) - (xmm_rm_r_imm (SseOpcode.Roundpd) - src1 - src1 - (encode_round_imm round) - (OperandSize.Size32))) + (xmm_unary_rm_r_imm (SseOpcode.Roundpd) src1 (encode_round_imm round))) ;; Helper for creating `pmaddwd` instructions. (decl x64_pmaddwd (Xmm XmmMem) Xmm) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 8489338054..77b0fad3d4 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1612,6 +1612,33 @@ pub(crate) fn emit( }; } + Inst::XmmUnaryRmRImm { op, src, dst, imm } => { + debug_assert!(!op.uses_src1()); + + let dst = allocs.next(dst.to_reg().to_reg()); + let src = src.clone().to_reg_mem().with_allocs(allocs); + let rex = RexFlags::clear_w(); + + let (prefix, opcode, len) = match op { + SseOpcode::Roundps => (LegacyPrefixes::_66, 0x0F3A08, 3), + SseOpcode::Roundss => (LegacyPrefixes::_66, 0x0F3A0A, 3), + SseOpcode::Roundpd => (LegacyPrefixes::_66, 0x0F3A09, 3), + SseOpcode::Roundsd => (LegacyPrefixes::_66, 0x0F3A0B, 3), + _ => unimplemented!("Opcode {:?} not implemented", op), + }; + match src { + RegMem::Reg { reg } => { + emit_std_reg_reg(sink, prefix, opcode, len, dst, reg, rex); + } + RegMem::Mem { addr } => { + let addr = &addr.finalize(state, sink); + // N.B.: bytes_at_end == 1, because of the `imm` byte below. + emit_std_reg_mem(sink, info, prefix, opcode, len, dst, addr, rex, 1); + } + } + sink.put1(*imm); + } + Inst::XmmUnaryRmREvex { op, src, dst } => { let dst = allocs.next(dst.to_reg().to_reg()); let src = src.clone().to_reg_mem().with_allocs(allocs); @@ -1975,10 +2002,6 @@ pub(crate) fn emit( SseOpcode::Pextrw => (LegacyPrefixes::_66, 0x0FC5, 2), SseOpcode::Pextrd => (LegacyPrefixes::_66, 0x0F3A16, 3), SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2), - SseOpcode::Roundps => (LegacyPrefixes::_66, 0x0F3A08, 3), - SseOpcode::Roundss => (LegacyPrefixes::_66, 0x0F3A0A, 3), - SseOpcode::Roundpd => (LegacyPrefixes::_66, 0x0F3A09, 3), - SseOpcode::Roundsd => (LegacyPrefixes::_66, 0x0F3A0B, 3), SseOpcode::Shufps => (LegacyPrefixes::None, 0x0FC6, 2), _ => unimplemented!("Opcode {:?} not implemented", op), }; diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 9cbde12668..dfac79cb3c 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -29,6 +29,17 @@ impl Inst { } } + fn xmm_unary_rm_r_imm(op: SseOpcode, src: RegMem, dst: Writable, imm: u8) -> Inst { + src.assert_regclass_is(RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Float); + Inst::XmmUnaryRmRImm { + op, + src: XmmMem::new(src).unwrap(), + imm, + dst: WritableXmm::from_writable_reg(dst).unwrap(), + } + } + fn xmm_unary_rm_r_evex(op: Avx512Opcode, src: RegMem, dst: Writable) -> Inst { src.assert_regclass_is(RegClass::Float); debug_assert!(dst.to_reg().class() == RegClass::Float); @@ -4611,46 +4622,22 @@ fn test_x64_emit() { )); insns.push(( - Inst::xmm_rm_r_imm( - SseOpcode::Roundps, - RegMem::reg(xmm7), - w_xmm8, - 3, - OperandSize::Size32, - ), + Inst::xmm_unary_rm_r_imm(SseOpcode::Roundps, RegMem::reg(xmm7), w_xmm8, 3), "66440F3A08C703", "roundps $3, %xmm7, %xmm8", )); insns.push(( - Inst::xmm_rm_r_imm( - SseOpcode::Roundpd, - RegMem::reg(xmm10), - w_xmm7, - 2, - OperandSize::Size32, - ), + Inst::xmm_unary_rm_r_imm(SseOpcode::Roundpd, RegMem::reg(xmm10), w_xmm7, 2), "66410F3A09FA02", "roundpd $2, %xmm10, %xmm7", )); insns.push(( - Inst::xmm_rm_r_imm( - SseOpcode::Roundps, - RegMem::reg(xmm4), - w_xmm8, - 1, - OperandSize::Size32, - ), + Inst::xmm_unary_rm_r_imm(SseOpcode::Roundps, RegMem::reg(xmm4), w_xmm8, 1), "66440F3A08C401", "roundps $1, %xmm4, %xmm8", )); insns.push(( - Inst::xmm_rm_r_imm( - SseOpcode::Roundpd, - RegMem::reg(xmm15), - w_xmm15, - 0, - OperandSize::Size32, - ), + Inst::xmm_unary_rm_r_imm(SseOpcode::Roundpd, RegMem::reg(xmm15), w_xmm15, 0), "66450F3A09FF00", "roundpd $0, %xmm15, %xmm15", )); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 09b5993298..fe4f99d561 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -129,6 +129,7 @@ impl Inst { | Inst::XmmRmR { op, .. } | Inst::XmmRmRImm { op, .. } | Inst::XmmToGpr { op, .. } + | Inst::XmmUnaryRmRImm { op, .. } | Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()], Inst::XmmUnaryRmREvex { op, .. } @@ -896,6 +897,14 @@ impl PrettyPrint for Inst { format!("{} {}, {}", ljustify(op.to_string()), src, dst) } + Inst::XmmUnaryRmRImm { + op, src, dst, imm, .. + } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), op.src_size(), allocs); + let src = src.pretty_print(op.src_size(), allocs); + format!("{} ${}, {}, {}", ljustify(op.to_string()), imm, src, dst) + } + Inst::XmmUnaryRmREvex { op, src, dst, .. } => { let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); let src = src.pretty_print(8, allocs); @@ -1702,7 +1711,9 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol collector.reg_def(dst.to_writable_reg()); src.get_operands(collector); } - Inst::XmmUnaryRmR { src, dst, .. } | Inst::XmmUnaryRmREvex { src, dst, .. } => { + Inst::XmmUnaryRmR { src, dst, .. } + | Inst::XmmUnaryRmREvex { src, dst, .. } + | Inst::XmmUnaryRmRImm { src, dst, .. } => { collector.reg_def(dst.to_writable_reg()); src.get_operands(collector); }