diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index ff6bbd1775..25d41ae483 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -220,6 +220,16 @@ (src XmmMem) (dst WritableXmm)) + ;; XMM (scalar or vector) unary op with immediate: roundss, roundsd, etc. + ;; + ;; This differs from XMM_RM_R_IMM in that the dst register of + ;; XmmUnaryRmRImm is not used in the computation of the instruction dst + ;; value and so does not have to be a previously valid value. + (XmmUnaryRmRImm (op SseOpcode) + (src XmmMem) + (imm u8) + (dst WritableXmm)) + ;; XMM (scalar or vector) unary op that relies on the EVEX prefix. (XmmUnaryRmREvex (op Avx512Opcode) (src XmmMem) @@ -2590,41 +2600,32 @@ lane size)) +;; Helper for constructing `XmmUnaryRmRImm` instructions. +(decl xmm_unary_rm_r_imm (SseOpcode XmmMem u8) Xmm) +(rule (xmm_unary_rm_r_imm op src1 imm) + (let ((dst WritableXmm (temp_writable_xmm)) + (_ Unit (emit (MInst.XmmUnaryRmRImm op src1 imm dst)))) + dst)) + ;; Helper for creating `roundss` instructions. -(decl x64_roundss (Xmm RoundImm) Xmm) +(decl x64_roundss (XmmMem RoundImm) Xmm) (rule (x64_roundss src1 round) - (xmm_rm_r_imm (SseOpcode.Roundss) - src1 - src1 - (encode_round_imm round) - (OperandSize.Size32))) + (xmm_unary_rm_r_imm (SseOpcode.Roundss) src1 (encode_round_imm round))) ;; Helper for creating `roundsd` instructions. -(decl x64_roundsd (Xmm RoundImm) Xmm) +(decl x64_roundsd (XmmMem RoundImm) Xmm) (rule (x64_roundsd src1 round) - (xmm_rm_r_imm (SseOpcode.Roundsd) - src1 - src1 - (encode_round_imm round) - (OperandSize.Size32))) + (xmm_unary_rm_r_imm (SseOpcode.Roundsd) src1 (encode_round_imm round))) ;; Helper for creating `roundps` instructions. -(decl x64_roundps (Xmm RoundImm) Xmm) +(decl x64_roundps (XmmMem RoundImm) Xmm) (rule (x64_roundps src1 round) - (xmm_rm_r_imm (SseOpcode.Roundps) - src1 - src1 - (encode_round_imm round) - (OperandSize.Size32))) + (xmm_unary_rm_r_imm (SseOpcode.Roundps) src1 (encode_round_imm round))) ;; Helper for creating `roundpd` instructions. -(decl x64_roundpd (Xmm RoundImm) Xmm) +(decl x64_roundpd (XmmMem RoundImm) Xmm) (rule (x64_roundpd src1 round) - (xmm_rm_r_imm (SseOpcode.Roundpd) - src1 - src1 - (encode_round_imm round) - (OperandSize.Size32))) + (xmm_unary_rm_r_imm (SseOpcode.Roundpd) src1 (encode_round_imm round))) ;; Helper for creating `pmaddwd` instructions. (decl x64_pmaddwd (Xmm XmmMem) Xmm) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 8489338054..77b0fad3d4 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -1612,6 +1612,33 @@ pub(crate) fn emit( }; } + Inst::XmmUnaryRmRImm { op, src, dst, imm } => { + debug_assert!(!op.uses_src1()); + + let dst = allocs.next(dst.to_reg().to_reg()); + let src = src.clone().to_reg_mem().with_allocs(allocs); + let rex = RexFlags::clear_w(); + + let (prefix, opcode, len) = match op { + SseOpcode::Roundps => (LegacyPrefixes::_66, 0x0F3A08, 3), + SseOpcode::Roundss => (LegacyPrefixes::_66, 0x0F3A0A, 3), + SseOpcode::Roundpd => (LegacyPrefixes::_66, 0x0F3A09, 3), + SseOpcode::Roundsd => (LegacyPrefixes::_66, 0x0F3A0B, 3), + _ => unimplemented!("Opcode {:?} not implemented", op), + }; + match src { + RegMem::Reg { reg } => { + emit_std_reg_reg(sink, prefix, opcode, len, dst, reg, rex); + } + RegMem::Mem { addr } => { + let addr = &addr.finalize(state, sink); + // N.B.: bytes_at_end == 1, because of the `imm` byte below. + emit_std_reg_mem(sink, info, prefix, opcode, len, dst, addr, rex, 1); + } + } + sink.put1(*imm); + } + Inst::XmmUnaryRmREvex { op, src, dst } => { let dst = allocs.next(dst.to_reg().to_reg()); let src = src.clone().to_reg_mem().with_allocs(allocs); @@ -1975,10 +2002,6 @@ pub(crate) fn emit( SseOpcode::Pextrw => (LegacyPrefixes::_66, 0x0FC5, 2), SseOpcode::Pextrd => (LegacyPrefixes::_66, 0x0F3A16, 3), SseOpcode::Pshufd => (LegacyPrefixes::_66, 0x0F70, 2), - SseOpcode::Roundps => (LegacyPrefixes::_66, 0x0F3A08, 3), - SseOpcode::Roundss => (LegacyPrefixes::_66, 0x0F3A0A, 3), - SseOpcode::Roundpd => (LegacyPrefixes::_66, 0x0F3A09, 3), - SseOpcode::Roundsd => (LegacyPrefixes::_66, 0x0F3A0B, 3), SseOpcode::Shufps => (LegacyPrefixes::None, 0x0FC6, 2), _ => unimplemented!("Opcode {:?} not implemented", op), }; diff --git a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs index 9cbde12668..dfac79cb3c 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit_tests.rs @@ -29,6 +29,17 @@ impl Inst { } } + fn xmm_unary_rm_r_imm(op: SseOpcode, src: RegMem, dst: Writable, imm: u8) -> Inst { + src.assert_regclass_is(RegClass::Float); + debug_assert!(dst.to_reg().class() == RegClass::Float); + Inst::XmmUnaryRmRImm { + op, + src: XmmMem::new(src).unwrap(), + imm, + dst: WritableXmm::from_writable_reg(dst).unwrap(), + } + } + fn xmm_unary_rm_r_evex(op: Avx512Opcode, src: RegMem, dst: Writable) -> Inst { src.assert_regclass_is(RegClass::Float); debug_assert!(dst.to_reg().class() == RegClass::Float); @@ -4611,46 +4622,22 @@ fn test_x64_emit() { )); insns.push(( - Inst::xmm_rm_r_imm( - SseOpcode::Roundps, - RegMem::reg(xmm7), - w_xmm8, - 3, - OperandSize::Size32, - ), + Inst::xmm_unary_rm_r_imm(SseOpcode::Roundps, RegMem::reg(xmm7), w_xmm8, 3), "66440F3A08C703", "roundps $3, %xmm7, %xmm8", )); insns.push(( - Inst::xmm_rm_r_imm( - SseOpcode::Roundpd, - RegMem::reg(xmm10), - w_xmm7, - 2, - OperandSize::Size32, - ), + Inst::xmm_unary_rm_r_imm(SseOpcode::Roundpd, RegMem::reg(xmm10), w_xmm7, 2), "66410F3A09FA02", "roundpd $2, %xmm10, %xmm7", )); insns.push(( - Inst::xmm_rm_r_imm( - SseOpcode::Roundps, - RegMem::reg(xmm4), - w_xmm8, - 1, - OperandSize::Size32, - ), + Inst::xmm_unary_rm_r_imm(SseOpcode::Roundps, RegMem::reg(xmm4), w_xmm8, 1), "66440F3A08C401", "roundps $1, %xmm4, %xmm8", )); insns.push(( - Inst::xmm_rm_r_imm( - SseOpcode::Roundpd, - RegMem::reg(xmm15), - w_xmm15, - 0, - OperandSize::Size32, - ), + Inst::xmm_unary_rm_r_imm(SseOpcode::Roundpd, RegMem::reg(xmm15), w_xmm15, 0), "66450F3A09FF00", "roundpd $0, %xmm15, %xmm15", )); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index 09b5993298..fe4f99d561 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -129,6 +129,7 @@ impl Inst { | Inst::XmmRmR { op, .. } | Inst::XmmRmRImm { op, .. } | Inst::XmmToGpr { op, .. } + | Inst::XmmUnaryRmRImm { op, .. } | Inst::XmmUnaryRmR { op, .. } => smallvec![op.available_from()], Inst::XmmUnaryRmREvex { op, .. } @@ -896,6 +897,14 @@ impl PrettyPrint for Inst { format!("{} {}, {}", ljustify(op.to_string()), src, dst) } + Inst::XmmUnaryRmRImm { + op, src, dst, imm, .. + } => { + let dst = pretty_print_reg(dst.to_reg().to_reg(), op.src_size(), allocs); + let src = src.pretty_print(op.src_size(), allocs); + format!("{} ${}, {}, {}", ljustify(op.to_string()), imm, src, dst) + } + Inst::XmmUnaryRmREvex { op, src, dst, .. } => { let dst = pretty_print_reg(dst.to_reg().to_reg(), 8, allocs); let src = src.pretty_print(8, allocs); @@ -1702,7 +1711,9 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol collector.reg_def(dst.to_writable_reg()); src.get_operands(collector); } - Inst::XmmUnaryRmR { src, dst, .. } | Inst::XmmUnaryRmREvex { src, dst, .. } => { + Inst::XmmUnaryRmR { src, dst, .. } + | Inst::XmmUnaryRmREvex { src, dst, .. } + | Inst::XmmUnaryRmRImm { src, dst, .. } => { collector.reg_def(dst.to_writable_reg()); src.get_operands(collector); }