diff --git a/cranelift/codegen/src/isa/x64/inst.isle b/cranelift/codegen/src/isa/x64/inst.isle index fbe4e09ac9..af1630b8e4 100644 --- a/cranelift/codegen/src/isa/x64/inst.isle +++ b/cranelift/codegen/src/isa/x64/inst.isle @@ -289,7 +289,8 @@ (src Xmm) (dst WritableGpr) (tmp_gpr WritableGpr) - (tmp_xmm WritableXmm)) + (tmp_xmm WritableXmm) + (tmp_xmm2 WritableXmm)) ;; A sequence to compute min/max with the proper NaN semantics for xmm ;; registers. @@ -3213,8 +3214,9 @@ (dst WritableGpr (temp_writable_gpr)) (tmp_xmm WritableXmm (temp_writable_xmm)) + (tmp_xmm2 WritableXmm (temp_writable_xmm)) (tmp_gpr WritableGpr (temp_writable_gpr)) - (_ Unit (emit (MInst.CvtFloatToUintSeq out_size src_size is_saturating src dst tmp_gpr tmp_xmm)))) + (_ Unit (emit (MInst.CvtFloatToUintSeq out_size src_size is_saturating src dst tmp_gpr tmp_xmm tmp_xmm2)))) dst)) (decl cvt_float_to_sint_seq (Type Value bool) Gpr) diff --git a/cranelift/codegen/src/isa/x64/inst/emit.rs b/cranelift/codegen/src/isa/x64/inst/emit.rs index 4a60b83c38..1a79336569 100644 --- a/cranelift/codegen/src/isa/x64/inst/emit.rs +++ b/cranelift/codegen/src/isa/x64/inst/emit.rs @@ -2593,11 +2593,13 @@ pub(crate) fn emit( dst, tmp_gpr, tmp_xmm, + tmp_xmm2, } => { let src = allocs.next(src.to_reg()); let dst = allocs.next(dst.to_reg().to_reg()); let tmp_gpr = allocs.next(tmp_gpr.to_reg().to_reg()); let tmp_xmm = allocs.next(tmp_xmm.to_reg().to_reg()); + let tmp_xmm2 = allocs.next(tmp_xmm2.to_reg().to_reg()); // The only difference in behavior between saturating and non-saturating is how we // handle errors. Emits the following sequence: @@ -2620,7 +2622,8 @@ pub(crate) fn emit( // -- saturating: xor %dst, %dst; j done // // is_large: - // subss/subsd %tmp_xmm, %src ; <-- we clobber %src here + // mov %src, %tmp_xmm2 + // subss/subsd %tmp_xmm, %tmp_xmm2 // cvttss2si/cvttss2sd %tmp_x, %dst // cmp 0, %dst // jnl next_is_large @@ -2732,10 +2735,13 @@ pub(crate) fn emit( sink.bind_label(handle_large); - let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm), Writable::from_reg(src)); + let inst = Inst::gen_move(Writable::from_reg(tmp_xmm2), src, types::F64); inst.emit(&[], sink, info, state); - let inst = Inst::xmm_to_gpr(trunc_op, src, Writable::from_reg(dst), *dst_size); + let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm), Writable::from_reg(tmp_xmm2)); + inst.emit(&[], sink, info, state); + + let inst = Inst::xmm_to_gpr(trunc_op, tmp_xmm2, Writable::from_reg(dst), *dst_size); inst.emit(&[], sink, info, state); let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst); diff --git a/cranelift/codegen/src/isa/x64/inst/mod.rs b/cranelift/codegen/src/isa/x64/inst/mod.rs index fd449fe56a..5aa6a5b731 100644 --- a/cranelift/codegen/src/isa/x64/inst/mod.rs +++ b/cranelift/codegen/src/isa/x64/inst/mod.rs @@ -1179,14 +1179,16 @@ impl PrettyPrint for Inst { dst_size, tmp_gpr, tmp_xmm, + tmp_xmm2, is_saturating, } => { let src = pretty_print_reg(src.to_reg(), src_size.to_bytes(), allocs); let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs); let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8, allocs); let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8, allocs); + let tmp_xmm2 = pretty_print_reg(tmp_xmm2.to_reg().to_reg(), 8, allocs); format!( - "{} {}, {}, {}, {}", + "{} {}, {}, {}, {}, {}", ljustify(format!( "cvt_float{}_to_uint{}{}_seq", src_size.to_bits(), @@ -1197,6 +1199,7 @@ impl PrettyPrint for Inst { dst, tmp_gpr, tmp_xmm, + tmp_xmm2, ) } @@ -1876,7 +1879,7 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol .. } => { collector.reg_use(src.to_reg()); - collector.reg_def(dst.to_writable_reg()); + collector.reg_early_def(dst.to_writable_reg()); collector.reg_early_def(tmp_gpr1.to_writable_reg()); collector.reg_early_def(tmp_gpr2.to_writable_reg()); } @@ -1886,18 +1889,25 @@ fn x64_get_operands VReg>(inst: &Inst, collector: &mut OperandCol tmp_xmm, tmp_gpr, .. + } => { + collector.reg_use(src.to_reg()); + collector.reg_early_def(dst.to_writable_reg()); + collector.reg_early_def(tmp_gpr.to_writable_reg()); + collector.reg_early_def(tmp_xmm.to_writable_reg()); } - | Inst::CvtFloatToUintSeq { + Inst::CvtFloatToUintSeq { src, dst, tmp_gpr, tmp_xmm, + tmp_xmm2, .. } => { collector.reg_use(src.to_reg()); - collector.reg_def(dst.to_writable_reg()); + collector.reg_early_def(dst.to_writable_reg()); collector.reg_early_def(tmp_gpr.to_writable_reg()); collector.reg_early_def(tmp_xmm.to_writable_reg()); + collector.reg_early_def(tmp_xmm2.to_writable_reg()); } Inst::MovzxRmR { src, dst, .. } => { collector.reg_def(dst.to_writable_reg()); diff --git a/cranelift/filetests/filetests/isa/x64/fcvt.clif b/cranelift/filetests/filetests/isa/x64/fcvt.clif index 3429078f59..97734701f4 100644 --- a/cranelift/filetests/filetests/isa/x64/fcvt.clif +++ b/cranelift/filetests/filetests/isa/x64/fcvt.clif @@ -209,7 +209,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_uint32_seq %xmm0, %eax, %r8, %xmm4 +; cvt_float32_to_uint32_seq %xmm0, %eax, %r9, %xmm4, %xmm5 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -223,7 +223,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_uint64_seq %xmm0, %rax, %r8, %xmm4 +; cvt_float32_to_uint64_seq %xmm0, %rax, %r9, %xmm4, %xmm5 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -237,7 +237,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_uint32_seq %xmm0, %eax, %r8, %xmm4 +; cvt_float64_to_uint32_seq %xmm0, %eax, %r9, %xmm4, %xmm5 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -251,7 +251,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_uint64_seq %xmm0, %rax, %r8, %xmm4 +; cvt_float64_to_uint64_seq %xmm0, %rax, %r9, %xmm4, %xmm5 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -265,7 +265,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_uint32_sat_seq %xmm0, %eax, %r8, %xmm4 +; cvt_float32_to_uint32_sat_seq %xmm0, %eax, %r9, %xmm4, %xmm5 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -279,7 +279,7 @@ block0(v0: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float32_to_uint64_sat_seq %xmm0, %rax, %r8, %xmm4 +; cvt_float32_to_uint64_sat_seq %xmm0, %rax, %r9, %xmm4, %xmm5 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -293,7 +293,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_uint32_sat_seq %xmm0, %eax, %r8, %xmm4 +; cvt_float64_to_uint32_sat_seq %xmm0, %eax, %r9, %xmm4, %xmm5 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -307,7 +307,7 @@ block0(v0: f64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; cvt_float64_to_uint64_sat_seq %xmm0, %rax, %r8, %xmm4 +; cvt_float64_to_uint64_sat_seq %xmm0, %rax, %r9, %xmm4, %xmm5 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/tests/misc_testsuite/issue4840.wast b/tests/misc_testsuite/issue4840.wast new file mode 100644 index 0000000000..406b8cb6cd --- /dev/null +++ b/tests/misc_testsuite/issue4840.wast @@ -0,0 +1,16 @@ +(module + (func (export "f") (param f32 i32) (result f64) + local.get 1 + f64.convert_i32_u + i32.trunc_f64_u + f64.convert_i32_s + local.get 1 + f64.convert_i32_u + global.set 0 + drop + global.get 0 + ) + (global (;0;) (mut f64) f64.const 0) +) + +(assert_return (invoke "f" (f32.const 1.23) (i32.const -2147483648)) (f64.const 2147483648))