x64: fix CvtFloatToUintSeq: do not clobber src. (#4842)
This slipped through the regalloc2 operand code update in #4811: the CvtFloatToUintSeq pseudo-instruction actually clobbers its source. It was marked as a "mod" operand in the original code, and I mistakenly converted it to a "use" because I had not seen the actual clobber. The instruction now takes an extra XMM temp (`tmp_xmm2`) and copies `src` into it at the start of the `is_large` path, so the subtraction and truncating conversion operate on the copy rather than on `src`. Fixes #4840.
This commit is contained in:
@@ -289,7 +289,8 @@
|
|||||||
(src Xmm)
|
(src Xmm)
|
||||||
(dst WritableGpr)
|
(dst WritableGpr)
|
||||||
(tmp_gpr WritableGpr)
|
(tmp_gpr WritableGpr)
|
||||||
(tmp_xmm WritableXmm))
|
(tmp_xmm WritableXmm)
|
||||||
|
(tmp_xmm2 WritableXmm))
|
||||||
|
|
||||||
;; A sequence to compute min/max with the proper NaN semantics for xmm
|
;; A sequence to compute min/max with the proper NaN semantics for xmm
|
||||||
;; registers.
|
;; registers.
|
||||||
@@ -3213,8 +3214,9 @@
|
|||||||
|
|
||||||
(dst WritableGpr (temp_writable_gpr))
|
(dst WritableGpr (temp_writable_gpr))
|
||||||
(tmp_xmm WritableXmm (temp_writable_xmm))
|
(tmp_xmm WritableXmm (temp_writable_xmm))
|
||||||
|
(tmp_xmm2 WritableXmm (temp_writable_xmm))
|
||||||
(tmp_gpr WritableGpr (temp_writable_gpr))
|
(tmp_gpr WritableGpr (temp_writable_gpr))
|
||||||
(_ Unit (emit (MInst.CvtFloatToUintSeq out_size src_size is_saturating src dst tmp_gpr tmp_xmm))))
|
(_ Unit (emit (MInst.CvtFloatToUintSeq out_size src_size is_saturating src dst tmp_gpr tmp_xmm tmp_xmm2))))
|
||||||
dst))
|
dst))
|
||||||
|
|
||||||
(decl cvt_float_to_sint_seq (Type Value bool) Gpr)
|
(decl cvt_float_to_sint_seq (Type Value bool) Gpr)
|
||||||
|
|||||||
@@ -2593,11 +2593,13 @@ pub(crate) fn emit(
|
|||||||
dst,
|
dst,
|
||||||
tmp_gpr,
|
tmp_gpr,
|
||||||
tmp_xmm,
|
tmp_xmm,
|
||||||
|
tmp_xmm2,
|
||||||
} => {
|
} => {
|
||||||
let src = allocs.next(src.to_reg());
|
let src = allocs.next(src.to_reg());
|
||||||
let dst = allocs.next(dst.to_reg().to_reg());
|
let dst = allocs.next(dst.to_reg().to_reg());
|
||||||
let tmp_gpr = allocs.next(tmp_gpr.to_reg().to_reg());
|
let tmp_gpr = allocs.next(tmp_gpr.to_reg().to_reg());
|
||||||
let tmp_xmm = allocs.next(tmp_xmm.to_reg().to_reg());
|
let tmp_xmm = allocs.next(tmp_xmm.to_reg().to_reg());
|
||||||
|
let tmp_xmm2 = allocs.next(tmp_xmm2.to_reg().to_reg());
|
||||||
|
|
||||||
// The only difference in behavior between saturating and non-saturating is how we
|
// The only difference in behavior between saturating and non-saturating is how we
|
||||||
// handle errors. Emits the following sequence:
|
// handle errors. Emits the following sequence:
|
||||||
@@ -2620,7 +2622,8 @@ pub(crate) fn emit(
|
|||||||
// -- saturating: xor %dst, %dst; j done
|
// -- saturating: xor %dst, %dst; j done
|
||||||
//
|
//
|
||||||
// is_large:
|
// is_large:
|
||||||
// subss/subsd %tmp_xmm, %src ; <-- we clobber %src here
|
// mov %src, %tmp_xmm2
|
||||||
|
// subss/subsd %tmp_xmm, %tmp_xmm2
|
||||||
// cvttss2si/cvttsd2si %src, %dst
|
// cvttss2si/cvttsd2si %tmp_xmm2, %dst
|
||||||
// cmp 0, %dst
|
// cmp 0, %dst
|
||||||
// jnl next_is_large
|
// jnl next_is_large
|
||||||
@@ -2732,10 +2735,13 @@ pub(crate) fn emit(
|
|||||||
|
|
||||||
sink.bind_label(handle_large);
|
sink.bind_label(handle_large);
|
||||||
|
|
||||||
let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm), Writable::from_reg(src));
|
let inst = Inst::gen_move(Writable::from_reg(tmp_xmm2), src, types::F64);
|
||||||
inst.emit(&[], sink, info, state);
|
inst.emit(&[], sink, info, state);
|
||||||
|
|
||||||
let inst = Inst::xmm_to_gpr(trunc_op, src, Writable::from_reg(dst), *dst_size);
|
let inst = Inst::xmm_rm_r(sub_op, RegMem::reg(tmp_xmm), Writable::from_reg(tmp_xmm2));
|
||||||
|
inst.emit(&[], sink, info, state);
|
||||||
|
|
||||||
|
let inst = Inst::xmm_to_gpr(trunc_op, tmp_xmm2, Writable::from_reg(dst), *dst_size);
|
||||||
inst.emit(&[], sink, info, state);
|
inst.emit(&[], sink, info, state);
|
||||||
|
|
||||||
let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst);
|
let inst = Inst::cmp_rmi_r(*dst_size, RegMemImm::imm(0), dst);
|
||||||
|
|||||||
@@ -1179,14 +1179,16 @@ impl PrettyPrint for Inst {
|
|||||||
dst_size,
|
dst_size,
|
||||||
tmp_gpr,
|
tmp_gpr,
|
||||||
tmp_xmm,
|
tmp_xmm,
|
||||||
|
tmp_xmm2,
|
||||||
is_saturating,
|
is_saturating,
|
||||||
} => {
|
} => {
|
||||||
let src = pretty_print_reg(src.to_reg(), src_size.to_bytes(), allocs);
|
let src = pretty_print_reg(src.to_reg(), src_size.to_bytes(), allocs);
|
||||||
let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs);
|
let dst = pretty_print_reg(dst.to_reg().to_reg(), dst_size.to_bytes(), allocs);
|
||||||
let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8, allocs);
|
let tmp_gpr = pretty_print_reg(tmp_gpr.to_reg().to_reg(), 8, allocs);
|
||||||
let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8, allocs);
|
let tmp_xmm = pretty_print_reg(tmp_xmm.to_reg().to_reg(), 8, allocs);
|
||||||
|
let tmp_xmm2 = pretty_print_reg(tmp_xmm2.to_reg().to_reg(), 8, allocs);
|
||||||
format!(
|
format!(
|
||||||
"{} {}, {}, {}, {}",
|
"{} {}, {}, {}, {}, {}",
|
||||||
ljustify(format!(
|
ljustify(format!(
|
||||||
"cvt_float{}_to_uint{}{}_seq",
|
"cvt_float{}_to_uint{}{}_seq",
|
||||||
src_size.to_bits(),
|
src_size.to_bits(),
|
||||||
@@ -1197,6 +1199,7 @@ impl PrettyPrint for Inst {
|
|||||||
dst,
|
dst,
|
||||||
tmp_gpr,
|
tmp_gpr,
|
||||||
tmp_xmm,
|
tmp_xmm,
|
||||||
|
tmp_xmm2,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1876,7 +1879,7 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
|||||||
..
|
..
|
||||||
} => {
|
} => {
|
||||||
collector.reg_use(src.to_reg());
|
collector.reg_use(src.to_reg());
|
||||||
collector.reg_def(dst.to_writable_reg());
|
collector.reg_early_def(dst.to_writable_reg());
|
||||||
collector.reg_early_def(tmp_gpr1.to_writable_reg());
|
collector.reg_early_def(tmp_gpr1.to_writable_reg());
|
||||||
collector.reg_early_def(tmp_gpr2.to_writable_reg());
|
collector.reg_early_def(tmp_gpr2.to_writable_reg());
|
||||||
}
|
}
|
||||||
@@ -1886,18 +1889,25 @@ fn x64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut OperandCol
|
|||||||
tmp_xmm,
|
tmp_xmm,
|
||||||
tmp_gpr,
|
tmp_gpr,
|
||||||
..
|
..
|
||||||
|
} => {
|
||||||
|
collector.reg_use(src.to_reg());
|
||||||
|
collector.reg_early_def(dst.to_writable_reg());
|
||||||
|
collector.reg_early_def(tmp_gpr.to_writable_reg());
|
||||||
|
collector.reg_early_def(tmp_xmm.to_writable_reg());
|
||||||
}
|
}
|
||||||
| Inst::CvtFloatToUintSeq {
|
Inst::CvtFloatToUintSeq {
|
||||||
src,
|
src,
|
||||||
dst,
|
dst,
|
||||||
tmp_gpr,
|
tmp_gpr,
|
||||||
tmp_xmm,
|
tmp_xmm,
|
||||||
|
tmp_xmm2,
|
||||||
..
|
..
|
||||||
} => {
|
} => {
|
||||||
collector.reg_use(src.to_reg());
|
collector.reg_use(src.to_reg());
|
||||||
collector.reg_def(dst.to_writable_reg());
|
collector.reg_early_def(dst.to_writable_reg());
|
||||||
collector.reg_early_def(tmp_gpr.to_writable_reg());
|
collector.reg_early_def(tmp_gpr.to_writable_reg());
|
||||||
collector.reg_early_def(tmp_xmm.to_writable_reg());
|
collector.reg_early_def(tmp_xmm.to_writable_reg());
|
||||||
|
collector.reg_early_def(tmp_xmm2.to_writable_reg());
|
||||||
}
|
}
|
||||||
Inst::MovzxRmR { src, dst, .. } => {
|
Inst::MovzxRmR { src, dst, .. } => {
|
||||||
collector.reg_def(dst.to_writable_reg());
|
collector.reg_def(dst.to_writable_reg());
|
||||||
|
|||||||
@@ -209,7 +209,7 @@ block0(v0: f32):
|
|||||||
; pushq %rbp
|
; pushq %rbp
|
||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block0:
|
; block0:
|
||||||
; cvt_float32_to_uint32_seq %xmm0, %eax, %r8, %xmm4
|
; cvt_float32_to_uint32_seq %xmm0, %eax, %r9, %xmm4, %xmm5
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
@@ -223,7 +223,7 @@ block0(v0: f32):
|
|||||||
; pushq %rbp
|
; pushq %rbp
|
||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block0:
|
; block0:
|
||||||
; cvt_float32_to_uint64_seq %xmm0, %rax, %r8, %xmm4
|
; cvt_float32_to_uint64_seq %xmm0, %rax, %r9, %xmm4, %xmm5
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
@@ -237,7 +237,7 @@ block0(v0: f64):
|
|||||||
; pushq %rbp
|
; pushq %rbp
|
||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block0:
|
; block0:
|
||||||
; cvt_float64_to_uint32_seq %xmm0, %eax, %r8, %xmm4
|
; cvt_float64_to_uint32_seq %xmm0, %eax, %r9, %xmm4, %xmm5
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
@@ -251,7 +251,7 @@ block0(v0: f64):
|
|||||||
; pushq %rbp
|
; pushq %rbp
|
||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block0:
|
; block0:
|
||||||
; cvt_float64_to_uint64_seq %xmm0, %rax, %r8, %xmm4
|
; cvt_float64_to_uint64_seq %xmm0, %rax, %r9, %xmm4, %xmm5
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
@@ -265,7 +265,7 @@ block0(v0: f32):
|
|||||||
; pushq %rbp
|
; pushq %rbp
|
||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block0:
|
; block0:
|
||||||
; cvt_float32_to_uint32_sat_seq %xmm0, %eax, %r8, %xmm4
|
; cvt_float32_to_uint32_sat_seq %xmm0, %eax, %r9, %xmm4, %xmm5
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
@@ -279,7 +279,7 @@ block0(v0: f32):
|
|||||||
; pushq %rbp
|
; pushq %rbp
|
||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block0:
|
; block0:
|
||||||
; cvt_float32_to_uint64_sat_seq %xmm0, %rax, %r8, %xmm4
|
; cvt_float32_to_uint64_sat_seq %xmm0, %rax, %r9, %xmm4, %xmm5
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
@@ -293,7 +293,7 @@ block0(v0: f64):
|
|||||||
; pushq %rbp
|
; pushq %rbp
|
||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block0:
|
; block0:
|
||||||
; cvt_float64_to_uint32_sat_seq %xmm0, %eax, %r8, %xmm4
|
; cvt_float64_to_uint32_sat_seq %xmm0, %eax, %r9, %xmm4, %xmm5
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
@@ -307,7 +307,7 @@ block0(v0: f64):
|
|||||||
; pushq %rbp
|
; pushq %rbp
|
||||||
; movq %rsp, %rbp
|
; movq %rsp, %rbp
|
||||||
; block0:
|
; block0:
|
||||||
; cvt_float64_to_uint64_sat_seq %xmm0, %rax, %r8, %xmm4
|
; cvt_float64_to_uint64_sat_seq %xmm0, %rax, %r9, %xmm4, %xmm5
|
||||||
; movq %rbp, %rsp
|
; movq %rbp, %rsp
|
||||||
; popq %rbp
|
; popq %rbp
|
||||||
; ret
|
; ret
|
||||||
|
|||||||
16
tests/misc_testsuite/issue4840.wast
Normal file
16
tests/misc_testsuite/issue4840.wast
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
(module
|
||||||
|
(func (export "f") (param f32 i32) (result f64)
|
||||||
|
local.get 1
|
||||||
|
f64.convert_i32_u
|
||||||
|
i32.trunc_f64_u
|
||||||
|
f64.convert_i32_s
|
||||||
|
local.get 1
|
||||||
|
f64.convert_i32_u
|
||||||
|
global.set 0
|
||||||
|
drop
|
||||||
|
global.get 0
|
||||||
|
)
|
||||||
|
(global (;0;) (mut f64) f64.const 0)
|
||||||
|
)
|
||||||
|
|
||||||
|
(assert_return (invoke "f" (f32.const 1.23) (i32.const -2147483648)) (f64.const 2147483648))
|
||||||
Reference in New Issue
Block a user