x64: Fix load sinking bugs in new lowerings (#4740)
Fixes #4736. Fix lowerings that were using values as both a Reg and a RegMem, making it look like a load could be sunk while its value in a register was still being used. Also add an assert that checks that loads which are sunk are never otherwise used.
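As an illustrative sketch of the bug (not a rule from this patch; my_inst and x64_some_op are hypothetical constructor names): when the same val is passed both where a register is required and where a RegMem-style operand is accepted, the load that defines val can be merged (sunk) into the RegMem use, so it is never emitted as a separate instruction and the register use reads a value that is never written. Binding the value to a register once with a let and reusing that binding avoids the double use:

;; Buggy shape: `val` feeds both a Reg operand and a RegMem operand, so a
;; defining load may be sunk into one use while the other still expects the
;; loaded value in a register. (`my_inst` and `x64_some_op` are hypothetical.)
(rule (lower (my_inst val))
      (x64_some_op val val))

;; Fixed shape: coerce `val` into a register exactly once, then reuse it.
(rule (lower (my_inst val))
      (let ((src Gpr val))
        (x64_some_op src src)))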
@@ -2934,8 +2934,9 @@
 (rule (cmp_zero_int_bool_ref val @ (value_type $B1))
       (x64_test (OperandSize.Size8) (RegMemImm.Imm 1) val))
 (rule (cmp_zero_int_bool_ref val @ (value_type ty))
-      (let ((size OperandSize (raw_operand_size_of_type ty)))
-        (x64_test size val val)))
+      (let ((size OperandSize (raw_operand_size_of_type ty))
+            (src Gpr val))
+        (x64_test size src src)))
 
 ;; Rules for `bricmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -3036,8 +3037,10 @@
 ;; -> Convert(Ah) // Convert .. with no loss of significant digits from previous shift
 ;; -> Ah = Ah + Ah // Double Ah to account for shift right before the conversion.
 ;; -> dst = Ah + Al // Add the two floats together
-(rule (lower (has_type $F32X4 (fcvt_from_uint a)))
-      (let (;; get the low 16 bits
+(rule (lower (has_type $F32X4 (fcvt_from_uint val)))
+      (let ((a Xmm val)
+
+            ;; get the low 16 bits
             (a_lo Xmm (x64_pslld a (RegMemImm.Imm 16)))
             (a_lo Xmm (x64_psrld a_lo (RegMemImm.Imm 16)))
 
@@ -3072,9 +3075,11 @@
 
 ;; The x64 backend currently only supports these two type combinations.
 (rule (lower (has_type $I32X4 (fcvt_to_sint_sat val @ (value_type $F32X4))))
-      (let (;; Sets tmp to zero if float is NaN
-            (tmp Xmm (x64_cmpps val val (FcmpImm.Equal)))
-            (dst Xmm (x64_andps val tmp))
+      (let ((src Xmm val)
+
+            ;; Sets tmp to zero if float is NaN
+            (tmp Xmm (x64_cmpps src src (FcmpImm.Equal)))
+            (dst Xmm (x64_andps src tmp))
 
             ;; Sets top bit of tmp if float is positive
             ;; Setting up to set top bit on negative float values
@@ -3141,10 +3146,12 @@
 ;; | Step 6                                              | Step 7                                 |
 ;; | (0-(INT_MAX+1))..(UINT_MAX-(INT_MAX+1))(w/overflow) | ((INT_MAX+1)-(INT_MAX+1))..(INT_MAX+1) |
 (rule (lower (has_type $I32X4 (fcvt_to_uint_sat val @ (value_type $F32X4))))
-      (let (;; Converting to unsigned int so if float src is negative or NaN
+      (let ((src Xmm val)
+
+            ;; Converting to unsigned int so if float src is negative or NaN
             ;; will first set to zero.
-            (tmp2 Xmm (x64_pxor val val)) ;; make a zero
-            (dst Xmm (x64_maxps val tmp2))
+            (tmp2 Xmm (x64_pxor src src)) ;; make a zero
+            (dst Xmm (x64_maxps src tmp2))
 
             ;; Set tmp2 to INT_MAX+1. It is important to note here that after it looks
             ;; like we are only converting INT_MAX (0x7FFFFFFF) but in fact because
@@ -3233,10 +3240,12 @@
 ;; Rules for `swiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $I16X8 (swiden_high val @ (value_type $I8X16))))
-      (x64_pmovsxbw (x64_palignr val val 8 (OperandSize.Size32))))
+      (let ((x Xmm val))
+        (x64_pmovsxbw (x64_palignr x x 8 (OperandSize.Size32)))))
 
 (rule (lower (has_type $I32X4 (swiden_high val @ (value_type $I16X8))))
-      (x64_pmovsxwd (x64_palignr val val 8 (OperandSize.Size32))))
+      (let ((x Xmm val))
+        (x64_pmovsxwd (x64_palignr x x 8 (OperandSize.Size32)))))
 
 (rule (lower (has_type $I64X2 (swiden_high val @ (value_type $I32X4))))
       (x64_pmovsxdq (x64_pshufd val 0xEE (OperandSize.Size32))))
@@ -3255,10 +3264,12 @@
 ;; Rules for `uwiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $I16X8 (uwiden_high val @ (value_type $I8X16))))
-      (x64_pmovzxbw (x64_palignr val val 8 (OperandSize.Size32))))
+      (let ((x Xmm val))
+        (x64_pmovzxbw (x64_palignr x x 8 (OperandSize.Size32)))))
 
 (rule (lower (has_type $I32X4 (uwiden_high val @ (value_type $I16X8))))
-      (x64_pmovzxwd (x64_palignr val val 8 (OperandSize.Size32))))
+      (let ((x Xmm val))
+        (x64_pmovzxwd (x64_palignr x x 8 (OperandSize.Size32)))))
 
 (rule (lower (has_type $I64X2 (uwiden_high val @ (value_type $I32X4))))
       (x64_pmovzxdq (x64_pshufd val 0xEE (OperandSize.Size32))))
@@ -3277,9 +3288,11 @@
 ;; This rule is a special case for handling the translation of the wasm op
 ;; `i32x4.trunc_sat_f64x2_s_zero`. It can be removed once we have an
 ;; implementation of `snarrow` for `I64X2`.
-(rule (lower (has_type $I32X4 (snarrow (has_type $I64X2 (fcvt_to_sint_sat a))
+(rule (lower (has_type $I32X4 (snarrow (has_type $I64X2 (fcvt_to_sint_sat val))
                                        (vconst (u128_from_constant 0)))))
-      (let (;; y = i32x4.trunc_sat_f64x2_s_zero(x) is lowered to:
+      (let ((a Xmm val)
+
+            ;; y = i32x4.trunc_sat_f64x2_s_zero(x) is lowered to:
             ;; MOVE xmm_tmp, xmm_x
             ;; CMPEQPD xmm_tmp, xmm_x
             ;; MOVE xmm_y, xmm_x
@@ -1266,6 +1266,10 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
         let ty = self.f.dfg.value_type(val);
         assert!(ty != IFLAGS && ty != FFLAGS);
 
+        if let Some(inst) = self.f.dfg.value_def(val).inst() {
+            assert!(!self.inst_sunk.contains(&inst));
+        }
+
         // If the value is a constant, then (re)materialize it at each use. This
         // lowers register pressure.
         if let Some(c) = self
@@ -1347,6 +1351,10 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
         assert!(has_lowering_side_effect(self.f, ir_inst));
         assert!(self.cur_scan_entry_color.is_some());
 
+        for result in self.dfg().inst_results(ir_inst) {
+            assert!(self.value_lowered_uses[*result] == 0);
+        }
+
        let sunk_inst_entry_color = self
            .side_effect_inst_entry_colors
            .get(&ir_inst)
cranelift/filetests/filetests/isa/x64/widen-high-bug.clif (new file, 20 lines)
@@ -0,0 +1,20 @@
+test compile precise-output
+target x86_64
+
+function u0:0(i64 vmctx, i8x16) -> i16x8 fast {
+block0(v0: i64, v2: i8x16):
+    v5 = load.i8x16 notrap aligned table v0+80
+    v6 = uwiden_high v5
+    return v6
+}
+
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; movdqu 80(%rdi), %xmm5
+; palignr $8, %xmm5, %xmm5, %xmm5
+; pmovzxbw %xmm5, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+