x64: Fix load sinking bugs in new lowerings (#4740)

Fixes #4736

Fix lowerings that used a value as both a Reg and a RegMem, which made it look like a load could be sunk even while its value was still being used from a register. Also add asserts checking that sunk loads are never used again.
Author: Trevor Elliott
Date: 2022-08-19 14:21:06 -07:00
Committed by: GitHub
Parent: fd98814b96
Commit: 754cf7156a
3 changed files with 57 additions and 16 deletions
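A note on the mechanism (a sketch for context; the explanatory comments are added here, not part of the commit): in these ISLE rules a `Value` is implicitly convertible both to a plain register (`Gpr`/`Xmm`) and to a register-or-memory operand such as `RegMemImm` (seen in the hunks below), and it is the register-or-memory conversion that allows a unique defining load to be sunk into the consuming instruction. Using the same value in both positions therefore lets the load be sunk into the memory slot while the register operand still reads a register that is never written. The first hunk below shows both forms:

;; Before: `val` is converted twice. The register-or-memory operand may
;; sink `val`'s defining load, while the Gpr operand still expects the
;; load's result in a register.
(rule (cmp_zero_int_bool_ref val @ (value_type ty))
      (let ((size OperandSize (raw_operand_size_of_type ty)))
        (x64_test size val val)))

;; After: the value is put in a register exactly once and the binding is
;; reused, so the load is always emitted and nothing is sunk.
(rule (cmp_zero_int_bool_ref val @ (value_type ty))
      (let ((size OperandSize (raw_operand_size_of_type ty))
            (src Gpr val))
        (x64_test size src src)))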


@@ -2934,8 +2934,9 @@
 (rule (cmp_zero_int_bool_ref val @ (value_type $B1))
       (x64_test (OperandSize.Size8) (RegMemImm.Imm 1) val))
 (rule (cmp_zero_int_bool_ref val @ (value_type ty))
-      (let ((size OperandSize (raw_operand_size_of_type ty)))
-        (x64_test size val val)))
+      (let ((size OperandSize (raw_operand_size_of_type ty))
+            (src Gpr val))
+        (x64_test size src src)))
 
 ;; Rules for `bricmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -3036,8 +3037,10 @@
 ;; -> Convert(Ah) // Convert .. with no loss of significant digits from previous shift
 ;; -> Ah = Ah + Ah // Double Ah to account for shift right before the conversion.
 ;; -> dst = Ah + Al // Add the two floats together
-(rule (lower (has_type $F32X4 (fcvt_from_uint a)))
-      (let (;; get the low 16 bits
+(rule (lower (has_type $F32X4 (fcvt_from_uint val)))
+      (let ((a Xmm val)
+
+            ;; get the low 16 bits
             (a_lo Xmm (x64_pslld a (RegMemImm.Imm 16)))
             (a_lo Xmm (x64_psrld a_lo (RegMemImm.Imm 16)))
@@ -3072,9 +3075,11 @@
 ;; The x64 backend currently only supports these two type combinations.
 (rule (lower (has_type $I32X4 (fcvt_to_sint_sat val @ (value_type $F32X4))))
-      (let (;; Sets tmp to zero if float is NaN
-            (tmp Xmm (x64_cmpps val val (FcmpImm.Equal)))
-            (dst Xmm (x64_andps val tmp))
+      (let ((src Xmm val)
+
+            ;; Sets tmp to zero if float is NaN
+            (tmp Xmm (x64_cmpps src src (FcmpImm.Equal)))
+            (dst Xmm (x64_andps src tmp))
 
             ;; Sets top bit of tmp if float is positive
             ;; Setting up to set top bit on negative float values
@@ -3141,10 +3146,12 @@
 ;; |                        Step 6                        |                 Step 7                 |
 ;; | (0-(INT_MAX+1))..(UINT_MAX-(INT_MAX+1))(w/overflow)  | ((INT_MAX+1)-(INT_MAX+1))..(INT_MAX+1) |
 (rule (lower (has_type $I32X4 (fcvt_to_uint_sat val @ (value_type $F32X4))))
-      (let (;; Converting to unsigned int so if float src is negative or NaN
+      (let ((src Xmm val)
+
+            ;; Converting to unsigned int so if float src is negative or NaN
             ;; will first set to zero.
-            (tmp2 Xmm (x64_pxor val val)) ;; make a zero
-            (dst Xmm (x64_maxps val tmp2))
+            (tmp2 Xmm (x64_pxor src src)) ;; make a zero
+            (dst Xmm (x64_maxps src tmp2))
 
             ;; Set tmp2 to INT_MAX+1. It is important to note here that after it looks
             ;; like we are only converting INT_MAX (0x7FFFFFFF) but in fact because
@@ -3233,10 +3240,12 @@
 ;; Rules for `swiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $I16X8 (swiden_high val @ (value_type $I8X16))))
-      (x64_pmovsxbw (x64_palignr val val 8 (OperandSize.Size32))))
+      (let ((x Xmm val))
+        (x64_pmovsxbw (x64_palignr x x 8 (OperandSize.Size32)))))
 
 (rule (lower (has_type $I32X4 (swiden_high val @ (value_type $I16X8))))
-      (x64_pmovsxwd (x64_palignr val val 8 (OperandSize.Size32))))
+      (let ((x Xmm val))
+        (x64_pmovsxwd (x64_palignr x x 8 (OperandSize.Size32)))))
 
 (rule (lower (has_type $I64X2 (swiden_high val @ (value_type $I32X4))))
       (x64_pmovsxdq (x64_pshufd val 0xEE (OperandSize.Size32))))
@@ -3255,10 +3264,12 @@
 ;; Rules for `uwiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $I16X8 (uwiden_high val @ (value_type $I8X16))))
-      (x64_pmovzxbw (x64_palignr val val 8 (OperandSize.Size32))))
+      (let ((x Xmm val))
+        (x64_pmovzxbw (x64_palignr x x 8 (OperandSize.Size32)))))
 
 (rule (lower (has_type $I32X4 (uwiden_high val @ (value_type $I16X8))))
-      (x64_pmovzxwd (x64_palignr val val 8 (OperandSize.Size32))))
+      (let ((x Xmm val))
+        (x64_pmovzxwd (x64_palignr x x 8 (OperandSize.Size32)))))
 
 (rule (lower (has_type $I64X2 (uwiden_high val @ (value_type $I32X4))))
       (x64_pmovzxdq (x64_pshufd val 0xEE (OperandSize.Size32))))
@@ -3277,9 +3288,11 @@
 ;; This rule is a special case for handling the translation of the wasm op
 ;; `i32x4.trunc_sat_f64x2_s_zero`. It can be removed once we have an
 ;; implementation of `snarrow` for `I64X2`.
-(rule (lower (has_type $I32X4 (snarrow (has_type $I64X2 (fcvt_to_sint_sat a))
+(rule (lower (has_type $I32X4 (snarrow (has_type $I64X2 (fcvt_to_sint_sat val))
                                        (vconst (u128_from_constant 0)))))
-      (let (;; y = i32x4.trunc_sat_f64x2_s_zero(x) is lowered to:
+      (let ((a Xmm val)
+
+            ;; y = i32x4.trunc_sat_f64x2_s_zero(x) is lowered to:
             ;; MOVE xmm_tmp, xmm_x
             ;; CMPEQPD xmm_tmp, xmm_x
             ;; MOVE xmm_y, xmm_x
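The `I64X2` variants in the two widening sections above were left unchanged, and the contrast is instructive: there `val` is consumed exactly once, as the register-or-memory operand of `pshufd`, so if its defining load is sunk no dangling register use remains. Quoting the untouched rule from the context lines above (comment added):

;; Safe single-use pattern: `val` appears only once, so sinking its
;; defining load into `pshufd`'s memory operand leaves no other use.
(rule (lower (has_type $I64X2 (uwiden_high val @ (value_type $I32X4))))
      (x64_pmovzxdq (x64_pshufd val 0xEE (OperandSize.Size32))))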


@@ -1266,6 +1266,10 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
         let ty = self.f.dfg.value_type(val);
         assert!(ty != IFLAGS && ty != FFLAGS);
 
+        if let Some(inst) = self.f.dfg.value_def(val).inst() {
+            assert!(!self.inst_sunk.contains(&inst));
+        }
+
         // If the value is a constant, then (re)materialize it at each use. This
         // lowers register pressure.
         if let Some(c) = self
@@ -1347,6 +1351,10 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
         assert!(has_lowering_side_effect(self.f, ir_inst));
         assert!(self.cur_scan_entry_color.is_some());
 
+        for result in self.dfg().inst_results(ir_inst) {
+            assert!(self.value_lowered_uses[*result] == 0);
+        }
+
         let sunk_inst_entry_color = self
             .side_effect_inst_entry_colors
             .get(&ir_inst)
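These two asserts turn this class of bug from a silent miscompile into a panic: the first runs where a value is put into registers for use and fires if the value's defining instruction has already been sunk; the second runs where a side-effecting instruction is sunk and fires if any of its results already have lowered register uses. The old `uwiden_high` rule from the ISLE hunks above is exactly the shape they catch when `val` is defined by a sinkable load (comment added):

;; Old (buggy) shape: `val` feeds `palignr` both as the plain Xmm
;; operand and as the register-or-memory operand, so a defining load
;; could be sunk into the memory slot while the register operand still
;; read the never-materialized load result.
(rule (lower (has_type $I16X8 (uwiden_high val @ (value_type $I8X16))))
      (x64_pmovzxbw (x64_palignr val val 8 (OperandSize.Size32))))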


@@ -0,0 +1,20 @@
+test compile precise-output
+target x86_64
+
+function u0:0(i64 vmctx, i8x16) -> i16x8 fast {
+block0(v0: i64, v2: i8x16):
+  v5 = load.i8x16 notrap aligned table v0+80
+  v6 = uwiden_high v5
+  return v6
+}
+
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; movdqu 80(%rdi), %xmm5
+; palignr $8, %xmm5, %xmm5, %xmm5
+; pmovzxbw %xmm5, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; ret
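The expected assembly checks the fix end to end: the table load is materialized by a standalone movdqu, and palignr then operates only on %xmm5 rather than having the load sunk into one of its operands. The fixed rule being exercised, repeated from the diff above (comment added):

;; With the single `Xmm` binding, the load is lowered once into a
;; register; `palignr` never sees a sinkable memory operand.
(rule (lower (has_type $I16X8 (uwiden_high val @ (value_type $I8X16))))
      (let ((x Xmm val))
        (x64_pmovzxbw (x64_palignr x x 8 (OperandSize.Size32)))))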