diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle
index 58e32fc206..4f6e4ba6d8 100644
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -2934,8 +2934,9 @@
 (rule (cmp_zero_int_bool_ref val @ (value_type $B1))
       (x64_test (OperandSize.Size8) (RegMemImm.Imm 1) val))
 (rule (cmp_zero_int_bool_ref val @ (value_type ty))
-      (let ((size OperandSize (raw_operand_size_of_type ty)))
-        (x64_test size val val)))
+      (let ((size OperandSize (raw_operand_size_of_type ty))
+            (src Gpr val))
+        (x64_test size src src)))
 
 ;; Rules for `bricmp` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
@@ -3036,8 +3037,10 @@
 ;; -> Convert(Ah) // Convert .. with no loss of significant digits from previous shift
 ;; -> Ah = Ah + Ah // Double Ah to account for shift right before the conversion.
 ;; -> dst = Ah + Al // Add the two floats together
-(rule (lower (has_type $F32X4 (fcvt_from_uint a)))
-      (let (;; get the low 16 bits
+(rule (lower (has_type $F32X4 (fcvt_from_uint val)))
+      (let ((a Xmm val)
+
+            ;; get the low 16 bits
             (a_lo Xmm (x64_pslld a (RegMemImm.Imm 16)))
             (a_lo Xmm (x64_psrld a_lo (RegMemImm.Imm 16)))
 
@@ -3072,9 +3075,11 @@
 
 ;; The x64 backend currently only supports these two type combinations.
 (rule (lower (has_type $I32X4 (fcvt_to_sint_sat val @ (value_type $F32X4))))
-      (let (;; Sets tmp to zero if float is NaN
-            (tmp Xmm (x64_cmpps val val (FcmpImm.Equal)))
-            (dst Xmm (x64_andps val tmp))
+      (let ((src Xmm val)
+
+            ;; Sets tmp to zero if float is NaN
+            (tmp Xmm (x64_cmpps src src (FcmpImm.Equal)))
+            (dst Xmm (x64_andps src tmp))
 
             ;; Sets top bit of tmp if float is positive
             ;; Setting up to set top bit on negative float values
@@ -3141,10 +3146,12 @@
 ;; | Step 6                                              | Step 7                                 |
 ;; | (0-(INT_MAX+1))..(UINT_MAX-(INT_MAX+1))(w/overflow) | ((INT_MAX+1)-(INT_MAX+1))..(INT_MAX+1) |
 (rule (lower (has_type $I32X4 (fcvt_to_uint_sat val @ (value_type $F32X4))))
-      (let (;; Converting to unsigned int so if float src is negative or NaN
+      (let ((src Xmm val)
+
+            ;; Converting to unsigned int so if float src is negative or NaN
             ;; will first set to zero.
-            (tmp2 Xmm (x64_pxor val val)) ;; make a zero
-            (dst Xmm (x64_maxps val tmp2))
+            (tmp2 Xmm (x64_pxor src src)) ;; make a zero
+            (dst Xmm (x64_maxps src tmp2))
 
             ;; Set tmp2 to INT_MAX+1. It is important to note here that after it looks
             ;; like we are only converting INT_MAX (0x7FFFFFFF) but in fact because
@@ -3233,10 +3240,12 @@
 ;; Rules for `swiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $I16X8 (swiden_high val @ (value_type $I8X16))))
-      (x64_pmovsxbw (x64_palignr val val 8 (OperandSize.Size32))))
+      (let ((x Xmm val))
+        (x64_pmovsxbw (x64_palignr x x 8 (OperandSize.Size32)))))
 
 (rule (lower (has_type $I32X4 (swiden_high val @ (value_type $I16X8))))
-      (x64_pmovsxwd (x64_palignr val val 8 (OperandSize.Size32))))
+      (let ((x Xmm val))
+        (x64_pmovsxwd (x64_palignr x x 8 (OperandSize.Size32)))))
 
 (rule (lower (has_type $I64X2 (swiden_high val @ (value_type $I32X4))))
       (x64_pmovsxdq (x64_pshufd val 0xEE (OperandSize.Size32))))
@@ -3255,10 +3264,12 @@
 ;; Rules for `uwiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (rule (lower (has_type $I16X8 (uwiden_high val @ (value_type $I8X16))))
-      (x64_pmovzxbw (x64_palignr val val 8 (OperandSize.Size32))))
+      (let ((x Xmm val))
+        (x64_pmovzxbw (x64_palignr x x 8 (OperandSize.Size32)))))
 
 (rule (lower (has_type $I32X4 (uwiden_high val @ (value_type $I16X8))))
-      (x64_pmovzxwd (x64_palignr val val 8 (OperandSize.Size32))))
+      (let ((x Xmm val))
+        (x64_pmovzxwd (x64_palignr x x 8 (OperandSize.Size32)))))
 
 (rule (lower (has_type $I64X2 (uwiden_high val @ (value_type $I32X4))))
       (x64_pmovzxdq (x64_pshufd val 0xEE (OperandSize.Size32))))
@@ -3277,9 +3288,11 @@
 ;; This rule is a special case for handling the translation of the wasm op
 ;; `i32x4.trunc_sat_f64x2_s_zero`. It can be removed once we have an
 ;; implementation of `snarrow` for `I64X2`.
-(rule (lower (has_type $I32X4 (snarrow (has_type $I64X2 (fcvt_to_sint_sat a))
+(rule (lower (has_type $I32X4 (snarrow (has_type $I64X2 (fcvt_to_sint_sat val))
                                        (vconst (u128_from_constant 0)))))
-      (let (;; y = i32x4.trunc_sat_f64x2_s_zero(x) is lowered to:
+      (let ((a Xmm val)
+
+            ;; y = i32x4.trunc_sat_f64x2_s_zero(x) is lowered to:
             ;; MOVE xmm_tmp, xmm_x
             ;; CMPEQPD xmm_tmp, xmm_x
             ;; MOVE xmm_y, xmm_x
diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs
index 7ec52eea4a..c8b0492ec7 100644
--- a/cranelift/codegen/src/machinst/lower.rs
+++ b/cranelift/codegen/src/machinst/lower.rs
@@ -1266,6 +1266,10 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
         let ty = self.f.dfg.value_type(val);
         assert!(ty != IFLAGS && ty != FFLAGS);
 
+        if let Some(inst) = self.f.dfg.value_def(val).inst() {
+            assert!(!self.inst_sunk.contains(&inst));
+        }
+
         // If the value is a constant, then (re)materialize it at each use. This
         // lowers register pressure.
         if let Some(c) = self
@@ -1347,6 +1351,10 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
         assert!(has_lowering_side_effect(self.f, ir_inst));
         assert!(self.cur_scan_entry_color.is_some());
 
+        for result in self.dfg().inst_results(ir_inst) {
+            assert!(self.value_lowered_uses[*result] == 0);
+        }
+
        let sunk_inst_entry_color = self
            .side_effect_inst_entry_colors
            .get(&ir_inst)
diff --git a/cranelift/filetests/filetests/isa/x64/widen-high-bug.clif b/cranelift/filetests/filetests/isa/x64/widen-high-bug.clif
new file mode 100644
index 0000000000..a2c9898dca
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x64/widen-high-bug.clif
@@ -0,0 +1,20 @@
+test compile precise-output
+target x86_64
+
+function u0:0(i64 vmctx, i8x16) -> i16x8 fast {
+block0(v0: i64, v2: i8x16):
+  v5 = load.i8x16 notrap aligned table v0+80
+  v6 = uwiden_high v5
+  return v6
+}
+
+; pushq %rbp
+; movq %rsp, %rbp
+; block0:
+; movdqu 80(%rdi), %xmm5
+; palignr $8, %xmm5, %xmm5, %xmm5
+; pmovzxbw %xmm5, %xmm0
+; movq %rbp, %rsp
+; popq %rbp
+; ret
+
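A note on the fix pattern (commentary, not part of the patch): every lower.isle change above is the same transformation. In ISLE, each use of a `Value` in a register position goes through an implicit conversion (`put_in_gpr`/`put_in_xmm`), and when the operand matched a sinkable load, each conversion may sink, i.e. re-emit, that load. Binding the value to a register once in a `let` runs the conversion exactly once; later uses just reuse the register. A minimal before/after sketch of the pattern, taken from the first `swiden_high` rule in this patch:

  ;; Before: `val` is converted at each of its two uses, so a load matched
  ;; into `val` could be sunk (emitted) twice.
  (rule (lower (has_type $I16X8 (swiden_high val @ (value_type $I8X16))))
        (x64_pmovsxbw (x64_palignr val val 8 (OperandSize.Size32))))

  ;; After: `val` is put in an xmm register exactly once, then reused.
  (rule (lower (has_type $I16X8 (swiden_high val @ (value_type $I8X16))))
        (let ((x Xmm val))
          (x64_pmovsxbw (x64_palignr x x 8 (OperandSize.Size32)))))

The new assertions in machinst/lower.rs make any recurrence fail loudly (using a value whose defining instruction was already sunk, or sinking an instruction whose results were already lowered), and widen-high-bug.clif pins the fixed lowering: the `movdqu` load is emitted once, feeding the `palignr`/`pmovzxbw` pair.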