x64 backend: fix a load-op merging bug with integer min/max. (#4068)
The recent work in #4061 introduced a notion of "unique uses" for CLIF values that both simplified the load-op merging rules and allowed loads to merge in some more places. Unfortunately there's one factor that PR didn't account for: a unique use at the CLIF level could become a multiple-use at the VCode level, when a lowering uses a value multiple times! Making this less error-prone in general is hard, because we don't know the lowering in VCode until it's emitted, so we can't ahead-of-time know that a value will be used multiple times and prevent its merging. But we *can* know in the lowerings themselves when we're doing this. At least we get a panic from regalloc when we get this wrong; no bad code (uninitialized register being read) should ever come from a backend bug like this. This is still a bit less than ideal, but for now the fix is: in `cmp_and_choose` in the x64 backend (which compares values, then picks one or the other with a cmove), explicitly put values in registers. Fixes #4067 (thanks @Mrmaxmeier for the report!).
This commit is contained in:
@@ -1367,9 +1367,15 @@
|
|||||||
|
|
||||||
(decl cmp_and_choose (Type CC Value Value) ValueRegs)
|
(decl cmp_and_choose (Type CC Value Value) ValueRegs)
|
||||||
(rule (cmp_and_choose (fits_in_64 ty) cc x y)
|
(rule (cmp_and_choose (fits_in_64 ty) cc x y)
|
||||||
(let ((size OperandSize (raw_operand_size_of_type ty)))
|
(let ((size OperandSize (raw_operand_size_of_type ty))
|
||||||
(with_flags_reg (x64_cmp size x y)
|
;; We need to put x and y in registers explicitly because
|
||||||
(cmove ty cc y x))))
|
;; we use the values more than once. Hence, even if these
|
||||||
|
;; are "unique uses" at the CLIF level and would otherwise
|
||||||
|
;; allow for load-op merging, here we cannot do that.
|
||||||
|
(x_reg Reg x)
|
||||||
|
(y_reg Reg y))
|
||||||
|
(with_flags_reg (x64_cmp size x_reg y_reg)
|
||||||
|
(cmove ty cc y_reg x_reg))))
|
||||||
|
|
||||||
(rule (lower (has_type (fits_in_64 ty) (umin x y)))
|
(rule (lower (has_type (fits_in_64 ty) (umin x y)))
|
||||||
(cmp_and_choose ty (CC.B) x y))
|
(cmp_and_choose ty (CC.B) x y))
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
src/clif.isle 443b34b797fc8ace
|
src/clif.isle 443b34b797fc8ace
|
||||||
src/prelude.isle afd037c4d91c875c
|
src/prelude.isle afd037c4d91c875c
|
||||||
src/isa/x64/inst.isle cad03431447aca1b
|
src/isa/x64/inst.isle cad03431447aca1b
|
||||||
src/isa/x64/lower.isle 803aac716f6f4c39
|
src/isa/x64/lower.isle a7181571835ddd1e
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
21
cranelift/filetests/filetests/isa/x64/umax-bug.clif
Normal file
21
cranelift/filetests/filetests/isa/x64/umax-bug.clif
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
test compile precise-output
|
||||||
|
target x86_64
|
||||||
|
|
||||||
|
function u0:0(i32, i64) -> i32 fast {
|
||||||
|
block0(v1: i32, v2: i64):
|
||||||
|
v3 = load.i32 notrap aligned v2
|
||||||
|
v4 = umax v1, v3
|
||||||
|
return v4
|
||||||
|
}
|
||||||
|
|
||||||
|
; pushq %rbp
|
||||||
|
; movq %rsp, %rbp
|
||||||
|
; block0:
|
||||||
|
; movl 0(%rsi), %r9d
|
||||||
|
; cmpl %edi, %r9d
|
||||||
|
; cmovnbl %r9d, %edi, %edi
|
||||||
|
; movq %rdi, %rax
|
||||||
|
; movq %rbp, %rsp
|
||||||
|
; popq %rbp
|
||||||
|
; ret
|
||||||
|
|
||||||
Reference in New Issue
Block a user