x64 backend: fix a load-op merging bug with integer min/max. (#4068)

The recent work in #4061 introduced a notion of "unique uses" for CLIF values that both simplified the load-op merging rules and allowed loads to merge in some more places. Unfortunately there's one factor that PR didn't account for: a unique use at the CLIF level could become a multiple-use at the VCode level, when a lowering uses a value multiple times! Making this less error-prone in general is hard, because we don't know the lowering in VCode until it's emitted, so we can't ahead-of-time know that a value will be used multiple times and prevent its merging. But we *can* know in the lowerings themselves when we're doing this. At least we get a panic from regalloc when we get this wrong; no bad code (uninitialized register being read) should ever come from a backend bug like this. This is still a bit less than ideal, but for now the fix is: in `cmp_and_choose` in the x64 backend (which compares values, then picks one or the other with a cmove), explicitly put values in registers. Fixes #4067 (thanks @Mrmaxmeier for the report!).
2022-04-25 10:32:09 -07:00
parent e4b7c8a737
commit f384938a10
4 changed files with 233 additions and 204 deletions
--- a/cranelift/codegen/src/isa/x64/lower.isle
+++ b/cranelift/codegen/src/isa/x64/lower.isle
@@ -1367,9 +1367,15 @@
 (decl cmp_and_choose (Type CC Value Value) ValueRegs)
 (rule (cmp_and_choose (fits_in_64 ty) cc x y)
-      (let ((size OperandSize (raw_operand_size_of_type ty)))
+      (let ((size OperandSize (raw_operand_size_of_type ty))
-        (with_flags_reg (x64_cmp size x y)
+            ;; We need to put x and y in registers explicitly because
-                        (cmove ty cc y x))))
+            ;; we use the values more than once. Hence, even if these
            ;; are "unique uses" at the CLIF level and would otherwise
            ;; allow for load-op merging, here we cannot do that.
            (x_reg Reg x)
            (y_reg Reg y))
        (with_flags_reg (x64_cmp size x_reg y_reg)
                        (cmove ty cc y_reg x_reg))))
 (rule (lower (has_type (fits_in_64 ty) (umin x y)))
      (cmp_and_choose ty (CC.B) x y))
--- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest
+++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.manifest
@@ -1,4 +1,4 @@
 src/clif.isle 443b34b797fc8ace
 src/prelude.isle afd037c4d91c875c
 src/isa/x64/inst.isle cad03431447aca1b
-src/isa/x64/lower.isle 803aac716f6f4c39
+src/isa/x64/lower.isle a7181571835ddd1e
--- a/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
+++ b/cranelift/codegen/src/isa/x64/lower/isle/generated_code.rs
--- a/cranelift/filetests/filetests/isa/x64/umax-bug.clif
+++ b/cranelift/filetests/filetests/isa/x64/umax-bug.clif
@@ -0,0 +1,21 @@
 test compile precise-output
 target x86_64
 function u0:0(i32, i64) -> i32 fast { 
 block0(v1: i32, v2: i64): 
    v3 = load.i32 notrap aligned v2
    v4 = umax v1, v3
    return v4
 }
 ;   pushq   %rbp
 ;   movq    %rsp, %rbp
 ; block0:
 ;   movl    0(%rsi), %r9d
 ;   cmpl    %edi, %r9d
 ;   cmovnbl %r9d, %edi, %edi
 ;   movq    %rdi, %rax
 ;   movq    %rbp, %rsp
 ;   popq    %rbp
 ;   ret