From e7bced95125055b41e449d414e55ccf2f05c09cf Mon Sep 17 00:00:00 2001
From: Benjamin Bouvier
Date: Wed, 14 Apr 2021 18:08:52 +0200
Subject: [PATCH] cranelift: always spill i32 with i64 stores;

Fixes #2839.

See also the issue description and the comments in this commit for
details about the fix.
---
 cranelift/codegen/src/machinst/abi_impl.rs    |  13 ++
 .../isa/x64/store-stack-full-width-i32.clif   | 127 ++++++++++++++++++
 2 files changed, 140 insertions(+)
 create mode 100644 cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif

diff --git a/cranelift/codegen/src/machinst/abi_impl.rs b/cranelift/codegen/src/machinst/abi_impl.rs
index 85a6d6831b..5636b83f82 100644
--- a/cranelift/codegen/src/machinst/abi_impl.rs
+++ b/cranelift/codegen/src/machinst/abi_impl.rs
@@ -1213,6 +1213,19 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
         let spill_off = islot * M::word_bytes() as i64;
         let sp_off = self.stackslots_size as i64 + spill_off;
         trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
+
+        // When reloading from a spill slot, we might have lost information about the real
+        // integer type. For instance, on the x64 backend, a zero-extension can become spurious
+        // and be optimized into a move, causing vregs of types I32 and I64 to share the same
+        // coalescing equivalence class. As a result, such a value can be spilled as an I32 and
+        // later reloaded as an I64; to make sure the high bits are always defined, always do a
+        // word-sized store in this case.
+        let ty = if ty.is_int() && ty.bytes() < M::word_bytes() {
+            M::word_type()
+        } else {
+            ty
+        };
+
         gen_store_stack_multi::<M>(StackAMode::NominalSPOffset(sp_off, ty), from_regs, ty)
     }
 
diff --git a/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif b/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif
new file mode 100644
index 0000000000..10b3102c17
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif
@@ -0,0 +1,127 @@
+test compile
+target x86_64 machinst
+
+;; The goal of this test is to ensure that stack spills of an integer value
+;; whose width is less than the machine word's size store the full word, and
+;; not only the lower bits.
+
+;; Because unsigned extensions can be transformed into simple moves, the
+;; source vreg of the extension operation can be coalesced with its
+;; destination vreg; if that vreg happens to be spilled, the reload may use a
+;; different, larger size.
+
+function %f0(i32, i32, i32) -> i64 {
+    fn0 = %g(i32) -> i64
+
+; check: pushq %rbp
+; nextln: movq %rsp, %rbp
+; nextln: subq $$64, %rsp
+
+;; Stash all the callee-saved registers.
+
+; nextln: movq %r12, 16(%rsp)
+; nextln: movq %r13, 24(%rsp)
+; nextln: movq %r14, 32(%rsp)
+; nextln: movq %rbx, 40(%rsp)
+; nextln: movq %r15, 48(%rsp)
+
+block0(v0: i32, v1: i32, v2: i32):
+    ;; First, create enough virtual registers so that the call instructions
+    ;; cause at least one of them to be spilled onto the stack.
+
+    v3 = iadd.i32 v0, v1
+    v4 = iadd.i32 v1, v2
+    v5 = iadd.i32 v0, v2
+    v6 = iadd.i32 v3, v0
+    v7 = iadd.i32 v4, v0
+    v8 = iadd.i32 v5, v0
+
+; nextln: movq %rdi, %r12
+; nextln: addl %esi, %r12d
+; nextln: movq %rsi, %r13
+; nextln: addl %edx, %r13d
+; nextln: movq %rdi, %r14
+; nextln: addl %edx, %r14d
+; nextln: movq %r12, %rbx
+; nextln: addl %edi, %ebx
+; nextln: movq %r13, %r15
+; nextln: addl %edi, %r15d
+; nextln: movq %r14, %rsi
+
+;; This must be a movq below, not a movl.
+; nextln: movq %rsi, rsp(0 + virtual offset)
+
+; nextln: movslq rsp(0 + virtual offset), %rsi
+; nextln: addl %edi, %esi
+
+    ;; Put an effectful instruction here so that the live ranges of the adds
+    ;; and uextends are split, and to prevent the uextends from being emitted
+    ;; before the call. This effectively causes the above i32 to be spilled
+    ;; as an i32, and not as a full i64.
+
+    v300 = call fn0(v0)
+
+;; This must be a movq below, not a movl.
+; nextln: movq %rsi, rsp(0 + virtual offset)
+
+; nextln: load_ext_name %g+0, %rsi
+; nextln: call *%rsi
+
+    v31 = uextend.i64 v3
+    v41 = uextend.i64 v4
+    v51 = uextend.i64 v5
+    v61 = uextend.i64 v6
+    v71 = uextend.i64 v7
+    v81 = uextend.i64 v8
+
+    ;; None of the uextends are generated here yet.
+
+    ;; At this point, one would expect the second call below to be
+    ;; unnecessary, but if it is removed, the uextend is applied before the
+    ;; call and the i64 is spilled (then reloaded), making the bug disappear.
+    ;; So an additional call it is!
+
+    v100 = call fn0(v3)
+
+; nextln: movq %r12, %rsi
+; nextln: movq %rsi, rsp(8 + virtual offset)
+; nextln: nop len=0
+; nextln: movq %r12, %rdi
+; nextln: load_ext_name %g+0, %rsi
+; nextln: call *%rsi
+
+    ;; Cause reloads of all the values. Most are in registers, but one of them
+    ;; is on the stack. Make sure they're all used in the final computation.
+
+    v101 = iadd.i64 v100, v31
+    v102 = iadd.i64 v101, v41
+    v103 = iadd.i64 v102, v51
+    v104 = iadd.i64 v103, v61
+    v105 = iadd.i64 v104, v71
+    v200 = iadd.i64 v105, v81
+
+; nextln: movq %rax, %rsi
+; nextln: movq rsp(8 + virtual offset), %rdi
+; nextln: addq %rdi, %rsi
+; nextln: addq %r13, %rsi
+; nextln: addq %r14, %rsi
+; nextln: addq %rbx, %rsi
+; nextln: addq %r15, %rsi
+
+;; The reload operates on a full word, so it uses movq.
+; nextln: movq rsp(0 + virtual offset), %rdi
+
+; nextln: addq %rdi, %rsi
+; nextln: movq %rsi, %rax
+; nextln: movq 16(%rsp), %r12
+; nextln: movq 24(%rsp), %r13
+; nextln: movq 32(%rsp), %r14
+; nextln: movq 40(%rsp), %rbx
+; nextln: movq 48(%rsp), %r15
+; nextln: addq $$64, %rsp
+
+    return v200
+; nextln: movq %rbp, %rsp
+; nextln: popq %rbp
+; nextln: ret
+}
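For reference, the widening rule from the abi_impl.rs hunk above, shown in
isolation as a minimal standalone Rust sketch. The IntTy type, the word_bytes
parameter and the spill_store_type helper are illustrative stand-ins, not the
real cranelift Type / ABIMachineSpec APIs.

/// Illustrative stand-in for an integer type, identified by its width in bytes.
#[derive(Clone, Copy, Debug, PartialEq)]
struct IntTy {
    bytes: u32,
}

/// Pick the type used for the spill-slot store: any integer narrower than the
/// machine word is widened to a full word, so that the high bits are defined
/// even if the slot is later reloaded at word width.
fn spill_store_type(ty: IntTy, word_bytes: u32) -> IntTy {
    if ty.bytes < word_bytes {
        IntTy { bytes: word_bytes }
    } else {
        ty
    }
}

fn main() {
    // On x64, word_bytes is 8: an i32 spill is stored as a full i64.
    assert_eq!(spill_store_type(IntTy { bytes: 4 }, 8), IntTy { bytes: 8 });
    // An i64 spill is stored as-is.
    assert_eq!(spill_store_type(IntTy { bytes: 8 }, 8), IntTy { bytes: 8 });
}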