diff --git a/cranelift/codegen/src/isa/x64/inst/regs.rs b/cranelift/codegen/src/isa/x64/inst/regs.rs
index 04bc1f09bf..1d9e30bd3a 100644
--- a/cranelift/codegen/src/isa/x64/inst/regs.rs
+++ b/cranelift/codegen/src/isa/x64/inst/regs.rs
@@ -1,14 +1,20 @@
 //! Registers, the Universe thereof, and printing.
 //!
-//! These are ordered by sequence number, as required in the Universe. The strange ordering is
-//! intended to make callee-save registers available before caller-saved ones. This is a net win
-//! provided that each function makes at least one onward call. It'll be a net loss for leaf
-//! functions, and we should change the ordering in that case, so as to make caller-save regs
-//! available first.
+//! These are ordered by sequence number, as required in the Universe.
 //!
-//! TODO Maybe have two different universes, one for leaf functions and one for non-leaf functions?
-//! Also, they will have to be ABI dependent. Need to find a way to avoid constructing a universe
-//! for each function we compile.
+//! The caller-saved registers are placed first in order to prefer not to clobber (requiring
+//! saves/restores in prologue/epilogue code) when possible. Note that there is no other heuristic
+//! in the backend that will apply such pressure; the register allocator's cost heuristics are not
+//! aware of the cost of clobber-save/restore code.
+//!
+//! One might worry that this pessimizes code with many callsites, where using caller-saves causes
+//! us to have to save them (as we are the caller) frequently. However, the register allocator
+//! *should be* aware of *this* cost, because it sees that the call instruction modifies all of the
+//! caller-saved (i.e., callee-clobbered) registers.
+//!
+//! Hence, this ordering encodes pressure in one direction (prefer not to clobber registers that we
+//! ourselves have to save) and this is balanced against the RA's pressure in the other direction
+//! at callsites.
 
 use crate::settings;
 use alloc::vec::Vec;
@@ -31,44 +37,44 @@ fn gpr(enc: u8, index: u8) -> Reg {
     Reg::new_real(RegClass::I64, enc, index)
 }
 
-pub(crate) fn r12() -> Reg {
-    gpr(ENC_R12, 16)
-}
-pub(crate) fn r13() -> Reg {
-    gpr(ENC_R13, 17)
-}
-pub(crate) fn r14() -> Reg {
-    gpr(ENC_R14, 18)
-}
-pub(crate) fn rbx() -> Reg {
-    gpr(ENC_RBX, 19)
-}
 pub(crate) fn rsi() -> Reg {
-    gpr(6, 20)
+    gpr(6, 16)
 }
 pub(crate) fn rdi() -> Reg {
-    gpr(7, 21)
+    gpr(7, 17)
 }
 pub(crate) fn rax() -> Reg {
-    gpr(0, 22)
+    gpr(0, 18)
 }
 pub(crate) fn rcx() -> Reg {
-    gpr(1, 23)
+    gpr(1, 19)
 }
 pub(crate) fn rdx() -> Reg {
-    gpr(2, 24)
+    gpr(2, 20)
 }
 pub(crate) fn r8() -> Reg {
-    gpr(8, 25)
+    gpr(8, 21)
 }
 pub(crate) fn r9() -> Reg {
-    gpr(9, 26)
+    gpr(9, 22)
 }
 pub(crate) fn r10() -> Reg {
-    gpr(10, 27)
+    gpr(10, 23)
 }
 pub(crate) fn r11() -> Reg {
-    gpr(11, 28)
+    gpr(11, 24)
+}
+pub(crate) fn r12() -> Reg {
+    gpr(ENC_R12, 25)
+}
+pub(crate) fn r13() -> Reg {
+    gpr(ENC_R13, 26)
+}
+pub(crate) fn r14() -> Reg {
+    gpr(ENC_R14, 27)
+}
+pub(crate) fn rbx() -> Reg {
+    gpr(ENC_RBX, 28)
 }
 
 pub(crate) fn r15() -> Reg {
@@ -176,13 +182,6 @@ pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUni
 
     // Integer regs.
     let first_gpr = regs.len();
-    // Callee-saved, in the SystemV x86_64 ABI.
-    regs.push((r12().to_real_reg(), "%r12".into()));
-    regs.push((r13().to_real_reg(), "%r13".into()));
-    regs.push((r14().to_real_reg(), "%r14".into()));
-
-    regs.push((rbx().to_real_reg(), "%rbx".into()));
-
     // Caller-saved, in the SystemV x86_64 ABI.
     regs.push((rsi().to_real_reg(), "%rsi".into()));
     regs.push((rdi().to_real_reg(), "%rdi".into()));
@@ -194,6 +193,13 @@ pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUni
     regs.push((r10().to_real_reg(), "%r10".into()));
     regs.push((r11().to_real_reg(), "%r11".into()));
 
+    // Callee-saved, in the SystemV x86_64 ABI.
+    regs.push((r12().to_real_reg(), "%r12".into()));
+    regs.push((r13().to_real_reg(), "%r13".into()));
+    regs.push((r14().to_real_reg(), "%r14".into()));
+
+    regs.push((rbx().to_real_reg(), "%rbx".into()));
+
     // Other regs, not available to the allocator.
     debug_assert_eq!(r15(), pinned_reg());
     let allocable = if use_pinned_reg {
diff --git a/cranelift/filetests/filetests/isa/x64/amode-opt.clif b/cranelift/filetests/filetests/isa/x64/amode-opt.clif
index dbeed5475e..bfe0198753 100644
--- a/cranelift/filetests/filetests/isa/x64/amode-opt.clif
+++ b/cranelift/filetests/filetests/isa/x64/amode-opt.clif
@@ -7,7 +7,7 @@ block0(v0: i64, v1: i64):
     v2 = iadd v0, v1
     v3 = load.i64 v2
     return v3
-    ; check: movq 0(%rdi,%rsi,1), %r12
+    ; check: movq 0(%rdi,%rsi,1), %rsi
 }
 
 function %amode_add_imm(i64) -> i64 {
@@ -16,7 +16,7 @@ block0(v0: i64):
     v2 = iadd v0, v1
     v3 = load.i64 v2
     return v3
-    ; check: movq 42(%rdi), %r12
+    ; check: movq 42(%rdi), %rsi
 }
 
 ;; Same as above, but add operands have been reversed.
@@ -26,7 +26,7 @@ block0(v0: i64):
     v2 = iadd v1, v0
     v3 = load.i64 v2
     return v3
-    ; check: movq 42(%rdi), %r12
+    ; check: movq 42(%rdi), %rsi
 }
 
 ;; Make sure that uextend(cst) are ignored when the cst will naturally sign-extend.
@@ -37,5 +37,5 @@ block0(v0: i64):
     v3 = iadd v2, v0
     v4 = load.i64 v3
     return v4
-    ; check: movq 42(%rdi), %r12
+    ; check: movq 42(%rdi), %rsi
 }
diff --git a/cranelift/filetests/filetests/isa/x64/heap.clif b/cranelift/filetests/filetests/isa/x64/heap.clif
index d9efb083c6..c547582008 100644
--- a/cranelift/filetests/filetests/isa/x64/heap.clif
+++ b/cranelift/filetests/filetests/isa/x64/heap.clif
@@ -11,11 +11,11 @@ function %f(i32, i64 vmctx) -> i64 {
 block0(v0: i32, v1: i64):
     v2 = heap_addr.i64 heap0, v0, 0x8000
 
-    ; check: movl 8(%rsi), %r12d
-    ; nextln: movq %rdi, %r13
-    ; nextln: addl $$32768, %r13d
+    ; check: movl 8(%rsi), %ecx
+    ; nextln: movq %rdi, %rax
+    ; nextln: addl $$32768, %eax
     ; nextln: jnb ; ud2 heap_oob ;
-    ; nextln: cmpl %r12d, %r13d
+    ; nextln: cmpl %ecx, %eax
     ; nextln: jbe label1; j label2
 
 ; check: Block 1:
diff --git a/cranelift/filetests/filetests/isa/x64/load-op.clif b/cranelift/filetests/filetests/isa/x64/load-op.clif
index 77e4b05420..6570a798c3 100644
--- a/cranelift/filetests/filetests/isa/x64/load-op.clif
+++ b/cranelift/filetests/filetests/isa/x64/load-op.clif
@@ -6,7 +6,7 @@ function %add_from_mem_u32_1(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
     v2 = load.i32 v0
     v3 = iadd.i32 v2, v1
-    ; check: addl 0(%rdi), %r12d
+    ; check: addl 0(%rdi), %esi
     return v3
 }
 
@@ -14,7 +14,7 @@ function %add_from_mem_u32_2(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
     v2 = load.i32 v0
     v3 = iadd.i32 v1, v2
-    ; check: addl 0(%rdi), %r12d
+    ; check: addl 0(%rdi), %esi
     return v3
 }
 
@@ -22,7 +22,7 @@ function %add_from_mem_u64_1(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
     v2 = load.i64 v0
     v3 = iadd.i64 v2, v1
-    ; check: addq 0(%rdi), %r12
+    ; check: addq 0(%rdi), %rsi
     return v3
 }
 
@@ -30,7 +30,7 @@ function %add_from_mem_u64_2(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
     v2 = load.i64 v0
     v3 = iadd.i64 v1, v2
-    ; check: addq 0(%rdi), %r12
+    ; check: addq 0(%rdi), %rsi
     return v3
 }
 
@@ -40,8 +40,8 @@ function %add_from_mem_not_narrow(i64, i8) -> i8 {
 block0(v0: i64, v1: i8):
     v2 = load.i8 v0
     v3 = iadd.i8 v2, v1
-    ; check: movzbq 0(%rdi), %r12
-    ; nextln: addl %esi, %r12d
+    ; check: movzbq 0(%rdi), %rdi
+    ; nextln: addl %esi, %edi
     return v3
 }
 
@@ -52,10 +52,10 @@ block0(v0: i64, v1: i64):
     store.i64 v3, v1
     v4 = load.i64 v3
     return v4
-    ; check: movq 0(%rdi), %r12
-    ; nextln: movq %r12, %r13
-    ; nextln: addq %rdi, %r13
-    ; nextln: movq %r13, 0(%rsi)
-    ; nextln: movq 0(%r12,%rdi,1), %r12
-    ; nextln: movq %r12, %rax
+    ; check: movq 0(%rdi), %rax
+    ; nextln: movq %rax, %rcx
+    ; nextln: addq %rdi, %rcx
+    ; nextln: movq %rcx, 0(%rsi)
+    ; nextln: movq 0(%rax,%rdi,1), %rsi
+    ; nextln: movq %rsi, %rax
 }
diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
index fe52e3c503..65e1b5df7e 100644
--- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
@@ -28,9 +28,9 @@ block0(v0: i32):
 }
 ; check: movd %edi, %xmm1
 ; nextln: psllw %xmm1, %xmm0
-; nextln: lea const(VCodeConstant(0)), %r12
+; nextln: lea const(VCodeConstant(0)), %rsi
 ; nextln: shlq $$4, %rdi
-; nextln: movdqu 0(%r12,%rdi,1), %xmm1
+; nextln: movdqu 0(%rsi,%rdi,1), %xmm1
 ; nextln: pand %xmm1, %xmm0
 
 function %ushr_i8x16_imm() -> i8x16 {
@@ -81,12 +81,12 @@ block0(v0: i64x2, v1: i32):
     v2 = sshr v0, v1
     return v2
 }
-; check: pextrd.w $$0, %xmm0, %r12
-; nextln: pextrd.w $$1, %xmm0, %r13
+; check: pextrd.w $$0, %xmm0, %rsi
+; nextln: pextrd.w $$1, %xmm0, %rax
 ; nextln: movq %rdi, %rcx
-; nextln: sarq %cl, %r12
+; nextln: sarq %cl, %rsi
 ; nextln: movq %rdi, %rcx
-; nextln: sarq %cl, %r13
-; nextln: pinsrd.w $$0, %r12, %xmm1
-; nextln: pinsrd.w $$1, %r13, %xmm1
+; nextln: sarq %cl, %rax
+; nextln: pinsrd.w $$0, %rsi, %xmm1
+; nextln: pinsrd.w $$1, %rax, %xmm1
 ; nextln: movdqa %xmm1, %xmm0
diff --git a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif
index c9188a9514..f44dbd3b62 100644
--- a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif
@@ -70,8 +70,8 @@ block0:
     return v1
 }
 ; check: uninit %xmm0
-; nextln: pinsrw $$0, %r12, %xmm0
-; nextln: pinsrw $$1, %r12, %xmm0
+; nextln: pinsrw $$0, %rsi, %xmm0
+; nextln: pinsrw $$1, %rsi, %xmm0
 ; nextln: pshufd $$0, %xmm0, %xmm0
 
 function %splat_i32(i32) -> i32x4 {
diff --git a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif
index 0e366db441..d03aa0b204 100644
--- a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif
@@ -17,7 +17,7 @@ block0(v0: b32x4):
     return v1
 }
 ; check: ptest %xmm0, %xmm0
-; nextln: setnz %r12b
+; nextln: setnz %sil
 
 function %vall_true_i64x2(i64x2) -> b1 {
 block0(v0: i64x2):
@@ -27,4 +27,4 @@ block0(v0: i64x2):
 ; check: pxor %xmm1, %xmm1
 ; nextln: pcmpeqq %xmm0, %xmm1
 ; nextln: ptest %xmm1, %xmm1
-; nextln: setz %r12b
+; nextln: setz %sil
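
The module comment added in regs.rs above argues the trade-off in prose; the arithmetic behind it is easy to make concrete. The following standalone Rust sketch is purely illustrative (it is not Cranelift code, and CALLER_SAVED, CALLEE_SAVED, and clobber_saves are hypothetical names): a greedy picker hands out registers in preference order, and each callee-saved register it touches costs one save/restore pair in the prologue/epilogue.

// Toy model of the allocation-order heuristic; NOT Cranelift's allocator.
// SystemV x86_64 allocatable GPRs, split by save convention.
const CALLER_SAVED: [&str; 9] = ["rsi", "rdi", "rax", "rcx", "rdx", "r8", "r9", "r10", "r11"];
const CALLEE_SAVED: [&str; 4] = ["r12", "r13", "r14", "rbx"];

/// Callee-saved registers clobbered when the first `live` registers
/// of `order` are handed out (each one costs a prologue/epilogue save).
fn clobber_saves(order: &[&str], live: usize) -> usize {
    order
        .iter()
        .take(live)
        .filter(|&r| CALLEE_SAVED.contains(r))
        .count()
}

fn main() {
    // Old order: callee-saved first. New order (this patch): caller-saved first.
    let old: Vec<&str> = CALLEE_SAVED.iter().chain(&CALLER_SAVED).copied().collect();
    let new: Vec<&str> = CALLER_SAVED.iter().chain(&CALLEE_SAVED).copied().collect();

    for live in [1, 4, 10, 13] {
        println!(
            "{live:2} live values: old order saves {}, new order saves {}",
            clobber_saves(&old, live),
            clobber_saves(&new, live)
        );
    }
}

With a small number of simultaneously-live values (the common case, as the filetest churn above suggests: %r12/%r13 become %rsi/%rax and friends), the caller-saved-first order needs no prologue/epilogue saves at all; under high pressure the two orders converge, which is where the allocator's own modeling of call-clobbered registers takes over.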