diff --git a/cranelift/codegen/src/isa/x64/inst/regs.rs b/cranelift/codegen/src/isa/x64/inst/regs.rs
index 04bc1f09bf..1d9e30bd3a 100644
--- a/cranelift/codegen/src/isa/x64/inst/regs.rs
+++ b/cranelift/codegen/src/isa/x64/inst/regs.rs
@@ -1,14 +1,20 @@
 //! Registers, the Universe thereof, and printing.
 //!
-//! These are ordered by sequence number, as required in the Universe. The strange ordering is
-//! intended to make callee-save registers available before caller-saved ones. This is a net win
-//! provided that each function makes at least one onward call. It'll be a net loss for leaf
-//! functions, and we should change the ordering in that case, so as to make caller-save regs
-//! available first.
+//! These are ordered by sequence number, as required in the Universe.
 //!
-//! TODO Maybe have two different universes, one for leaf functions and one for non-leaf functions?
-//! Also, they will have to be ABI dependent. Need to find a way to avoid constructing a universe
-//! for each function we compile.
+//! The caller-saved registers are placed first in order to prefer not to clobber (requiring
+//! saves/restores in prologue/epilogue code) when possible. Note that there is no other heuristic
+//! in the backend that will apply such pressure; the register allocator's cost heuristics are not
+//! aware of the cost of clobber-save/restore code.
+//!
+//! One might worry that this pessimizes code with many callsites, where using caller-saves causes
+//! us to have to save them (as we are the caller) frequently. However, the register allocator
+//! *should be* aware of *this* cost, because it sees that the call instruction modifies all of the
+//! caller-saved (i.e., callee-clobbered) registers.
+//!
+//! Hence, this ordering encodes pressure in one direction (prefer not to clobber registers that we
+//! ourselves have to save) and this is balanced against the RA's pressure in the other direction
+//! at callsites.
 
 use crate::settings;
 use alloc::vec::Vec;
@@ -31,44 +37,44 @@ fn gpr(enc: u8, index: u8) -> Reg {
     Reg::new_real(RegClass::I64, enc, index)
 }
 
-pub(crate) fn r12() -> Reg {
-    gpr(ENC_R12, 16)
-}
-pub(crate) fn r13() -> Reg {
-    gpr(ENC_R13, 17)
-}
-pub(crate) fn r14() -> Reg {
-    gpr(ENC_R14, 18)
-}
-pub(crate) fn rbx() -> Reg {
-    gpr(ENC_RBX, 19)
-}
 pub(crate) fn rsi() -> Reg {
-    gpr(6, 20)
+    gpr(6, 16)
 }
 pub(crate) fn rdi() -> Reg {
-    gpr(7, 21)
+    gpr(7, 17)
 }
 pub(crate) fn rax() -> Reg {
-    gpr(0, 22)
+    gpr(0, 18)
 }
 pub(crate) fn rcx() -> Reg {
-    gpr(1, 23)
+    gpr(1, 19)
 }
 pub(crate) fn rdx() -> Reg {
-    gpr(2, 24)
+    gpr(2, 20)
 }
 pub(crate) fn r8() -> Reg {
-    gpr(8, 25)
+    gpr(8, 21)
 }
 pub(crate) fn r9() -> Reg {
-    gpr(9, 26)
+    gpr(9, 22)
 }
 pub(crate) fn r10() -> Reg {
-    gpr(10, 27)
+    gpr(10, 23)
 }
 pub(crate) fn r11() -> Reg {
-    gpr(11, 28)
+    gpr(11, 24)
+}
+pub(crate) fn r12() -> Reg {
+    gpr(ENC_R12, 25)
+}
+pub(crate) fn r13() -> Reg {
+    gpr(ENC_R13, 26)
+}
+pub(crate) fn r14() -> Reg {
+    gpr(ENC_R14, 27)
+}
+pub(crate) fn rbx() -> Reg {
+    gpr(ENC_RBX, 28)
 }
 
 pub(crate) fn r15() -> Reg {
@@ -176,13 +182,6 @@ pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUni
 
     // Integer regs.
     let first_gpr = regs.len();
-    // Callee-saved, in the SystemV x86_64 ABI.
-    regs.push((r12().to_real_reg(), "%r12".into()));
-    regs.push((r13().to_real_reg(), "%r13".into()));
-    regs.push((r14().to_real_reg(), "%r14".into()));
-
-    regs.push((rbx().to_real_reg(), "%rbx".into()));
-
     // Caller-saved, in the SystemV x86_64 ABI.
     regs.push((rsi().to_real_reg(), "%rsi".into()));
     regs.push((rdi().to_real_reg(), "%rdi".into()));
@@ -194,6 +193,13 @@ pub(crate) fn create_reg_universe_systemv(flags: &settings::Flags) -> RealRegUni
     regs.push((r10().to_real_reg(), "%r10".into()));
     regs.push((r11().to_real_reg(), "%r11".into()));
 
+    // Callee-saved, in the SystemV x86_64 ABI.
+    regs.push((r12().to_real_reg(), "%r12".into()));
+    regs.push((r13().to_real_reg(), "%r13".into()));
+    regs.push((r14().to_real_reg(), "%r14".into()));
+
+    regs.push((rbx().to_real_reg(), "%rbx".into()));
+
     // Other regs, not available to the allocator.
     debug_assert_eq!(r15(), pinned_reg());
     let allocable = if use_pinned_reg {
diff --git a/cranelift/filetests/filetests/isa/x64/amode-opt.clif b/cranelift/filetests/filetests/isa/x64/amode-opt.clif
index dbeed5475e..bfe0198753 100644
--- a/cranelift/filetests/filetests/isa/x64/amode-opt.clif
+++ b/cranelift/filetests/filetests/isa/x64/amode-opt.clif
@@ -7,7 +7,7 @@ block0(v0: i64, v1: i64):
     v2 = iadd v0, v1
     v3 = load.i64 v2
     return v3
-    ; check: movq 0(%rdi,%rsi,1), %r12
+    ; check: movq 0(%rdi,%rsi,1), %rsi
 }
 
 function %amode_add_imm(i64) -> i64 {
@@ -16,7 +16,7 @@ block0(v0: i64):
     v2 = iadd v0, v1
     v3 = load.i64 v2
     return v3
-    ; check: movq 42(%rdi), %r12
+    ; check: movq 42(%rdi), %rsi
 }
 
 ;; Same as above, but add operands have been reversed.
@@ -26,7 +26,7 @@ block0(v0: i64):
     v2 = iadd v1, v0
     v3 = load.i64 v2
     return v3
-    ; check: movq 42(%rdi), %r12
+    ; check: movq 42(%rdi), %rsi
 }
 
 ;; Make sure that uextend(cst) are ignored when the cst will naturally sign-extend.
@@ -37,5 +37,5 @@ block0(v0: i64):
     v3 = iadd v2, v0
     v4 = load.i64 v3
     return v4
-    ; check: movq 42(%rdi), %r12
+    ; check: movq 42(%rdi), %rsi
 }
diff --git a/cranelift/filetests/filetests/isa/x64/heap.clif b/cranelift/filetests/filetests/isa/x64/heap.clif
index d9efb083c6..c547582008 100644
--- a/cranelift/filetests/filetests/isa/x64/heap.clif
+++ b/cranelift/filetests/filetests/isa/x64/heap.clif
@@ -11,11 +11,11 @@ function %f(i32, i64 vmctx) -> i64 {
 block0(v0: i32, v1: i64):
     v2 = heap_addr.i64 heap0, v0, 0x8000
 
-    ; check: movl 8(%rsi), %r12d
-    ; nextln: movq %rdi, %r13
-    ; nextln: addl $$32768, %r13d
+    ; check: movl 8(%rsi), %ecx
+    ; nextln: movq %rdi, %rax
+    ; nextln: addl $$32768, %eax
     ; nextln: jnb ; ud2 heap_oob ;
-    ; nextln: cmpl %r12d, %r13d
+    ; nextln: cmpl %ecx, %eax
     ; nextln: jbe label1; j label2
 
 ; check: Block 1:
diff --git a/cranelift/filetests/filetests/isa/x64/load-op.clif b/cranelift/filetests/filetests/isa/x64/load-op.clif
index 77e4b05420..6570a798c3 100644
--- a/cranelift/filetests/filetests/isa/x64/load-op.clif
+++ b/cranelift/filetests/filetests/isa/x64/load-op.clif
@@ -6,7 +6,7 @@ function %add_from_mem_u32_1(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
     v2 = load.i32 v0
     v3 = iadd.i32 v2, v1
-    ; check: addl 0(%rdi), %r12d
+    ; check: addl 0(%rdi), %esi
     return v3
 }
 
@@ -14,7 +14,7 @@ function %add_from_mem_u32_2(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
     v2 = load.i32 v0
     v3 = iadd.i32 v1, v2
-    ; check: addl 0(%rdi), %r12d
+    ; check: addl 0(%rdi), %esi
     return v3
 }
 
@@ -22,7 +22,7 @@ function %add_from_mem_u64_1(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
     v2 = load.i64 v0
     v3 = iadd.i64 v2, v1
-    ; check: addq 0(%rdi), %r12
+    ; check: addq 0(%rdi), %rsi
     return v3
 }
 
@@ -30,7 +30,7 @@ function %add_from_mem_u64_2(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
     v2 = load.i64 v0
     v3 = iadd.i64 v1, v2
-    ; check: addq 0(%rdi), %r12
+    ; check: addq 0(%rdi), %rsi
     return v3
 }
 
@@ -40,8 +40,8 @@ function %add_from_mem_not_narrow(i64, i8) -> i8 {
 block0(v0: i64, v1: i8):
     v2 = load.i8 v0
     v3 = iadd.i8 v2, v1
-    ; check: movzbq 0(%rdi), %r12
-    ; nextln: addl %esi, %r12d
+    ; check: movzbq 0(%rdi), %rdi
+    ; nextln: addl %esi, %edi
     return v3
 }
 
@@ -52,10 +52,10 @@ block0(v0: i64, v1: i64):
     store.i64 v3, v1
     v4 = load.i64 v3
     return v4
-    ; check: movq 0(%rdi), %r12
-    ; nextln: movq %r12, %r13
-    ; nextln: addq %rdi, %r13
-    ; nextln: movq %r13, 0(%rsi)
-    ; nextln: movq 0(%r12,%rdi,1), %r12
-    ; nextln: movq %r12, %rax
+    ; check: movq 0(%rdi), %rax
+    ; nextln: movq %rax, %rcx
+    ; nextln: addq %rdi, %rcx
+    ; nextln: movq %rcx, 0(%rsi)
+    ; nextln: movq 0(%rax,%rdi,1), %rsi
+    ; nextln: movq %rsi, %rax
 }
diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
index fe52e3c503..65e1b5df7e 100644
--- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
@@ -28,9 +28,9 @@ block0(v0: i32):
 }
 ; check: movd %edi, %xmm1
 ; nextln: psllw %xmm1, %xmm0
-; nextln: lea const(VCodeConstant(0)), %r12
+; nextln: lea const(VCodeConstant(0)), %rsi
 ; nextln: shlq $$4, %rdi
-; nextln: movdqu 0(%r12,%rdi,1), %xmm1
+; nextln: movdqu 0(%rsi,%rdi,1), %xmm1
 ; nextln: pand %xmm1, %xmm0
 
 function %ushr_i8x16_imm() -> i8x16 {
@@ -81,12 +81,12 @@ block0(v0: i64x2, v1: i32):
     v2 = sshr v0, v1
     return v2
 }
-; check: pextrd.w $$0, %xmm0, %r12
-; nextln: pextrd.w $$1, %xmm0, %r13
+; check: pextrd.w $$0, %xmm0, %rsi
+; nextln: pextrd.w $$1, %xmm0, %rax
 ; nextln: movq %rdi, %rcx
-; nextln: sarq %cl, %r12
+; nextln: sarq %cl, %rsi
 ; nextln: movq %rdi, %rcx
-; nextln: sarq %cl, %r13
-; nextln: pinsrd.w $$0, %r12, %xmm1
-; nextln: pinsrd.w $$1, %r13, %xmm1
+; nextln: sarq %cl, %rax
+; nextln: pinsrd.w $$0, %rsi, %xmm1
+; nextln: pinsrd.w $$1, %rax, %xmm1
 ; nextln: movdqa %xmm1, %xmm0
diff --git a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif
index c9188a9514..f44dbd3b62 100644
--- a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif
@@ -70,8 +70,8 @@ block0:
     return v1
 }
 ; check: uninit %xmm0
-; nextln: pinsrw $$0, %r12, %xmm0
-; nextln: pinsrw $$1, %r12, %xmm0
+; nextln: pinsrw $$0, %rsi, %xmm0
+; nextln: pinsrw $$1, %rsi, %xmm0
 ; nextln: pshufd $$0, %xmm0, %xmm0
 
 function %splat_i32(i32) -> i32x4 {
diff --git a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif
index 0e366db441..d03aa0b204 100644
--- a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif
@@ -17,7 +17,7 @@ block0(v0: b32x4):
     return v1
 }
 ; check: ptest %xmm0, %xmm0
-; nextln: setnz %r12b
+; nextln: setnz %sil
 
 function %vall_true_i64x2(i64x2) -> b1 {
 block0(v0: i64x2):
@@ -27,4 +27,4 @@ block0(v0: i64x2):
 ; check: pxor %xmm1, %xmm1
 ; nextln: pcmpeqq %xmm0, %xmm1
 ; nextln: ptest %xmm1, %xmm1
-; nextln: setz %r12b
+; nextln: setz %sil
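
The module comment added in regs.rs above argues the trade-off in prose; the arithmetic behind it is easy to make concrete. The following standalone Rust sketch is purely illustrative (it is not Cranelift code, and CALLER_SAVED, CALLEE_SAVED, and clobber_saves are hypothetical names): a greedy picker hands out registers in preference order, and each callee-saved register it touches costs one save/restore pair in the prologue/epilogue.

// Toy model of the allocation-order heuristic; NOT Cranelift's allocator.
// SystemV x86_64 allocatable GPRs, split by save convention.
const CALLER_SAVED: [&str; 9] = ["rsi", "rdi", "rax", "rcx", "rdx", "r8", "r9", "r10", "r11"];
const CALLEE_SAVED: [&str; 4] = ["r12", "r13", "r14", "rbx"];

/// Callee-saved registers clobbered when the first `live` registers
/// of `order` are handed out (each one costs a prologue/epilogue save).
fn clobber_saves(order: &[&str], live: usize) -> usize {
    order
        .iter()
        .take(live)
        .filter(|&r| CALLEE_SAVED.contains(r))
        .count()
}

fn main() {
    // Old order: callee-saved first. New order (this patch): caller-saved first.
    let old: Vec<&str> = CALLEE_SAVED.iter().chain(&CALLER_SAVED).copied().collect();
    let new: Vec<&str> = CALLER_SAVED.iter().chain(&CALLEE_SAVED).copied().collect();

    for live in [1, 4, 10, 13] {
        println!(
            "{live:2} live values: old order saves {}, new order saves {}",
            clobber_saves(&old, live),
            clobber_saves(&new, live)
        );
    }
}

With a small number of simultaneously-live values (the common case, as the filetest churn above suggests: %r12/%r13 become %rsi/%rax and friends), the caller-saved-first order needs no prologue/epilogue saves at all; under high pressure the two orders converge, which is where the allocator's own modeling of call-clobbered registers takes over.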