From b830c3cf9318d57cffb9cf5bca8b9997332ff574 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Mon, 23 May 2022 12:51:04 -0700 Subject: [PATCH] Pull in regalloc2 v0.2.0, with no more separate scratch registers. (#4182) RA2 recently removed the need for a dedicated scratch register for cyclic moves (bytecodealliance/regalloc2#51). This has moderate positive performance impact on function bodies that were register-constrained, as it means that one more register is available. In Sightglass, I measured +5-8% on `blake3-scalar`, at least among current benchmarks. --- Cargo.lock | 4 +- cranelift/codegen/Cargo.toml | 2 +- .../codegen/src/isa/aarch64/inst/regs.rs | 17 +- cranelift/codegen/src/isa/s390x/inst/regs.rs | 5 +- cranelift/codegen/src/isa/x64/inst/regs.rs | 5 +- cranelift/codegen/src/isa/x64/mod.rs | 12 +- .../filetests/isa/aarch64/prologue.clif | 64 +-- .../isa/s390x/atomic_cas-little.clif | 16 +- .../filetests/isa/s390x/atomic_cas.clif | 8 +- .../filetests/isa/s390x/multivalue-ret.clif | 13 +- .../filetests/isa/x64/call-conv.clif | 98 ++-- .../filetests/isa/x64/cmp-mem-bug.clif | 10 +- .../filetests/filetests/isa/x64/fastcall.clif | 135 +++-- .../filetests/filetests/isa/x64/heap.clif | 14 +- .../filetests/filetests/isa/x64/i128.clif | 527 +++++++++--------- .../filetests/isa/x64/immediates.clif | 8 +- .../filetests/filetests/isa/x64/load-op.clif | 10 +- .../filetests/filetests/isa/x64/popcnt.clif | 108 ++-- .../isa/x64/simd-bitwise-compile.clif | 16 +- .../filetests/isa/x64/struct-arg.clif | 8 +- .../filetests/filetests/isa/x64/table.clif | 12 +- 21 files changed, 529 insertions(+), 563 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 926ee2c623..e23d2f39a2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2351,9 +2351,9 @@ dependencies = [ [[package]] name = "regalloc2" -version = "0.1.3" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "904196c12c9f55d3aea578613219f493ced8e05b3d0c6a42d11cb4142d8b4879" +checksum = "99e5ed49768d554b34463e467c27af849a7335c051d3ac402c4755f8366c6e0b" dependencies = [ "fxhash", "log", diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 8e6b2d08b9..0185bad514 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -23,7 +23,7 @@ serde = { version = "1.0.94", features = ["derive"], optional = true } bincode = { version = "1.2.1", optional = true } gimli = { version = "0.26.0", default-features = false, features = ["write"], optional = true } smallvec = { version = "1.6.1" } -regalloc2 = { version = "0.1.3", features = ["checker"] } +regalloc2 = { version = "0.2.0", features = ["checker"] } souper-ir = { version = "2.1.0", optional = true } # It is a goal of the cranelift-codegen crate to have minimal external dependencies. # Please don't add any unless they are essential to the task of creating binary diff --git a/cranelift/codegen/src/isa/aarch64/inst/regs.rs b/cranelift/codegen/src/isa/aarch64/inst/regs.rs index ad74d662b4..9e486e332a 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/regs.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/regs.rs @@ -209,24 +209,9 @@ pub fn create_reg_env(flags: &settings::Flags) -> MachineEnv { preg(vreg(28)), preg(vreg(29)), preg(vreg(30)), - // v31 is the scratch reg, to allow for parallel moves. + preg(vreg(31)), ], ], - scratch_by_class: [ - // We use tmp2 (x17) as the regalloc scratch register, - // used to resolve cyclic parallel moves. This is valid - // because tmp2 is never live between regalloc-visible - // instructions, only within them (i.e. in expansion into - // multiple machine instructions when that - // occurs). spilltmp is used for moves to/from spillslots, - // but tmp2 never is, so it is available for this - // purpose. (Its only other use is in prologue stack - // checks, and the prologue is prepended after regalloc - // runs.) - preg(tmp2_reg()), - // We use v31 for Float/Vec-class parallel moves. - preg(vreg(31)), - ], fixed_stack_slots: vec![], }; diff --git a/cranelift/codegen/src/isa/s390x/inst/regs.rs b/cranelift/codegen/src/isa/s390x/inst/regs.rs index 7b661bf0e3..2782a3d1ff 100644 --- a/cranelift/codegen/src/isa/s390x/inst/regs.rs +++ b/cranelift/codegen/src/isa/s390x/inst/regs.rs @@ -102,7 +102,7 @@ pub fn create_machine_env(_flags: &settings::Flags) -> MachineEnv { preg(gpr(10)), preg(gpr(11)), preg(gpr(12)), - // no r13; it is our scratch reg. + preg(gpr(13)), preg(gpr(14)), // no r15; it is the stack pointer. ], @@ -114,10 +114,9 @@ pub fn create_machine_env(_flags: &settings::Flags) -> MachineEnv { preg(fpr(12)), preg(fpr(13)), preg(fpr(14)), - // no f15; it is our scratch reg. + preg(fpr(15)), ], ], - scratch_by_class: [preg(gpr(13)), preg(fpr(15))], fixed_stack_slots: vec![], } } diff --git a/cranelift/codegen/src/isa/x64/inst/regs.rs b/cranelift/codegen/src/isa/x64/inst/regs.rs index d720951f57..7dee9a9a17 100644 --- a/cranelift/codegen/src/isa/x64/inst/regs.rs +++ b/cranelift/codegen/src/isa/x64/inst/regs.rs @@ -166,7 +166,7 @@ pub(crate) fn create_reg_env_systemv(flags: &settings::Flags) -> MachineEnv { preg(rdx()), preg(r8()), preg(r9()), - // N.B.: not r10; it is our scratch reg. + preg(r10()), preg(r11()), ], // Preferred XMMs: all of them. @@ -186,7 +186,7 @@ pub(crate) fn create_reg_env_systemv(flags: &settings::Flags) -> MachineEnv { preg(xmm12()), preg(xmm13()), preg(xmm14()), - // N.B.: not xmm15; it is our scratch reg. + preg(xmm15()), ], ], non_preferred_regs_by_class: [ @@ -195,7 +195,6 @@ pub(crate) fn create_reg_env_systemv(flags: &settings::Flags) -> MachineEnv { // Non-preferred XMMs: none. vec![], ], - scratch_by_class: [preg(r10()), preg(xmm15())], fixed_stack_slots: vec![], }; diff --git a/cranelift/codegen/src/isa/x64/mod.rs b/cranelift/codegen/src/isa/x64/mod.rs index 05e6ccdc78..c732b56194 100644 --- a/cranelift/codegen/src/isa/x64/mod.rs +++ b/cranelift/codegen/src/isa/x64/mod.rs @@ -425,11 +425,11 @@ mod test { // 00000004 41B900000000 mov r9d,0x0 // 0000000A 83FF02 cmp edi,byte +0x2 // 0000000D 0F8320000000 jnc near 0x33 - // 00000013 8BC7 mov eax,edi - // 00000015 490F43C1 cmovnc rax,r9 + // 00000013 8BF7 mov esi,edi + // 00000015 490F43F1 cmovnc rsi,r9 // 00000019 4C8D0D0B000000 lea r9,[rel 0x2b] - // 00000020 4963448100 movsxd rax,dword [r9+rax*4+0x0] - // 00000025 4901C1 add r9,rax + // 00000020 496374B100 movsxd rsi,dword [r9+rsi*4+0x0] + // 00000025 4901F1 add r9,rsi // 00000028 41FFE1 jmp r9 // 0000002B 1200 adc al,[rax] // 0000002D 0000 add [rax],al @@ -449,8 +449,8 @@ mod test { // 00000050 C3 ret let golden = vec![ - 85, 72, 137, 229, 65, 185, 0, 0, 0, 0, 131, 255, 2, 15, 131, 32, 0, 0, 0, 139, 199, 73, - 15, 67, 193, 76, 141, 13, 11, 0, 0, 0, 73, 99, 68, 129, 0, 73, 1, 193, 65, 255, 225, + 85, 72, 137, 229, 65, 185, 0, 0, 0, 0, 131, 255, 2, 15, 131, 32, 0, 0, 0, 139, 247, 73, + 15, 67, 241, 76, 141, 13, 11, 0, 0, 0, 73, 99, 116, 177, 0, 73, 1, 241, 65, 255, 225, 18, 0, 0, 0, 28, 0, 0, 0, 184, 3, 0, 0, 0, 72, 137, 236, 93, 195, 184, 1, 0, 0, 0, 72, 137, 236, 93, 195, 184, 2, 0, 0, 0, 72, 137, 236, 93, 195, ]; diff --git a/cranelift/filetests/filetests/isa/aarch64/prologue.clif b/cranelift/filetests/filetests/isa/aarch64/prologue.clif index 6742f1f0f1..2451faa03c 100644 --- a/cranelift/filetests/filetests/isa/aarch64/prologue.clif +++ b/cranelift/filetests/filetests/isa/aarch64/prologue.clif @@ -81,12 +81,10 @@ block0(v0: f64): ; stp d12, d13, [sp, #-16]! ; stp d10, d11, [sp, #-16]! ; stp d8, d9, [sp, #-16]! -; sub sp, sp, #16 ; block0: +; fadd d2, d0, d0 ; fadd d4, d0, d0 ; fadd d6, d0, d0 -; str q6, [sp] -; fadd d6, d0, d0 ; fadd d8, d0, d0 ; fadd d10, d0, d0 ; fadd d12, d0, d0 @@ -98,56 +96,54 @@ block0(v0: f64): ; fadd d9, d0, d0 ; fadd d11, d0, d0 ; fadd d13, d0, d0 -; fadd d16, d0, d0 +; fadd d30, d0, d0 ; fadd d15, d0, d0 +; fadd d18, d0, d0 ; fadd d20, d0, d0 ; fadd d22, d0, d0 ; fadd d24, d0, d0 ; fadd d26, d0, d0 ; fadd d28, d0, d0 -; fadd d30, d0, d0 -; fadd d17, d0, d0 +; fadd d31, d0, d0 +; fadd d16, d0, d0 ; fadd d19, d0, d0 ; fadd d21, d0, d0 ; fadd d23, d0, d0 ; fadd d25, d0, d0 ; fadd d27, d0, d0 ; fadd d29, d0, d0 -; fadd d18, d0, d0 -; fadd d2, d0, d0 -; fadd d0, d0, d4 -; ldr q4, [sp] -; fadd d6, d4, d6 +; fadd d17, d0, d0 +; fadd d0, d0, d2 +; fadd d2, d4, d6 ; fadd d4, d8, d10 -; fadd d10, d12, d14 +; fadd d6, d12, d14 ; fadd d8, d1, d3 -; fadd d14, d5, d7 +; fadd d10, d5, d7 ; fadd d12, d9, d11 -; fadd d3, d13, d16 -; fadd d1, d15, d20 -; fadd d7, d22, d24 -; fadd d5, d26, d28 -; fadd d11, d30, d17 -; fadd d9, d19, d21 -; fadd d15, d23, d25 -; fadd d13, d27, d29 -; fadd d2, d18, d2 -; fadd d0, d0, d6 -; fadd d6, d4, d10 -; fadd d4, d8, d14 -; fadd d10, d12, d3 -; fadd d8, d1, d7 -; fadd d11, d5, d11 -; fadd d12, d9, d15 -; fadd d14, d13, d2 -; fadd d0, d0, d6 -; fadd d2, d4, d10 -; fadd d4, d8, d11 +; fadd d14, d13, d30 +; fadd d1, d15, d18 +; fadd d3, d20, d22 +; fadd d5, d24, d26 +; fadd d7, d28, d31 +; fadd d9, d16, d19 +; fadd d11, d21, d23 +; fadd d13, d25, d27 +; fadd d15, d29, d17 +; fadd d0, d0, d2 +; fadd d2, d4, d6 +; fadd d4, d8, d10 +; fadd d6, d12, d14 +; fadd d8, d1, d3 +; fadd d10, d5, d7 +; fadd d12, d9, d11 +; fadd d14, d13, d15 +; fadd d0, d0, d2 +; fadd d2, d4, d6 +; fadd d4, d8, d10 ; fadd d6, d12, d14 ; fadd d8, d0, d2 ; fadd d10, d4, d6 ; fadd d0, d8, d10 -; add sp, sp, #16 ; ldp d8, d9, [sp], #16 ; ldp d10, d11, [sp], #16 ; ldp d12, d13, [sp], #16 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif b/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif index 624f0d1849..81844995d0 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_cas-little.clif @@ -37,19 +37,19 @@ block0(v0: i64, v1: i16, v2: i16, v3: i64): return v4 } -; stmg %r6, %r15, 48(%r15) +; stmg %r13, %r15, 104(%r15) ; block0: -; lgr %r6, %r3 +; lgr %r13, %r3 ; sllk %r3, %r5, 3 ; nill %r5, 65532 -; lgr %r2, %r6 +; lgr %r2, %r13 ; lrvr %r2, %r2 ; lrvr %r4, %r4 ; l %r0, 0(%r5) ; 0: rll %r1, %r0, 16(%r3) ; rxsbg %r1, %r2, 176, 64, 48 ; jglh 1f ; risbgn %r1, %r4, 48, 64, 48 ; rll %r1, %r1, 16(%r3) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: ; rll %r2, %r0, 0(%r3) ; lrvr %r2, %r2 -; lmg %r6, %r15, 48(%r15) +; lmg %r13, %r15, 104(%r15) ; br %r14 function %atomic_cas_i8(i64, i8, i8, i64) -> i8 { @@ -58,14 +58,14 @@ block0(v0: i64, v1: i8, v2: i8, v3: i64): return v4 } -; stmg %r12, %r15, 96(%r15) +; stmg %r11, %r15, 88(%r15) ; block0: ; sllk %r2, %r5, 3 ; nill %r5, 65532 -; lcr %r12, %r2 +; lcr %r11, %r2 ; l %r0, 0(%r5) -; 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r12) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r11) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: ; rll %r2, %r0, 8(%r2) -; lmg %r12, %r15, 96(%r15) +; lmg %r11, %r15, 88(%r15) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif b/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif index a7d94fc46c..0a3d10f403 100644 --- a/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif +++ b/cranelift/filetests/filetests/isa/s390x/atomic_cas.clif @@ -45,14 +45,14 @@ block0(v0: i64, v1: i8, v2: i8, v3: i64): return v4 } -; stmg %r12, %r15, 96(%r15) +; stmg %r11, %r15, 88(%r15) ; block0: ; sllk %r2, %r5, 3 ; nill %r5, 65532 -; lcr %r12, %r2 +; lcr %r11, %r2 ; l %r0, 0(%r5) -; 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r12) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: +; 0: rll %r1, %r0, 0(%r2) ; rxsbg %r1, %r3, 160, 40, 24 ; jglh 1f ; risbgn %r1, %r4, 32, 40, 24 ; rll %r1, %r1, 0(%r11) ; cs %r0, %r1, 0(%r5) ; jglh 0b ; 1: ; rll %r2, %r0, 8(%r2) -; lmg %r12, %r15, 96(%r15) +; lmg %r11, %r15, 88(%r15) ; br %r14 diff --git a/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif b/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif index 896f9a49d2..dd58f5895f 100644 --- a/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif +++ b/cranelift/filetests/filetests/isa/s390x/multivalue-ret.clif @@ -29,18 +29,19 @@ block1: return v0, v1, v2, v3, v4, v5 } -; stmg %r6, %r15, 48(%r15) +; stmg %r8, %r15, 64(%r15) ; block0: ; lgr %r12, %r2 ; lghi %r2, 1 ; lghi %r3, 2 ; lghi %r4, 3 ; lghi %r5, 4 -; lghi %r10, 5 -; lghi %r6, 6 -; stg %r10, 0(%r12) -; stg %r6, 8(%r12) -; lmg %r6, %r15, 48(%r15) +; lghi %r8, 5 +; lghi %r11, 6 +; lgr %r9, %r12 +; stg %r8, 0(%r9) +; stg %r11, 8(%r9) +; lmg %r8, %r15, 64(%r15) ; br %r14 function %f3() -> f64, f64, f64, f64 { diff --git a/cranelift/filetests/filetests/isa/x64/call-conv.clif b/cranelift/filetests/filetests/isa/x64/call-conv.clif index cfb2bd3009..4a14f53315 100644 --- a/cranelift/filetests/filetests/isa/x64/call-conv.clif +++ b/cranelift/filetests/filetests/isa/x64/call-conv.clif @@ -36,20 +36,20 @@ block0(v0: i32, v1: f32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %rax +; movq %rdi, %r11 ; movdqa %xmm0, %xmm6 ; subq %rsp, $32, %rsp ; virtual_sp_offset_adjust 32 -; movq %rax, %rcx +; movq %r11, %rcx ; movdqa %xmm6, %xmm1 -; movq %rax, %rdi +; movq %r11, %rdi ; movdqa %xmm1, %xmm6 ; call *%rdi ; addq %rsp, $32, %rsp ; virtual_sp_offset_adjust -32 -; movq %rdi, %rax +; movq %rdi, %r11 ; movdqa %xmm6, %xmm0 -; call *%rax +; call *%r11 ; movq %rbp, %rsp ; popq %rbp ; ret @@ -129,20 +129,19 @@ block0( ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $96, %rsp -; movq %rbx, 48(%rsp) -; movq %r12, 56(%rsp) -; movq %r13, 64(%rsp) -; movq %r14, 72(%rsp) -; movq %r15, 80(%rsp) +; subq %rsp, $64, %rsp +; movq %rbx, 32(%rsp) +; movq %r13, 40(%rsp) +; movq %r14, 48(%rsp) +; movq %r15, 56(%rsp) ; block0: ; movq %rsi, %rbx ; movq %rdx, %r14 -; movq %rcx, %r12 +; movq %rcx, %rax ; movq %r8, %r13 ; movq %r9, %r15 -; movq 16(%rbp), %rax -; movq 24(%rbp), %r11 +; movq 16(%rbp), %r11 +; movq 24(%rbp), %r10 ; movss 32(%rbp), %xmm9 ; movsd 40(%rbp), %xmm8 ; subq %rsp, $144, %rsp @@ -150,7 +149,7 @@ block0( ; movq %rdi, %rcx ; movq %rbx, %rdx ; movq %r14, %r8 -; movq %r12, %r9 +; movq %rax, %r9 ; movq %r13, %rsi ; movq %rsi, 32(%rsp) ; movq %r15, %rsi @@ -163,19 +162,18 @@ block0( ; movsd %xmm5, 88(%rsp) ; movsd %xmm6, 96(%rsp) ; movsd %xmm7, 104(%rsp) -; movq %rax, 112(%rsp) -; movl %r11d, 120(%rsp) +; movq %r11, 112(%rsp) +; movl %r10d, 120(%rsp) ; movss %xmm9, 128(%rsp) ; movsd %xmm8, 136(%rsp) ; call *%rdi ; addq %rsp, $144, %rsp ; virtual_sp_offset_adjust -144 -; movq 48(%rsp), %rbx -; movq 56(%rsp), %r12 -; movq 64(%rsp), %r13 -; movq 72(%rsp), %r14 -; movq 80(%rsp), %r15 -; addq %rsp, $96, %rsp +; movq 32(%rsp), %rbx +; movq 40(%rsp), %r13 +; movq 48(%rsp), %r14 +; movq 56(%rsp), %r15 +; addq %rsp, $64, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret @@ -194,26 +192,22 @@ block0(v0: i64, v1:i64, v2:i64, v3:i64, v4:i64): ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $16, %rsp -; movq %rbx, 0(%rsp) ; block0: -; movq %rsi, %r11 -; movq %rdx, %r9 -; movq %rcx, %rax -; movq %r8, %rbx +; movq %rsi, %r9 +; movq %rdx, %rax +; movq %rcx, %r11 +; movq %r8, %r10 ; subq %rsp, $48, %rsp ; virtual_sp_offset_adjust 48 ; movq %rdi, %rcx -; movq %r11, %rdx -; movq %r9, %r8 -; movq %rax, %r9 -; movq %rbx, %r11 -; movq %r11, 32(%rsp) +; movq %r9, %rdx +; movq %rax, %r8 +; movq %r11, %r9 +; movq %r10, %rsi +; movq %rsi, 32(%rsp) ; call *%rdi ; addq %rsp, $48, %rsp ; virtual_sp_offset_adjust -48 -; movq 0(%rsp), %rbx -; addq %rsp, $16, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret @@ -229,25 +223,23 @@ block0(v0: i32, v1: f32, v2: i64, v3: f64, v4: i32, v5: i32, v6: i32, v7: f32, v ; movq %rsp, %rbp ; block0: ; movdqa %xmm0, %xmm6 -; movq %rsi, %rax +; movq %rsi, %r10 ; movdqa %xmm1, %xmm14 ; movq %rcx, %r11 ; movq %r8, %r9 -; movdqa %xmm3, %xmm10 +; movdqa %xmm3, %xmm15 ; subq %rsp, $96, %rsp ; virtual_sp_offset_adjust 96 ; movq %rdi, %rcx ; movdqa %xmm6, %xmm1 -; movq %rax, %r8 +; movq %r10, %r8 ; movdqa %xmm14, %xmm3 ; movl %edx, 32(%rsp) -; movq %r11, %rdx -; movl %edx, 40(%rsp) -; movq %r9, %rax -; movl %eax, 48(%rsp) +; movq %r11, %r10 +; movl %r10d, 40(%rsp) +; movl %r9d, 48(%rsp) ; movss %xmm2, 56(%rsp) -; movdqa %xmm10, %xmm2 -; movsd %xmm2, 64(%rsp) +; movsd %xmm15, 64(%rsp) ; movss %xmm4, 72(%rsp) ; movsd %xmm5, 80(%rsp) ; call *%rdi @@ -350,11 +342,11 @@ block0: ; movq %rbx, 0(%rsp) ; block0: ; movq %rdi, %rbx -; movl $1, %esi +; movl $1, %r11d ; subq %rsp, $16, %rsp ; virtual_sp_offset_adjust 16 ; lea 0(%rsp), %rdi -; call *%rsi +; call *%r11 ; movq 0(%rsp), %rcx ; addq %rsp, $16, %rsp ; virtual_sp_offset_adjust -16 @@ -381,8 +373,8 @@ block0: ; movq %r12, 0(%rsp) ; block0: ; movq %rdi, %r12 -; movl $1, %eax -; call *%rax +; movl $1, %edi +; call *%rdi ; movq %r12, %rdi ; movq %rax, 0(%rdi) ; movl %edx, 8(%rdi) @@ -409,10 +401,10 @@ block0(v0: f32, v1: i64, v2: i32, v3: f32): ; movq %rdx, %rbx ; movl $1, %eax ; call *%rax -; movq %rbx, %rcx -; movq %rax, 0(%rcx) -; movl %edx, 8(%rcx) -; movss %xmm1, 12(%rcx) +; movq %rbx, %r10 +; movq %rax, 0(%r10) +; movl %edx, 8(%r10) +; movss %xmm1, 12(%r10) ; movq 0(%rsp), %rbx ; addq %rsp, $16, %rsp ; movq %rbp, %rsp diff --git a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif index ffa1a37d60..6fc713bdf0 100644 --- a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif +++ b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif @@ -39,13 +39,13 @@ block0(v0: f64, v1: i64): ; movsd 0(%rdi), %xmm12 ; ucomisd %xmm12, %xmm0 ; setnp %al -; setz %r8b -; andl %eax, %r8d, %eax +; setz %dl +; andl %eax, %edx, %eax ; andq %rax, $1, %rax ; ucomisd %xmm0, %xmm12 -; movdqa %xmm0, %xmm6 -; mov z, sd; j%xmm6 $next; mov%xmm0 %xmm0, %xmm0; $next: -; mov np, sd; j%xmm6 $next; mov%xmm0 %xmm0, %xmm0; $next: +; movdqa %xmm0, %xmm5 +; mov z, sd; j%xmm5 $next; mov%xmm0 %xmm0, %xmm0; $next: +; mov np, sd; j%xmm5 $next; mov%xmm0 %xmm0, %xmm0; $next: ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif index 247af5ac38..95d07115b5 100644 --- a/cranelift/filetests/filetests/isa/x64/fastcall.clif +++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif @@ -113,7 +113,7 @@ block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64): ; movq %rsp, %rbp ; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } ; block0: -; movq 48(%rbp), %r11 +; movq 48(%rbp), %r10 ; movq 56(%rbp), %rax ; movq %rbp, %rsp ; popq %rbp @@ -129,7 +129,7 @@ block0(v0: i128, v1: i64, v2: i128, v3: i128): ; movq %rsp, %rbp ; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 0 } ; block0: -; movq 48(%rbp), %r11 +; movq 48(%rbp), %r10 ; movq 56(%rbp), %rax ; movq 64(%rbp), %rdx ; movq %rbp, %rsp @@ -159,8 +159,8 @@ block0(v0: i64): ; movdqa %xmm2, %xmm3 ; movq %r8, 32(%rsp) ; movq %r8, 40(%rsp) -; load_ext_name %g+0, %r9 -; call *%r9 +; load_ext_name %g+0, %r8 +; call *%r8 ; addq %rsp, $48, %rsp ; virtual_sp_offset_adjust -48 ; movq %rbp, %rsp @@ -220,87 +220,86 @@ block0(v0: i64): ; pushq %rbp ; unwind PushFrameRegs { offset_upward_to_caller_sp: 16 } ; movq %rsp, %rbp -; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 144 } -; subq %rsp, $240, %rsp -; movdqu %xmm6, 96(%rsp) +; unwind DefineNewFrame { offset_upward_to_caller_sp: 16, offset_downward_to_clobbers: 160 } +; subq %rsp, $224, %rsp +; movdqu %xmm6, 64(%rsp) ; unwind SaveReg { clobber_offset: 0, reg: p6f } -; movdqu %xmm7, 112(%rsp) +; movdqu %xmm7, 80(%rsp) ; unwind SaveReg { clobber_offset: 16, reg: p7f } -; movdqu %xmm8, 128(%rsp) +; movdqu %xmm8, 96(%rsp) ; unwind SaveReg { clobber_offset: 32, reg: p8f } -; movdqu %xmm9, 144(%rsp) +; movdqu %xmm9, 112(%rsp) ; unwind SaveReg { clobber_offset: 48, reg: p9f } -; movdqu %xmm10, 160(%rsp) +; movdqu %xmm10, 128(%rsp) ; unwind SaveReg { clobber_offset: 64, reg: p10f } -; movdqu %xmm11, 176(%rsp) +; movdqu %xmm11, 144(%rsp) ; unwind SaveReg { clobber_offset: 80, reg: p11f } -; movdqu %xmm12, 192(%rsp) +; movdqu %xmm12, 160(%rsp) ; unwind SaveReg { clobber_offset: 96, reg: p12f } -; movdqu %xmm13, 208(%rsp) +; movdqu %xmm13, 176(%rsp) ; unwind SaveReg { clobber_offset: 112, reg: p13f } -; movdqu %xmm14, 224(%rsp) +; movdqu %xmm14, 192(%rsp) ; unwind SaveReg { clobber_offset: 128, reg: p14f } +; movdqu %xmm15, 208(%rsp) +; unwind SaveReg { clobber_offset: 144, reg: p15f } ; block0: ; movsd 0(%rcx), %xmm0 -; movsd 8(%rcx), %xmm12 -; movdqu %xmm12, rsp(80 + virtual offset) +; movsd 8(%rcx), %xmm11 +; movdqu %xmm11, rsp(48 + virtual offset) ; movsd 16(%rcx), %xmm6 -; movdqu %xmm6, rsp(0 + virtual offset) -; movsd 24(%rcx), %xmm2 -; movdqu %xmm2, rsp(64 + virtual offset) +; movsd 24(%rcx), %xmm15 +; movdqu %xmm15, rsp(32 + virtual offset) ; movsd 32(%rcx), %xmm14 -; movsd 40(%rcx), %xmm3 -; movdqu %xmm3, rsp(48 + virtual offset) -; movsd 48(%rcx), %xmm9 -; movsd 56(%rcx), %xmm7 -; movdqu %xmm7, rsp(32 + virtual offset) +; movsd 40(%rcx), %xmm1 +; movdqu %xmm1, rsp(16 + virtual offset) +; movsd 48(%rcx), %xmm8 +; movsd 56(%rcx), %xmm9 +; movdqu %xmm9, rsp(0 + virtual offset) ; movsd 64(%rcx), %xmm13 -; movsd 72(%rcx), %xmm10 -; movdqu %xmm10, rsp(16 + virtual offset) -; movsd 80(%rcx), %xmm11 -; movsd 88(%rcx), %xmm10 -; movsd 96(%rcx), %xmm5 -; movsd 104(%rcx), %xmm12 -; movsd 112(%rcx), %xmm1 -; movsd 120(%rcx), %xmm2 -; movsd 128(%rcx), %xmm8 -; movsd 136(%rcx), %xmm3 -; movsd 144(%rcx), %xmm4 -; movdqu rsp(80 + virtual offset), %xmm6 +; movsd 72(%rcx), %xmm3 +; movsd 80(%rcx), %xmm10 +; movsd 88(%rcx), %xmm5 +; movsd 96(%rcx), %xmm4 +; movsd 104(%rcx), %xmm9 +; movsd 112(%rcx), %xmm12 +; movsd 120(%rcx), %xmm11 +; movsd 128(%rcx), %xmm7 +; movsd 136(%rcx), %xmm15 +; movsd 144(%rcx), %xmm2 +; movdqu rsp(48 + virtual offset), %xmm1 +; addsd %xmm0, %xmm1, %xmm0 +; movdqu rsp(32 + virtual offset), %xmm1 +; addsd %xmm6, %xmm1, %xmm6 +; movdqu rsp(16 + virtual offset), %xmm1 +; addsd %xmm14, %xmm1, %xmm14 +; movdqu rsp(0 + virtual offset), %xmm1 +; addsd %xmm8, %xmm1, %xmm8 +; addsd %xmm13, %xmm3, %xmm13 +; addsd %xmm10, %xmm5, %xmm10 +; addsd %xmm4, %xmm9, %xmm4 +; addsd %xmm12, %xmm11, %xmm12 +; addsd %xmm7, %xmm15, %xmm7 +; addsd %xmm2, 152(%rcx), %xmm2 ; addsd %xmm0, %xmm6, %xmm0 -; movdqu rsp(0 + virtual offset), %xmm6 -; movdqu rsp(64 + virtual offset), %xmm7 -; addsd %xmm6, %xmm7, %xmm6 -; movdqu rsp(48 + virtual offset), %xmm7 -; addsd %xmm14, %xmm7, %xmm14 -; movdqu rsp(32 + virtual offset), %xmm7 -; addsd %xmm9, %xmm7, %xmm9 -; movdqu rsp(16 + virtual offset), %xmm7 -; addsd %xmm13, %xmm7, %xmm13 -; addsd %xmm11, %xmm10, %xmm11 -; addsd %xmm5, %xmm12, %xmm5 -; addsd %xmm1, %xmm2, %xmm1 -; addsd %xmm8, %xmm3, %xmm8 -; addsd %xmm4, 152(%rcx), %xmm4 -; addsd %xmm0, %xmm6, %xmm0 -; addsd %xmm14, %xmm9, %xmm14 -; addsd %xmm13, %xmm11, %xmm13 -; addsd %xmm5, %xmm1, %xmm5 -; addsd %xmm8, %xmm4, %xmm8 +; addsd %xmm14, %xmm8, %xmm14 +; addsd %xmm13, %xmm10, %xmm13 +; addsd %xmm4, %xmm12, %xmm4 +; addsd %xmm7, %xmm2, %xmm7 ; addsd %xmm0, %xmm14, %xmm0 -; addsd %xmm13, %xmm5, %xmm13 +; addsd %xmm13, %xmm4, %xmm13 ; addsd %xmm0, %xmm13, %xmm0 -; addsd %xmm0, %xmm8, %xmm0 -; movdqu 96(%rsp), %xmm6 -; movdqu 112(%rsp), %xmm7 -; movdqu 128(%rsp), %xmm8 -; movdqu 144(%rsp), %xmm9 -; movdqu 160(%rsp), %xmm10 -; movdqu 176(%rsp), %xmm11 -; movdqu 192(%rsp), %xmm12 -; movdqu 208(%rsp), %xmm13 -; movdqu 224(%rsp), %xmm14 -; addq %rsp, $240, %rsp +; addsd %xmm0, %xmm7, %xmm0 +; movdqu 64(%rsp), %xmm6 +; movdqu 80(%rsp), %xmm7 +; movdqu 96(%rsp), %xmm8 +; movdqu 112(%rsp), %xmm9 +; movdqu 128(%rsp), %xmm10 +; movdqu 144(%rsp), %xmm11 +; movdqu 160(%rsp), %xmm12 +; movdqu 176(%rsp), %xmm13 +; movdqu 192(%rsp), %xmm14 +; movdqu 208(%rsp), %xmm15 +; addq %rsp, $224, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/heap.clif b/cranelift/filetests/filetests/isa/x64/heap.clif index 65fd524663..44c0ee30b8 100644 --- a/cranelift/filetests/filetests/isa/x64/heap.clif +++ b/cranelift/filetests/filetests/isa/x64/heap.clif @@ -17,17 +17,17 @@ block0(v0: i32, v1: i64): ; movq %rsp, %rbp ; block0: ; movl %edi, %eax -; movq 8(%rsi), %rcx -; movq %rax, %rdx -; addq %rdx, $32768, %rdx +; movq 8(%rsi), %rdi +; movq %rax, %rcx +; addq %rcx, $32768, %rcx ; jnb ; ud2 heap_oob ; -; cmpq %rcx, %rdx +; cmpq %rdi, %rcx ; jbe label1; j label2 ; block1: ; addq %rax, 0(%rsi), %rax -; xorq %r8, %r8, %r8 -; cmpq %rcx, %rdx -; cmovnbeq %r8, %rax, %rax +; xorq %rdx, %rdx, %rdx +; cmpq %rdi, %rcx +; cmovnbeq %rdx, %rax, %rax ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif index 51cc8129b6..e71170c9d6 100644 --- a/cranelift/filetests/filetests/isa/x64/i128.clif +++ b/cranelift/filetests/filetests/isa/x64/i128.clif @@ -113,18 +113,16 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %r11 -; imulq %r11, %rcx, %r11 +; movq %rdi, %r9 +; imulq %r9, %rcx, %r9 ; movq %rdi, %rax ; imulq %rsi, %rdx, %rsi -; movq %r11, %rdi -; addq %rdi, %rsi, %rdi -; movq %rdi, %r11 +; addq %r9, %rsi, %r9 ; mul %rax, %rdx, %rax, %rdx -; movq %r11, %r8 -; addq %r8, %rdx, %r8 -; movq %r8, %r11 -; movq %r11, %rdx +; movq %r9, %rdi +; addq %rdi, %rdx, %rdi +; movq %rdi, %r9 +; movq %r9, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -193,48 +191,47 @@ block0(v0: i128, v1: i128): ; movq %r15, 48(%rsp) ; block0: ; cmpq %rdx, %rdi -; setz %al -; cmpq %rcx, %rsi ; setz %r8b -; andq %rax, %r8, %rax -; testq $1, %rax +; cmpq %rcx, %rsi +; setz %r9b +; andq %r8, %r9, %r8 +; testq $1, %r8 ; setnz %al ; cmpq %rdx, %rdi -; setnz %r8b -; cmpq %rcx, %rsi -; setnz %r9b -; orq %r8, %r9, %r8 -; testq $1, %r8 -; setnz %r8b -; movq %r8, rsp(8 + virtual offset) -; cmpq %rcx, %rsi -; setl %r9b -; setz %r15b -; cmpq %rdx, %rdi -; setb %r11b -; andq %r11, %r15, %r11 -; orq %r11, %r9, %r11 -; andq %r11, $1, %r11 ; setnz %r9b ; cmpq %rcx, %rsi -; setl %r11b -; setz %bl -; cmpq %rdx, %rdi -; setbe %r14b -; andq %r14, %rbx, %r14 -; orq %r14, %r11, %r14 -; andq %r14, $1, %r14 +; setnz %r10b +; orq %r9, %r10, %r9 +; testq $1, %r9 ; setnz %r8b ; movq %r8, rsp(0 + virtual offset) ; cmpq %rcx, %rsi -; setnle %r15b +; setl %r11b +; setz %r10b +; cmpq %rdx, %rdi +; setb %r9b +; andq %r9, %r10, %r9 +; orq %r9, %r11, %r9 +; andq %r9, $1, %r9 +; setnz %r9b +; cmpq %rcx, %rsi +; setl %r10b +; setz %bl +; cmpq %rdx, %rdi +; setbe %r11b +; andq %r11, %rbx, %r11 +; orq %r11, %r10, %r11 +; andq %r11, $1, %r11 +; setnz %r10b +; cmpq %rcx, %rsi +; setnle %r11b ; setz %r12b ; cmpq %rdx, %rdi -; setnbe %bl -; andq %rbx, %r12, %rbx -; orq %rbx, %r15, %rbx -; andq %rbx, $1, %rbx -; setnz %r15b +; setnbe %r15b +; andq %r15, %r12, %r15 +; orq %r15, %r11, %r15 +; andq %r15, $1, %r15 +; setnz %r11b ; cmpq %rcx, %rsi ; setnle %bl ; setz %r13b @@ -255,41 +252,40 @@ block0(v0: i128, v1: i128): ; setnz %r12b ; cmpq %rcx, %rsi ; setb %r13b -; setz %r8b +; setz %r15b ; cmpq %rdx, %rdi ; setbe %r14b -; andq %r14, %r8, %r14 +; andq %r14, %r15, %r14 ; orq %r14, %r13, %r14 ; andq %r14, $1, %r14 ; setnz %r13b ; cmpq %rcx, %rsi ; setnbe %r14b -; setz %r11b +; setz %r8b ; cmpq %rdx, %rdi -; setnbe %r8b -; andq %r8, %r11, %r8 -; orq %r8, %r14, %r8 -; andq %r8, $1, %r8 +; setnbe %r15b +; andq %r15, %r8, %r15 +; orq %r15, %r14, %r15 +; andq %r15, $1, %r15 ; setnz %r14b ; cmpq %rcx, %rsi ; setnbe %sil ; setz %cl ; cmpq %rdx, %rdi -; setnb %r8b -; andq %r8, %rcx, %r8 -; orq %r8, %rsi, %r8 -; andq %r8, $1, %r8 -; setnz %dl -; movq rsp(8 + virtual offset), %r11 -; andl %eax, %r11d, %eax -; movq rsp(0 + virtual offset), %rdi -; andl %r9d, %edi, %r9d -; andl %r15d, %ebx, %r15d +; setnb %dil +; andq %rdi, %rcx, %rdi +; orq %rdi, %rsi, %rdi +; andq %rdi, $1, %rdi +; setnz %dil +; movq rsp(0 + virtual offset), %rsi +; andl %eax, %esi, %eax +; andl %r9d, %r10d, %r9d +; andl %r11d, %ebx, %r11d ; andl %r12d, %r13d, %r12d -; andl %r14d, %edx, %r14d +; andl %r14d, %edi, %r14d ; andl %eax, %r9d, %eax -; andl %r15d, %r12d, %r15d -; andl %eax, %r15d, %eax +; andl %r11d, %r12d, %r11d +; andl %eax, %r11d, %eax ; andl %eax, %r14d, %eax ; movq 16(%rsp), %rbx ; movq 24(%rsp), %r12 @@ -319,10 +315,10 @@ block2: ; movq %rsp, %rbp ; block0: ; cmpq $0, %rdi -; setz %dil +; setz %r11b ; cmpq $0, %rsi -; setz %cl -; andb %cl, %dil, %cl +; setz %al +; andb %al, %r11b, %al ; jnz label1; j label2 ; block1: ; movl $1, %eax @@ -353,10 +349,10 @@ block2: ; movq %rsp, %rbp ; block0: ; cmpq $0, %rdi -; setnz %dil +; setnz %r11b ; cmpq $0, %rsi -; setnz %cl -; orb %cl, %dil, %cl +; setnz %al +; orb %al, %r11b, %al ; jnz label1; j label2 ; block1: ; movl $1, %eax @@ -484,43 +480,43 @@ block0(v0: i128): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %r9 -; shrq $1, %r9, %r9 -; movabsq $8608480567731124087, %rax -; andq %r9, %rax, %r9 -; subq %rdi, %r9, %rdi -; shrq $1, %r9, %r9 -; andq %r9, %rax, %r9 -; subq %rdi, %r9, %rdi -; shrq $1, %r9, %r9 -; andq %r9, %rax, %r9 -; subq %rdi, %r9, %rdi +; movq %rdi, %r8 +; shrq $1, %r8, %r8 +; movabsq $8608480567731124087, %r11 +; andq %r8, %r11, %r8 +; subq %rdi, %r8, %rdi +; shrq $1, %r8, %r8 +; andq %r8, %r11, %r8 +; subq %rdi, %r8, %rdi +; shrq $1, %r8, %r8 +; andq %r8, %r11, %r8 +; subq %rdi, %r8, %rdi ; movq %rdi, %rax ; shrq $4, %rax, %rax ; addq %rax, %rdi, %rax -; movabsq $1085102592571150095, %r9 -; andq %rax, %r9, %rax -; movabsq $72340172838076673, %rcx -; imulq %rax, %rcx, %rax +; movabsq $1085102592571150095, %rcx +; andq %rax, %rcx, %rax +; movabsq $72340172838076673, %r10 +; imulq %rax, %r10, %rax ; shrq $56, %rax, %rax -; movq %rsi, %rdi -; shrq $1, %rdi, %rdi -; movabsq $8608480567731124087, %rcx -; andq %rdi, %rcx, %rdi -; subq %rsi, %rdi, %rsi -; shrq $1, %rdi, %rdi -; andq %rdi, %rcx, %rdi -; subq %rsi, %rdi, %rsi -; shrq $1, %rdi, %rdi -; andq %rdi, %rcx, %rdi -; subq %rsi, %rdi, %rsi +; movq %rsi, %rcx +; shrq $1, %rcx, %rcx +; movabsq $8608480567731124087, %r9 +; andq %rcx, %r9, %rcx +; subq %rsi, %rcx, %rsi +; shrq $1, %rcx, %rcx +; andq %rcx, %r9, %rcx +; subq %rsi, %rcx, %rsi +; shrq $1, %rcx, %rcx +; andq %rcx, %r9, %rcx +; subq %rsi, %rcx, %rsi ; movq %rsi, %rcx ; shrq $4, %rcx, %rcx ; addq %rcx, %rsi, %rcx -; movabsq $1085102592571150095, %rsi -; andq %rcx, %rsi, %rcx -; movabsq $72340172838076673, %rdx -; imulq %rcx, %rdx, %rcx +; movabsq $1085102592571150095, %rdi +; andq %rcx, %rdi, %rcx +; movabsq $72340172838076673, %r8 +; imulq %rcx, %r8, %rcx ; shrq $56, %rcx, %rcx ; addq %rax, %rcx, %rax ; xorq %rdx, %rdx, %rdx @@ -538,87 +534,87 @@ block0(v0: i128): ; movq %rsp, %rbp ; block0: ; movabsq $6148914691236517205, %r9 -; movq %rsi, %rax -; andq %rax, %r9, %rax +; movq %rsi, %r10 +; andq %r10, %r9, %r10 ; shrq $1, %rsi, %rsi ; andq %rsi, %r9, %rsi -; shlq $1, %rax, %rax -; orq %rax, %rsi, %rax -; movabsq $3689348814741910323, %rcx +; shlq $1, %r10, %r10 +; orq %r10, %rsi, %r10 +; movabsq $3689348814741910323, %rsi +; movq %r10, %rax +; andq %rax, %rsi, %rax +; shrq $2, %r10, %r10 +; andq %r10, %rsi, %r10 +; shlq $2, %rax, %rax +; orq %rax, %r10, %rax +; movabsq $1085102592571150095, %rcx ; movq %rax, %rdx ; andq %rdx, %rcx, %rdx -; shrq $2, %rax, %rax +; shrq $4, %rax, %rax ; andq %rax, %rcx, %rax -; shlq $2, %rdx, %rdx +; shlq $4, %rdx, %rdx ; orq %rdx, %rax, %rdx -; movabsq $1085102592571150095, %r9 -; movq %rdx, %rsi -; andq %rsi, %r9, %rsi -; shrq $4, %rdx, %rdx +; movabsq $71777214294589695, %r9 +; movq %rdx, %r10 +; andq %r10, %r9, %r10 +; shrq $8, %rdx, %rdx ; andq %rdx, %r9, %rdx -; shlq $4, %rsi, %rsi -; orq %rsi, %rdx, %rsi -; movabsq $71777214294589695, %rax -; movq %rsi, %rdx -; andq %rdx, %rax, %rdx -; shrq $8, %rsi, %rsi +; shlq $8, %r10, %r10 +; orq %r10, %rdx, %r10 +; movabsq $281470681808895, %rax +; movq %r10, %rsi ; andq %rsi, %rax, %rsi -; shlq $8, %rdx, %rdx -; orq %rdx, %rsi, %rdx -; movabsq $281470681808895, %r9 -; movq %rdx, %r11 -; andq %r11, %r9, %r11 -; shrq $16, %rdx, %rdx -; andq %rdx, %r9, %rdx -; shlq $16, %r11, %r11 -; orq %r11, %rdx, %r11 +; shrq $16, %r10, %r10 +; andq %r10, %rax, %r10 +; shlq $16, %rsi, %rsi +; orq %rsi, %r10, %rsi ; movabsq $4294967295, %rcx -; movq %r11, %rax +; movq %rsi, %rax ; andq %rax, %rcx, %rax -; shrq $32, %r11, %r11 +; shrq $32, %rsi, %rsi ; shlq $32, %rax, %rax -; orq %rax, %r11, %rax +; orq %rax, %rsi, %rax ; movabsq $6148914691236517205, %rdx -; movq %rdi, %r9 -; andq %r9, %rdx, %r9 +; movq %rdi, %r8 +; andq %r8, %rdx, %r8 ; shrq $1, %rdi, %rdi ; andq %rdi, %rdx, %rdi -; shlq $1, %r9, %r9 -; orq %r9, %rdi, %r9 -; movabsq $3689348814741910323, %rsi -; movq %r9, %rcx -; andq %rcx, %rsi, %rcx -; shrq $2, %r9, %r9 -; andq %r9, %rsi, %r9 -; shlq $2, %rcx, %rcx -; orq %rcx, %r9, %rcx -; movabsq $1085102592571150095, %rdx -; movq %rcx, %r9 -; andq %r9, %rdx, %r9 -; shrq $4, %rcx, %rcx -; andq %rcx, %rdx, %rcx -; shlq $4, %r9, %r9 -; orq %r9, %rcx, %r9 -; movabsq $71777214294589695, %rsi -; movq %r9, %rcx -; andq %rcx, %rsi, %rcx -; shrq $8, %r9, %r9 -; andq %r9, %rsi, %r9 -; shlq $8, %rcx, %rcx -; orq %rcx, %r9, %rcx -; movabsq $281470681808895, %rdx +; shlq $1, %r8, %r8 +; orq %r8, %rdi, %r8 +; movabsq $3689348814741910323, %r10 +; movq %r8, %r11 +; andq %r11, %r10, %r11 +; shrq $2, %r8, %r8 +; andq %r8, %r10, %r8 +; shlq $2, %r11, %r11 +; orq %r11, %r8, %r11 +; movabsq $1085102592571150095, %rdi +; movq %r11, %rcx +; andq %rcx, %rdi, %rcx +; shrq $4, %r11, %r11 +; andq %r11, %rdi, %r11 +; shlq $4, %rcx, %rcx +; orq %rcx, %r11, %rcx +; movabsq $71777214294589695, %rdx ; movq %rcx, %r8 ; andq %r8, %rdx, %r8 -; shrq $16, %rcx, %rcx +; shrq $8, %rcx, %rcx ; andq %rcx, %rdx, %rcx -; shlq $16, %r8, %r8 +; shlq $8, %r8, %r8 ; orq %r8, %rcx, %r8 -; movabsq $4294967295, %rsi -; movq %r8, %rdx -; andq %rdx, %rsi, %rdx -; shrq $32, %r8, %r8 +; movabsq $281470681808895, %r11 +; movq %r8, %r10 +; andq %r10, %r11, %r10 +; shrq $16, %r8, %r8 +; andq %r8, %r11, %r8 +; shlq $16, %r10, %r10 +; orq %r10, %r8, %r10 +; movabsq $4294967295, %rdi +; movq %r10, %rdx +; andq %rdx, %rdi, %rdx +; shrq $32, %r10, %r10 ; shlq $32, %rdx, %rdx -; orq %rdx, %r8, %rdx +; orq %rdx, %r10, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -678,20 +674,20 @@ block2(v6: i128): ; block1: ; xorq %rax, %rax, %rax ; xorq %rdx, %rdx, %rdx -; movl $1, %r8d -; xorq %r9, %r9, %r9 -; addq %rax, %r8, %rax -; adcq %rdx, %r9, %rdx +; movl $1, %ecx +; xorq %r8, %r8, %r8 +; addq %rax, %rcx, %rax +; adcq %rdx, %r8, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret ; block2: ; xorq %rax, %rax, %rax ; xorq %rdx, %rdx, %rdx -; movl $2, %edi -; xorq %rcx, %rcx, %rcx -; addq %rax, %rdi, %rax -; adcq %rdx, %rcx, %rdx +; movl $2, %r10d +; xorq %rsi, %rsi, %rsi +; addq %rax, %r10, %rax +; adcq %rdx, %rsi, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -710,33 +706,31 @@ block0(v0: i128, v1: i128, v2: i64, v3: i128, v4: i128, v5: i128): ; pushq %rbp ; movq %rsp, %rbp -; subq %rsp, $64, %rsp -; movq %rbx, 32(%rsp) -; movq %r13, 40(%rsp) -; movq %r15, 48(%rsp) +; subq %rsp, $32, %rsp +; movq %r13, 16(%rsp) +; movq %r15, 24(%rsp) ; block0: ; movq %rdx, rsp(0 + virtual offset) -; movq 16(%rbp), %r11 +; movq 16(%rbp), %r10 ; movq 24(%rbp), %rax ; movq 32(%rbp), %rdx ; movq 40(%rbp), %r15 -; movq 48(%rbp), %rbx +; movq 48(%rbp), %r11 ; movq rsp(0 + virtual offset), %r13 ; addq %rdi, %r13, %rdi ; adcq %rsi, %rcx, %rsi ; xorq %rcx, %rcx, %rcx ; addq %r9, %r8, %r9 -; adcq %r11, %rcx, %r11 +; adcq %r10, %rcx, %r10 ; addq %rax, %r15, %rax -; adcq %rdx, %rbx, %rdx +; adcq %rdx, %r11, %rdx ; addq %rdi, %r9, %rdi -; adcq %rsi, %r11, %rsi +; adcq %rsi, %r10, %rsi ; addq %rax, %rdi, %rax ; adcq %rdx, %rsi, %rdx -; movq 32(%rsp), %rbx -; movq 40(%rsp), %r13 -; movq 48(%rsp), %r15 -; addq %rsp, $64, %rsp +; movq 16(%rsp), %r13 +; movq 24(%rsp), %r15 +; addq %rsp, $32, %rsp ; movq %rbp, %rsp ; popq %rbp ; ret @@ -753,31 +747,29 @@ block0(v0: i128): ; movq %rbx, 0(%rsp) ; movq %r13, 8(%rsp) ; movq %r14, 16(%rsp) -; movq %r15, 24(%rsp) ; block0: ; movq %rdx, %r14 ; movq %rdi, %rax ; movq %rsi, %rdx ; movq %rdi, %rbx ; movq %rsi, %r13 -; movq %rdi, %r15 -; movq %rsi, %r11 +; movq %rdi, %r11 +; movq %rsi, %r10 +; movq %rdi, %r9 ; movq %rdi, %rcx -; movq %rdi, %r8 -; movq %rsi, %r9 +; movq %rsi, %r8 ; movq %rbx, 0(%r14) ; movq %r13, 8(%r14) -; movq %r15, 16(%r14) -; movq %r11, 24(%r14) -; movq %rcx, 32(%r14) -; movq %r8, 40(%r14) -; movq %r9, 48(%r14) +; movq %r11, 16(%r14) +; movq %r10, 24(%r14) +; movq %r9, 32(%r14) +; movq %rcx, 40(%r14) +; movq %r8, 48(%r14) ; movq %rdi, 56(%r14) ; movq %rsi, 64(%r14) ; movq 0(%rsp), %rbx ; movq 8(%rsp), %r13 ; movq 16(%rsp), %r14 -; movq 24(%rsp), %r15 ; addq %rsp, $32, %rsp ; movq %rbp, %rsp ; popq %rbp @@ -799,15 +791,15 @@ block0(v0: i128, v1: i128): ; subq %rsp, $16, %rsp ; virtual_sp_offset_adjust 16 ; lea 0(%rsp), %r8 -; load_ext_name %g+0, %r9 -; call *%r9 -; movq 0(%rsp), %r11 -; movq 8(%rsp), %rdi +; load_ext_name %g+0, %rax +; call *%rax +; movq 0(%rsp), %r8 +; movq 8(%rsp), %r10 ; addq %rsp, $16, %rsp ; virtual_sp_offset_adjust -16 -; movq %r12, %r8 -; movq %r11, 0(%r8) -; movq %rdi, 8(%r8) +; movq %r12, %r9 +; movq %r8, 0(%r9) +; movq %r10, 8(%r9) ; movq 0(%rsp), %r12 ; addq %rsp, $16, %rsp ; movq %rbp, %rsp @@ -824,18 +816,18 @@ block0(v0: i128): ; movq %rsp, %rbp ; block0: ; movabsq $-1, %r9 -; bsrq %rsi, %rax -; cmovzq %r9, %rax, %rax -; movl $63, %r8d -; subq %r8, %rax, %r8 -; movabsq $-1, %rsi -; bsrq %rdi, %rcx -; cmovzq %rsi, %rcx, %rcx +; bsrq %rsi, %rsi +; cmovzq %r9, %rsi, %rsi +; movl $63, %edx +; subq %rdx, %rsi, %rdx +; movabsq $-1, %r10 +; bsrq %rdi, %rdi +; cmovzq %r10, %rdi, %rdi ; movl $63, %eax -; subq %rax, %rcx, %rax +; subq %rax, %rdi, %rax ; addq %rax, $64, %rax -; cmpq $64, %r8 -; cmovnzq %r8, %rax, %rax +; cmpq $64, %rdx +; cmovnzq %rdx, %rax, %rax ; xorq %rdx, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp @@ -853,12 +845,12 @@ block0(v0: i128): ; movl $64, %r9d ; bsfq %rdi, %rax ; cmovzq %r9, %rax, %rax -; movl $64, %edx -; bsfq %rsi, %rsi -; cmovzq %rdx, %rsi, %rsi -; addq %rsi, $64, %rsi +; movl $64, %ecx +; bsfq %rsi, %r10 +; cmovzq %rcx, %r10, %r10 +; addq %r10, $64, %r10 ; cmpq $64, %rax -; cmovzq %rsi, %rax, %rax +; cmovzq %r10, %rax, %rax ; xorq %rdx, %rdx, %rdx ; movq %rbp, %rsp ; popq %rbp @@ -890,19 +882,20 @@ block0(v0: i128, v1: i128): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdx, %r8 -; movq %r8, %rcx +; movq %rdx, %rax +; movq %rax, %rcx ; movq %rdi, %rdx ; shlq %cl, %rdx, %rdx ; shlq %cl, %rsi, %rsi ; movl $64, %ecx -; subq %rcx, %r8, %rcx +; movq %rax, %r11 +; subq %rcx, %r11, %rcx ; shrq %cl, %rdi, %rdi ; xorq %rax, %rax, %rax -; testq $127, %r8 +; testq $127, %r11 ; cmovzq %rax, %rdi, %rdi ; orq %rdi, %rsi, %rdi -; testq $64, %r8 +; testq $64, %r11 ; cmovzq %rdx, %rax, %rax ; cmovzq %rdi, %rdx, %rdx ; movq %rbp, %rsp @@ -926,9 +919,9 @@ block0(v0: i128, v1: i128): ; movl $64, %ecx ; subq %rcx, %r9, %rcx ; shlq %cl, %rsi, %rsi -; xorq %rdx, %rdx, %rdx +; xorq %rax, %rax, %rax ; testq $127, %r9 -; cmovzq %rdx, %rsi, %rsi +; cmovzq %rax, %rsi, %rsi ; orq %rsi, %rdi, %rsi ; xorq %rdx, %rdx, %rdx ; testq $64, %r9 @@ -954,16 +947,16 @@ block0(v0: i128, v1: i128): ; movq %rsi, %rdx ; sarq %cl, %rdx, %rdx ; movl $64, %ecx -; movq %r8, %r11 -; subq %rcx, %r11, %rcx -; movq %rsi, %r8 -; shlq %cl, %r8, %r8 -; xorq %r9, %r9, %r9 -; testq $127, %r11 -; cmovzq %r9, %r8, %r8 -; orq %rdi, %r8, %rdi +; movq %r8, %r9 +; subq %rcx, %r9, %rcx +; movq %rsi, %rax +; shlq %cl, %rax, %rax +; xorq %r8, %r8, %r8 +; testq $127, %r9 +; cmovzq %r8, %rax, %rax +; orq %rdi, %rax, %rdi ; sarq $63, %rsi, %rsi -; testq $64, %r11 +; testq $64, %r9 ; movq %rdx, %rax ; cmovzq %rdi, %rax, %rax ; cmovzq %rdx, %rsi, %rsi @@ -988,37 +981,39 @@ block0(v0: i128, v1: i128): ; movq %rsi, %r9 ; shlq %cl, %r9, %r9 ; movl $64, %ecx -; subq %rcx, %r11, %rcx +; movq %r11, %r10 +; subq %rcx, %r10, %rcx ; movq %rdi, %r8 ; shrq %cl, %r8, %r8 ; xorq %rax, %rax, %rax -; testq $127, %r11 +; testq $127, %r10 ; cmovzq %rax, %r8, %r8 ; orq %r8, %r9, %r8 -; testq $64, %r11 +; testq $64, %r10 ; cmovzq %rdx, %rax, %rax ; cmovzq %r8, %rdx, %rdx ; movl $128, %ecx -; movq %r11, %r8 -; subq %rcx, %r8, %rcx +; movq %r11, %r9 +; subq %rcx, %r9, %rcx ; shrq %cl, %rdi, %rdi -; movq %rsi, %r9 -; shrq %cl, %r9, %r9 -; movq %rcx, %r8 +; movq %rsi, %r8 +; shrq %cl, %r8, %r8 +; movq %rcx, %r9 ; movl $64, %ecx -; subq %rcx, %r8, %rcx +; movq %r9, %r10 +; subq %rcx, %r10, %rcx ; shlq %cl, %rsi, %rsi -; xorq %r11, %r11, %r11 -; testq $127, %r8 -; cmovzq %r11, %rsi, %rsi +; xorq %r9, %r9, %r9 +; testq $127, %r10 +; cmovzq %r9, %rsi, %rsi ; orq %rsi, %rdi, %rsi -; xorq %r11, %r11, %r11 -; testq $64, %r8 -; movq %r9, %rdi -; cmovzq %rsi, %rdi, %rdi -; cmovzq %r9, %r11, %r11 -; orq %rax, %rdi, %rax -; orq %rdx, %r11, %rdx +; xorq %rdi, %rdi, %rdi +; testq $64, %r10 +; movq %r8, %rcx +; cmovzq %rsi, %rcx, %rcx +; cmovzq %r8, %rdi, %rdi +; orq %rax, %rcx, %rax +; orq %rdx, %rdi, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret @@ -1039,38 +1034,38 @@ block0(v0: i128, v1: i128): ; movq %rsi, %r8 ; shrq %cl, %r8, %r8 ; movl $64, %ecx -; subq %rcx, %r11, %rcx +; movq %r11, %r10 +; subq %rcx, %r10, %rcx ; movq %rsi, %r9 ; shlq %cl, %r9, %r9 ; xorq %rdx, %rdx, %rdx -; testq $127, %r11 +; testq $127, %r10 ; cmovzq %rdx, %r9, %r9 ; orq %r9, %rax, %r9 ; xorq %rdx, %rdx, %rdx -; testq $64, %r11 +; testq $64, %r10 ; movq %r8, %rax ; cmovzq %r9, %rax, %rax ; cmovzq %r8, %rdx, %rdx ; movl $128, %ecx -; movq %r11, %r8 -; subq %rcx, %r8, %rcx -; movq %rdi, %r11 -; shlq %cl, %r11, %r11 +; subq %rcx, %r10, %rcx +; movq %rdi, %r8 +; shlq %cl, %r8, %r8 ; shlq %cl, %rsi, %rsi -; movq %rcx, %r8 +; movq %rcx, %r9 ; movl $64, %ecx -; movq %r8, %r9 -; subq %rcx, %r9, %rcx +; movq %r9, %r10 +; subq %rcx, %r10, %rcx ; shrq %cl, %rdi, %rdi -; xorq %r8, %r8, %r8 -; testq $127, %r9 -; cmovzq %r8, %rdi, %rdi +; xorq %r9, %r9, %r9 +; testq $127, %r10 +; cmovzq %r9, %rdi, %rdi ; orq %rdi, %rsi, %rdi -; testq $64, %r9 -; cmovzq %r11, %r8, %r8 -; cmovzq %rdi, %r11, %r11 -; orq %rax, %r8, %rax -; orq %rdx, %r11, %rdx +; testq $64, %r10 +; cmovzq %r8, %r9, %r9 +; cmovzq %rdi, %r8, %r8 +; orq %rax, %r9, %rax +; orq %rdx, %r8, %rdx ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/immediates.clif b/cranelift/filetests/filetests/isa/x64/immediates.clif index c6df5eb151..031dabb73a 100644 --- a/cranelift/filetests/filetests/isa/x64/immediates.clif +++ b/cranelift/filetests/filetests/isa/x64/immediates.clif @@ -18,13 +18,13 @@ block0(v0: i64, v1: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: +; movq %rdi, %r10 +; addq %r10, const(VCodeConstant(0)), %r10 +; movq %r10, 0(%rsi) ; movq %rdi, %r11 -; addq %r11, const(VCodeConstant(0)), %r11 +; subq %r11, const(VCodeConstant(0)), %r11 ; movq %r11, 0(%rsi) ; movq %rdi, %rax -; subq %rax, const(VCodeConstant(0)), %rax -; movq %rax, 0(%rsi) -; movq %rdi, %rax ; andq %rax, const(VCodeConstant(0)), %rax ; movq %rax, 0(%rsi) ; orq %rdi, const(VCodeConstant(0)), %rdi diff --git a/cranelift/filetests/filetests/isa/x64/load-op.clif b/cranelift/filetests/filetests/isa/x64/load-op.clif index dcbe3f8570..adca7bcb67 100644 --- a/cranelift/filetests/filetests/isa/x64/load-op.clif +++ b/cranelift/filetests/filetests/isa/x64/load-op.clif @@ -51,11 +51,11 @@ block0(v0: i64, v1: i64): store.i64 v3, v1 v4 = load.i64 v3 return v4 - ; check: movq 0(%rdi), %r11 - ; nextln: movq %r11, %rax - ; nextln: addq %rax, %rdi, %rax - ; nextln: movq %rax, 0(%rsi) - ; nextln: movq 0(%r11,%rdi,1), %rax + ; check: movq 0(%rdi), %r10 + ; nextln: movq %r10, %r11 + ; nextln: addq %r11, %rdi, %r11 + ; nextln: movq %r11, 0(%rsi) + ; nextln: movq 0(%r10,%rdi,1), %rax } function %merge_scalar_to_vector(i64) -> i32x4 { diff --git a/cranelift/filetests/filetests/isa/x64/popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt.clif index 94582c1d12..4f0be7407d 100644 --- a/cranelift/filetests/filetests/isa/x64/popcnt.clif +++ b/cranelift/filetests/filetests/isa/x64/popcnt.clif @@ -10,24 +10,24 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %rcx -; shrq $1, %rcx, %rcx +; movq %rdi, %rax +; shrq $1, %rax, %rax ; movabsq $8608480567731124087, %r8 -; andq %rcx, %r8, %rcx -; subq %rdi, %rcx, %rdi -; shrq $1, %rcx, %rcx -; andq %rcx, %r8, %rcx -; subq %rdi, %rcx, %rdi -; shrq $1, %rcx, %rcx -; andq %rcx, %r8, %rcx -; subq %rdi, %rcx, %rdi +; andq %rax, %r8, %rax +; subq %rdi, %rax, %rdi +; shrq $1, %rax, %rax +; andq %rax, %r8, %rax +; subq %rdi, %rax, %rdi +; shrq $1, %rax, %rax +; andq %rax, %r8, %rax +; subq %rdi, %rax, %rdi ; movq %rdi, %rax ; shrq $4, %rax, %rax ; addq %rax, %rdi, %rax -; movabsq $1085102592571150095, %rcx -; andq %rax, %rcx, %rax -; movabsq $72340172838076673, %r11 -; imulq %rax, %r11, %rax +; movabsq $1085102592571150095, %rsi +; andq %rax, %rsi, %rax +; movabsq $72340172838076673, %rdx +; imulq %rax, %rdx, %rax ; shrq $56, %rax, %rax ; movq %rbp, %rsp ; popq %rbp @@ -43,25 +43,25 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq 0(%rdi), %rdx -; movq %rdx, %r8 -; shrq $1, %r8, %r8 +; movq 0(%rdi), %rcx +; movq %rcx, %rdx +; shrq $1, %rdx, %rdx ; movabsq $8608480567731124087, %r9 -; andq %r8, %r9, %r8 -; subq %rdx, %r8, %rdx -; shrq $1, %r8, %r8 -; andq %r8, %r9, %r8 -; subq %rdx, %r8, %rdx -; shrq $1, %r8, %r8 -; andq %r8, %r9, %r8 -; subq %rdx, %r8, %rdx -; movq %rdx, %rax +; andq %rdx, %r9, %rdx +; subq %rcx, %rdx, %rcx +; shrq $1, %rdx, %rdx +; andq %rdx, %r9, %rdx +; subq %rcx, %rdx, %rcx +; shrq $1, %rdx, %rdx +; andq %rdx, %r9, %rdx +; subq %rcx, %rdx, %rcx +; movq %rcx, %rax ; shrq $4, %rax, %rax -; addq %rax, %rdx, %rax -; movabsq $1085102592571150095, %rdx -; andq %rax, %rdx, %rax -; movabsq $72340172838076673, %rsi -; imulq %rax, %rsi, %rax +; addq %rax, %rcx, %rax +; movabsq $1085102592571150095, %rdi +; andq %rax, %rdi, %rax +; movabsq $72340172838076673, %r8 +; imulq %rax, %r8, %rax ; shrq $56, %rax, %rax ; movq %rbp, %rsp ; popq %rbp @@ -76,17 +76,17 @@ block0(v0: i32): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movq %rdi, %rcx -; shrl $1, %ecx, %ecx +; movq %rdi, %rax +; shrl $1, %eax, %eax ; movl $2004318071, %r8d -; andl %ecx, %r8d, %ecx -; subl %edi, %ecx, %edi -; shrl $1, %ecx, %ecx -; andl %ecx, %r8d, %ecx -; subl %edi, %ecx, %edi -; shrl $1, %ecx, %ecx -; andl %ecx, %r8d, %ecx -; subl %edi, %ecx, %edi +; andl %eax, %r8d, %eax +; subl %edi, %eax, %edi +; shrl $1, %eax, %eax +; andl %eax, %r8d, %eax +; subl %edi, %eax, %edi +; shrl $1, %eax, %eax +; andl %eax, %r8d, %eax +; subl %edi, %eax, %edi ; movq %rdi, %rax ; shrl $4, %eax, %eax ; addl %eax, %edi, %eax @@ -107,21 +107,21 @@ block0(v0: i64): ; pushq %rbp ; movq %rsp, %rbp ; block0: -; movl 0(%rdi), %edx -; movq %rdx, %r8 -; shrl $1, %r8d, %r8d +; movl 0(%rdi), %ecx +; movq %rcx, %rdx +; shrl $1, %edx, %edx ; movl $2004318071, %r9d -; andl %r8d, %r9d, %r8d -; subl %edx, %r8d, %edx -; shrl $1, %r8d, %r8d -; andl %r8d, %r9d, %r8d -; subl %edx, %r8d, %edx -; shrl $1, %r8d, %r8d -; andl %r8d, %r9d, %r8d -; subl %edx, %r8d, %edx -; movq %rdx, %rax +; andl %edx, %r9d, %edx +; subl %ecx, %edx, %ecx +; shrl $1, %edx, %edx +; andl %edx, %r9d, %edx +; subl %ecx, %edx, %ecx +; shrl $1, %edx, %edx +; andl %edx, %r9d, %edx +; subl %ecx, %edx, %ecx +; movq %rcx, %rax ; shrl $4, %eax, %eax -; addl %eax, %edx, %eax +; addl %eax, %ecx, %eax ; andl %eax, $252645135, %eax ; imull %eax, $16843009, %eax ; shrl $24, %eax, %eax diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif index 987a5f619a..b14699ef99 100644 --- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif +++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif @@ -208,9 +208,9 @@ block0(v0: i32): ; load_const VCodeConstant(1), %xmm0 ; movd %edi, %xmm5 ; psllw %xmm0, %xmm5, %xmm0 -; lea const(VCodeConstant(0)), %rax +; lea const(VCodeConstant(0)), %rsi ; shlq $4, %rdi, %rdi -; movdqu 0(%rax,%rdi,1), %xmm13 +; movdqu 0(%rsi,%rdi,1), %xmm13 ; pand %xmm0, %xmm13, %xmm0 ; movq %rbp, %rsp ; popq %rbp @@ -292,14 +292,14 @@ block0(v0: i64x2, v1: i32): ; movq %rsp, %rbp ; block0: ; movq %rdi, %rax -; pextrd.w $0, %xmm0, %r11 -; pextrd.w $1, %xmm0, %rdi +; pextrd.w $0, %xmm0, %r10 +; pextrd.w $1, %xmm0, %rsi ; movq %rax, %rcx -; sarq %cl, %r11, %r11 -; sarq %cl, %rdi, %rdi +; sarq %cl, %r10, %r10 +; sarq %cl, %rsi, %rsi ; uninit %xmm0 -; pinsrd.w $0, %xmm0, %r11, %xmm0 -; pinsrd.w $1, %xmm0, %rdi, %xmm0 +; pinsrd.w $0, %xmm0, %r10, %xmm0 +; pinsrd.w $1, %xmm0, %rsi, %xmm0 ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/struct-arg.clif b/cranelift/filetests/filetests/isa/x64/struct-arg.clif index 9076630171..5ae2c3fd03 100644 --- a/cranelift/filetests/filetests/isa/x64/struct-arg.clif +++ b/cranelift/filetests/filetests/isa/x64/struct-arg.clif @@ -29,8 +29,8 @@ block0(v0: i64, v1: i64): ; block0: ; lea 16(%rbp), %rsi ; movzbq 0(%rdi), %rax -; movzbq 0(%rsi), %r11 -; addl %eax, %r11d, %eax +; movzbq 0(%rsi), %r10 +; addl %eax, %r10d, %eax ; movq %rbp, %rsp ; popq %rbp ; ret @@ -105,8 +105,8 @@ block0(v0: i64, v1: i64): ; lea 16(%rbp), %rsi ; lea 144(%rbp), %rdi ; movzbq 0(%rsi), %rax -; movzbq 0(%rdi), %r11 -; addl %eax, %r11d, %eax +; movzbq 0(%rdi), %r10 +; addl %eax, %r10d, %eax ; movq %rbp, %rsp ; popq %rbp ; ret diff --git a/cranelift/filetests/filetests/isa/x64/table.clif b/cranelift/filetests/filetests/isa/x64/table.clif index 2a24da6efd..57e6d46e62 100644 --- a/cranelift/filetests/filetests/isa/x64/table.clif +++ b/cranelift/filetests/filetests/isa/x64/table.clif @@ -23,13 +23,13 @@ block0(v0: i32, v1: r64, v2: i64): ; cmpl %eax, %edi ; jb label1; j label2 ; block1: -; movl %edi, %r9d -; movq 0(%rdx), %rdx -; movq %rdx, %r8 -; addq %r8, %r9, %r8 +; movl %edi, %r8d +; movq 0(%rdx), %rcx +; movq %rcx, %rdx +; addq %rdx, %r8, %rdx ; cmpl %eax, %edi -; cmovnbq %rdx, %r8, %r8 -; movq %rsi, 0(%r8) +; cmovnbq %rcx, %rdx, %rdx +; movq %rsi, 0(%rdx) ; movq %rbp, %rsp ; popq %rbp ; ret