This PR switches Cranelift over to the new register allocator, regalloc2. See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801) for a summary of the design changes. This switchover has implications for core VCode/MachInst types and the lowering pass. Overall, this change brings improvements to both compile time and speed of generated code (runtime), as reported in #3942:

```
Benchmark       Compilation (wallclock)     Execution (wallclock)
blake3-scalar   25% faster                  28% faster
blake3-simd     no diff                     no diff
meshoptimizer   19% faster                  17% faster
pulldown-cmark  17% faster                  no diff
bz2             15% faster                  no diff
SpiderMonkey,   21% faster                  2% faster
  fib(30)
clang.wasm      42% faster                  N/A
```
76 lines
1.2 KiB
Plaintext
76 lines
1.2 KiB
Plaintext
;; Filetest directives: run the `compile` test in `precise-output` mode for the
;; x86_64 target on every function in this file.
;; NOTE(review): the `; `-prefixed listings that follow each function look like
;; the expected disassembly, presumably compared verbatim -- confirm against the
;; filetest runner before editing them by hand.
test compile precise-output
|
|
target x86_64
|
|
|
|
;; %f0: conditional select on a boolean.
;; Takes a b1 condition (v0) and two i32 values (v1, v2) and returns v1 when the
;; condition is true, otherwise v2. The listing that follows this function shows
;; it lowering to a `testb $1` on the condition followed by a `cmovnzl`.
;; NOTE(review): these notes use `;;` to stay distinct from the `; ` listing
;; lines -- confirm the filetest runner ignores `;;` comments.
function %f0(b1, i32, i32) -> i32 {
|
|
block0(v0: b1, v1: i32, v2: i32):
|
|
v3 = select.i32 v0, v1, v2
|
|
return v3
|
|
}
|
|
|
|
; pushq %rbp
|
|
; movq %rsp, %rbp
|
|
; block0:
|
|
; testb $1, %dil
|
|
; cmovnzl %esi, %edx, %edx
|
|
; movq %rdx, %rax
|
|
; movq %rbp, %rsp
|
|
; popq %rbp
|
|
; ret
|
|
|
|
;; %f1: two-way branch on a boolean using `brnz`.
;; Branches to block1 (returns 1) when v0 is non-zero/true; otherwise falls
;; through to the `jump` to block2 (returns 2). The listing that follows shows
;; the branch pair lowering to `testb $1` plus `jnz label1; j label2`.
;; NOTE(review): these notes use `;;` to stay distinct from the `; ` listing
;; lines -- confirm the filetest runner ignores `;;` comments.
function %f1(b1) -> i32 {
|
|
block0(v0: b1):
|
|
brnz v0, block1
|
|
jump block2
|
|
block1:
|
|
v1 = iconst.i32 1
|
|
return v1
|
|
block2:
|
|
v2 = iconst.i32 2
|
|
return v2
|
|
}
|
|
|
|
; pushq %rbp
|
|
; movq %rsp, %rbp
|
|
; block0:
|
|
; testb $1, %dil
|
|
; jnz label1; j label2
|
|
; block1:
|
|
; movl $1, %eax
|
|
; movq %rbp, %rsp
|
|
; popq %rbp
|
|
; ret
|
|
; block2:
|
|
; movl $2, %eax
|
|
; movq %rbp, %rsp
|
|
; popq %rbp
|
|
; ret
|
|
|
|
;; %f2: two-way branch on a boolean using `brz` (the inverse of the `brnz`
;; case in %f1).
;; Branches to block1 (returns 1) when v0 is zero/false; otherwise falls
;; through to the `jump` to block2 (returns 2). The listing that follows shows
;; the branch pair lowering to `testb $1` plus `jz label1; j label2`.
;; NOTE(review): these notes use `;;` to stay distinct from the `; ` listing
;; lines -- confirm the filetest runner ignores `;;` comments.
function %f2(b1) -> i32 {
|
|
block0(v0: b1):
|
|
brz v0, block1
|
|
jump block2
|
|
block1:
|
|
v1 = iconst.i32 1
|
|
return v1
|
|
block2:
|
|
v2 = iconst.i32 2
|
|
return v2
|
|
}
|
|
|
|
; pushq %rbp
|
|
; movq %rsp, %rbp
|
|
; block0:
|
|
; testb $1, %dil
|
|
; jz label1; j label2
|
|
; block1:
|
|
; movl $1, %eax
|
|
; movq %rbp, %rsp
|
|
; popq %rbp
|
|
; ret
|
|
; block2:
|
|
; movl $2, %eax
|
|
; movq %rbp, %rsp
|
|
; popq %rbp
|
|
; ret
|
|
|