This PR switches Cranelift over to the new register allocator, regalloc2. See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801) for a summary of the design changes. This switchover has implications for core VCode/MachInst types and the lowering pass.

Overall, this change brings improvements to both compile time and speed of generated code (runtime), as reported in #3942:

```
Benchmark        Compilation (wallclock)    Execution (wallclock)
blake3-scalar    25% faster                 28% faster
blake3-simd      no diff                    no diff
meshoptimizer    19% faster                 17% faster
pulldown-cmark   17% faster                 no diff
bz2              15% faster                 no diff
SpiderMonkey,    21% faster                 2% faster
  fib(30)
clang.wasm       42% faster                 N/A
```
83 lines
1.3 KiB
Plaintext
83 lines
1.3 KiB
Plaintext
;; Filetest directives: run the `compile` test in `precise-output` mode for the
;; aarch64 target, with the `unwind_info` setting turned off.
;; NOTE(review): the `; ...` comment lines that follow each function look like
;; the expected output this mode compares against -- confirm, and regenerate
;; them with the test tooling rather than editing them by hand.
test compile precise-output
|
|
set unwind_info=false
|
|
target aarch64
|
|
|
|
;; %f: compares the i8 argument against the constant 42 with `ifcmp` and uses
;; `selectif eq` to return the first i64 argument on equality, otherwise the
;; second. The expected code below zero-extends the i8 (`uxtb`) before the
;; flag-setting compare and then picks the result with a single `csel`.
function %f(i8, i64, i64) -> i64 {
|
|
block0(v0: i8, v1: i64, v2: i64):
|
|
v3 = iconst.i8 42
|
|
v4 = ifcmp v0, v3
|
|
v5 = selectif.i64 eq v4, v1, v2
|
|
return v5
|
|
}
|
|
|
|
; block0:
|
|
; uxtb w8, w0
|
|
; subs wzr, w8, #42
|
|
; csel x0, x1, x2, eq
|
|
; ret
|
|
|
|
;; %g: compares the i8 argument against the constant 42 with `ifcmp` and turns
;; the `eq` condition into a b1 result via `trueif`. The expected code below
;; zero-extends the i8 (`uxtb`), compares, and materializes the boolean with
;; `cset`.
function %g(i8) -> b1 {
|
|
block0(v0: i8):
|
|
v3 = iconst.i8 42
|
|
v4 = ifcmp v0, v3
|
|
v5 = trueif eq v4
|
|
return v5
|
|
}
|
|
|
|
; block0:
|
|
; uxtb w4, w0
|
|
; subs wzr, w4, #42
|
|
; cset x0, eq
|
|
; ret
|
|
|
|
;; %h: `bitselect.i8` with v0 as the mask operand and v1/v2 as the two data
;; operands. The expected code below is the three-instruction sequence
;; `and` (v1 & v0), `bic` (v2 & ~v0), `orr` (combine), with no flag-setting
;; compare involved.
function %h(i8, i8, i8) -> i8 {
|
|
block0(v0: i8, v1: i8, v2: i8):
|
|
v3 = bitselect.i8 v0, v1, v2
|
|
return v3
|
|
}
|
|
|
|
; block0:
|
|
; and x8, x1, x0
|
|
; bic x0, x2, x0
|
|
; orr x0, x0, x8
|
|
; ret
|
|
|
|
;; %i (b1 condition): `select.i8` driven directly by a b1 argument. The
;; expected code below first masks the condition to its low bit
;; (`and w8, w0, #1`), compares that against zero, and then uses `csel ... ne`
;; to choose between the two i8 arguments.
function %i(b1, i8, i8) -> i8 {
|
|
block0(v0: b1, v1: i8, v2: i8):
|
|
v3 = select.i8 v0, v1, v2
|
|
return v3
|
|
}
|
|
|
|
; block0:
|
|
; and w8, w0, #1
|
|
; subs wzr, w8, wzr
|
|
; csel x0, x1, x2, ne
|
|
; ret
|
|
|
|
;; %i (icmp condition): `select.i8` whose condition is an `icmp eq` of the i32
;; argument against the constant 42. The expected code below contains only the
;; flag-setting compare followed by `csel ... eq`; no separate instruction
;; materializes the boolean v4.
;; NOTE(review): this function reuses the name %i from the function defined
;; just before it in this file -- presumably harmless for the test runner, but
;; a distinct name would make failures easier to attribute; confirm.
function %i(i32, i8, i8) -> i8 {
|
|
block0(v0: i32, v1: i8, v2: i8):
|
|
v3 = iconst.i32 42
|
|
v4 = icmp.i32 eq v0, v3
|
|
v5 = select.i8 v4, v1, v2
|
|
return v5
|
|
}
|
|
|
|
; block0:
|
|
; subs wzr, w0, #42
|
|
; csel x0, x1, x2, eq
|
|
; ret
|
|
|
|
;; %i128_select: `select.i128` driven by a b1 argument. The expected code below
;; masks the condition to its low bit, compares it against zero, and then
;; issues two `csel ... ne` instructions: one writing x0 from x2/x4 and one
;; writing x1 from x3/x5.
;; NOTE(review): presumably each i128 value occupies a pair of 64-bit
;; registers (v1 in x2:x3, v2 in x4:x5) -- confirm against the aarch64 ABI code.
function %i128_select(b1, i128, i128) -> i128 {
|
|
block0(v0: b1, v1: i128, v2: i128):
|
|
v3 = select.i128 v0, v1, v2
|
|
return v3
|
|
}
|
|
|
|
; block0:
|
|
; and w14, w0, #1
|
|
; subs wzr, w14, wzr
|
|
; csel x0, x2, x4, ne
|
|
; csel x1, x3, x5, ne
|
|
; ret
|
|
|