This PR switches Cranelift over to the new register allocator, regalloc2. See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801) for a summary of the design changes. This switchover has implications for core VCode/MachInst types and the lowering pass.

Overall, this change brings improvements to both compile time and speed of generated code (runtime), as reported in #3942:

```
Benchmark       Compilation (wallclock)     Execution (wallclock)
blake3-scalar   25% faster                  28% faster
blake3-simd     no diff                     no diff
meshoptimizer   19% faster                  17% faster
pulldown-cmark  17% faster                  no diff
bz2             15% faster                  no diff
SpiderMonkey,   21% faster                  2% faster
  fib(30)
clang.wasm      42% faster                  N/A
```
190 lines
3.4 KiB
Plaintext
190 lines
3.4 KiB
Plaintext
test compile precise-output
target s390x

;; Golden-output test for s390x function prologues when a function declares a
;; stack limit, either as a `stack_limit` argument or as a global value
;; derived from `vmctx`. Each function is followed by the exact code the
;; backend is expected to emit, written as `; `-prefixed comment lines.
;;
;; NOTE(review): lines starting with `;;` are documentation and are assumed to
;; be skipped by the precise-output comparison -- confirm against the
;; filetest harness before relying on it.

;; Baseline: no stack limit is declared and the body is a bare `return`.
;; The expected code is a single return branch through %r14. This function is
;; also the callee referenced as `fn0 = %foo()` by the call tests in this file.
function %foo() {
block0:
    return
}

; block0:
;   br %r14

;; Leaf function that receives a `stack_limit` argument but declares no stack
;; slots and makes no calls. The expected code contains no limit check at
;; all: only the return branch is emitted.
function %stack_limit_leaf_zero(i64 stack_limit) {
block0(v0: i64):
    return
}

; block0:
;   br %r14

;; Leaf function whose stack limit is a global value reached through two
;; loads from `vmctx` (gv0 -> gv1 -> gv2). It declares no stack slots and
;; makes no calls; the expected code contains neither the loads nor a limit
;; check, only the return branch.
;;
;; NOTE(review): `gv2` is an i64 load flagged `aligned` at offset 4, which is
;; not a multiple of the 8-byte access size. This test only inspects emitted
;; code, so it looks harmless here -- confirm it is intentional.
function %stack_limit_gv_leaf_zero(i64 vmctx) {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0
    gv2 = load.i64 notrap aligned gv1+4
    stack_limit = gv2

block0(v0: i64):
    return
}

; block0:
;   br %r14

;; Non-leaf function (it calls %foo) with the limit passed as the
;; `stack_limit` argument and no explicit stack slots.
;;
;; Expected prologue, in order:
;;   * `clgrtle` compare-and-trap of %r15 against the limit argument in %r2,
;;     emitted before anything is stored or %r15 is moved;
;;   * `stmg` stores %r14/%r15 at 112(%r15);
;;   * `aghi` lowers %r15 by 160.
;; The epilogue reloads %r14/%r15 from 272(%r15), i.e. the same save slot
;; seen from the lowered %r15 (112 + 160).
function %stack_limit_call_zero(i64 stack_limit) {
    fn0 = %foo()

block0(v0: i64):
    call fn0()
    return
}

;   clgrtle %r15, %r2
;   stmg %r14, %r15, 112(%r15)
;   aghi %r15, -160
;   virtual_sp_offset_adjust 160
; block0:
;   bras %r1, 12 ; data %foo + 0 ; lg %r4, 0(%r1)
;   basr %r14, %r4
;   lmg %r14, %r15, 272(%r15)
;   br %r14

;; Non-leaf function (it calls %foo) whose stack limit is a global value
;; reached through two loads from `vmctx`, with no explicit stack slots.
;;
;; Expected prologue: the two loads are materialised into %r1 (first from
;; 0(%r2), then from 4(%r1)), followed by a `clgrtle` compare-and-trap of
;; %r15 against %r1. Only then are %r14/%r15 stored and %r15 lowered by 160.
;; The rest matches the direct-argument variant of this test.
function %stack_limit_gv_call_zero(i64 vmctx) {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0
    gv2 = load.i64 notrap aligned gv1+4
    stack_limit = gv2
    fn0 = %foo()

block0(v0: i64):
    call fn0()
    return
}

;   lg %r1, 0(%r2)
;   lg %r1, 4(%r1)
;   clgrtle %r15, %r1
;   stmg %r14, %r15, 112(%r15)
;   aghi %r15, -160
;   virtual_sp_offset_adjust 160
; block0:
;   bras %r1, 12 ; data %foo + 0 ; lg %r4, 0(%r1)
;   basr %r14, %r4
;   lmg %r14, %r15, 272(%r15)
;   br %r14

;; Leaf function with a 168-byte explicit stack slot and the limit passed as
;; the `stack_limit` argument.
;;
;; Expected prologue: `la` computes limit + 168 into %r1, `clgrtle`
;; compare-and-traps %r15 against that sum, and `aghi` then lowers %r15 by
;; 168. The body undoes the adjustment before returning.
function %stack_limit(i64 stack_limit) {
    ss0 = explicit_slot 168

block0(v0: i64):
    return
}

;   la %r1, 168(%r2)
;   clgrtle %r15, %r1
;   aghi %r15, -168
; block0:
;   aghi %r15, 168
;   br %r14

;; Leaf function with a 400000-byte explicit stack slot and the limit passed
;; as the `stack_limit` argument.
;;
;; Expected prologue contains two compare-and-trap checks: first %r15 against
;; the raw limit in %r2, then against limit + 400000 (formed with `lay` into
;; %r1). The frame is allocated and released with `agfi`.
;; NOTE(review): the extra check against the raw limit presumably guards
;; against wraparound when the frame size is added to the limit -- confirm
;; against the ABI prologue code.
function %large_stack_limit(i64 stack_limit) {
    ss0 = explicit_slot 400000

block0(v0: i64):
    return
}

;   clgrtle %r15, %r2
;   lay %r1, 400000(%r2)
;   clgrtle %r15, %r1
;   agfi %r15, -400000
; block0:
;   agfi %r15, 400000
;   br %r14

;; Leaf function with a 4000000-byte explicit stack slot and the limit passed
;; as the `stack_limit` argument.
;;
;; Expected prologue: an initial compare-and-trap of %r15 against the raw
;; limit in %r2, then the limit is copied to %r1 (`lgr`) and the frame size
;; is added with `algfi` before the second compare-and-trap. Unlike the
;; 400000-byte case, the sum is not formed with a single `lay`.
;; NOTE(review): presumably because 4000000 does not fit the displacement
;; field of `lay` -- confirm against the z/Architecture reference.
function %huge_stack_limit(i64 stack_limit) {
    ss0 = explicit_slot 4000000

block0(v0: i64):
    return
}

;   clgrtle %r15, %r2
;   lgr %r1, %r2
;   algfi %r1, 4000000
;   clgrtle %r15, %r1
;   agfi %r15, -4000000
; block0:
;   agfi %r15, 4000000
;   br %r14

;; Leaf function with a 20-byte explicit stack slot whose stack limit is a
;; global value reached through two loads from `vmctx`.
;;
;; Expected prologue: both loads go through %r1, `la` adds 24 to the loaded
;; limit, and `clgrtle` compare-and-traps %r15 against the result before
;; `aghi` lowers %r15 by 24. Note the expected frame adjustment is 24 bytes
;; for the 20-byte slot.
function %limit_preamble(i64 vmctx) {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0
    gv2 = load.i64 notrap aligned gv1+4
    stack_limit = gv2
    ss0 = explicit_slot 20

block0(v0: i64):
    return
}

;   lg %r1, 0(%r2)
;   lg %r1, 4(%r1)
;   la %r1, 24(%r1)
;   clgrtle %r15, %r1
;   aghi %r15, -24
; block0:
;   aghi %r15, 24
;   br %r14

;; Leaf function with a 400000-byte explicit stack slot whose stack limit is
;; a global value reached through two loads from `vmctx`.
;;
;; Expected prologue: both loads go through %r1, then two compare-and-trap
;; checks follow -- %r15 against the loaded limit, and %r15 against
;; limit + 400000 (formed in place with `lay`). The frame is allocated and
;; released with `agfi`.
function %limit_preamble_large(i64 vmctx) {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0
    gv2 = load.i64 notrap aligned gv1+4
    stack_limit = gv2
    ss0 = explicit_slot 400000

block0(v0: i64):
    return
}

;   lg %r1, 0(%r2)
;   lg %r1, 4(%r1)
;   clgrtle %r15, %r1
;   lay %r1, 400000(%r1)
;   clgrtle %r15, %r1
;   agfi %r15, -400000
; block0:
;   agfi %r15, 400000
;   br %r14

;; Leaf function with a 4000000-byte explicit stack slot whose stack limit is
;; a global value reached through two loads from `vmctx`.
;;
;; Expected prologue: both loads go through %r1, %r15 is compare-and-trapped
;; against the loaded limit, the frame size is added to %r1 with `algfi`, and
;; %r15 is compare-and-trapped again against the sum. The frame is allocated
;; and released with `agfi`.
function %limit_preamble_huge(i64 vmctx) {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0
    gv2 = load.i64 notrap aligned gv1+4
    stack_limit = gv2
    ss0 = explicit_slot 4000000

block0(v0: i64):
    return
}

;   lg %r1, 0(%r2)
;   lg %r1, 4(%r1)
;   clgrtle %r15, %r1
;   algfi %r1, 4000000
;   clgrtle %r15, %r1
;   agfi %r15, -4000000
; block0:
;   agfi %r15, 4000000
;   br %r14

;; Leaf function with a 20-byte explicit stack slot whose stack limit is
;; loaded from `vmctx` at offset 1000000 (a single load, unlike the two-level
;; chain used by the other `limit_preamble` tests).
;;
;; Expected prologue: the offset is first materialised in %r1 with `lgfi` and
;; then used as an index in `lg %r1, 0(%r1,%r2)`, rather than being encoded
;; as a displacement. After that, `la` adds 24, `clgrtle` compare-and-traps
;; %r15 against the result, and `aghi` lowers %r15 by 24.
function %limit_preamble_huge_offset(i64 vmctx) {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned gv0+1000000
    stack_limit = gv1
    ss0 = explicit_slot 20

block0(v0: i64):
    return
}

;   lgfi %r1, 1000000 ; lg %r1, 0(%r1,%r2)
;   la %r1, 24(%r1)
;   clgrtle %r15, %r1
;   aghi %r15, -24
; block0:
;   aghi %r15, 24
;   br %r14