This PR switches Cranelift over to the new register allocator, regalloc2. See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801) for a summary of the design changes. This switchover has implications for core VCode/MachInst types and the lowering pass. Overall, this change brings improvements to both compile time and the runtime of generated code, as reported in #3942:

```
Benchmark              Compilation (wallclock)   Execution (wallclock)
blake3-scalar          25% faster                28% faster
blake3-simd            no diff                   no diff
meshoptimizer          19% faster                17% faster
pulldown-cmark         17% faster                no diff
bz2                    15% faster                no diff
SpiderMonkey, fib(30)  21% faster                2% faster
clang.wasm             42% faster                N/A
```
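One structural consequence of the switchover is that the allocator no longer sees instructions that mutate registers in place; instead, each machine instruction reports a flat list of operands over virtual registers, each tagged as a use or a def. Below is a minimal sketch against regalloc2's public `Operand`/`VReg` types; the instruction shape and the names `addr` and `dst` are invented for illustration and are not Cranelift's actual lowering code:

```rust
use regalloc2::{Operand, RegClass, VReg};

/// Operands for a hypothetical "load 8 bytes from [addr] into dst"
/// instruction: one register use and one register def.
fn load_operands(addr: VReg, dst: VReg) -> Vec<Operand> {
    vec![
        Operand::reg_use(addr), // address base: read at this instruction
        Operand::reg_def(dst),  // result: written at this instruction
    ]
}

fn main() {
    let addr = VReg::new(0, RegClass::Int);
    let dst = VReg::new(1, RegClass::Int);
    // Because `addr`'s live range can end at this instruction, regalloc2
    // is free to assign `dst` to the same physical register, yielding
    // e.g. `lg %r2, 0(%r2)` on s390x.
    println!("{:?}", load_operands(addr, dst));
}
```

The updated s390x precise-output test for loads below shows that pattern throughout: every register-addressed load reuses the incoming address register `%r2` for its result, and the symbol-addressed forms load PC-relative from the symbol directly.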
```
test compile precise-output
target s390x

function %load_i64(i64) -> i64 {
block0(v0: i64):
    v1 = load.i64 v0
    return v1
}

; block0:
;   lg %r2, 0(%r2)
;   br %r14

function %load_i64_sym() -> i64 {
    gv0 = symbol colocated %sym
block0:
    v0 = symbol_value.i64 gv0
    v1 = load.i64 v0
    return v1
}

; block0:
;   lgrl %r2, %sym + 0
;   br %r14

function %uload8_i64(i64) -> i64 {
block0(v0: i64):
    v1 = uload8.i64 v0
    return v1
}

; block0:
;   llgc %r2, 0(%r2)
;   br %r14

function %sload8_i64(i64) -> i64 {
block0(v0: i64):
    v1 = sload8.i64 v0
    return v1
}

; block0:
;   lgb %r2, 0(%r2)
;   br %r14

function %uload16_i64(i64) -> i64 {
block0(v0: i64):
    v1 = uload16.i64 v0
    return v1
}

; block0:
;   llgh %r2, 0(%r2)
;   br %r14

function %uload16_i64_sym() -> i64 {
    gv0 = symbol colocated %sym
block0:
    v0 = symbol_value.i64 gv0
    v1 = uload16.i64 v0
    return v1
}

; block0:
;   llghrl %r2, %sym + 0
;   br %r14

function %sload16_i64(i64) -> i64 {
block0(v0: i64):
    v1 = sload16.i64 v0
    return v1
}

; block0:
;   lgh %r2, 0(%r2)
;   br %r14

function %sload16_i64_sym() -> i64 {
    gv0 = symbol colocated %sym
block0:
    v0 = symbol_value.i64 gv0
    v1 = sload16.i64 v0
    return v1
}

; block0:
;   lghrl %r2, %sym + 0
;   br %r14

function %uload32_i64(i64) -> i64 {
block0(v0: i64):
    v1 = uload32.i64 v0
    return v1
}

; block0:
;   llgf %r2, 0(%r2)
;   br %r14

function %uload32_i64_sym() -> i64 {
    gv0 = symbol colocated %sym
block0:
    v0 = symbol_value.i64 gv0
    v1 = uload32.i64 v0
    return v1
}

; block0:
;   llgfrl %r2, %sym + 0
;   br %r14

function %sload32_i64(i64) -> i64 {
block0(v0: i64):
    v1 = sload32.i64 v0
    return v1
}

; block0:
;   lgf %r2, 0(%r2)
;   br %r14

function %sload32_i64_sym() -> i64 {
    gv0 = symbol colocated %sym
block0:
    v0 = symbol_value.i64 gv0
    v1 = sload32.i64 v0
    return v1
}

; block0:
;   lgfrl %r2, %sym + 0
;   br %r14

function %load_i32(i64) -> i32 {
block0(v0: i64):
    v1 = load.i32 v0
    return v1
}

; block0:
;   l %r2, 0(%r2)
;   br %r14

function %load_i32_sym() -> i32 {
    gv0 = symbol colocated %sym
block0:
    v0 = symbol_value.i64 gv0
    v1 = load.i32 v0
    return v1
}

; block0:
;   lrl %r2, %sym + 0
;   br %r14

function %load_i32_off(i64) -> i32 {
block0(v0: i64):
    v1 = load.i32 v0+4096
    return v1
}

; block0:
;   ly %r2, 4096(%r2)
;   br %r14

function %uload8_i32(i64) -> i32 {
block0(v0: i64):
    v1 = uload8.i32 v0
    return v1
}

; block0:
;   llc %r2, 0(%r2)
;   br %r14

function %sload8_i32(i64) -> i32 {
block0(v0: i64):
    v1 = sload8.i32 v0
    return v1
}

; block0:
;   lb %r2, 0(%r2)
;   br %r14

function %uload16_i32(i64) -> i32 {
block0(v0: i64):
    v1 = uload16.i32 v0
    return v1
}

; block0:
;   llh %r2, 0(%r2)
;   br %r14

function %uload16_i32_sym() -> i32 {
    gv0 = symbol colocated %sym
block0:
    v0 = symbol_value.i64 gv0
    v1 = uload16.i32 v0
    return v1
}

; block0:
;   llhrl %r2, %sym + 0
;   br %r14

function %sload16_i32(i64) -> i32 {
block0(v0: i64):
    v1 = sload16.i32 v0
    return v1
}

; block0:
;   lh %r2, 0(%r2)
;   br %r14

function %sload16_i32_off(i64) -> i32 {
block0(v0: i64):
    v1 = sload16.i32 v0+4096
    return v1
}

; block0:
;   lhy %r2, 4096(%r2)
;   br %r14

function %sload16_i32_sym() -> i32 {
    gv0 = symbol colocated %sym
block0:
    v0 = symbol_value.i64 gv0
    v1 = sload16.i32 v0
    return v1
}

; block0:
;   lhrl %r2, %sym + 0
;   br %r14

function %load_i16(i64) -> i16 {
block0(v0: i64):
    v1 = load.i16 v0
    return v1
}

; block0:
;   llh %r2, 0(%r2)
;   br %r14

function %load_i16_sym() -> i16 {
    gv0 = symbol colocated %sym
block0:
    v0 = symbol_value.i64 gv0
    v1 = load.i16 v0
    return v1
}

; block0:
;   llhrl %r2, %sym + 0
;   br %r14

function %uload8_i16(i64) -> i16 {
block0(v0: i64):
    v1 = uload8.i16 v0
    return v1
}

; block0:
;   llc %r2, 0(%r2)
;   br %r14

function %sload8_i16(i64) -> i16 {
block0(v0: i64):
    v1 = sload8.i16 v0
    return v1
}

; block0:
;   lb %r2, 0(%r2)
;   br %r14

function %load_i8(i64) -> i8 {
block0(v0: i64):
    v1 = load.i8 v0
    return v1
}

; block0:
;   llc %r2, 0(%r2)
;   br %r14
```