This PR switches Cranelift over to the new register allocator, regalloc2. See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801) for a summary of the design changes. This switchover has implications for core VCode/MachInst types and the lowering pass. Overall, this change brings improvements to both compile time and speed of generated code (runtime), as reported in #3942:

```
Benchmark        Compilation (wallclock)  Execution (wallclock)
blake3-scalar    25% faster               28% faster
blake3-simd      no diff                  no diff
meshoptimizer    19% faster               17% faster
pulldown-cmark   17% faster               no diff
bz2              15% faster               no diff
SpiderMonkey,    21% faster               2% faster
  fib(30)
clang.wasm       42% faster               N/A
```
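For a rough feel of the operand-based model regalloc2 uses (see the linked design document for the real details), here is a minimal sketch. The `Add` type is hypothetical and is not the actual Cranelift `MachInst` code; what it illustrates is regalloc2's public `Operand`/`VReg` API, where each instruction reports its register operands as explicit uses and defs and the allocator rewrites them to physical registers, rather than the backend consulting a separate virtual-to-physical mapping.

```rust
use regalloc2::{Operand, RegClass, VReg};

/// Hypothetical three-operand add, `dst = lhs + rhs`, standing in
/// for a lowered machine instruction. Illustrative only.
struct Add {
    dst: VReg,
    lhs: VReg,
    rhs: VReg,
}

impl Add {
    /// Report operands to the allocator: two uses and one def.
    /// regalloc2 consumes per-instruction use/def metadata like this
    /// instead of a separately computed liveness side-table.
    fn operands(&self) -> Vec<Operand> {
        vec![
            Operand::reg_use(self.lhs),
            Operand::reg_use(self.rhs),
            Operand::reg_def(self.dst),
        ]
    }
}

fn main() {
    // Virtual registers are just (index, register class) pairs.
    let v0 = VReg::new(0, RegClass::Int);
    let v1 = VReg::new(1, RegClass::Int);
    let v2 = VReg::new(2, RegClass::Int);

    let add = Add { dst: v2, lhs: v0, rhs: v1 };
    for op in add.operands() {
        println!("{:?}", op); // prints each operand with its use/def role
    }
}
```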
One of the updated precise-output filetests (aarch64 SIMD comparisons against a splatted zero):

```
test compile precise-output
set unwind_info=false
target aarch64

function %f0(i8x16) -> b8x16 {
block0(v0: i8x16):
    v1 = iconst.i8 0
    v2 = splat.i8x16 v1
    v3 = icmp eq v0, v2
    return v3
}

; block0:
;   cmeq v0.16b, v0.16b, #0
;   ret

function %f1(i16x8) -> b16x8 {
block0(v0: i16x8):
    v1 = iconst.i16 0
    v2 = splat.i16x8 v1
    v3 = icmp eq v2, v0
    return v3
}

; block0:
;   cmeq v0.8h, v0.8h, #0
;   ret

function %f2(i32x4) -> b32x4 {
block0(v0: i32x4):
    v1 = iconst.i32 0
    v2 = splat.i32x4 v1
    v3 = icmp ne v0, v2
    return v3
}

; block0:
;   cmeq v3.4s, v0.4s, #0
;   mvn v0.16b, v3.16b
;   ret

function %f3(i64x2) -> b64x2 {
block0(v0: i64x2):
    v1 = iconst.i64 0
    v2 = splat.i64x2 v1
    v3 = icmp ne v2, v0
    return v3
}

; block0:
;   cmeq v3.2d, v0.2d, #0
;   mvn v0.16b, v3.16b
;   ret

function %f4(i8x16) -> b8x16 {
block0(v0: i8x16):
    v1 = iconst.i8 0
    v2 = splat.i8x16 v1
    v3 = icmp sle v0, v2
    return v3
}

; block0:
;   cmle v0.16b, v0.16b, #0
;   ret

function %f5(i16x8) -> b16x8 {
block0(v0: i16x8):
    v1 = iconst.i16 0
    v2 = splat.i16x8 v1
    v3 = icmp sle v2, v0
    return v3
}

; block0:
;   cmge v0.8h, v0.8h, #0
;   ret

function %f6(i32x4) -> b32x4 {
block0(v0: i32x4):
    v1 = iconst.i32 0
    v2 = splat.i32x4 v1
    v3 = icmp sge v0, v2
    return v3
}

; block0:
;   cmge v0.4s, v0.4s, #0
;   ret

function %f7(i64x2) -> b64x2 {
block0(v0: i64x2):
    v1 = iconst.i64 0
    v2 = splat.i64x2 v1
    v3 = icmp sge v2, v0
    return v3
}

; block0:
;   cmle v0.2d, v0.2d, #0
;   ret

function %f8(i8x16) -> b8x16 {
block0(v0: i8x16):
    v1 = iconst.i8 0
    v2 = splat.i8x16 v1
    v3 = icmp slt v0, v2
    return v3
}

; block0:
;   cmlt v0.16b, v0.16b, #0
;   ret

function %f9(i16x8) -> b16x8 {
block0(v0: i16x8):
    v1 = iconst.i16 0
    v2 = splat.i16x8 v1
    v3 = icmp slt v2, v0
    return v3
}

; block0:
;   cmgt v0.8h, v0.8h, #0
;   ret

function %f10(i32x4) -> b32x4 {
block0(v0: i32x4):
    v1 = iconst.i32 0
    v2 = splat.i32x4 v1
    v3 = icmp sgt v0, v2
    return v3
}

; block0:
;   cmgt v0.4s, v0.4s, #0
;   ret

function %f11(i64x2) -> b64x2 {
block0(v0: i64x2):
    v1 = iconst.i64 0
    v2 = splat.i64x2 v1
    v3 = icmp sgt v2, v0
    return v3
}

; block0:
;   cmlt v0.2d, v0.2d, #0
;   ret

function %f12(f32x4) -> b32x4 {
block0(v0: f32x4):
    v1 = f32const 0.0
    v2 = splat.f32x4 v1
    v3 = fcmp eq v0, v2
    return v3
}

; block0:
;   fcmeq v0.4s, v0.4s, #0.0
;   ret

function %f13(f64x2) -> b64x2 {
block0(v0: f64x2):
    v1 = f64const 0.0
    v2 = splat.f64x2 v1
    v3 = fcmp eq v2, v0
    return v3
}

; block0:
;   fcmeq v0.2d, v0.2d, #0.0
;   ret

function %f14(f64x2) -> b64x2 {
block0(v0: f64x2):
    v1 = f64const 0.0
    v2 = splat.f64x2 v1
    v3 = fcmp ne v0, v2
    return v3
}

; block0:
;   fcmeq v3.2d, v0.2d, #0.0
;   mvn v0.16b, v3.16b
;   ret

function %f15(f32x4) -> b32x4 {
block0(v0: f32x4):
    v1 = f32const 0.0
    v2 = splat.f32x4 v1
    v3 = fcmp ne v2, v0
    return v3
}

; block0:
;   fcmeq v3.4s, v0.4s, #0.0
;   mvn v0.16b, v3.16b
;   ret

function %f16(f32x4) -> b32x4 {
block0(v0: f32x4):
    v1 = f32const 0.0
    v2 = splat.f32x4 v1
    v3 = fcmp le v0, v2
    return v3
}

; block0:
;   fcmle v0.4s, v0.4s, #0.0
;   ret

function %f17(f64x2) -> b64x2 {
block0(v0: f64x2):
    v1 = f64const 0.0
    v2 = splat.f64x2 v1
    v3 = fcmp le v2, v0
    return v3
}

; block0:
;   fcmge v0.2d, v0.2d, #0.0
;   ret

function %f18(f64x2) -> b64x2 {
block0(v0: f64x2):
    v1 = f64const 0.0
    v2 = splat.f64x2 v1
    v3 = fcmp ge v0, v2
    return v3
}

; block0:
;   fcmge v0.2d, v0.2d, #0.0
;   ret

function %f19(f32x4) -> b32x4 {
block0(v0: f32x4):
    v1 = f32const 0.0
    v2 = splat.f32x4 v1
    v3 = fcmp ge v2, v0
    return v3
}

; block0:
;   fcmle v0.4s, v0.4s, #0.0
;   ret

function %f20(f32x4) -> b32x4 {
block0(v0: f32x4):
    v1 = f32const 0.0
    v2 = splat.f32x4 v1
    v3 = fcmp lt v0, v2
    return v3
}

; block0:
;   fcmlt v0.4s, v0.4s, #0.0
;   ret

function %f21(f64x2) -> b64x2 {
block0(v0: f64x2):
    v1 = f64const 0.0
    v2 = splat.f64x2 v1
    v3 = fcmp lt v2, v0
    return v3
}

; block0:
;   fcmgt v0.2d, v0.2d, #0.0
;   ret

function %f22(f64x2) -> b64x2 {
block0(v0: f64x2):
    v1 = f64const 0.0
    v2 = splat.f64x2 v1
    v3 = fcmp gt v0, v2
    return v3
}

; block0:
;   fcmgt v0.2d, v0.2d, #0.0
;   ret

function %f23(f32x4) -> b32x4 {
block0(v0: f32x4):
    v1 = f32const 0.0
    v2 = splat.f32x4 v1
    v3 = fcmp gt v2, v0
    return v3
}

; block0:
;   fcmlt v0.4s, v0.4s, #0.0
;   ret
```