Switch Cranelift over to regalloc2. (#3989)

This PR switches Cranelift over to the new register allocator, regalloc2.

See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801)
for a summary of the design changes. This switchover has implications for
core VCode/MachInst types and the lowering pass.

Overall, this change improves both compile time and the runtime speed of the
generated code, as reported in #3942:

```
Benchmark               Compilation (wallclock)     Execution (wallclock)
blake3-scalar           25% faster                  28% faster
blake3-simd             no diff                     no diff
meshoptimizer           19% faster                  17% faster
pulldown-cmark          17% faster                  no diff
bz2                     15% faster                  no diff
SpiderMonkey, fib(30)   21% faster                  2% faster
clang.wasm              42% faster                  N/A
```
This commit is contained in:
Chris Fallin
2022-04-14 10:28:21 -07:00
committed by GitHub
parent bfae6384aa
commit a0318f36f0
181 changed files with 16887 additions and 21587 deletions

View File

@@ -11,14 +11,9 @@ block0(v0: i8x16):
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: saddlp v0.8h, v0.16b
; Inst 1: ret
; }}
; block0:
; saddlp v0.8h, v0.16b
; ret
function %fn2(i8x16) -> i16x8 {
block0(v0: i8x16):
@@ -28,14 +23,9 @@ block0(v0: i8x16):
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: uaddlp v0.8h, v0.16b
; Inst 1: ret
; }}
; block0:
; uaddlp v0.8h, v0.16b
; ret
function %fn3(i16x8) -> i32x4 {
block0(v0: i16x8):
@@ -45,14 +35,9 @@ block0(v0: i16x8):
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: saddlp v0.4s, v0.8h
; Inst 1: ret
; }}
; block0:
; saddlp v0.4s, v0.8h
; ret
function %fn4(i16x8) -> i32x4 {
block0(v0: i16x8):
@@ -62,14 +47,9 @@ block0(v0: i16x8):
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 2)
; Inst 0: uaddlp v0.4s, v0.8h
; Inst 1: ret
; }}
; block0:
; uaddlp v0.4s, v0.8h
; ret
function %fn5(i8x16, i8x16) -> i16x8 {
block0(v0: i8x16, v1: i8x16):
@@ -79,16 +59,11 @@ block0(v0: i8x16, v1: i8x16):
return v4
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 4)
; Inst 0: sxtl v0.8h, v0.8b
; Inst 1: sxtl2 v1.8h, v1.16b
; Inst 2: addp v0.8h, v0.8h, v1.8h
; Inst 3: ret
; }}
; block0:
; sxtl v4.8h, v0.8b
; sxtl2 v6.8h, v1.16b
; addp v0.8h, v4.8h, v6.8h
; ret
function %fn6(i8x16, i8x16) -> i16x8 {
block0(v0: i8x16, v1: i8x16):
@@ -98,16 +73,11 @@ block0(v0: i8x16, v1: i8x16):
return v4
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 4)
; Inst 0: uxtl v0.8h, v0.8b
; Inst 1: uxtl2 v1.8h, v1.16b
; Inst 2: addp v0.8h, v0.8h, v1.8h
; Inst 3: ret
; }}
; block0:
; uxtl v4.8h, v0.8b
; uxtl2 v6.8h, v1.16b
; addp v0.8h, v4.8h, v6.8h
; ret
function %fn7(i8x16) -> i16x8 {
block0(v0: i8x16):
@@ -117,16 +87,11 @@ block0(v0: i8x16):
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 4)
; Inst 0: uxtl v1.8h, v0.8b
; Inst 1: sxtl2 v0.8h, v0.16b
; Inst 2: addp v0.8h, v1.8h, v0.8h
; Inst 3: ret
; }}
; block0:
; uxtl v2.8h, v0.8b
; sxtl2 v4.8h, v0.16b
; addp v0.8h, v2.8h, v4.8h
; ret
function %fn8(i8x16) -> i16x8 {
block0(v0: i8x16):
@@ -136,14 +101,9 @@ block0(v0: i8x16):
return v3
}
; VCode_ShowWithRRU {{
; Entry block: 0
; Block 0:
; (original IR block: block0)
; (instruction range: 0 .. 4)
; Inst 0: sxtl v1.8h, v0.8b
; Inst 1: uxtl2 v0.8h, v0.16b
; Inst 2: addp v0.8h, v1.8h, v0.8h
; Inst 3: ret
; }}
; block0:
; sxtl v2.8h, v0.8b
; uxtl2 v4.8h, v0.16b
; addp v0.8h, v2.8h, v4.8h
; ret