Pull in regalloc2 v0.2.0, with no more separate scratch registers. (#4182)

RA2 (regalloc2) recently removed the need for a dedicated scratch register
for cyclic moves (bytecodealliance/regalloc2#51). This has a moderate
positive performance impact on function bodies that were
register-constrained, because one more register is now available for
allocation. Among the current Sightglass benchmarks, I measured +5-8% on at
least `blake3-scalar`.
This commit is contained in:
Chris Fallin
2022-05-23 12:51:04 -07:00
committed by GitHub
parent 6e828df632
commit b830c3cf93
21 changed files with 529 additions and 563 deletions

View File

@@ -425,11 +425,11 @@ mod test {
// 00000004 41B900000000 mov r9d,0x0
// 0000000A 83FF02 cmp edi,byte +0x2
// 0000000D 0F8320000000 jnc near 0x33
// 00000013 8BC7 mov eax,edi
// 00000015 490F43C1 cmovnc rax,r9
// 00000013 8BF7 mov esi,edi
// 00000015 490F43F1 cmovnc rsi,r9
// 00000019 4C8D0D0B000000 lea r9,[rel 0x2b]
// 00000020 4963448100 movsxd rax,dword [r9+rax*4+0x0]
// 00000025 4901C1 add r9,rax
// 00000020 496374B100 movsxd rsi,dword [r9+rsi*4+0x0]
// 00000025 4901F1 add r9,rsi
// 00000028 41FFE1 jmp r9
// 0000002B 1200 adc al,[rax]
// 0000002D 0000 add [rax],al
@@ -449,8 +449,8 @@ mod test {
// 00000050 C3 ret
let golden = vec![
85, 72, 137, 229, 65, 185, 0, 0, 0, 0, 131, 255, 2, 15, 131, 32, 0, 0, 0, 139, 199, 73,
15, 67, 193, 76, 141, 13, 11, 0, 0, 0, 73, 99, 68, 129, 0, 73, 1, 193, 65, 255, 225,
85, 72, 137, 229, 65, 185, 0, 0, 0, 0, 131, 255, 2, 15, 131, 32, 0, 0, 0, 139, 247, 73,
15, 67, 241, 76, 141, 13, 11, 0, 0, 0, 73, 99, 116, 177, 0, 73, 1, 241, 65, 255, 225,
18, 0, 0, 0, 28, 0, 0, 0, 184, 3, 0, 0, 0, 72, 137, 236, 93, 195, 184, 1, 0, 0, 0, 72,
137, 236, 93, 195, 184, 2, 0, 0, 0, 72, 137, 236, 93, 195,
];