Pull in regalloc2 v0.2.0, with no more separate scratch registers. (#4182)
RA2 recently removed the need for a dedicated scratch register for cyclic moves (bytecodealliance/regalloc2#51). This has a moderate positive performance impact on function bodies that were register-constrained, because one more register is now available to the allocator. In Sightglass, I measured a 5-8% improvement on `blake3-scalar`, at least among the current benchmarks.
This commit is contained in:
@@ -425,11 +425,11 @@ mod test {
|
||||
// 00000004 41B900000000 mov r9d,0x0
|
||||
// 0000000A 83FF02 cmp edi,byte +0x2
|
||||
// 0000000D 0F8320000000 jnc near 0x33
|
||||
// 00000013 8BC7 mov eax,edi
|
||||
// 00000015 490F43C1 cmovnc rax,r9
|
||||
// 00000013 8BF7 mov esi,edi
|
||||
// 00000015 490F43F1 cmovnc rsi,r9
|
||||
// 00000019 4C8D0D0B000000 lea r9,[rel 0x2b]
|
||||
// 00000020 4963448100 movsxd rax,dword [r9+rax*4+0x0]
|
||||
// 00000025 4901C1 add r9,rax
|
||||
// 00000020 496374B100 movsxd rsi,dword [r9+rsi*4+0x0]
|
||||
// 00000025 4901F1 add r9,rsi
|
||||
// 00000028 41FFE1 jmp r9
|
||||
// 0000002B 1200 adc al,[rax]
|
||||
// 0000002D 0000 add [rax],al
|
||||
@@ -449,8 +449,8 @@ mod test {
|
||||
// 00000050 C3 ret
|
||||
|
||||
let golden = vec![
|
||||
85, 72, 137, 229, 65, 185, 0, 0, 0, 0, 131, 255, 2, 15, 131, 32, 0, 0, 0, 139, 199, 73,
|
||||
15, 67, 193, 76, 141, 13, 11, 0, 0, 0, 73, 99, 68, 129, 0, 73, 1, 193, 65, 255, 225,
|
||||
85, 72, 137, 229, 65, 185, 0, 0, 0, 0, 131, 255, 2, 15, 131, 32, 0, 0, 0, 139, 247, 73,
|
||||
15, 67, 241, 76, 141, 13, 11, 0, 0, 0, 73, 99, 116, 177, 0, 73, 1, 241, 65, 255, 225,
|
||||
18, 0, 0, 0, 28, 0, 0, 0, 184, 3, 0, 0, 0, 72, 137, 236, 93, 195, 184, 1, 0, 0, 0, 72,
|
||||
137, 236, 93, 195, 184, 2, 0, 0, 0, 72, 137, 236, 93, 195,
|
||||
];
|
||||
|
||||
Reference in New Issue
Block a user