Pull in regalloc2 v0.2.0, with no more separate scratch registers. (#4182)

RA2 recently removed the need for a dedicated scratch register for
cyclic moves (bytecodealliance/regalloc2#51). This has a moderate positive
performance impact on function bodies that were register-constrained, as
it means that one more register is available. In Sightglass, I measured
+5-8% on `blake3-scalar`, at least among current benchmarks.
This commit is contained in:
Chris Fallin
2022-05-23 12:51:04 -07:00
committed by GitHub
parent 6e828df632
commit b830c3cf93
21 changed files with 529 additions and 563 deletions

View File

@@ -209,24 +209,9 @@ pub fn create_reg_env(flags: &settings::Flags) -> MachineEnv {
preg(vreg(28)),
preg(vreg(29)),
preg(vreg(30)),
// v31 is the scratch reg, to allow for parallel moves.
preg(vreg(31)),
],
],
scratch_by_class: [
// We use tmp2 (x17) as the regalloc scratch register,
// used to resolve cyclic parallel moves. This is valid
// because tmp2 is never live between regalloc-visible
// instructions, only within them (i.e. in expansion into
// multiple machine instructions when that
// occurs). spilltmp is used for moves to/from spillslots,
// but tmp2 never is, so it is available for this
// purpose. (Its only other use is in prologue stack
// checks, and the prologue is prepended after regalloc
// runs.)
preg(tmp2_reg()),
// We use v31 for Float/Vec-class parallel moves.
preg(vreg(31)),
],
fixed_stack_slots: vec![],
};

View File

@@ -102,7 +102,7 @@ pub fn create_machine_env(_flags: &settings::Flags) -> MachineEnv {
preg(gpr(10)),
preg(gpr(11)),
preg(gpr(12)),
// no r13; it is our scratch reg.
preg(gpr(13)),
preg(gpr(14)),
// no r15; it is the stack pointer.
],
@@ -114,10 +114,9 @@ pub fn create_machine_env(_flags: &settings::Flags) -> MachineEnv {
preg(fpr(12)),
preg(fpr(13)),
preg(fpr(14)),
// no f15; it is our scratch reg.
preg(fpr(15)),
],
],
scratch_by_class: [preg(gpr(13)), preg(fpr(15))],
fixed_stack_slots: vec![],
}
}

View File

@@ -166,7 +166,7 @@ pub(crate) fn create_reg_env_systemv(flags: &settings::Flags) -> MachineEnv {
preg(rdx()),
preg(r8()),
preg(r9()),
// N.B.: not r10; it is our scratch reg.
preg(r10()),
preg(r11()),
],
// Preferred XMMs: all of them.
@@ -186,7 +186,7 @@ pub(crate) fn create_reg_env_systemv(flags: &settings::Flags) -> MachineEnv {
preg(xmm12()),
preg(xmm13()),
preg(xmm14()),
// N.B.: not xmm15; it is our scratch reg.
preg(xmm15()),
],
],
non_preferred_regs_by_class: [
@@ -195,7 +195,6 @@ pub(crate) fn create_reg_env_systemv(flags: &settings::Flags) -> MachineEnv {
// Non-preferred XMMs: none.
vec![],
],
scratch_by_class: [preg(r10()), preg(xmm15())],
fixed_stack_slots: vec![],
};

View File

@@ -425,11 +425,11 @@ mod test {
// 00000004 41B900000000 mov r9d,0x0
// 0000000A 83FF02 cmp edi,byte +0x2
// 0000000D 0F8320000000 jnc near 0x33
// 00000013 8BC7 mov eax,edi
// 00000015 490F43C1 cmovnc rax,r9
// 00000013 8BF7 mov esi,edi
// 00000015 490F43F1 cmovnc rsi,r9
// 00000019 4C8D0D0B000000 lea r9,[rel 0x2b]
// 00000020 4963448100 movsxd rax,dword [r9+rax*4+0x0]
// 00000025 4901C1 add r9,rax
// 00000020 496374B100 movsxd rsi,dword [r9+rsi*4+0x0]
// 00000025 4901F1 add r9,rsi
// 00000028 41FFE1 jmp r9
// 0000002B 1200 adc al,[rax]
// 0000002D 0000 add [rax],al
@@ -449,8 +449,8 @@ mod test {
// 00000050 C3 ret
let golden = vec![
85, 72, 137, 229, 65, 185, 0, 0, 0, 0, 131, 255, 2, 15, 131, 32, 0, 0, 0, 139, 199, 73,
15, 67, 193, 76, 141, 13, 11, 0, 0, 0, 73, 99, 68, 129, 0, 73, 1, 193, 65, 255, 225,
85, 72, 137, 229, 65, 185, 0, 0, 0, 0, 131, 255, 2, 15, 131, 32, 0, 0, 0, 139, 247, 73,
15, 67, 241, 76, 141, 13, 11, 0, 0, 0, 73, 99, 116, 177, 0, 73, 1, 241, 65, 255, 225,
18, 0, 0, 0, 28, 0, 0, 0, 184, 3, 0, 0, 0, 72, 137, 236, 93, 195, 184, 1, 0, 0, 0, 72,
137, 236, 93, 195, 184, 2, 0, 0, 0, 72, 137, 236, 93, 195,
];