Only save callee-saved registers that are used (#293)

* Only save callee-saved registers that are actually being used (a rough sketch of the idea follows this list).

* Rename AllocatableSet to RegisterSet

* Style cleanup and small renames for readability.

* Adjust x86 prologue-epilogue test to account for callee-saved register optimization.

* Add more tests for prologue-epilogue optimizations.
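
For context, here is a minimal, illustrative sketch of what the first bullet describes, written in plain Rust rather than against Cranelift's actual types: the prologue/epilogue pass intersects the ABI's callee-saved set with the registers the allocator actually handed out, and only those registers make it into the push/pop sequence. The `CALLEE_SAVED` list, the `used_callee_saved` helper, and the `BTreeSet` of allocator-used registers below are hypothetical stand-ins, not Cranelift APIs.

```rust
use std::collections::BTreeSet;

/// Callee-saved registers of the System V x86-64 ABI, besides %rbp,
/// which the frame-pointer handling always saves.
const CALLEE_SAVED: &[&str] = &["rbx", "r12", "r13", "r14", "r15"];

/// Return only the callee-saved registers that the register allocator
/// actually assigned somewhere in the function.
fn used_callee_saved(used: &BTreeSet<&str>) -> Vec<&'static str> {
    CALLEE_SAVED
        .iter()
        .copied()
        .filter(|reg| used.contains(reg))
        .collect()
}

fn main() {
    // A small function that only touched caller-saved registers:
    // nothing to save besides the frame pointer (compare %empty below).
    let leaf: BTreeSet<&str> = ["rax", "rdi", "rsi"].iter().copied().collect();
    assert!(used_callee_saved(&leaf).is_empty());

    // A register-hungry function (compare %no_spill below) ends up using
    // every callee-saved register, so all of them are pushed in the
    // prologue and popped in reverse order in the epilogue.
    let hungry: BTreeSet<&str> =
        ["rax", "rbx", "r12", "r13", "r14", "r15"].iter().copied().collect();
    let to_save = used_callee_saved(&hungry);
    assert_eq!(to_save, ["rbx", "r12", "r13", "r14", "r15"]);

    for reg in &to_save {
        println!("prologue: x86_push %{}", reg);
    }
    for reg in to_save.iter().rev() {
        println!("epilogue: x86_pop  %{}", reg);
    }
}
```

The adjusted test below reflects exactly this behavior: %empty, %one_stack_slot, and %call now save only %rbp, while %no_spill and %yes_spill, which use enough values to touch every callee-saved register, still push and pop all of %rbx and %r12-%r15.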
Tyler McMullen authored on 2018-04-03 14:44:12 -07:00; committed by Dan Gohman
parent 0948ca9963
commit 775c674b38
15 changed files with 310 additions and 123 deletions


@@ -3,30 +3,206 @@ set is_64bit
 set is_compressed
 isa intel haswell
-function %foo() {
+; An empty function.
+function %empty() {
+ebb0:
+    return
+}
+; check: function %empty(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
+; nextln: ss0 = incoming_arg 16, offset -16
+; nextln:
+; nextln: ebb0(v0: i64 [%rbp]):
+; nextln: x86_push v0
+; nextln: copy_special %rsp -> %rbp
+; nextln: v1 = x86_pop.i64
+; nextln: return v1
+; nextln: }
+; A function with a single stack slot.
+function %one_stack_slot() {
 ss0 = explicit_slot 168
 ebb0:
     return
 }
-; check: function %foo(i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] system_v {
-; nextln: ss0 = explicit_slot 168, offset -224
-; nextln: ss1 = incoming_arg 56, offset -56
-; check: ebb0(v0: i64 [%rbp], v1: i64 [%rbx], v2: i64 [%r12], v3: i64 [%r13], v4: i64 [%r14], v5: i64 [%r15]):
+; check: function %one_stack_slot(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
+; nextln: ss0 = explicit_slot 168, offset -184
+; nextln: ss1 = incoming_arg 16, offset -16
+; nextln:
+; nextln: ebb0(v0: i64 [%rbp]):
 ; nextln: x86_push v0
 ; nextln: copy_special %rsp -> %rbp
-; nextln: x86_push v1
-; nextln: x86_push v2
-; nextln: x86_push v3
-; nextln: x86_push v4
-; nextln: x86_push v5
-; nextln: adjust_sp_imm -168
-; nextln: adjust_sp_imm 168
-; nextln: v11 = x86_pop.i64
-; nextln: v10 = x86_pop.i64
-; nextln: v9 = x86_pop.i64
-; nextln: v8 = x86_pop.i64
-; nextln: v7 = x86_pop.i64
-; nextln: v6 = x86_pop.i64
-; nextln: return v6, v7, v8, v9, v10, v11
+; nextln: adjust_sp_imm -176
+; nextln: adjust_sp_imm 176
+; nextln: v1 = x86_pop.i64
+; nextln: return v1
 ; nextln: }
+; A function performing a call.
+function %call() {
+fn0 = function %foo()
+ebb0:
+    call fn0()
+    return
+}
+; check: function %call(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
+; nextln: ss0 = incoming_arg 16, offset -16
+; nextln: sig0 = () system_v
+; nextln: fn0 = sig0 %foo
+; nextln:
+; nextln: ebb0(v0: i64 [%rbp]):
+; nextln: x86_push v0
+; nextln: copy_special %rsp -> %rbp
+; nextln: call fn0()
+; nextln: v1 = x86_pop.i64
+; nextln: return v1
+; nextln: }
+; A function that uses a lot of registers but doesn't quite need to spill.
+function %no_spill(i64, i64) {
+ebb0(v0: i64, v1: i64):
+    v2 = load.i32 v0+0
+    v3 = load.i32 v0+8
+    v4 = load.i32 v0+16
+    v5 = load.i32 v0+24
+    v6 = load.i32 v0+32
+    v7 = load.i32 v0+40
+    v8 = load.i32 v0+48
+    v9 = load.i32 v0+56
+    v10 = load.i32 v0+64
+    v11 = load.i32 v0+72
+    v12 = load.i32 v0+80
+    v13 = load.i32 v0+88
+    v14 = load.i32 v0+96
+    store.i32 v2, v1+0
+    store.i32 v3, v1+8
+    store.i32 v4, v1+16
+    store.i32 v5, v1+24
+    store.i32 v6, v1+32
+    store.i32 v7, v1+40
+    store.i32 v8, v1+48
+    store.i32 v9, v1+56
+    store.i32 v10, v1+64
+    store.i32 v11, v1+72
+    store.i32 v12, v1+80
+    store.i32 v13, v1+88
+    store.i32 v14, v1+96
+    return
+}
+; check: function %no_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] system_v {
+; nextln: ss0 = incoming_arg 56, offset -56
+; nextln:
+; nextln: ebb0(v0: i64 [%rdi], v1: i64 [%rsi], v15: i64 [%rbp], v16: i64 [%rbx], v17: i64 [%r12], v18: i64 [%r13], v19: i64 [%r14], v20: i64 [%r15]):
+; nextln: x86_push v15
+; nextln: copy_special %rsp -> %rbp
+; nextln: x86_push v16
+; nextln: x86_push v17
+; nextln: x86_push v18
+; nextln: x86_push v19
+; nextln: x86_push v20
+; nextln: adjust_sp_imm -8
+; nextln: v2 = load.i32 v0
+; nextln: v3 = load.i32 v0+8
+; nextln: v4 = load.i32 v0+16
+; nextln: v5 = load.i32 v0+24
+; nextln: v6 = load.i32 v0+32
+; nextln: v7 = load.i32 v0+40
+; nextln: v8 = load.i32 v0+48
+; nextln: v9 = load.i32 v0+56
+; nextln: v10 = load.i32 v0+64
+; nextln: v11 = load.i32 v0+72
+; nextln: v12 = load.i32 v0+80
+; nextln: v13 = load.i32 v0+88
+; nextln: v14 = load.i32 v0+96
+; nextln: store v2, v1
+; nextln: store v3, v1+8
+; nextln: store v4, v1+16
+; nextln: store v5, v1+24
+; nextln: store v6, v1+32
+; nextln: store v7, v1+40
+; nextln: store v8, v1+48
+; nextln: store v9, v1+56
+; nextln: store v10, v1+64
+; nextln: store v11, v1+72
+; nextln: store v12, v1+80
+; nextln: store v13, v1+88
+; nextln: store v14, v1+96
+; nextln: adjust_sp_imm 8
+; nextln: v26 = x86_pop.i64
+; nextln: v25 = x86_pop.i64
+; nextln: v24 = x86_pop.i64
+; nextln: v23 = x86_pop.i64
+; nextln: v22 = x86_pop.i64
+; nextln: v21 = x86_pop.i64
+; nextln: return v21, v22, v23, v24, v25, v26
+; nextln: }
+; This function requires too many registers and must spill.
+function %yes_spill(i64, i64) {
+ebb0(v0: i64, v1: i64):
+    v2 = load.i32 v0+0
+    v3 = load.i32 v0+8
+    v4 = load.i32 v0+16
+    v5 = load.i32 v0+24
+    v6 = load.i32 v0+32
+    v7 = load.i32 v0+40
+    v8 = load.i32 v0+48
+    v9 = load.i32 v0+56
+    v10 = load.i32 v0+64
+    v11 = load.i32 v0+72
+    v12 = load.i32 v0+80
+    v13 = load.i32 v0+88
+    v14 = load.i32 v0+96
+    v15 = load.i32 v0+104
+    store.i32 v2, v1+0
+    store.i32 v3, v1+8
+    store.i32 v4, v1+16
+    store.i32 v5, v1+24
+    store.i32 v6, v1+32
+    store.i32 v7, v1+40
+    store.i32 v8, v1+48
+    store.i32 v9, v1+56
+    store.i32 v10, v1+64
+    store.i32 v11, v1+72
+    store.i32 v12, v1+80
+    store.i32 v13, v1+88
+    store.i32 v14, v1+96
+    store.i32 v15, v1+104
+    return
+}
+; check: function %yes_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] system_v {
+; check: ss0 = spill_slot
+; check: ebb0(v16: i64 [%rdi], v17: i64 [%rsi], v48: i64 [%rbp], v49: i64 [%rbx], v50: i64 [%r12], v51: i64 [%r13], v52: i64 [%r14], v53: i64 [%r15]):
+; nextln: x86_push v48
+; nextln: copy_special %rsp -> %rbp
+; nextln: x86_push v49
+; nextln: x86_push v50
+; nextln: x86_push v51
+; nextln: x86_push v52
+; nextln: x86_push v53
+; nextln: adjust_sp_imm
+; check: spill
+; check: fill
+; check: adjust_sp_imm
+; nextln: v59 = x86_pop.i64
+; nextln: v58 = x86_pop.i64
+; nextln: v57 = x86_pop.i64
+; nextln: v56 = x86_pop.i64
+; nextln: v55 = x86_pop.i64
+; nextln: v54 = x86_pop.i64
+; nextln: return v54, v55, v56, v57, v58, v59
+; nextln: }