Only save callee-saved registers that are used (#293)
* Only save callee-saved registers that are actually being used.
* Rename AllocatableSet to RegisterSet.
* Style cleanup and small renames for readability.
* Adjust x86 prologue-epilogue test to account for callee-saved register optimization.
* Add more tests for prologue-epilogue optimizations.
committed by Dan Gohman
parent 0948ca9963
commit 775c674b38
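The heart of the change: instead of unconditionally saving every callee-saved register, the prologue/epilogue pass now consults the set of registers the allocator actually assigned, and pushes and pops only the callee-saved ones among them. A minimal Rust sketch of that filtering step (illustrative only; `CALLEE_SAVED` and `csrs_to_save` are hypothetical names, not Cranelift's actual API):

```rust
// Illustrative sketch only; these names are hypothetical, not Cranelift's API.

/// Callee-saved GPRs under the System V x86-64 ABI (%rbp is handled
/// separately as the frame pointer).
const CALLEE_SAVED: &[&str] = &["%rbx", "%r12", "%r13", "%r14", "%r15"];

/// Keep only the callee-saved registers that the register allocator
/// actually assigned somewhere in the function body.
fn csrs_to_save(used: &[&str]) -> Vec<&'static str> {
    CALLEE_SAVED
        .iter()
        .copied()
        .filter(|csr| used.contains(csr))
        .collect()
}

fn main() {
    // An empty function clobbers nothing, so the prologue only pushes %rbp
    // (compare the %empty expectations in the diff below).
    assert!(csrs_to_save(&[]).is_empty());

    // A register-hungry body forces all five saves (compare %no_spill below).
    let used = ["%rax", "%rcx", "%rbx", "%r12", "%r13", "%r14", "%r15"];
    assert_eq!(csrs_to_save(&used).len(), 5);
}
```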
@@ -3,30 +3,206 @@ set is_64bit
set is_compressed
isa intel haswell

function %foo() {
; An empty function.

function %empty() {
ebb0:
return
}

; check: function %empty(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = incoming_arg 16, offset -16
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: x86_push v0
; nextln: copy_special %rsp -> %rbp
; nextln: v1 = x86_pop.i64
; nextln: return v1
; nextln: }

; A function with a single stack slot.

function %one_stack_slot() {
ss0 = explicit_slot 168
ebb0:
return
}

; check: function %foo(i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] system_v {
; nextln: ss0 = explicit_slot 168, offset -224
; nextln: ss1 = incoming_arg 56, offset -56
; check: ebb0(v0: i64 [%rbp], v1: i64 [%rbx], v2: i64 [%r12], v3: i64 [%r13], v4: i64 [%r14], v5: i64 [%r15]):
; check: function %one_stack_slot(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 168, offset -184
; nextln: ss1 = incoming_arg 16, offset -16
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: x86_push v0
; nextln: copy_special %rsp -> %rbp
; nextln: x86_push v1
; nextln: x86_push v2
; nextln: x86_push v3
; nextln: x86_push v4
; nextln: x86_push v5
; nextln: adjust_sp_imm -168
; nextln: adjust_sp_imm 168
; nextln: v11 = x86_pop.i64
; nextln: v10 = x86_pop.i64
; nextln: v9 = x86_pop.i64
; nextln: v8 = x86_pop.i64
; nextln: v7 = x86_pop.i64
; nextln: v6 = x86_pop.i64
; nextln: return v6, v7, v8, v9, v10, v11
; nextln: adjust_sp_imm -176
; nextln: adjust_sp_imm 176
; nextln: v1 = x86_pop.i64
; nextln: return v1
; nextln: }

; A function performing a call.

function %call() {
fn0 = function %foo()

ebb0:
call fn0()
return
}

; check: function %call(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = incoming_arg 16, offset -16
; nextln: sig0 = () system_v
; nextln: fn0 = sig0 %foo
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: x86_push v0
; nextln: copy_special %rsp -> %rbp
; nextln: call fn0()
; nextln: v1 = x86_pop.i64
; nextln: return v1
; nextln: }

; A function that uses a lot of registers but doesn't quite need to spill.

function %no_spill(i64, i64) {
ebb0(v0: i64, v1: i64):
v2 = load.i32 v0+0
v3 = load.i32 v0+8
v4 = load.i32 v0+16
v5 = load.i32 v0+24
v6 = load.i32 v0+32
v7 = load.i32 v0+40
v8 = load.i32 v0+48
v9 = load.i32 v0+56
v10 = load.i32 v0+64
v11 = load.i32 v0+72
v12 = load.i32 v0+80
v13 = load.i32 v0+88
v14 = load.i32 v0+96
store.i32 v2, v1+0
store.i32 v3, v1+8
store.i32 v4, v1+16
store.i32 v5, v1+24
store.i32 v6, v1+32
store.i32 v7, v1+40
store.i32 v8, v1+48
store.i32 v9, v1+56
store.i32 v10, v1+64
store.i32 v11, v1+72
store.i32 v12, v1+80
store.i32 v13, v1+88
store.i32 v14, v1+96
return
}

; check: function %no_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] system_v {
; nextln: ss0 = incoming_arg 56, offset -56
; nextln:
; nextln: ebb0(v0: i64 [%rdi], v1: i64 [%rsi], v15: i64 [%rbp], v16: i64 [%rbx], v17: i64 [%r12], v18: i64 [%r13], v19: i64 [%r14], v20: i64 [%r15]):
; nextln: x86_push v15
; nextln: copy_special %rsp -> %rbp
; nextln: x86_push v16
; nextln: x86_push v17
; nextln: x86_push v18
; nextln: x86_push v19
; nextln: x86_push v20
; nextln: adjust_sp_imm -8
; nextln: v2 = load.i32 v0
; nextln: v3 = load.i32 v0+8
; nextln: v4 = load.i32 v0+16
; nextln: v5 = load.i32 v0+24
; nextln: v6 = load.i32 v0+32
; nextln: v7 = load.i32 v0+40
; nextln: v8 = load.i32 v0+48
; nextln: v9 = load.i32 v0+56
; nextln: v10 = load.i32 v0+64
; nextln: v11 = load.i32 v0+72
; nextln: v12 = load.i32 v0+80
; nextln: v13 = load.i32 v0+88
; nextln: v14 = load.i32 v0+96
; nextln: store v2, v1
; nextln: store v3, v1+8
; nextln: store v4, v1+16
; nextln: store v5, v1+24
; nextln: store v6, v1+32
; nextln: store v7, v1+40
; nextln: store v8, v1+48
; nextln: store v9, v1+56
; nextln: store v10, v1+64
; nextln: store v11, v1+72
; nextln: store v12, v1+80
; nextln: store v13, v1+88
; nextln: store v14, v1+96
; nextln: adjust_sp_imm 8
; nextln: v26 = x86_pop.i64
; nextln: v25 = x86_pop.i64
; nextln: v24 = x86_pop.i64
; nextln: v23 = x86_pop.i64
; nextln: v22 = x86_pop.i64
; nextln: v21 = x86_pop.i64
; nextln: return v21, v22, v23, v24, v25, v26
; nextln: }

; This function requires too many registers and must spill.

function %yes_spill(i64, i64) {
ebb0(v0: i64, v1: i64):
v2 = load.i32 v0+0
v3 = load.i32 v0+8
v4 = load.i32 v0+16
v5 = load.i32 v0+24
v6 = load.i32 v0+32
v7 = load.i32 v0+40
v8 = load.i32 v0+48
v9 = load.i32 v0+56
v10 = load.i32 v0+64
v11 = load.i32 v0+72
v12 = load.i32 v0+80
v13 = load.i32 v0+88
v14 = load.i32 v0+96
v15 = load.i32 v0+104
store.i32 v2, v1+0
store.i32 v3, v1+8
store.i32 v4, v1+16
store.i32 v5, v1+24
store.i32 v6, v1+32
store.i32 v7, v1+40
store.i32 v8, v1+48
store.i32 v9, v1+56
store.i32 v10, v1+64
store.i32 v11, v1+72
store.i32 v12, v1+80
store.i32 v13, v1+88
store.i32 v14, v1+96
store.i32 v15, v1+104
return
}

; check: function %yes_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] system_v {
; check: ss0 = spill_slot

; check: ebb0(v16: i64 [%rdi], v17: i64 [%rsi], v48: i64 [%rbp], v49: i64 [%rbx], v50: i64 [%r12], v51: i64 [%r13], v52: i64 [%r14], v53: i64 [%r15]):
; nextln: x86_push v48
; nextln: copy_special %rsp -> %rbp
; nextln: x86_push v49
; nextln: x86_push v50
; nextln: x86_push v51
; nextln: x86_push v52
; nextln: x86_push v53
; nextln: adjust_sp_imm

; check: spill

; check: fill

; check: adjust_sp_imm
; nextln: v59 = x86_pop.i64
; nextln: v58 = x86_pop.i64
; nextln: v57 = x86_pop.i64
; nextln: v56 = x86_pop.i64
; nextln: v55 = x86_pop.i64
; nextln: v54 = x86_pop.i64
; nextln: return v54, v55, v56, v57, v58, v59
; nextln: }
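The expected offsets above all follow from the same arithmetic: 16 bytes for the return address plus the pushed %rbp, 8 more bytes per saved callee-saved register, and an adjust_sp_imm that pads the frame out to 16-byte stack alignment. A quick Rust cross-check of the constants in the expectations (a sketch; the `frame_layout` helper is hypothetical, not Cranelift code):

```rust
// Sketch cross-checking the offsets in the test expectations above.
// `frame_layout` is a hypothetical helper, not Cranelift code.

fn align_up(n: u32, align: u32) -> u32 {
    (n + align - 1) & !(align - 1)
}

/// Given the number of saved callee-saved registers and the explicit stack
/// slot size, return (incoming_arg size, lowest slot offset, sp adjustment).
fn frame_layout(num_csrs: u32, explicit_slot: u32) -> (u32, i32, u32) {
    // Return address (8) + pushed %rbp (8) + one 8-byte push per CSR.
    let csr_space = 16 + 8 * num_csrs;
    let slot_offset = -((csr_space + explicit_slot) as i32);
    // Pad the whole frame out to 16-byte stack alignment.
    let adjust = align_up(csr_space + explicit_slot, 16) - csr_space;
    (csr_space, slot_offset, adjust)
}

fn main() {
    // %one_stack_slot: no CSRs, 168-byte slot -> ss0 at -184, adjusted by 176.
    assert_eq!(frame_layout(0, 168), (16, -184, 176));
    // %no_spill: five CSRs, no slot -> incoming_arg 56 at -56, adjusted by 8.
    assert_eq!(frame_layout(5, 0), (56, -56, 8));
    // The old, unoptimized %foo expectation: five CSRs plus the 168-byte slot
    // -> ss0 at -224, adjusted by 168.
    assert_eq!(frame_layout(5, 168), (56, -224, 168));
}
```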