Only save callee-saved registers that are used (#293)
* Only save callee-saved registers that are actually being used. * Rename AllocatableSet to RegisterSet * Style cleanup and small renames for readability. * Adjust x86 prologue-epilogue test to account for callee-saved register optimization. * Add more tests for prologue-epilogue optimizations.
This commit is contained in:
committed by
Dan Gohman
parent
0948ca9963
commit
775c674b38
@@ -3,30 +3,206 @@ set is_64bit
|
||||
set is_compressed
|
||||
isa intel haswell
|
||||
|
||||
function %foo() {
|
||||
; An empty function.
|
||||
|
||||
function %empty() {
|
||||
ebb0:
|
||||
return
|
||||
}
|
||||
|
||||
; check: function %empty(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
|
||||
; nextln: ss0 = incoming_arg 16, offset -16
|
||||
; nextln:
|
||||
; nextln: ebb0(v0: i64 [%rbp]):
|
||||
; nextln: x86_push v0
|
||||
; nextln: copy_special %rsp -> %rbp
|
||||
; nextln: v1 = x86_pop.i64
|
||||
; nextln: return v1
|
||||
; nextln: }
|
||||
|
||||
; A function with a single stack slot.
|
||||
|
||||
function %one_stack_slot() {
|
||||
ss0 = explicit_slot 168
|
||||
ebb0:
|
||||
return
|
||||
}
|
||||
|
||||
; check: function %foo(i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] system_v {
|
||||
; nextln: ss0 = explicit_slot 168, offset -224
|
||||
; nextln: ss1 = incoming_arg 56, offset -56
|
||||
; check: ebb0(v0: i64 [%rbp], v1: i64 [%rbx], v2: i64 [%r12], v3: i64 [%r13], v4: i64 [%r14], v5: i64 [%r15]):
|
||||
; check: function %one_stack_slot(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
|
||||
; nextln: ss0 = explicit_slot 168, offset -184
|
||||
; nextln: ss1 = incoming_arg 16, offset -16
|
||||
; nextln:
|
||||
; nextln: ebb0(v0: i64 [%rbp]):
|
||||
; nextln: x86_push v0
|
||||
; nextln: copy_special %rsp -> %rbp
|
||||
; nextln: x86_push v1
|
||||
; nextln: x86_push v2
|
||||
; nextln: x86_push v3
|
||||
; nextln: x86_push v4
|
||||
; nextln: x86_push v5
|
||||
; nextln: adjust_sp_imm -168
|
||||
; nextln: adjust_sp_imm 168
|
||||
; nextln: v11 = x86_pop.i64
|
||||
; nextln: v10 = x86_pop.i64
|
||||
; nextln: v9 = x86_pop.i64
|
||||
; nextln: v8 = x86_pop.i64
|
||||
; nextln: v7 = x86_pop.i64
|
||||
; nextln: v6 = x86_pop.i64
|
||||
; nextln: return v6, v7, v8, v9, v10, v11
|
||||
; nextln: adjust_sp_imm -176
|
||||
; nextln: adjust_sp_imm 176
|
||||
; nextln: v1 = x86_pop.i64
|
||||
; nextln: return v1
|
||||
; nextln: }
|
||||
|
||||
; A function performing a call.
|
||||
|
||||
function %call() {
|
||||
fn0 = function %foo()
|
||||
|
||||
ebb0:
|
||||
call fn0()
|
||||
return
|
||||
}
|
||||
|
||||
; check: function %call(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
|
||||
; nextln: ss0 = incoming_arg 16, offset -16
|
||||
; nextln: sig0 = () system_v
|
||||
; nextln: fn0 = sig0 %foo
|
||||
; nextln:
|
||||
; nextln: ebb0(v0: i64 [%rbp]):
|
||||
; nextln: x86_push v0
|
||||
; nextln: copy_special %rsp -> %rbp
|
||||
; nextln: call fn0()
|
||||
; nextln: v1 = x86_pop.i64
|
||||
; nextln: return v1
|
||||
; nextln: }
|
||||
|
||||
; A function that uses a lot of registers but doesn't quite need to spill.
|
||||
|
||||
function %no_spill(i64, i64) {
|
||||
ebb0(v0: i64, v1: i64):
|
||||
v2 = load.i32 v0+0
|
||||
v3 = load.i32 v0+8
|
||||
v4 = load.i32 v0+16
|
||||
v5 = load.i32 v0+24
|
||||
v6 = load.i32 v0+32
|
||||
v7 = load.i32 v0+40
|
||||
v8 = load.i32 v0+48
|
||||
v9 = load.i32 v0+56
|
||||
v10 = load.i32 v0+64
|
||||
v11 = load.i32 v0+72
|
||||
v12 = load.i32 v0+80
|
||||
v13 = load.i32 v0+88
|
||||
v14 = load.i32 v0+96
|
||||
store.i32 v2, v1+0
|
||||
store.i32 v3, v1+8
|
||||
store.i32 v4, v1+16
|
||||
store.i32 v5, v1+24
|
||||
store.i32 v6, v1+32
|
||||
store.i32 v7, v1+40
|
||||
store.i32 v8, v1+48
|
||||
store.i32 v9, v1+56
|
||||
store.i32 v10, v1+64
|
||||
store.i32 v11, v1+72
|
||||
store.i32 v12, v1+80
|
||||
store.i32 v13, v1+88
|
||||
store.i32 v14, v1+96
|
||||
return
|
||||
}
|
||||
|
||||
; check: function %no_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] system_v {
|
||||
; nextln: ss0 = incoming_arg 56, offset -56
|
||||
; nextln:
|
||||
; nextln: ebb0(v0: i64 [%rdi], v1: i64 [%rsi], v15: i64 [%rbp], v16: i64 [%rbx], v17: i64 [%r12], v18: i64 [%r13], v19: i64 [%r14], v20: i64 [%r15]):
|
||||
; nextln: x86_push v15
|
||||
; nextln: copy_special %rsp -> %rbp
|
||||
; nextln: x86_push v16
|
||||
; nextln: x86_push v17
|
||||
; nextln: x86_push v18
|
||||
; nextln: x86_push v19
|
||||
; nextln: x86_push v20
|
||||
; nextln: adjust_sp_imm -8
|
||||
; nextln: v2 = load.i32 v0
|
||||
; nextln: v3 = load.i32 v0+8
|
||||
; nextln: v4 = load.i32 v0+16
|
||||
; nextln: v5 = load.i32 v0+24
|
||||
; nextln: v6 = load.i32 v0+32
|
||||
; nextln: v7 = load.i32 v0+40
|
||||
; nextln: v8 = load.i32 v0+48
|
||||
; nextln: v9 = load.i32 v0+56
|
||||
; nextln: v10 = load.i32 v0+64
|
||||
; nextln: v11 = load.i32 v0+72
|
||||
; nextln: v12 = load.i32 v0+80
|
||||
; nextln: v13 = load.i32 v0+88
|
||||
; nextln: v14 = load.i32 v0+96
|
||||
; nextln: store v2, v1
|
||||
; nextln: store v3, v1+8
|
||||
; nextln: store v4, v1+16
|
||||
; nextln: store v5, v1+24
|
||||
; nextln: store v6, v1+32
|
||||
; nextln: store v7, v1+40
|
||||
; nextln: store v8, v1+48
|
||||
; nextln: store v9, v1+56
|
||||
; nextln: store v10, v1+64
|
||||
; nextln: store v11, v1+72
|
||||
; nextln: store v12, v1+80
|
||||
; nextln: store v13, v1+88
|
||||
; nextln: store v14, v1+96
|
||||
; nextln: adjust_sp_imm 8
|
||||
; nextln: v26 = x86_pop.i64
|
||||
; nextln: v25 = x86_pop.i64
|
||||
; nextln: v24 = x86_pop.i64
|
||||
; nextln: v23 = x86_pop.i64
|
||||
; nextln: v22 = x86_pop.i64
|
||||
; nextln: v21 = x86_pop.i64
|
||||
; nextln: return v21, v22, v23, v24, v25, v26
|
||||
; nextln: }
|
||||
|
||||
; This function requires too many registers and must spill.
|
||||
|
||||
function %yes_spill(i64, i64) {
|
||||
ebb0(v0: i64, v1: i64):
|
||||
v2 = load.i32 v0+0
|
||||
v3 = load.i32 v0+8
|
||||
v4 = load.i32 v0+16
|
||||
v5 = load.i32 v0+24
|
||||
v6 = load.i32 v0+32
|
||||
v7 = load.i32 v0+40
|
||||
v8 = load.i32 v0+48
|
||||
v9 = load.i32 v0+56
|
||||
v10 = load.i32 v0+64
|
||||
v11 = load.i32 v0+72
|
||||
v12 = load.i32 v0+80
|
||||
v13 = load.i32 v0+88
|
||||
v14 = load.i32 v0+96
|
||||
v15 = load.i32 v0+104
|
||||
store.i32 v2, v1+0
|
||||
store.i32 v3, v1+8
|
||||
store.i32 v4, v1+16
|
||||
store.i32 v5, v1+24
|
||||
store.i32 v6, v1+32
|
||||
store.i32 v7, v1+40
|
||||
store.i32 v8, v1+48
|
||||
store.i32 v9, v1+56
|
||||
store.i32 v10, v1+64
|
||||
store.i32 v11, v1+72
|
||||
store.i32 v12, v1+80
|
||||
store.i32 v13, v1+88
|
||||
store.i32 v14, v1+96
|
||||
store.i32 v15, v1+104
|
||||
return
|
||||
}
|
||||
|
||||
; check: function %yes_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] system_v {
|
||||
; check: ss0 = spill_slot
|
||||
|
||||
; check: ebb0(v16: i64 [%rdi], v17: i64 [%rsi], v48: i64 [%rbp], v49: i64 [%rbx], v50: i64 [%r12], v51: i64 [%r13], v52: i64 [%r14], v53: i64 [%r15]):
|
||||
; nextln: x86_push v48
|
||||
; nextln: copy_special %rsp -> %rbp
|
||||
; nextln: x86_push v49
|
||||
; nextln: x86_push v50
|
||||
; nextln: x86_push v51
|
||||
; nextln: x86_push v52
|
||||
; nextln: x86_push v53
|
||||
; nextln: adjust_sp_imm
|
||||
|
||||
; check: spill
|
||||
|
||||
; check: fill
|
||||
|
||||
; check: adjust_sp_imm
|
||||
; nextln: v59 = x86_pop.i64
|
||||
; nextln: v58 = x86_pop.i64
|
||||
; nextln: v57 = x86_pop.i64
|
||||
; nextln: v56 = x86_pop.i64
|
||||
; nextln: v55 = x86_pop.i64
|
||||
; nextln: v54 = x86_pop.i64
|
||||
; nextln: return v54, v55, v56, v57, v58, v59
|
||||
; nextln: }
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
use super::registers::{D, GPR, Q, S};
|
||||
use ir;
|
||||
use isa::RegClass;
|
||||
use regalloc::AllocatableSet;
|
||||
use regalloc::RegisterSet;
|
||||
use settings as shared_settings;
|
||||
|
||||
/// Legalize `sig`.
|
||||
@@ -30,6 +30,6 @@ pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
|
||||
}
|
||||
|
||||
/// Get the set of allocatable registers for `func`.
|
||||
pub fn allocatable_registers(_func: &ir::Function) -> AllocatableSet {
|
||||
pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
@@ -92,7 +92,7 @@ impl TargetIsa for Isa {
|
||||
abi::regclass_for_abi_type(ty)
|
||||
}
|
||||
|
||||
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::AllocatableSet {
|
||||
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
|
||||
abi::allocatable_registers(func)
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
use super::registers::{FPR, GPR};
|
||||
use ir;
|
||||
use isa::RegClass;
|
||||
use regalloc::AllocatableSet;
|
||||
use regalloc::RegisterSet;
|
||||
use settings as shared_settings;
|
||||
|
||||
/// Legalize `sig`.
|
||||
@@ -21,6 +21,6 @@ pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
|
||||
}
|
||||
|
||||
/// Get the set of allocatable registers for `func`.
|
||||
pub fn allocatable_registers(_func: &ir::Function) -> AllocatableSet {
|
||||
pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
@@ -85,7 +85,7 @@ impl TargetIsa for Isa {
|
||||
abi::regclass_for_abi_type(ty)
|
||||
}
|
||||
|
||||
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::AllocatableSet {
|
||||
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
|
||||
abi::allocatable_registers(func)
|
||||
}
|
||||
|
||||
|
||||
@@ -6,9 +6,10 @@ use cursor::{Cursor, CursorPosition, EncCursor};
|
||||
use ir;
|
||||
use ir::immediates::Imm64;
|
||||
use ir::stackslot::{StackOffset, StackSize};
|
||||
use ir::{AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, CallConv, InstBuilder};
|
||||
use ir::{AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, CallConv, InstBuilder,
|
||||
ValueLoc};
|
||||
use isa::{RegClass, RegUnit, TargetIsa};
|
||||
use regalloc::AllocatableSet;
|
||||
use regalloc::RegisterSet;
|
||||
use result;
|
||||
use settings as shared_settings;
|
||||
use stack_layout::layout_stack;
|
||||
@@ -140,11 +141,8 @@ pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
|
||||
}
|
||||
|
||||
/// Get the set of allocatable registers for `func`.
|
||||
pub fn allocatable_registers(
|
||||
_func: &ir::Function,
|
||||
flags: &shared_settings::Flags,
|
||||
) -> AllocatableSet {
|
||||
let mut regs = AllocatableSet::new();
|
||||
pub fn allocatable_registers(_func: &ir::Function, flags: &shared_settings::Flags) -> RegisterSet {
|
||||
let mut regs = RegisterSet::new();
|
||||
regs.take(GPR, RU::rsp as RegUnit);
|
||||
regs.take(GPR, RU::rbp as RegUnit);
|
||||
|
||||
@@ -160,7 +158,7 @@ pub fn allocatable_registers(
|
||||
}
|
||||
|
||||
/// Get the set of callee-saved registers.
|
||||
pub fn callee_saved_registers(flags: &shared_settings::Flags) -> &'static [RU] {
|
||||
fn callee_saved_gprs(flags: &shared_settings::Flags) -> &'static [RU] {
|
||||
if flags.is_64bit() {
|
||||
&[RU::rbx, RU::r12, RU::r13, RU::r14, RU::r15]
|
||||
} else {
|
||||
@@ -168,6 +166,28 @@ pub fn callee_saved_registers(flags: &shared_settings::Flags) -> &'static [RU] {
|
||||
}
|
||||
}
|
||||
|
||||
fn callee_saved_gprs_used(flags: &shared_settings::Flags, func: &ir::Function) -> RegisterSet {
|
||||
let mut all_callee_saved = RegisterSet::empty();
|
||||
for reg in callee_saved_gprs(flags) {
|
||||
all_callee_saved.free(GPR, *reg as RegUnit);
|
||||
}
|
||||
|
||||
let mut used = RegisterSet::empty();
|
||||
for value_loc in func.locations.values() {
|
||||
// Note that `value_loc` here contains only a single unit of a potentially multi-unit
|
||||
// register. We don't use registers that overlap each other in the x86 ISA, but in others
|
||||
// we do. So this should not be blindly reused.
|
||||
if let ValueLoc::Reg(ru) = *value_loc {
|
||||
if !used.is_avail(GPR, ru) {
|
||||
used.free(GPR, ru);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
used.intersect(&all_callee_saved);
|
||||
return used;
|
||||
}
|
||||
|
||||
pub fn prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult {
|
||||
match func.signature.call_conv {
|
||||
ir::CallConv::SystemV => system_v_prologue_epilogue(func, isa),
|
||||
@@ -203,7 +223,8 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
|
||||
} else {
|
||||
ir::types::I32
|
||||
};
|
||||
let csrs = callee_saved_registers(isa.flags());
|
||||
|
||||
let csrs = callee_saved_gprs_used(isa.flags(), func);
|
||||
|
||||
// The reserved stack area is composed of:
|
||||
// return address + frame pointer + all callee-saved registers that are used
|
||||
@@ -212,7 +233,7 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
|
||||
// instruction. Each of the others we will then push explicitly. Then we
|
||||
// will adjust the stack pointer to make room for the rest of the required
|
||||
// space for this frame.
|
||||
let csr_stack_size = ((csrs.len() + 2) * word_size as usize) as i32;
|
||||
let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size as usize) as i32;
|
||||
func.create_stack_slot(ir::StackSlotData {
|
||||
kind: ir::StackSlotKind::IncomingArg,
|
||||
size: csr_stack_size as u32,
|
||||
@@ -231,9 +252,8 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
|
||||
func.signature.params.push(fp_arg);
|
||||
func.signature.returns.push(fp_arg);
|
||||
|
||||
for csr in csrs.iter() {
|
||||
let csr_arg =
|
||||
ir::AbiParam::special_reg(csr_type, ir::ArgumentPurpose::CalleeSaved, *csr as RegUnit);
|
||||
for csr in csrs.iter(GPR) {
|
||||
let csr_arg = ir::AbiParam::special_reg(csr_type, ir::ArgumentPurpose::CalleeSaved, csr);
|
||||
func.signature.params.push(csr_arg);
|
||||
func.signature.returns.push(csr_arg);
|
||||
}
|
||||
@@ -241,11 +261,11 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
|
||||
// Set up the cursor and insert the prologue
|
||||
let entry_ebb = func.layout.entry_block().expect("missing entry block");
|
||||
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
|
||||
insert_system_v_prologue(&mut pos, local_stack_size, csr_type, csrs);
|
||||
insert_system_v_prologue(&mut pos, local_stack_size, csr_type, &csrs);
|
||||
|
||||
// Reset the cursor and insert the epilogue
|
||||
let mut pos = pos.at_position(CursorPosition::Nowhere);
|
||||
insert_system_v_epilogues(&mut pos, local_stack_size, csr_type, csrs);
|
||||
insert_system_v_epilogues(&mut pos, local_stack_size, csr_type, &csrs);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
@@ -255,7 +275,7 @@ fn insert_system_v_prologue(
|
||||
pos: &mut EncCursor,
|
||||
stack_size: i64,
|
||||
csr_type: ir::types::Type,
|
||||
csrs: &'static [RU],
|
||||
csrs: &RegisterSet,
|
||||
) {
|
||||
// Append param to entry EBB
|
||||
let ebb = pos.current_ebb().expect("missing ebb under cursor");
|
||||
@@ -268,12 +288,12 @@ fn insert_system_v_prologue(
|
||||
RU::rbp as RegUnit,
|
||||
);
|
||||
|
||||
for reg in csrs.iter() {
|
||||
for reg in csrs.iter(GPR) {
|
||||
// Append param to entry EBB
|
||||
let csr_arg = pos.func.dfg.append_ebb_param(ebb, csr_type);
|
||||
|
||||
// Assign it a location
|
||||
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(*reg as RegUnit);
|
||||
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
|
||||
|
||||
// Remember it so we can push it momentarily
|
||||
pos.ins().x86_push(csr_arg);
|
||||
@@ -289,7 +309,7 @@ fn insert_system_v_epilogues(
|
||||
pos: &mut EncCursor,
|
||||
stack_size: i64,
|
||||
csr_type: ir::types::Type,
|
||||
csrs: &'static [RU],
|
||||
csrs: &RegisterSet,
|
||||
) {
|
||||
while let Some(ebb) = pos.next_ebb() {
|
||||
pos.goto_last_inst(ebb);
|
||||
@@ -307,7 +327,7 @@ fn insert_system_v_epilogue(
|
||||
stack_size: i64,
|
||||
pos: &mut EncCursor,
|
||||
csr_type: ir::types::Type,
|
||||
csrs: &'static [RU],
|
||||
csrs: &RegisterSet,
|
||||
) {
|
||||
if stack_size > 0 {
|
||||
pos.ins().adjust_sp_imm(Imm64::new(stack_size));
|
||||
@@ -321,11 +341,11 @@ fn insert_system_v_epilogue(
|
||||
pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
|
||||
pos.func.dfg.append_inst_arg(inst, fp_ret);
|
||||
|
||||
for reg in csrs.iter() {
|
||||
for reg in csrs.iter(GPR) {
|
||||
let csr_ret = pos.ins().x86_pop(csr_type);
|
||||
pos.prev_inst();
|
||||
|
||||
pos.func.locations[csr_ret] = ir::ValueLoc::Reg(*reg as RegUnit);
|
||||
pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg);
|
||||
pos.func.dfg.append_inst_arg(inst, csr_ret);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,7 +98,7 @@ impl TargetIsa for Isa {
|
||||
abi::regclass_for_abi_type(ty)
|
||||
}
|
||||
|
||||
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::AllocatableSet {
|
||||
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
|
||||
abi::allocatable_registers(func, &self.shared_flags)
|
||||
}
|
||||
|
||||
|
||||
@@ -238,7 +238,7 @@ pub trait TargetIsa: fmt::Display {
|
||||
///
|
||||
/// This set excludes reserved registers like the stack pointer and other special-purpose
|
||||
/// registers.
|
||||
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::AllocatableSet;
|
||||
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet;
|
||||
|
||||
/// Compute the stack layout and insert prologue and epilogue code into `func`.
|
||||
///
|
||||
|
||||
@@ -10,7 +10,7 @@ use super::settings;
|
||||
use abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
|
||||
use ir::{self, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, Type};
|
||||
use isa::RegClass;
|
||||
use regalloc::AllocatableSet;
|
||||
use regalloc::RegisterSet;
|
||||
use settings as shared_settings;
|
||||
use std::i32;
|
||||
|
||||
@@ -120,8 +120,8 @@ pub fn regclass_for_abi_type(ty: Type) -> RegClass {
|
||||
if ty.is_float() { FPR } else { GPR }
|
||||
}
|
||||
|
||||
pub fn allocatable_registers(_func: &ir::Function, isa_flags: &settings::Flags) -> AllocatableSet {
|
||||
let mut regs = AllocatableSet::new();
|
||||
pub fn allocatable_registers(_func: &ir::Function, isa_flags: &settings::Flags) -> RegisterSet {
|
||||
let mut regs = RegisterSet::new();
|
||||
regs.take(GPR, GPR.unit(0)); // Hard-wired 0.
|
||||
// %x1 is the link register which is available for allocation.
|
||||
regs.take(GPR, GPR.unit(2)); // Stack pointer.
|
||||
|
||||
@@ -92,7 +92,7 @@ impl TargetIsa for Isa {
|
||||
abi::regclass_for_abi_type(ty)
|
||||
}
|
||||
|
||||
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::AllocatableSet {
|
||||
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
|
||||
abi::allocatable_registers(func, &self.isa_flags)
|
||||
}
|
||||
|
||||
|
||||
@@ -51,7 +51,7 @@ use isa::{regs_overlap, RegClass, RegInfo, RegUnit};
|
||||
use packed_option::PackedOption;
|
||||
use regalloc::RegDiversions;
|
||||
use regalloc::affinity::Affinity;
|
||||
use regalloc::allocatable_set::AllocatableSet;
|
||||
use regalloc::register_set::RegisterSet;
|
||||
use regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
|
||||
use regalloc::liveness::Liveness;
|
||||
use regalloc::liverange::{LiveRange, LiveRangeContext};
|
||||
@@ -96,7 +96,7 @@ struct Context<'a> {
|
||||
|
||||
// Pristine set of registers that the allocator can use.
|
||||
// This set remains immutable, we make clones.
|
||||
usable_regs: AllocatableSet,
|
||||
usable_regs: RegisterSet,
|
||||
}
|
||||
|
||||
impl Coloring {
|
||||
@@ -699,7 +699,7 @@ impl<'a> Context<'a> {
|
||||
defs: &[LiveValue],
|
||||
throughs: &[LiveValue],
|
||||
replace_global_defines: &mut bool,
|
||||
global_regs: &AllocatableSet,
|
||||
global_regs: &RegisterSet,
|
||||
) {
|
||||
for (op, lv) in constraints.iter().zip(defs) {
|
||||
match op.kind {
|
||||
@@ -732,7 +732,7 @@ impl<'a> Context<'a> {
|
||||
defs: &[LiveValue],
|
||||
throughs: &[LiveValue],
|
||||
replace_global_defines: &mut bool,
|
||||
global_regs: &AllocatableSet,
|
||||
global_regs: &RegisterSet,
|
||||
) {
|
||||
// It's technically possible for a call instruction to have fixed results before the
|
||||
// variable list of results, but we have no known instances of that.
|
||||
@@ -797,7 +797,7 @@ impl<'a> Context<'a> {
|
||||
constraints: &[OperandConstraint],
|
||||
defs: &[LiveValue],
|
||||
replace_global_defines: &mut bool,
|
||||
global_regs: &AllocatableSet,
|
||||
global_regs: &RegisterSet,
|
||||
) {
|
||||
for (op, lv) in constraints.iter().zip(defs) {
|
||||
match op.kind {
|
||||
@@ -843,9 +843,9 @@ impl<'a> Context<'a> {
|
||||
fn iterate_solution(
|
||||
&mut self,
|
||||
throughs: &[LiveValue],
|
||||
global_regs: &AllocatableSet,
|
||||
global_regs: &RegisterSet,
|
||||
replace_global_defines: &mut bool,
|
||||
) -> AllocatableSet {
|
||||
) -> RegisterSet {
|
||||
// Make sure `try_add_var()` below doesn't create a variable with too loose constraints.
|
||||
self.program_complete_input_constraints();
|
||||
|
||||
@@ -923,7 +923,7 @@ impl<'a> Context<'a> {
|
||||
/// inserted before.
|
||||
///
|
||||
/// The solver needs to be reminded of the available registers before any moves are inserted.
|
||||
fn shuffle_inputs(&mut self, regs: &mut AllocatableSet) {
|
||||
fn shuffle_inputs(&mut self, regs: &mut RegisterSet) {
|
||||
use regalloc::solver::Move::*;
|
||||
|
||||
let spills = self.solver.schedule_moves(regs);
|
||||
@@ -1114,19 +1114,19 @@ fn program_input_abi(
|
||||
struct AvailableRegs {
|
||||
/// The exact set of registers available on the input side of the current instruction. This
|
||||
/// takes into account register diversions, and it includes both local and global live ranges.
|
||||
input: AllocatableSet,
|
||||
input: RegisterSet,
|
||||
|
||||
/// Registers available for allocating globally live values. This set ignores any local values,
|
||||
/// and it does not account for register diversions.
|
||||
///
|
||||
/// Global values must be allocated out of this set because conflicts with other global values
|
||||
/// can't be resolved with local diversions.
|
||||
global: AllocatableSet,
|
||||
global: RegisterSet,
|
||||
}
|
||||
|
||||
impl AvailableRegs {
|
||||
/// Initialize both the input and global sets from `regs`.
|
||||
pub fn new(regs: &AllocatableSet) -> AvailableRegs {
|
||||
pub fn new(regs: &RegisterSet) -> AvailableRegs {
|
||||
AvailableRegs {
|
||||
input: regs.clone(),
|
||||
global: regs.clone(),
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
//!
|
||||
//! This module contains data structures and algorithms used for register allocation.
|
||||
|
||||
pub mod allocatable_set;
|
||||
pub mod register_set;
|
||||
pub mod coloring;
|
||||
pub mod live_value_tracker;
|
||||
pub mod liveness;
|
||||
@@ -18,6 +18,6 @@ mod reload;
|
||||
mod solver;
|
||||
mod spilling;
|
||||
|
||||
pub use self::allocatable_set::AllocatableSet;
|
||||
pub use self::register_set::RegisterSet;
|
||||
pub use self::context::Context;
|
||||
pub use self::diversion::RegDiversions;
|
||||
|
||||
@@ -37,7 +37,7 @@
|
||||
#![allow(dead_code)]
|
||||
|
||||
use isa::registers::{RegClass, RegClassMask, RegInfo, MAX_TRACKED_TOPRCS};
|
||||
use regalloc::AllocatableSet;
|
||||
use regalloc::RegisterSet;
|
||||
use std::cmp::min;
|
||||
use std::fmt;
|
||||
use std::iter::ExactSizeIterator;
|
||||
@@ -81,7 +81,7 @@ pub struct Pressure {
|
||||
|
||||
impl Pressure {
|
||||
/// Create a new register pressure tracker.
|
||||
pub fn new(reginfo: &RegInfo, usable: &AllocatableSet) -> Pressure {
|
||||
pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Pressure {
|
||||
let mut p = Pressure {
|
||||
aliased: 0,
|
||||
toprc: Default::default(),
|
||||
@@ -271,7 +271,7 @@ impl fmt::Display for Pressure {
|
||||
mod tests {
|
||||
use super::Pressure;
|
||||
use isa::{RegClass, TargetIsa};
|
||||
use regalloc::AllocatableSet;
|
||||
use regalloc::RegisterSet;
|
||||
use std::borrow::Borrow;
|
||||
use std::boxed::Box;
|
||||
|
||||
@@ -302,7 +302,7 @@ mod tests {
|
||||
let gpr = rc_by_name(isa, "GPR");
|
||||
let s = rc_by_name(isa, "S");
|
||||
let reginfo = isa.register_info();
|
||||
let regs = AllocatableSet::new();
|
||||
let regs = RegisterSet::new();
|
||||
|
||||
let mut pressure = Pressure::new(®info, ®s);
|
||||
let mut count = 0;
|
||||
@@ -331,7 +331,7 @@ mod tests {
|
||||
let d = rc_by_name(isa, "D");
|
||||
let q = rc_by_name(isa, "Q");
|
||||
let reginfo = isa.register_info();
|
||||
let regs = AllocatableSet::new();
|
||||
let regs = RegisterSet::new();
|
||||
|
||||
let mut pressure = Pressure::new(®info, ®s);
|
||||
assert_eq!(pressure.check_avail(s), 0);
|
||||
|
||||
@@ -13,7 +13,7 @@ use std::mem::size_of_val;
|
||||
|
||||
/// Set of registers available for allocation.
|
||||
#[derive(Clone)]
|
||||
pub struct AllocatableSet {
|
||||
pub struct RegisterSet {
|
||||
avail: RegUnitMask,
|
||||
}
|
||||
|
||||
@@ -32,7 +32,7 @@ fn bitmask(rc: RegClass, reg: RegUnit) -> (usize, u32) {
|
||||
(word_index, reg_bits)
|
||||
}
|
||||
|
||||
impl AllocatableSet {
|
||||
impl RegisterSet {
|
||||
/// Create a new register set with all registers available.
|
||||
///
|
||||
/// Note that this includes *all* registers. Query the `TargetIsa` object to get a set of
|
||||
@@ -41,6 +41,11 @@ impl AllocatableSet {
|
||||
Self { avail: [!0; 3] }
|
||||
}
|
||||
|
||||
/// Create a new register set with no registers available.
|
||||
pub fn empty() -> Self {
|
||||
Self { avail: [0; 3] }
|
||||
}
|
||||
|
||||
/// Returns `true` if the specified register is available.
|
||||
pub fn is_avail(&self, rc: RegClass, reg: RegUnit) -> bool {
|
||||
let (idx, bits) = bitmask(rc, reg);
|
||||
@@ -62,7 +67,7 @@ impl AllocatableSet {
|
||||
self.avail[idx] &= !bits;
|
||||
}
|
||||
|
||||
/// Make `reg` available for allocation again.
|
||||
/// Return `reg` and all of its register units to the set of available registers.
|
||||
pub fn free(&mut self, rc: RegClass, reg: RegUnit) {
|
||||
let (idx, bits) = bitmask(rc, reg);
|
||||
debug_assert!(
|
||||
@@ -98,15 +103,15 @@ impl AllocatableSet {
|
||||
/// of `other`.
|
||||
///
|
||||
/// This assumes that unused bits are 1.
|
||||
pub fn interferes_with(&self, other: &AllocatableSet) -> bool {
|
||||
pub fn interferes_with(&self, other: &RegisterSet) -> bool {
|
||||
self.avail.iter().zip(&other.avail).any(
|
||||
|(&x, &y)| (x | y) != !0,
|
||||
)
|
||||
}
|
||||
|
||||
/// Intersect this set of allocatable registers with `other`. This has the effect of removing
|
||||
/// any register units from this set that are not in `other`.
|
||||
pub fn intersect(&mut self, other: &AllocatableSet) {
|
||||
/// Intersect this set of registers with `other`. This has the effect of removing any register
|
||||
/// units from this set that are not in `other`.
|
||||
pub fn intersect(&mut self, other: &RegisterSet) {
|
||||
for (x, &y) in self.avail.iter_mut().zip(&other.avail) {
|
||||
*x &= y;
|
||||
}
|
||||
@@ -114,8 +119,8 @@ impl AllocatableSet {
|
||||
|
||||
/// Return an object that can display this register set, using the register info from the
|
||||
/// target ISA.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayAllocatableSet<'a> {
|
||||
DisplayAllocatableSet(self.clone(), regs.into())
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayRegisterSet<'a> {
|
||||
DisplayRegisterSet(self.clone(), regs.into())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -157,10 +162,10 @@ impl Iterator for RegSetIter {
|
||||
|
||||
impl ExactSizeIterator for RegSetIter {}
|
||||
|
||||
/// Displaying an `AllocatableSet` correctly requires the associated `RegInfo` from the target ISA.
|
||||
pub struct DisplayAllocatableSet<'a>(AllocatableSet, Option<&'a RegInfo>);
|
||||
/// Displaying a `RegisterSet` correctly requires the associated `RegInfo` from the target ISA.
|
||||
pub struct DisplayRegisterSet<'a>(RegisterSet, Option<&'a RegInfo>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayAllocatableSet<'a> {
|
||||
impl<'a> fmt::Display for DisplayRegisterSet<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "[")?;
|
||||
match self.1 {
|
||||
@@ -211,7 +216,7 @@ impl<'a> fmt::Display for DisplayAllocatableSet<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for AllocatableSet {
|
||||
impl fmt::Display for RegisterSet {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
self.display(None).fmt(f)
|
||||
}
|
||||
@@ -255,7 +260,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn put_and_take() {
|
||||
let mut regs = AllocatableSet::new();
|
||||
let mut regs = RegisterSet::new();
|
||||
|
||||
// `GPR` has units 28-36.
|
||||
assert_eq!(regs.iter(GPR).len(), 8);
|
||||
@@ -302,8 +307,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn interference() {
|
||||
let mut regs1 = AllocatableSet::new();
|
||||
let mut regs2 = AllocatableSet::new();
|
||||
let mut regs1 = RegisterSet::new();
|
||||
let mut regs2 = RegisterSet::new();
|
||||
|
||||
assert!(!regs1.interferes_with(®s2));
|
||||
regs1.take(&GPR, 32);
|
||||
@@ -98,12 +98,12 @@
|
||||
//! appropriate candidate among the set of live register values, add it as a variable and start
|
||||
//! over.
|
||||
|
||||
use super::AllocatableSet;
|
||||
use super::RegisterSet;
|
||||
use dbg::DisplayList;
|
||||
use entity::{SparseMap, SparseMapValue};
|
||||
use ir::Value;
|
||||
use isa::{RegClass, RegUnit};
|
||||
use regalloc::allocatable_set::RegSetIter;
|
||||
use regalloc::register_set::RegSetIter;
|
||||
use std::cmp;
|
||||
use std::fmt;
|
||||
use std::mem;
|
||||
@@ -184,12 +184,7 @@ impl Variable {
|
||||
|
||||
/// Get an iterator over possible register choices, given the available registers on the input
|
||||
/// and output sides as well as the available global register set.
|
||||
fn iter(
|
||||
&self,
|
||||
iregs: &AllocatableSet,
|
||||
oregs: &AllocatableSet,
|
||||
gregs: &AllocatableSet,
|
||||
) -> RegSetIter {
|
||||
fn iter(&self, iregs: &RegisterSet, oregs: &RegisterSet, gregs: &RegisterSet) -> RegSetIter {
|
||||
if !self.is_output {
|
||||
debug_assert!(!self.is_global, "Global implies output");
|
||||
debug_assert!(self.is_input, "Missing interference set");
|
||||
@@ -476,7 +471,7 @@ pub struct Solver {
|
||||
/// - The 'to' registers of fixed input reassignments are marked as unavailable.
|
||||
/// - Input-side variables are marked as available.
|
||||
///
|
||||
regs_in: AllocatableSet,
|
||||
regs_in: RegisterSet,
|
||||
|
||||
/// Available registers on the output side of the instruction / fixed input scratch space.
|
||||
///
|
||||
@@ -490,7 +485,7 @@ pub struct Solver {
|
||||
/// - Fixed output assignments are marked as unavailable.
|
||||
/// - Live-through variables are marked as available.
|
||||
///
|
||||
regs_out: AllocatableSet,
|
||||
regs_out: RegisterSet,
|
||||
|
||||
/// List of register moves scheduled to avoid conflicts.
|
||||
///
|
||||
@@ -509,8 +504,8 @@ impl Solver {
|
||||
assignments: SparseMap::new(),
|
||||
vars: Vec::new(),
|
||||
inputs_done: false,
|
||||
regs_in: AllocatableSet::new(),
|
||||
regs_out: AllocatableSet::new(),
|
||||
regs_in: RegisterSet::new(),
|
||||
regs_out: RegisterSet::new(),
|
||||
moves: Vec::new(),
|
||||
fills: Vec::new(),
|
||||
}
|
||||
@@ -521,8 +516,8 @@ impl Solver {
|
||||
self.assignments.clear();
|
||||
self.vars.clear();
|
||||
self.inputs_done = false;
|
||||
self.regs_in = AllocatableSet::new();
|
||||
self.regs_out = AllocatableSet::new();
|
||||
self.regs_in = RegisterSet::new();
|
||||
self.regs_out = RegisterSet::new();
|
||||
self.moves.clear();
|
||||
self.fills.clear();
|
||||
}
|
||||
@@ -531,13 +526,13 @@ impl Solver {
|
||||
/// allocatable registers.
|
||||
///
|
||||
/// The `regs` set is the allocatable registers before any reassignments are applied.
|
||||
pub fn reset(&mut self, regs: &AllocatableSet) {
|
||||
pub fn reset(&mut self, regs: &RegisterSet) {
|
||||
self.assignments.clear();
|
||||
self.vars.clear();
|
||||
self.inputs_done = false;
|
||||
self.regs_in = regs.clone();
|
||||
// Used for tracking fixed input assignments while `!inputs_done`:
|
||||
self.regs_out = AllocatableSet::new();
|
||||
self.regs_out = RegisterSet::new();
|
||||
self.moves.clear();
|
||||
self.fills.clear();
|
||||
}
|
||||
@@ -870,10 +865,7 @@ impl Solver {
|
||||
/// always trivial.
|
||||
///
|
||||
/// Returns `Ok(regs)` if a solution was found.
|
||||
pub fn quick_solve(
|
||||
&mut self,
|
||||
global_regs: &AllocatableSet,
|
||||
) -> Result<AllocatableSet, SolverError> {
|
||||
pub fn quick_solve(&mut self, global_regs: &RegisterSet) -> Result<RegisterSet, SolverError> {
|
||||
self.find_solution(global_regs)
|
||||
}
|
||||
|
||||
@@ -884,10 +876,7 @@ impl Solver {
|
||||
/// This may return an error with a register class that has run out of registers. If registers
|
||||
/// can be freed up in the starving class, this method can be called again after adding
|
||||
/// variables for the freed registers.
|
||||
pub fn real_solve(
|
||||
&mut self,
|
||||
global_regs: &AllocatableSet,
|
||||
) -> Result<AllocatableSet, SolverError> {
|
||||
pub fn real_solve(&mut self, global_regs: &RegisterSet) -> Result<RegisterSet, SolverError> {
|
||||
// Compute domain sizes for all the variables given the current register sets.
|
||||
for v in &mut self.vars {
|
||||
let d = v.iter(&self.regs_in, &self.regs_out, global_regs).len();
|
||||
@@ -933,10 +922,7 @@ impl Solver {
|
||||
/// If a solution was found, returns `Ok(regs)` with the set of available registers on the
|
||||
/// output side after the solution. If no solution could be found, returns `Err(rc)` with the
|
||||
/// constraint register class that needs more available registers.
|
||||
fn find_solution(
|
||||
&mut self,
|
||||
global_regs: &AllocatableSet,
|
||||
) -> Result<AllocatableSet, SolverError> {
|
||||
fn find_solution(&mut self, global_regs: &RegisterSet) -> Result<RegisterSet, SolverError> {
|
||||
// Available registers on the input and output sides respectively.
|
||||
let mut iregs = self.regs_in.clone();
|
||||
let mut oregs = self.regs_out.clone();
|
||||
@@ -1025,7 +1011,7 @@ impl Solver {
|
||||
/// a register.
|
||||
///
|
||||
/// Returns the number of spills that had to be emitted.
|
||||
pub fn schedule_moves(&mut self, regs: &AllocatableSet) -> usize {
|
||||
pub fn schedule_moves(&mut self, regs: &RegisterSet) -> usize {
|
||||
self.collect_moves();
|
||||
debug_assert!(self.fills.is_empty());
|
||||
|
||||
@@ -1162,7 +1148,7 @@ mod tests {
|
||||
use entity::EntityRef;
|
||||
use ir::Value;
|
||||
use isa::{RegClass, RegInfo, RegUnit, TargetIsa};
|
||||
use regalloc::AllocatableSet;
|
||||
use regalloc::RegisterSet;
|
||||
use std::boxed::Box;
|
||||
|
||||
// Make an arm32 `TargetIsa`, if possible.
|
||||
@@ -1219,8 +1205,8 @@ mod tests {
|
||||
let r0 = gpr.unit(0);
|
||||
let r1 = gpr.unit(1);
|
||||
let r2 = gpr.unit(2);
|
||||
let gregs = AllocatableSet::new();
|
||||
let mut regs = AllocatableSet::new();
|
||||
let gregs = RegisterSet::new();
|
||||
let mut regs = RegisterSet::new();
|
||||
let mut solver = Solver::new();
|
||||
let v10 = Value::new(10);
|
||||
let v11 = Value::new(11);
|
||||
@@ -1277,8 +1263,8 @@ mod tests {
|
||||
let s1 = s.unit(1);
|
||||
let s2 = s.unit(2);
|
||||
let s3 = s.unit(3);
|
||||
let gregs = AllocatableSet::new();
|
||||
let mut regs = AllocatableSet::new();
|
||||
let gregs = RegisterSet::new();
|
||||
let mut regs = RegisterSet::new();
|
||||
let mut solver = Solver::new();
|
||||
let v10 = Value::new(10);
|
||||
let v11 = Value::new(11);
|
||||
@@ -1337,8 +1323,8 @@ mod tests {
|
||||
let r3 = gpr.unit(3);
|
||||
let r4 = gpr.unit(4);
|
||||
let r5 = gpr.unit(5);
|
||||
let gregs = AllocatableSet::new();
|
||||
let mut regs = AllocatableSet::new();
|
||||
let gregs = RegisterSet::new();
|
||||
let mut regs = RegisterSet::new();
|
||||
let mut solver = Solver::new();
|
||||
let v10 = Value::new(10);
|
||||
let v11 = Value::new(11);
|
||||
|
||||
Reference in New Issue
Block a user