Only save callee-saved registers that are used (#293)

* Only save callee-saved registers that are actually being used.

* Rename AllocatableSet to RegisterSet

* Style cleanup and small renames for readability.

* Adjust x86 prologue-epilogue test to account for callee-saved register optimization.

* Add more tests for prologue-epilogue optimizations.
This commit is contained in:
Tyler McMullen
2018-04-03 14:44:12 -07:00
committed by Dan Gohman
parent 0948ca9963
commit 775c674b38
15 changed files with 310 additions and 123 deletions

View File

@@ -3,30 +3,206 @@ set is_64bit
set is_compressed
isa intel haswell
function %foo() {
; An empty function.
function %empty() {
ebb0:
return
}
; check: function %empty(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = incoming_arg 16, offset -16
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: x86_push v0
; nextln: copy_special %rsp -> %rbp
; nextln: v1 = x86_pop.i64
; nextln: return v1
; nextln: }
; A function with a single stack slot.
function %one_stack_slot() {
ss0 = explicit_slot 168
ebb0:
return
}
; check: function %foo(i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] system_v {
; nextln: ss0 = explicit_slot 168, offset -224
; nextln: ss1 = incoming_arg 56, offset -56
; check: ebb0(v0: i64 [%rbp], v1: i64 [%rbx], v2: i64 [%r12], v3: i64 [%r13], v4: i64 [%r14], v5: i64 [%r15]):
; check: function %one_stack_slot(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = explicit_slot 168, offset -184
; nextln: ss1 = incoming_arg 16, offset -16
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: x86_push v0
; nextln: copy_special %rsp -> %rbp
; nextln: x86_push v1
; nextln: x86_push v2
; nextln: x86_push v3
; nextln: x86_push v4
; nextln: x86_push v5
; nextln: adjust_sp_imm -168
; nextln: adjust_sp_imm 168
; nextln: v11 = x86_pop.i64
; nextln: v10 = x86_pop.i64
; nextln: v9 = x86_pop.i64
; nextln: v8 = x86_pop.i64
; nextln: v7 = x86_pop.i64
; nextln: v6 = x86_pop.i64
; nextln: return v6, v7, v8, v9, v10, v11
; nextln: adjust_sp_imm -176
; nextln: adjust_sp_imm 176
; nextln: v1 = x86_pop.i64
; nextln: return v1
; nextln: }
; A function performing a call.
function %call() {
fn0 = function %foo()
ebb0:
call fn0()
return
}
; check: function %call(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
; nextln: ss0 = incoming_arg 16, offset -16
; nextln: sig0 = () system_v
; nextln: fn0 = sig0 %foo
; nextln:
; nextln: ebb0(v0: i64 [%rbp]):
; nextln: x86_push v0
; nextln: copy_special %rsp -> %rbp
; nextln: call fn0()
; nextln: v1 = x86_pop.i64
; nextln: return v1
; nextln: }
; A function that uses a lot of registers but doesn't quite need to spill.
function %no_spill(i64, i64) {
ebb0(v0: i64, v1: i64):
v2 = load.i32 v0+0
v3 = load.i32 v0+8
v4 = load.i32 v0+16
v5 = load.i32 v0+24
v6 = load.i32 v0+32
v7 = load.i32 v0+40
v8 = load.i32 v0+48
v9 = load.i32 v0+56
v10 = load.i32 v0+64
v11 = load.i32 v0+72
v12 = load.i32 v0+80
v13 = load.i32 v0+88
v14 = load.i32 v0+96
store.i32 v2, v1+0
store.i32 v3, v1+8
store.i32 v4, v1+16
store.i32 v5, v1+24
store.i32 v6, v1+32
store.i32 v7, v1+40
store.i32 v8, v1+48
store.i32 v9, v1+56
store.i32 v10, v1+64
store.i32 v11, v1+72
store.i32 v12, v1+80
store.i32 v13, v1+88
store.i32 v14, v1+96
return
}
; check: function %no_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] system_v {
; nextln: ss0 = incoming_arg 56, offset -56
; nextln:
; nextln: ebb0(v0: i64 [%rdi], v1: i64 [%rsi], v15: i64 [%rbp], v16: i64 [%rbx], v17: i64 [%r12], v18: i64 [%r13], v19: i64 [%r14], v20: i64 [%r15]):
; nextln: x86_push v15
; nextln: copy_special %rsp -> %rbp
; nextln: x86_push v16
; nextln: x86_push v17
; nextln: x86_push v18
; nextln: x86_push v19
; nextln: x86_push v20
; nextln: adjust_sp_imm -8
; nextln: v2 = load.i32 v0
; nextln: v3 = load.i32 v0+8
; nextln: v4 = load.i32 v0+16
; nextln: v5 = load.i32 v0+24
; nextln: v6 = load.i32 v0+32
; nextln: v7 = load.i32 v0+40
; nextln: v8 = load.i32 v0+48
; nextln: v9 = load.i32 v0+56
; nextln: v10 = load.i32 v0+64
; nextln: v11 = load.i32 v0+72
; nextln: v12 = load.i32 v0+80
; nextln: v13 = load.i32 v0+88
; nextln: v14 = load.i32 v0+96
; nextln: store v2, v1
; nextln: store v3, v1+8
; nextln: store v4, v1+16
; nextln: store v5, v1+24
; nextln: store v6, v1+32
; nextln: store v7, v1+40
; nextln: store v8, v1+48
; nextln: store v9, v1+56
; nextln: store v10, v1+64
; nextln: store v11, v1+72
; nextln: store v12, v1+80
; nextln: store v13, v1+88
; nextln: store v14, v1+96
; nextln: adjust_sp_imm 8
; nextln: v26 = x86_pop.i64
; nextln: v25 = x86_pop.i64
; nextln: v24 = x86_pop.i64
; nextln: v23 = x86_pop.i64
; nextln: v22 = x86_pop.i64
; nextln: v21 = x86_pop.i64
; nextln: return v21, v22, v23, v24, v25, v26
; nextln: }
; This function requires too many registers and must spill.
function %yes_spill(i64, i64) {
ebb0(v0: i64, v1: i64):
v2 = load.i32 v0+0
v3 = load.i32 v0+8
v4 = load.i32 v0+16
v5 = load.i32 v0+24
v6 = load.i32 v0+32
v7 = load.i32 v0+40
v8 = load.i32 v0+48
v9 = load.i32 v0+56
v10 = load.i32 v0+64
v11 = load.i32 v0+72
v12 = load.i32 v0+80
v13 = load.i32 v0+88
v14 = load.i32 v0+96
v15 = load.i32 v0+104
store.i32 v2, v1+0
store.i32 v3, v1+8
store.i32 v4, v1+16
store.i32 v5, v1+24
store.i32 v6, v1+32
store.i32 v7, v1+40
store.i32 v8, v1+48
store.i32 v9, v1+56
store.i32 v10, v1+64
store.i32 v11, v1+72
store.i32 v12, v1+80
store.i32 v13, v1+88
store.i32 v14, v1+96
store.i32 v15, v1+104
return
}
; check: function %yes_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] system_v {
; check: ss0 = spill_slot
; check: ebb0(v16: i64 [%rdi], v17: i64 [%rsi], v48: i64 [%rbp], v49: i64 [%rbx], v50: i64 [%r12], v51: i64 [%r13], v52: i64 [%r14], v53: i64 [%r15]):
; nextln: x86_push v48
; nextln: copy_special %rsp -> %rbp
; nextln: x86_push v49
; nextln: x86_push v50
; nextln: x86_push v51
; nextln: x86_push v52
; nextln: x86_push v53
; nextln: adjust_sp_imm
; check: spill
; check: fill
; check: adjust_sp_imm
; nextln: v59 = x86_pop.i64
; nextln: v58 = x86_pop.i64
; nextln: v57 = x86_pop.i64
; nextln: v56 = x86_pop.i64
; nextln: v55 = x86_pop.i64
; nextln: v54 = x86_pop.i64
; nextln: return v54, v55, v56, v57, v58, v59
; nextln: }

View File

@@ -3,7 +3,7 @@
use super::registers::{D, GPR, Q, S};
use ir;
use isa::RegClass;
use regalloc::AllocatableSet;
use regalloc::RegisterSet;
use settings as shared_settings;
/// Legalize `sig`.
@@ -30,6 +30,6 @@ pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
}
/// Get the set of allocatable registers for `func`.
pub fn allocatable_registers(_func: &ir::Function) -> AllocatableSet {
pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
unimplemented!()
}

View File

@@ -92,7 +92,7 @@ impl TargetIsa for Isa {
abi::regclass_for_abi_type(ty)
}
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::AllocatableSet {
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
abi::allocatable_registers(func)
}

View File

@@ -3,7 +3,7 @@
use super::registers::{FPR, GPR};
use ir;
use isa::RegClass;
use regalloc::AllocatableSet;
use regalloc::RegisterSet;
use settings as shared_settings;
/// Legalize `sig`.
@@ -21,6 +21,6 @@ pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
}
/// Get the set of allocatable registers for `func`.
pub fn allocatable_registers(_func: &ir::Function) -> AllocatableSet {
pub fn allocatable_registers(_func: &ir::Function) -> RegisterSet {
unimplemented!()
}

View File

@@ -85,7 +85,7 @@ impl TargetIsa for Isa {
abi::regclass_for_abi_type(ty)
}
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::AllocatableSet {
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
abi::allocatable_registers(func)
}

View File

@@ -6,9 +6,10 @@ use cursor::{Cursor, CursorPosition, EncCursor};
use ir;
use ir::immediates::Imm64;
use ir::stackslot::{StackOffset, StackSize};
use ir::{AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, CallConv, InstBuilder};
use ir::{AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, CallConv, InstBuilder,
ValueLoc};
use isa::{RegClass, RegUnit, TargetIsa};
use regalloc::AllocatableSet;
use regalloc::RegisterSet;
use result;
use settings as shared_settings;
use stack_layout::layout_stack;
@@ -140,11 +141,8 @@ pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
}
/// Get the set of allocatable registers for `func`.
pub fn allocatable_registers(
_func: &ir::Function,
flags: &shared_settings::Flags,
) -> AllocatableSet {
let mut regs = AllocatableSet::new();
pub fn allocatable_registers(_func: &ir::Function, flags: &shared_settings::Flags) -> RegisterSet {
let mut regs = RegisterSet::new();
regs.take(GPR, RU::rsp as RegUnit);
regs.take(GPR, RU::rbp as RegUnit);
@@ -160,7 +158,7 @@ pub fn allocatable_registers(
}
/// Get the set of callee-saved registers.
pub fn callee_saved_registers(flags: &shared_settings::Flags) -> &'static [RU] {
fn callee_saved_gprs(flags: &shared_settings::Flags) -> &'static [RU] {
if flags.is_64bit() {
&[RU::rbx, RU::r12, RU::r13, RU::r14, RU::r15]
} else {
@@ -168,6 +166,28 @@ pub fn callee_saved_registers(flags: &shared_settings::Flags) -> &'static [RU] {
}
}
/// Get the set of callee-saved GPRs that are actually used by `func`.
///
/// A register counts as used when some value in `func.locations` has been assigned to it. In the
/// returned set, a register unit is marked available exactly when it is both one of the
/// callee-saved GPRs for `flags` and the location of at least one value.
fn callee_saved_gprs_used(flags: &shared_settings::Flags, func: &ir::Function) -> RegisterSet {
    // Start from an empty set and add every callee-saved GPR for the current mode.
    let mut all_callee_saved = RegisterSet::empty();
    for &reg in callee_saved_gprs(flags) {
        all_callee_saved.free(GPR, reg as RegUnit);
    }

    let mut used = RegisterSet::empty();
    for value_loc in func.locations.values() {
        // Note that `value_loc` here contains only a single unit of a potentially multi-unit
        // register. We don't use registers that overlap each other in the x86 ISA, but in others
        // we do. So this should not be blindly reused.
        if let ValueLoc::Reg(ru) = *value_loc {
            // Several values may live in the same register; only add each unit once.
            if !used.is_avail(GPR, ru) {
                used.free(GPR, ru);
            }
        }
    }

    // Keep only the used registers that are also callee-saved.
    used.intersect(&all_callee_saved);
    used
}
pub fn prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> result::CtonResult {
match func.signature.call_conv {
ir::CallConv::SystemV => system_v_prologue_epilogue(func, isa),
@@ -203,7 +223,8 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
} else {
ir::types::I32
};
let csrs = callee_saved_registers(isa.flags());
let csrs = callee_saved_gprs_used(isa.flags(), func);
// The reserved stack area is composed of:
// return address + frame pointer + all used callee-saved registers
@@ -212,7 +233,7 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
// instruction. Each of the others we will then push explicitly. Then we
// will adjust the stack pointer to make room for the rest of the required
// space for this frame.
let csr_stack_size = ((csrs.len() + 2) * word_size as usize) as i32;
let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size as usize) as i32;
func.create_stack_slot(ir::StackSlotData {
kind: ir::StackSlotKind::IncomingArg,
size: csr_stack_size as u32,
@@ -231,9 +252,8 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
func.signature.params.push(fp_arg);
func.signature.returns.push(fp_arg);
for csr in csrs.iter() {
let csr_arg =
ir::AbiParam::special_reg(csr_type, ir::ArgumentPurpose::CalleeSaved, *csr as RegUnit);
for csr in csrs.iter(GPR) {
let csr_arg = ir::AbiParam::special_reg(csr_type, ir::ArgumentPurpose::CalleeSaved, csr);
func.signature.params.push(csr_arg);
func.signature.returns.push(csr_arg);
}
@@ -241,11 +261,11 @@ pub fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &TargetIsa) -> r
// Set up the cursor and insert the prologue
let entry_ebb = func.layout.entry_block().expect("missing entry block");
let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_ebb);
insert_system_v_prologue(&mut pos, local_stack_size, csr_type, csrs);
insert_system_v_prologue(&mut pos, local_stack_size, csr_type, &csrs);
// Reset the cursor and insert the epilogue
let mut pos = pos.at_position(CursorPosition::Nowhere);
insert_system_v_epilogues(&mut pos, local_stack_size, csr_type, csrs);
insert_system_v_epilogues(&mut pos, local_stack_size, csr_type, &csrs);
Ok(())
}
@@ -255,7 +275,7 @@ fn insert_system_v_prologue(
pos: &mut EncCursor,
stack_size: i64,
csr_type: ir::types::Type,
csrs: &'static [RU],
csrs: &RegisterSet,
) {
// Append param to entry EBB
let ebb = pos.current_ebb().expect("missing ebb under cursor");
@@ -268,12 +288,12 @@ fn insert_system_v_prologue(
RU::rbp as RegUnit,
);
for reg in csrs.iter() {
for reg in csrs.iter(GPR) {
// Append param to entry EBB
let csr_arg = pos.func.dfg.append_ebb_param(ebb, csr_type);
// Assign it a location
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(*reg as RegUnit);
pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
// Remember it so we can push it momentarily
pos.ins().x86_push(csr_arg);
@@ -289,7 +309,7 @@ fn insert_system_v_epilogues(
pos: &mut EncCursor,
stack_size: i64,
csr_type: ir::types::Type,
csrs: &'static [RU],
csrs: &RegisterSet,
) {
while let Some(ebb) = pos.next_ebb() {
pos.goto_last_inst(ebb);
@@ -307,7 +327,7 @@ fn insert_system_v_epilogue(
stack_size: i64,
pos: &mut EncCursor,
csr_type: ir::types::Type,
csrs: &'static [RU],
csrs: &RegisterSet,
) {
if stack_size > 0 {
pos.ins().adjust_sp_imm(Imm64::new(stack_size));
@@ -321,11 +341,11 @@ fn insert_system_v_epilogue(
pos.func.locations[fp_ret] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
pos.func.dfg.append_inst_arg(inst, fp_ret);
for reg in csrs.iter() {
for reg in csrs.iter(GPR) {
let csr_ret = pos.ins().x86_pop(csr_type);
pos.prev_inst();
pos.func.locations[csr_ret] = ir::ValueLoc::Reg(*reg as RegUnit);
pos.func.locations[csr_ret] = ir::ValueLoc::Reg(reg);
pos.func.dfg.append_inst_arg(inst, csr_ret);
}
}

View File

@@ -98,7 +98,7 @@ impl TargetIsa for Isa {
abi::regclass_for_abi_type(ty)
}
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::AllocatableSet {
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
abi::allocatable_registers(func, &self.shared_flags)
}

View File

@@ -238,7 +238,7 @@ pub trait TargetIsa: fmt::Display {
///
/// This set excludes reserved registers like the stack pointer and other special-purpose
/// registers.
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::AllocatableSet;
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet;
/// Compute the stack layout and insert prologue and epilogue code into `func`.
///

View File

@@ -10,7 +10,7 @@ use super::settings;
use abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
use ir::{self, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, Type};
use isa::RegClass;
use regalloc::AllocatableSet;
use regalloc::RegisterSet;
use settings as shared_settings;
use std::i32;
@@ -120,8 +120,8 @@ pub fn regclass_for_abi_type(ty: Type) -> RegClass {
if ty.is_float() { FPR } else { GPR }
}
pub fn allocatable_registers(_func: &ir::Function, isa_flags: &settings::Flags) -> AllocatableSet {
let mut regs = AllocatableSet::new();
pub fn allocatable_registers(_func: &ir::Function, isa_flags: &settings::Flags) -> RegisterSet {
let mut regs = RegisterSet::new();
regs.take(GPR, GPR.unit(0)); // Hard-wired 0.
// %x1 is the link register which is available for allocation.
regs.take(GPR, GPR.unit(2)); // Stack pointer.

View File

@@ -92,7 +92,7 @@ impl TargetIsa for Isa {
abi::regclass_for_abi_type(ty)
}
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::AllocatableSet {
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet {
abi::allocatable_registers(func, &self.isa_flags)
}

View File

@@ -51,7 +51,7 @@ use isa::{regs_overlap, RegClass, RegInfo, RegUnit};
use packed_option::PackedOption;
use regalloc::RegDiversions;
use regalloc::affinity::Affinity;
use regalloc::allocatable_set::AllocatableSet;
use regalloc::register_set::RegisterSet;
use regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
use regalloc::liveness::Liveness;
use regalloc::liverange::{LiveRange, LiveRangeContext};
@@ -96,7 +96,7 @@ struct Context<'a> {
// Pristine set of registers that the allocator can use.
// This set remains immutable, we make clones.
usable_regs: AllocatableSet,
usable_regs: RegisterSet,
}
impl Coloring {
@@ -699,7 +699,7 @@ impl<'a> Context<'a> {
defs: &[LiveValue],
throughs: &[LiveValue],
replace_global_defines: &mut bool,
global_regs: &AllocatableSet,
global_regs: &RegisterSet,
) {
for (op, lv) in constraints.iter().zip(defs) {
match op.kind {
@@ -732,7 +732,7 @@ impl<'a> Context<'a> {
defs: &[LiveValue],
throughs: &[LiveValue],
replace_global_defines: &mut bool,
global_regs: &AllocatableSet,
global_regs: &RegisterSet,
) {
// It's technically possible for a call instruction to have fixed results before the
// variable list of results, but we have no known instances of that.
@@ -797,7 +797,7 @@ impl<'a> Context<'a> {
constraints: &[OperandConstraint],
defs: &[LiveValue],
replace_global_defines: &mut bool,
global_regs: &AllocatableSet,
global_regs: &RegisterSet,
) {
for (op, lv) in constraints.iter().zip(defs) {
match op.kind {
@@ -843,9 +843,9 @@ impl<'a> Context<'a> {
fn iterate_solution(
&mut self,
throughs: &[LiveValue],
global_regs: &AllocatableSet,
global_regs: &RegisterSet,
replace_global_defines: &mut bool,
) -> AllocatableSet {
) -> RegisterSet {
// Make sure `try_add_var()` below doesn't create a variable with too loose constraints.
self.program_complete_input_constraints();
@@ -923,7 +923,7 @@ impl<'a> Context<'a> {
/// inserted before.
///
/// The solver needs to be reminded of the available registers before any moves are inserted.
fn shuffle_inputs(&mut self, regs: &mut AllocatableSet) {
fn shuffle_inputs(&mut self, regs: &mut RegisterSet) {
use regalloc::solver::Move::*;
let spills = self.solver.schedule_moves(regs);
@@ -1114,19 +1114,19 @@ fn program_input_abi(
struct AvailableRegs {
/// The exact set of registers available on the input side of the current instruction. This
/// takes into account register diversions, and it includes both local and global live ranges.
input: AllocatableSet,
input: RegisterSet,
/// Registers available for allocating globally live values. This set ignores any local values,
/// and it does not account for register diversions.
///
/// Global values must be allocated out of this set because conflicts with other global values
/// can't be resolved with local diversions.
global: AllocatableSet,
global: RegisterSet,
}
impl AvailableRegs {
/// Initialize both the input and global sets from `regs`.
pub fn new(regs: &AllocatableSet) -> AvailableRegs {
pub fn new(regs: &RegisterSet) -> AvailableRegs {
AvailableRegs {
input: regs.clone(),
global: regs.clone(),

View File

@@ -2,7 +2,7 @@
//!
//! This module contains data structures and algorithms used for register allocation.
pub mod allocatable_set;
pub mod register_set;
pub mod coloring;
pub mod live_value_tracker;
pub mod liveness;
@@ -18,6 +18,6 @@ mod reload;
mod solver;
mod spilling;
pub use self::allocatable_set::AllocatableSet;
pub use self::register_set::RegisterSet;
pub use self::context::Context;
pub use self::diversion::RegDiversions;

View File

@@ -37,7 +37,7 @@
#![allow(dead_code)]
use isa::registers::{RegClass, RegClassMask, RegInfo, MAX_TRACKED_TOPRCS};
use regalloc::AllocatableSet;
use regalloc::RegisterSet;
use std::cmp::min;
use std::fmt;
use std::iter::ExactSizeIterator;
@@ -81,7 +81,7 @@ pub struct Pressure {
impl Pressure {
/// Create a new register pressure tracker.
pub fn new(reginfo: &RegInfo, usable: &AllocatableSet) -> Pressure {
pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Pressure {
let mut p = Pressure {
aliased: 0,
toprc: Default::default(),
@@ -271,7 +271,7 @@ impl fmt::Display for Pressure {
mod tests {
use super::Pressure;
use isa::{RegClass, TargetIsa};
use regalloc::AllocatableSet;
use regalloc::RegisterSet;
use std::borrow::Borrow;
use std::boxed::Box;
@@ -302,7 +302,7 @@ mod tests {
let gpr = rc_by_name(isa, "GPR");
let s = rc_by_name(isa, "S");
let reginfo = isa.register_info();
let regs = AllocatableSet::new();
let regs = RegisterSet::new();
let mut pressure = Pressure::new(&reginfo, &regs);
let mut count = 0;
@@ -331,7 +331,7 @@ mod tests {
let d = rc_by_name(isa, "D");
let q = rc_by_name(isa, "Q");
let reginfo = isa.register_info();
let regs = AllocatableSet::new();
let regs = RegisterSet::new();
let mut pressure = Pressure::new(&reginfo, &regs);
assert_eq!(pressure.check_avail(s), 0);

View File

@@ -13,7 +13,7 @@ use std::mem::size_of_val;
/// Set of registers available for allocation.
#[derive(Clone)]
pub struct AllocatableSet {
pub struct RegisterSet {
avail: RegUnitMask,
}
@@ -32,7 +32,7 @@ fn bitmask(rc: RegClass, reg: RegUnit) -> (usize, u32) {
(word_index, reg_bits)
}
impl AllocatableSet {
impl RegisterSet {
/// Create a new register set with all registers available.
///
/// Note that this includes *all* registers. Query the `TargetIsa` object to get a set of
@@ -41,6 +41,11 @@ impl AllocatableSet {
Self { avail: [!0; 3] }
}
/// Create a new register set with no registers available.
///
/// This is the opposite of the all-available constructor: every bit of the availability mask
/// starts out cleared, so register units must be added explicitly (for example with `free`).
pub fn empty() -> Self {
Self { avail: [0; 3] }
}
/// Returns `true` if the specified register is available.
pub fn is_avail(&self, rc: RegClass, reg: RegUnit) -> bool {
let (idx, bits) = bitmask(rc, reg);
@@ -62,7 +67,7 @@ impl AllocatableSet {
self.avail[idx] &= !bits;
}
/// Make `reg` available for allocation again.
/// Return `reg` and all of its register units to the set of available registers.
pub fn free(&mut self, rc: RegClass, reg: RegUnit) {
let (idx, bits) = bitmask(rc, reg);
debug_assert!(
@@ -98,15 +103,15 @@ impl AllocatableSet {
/// of `other`.
///
/// This assumes that unused bits are 1.
pub fn interferes_with(&self, other: &AllocatableSet) -> bool {
pub fn interferes_with(&self, other: &RegisterSet) -> bool {
self.avail.iter().zip(&other.avail).any(
|(&x, &y)| (x | y) != !0,
)
}
/// Intersect this set of allocatable registers with `other`. This has the effect of removing
/// any register units from this set that are not in `other`.
pub fn intersect(&mut self, other: &AllocatableSet) {
/// Intersect this set of registers with `other`. This has the effect of removing any register
/// units from this set that are not in `other`.
pub fn intersect(&mut self, other: &RegisterSet) {
for (x, &y) in self.avail.iter_mut().zip(&other.avail) {
*x &= y;
}
@@ -114,8 +119,8 @@ impl AllocatableSet {
/// Return an object that can display this register set, using the register info from the
/// target ISA.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayAllocatableSet<'a> {
DisplayAllocatableSet(self.clone(), regs.into())
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayRegisterSet<'a> {
DisplayRegisterSet(self.clone(), regs.into())
}
}
@@ -157,10 +162,10 @@ impl Iterator for RegSetIter {
impl ExactSizeIterator for RegSetIter {}
/// Displaying an `AllocatableSet` correctly requires the associated `RegInfo` from the target ISA.
pub struct DisplayAllocatableSet<'a>(AllocatableSet, Option<&'a RegInfo>);
/// Displaying a `RegisterSet` correctly requires the associated `RegInfo` from the target ISA.
pub struct DisplayRegisterSet<'a>(RegisterSet, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayAllocatableSet<'a> {
impl<'a> fmt::Display for DisplayRegisterSet<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "[")?;
match self.1 {
@@ -211,7 +216,7 @@ impl<'a> fmt::Display for DisplayAllocatableSet<'a> {
}
}
impl fmt::Display for AllocatableSet {
impl fmt::Display for RegisterSet {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.display(None).fmt(f)
}
@@ -255,7 +260,7 @@ mod tests {
#[test]
fn put_and_take() {
let mut regs = AllocatableSet::new();
let mut regs = RegisterSet::new();
// `GPR` has units 28-36.
assert_eq!(regs.iter(GPR).len(), 8);
@@ -302,8 +307,8 @@ mod tests {
#[test]
fn interference() {
let mut regs1 = AllocatableSet::new();
let mut regs2 = AllocatableSet::new();
let mut regs1 = RegisterSet::new();
let mut regs2 = RegisterSet::new();
assert!(!regs1.interferes_with(&regs2));
regs1.take(&GPR, 32);

View File

@@ -98,12 +98,12 @@
//! appropriate candidate among the set of live register values, add it as a variable and start
//! over.
use super::AllocatableSet;
use super::RegisterSet;
use dbg::DisplayList;
use entity::{SparseMap, SparseMapValue};
use ir::Value;
use isa::{RegClass, RegUnit};
use regalloc::allocatable_set::RegSetIter;
use regalloc::register_set::RegSetIter;
use std::cmp;
use std::fmt;
use std::mem;
@@ -184,12 +184,7 @@ impl Variable {
/// Get an iterator over possible register choices, given the available registers on the input
/// and output sides as well as the available global register set.
fn iter(
&self,
iregs: &AllocatableSet,
oregs: &AllocatableSet,
gregs: &AllocatableSet,
) -> RegSetIter {
fn iter(&self, iregs: &RegisterSet, oregs: &RegisterSet, gregs: &RegisterSet) -> RegSetIter {
if !self.is_output {
debug_assert!(!self.is_global, "Global implies output");
debug_assert!(self.is_input, "Missing interference set");
@@ -476,7 +471,7 @@ pub struct Solver {
/// - The 'to' registers of fixed input reassignments are marked as unavailable.
/// - Input-side variables are marked as available.
///
regs_in: AllocatableSet,
regs_in: RegisterSet,
/// Available registers on the output side of the instruction / fixed input scratch space.
///
@@ -490,7 +485,7 @@ pub struct Solver {
/// - Fixed output assignments are marked as unavailable.
/// - Live-through variables are marked as available.
///
regs_out: AllocatableSet,
regs_out: RegisterSet,
/// List of register moves scheduled to avoid conflicts.
///
@@ -509,8 +504,8 @@ impl Solver {
assignments: SparseMap::new(),
vars: Vec::new(),
inputs_done: false,
regs_in: AllocatableSet::new(),
regs_out: AllocatableSet::new(),
regs_in: RegisterSet::new(),
regs_out: RegisterSet::new(),
moves: Vec::new(),
fills: Vec::new(),
}
@@ -521,8 +516,8 @@ impl Solver {
self.assignments.clear();
self.vars.clear();
self.inputs_done = false;
self.regs_in = AllocatableSet::new();
self.regs_out = AllocatableSet::new();
self.regs_in = RegisterSet::new();
self.regs_out = RegisterSet::new();
self.moves.clear();
self.fills.clear();
}
@@ -531,13 +526,13 @@ impl Solver {
/// allocatable registers.
///
/// The `regs` set is the allocatable registers before any reassignments are applied.
pub fn reset(&mut self, regs: &AllocatableSet) {
pub fn reset(&mut self, regs: &RegisterSet) {
self.assignments.clear();
self.vars.clear();
self.inputs_done = false;
self.regs_in = regs.clone();
// Used for tracking fixed input assignments while `!inputs_done`:
self.regs_out = AllocatableSet::new();
self.regs_out = RegisterSet::new();
self.moves.clear();
self.fills.clear();
}
@@ -870,10 +865,7 @@ impl Solver {
/// always trivial.
///
/// Returns `Ok(regs)` if a solution was found.
pub fn quick_solve(
&mut self,
global_regs: &AllocatableSet,
) -> Result<AllocatableSet, SolverError> {
pub fn quick_solve(&mut self, global_regs: &RegisterSet) -> Result<RegisterSet, SolverError> {
self.find_solution(global_regs)
}
@@ -884,10 +876,7 @@ impl Solver {
/// This may return an error with a register class that has run out of registers. If registers
/// can be freed up in the starving class, this method can be called again after adding
/// variables for the freed registers.
pub fn real_solve(
&mut self,
global_regs: &AllocatableSet,
) -> Result<AllocatableSet, SolverError> {
pub fn real_solve(&mut self, global_regs: &RegisterSet) -> Result<RegisterSet, SolverError> {
// Compute domain sizes for all the variables given the current register sets.
for v in &mut self.vars {
let d = v.iter(&self.regs_in, &self.regs_out, global_regs).len();
@@ -933,10 +922,7 @@ impl Solver {
/// If a solution was found, returns `Ok(regs)` with the set of available registers on the
/// output side after the solution. If no solution could be found, returns `Err(rc)` with the
/// constraint register class that needs more available registers.
fn find_solution(
&mut self,
global_regs: &AllocatableSet,
) -> Result<AllocatableSet, SolverError> {
fn find_solution(&mut self, global_regs: &RegisterSet) -> Result<RegisterSet, SolverError> {
// Available registers on the input and output sides respectively.
let mut iregs = self.regs_in.clone();
let mut oregs = self.regs_out.clone();
@@ -1025,7 +1011,7 @@ impl Solver {
/// a register.
///
/// Returns the number of spills that had to be emitted.
pub fn schedule_moves(&mut self, regs: &AllocatableSet) -> usize {
pub fn schedule_moves(&mut self, regs: &RegisterSet) -> usize {
self.collect_moves();
debug_assert!(self.fills.is_empty());
@@ -1162,7 +1148,7 @@ mod tests {
use entity::EntityRef;
use ir::Value;
use isa::{RegClass, RegInfo, RegUnit, TargetIsa};
use regalloc::AllocatableSet;
use regalloc::RegisterSet;
use std::boxed::Box;
// Make an arm32 `TargetIsa`, if possible.
@@ -1219,8 +1205,8 @@ mod tests {
let r0 = gpr.unit(0);
let r1 = gpr.unit(1);
let r2 = gpr.unit(2);
let gregs = AllocatableSet::new();
let mut regs = AllocatableSet::new();
let gregs = RegisterSet::new();
let mut regs = RegisterSet::new();
let mut solver = Solver::new();
let v10 = Value::new(10);
let v11 = Value::new(11);
@@ -1277,8 +1263,8 @@ mod tests {
let s1 = s.unit(1);
let s2 = s.unit(2);
let s3 = s.unit(3);
let gregs = AllocatableSet::new();
let mut regs = AllocatableSet::new();
let gregs = RegisterSet::new();
let mut regs = RegisterSet::new();
let mut solver = Solver::new();
let v10 = Value::new(10);
let v11 = Value::new(11);
@@ -1337,8 +1323,8 @@ mod tests {
let r3 = gpr.unit(3);
let r4 = gpr.unit(4);
let r5 = gpr.unit(5);
let gregs = AllocatableSet::new();
let mut regs = AllocatableSet::new();
let gregs = RegisterSet::new();
let mut regs = RegisterSet::new();
let mut solver = Solver::new();
let v10 = Value::new(10);
let v11 = Value::new(11);