Rename the 'cretonne' crate to 'cretonne-codegen'.

This fixes the next part of #287.
Dan Gohman
2018-04-17 08:48:02 -07:00
parent 7767186dd0
commit 24fa169e1f
254 changed files with 265 additions and 264 deletions


@@ -0,0 +1,131 @@
//! Value affinity for register allocation.
//!
//! An SSA value's affinity is a hint used to guide the register allocator. It specifies the class
//! of allocation that is likely to require the fewest fixup moves to satisfy instruction operand
//! constraints.
//!
//! For values that want to be in registers, the affinity hint includes a register class or
//! subclass. This is just a hint, and the register allocator is allowed to pick a register from a
//! larger register class instead.
use ir::{AbiParam, ArgumentLoc};
use isa::{ConstraintKind, OperandConstraint, RegClassIndex, RegInfo, TargetIsa};
use std::fmt;
/// Preferred register allocation for an SSA value.
#[derive(Clone, Copy, Debug)]
pub enum Affinity {
/// No affinity.
///
/// This indicates a value that is not defined or used by any real instructions. It is a ghost
/// value that won't appear in the final program.
None,
/// This value should be placed in a spill slot on the stack.
Stack,
/// This value prefers a register from the given register class.
Reg(RegClassIndex),
}
impl Default for Affinity {
fn default() -> Self {
Affinity::None
}
}
impl Affinity {
/// Create an affinity that satisfies a single constraint.
///
/// This will never create an `Affinity::None`.
/// Use the `Default` implementation for that.
pub fn new(constraint: &OperandConstraint) -> Affinity {
if constraint.kind == ConstraintKind::Stack {
Affinity::Stack
} else {
Affinity::Reg(constraint.regclass.into())
}
}
/// Create an affinity that matches an ABI argument for `isa`.
pub fn abi(arg: &AbiParam, isa: &TargetIsa) -> Affinity {
match arg.location {
ArgumentLoc::Unassigned => Affinity::None,
ArgumentLoc::Reg(_) => Affinity::Reg(isa.regclass_for_abi_type(arg.value_type).into()),
ArgumentLoc::Stack(_) => Affinity::Stack,
}
}
/// Is this the `None` affinity?
pub fn is_none(self) -> bool {
match self {
Affinity::None => true,
_ => false,
}
}
/// Is this the `Reg` affinity?
pub fn is_reg(self) -> bool {
match self {
Affinity::Reg(_) => true,
_ => false,
}
}
/// Is this the `Stack` affinity?
pub fn is_stack(self) -> bool {
match self {
Affinity::Stack => true,
_ => false,
}
}
/// Merge an operand constraint into this affinity.
///
/// Note that this does not guarantee that the register allocator will pick a register that
/// satisfies the constraint.
pub fn merge(&mut self, constraint: &OperandConstraint, reg_info: &RegInfo) {
match *self {
Affinity::None => *self = Affinity::new(constraint),
Affinity::Reg(rc) => {
// If the preferred register class is a subclass of the constraint, there's no need
// to change anything.
if constraint.kind != ConstraintKind::Stack &&
!constraint.regclass.has_subclass(rc)
{
// If the register classes don't overlap, `intersect` returns `None`, and we
// just keep our previous affinity.
if let Some(subclass) = constraint.regclass.intersect_index(reg_info.rc(rc)) {
// This constraint shrinks our preferred register class.
*self = Affinity::Reg(subclass);
}
}
}
Affinity::Stack => {}
}
}
/// Return an object that can display this value affinity, using the register info from the
/// target ISA.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayAffinity<'a> {
DisplayAffinity(self, regs.into())
}
}
/// Displaying an `Affinity` correctly requires the associated `RegInfo` from the target ISA.
pub struct DisplayAffinity<'a>(Affinity, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayAffinity<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
Affinity::None => write!(f, "none"),
Affinity::Stack => write!(f, "stack"),
Affinity::Reg(rci) => {
match self.1 {
Some(regs) => write!(f, "{}", regs.rc(rci)),
None => write!(f, "{}", rci),
}
}
}
}
}
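
As a small usage sketch (hypothetical, not part of this commit, and assumed to live inside this module so the items above are in scope), the `Default` implementation and the predicate methods behave like this:

#[cfg(test)]
mod tests {
    use super::*;
    use isa::RegInfo;

    #[test]
    fn default_and_predicates() {
        // The default affinity is the ghost-value case.
        let ghost = Affinity::default();
        assert!(ghost.is_none());
        assert!(!ghost.is_reg());
        assert!(!ghost.is_stack());

        // A stack affinity can be displayed without register info.
        let stack = Affinity::Stack;
        assert!(stack.is_stack());
        assert_eq!(stack.display(None::<&RegInfo>).to_string(), "stack");
    }
}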

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,159 @@
//! Register allocator context.
//!
//! The `Context` struct contains data structures that should be preserved across invocations of
//! the register allocator algorithm. This doesn't preserve any data between functions, but it
//! avoids allocating data structures independently for each function being compiled.
use dominator_tree::DominatorTree;
use flowgraph::ControlFlowGraph;
use ir::Function;
use isa::TargetIsa;
use regalloc::coalescing::Coalescing;
use regalloc::coloring::Coloring;
use regalloc::live_value_tracker::LiveValueTracker;
use regalloc::liveness::Liveness;
use regalloc::reload::Reload;
use regalloc::spilling::Spilling;
use regalloc::virtregs::VirtRegs;
use result::CtonResult;
use timing;
use topo_order::TopoOrder;
use verifier::{verify_context, verify_cssa, verify_liveness, verify_locations};
/// Persistent memory allocations for register allocation.
pub struct Context {
liveness: Liveness,
virtregs: VirtRegs,
coalescing: Coalescing,
topo: TopoOrder,
tracker: LiveValueTracker,
spilling: Spilling,
reload: Reload,
coloring: Coloring,
}
impl Context {
/// Create a new context for register allocation.
///
/// This context should be reused for multiple functions in order to avoid repeated memory
/// allocations.
pub fn new() -> Self {
Self {
liveness: Liveness::new(),
virtregs: VirtRegs::new(),
coalescing: Coalescing::new(),
topo: TopoOrder::new(),
tracker: LiveValueTracker::new(),
spilling: Spilling::new(),
reload: Reload::new(),
coloring: Coloring::new(),
}
}
/// Clear all data structures in this context.
pub fn clear(&mut self) {
self.liveness.clear();
self.virtregs.clear();
self.coalescing.clear();
self.topo.clear();
self.tracker.clear();
self.spilling.clear();
self.reload.clear();
self.coloring.clear();
}
/// Allocate registers in `func`.
///
/// After register allocation, all values in `func` have been assigned to a register or stack
/// location that is consistent with instruction encoding constraints.
pub fn run(
&mut self,
isa: &TargetIsa,
func: &mut Function,
cfg: &ControlFlowGraph,
domtree: &mut DominatorTree,
) -> CtonResult {
let _tt = timing::regalloc();
debug_assert!(domtree.is_valid());
// `Liveness` and `Coloring` are self-clearing.
self.virtregs.clear();
// Tracker state (dominator live sets) is actually reused between the spilling and coloring
// phases.
self.tracker.clear();
// Pass: Liveness analysis.
self.liveness.compute(isa, func, cfg);
if isa.flags().enable_verifier() {
verify_liveness(isa, func, cfg, &self.liveness)?;
}
// Pass: Coalesce and create Conventional SSA form.
self.coalescing.conventional_ssa(
isa,
func,
cfg,
domtree,
&mut self.liveness,
&mut self.virtregs,
);
if isa.flags().enable_verifier() {
verify_context(func, cfg, domtree, isa)?;
verify_liveness(isa, func, cfg, &self.liveness)?;
verify_cssa(func, cfg, domtree, &self.liveness, &self.virtregs)?;
}
// Pass: Spilling.
self.spilling.run(
isa,
func,
domtree,
&mut self.liveness,
&self.virtregs,
&mut self.topo,
&mut self.tracker,
);
if isa.flags().enable_verifier() {
verify_context(func, cfg, domtree, isa)?;
verify_liveness(isa, func, cfg, &self.liveness)?;
verify_cssa(func, cfg, domtree, &self.liveness, &self.virtregs)?;
}
// Pass: Reload.
self.reload.run(
isa,
func,
domtree,
&mut self.liveness,
&mut self.topo,
&mut self.tracker,
);
if isa.flags().enable_verifier() {
verify_context(func, cfg, domtree, isa)?;
verify_liveness(isa, func, cfg, &self.liveness)?;
verify_cssa(func, cfg, domtree, &self.liveness, &self.virtregs)?;
}
// Pass: Coloring.
self.coloring.run(
isa,
func,
domtree,
&mut self.liveness,
&mut self.tracker,
);
if isa.flags().enable_verifier() {
verify_context(func, cfg, domtree, isa)?;
verify_liveness(isa, func, cfg, &self.liveness)?;
verify_locations(isa, func, Some(&self.liveness))?;
verify_cssa(func, cfg, domtree, &self.liveness, &self.virtregs)?;
}
Ok(())
}
}
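
The comment on `new()` above is the key usage constraint: one `Context` is meant to be reused across functions. Below is a minimal driver sketch (hypothetical, not part of this commit; it assumes the usual `with_function` constructors for the CFG and dominator tree, and reuses the imports at the top of this file):

fn regalloc_all(isa: &TargetIsa, funcs: &mut [Function]) -> CtonResult {
    // Allocate the context once and reuse its data structures for every function.
    let mut ctx = Context::new();
    for func in funcs {
        let cfg = ControlFlowGraph::with_function(func);
        let mut domtree = DominatorTree::with_function(func, &cfg);
        ctx.run(isa, func, &cfg, &mut domtree)?;
    }
    Ok(())
}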


@@ -0,0 +1,215 @@
//! Register diversions.
//!
//! Normally, a value is assigned to a single register or stack location by the register allocator.
//! Sometimes, it is necessary to move register values to a different register in order to satisfy
//! instruction constraints.
//!
//! These register diversions are local to an EBB. No values can be diverted when entering a new
//! EBB.
use ir::{InstructionData, Opcode};
use ir::{StackSlot, Value, ValueLoc, ValueLocations};
use isa::{RegInfo, RegUnit};
use std::fmt;
use std::vec::Vec;
/// A diversion of a value from its original location to a new register or stack location.
///
/// In IR, a diversion is represented by a `regmove` instruction, possibly a chain of them for the
/// same value.
///
/// When tracking diversions, the `from` field is the original assigned value location, and `to` is
/// the current one.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Diversion {
/// The value that is diverted.
pub value: Value,
/// The original value location.
pub from: ValueLoc,
/// The current value location.
pub to: ValueLoc,
}
impl Diversion {
/// Make a new diversion.
pub fn new(value: Value, from: ValueLoc, to: ValueLoc) -> Diversion {
debug_assert!(from.is_assigned() && to.is_assigned());
Diversion { value, from, to }
}
}
/// Keep track of diversions in an EBB.
pub struct RegDiversions {
current: Vec<Diversion>,
}
impl RegDiversions {
/// Create a new empty diversion tracker.
pub fn new() -> Self {
Self { current: Vec::new() }
}
/// Clear the tracker, preparing for a new EBB.
pub fn clear(&mut self) {
self.current.clear()
}
/// Are there any diversions?
pub fn is_empty(&self) -> bool {
self.current.is_empty()
}
/// Get the current diversion of `value`, if any.
pub fn diversion(&self, value: Value) -> Option<&Diversion> {
self.current.iter().find(|d| d.value == value)
}
/// Get all current diversions.
pub fn all(&self) -> &[Diversion] {
self.current.as_slice()
}
/// Get the current location for `value`. Fall back to the assignment map for non-diverted
/// values.
pub fn get(&self, value: Value, locations: &ValueLocations) -> ValueLoc {
match self.diversion(value) {
Some(d) => d.to,
None => locations[value],
}
}
/// Get the current register location for `value`, or panic if `value` isn't in a register.
pub fn reg(&self, value: Value, locations: &ValueLocations) -> RegUnit {
self.get(value, locations).unwrap_reg()
}
/// Get the current stack location for `value`, or panic if `value` isn't in a stack slot.
pub fn stack(&self, value: Value, locations: &ValueLocations) -> StackSlot {
self.get(value, locations).unwrap_stack()
}
/// Record any kind of move.
///
/// The `from` location must match an existing `to` location, if any.
pub fn divert(&mut self, value: Value, from: ValueLoc, to: ValueLoc) {
debug_assert!(from.is_assigned() && to.is_assigned());
if let Some(i) = self.current.iter().position(|d| d.value == value) {
debug_assert_eq!(self.current[i].to, from, "Bad regmove chain for {}", value);
if self.current[i].from != to {
self.current[i].to = to;
} else {
self.current.swap_remove(i);
}
} else {
self.current.push(Diversion::new(value, from, to));
}
}
/// Record a register -> register move.
pub fn regmove(&mut self, value: Value, from: RegUnit, to: RegUnit) {
self.divert(value, ValueLoc::Reg(from), ValueLoc::Reg(to));
}
/// Record a register -> stack move.
pub fn regspill(&mut self, value: Value, from: RegUnit, to: StackSlot) {
self.divert(value, ValueLoc::Reg(from), ValueLoc::Stack(to));
}
/// Record a stack -> register move.
pub fn regfill(&mut self, value: Value, from: StackSlot, to: RegUnit) {
self.divert(value, ValueLoc::Stack(from), ValueLoc::Reg(to));
}
/// Apply the effect of `inst`.
///
/// If `inst` is a `regmove`, `regfill`, or `regspill` instruction, update the diversions to
/// match.
pub fn apply(&mut self, inst: &InstructionData) {
match *inst {
InstructionData::RegMove {
opcode: Opcode::Regmove,
arg,
src,
dst,
} => self.regmove(arg, src, dst),
InstructionData::RegSpill {
opcode: Opcode::Regspill,
arg,
src,
dst,
} => self.regspill(arg, src, dst),
InstructionData::RegFill {
opcode: Opcode::Regfill,
arg,
src,
dst,
} => self.regfill(arg, src, dst),
_ => {}
}
}
/// Drop any recorded move for `value`.
///
/// Returns the `to` location of the removed diversion.
pub fn remove(&mut self, value: Value) -> Option<ValueLoc> {
self.current.iter().position(|d| d.value == value).map(
|i| {
self.current.swap_remove(i).to
},
)
}
/// Return an object that can display the diversions.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayDiversions<'a> {
DisplayDiversions(self, regs.into())
}
}
/// Object that displays register diversions.
pub struct DisplayDiversions<'a>(&'a RegDiversions, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayDiversions<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{{")?;
for div in self.0.all() {
write!(
f,
" {}: {} -> {}",
div.value,
div.from.display(self.1),
div.to.display(self.1)
)?
}
write!(f, " }}")
}
}
#[cfg(test)]
mod tests {
use super::*;
use entity::EntityRef;
use ir::Value;
#[test]
fn inserts() {
let mut divs = RegDiversions::new();
let v1 = Value::new(1);
let v2 = Value::new(2);
divs.regmove(v1, 10, 12);
assert_eq!(
divs.diversion(v1),
Some(&Diversion {
value: v1,
from: ValueLoc::Reg(10),
to: ValueLoc::Reg(12),
})
);
assert_eq!(divs.diversion(v2), None);
divs.regmove(v1, 12, 11);
assert_eq!(divs.diversion(v1).unwrap().to, ValueLoc::Reg(11));
divs.regmove(v1, 11, 10);
assert_eq!(divs.diversion(v1), None);
}
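    // A further sketch (hypothetical, not part of this commit): a `regspill`
    // followed by the matching `regfill` collapses the regmove chain, so the
    // diversion disappears. Register unit 10 and stack slot 0 are made up.
    #[test]
    fn spill_then_fill_cancels() {
        let mut divs = RegDiversions::new();
        let v1 = Value::new(1);
        let ss0 = StackSlot::new(0);

        // Divert v1 from register unit 10 into a stack slot.
        divs.regspill(v1, 10, ss0);
        assert_eq!(
            divs.diversion(v1),
            Some(&Diversion {
                value: v1,
                from: ValueLoc::Reg(10),
                to: ValueLoc::Stack(ss0),
            })
        );

        // Filling it back into the same register cancels the diversion.
        divs.regfill(v1, ss0, 10);
        assert_eq!(divs.diversion(v1), None);
    }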
}


@@ -0,0 +1,348 @@
//! Track which values are live in an EBB with instruction granularity.
//!
//! The `LiveValueTracker` keeps track of the set of live SSA values at each instruction in an EBB.
//! The sets of live values are computed on the fly as the tracker is moved from instruction to
//! instruction, starting at the EBB header.
use dominator_tree::DominatorTree;
use entity::{EntityList, ListPool};
use ir::{DataFlowGraph, Ebb, ExpandedProgramPoint, Inst, Layout, Value};
use partition_slice::partition_slice;
use regalloc::affinity::Affinity;
use regalloc::liveness::Liveness;
use regalloc::liverange::LiveRange;
use std::collections::HashMap;
use std::vec::Vec;
type ValueList = EntityList<Value>;
/// Compute and track live values throughout an EBB.
pub struct LiveValueTracker {
/// The set of values that are live at the current program point.
live: LiveValueVec,
/// Saved set of live values for every jump and branch that can potentially be an immediate
/// dominator of an EBB.
///
/// This is the set of values that are live *before* the branch.
idom_sets: HashMap<Inst, ValueList>,
/// Memory pool for the live sets.
idom_pool: ListPool<Value>,
}
/// Information about a value that is live at the current program point.
#[derive(Debug)]
pub struct LiveValue {
/// The live value.
pub value: Value,
/// The local ending point of the live range in the current EBB, as returned by
/// `LiveRange::def_local_end()` or `LiveRange::livein_local_end()`.
pub endpoint: Inst,
/// The affinity of the value as represented in its `LiveRange`.
///
/// This value is simply a copy of the affinity stored in the live range. We copy it because
/// almost all users of `LiveValue` need to look at it.
pub affinity: Affinity,
/// The live range for this value never leaves its EBB.
pub is_local: bool,
/// This value is dead - the live range ends immediately.
pub is_dead: bool,
}
struct LiveValueVec {
/// The set of values that are live at the current program point.
values: Vec<LiveValue>,
/// How many values at the front of `values` are known to be live after `inst`?
///
/// This is used to pass a much smaller slice to `partition_slice` when it's called a second
/// time for the same instruction.
live_prefix: Option<(Inst, usize)>,
}
impl LiveValueVec {
fn new() -> Self {
Self {
values: Vec::new(),
live_prefix: None,
}
}
/// Add a new live value to `values`. Copy some properties from `lr`.
fn push(&mut self, value: Value, endpoint: Inst, lr: &LiveRange) {
self.values.push(LiveValue {
value,
endpoint,
affinity: lr.affinity,
is_local: lr.is_local(),
is_dead: lr.is_dead(),
});
}
/// Remove all elements.
fn clear(&mut self) {
self.values.clear();
self.live_prefix = None;
}
/// Make sure that the values killed by `next_inst` are moved to the end of the `values`
/// vector.
///
/// Returns the number of values that will be live after `next_inst`.
fn live_after(&mut self, next_inst: Inst) -> usize {
// How many values at the front of the vector are already known to survive `next_inst`?
// We don't need to pass this prefix to `partition_slice()`
let keep = match self.live_prefix {
Some((i, prefix)) if i == next_inst => prefix,
_ => 0,
};
// Move the remaining surviving values to the front partition of the vector.
let prefix = keep + partition_slice(&mut self.values[keep..], |v| v.endpoint != next_inst);
// Remember the new prefix length in case we get called again for the same `next_inst`.
self.live_prefix = Some((next_inst, prefix));
prefix
}
/// Remove the values killed by `next_inst`.
fn remove_kill_values(&mut self, next_inst: Inst) {
let keep = self.live_after(next_inst);
self.values.truncate(keep);
}
/// Remove any dead values.
fn remove_dead_values(&mut self) {
self.values.retain(|v| !v.is_dead);
self.live_prefix = None;
}
}
impl LiveValueTracker {
/// Create a new blank tracker.
pub fn new() -> Self {
Self {
live: LiveValueVec::new(),
idom_sets: HashMap::new(),
idom_pool: ListPool::new(),
}
}
/// Clear all cached information.
pub fn clear(&mut self) {
self.live.clear();
self.idom_sets.clear();
self.idom_pool.clear();
}
/// Get the set of currently live values.
///
/// Between calls to `process_inst()` and `drop_dead()`, this includes both values killed and
/// defined by the current instruction.
pub fn live(&self) -> &[LiveValue] {
&self.live.values
}
/// Get a mutable set of currently live values.
///
/// Use with care and don't move entries around.
pub fn live_mut(&mut self) -> &mut [LiveValue] {
&mut self.live.values
}
/// Move the current position to the top of `ebb`.
///
/// This depends on the stored live value set at `ebb`'s immediate dominator, so that must have
/// been visited first.
///
/// Returns `(liveins, args)` as a pair of slices. The first slice is the set of live-in values
/// from the immediate dominator. The second slice is the set of `ebb` parameters.
///
/// Dead parameters with no uses are included in `args`. Call `drop_dead_params()` to remove them.
pub fn ebb_top(
&mut self,
ebb: Ebb,
dfg: &DataFlowGraph,
liveness: &Liveness,
layout: &Layout,
domtree: &DominatorTree,
) -> (&[LiveValue], &[LiveValue]) {
// Start over, compute the set of live values at the top of the EBB from two sources:
//
// 1. Values that were live before `ebb`'s immediate dominator, filtered for those that are
// actually live-in.
// 2. Arguments to `ebb` that are not dead.
//
self.live.clear();
// Compute the live-in values. Start by filtering the set of values that were live before
// the immediate dominator. Just use the empty set if there's no immediate dominator (i.e.,
// the entry block or an unreachable block).
if let Some(idom) = domtree.idom(ebb) {
// If the immediate dominator exists, we must have a stored list for it. This is a
// requirement on the order EBBs are visited: all dominators must have been processed
// before the current EBB.
let idom_live_list = self.idom_sets.get(&idom).expect(
"No stored live set for dominator",
);
let ctx = liveness.context(layout);
// Get just the values that are live-in to `ebb`.
for &value in idom_live_list.as_slice(&self.idom_pool) {
let lr = liveness.get(value).expect(
"Immediate dominator value has no live range",
);
// Check if this value is live-in here.
if let Some(endpoint) = lr.livein_local_end(ebb, ctx) {
self.live.push(value, endpoint, lr);
}
}
}
// Now add all the live parameters to `ebb`.
let first_arg = self.live.values.len();
for &value in dfg.ebb_params(ebb) {
let lr = &liveness[value];
debug_assert_eq!(lr.def(), ebb.into());
match lr.def_local_end().into() {
ExpandedProgramPoint::Inst(endpoint) => {
self.live.push(value, endpoint, lr);
}
ExpandedProgramPoint::Ebb(local_ebb) => {
// This is a dead EBB parameter which is not even live into the first
// instruction in the EBB.
debug_assert_eq!(
local_ebb,
ebb,
"EBB parameter live range ends at wrong EBB header"
);
// Give this value a fake endpoint that is the first instruction in the EBB.
// We expect it to be removed by calling `drop_dead_params()`.
self.live.push(
value,
layout.first_inst(ebb).expect("Empty EBB"),
lr,
);
}
}
}
self.live.values.split_at(first_arg)
}
/// Prepare to move past `inst`.
///
/// Determine the set of already live values that are killed by `inst`, and add the new defined
/// values to the tracked set.
///
/// Returns `(throughs, kills, defs)` as a tuple of slices:
///
/// 1. The `throughs` slice is the set of live-through values that are neither defined nor
/// killed by the instruction.
/// 2. The `kills` slice is the set of values that were live before the instruction and are
/// killed at the instruction. This does not include dead defs.
/// 3. The `defs` slice is guaranteed to be in the same order as `inst`'s results, and includes
/// dead defines.
///
/// The order of `throughs` and `kills` is arbitrary.
///
/// The `drop_dead()` method must be called next to actually remove the dead values from the
/// tracked set after the two returned slices are no longer needed.
pub fn process_inst(
&mut self,
inst: Inst,
dfg: &DataFlowGraph,
liveness: &Liveness,
) -> (&[LiveValue], &[LiveValue], &[LiveValue]) {
// Save a copy of the live values before any branches or jumps that could be somebody's
// immediate dominator.
if dfg[inst].opcode().is_branch() {
self.save_idom_live_set(inst);
}
// Move killed values to the end of the vector.
// Don't remove them yet, `drop_dead()` will do that.
let first_kill = self.live.live_after(inst);
// Add the values defined by `inst`.
let first_def = self.live.values.len();
for &value in dfg.inst_results(inst) {
let lr = &liveness[value];
debug_assert_eq!(lr.def(), inst.into());
match lr.def_local_end().into() {
ExpandedProgramPoint::Inst(endpoint) => {
self.live.push(value, endpoint, lr);
}
ExpandedProgramPoint::Ebb(ebb) => {
panic!("Instruction result live range can't end at {}", ebb);
}
}
}
(
&self.live.values[0..first_kill],
&self.live.values[first_kill..first_def],
&self.live.values[first_def..],
)
}
/// Prepare to move past a ghost instruction.
///
/// This is like `process_inst`, except any defs are ignored.
///
/// Returns `(throughs, kills)`.
pub fn process_ghost(&mut self, inst: Inst) -> (&[LiveValue], &[LiveValue]) {
let first_kill = self.live.live_after(inst);
self.live.values.as_slice().split_at(first_kill)
}
/// Drop the values that are now dead after moving past `inst`.
///
/// This removes both live values that were killed by `inst` and dead defines on `inst` itself.
///
/// This must be called after `process_inst(inst)` and before proceeding to the next
/// instruction.
pub fn drop_dead(&mut self, inst: Inst) {
// Remove both live values that were killed by `inst` and dead defines from `inst`.
self.live.remove_kill_values(inst);
}
/// Drop any values that are marked as `is_dead`.
///
/// Use this after calling `ebb_top` to clean out dead EBB parameters.
pub fn drop_dead_params(&mut self) {
self.live.remove_dead_values();
}
/// Process new spills.
///
/// Any values where `f` returns true are spilled and will be treated as if their affinity was
/// `Stack`.
pub fn process_spills<F>(&mut self, mut f: F)
where
F: FnMut(Value) -> bool,
{
for lv in &mut self.live.values {
if f(lv.value) {
lv.affinity = Affinity::Stack;
}
}
}
/// Save the current set of live values so it is associated with `idom`.
fn save_idom_live_set(&mut self, idom: Inst) {
let values = self.live.values.iter().map(|lv| lv.value);
let pool = &mut self.idom_pool;
// If there already is a set saved for `idom`, just keep it.
self.idom_sets.entry(idom).or_insert_with(|| {
let mut list = ValueList::default();
list.extend(values, pool);
list
});
}
}
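
The tracker is driven by a fairly strict per-EBB protocol. Here is a sketch of how one EBB is walked (a hypothetical pass body, not part of this commit; it assumes an extra `use ir::Function;` on top of the imports above):

fn walk_ebb(
    ebb: Ebb,
    func: &Function,
    liveness: &Liveness,
    domtree: &DominatorTree,
    tracker: &mut LiveValueTracker,
) {
    {
        // Establish the live-in set and the EBB parameters at the header.
        let (_liveins, _params) =
            tracker.ebb_top(ebb, &func.dfg, liveness, &func.layout, domtree);
        // A real pass would inspect the returned slices here.
    }
    // Dead parameters were given a fake endpoint above; discard them now.
    tracker.drop_dead_params();

    for inst in func.layout.ebb_insts(ebb) {
        {
            // Partition the live set into throughs/kills and append the new defs.
            let (_throughs, _kills, _defs) =
                tracker.process_inst(inst, &func.dfg, liveness);
            // A real pass would assign registers or insert spill code here.
        }
        // Remove values killed by `inst` and its dead defs before moving on.
        tracker.drop_dead(inst);
    }
}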


@@ -0,0 +1,458 @@
//! Liveness analysis for SSA values.
//!
//! This module computes the live range of all the SSA values in a function and produces a
//! `LiveRange` instance for each.
//!
//!
//! # Liveness consumers
//!
//! The primary consumer of the liveness analysis is the SSA coloring pass which goes through each
//! EBB and assigns a register to the defined values. This algorithm needs to maintain a set of the
//! currently live values as it is iterating down the instructions in the EBB. It asks the
//! following questions:
//!
//! - What is the set of live values at the entry to the EBB?
//! - When moving past a use of a value, is that value still alive in the EBB, or was that the last
//! use?
//! - When moving past a branch, which of the live values are still live below the branch?
//!
//! The set of `LiveRange` instances can answer these questions through their `def_local_end` and
//! `livein_local_end` queries. The coloring algorithm visits EBBs in a topological order of the
//! dominator tree, so it can compute the set of live values at the beginning of an EBB by starting
//! from the set of live values at the dominating branch instruction and filtering it with
//! `livein_local_end`. These sets do not need to be stored in the liveness analysis.
//!
//! The secondary consumer of the liveness analysis is the spilling pass which needs to count the
//! number of live values at every program point and insert spill code until the number of
//! registers needed is small enough.
//!
//!
//! # Alternative algorithms
//!
//! A number of different liveness analysis algorithms exist, so it is worthwhile to look at a few
//! alternatives.
//!
//! ## Data-flow equations
//!
//! The classic *live variables analysis* that you will find in all compiler books from the
//! previous century does not depend on SSA form. It is typically implemented by iteratively
//! solving data-flow equations on bit-vectors of variables. The result is a live-out bit-vector of
//! variables for every basic block in the program.
//!
//! This algorithm has some disadvantages that makes us look elsewhere:
//!
//! - Quadratic memory use. We need a bit per variable per basic block in the function.
//! - Sparse representation. In practice, the majority of SSA values never leave their basic block,
//! and those that do span basic blocks rarely span a large number of basic blocks. This makes
//! the bit-vectors quite sparse.
//! - Traditionally, the data-flow equations were solved for real program *variables* which does
//! not include temporaries used in evaluating expressions. We have an SSA form program which
//! blurs the distinction between temporaries and variables. This makes the quadratic memory
//!   problem worse because there are many more SSA values than there were variables in the original
//! program, and we don't know a priori which SSA values leave their basic block.
//! - Missing last-use information. For values that are not live-out of a basic block, we would
//! need to store information about the last use in the block somewhere. LLVM stores this
//! information as a 'kill bit' on the last use in the IR. Maintaining these kill bits has been a
//! source of problems for LLVM's register allocator.
//!
//! Data-flow equations can detect when a variable is used uninitialized, and they can handle
//! multiple definitions of the same variable. We don't need this generality since we already have
//! a program in SSA form.
//!
//! ## LLVM's liveness analysis
//!
//! LLVM's register allocator computes liveness per *virtual register*, where a virtual register is
//! a disjoint union of related SSA values that should be assigned to the same physical register.
//! It uses a compact data structure very similar to our `LiveRange`. The important difference is
//! that Cretonne's `LiveRange` only describes a single SSA value, while LLVM's `LiveInterval`
//! describes the live range of a virtual register *and* which one of the related SSA values is
//! live at any given program point.
//!
//! LLVM computes the live range of each virtual register independently by using the use-def chains
//! that are baked into its IR. The algorithm for a single virtual register is:
//!
//! 1. Initialize the live range with a single-instruction snippet of liveness at each def, using
//! the def-chain. This does not include any phi-values.
//! 2. Go through the virtual register's use chain and perform the following steps at each use:
//! 3. Perform an exhaustive depth-first traversal up the CFG from the use. Look for basic blocks
//! that already contain some liveness and extend the last live SSA value in the block to be
//! live-out. Also build a list of new basic blocks where the register needs to be live-in.
//! 4. Iteratively propagate live-out SSA values to the new live-in blocks. This may require new
//! PHI values to be created when different SSA values can reach the same block.
//!
//! The iterative SSA form reconstruction can be skipped if the depth-first search only encountered
//! one SSA value.
//!
//! This algorithm has some advantages compared to the data-flow equations:
//!
//! - The live ranges of local virtual registers are computed very quickly without ever traversing
//! the CFG. The memory needed to store these live ranges is independent of the number of basic
//! blocks in the program.
//! - The time to compute the live range of a global virtual register is proportional to the number
//! of basic blocks covered. Many virtual registers only cover a few blocks, even in very large
//! functions.
//! - A single live range can be recomputed after making modifications to the IR. No global
//! algorithm is necessary. This feature depends on having use-def chains for virtual registers
//! which Cretonne doesn't.
//!
//! Cretonne uses data structures and algorithms very similar to LLVM's, with the important
//! difference that live ranges are computed per SSA value instead of per virtual register, and the
//! uses in Cretonne IR refer to SSA values instead of virtual registers. This means that Cretonne
//! can skip the last step of reconstructing SSA form for the virtual register uses.
//!
//! ## Fast Liveness Checking for SSA-Form Programs
//!
//! A liveness analysis that is often brought up in the context of SSA-based register allocation
//! was presented at CGO 2008:
//!
//! > Boissinot, B., Hack, S., Grund, D., de Dinechin, B. D., & Rastello, F. (2008). *Fast Liveness
//! Checking for SSA-Form Programs.* CGO.
//!
//! This analysis uses a global pre-computation that only depends on the CFG of the function. It
//! then allows liveness queries for any (value, program point) pair. Each query traverses the use
//! chain of the value and performs lookups in the precomputed bit-vectors.
//!
//! I did not seriously consider this analysis for Cretonne because:
//!
//! - It depends critically on use chains which Cretonne doesn't have.
//! - Popular variables like the `this` pointer in a C++ method can have very large use chains.
//! Traversing such a long use chain on every liveness lookup has the potential for some nasty
//! quadratic behavior in unfortunate cases.
//! - It says "fast" in the title, but the paper only claims to be 16% faster than a data-flow
//! based approach, which isn't that impressive.
//!
//! Nevertheless, the property of only depending on the CFG structure is very useful. If Cretonne
//! gains use chains, this approach would be worth a proper evaluation.
//!
//!
//! # Cretonne's liveness analysis
//!
//! The algorithm implemented in this module is similar to LLVM's with these differences:
//!
//! - The `LiveRange` data structure describes the liveness of a single SSA value, not a virtual
//! register.
//! - Instructions in Cretonne IR contains references to SSA values, not virtual registers.
//! - All live ranges are computed in one traversal of the program. Cretonne doesn't have use
//! chains, so it is not possible to compute the live range for a single SSA value independently.
//!
//! The liveness computation visits all instructions in the program. The order is not important for
//! the algorithm to be correct. At each instruction, the used values are examined.
//!
//! - The first time a value is encountered, its live range is constructed as a dead live range
//! containing only the defining program point.
//! - The local interval of the value's live range is extended so it reaches the use. This may
//! require creating a new live-in local interval for the EBB.
//! - If the live range became live-in to the EBB, add the EBB to a work-list.
//! - While the work-list is non-empty pop a live-in EBB and repeat the two steps above, using each
//! of the live-in EBB's CFG predecessor instructions as a 'use'.
//!
//! The effect of this algorithm is to extend the live range of each value to reach uses as they are
//! visited. No data about each value beyond the live range is needed between visiting uses, so
//! nothing is lost by computing the live range of all values simultaneously.
//!
//! ## Cache efficiency of Cretonne vs LLVM
//!
//! Since LLVM computes the complete live range of a virtual register in one go, it can keep the
//! whole `LiveInterval` for the register in L1 cache. Since it is visiting the instructions in use
//! chain order, some cache thrashing can occur as a result of pulling instructions into cache
//! somewhat chaotically.
//!
//! Cretonne uses a transposed algorithm, visiting instructions in order. This means that each
//! instruction is brought into cache only once, and it is likely that the other instructions on
//! the same cache line will be visited before the line is evicted.
//!
//! Cretonne's problem is that the `LiveRange` structs are visited many times and not always
//! regularly. We should strive to make the `LiveRange` struct as small as possible such that
//! multiple related values can live on the same cache line.
//!
//! - Local values should fit in a 16-byte `LiveRange` struct or smaller. The current
//! implementation contains a 24-byte `Vec` object and a redundant `value` member pushing the
//! size to 32 bytes.
//! - Related values should be stored on the same cache line. The current sparse set implementation
//! does a decent job of that.
//! - For global values, the list of live-in intervals is very likely to fit on a single cache
//! line. These lists are very likely to be found in L2 cache at least.
//!
//! There is some room for improvement.
use entity::SparseMap;
use flowgraph::ControlFlowGraph;
use ir::dfg::ValueDef;
use ir::{Ebb, Function, Inst, Layout, ProgramPoint, Value};
use isa::{EncInfo, TargetIsa};
use regalloc::affinity::Affinity;
use regalloc::liverange::{LiveRange, LiveRangeContext, LiveRangeForest};
use std::mem;
use std::ops::Index;
use std::vec::Vec;
use timing;
/// A set of live ranges, indexed by value number.
type LiveRangeSet = SparseMap<Value, LiveRange>;
/// Get a mutable reference to the live range for `value`.
/// Create it if necessary.
fn get_or_create<'a>(
lrset: &'a mut LiveRangeSet,
value: Value,
isa: &TargetIsa,
func: &Function,
enc_info: &EncInfo,
) -> &'a mut LiveRange {
// It would be better to use `get_mut()` here, but that leads to borrow checker fighting
// which can probably only be resolved by non-lexical lifetimes.
// https://github.com/rust-lang/rfcs/issues/811
if lrset.get(value).is_none() {
// Create a live range for value. We need the program point that defines it.
let def;
let affinity;
match func.dfg.value_def(value) {
ValueDef::Result(inst, rnum) => {
def = inst.into();
// Initialize the affinity from the defining instruction's result constraints.
// Don't do this for call return values which are always tied to a single register.
affinity = enc_info
.operand_constraints(func.encodings[inst])
.and_then(|rc| rc.outs.get(rnum))
.map(Affinity::new)
.or_else(|| {
// If this is a call, get the return value affinity.
func.dfg.call_signature(inst).map(|sig| {
Affinity::abi(&func.dfg.signatures[sig].returns[rnum], isa)
})
})
.unwrap_or_default();
}
ValueDef::Param(ebb, num) => {
def = ebb.into();
if func.layout.entry_block() == Some(ebb) {
// The affinity for entry block parameters can be inferred from the function
// signature.
affinity = Affinity::abi(&func.signature.params[num], isa);
} else {
// Give normal EBB parameters a register affinity matching their type.
let rc = isa.regclass_for_abi_type(func.dfg.value_type(value));
affinity = Affinity::Reg(rc.into());
}
}
};
lrset.insert(LiveRange::new(value, def, affinity));
}
lrset.get_mut(value).unwrap()
}
/// Extend the live range for `value` so it reaches `to` which must live in `ebb`.
fn extend_to_use(
lr: &mut LiveRange,
ebb: Ebb,
to: Inst,
worklist: &mut Vec<Ebb>,
func: &Function,
cfg: &ControlFlowGraph,
forest: &mut LiveRangeForest,
) {
// This is our scratch working space, and we'll leave it empty when we return.
debug_assert!(worklist.is_empty());
// Extend the range locally in `ebb`.
// If there already was a live interval in that block, we're done.
if lr.extend_in_ebb(ebb, to, &func.layout, forest) {
worklist.push(ebb);
}
// The work list contains those EBBs where we have learned that the value needs to be
// live-in.
//
// This algorithm becomes a depth-first traversal up the CFG, enumerating all paths through the
// CFG from the existing live range to `ebb`.
//
// Extend the live range as we go. The live range itself also serves as a visited set since
// `extend_in_ebb` will never return true twice for the same EBB.
//
while let Some(livein) = worklist.pop() {
// We've learned that the value needs to be live-in to the `livein` EBB.
// Make sure it is also live at all predecessor branches to `livein`.
for (pred, branch) in cfg.pred_iter(livein) {
if lr.extend_in_ebb(pred, branch, &func.layout, forest) {
// This predecessor EBB also became live-in. We need to process it later.
worklist.push(pred);
}
}
}
}
/// Liveness analysis for a function.
///
/// Compute a live range for every SSA value used in the function.
pub struct Liveness {
/// The live ranges that have been computed so far.
ranges: LiveRangeSet,
/// Memory pool for the live ranges.
forest: LiveRangeForest,
/// Working space for the `extend_to_use` algorithm.
/// This vector is always empty, except for inside that function.
/// It lives here to avoid repeated allocation of scratch memory.
worklist: Vec<Ebb>,
}
impl Liveness {
/// Create a new empty liveness analysis.
///
/// The memory allocated for this analysis can be reused for multiple functions. Use the
/// `compute` method to actually run the analysis for a function.
pub fn new() -> Self {
Self {
ranges: LiveRangeSet::new(),
forest: LiveRangeForest::new(),
worklist: Vec::new(),
}
}
/// Get a context needed for working with a `LiveRange`.
pub fn context<'a>(&'a self, layout: &'a Layout) -> LiveRangeContext<'a, Layout> {
LiveRangeContext::new(layout, &self.forest)
}
/// Clear all data structures in this liveness analysis.
pub fn clear(&mut self) {
self.ranges.clear();
self.forest.clear();
self.worklist.clear();
}
/// Get the live range for `value`, if it exists.
pub fn get(&self, value: Value) -> Option<&LiveRange> {
self.ranges.get(value)
}
/// Create a new live range for `value`.
///
/// The new live range will be defined at `def` with no extent, like a dead value.
///
/// This asserts that `value` does not have an existing live range.
pub fn create_dead<PP>(&mut self, value: Value, def: PP, affinity: Affinity)
where
PP: Into<ProgramPoint>,
{
let old = self.ranges.insert(
LiveRange::new(value, def.into(), affinity),
);
debug_assert!(old.is_none(), "{} already has a live range", value);
}
/// Move the definition of `value` to `def`.
///
/// The old and new def points must be in the same EBB, and before the end of the live range.
pub fn move_def_locally<PP>(&mut self, value: Value, def: PP)
where
PP: Into<ProgramPoint>,
{
let lr = self.ranges.get_mut(value).expect("Value has no live range");
lr.move_def_locally(def.into());
}
/// Locally extend the live range for `value` to reach `user`.
///
/// It is assumed that `value` is already live before `user` in `ebb`.
///
/// Returns a mutable reference to the value's affinity in case that also needs to be updated.
pub fn extend_locally(
&mut self,
value: Value,
ebb: Ebb,
user: Inst,
layout: &Layout,
) -> &mut Affinity {
debug_assert_eq!(Some(ebb), layout.inst_ebb(user));
let lr = self.ranges.get_mut(value).expect("Value has no live range");
let livein = lr.extend_in_ebb(ebb, user, layout, &mut self.forest);
debug_assert!(!livein, "{} should already be live in {}", value, ebb);
&mut lr.affinity
}
/// Change the affinity of `value` to `Stack` and return the previous affinity.
pub fn spill(&mut self, value: Value) -> Affinity {
let lr = self.ranges.get_mut(value).expect("Value has no live range");
mem::replace(&mut lr.affinity, Affinity::Stack)
}
/// Compute the live ranges of all SSA values used in `func`.
/// This clears out any existing analysis stored in this data structure.
pub fn compute(&mut self, isa: &TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) {
let _tt = timing::ra_liveness();
self.ranges.clear();
// Get ISA data structures used for computing live range affinities.
let enc_info = isa.encoding_info();
let reg_info = isa.register_info();
// The liveness computation needs to visit all uses, but the order doesn't matter.
// TODO: Perhaps this traversal of the function could be combined with a dead code
// elimination pass if we visit a post-order of the dominator tree?
// TODO: Resolve value aliases while we're visiting instructions?
for ebb in func.layout.ebbs() {
// Make sure we have created live ranges for dead EBB parameters.
// TODO: If these parameters are really dead, we could remove them, except for the
// entry block which must match the function signature.
for &arg in func.dfg.ebb_params(ebb) {
get_or_create(&mut self.ranges, arg, isa, func, &enc_info);
}
for inst in func.layout.ebb_insts(ebb) {
// Eliminate all value aliases, they would confuse the register allocator.
func.dfg.resolve_aliases_in_arguments(inst);
// Make sure we have created live ranges for dead defs.
// TODO: When we implement DCE, we can use the absence of a live range to indicate
// an unused value.
for &def in func.dfg.inst_results(inst) {
get_or_create(&mut self.ranges, def, isa, func, &enc_info);
}
// Iterator of constraints, one per value operand.
let encoding = func.encodings[inst];
let mut operand_constraints = enc_info
.operand_constraints(encoding)
.map(|c| c.ins)
.unwrap_or(&[])
.iter();
for &arg in func.dfg.inst_args(inst) {
// Get the live range, create it as a dead range if necessary.
let lr = get_or_create(&mut self.ranges, arg, isa, func, &enc_info);
// Extend the live range to reach this use.
extend_to_use(
lr,
ebb,
inst,
&mut self.worklist,
func,
cfg,
&mut self.forest,
);
// Apply operand constraint, ignoring any variable arguments after the fixed
// operands described by `operand_constraints`. Variable arguments are either
// EBB arguments or call/return ABI arguments.
if let Some(constraint) = operand_constraints.next() {
lr.affinity.merge(constraint, &reg_info);
}
}
}
}
}
}
impl Index<Value> for Liveness {
type Output = LiveRange;
fn index(&self, index: Value) -> &LiveRange {
match self.ranges.get(index) {
Some(lr) => lr,
None => panic!("{} has no live range", index),
}
}
}
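
As a small consumer-side sketch (hypothetical helper, not part of this commit), this is how a pass would ask whether a value is still live at a given instruction, combining `get` with the `LiveRange` queries and the context built from the layout:

fn is_live_at(
    liveness: &Liveness,
    layout: &Layout,
    value: Value,
    ebb: Ebb,
    user: Inst,
) -> bool {
    match liveness.get(value) {
        // `reaches_use` checks both the def interval and any live-in interval in `ebb`.
        Some(lr) => lr.reaches_use(user, ebb, liveness.context(layout)),
        // No live range: the value was never defined, so it can't be live here.
        None => false,
    }
}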


@@ -0,0 +1,748 @@
//! Data structure representing the live range of an SSA value.
//!
//! Live ranges are tracked per SSA value, not per variable or virtual register. The live range of
//! an SSA value begins where it is defined and extends to all program points where the value is
//! still needed.
//!
//! # Local Live Ranges
//!
//! Inside a single extended basic block, the live range of a value is always an interval between
//! two program points (if the value is live in the EBB at all). The starting point is either:
//!
//! 1. The instruction that defines the value, or
//! 2. The EBB header, because the value is an argument to the EBB, or
//! 3. The EBB header, because the value is defined in another EBB and live-in to this one.
//!
//! The ending point of the local live range is the last of the following program points in the
//! EBB:
//!
//! 1. The last use in the EBB, where a *use* is an instruction that has the value as an argument.
//! 2. The last branch or jump instruction in the EBB that can reach a use.
//! 3. If the value has no uses anywhere (a *dead value*), the program point that defines it.
//!
//! Note that 2. includes loop back-edges to the same EBB. In general, if a value is defined
//! outside a loop and used inside the loop, it will be live in the entire loop.
//!
//! # Global Live Ranges
//!
//! Values that appear in more than one EBB have a *global live range* which can be seen as the
//! disjoint union of the per-EBB local intervals for all of the EBBs where the value is live.
//! Together with a `ProgramOrder` which provides a linear ordering of the EBBs, the global live
//! range becomes a linear sequence of disjoint intervals, at most one per EBB.
//!
//! In the special case of a dead value, the global live range is a single interval where the start
//! and end points are the same. The global live range of a value is never completely empty.
//!
//! # Register interference
//!
//! The register allocator uses live ranges to determine if values *interfere*, which means that
//! they can't be stored in the same register. Two live ranges interfere if and only if any of
//! their intervals overlap.
//!
//! If one live range ends at an instruction that defines another live range, those two live ranges
//! are not considered to interfere. This is because most ISAs allow instructions to reuse an input
//! register for an output value. If Cretonne gets support for inline assembly, we will need to
//! handle *early clobbers* which are output registers that are not allowed to alias any input
//! registers.
//!
//! If `i1 < i2 < i3` are program points, we have:
//!
//! - `i1-i2` and `i1-i3` interfere because the intervals overlap.
//! - `i1-i2` and `i2-i3` don't interfere.
//! - `i1-i3` and `i2-i2` do interfere because the dead def would clobber the register.
//! - `i1-i2` and `i2-i2` don't interfere.
//! - `i2-i3` and `i2-i2` do interfere.
//!
//! Because of this behavior around interval end points, live range interference is not completely
//! equivalent to mathematical intersection of open or half-open intervals.
//!
//! # Implementation notes
//!
//! A few notes about the implementation of this data structure. This should not concern someone
//! only looking to use the public interface.
//!
//! ## EBB ordering
//!
//! The relative order of EBBs is used to maintain a sorted list of live-in intervals and to
//! coalesce adjacent live-in intervals when the prior interval covers the whole EBB. This doesn't
//! depend on any property of the program order, so alternative orderings are possible:
//!
//! 1. The EBB layout order. This is what we currently use.
//! 2. A topological order of the dominator tree. All the live-in intervals would come after the
//! def interval.
//! 3. A numerical order by EBB number. Performant because it doesn't need to indirect through the
//! `ProgramOrder` for comparisons.
//!
//! These orderings will cause small differences in coalescing opportunities, but all of them would
//! do a decent job of compressing a long live range. The numerical order might be preferable
//! because:
//!
//! - It has better performance because EBB numbers can be compared directly without any table
//! lookups.
//! - If EBB numbers are not reused, it is safe to allocate new EBBs without getting spurious
//! live-in intervals from any coalesced representations that happen to cross a new EBB.
//!
//! For comparing instructions, the layout order is always what we want.
//!
//! ## Alternative representation
//!
//! Since a local live-in interval always begins at its EBB header, it is uniquely described by its
//! end point instruction alone. We can use the layout to look up the EBB containing the end point.
//! This means that a sorted `Vec<Inst>` would be enough to represent the set of live-in intervals.
//!
//! Coalescing is an important compression technique because some live ranges can span thousands of
//! EBBs. We can represent that by switching to a sorted `Vec<ProgramPoint>` representation where
//! an `[Ebb, Inst]` pair represents a coalesced range, while an `Inst` entry without a preceding
//! `Ebb` entry represents a single live-in interval.
//!
//! This representation is more compact for a live range with many uncoalesced live-in intervals.
//! It is more complicated to work with, though, so it is probably not worth it. The performance
//! benefits of switching to a numerical EBB order only appear if the binary search is doing
//! EBB-EBB comparisons.
//!
//! ## B-tree representation
//!
//! A `BTreeMap<Ebb, Inst>` could also be used for the live-in intervals. It looks like the
//! standard library B-tree doesn't provide the necessary interface for an efficient implementation
//! of coalescing, so we would need to roll our own.
//!
use bforest;
use entity::SparseMapValue;
use ir::{Ebb, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value};
use regalloc::affinity::Affinity;
use std::cmp::Ordering;
/// Global live range of a single SSA value.
///
/// As [explained in the module documentation](index.html#local-live-ranges), the live range of an
/// SSA value is the disjoint union of a set of intervals, each local to a single EBB, and with at
/// most one interval per EBB. We further distinguish between:
///
/// 1. The *def interval* is the local interval in the EBB where the value is defined, and
/// 2. The *live-in intervals* are the local intervals in the remaining EBBs.
///
/// A live-in interval always begins at the EBB header, while the def interval can begin at the
/// defining instruction, or at the EBB header for an EBB argument value.
///
/// All values have a def interval, but a large proportion of values don't have any live-in
/// intervals. These are called *local live ranges*.
///
/// # Program order requirements
///
/// The internal representation of a `LiveRange` depends on a consistent `ProgramOrder` both for
/// ordering instructions inside an EBB *and* for ordering EBBs. The methods that depend on the
/// ordering take an explicit `ProgramOrder` object, and it is the caller's responsibility to
/// ensure that the provided ordering is consistent between calls.
///
/// In particular, changing the order of EBBs or inserting new EBBs will invalidate live ranges.
///
/// Inserting new instructions in the layout is safe, but removing instructions is not. Besides the
/// instructions using or defining their value, `LiveRange` structs can contain references to
/// branch and jump instructions.
pub type LiveRange = GenLiveRange<Layout>;
/// Generic live range implementation.
///
/// The intended generic parameter is `PO=Layout`, but tests are simpler with a mock order.
/// Use `LiveRange` instead of using this generic directly.
pub struct GenLiveRange<PO: ProgramOrder> {
/// The value described by this live range.
/// This member can't be modified in case the live range is stored in a `SparseMap`.
value: Value,
/// The preferred register allocation for this value.
pub affinity: Affinity,
/// The instruction or EBB header where this value is defined.
def_begin: ProgramPoint,
/// The end point of the def interval. This must always belong to the same EBB as `def_begin`.
///
/// We always have `def_begin <= def_end` with equality implying a dead def live range with no
/// uses.
def_end: ProgramPoint,
/// Additional live-in intervals sorted in program order.
///
/// This map is empty for most values which are only used in one EBB.
///
/// A map entry `ebb -> inst` means that the live range is live-in to `ebb`, continuing up to
/// `inst` which may belong to a later EBB in the program order.
///
/// The entries are non-overlapping, and none of them overlap the EBB where the value is
/// defined.
liveins: bforest::Map<Ebb, Inst, PO>,
}
/// Context information needed to query a `LiveRange`.
pub struct LiveRangeContext<'a, PO: 'a + ProgramOrder> {
/// Ordering of EBBs.
pub order: &'a PO,
/// Memory pool.
pub forest: &'a bforest::MapForest<Ebb, Inst, PO>,
}
impl<'a, PO: ProgramOrder> LiveRangeContext<'a, PO> {
/// Make a new context.
pub fn new(
order: &'a PO,
forest: &'a bforest::MapForest<Ebb, Inst, PO>,
) -> LiveRangeContext<'a, PO> {
LiveRangeContext { order, forest }
}
}
impl<'a, PO: ProgramOrder> Clone for LiveRangeContext<'a, PO> {
fn clone(&self) -> Self {
LiveRangeContext {
order: self.order,
forest: self.forest,
}
}
}
impl<'a, PO: ProgramOrder> Copy for LiveRangeContext<'a, PO> {}
/// Forest of B-trees used for storing live ranges.
pub type LiveRangeForest = bforest::MapForest<Ebb, Inst, Layout>;
impl<PO: ProgramOrder> bforest::Comparator<Ebb> for PO {
fn cmp(&self, a: Ebb, b: Ebb) -> Ordering {
self.cmp(a, b)
}
}
impl<PO: ProgramOrder> GenLiveRange<PO> {
/// Create a new live range for `value` defined at `def`.
///
/// The live range will be created as dead, but it can be extended with `extend_in_ebb()`.
pub fn new(value: Value, def: ProgramPoint, affinity: Affinity) -> GenLiveRange<PO> {
GenLiveRange {
value,
affinity,
def_begin: def,
def_end: def,
liveins: bforest::Map::new(),
}
}
/// Extend the local interval for `ebb` so it reaches `to` which must belong to `ebb`.
/// Create a live-in interval if necessary.
///
/// If the live range already has a local interval in `ebb`, extend its end point so it
/// includes `to`, and return false.
///
/// If the live range did not previously have a local interval in `ebb`, add one so the value
/// is live-in to `ebb`, extending to `to`. Return true.
///
/// The return value can be used to detect if we just learned that the value is live-in to
/// `ebb`. This can trigger recursive extensions in `ebb`'s CFG predecessor blocks.
pub fn extend_in_ebb(
&mut self,
ebb: Ebb,
to: Inst,
order: &PO,
forest: &mut bforest::MapForest<Ebb, Inst, PO>,
) -> bool {
// First check if we're extending the def interval.
//
// We're assuming here that `to` never precedes `def_begin` in the same EBB, but we can't
// check it without a method for getting `to`'s EBB.
if order.cmp(ebb, self.def_end) != Ordering::Greater &&
order.cmp(to, self.def_begin) != Ordering::Less
{
let to_pp = to.into();
debug_assert_ne!(
to_pp,
self.def_begin,
"Can't use value in the defining instruction."
);
if order.cmp(to, self.def_end) == Ordering::Greater {
self.def_end = to_pp;
}
return false;
}
// Now check if we're extending any of the existing live-in intervals.
let mut c = self.liveins.cursor(forest, order);
let first_time_livein;
if let Some(end) = c.goto(ebb) {
// There's an interval beginning at `ebb`. See if it extends.
first_time_livein = false;
if order.cmp(end, to) == Ordering::Less {
*c.value_mut().unwrap() = to;
} else {
return first_time_livein;
}
} else if let Some((_, end)) = c.prev() {
// There's no interval beginning at `ebb`, but we could still be live-in at `ebb` with
// a coalesced interval that begins before and ends after.
if order.cmp(end, ebb) == Ordering::Greater {
// Yep, the previous interval overlaps `ebb`.
first_time_livein = false;
if order.cmp(end, to) == Ordering::Less {
*c.value_mut().unwrap() = to;
} else {
return first_time_livein;
}
} else {
first_time_livein = true;
// The current interval does not overlap `ebb`, but it may still be possible to
// coalesce with it.
if order.is_ebb_gap(end, ebb) {
*c.value_mut().unwrap() = to;
} else {
c.insert(ebb, to);
}
}
} else {
// There is no existing interval before `ebb`.
first_time_livein = true;
c.insert(ebb, to);
}
// Now `c` is left pointing at an interval that ends in `to`.
debug_assert_eq!(c.value(), Some(to));
// See if it can be coalesced with the following interval.
if let Some((next_ebb, next_end)) = c.next() {
if order.is_ebb_gap(to, next_ebb) {
// Remove this interval and extend the previous end point to `next_end`.
c.remove();
c.prev();
*c.value_mut().unwrap() = next_end;
}
}
first_time_livein
}
/// Is this the live range of a dead value?
///
/// A dead value has no uses, and its live range ends at the same program point where it is
/// defined.
pub fn is_dead(&self) -> bool {
self.def_begin == self.def_end
}
/// Is this a local live range?
///
/// A local live range is only used in the same EBB where it was defined. It is allowed to span
/// multiple basic blocks within that EBB.
pub fn is_local(&self) -> bool {
self.liveins.is_empty()
}
/// Get the program point where this live range is defined.
///
/// This will be an EBB header when the value is an EBB argument, otherwise it is the defining
/// instruction.
pub fn def(&self) -> ProgramPoint {
self.def_begin
}
/// Move the definition of this value to a new program point.
///
/// It is only valid to move the definition within the same EBB, and it can't be moved beyond
/// `def_local_end()`.
pub fn move_def_locally(&mut self, def: ProgramPoint) {
self.def_begin = def;
}
/// Get the local end-point of this live range in the EBB where it is defined.
///
/// This can be the EBB header itself in the case of a dead EBB argument.
/// Otherwise, it will be the last local use or branch/jump that can reach a use.
pub fn def_local_end(&self) -> ProgramPoint {
self.def_end
}
/// Get the local end-point of this live range in an EBB where it is live-in.
///
/// If this live range is not live-in to `ebb`, return `None`. Otherwise, return the end-point
/// of this live range's local interval in `ebb`.
///
/// If the live range is live through all of `ebb`, the terminator of `ebb` is a correct
/// answer, but it is also possible that an even later program point is returned. So don't
/// depend on the returned `Inst` to belong to `ebb`.
pub fn livein_local_end(&self, ebb: Ebb, ctx: LiveRangeContext<PO>) -> Option<Inst> {
self.liveins
.get_or_less(ebb, ctx.forest, ctx.order)
.and_then(|(_, inst)| {
// We have an entry that ends at `inst`.
if ctx.order.cmp(inst, ebb) == Ordering::Greater {
Some(inst)
} else {
None
}
})
}
/// Is this value live-in to `ebb`?
///
/// An EBB argument is not considered to be live in.
pub fn is_livein(&self, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
self.livein_local_end(ebb, ctx).is_some()
}
/// Get all the live-in intervals.
///
/// Note that the intervals are stored in a compressed form so each entry may span multiple
/// EBBs where the value is live in.
pub fn liveins<'a>(
&'a self,
ctx: LiveRangeContext<'a, PO>,
) -> bforest::MapIter<'a, Ebb, Inst, PO> {
self.liveins.iter(ctx.forest)
}
/// Check if this live range overlaps a definition in `ebb`.
pub fn overlaps_def(
&self,
def: ExpandedProgramPoint,
ebb: Ebb,
ctx: LiveRangeContext<PO>,
) -> bool {
// Two defs at the same program point always overlap, even if one is dead.
if def == self.def_begin.into() {
return true;
}
// Check for an overlap with the local range.
if ctx.order.cmp(def, self.def_begin) != Ordering::Less &&
ctx.order.cmp(def, self.def_end) == Ordering::Less
{
return true;
}
// Check for an overlap with a live-in range.
match self.livein_local_end(ebb, ctx) {
Some(inst) => ctx.order.cmp(def, inst) == Ordering::Less,
None => false,
}
}
/// Check if this live range reaches a use at `user` in `ebb`.
pub fn reaches_use(&self, user: Inst, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
// Check for an overlap with the local range.
if ctx.order.cmp(user, self.def_begin) == Ordering::Greater &&
ctx.order.cmp(user, self.def_end) != Ordering::Greater
{
return true;
}
// Check for an overlap with a live-in range.
match self.livein_local_end(ebb, ctx) {
Some(inst) => ctx.order.cmp(user, inst) != Ordering::Greater,
None => false,
}
}
/// Check if this live range is killed at `user` in `ebb`.
pub fn killed_at(&self, user: Inst, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
self.def_local_end() == user.into() || self.livein_local_end(ebb, ctx) == Some(user)
}
}
/// Allow a `LiveRange` to be stored in a `SparseMap` indexed by values.
impl<PO: ProgramOrder> SparseMapValue<Value> for GenLiveRange<PO> {
fn key(&self) -> Value {
self.value
}
}
#[cfg(test)]
mod tests {
use super::{GenLiveRange, LiveRangeContext};
use bforest;
use entity::EntityRef;
use ir::{Ebb, Inst, Value};
use ir::{ExpandedProgramPoint, ProgramOrder};
use std::cmp::Ordering;
use std::vec::Vec;
// Dummy program order which simply compares indexes.
// It is assumed that EBBs have indexes that are multiples of 10, and instructions have indexes
// in between. `is_ebb_gap` assumes that terminator instructions have indexes of the form
// ebb * 10 + 1. This is used in the coalesce test.
struct ProgOrder {}
impl ProgramOrder for ProgOrder {
fn cmp<A, B>(&self, a: A, b: B) -> Ordering
where
A: Into<ExpandedProgramPoint>,
B: Into<ExpandedProgramPoint>,
{
fn idx(pp: ExpandedProgramPoint) -> usize {
match pp {
ExpandedProgramPoint::Inst(i) => i.index(),
ExpandedProgramPoint::Ebb(e) => e.index(),
}
}
let ia = idx(a.into());
let ib = idx(b.into());
ia.cmp(&ib)
}
fn is_ebb_gap(&self, inst: Inst, ebb: Ebb) -> bool {
inst.index() % 10 == 1 && ebb.index() / 10 == inst.index() / 10 + 1
}
}
impl ProgOrder {
// Get the EBB corresponding to `inst`.
fn inst_ebb(&self, inst: Inst) -> Ebb {
let i = inst.index();
Ebb::new(i - i % 10)
}
// Get the EBB of a program point.
fn pp_ebb<PP: Into<ExpandedProgramPoint>>(&self, pp: PP) -> Ebb {
match pp.into() {
ExpandedProgramPoint::Inst(i) => self.inst_ebb(i),
ExpandedProgramPoint::Ebb(e) => e,
}
}
// Validate the live range invariants.
fn validate(
&self,
lr: &GenLiveRange<ProgOrder>,
forest: &bforest::MapForest<Ebb, Inst, ProgOrder>,
) {
// The def interval must cover a single EBB.
let def_ebb = self.pp_ebb(lr.def_begin);
assert_eq!(def_ebb, self.pp_ebb(lr.def_end));
// Check that the def interval isn't backwards.
match self.cmp(lr.def_begin, lr.def_end) {
Ordering::Equal => assert!(lr.liveins.is_empty()),
Ordering::Greater => {
panic!("Backwards def interval: {}-{}", lr.def_begin, lr.def_end)
}
Ordering::Less => {}
}
// Check the live-in intervals.
let mut prev_end = None;
for (begin, end) in lr.liveins.iter(forest) {
assert_eq!(self.cmp(begin, end), Ordering::Less);
if let Some(e) = prev_end {
assert_eq!(self.cmp(e, begin), Ordering::Less);
}
assert!(
self.cmp(lr.def_end, begin) == Ordering::Less ||
self.cmp(lr.def_begin, end) == Ordering::Greater,
"Interval can't overlap the def EBB"
);
// Save for next round.
prev_end = Some(end);
}
}
}
// Singleton `ProgramOrder` for tests below.
const PO: &'static ProgOrder = &ProgOrder {};
#[test]
fn dead_def_range() {
let v0 = Value::new(0);
let e0 = Ebb::new(0);
let i1 = Inst::new(1);
let i2 = Inst::new(2);
let e2 = Ebb::new(2);
let lr = GenLiveRange::new(v0, i1.into(), Default::default());
let forest = &bforest::MapForest::new();
let ctx = LiveRangeContext::new(PO, forest);
assert!(lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), i1.into());
assert_eq!(lr.def_local_end(), i1.into());
assert_eq!(lr.livein_local_end(e2, ctx), None);
PO.validate(&lr, ctx.forest);
// A dead live range overlaps its own def program point.
assert!(lr.overlaps_def(i1.into(), e0, ctx));
assert!(!lr.overlaps_def(i2.into(), e0, ctx));
assert!(!lr.overlaps_def(e0.into(), e0, ctx));
}
#[test]
fn dead_arg_range() {
let v0 = Value::new(0);
let e2 = Ebb::new(2);
let lr = GenLiveRange::new(v0, e2.into(), Default::default());
let forest = &bforest::MapForest::new();
let ctx = LiveRangeContext::new(PO, forest);
assert!(lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), e2.into());
assert_eq!(lr.def_local_end(), e2.into());
// The def interval of an EBB argument does not count as live-in.
assert_eq!(lr.livein_local_end(e2, ctx), None);
PO.validate(&lr, ctx.forest);
}
#[test]
fn local_def() {
let v0 = Value::new(0);
let e10 = Ebb::new(10);
let i11 = Inst::new(11);
let i12 = Inst::new(12);
let i13 = Inst::new(13);
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
let forest = &mut bforest::MapForest::new();
assert_eq!(lr.extend_in_ebb(e10, i13, PO, forest), false);
PO.validate(&lr, forest);
assert!(!lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), i11.into());
assert_eq!(lr.def_local_end(), i13.into());
// Extending to an already covered inst should not change anything.
assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
PO.validate(&lr, forest);
assert_eq!(lr.def(), i11.into());
assert_eq!(lr.def_local_end(), i13.into());
}
#[test]
fn local_arg() {
let v0 = Value::new(0);
let e10 = Ebb::new(10);
let i11 = Inst::new(11);
let i12 = Inst::new(12);
let i13 = Inst::new(13);
let mut lr = GenLiveRange::new(v0, e10.into(), Default::default());
let forest = &mut bforest::MapForest::new();
// Extending a dead EBB argument in its own block should not indicate that a live-in
// interval was created.
assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
PO.validate(&lr, forest);
assert!(!lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), e10.into());
assert_eq!(lr.def_local_end(), i12.into());
// Extending to an already covered inst should not change anything.
assert_eq!(lr.extend_in_ebb(e10, i11, PO, forest), false);
PO.validate(&lr, forest);
assert_eq!(lr.def(), e10.into());
assert_eq!(lr.def_local_end(), i12.into());
// Extending further.
assert_eq!(lr.extend_in_ebb(e10, i13, PO, forest), false);
PO.validate(&lr, forest);
assert_eq!(lr.def(), e10.into());
assert_eq!(lr.def_local_end(), i13.into());
}
#[test]
fn global_def() {
let v0 = Value::new(0);
let e10 = Ebb::new(10);
let i11 = Inst::new(11);
let i12 = Inst::new(12);
let e20 = Ebb::new(20);
let i21 = Inst::new(21);
let i22 = Inst::new(22);
let i23 = Inst::new(23);
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
let forest = &mut bforest::MapForest::new();
assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
// Adding a live-in interval.
assert_eq!(lr.extend_in_ebb(e20, i22, PO, forest), true);
PO.validate(&lr, forest);
assert_eq!(
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
Some(i22)
);
// Extending to a point the live-in interval already covers changes nothing.
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), false);
assert_eq!(
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
Some(i22)
);
// Extending the existing live-in.
assert_eq!(lr.extend_in_ebb(e20, i23, PO, forest), false);
PO.validate(&lr, forest);
assert_eq!(
lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
Some(i23)
);
}
#[test]
fn coalesce() {
let v0 = Value::new(0);
let i11 = Inst::new(11);
let e20 = Ebb::new(20);
let i21 = Inst::new(21);
let e30 = Ebb::new(30);
let i31 = Inst::new(31);
let e40 = Ebb::new(40);
let i41 = Inst::new(41);
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
let forest = &mut bforest::MapForest::new();
assert_eq!(lr.extend_in_ebb(e30, i31, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e30, i31)]
);
// Coalesce to previous
assert_eq!(lr.extend_in_ebb(e40, i41, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e30, i41)]
);
// Coalesce to next
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e20, i41)]
);
let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
assert_eq!(lr.extend_in_ebb(e40, i41, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e40, i41)]
);
assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e20, i21), (e40, i41)]
);
// Coalesce to previous and next
assert_eq!(lr.extend_in_ebb(e30, i31, PO, forest), true);
assert_eq!(
lr.liveins(LiveRangeContext::new(PO, forest))
.collect::<Vec<_>>(),
[(e20, i41)]
);
}
// TODO: Add more tests that exercise the binary search algorithm.
}

@@ -0,0 +1,23 @@
//! Register allocation.
//!
//! This module contains data structures and algorithms used for register allocation.
pub mod register_set;
pub mod coloring;
pub mod live_value_tracker;
pub mod liveness;
pub mod liverange;
pub mod virtregs;
mod affinity;
mod coalescing;
mod context;
mod diversion;
mod pressure;
mod reload;
mod solver;
mod spilling;
pub use self::register_set::RegisterSet;
pub use self::context::Context;
pub use self::diversion::RegDiversions;

@@ -0,0 +1,377 @@
//! Register pressure tracking.
//!
//! SSA-based register allocation depends on a spilling phase that "lowers register pressure
//! sufficiently". This module defines the data structures needed to measure register pressure
//! accurately enough to guarantee that the coloring phase will not run out of registers.
//!
//! Ideally, measuring register pressure amounts to simply counting the number of live registers at
//! any given program point. This simplistic method has two problems:
//!
//! 1. Registers are not interchangeable. Most ISAs have separate integer and floating-point
//! register banks, so we need to at least count the number of live registers in each register
//! bank separately.
//!
//! 2. Some ISAs have complicated register aliasing properties. In particular, the 32-bit ARM
//! ISA has a floating-point register bank where two 32-bit registers alias one 64-bit register.
//! This makes it difficult to accurately measure register pressure.
//!
//! This module deals with the problems via *register banks* and *top-level register classes*.
//! Register classes in different register banks are completely independent, so we can count
//! registers in one bank without worrying about the other bank at all.
//!
//! All register classes have a unique top-level register class, and we will count registers for
//! each top-level register class individually. However, a register bank can have multiple
//! top-level register classes that interfere with each other, so all top-level counts need to
//! be considered when determining how many more registers can be allocated.
//!
//! Currently, the only register bank with multiple top-level registers is the `arm32`
//! floating-point register bank which has `S`, `D`, and `Q` top-level classes.
//!
//! # Base and transient counts
//!
//! We maintain two separate register counts per top-level register class: base counts and
//! transient counts. The base counts are adjusted with the `take` and `free` functions. The
//! transient counts are adjusted with `take_transient`, then either committed to the base counts
//! with `preserve_transient` or discarded with `reset_transient`.
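//!
//! A minimal usage sketch (the `spill_something_in` helper, `reginfo`, `usable_regs`, and `gpr`
//! are placeholders; the real spilling pass picks candidates from its live value tracker):
//!
//! ```ignore
//! let mut pressure = Pressure::new(&reginfo, &usable_regs);
//! pressure.take(gpr); // a register def that stays live: bump the base count
//! while let Err(mask) = pressure.take_transient(gpr) {
//!     // `mask` names the interfering top-level classes; ease pressure in one of them.
//!     spill_something_in(mask);
//! }
//! // Commit the transient registers, or call `reset_transient()` to discard them instead.
//! pressure.preserve_transient();
//! ```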
// Remove once we're using the pressure tracker.
#![allow(dead_code)]
use isa::registers::{RegClass, RegClassMask, RegInfo, MAX_TRACKED_TOPRCS};
use regalloc::RegisterSet;
use std::cmp::min;
use std::fmt;
use std::iter::ExactSizeIterator;
/// Information per top-level register class.
///
/// Everything but the counts is static information computed from the constructor arguments.
#[derive(Default)]
struct TopRC {
// Number of registers currently used from this register class.
base_count: u32,
transient_count: u32,
// Max number of registers that can be allocated.
limit: u32,
// Register units per register.
width: u8,
// The first aliasing top-level RC.
first_toprc: u8,
// The number of aliasing top-level RCs.
num_toprcs: u8,
}
impl TopRC {
fn total_count(&self) -> u32 {
self.base_count + self.transient_count
}
}
pub struct Pressure {
// Bit mask of top-level register classes that are aliased by other top-level register classes.
// Unaliased register classes can use a simpler interference algorithm.
aliased: RegClassMask,
// Current register counts per top-level register class.
toprc: [TopRC; MAX_TRACKED_TOPRCS],
}
impl Pressure {
/// Create a new register pressure tracker.
pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Pressure {
let mut p = Pressure {
aliased: 0,
toprc: Default::default(),
};
// Get the layout of aliasing top-level register classes from the register banks.
for bank in reginfo.banks.iter() {
let first = bank.first_toprc;
let num = bank.num_toprcs;
if bank.pressure_tracking {
for rc in &mut p.toprc[first..first + num] {
rc.first_toprc = first as u8;
rc.num_toprcs = num as u8;
}
// Flag the top-level register classes with aliases.
if num > 1 {
p.aliased |= ((1 << num) - 1) << first;
}
} else {
// This bank has no pressure tracking, so its top-level register classes may exceed
// `MAX_TRACKED_TOPRCS`. Fill in dummy entries.
for rc in &mut p.toprc[first..min(first + num, MAX_TRACKED_TOPRCS)] {
// These aren't used if we don't set the `aliased` bit.
rc.first_toprc = !0;
rc.limit = !0;
}
}
}
// Compute per-class limits from `usable`.
for (toprc, rc) in p.toprc.iter_mut().take_while(|t| t.num_toprcs > 0).zip(
reginfo.classes,
)
{
toprc.limit = usable.iter(rc).len() as u32;
toprc.width = rc.width;
}
p
}
/// Check for an available register in the register class `rc`.
///
/// If it is possible to allocate one more register from `rc`'s top-level register class,
/// returns 0.
///
/// If not, returns a bit-mask of top-level register classes that are interfering. Register
/// pressure should be eased in one of the returned top-level register classes before calling
/// `check_avail()` to check again.
fn check_avail(&self, rc: RegClass) -> RegClassMask {
let entry = match self.toprc.get(rc.toprc as usize) {
None => return 0, // Not a pressure tracked bank.
Some(e) => e,
};
let mask = 1 << rc.toprc;
if (self.aliased & mask) == 0 {
// This is a simple unaliased top-level register class.
if entry.total_count() < entry.limit {
0
} else {
mask
}
} else {
// This is the more complicated case. The top-level register class has aliases.
self.check_avail_aliased(entry)
}
}
/// Check for an available register in a top-level register class that may have aliases.
///
/// This is the out-of-line slow path for `check_avail()`.
fn check_avail_aliased(&self, entry: &TopRC) -> RegClassMask {
let first = usize::from(entry.first_toprc);
let num = usize::from(entry.num_toprcs);
let width = u32::from(entry.width);
let ulimit = entry.limit * width;
// Count up the number of available register units.
let mut units = 0;
for (rc, rci) in self.toprc[first..first + num].iter().zip(first..) {
let rcw = u32::from(rc.width);
// If `rc.width` is smaller than `width`, each register in `rc` could potentially block
// one of ours. This is assuming that none of the smaller registers are straddling the
// bigger ones.
//
// If `rc.width` is larger than `width`, we are also assuming that the registers are
// aligned and `rc.width` is a multiple of `width`.
let u = if rcw < width {
// We can't take more than the total number of register units in the class.
// This matters for arm32 S-registers which can only ever lock out 16 D-registers.
min(rc.total_count() * width, rc.limit * rcw)
} else {
rc.total_count() * rcw
};
// If this top-level RC on its own is responsible for exceeding our limit, return it
// early to guarantee that registers here are spilled before spilling other registers
// unnecessarily.
if u >= ulimit {
return 1 << rci;
}
units += u;
}
// We've counted up the worst-case number of register units claimed by all aliasing
// classes. Compare to the unit limit in this class.
if units < ulimit {
0
} else {
// Registers need to be spilled from any one of the aliasing classes.
((1 << num) - 1) << first
}
}
/// Take a register from `rc`.
///
/// This does not check if there are enough registers available.
pub fn take(&mut self, rc: RegClass) {
self.toprc.get_mut(rc.toprc as usize).map(
|t| t.base_count += 1,
);
}
/// Free a register in `rc`.
pub fn free(&mut self, rc: RegClass) {
self.toprc.get_mut(rc.toprc as usize).map(
|t| t.base_count -= 1,
);
}
/// Reset all counts to 0, both base and transient.
pub fn reset(&mut self) {
for e in &mut self.toprc {
e.base_count = 0;
e.transient_count = 0;
}
}
/// Try to increment a transient counter.
///
/// This will fail if there are not enough registers available.
pub fn take_transient(&mut self, rc: RegClass) -> Result<(), RegClassMask> {
let mask = self.check_avail(rc);
if mask == 0 {
self.toprc.get_mut(rc.toprc as usize).map(|t| {
t.transient_count += 1
});
Ok(())
} else {
Err(mask)
}
}
/// Reset all transient counts to 0.
pub fn reset_transient(&mut self) {
for e in &mut self.toprc {
e.transient_count = 0;
}
}
/// Preserve the transient counts by transferring them to the base counts.
pub fn preserve_transient(&mut self) {
for e in &mut self.toprc {
e.base_count += e.transient_count;
e.transient_count = 0;
}
}
}
impl fmt::Display for Pressure {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Pressure[")?;
for rc in &self.toprc {
if rc.limit > 0 && rc.limit < !0 {
write!(f, " {}+{}/{}", rc.base_count, rc.transient_count, rc.limit)?;
}
}
write!(f, " ]")
}
}
#[cfg(test)]
#[cfg(build_arm32)]
mod tests {
use super::Pressure;
use isa::{RegClass, TargetIsa};
use regalloc::RegisterSet;
use std::borrow::Borrow;
use std::boxed::Box;
// Make an arm32 `TargetIsa`, if possible.
fn arm32() -> Option<Box<TargetIsa>> {
use isa;
use settings;
let shared_builder = settings::builder();
let shared_flags = settings::Flags::new(&shared_builder);
isa::lookup("arm32").ok().map(|b| b.finish(shared_flags))
}
// Get a register class by name.
fn rc_by_name(isa: &TargetIsa, name: &str) -> RegClass {
isa.register_info()
.classes
.iter()
.find(|rc| rc.name == name)
.expect("Can't find named register class.")
}
#[test]
fn basic_counting() {
let isa = arm32().expect("This test requires arm32 support");
let isa = isa.borrow();
let gpr = rc_by_name(isa, "GPR");
let s = rc_by_name(isa, "S");
let reginfo = isa.register_info();
let regs = RegisterSet::new();
let mut pressure = Pressure::new(&reginfo, &regs);
let mut count = 0;
while pressure.check_avail(gpr) == 0 {
pressure.take(gpr);
count += 1;
}
assert_eq!(count, 16);
assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
assert_eq!(pressure.check_avail(s), 0);
pressure.free(gpr);
assert_eq!(pressure.check_avail(gpr), 0);
pressure.take(gpr);
assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
assert_eq!(pressure.check_avail(s), 0);
pressure.reset();
assert_eq!(pressure.check_avail(gpr), 0);
assert_eq!(pressure.check_avail(s), 0);
}
#[test]
fn arm_float_bank() {
let isa = arm32().expect("This test requires arm32 support");
let isa = isa.borrow();
let s = rc_by_name(isa, "S");
let d = rc_by_name(isa, "D");
let q = rc_by_name(isa, "Q");
let reginfo = isa.register_info();
let regs = RegisterSet::new();
let mut pressure = Pressure::new(&reginfo, &regs);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
// Allocating a single S-register should not affect availability.
pressure.take(s);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
pressure.take(d);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
pressure.take(q);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
// Take a total of 16 S-regs.
for _ in 1..16 {
pressure.take(s);
}
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
// We've taken 16 S, 1 D, and 1 Q. There should be 6 more Qs.
for _ in 0..6 {
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
pressure.take(q);
}
// We've taken 16 S, 1 D, and 7 Qs.
assert!(pressure.check_avail(s) != 0);
assert_eq!(pressure.check_avail(d), 0);
assert!(pressure.check_avail(q) != 0);
}
}

@@ -0,0 +1,321 @@
//! Set of allocatable registers as a bit vector of register units.
//!
//! While allocating registers, we need to keep track of which registers are available and which
//! registers are in use. Since registers can alias in different ways, we track this via the
//! "register unit" abstraction. Every register contains one or more register units. Registers that
//! share a register unit can't be in use at the same time.
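//!
//! A brief usage sketch (`rc` is a placeholder register class; real code obtains it from the
//! target ISA's `RegInfo`):
//!
//! ```ignore
//! let mut regs = RegisterSet::new();
//! if let Some(reg) = regs.iter(rc).next() {
//!     regs.take(rc, reg); // all of the register's units become unavailable
//!     // ... the register is in use ...
//!     regs.free(rc, reg); // give the units back
//! }
//! ```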
use isa::registers::{RegClass, RegInfo, RegUnit, RegUnitMask};
use std::char;
use std::fmt;
use std::iter::ExactSizeIterator;
use std::mem::size_of_val;
/// Set of registers available for allocation.
#[derive(Clone)]
pub struct RegisterSet {
avail: RegUnitMask,
}
// Given a register class and a register unit in the class, compute a word index and a bit mask of
// register units representing that register.
//
// Note that a register is not allowed to straddle words.
fn bitmask(rc: RegClass, reg: RegUnit) -> (usize, u32) {
// Bit mask representing the register. It is `rc.width` consecutive units.
let width_bits = (1 << rc.width) - 1;
// Index into avail[] of the word containing `reg`.
let word_index = (reg / 32) as usize;
// The actual bits in the word that cover `reg`.
let reg_bits = width_bits << (reg % 32);
(word_index, reg_bits)
}
impl RegisterSet {
/// Create a new register set with all registers available.
///
/// Note that this includes *all* registers. Query the `TargetIsa` object to get a set of
/// allocatable registers where reserved registers have been filtered out.
pub fn new() -> Self {
Self { avail: [!0; 3] }
}
/// Create a new register set with no registers available.
pub fn empty() -> Self {
Self { avail: [0; 3] }
}
/// Returns `true` if the specified register is available.
pub fn is_avail(&self, rc: RegClass, reg: RegUnit) -> bool {
let (idx, bits) = bitmask(rc, reg);
(self.avail[idx] & bits) == bits
}
/// Allocate `reg` from `rc` so it is no longer available.
///
/// It is an error to take a register that doesn't have all of its register units available.
pub fn take(&mut self, rc: RegClass, reg: RegUnit) {
let (idx, bits) = bitmask(rc, reg);
debug_assert!(
(self.avail[idx] & bits) == bits,
"{}:{} not available in {}",
rc,
rc.info.display_regunit(reg),
self.display(rc.info)
);
self.avail[idx] &= !bits;
}
/// Return `reg` and all of its register units to the set of available registers.
pub fn free(&mut self, rc: RegClass, reg: RegUnit) {
let (idx, bits) = bitmask(rc, reg);
debug_assert!(
(self.avail[idx] & bits) == 0,
"{}:{} not allocated in {}",
rc,
rc.info.display_regunit(reg),
self.display(rc.info)
);
self.avail[idx] |= bits;
}
/// Return an iterator over all available registers belonging to the register class `rc`.
///
/// This doesn't allocate anything from the set; use `take()` for that.
pub fn iter(&self, rc: RegClass) -> RegSetIter {
// Start by copying the RC mask. It is a single set bit for each register in the class.
let mut rsi = RegSetIter { regs: rc.mask };
// Mask out the unavailable units.
for idx in 0..self.avail.len() {
// If a single unit in a register is unavailable, the whole register can't be used.
// If a register straddles a word boundary, it will be marked as unavailable.
// There's an assertion in `cdsl/registers.py` to check for that.
for i in 0..rc.width {
rsi.regs[idx] &= self.avail[idx] >> i;
}
}
rsi
}
/// Check if any register units allocated out of this set interferes with units allocated out
/// of `other`.
///
/// This assumes that unused bits are 1.
pub fn interferes_with(&self, other: &RegisterSet) -> bool {
self.avail.iter().zip(&other.avail).any(
|(&x, &y)| (x | y) != !0,
)
}
/// Intersect this set of registers with `other`. This has the effect of removing any register
/// units from this set that are not in `other`.
pub fn intersect(&mut self, other: &RegisterSet) {
for (x, &y) in self.avail.iter_mut().zip(&other.avail) {
*x &= y;
}
}
/// Return an object that can display this register set, using the register info from the
/// target ISA.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayRegisterSet<'a> {
DisplayRegisterSet(self.clone(), regs.into())
}
}
/// Iterator over available registers in a register class.
pub struct RegSetIter {
regs: RegUnitMask,
}
impl Iterator for RegSetIter {
type Item = RegUnit;
fn next(&mut self) -> Option<RegUnit> {
let mut unit_offset = 0;
// Find the first set bit in `self.regs`.
for word in &mut self.regs {
if *word != 0 {
// Compute the register unit number from the lowest set bit in the word.
let unit = unit_offset + word.trailing_zeros() as RegUnit;
// Clear that lowest bit so we won't find it again.
*word &= *word - 1;
return Some(unit);
}
// How many register units were there in the word? This is a constant 32 for `u32` etc.
unit_offset += 8 * size_of_val(word) as RegUnit;
}
// All of `self.regs` is 0.
None
}
fn size_hint(&self) -> (usize, Option<usize>) {
let bits = self.regs.iter().map(|&w| w.count_ones() as usize).sum();
(bits, Some(bits))
}
}
impl ExactSizeIterator for RegSetIter {}
/// Displaying a `RegisterSet` correctly requires the associated `RegInfo` from the target ISA.
pub struct DisplayRegisterSet<'a>(RegisterSet, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayRegisterSet<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "[")?;
match self.1 {
None => {
for w in &self.0.avail {
write!(f, " #{:08x}", w)?;
}
}
Some(reginfo) => {
let toprcs = reginfo
.banks
.iter()
.map(|b| b.first_toprc + b.num_toprcs)
.max()
.expect("No register banks");
for rc in &reginfo.classes[0..toprcs] {
if rc.width == 1 {
let bank = &reginfo.banks[rc.bank as usize];
write!(f, " {}: ", rc)?;
for offset in 0..bank.units {
let reg = bank.first_unit + offset;
if !rc.contains(reg) {
continue;
}
if !self.0.is_avail(rc, reg) {
write!(f, "-")?;
continue;
}
// Display individual registers as either the second letter of their
// name or the last digit of their number.
// This works for x86 (rax, rbx, ...) and for numbered regs.
write!(
f,
"{}",
bank.names
.get(offset as usize)
.and_then(|name| name.chars().nth(1))
.unwrap_or_else(
|| char::from_digit(u32::from(offset % 10), 10).unwrap(),
)
)?;
}
}
}
}
}
write!(f, " ]")
}
}
impl fmt::Display for RegisterSet {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.display(None).fmt(f)
}
}
#[cfg(test)]
mod tests {
use super::*;
use isa::registers::{RegClass, RegClassData};
use std::vec::Vec;
// Register classes for testing.
const GPR: RegClass = &RegClassData {
name: "GPR",
index: 0,
width: 1,
bank: 0,
toprc: 0,
first: 28,
subclasses: 0,
mask: [0xf0000000, 0x0000000f, 0],
info: &INFO,
};
const DPR: RegClass = &RegClassData {
name: "DPR",
index: 0,
width: 2,
bank: 0,
toprc: 0,
first: 28,
subclasses: 0,
mask: [0x50000000, 0x0000000a, 0],
info: &INFO,
};
const INFO: RegInfo = RegInfo {
banks: &[],
classes: &[],
};
#[test]
fn put_and_take() {
let mut regs = RegisterSet::new();
// `GPR` has the 8 register units 28-35.
assert_eq!(regs.iter(GPR).len(), 8);
assert_eq!(regs.iter(GPR).count(), 8);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [28, 30, 33, 35]);
assert!(regs.is_avail(GPR, 29));
regs.take(&GPR, 29);
assert!(!regs.is_avail(GPR, 29));
assert_eq!(regs.iter(GPR).count(), 7);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
assert!(regs.is_avail(GPR, 30));
regs.take(&GPR, 30);
assert!(!regs.is_avail(GPR, 30));
assert_eq!(regs.iter(GPR).count(), 6);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
assert!(regs.is_avail(GPR, 32));
regs.take(&GPR, 32);
assert!(!regs.is_avail(GPR, 32));
assert_eq!(regs.iter(GPR).count(), 5);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
regs.free(&GPR, 30);
assert!(regs.is_avail(GPR, 30));
assert!(!regs.is_avail(GPR, 29));
assert!(!regs.is_avail(GPR, 32));
assert_eq!(regs.iter(GPR).count(), 6);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
regs.free(&GPR, 32);
assert!(regs.is_avail(GPR, 31));
assert!(!regs.is_avail(GPR, 29));
assert!(regs.is_avail(GPR, 32));
assert_eq!(regs.iter(GPR).count(), 7);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
}
#[test]
fn interference() {
let mut regs1 = RegisterSet::new();
let mut regs2 = RegisterSet::new();
assert!(!regs1.interferes_with(&regs2));
regs1.take(&GPR, 32);
assert!(!regs1.interferes_with(&regs2));
regs2.take(&GPR, 31);
assert!(!regs1.interferes_with(&regs2));
regs1.intersect(&regs2);
assert!(regs1.interferes_with(&regs2));
}
}

@@ -0,0 +1,390 @@
//! Reload pass
//!
//! The reload pass runs between the spilling and coloring passes. Its primary responsibility is to
//! insert `spill` and `fill` instructions such that instruction operands expecting a register will
//! get a value with register affinity, and operands expecting a stack slot will get a value with
//! stack affinity.
//!
//! The secondary responsibility of the reload pass is to reuse values in registers as much as
//! possible to minimize the number of `fill` instructions needed. This must not cause the register
//! pressure limits to be exceeded.
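//!
//! As an illustrative sketch (the value numbers are made up, not from a real test case),
//! reloading a spilled value `v1` before a register-constrained use looks roughly like:
//!
//! ```text
//! v10 = fill v1        ; copy the stack value into a register
//! v2 = iadd v10, v3    ; this operand now sees a register value
//! ```
//!
//! while a register def whose live range wants a stack slot is rewritten as:
//!
//! ```text
//! v7 = inst ...
//! v2 = spill v7
//! ```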
use cursor::{Cursor, EncCursor};
use dominator_tree::DominatorTree;
use entity::{SparseMap, SparseMapValue};
use ir::{AbiParam, ArgumentLoc, InstBuilder};
use ir::{Ebb, Function, Inst, Value};
use isa::RegClass;
use isa::{ConstraintKind, EncInfo, Encoding, RecipeConstraints, TargetIsa};
use regalloc::affinity::Affinity;
use regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
use regalloc::liveness::Liveness;
use std::vec::Vec;
use timing;
use topo_order::TopoOrder;
/// Reusable data structures for the reload pass.
pub struct Reload {
candidates: Vec<ReloadCandidate>,
reloads: SparseMap<Value, ReloadedValue>,
}
/// Context data structure that gets instantiated once per pass.
struct Context<'a> {
cur: EncCursor<'a>,
// Cached ISA information.
// We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object.
encinfo: EncInfo,
// References to contextual data structures we need.
domtree: &'a DominatorTree,
liveness: &'a mut Liveness,
topo: &'a mut TopoOrder,
candidates: &'a mut Vec<ReloadCandidate>,
reloads: &'a mut SparseMap<Value, ReloadedValue>,
}
impl Reload {
/// Create a new blank reload pass.
pub fn new() -> Self {
Self {
candidates: Vec::new(),
reloads: SparseMap::new(),
}
}
/// Clear all data structures in this reload pass.
pub fn clear(&mut self) {
self.candidates.clear();
self.reloads.clear();
}
/// Run the reload algorithm over `func`.
pub fn run(
&mut self,
isa: &TargetIsa,
func: &mut Function,
domtree: &DominatorTree,
liveness: &mut Liveness,
topo: &mut TopoOrder,
tracker: &mut LiveValueTracker,
) {
let _tt = timing::ra_reload();
dbg!("Reload for:\n{}", func.display(isa));
let mut ctx = Context {
cur: EncCursor::new(func, isa),
encinfo: isa.encoding_info(),
domtree,
liveness,
topo,
candidates: &mut self.candidates,
reloads: &mut self.reloads,
};
ctx.run(tracker)
}
}
/// A reload candidate.
///
/// This represents a stack value that is used by the current instruction where a register is
/// needed.
struct ReloadCandidate {
argidx: usize,
value: Value,
regclass: RegClass,
}
/// A reloaded value.
///
/// This represents a value that has been reloaded into a register value from the stack.
struct ReloadedValue {
stack: Value,
reg: Value,
}
impl SparseMapValue<Value> for ReloadedValue {
fn key(&self) -> Value {
self.stack
}
}
impl<'a> Context<'a> {
fn run(&mut self, tracker: &mut LiveValueTracker) {
self.topo.reset(self.cur.func.layout.ebbs());
while let Some(ebb) = self.topo.next(&self.cur.func.layout, self.domtree) {
self.visit_ebb(ebb, tracker);
}
}
fn visit_ebb(&mut self, ebb: Ebb, tracker: &mut LiveValueTracker) {
dbg!("Reloading {}:", ebb);
self.visit_ebb_header(ebb, tracker);
tracker.drop_dead_params();
// visit_ebb_header() places us at the first interesting instruction in the EBB.
while let Some(inst) = self.cur.current_inst() {
let encoding = self.cur.func.encodings[inst];
if encoding.is_legal() {
self.visit_inst(ebb, inst, encoding, tracker);
tracker.drop_dead(inst);
} else {
self.cur.next_inst();
}
}
}
/// Process the EBB parameters. Move to the next instruction in the EBB to be processed.
fn visit_ebb_header(&mut self, ebb: Ebb, tracker: &mut LiveValueTracker) {
let (liveins, args) = tracker.ebb_top(
ebb,
&self.cur.func.dfg,
self.liveness,
&self.cur.func.layout,
self.domtree,
);
if self.cur.func.layout.entry_block() == Some(ebb) {
debug_assert_eq!(liveins.len(), 0);
self.visit_entry_params(ebb, args);
} else {
self.visit_ebb_params(ebb, args);
}
}
/// Visit the parameters on the entry block.
/// These values have ABI constraints from the function signature.
fn visit_entry_params(&mut self, ebb: Ebb, args: &[LiveValue]) {
debug_assert_eq!(self.cur.func.signature.params.len(), args.len());
self.cur.goto_first_inst(ebb);
for (arg_idx, arg) in args.iter().enumerate() {
let abi = self.cur.func.signature.params[arg_idx];
match abi.location {
ArgumentLoc::Reg(_) => {
if arg.affinity.is_stack() {
// An incoming register parameter was spilled. Replace the parameter value
// with a temporary register value that is immediately spilled.
let reg = self.cur.func.dfg.replace_ebb_param(
arg.value,
abi.value_type,
);
let affinity = Affinity::abi(&abi, self.cur.isa);
self.liveness.create_dead(reg, ebb, affinity);
self.insert_spill(ebb, arg.value, reg);
}
}
ArgumentLoc::Stack(_) => {
debug_assert!(arg.affinity.is_stack());
}
ArgumentLoc::Unassigned => panic!("Unexpected ABI location"),
}
}
}
fn visit_ebb_params(&mut self, ebb: Ebb, _args: &[LiveValue]) {
self.cur.goto_first_inst(ebb);
}
/// Process the instruction pointed to by `pos`, and advance the cursor to the next instruction
/// that needs processing.
fn visit_inst(
&mut self,
ebb: Ebb,
inst: Inst,
encoding: Encoding,
tracker: &mut LiveValueTracker,
) {
self.cur.use_srcloc(inst);
// Get the operand constraints for `inst` that we are trying to satisfy.
let constraints = self.encinfo.operand_constraints(encoding).expect(
"Missing instruction encoding",
);
// Identify reload candidates.
debug_assert!(self.candidates.is_empty());
self.find_candidates(inst, constraints);
// Insert fill instructions before `inst` and replace `cand.value` with the filled value.
for cand in self.candidates.iter_mut() {
if let Some(reload) = self.reloads.get(cand.value) {
cand.value = reload.reg;
continue;
}
let reg = self.cur.ins().fill(cand.value);
let fill = self.cur.built_inst();
self.reloads.insert(ReloadedValue {
stack: cand.value,
reg,
});
cand.value = reg;
// Create a live range for the new reload.
let affinity = Affinity::Reg(cand.regclass.into());
self.liveness.create_dead(reg, fill, affinity);
self.liveness.extend_locally(
reg,
ebb,
inst,
&self.cur.func.layout,
);
}
// Rewrite instruction arguments.
//
// Only rewrite those arguments that were identified as candidates. This leaves EBB
// arguments on branches as-is without rewriting them. A spilled EBB argument needs to stay
// spilled because the matching EBB parameter is going to be in the same virtual register
// and therefore the same stack slot as the EBB argument value.
if !self.candidates.is_empty() {
let args = self.cur.func.dfg.inst_args_mut(inst);
while let Some(cand) = self.candidates.pop() {
args[cand.argidx] = cand.value;
}
}
// TODO: Reuse reloads for future instructions.
self.reloads.clear();
let (_throughs, _kills, defs) =
tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
// Advance to the next instruction so we can insert any spills after the instruction.
self.cur.next_inst();
// Rewrite register defs that need to be spilled.
//
// Change:
//
// v2 = inst ...
//
// Into:
//
// v7 = inst ...
// v2 = spill v7
//
// That way, we don't need to rewrite all future uses of v2.
for (lv, op) in defs.iter().zip(constraints.outs) {
if lv.affinity.is_stack() && op.kind != ConstraintKind::Stack {
let value_type = self.cur.func.dfg.value_type(lv.value);
let reg = self.cur.func.dfg.replace_result(lv.value, value_type);
self.liveness.create_dead(reg, inst, Affinity::new(op));
self.insert_spill(ebb, lv.value, reg);
}
}
// Same thing for spilled call return values.
let retvals = &defs[constraints.outs.len()..];
if !retvals.is_empty() {
let sig = self.cur.func.dfg.call_signature(inst).expect(
"Extra results on non-call instruction",
);
for (i, lv) in retvals.iter().enumerate() {
let abi = self.cur.func.dfg.signatures[sig].returns[i];
debug_assert!(abi.location.is_reg());
if lv.affinity.is_stack() {
let reg = self.cur.func.dfg.replace_result(lv.value, abi.value_type);
self.liveness.create_dead(
reg,
inst,
Affinity::abi(&abi, self.cur.isa),
);
self.insert_spill(ebb, lv.value, reg);
}
}
}
}
// Find reload candidates for `inst` and add them to `self.candidates`.
//
// These are uses of spilled values where the operand constraint requires a register.
fn find_candidates(&mut self, inst: Inst, constraints: &RecipeConstraints) {
let args = self.cur.func.dfg.inst_args(inst);
for (argidx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() {
if op.kind != ConstraintKind::Stack && self.liveness[arg].affinity.is_stack() {
self.candidates.push(ReloadCandidate {
argidx,
value: arg,
regclass: op.regclass,
})
}
}
// If we only have the fixed arguments, we're done now.
let offset = constraints.ins.len();
if args.len() == offset {
return;
}
let var_args = &args[offset..];
// Handle ABI arguments.
if let Some(sig) = self.cur.func.dfg.call_signature(inst) {
handle_abi_args(
self.candidates,
&self.cur.func.dfg.signatures[sig].params,
var_args,
offset,
self.cur.isa,
self.liveness,
);
} else if self.cur.func.dfg[inst].opcode().is_return() {
handle_abi_args(
self.candidates,
&self.cur.func.signature.returns,
var_args,
offset,
self.cur.isa,
self.liveness,
);
}
}
/// Insert a spill at `pos` and update data structures.
///
/// - Insert `stack = spill reg` at `pos`, and assign an encoding.
/// - Move the `stack` live range starting point to the new instruction.
/// - Extend the `reg` live range to reach the new instruction.
fn insert_spill(&mut self, ebb: Ebb, stack: Value, reg: Value) {
self.cur.ins().with_result(stack).spill(reg);
let inst = self.cur.built_inst();
// Update live ranges.
self.liveness.move_def_locally(stack, inst);
self.liveness.extend_locally(
reg,
ebb,
inst,
&self.cur.func.layout,
);
}
}
/// Find reload candidates in the instruction's ABI variable arguments. This handles both
/// return values and call arguments.
fn handle_abi_args(
candidates: &mut Vec<ReloadCandidate>,
abi_types: &[AbiParam],
var_args: &[Value],
offset: usize,
isa: &TargetIsa,
liveness: &Liveness,
) {
debug_assert_eq!(abi_types.len(), var_args.len());
for ((abi, &arg), argidx) in abi_types.iter().zip(var_args).zip(offset..) {
if abi.location.is_reg() {
let lv = liveness.get(arg).expect("Missing live range for ABI arg");
if lv.affinity.is_stack() {
candidates.push(ReloadCandidate {
argidx,
value: arg,
regclass: isa.regclass_for_abi_type(abi.value_type),
});
}
}
}
}

File diff suppressed because it is too large

@@ -0,0 +1,596 @@
//! Spilling pass.
//!
//! The spilling pass is the first to run after the liveness analysis. Its primary function is to
//! ensure that the register pressure never exceeds the number of available registers by moving
//! some SSA values to spill slots on the stack. This is encoded in the affinity of the value's
//! live range.
//!
//! Some instruction operand constraints may require additional registers to resolve. Since this
//! can cause spilling, the spilling pass is also responsible for resolving those constraints by
//! inserting copies. The extra constraints are:
//!
//! 1. A value used by a tied operand must be killed by the instruction. This is resolved by
//! inserting a copy to a temporary value when necessary.
//! 2. When the same value is used more than once by an instruction, the operand constraints must
//! be compatible. Otherwise, the value must be copied into a new register for some of the
//! operands.
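//!
//! For example (an illustrative sketch with made-up value numbers), if `v1` is used by a tied
//! operand but is not killed by the instruction, the pass rewrites
//!
//! ```text
//! v3 = isub v1, v2
//! ```
//!
//! into
//!
//! ```text
//! v9 = copy v1
//! v3 = isub v9, v2
//! ```
//!
//! so that the tied operand kills the temporary `v9` while `v1` stays live.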
use cursor::{Cursor, EncCursor};
use dominator_tree::DominatorTree;
use ir::{Ebb, Function, Inst, InstBuilder, SigRef, Value, ValueLoc};
use isa::registers::{RegClassIndex, RegClassMask};
use isa::{ConstraintKind, EncInfo, RecipeConstraints, RegInfo, TargetIsa};
use regalloc::affinity::Affinity;
use regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
use regalloc::liveness::Liveness;
use regalloc::pressure::Pressure;
use regalloc::virtregs::VirtRegs;
use std::fmt;
use std::vec::Vec;
use timing;
use topo_order::TopoOrder;
/// Persistent data structures for the spilling pass.
pub struct Spilling {
spills: Vec<Value>,
reg_uses: Vec<RegUse>,
}
/// Context data structure that gets instantiated once per pass.
struct Context<'a> {
// Current instruction as well as reference to function and ISA.
cur: EncCursor<'a>,
// Cached ISA information.
reginfo: RegInfo,
encinfo: EncInfo,
// References to contextual data structures we need.
domtree: &'a DominatorTree,
liveness: &'a mut Liveness,
virtregs: &'a VirtRegs,
topo: &'a mut TopoOrder,
// Current register pressure.
pressure: Pressure,
// Values spilled for the current instruction. These values have already been removed from the
// pressure tracker, but they are still present in the live value tracker and their affinity
// hasn't been changed yet.
spills: &'a mut Vec<Value>,
// Uses of register values in the current instruction.
reg_uses: &'a mut Vec<RegUse>,
}
impl Spilling {
/// Create a new spilling data structure.
pub fn new() -> Self {
Self {
spills: Vec::new(),
reg_uses: Vec::new(),
}
}
/// Clear all data structures in this spilling pass.
pub fn clear(&mut self) {
self.spills.clear();
self.reg_uses.clear();
}
/// Run the spilling algorithm over `func`.
pub fn run(
&mut self,
isa: &TargetIsa,
func: &mut Function,
domtree: &DominatorTree,
liveness: &mut Liveness,
virtregs: &VirtRegs,
topo: &mut TopoOrder,
tracker: &mut LiveValueTracker,
) {
let _tt = timing::ra_spilling();
dbg!("Spilling for:\n{}", func.display(isa));
let reginfo = isa.register_info();
let usable_regs = isa.allocatable_registers(func);
let mut ctx = Context {
cur: EncCursor::new(func, isa),
reginfo: isa.register_info(),
encinfo: isa.encoding_info(),
domtree,
liveness,
virtregs,
topo,
pressure: Pressure::new(&reginfo, &usable_regs),
spills: &mut self.spills,
reg_uses: &mut self.reg_uses,
};
ctx.run(tracker)
}
}
impl<'a> Context<'a> {
fn run(&mut self, tracker: &mut LiveValueTracker) {
self.topo.reset(self.cur.func.layout.ebbs());
while let Some(ebb) = self.topo.next(&self.cur.func.layout, self.domtree) {
self.visit_ebb(ebb, tracker);
}
}
fn visit_ebb(&mut self, ebb: Ebb, tracker: &mut LiveValueTracker) {
dbg!("Spilling {}:", ebb);
self.cur.goto_top(ebb);
self.visit_ebb_header(ebb, tracker);
tracker.drop_dead_params();
self.process_spills(tracker);
while let Some(inst) = self.cur.next_inst() {
if let Some(constraints) =
self.encinfo.operand_constraints(
self.cur.func.encodings[inst],
)
{
self.visit_inst(inst, ebb, constraints, tracker);
} else {
let (_throughs, kills) = tracker.process_ghost(inst);
self.free_regs(kills);
}
tracker.drop_dead(inst);
self.process_spills(tracker);
}
}
// Take all live registers in `regs` from the pressure set.
// This doesn't cause any spilling; it is assumed there are enough registers.
fn take_live_regs(&mut self, regs: &[LiveValue]) {
for lv in regs {
if !lv.is_dead {
if let Affinity::Reg(rci) = lv.affinity {
let rc = self.reginfo.rc(rci);
self.pressure.take(rc);
}
}
}
}
// Free all registers in `kills` from the pressure set.
fn free_regs(&mut self, kills: &[LiveValue]) {
for lv in kills {
if let Affinity::Reg(rci) = lv.affinity {
if !self.spills.contains(&lv.value) {
let rc = self.reginfo.rc(rci);
self.pressure.free(rc);
}
}
}
}
// Free all dead registers in `regs` from the pressure set.
fn free_dead_regs(&mut self, regs: &[LiveValue]) {
for lv in regs {
if lv.is_dead {
if let Affinity::Reg(rci) = lv.affinity {
if !self.spills.contains(&lv.value) {
let rc = self.reginfo.rc(rci);
self.pressure.free(rc);
}
}
}
}
}
fn visit_ebb_header(&mut self, ebb: Ebb, tracker: &mut LiveValueTracker) {
let (liveins, params) = tracker.ebb_top(
ebb,
&self.cur.func.dfg,
self.liveness,
&self.cur.func.layout,
self.domtree,
);
// Count the live-in registers. These should already fit in registers; they did at the
// dominator.
self.pressure.reset();
self.take_live_regs(liveins);
// An EBB can have an arbitrary (up to 2^16...) number of parameters, so they are not
// guaranteed to fit in registers.
for lv in params {
if let Affinity::Reg(rci) = lv.affinity {
let rc = self.reginfo.rc(rci);
'try_take: while let Err(mask) = self.pressure.take_transient(rc) {
dbg!("Need {} reg for EBB param {}", rc, lv.value);
match self.spill_candidate(mask, liveins) {
Some(cand) => {
dbg!(
"Spilling live-in {} to make room for {} EBB param {}",
cand,
rc,
lv.value
);
self.spill_reg(cand);
}
None => {
// We can't spill any of the live-in registers, so we have to spill an
// EBB argument. Since the current spill metric would consider all the
// EBB arguments equal, just spill the present register.
dbg!("Spilling {} EBB argument {}", rc, lv.value);
// Since `spill_reg` will free a register, add the current one here.
self.pressure.take(rc);
self.spill_reg(lv.value);
break 'try_take;
}
}
}
}
}
// The transient pressure counts for the EBB arguments are accurate. Just preserve them.
self.pressure.preserve_transient();
self.free_dead_regs(params);
}
fn visit_inst(
&mut self,
inst: Inst,
ebb: Ebb,
constraints: &RecipeConstraints,
tracker: &mut LiveValueTracker,
) {
dbg!("Inst {}, {}", self.cur.display_inst(inst), self.pressure);
debug_assert_eq!(self.cur.current_inst(), Some(inst));
debug_assert_eq!(self.cur.current_ebb(), Some(ebb));
// We may need to resolve register constraints if there are any noteworthy uses.
debug_assert!(self.reg_uses.is_empty());
self.collect_reg_uses(inst, ebb, constraints);
// Calls usually have fixed register uses.
let call_sig = self.cur.func.dfg.call_signature(inst);
if let Some(sig) = call_sig {
self.collect_abi_reg_uses(inst, sig);
}
if !self.reg_uses.is_empty() {
self.process_reg_uses(inst, tracker);
}
// Update the live value tracker with this instruction.
let (throughs, kills, defs) = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
// Remove kills from the pressure tracker.
self.free_regs(kills);
// If inst is a call, spill all register values that are live across the call.
// This means that we don't currently take advantage of callee-saved registers.
// TODO: Be more sophisticated.
if call_sig.is_some() {
for lv in throughs {
if lv.affinity.is_reg() && !self.spills.contains(&lv.value) {
self.spill_reg(lv.value);
}
}
}
// Make sure we have enough registers for the register defs.
// Dead defs are included here. They need a register too.
// No need to process call return values, they are in fixed registers.
for op in constraints.outs {
if op.kind != ConstraintKind::Stack {
// Add register def to pressure, spill if needed.
while let Err(mask) = self.pressure.take_transient(op.regclass) {
dbg!("Need {} reg from {} throughs", op.regclass, throughs.len());
match self.spill_candidate(mask, throughs) {
Some(cand) => self.spill_reg(cand),
None => {
panic!(
"Ran out of {} registers for {}",
op.regclass,
self.cur.display_inst(inst)
)
}
}
}
}
}
self.pressure.reset_transient();
// Restore pressure state, compute pressure with affinities from `defs`.
// Exclude dead defs. Includes call return values.
// This won't cause spilling.
self.take_live_regs(defs);
}
// Collect register uses that are noteworthy in one of the following ways:
//
// 1. It's a fixed register constraint.
// 2. It's a use of a spilled value.
// 3. It's a tied register constraint and the value isn't killed.
//
// We are assuming here that if a value is used both by a fixed register operand and a register
// class operand, the two are compatible. We are also assuming that two register class
// operands are always compatible.
fn collect_reg_uses(&mut self, inst: Inst, ebb: Ebb, constraints: &RecipeConstraints) {
let args = self.cur.func.dfg.inst_args(inst);
for (idx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() {
let mut reguse = RegUse::new(arg, idx, op.regclass.into());
let lr = &self.liveness[arg];
let ctx = self.liveness.context(&self.cur.func.layout);
match op.kind {
ConstraintKind::Stack => continue,
ConstraintKind::FixedReg(_) => reguse.fixed = true,
ConstraintKind::Tied(_) => {
// A tied operand must kill the used value.
reguse.tied = !lr.killed_at(inst, ebb, ctx);
}
ConstraintKind::FixedTied(_) => {
reguse.fixed = true;
reguse.tied = !lr.killed_at(inst, ebb, ctx);
}
ConstraintKind::Reg => {}
}
if lr.affinity.is_stack() {
reguse.spilled = true;
}
// Only collect the interesting register uses.
if reguse.fixed || reguse.tied || reguse.spilled {
dbg!(" reguse: {}", reguse);
self.reg_uses.push(reguse);
}
}
}
// Collect register uses from the ABI input constraints.
fn collect_abi_reg_uses(&mut self, inst: Inst, sig: SigRef) {
let fixed_args = self.cur.func.dfg[inst]
.opcode()
.constraints()
.fixed_value_arguments();
let args = self.cur.func.dfg.inst_variable_args(inst);
for (idx, (abi, &arg)) in
self.cur.func.dfg.signatures[sig]
.params
.iter()
.zip(args)
.enumerate()
{
if abi.location.is_reg() {
let (rci, spilled) = match self.liveness[arg].affinity {
Affinity::Reg(rci) => (rci, false),
Affinity::Stack => (
self.cur.isa.regclass_for_abi_type(abi.value_type).into(),
true,
),
Affinity::None => panic!("Missing affinity for {}", arg),
};
let mut reguse = RegUse::new(arg, fixed_args + idx, rci);
reguse.fixed = true;
reguse.spilled = spilled;
self.reg_uses.push(reguse);
}
}
}
// Process multiple register uses to resolve potential conflicts.
//
// Look for multiple uses of the same value in `self.reg_uses` and insert copies as necessary.
// Trigger spilling if any of the temporaries cause the register pressure to become too high.
//
// Leave `self.reg_uses` empty.
fn process_reg_uses(&mut self, inst: Inst, tracker: &LiveValueTracker) {
// We're looking for multiple uses of the same value, so start by sorting by value. The
// secondary `opidx` key makes it possible to use an unstable (non-allocating) sort.
self.reg_uses.sort_unstable_by_key(|u| (u.value, u.opidx));
for i in 0..self.reg_uses.len() {
let ru = self.reg_uses[i];
// Do we need to insert a copy for this use?
let need_copy = if ru.tied {
true
} else if ru.fixed {
// This is a fixed register use which doesn't necessarily require a copy.
// Make a copy only if this is not the first use of the value.
self.reg_uses
.get(i.wrapping_sub(1))
.map(|ru2| ru2.value == ru.value)
.unwrap_or(false)
} else {
false
};
if need_copy {
let copy = self.insert_copy(ru.value, ru.rci);
self.cur.func.dfg.inst_args_mut(inst)[ru.opidx as usize] = copy;
}
// Even if we don't insert a copy, we may need to account for register pressure for the
// reload pass.
if need_copy || ru.spilled {
let rc = self.reginfo.rc(ru.rci);
while let Err(mask) = self.pressure.take_transient(rc) {
dbg!("Copy of {} reg causes spill", rc);
// Spill a live register that is *not* used by the current instruction.
// Spilling a use wouldn't help.
//
// Do allow spilling of EBB arguments on branches. This is safe since we spill
// the whole virtual register which includes the matching EBB parameter value
// at the branch destination. It is also necessary since there can be
// arbitrarily many EBB arguments.
match {
let args = if self.cur.func.dfg[inst].opcode().is_branch() {
self.cur.func.dfg.inst_fixed_args(inst)
} else {
self.cur.func.dfg.inst_args(inst)
};
self.spill_candidate(
mask,
tracker.live().iter().filter(|lv| !args.contains(&lv.value)),
)
} {
Some(cand) => self.spill_reg(cand),
None => {
panic!(
"Ran out of {} registers when inserting copy before {}",
rc,
self.cur.display_inst(inst)
)
}
}
}
}
}
self.pressure.reset_transient();
self.reg_uses.clear()
}
// Find a spill candidate from `candidates` whose top-level register class is in `mask`.
fn spill_candidate<'ii, II>(&self, mask: RegClassMask, candidates: II) -> Option<Value>
where
II: IntoIterator<Item = &'ii LiveValue>,
{
// Find the best viable spill candidate.
//
// The very simple strategy implemented here is to spill the value with the earliest def in
// the reverse post-order. This strategy depends on a good reload pass to generate good
// code.
//
// We know that all candidate defs dominate the current instruction, so one of them will
// dominate the others. That is the earliest def.
candidates
.into_iter()
.filter_map(|lv| {
// Viable candidates are registers in one of the `mask` classes, and not already in
// the spill set.
if let Affinity::Reg(rci) = lv.affinity {
let rc = self.reginfo.rc(rci);
if (mask & (1 << rc.toprc)) != 0 && !self.spills.contains(&lv.value) {
// Here, `lv` is a viable spill candidate.
return Some(lv.value);
}
}
None
})
.min_by(|&a, &b| {
// Find the minimum candidate according to the RPO of their defs.
self.domtree.rpo_cmp(
self.cur.func.dfg.value_def(a),
self.cur.func.dfg.value_def(b),
&self.cur.func.layout,
)
})
}
/// Spill `value` immediately by
///
/// 1. Changing its affinity to `Stack` which marks the spill.
/// 2. Removing the value from the pressure tracker.
/// 3. Adding the value to `self.spills` for later reference by `process_spills`.
///
/// Note that this does not update the cached affinity in the live value tracker. Call
/// `process_spills` to do that.
fn spill_reg(&mut self, value: Value) {
if let Affinity::Reg(rci) = self.liveness.spill(value) {
let rc = self.reginfo.rc(rci);
self.pressure.free(rc);
self.spills.push(value);
dbg!("Spilled {}:{} -> {}", value, rc, self.pressure);
} else {
panic!("Cannot spill {} that was already on the stack", value);
}
// Assign a spill slot for the whole virtual register.
let ss = self.cur.func.stack_slots.make_spill_slot(
self.cur.func.dfg.value_type(value),
);
for &v in self.virtregs.congruence_class(&value) {
self.liveness.spill(v);
self.cur.func.locations[v] = ValueLoc::Stack(ss);
}
}
/// Process any pending spills in the `self.spills` vector.
///
/// It is assumed that spills are removed from the pressure tracker immediately, see
/// `spill_reg` above.
///
/// We also need to update the live range affinity and remove spilled values from the live
/// value tracker.
fn process_spills(&mut self, tracker: &mut LiveValueTracker) {
if !self.spills.is_empty() {
tracker.process_spills(|v| self.spills.contains(&v));
self.spills.clear()
}
}
/// Insert a `copy value` before the current instruction and give it a live range extending to
/// the current instruction.
///
/// Returns the new local value created.
fn insert_copy(&mut self, value: Value, rci: RegClassIndex) -> Value {
let copy = self.cur.ins().copy(value);
let inst = self.cur.built_inst();
// Update live ranges.
self.liveness.create_dead(copy, inst, Affinity::Reg(rci));
self.liveness.extend_locally(
copy,
self.cur.func.layout.pp_ebb(inst),
self.cur.current_inst().expect("must be at an instruction"),
&self.cur.func.layout,
);
copy
}
}
/// Struct representing a register use of a value.
/// Used to detect multiple uses of the same value with incompatible register constraints.
#[derive(Clone, Copy)]
struct RegUse {
value: Value,
opidx: u16,
// Register class required by the use.
rci: RegClassIndex,
// A use with a fixed register constraint.
fixed: bool,
// A register use of a spilled value.
spilled: bool,
// A use with a tied register constraint *and* the used value is not killed.
tied: bool,
}
impl RegUse {
fn new(value: Value, idx: usize, rci: RegClassIndex) -> RegUse {
RegUse {
value,
opidx: idx as u16,
rci,
fixed: false,
spilled: false,
tied: false,
}
}
}
impl fmt::Display for RegUse {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}@op{}", self.value, self.opidx)?;
if self.fixed {
write!(f, "/fixed")?;
}
if self.spilled {
write!(f, "/spilled")?;
}
if self.tied {
write!(f, "/tied")?;
}
Ok(())
}
}

View File

@@ -0,0 +1,503 @@
//! Virtual registers.
//!
//! A virtual register is a set of related SSA values whose live ranges don't interfere. If all the
//! values in a virtual register are assigned to the same location, the output will contain fewer
//! copy instructions.
//!
//! A virtual register is typically built by merging together SSA values that are "phi-related" -
//! that is, one value is passed as an EBB argument to a branch and the other is the EBB parameter
//! value itself.
//!
//! If any values in a virtual register are spilled, they will use the same stack slot. This avoids
//! memory-to-memory copies when a spilled value is passed as an EBB argument.
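//!
//! For example (a hypothetical IR snippet, not taken from this crate's tests):
//!
//! ```text
//! ebb1:
//!     brnz v1, ebb2(v5)   ; v5 is passed as an EBB argument
//! ebb2(v9: i32):          ; v9 is the matching EBB parameter
//! ```
//!
//! Here `v5` and `v9` are phi-related, so they are candidates for the same virtual register.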
use dbg::DisplayList;
use dominator_tree::DominatorTreePreorder;
use entity::EntityRef;
use entity::{EntityList, ListPool};
use entity::{EntityMap, Keys, PrimaryMap};
use ir::{Function, Value};
use packed_option::PackedOption;
use ref_slice::ref_slice;
use std::cmp::Ordering;
use std::fmt;
use std::vec::Vec;
/// A virtual register reference.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct VirtReg(u32);
entity_impl!(VirtReg, "vreg");
type ValueList = EntityList<Value>;
/// Collection of virtual registers.
///
/// Each virtual register is a list of values. Also maintain a map from values to their unique
/// virtual register, if any.
pub struct VirtRegs {
/// Memory pool for the value lists.
pool: ListPool<Value>,
/// The primary table of virtual registers.
vregs: PrimaryMap<VirtReg, ValueList>,
/// Allocated virtual register numbers that are no longer in use.
unused_vregs: Vec<VirtReg>,
/// Each value belongs to at most one virtual register.
value_vregs: EntityMap<Value, PackedOption<VirtReg>>,
/// Table used during the union-find phase while `vregs` is empty.
union_find: EntityMap<Value, i32>,
/// Values that have been activated in the `union_find` table, but not yet added to any virtual
/// registers by the `finish_union_find()` function.
pending_values: Vec<Value>,
}
impl VirtRegs {
/// Create a new virtual register collection.
pub fn new() -> Self {
Self {
pool: ListPool::new(),
vregs: PrimaryMap::new(),
unused_vregs: Vec::new(),
value_vregs: EntityMap::new(),
union_find: EntityMap::new(),
pending_values: Vec::new(),
}
}
/// Clear all virtual registers.
pub fn clear(&mut self) {
self.vregs.clear();
self.unused_vregs.clear();
self.value_vregs.clear();
self.pool.clear();
self.union_find.clear();
self.pending_values.clear();
}
/// Get the virtual register containing `value`, if any.
pub fn get(&self, value: Value) -> Option<VirtReg> {
self.value_vregs[value].into()
}
/// Get the list of values in `vreg`.
pub fn values(&self, vreg: VirtReg) -> &[Value] {
self.vregs[vreg].as_slice(&self.pool)
}
/// Get an iterator over all virtual registers.
pub fn all_virtregs(&self) -> Keys<VirtReg> {
self.vregs.keys()
}
/// Get the congruence class of `value`.
///
/// If `value` belongs to a virtual register, the congruence class consists of all the values in
/// that virtual register. Otherwise it is just the value itself.
pub fn congruence_class<'a, 'b>(&'a self, value: &'b Value) -> &'b [Value]
where
'a: 'b,
{
self.get(*value).map(|vr| self.values(vr)).unwrap_or_else(
|| {
ref_slice(value)
},
)
}
/// Check if `a` and `b` belong to the same congruence class.
pub fn same_class(&self, a: Value, b: Value) -> bool {
match (self.get(a), self.get(b)) {
(Some(va), Some(vb)) => va == vb,
_ => a == b,
}
}
/// Sort the values in `vreg` according to the dominator tree pre-order.
///
/// Returns the slice of sorted values which `values(vreg)` will also return from now on.
pub fn sort_values(
&mut self,
vreg: VirtReg,
func: &Function,
preorder: &DominatorTreePreorder,
) -> &[Value] {
let s = self.vregs[vreg].as_mut_slice(&mut self.pool);
s.sort_unstable_by(|&a, &b| preorder.pre_cmp_def(a, b, func));
s
}
/// Insert a single value into a sorted virtual register.
///
/// It is assumed that the virtual register containing `big` is already sorted by
/// `sort_values()`, and that `single` does not already belong to a virtual register.
///
/// If `big` is not part of a virtual register, one will be created.
pub fn insert_single(
&mut self,
big: Value,
single: Value,
func: &Function,
preorder: &DominatorTreePreorder,
) -> VirtReg {
debug_assert_eq!(self.get(single), None, "Expected singleton {}", single);
// Make sure `big` has a vreg.
let vreg = self.get(big).unwrap_or_else(|| {
let vr = self.alloc();
self.vregs[vr].push(big, &mut self.pool);
self.value_vregs[big] = vr.into();
vr
});
// Determine the insertion position for `single`.
let index = match self.values(vreg).binary_search_by(
|&v| preorder.pre_cmp_def(v, single, func),
) {
Ok(_) => panic!("{} already in {}", single, vreg),
Err(i) => i,
};
self.vregs[vreg].insert(index, single, &mut self.pool);
self.value_vregs[single] = vreg.into();
vreg
}
/// Remove a virtual register.
///
/// The values in `vreg` become singletons, and the virtual register number may be reused in
/// the future.
pub fn remove(&mut self, vreg: VirtReg) {
// Start by reassigning all the values.
for &v in self.vregs[vreg].as_slice(&self.pool) {
let old = self.value_vregs[v].take();
debug_assert_eq!(old, Some(vreg));
}
self.vregs[vreg].clear(&mut self.pool);
self.unused_vregs.push(vreg);
}
/// Allocate a new empty virtual register.
fn alloc(&mut self) -> VirtReg {
self.unused_vregs.pop().unwrap_or_else(|| {
self.vregs.push(Default::default())
})
}
/// Unify `values` into a single virtual register.
///
/// The values in the slice can be singletons or they can belong to a virtual register already.
/// If a value belongs to a virtual register, all of the values in that register must be
/// present.
///
/// The values are assumed to already be in topological order.
pub fn unify(&mut self, values: &[Value]) -> VirtReg {
// Start by clearing all virtual registers involved.
let mut singletons = 0;
let mut cleared = 0;
for &val in values {
match self.get(val) {
None => singletons += 1,
Some(vreg) => {
if !self.vregs[vreg].is_empty() {
cleared += self.vregs[vreg].len(&self.pool);
self.vregs[vreg].clear(&mut self.pool);
self.unused_vregs.push(vreg);
}
}
}
}
debug_assert_eq!(
values.len(),
singletons + cleared,
"Can't unify partial virtual registers"
);
let vreg = self.alloc();
self.vregs[vreg].extend(values.iter().cloned(), &mut self.pool);
for &v in values {
self.value_vregs[v] = vreg.into();
}
vreg
}
}
impl fmt::Display for VirtRegs {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
for vreg in self.all_virtregs() {
write!(f, "\n{} = {}", vreg, DisplayList(self.values(vreg)))?;
}
Ok(())
}
}
/// Expanded version of a union-find table entry.
enum UFEntry {
/// This value is a set leader. The embedded number is the set's rank.
Rank(u32),
/// This value belongs to the same set as the linked value.
Link(Value),
}
/// The `union_find` table contains `i32` entries that are interpreted as follows:
///
/// x = 0: The value belongs to its own singleton set.
/// x > 0: The value is the leader of a set with rank x.
/// x < 0: The value belongs to the same set as the value numbered !x.
///
/// The rank of a set is an upper bound on the number of links that must be followed from a member
/// of the set to the set leader.
///
/// A singleton set is the same as a set with rank 0. It contains only the leader value.
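///
/// A small sketch of the encoding (the concrete numbers are illustrative only):
///
/// ```text
/// decode(0)  -> Rank(0)    // singleton set: the value is its own leader
/// decode(3)  -> Rank(3)    // leader of a set with rank 3
/// decode(-3) -> Link(v2)   // !(-3) == 2, so this entry links to the value numbered 2
/// ```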
impl UFEntry {
/// Decode a table entry.
fn decode(x: i32) -> UFEntry {
if x < 0 {
UFEntry::Link(Value::new((!x) as usize))
} else {
UFEntry::Rank(x as u32)
}
}
/// Encode a link entry.
fn encode_link(v: Value) -> i32 {
!(v.index() as i32)
}
}
/// Union-find algorithm for building virtual registers.
///
/// Before values are added to virtual registers, it is possible to use a union-find algorithm to
/// construct virtual registers efficiently. The support implemented here is used as follows:
///
/// 1. Repeatedly call the `union(a, b)` method to request that `a` and `b` are placed in the same
/// virtual register.
/// 2. When done, call `finish_union_find()` to construct the virtual register sets based on the
/// `union()` calls.
///
/// The values that were passed to `union(a, b)` must not belong to any existing virtual registers
/// by the time `finish_union_find()` is called.
///
/// For more information on the algorithm implemented here, see Chapter 21 "Data Structures for
/// Disjoint Sets" of Cormen, Leiserson, Rivest, Stein, "Introduction to algorithms", 3rd Ed.
///
/// The [Wikipedia entry on disjoint-set data
/// structures](https://en.wikipedia.org/wiki/Disjoint-set_data_structure) is also good.
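///
/// A minimal usage sketch (the values `a`, `b`, and `c` are hypothetical; compare the unit tests
/// at the bottom of this file):
///
/// ```ignore
/// let mut vregs = VirtRegs::new();
/// vregs.union(a, b);             // request that `a` and `b` share a virtual register
/// vregs.union(b, c);             // `a`, `b`, and `c` now end up in the same set
/// vregs.finish_union_find(None); // build the actual virtual registers
/// assert!(vregs.same_class(a, c));
/// ```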
impl VirtRegs {
/// Find the leader value and rank of the set containing `v`.
/// Compress the path if needed.
fn find(&mut self, val: Value) -> (Value, u32) {
match UFEntry::decode(self.union_find[val]) {
UFEntry::Rank(rank) => (val, rank),
UFEntry::Link(parent) => {
// TODO: This recursion would be more efficient as an iteration that pushes
// elements onto a SmallVector.
let found = self.find(parent);
// Compress the path if needed.
if found.0 != parent {
self.union_find[val] = UFEntry::encode_link(found.0);
}
found
}
}
}
/// Union the two sets containing `a` and `b`.
///
/// This ensures that `a` and `b` will belong to the same virtual register after calling
/// `finish_union_find()`.
pub fn union(&mut self, a: Value, b: Value) {
let (leader_a, rank_a) = self.find(a);
let (leader_b, rank_b) = self.find(b);
if leader_a == leader_b {
return;
}
// The first time we see a value, its rank will be 0. Add it to the list of pending values.
if rank_a == 0 {
debug_assert_eq!(a, leader_a);
self.pending_values.push(a);
}
if rank_b == 0 {
debug_assert_eq!(b, leader_b);
self.pending_values.push(b);
}
// Merge into the set with the greater rank. This preserves the invariant that the rank is
// an upper bound on the number of links to the leader.
match rank_a.cmp(&rank_b) {
Ordering::Less => {
self.union_find[leader_a] = UFEntry::encode_link(leader_b);
}
Ordering::Greater => {
self.union_find[leader_b] = UFEntry::encode_link(leader_a);
}
Ordering::Equal => {
// When the two sets have the same rank, we arbitrarily pick the a-set to preserve.
// We need to increase the rank by one since the elements in the b-set are now one
// link further away from the leader.
self.union_find[leader_a] += 1;
self.union_find[leader_b] = UFEntry::encode_link(leader_a);
}
}
}
/// Compute virtual registers based on previous calls to `union(a, b)`.
///
/// This terminates the union-find algorithm, so the next time `union()` is called, it is for a
/// new independent batch of values.
///
/// The values in each virtual register will be ordered according to when they were first
/// passed to `union()`, but backwards. It is expected that `sort_values()` will be used to
/// create a more sensible value order.
///
/// The new virtual registers will be appended to `new_vregs`, if present.
pub fn finish_union_find(&mut self, mut new_vregs: Option<&mut Vec<VirtReg>>) {
debug_assert_eq!(
self.pending_values.iter().find(|&&v| self.get(v).is_some()),
None,
"Values participating in union-find must not belong to existing virtual registers"
);
while let Some(val) = self.pending_values.pop() {
let (leader, _) = self.find(val);
// Get the vreg for `leader`, or create it.
let vreg = self.get(leader).unwrap_or_else(|| {
// Allocate a vreg for `leader`, but leave it empty.
let vr = self.alloc();
if let Some(ref mut vec) = new_vregs {
vec.push(vr);
}
self.value_vregs[leader] = vr.into();
vr
});
// Push values in `pending_values` order, including when `val == leader`.
self.vregs[vreg].push(val, &mut self.pool);
self.value_vregs[val] = vreg.into();
// Clear the entry in the union-find table. The `find(val)` call may still look at this
// entry in a future iteration, but that is ok. It will return a rank 0 leader that has
// already been assigned to the correct virtual register.
self.union_find[val] = 0;
}
// We do *not* call `union_find.clear()` here because re-initializing the table for
// sparse use takes time linear in the number of values in the function. Instead we reset
// the entries that are known to be non-zero in the loop above.
}
}
#[cfg(test)]
mod test {
use super::*;
use entity::EntityRef;
use ir::Value;
#[test]
fn empty_union_find() {
let mut vregs = VirtRegs::new();
vregs.finish_union_find(None);
assert_eq!(vregs.all_virtregs().count(), 0);
}
#[test]
fn union_self() {
let mut vregs = VirtRegs::new();
let v1 = Value::new(1);
vregs.union(v1, v1);
vregs.finish_union_find(None);
assert_eq!(vregs.get(v1), None);
assert_eq!(vregs.all_virtregs().count(), 0);
}
#[test]
fn union_pair() {
let mut vregs = VirtRegs::new();
let v1 = Value::new(1);
let v2 = Value::new(2);
vregs.union(v1, v2);
vregs.finish_union_find(None);
assert_eq!(vregs.congruence_class(&v1), &[v2, v1]);
assert_eq!(vregs.congruence_class(&v2), &[v2, v1]);
assert_eq!(vregs.all_virtregs().count(), 1);
}
#[test]
fn union_pair_backwards() {
let mut vregs = VirtRegs::new();
let v1 = Value::new(1);
let v2 = Value::new(2);
vregs.union(v2, v1);
vregs.finish_union_find(None);
assert_eq!(vregs.congruence_class(&v1), &[v1, v2]);
assert_eq!(vregs.congruence_class(&v2), &[v1, v2]);
assert_eq!(vregs.all_virtregs().count(), 1);
}
#[test]
fn union_tree() {
let mut vregs = VirtRegs::new();
let v1 = Value::new(1);
let v2 = Value::new(2);
let v3 = Value::new(3);
let v4 = Value::new(4);
vregs.union(v2, v4);
vregs.union(v3, v1);
// Leaders: v2, v3
vregs.union(v4, v1);
vregs.finish_union_find(None);
assert_eq!(vregs.congruence_class(&v1), &[v1, v3, v4, v2]);
assert_eq!(vregs.congruence_class(&v2), &[v1, v3, v4, v2]);
assert_eq!(vregs.congruence_class(&v3), &[v1, v3, v4, v2]);
assert_eq!(vregs.congruence_class(&v4), &[v1, v3, v4, v2]);
assert_eq!(vregs.all_virtregs().count(), 1);
}
#[test]
fn union_two() {
let mut vregs = VirtRegs::new();
let v1 = Value::new(1);
let v2 = Value::new(2);
let v3 = Value::new(3);
let v4 = Value::new(4);
vregs.union(v2, v4);
vregs.union(v3, v1);
// Leaders: v2, v3
vregs.finish_union_find(None);
assert_eq!(vregs.congruence_class(&v1), &[v1, v3]);
assert_eq!(vregs.congruence_class(&v2), &[v4, v2]);
assert_eq!(vregs.congruence_class(&v3), &[v1, v3]);
assert_eq!(vregs.congruence_class(&v4), &[v4, v2]);
assert_eq!(vregs.all_virtregs().count(), 2);
}
#[test]
fn union_uneven() {
let mut vregs = VirtRegs::new();
let v1 = Value::new(1);
let v2 = Value::new(2);
let v3 = Value::new(3);
let v4 = Value::new(4);
vregs.union(v2, v4); // Rank 0-0
vregs.union(v3, v2); // Rank 0-1
vregs.union(v2, v1); // Rank 1-0
vregs.finish_union_find(None);
assert_eq!(vregs.congruence_class(&v1), &[v1, v3, v4, v2]);
assert_eq!(vregs.congruence_class(&v2), &[v1, v3, v4, v2]);
assert_eq!(vregs.congruence_class(&v3), &[v1, v3, v4, v2]);
assert_eq!(vregs.congruence_class(&v4), &[v1, v3, v4, v2]);
assert_eq!(vregs.all_virtregs().count(), 1);
}
}