moved crates in lib/ to src/, renamed crates, modified some files' text (#660)
cranelift/codegen/src/regalloc/affinity.rs (new file, 128 lines)
@@ -0,0 +1,128 @@
//! Value affinity for register allocation.
//!
//! An SSA value's affinity is a hint used to guide the register allocator. It specifies the class
//! of allocation that is likely to cause the least amount of fixup moves in order to satisfy
//! instruction operand constraints.
//!
//! For values that want to be in registers, the affinity hint includes a register class or
//! subclass. This is just a hint, and the register allocator is allowed to pick a register from a
//! larger register class instead.

use crate::ir::{AbiParam, ArgumentLoc};
use crate::isa::{ConstraintKind, OperandConstraint, RegClassIndex, RegInfo, TargetIsa};
use core::fmt;

/// Preferred register allocation for an SSA value.
#[derive(Clone, Copy, Debug)]
pub enum Affinity {
    /// No affinity.
    ///
    /// This indicates a value that is not defined or used by any real instructions. It is a ghost
    /// value that won't appear in the final program.
    Unassigned,

    /// This value should be placed in a spill slot on the stack.
    Stack,

    /// This value prefers a register from the given register class.
    Reg(RegClassIndex),
}

impl Default for Affinity {
    fn default() -> Self {
        Affinity::Unassigned
    }
}

impl Affinity {
    /// Create an affinity that satisfies a single constraint.
    ///
    /// This will never create an `Affinity::Unassigned`.
    /// Use the `Default` implementation for that.
    pub fn new(constraint: &OperandConstraint) -> Self {
        if constraint.kind == ConstraintKind::Stack {
            Affinity::Stack
        } else {
            Affinity::Reg(constraint.regclass.into())
        }
    }

    /// Create an affinity that matches an ABI argument for `isa`.
    pub fn abi(arg: &AbiParam, isa: &TargetIsa) -> Self {
        match arg.location {
            ArgumentLoc::Unassigned => Affinity::Unassigned,
            ArgumentLoc::Reg(_) => Affinity::Reg(isa.regclass_for_abi_type(arg.value_type).into()),
            ArgumentLoc::Stack(_) => Affinity::Stack,
        }
    }

    /// Is this the `Unassigned` affinity?
    pub fn is_unassigned(self) -> bool {
        match self {
            Affinity::Unassigned => true,
            _ => false,
        }
    }

    /// Is this the `Reg` affinity?
    pub fn is_reg(self) -> bool {
        match self {
            Affinity::Reg(_) => true,
            _ => false,
        }
    }

    /// Is this the `Stack` affinity?
    pub fn is_stack(self) -> bool {
        match self {
            Affinity::Stack => true,
            _ => false,
        }
    }

    /// Merge an operand constraint into this affinity.
    ///
    /// Note that this does not guarantee that the register allocator will pick a register that
    /// satisfies the constraint.
    pub fn merge(&mut self, constraint: &OperandConstraint, reginfo: &RegInfo) {
        match *self {
            Affinity::Unassigned => *self = Self::new(constraint),
            Affinity::Reg(rc) => {
                // If the preferred register class is a subclass of the constraint, there's no need
                // to change anything.
                if constraint.kind != ConstraintKind::Stack && !constraint.regclass.has_subclass(rc)
                {
                    // If the register classes don't overlap, `intersect_index` returns `None`, and
                    // we just keep our previous affinity.
                    if let Some(subclass) = constraint.regclass.intersect_index(reginfo.rc(rc)) {
                        // This constraint shrinks our preferred register class.
                        *self = Affinity::Reg(subclass);
                    }
                }
            }
            Affinity::Stack => {}
        }
    }

    /// Return an object that can display this value affinity, using the register info from the
    /// target ISA.
    pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayAffinity<'a> {
        DisplayAffinity(self, regs.into())
    }
}

/// Displaying an `Affinity` correctly requires the associated `RegInfo` from the target ISA.
pub struct DisplayAffinity<'a>(Affinity, Option<&'a RegInfo>);

impl<'a> fmt::Display for DisplayAffinity<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self.0 {
            Affinity::Unassigned => write!(f, "unassigned"),
            Affinity::Stack => write!(f, "stack"),
            Affinity::Reg(rci) => match self.1 {
                Some(regs) => write!(f, "{}", regs.rc(rci)),
                None => write!(f, "{}", rci),
            },
        }
    }
}
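The `merge` rules above form a small lattice: `Unassigned` adopts whatever constraint it meets, `Stack` is sticky, and `Reg` can only narrow toward a common subclass. The following self-contained sketch models that behavior, with `u8` standing in for `RegClassIndex` and a toy `intersect` in place of `RegClass::intersect_index`; it is an illustration of the lattice, not the crate's API.

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum Affinity {
    Unassigned,
    Stack,
    Reg(u8), // stand-in for RegClassIndex
}

// Stand-in for `RegClass::intersect_index`: the common subclass of two
// classes, if any. In this toy model, classes always nest and a larger
// index means a smaller (more constrained) class.
fn intersect(a: u8, b: u8) -> Option<u8> {
    Some(a.max(b))
}

fn merge(current: &mut Affinity, constraint: Affinity) {
    match (*current, constraint) {
        // No affinity yet: adopt the constraint outright.
        (Affinity::Unassigned, c) => *current = c,
        // A stack affinity never changes.
        (Affinity::Stack, _) => {}
        // A register affinity can only shrink to a common subclass.
        (Affinity::Reg(rc), Affinity::Reg(crc)) => {
            if let Some(sub) = intersect(rc, crc) {
                *current = Affinity::Reg(sub);
            }
        }
        // A stack constraint does not displace a register preference.
        (Affinity::Reg(_), _) => {}
    }
}

fn main() {
    let mut a = Affinity::Unassigned;
    merge(&mut a, Affinity::Reg(1));
    merge(&mut a, Affinity::Reg(3)); // narrows to the subclass
    assert_eq!(a, Affinity::Reg(3));
}
```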
cranelift/codegen/src/regalloc/coalescing.rs (new file, 1110 lines; diff suppressed because it is too large)
cranelift/codegen/src/regalloc/coloring.rs (new file, 1149 lines; diff suppressed because it is too large)
cranelift/codegen/src/regalloc/context.rs (new file, 217 lines)
@@ -0,0 +1,217 @@
//! Register allocator context.
//!
//! The `Context` struct contains data structures that should be preserved across invocations of
//! the register allocator algorithm. This doesn't preserve any data between functions, but it
//! avoids allocating data structures independently for each function being compiled.

use crate::dominator_tree::DominatorTree;
use crate::flowgraph::ControlFlowGraph;
use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::regalloc::coalescing::Coalescing;
use crate::regalloc::coloring::Coloring;
use crate::regalloc::live_value_tracker::LiveValueTracker;
use crate::regalloc::liveness::Liveness;
use crate::regalloc::reload::Reload;
use crate::regalloc::spilling::Spilling;
use crate::regalloc::virtregs::VirtRegs;
use crate::result::CodegenResult;
use crate::timing;
use crate::topo_order::TopoOrder;
use crate::verifier::{
    verify_context, verify_cssa, verify_liveness, verify_locations, VerifierErrors,
};

/// Persistent memory allocations for register allocation.
pub struct Context {
    liveness: Liveness,
    virtregs: VirtRegs,
    coalescing: Coalescing,
    topo: TopoOrder,
    tracker: LiveValueTracker,
    spilling: Spilling,
    reload: Reload,
    coloring: Coloring,
}

impl Context {
    /// Create a new context for register allocation.
    ///
    /// This context should be reused for multiple functions in order to avoid repeated memory
    /// allocations.
    pub fn new() -> Self {
        Self {
            liveness: Liveness::new(),
            virtregs: VirtRegs::new(),
            coalescing: Coalescing::new(),
            topo: TopoOrder::new(),
            tracker: LiveValueTracker::new(),
            spilling: Spilling::new(),
            reload: Reload::new(),
            coloring: Coloring::new(),
        }
    }

    /// Clear all data structures in this context.
    pub fn clear(&mut self) {
        self.liveness.clear();
        self.virtregs.clear();
        self.coalescing.clear();
        self.topo.clear();
        self.tracker.clear();
        self.spilling.clear();
        self.reload.clear();
        self.coloring.clear();
    }

    /// Allocate registers in `func`.
    ///
    /// After register allocation, all values in `func` have been assigned to a register or stack
    /// location that is consistent with instruction encoding constraints.
    pub fn run(
        &mut self,
        isa: &TargetIsa,
        func: &mut Function,
        cfg: &ControlFlowGraph,
        domtree: &mut DominatorTree,
    ) -> CodegenResult<()> {
        let _tt = timing::regalloc();
        debug_assert!(domtree.is_valid());

        let mut errors = VerifierErrors::default();

        // `Liveness` and `Coloring` are self-clearing.
        self.virtregs.clear();

        // Tracker state (dominator live sets) is actually reused between the spilling and coloring
        // phases.
        self.tracker.clear();

        // Pass: Liveness analysis.
        self.liveness.compute(isa, func, cfg);

        if isa.flags().enable_verifier() {
            let ok = verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok();

            if !ok {
                return Err(errors.into());
            }
        }

        // Pass: Coalesce and create Conventional SSA form.
        self.coalescing.conventional_ssa(
            isa,
            func,
            cfg,
            domtree,
            &mut self.liveness,
            &mut self.virtregs,
        );

        if isa.flags().enable_verifier() {
            let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
                && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
                && verify_cssa(
                    func,
                    cfg,
                    domtree,
                    &self.liveness,
                    &self.virtregs,
                    &mut errors,
                )
                .is_ok();

            if !ok {
                return Err(errors.into());
            }
        }

        // Pass: Spilling.
        self.spilling.run(
            isa,
            func,
            domtree,
            &mut self.liveness,
            &self.virtregs,
            &mut self.topo,
            &mut self.tracker,
        );

        if isa.flags().enable_verifier() {
            let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
                && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
                && verify_cssa(
                    func,
                    cfg,
                    domtree,
                    &self.liveness,
                    &self.virtregs,
                    &mut errors,
                )
                .is_ok();

            if !ok {
                return Err(errors.into());
            }
        }

        // Pass: Reload.
        self.reload.run(
            isa,
            func,
            domtree,
            &mut self.liveness,
            &mut self.topo,
            &mut self.tracker,
        );

        if isa.flags().enable_verifier() {
            let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
                && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
                && verify_cssa(
                    func,
                    cfg,
                    domtree,
                    &self.liveness,
                    &self.virtregs,
                    &mut errors,
                )
                .is_ok();

            if !ok {
                return Err(errors.into());
            }
        }

        // Pass: Coloring.
        self.coloring
            .run(isa, func, domtree, &mut self.liveness, &mut self.tracker);

        if isa.flags().enable_verifier() {
            let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
                && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
                && verify_locations(isa, func, Some(&self.liveness), &mut errors).is_ok()
                && verify_cssa(
                    func,
                    cfg,
                    domtree,
                    &self.liveness,
                    &self.virtregs,
                    &mut errors,
                )
                .is_ok();

            if !ok {
                return Err(errors.into());
            }
        }

        // Even if we arrive here, (non-fatal) errors might have been reported, so we
        // must make sure absolutely nothing is wrong.
        if errors.is_empty() {
            Ok(())
        } else {
            Err(errors.into())
        }
    }
}
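`Context` exists to amortize allocations: build it once, `clear()` between functions, and never drop the backing storage, as the doc comment above explains. A minimal sketch of that reuse pattern, with a plain `Vec` standing in for the allocator's data structures (hypothetical names, not the cranelift API):

```rust
// Scratch storage is cleared, not dropped, between functions, so its
// capacity is recycled across compilations.
struct ScratchContext {
    worklist: Vec<u32>, // stand-in for liveness/coloring state
}

impl ScratchContext {
    fn new() -> Self {
        Self { worklist: Vec::new() }
    }

    fn clear(&mut self) {
        // `Vec::clear` keeps the allocation; only the length is reset.
        self.worklist.clear();
    }

    fn run(&mut self, func_size: u32) {
        self.clear();
        self.worklist.extend(0..func_size);
        // ... passes would use the scratch space here ...
    }
}

fn main() {
    let mut ctx = ScratchContext::new();
    ctx.run(1000);
    ctx.run(500); // the second function reuses the same buffer
    assert!(ctx.worklist.capacity() >= 1000); // capacity survived `clear`
    assert_eq!(ctx.worklist.len(), 500);
}
```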
cranelift/codegen/src/regalloc/diversion.rs (new file, 218 lines)
@@ -0,0 +1,218 @@
//! Register diversions.
//!
//! Normally, a value is assigned to a single register or stack location by the register allocator.
//! Sometimes, it is necessary to move register values to a different register in order to satisfy
//! instruction constraints.
//!
//! These register diversions are local to an EBB. No values can be diverted when entering a new
//! EBB.

use crate::fx::FxHashMap;
use crate::hash_map::{Entry, Iter};
use crate::ir::{InstructionData, Opcode};
use crate::ir::{StackSlot, Value, ValueLoc, ValueLocations};
use crate::isa::{RegInfo, RegUnit};
use core::fmt;

/// A diversion of a value from its original location to a new register or stack location.
///
/// In IR, a diversion is represented by a `regmove` instruction, possibly a chain of them for the
/// same value.
///
/// When tracking diversions, the `from` field is the original assigned value location, and `to` is
/// the current one.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Diversion {
    /// The original value location.
    pub from: ValueLoc,
    /// The current value location.
    pub to: ValueLoc,
}

impl Diversion {
    /// Make a new diversion.
    pub fn new(from: ValueLoc, to: ValueLoc) -> Self {
        debug_assert!(from.is_assigned() && to.is_assigned());
        Self { from, to }
    }
}

/// Keep track of diversions in an EBB.
pub struct RegDiversions {
    current: FxHashMap<Value, Diversion>,
}

impl RegDiversions {
    /// Create a new empty diversion tracker.
    pub fn new() -> Self {
        Self {
            current: FxHashMap::default(),
        }
    }

    /// Clear the tracker, preparing for a new EBB.
    pub fn clear(&mut self) {
        self.current.clear()
    }

    /// Are there any diversions?
    pub fn is_empty(&self) -> bool {
        self.current.is_empty()
    }

    /// Get the current diversion of `value`, if any.
    pub fn diversion(&self, value: Value) -> Option<&Diversion> {
        self.current.get(&value)
    }

    /// Get all current diversions.
    pub fn iter(&self) -> Iter<'_, Value, Diversion> {
        self.current.iter()
    }

    /// Get the current location for `value`. Fall back to the assignment map for non-diverted
    /// values.
    pub fn get(&self, value: Value, locations: &ValueLocations) -> ValueLoc {
        match self.diversion(value) {
            Some(d) => d.to,
            None => locations[value],
        }
    }

    /// Get the current register location for `value`, or panic if `value` isn't in a register.
    pub fn reg(&self, value: Value, locations: &ValueLocations) -> RegUnit {
        self.get(value, locations).unwrap_reg()
    }

    /// Get the current stack location for `value`, or panic if `value` isn't in a stack slot.
    pub fn stack(&self, value: Value, locations: &ValueLocations) -> StackSlot {
        self.get(value, locations).unwrap_stack()
    }

    /// Record any kind of move.
    ///
    /// The `from` location must match an existing `to` location, if any.
    pub fn divert(&mut self, value: Value, from: ValueLoc, to: ValueLoc) {
        debug_assert!(from.is_assigned() && to.is_assigned());
        match self.current.entry(value) {
            Entry::Occupied(mut e) => {
                // TODO: non-lexical lifetimes should allow removal of the scope and early return.
                {
                    let d = e.get_mut();
                    debug_assert_eq!(d.to, from, "Bad regmove chain for {}", value);
                    if d.from != to {
                        d.to = to;
                        return;
                    }
                }
                e.remove();
            }
            Entry::Vacant(e) => {
                e.insert(Diversion::new(from, to));
            }
        }
    }

    /// Record a register -> register move.
    pub fn regmove(&mut self, value: Value, from: RegUnit, to: RegUnit) {
        self.divert(value, ValueLoc::Reg(from), ValueLoc::Reg(to));
    }

    /// Record a register -> stack move.
    pub fn regspill(&mut self, value: Value, from: RegUnit, to: StackSlot) {
        self.divert(value, ValueLoc::Reg(from), ValueLoc::Stack(to));
    }

    /// Record a stack -> register move.
    pub fn regfill(&mut self, value: Value, from: StackSlot, to: RegUnit) {
        self.divert(value, ValueLoc::Stack(from), ValueLoc::Reg(to));
    }

    /// Apply the effect of `inst`.
    ///
    /// If `inst` is a `regmove`, `regfill`, or `regspill` instruction, update the diversions to
    /// match.
    pub fn apply(&mut self, inst: &InstructionData) {
        match *inst {
            InstructionData::RegMove {
                opcode: Opcode::Regmove,
                arg,
                src,
                dst,
            } => self.regmove(arg, src, dst),
            InstructionData::RegSpill {
                opcode: Opcode::Regspill,
                arg,
                src,
                dst,
            } => self.regspill(arg, src, dst),
            InstructionData::RegFill {
                opcode: Opcode::Regfill,
                arg,
                src,
                dst,
            } => self.regfill(arg, src, dst),
            _ => {}
        }
    }

    /// Drop any recorded move for `value`.
    ///
    /// Returns the `to` location of the removed diversion.
    pub fn remove(&mut self, value: Value) -> Option<ValueLoc> {
        self.current.remove(&value).map(|d| d.to)
    }

    /// Return an object that can display the diversions.
    pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayDiversions<'a> {
        DisplayDiversions(self, regs.into())
    }
}

/// Object that displays register diversions.
pub struct DisplayDiversions<'a>(&'a RegDiversions, Option<&'a RegInfo>);

impl<'a> fmt::Display for DisplayDiversions<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{{")?;
        for (value, div) in self.0.iter() {
            write!(
                f,
                " {}: {} -> {}",
                value,
                div.from.display(self.1),
                div.to.display(self.1)
            )?
        }
        write!(f, " }}")
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::entity::EntityRef;
    use crate::ir::Value;

    #[test]
    fn inserts() {
        let mut divs = RegDiversions::new();
        let v1 = Value::new(1);
        let v2 = Value::new(2);

        divs.regmove(v1, 10, 12);
        assert_eq!(
            divs.diversion(v1),
            Some(&Diversion {
                from: ValueLoc::Reg(10),
                to: ValueLoc::Reg(12),
            })
        );
        assert_eq!(divs.diversion(v2), None);

        divs.regmove(v1, 12, 11);
        assert_eq!(divs.diversion(v1).unwrap().to, ValueLoc::Reg(11));
        divs.regmove(v1, 11, 10);
        assert_eq!(divs.diversion(v1), None);
    }
}
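The `divert` bookkeeping above maintains one invariant: only the original location and the latest location of a value are remembered, and a chain that brings a value back home erases the entry. A self-contained model of that logic, with `u32` standing in for both `Value` and `ValueLoc`; it also uses the early `return` the TODO comment anticipates, which compiles on current Rust now that non-lexical lifetimes are stable:

```rust
use std::collections::hash_map::Entry;
use std::collections::HashMap;

// Map from value to (original location, current location).
fn divert(current: &mut HashMap<u32, (u32, u32)>, value: u32, from: u32, to: u32) {
    match current.entry(value) {
        Entry::Occupied(mut e) => {
            let d = e.get_mut();
            // The move must start where the previous diversion left the value.
            assert_eq!(d.1, from, "bad regmove chain");
            if d.0 != to {
                d.1 = to; // extend the chain
                return;
            }
            e.remove(); // moved back home: no diversion left
        }
        Entry::Vacant(e) => {
            e.insert((from, to));
        }
    }
}

fn main() {
    let mut divs = HashMap::new();
    divert(&mut divs, 1, 10, 12);
    divert(&mut divs, 1, 12, 11);
    assert_eq!(divs[&1], (10, 11)); // intermediate hop 12 is forgotten
    divert(&mut divs, 1, 11, 10);
    assert!(divs.is_empty()); // round trip cancels out
}
```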
cranelift/codegen/src/regalloc/live_value_tracker.rs (new file, 345 lines)
@@ -0,0 +1,345 @@
//! Track which values are live in an EBB with instruction granularity.
//!
//! The `LiveValueTracker` keeps track of the set of live SSA values at each instruction in an EBB.
//! The sets of live values are computed on the fly as the tracker is moved from instruction to
//! instruction, starting at the EBB header.

use crate::dominator_tree::DominatorTree;
use crate::entity::{EntityList, ListPool};
use crate::fx::FxHashMap;
use crate::ir::{DataFlowGraph, Ebb, ExpandedProgramPoint, Inst, Layout, Value};
use crate::partition_slice::partition_slice;
use crate::regalloc::affinity::Affinity;
use crate::regalloc::liveness::Liveness;
use crate::regalloc::liverange::LiveRange;
use std::vec::Vec;

type ValueList = EntityList<Value>;

/// Compute and track live values throughout an EBB.
pub struct LiveValueTracker {
    /// The set of values that are live at the current program point.
    live: LiveValueVec,

    /// Saved set of live values for every jump and branch that can potentially be an immediate
    /// dominator of an EBB.
    ///
    /// This is the set of values that are live *before* the branch.
    idom_sets: FxHashMap<Inst, ValueList>,

    /// Memory pool for the live sets.
    idom_pool: ListPool<Value>,
}

/// Information about a value that is live at the current program point.
#[derive(Debug)]
pub struct LiveValue {
    /// The live value.
    pub value: Value,

    /// The local ending point of the live range in the current EBB, as returned by
    /// `LiveRange::def_local_end()` or `LiveRange::livein_local_end()`.
    pub endpoint: Inst,

    /// The affinity of the value as represented in its `LiveRange`.
    ///
    /// This value is simply a copy of the affinity stored in the live range. We copy it because
    /// almost all users of `LiveValue` need to look at it.
    pub affinity: Affinity,

    /// The live range for this value never leaves its EBB.
    pub is_local: bool,

    /// This value is dead - the live range ends immediately.
    pub is_dead: bool,
}

struct LiveValueVec {
    /// The set of values that are live at the current program point.
    values: Vec<LiveValue>,

    /// How many values at the front of `values` are known to be live after `inst`?
    ///
    /// This is used to pass a much smaller slice to `partition_slice` when it's called a second
    /// time for the same instruction.
    live_prefix: Option<(Inst, usize)>,
}

impl LiveValueVec {
    fn new() -> Self {
        Self {
            values: Vec::new(),
            live_prefix: None,
        }
    }

    /// Add a new live value to `values`. Copy some properties from `lr`.
    fn push(&mut self, value: Value, endpoint: Inst, lr: &LiveRange) {
        self.values.push(LiveValue {
            value,
            endpoint,
            affinity: lr.affinity,
            is_local: lr.is_local(),
            is_dead: lr.is_dead(),
        });
    }

    /// Remove all elements.
    fn clear(&mut self) {
        self.values.clear();
        self.live_prefix = None;
    }

    /// Make sure that the values killed by `next_inst` are moved to the end of the `values`
    /// vector.
    ///
    /// Returns the number of values that will be live after `next_inst`.
    fn live_after(&mut self, next_inst: Inst) -> usize {
        // How many values at the front of the vector are already known to survive `next_inst`?
        // We don't need to pass this prefix to `partition_slice()`.
        let keep = match self.live_prefix {
            Some((i, prefix)) if i == next_inst => prefix,
            _ => 0,
        };

        // Move the remaining surviving values to the front partition of the vector.
        let prefix = keep + partition_slice(&mut self.values[keep..], |v| v.endpoint != next_inst);

        // Remember the new prefix length in case we get called again for the same `next_inst`.
        self.live_prefix = Some((next_inst, prefix));
        prefix
    }

    /// Remove the values killed by `next_inst`.
    fn remove_kill_values(&mut self, next_inst: Inst) {
        let keep = self.live_after(next_inst);
        self.values.truncate(keep);
    }

    /// Remove any dead values.
    fn remove_dead_values(&mut self) {
        self.values.retain(|v| !v.is_dead);
        self.live_prefix = None;
    }
}

impl LiveValueTracker {
    /// Create a new blank tracker.
    pub fn new() -> Self {
        Self {
            live: LiveValueVec::new(),
            idom_sets: FxHashMap(),
            idom_pool: ListPool::new(),
        }
    }

    /// Clear all cached information.
    pub fn clear(&mut self) {
        self.live.clear();
        self.idom_sets.clear();
        self.idom_pool.clear();
    }

    /// Get the set of currently live values.
    ///
    /// Between calls to `process_inst()` and `drop_dead()`, this includes both values killed and
    /// defined by the current instruction.
    pub fn live(&self) -> &[LiveValue] {
        &self.live.values
    }

    /// Get a mutable set of currently live values.
    ///
    /// Use with care and don't move entries around.
    pub fn live_mut(&mut self) -> &mut [LiveValue] {
        &mut self.live.values
    }

    /// Move the current position to the top of `ebb`.
    ///
    /// This depends on the stored live value set at `ebb`'s immediate dominator, so that must have
    /// been visited first.
    ///
    /// Returns `(liveins, args)` as a pair of slices. The first slice is the set of live-in values
    /// from the immediate dominator. The second slice is the set of `ebb` parameters.
    ///
    /// Dead parameters with no uses are included in `args`. Call `drop_dead_params()` to remove
    /// them.
    pub fn ebb_top(
        &mut self,
        ebb: Ebb,
        dfg: &DataFlowGraph,
        liveness: &Liveness,
        layout: &Layout,
        domtree: &DominatorTree,
    ) -> (&[LiveValue], &[LiveValue]) {
        // Start over, compute the set of live values at the top of the EBB from two sources:
        //
        // 1. Values that were live before `ebb`'s immediate dominator, filtered for those that are
        //    actually live-in.
        // 2. Arguments to `ebb` that are not dead.
        //
        self.live.clear();

        // Compute the live-in values. Start by filtering the set of values that were live before
        // the immediate dominator. Just use the empty set if there's no immediate dominator (i.e.,
        // the entry block or an unreachable block).
        if let Some(idom) = domtree.idom(ebb) {
            // If the immediate dominator exists, we must have a stored list for it. This is a
            // requirement on the order EBBs are visited: All dominators must have been processed
            // before the current EBB.
            let idom_live_list = self
                .idom_sets
                .get(&idom)
                .expect("No stored live set for dominator");
            let ctx = liveness.context(layout);
            // Get just the values that are live-in to `ebb`.
            for &value in idom_live_list.as_slice(&self.idom_pool) {
                let lr = liveness
                    .get(value)
                    .expect("Immediate dominator value has no live range");

                // Check if this value is live-in here.
                if let Some(endpoint) = lr.livein_local_end(ebb, ctx) {
                    self.live.push(value, endpoint, lr);
                }
            }
        }

        // Now add all the live parameters to `ebb`.
        let first_arg = self.live.values.len();
        for &value in dfg.ebb_params(ebb) {
            let lr = &liveness[value];
            debug_assert_eq!(lr.def(), ebb.into());
            match lr.def_local_end().into() {
                ExpandedProgramPoint::Inst(endpoint) => {
                    self.live.push(value, endpoint, lr);
                }
                ExpandedProgramPoint::Ebb(local_ebb) => {
                    // This is a dead EBB parameter which is not even live into the first
                    // instruction in the EBB.
                    debug_assert_eq!(
                        local_ebb, ebb,
                        "EBB parameter live range ends at wrong EBB header"
                    );
                    // Give this value a fake endpoint that is the first instruction in the EBB.
                    // We expect it to be removed by calling `drop_dead_params()`.
                    self.live
                        .push(value, layout.first_inst(ebb).expect("Empty EBB"), lr);
                }
            }
        }

        self.live.values.split_at(first_arg)
    }

    /// Prepare to move past `inst`.
    ///
    /// Determine the set of already live values that are killed by `inst`, and add the new defined
    /// values to the tracked set.
    ///
    /// Returns `(throughs, kills, defs)` as a tuple of slices:
    ///
    /// 1. The `throughs` slice is the set of live-through values that are neither defined nor
    ///    killed by the instruction.
    /// 2. The `kills` slice is the set of values that were live before the instruction and are
    ///    killed at the instruction. This does not include dead defs.
    /// 3. The `defs` slice is guaranteed to be in the same order as `inst`'s results, and includes
    ///    dead defines.
    ///
    /// The order of `throughs` and `kills` is arbitrary.
    ///
    /// The `drop_dead()` method must be called next to actually remove the dead values from the
    /// tracked set after the two returned slices are no longer needed.
    pub fn process_inst(
        &mut self,
        inst: Inst,
        dfg: &DataFlowGraph,
        liveness: &Liveness,
    ) -> (&[LiveValue], &[LiveValue], &[LiveValue]) {
        // Save a copy of the live values before any branches or jumps that could be somebody's
        // immediate dominator.
        if dfg[inst].opcode().is_branch() {
            self.save_idom_live_set(inst);
        }

        // Move killed values to the end of the vector.
        // Don't remove them yet, `drop_dead()` will do that.
        let first_kill = self.live.live_after(inst);

        // Add the values defined by `inst`.
        let first_def = self.live.values.len();
        for &value in dfg.inst_results(inst) {
            let lr = &liveness[value];
            debug_assert_eq!(lr.def(), inst.into());
            match lr.def_local_end().into() {
                ExpandedProgramPoint::Inst(endpoint) => {
                    self.live.push(value, endpoint, lr);
                }
                ExpandedProgramPoint::Ebb(ebb) => {
                    panic!("Instruction result live range can't end at {}", ebb);
                }
            }
        }

        (
            &self.live.values[0..first_kill],
            &self.live.values[first_kill..first_def],
            &self.live.values[first_def..],
        )
    }

    /// Prepare to move past a ghost instruction.
    ///
    /// This is like `process_inst`, except any defs are ignored.
    ///
    /// Returns `(throughs, kills)`.
    pub fn process_ghost(&mut self, inst: Inst) -> (&[LiveValue], &[LiveValue]) {
        let first_kill = self.live.live_after(inst);
        self.live.values.as_slice().split_at(first_kill)
    }

    /// Drop the values that are now dead after moving past `inst`.
    ///
    /// This removes both live values that were killed by `inst` and dead defines on `inst` itself.
    ///
    /// This must be called after `process_inst(inst)` and before proceeding to the next
    /// instruction.
    pub fn drop_dead(&mut self, inst: Inst) {
        // Remove both live values that were killed by `inst` and dead defines from `inst`.
        self.live.remove_kill_values(inst);
    }

    /// Drop any values that are marked as `is_dead`.
    ///
    /// Use this after calling `ebb_top` to clean out dead EBB parameters.
    pub fn drop_dead_params(&mut self) {
        self.live.remove_dead_values();
    }

    /// Process new spills.
    ///
    /// Any values where `f` returns true are spilled and will be treated as if their affinity was
    /// `Stack`.
    pub fn process_spills<F>(&mut self, mut f: F)
    where
        F: FnMut(Value) -> bool,
    {
        for lv in &mut self.live.values {
            if f(lv.value) {
                lv.affinity = Affinity::Stack;
            }
        }
    }

    /// Save the current set of live values so it is associated with `idom`.
    fn save_idom_live_set(&mut self, idom: Inst) {
        let values = self.live.values.iter().map(|lv| lv.value);
        let pool = &mut self.idom_pool;
        // If there already is a set saved for `idom`, just keep it.
        self.idom_sets.entry(idom).or_insert_with(|| {
            let mut list = ValueList::default();
            list.extend(values, pool);
            list
        });
    }
}
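The heart of the tracker is `live_after`: values that survive the instruction are partitioned to the front of the vector so the killed suffix can first be inspected and then truncated by `drop_dead`. A self-contained model of that step, with `(value, endpoint)` pairs standing in for `LiveValue`; the real code also caches the prefix length in `live_prefix` so a second call for the same instruction skips the re-partition, which this sketch omits:

```rust
// Move survivors (endpoint != next_inst) to the front, keeping their
// relative order, and return how many there are.
fn live_after(values: &mut Vec<(u32, u32)>, next_inst: u32) -> usize {
    let mut prefix = 0;
    for i in 0..values.len() {
        if values[i].1 != next_inst {
            values.swap(prefix, i);
            prefix += 1;
        }
    }
    prefix
}

fn main() {
    // Three live values; v2's local live range ends at instruction 7.
    let mut live = vec![(1, 9), (2, 7), (3, 8)];
    let prefix = live_after(&mut live, 7);
    assert_eq!(prefix, 2);
    // The killed values now sit in the suffix, ready to be inspected...
    assert!(live[prefix..].iter().all(|&(_, end)| end == 7));
    // ...and then dropped, the model's `remove_kill_values`.
    live.truncate(prefix);
    assert_eq!(live, vec![(1, 9), (3, 8)]);
}
```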
cranelift/codegen/src/regalloc/liveness.rs (new file, 460 lines)
@@ -0,0 +1,460 @@
//! Liveness analysis for SSA values.
//!
//! This module computes the live range of all the SSA values in a function and produces a
//! `LiveRange` instance for each.
//!
//!
//! # Liveness consumers
//!
//! The primary consumer of the liveness analysis is the SSA coloring pass which goes through each
//! EBB and assigns a register to the defined values. This algorithm needs to maintain a set of the
//! currently live values as it is iterating down the instructions in the EBB. It asks the
//! following questions:
//!
//! - What is the set of live values at the entry to the EBB?
//! - When moving past a use of a value, is that value still alive in the EBB, or was that the last
//!   use?
//! - When moving past a branch, which of the live values are still live below the branch?
//!
//! The set of `LiveRange` instances can answer these questions through their `def_local_end` and
//! `livein_local_end` queries. The coloring algorithm visits EBBs in a topological order of the
//! dominator tree, so it can compute the set of live values at the beginning of an EBB by starting
//! from the set of live values at the dominating branch instruction and filtering it with
//! `livein_local_end`. These sets do not need to be stored in the liveness analysis.
//!
//! The secondary consumer of the liveness analysis is the spilling pass which needs to count the
//! number of live values at every program point and insert spill code until the number of
//! registers needed is small enough.
//!
//!
//! # Alternative algorithms
//!
//! A number of different liveness analysis algorithms exist, so it is worthwhile to look at a few
//! alternatives.
//!
//! ## Data-flow equations
//!
//! The classic *live variables analysis* that you will find in all compiler books from the
//! previous century does not depend on SSA form. It is typically implemented by iteratively
//! solving data-flow equations on bit-vectors of variables. The result is a live-out bit-vector of
//! variables for every basic block in the program.
//!
//! This algorithm has some disadvantages that make us look elsewhere:
//!
//! - Quadratic memory use. We need a bit per variable per basic block in the function.
//! - Dense representation of sparse data. In practice, the majority of SSA values never leave
//!   their basic block, and those that do span basic blocks rarely span a large number of basic
//!   blocks. This makes the data stored in the bitvectors quite sparse.
//! - Traditionally, the data-flow equations were solved for real program *variables* which does
//!   not include temporaries used in evaluating expressions. We have an SSA form program which
//!   blurs the distinction between temporaries and variables. This makes the quadratic memory
//!   problem worse because there are many more SSA values than there were variables in the
//!   original program, and we don't know a priori which SSA values leave their basic block.
//! - Missing last-use information. For values that are not live-out of a basic block, we would
//!   need to store information about the last use in the block somewhere. LLVM stores this
//!   information as a 'kill bit' on the last use in the IR. Maintaining these kill bits has been a
//!   source of problems for LLVM's register allocator.
//!
//! Data-flow equations can detect when a variable is used uninitialized, and they can handle
//! multiple definitions of the same variable. We don't need this generality since we already have
//! a program in SSA form.
//!
//! ## LLVM's liveness analysis
//!
//! LLVM's register allocator computes liveness per *virtual register*, where a virtual register is
//! a disjoint union of related SSA values that should be assigned to the same physical register.
//! It uses a compact data structure very similar to our `LiveRange`. The important difference is
//! that Cranelift's `LiveRange` only describes a single SSA value, while LLVM's `LiveInterval`
//! describes the live range of a virtual register *and* which one of the related SSA values is
//! live at any given program point.
//!
//! LLVM computes the live range of each virtual register independently by using the use-def chains
//! that are baked into its IR. The algorithm for a single virtual register is:
//!
//! 1. Initialize the live range with a single-instruction snippet of liveness at each def, using
//!    the def-chain. This does not include any phi-values.
//! 2. Go through the virtual register's use chain and perform the following steps at each use:
//! 3. Perform an exhaustive depth-first traversal up the CFG from the use. Look for basic blocks
//!    that already contain some liveness and extend the last live SSA value in the block to be
//!    live-out. Also build a list of new basic blocks where the register needs to be live-in.
//! 4. Iteratively propagate live-out SSA values to the new live-in blocks. This may require new
//!    PHI values to be created when different SSA values can reach the same block.
//!
//! The iterative SSA form reconstruction can be skipped if the depth-first search only encountered
//! one SSA value.
//!
//! This algorithm has some advantages compared to the data-flow equations:
//!
//! - The live ranges of local virtual registers are computed very quickly without ever traversing
//!   the CFG. The memory needed to store these live ranges is independent of the number of basic
//!   blocks in the program.
//! - The time to compute the live range of a global virtual register is proportional to the number
//!   of basic blocks covered. Many virtual registers only cover a few blocks, even in very large
//!   functions.
//! - A single live range can be recomputed after making modifications to the IR. No global
//!   algorithm is necessary. This feature depends on having use-def chains for virtual registers
//!   which Cranelift doesn't.
//!
//! Cranelift uses very similar data structures and algorithms to LLVM, with the important
//! difference that live ranges are computed per SSA value instead of per virtual register, and the
//! uses in Cranelift IR refer to SSA values instead of virtual registers. This means that
//! Cranelift can skip the last step of reconstructing SSA form for the virtual register uses.
//!
//! ## Fast Liveness Checking for SSA-Form Programs
//!
//! A liveness analysis that is often brought up in the context of SSA-based register allocation
//! was presented at CGO 2008:
//!
//! > Boissinot, B., Hack, S., Grund, D., de Dinechin, B. D., & Rastello, F. (2008). *Fast Liveness
//! > Checking for SSA-Form Programs.* CGO.
//!
//! This analysis uses a global pre-computation that only depends on the CFG of the function. It
//! then allows liveness queries for any (value, program point) pair. Each query traverses the use
//! chain of the value and performs lookups in the precomputed bit-vectors.
//!
//! I did not seriously consider this analysis for Cranelift because:
//!
//! - It depends critically on use chains which Cranelift doesn't have.
//! - Popular variables like the `this` pointer in a C++ method can have very large use chains.
//!   Traversing such a long use chain on every liveness lookup has the potential for some nasty
//!   quadratic behavior in unfortunate cases.
//! - It says "fast" in the title, but the paper only claims to be 16% faster than a data-flow
//!   based approach, which isn't that impressive.
//!
//! Nevertheless, the property of only depending on the CFG structure is very useful. If Cranelift
//! gains use chains, this approach would be worth a proper evaluation.
//!
//!
//! # Cranelift's liveness analysis
//!
//! The algorithm implemented in this module is similar to LLVM's with these differences:
//!
//! - The `LiveRange` data structure describes the liveness of a single SSA value, not a virtual
//!   register.
//! - Instructions in Cranelift IR contain references to SSA values, not virtual registers.
//! - All live ranges are computed in one traversal of the program. Cranelift doesn't have use
//!   chains, so it is not possible to compute the live range for a single SSA value independently.
//!
//! The liveness computation visits all instructions in the program. The order is not important for
//! the algorithm to be correct. At each instruction, the used values are examined.
//!
//! - The first time a value is encountered, its live range is constructed as a dead live range
//!   containing only the defining program point.
//! - The local interval of the value's live range is extended so it reaches the use. This may
//!   require creating a new live-in local interval for the EBB.
//! - If the live range became live-in to the EBB, add the EBB to a work-list.
//! - While the work-list is non-empty, pop a live-in EBB and repeat the two steps above, using
//!   each of the live-in EBB's CFG predecessor instructions as a 'use'.
//!
//! The effect of this algorithm is to extend the live range of each value to reach uses as they
//! are visited. No data about each value beyond the live range is needed between visiting uses, so
//! nothing is lost by computing the live range of all values simultaneously.
//!
//! ## Cache efficiency of Cranelift vs LLVM
//!
//! Since LLVM computes the complete live range of a virtual register in one go, it can keep the
//! whole `LiveInterval` for the register in L1 cache. Since it is visiting the instructions in use
//! chain order, some cache thrashing can occur as a result of pulling instructions into cache
//! somewhat chaotically.
//!
//! Cranelift uses a transposed algorithm, visiting instructions in order. This means that each
//! instruction is brought into cache only once, and it is likely that the other instructions on
//! the same cache line will be visited before the line is evicted.
//!
//! Cranelift's problem is that the `LiveRange` structs are visited many times and not always
//! regularly. We should strive to make the `LiveRange` struct as small as possible such that
//! multiple related values can live on the same cache line.
//!
//! - Local values should fit in a 16-byte `LiveRange` struct or smaller. The current
//!   implementation contains a 24-byte `Vec` object and a redundant `value` member pushing the
//!   size to 32 bytes.
//! - Related values should be stored on the same cache line. The current sparse set implementation
//!   does a decent job of that.
//! - For global values, the list of live-in intervals is very likely to fit on a single cache
//!   line. These lists are very likely to be found in L2 cache at least.
//!
//! There is some room for improvement.

use crate::entity::SparseMap;
use crate::flowgraph::{BasicBlock, ControlFlowGraph};
use crate::ir::dfg::ValueDef;
use crate::ir::{Ebb, Function, Inst, Layout, ProgramPoint, Value};
use crate::isa::{EncInfo, OperandConstraint, TargetIsa};
use crate::regalloc::affinity::Affinity;
use crate::regalloc::liverange::{LiveRange, LiveRangeContext, LiveRangeForest};
use crate::timing;
use core::mem;
use core::ops::Index;
use std::vec::Vec;

/// A set of live ranges, indexed by value number.
type LiveRangeSet = SparseMap<Value, LiveRange>;

/// Get a mutable reference to the live range for `value`.
/// Create it if necessary.
fn get_or_create<'a>(
    lrset: &'a mut LiveRangeSet,
    value: Value,
    isa: &TargetIsa,
    func: &Function,
    encinfo: &EncInfo,
) -> &'a mut LiveRange {
    // It would be better to use `get_mut()` here, but that leads to borrow checker fighting
    // which can probably only be resolved by non-lexical lifetimes.
    // https://github.com/rust-lang/rfcs/issues/811
    if lrset.get(value).is_none() {
        // Create a live range for value. We need the program point that defines it.
        let def;
        let affinity;
        match func.dfg.value_def(value) {
            ValueDef::Result(inst, rnum) => {
                def = inst.into();
                // Initialize the affinity from the defining instruction's result constraints.
                // Don't do this for call return values which are always tied to a single register.
                affinity = encinfo
                    .operand_constraints(func.encodings[inst])
                    .and_then(|rc| rc.outs.get(rnum))
                    .map(Affinity::new)
                    .or_else(|| {
                        // If this is a call, get the return value affinity.
                        func.dfg
                            .call_signature(inst)
                            .map(|sig| Affinity::abi(&func.dfg.signatures[sig].returns[rnum], isa))
                    })
                    .unwrap_or_default();
            }
            ValueDef::Param(ebb, num) => {
                def = ebb.into();
                if func.layout.entry_block() == Some(ebb) {
                    // The affinity for entry block parameters can be inferred from the function
                    // signature.
                    affinity = Affinity::abi(&func.signature.params[num], isa);
                } else {
                    // Give normal EBB parameters a register affinity matching their type.
                    let rc = isa.regclass_for_abi_type(func.dfg.value_type(value));
                    affinity = Affinity::Reg(rc.into());
                }
            }
        };
        lrset.insert(LiveRange::new(value, def, affinity));
    }
    lrset.get_mut(value).unwrap()
}

/// Extend the live range for `value` so it reaches `to` which must live in `ebb`.
fn extend_to_use(
    lr: &mut LiveRange,
    ebb: Ebb,
    to: Inst,
    worklist: &mut Vec<Ebb>,
    func: &Function,
    cfg: &ControlFlowGraph,
    forest: &mut LiveRangeForest,
) {
    // This is our scratch working space, and we'll leave it empty when we return.
    debug_assert!(worklist.is_empty());

    // Extend the range locally in `ebb`.
    // If there already was a live interval in that block, we're done.
    if lr.extend_in_ebb(ebb, to, &func.layout, forest) {
        worklist.push(ebb);
    }

    // The work list contains those EBBs where we have learned that the value needs to be
    // live-in.
    //
    // This algorithm becomes a depth-first traversal up the CFG, enumerating all paths through the
    // CFG from the existing live range to `ebb`.
    //
    // Extend the live range as we go. The live range itself also serves as a visited set since
    // `extend_in_ebb` will never return true twice for the same EBB.
    //
    while let Some(livein) = worklist.pop() {
        // We've learned that the value needs to be live-in to the `livein` EBB.
        // Make sure it is also live at all predecessor branches to `livein`.
        for BasicBlock {
            ebb: pred,
            inst: branch,
        } in cfg.pred_iter(livein)
        {
            if lr.extend_in_ebb(pred, branch, &func.layout, forest) {
                // This predecessor EBB also became live-in. We need to process it later.
                worklist.push(pred);
            }
        }
    }
}

/// Liveness analysis for a function.
///
/// Compute a live range for every SSA value used in the function.
pub struct Liveness {
    /// The live ranges that have been computed so far.
    ranges: LiveRangeSet,

    /// Memory pool for the live ranges.
    forest: LiveRangeForest,

    /// Working space for the `extend_to_use` algorithm.
    /// This vector is always empty, except for inside that function.
    /// It lives here to avoid repeated allocation of scratch memory.
    worklist: Vec<Ebb>,
}

impl Liveness {
    /// Create a new empty liveness analysis.
    ///
    /// The memory allocated for this analysis can be reused for multiple functions. Use the
    /// `compute` method to actually run the analysis for a function.
    pub fn new() -> Self {
        Self {
            ranges: LiveRangeSet::new(),
            forest: LiveRangeForest::new(),
            worklist: Vec::new(),
        }
    }

    /// Get a context needed for working with a `LiveRange`.
    pub fn context<'a>(&'a self, layout: &'a Layout) -> LiveRangeContext<'a, Layout> {
        LiveRangeContext::new(layout, &self.forest)
    }

    /// Clear all data structures in this liveness analysis.
    pub fn clear(&mut self) {
        self.ranges.clear();
        self.forest.clear();
        self.worklist.clear();
    }

    /// Get the live range for `value`, if it exists.
    pub fn get(&self, value: Value) -> Option<&LiveRange> {
        self.ranges.get(value)
    }

    /// Create a new live range for `value`.
    ///
    /// The new live range will be defined at `def` with no extent, like a dead value.
    ///
    /// This asserts that `value` does not have an existing live range.
    pub fn create_dead<PP>(&mut self, value: Value, def: PP, affinity: Affinity)
    where
        PP: Into<ProgramPoint>,
    {
        let old = self
            .ranges
            .insert(LiveRange::new(value, def.into(), affinity));
        debug_assert!(old.is_none(), "{} already has a live range", value);
    }

    /// Move the definition of `value` to `def`.
    ///
    /// The old and new def points must be in the same EBB, and before the end of the live range.
    pub fn move_def_locally<PP>(&mut self, value: Value, def: PP)
    where
        PP: Into<ProgramPoint>,
    {
        let lr = self.ranges.get_mut(value).expect("Value has no live range");
        lr.move_def_locally(def.into());
    }

    /// Locally extend the live range for `value` to reach `user`.
    ///
    /// It is assumed the `value` is already live before `user` in `ebb`.
    ///
    /// Returns a mutable reference to the value's affinity in case that also needs to be updated.
    pub fn extend_locally(
        &mut self,
        value: Value,
        ebb: Ebb,
        user: Inst,
        layout: &Layout,
    ) -> &mut Affinity {
        debug_assert_eq!(Some(ebb), layout.inst_ebb(user));
        let lr = self.ranges.get_mut(value).expect("Value has no live range");
        let livein = lr.extend_in_ebb(ebb, user, layout, &mut self.forest);
        debug_assert!(!livein, "{} should already be live in {}", value, ebb);
        &mut lr.affinity
    }

    /// Change the affinity of `value` to `Stack` and return the previous affinity.
    pub fn spill(&mut self, value: Value) -> Affinity {
        let lr = self.ranges.get_mut(value).expect("Value has no live range");
        mem::replace(&mut lr.affinity, Affinity::Stack)
    }

    /// Compute the live ranges of all SSA values used in `func`.
    /// This clears out any existing analysis stored in this data structure.
    pub fn compute(&mut self, isa: &TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) {
        let _tt = timing::ra_liveness();
        self.ranges.clear();

        // Get ISA data structures used for computing live range affinities.
        let encinfo = isa.encoding_info();
        let reginfo = isa.register_info();

        // The liveness computation needs to visit all uses, but the order doesn't matter.
        // TODO: Perhaps this traversal of the function could be combined with a dead code
        // elimination pass if we visit a post-order of the dominator tree?
        // TODO: Resolve value aliases while we're visiting instructions?
        for ebb in func.layout.ebbs() {
            // Make sure we have created live ranges for dead EBB parameters.
            // TODO: If these parameters are really dead, we could remove them, except for the
            // entry block which must match the function signature.
            for &arg in func.dfg.ebb_params(ebb) {
                get_or_create(&mut self.ranges, arg, isa, func, &encinfo);
            }

            for inst in func.layout.ebb_insts(ebb) {
                // Eliminate all value aliases, they would confuse the register allocator.
                func.dfg.resolve_aliases_in_arguments(inst);

                // Make sure we have created live ranges for dead defs.
                // TODO: When we implement DCE, we can use the absence of a live range to indicate
                // an unused value.
                for &def in func.dfg.inst_results(inst) {
                    get_or_create(&mut self.ranges, def, isa, func, &encinfo);
                }

                // Iterator of constraints, one per value operand.
                let encoding = func.encodings[inst];
                let operand_constraint_slice: &[OperandConstraint] =
                    encinfo.operand_constraints(encoding).map_or(&[], |c| c.ins);
                let mut operand_constraints = operand_constraint_slice.iter();

                for &arg in func.dfg.inst_args(inst) {
                    // Get the live range, create it as a dead range if necessary.
                    let lr = get_or_create(&mut self.ranges, arg, isa, func, &encinfo);

                    // Extend the live range to reach this use.
                    extend_to_use(
                        lr,
                        ebb,
                        inst,
                        &mut self.worklist,
                        func,
                        cfg,
                        &mut self.forest,
                    );

                    // Apply operand constraint, ignoring any variable arguments after the fixed
                    // operands described by `operand_constraints`. Variable arguments are either
                    // EBB arguments or call/return ABI arguments.
                    if let Some(constraint) = operand_constraints.next() {
                        lr.affinity.merge(constraint, &reginfo);
                    }
                }
            }
        }
    }
}

impl Index<Value> for Liveness {
    type Output = LiveRange;

    fn index(&self, index: Value) -> &LiveRange {
        match self.ranges.get(index) {
            Some(lr) => lr,
            None => panic!("{} has no live range", index),
        }
    }
}
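The module doc above describes the worklist algorithm abstractly; the following self-contained model shows its shape on a toy CFG. Blocks are `u32`s, `covered` plays the role of the live range's built-in visited set, and pre-covering the def block mirrors `extend_in_ebb` returning `false` where the range already exists. This is a sketch of the idea, not the crate's implementation:

```rust
use std::collections::{HashMap, HashSet};

// Walk up the CFG from a use until blocks already covered by the value's
// live range are reached, marking every block on the way as live-in.
fn extend_to_use(covered: &mut HashSet<u32>, preds: &HashMap<u32, Vec<u32>>, use_block: u32) {
    let mut worklist = Vec::new();
    // `insert` returns true only the first time, mirroring `extend_in_ebb`
    // returning true only when a block newly becomes live-in.
    if covered.insert(use_block) {
        worklist.push(use_block);
    }
    while let Some(livein) = worklist.pop() {
        for &pred in preds.get(&livein).into_iter().flatten() {
            if covered.insert(pred) {
                worklist.push(pred);
            }
        }
    }
}

fn main() {
    // Diamond CFG: 0 -> {1, 2} -> 3. Value defined in block 0, used in 3.
    let preds: HashMap<u32, Vec<u32>> = [(1, vec![0]), (2, vec![0]), (3, vec![1, 2])]
        .into_iter()
        .collect();
    let mut covered = HashSet::from([0]); // the def block is already covered
    extend_to_use(&mut covered, &preds, 3);
    // The value becomes live in every block on a path from def to use.
    assert_eq!(covered, HashSet::from([0, 1, 2, 3]));
}
```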
cranelift/codegen/src/regalloc/liverange.rs (new file, 745 lines)
@@ -0,0 +1,745 @@
|
||||
//! Data structure representing the live range of an SSA value.
//!
//! Live ranges are tracked per SSA value, not per variable or virtual register. The live range of
//! an SSA value begins where it is defined and extends to all program points where the value is
//! still needed.
//!
//! # Local Live Ranges
//!
//! Inside a single extended basic block, the live range of a value is always an interval between
//! two program points (if the value is live in the EBB at all). The starting point is either:
//!
//! 1. The instruction that defines the value, or
//! 2. The EBB header, because the value is an argument to the EBB, or
//! 3. The EBB header, because the value is defined in another EBB and live-in to this one.
//!
//! The ending point of the local live range is the last of the following program points in the
//! EBB:
//!
//! 1. The last use in the EBB, where a *use* is an instruction that has the value as an argument.
//! 2. The last branch or jump instruction in the EBB that can reach a use.
//! 3. If the value has no uses anywhere (a *dead value*), the program point that defines it.
//!
//! Note that 2. includes loop back-edges to the same EBB. In general, if a value is defined
//! outside a loop and used inside the loop, it will be live in the entire loop.
//!
//! # Global Live Ranges
//!
//! Values that appear in more than one EBB have a *global live range* which can be seen as the
//! disjoint union of the per-EBB local intervals for all of the EBBs where the value is live.
//! Together with a `ProgramOrder` which provides a linear ordering of the EBBs, the global live
//! range becomes a linear sequence of disjoint intervals, at most one per EBB.
//!
//! In the special case of a dead value, the global live range is a single interval where the start
//! and end points are the same. The global live range of a value is never completely empty.
//!
//! # Register interference
//!
//! The register allocator uses live ranges to determine if values *interfere*, which means that
//! they can't be stored in the same register. Two live ranges interfere if and only if any of
//! their intervals overlap.
//!
//! If one live range ends at an instruction that defines another live range, those two live ranges
//! are not considered to interfere. This is because most ISAs allow instructions to reuse an input
//! register for an output value. If Cranelift gets support for inline assembly, we will need to
//! handle *early clobbers* which are output registers that are not allowed to alias any input
//! registers.
//!
//! If `i1 < i2 < i3` are program points, we have:
//!
//! - `i1-i2` and `i1-i3` interfere because the intervals overlap.
//! - `i1-i2` and `i2-i3` don't interfere.
//! - `i1-i3` and `i2-i2` do interfere because the dead def would clobber the register.
//! - `i1-i2` and `i2-i2` don't interfere.
//! - `i2-i3` and `i2-i2` do interfere.
//!
//! Because of this behavior around interval end points, live range interference is not completely
//! equivalent to mathematical intersection of open or half-open intervals.
//!
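//! For intuition, the rules above match the following predicate on the def/end
//! points (a sketch for this doc, not a function defined in this module):
//!
//! ```text
//! interferes(a, b) = (a.def < b.end && b.def < a.end) || a.def == b.def
//! ```
//!
//! The `a.def == b.def` clause covers two values defined at the same program
//! point, which always interfere even if one of them is dead.
//!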
//! # Implementation notes
//!
//! A few notes about the implementation of this data structure. This should not concern someone
//! only looking to use the public interface.
//!
//! ## EBB ordering
//!
//! The relative order of EBBs is used to maintain a sorted list of live-in intervals and to
//! coalesce adjacent live-in intervals when the prior interval covers the whole EBB. This doesn't
//! depend on any property of the program order, so alternative orderings are possible:
//!
//! 1. The EBB layout order. This is what we currently use.
//! 2. A topological order of the dominator tree. All the live-in intervals would come after the
//!    def interval.
//! 3. A numerical order by EBB number. Performant because it doesn't need to indirect through the
//!    `ProgramOrder` for comparisons.
//!
//! These orderings will cause small differences in coalescing opportunities, but all of them would
//! do a decent job of compressing a long live range. The numerical order might be preferable
//! because:
//!
//! - It has better performance because EBB numbers can be compared directly without any table
//!   lookups.
//! - If EBB numbers are not reused, it is safe to allocate new EBBs without getting spurious
//!   live-in intervals from any coalesced representations that happen to cross a new EBB.
//!
//! For comparing instructions, the layout order is always what we want.
//!
//! ## Alternative representation
//!
//! Since a local live-in interval always begins at its EBB header, it is uniquely described by its
//! end point instruction alone. We can use the layout to look up the EBB containing the end point.
//! This means that a sorted `Vec<Inst>` would be enough to represent the set of live-in intervals.
//!
//! Coalescing is an important compression technique because some live ranges can span thousands of
//! EBBs. We can represent that by switching to a sorted `Vec<ProgramPoint>` representation where
//! an `[Ebb, Inst]` pair represents a coalesced range, while an `Inst` entry without a preceding
//! `Ebb` entry represents a single live-in interval.
//!
//! This representation is more compact for a live range with many uncoalesced live-in intervals.
//! It is more complicated to work with, though, so it is probably not worth it. The performance
//! benefits of switching to a numerical EBB order only appear if the binary search is doing
//! EBB-EBB comparisons.
//!
//! ## B-tree representation
//!
//! A `BTreeMap<Ebb, Inst>` could also be used for the live-in intervals. It looks like the
//! standard library B-tree doesn't provide the necessary interface for an efficient implementation
//! of coalescing, so we would need to roll our own.
//!

use crate::bforest;
use crate::entity::SparseMapValue;
use crate::ir::{Ebb, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value};
use crate::regalloc::affinity::Affinity;
use core::cmp::Ordering;
use core::marker::PhantomData;

/// Global live range of a single SSA value.
///
/// As [explained in the module documentation](index.html#local-live-ranges), the live range of an
/// SSA value is the disjoint union of a set of intervals, each local to a single EBB, and with at
/// most one interval per EBB. We further distinguish between:
///
/// 1. The *def interval* is the local interval in the EBB where the value is defined, and
/// 2. The *live-in intervals* are the local intervals in the remaining EBBs.
///
/// A live-in interval always begins at the EBB header, while the def interval can begin at the
/// defining instruction, or at the EBB header for an EBB argument value.
///
/// All values have a def interval, but a large proportion of values don't have any live-in
/// intervals. These are called *local live ranges*.
///
/// # Program order requirements
///
/// The internal representation of a `LiveRange` depends on a consistent `ProgramOrder` both for
/// ordering instructions inside an EBB *and* for ordering EBBs. The methods that depend on the
/// ordering take an explicit `ProgramOrder` object, and it is the caller's responsibility to
/// ensure that the provided ordering is consistent between calls.
///
/// In particular, changing the order of EBBs or inserting new EBBs will invalidate live ranges.
///
/// Inserting new instructions in the layout is safe, but removing instructions is not. Besides the
/// instructions using or defining their value, `LiveRange` structs can contain references to
/// branch and jump instructions.
pub type LiveRange = GenLiveRange<Layout>;

/// Generic live range implementation.
///
/// The intended generic parameter is `PO=Layout`, but tests are simpler with a mock order.
/// Use `LiveRange` instead of using this generic directly.
pub struct GenLiveRange<PO: ProgramOrder> {
    /// The value described by this live range.
    /// This member can't be modified in case the live range is stored in a `SparseMap`.
    value: Value,

    /// The preferred register allocation for this value.
    pub affinity: Affinity,

    /// The instruction or EBB header where this value is defined.
    def_begin: ProgramPoint,

    /// The end point of the def interval. This must always belong to the same EBB as `def_begin`.
    ///
    /// We always have `def_begin <= def_end` with equality implying a dead def live range with no
    /// uses.
    def_end: ProgramPoint,

    /// Additional live-in intervals sorted in program order.
    ///
    /// This map is empty for most values which are only used in one EBB.
    ///
    /// A map entry `ebb -> inst` means that the live range is live-in to `ebb`, continuing up to
    /// `inst` which may belong to a later EBB in the program order.
    ///
    /// The entries are non-overlapping, and none of them overlap the EBB where the value is
    /// defined.
    liveins: bforest::Map<Ebb, Inst>,

    po: PhantomData<*const PO>,
}

/// Context information needed to query a `LiveRange`.
pub struct LiveRangeContext<'a, PO: 'a + ProgramOrder> {
    /// Ordering of EBBs.
    pub order: &'a PO,
    /// Memory pool.
    pub forest: &'a bforest::MapForest<Ebb, Inst>,
}

impl<'a, PO: ProgramOrder> LiveRangeContext<'a, PO> {
    /// Make a new context.
    pub fn new(order: &'a PO, forest: &'a bforest::MapForest<Ebb, Inst>) -> Self {
        Self { order, forest }
    }
}

impl<'a, PO: ProgramOrder> Clone for LiveRangeContext<'a, PO> {
    fn clone(&self) -> Self {
        LiveRangeContext {
            order: self.order,
            forest: self.forest,
        }
    }
}

impl<'a, PO: ProgramOrder> Copy for LiveRangeContext<'a, PO> {}

/// Forest of B-trees used for storing live ranges.
pub type LiveRangeForest = bforest::MapForest<Ebb, Inst>;

struct Cmp<'a, PO: ProgramOrder + 'a>(&'a PO);

impl<'a, PO: ProgramOrder> bforest::Comparator<Ebb> for Cmp<'a, PO> {
    fn cmp(&self, a: Ebb, b: Ebb) -> Ordering {
        self.0.cmp(a, b)
    }
}

impl<PO: ProgramOrder> GenLiveRange<PO> {
    /// Create a new live range for `value` defined at `def`.
    ///
    /// The live range will be created as dead, but it can be extended with `extend_in_ebb()`.
    pub fn new(value: Value, def: ProgramPoint, affinity: Affinity) -> Self {
        Self {
            value,
            affinity,
            def_begin: def,
            def_end: def,
            liveins: bforest::Map::new(),
            po: PhantomData,
        }
    }

    /// Extend the local interval for `ebb` so it reaches `to` which must belong to `ebb`.
    /// Create a live-in interval if necessary.
    ///
    /// If the live range already has a local interval in `ebb`, extend its end point so it
    /// includes `to`, and return false.
    ///
    /// If the live range did not previously have a local interval in `ebb`, add one so the value
    /// is live-in to `ebb`, extending to `to`. Return true.
    ///
    /// The return value can be used to detect if we just learned that the value is live-in to
    /// `ebb`. This can trigger recursive extensions in `ebb`'s CFG predecessor blocks.
    pub fn extend_in_ebb(
        &mut self,
        ebb: Ebb,
        to: Inst,
        order: &PO,
        forest: &mut bforest::MapForest<Ebb, Inst>,
    ) -> bool {
        // First check if we're extending the def interval.
        //
        // We're assuming here that `to` never precedes `def_begin` in the same EBB, but we can't
        // check it without a method for getting `to`'s EBB.
        if order.cmp(ebb, self.def_end) != Ordering::Greater
            && order.cmp(to, self.def_begin) != Ordering::Less
        {
            let to_pp = to.into();
            debug_assert_ne!(
                to_pp, self.def_begin,
                "Can't use value in the defining instruction."
            );
            if order.cmp(to, self.def_end) == Ordering::Greater {
                self.def_end = to_pp;
            }
            return false;
        }

        // Now check if we're extending any of the existing live-in intervals.
        let cmp = Cmp(order);
        let mut c = self.liveins.cursor(forest, &cmp);
        let first_time_livein;

        if let Some(end) = c.goto(ebb) {
            // There's an interval beginning at `ebb`. See if it extends.
            first_time_livein = false;
            if order.cmp(end, to) == Ordering::Less {
                *c.value_mut().unwrap() = to;
            } else {
                return first_time_livein;
            }
        } else if let Some((_, end)) = c.prev() {
            // There's no interval beginning at `ebb`, but we could still be live-in at `ebb` with
            // a coalesced interval that begins before and ends after.
            if order.cmp(end, ebb) == Ordering::Greater {
                // Yep, the previous interval overlaps `ebb`.
                first_time_livein = false;
                if order.cmp(end, to) == Ordering::Less {
                    *c.value_mut().unwrap() = to;
                } else {
                    return first_time_livein;
                }
            } else {
                first_time_livein = true;
                // The current interval does not overlap `ebb`, but it may still be possible to
                // coalesce with it.
                if order.is_ebb_gap(end, ebb) {
                    *c.value_mut().unwrap() = to;
                } else {
                    c.insert(ebb, to);
                }
            }
        } else {
            // There is no existing interval before `ebb`.
            first_time_livein = true;
            c.insert(ebb, to);
        }

        // Now `c` is left pointing at an interval that ends in `to`.
        debug_assert_eq!(c.value(), Some(to));

        // See if it can be coalesced with the following interval.
        if let Some((next_ebb, next_end)) = c.next() {
            if order.is_ebb_gap(to, next_ebb) {
                // Remove this interval and extend the previous end point to `next_end`.
                c.remove();
                c.prev();
                *c.value_mut().unwrap() = next_end;
            }
        }

        first_time_livein
    }
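
    // Sketch (illustrative, names are assumptions): the liveness pass drives this
    // method from a worklist. When a value turns out to be live-in to an EBB, it
    // must also be live out of every CFG predecessor, so those blocks are extended
    // in turn:
    //
    //     while let Some(livein_ebb) = worklist.pop() {
    //         for pred in cfg.pred_iter(livein_ebb) {
    //             if lr.extend_in_ebb(pred.ebb, pred.inst, order, forest) {
    //                 worklist.push(pred.ebb);
    //             }
    //         }
    //     }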

    /// Is this the live range of a dead value?
    ///
    /// A dead value has no uses, and its live range ends at the same program point where it is
    /// defined.
    pub fn is_dead(&self) -> bool {
        self.def_begin == self.def_end
    }

    /// Is this a local live range?
    ///
    /// A local live range is only used in the same EBB where it was defined. It is allowed to span
    /// multiple basic blocks within that EBB.
    pub fn is_local(&self) -> bool {
        self.liveins.is_empty()
    }

    /// Get the program point where this live range is defined.
    ///
    /// This will be an EBB header when the value is an EBB argument, otherwise it is the defining
    /// instruction.
    pub fn def(&self) -> ProgramPoint {
        self.def_begin
    }

    /// Move the definition of this value to a new program point.
    ///
    /// It is only valid to move the definition within the same EBB, and it can't be moved beyond
    /// `def_local_end()`.
    pub fn move_def_locally(&mut self, def: ProgramPoint) {
        self.def_begin = def;
    }

    /// Get the local end-point of this live range in the EBB where it is defined.
    ///
    /// This can be the EBB header itself in the case of a dead EBB argument.
    /// Otherwise, it will be the last local use or branch/jump that can reach a use.
    pub fn def_local_end(&self) -> ProgramPoint {
        self.def_end
    }

    /// Get the local end-point of this live range in an EBB where it is live-in.
    ///
    /// If this live range is not live-in to `ebb`, return `None`. Otherwise, return the end-point
    /// of this live range's local interval in `ebb`.
    ///
    /// If the live range is live through all of `ebb`, the terminator of `ebb` is a correct
    /// answer, but it is also possible that an even later program point is returned. So don't
    /// depend on the returned `Inst` to belong to `ebb`.
    pub fn livein_local_end(&self, ebb: Ebb, ctx: LiveRangeContext<PO>) -> Option<Inst> {
        let cmp = Cmp(ctx.order);
        self.liveins
            .get_or_less(ebb, ctx.forest, &cmp)
            .and_then(|(_, inst)| {
                // We have an entry that ends at `inst`.
                if ctx.order.cmp(inst, ebb) == Ordering::Greater {
                    Some(inst)
                } else {
                    None
                }
            })
    }

    /// Is this value live-in to `ebb`?
    ///
    /// An EBB argument is not considered to be live in.
    pub fn is_livein(&self, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
        self.livein_local_end(ebb, ctx).is_some()
    }

    /// Get all the live-in intervals.
    ///
    /// Note that the intervals are stored in a compressed form so each entry may span multiple
    /// EBBs where the value is live in.
    pub fn liveins<'a>(&'a self, ctx: LiveRangeContext<'a, PO>) -> bforest::MapIter<'a, Ebb, Inst> {
        self.liveins.iter(ctx.forest)
    }

    /// Check if this live range overlaps a definition in `ebb`.
    pub fn overlaps_def(
        &self,
        def: ExpandedProgramPoint,
        ebb: Ebb,
        ctx: LiveRangeContext<PO>,
    ) -> bool {
        // Two defs at the same program point always overlap, even if one is dead.
        if def == self.def_begin.into() {
            return true;
        }

        // Check for an overlap with the local range.
        if ctx.order.cmp(def, self.def_begin) != Ordering::Less
            && ctx.order.cmp(def, self.def_end) == Ordering::Less
        {
            return true;
        }

        // Check for an overlap with a live-in range.
        match self.livein_local_end(ebb, ctx) {
            Some(inst) => ctx.order.cmp(def, inst) == Ordering::Less,
            None => false,
        }
    }

    /// Check if this live range reaches a use at `user` in `ebb`.
    pub fn reaches_use(&self, user: Inst, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
        // Check for an overlap with the local range.
        if ctx.order.cmp(user, self.def_begin) == Ordering::Greater
            && ctx.order.cmp(user, self.def_end) != Ordering::Greater
        {
            return true;
        }

        // Check for an overlap with a live-in range.
        match self.livein_local_end(ebb, ctx) {
            Some(inst) => ctx.order.cmp(user, inst) != Ordering::Greater,
            None => false,
        }
    }

    /// Check if this live range is killed at `user` in `ebb`.
    pub fn killed_at(&self, user: Inst, ebb: Ebb, ctx: LiveRangeContext<PO>) -> bool {
        self.def_local_end() == user.into() || self.livein_local_end(ebb, ctx) == Some(user)
    }
}
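
// Usage sketch (illustrative, names are assumptions): a coloring pass can use
// `killed_at` to decide whether an instruction's result may reuse an operand's
// register:
//
//     if lr.killed_at(inst, ebb, ctx) {
//         // `inst` is the last use, so the operand's register is free for reuse.
//     }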

/// Allow a `LiveRange` to be stored in a `SparseMap` indexed by values.
impl<PO: ProgramOrder> SparseMapValue<Value> for GenLiveRange<PO> {
    fn key(&self) -> Value {
        self.value
    }
}

#[cfg(test)]
mod tests {
    use super::{GenLiveRange, LiveRangeContext};
    use crate::bforest;
    use crate::entity::EntityRef;
    use crate::ir::{Ebb, Inst, Value};
    use crate::ir::{ExpandedProgramPoint, ProgramOrder};
    use core::cmp::Ordering;
    use std::vec::Vec;

    // Dummy program order which simply compares indexes.
    // It is assumed that EBBs have indexes that are multiples of 10, and instructions have indexes
    // in between. `is_ebb_gap` assumes that terminator instructions have indexes of the form
    // ebb * 10 + 1. This is used in the coalesce test.
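    // For example, under this scheme `Inst::new(21)` is the terminator of
    // `Ebb::new(20)`, so `is_ebb_gap(Inst::new(21), Ebb::new(30))` is true and the
    // local intervals of the two EBBs can be coalesced.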
    struct ProgOrder {}

    impl ProgramOrder for ProgOrder {
        fn cmp<A, B>(&self, a: A, b: B) -> Ordering
        where
            A: Into<ExpandedProgramPoint>,
            B: Into<ExpandedProgramPoint>,
        {
            fn idx(pp: ExpandedProgramPoint) -> usize {
                match pp {
                    ExpandedProgramPoint::Inst(i) => i.index(),
                    ExpandedProgramPoint::Ebb(e) => e.index(),
                }
            }

            let ia = idx(a.into());
            let ib = idx(b.into());
            ia.cmp(&ib)
        }

        fn is_ebb_gap(&self, inst: Inst, ebb: Ebb) -> bool {
            inst.index() % 10 == 1 && ebb.index() / 10 == inst.index() / 10 + 1
        }
    }

    impl ProgOrder {
        // Get the EBB corresponding to `inst`.
        fn inst_ebb(&self, inst: Inst) -> Ebb {
            let i = inst.index();
            Ebb::new(i - i % 10)
        }

        // Get the EBB of a program point.
        fn pp_ebb<PP: Into<ExpandedProgramPoint>>(&self, pp: PP) -> Ebb {
            match pp.into() {
                ExpandedProgramPoint::Inst(i) => self.inst_ebb(i),
                ExpandedProgramPoint::Ebb(e) => e,
            }
        }

        // Validate the live range invariants.
        fn validate(&self, lr: &GenLiveRange<ProgOrder>, forest: &bforest::MapForest<Ebb, Inst>) {
            // The def interval must cover a single EBB.
            let def_ebb = self.pp_ebb(lr.def_begin);
            assert_eq!(def_ebb, self.pp_ebb(lr.def_end));

            // Check that the def interval isn't backwards.
            match self.cmp(lr.def_begin, lr.def_end) {
                Ordering::Equal => assert!(lr.liveins.is_empty()),
                Ordering::Greater => {
                    panic!("Backwards def interval: {}-{}", lr.def_begin, lr.def_end)
                }
                Ordering::Less => {}
            }

            // Check the live-in intervals.
            let mut prev_end = None;
            for (begin, end) in lr.liveins.iter(forest) {
                assert_eq!(self.cmp(begin, end), Ordering::Less);
                if let Some(e) = prev_end {
                    assert_eq!(self.cmp(e, begin), Ordering::Less);
                }

                assert!(
                    self.cmp(lr.def_end, begin) == Ordering::Less
                        || self.cmp(lr.def_begin, end) == Ordering::Greater,
                    "Interval can't overlap the def EBB"
                );

                // Save for next round.
                prev_end = Some(end);
            }
        }
    }

    // Singleton `ProgramOrder` for tests below.
    const PO: &'static ProgOrder = &ProgOrder {};

    #[test]
    fn dead_def_range() {
        let v0 = Value::new(0);
        let e0 = Ebb::new(0);
        let i1 = Inst::new(1);
        let i2 = Inst::new(2);
        let e2 = Ebb::new(2);
        let lr = GenLiveRange::new(v0, i1.into(), Default::default());
        let forest = &bforest::MapForest::new();
        let ctx = LiveRangeContext::new(PO, forest);
        assert!(lr.is_dead());
        assert!(lr.is_local());
        assert_eq!(lr.def(), i1.into());
        assert_eq!(lr.def_local_end(), i1.into());
        assert_eq!(lr.livein_local_end(e2, ctx), None);
        PO.validate(&lr, ctx.forest);

        // A dead live range overlaps its own def program point.
        assert!(lr.overlaps_def(i1.into(), e0, ctx));
        assert!(!lr.overlaps_def(i2.into(), e0, ctx));
        assert!(!lr.overlaps_def(e0.into(), e0, ctx));
    }

    #[test]
    fn dead_arg_range() {
        let v0 = Value::new(0);
        let e2 = Ebb::new(2);
        let lr = GenLiveRange::new(v0, e2.into(), Default::default());
        let forest = &bforest::MapForest::new();
        let ctx = LiveRangeContext::new(PO, forest);
        assert!(lr.is_dead());
        assert!(lr.is_local());
        assert_eq!(lr.def(), e2.into());
        assert_eq!(lr.def_local_end(), e2.into());
        // The def interval of an EBB argument does not count as live-in.
        assert_eq!(lr.livein_local_end(e2, ctx), None);
        PO.validate(&lr, ctx.forest);
    }

    #[test]
    fn local_def() {
        let v0 = Value::new(0);
        let e10 = Ebb::new(10);
        let i11 = Inst::new(11);
        let i12 = Inst::new(12);
        let i13 = Inst::new(13);
        let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
        let forest = &mut bforest::MapForest::new();

        assert_eq!(lr.extend_in_ebb(e10, i13, PO, forest), false);
        PO.validate(&lr, forest);
        assert!(!lr.is_dead());
        assert!(lr.is_local());
        assert_eq!(lr.def(), i11.into());
        assert_eq!(lr.def_local_end(), i13.into());

        // Extending to an already covered inst should not change anything.
        assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
        PO.validate(&lr, forest);
        assert_eq!(lr.def(), i11.into());
        assert_eq!(lr.def_local_end(), i13.into());
    }

    #[test]
    fn local_arg() {
        let v0 = Value::new(0);
        let e10 = Ebb::new(10);
        let i11 = Inst::new(11);
        let i12 = Inst::new(12);
        let i13 = Inst::new(13);
        let mut lr = GenLiveRange::new(v0, e10.into(), Default::default());
        let forest = &mut bforest::MapForest::new();

        // Extending a dead EBB argument in its own block should not indicate that a live-in
        // interval was created.
        assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);
        PO.validate(&lr, forest);
        assert!(!lr.is_dead());
        assert!(lr.is_local());
        assert_eq!(lr.def(), e10.into());
        assert_eq!(lr.def_local_end(), i12.into());

        // Extending to an already covered inst should not change anything.
        assert_eq!(lr.extend_in_ebb(e10, i11, PO, forest), false);
        PO.validate(&lr, forest);
        assert_eq!(lr.def(), e10.into());
        assert_eq!(lr.def_local_end(), i12.into());

        // Extending further.
        assert_eq!(lr.extend_in_ebb(e10, i13, PO, forest), false);
        PO.validate(&lr, forest);
        assert_eq!(lr.def(), e10.into());
        assert_eq!(lr.def_local_end(), i13.into());
    }

    #[test]
    fn global_def() {
        let v0 = Value::new(0);
        let e10 = Ebb::new(10);
        let i11 = Inst::new(11);
        let i12 = Inst::new(12);
        let e20 = Ebb::new(20);
        let i21 = Inst::new(21);
        let i22 = Inst::new(22);
        let i23 = Inst::new(23);
        let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
        let forest = &mut bforest::MapForest::new();

        assert_eq!(lr.extend_in_ebb(e10, i12, PO, forest), false);

        // Adding a live-in interval.
        assert_eq!(lr.extend_in_ebb(e20, i22, PO, forest), true);
        PO.validate(&lr, forest);
        assert_eq!(
            lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
            Some(i22)
        );

        // A use that doesn't extend the live-in interval changes nothing.
        assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), false);
        assert_eq!(
            lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
            Some(i22)
        );

        // Extending the existing live-in.
        assert_eq!(lr.extend_in_ebb(e20, i23, PO, forest), false);
        PO.validate(&lr, forest);
        assert_eq!(
            lr.livein_local_end(e20, LiveRangeContext::new(PO, forest)),
            Some(i23)
        );
    }

    #[test]
    fn coalesce() {
        let v0 = Value::new(0);
        let i11 = Inst::new(11);
        let e20 = Ebb::new(20);
        let i21 = Inst::new(21);
        let e30 = Ebb::new(30);
        let i31 = Inst::new(31);
        let e40 = Ebb::new(40);
        let i41 = Inst::new(41);
        let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());
        let forest = &mut bforest::MapForest::new();

        assert_eq!(lr.extend_in_ebb(e30, i31, PO, forest), true);
        assert_eq!(
            lr.liveins(LiveRangeContext::new(PO, forest))
                .collect::<Vec<_>>(),
            [(e30, i31)]
        );

        // Coalesce to previous
        assert_eq!(lr.extend_in_ebb(e40, i41, PO, forest), true);
        assert_eq!(
            lr.liveins(LiveRangeContext::new(PO, forest))
                .collect::<Vec<_>>(),
            [(e30, i41)]
        );

        // Coalesce to next
        assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), true);
        assert_eq!(
            lr.liveins(LiveRangeContext::new(PO, forest))
                .collect::<Vec<_>>(),
            [(e20, i41)]
        );

        let mut lr = GenLiveRange::new(v0, i11.into(), Default::default());

        assert_eq!(lr.extend_in_ebb(e40, i41, PO, forest), true);
        assert_eq!(
            lr.liveins(LiveRangeContext::new(PO, forest))
                .collect::<Vec<_>>(),
            [(e40, i41)]
        );

        assert_eq!(lr.extend_in_ebb(e20, i21, PO, forest), true);
        assert_eq!(
            lr.liveins(LiveRangeContext::new(PO, forest))
                .collect::<Vec<_>>(),
            [(e20, i21), (e40, i41)]
        );

        // Coalesce to previous and next
        assert_eq!(lr.extend_in_ebb(e30, i31, PO, forest), true);
        assert_eq!(
            lr.liveins(LiveRangeContext::new(PO, forest))
                .collect::<Vec<_>>(),
            [(e20, i41)]
        );
    }

    // TODO: Add more tests that exercise the binary search algorithm.
}
23
cranelift/codegen/src/regalloc/mod.rs
Normal file
@@ -0,0 +1,23 @@
//! Register allocation.
//!
//! This module contains data structures and algorithms used for register allocation.

pub mod coloring;
pub mod live_value_tracker;
pub mod liveness;
pub mod liverange;
pub mod register_set;
pub mod virtregs;

mod affinity;
mod coalescing;
mod context;
mod diversion;
mod pressure;
mod reload;
mod solver;
mod spilling;

pub use self::context::Context;
pub use self::diversion::RegDiversions;
pub use self::register_set::RegisterSet;
384
cranelift/codegen/src/regalloc/pressure.rs
Normal file
@@ -0,0 +1,384 @@
//! Register pressure tracking.
//!
//! SSA-based register allocation depends on a spilling phase that "lowers register pressure
//! sufficiently". This module defines the data structures needed to measure register pressure
//! accurately enough to guarantee that the coloring phase will not run out of registers.
//!
//! Ideally, measuring register pressure amounts to simply counting the number of live registers at
//! any given program point. This simplistic method has two problems:
//!
//! 1. Registers are not interchangeable. Most ISAs have separate integer and floating-point
//!    register banks, so we need to at least count the number of live registers in each register
//!    bank separately.
//!
//! 2. Some ISAs have complicated register aliasing properties. In particular, the 32-bit ARM
//!    ISA has a floating-point register bank where two 32-bit registers alias one 64-bit register.
//!    This makes it difficult to accurately measure register pressure.
//!
//! This module deals with the problems via *register banks* and *top-level register classes*.
//! Register classes in different register banks are completely independent, so we can count
//! registers in one bank without worrying about the other bank at all.
//!
//! All register classes have a unique top-level register class, and we will count registers for
//! each top-level register class individually. However, a register bank can have multiple
//! top-level register classes that interfere with each other, so all top-level counts need to
//! be considered when determining how many more registers can be allocated.
//!
//! Currently, the only register bank with multiple top-level register classes is the `arm32`
//! floating-point register bank, which has `S`, `D`, and `Q` top-level classes.
//!
//! # Base and transient counts
//!
//! We maintain two separate register counts per top-level register class: base counts and
//! transient counts. The base counts are adjusted with the `take` and `free` functions. The
//! transient counts are adjusted with `take_transient` and `free_transient`.

// Remove once we're using the pressure tracker.
#![allow(dead_code)]

use crate::isa::registers::{RegClass, RegClassMask, RegInfo, MAX_TRACKED_TOPRCS};
use crate::regalloc::RegisterSet;
use core::cmp::min;
use core::fmt;
use core::iter::ExactSizeIterator;

/// Information per top-level register class.
///
/// Everything but the counts is static information computed from the constructor arguments.
#[derive(Default)]
struct TopRC {
    // Number of registers currently used from this register class.
    base_count: u32,
    transient_count: u32,

    // Max number of registers that can be allocated.
    limit: u32,

    // Register units per register.
    width: u8,

    // The first aliasing top-level RC.
    first_toprc: u8,

    // The number of aliasing top-level RCs.
    num_toprcs: u8,
}

impl TopRC {
    fn total_count(&self) -> u32 {
        self.base_count + self.transient_count
    }
}

pub struct Pressure {
    // Bit mask of top-level register classes that are aliased by other top-level register classes.
    // Unaliased register classes can use a simpler interference algorithm.
    aliased: RegClassMask,

    // Current register counts per top-level register class.
    toprc: [TopRC; MAX_TRACKED_TOPRCS],
}

impl Pressure {
    /// Create a new register pressure tracker.
    pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Self {
        let mut p = Self {
            aliased: 0,
            toprc: Default::default(),
        };

        // Get the layout of aliasing top-level register classes from the register banks.
        for bank in reginfo.banks {
            let first = bank.first_toprc;
            let num = bank.num_toprcs;

            if bank.pressure_tracking {
                for rc in &mut p.toprc[first..first + num] {
                    rc.first_toprc = first as u8;
                    rc.num_toprcs = num as u8;
                }

                // Flag the top-level register classes with aliases.
                if num > 1 {
                    p.aliased |= ((1 << num) - 1) << first;
                }
            } else {
                // This bank has no pressure tracking, so its top-level register classes may exceed
                // `MAX_TRACKED_TOPRCS`. Fill in dummy entries.
                for rc in &mut p.toprc[first..min(first + num, MAX_TRACKED_TOPRCS)] {
                    // These aren't used if we don't set the `aliased` bit.
                    rc.first_toprc = !0;
                    rc.limit = !0;
                }
            }
        }

        // Compute per-class limits from `usable`.
        for (toprc, rc) in p
            .toprc
            .iter_mut()
            .take_while(|t| t.num_toprcs > 0)
            .zip(reginfo.classes)
        {
            toprc.limit = usable.iter(rc).len() as u32;
            toprc.width = rc.width;
        }

        p
    }

    /// Check for an available register in the register class `rc`.
    ///
    /// If it is possible to allocate one more register from `rc`'s top-level register class,
    /// returns 0.
    ///
    /// If not, returns a bit-mask of top-level register classes that are interfering. Register
    /// pressure should be eased in one of the returned top-level register classes before calling
    /// `check_avail()` again.
    fn check_avail(&self, rc: RegClass) -> RegClassMask {
        let entry = match self.toprc.get(rc.toprc as usize) {
            None => return 0, // Not a pressure tracked bank.
            Some(e) => e,
        };
        let mask = 1 << rc.toprc;
        if (self.aliased & mask) == 0 {
            // This is a simple unaliased top-level register class.
            if entry.total_count() < entry.limit {
                0
            } else {
                mask
            }
        } else {
            // This is the more complicated case. The top-level register class has aliases.
            self.check_avail_aliased(entry)
        }
    }

    /// Check for an available register in a top-level register class that may have aliases.
    ///
    /// This is the out-of-line slow path for `check_avail()`.
    fn check_avail_aliased(&self, entry: &TopRC) -> RegClassMask {
        let first = usize::from(entry.first_toprc);
        let num = usize::from(entry.num_toprcs);
        let width = u32::from(entry.width);
        let ulimit = entry.limit * width;

        // Count up the number of available register units.
        let mut units = 0;
        for (rc, rci) in self.toprc[first..first + num].iter().zip(first..) {
            let rcw = u32::from(rc.width);
            // If `rc.width` is smaller than `width`, each register in `rc` could potentially block
            // one of ours. This is assuming that none of the smaller registers are straddling the
            // bigger ones.
            //
            // If `rc.width` is larger than `width`, we are also assuming that the registers are
            // aligned and `rc.width` is a multiple of `width`.
            let u = if rcw < width {
                // We can't take more than the total number of register units in the class.
                // This matters for arm32 S-registers which can only ever lock out 16 D-registers.
                min(rc.total_count() * width, rc.limit * rcw)
            } else {
                rc.total_count() * rcw
            };

            // If this top-level RC on its own is responsible for exceeding our limit, return it
            // early to guarantee that registers here are spilled before spilling other registers
            // unnecessarily.
            if u >= ulimit {
                return 1 << rci;
            }

            units += u;
        }

        // We've counted up the worst-case number of register units claimed by all aliasing
        // classes. Compare to the unit limit in this class.
        if units < ulimit {
            0
        } else {
            // Registers need to be spilled from any one of the aliasing classes.
            ((1 << num) - 1) << first
        }
    }
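
    // Worked example (illustrative, assuming the arm32 float bank has 32 S, 32 D,
    // and 16 Q registers sharing 64 register units): suppose 16 S, 1 D, and 7 Q
    // registers are taken. Checking `Q` (width 4, ulimit = 16 * 4 = 64):
    //   S contributes min(16 * 4, 32 * 1) = 32 units,
    //   D contributes min(1 * 4, 32 * 2) = 4 units,
    //   Q contributes 7 * 4 = 28 units,
    // so 32 + 4 + 28 = 64 >= 64 and no more Q registers can be taken. Checking `D`
    // (width 2, ulimit = 32 * 2 = 64): 32 + 2 + 28 = 62 < 64, so a D is still
    // available. This matches the `arm_float_bank` test below.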

    /// Take a register from `rc`.
    ///
    /// This does not check if there are enough registers available.
    pub fn take(&mut self, rc: RegClass) {
        if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
            t.base_count += 1;
        }
    }

    /// Free a register in `rc`.
    pub fn free(&mut self, rc: RegClass) {
        if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
            t.base_count -= 1;
        }
    }

    /// Reset all counts to 0, both base and transient.
    pub fn reset(&mut self) {
        for e in &mut self.toprc {
            e.base_count = 0;
            e.transient_count = 0;
        }
    }

    /// Try to increment a transient counter.
    ///
    /// This will fail if there are not enough registers available.
    pub fn take_transient(&mut self, rc: RegClass) -> Result<(), RegClassMask> {
        let mask = self.check_avail(rc);
        if mask == 0 {
            if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
                t.transient_count += 1;
            }

            Ok(())
        } else {
            Err(mask)
        }
    }

    /// Reset all transient counts to 0.
    pub fn reset_transient(&mut self) {
        for e in &mut self.toprc {
            e.transient_count = 0;
        }
    }

    /// Preserve the transient counts by transferring them to the base counts.
    pub fn preserve_transient(&mut self) {
        for e in &mut self.toprc {
            e.base_count += e.transient_count;
            e.transient_count = 0;
        }
    }
}
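
// A minimal sketch (not part of this commit) of how a spilling pass might use the
// base/transient split described in the module docs: make tentative `take_transient`
// calls while scanning an instruction, then commit or roll back. `rc` is assumed to
// come from the target ISA's `RegInfo`.
fn try_take_for_inst(pressure: &mut Pressure, rc: RegClass) -> bool {
    match pressure.take_transient(rc) {
        // The tentative allocation fits; `preserve_transient()` commits it once the
        // whole instruction succeeds, and `reset_transient()` rolls it back otherwise.
        Ok(()) => true,
        // Out of registers: the caller must first spill from one of the top-level
        // classes in the returned mask.
        Err(_interfering) => false,
    }
}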

impl fmt::Display for Pressure {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Pressure[")?;
        for rc in &self.toprc {
            if rc.limit > 0 && rc.limit < !0 {
                write!(f, " {}+{}/{}", rc.base_count, rc.transient_count, rc.limit)?;
            }
        }
        write!(f, " ]")
    }
}

#[cfg(test)]
#[cfg(build_arm32)]
mod tests {
    use super::Pressure;
    use crate::isa::{RegClass, TargetIsa};
    use crate::regalloc::RegisterSet;
    use core::borrow::Borrow;
    use core::str::FromStr;
    use std::boxed::Box;
    use target_lexicon::triple;

    // Make an arm32 `TargetIsa`, if possible.
    fn arm32() -> Option<Box<TargetIsa>> {
        use crate::isa;
        use crate::settings;

        let shared_builder = settings::builder();
        let shared_flags = settings::Flags::new(shared_builder);

        isa::lookup(triple!("arm"))
            .ok()
            .map(|b| b.finish(shared_flags))
    }

    // Get a register class by name.
    fn rc_by_name(isa: &TargetIsa, name: &str) -> RegClass {
        isa.register_info()
            .classes
            .iter()
            .find(|rc| rc.name == name)
            .expect("Can't find named register class.")
    }

    #[test]
    fn basic_counting() {
        let isa = arm32().expect("This test requires arm32 support");
        let isa = isa.borrow();
        let gpr = rc_by_name(isa, "GPR");
        let s = rc_by_name(isa, "S");
        let reginfo = isa.register_info();
        let regs = RegisterSet::new();

        let mut pressure = Pressure::new(&reginfo, &regs);
        let mut count = 0;
        while pressure.check_avail(gpr) == 0 {
            pressure.take(gpr);
            count += 1;
        }
        assert_eq!(count, 16);
        assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
        assert_eq!(pressure.check_avail(s), 0);
        pressure.free(gpr);
        assert_eq!(pressure.check_avail(gpr), 0);
        pressure.take(gpr);
        assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
        assert_eq!(pressure.check_avail(s), 0);
        pressure.reset();
        assert_eq!(pressure.check_avail(gpr), 0);
        assert_eq!(pressure.check_avail(s), 0);
    }

    #[test]
    fn arm_float_bank() {
        let isa = arm32().expect("This test requires arm32 support");
        let isa = isa.borrow();
        let s = rc_by_name(isa, "S");
        let d = rc_by_name(isa, "D");
        let q = rc_by_name(isa, "Q");
        let reginfo = isa.register_info();
        let regs = RegisterSet::new();

        let mut pressure = Pressure::new(&reginfo, &regs);
        assert_eq!(pressure.check_avail(s), 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert_eq!(pressure.check_avail(q), 0);

        // Allocating a single S-register should not affect availability.
        pressure.take(s);
        assert_eq!(pressure.check_avail(s), 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert_eq!(pressure.check_avail(q), 0);

        pressure.take(d);
        assert_eq!(pressure.check_avail(s), 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert_eq!(pressure.check_avail(q), 0);

        pressure.take(q);
        assert_eq!(pressure.check_avail(s), 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert_eq!(pressure.check_avail(q), 0);

        // Take a total of 16 S-regs.
        for _ in 1..16 {
            pressure.take(s);
        }
        assert_eq!(pressure.check_avail(s), 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert_eq!(pressure.check_avail(q), 0);

        // We've taken 16 S, 1 D, and 1 Q. There should be 6 more Qs.
        for _ in 0..6 {
            assert_eq!(pressure.check_avail(d), 0);
            assert_eq!(pressure.check_avail(q), 0);
            pressure.take(q);
        }

        // We've taken 16 S, 1 D, and 7 Qs.
        assert!(pressure.check_avail(s) != 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert!(pressure.check_avail(q) != 0);
    }
}
324
cranelift/codegen/src/regalloc/register_set.rs
Normal file
@@ -0,0 +1,324 @@
//! Set of allocatable registers as a bit vector of register units.
//!
//! While allocating registers, we need to keep track of which registers are available and which
//! registers are in use. Since registers can alias in different ways, we track this via the
//! "register unit" abstraction. Every register contains one or more register units. Registers that
//! share a register unit can't be in use at the same time.
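//!
//! For example (illustrative): if a 64-bit register `D0` is the pair of the 32-bit
//! registers `S0` and `S1`, all three map onto the same two register units, so
//! taking `D0` makes `S0` and `S1` unavailable, and vice versa.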

use crate::isa::registers::{RegClass, RegInfo, RegUnit, RegUnitMask};
use core::char;
use core::fmt;
use core::iter::ExactSizeIterator;
use core::mem::size_of_val;

/// Set of registers available for allocation.
#[derive(Clone)]
pub struct RegisterSet {
    avail: RegUnitMask,
}

// Given a register class and a register unit in the class, compute a word index and a bit mask of
// register units representing that register.
//
// Note that a register is not allowed to straddle words.
fn bitmask(rc: RegClass, reg: RegUnit) -> (usize, u32) {
    // Bit mask representing the register. It is `rc.width` consecutive units.
    let width_bits = (1 << rc.width) - 1;
    // Index into avail[] of the word containing `reg`.
    let word_index = (reg / 32) as usize;
    // The actual bits in the word that cover `reg`.
    let reg_bits = width_bits << (reg % 32);

    (word_index, reg_bits)
}
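
// Worked example (illustrative): for a class with `width == 2` and `reg == 33`,
// `width_bits == 0b11`, `word_index == 1`, and `reg_bits == 0b11 << 1 == 0b110`,
// i.e. the register covers units 33 and 34 in the second word of `avail`.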

impl RegisterSet {
    /// Create a new register set with all registers available.
    ///
    /// Note that this includes *all* registers. Query the `TargetIsa` object to get a set of
    /// allocatable registers where reserved registers have been filtered out.
    pub fn new() -> Self {
        Self { avail: [!0; 3] }
    }

    /// Create a new register set with no registers available.
    pub fn empty() -> Self {
        Self { avail: [0; 3] }
    }

    /// Returns `true` if the specified register is available.
    pub fn is_avail(&self, rc: RegClass, reg: RegUnit) -> bool {
        let (idx, bits) = bitmask(rc, reg);
        (self.avail[idx] & bits) == bits
    }

    /// Allocate `reg` from `rc` so it is no longer available.
    ///
    /// It is an error to take a register that doesn't have all of its register units available.
    pub fn take(&mut self, rc: RegClass, reg: RegUnit) {
        let (idx, bits) = bitmask(rc, reg);
        debug_assert!(
            (self.avail[idx] & bits) == bits,
            "{}:{} not available in {}",
            rc,
            rc.info.display_regunit(reg),
            self.display(rc.info)
        );
        self.avail[idx] &= !bits;
    }

    /// Return `reg` and all of its register units to the set of available registers.
    pub fn free(&mut self, rc: RegClass, reg: RegUnit) {
        let (idx, bits) = bitmask(rc, reg);
        debug_assert!(
            (self.avail[idx] & bits) == 0,
            "{}:{} is already free in {}",
            rc,
            rc.info.display_regunit(reg),
            self.display(rc.info)
        );
        self.avail[idx] |= bits;
    }

    /// Return an iterator over all available registers belonging to the register class `rc`.
    ///
    /// This doesn't allocate anything from the set; use `take()` for that.
    pub fn iter(&self, rc: RegClass) -> RegSetIter {
        // Start by copying the RC mask. It is a single set bit for each register in the class.
        let mut rsi = RegSetIter { regs: rc.mask };

        // Mask out the unavailable units.
        for idx in 0..self.avail.len() {
            // If a single unit in a register is unavailable, the whole register can't be used.
            // If a register straddles a word boundary, it will be marked as unavailable.
            // There's an assertion in `cdsl/registers.py` to check for that.
            for i in 0..rc.width {
                rsi.regs[idx] &= self.avail[idx] >> i;
            }
        }
        rsi
    }

    /// Check if any register units allocated out of this set interfere with units allocated out
    /// of `other`.
    ///
    /// This assumes that unused bits are 1.
    pub fn interferes_with(&self, other: &Self) -> bool {
        self.avail
            .iter()
            .zip(&other.avail)
            .any(|(&x, &y)| (x | y) != !0)
    }

    /// Intersect this set of registers with `other`. This has the effect of removing any register
    /// units from this set that are not in `other`.
    pub fn intersect(&mut self, other: &Self) {
        for (x, &y) in self.avail.iter_mut().zip(&other.avail) {
            *x &= y;
        }
    }

    /// Return an object that can display this register set, using the register info from the
    /// target ISA.
    pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayRegisterSet<'a> {
        DisplayRegisterSet(self.clone(), regs.into())
    }
}

/// Iterator over available registers in a register class.
pub struct RegSetIter {
    regs: RegUnitMask,
}

impl Iterator for RegSetIter {
    type Item = RegUnit;

    fn next(&mut self) -> Option<RegUnit> {
        let mut unit_offset = 0;

        // Find the first set bit in `self.regs`.
        for word in &mut self.regs {
            if *word != 0 {
                // Compute the register unit number from the lowest set bit in the word.
                let unit = unit_offset + word.trailing_zeros() as RegUnit;

                // Clear that lowest bit so we won't find it again.
                *word &= *word - 1;

                return Some(unit);
            }
            // How many register units were there in the word? This is a constant 32 for `u32` etc.
            unit_offset += 8 * size_of_val(word) as RegUnit;
        }

        // All of `self.regs` is 0.
        None
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let bits = self.regs.iter().map(|&w| w.count_ones() as usize).sum();
        (bits, Some(bits))
    }
}

impl ExactSizeIterator for RegSetIter {}

/// Displaying a `RegisterSet` correctly requires the associated `RegInfo` from the target ISA.
pub struct DisplayRegisterSet<'a>(RegisterSet, Option<&'a RegInfo>);

impl<'a> fmt::Display for DisplayRegisterSet<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "[")?;
        match self.1 {
            None => {
                for w in &self.0.avail {
                    write!(f, " #{:08x}", w)?;
                }
            }
            Some(reginfo) => {
                let toprcs = reginfo
                    .banks
                    .iter()
                    .map(|b| b.first_toprc + b.num_toprcs)
                    .max()
                    .expect("No register banks");
                for rc in &reginfo.classes[0..toprcs] {
                    if rc.width == 1 {
                        let bank = &reginfo.banks[rc.bank as usize];
                        write!(f, " {}: ", rc)?;
                        for offset in 0..bank.units {
                            let reg = bank.first_unit + offset;
                            if !rc.contains(reg) {
                                continue;
                            }
                            if !self.0.is_avail(rc, reg) {
                                write!(f, "-")?;
                                continue;
                            }
                            // Display individual registers as either the second letter of their
                            // name or the last digit of their number.
                            // This works for x86 (rax, rbx, ...) and for numbered regs.
                            write!(
                                f,
                                "{}",
                                bank.names
                                    .get(offset as usize)
                                    .and_then(|name| name.chars().nth(1))
                                    .unwrap_or_else(|| char::from_digit(
                                        u32::from(offset % 10),
                                        10
                                    )
                                    .unwrap())
                            )?;
                        }
                    }
                }
            }
        }
        write!(f, " ]")
    }
}

impl fmt::Display for RegisterSet {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.display(None).fmt(f)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::isa::registers::{RegClass, RegClassData};
    use std::vec::Vec;

    // Register classes for testing.
    const GPR: RegClass = &RegClassData {
        name: "GPR",
        index: 0,
        width: 1,
        bank: 0,
        toprc: 0,
        first: 28,
        subclasses: 0,
        mask: [0xf0000000, 0x0000000f, 0],
        info: &INFO,
    };

    const DPR: RegClass = &RegClassData {
        name: "DPR",
        index: 0,
        width: 2,
        bank: 0,
        toprc: 0,
        first: 28,
        subclasses: 0,
        mask: [0x50000000, 0x0000000a, 0],
        info: &INFO,
    };

    const INFO: RegInfo = RegInfo {
        banks: &[],
        classes: &[],
    };

    #[test]
    fn put_and_take() {
        let mut regs = RegisterSet::new();

        // `GPR` has units 28-36.
        assert_eq!(regs.iter(GPR).len(), 8);
        assert_eq!(regs.iter(GPR).count(), 8);
        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [28, 30, 33, 35]);

        assert!(regs.is_avail(GPR, 29));
        regs.take(GPR, 29);
        assert!(!regs.is_avail(GPR, 29));

        assert_eq!(regs.iter(GPR).count(), 7);
        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);

        assert!(regs.is_avail(GPR, 30));
        regs.take(GPR, 30);
        assert!(!regs.is_avail(GPR, 30));

        assert_eq!(regs.iter(GPR).count(), 6);
        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);

        assert!(regs.is_avail(GPR, 32));
        regs.take(GPR, 32);
        assert!(!regs.is_avail(GPR, 32));

        assert_eq!(regs.iter(GPR).count(), 5);
        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);

        regs.free(GPR, 30);
        assert!(regs.is_avail(GPR, 30));
        assert!(!regs.is_avail(GPR, 29));
        assert!(!regs.is_avail(GPR, 32));

        assert_eq!(regs.iter(GPR).count(), 6);
        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);

        regs.free(GPR, 32);
        assert!(regs.is_avail(GPR, 31));
        assert!(!regs.is_avail(GPR, 29));
        assert!(regs.is_avail(GPR, 32));

        assert_eq!(regs.iter(GPR).count(), 7);
        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
    }

    #[test]
    fn interference() {
        let mut regs1 = RegisterSet::new();
        let mut regs2 = RegisterSet::new();

        assert!(!regs1.interferes_with(&regs2));
        regs1.take(GPR, 32);
        assert!(!regs1.interferes_with(&regs2));
        regs2.take(GPR, 31);
        assert!(!regs1.interferes_with(&regs2));
        regs1.intersect(&regs2);
        assert!(regs1.interferes_with(&regs2));
    }
}
438
cranelift/codegen/src/regalloc/reload.rs
Normal file
@@ -0,0 +1,438 @@
//! Reload pass
//!
//! The reload pass runs between the spilling and coloring passes. Its primary responsibility is to
//! insert `spill` and `fill` instructions such that instruction operands expecting a register will
//! get a value with register affinity, and operands expecting a stack slot will get a value with
//! stack affinity.
//!
//! The secondary responsibility of the reload pass is to reuse values in registers as much as
//! possible to minimize the number of `fill` instructions needed. This must not cause the register
//! pressure limits to be exceeded.
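//!
//! For example (an illustrative sketch, not output from this commit): if an encoding
//! requires `v1` in a register but `v1` has stack affinity, the pass rewrites
//!
//! ```text
//! v2 = iadd v1, v0
//! ```
//!
//! into
//!
//! ```text
//! v10 = fill v1
//! v2 = iadd v10, v0
//! ```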

use crate::cursor::{Cursor, EncCursor};
use crate::dominator_tree::DominatorTree;
use crate::entity::{SparseMap, SparseMapValue};
use crate::ir::{AbiParam, ArgumentLoc, InstBuilder};
use crate::ir::{Ebb, Function, Inst, InstructionData, Opcode, Value};
use crate::isa::RegClass;
use crate::isa::{ConstraintKind, EncInfo, Encoding, RecipeConstraints, TargetIsa};
use crate::regalloc::affinity::Affinity;
use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
use crate::regalloc::liveness::Liveness;
use crate::timing;
use crate::topo_order::TopoOrder;
use log::debug;
use std::vec::Vec;

/// Reusable data structures for the reload pass.
pub struct Reload {
    candidates: Vec<ReloadCandidate>,
    reloads: SparseMap<Value, ReloadedValue>,
}

/// Context data structure that gets instantiated once per pass.
struct Context<'a> {
    cur: EncCursor<'a>,

    // Cached ISA information.
    // We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object.
    encinfo: EncInfo,

    // References to contextual data structures we need.
    domtree: &'a DominatorTree,
    liveness: &'a mut Liveness,
    topo: &'a mut TopoOrder,

    candidates: &'a mut Vec<ReloadCandidate>,
    reloads: &'a mut SparseMap<Value, ReloadedValue>,
}

impl Reload {
    /// Create a new blank reload pass.
    pub fn new() -> Self {
        Self {
            candidates: Vec::new(),
            reloads: SparseMap::new(),
        }
    }

    /// Clear all data structures in this reload pass.
    pub fn clear(&mut self) {
        self.candidates.clear();
        self.reloads.clear();
    }

    /// Run the reload algorithm over `func`.
    pub fn run(
        &mut self,
        isa: &TargetIsa,
        func: &mut Function,
        domtree: &DominatorTree,
        liveness: &mut Liveness,
        topo: &mut TopoOrder,
        tracker: &mut LiveValueTracker,
    ) {
        let _tt = timing::ra_reload();
        debug!("Reload for:\n{}", func.display(isa));
        let mut ctx = Context {
            cur: EncCursor::new(func, isa),
            encinfo: isa.encoding_info(),
            domtree,
            liveness,
            topo,
            candidates: &mut self.candidates,
            reloads: &mut self.reloads,
        };
        ctx.run(tracker)
    }
}

/// A reload candidate.
///
/// This represents a stack value that is used by the current instruction where a register is
/// needed.
struct ReloadCandidate {
    argidx: usize,
    value: Value,
    regclass: RegClass,
}

/// A reloaded value.
///
/// This represents a value that has been reloaded into a register value from the stack.
struct ReloadedValue {
    stack: Value,
    reg: Value,
}

impl SparseMapValue<Value> for ReloadedValue {
    fn key(&self) -> Value {
        self.stack
    }
}

impl<'a> Context<'a> {
    fn run(&mut self, tracker: &mut LiveValueTracker) {
        self.topo.reset(self.cur.func.layout.ebbs());
        while let Some(ebb) = self.topo.next(&self.cur.func.layout, self.domtree) {
            self.visit_ebb(ebb, tracker);
        }
    }

    fn visit_ebb(&mut self, ebb: Ebb, tracker: &mut LiveValueTracker) {
        debug!("Reloading {}:", ebb);
        self.visit_ebb_header(ebb, tracker);
        tracker.drop_dead_params();

        // visit_ebb_header() places us at the first interesting instruction in the EBB.
        while let Some(inst) = self.cur.current_inst() {
            if !self.cur.func.dfg[inst].opcode().is_ghost() {
                // This instruction either has an encoding or has ABI constraints, so visit it to
                // insert spills and fills as needed.
                let encoding = self.cur.func.encodings[inst];
                self.visit_inst(ebb, inst, encoding, tracker);
                tracker.drop_dead(inst);
            } else {
                // This is a ghost instruction with no encoding and no extra constraints, so we can
                // just skip over it.
                self.cur.next_inst();
            }
        }
    }

    /// Process the EBB parameters, then move to the next instruction in the EBB to be processed.
    fn visit_ebb_header(&mut self, ebb: Ebb, tracker: &mut LiveValueTracker) {
        let (liveins, args) = tracker.ebb_top(
            ebb,
            &self.cur.func.dfg,
            self.liveness,
            &self.cur.func.layout,
            self.domtree,
        );

        if self.cur.func.layout.entry_block() == Some(ebb) {
            debug_assert_eq!(liveins.len(), 0);
            self.visit_entry_params(ebb, args);
        } else {
            self.visit_ebb_params(ebb, args);
        }
    }

    /// Visit the parameters on the entry block.
    /// These values have ABI constraints from the function signature.
    fn visit_entry_params(&mut self, ebb: Ebb, args: &[LiveValue]) {
        debug_assert_eq!(self.cur.func.signature.params.len(), args.len());
        self.cur.goto_first_inst(ebb);

        for (arg_idx, arg) in args.iter().enumerate() {
            let abi = self.cur.func.signature.params[arg_idx];
            match abi.location {
                ArgumentLoc::Reg(_) => {
                    if arg.affinity.is_stack() {
                        // An incoming register parameter was spilled. Replace the parameter value
                        // with a temporary register value that is immediately spilled.
                        let reg = self
                            .cur
                            .func
                            .dfg
                            .replace_ebb_param(arg.value, abi.value_type);
                        let affinity = Affinity::abi(&abi, self.cur.isa);
                        self.liveness.create_dead(reg, ebb, affinity);
                        self.insert_spill(ebb, arg.value, reg);
                    }
                }
                ArgumentLoc::Stack(_) => {
                    debug_assert!(arg.affinity.is_stack());
                }
                ArgumentLoc::Unassigned => panic!("Unexpected ABI location"),
            }
        }
    }

    fn visit_ebb_params(&mut self, ebb: Ebb, _args: &[LiveValue]) {
        self.cur.goto_first_inst(ebb);
    }

    /// Process the instruction pointed to by `pos`, and advance the cursor to the next instruction
    /// that needs processing.
    fn visit_inst(
        &mut self,
        ebb: Ebb,
        inst: Inst,
        encoding: Encoding,
        tracker: &mut LiveValueTracker,
    ) {
        self.cur.use_srcloc(inst);

        // Get the operand constraints for `inst` that we are trying to satisfy.
        let constraints = self.encinfo.operand_constraints(encoding);

        // Identify reload candidates.
        debug_assert!(self.candidates.is_empty());
        self.find_candidates(inst, constraints);

        if let InstructionData::Unary {
            opcode: Opcode::Copy,
            ..
        } = self.cur.func.dfg[inst]
        {
            self.reload_copy_candidates(inst);
        } else {
            self.reload_inst_candidates(ebb, inst);
        }

        // TODO: Reuse reloads for future instructions.
        self.reloads.clear();

        let (_throughs, _kills, defs) =
            tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);

        // Advance to the next instruction so we can insert any spills after the instruction.
        self.cur.next_inst();

        // Rewrite register defs that need to be spilled.
        //
        // Change:
        //
        // v2 = inst ...
        //
        // Into:
        //
        // v7 = inst ...
        // v2 = spill v7
        //
        // That way, we don't need to rewrite all future uses of v2.
        if let Some(constraints) = constraints {
            for (lv, op) in defs.iter().zip(constraints.outs) {
                if lv.affinity.is_stack() && op.kind != ConstraintKind::Stack {
                    if let InstructionData::Unary {
                        opcode: Opcode::Copy,
                        arg,
                    } = self.cur.func.dfg[inst]
                    {
                        self.cur.func.dfg.replace(inst).spill(arg);
                        let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
                        debug_assert!(ok);
                    } else {
                        let value_type = self.cur.func.dfg.value_type(lv.value);
                        let reg = self.cur.func.dfg.replace_result(lv.value, value_type);
                        self.liveness.create_dead(reg, inst, Affinity::new(op));
                        self.insert_spill(ebb, lv.value, reg);
                    }
                }
            }
        }

        // Same thing for spilled call return values.
        let retvals = &defs[self.cur.func.dfg[inst]
            .opcode()
            .constraints()
            .num_fixed_results()..];
        if !retvals.is_empty() {
            let sig = self
                .cur
                .func
                .dfg
                .call_signature(inst)
                .expect("Extra results on non-call instruction");
            for (i, lv) in retvals.iter().enumerate() {
                let abi = self.cur.func.dfg.signatures[sig].returns[i];
                debug_assert!(
                    abi.location.is_reg(),
                    "expected reg; got {:?}",
                    abi.location
                );
                if lv.affinity.is_stack() {
                    let reg = self.cur.func.dfg.replace_result(lv.value, abi.value_type);
                    self.liveness
                        .create_dead(reg, inst, Affinity::abi(&abi, self.cur.isa));
                    self.insert_spill(ebb, lv.value, reg);
                }
            }
        }
    }

    // Reload the current candidates for the given `inst`.
    fn reload_inst_candidates(&mut self, ebb: Ebb, inst: Inst) {
        // Insert fill instructions before `inst` and replace `cand.value` with the filled value.
        for cand in self.candidates.iter_mut() {
            if let Some(reload) = self.reloads.get(cand.value) {
                cand.value = reload.reg;
                continue;
            }

            let reg = self.cur.ins().fill(cand.value);
            let fill = self.cur.built_inst();

            self.reloads.insert(ReloadedValue {
                stack: cand.value,
                reg,
            });
            cand.value = reg;

            // Create a live range for the new reload.
            let affinity = Affinity::Reg(cand.regclass.into());
            self.liveness.create_dead(reg, fill, affinity);
            self.liveness
                .extend_locally(reg, ebb, inst, &self.cur.func.layout);
        }

        // Rewrite instruction arguments.
        //
        // Only rewrite those arguments that were identified as candidates. This leaves EBB
        // arguments on branches as-is without rewriting them. A spilled EBB argument needs to stay
        // spilled because the matching EBB parameter is going to be in the same virtual register
        // and therefore the same stack slot as the EBB argument value.
        if !self.candidates.is_empty() {
            let args = self.cur.func.dfg.inst_args_mut(inst);
            while let Some(cand) = self.candidates.pop() {
                args[cand.argidx] = cand.value;
            }
        }
    }

    // Reload the current candidates for the given copy `inst`.
    //
    // As an optimization, replace a copy instruction where the argument has been spilled with
    // a fill instruction.
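    //
    // E.g. (an illustrative sketch, values hypothetical): `v5 = copy v1` where `v1` has been
    // spilled is rewritten in place to `v5 = fill v1` instead of inserting a separate fill.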
    fn reload_copy_candidates(&mut self, inst: Inst) {
        // Copy instructions can only have one argument.
        debug_assert!(self.candidates.is_empty() || self.candidates.len() == 1);

        if let Some(cand) = self.candidates.pop() {
            self.cur.func.dfg.replace(inst).fill(cand.value);
            let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
            debug_assert!(ok);
        }
    }

    // Find reload candidates for `inst` and add them to `self.candidates`.
    //
    // These are uses of spilled values where the operand constraint requires a register.
    fn find_candidates(&mut self, inst: Inst, constraints: Option<&RecipeConstraints>) {
        let args = self.cur.func.dfg.inst_args(inst);

        if let Some(constraints) = constraints {
            for (argidx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() {
                if op.kind != ConstraintKind::Stack && self.liveness[arg].affinity.is_stack() {
                    self.candidates.push(ReloadCandidate {
                        argidx,
                        value: arg,
                        regclass: op.regclass,
                    })
                }
            }
        }

        // If we only have the fixed arguments, we're done now.
        let offset = self.cur.func.dfg[inst]
            .opcode()
            .constraints()
            .num_fixed_value_arguments();
        if args.len() == offset {
            return;
        }
        let var_args = &args[offset..];

        // Handle ABI arguments.
        if let Some(sig) = self.cur.func.dfg.call_signature(inst) {
            handle_abi_args(
                self.candidates,
                &self.cur.func.dfg.signatures[sig].params,
                var_args,
                offset,
                self.cur.isa,
                self.liveness,
            );
        } else if self.cur.func.dfg[inst].opcode().is_return() {
            handle_abi_args(
                self.candidates,
                &self.cur.func.signature.returns,
                var_args,
                offset,
                self.cur.isa,
                self.liveness,
            );
        }
    }

    /// Insert a spill at `pos` and update data structures.
    ///
    /// - Insert `stack = spill reg` at `pos`, and assign an encoding.
    /// - Move the `stack` live range starting point to the new instruction.
    /// - Extend the `reg` live range to reach the new instruction.
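    ///
    /// E.g. (an illustrative sketch, values hypothetical): `insert_spill(ebb, v2, v7)` leaves
    /// `v2 = spill v7` at the cursor, with `v2`'s def moved to the spill and `v7` extended to it.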
    fn insert_spill(&mut self, ebb: Ebb, stack: Value, reg: Value) {
        self.cur.ins().with_result(stack).spill(reg);
        let inst = self.cur.built_inst();

        // Update live ranges.
        self.liveness.move_def_locally(stack, inst);
        self.liveness
            .extend_locally(reg, ebb, inst, &self.cur.func.layout);
    }
}

/// Find reload candidates in the instruction's ABI variable arguments. This handles both
/// return values and call arguments.
fn handle_abi_args(
    candidates: &mut Vec<ReloadCandidate>,
    abi_types: &[AbiParam],
    var_args: &[Value],
    offset: usize,
    isa: &TargetIsa,
    liveness: &Liveness,
) {
    debug_assert_eq!(abi_types.len(), var_args.len());
    for ((abi, &arg), argidx) in abi_types.iter().zip(var_args).zip(offset..) {
        if abi.location.is_reg() {
            let lv = liveness.get(arg).expect("Missing live range for ABI arg");
            if lv.affinity.is_stack() {
                candidates.push(ReloadCandidate {
                    argidx,
                    value: arg,
                    regclass: isa.regclass_for_abi_type(abi.value_type),
                });
            }
        }
    }
}
1379
cranelift/codegen/src/regalloc/solver.rs
Normal file
File diff suppressed because it is too large
631
cranelift/codegen/src/regalloc/spilling.rs
Normal file
@@ -0,0 +1,631 @@
//! Spilling pass.
//!
//! The spilling pass is the first to run after the liveness analysis. Its primary function is to
//! ensure that the register pressure never exceeds the number of available registers by moving
//! some SSA values to spill slots on the stack. This is encoded in the affinity of the value's
//! live range.
//!
//! Some instruction operand constraints may require additional registers to resolve. Since this
//! can cause spilling, the spilling pass is also responsible for resolving those constraints by
//! inserting copies. The extra constraints are:
//!
//! 1. A value used by a tied operand must be killed by the instruction. This is resolved by
//!    inserting a copy to a temporary value when necessary.
//! 2. When the same value is used more than once by an instruction, the operand constraints must
//!    be compatible. Otherwise, the value must be copied into a new register for some of the
//!    operands.
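//!
//! As an illustrative sketch of constraint 1 (not real compiler output; the value numbers are
//! hypothetical), a tied operand whose value lives past the instruction gets a copy:
//!
//! ```text
//! ;; before: v3 is tied to the result, but v3 is used again later
//! v4 = iadd v3, v9
//!
//! ;; after: the copy v20 is killed by the instruction, satisfying the tied constraint
//! v20 = copy v3
//! v4 = iadd v20, v9
//! ```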

use crate::cursor::{Cursor, EncCursor};
use crate::dominator_tree::DominatorTree;
use crate::ir::{ArgumentLoc, Ebb, Function, Inst, InstBuilder, SigRef, Value, ValueLoc};
use crate::isa::registers::{RegClass, RegClassIndex, RegClassMask, RegUnit};
use crate::isa::{ConstraintKind, EncInfo, RecipeConstraints, RegInfo, TargetIsa};
use crate::regalloc::affinity::Affinity;
use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
use crate::regalloc::liveness::Liveness;
use crate::regalloc::pressure::Pressure;
use crate::regalloc::virtregs::VirtRegs;
use crate::timing;
use crate::topo_order::TopoOrder;
use core::fmt;
use log::debug;
use std::vec::Vec;

/// Return a top-level register class which contains `unit`.
fn toprc_containing_regunit(unit: RegUnit, reginfo: &RegInfo) -> RegClass {
    let bank = reginfo.bank_containing_regunit(unit).unwrap();
    reginfo.classes[bank.first_toprc..(bank.first_toprc + bank.num_toprcs)]
        .iter()
        .find(|&rc| rc.contains(unit))
        .expect("reg unit should be in a toprc")
}

/// Persistent data structures for the spilling pass.
pub struct Spilling {
    spills: Vec<Value>,
    reg_uses: Vec<RegUse>,
}

/// Context data structure that gets instantiated once per pass.
struct Context<'a> {
    // Current instruction as well as reference to function and ISA.
    cur: EncCursor<'a>,

    // Cached ISA information.
    reginfo: RegInfo,
    encinfo: EncInfo,

    // References to contextual data structures we need.
    domtree: &'a DominatorTree,
    liveness: &'a mut Liveness,
    virtregs: &'a VirtRegs,
    topo: &'a mut TopoOrder,

    // Current register pressure.
    pressure: Pressure,

    // Values spilled for the current instruction. These values have already been removed from the
    // pressure tracker, but they are still present in the live value tracker and their affinity
    // hasn't been changed yet.
    spills: &'a mut Vec<Value>,

    // Uses of register values in the current instruction.
    reg_uses: &'a mut Vec<RegUse>,
}

impl Spilling {
    /// Create a new spilling data structure.
    pub fn new() -> Self {
        Self {
            spills: Vec::new(),
            reg_uses: Vec::new(),
        }
    }

    /// Clear all data structures in this spilling pass.
    pub fn clear(&mut self) {
        self.spills.clear();
        self.reg_uses.clear();
    }

    /// Run the spilling algorithm over `func`.
    pub fn run(
        &mut self,
        isa: &TargetIsa,
        func: &mut Function,
        domtree: &DominatorTree,
        liveness: &mut Liveness,
        virtregs: &VirtRegs,
        topo: &mut TopoOrder,
        tracker: &mut LiveValueTracker,
    ) {
        let _tt = timing::ra_spilling();
        debug!("Spilling for:\n{}", func.display(isa));
        let reginfo = isa.register_info();
        let usable_regs = isa.allocatable_registers(func);
        let mut ctx = Context {
            cur: EncCursor::new(func, isa),
            reginfo: isa.register_info(),
            encinfo: isa.encoding_info(),
            domtree,
            liveness,
            virtregs,
            topo,
            pressure: Pressure::new(&reginfo, &usable_regs),
            spills: &mut self.spills,
            reg_uses: &mut self.reg_uses,
        };
        ctx.run(tracker)
    }
}

impl<'a> Context<'a> {
    fn run(&mut self, tracker: &mut LiveValueTracker) {
        self.topo.reset(self.cur.func.layout.ebbs());
        while let Some(ebb) = self.topo.next(&self.cur.func.layout, self.domtree) {
            self.visit_ebb(ebb, tracker);
        }
    }

    fn visit_ebb(&mut self, ebb: Ebb, tracker: &mut LiveValueTracker) {
        debug!("Spilling {}:", ebb);
        self.cur.goto_top(ebb);
        self.visit_ebb_header(ebb, tracker);
        tracker.drop_dead_params();
        self.process_spills(tracker);

        while let Some(inst) = self.cur.next_inst() {
            if !self.cur.func.dfg[inst].opcode().is_ghost() {
                self.visit_inst(inst, ebb, tracker);
            } else {
                let (_throughs, kills) = tracker.process_ghost(inst);
                self.free_regs(kills);
            }
            tracker.drop_dead(inst);
            self.process_spills(tracker);
        }
    }

    // Take all live registers in `regs` from the pressure set.
    // This doesn't cause any spilling; it is assumed there are enough registers.
    fn take_live_regs(&mut self, regs: &[LiveValue]) {
        for lv in regs {
            if !lv.is_dead {
                if let Affinity::Reg(rci) = lv.affinity {
                    let rc = self.reginfo.rc(rci);
                    self.pressure.take(rc);
                }
            }
        }
    }

    // Free all registers in `kills` from the pressure set.
    fn free_regs(&mut self, kills: &[LiveValue]) {
        for lv in kills {
            if let Affinity::Reg(rci) = lv.affinity {
                if !self.spills.contains(&lv.value) {
                    let rc = self.reginfo.rc(rci);
                    self.pressure.free(rc);
                }
            }
        }
    }

    // Free all dead registers in `regs` from the pressure set.
    fn free_dead_regs(&mut self, regs: &[LiveValue]) {
        for lv in regs {
            if lv.is_dead {
                if let Affinity::Reg(rci) = lv.affinity {
                    if !self.spills.contains(&lv.value) {
                        let rc = self.reginfo.rc(rci);
                        self.pressure.free(rc);
                    }
                }
            }
        }
    }

    fn visit_ebb_header(&mut self, ebb: Ebb, tracker: &mut LiveValueTracker) {
        let (liveins, params) = tracker.ebb_top(
            ebb,
            &self.cur.func.dfg,
            self.liveness,
            &self.cur.func.layout,
            self.domtree,
        );

        // Count the live-in registers. These should already fit in registers; they did at the
        // dominator.
        self.pressure.reset();
        self.take_live_regs(liveins);

        // An EBB can have an arbitrary (up to 2^16...) number of parameters, so they are not
        // guaranteed to fit in registers.
        for lv in params {
            if let Affinity::Reg(rci) = lv.affinity {
                let rc = self.reginfo.rc(rci);
                'try_take: while let Err(mask) = self.pressure.take_transient(rc) {
                    debug!("Need {} reg for EBB param {}", rc, lv.value);
                    match self.spill_candidate(mask, liveins) {
                        Some(cand) => {
                            debug!(
                                "Spilling live-in {} to make room for {} EBB param {}",
                                cand, rc, lv.value
                            );
                            self.spill_reg(cand);
                        }
                        None => {
                            // We can't spill any of the live-in registers, so we have to spill an
                            // EBB argument. Since the current spill metric would consider all the
                            // EBB arguments equal, just spill the present register.
                            debug!("Spilling {} EBB argument {}", rc, lv.value);

                            // Since `spill_reg` will free a register, add the current one here.
                            self.pressure.take(rc);
                            self.spill_reg(lv.value);
                            break 'try_take;
                        }
                    }
                }
            }
        }

        // The transient pressure counts for the EBB arguments are accurate. Just preserve them.
        self.pressure.preserve_transient();
        self.free_dead_regs(params);
    }

    fn visit_inst(&mut self, inst: Inst, ebb: Ebb, tracker: &mut LiveValueTracker) {
        debug!("Inst {}, {}", self.cur.display_inst(inst), self.pressure);
        debug_assert_eq!(self.cur.current_inst(), Some(inst));
        debug_assert_eq!(self.cur.current_ebb(), Some(ebb));

        let constraints = self
            .encinfo
            .operand_constraints(self.cur.func.encodings[inst]);

        // We may need to resolve register constraints if there are any noteworthy uses.
        debug_assert!(self.reg_uses.is_empty());
        self.collect_reg_uses(inst, ebb, constraints);

        // Calls usually have fixed register uses.
        let call_sig = self.cur.func.dfg.call_signature(inst);
        if let Some(sig) = call_sig {
            self.collect_abi_reg_uses(inst, sig);
        }

        if !self.reg_uses.is_empty() {
            self.process_reg_uses(inst, tracker);
        }

        // Update the live value tracker with this instruction.
        let (throughs, kills, defs) = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);

        // Remove kills from the pressure tracker.
        self.free_regs(kills);

        // If inst is a call, spill all register values that are live across the call.
        // This means that we don't currently take advantage of callee-saved registers.
        // TODO: Be more sophisticated.
        if call_sig.is_some() {
            for lv in throughs {
                if lv.affinity.is_reg() && !self.spills.contains(&lv.value) {
                    self.spill_reg(lv.value);
                }
            }
        }

        // Make sure we have enough registers for the register defs.
        // Dead defs are included here. They need a register too.
        // No need to process call return values, they are in fixed registers.
        if let Some(constraints) = constraints {
            for op in constraints.outs {
                if op.kind != ConstraintKind::Stack {
                    // Add register def to pressure, spill if needed.
                    while let Err(mask) = self.pressure.take_transient(op.regclass) {
                        debug!("Need {} reg from {} throughs", op.regclass, throughs.len());
                        match self.spill_candidate(mask, throughs) {
                            Some(cand) => self.spill_reg(cand),
                            None => panic!(
                                "Ran out of {} registers for {}",
                                op.regclass,
                                self.cur.display_inst(inst)
                            ),
                        }
                    }
                }
            }
            self.pressure.reset_transient();
        }

        // Restore pressure state, compute pressure with affinities from `defs`.
        // Exclude dead defs. Includes call return values.
        // This won't cause spilling.
        self.take_live_regs(defs);
    }

    // Collect register uses that are noteworthy in one of the following ways:
    //
    // 1. It's a fixed register constraint.
    // 2. It's a use of a spilled value.
    // 3. It's a tied register constraint and the value isn't killed.
    //
    // We are assuming here that if a value is used both by a fixed register operand and a register
    // class operand, the two are compatible. We are also assuming that two register class
    // operands are always compatible.
    fn collect_reg_uses(&mut self, inst: Inst, ebb: Ebb, constraints: Option<&RecipeConstraints>) {
        let args = self.cur.func.dfg.inst_args(inst);
        let num_fixed_ins = if let Some(constraints) = constraints {
            for (idx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() {
                let mut reguse = RegUse::new(arg, idx, op.regclass.into());
                let lr = &self.liveness[arg];
                let ctx = self.liveness.context(&self.cur.func.layout);
                match op.kind {
                    ConstraintKind::Stack => continue,
                    ConstraintKind::FixedReg(_) => reguse.fixed = true,
                    ConstraintKind::Tied(_) => {
                        // A tied operand must kill the used value.
                        reguse.tied = !lr.killed_at(inst, ebb, ctx);
                    }
                    ConstraintKind::FixedTied(_) => {
                        reguse.fixed = true;
                        reguse.tied = !lr.killed_at(inst, ebb, ctx);
                    }
                    ConstraintKind::Reg => {}
                }
                if lr.affinity.is_stack() {
                    reguse.spilled = true;
                }

                // Only collect the interesting register uses.
                if reguse.fixed || reguse.tied || reguse.spilled {
                    debug!(" reguse: {}", reguse);
                    self.reg_uses.push(reguse);
                }
            }
            constraints.ins.len()
        } else {
            // A non-ghost instruction with no constraints can't have any
            // fixed operands.
            0
        };

        // Similarly, for return instructions, collect uses of ABI-defined
        // return values.
        if self.cur.func.dfg[inst].opcode().is_return() {
            debug_assert_eq!(
                self.cur.func.dfg.inst_variable_args(inst).len(),
                self.cur.func.signature.returns.len(),
                "The non-fixed arguments in a return should follow the function's signature."
            );
            for (ret_idx, (ret, &arg)) in
                self.cur.func.signature.returns.iter().zip(args).enumerate()
            {
                let idx = num_fixed_ins + ret_idx;
                let unit = match ret.location {
                    ArgumentLoc::Unassigned => {
                        panic!("function return signature should be legalized")
                    }
                    ArgumentLoc::Reg(unit) => unit,
                    ArgumentLoc::Stack(_) => continue,
                };
                let toprc = toprc_containing_regunit(unit, &self.reginfo);
                let mut reguse = RegUse::new(arg, idx, toprc.into());
                reguse.fixed = true;

                debug!(" reguse: {}", reguse);
                self.reg_uses.push(reguse);
            }
        }
    }

    // Collect register uses from the ABI input constraints.
    fn collect_abi_reg_uses(&mut self, inst: Inst, sig: SigRef) {
        let num_fixed_args = self.cur.func.dfg[inst]
            .opcode()
            .constraints()
            .num_fixed_value_arguments();
        let args = self.cur.func.dfg.inst_variable_args(inst);
        for (idx, (abi, &arg)) in self.cur.func.dfg.signatures[sig]
            .params
            .iter()
            .zip(args)
            .enumerate()
        {
            if abi.location.is_reg() {
                let (rci, spilled) = match self.liveness[arg].affinity {
                    Affinity::Reg(rci) => (rci, false),
                    Affinity::Stack => (
                        self.cur.isa.regclass_for_abi_type(abi.value_type).into(),
                        true,
                    ),
                    Affinity::Unassigned => panic!("Missing affinity for {}", arg),
                };
                let mut reguse = RegUse::new(arg, num_fixed_args + idx, rci);
                reguse.fixed = true;
                reguse.spilled = spilled;
                self.reg_uses.push(reguse);
            }
        }
    }

    // Process multiple register uses to resolve potential conflicts.
    //
    // Look for multiple uses of the same value in `self.reg_uses` and insert copies as necessary.
    // Trigger spilling if any of the temporaries cause the register pressure to become too high.
    //
    // Leave `self.reg_uses` empty.
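    //
    // E.g. (an illustrative sketch, values hypothetical): if `v1` is used twice and the second
    // use carries a fixed-register constraint, that use is rewritten to a fresh `v9 = copy v1`.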
    fn process_reg_uses(&mut self, inst: Inst, tracker: &LiveValueTracker) {
        // We're looking for multiple uses of the same value, so start by sorting by value. The
        // secondary `opidx` key makes it possible to use an unstable (non-allocating) sort.
        self.reg_uses.sort_unstable_by_key(|u| (u.value, u.opidx));

        for i in 0..self.reg_uses.len() {
            let ru = self.reg_uses[i];

            // Do we need to insert a copy for this use?
            let need_copy = if ru.tied {
                true
            } else if ru.fixed {
                // This is a fixed register use which doesn't necessarily require a copy.
                // Make a copy only if this is not the first use of the value.
                self.reg_uses
                    .get(i.wrapping_sub(1))
                    .map_or(false, |ru2| ru2.value == ru.value)
            } else {
                false
            };

            if need_copy {
                let copy = self.insert_copy(ru.value, ru.rci);
                self.cur.func.dfg.inst_args_mut(inst)[ru.opidx as usize] = copy;
            }

            // Even if we don't insert a copy, we may need to account for register pressure for the
            // reload pass.
            if need_copy || ru.spilled {
                let rc = self.reginfo.rc(ru.rci);
                while let Err(mask) = self.pressure.take_transient(rc) {
                    debug!("Copy of {} reg causes spill", rc);
                    // Spill a live register that is *not* used by the current instruction.
                    // Spilling a use wouldn't help.
                    //
                    // Do allow spilling of EBB arguments on branches. This is safe since we spill
                    // the whole virtual register which includes the matching EBB parameter value
                    // at the branch destination. It is also necessary since there can be
                    // arbitrarily many EBB arguments.
                    match {
                        let args = if self.cur.func.dfg[inst].opcode().is_branch() {
                            self.cur.func.dfg.inst_fixed_args(inst)
                        } else {
                            self.cur.func.dfg.inst_args(inst)
                        };
                        self.spill_candidate(
                            mask,
                            tracker.live().iter().filter(|lv| !args.contains(&lv.value)),
                        )
                    } {
                        Some(cand) => self.spill_reg(cand),
                        None => panic!(
                            "Ran out of {} registers when inserting copy before {}",
                            rc,
                            self.cur.display_inst(inst)
                        ),
                    }
                }
            }
        }
        self.pressure.reset_transient();
        self.reg_uses.clear()
    }

    // Find a spill candidate from `candidates` whose top-level register class is in `mask`.
    fn spill_candidate<'ii, II>(&self, mask: RegClassMask, candidates: II) -> Option<Value>
    where
        II: IntoIterator<Item = &'ii LiveValue>,
    {
        // Find the best viable spill candidate.
        //
        // The very simple strategy implemented here is to spill the value with the earliest def in
        // the reverse post-order. This strategy depends on a good reload pass to generate good
        // code.
        //
        // We know that all candidate defs dominate the current instruction, so one of them will
        // dominate the others. That is the earliest def.
        candidates
            .into_iter()
            .filter_map(|lv| {
                // Viable candidates are registers in one of the `mask` classes, and not already in
                // the spill set.
                if let Affinity::Reg(rci) = lv.affinity {
                    let rc = self.reginfo.rc(rci);
                    if (mask & (1 << rc.toprc)) != 0 && !self.spills.contains(&lv.value) {
                        // Here, `lv` is a viable spill candidate.
                        return Some(lv.value);
                    }
                }
                None
            })
            .min_by(|&a, &b| {
                // Find the minimum candidate according to the RPO of their defs.
                self.domtree.rpo_cmp(
                    self.cur.func.dfg.value_def(a),
                    self.cur.func.dfg.value_def(b),
                    &self.cur.func.layout,
                )
            })
    }

    /// Spill `value` immediately by
    ///
    /// 1. Changing its affinity to `Stack` which marks the spill.
    /// 2. Removing the value from the pressure tracker.
    /// 3. Adding the value to `self.spills` for later reference by `process_spills`.
    ///
    /// Note that this does not update the cached affinity in the live value tracker. Call
    /// `process_spills` to do that.
    fn spill_reg(&mut self, value: Value) {
        if let Affinity::Reg(rci) = self.liveness.spill(value) {
            let rc = self.reginfo.rc(rci);
            self.pressure.free(rc);
            self.spills.push(value);
            debug!("Spilled {}:{} -> {}", value, rc, self.pressure);
        } else {
            panic!("Cannot spill {} that was already on the stack", value);
        }

        // Assign a spill slot for the whole virtual register.
        let ss = self
            .cur
            .func
            .stack_slots
            .make_spill_slot(self.cur.func.dfg.value_type(value));
        for &v in self.virtregs.congruence_class(&value) {
            self.liveness.spill(v);
            self.cur.func.locations[v] = ValueLoc::Stack(ss);
        }
    }

    /// Process any pending spills in the `self.spills` vector.
    ///
    /// It is assumed that spills are removed from the pressure tracker immediately, see
    /// `spill_reg` above.
    ///
    /// We also need to update the live range affinity and remove spilled values from the live
    /// value tracker.
    fn process_spills(&mut self, tracker: &mut LiveValueTracker) {
        if !self.spills.is_empty() {
            tracker.process_spills(|v| self.spills.contains(&v));
            self.spills.clear()
        }
    }

    /// Insert a `copy value` before the current instruction and give it a live range extending to
    /// the current instruction.
    ///
    /// Returns the new local value created.
    fn insert_copy(&mut self, value: Value, rci: RegClassIndex) -> Value {
        let copy = self.cur.ins().copy(value);
        let inst = self.cur.built_inst();

        // Update live ranges.
        self.liveness.create_dead(copy, inst, Affinity::Reg(rci));
        self.liveness.extend_locally(
            copy,
            self.cur.func.layout.pp_ebb(inst),
            self.cur.current_inst().expect("must be at an instruction"),
            &self.cur.func.layout,
        );

        copy
    }
}

/// Struct representing a register use of a value.
/// Used to detect multiple uses of the same value with incompatible register constraints.
#[derive(Clone, Copy)]
struct RegUse {
    value: Value,
    opidx: u16,

    // Register class required by the use.
    rci: RegClassIndex,

    // A use with a fixed register constraint.
    fixed: bool,

    // A register use of a spilled value.
    spilled: bool,

    // A use with a tied register constraint *and* the used value is not killed.
    tied: bool,
}

impl RegUse {
    fn new(value: Value, idx: usize, rci: RegClassIndex) -> Self {
        Self {
            value,
            opidx: idx as u16,
            rci,
            fixed: false,
            spilled: false,
            tied: false,
        }
    }
}

impl fmt::Display for RegUse {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{}@op{}", self.value, self.opidx)?;
        if self.fixed {
            write!(f, "/fixed")?;
        }
        if self.spilled {
            write!(f, "/spilled")?;
        }
        if self.tied {
            write!(f, "/tied")?;
        }
        Ok(())
    }
}
504
cranelift/codegen/src/regalloc/virtregs.rs
Normal file
@@ -0,0 +1,504 @@
//! Virtual registers.
//!
//! A virtual register is a set of related SSA values whose live ranges don't interfere. If all the
//! values in a virtual register are assigned to the same location, fewer copies will result in the
//! output.
//!
//! A virtual register is typically built by merging together SSA values that are "phi-related" -
//! that is, one value is passed as an EBB argument to a branch and the other is the EBB parameter
//! value itself.
//!
//! If any values in a virtual register are spilled, they will use the same stack slot. This avoids
//! memory-to-memory copies when a spilled value is passed as an EBB argument.
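//!
//! As a sketch of the phi-relation (illustrative only; the EBBs and values are hypothetical):
//!
//! ```text
//! ebb0:
//!     v1 = iconst.i32 1
//!     brnz v9, ebb1(v1)    ; v1 is passed as an EBB argument...
//!
//! ebb1(v2: i32):           ; ...so v1 and v2 are phi-related and can share a virtual register
//! ```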

use crate::dbg::DisplayList;
use crate::dominator_tree::DominatorTreePreorder;
use crate::entity::entity_impl;
use crate::entity::{EntityList, ListPool};
use crate::entity::{Keys, PrimaryMap, SecondaryMap};
use crate::ir::{Function, Value};
use crate::packed_option::PackedOption;
use crate::ref_slice::ref_slice;
use core::cmp::Ordering;
use core::fmt;
use std::vec::Vec;

/// A virtual register reference.
#[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct VirtReg(u32);
entity_impl!(VirtReg, "vreg");

type ValueList = EntityList<Value>;

/// Collection of virtual registers.
///
/// Each virtual register is a list of values. Also maintain a map from values to their unique
/// virtual register, if any.
pub struct VirtRegs {
    /// Memory pool for the value lists.
    pool: ListPool<Value>,

    /// The primary table of virtual registers.
    vregs: PrimaryMap<VirtReg, ValueList>,

    /// Allocated virtual register numbers that are no longer in use.
    unused_vregs: Vec<VirtReg>,

    /// Each value belongs to at most one virtual register.
    value_vregs: SecondaryMap<Value, PackedOption<VirtReg>>,

    /// Table used during the union-find phase while `vregs` is empty.
    union_find: SecondaryMap<Value, i32>,

    /// Values that have been activated in the `union_find` table, but not yet added to any virtual
    /// registers by the `finish_union_find()` function.
    pending_values: Vec<Value>,
}

impl VirtRegs {
    /// Create a new virtual register collection.
    pub fn new() -> Self {
        Self {
            pool: ListPool::new(),
            vregs: PrimaryMap::new(),
            unused_vregs: Vec::new(),
            value_vregs: SecondaryMap::new(),
            union_find: SecondaryMap::new(),
            pending_values: Vec::new(),
        }
    }

    /// Clear all virtual registers.
    pub fn clear(&mut self) {
        self.vregs.clear();
        self.unused_vregs.clear();
        self.value_vregs.clear();
        self.pool.clear();
        self.union_find.clear();
        self.pending_values.clear();
    }

    /// Get the virtual register containing `value`, if any.
    pub fn get(&self, value: Value) -> Option<VirtReg> {
        self.value_vregs[value].into()
    }

    /// Get the list of values in `vreg`.
    pub fn values(&self, vreg: VirtReg) -> &[Value] {
        self.vregs[vreg].as_slice(&self.pool)
    }

    /// Get an iterator over all virtual registers.
    pub fn all_virtregs(&self) -> Keys<VirtReg> {
        self.vregs.keys()
    }

    /// Get the congruence class of `value`.
    ///
    /// If `value` belongs to a virtual register, the congruence class is the values of the virtual
    /// register. Otherwise it is just the value itself.
    #[cfg_attr(feature = "cargo-clippy", allow(clippy::trivially_copy_pass_by_ref))]
    pub fn congruence_class<'a, 'b>(&'a self, value: &'b Value) -> &'b [Value]
    where
        'a: 'b,
    {
        self.get(*value)
            .map_or_else(|| ref_slice(value), |vr| self.values(vr))
    }

    /// Check if `a` and `b` belong to the same congruence class.
    pub fn same_class(&self, a: Value, b: Value) -> bool {
        match (self.get(a), self.get(b)) {
            (Some(va), Some(vb)) => va == vb,
            _ => a == b,
        }
    }

    /// Sort the values in `vreg` according to the dominator tree pre-order.
    ///
    /// Returns the slice of sorted values which `values(vreg)` will also return from now on.
    pub fn sort_values(
        &mut self,
        vreg: VirtReg,
        func: &Function,
        preorder: &DominatorTreePreorder,
    ) -> &[Value] {
        let s = self.vregs[vreg].as_mut_slice(&mut self.pool);
        s.sort_unstable_by(|&a, &b| preorder.pre_cmp_def(a, b, func));
        s
    }

    /// Insert a single value into a sorted virtual register.
    ///
    /// It is assumed that the virtual register containing `big` is already sorted by
    /// `sort_values()`, and that `single` does not already belong to a virtual register.
    ///
    /// If `big` is not part of a virtual register, one will be created.
    pub fn insert_single(
        &mut self,
        big: Value,
        single: Value,
        func: &Function,
        preorder: &DominatorTreePreorder,
    ) -> VirtReg {
        debug_assert_eq!(self.get(single), None, "Expected singleton {}", single);

        // Make sure `big` has a vreg.
        let vreg = self.get(big).unwrap_or_else(|| {
            let vr = self.alloc();
            self.vregs[vr].push(big, &mut self.pool);
            self.value_vregs[big] = vr.into();
            vr
        });

        // Determine the insertion position for `single`.
        let index = match self
            .values(vreg)
            .binary_search_by(|&v| preorder.pre_cmp_def(v, single, func))
        {
            Ok(_) => panic!("{} already in {}", single, vreg),
            Err(i) => i,
        };
        self.vregs[vreg].insert(index, single, &mut self.pool);
        self.value_vregs[single] = vreg.into();
        vreg
    }

    /// Remove a virtual register.
    ///
    /// The values in `vreg` become singletons, and the virtual register number may be reused in
    /// the future.
    pub fn remove(&mut self, vreg: VirtReg) {
        // Start by reassigning all the values.
        for &v in self.vregs[vreg].as_slice(&self.pool) {
            let old = self.value_vregs[v].take();
            debug_assert_eq!(old, Some(vreg));
        }

        self.vregs[vreg].clear(&mut self.pool);
        self.unused_vregs.push(vreg);
    }

    /// Allocate a new empty virtual register.
    fn alloc(&mut self) -> VirtReg {
        self.unused_vregs
            .pop()
            .unwrap_or_else(|| self.vregs.push(Default::default()))
    }

    /// Unify `values` into a single virtual register.
    ///
    /// The values in the slice can be singletons or they can belong to a virtual register already.
    /// If a value belongs to a virtual register, all of the values in that register must be
    /// present.
    ///
    /// The values are assumed to already be in topological order.
    pub fn unify(&mut self, values: &[Value]) -> VirtReg {
        // Start by clearing all virtual registers involved.
        let mut singletons = 0;
        let mut cleared = 0;
        for &val in values {
            match self.get(val) {
                None => singletons += 1,
                Some(vreg) => {
                    if !self.vregs[vreg].is_empty() {
                        cleared += self.vregs[vreg].len(&self.pool);
                        self.vregs[vreg].clear(&mut self.pool);
                        self.unused_vregs.push(vreg);
                    }
                }
            }
        }

        debug_assert_eq!(
            values.len(),
            singletons + cleared,
            "Can't unify partial virtual registers"
        );

        let vreg = self.alloc();
        self.vregs[vreg].extend(values.iter().cloned(), &mut self.pool);
        for &v in values {
            self.value_vregs[v] = vreg.into();
        }

        vreg
    }
}

impl fmt::Display for VirtRegs {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        for vreg in self.all_virtregs() {
            write!(f, "\n{} = {}", vreg, DisplayList(self.values(vreg)))?;
        }
        Ok(())
    }
}

/// Expanded version of a union-find table entry.
enum UFEntry {
    /// This value is a set leader. The embedded number is the set's rank.
    Rank(u32),

    /// This value belongs to the same set as the linked value.
    Link(Value),
}

/// The `union_find` table contains `i32` entries that are interpreted as follows:
///
/// x = 0: The value belongs to its own singleton set.
/// x > 0: The value is the leader of a set with rank x.
/// x < 0: The value belongs to the same set as the value numbered !x.
///
/// The rank of a set is an upper bound on the number of links that must be followed from a member
/// of the set to the set leader.
///
/// A singleton set is the same as a set with rank 0. It contains only the leader value.
impl UFEntry {
    /// Decode a table entry.
    fn decode(x: i32) -> Self {
        if x < 0 {
            UFEntry::Link(Value::from_u32((!x) as u32))
        } else {
            UFEntry::Rank(x as u32)
        }
    }

    /// Encode a link entry.
    fn encode_link(v: Value) -> i32 {
        !(v.as_u32() as i32)
    }
}

/// Union-find algorithm for building virtual registers.
///
/// Before values are added to virtual registers, it is possible to use a union-find algorithm to
/// construct virtual registers efficiently. The support implemented here is used as follows:
///
/// 1. Repeatedly call the `union(a, b)` method to request that `a` and `b` are placed in the same
///    virtual register.
/// 2. When done, call `finish_union_find()` to construct the virtual register sets based on the
///    `union()` calls.
///
/// The values that were passed to `union(a, b)` must not belong to any existing virtual registers
/// by the time `finish_union_find()` is called.
///
/// For more information on the algorithm implemented here, see Chapter 21 "Data Structures for
/// Disjoint Sets" of Cormen, Leiserson, Rivest, Stein, "Introduction to Algorithms", 3rd Ed.
///
/// The [Wikipedia entry on disjoint-set data
/// structures](https://en.wikipedia.org/wiki/Disjoint-set_data_structure) is also good.
impl VirtRegs {
    /// Find the leader value and rank of the set containing `v`.
    /// Compress the path if needed.
    fn find(&mut self, mut val: Value) -> (Value, u32) {
        let mut val_stack = vec![];
        let found = loop {
            match UFEntry::decode(self.union_find[val]) {
                UFEntry::Rank(rank) => break (val, rank),
                UFEntry::Link(parent) => {
                    val_stack.push(val);
                    val = parent;
                }
            }
        };
        // Compress the path.
        while let Some(val) = val_stack.pop() {
            self.union_find[val] = UFEntry::encode_link(found.0);
        }
        found
    }

    /// Union the two sets containing `a` and `b`.
    ///
    /// This ensures that `a` and `b` will belong to the same virtual register after calling
    /// `finish_union_find()`.
    pub fn union(&mut self, a: Value, b: Value) {
        let (leader_a, rank_a) = self.find(a);
        let (leader_b, rank_b) = self.find(b);

        if leader_a == leader_b {
            return;
        }

        // The first time we see a value, its rank will be 0. Add it to the list of pending values.
        if rank_a == 0 {
            debug_assert_eq!(a, leader_a);
            self.pending_values.push(a);
        }
        if rank_b == 0 {
            debug_assert_eq!(b, leader_b);
            self.pending_values.push(b);
        }

        // Merge into the set with the greater rank. This preserves the invariant that the rank is
        // an upper bound on the number of links to the leader.
        match rank_a.cmp(&rank_b) {
            Ordering::Less => {
                self.union_find[leader_a] = UFEntry::encode_link(leader_b);
            }
            Ordering::Greater => {
                self.union_find[leader_b] = UFEntry::encode_link(leader_a);
            }
            Ordering::Equal => {
                // When the two sets have the same rank, we arbitrarily pick the a-set to preserve.
                // We need to increase the rank by one since the elements in the b-set are now one
                // link further away from the leader.
                self.union_find[leader_a] += 1;
                self.union_find[leader_b] = UFEntry::encode_link(leader_a);
            }
        }
    }

    /// Compute virtual registers based on previous calls to `union(a, b)`.
    ///
    /// This terminates the union-find algorithm, so the next time `union()` is called, it is for a
    /// new independent batch of values.
    ///
    /// The values in each virtual register will be ordered according to when they were first
    /// passed to `union()`, but backwards. It is expected that `sort_values()` will be used to
    /// create a more sensible value order.
    ///
    /// The new virtual registers will be appended to `new_vregs`, if present.
    pub fn finish_union_find(&mut self, mut new_vregs: Option<&mut Vec<VirtReg>>) {
        debug_assert_eq!(
            self.pending_values.iter().find(|&&v| self.get(v).is_some()),
            None,
            "Values participating in union-find must not belong to existing virtual registers"
        );

        while let Some(val) = self.pending_values.pop() {
            let (leader, _) = self.find(val);

            // Get the vreg for `leader`, or create it.
            let vreg = self.get(leader).unwrap_or_else(|| {
                // Allocate a vreg for `leader`, but leave it empty.
                let vr = self.alloc();
                if let Some(ref mut vec) = new_vregs {
                    vec.push(vr);
                }
                self.value_vregs[leader] = vr.into();
                vr
            });

            // Push values in `pending_values` order, including when `v == leader`.
            self.vregs[vreg].push(val, &mut self.pool);
            self.value_vregs[val] = vreg.into();

            // Clear the entry in the union-find table. The `find(val)` call may still look at this
            // entry in a future iteration, but that is ok. It will return a rank 0 leader that has
            // already been assigned to the correct virtual register.
            self.union_find[val] = 0;
        }

        // We do *not* call `union_find.clear()` here because re-initializing the table for
        // sparse use takes time linear in the number of values in the function. Instead we reset
        // the entries that are known to be non-zero in the loop above.
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::entity::EntityRef;
    use crate::ir::Value;

    #[test]
    fn empty_union_find() {
        let mut vregs = VirtRegs::new();
        vregs.finish_union_find(None);
        assert_eq!(vregs.all_virtregs().count(), 0);
    }

    #[test]
    fn union_self() {
        let mut vregs = VirtRegs::new();
        let v1 = Value::new(1);
        vregs.union(v1, v1);
        vregs.finish_union_find(None);
        assert_eq!(vregs.get(v1), None);
        assert_eq!(vregs.all_virtregs().count(), 0);
    }

    #[test]
    fn union_pair() {
        let mut vregs = VirtRegs::new();
        let v1 = Value::new(1);
        let v2 = Value::new(2);
        vregs.union(v1, v2);
        vregs.finish_union_find(None);
        assert_eq!(vregs.congruence_class(&v1), &[v2, v1]);
        assert_eq!(vregs.congruence_class(&v2), &[v2, v1]);
        assert_eq!(vregs.all_virtregs().count(), 1);
    }

    #[test]
    fn union_pair_backwards() {
        let mut vregs = VirtRegs::new();
        let v1 = Value::new(1);
        let v2 = Value::new(2);
        vregs.union(v2, v1);
        vregs.finish_union_find(None);
        assert_eq!(vregs.congruence_class(&v1), &[v1, v2]);
        assert_eq!(vregs.congruence_class(&v2), &[v1, v2]);
        assert_eq!(vregs.all_virtregs().count(), 1);
    }

    #[test]
    fn union_tree() {
        let mut vregs = VirtRegs::new();
        let v1 = Value::new(1);
        let v2 = Value::new(2);
        let v3 = Value::new(3);
        let v4 = Value::new(4);

        vregs.union(v2, v4);
        vregs.union(v3, v1);
        // Leaders: v2, v3
        vregs.union(v4, v1);
        vregs.finish_union_find(None);
        assert_eq!(vregs.congruence_class(&v1), &[v1, v3, v4, v2]);
        assert_eq!(vregs.congruence_class(&v2), &[v1, v3, v4, v2]);
        assert_eq!(vregs.congruence_class(&v3), &[v1, v3, v4, v2]);
        assert_eq!(vregs.congruence_class(&v4), &[v1, v3, v4, v2]);
        assert_eq!(vregs.all_virtregs().count(), 1);
    }

    #[test]
    fn union_two() {
        let mut vregs = VirtRegs::new();
        let v1 = Value::new(1);
        let v2 = Value::new(2);
        let v3 = Value::new(3);
        let v4 = Value::new(4);

        vregs.union(v2, v4);
        vregs.union(v3, v1);
        // Leaders: v2, v3
        vregs.finish_union_find(None);
        assert_eq!(vregs.congruence_class(&v1), &[v1, v3]);
        assert_eq!(vregs.congruence_class(&v2), &[v4, v2]);
        assert_eq!(vregs.congruence_class(&v3), &[v1, v3]);
        assert_eq!(vregs.congruence_class(&v4), &[v4, v2]);
        assert_eq!(vregs.all_virtregs().count(), 2);
    }

    #[test]
    fn union_uneven() {
        let mut vregs = VirtRegs::new();
        let v1 = Value::new(1);
        let v2 = Value::new(2);
        let v3 = Value::new(3);
        let v4 = Value::new(4);

        vregs.union(v2, v4); // Rank 0-0
        vregs.union(v3, v2); // Rank 0-1
        vregs.union(v2, v1); // Rank 1-0
        vregs.finish_union_find(None);
        assert_eq!(vregs.congruence_class(&v1), &[v1, v3, v4, v2]);
        assert_eq!(vregs.congruence_class(&v2), &[v1, v3, v4, v2]);
        assert_eq!(vregs.congruence_class(&v3), &[v1, v3, v4, v2]);
        assert_eq!(vregs.congruence_class(&v4), &[v1, v3, v4, v2]);
        assert_eq!(vregs.all_virtregs().count(), 1);
    }
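
    // An added check (not part of the original test suite) that the `union_find` entry
    // encoding round-trips: a link is stored as `!value_number`, and non-negative entries
    // decode as ranks.
    #[test]
    fn ufentry_roundtrip() {
        let v7 = Value::new(7);
        match UFEntry::decode(UFEntry::encode_link(v7)) {
            UFEntry::Link(v) => assert_eq!(v, v7),
            UFEntry::Rank(_) => panic!("a link entry should decode as a link"),
        }
        match UFEntry::decode(0) {
            UFEntry::Rank(0) => {}
            _ => panic!("zero should decode as a rank-0 singleton leader"),
        }
    }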
}