From 85b624d13bf5249d78ff2047cf2fcac8d1529e08 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 21 Jun 2017 09:24:12 -0700 Subject: [PATCH] Add a coalescing pass to the register allocator. Coalescing means creating virtual registers and transforming the code into conventional SSA form. This means that every value used as a branch argument will belong to the same virtual register as the corresponding EBB argument value. Conventional SSA form makes it easy to avoid memory-memory copies when spilling values, and the virtual registers can be used as hints when picking registers too. This reduces the number of register moves needed for EBB arguments. --- cranelift/filetests/regalloc/coalesce.cton | 91 ++++ lib/cretonne/src/dbg.rs | 21 + lib/cretonne/src/regalloc/coalescing.rs | 530 +++++++++++++++++++++ lib/cretonne/src/regalloc/context.rs | 18 + lib/cretonne/src/regalloc/mod.rs | 1 + 5 files changed, 661 insertions(+) create mode 100644 cranelift/filetests/regalloc/coalesce.cton create mode 100644 lib/cretonne/src/regalloc/coalescing.rs diff --git a/cranelift/filetests/regalloc/coalesce.cton b/cranelift/filetests/regalloc/coalesce.cton new file mode 100644 index 0000000000..8b76c8db6b --- /dev/null +++ b/cranelift/filetests/regalloc/coalesce.cton @@ -0,0 +1,91 @@ +test regalloc +isa riscv + +; Test the coalescer. +; regex: V=v\d+ +; regex: WS=\s+ + +; This function is already CSSA, so no copies should be inserted. +function %cssa(i32) -> i32 { +ebb0(v0: i32): + ; not: copy + ; v0 is used by the branch and passed as an arg - that's no conflict. + brnz v0, ebb1(v0) + ; v0 is live across the branch above. That's no conflict. 
+ v1 = iadd_imm v0, 7 + jump ebb1(v1) + +ebb1(v10: i32): + v11 = iadd_imm v10, 7 + return v11 +} + +function %trivial(i32) -> i32 { +ebb0(v0: i32): + ; check: $(cp1=$V) = copy $v0 + ; nextln: brnz $v0, $ebb1($cp1) + brnz v0, ebb1(v0) + ; not: copy + v1 = iadd_imm v0, 7 + jump ebb1(v1) + +ebb1(v10: i32): + ; Using v0 in the destination EBB causes a conflict. + v11 = iadd v10, v0 + return v11 +} + +; A value is used as an SSA argument twice in the same branch. +function %dualuse(i32) -> i32 { +ebb0(v0: i32): + ; check: $(cp1=$V) = copy $v0 + ; nextln: brnz $v0, $ebb1($v0, $cp1) + brnz v0, ebb1(v0, v0) + ; not: copy + v1 = iadd_imm v0, 7 + v2 = iadd_imm v1, 56 + jump ebb1(v1, v2) + +ebb1(v10: i32, v11: i32): + v12 = iadd v10, v11 + return v12 +} + +; Interference away from the branch +; The interference can be broken with a copy at either branch. +function %interference(i32) -> i32 { +ebb0(v0: i32): + ; not: copy + brnz v0, ebb1(v0) + v1 = iadd_imm v0, 7 + ; v1 and v0 interfere here: + trapnz v0 + ; check: $(cp1=$V) = copy $v1 + ; nextln: jump $ebb1($cp1) + jump ebb1(v1) + +ebb1(v10: i32): + ; not: copy + v11 = iadd_imm v10, 7 + return v11 +} + +; A loop where one induction variable is used as a backedge argument. +function %fibonacci(i32) -> i32 { +ebb0(v0: i32): + ; not: copy + v1 = iconst.i32 1 + v2 = iconst.i32 2 + jump ebb1(v1, v2) + +ebb1(v10: i32, v11: i32): + ; v11 needs to be isolated because it interferes with v10.
+ ; check: $ebb1($v10: i32, $(nv11a=$V): i32) + ; check: $v11 = copy $nv11a + v12 = iadd v10, v11 + v13 = icmp ult v12, v0 + ; check: $(nv11b=$V) = copy $v11 + ; nextln: brnz $v13, $ebb1($nv11b, $v12) + brnz v13, ebb1(v11, v12) + return v12 +} diff --git a/lib/cretonne/src/dbg.rs b/lib/cretonne/src/dbg.rs index 06723fa018..dc7793f3d9 100644 --- a/lib/cretonne/src/dbg.rs +++ b/lib/cretonne/src/dbg.rs @@ -13,6 +13,7 @@ use std::ascii::AsciiExt; use std::cell::RefCell; use std::env; use std::ffi::OsStr; +use std::fmt; use std::fs::File; use std::io::{Write, BufWriter}; use std::sync::atomic; @@ -98,3 +99,23 @@ macro_rules! dbg { } } } + +/// Helper for printing lists. +pub struct DisplayList<'a, T>(pub &'a [T]) where T: 'a + fmt::Display; + +impl<'a, T> fmt::Display for DisplayList<'a, T> + where T: 'a + fmt::Display +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.0.split_first() { + None => write!(f, "[]"), + Some((first, rest)) => { + write!(f, "[{}", first)?; + for x in rest { + write!(f, ", {}", x)?; + } + write!(f, "]") + } + } + } +} diff --git a/lib/cretonne/src/regalloc/coalescing.rs b/lib/cretonne/src/regalloc/coalescing.rs new file mode 100644 index 0000000000..70a20e3897 --- /dev/null +++ b/lib/cretonne/src/regalloc/coalescing.rs @@ -0,0 +1,530 @@ +//! Constructing conventional SSA form. +//! +//! Conventional SSA form is a subset of SSA form where any (transitively) phi-related values do +//! not interfere. We construct CSSA by building virtual registers that are as large as possible +//! and inserting copies where necessary such that all values passed to an EBB argument will belong +//! to the same virtual register as the EBB argument value itself. 
+ +use dbg::DisplayList; +use dominator_tree::DominatorTree; +use flowgraph::{ControlFlowGraph, BasicBlock}; +use ir::{DataFlowGraph, Layout, Cursor, InstBuilder}; +use ir::{Function, Ebb, Inst, Value, ExpandedProgramPoint}; +use regalloc::affinity::Affinity; +use regalloc::liveness::Liveness; +use regalloc::virtregs::VirtRegs; +use std::cmp::Ordering; +use std::iter::Peekable; +use std::mem; +use isa::{TargetIsa, EncInfo}; + +/// Dominator forest. +/// +/// This is a utility type used for merging virtual registers, where each virtual register is a +/// list of values ordered according to `DominatorTree::rpo_cmp`. +/// +/// A `DomForest` object is used as a buffer for building virtual registers. It lets you merge two +/// sorted lists of values while checking for interference only where necessary. +/// +/// The idea of a dominator forest was introduced here: +/// +/// Budimlic, Z., Cooper, K. D., Harvey, T. J., et al. +/// (2002). Fast copy coalescing and live-range identification (Vol. 37, pp. 25–32). ACM. +/// http://doi.org/10.1145/543552.512534 +/// +/// The linear stack representation here: +/// +/// Boissinot, B., Darte, A., & Rastello, F. (2009). Revisiting out-of-SSA translation for +/// correctness, code quality and efficiency. Presented at the Proceedings of the 7th …. +struct DomForest { + // The sequence of values that have been merged so far. In RPO order of their defs. + values: Vec, + + // Stack representing the rightmost edge of the dominator forest so far, ending in the last + // element of `values`. At all times, each element in the stack dominates the next one, and all + // elements dominating the end of `values` are on the stack. + stack: Vec, +} + +/// A node in the dominator forest. +#[derive(Clone, Copy, Debug)] +struct Node { + value: Value, + /// Set identifier. Values in the same set are assumed to be non-interfering. + set: u8, + /// The program point where `value` is defined.
+ def: ExpandedProgramPoint, +} + +impl Node { + /// Create a node for `value`. + pub fn new(value: Value, set: u8, dfg: &DataFlowGraph) -> Node { + Node { + value, + set, + def: dfg.value_def(value).into(), + } + } +} + +/// Push a node to `stack` and update `stack` so it contains all dominator forest ancestors of +/// the pushed value. +/// + +impl DomForest { + /// Create a new empty dominator forest. + pub fn new() -> DomForest { + DomForest { + values: Vec::new(), + stack: Vec::new(), + } + } + + /// Swap the merged list with `buffer`, leaving the dominator forest empty. + /// + /// This is typically called after a successful merge to extract the merged value list. + pub fn swap(&mut self, buffer: &mut Vec) { + buffer.clear(); + mem::swap(&mut self.values, buffer); + } + + /// Add a single node to the forest. + /// + /// Update the stack so its dominance invariants are preserved. Detect a parent node on the + /// stack which is the closest one dominating the new node. + /// + /// If the pushed node's parent in the dominator forest belongs to a different set, returns + /// `Some(parent)`. + fn push_node(&mut self, node: Node, layout: &Layout, domtree: &DominatorTree) -> Option { + self.values.push(node.value); + + // The stack contains the current sequence of dominating defs. Pop elements until we + // find one that dominates `node`. + while let Some(top) = self.stack.pop() { + if domtree.dominates(top.def, node.def, layout) { + // This is the right insertion spot for `node`. + self.stack.push(top); + self.stack.push(node); + // If the parent value comes from a different set, return it for interference + // checking. If the sets are equal, assume that interference is already handled. + if top.set != node.set { + return Some(top.value); + } else { + return None; + } + } + } + + // No dominators, start a new tree in the forest. + self.stack.push(node); + None + } + + /// Try to merge two sorted sets of values. 
Each slice must already be sorted and free of any + /// interference. + /// + /// It is permitted for a value to appear in both lists. The merged sequence will only have one + /// copy of the value. + /// + /// If an interference is detected, returns `Err((a, b))` with the two conflicting values from + /// `va` and `vb` respectively. + /// + /// If the merge succeeds, returns `Ok(())`. The merged sequence can be extracted with + /// `swap()`. + pub fn try_merge(&mut self, + va: &[Value], + vb: &[Value], + dfg: &DataFlowGraph, + layout: &Layout, + domtree: &DominatorTree, + liveness: &Liveness) + -> Result<(), (Value, Value)> { + self.stack.clear(); + self.values.clear(); + self.values.reserve(va.len() + vb.len()); + + // Convert the two value lists into a merged sequence of nodes. + let merged = MergedNodes { + a: va.iter().map(|&value| Node::new(value, 0, dfg)).peekable(), + b: vb.iter().map(|&value| Node::new(value, 1, dfg)).peekable(), + layout, + domtree, + }; + for node in merged { + if let Some(parent) = self.push_node(node, layout, domtree) { + // Check if `parent` live range contains `node.def`. + let lr = liveness + .get(parent) + .expect("No live range for parent value"); + if lr.overlaps_def(node.def, layout.pp_ebb(node.def), layout) { + // Interference detected. Get the `(a, b)` order right in the error. + return Err(if node.set == 0 { + (node.value, parent) + } else { + (parent, node.value) + }); + } + } + } + + Ok(()) + } +} + +/// Node-merging iterator. +/// +/// Given two ordered sequences of nodes, yield an ordered sequence containing all of them. +/// Duplicates are removed.
+struct MergedNodes<'a, IA, IB> + where IA: Iterator, + IB: Iterator +{ + a: Peekable, + b: Peekable, + layout: &'a Layout, + domtree: &'a DominatorTree, +} + +impl<'a, IA, IB> Iterator for MergedNodes<'a, IA, IB> + where IA: Iterator, + IB: Iterator +{ + type Item = Node; + + fn next(&mut self) -> Option { + let ord = match (self.a.peek(), self.b.peek()) { + (Some(a), Some(b)) => { + // If the two values are defined at the same point, compare value numbers instead. + // This is going to cause an interference conflict unless it's actually the same + // value appearing in both streams. + self.domtree + .rpo_cmp(a.def, b.def, self.layout) + .then(Ord::cmp(&a.value, &b.value)) + } + (Some(_), None) => Ordering::Less, + (None, Some(_)) => Ordering::Greater, + (None, None) => return None, + }; + match ord { + Ordering::Equal => { + // The two iterators produced the same value. Just return the first one. + self.b.next(); + self.a.next() + } + Ordering::Less => self.a.next(), + Ordering::Greater => self.b.next(), + } + } +} + +/// Data structures to be used by the coalescing pass. +pub struct Coalescing { + forest: DomForest, + + // Current set of coalesced values. Kept sorted and interference free. + values: Vec, + + // New values that were created when splitting interferences. + split_values: Vec, +} + +/// One-shot context created once per invocation. +struct Context<'a> { + isa: &'a TargetIsa, + encinfo: EncInfo, + + func: &'a mut Function, + domtree: &'a DominatorTree, + liveness: &'a mut Liveness, + virtregs: &'a mut VirtRegs, + + forest: &'a mut DomForest, + values: &'a mut Vec, + split_values: &'a mut Vec, +} + +impl Coalescing { + /// Create a new coalescing pass. + pub fn new() -> Coalescing { + Coalescing { + forest: DomForest::new(), + values: Vec::new(), + split_values: Vec::new(), + } + + } + + /// Convert `func` to conventional SSA form and build virtual registers in the process.
+ pub fn conventional_ssa(&mut self, + isa: &TargetIsa, + func: &mut Function, + cfg: &ControlFlowGraph, + domtree: &DominatorTree, + liveness: &mut Liveness, + virtregs: &mut VirtRegs) { + dbg!("Coalescing for:\n{}", func.display(isa)); + let mut context = Context { + isa, + encinfo: isa.encoding_info(), + func, + domtree, + liveness, + virtregs, + forest: &mut self.forest, + values: &mut self.values, + split_values: &mut self.split_values, + }; + + // TODO: The iteration order matters here. We should coalesce in the most important blocks + // first, so they get first pick at forming virtual registers. + for &ebb in domtree.cfg_postorder() { + let preds = cfg.get_predecessors(ebb); + if !preds.is_empty() { + for argnum in 0..context.func.dfg.num_ebb_args(ebb) { + context.coalesce_ebb_arg(ebb, argnum, preds) + } + } + } + } +} + +impl<'a> Context<'a> { + /// Coalesce the `argnum`'th argument to `ebb`. + fn coalesce_ebb_arg(&mut self, ebb: Ebb, argnum: usize, preds: &[BasicBlock]) { + self.split_values.clear(); + let mut succ_val = self.func.dfg.ebb_args(ebb)[argnum]; + dbg!("Processing {}/{}: {}", ebb, argnum, succ_val); + + // We want to merge the virtual register for `succ_val` with the virtual registers for + // the branch arguments in the predecessors. This may not be possible if any live + // ranges interfere, so we can insert copies to break interferences: + // + // pred: + // jump ebb1(v1) + // + // ebb1(v10: i32): + // ... + // + // In the predecessor: + // + // v2 = copy v1 + // jump ebb1(v2) + // + // A predecessor copy is always required if the branch argument virtual register is + // live into the successor. + // + // In the successor: + // + // ebb1(v11: i32): + // v10 = copy v11 + // + // A successor copy is always required if the `succ_val` virtual register is live at + // any predecessor branch.
+ + while let Some(bad_value) = self.try_coalesce(argnum, succ_val, preds) { + dbg!("Isolating interfering value {}", bad_value); + // The bad value has some conflict that can only be reconciled by excluding its + // congruence class from the new virtual register. + // + // Try to catch infinite splitting loops. The values created by splitting should never + // have irreconcilable interferences. + assert!(!self.split_values.contains(&bad_value), + "{} was already isolated", + bad_value); + let split_len = self.split_values.len(); + + // The bad value can be both the successor value and a predecessor value at the same + // time. + if self.virtregs.same_class(bad_value, succ_val) { + succ_val = self.split_succ(ebb, succ_val); + } + + // Check the predecessors. + for &(pred_ebb, pred_inst) in preds { + let pred_val = self.func.dfg.inst_variable_args(pred_inst)[argnum]; + if self.virtregs.same_class(bad_value, pred_val) { + self.split_pred(pred_inst, pred_ebb, argnum, pred_val); + } + } + + // Second loop check. + assert_ne!(split_len, + self.split_values.len(), + "Couldn't isolate {}", + bad_value); + } + + let vreg = self.virtregs.unify(self.values); + dbg!("Coalesced {} arg {} into {} = {}", + ebb, + argnum, + vreg, + DisplayList(self.virtregs.values(vreg))); + } + + /// Reset `self.values` to just the set of split values. + fn reset_values(&mut self) { + self.values.clear(); + self.values.extend_from_slice(self.split_values); + let domtree = &self.domtree; + let func = &self.func; + self.values + .sort_by(|&a, &b| { + domtree.rpo_cmp(func.dfg.value_def(a), func.dfg.value_def(b), &func.layout) + }); + } + + /// Try coalescing predecessors with `succ_val`. + /// + /// Returns a value from a congruence class that needs to be split before starting over, or + /// `None` if everything was successfully coalesced into `self.values`. 
+ fn try_coalesce(&mut self, + argnum: usize, + succ_val: Value, + preds: &[BasicBlock]) + -> Option { + /// Initialize the value list with the split values. These are guaranteed to be + /// interference free, and anything that interferes with them must be split away. + self.reset_values(); + dbg!("Trying {} with split values: {:?}", succ_val, self.values); + + // Start by adding `succ_val` so we can determine if it interferes with any of the new + // split values. If it does, we must split it. + if self.add_class(succ_val).is_err() { + return Some(succ_val); + } + + for &(pred_ebb, pred_inst) in preds { + let pred_val = self.func.dfg.inst_variable_args(pred_inst)[argnum]; + dbg!("Checking {}: {}: {}", + pred_val, + pred_ebb, + self.func.dfg.display_inst(pred_inst)); + if let Err((a, b)) = self.add_class(pred_val) { + dbg!("Found conflict between {} and {}", a, b); + // We have a conflict between the already merged value `a` and one of the new + // values `b`. + // + // Check if the `a` live range is fundamentally incompatible with `pred_inst`. + if self.liveness + .get(a) + .expect("No live range for interfering value") + .reaches_use(pred_inst, pred_ebb, &self.func.layout) { + // Splitting at `pred_inst` wouldn't resolve the interference, so we need to + // start over. + return Some(a); + } + + // The local conflict could be avoided by splitting at this predecessor, so try + // that. This split is not necessarily required, but it allows us to make progress. + let new_val = self.split_pred(pred_inst, pred_ebb, argnum, pred_val); + assert!(self.add_class(new_val).is_ok(), + "Splitting didn't resolve conflict."); + } + } + + None + } + + /// Try merging the congruence class for `value` into `self.values`. + /// + /// Leave `self.values` unchanged on failure. 
+ fn add_class(&mut self, value: Value) -> Result<(), (Value, Value)> { + self.forest + .try_merge(&self.values, + self.virtregs.congruence_class(&value), + &self.func.dfg, + &self.func.layout, + self.domtree, + self.liveness)?; + self.forest.swap(&mut self.values); + Ok(()) + } + + /// Split the congruence class for the `argnum` argument to `pred_inst` by inserting a copy. + fn split_pred(&mut self, + pred_inst: Inst, + pred_ebb: Ebb, + argnum: usize, + pred_val: Value) + -> Value { + let copy; + { + let mut pos = Cursor::new(&mut self.func.layout); + pos.goto_inst(pred_inst); + copy = self.func.dfg.ins(&mut pos).copy(pred_val); + } + let inst = self.func.dfg.value_def(copy).unwrap_inst(); + let ty = self.func.dfg.value_type(copy); + + dbg!("Inserted {}, before {}: {}", + self.func.dfg.display_inst(inst), + pred_ebb, + self.func.dfg.display_inst(pred_inst)); + + // Give it an encoding. + let encoding = self.isa + .encode(&self.func.dfg, &self.func.dfg[inst], ty) + .expect("Can't encode copy"); + *self.func.encodings.ensure(inst) = encoding; + + // Create a live range for the new value. + let affinity = Affinity::new(&self.encinfo + .operand_constraints(encoding) + .expect("Bad copy encoding") + .outs + [0]); + self.liveness.create_dead(copy, inst, affinity); + self.liveness + .extend_locally(copy, pred_ebb, pred_inst, &self.func.layout); + + self.func.dfg.inst_variable_args_mut(pred_inst)[argnum] = copy; + self.split_values.push(copy); + copy + } + + /// Split the congruence class for the successor EBB value itself. + fn split_succ(&mut self, ebb: Ebb, succ_val: Value) -> Value { + let ty = self.func.dfg.value_type(succ_val); + let new_val = self.func.dfg.replace_ebb_arg(succ_val, ty); + + // Insert a copy instruction at the top of ebb. 
+ { + let mut pos = Cursor::new(&mut self.func.layout); + pos.goto_top(ebb); + pos.next_inst(); + self.func + .dfg + .ins(&mut pos) + .with_result(succ_val) + .copy(new_val); + } + let inst = self.func.dfg.value_def(succ_val).unwrap_inst(); + self.liveness.move_def_locally(succ_val, inst); + + dbg!("Inserted {}, following {}({}: {})", + self.func.dfg.display_inst(inst), + ebb, + new_val, + ty); + + // Give it an encoding. + let encoding = self.isa + .encode(&self.func.dfg, &self.func.dfg[inst], ty) + .expect("Can't encode copy"); + *self.func.encodings.ensure(inst) = encoding; + + // Create a live range for the new value. + let affinity = Affinity::new(&self.encinfo + .operand_constraints(encoding) + .expect("Bad copy encoding") + .outs + [0]); + self.liveness.create_dead(new_val, ebb, affinity); + self.liveness + .extend_locally(new_val, ebb, inst, &self.func.layout); + + self.split_values.push(new_val); + new_val + } +} diff --git a/lib/cretonne/src/regalloc/context.rs b/lib/cretonne/src/regalloc/context.rs index ea2cdcd924..78e1595747 100644 --- a/lib/cretonne/src/regalloc/context.rs +++ b/lib/cretonne/src/regalloc/context.rs @@ -8,6 +8,7 @@ use dominator_tree::DominatorTree; use flowgraph::ControlFlowGraph; use ir::Function; use isa::TargetIsa; +use regalloc::coalescing::Coalescing; use regalloc::coloring::Coloring; use regalloc::live_value_tracker::LiveValueTracker; use regalloc::liveness::Liveness; @@ -22,6 +23,7 @@ use verifier::{verify_context, verify_liveness}; pub struct Context { liveness: Liveness, virtregs: VirtRegs, + coalescing: Coalescing, topo: TopoOrder, tracker: LiveValueTracker, spilling: Spilling, @@ -38,6 +40,7 @@ impl Context { Context { liveness: Liveness::new(), virtregs: VirtRegs::new(), + coalescing: Coalescing::new(), topo: TopoOrder::new(), tracker: LiveValueTracker::new(), spilling: Spilling::new(), @@ -70,6 +73,21 @@ impl Context { verify_liveness(isa, func, cfg, &self.liveness)?; } + // Coalesce and create conventional SSA form. 
+ self.coalescing + .conventional_ssa(isa, + func, + cfg, + domtree, + &mut self.liveness, + &mut self.virtregs); + + if isa.flags().enable_verifier() { + verify_context(func, cfg, domtree, Some(isa))?; + verify_liveness(isa, func, cfg, &self.liveness)?; + } + + // Second pass: Spilling. self.spilling .run(isa, diff --git a/lib/cretonne/src/regalloc/mod.rs b/lib/cretonne/src/regalloc/mod.rs index ca4624b45b..f689503c03 100644 --- a/lib/cretonne/src/regalloc/mod.rs +++ b/lib/cretonne/src/regalloc/mod.rs @@ -9,6 +9,7 @@ pub mod live_value_tracker; pub mod coloring; mod affinity; +mod coalescing; mod context; mod diversion; mod pressure;