From d1f236b00aa1ec3f06c11862f3466302fd2377ac Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 9 Jan 2018 10:33:02 -0800 Subject: [PATCH] Reimplement coalescer following the Budimlic paper. The old coalescing algorithm had some algorithmic complexity issues when dealing with large virtual registers. Reimplement to use a proper union-find algorithm so we only need one pass through the dominator forests for virtual registers that are interference free. Virtual registers that do have interference are split and new registers built. This pass is about twice as fast as the old one when dealing with complex virtual registers. --- cranelift/filetests/regalloc/coalesce.cton | 12 +- .../regalloc/infinite-interference.cton | 37 + lib/cretonne/src/ir/dfg.rs | 19 +- lib/cretonne/src/regalloc/coalescing.rs | 1021 +++++++++-------- 4 files changed, 604 insertions(+), 485 deletions(-) create mode 100644 cranelift/filetests/regalloc/infinite-interference.cton diff --git a/cranelift/filetests/regalloc/coalesce.cton b/cranelift/filetests/regalloc/coalesce.cton index bdddf26ef4..0deac6fc85 100644 --- a/cranelift/filetests/regalloc/coalesce.cton +++ b/cranelift/filetests/regalloc/coalesce.cton @@ -40,9 +40,8 @@ ebb1(v10: i32): function %dualuse(i32) -> i32 { ebb0(v0: i32): ; check: $(cp1=$V) = copy $v0 - ; nextln: brnz $v0, $ebb1($v0, $cp1) + ; nextln: brnz $v0, $ebb1($cp1, $v0) brnz v0, ebb1(v0, v0) - ; not: copy v1 = iadd_imm v0, 7 v2 = iadd_imm v1, 56 jump ebb1(v1, v2) @@ -56,14 +55,15 @@ ebb1(v10: i32, v11: i32): ; The interference can be broken with a copy at either branch. 
function %interference(i32) -> i32 { ebb0(v0: i32): + ; check: $(cp0=$V) = copy $v0 ; not: copy + ; check: brnz $v0, ebb1($cp0) brnz v0, ebb1(v0) v1 = iadd_imm v0, 7 ; v1 and v0 interfere here: v2 = iadd_imm v0, 8 - ; check: $(cp1=$V) = copy $v1 ; not: copy - ; check: jump $ebb1($cp1) + ; check: jump $ebb1($v1) jump ebb1(v1) ebb1(v10: i32): @@ -75,7 +75,6 @@ ebb1(v10: i32): ; A loop where one induction variable is used as a backedge argument. function %fibonacci(i32) -> i32 { ebb0(v0: i32): - ; not: copy v1 = iconst.i32 1 v2 = iconst.i32 2 jump ebb1(v1, v2) @@ -103,8 +102,7 @@ function %stackarg(i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i32 { ebb0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32): ; check: fill v8 ; not: v8 - brnz v0, ebb1(v8) - jump ebb1(v7) + jump ebb1(v8) ebb1(v10: i32): v11 = iadd_imm v10, 1 diff --git a/cranelift/filetests/regalloc/infinite-interference.cton b/cranelift/filetests/regalloc/infinite-interference.cton new file mode 100644 index 0000000000..8f07c4cf77 --- /dev/null +++ b/cranelift/filetests/regalloc/infinite-interference.cton @@ -0,0 +1,37 @@ +test regalloc +isa riscv + +; Here, the coalescer initially builds vreg0 = [v1, v2, v3] +; +; There's interference between v1 and v2 at the brz instruction. Isolating v2 is not going to +; resolve that conflict since v1 will just interfere with the inserted copy too. + +;function %c1(i32) -> i32 { +;ebb0(v0: i32): +; v1 = iadd_imm v0, 1 +; v2 = iconst.i32 1 +; brz v1, ebb1(v2) +; jump ebb2 +; +;ebb1(v3: i32): +; return v3 +; +;ebb2: +; jump ebb1(v1) +;} + +; Same thing with v1 and v2 swapped to reverse the order of definitions. 
+ +function %c2(i32) -> i32 { +ebb0(v0: i32): + v1 = iadd_imm v0, 1 + v2 = iconst.i32 1 + brz v2, ebb1(v1) + jump ebb2 + +ebb1(v3: i32): + return v3 + +ebb2: + jump ebb1(v2) +} diff --git a/lib/cretonne/src/ir/dfg.rs b/lib/cretonne/src/ir/dfg.rs index 50842b3fa5..25457000d7 100644 --- a/lib/cretonne/src/ir/dfg.rs +++ b/lib/cretonne/src/ir/dfg.rs @@ -2,6 +2,7 @@ use entity::{PrimaryMap, EntityMap}; use isa::TargetIsa; +use ir; use ir::builder::ReplaceBuilder; use ir::extfunc::ExtFuncData; use ir::instructions::{InstructionData, CallInfo, BranchInfo}; @@ -315,7 +316,7 @@ impl DataFlowGraph { } /// Where did a value come from? -#[derive(Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum ValueDef { /// Value is the n'th result of an instruction. Result(Inst, usize), @@ -331,6 +332,22 @@ impl ValueDef { _ => panic!("Value is not an instruction result"), } } + + /// Get the program point where the value was defined. + pub fn pp(self) -> ir::ExpandedProgramPoint { + self.into() + } + + /// Get the number component of this definition. + /// + /// When multiple values are defined at the same program point, this indicates the index of + /// this value. + pub fn num(self) -> usize { + match self { + ValueDef::Result(_, n) | + ValueDef::Param(_, n) => n, + } + } } // Internal table storage for extended values. 
diff --git a/lib/cretonne/src/regalloc/coalescing.rs b/lib/cretonne/src/regalloc/coalescing.rs index 4819969605..ff01542bd8 100644 --- a/lib/cretonne/src/regalloc/coalescing.rs +++ b/lib/cretonne/src/regalloc/coalescing.rs @@ -9,277 +9,55 @@ use cursor::{Cursor, EncCursor}; use dbg::DisplayList; use dominator_tree::{DominatorTree, DominatorTreePreorder}; use flowgraph::ControlFlowGraph; -use ir::{Layout, InstBuilder, ValueDef}; +use ir::{self, InstBuilder}; use ir::{Function, Ebb, Inst, Value, ExpandedProgramPoint}; use regalloc::affinity::Affinity; use regalloc::liveness::Liveness; -use regalloc::virtregs::VirtRegs; -use std::cmp::Ordering; +use regalloc::virtregs::{VirtReg, VirtRegs}; use std::fmt; -use std::iter::Peekable; -use std::mem; use isa::{TargetIsa, EncInfo}; use timing; -/// Dominator forest. -/// -/// This is a utility type used for merging virtual registers, where each virtual register is a -/// list of values ordered according to the dominator tree pre-order. -/// -/// A `DomForest` object is used as a buffer for building virtual registers. It lets you merge two -/// sorted lists of values while checking for interference only where necessary. -/// -/// The idea of a dominator forest was introduced here: -/// -/// Budimlic, Z., Budimlic, Z., Cooper, K. D., Cooper, K. D., Harvey, T. J., Harvey, T. J., et al. -/// (2002). Fast copy coalescing and live-range identification (Vol. 37, pp. 25–32). ACM. -/// http://doi.org/10.1145/543552.512534 -/// -/// The linear stack representation here: -/// -/// Boissinot, B., Darte, A., & Rastello, F. (2009). Revisiting out-of-SSA translation for -/// correctness, code quality and efficiency. -/// -/// Our version of the linear stack is slightly modified because we have a pre-order of the -/// dominator tree at the EBB granularity, not basic block granularity. -struct DomForest { - // The sequence of values that have been merged so far. - // In domtree pre-order order of their definitions. 
- values: Vec, - - // Stack representing the rightmost edge of the dominator forest so far, ending in the last - // element of `values`. - // - // At all times, the EBB of each element in the stack dominates the EBB of the next one, and - // all elements dominating the end of `values` are on the stack. - stack: Vec, -} - -/// A node in the dominator forest. -#[derive(Clone, Copy, Debug)] -struct Node { - value: Value, - /// Set identifier. Values in the same set are assumed to be non-interfering. - set: u8, - /// The program point where `value` is defined. - def: ExpandedProgramPoint, - /// EBB containing `def`. - ebb: Ebb, -} - -impl Node { - /// Create a node for `value`. - pub fn new(value: Value, set: u8, func: &Function) -> Node { - let def = func.dfg.value_def(value).into(); - let ebb = func.layout.pp_ebb(def); - Node { - value, - set, - def, - ebb, - } - } -} - -impl fmt::Display for Node { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}@{}:{}", self.value, self.ebb, self.set) - } -} - -impl DomForest { - /// Create a new empty dominator forest. - pub fn new() -> Self { - Self { - values: Vec::new(), - stack: Vec::new(), - } - } - - /// Clear all data structures in this dominator forest. - pub fn clear(&mut self) { - self.values.clear(); - self.stack.clear(); - } - - /// Swap the merged list with `buffer`, leaving the dominator forest empty. - /// - /// This is typically called after a successful merge to extract the merged value list. - pub fn swap(&mut self, buffer: &mut Vec) { - buffer.clear(); - mem::swap(&mut self.values, buffer); - } - - /// Add a single node to the forest. - /// - /// Update the stack so its dominance invariants are preserved. Detect a parent node on the - /// stack which is the closest one dominating the new node. - /// - /// If the pushed node's parent in the dominator forest belongs to a different set, returns - /// `Some(parent)`. 
- fn push_node( - &mut self, - node: Node, - layout: &Layout, - domtree: &DominatorTree, - preorder: &DominatorTreePreorder, - ) -> Option { - self.values.push(node.value); - - // The stack contains the current sequence of dominating defs. Pop elements until we - // find one whose EBB dominates `node.ebb`. - while let Some(top) = self.stack.pop() { - if preorder.dominates(top.ebb, node.ebb) { - // This is the right insertion spot for `node`. - self.stack.push(top); - self.stack.push(node); - - // We know here that `top.ebb` dominates `node.ebb`, and thus `node.def`. This does - // not necessarily mean that `top.def` dominates `node.def`, though. The `top.def` - // program point may be below the last branch in `top.ebb` that dominates - // `node.def`. - debug_assert!(domtree.dominates(top.ebb, node.def, layout)); - - // We do know, though, that if there is a nearest value dominating `node.def`, it - // will be on the stack. We just need to find the last stack entry that actually - // dominates. - // - // TODO: This search could be more efficient if we had access to - // `domtree.last_dominator()`. Each call to `dominates()` here ends up walking up - // the dominator tree starting from `node.ebb`. - let dom = self.stack[0..self.stack.len() - 1].iter().rposition(|n| { - domtree.dominates(n.def, node.def, layout) - }); - - // If the parent value comes from a different set, return it for interference - // checking. If the sets are equal, assume that interference is already handled. - if let Some(pos) = dom { - let parent = &self.stack[pos]; - if parent.set != node.set { - return Some(parent.value); - } - } - - // There was no opposite-set value dominating `node.def`. - return None; - } - } - - // No dominators, start a new tree in the forest. - self.stack.push(node); - None - } - - /// Try to merge two sorted sets of values. Each slice must already be sorted and free of any - /// interference. - /// - /// It is permitted for a value to appear in both lists. 
The merged sequence will only have one - /// copy of the value. - /// - /// If an interference is detected, returns `Err((a, b))` with the two conflicting values form - /// `va` and `vb` respectively. - /// - /// If the merge succeeds, returns `Ok(())`. The merged sequence can be extracted with - /// `swap()`. - pub fn try_merge( - &mut self, - va: &[Value], - vb: &[Value], - func: &Function, - domtree: &DominatorTree, - preorder: &DominatorTreePreorder, - liveness: &Liveness, - ) -> Result<(), (Value, Value)> { - self.clear(); - self.values.reserve_exact(va.len() + vb.len()); - - // Convert the two value lists into a merged sequence of nodes. - let merged = MergedNodes { - a: va.iter().map(|&value| Node::new(value, 0, func)).peekable(), - b: vb.iter().map(|&value| Node::new(value, 1, func)).peekable(), - layout: &func.layout, - preorder, - }; - let ctx = liveness.context(&func.layout); - for node in merged { - if let Some(parent) = self.push_node(node, &func.layout, domtree, preorder) { - // Check if `parent` live range contains `node.def`. - if liveness[parent].overlaps_def(node.def, func.layout.pp_ebb(node.def), ctx) { - // Interference detected. Get the `(a, b)` order right in the error. - return Err(if node.set == 0 { - (node.value, parent) - } else { - (parent, node.value) - }); - } - } - } - - Ok(()) - } -} - -/// Node-merging iterator. -/// -/// Given two ordered sequences of nodes, yield an ordered sequence containing all of them. -/// Duplicates are removed. 
-struct MergedNodes<'a, IA, IB> -where - IA: Iterator, - IB: Iterator, -{ - a: Peekable, - b: Peekable, - layout: &'a Layout, - preorder: &'a DominatorTreePreorder, -} - -impl<'a, IA, IB> Iterator for MergedNodes<'a, IA, IB> -where - IA: Iterator, - IB: Iterator, -{ - type Item = Node; - - fn next(&mut self) -> Option { - let ord = match (self.a.peek(), self.b.peek()) { - (Some(a), Some(b)) => { - // If the two values are defined at the same point, compare value numbers instead - // this is going to cause an interference conflict unless its actually the same - // value appearing in both streams. - self.preorder.pre_cmp(a.def, b.def, self.layout).then( - Ord::cmp( - &a.value, - &b.value, - ), - ) - } - (Some(_), None) => Ordering::Less, - (None, Some(_)) => Ordering::Greater, - (None, None) => return None, - }; - match ord { - Ordering::Equal => { - // The two iterators produced the same value. Just return the first one. - self.b.next(); - self.a.next() - } - Ordering::Less => self.a.next(), - Ordering::Greater => self.b.next(), - } - } -} +// # Implementation +// +// The coalescing algorithm implemented follows this paper fairly closely: +// +// Budimlic, Z., Cooper, K. D., Harvey, T. J., et al. (2002). Fast copy coalescing and +// live-range identification (Vol. 37, pp. 25–32). ACM. http://doi.org/10.1145/543552.512534 +// +// We use a more efficient dominator forest representation (a linear stack) described here: +// +// Boissinot, B., Darte, A., & Rastello, F. (2009). Revisiting out-of-SSA translation for +// correctness, code quality and efficiency. +// +// The algorithm has two main phases: +// +// Phase 1: Union-find. +// +// We use the union-find support in `VirtRegs` to build virtual registers such that EBB parameter +// values always belong to the same virtual register as their corresponding EBB arguments at the +// predecessor branches. 
Trivial interferences between parameter and argument value live ranges are +// detected and resolved before unioning congruence classes, but non-trivial interferences between +// values that end up in the same congruence class are possible. +// +// Phase 2: Dominator forests. +// +// The virtual registers formed in phase 1 can contain interferences that we need to detect and +// eliminate. By ordering the values in a virtual register according to a dominator tree pre-order, +// we can identify all interferences in the virtual register in linear time. +// +// Interfering values are isolated and virtual registers rebuilt. /// Data structures to be used by the coalescing pass. pub struct Coalescing { forest: DomForest, preorder: DominatorTreePreorder, - // Current set of coalesced values. Kept sorted and interference free. - values: Vec, + /// EBB parameter values present in the current virtual register. + params: Vec, - // New values that were created when splitting interferences. - split_values: Vec, + /// Worklist of virtual registers that need to be processed. + worklist: Vec, } /// One-shot context created once per invocation. @@ -295,8 +73,8 @@ struct Context<'a> { virtregs: &'a mut VirtRegs, forest: &'a mut DomForest, - values: &'a mut Vec, - split_values: &'a mut Vec, + params: &'a mut Vec, + worklist: &'a mut Vec, } impl Coalescing { @@ -305,8 +83,8 @@ impl Coalescing { Self { forest: DomForest::new(), preorder: DominatorTreePreorder::new(), - values: Vec::new(), - split_values: Vec::new(), + params: Vec::new(), + worklist: Vec::new(), } } @@ -314,8 +92,8 @@ impl Coalescing { /// Clear all data structures in this coalescing pass. pub fn clear(&mut self) { self.forest.clear(); - self.values.clear(); - self.split_values.clear(); + self.params.clear(); + self.worklist.clear(); } /// Convert `func` to conventional SSA form and build virtual registers in the process. 
@@ -341,212 +119,216 @@ impl Coalescing { liveness, virtregs, forest: &mut self.forest, - values: &mut self.values, - split_values: &mut self.split_values, + params: &mut self.params, + worklist: &mut self.worklist, }; - // TODO: The iteration order matters here. We should coalesce in the most important blocks - // first, so they get first pick at forming virtual registers. + // Run phase 1 (union-find) of the coalescing algorithm on the current function. for &ebb in domtree.cfg_postorder() { - for argnum in 0..context.func.dfg.num_ebb_params(ebb) { - context.coalesce_ebb_param(ebb, argnum) - } + context.union_find_ebb(ebb); } + context.finish_union_find(); + + // Run phase 2 (dominator forests) on the current function. + context.process_vregs(); } } +/// Phase 1: Union-find. +/// +/// The two entry points for phase 1 are `union_find_ebb()` and `finish_union_find`. impl<'a> Context<'a> { - /// Coalesce the `argnum`'th parameter on `ebb`. - fn coalesce_ebb_param(&mut self, ebb: Ebb, argnum: usize) { - self.split_values.clear(); - let mut succ_val = self.func.dfg.ebb_params(ebb)[argnum]; - dbg!("Processing {}/{}: {}", ebb, argnum, succ_val); - - // We want to merge the virtual register for `succ_val` with the virtual registers for - // the branch arguments in the predecessors. This may not be possible if any live - // ranges interfere, so we can insert copies to break interferences: - // - // pred: - // jump ebb1(v1) - // - // ebb1(v10: i32): - // ... - // - // In the predecessor: - // - // v2 = copy v1 - // jump ebb(v2) - // - // A predecessor copy is always required if the branch argument virtual register is - // live into the successor. - // - // In the successor: - // - // ebb1(v11: i32): - // v10 = copy v11 - // - // A successor copy is always required if the `succ_val` virtual register is live at - // any predecessor branch. 
- - while let Some(bad_value) = self.try_coalesce(argnum, succ_val, ebb) { - dbg!("Isolating interfering value {}", bad_value); - // The bad value has some conflict that can only be reconciled by excluding its - // congruence class from the new virtual register. - // - // Try to catch infinite splitting loops. The values created by splitting should never - // have irreconcilable interferences. - assert!( - !self.split_values.contains(&bad_value), - "{} was already isolated", - bad_value - ); - let split_len = self.split_values.len(); - - // The bad value can be both the successor value and a predecessor value at the same - // time. - if self.virtregs.same_class(bad_value, succ_val) { - succ_val = self.split_succ(ebb, succ_val); - } - - // Check the predecessors. - for (pred_ebb, pred_inst) in self.cfg.pred_iter(ebb) { - let pred_val = self.func.dfg.inst_variable_args(pred_inst)[argnum]; - if self.virtregs.same_class(bad_value, pred_val) { - self.split_pred(pred_inst, pred_ebb, argnum, pred_val); - } - } - - // Second loop check. - assert_ne!( - split_len, - self.split_values.len(), - "Couldn't isolate {}", - bad_value - ); - } - - let vreg = self.virtregs.unify(self.values); - dbg!( - "Coalesced {} arg {} into {} = {}", - ebb, - argnum, - vreg, - DisplayList(self.virtregs.values(vreg)) - ); - } - - /// Reset `self.values` to just the set of split values. - fn reset_values(&mut self) { - self.values.clear(); - self.values.extend_from_slice(self.split_values); - let domtree = &self.domtree; - let func = &self.func; - self.values.sort_by(|&a, &b| { - domtree.rpo_cmp(func.dfg.value_def(a), func.dfg.value_def(b), &func.layout) - }); - } - - /// Try coalescing predecessors with `succ_val`. + /// Run the union-find algorithm on the parameter values on `ebb`. /// - /// Returns a value from a congruence class that needs to be split before starting over, or - /// `None` if everything was successfully coalesced into `self.values`. 
- fn try_coalesce(&mut self, argnum: usize, succ_val: Value, succ_ebb: Ebb) -> Option { - // Initialize the value list with the split values. These are guaranteed to be - // interference free, and anything that interferes with them must be split away. - self.reset_values(); - dbg!("Trying {} with split values: {:?}", succ_val, self.values); - - // Start by adding `succ_val` so we can determine if it interferes with any of the new - // split values. If it does, we must split it. - if self.add_class(succ_val).is_err() { - return Some(succ_val); + /// This ensures that all EBB parameters will belong to the same virtual register as their + /// corresponding arguments at all predecessor branches. + pub fn union_find_ebb(&mut self, ebb: Ebb) { + let num_params = self.func.dfg.num_ebb_params(ebb); + if num_params == 0 { + return; } - for (pred_ebb, pred_inst) in self.cfg.pred_iter(succ_ebb) { - let pred_val = self.func.dfg.inst_variable_args(pred_inst)[argnum]; + self.isolate_conflicting_params(ebb, num_params); + + for i in 0..num_params { + self.union_pred_args(ebb, i); + } + } + + // Identify EBB parameter values that are live at one of the predecessor branches. + // + // Such a parameter value will conflict with any argument value at the predecessor branch, so + // it must be isolated by inserting a copy. + fn isolate_conflicting_params(&mut self, ebb: Ebb, num_params: usize) { + debug_assert_eq!(num_params, self.func.dfg.num_ebb_params(ebb)); + // The only way a parameter value can interfere with a predecessor branch is if the EBB is + // dominating the predecessor branch. That is, we are looking for loop back-edges. + for (pred_ebb, pred_inst) in self.cfg.pred_iter(ebb) { + // The quick pre-order dominance check is accurate because the EBB parameter is defined + // at the top of the EBB before any branches. 
+ if !self.preorder.dominates(ebb, pred_ebb) { + continue; + } + dbg!( - "Checking {}: {}: {}", - pred_val, + " - checking {} params at back-edge {}: {}", + num_params, pred_ebb, self.func.dfg.display_inst(pred_inst, self.isa) ); + // Now `pred_inst` is known to be a back-edge, so it is possible for parameter values + // to be live at the use. + for i in 0..num_params { + let param = self.func.dfg.ebb_params(ebb)[i]; + if self.liveness[param].reaches_use( + pred_inst, + pred_ebb, + self.liveness.context(&self.func.layout), + ) + { + self.isolate_param(ebb, param); + } + } + } + } + + // Union EBB parameter value `num` with the corresponding EBB arguments on the predecessor + // branches. + // + // Detect cases where the argument value is live-in to `ebb` so it conflicts with any EBB + // parameter. Isolate the argument in those cases before unioning it with the parameter value. + fn union_pred_args(&mut self, ebb: Ebb, argnum: usize) { + let param = self.func.dfg.ebb_params(ebb)[argnum]; + + for (pred_ebb, pred_inst) in self.cfg.pred_iter(ebb) { + let arg = self.func.dfg.inst_variable_args(pred_inst)[argnum]; + // Never coalesce incoming function parameters on the stack. These parameters are // pre-spilled, and the rest of the virtual register would be forced to spill to the // `incoming_arg` stack slot too. 
- if let ValueDef::Param(def_ebb, def_num) = self.func.dfg.value_def(pred_val) { + if let ir::ValueDef::Param(def_ebb, def_num) = self.func.dfg.value_def(arg) { if Some(def_ebb) == self.func.layout.entry_block() && self.func.signature.params[def_num].location.is_stack() { - dbg!("Isolating incoming stack parameter {}", pred_val); - let new_val = self.split_pred(pred_inst, pred_ebb, argnum, pred_val); - assert!(self.add_class(new_val).is_ok()); + dbg!("-> isolating function stack parameter {}", arg); + let new_arg = self.isolate_arg(pred_ebb, pred_inst, argnum, arg); + self.virtregs.union(param, new_arg); continue; } } - if let Err((a, b)) = self.add_class(pred_val) { - dbg!("Found conflict between {} and {}", a, b); - // We have a conflict between the already merged value `a` and one of the new - // values `b`. + // Check for basic interference: If `arg` overlaps a value defined at the entry to + // `ebb`, it can never be used as an EBB argument. + let interference = { + let lr = &self.liveness[arg]; + let ctx = self.liveness.context(&self.func.layout); + + // There are two ways the argument value can interfere with `ebb`: // - // Check if the `a` live range is fundamentally incompatible with `pred_inst`. - if self.liveness - .get(a) - .expect("No live range for interfering value") - .reaches_use( - pred_inst, - pred_ebb, - self.liveness.context(&self.func.layout), - ) - { - // Splitting at `pred_inst` wouldn't resolve the interference, so we need to - // start over. - return Some(a); - } + // 1. It is defined in a dominating EBB and live-in to `ebb`. + // 2. It is itself a parameter value for `ebb`. This case should already have been + // eliminated by `isolate_conflicting_params()`. + assert!( + lr.def() != ebb.into(), + "{} parameter {} was missed by isolate_conflicting_params()", + ebb, + arg + ); - // The local conflict could likely be avoided by splitting at this predecessor, so - // try that. 
This split is not necessarily required, but it allows us to make - // progress. - let new_val = self.split_pred(pred_inst, pred_ebb, argnum, pred_val); + // The only other possibility is that `arg` is live-in to `ebb`. + lr.is_livein(ebb, ctx) + }; - // If this tiny new live range can't be merged, there is something in the already - // merged values that is fundamentally incompatible with `pred_inst`, and we need - // to start over after removing that value. - // TODO: It is unfortunate that we discover this *after* splitting. It would have - // been better if we could detect and isolate `merged` before splitting. - if let Err((merged, _)) = self.add_class(new_val) { - dbg!("Splitting didn't help: {} interferes", merged); - // We need to start over, isolating the bad value. - return Some(merged); - } + if interference { + let new_arg = self.isolate_arg(pred_ebb, pred_inst, argnum, arg); + self.virtregs.union(param, new_arg); + } else { + self.virtregs.union(param, arg); } } - - None } - /// Try merging the congruence class for `value` into `self.values`. - /// - /// Leave `self.values` unchanged on failure. - fn add_class(&mut self, value: Value) -> Result<(), (Value, Value)> { - self.forest.try_merge( - self.values, - self.virtregs.congruence_class(&value), - self.func, - self.domtree, - self.preorder, - self.liveness, - )?; - self.forest.swap(&mut self.values); - Ok(()) + // Isolate EBB parameter value `param` on `ebb`. + // + // When `param=v10`: + // + // ebb1(v10: i32): + // foo + // + // becomes: + // + // ebb1(v11: i32): + // v10 = copy v11 + // foo + // + // This function inserts the copy and updates the live ranges of the old and new parameter + // values. Returns the new parameter value. 
+ fn isolate_param(&mut self, ebb: Ebb, param: Value) -> Value { + debug_assert_eq!( + self.func.dfg.value_def(param).pp(), + ExpandedProgramPoint::Ebb(ebb) + ); + let ty = self.func.dfg.value_type(param); + let new_val = self.func.dfg.replace_ebb_param(param, ty); + + // Insert a copy instruction at the top of `ebb`. + let mut pos = EncCursor::new(self.func, self.isa).at_first_inst(ebb); + pos.ins().with_result(param).copy(new_val); + let inst = pos.built_inst(); + self.liveness.move_def_locally(param, inst); + + dbg!( + "-> inserted {}, following {}({}: {})", + pos.display_inst(inst), + ebb, + new_val, + ty + ); + + // Create a live range for the new value. + // TODO: Should we handle ghost values? + let affinity = Affinity::new( + &self.encinfo + .operand_constraints(pos.func.encodings[inst]) + .expect("Bad copy encoding") + .outs + [0], + ); + self.liveness.create_dead(new_val, ebb, affinity); + self.liveness.extend_locally( + new_val, + ebb, + inst, + &pos.func.layout, + ); + + new_val } - /// Split the congruence class for the `argnum` argument to `pred_inst` by inserting a copy. - fn split_pred( + // Isolate the EBB argument `pred_val` from the predecessor `(pred_ebb, pred_inst)`. + // + // It is assumed that `pred_inst` is a branch instruction in `pred_ebb` whose `argnum`'th EBB + // argument is `pred_val`. Since the argument value interferes with the corresponding EBB + // parameter at the destination, a copy is used instead: + // + // brnz v1, ebb2(v10) + // + // Becomes: + // + // v11 = copy v10 + // brnz v1, ebb2(v11) + // + // This way the interference with the EBB parameter is avoided. + // + // A live range for the new value is created while the live range for `pred_val` is left + // unaltered. + // + // The new argument value is returned. 
+ fn isolate_arg( &mut self, - pred_inst: Inst, pred_ebb: Ebb, + pred_inst: Inst, argnum: usize, pred_val: Value, ) -> Value { @@ -554,14 +336,8 @@ impl<'a> Context<'a> { let copy = pos.ins().copy(pred_val); let inst = pos.built_inst(); - dbg!( - "Inserted {}, before {}: {}", - pos.display_inst(inst), - pred_ebb, - pos.display_inst(pred_inst) - ); - // Create a live range for the new value. + // TODO: Handle affinity for ghost values. let affinity = Affinity::new( &self.encinfo .operand_constraints(pos.func.encodings[inst]) @@ -578,46 +354,337 @@ impl<'a> Context<'a> { ); pos.func.dfg.inst_variable_args_mut(pred_inst)[argnum] = copy; - self.split_values.push(copy); + + dbg!( + "-> inserted {}, before {}: {}", + pos.display_inst(inst), + pred_ebb, + pos.display_inst(pred_inst) + ); + copy } - /// Split the congruence class for the successor EBB value itself. - fn split_succ(&mut self, ebb: Ebb, succ_val: Value) -> Value { - let ty = self.func.dfg.value_type(succ_val); - let new_val = self.func.dfg.replace_ebb_param(succ_val, ty); - - // Insert a copy instruction at the top of ebb. - let mut pos = EncCursor::new(self.func, self.isa).at_first_inst(ebb); - pos.ins().with_result(succ_val).copy(new_val); - let inst = pos.built_inst(); - self.liveness.move_def_locally(succ_val, inst); - - dbg!( - "Inserted {}, following {}({}: {})", - pos.display_inst(inst), - ebb, - new_val, - ty - ); - - // Create a live range for the new value. - let affinity = Affinity::new( - &self.encinfo - .operand_constraints(pos.func.encodings[inst]) - .expect("Bad copy encoding") - .outs - [0], - ); - self.liveness.create_dead(new_val, ebb, affinity); - self.liveness.extend_locally( - new_val, - ebb, - inst, - &pos.func.layout, - ); - - self.split_values.push(new_val); - new_val + /// Finish the union-find part of the coalescing algorithm. 
+ /// + /// This builds the initial set of virtual registers as the transitive/reflexive/symmetric + /// closure of the relation formed by EBB parameter-argument pairs found by `union_find_ebb()`. + fn finish_union_find(&mut self) { + self.virtregs.finish_union_find(None); + dbg!("After union-find phase:{}", self.virtregs); + } +} + +/// Phase 2: Dominator forests. +/// +/// The main entry point is `process_vregs()`. +impl<'a> Context<'a> { + /// Check all virtual registers for interference and fix conflicts. + pub fn process_vregs(&mut self) { + for vreg in self.virtregs.all_virtregs() { + self.process_vreg(vreg); + while let Some(vr) = self.worklist.pop() { + self.process_vreg(vr); + } + } + } + + // Check `vreg` for interferences and fix conflicts. + fn process_vreg(&mut self, vreg: VirtReg) { + if self.analyze_vreg(vreg) { + self.synthesize_vreg(vreg); + } + } + + // Check `vreg` for interferences and choose values to isolate. + // + // We use a Budimlic dominator forest to check for interferences between the values in `vreg` + // and identify values that should be isolated. + // + // Returns true if `vreg` has conflicts that need to be fixed. Additionally leaves state in + // member variables: + // + // - `self.params` contains all the EBB parameter values that were present in the virtual + // register. + // - `self.forest` contains the set of values that should be isolated from the virtual register. + fn analyze_vreg(&mut self, vreg: VirtReg) -> bool { + // Order the values according to the dominator pre-order of their definition. + let dfg = &self.func.dfg; + let layout = &self.func.layout; + let preorder = self.preorder; + let values = self.virtregs.sort_values(vreg, |a, b| { + let da = dfg.value_def(a); + let db = dfg.value_def(b); + preorder.pre_cmp(da, db, layout).then( + da.num().cmp(&db.num()), + ) + }); + dbg!("Analyzing {} = {}", vreg, DisplayList(values)); + + // Now push the values in order to the dominator forest. 
This gives us the closest + // dominating value def for each of the values. + self.params.clear(); + self.forest.clear(); + for &value in values { + let node = Node::new(value, self.func); + + // Remember the parameter values in case we need to re-synthesize virtual registers. + if let ExpandedProgramPoint::Ebb(_) = node.def { + self.params.push(value); + } + + // Push this value and get the nearest dominating def back. + let parent = match self.forest.push_value( + node, + self.func, + self.domtree, + self.preorder, + ) { + None => continue, + Some(p) => p, + }; + + // Check for interference between `parent` and `value`. Since `parent` dominates + // `value`, we only have to check if it overlaps the definition. + let ctx = self.liveness.context(&self.func.layout); + if !self.liveness[parent].overlaps_def(node.def, node.ebb, ctx) { + // No interference, both values can stay in the virtual register. + continue; + } + + // The two values are interfering, so they can't both be in the same virtual register. + // We need to pick one to isolate. It's hard to pick a heuristic that only looks at two + // values since an optimal solution is a global problem involving all the values in the + // virtual register. + // + // We choose to always isolate the dominating parent value for two reasons: + // + // 1. We avoid the case of a parent value with a very long live range pushing many + // following values out of the virtual register. + // + // 2. In the case of a value that is live across a branch to the definition of a + // parameter in the virtual register, our splitting method in `synthesize_vreg` + // doesn't actually resolve the interference unless we're trying to isolate the + // first value. This heuristic will at least pick the first value on a second + // attempt. This is actually a correctness issue - we could loop infinitely + // otherwise. See the `infinite-interference.cton` test case. 
+ dbg!("-> isolating {} which overlaps def of {}", parent, value); + self.forest.drop_value(parent); + } + + let dropped = self.forest.prepare_dropped(); + assert!(dropped < values.len()); + dropped != 0 + } + + /// Destroy and rebuild `vreg`. + /// + /// Use `self.params` to rebuild the virtual register, but this time making sure that dropped + /// values in `self.forest` are isolated from non-dropped values. This may cause multiple new + /// virtual registers to be formed. + /// + /// All new virtual registers are appended to `self.worklist`. + fn synthesize_vreg(&mut self, vreg: VirtReg) { + dbg!("Synthesizing {} from {}", vreg, DisplayList(self.params)); + self.virtregs.remove(vreg); + + while let Some(param) = self.params.pop() { + let param_dropped = self.forest.is_dropped(param); + let (ebb, argnum) = match self.func.dfg.value_def(param) { + ir::ValueDef::Param(e, n) => (e, n), + ir::ValueDef::Result(_, _) => panic!("{} expected to be EBB parameter", param), + }; + + // Union the EBB parameter with corresponding arguments on the predecessor branches, + // but make sure to isolate dropped values. + // + // Compare `union_pred_args()` which runs during phase 1. We don't need to check for + // special cases here since they have already been eliminated during phase 1. We + // already know that: + // + // 1. `arg` is not live-in to `ebb`. + // 2. `arg` is not a function argument on the stack. + for (pred_ebb, pred_inst) in self.cfg.pred_iter(ebb) { + let arg = self.func.dfg.inst_variable_args(pred_inst)[argnum]; + let arg_dropped = self.forest.is_dropped(arg); + + // We don't want to union dropped values with each other because we can't ensure + // that we are actually making progress -- the new virtual register of dropped + // values may have its own interferences and so on. + // + // TODO: Maintain a secondary dominator forest to keep track of dropped values that + // would be allowed to be unioned together.
+ if param_dropped || arg_dropped { + dbg!(" - {}#{}: {} isolated from {}", ebb, argnum, param, arg); + let new_arg = self.isolate_arg(pred_ebb, pred_inst, argnum, arg); + self.virtregs.union(param, new_arg); + } else { + self.virtregs.union(param, arg); + } + } + } + + // TODO: Get back the new vregs so they can be re-checked. + let old_len = self.worklist.len(); + self.virtregs.finish_union_find(Some(self.worklist)); + dbg!("-> new vregs {}", DisplayList(&self.worklist[old_len..])); + } +} + +/// Dominator forest. +/// +/// This is a utility type used for detecting interference in virtual registers, where each virtual +/// register is a list of values ordered according to the dominator tree pre-order. +/// +/// The idea of a dominator forest was introduced in the Budimlic paper and the linear stack +/// representation in the Boissinot paper. Our version of the linear stack is slightly modified +/// because we have a pre-order of the dominator tree at the EBB granularity, not basic block +/// granularity. +/// +/// Values are pushed in dominator tree pre-order of their definitions, and for each value pushed, +/// `push_value` will return the nearest previously pushed value that dominates the definition. +#[allow(dead_code)] +struct DomForest { + // Stack representing the rightmost edge of the dominator forest so far, ending in the last + // element of `values`. + // + // At all times, the EBB of each element in the stack dominates the EBB of the next one, and + // all elements dominating the end of `values` are on the stack. + stack: Vec<Node>, + + // The index into `stack` of the last dominating node returned by `push_value`. + last_dom: Option<usize>, + + // List of values that have been dropped from the forest because they were interfering with + // another member. + // + // This list is initially just appended to, then it is sorted for quick member checks with + // `is_dropped()`. + dropped: Vec<Value>, +} + +/// A node in the dominator forest.
+#[derive(Clone, Copy, Debug)] +#[allow(dead_code)] +struct Node { + value: Value, + /// The program point where `value` is defined. + def: ExpandedProgramPoint, + /// EBB containing `def`. + ebb: Ebb, +} + +impl Node { + /// Create a node for `value`. + pub fn new(value: Value, func: &Function) -> Node { + let def = func.dfg.value_def(value).pp(); + let ebb = func.layout.pp_ebb(def); + Node { value, def, ebb } + } +} + +impl fmt::Display for Node { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}@{}", self.value, self.ebb) + } +} + +impl DomForest { + /// Create a new empty dominator forest. + pub fn new() -> Self { + Self { + stack: Vec::new(), + last_dom: None, + dropped: Vec::new(), + } + } + + /// Clear all data structures in this dominator forest. + pub fn clear(&mut self) { + self.stack.clear(); + self.last_dom = None; + self.dropped.clear(); + } + + /// Add a single value to the forest. + /// + /// Update the stack so its dominance invariants are preserved. Detect a parent node on the + /// stack which is the closest one dominating the new node and return it. + fn push_value( + &mut self, + node: Node, + func: &Function, + domtree: &DominatorTree, + preorder: &DominatorTreePreorder, + ) -> Option<Value> { + // The stack contains the current sequence of dominating defs. Pop elements until we + // find one whose EBB dominates `node.ebb`. + while let Some(top) = self.stack.pop() { + if preorder.dominates(top.ebb, node.ebb) { + // This is the right insertion spot for `node`. + self.stack.push(top); + self.stack.push(node); + + // We know here that `top.ebb` dominates `node.ebb`, and thus `node.def`. This does + // not necessarily mean that `top.def` dominates `node.def`, though. The `top.def` + // program point may be below the last branch in `top.ebb` that dominates + // `node.def`.
+ debug_assert!(domtree.dominates(top.ebb, node.def, &func.layout)); + + // We do know, though, that if there is a nearest value dominating `node.def`, it + // will be on the stack. We just need to find the last stack entry that actually + // dominates. + // + // TODO: This search could be more efficient if we had access to + // `domtree.last_dominator()`. Each call to `dominates()` here ends up walking up + // the dominator tree starting from `node.ebb`. + self.last_dom = self.stack[0..self.stack.len() - 1].iter().rposition(|n| { + domtree.dominates(n.def, node.def, &func.layout) + }); + + // If there is a dominating parent value, return it for interference checking. + return self.last_dom.map(|pos| self.stack[pos].value); + } + } + + // No dominators, start a new tree in the forest. + self.stack.push(node); + None + } + + /// Drop `value` from the forest and add it to the `dropped` list. + /// + /// The value must be either the last value passed to `push_value` or the dominating value + /// returned from the call. + pub fn drop_value(&mut self, value: Value) { + self.dropped.push(value); + + // Are they dropping the last value pushed? + if self.stack.last().expect("Nothing pushed").value == value { + self.stack.pop(); + } else { + // Otherwise, they must be dropping the last dominator. + let pos = self.last_dom.take().expect("No last dominator"); + let node = self.stack.remove(pos); + assert_eq!(node.value, value, "Inconsistent value to drop_value"); + } + } + + /// Prepare the set of dropped values to be queried with `is_dropped()`. + /// + /// Returns the number of dropped values. + pub fn prepare_dropped(&mut self) -> usize { + self.stack.clear(); + if !self.dropped.is_empty() { + self.dropped.sort_unstable(); + dbg!("-> dropped {}", DisplayList(&self.dropped)); + } + self.dropped.len() + } + + /// Check if `value` was dropped. 
+ pub fn is_dropped(&self, value: Value) -> bool { + debug_assert!(self.stack.is_empty(), "Call prepare_dropped first"); + self.dropped.binary_search(&value).is_ok() } }