Add a coalescing pass to the register allocator.

Coalescing means creating virtual registers and transforming the code into conventional SSA form. This means that every value used as a branch argument will belong to the same virtual register as the corresponding EBB argument value. Conventional SSA form makes it easy to avoid memory-memory copies when spilling values, and the virtual registers can be used as hints when picking registers too. This reduces the number of register moves needed for EBB arguments.
2017-06-21 09:24:12 -07:00
parent d5055275c4
commit 85b624d13b
5 changed files with 661 additions and 0 deletions
--- a/lib/cretonne/src/regalloc/coalescing.rs
+++ b/lib/cretonne/src/regalloc/coalescing.rs
@@ -0,0 +1,530 @@
+//! Constructing conventional SSA form.
+//!
+//! Conventional SSA form is a subset of SSA form where any (transitively) phi-related values do
+//! not interfere. We construct CSSA by building virtual registers that are as large as possible
+//! and inserting copies where necessary such that all values passed to an EBB argument will belong
+//! to the same virtual register as the EBB argument value itself.
+
+use dbg::DisplayList;
+use dominator_tree::DominatorTree;
+use flowgraph::{ControlFlowGraph, BasicBlock};
+use ir::{DataFlowGraph, Layout, Cursor, InstBuilder};
+use ir::{Function, Ebb, Inst, Value, ExpandedProgramPoint};
+use regalloc::affinity::Affinity;
+use regalloc::liveness::Liveness;
+use regalloc::virtregs::VirtRegs;
+use std::cmp::Ordering;
+use std::iter::Peekable;
+use std::mem;
+use isa::{TargetIsa, EncInfo};
+
+/// Dominator forest.
+///
+/// This is a utility type used for merging virtual registers, where each virtual register is a
+/// list of values ordered according to `DominatorTree::rpo_cmp`.
+///
+/// A `DomForest` object is used as a buffer for building virtual registers. It lets you merge two
+/// sorted lists of values while checking for interference only where necessary.
+///
+/// The idea of a dominator forest was introduced here:
+///
+/// Budimlic, Z., Cooper, K. D., Harvey, T. J., et al.
+/// (2002). Fast copy coalescing and live-range identification (Vol. 37, pp. 25–32). ACM.
+/// http://doi.org/10.1145/543552.512534
+///
+/// The linear stack representation was introduced here:
+///
+/// Boissinot, B., Darte, A., & Rastello, F. (2009). Revisiting out-of-SSA translation for
+/// correctness, code quality and efficiency. Presented at the 7th IEEE/ACM CGO symposium.
+struct DomForest {
+    // The sequence of values that have been merged so far, in RPO order of their defs.
+    values: Vec<Value>,
+
+    // Stack representing the rightmost edge of the dominator forest so far, ending in the last
+    // element of `values`. At all times, each element in the stack dominates the next one, and all
+    // elements dominating the end of `values` are on the stack.
+    stack: Vec<Node>,
+}
+
+/// A node in the dominator forest: a value tagged with its source set and definition point.
+#[derive(Clone, Copy, Debug)]
+struct Node {
+    value: Value,
+    /// Set id (`try_merge`: 0 = `va`, 1 = `vb`). Same-set values are assumed non-interfering.
+    set: u8,
+    /// The program point where `value` is defined, as reported by `DataFlowGraph::value_def`.
+    def: ExpandedProgramPoint,
+}
+
+impl Node {
+    /// Create a node for `value` in `set`, recording where `dfg` says `value` is defined.
+    pub fn new(value: Value, set: u8, dfg: &DataFlowGraph) -> Node {
+        Node {
+            value,
+            set,
+            def: dfg.value_def(value).into(),
+        }
+    }
+}
+
+/// Methods for building the forest. `push_node` pushes a node to `stack` and updates `stack` so
+/// it contains all dominator forest ancestors of the pushed value.
+///
+
+impl DomForest {
+    /// Create a new dominator forest with no merged values and an empty stack.
+    pub fn new() -> DomForest {
+        DomForest {
+            values: Vec::new(),
+            stack: Vec::new(),
+        }
+    }
+
+    /// Swap the merged list with `buffer`, leaving the forest's value list empty.
+    ///
+    /// This is typically called after a successful merge to extract the merged value list.
+    pub fn swap(&mut self, buffer: &mut Vec<Value>) {
+        buffer.clear(); // Discard the old contents so `self.values` ends up empty.
+        mem::swap(&mut self.values, buffer); // `self.stack` is not touched here.
+    }
+
+    /// Add a single node to the forest, appending its value to `values`.
+    ///
+    /// Update the stack so its dominance invariants are preserved. Detect a parent node on the
+    /// stack which is the closest one dominating the new node.
+    ///
+    /// If the pushed node's parent in the dominator forest belongs to a different set, returns
+    /// `Some(parent)`. Returns `None` if the parent is in the same set or there is no parent.
+    fn push_node(&mut self, node: Node, layout: &Layout, domtree: &DominatorTree) -> Option<Value> {
+        self.values.push(node.value);
+
+        // The stack contains the current sequence of dominating defs. Pop elements until we
+        // find one that dominates `node`; popped elements that don't are dropped for good.
+        while let Some(top) = self.stack.pop() {
+            if domtree.dominates(top.def, node.def, layout) {
+                // `top` is the parent of `node`: put it back and push `node` on top of it.
+                self.stack.push(top);
+                self.stack.push(node);
+                // If the parent value comes from a different set, return it for interference
+                // checking. If the sets are equal, assume that interference is already handled.
+                if top.set != node.set {
+                    return Some(top.value);
+                } else {
+                    return None;
+                }
+            }
+        }
+
+        // No dominators on the (now empty) stack, start a new tree in the forest.
+        self.stack.push(node);
+        None
+    }
+
+    /// Try to merge two sorted sets of values. Each slice must already be sorted and free of any
+    /// interference.
+    ///
+    /// It is permitted for a value to appear in both lists. The merged sequence will only have one
+    /// copy of the value.
+    ///
+    /// If an interference is detected, returns `Err((a, b))` with the two conflicting values from
+    /// `va` and `vb` respectively.
+    ///
+    /// If the merge succeeds, returns `Ok(())`. The merged sequence can be extracted with
+    /// `swap()`.
+    pub fn try_merge(&mut self,
+                     va: &[Value],
+                     vb: &[Value],
+                     dfg: &DataFlowGraph,
+                     layout: &Layout,
+                     domtree: &DominatorTree,
+                     liveness: &Liveness)
+                     -> Result<(), (Value, Value)> {
+        self.stack.clear(); // Discard any state left over from a previous merge.
+        self.values.clear();
+        self.values.reserve(va.len() + vb.len());
+
+        // Convert the two value lists into a merged sequence of nodes, tagging `va` as set 0
+        let merged = MergedNodes {
+            a: va.iter().map(|&value| Node::new(value, 0, dfg)).peekable(),
+            b: vb.iter().map(|&value| Node::new(value, 1, dfg)).peekable(),
+            layout,
+            domtree,
+        };
+        for node in merged {
+            if let Some(parent) = self.push_node(node, layout, domtree) {
+                // `parent` is from the other set. Check if its live range contains `node.def`.
+                let lr = liveness
+                    .get(parent)
+                    .expect("No live range for parent value");
+                if lr.overlaps_def(node.def, layout.pp_ebb(node.def), layout) {
+                    // Interference detected. Get the `(a, b)` order right in the error.
+                    return Err(if node.set == 0 {
+                                   (node.value, parent)
+                               } else {
+                                   (parent, node.value)
+                               });
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+/// Node-merging iterator.
+///
+/// Given two sequences of nodes ordered by `rpo_cmp` of their defs, yield an ordered sequence
+/// containing all of them. A value appearing in both sequences is only yielded once.
+struct MergedNodes<'a, IA, IB>
+    where IA: Iterator<Item = Node>,
+          IB: Iterator<Item = Node>
+{
+    a: Peekable<IA>,
+    b: Peekable<IB>,
+    layout: &'a Layout, // Passed to `rpo_cmp` when ordering the two heads.
+    domtree: &'a DominatorTree,
+}
+
+impl<'a, IA, IB> Iterator for MergedNodes<'a, IA, IB>
+    where IA: Iterator<Item = Node>,
+          IB: Iterator<Item = Node>
+{
+    type Item = Node;
+
+    fn next(&mut self) -> Option<Node> {
+        let ord = match (self.a.peek(), self.b.peek()) {
+            (Some(a), Some(b)) => {
+                // If the two values are defined at the same point, compare value numbers instead.
+                // This is going to cause an interference conflict unless it's actually the same
+                // value appearing in both streams.
+                self.domtree
+                    .rpo_cmp(a.def, b.def, self.layout)
+                    .then(Ord::cmp(&a.value, &b.value))
+            }
+            (Some(_), None) => Ordering::Less, // Only `a` has nodes left.
+            (None, Some(_)) => Ordering::Greater, // Only `b` has nodes left.
+            (None, None) => return None,
+        };
+        match ord {
+            Ordering::Equal => {
+                // The two iterators produced the same value. Advance both, return the `a` node.
+                self.b.next();
+                self.a.next()
+            }
+            Ordering::Less => self.a.next(),
+            Ordering::Greater => self.b.next(),
+        }
+    }
+}
+
+/// Data structures to be used by the coalescing pass; `conventional_ssa` lends them to a `Context`.
+pub struct Coalescing {
+    forest: DomForest, // Buffer used for merging value lists.
+
+    // Current set of coalesced values. Kept sorted and interference free.
+    values: Vec<Value>,
+
+    // New values that were created when splitting interferences.
+    split_values: Vec<Value>,
+}
+
+/// One-shot context created once per `Coalescing::conventional_ssa` invocation.
+struct Context<'a> {
+    isa: &'a TargetIsa,
+    encinfo: EncInfo, // Obtained from `isa.encoding_info()`.
+
+    func: &'a mut Function,
+    domtree: &'a DominatorTree,
+    liveness: &'a mut Liveness,
+    virtregs: &'a mut VirtRegs,
+
+    forest: &'a mut DomForest, // The three fields below are borrowed from `Coalescing`.
+    values: &'a mut Vec<Value>,
+    split_values: &'a mut Vec<Value>,
+}
+
+impl Coalescing {
+    /// Create a new coalescing pass with an empty forest and empty value buffers.
+    pub fn new() -> Coalescing {
+        Coalescing {
+            forest: DomForest::new(),
+            values: Vec::new(),
+            split_values: Vec::new(),
+        }
+
+    }
+
+    /// Convert `func` to conventional SSA form and build virtual registers in the process.
+    pub fn conventional_ssa(&mut self,
+                            isa: &TargetIsa,
+                            func: &mut Function,
+                            cfg: &ControlFlowGraph,
+                            domtree: &DominatorTree,
+                            liveness: &mut Liveness,
+                            virtregs: &mut VirtRegs) {
+        dbg!("Coalescing for:\n{}", func.display(isa));
+        let mut context = Context {
+            isa,
+            encinfo: isa.encoding_info(),
+            func,
+            domtree,
+            liveness,
+            virtregs,
+            forest: &mut self.forest,
+            values: &mut self.values,
+            split_values: &mut self.split_values,
+        };
+
+        // TODO: The iteration order matters here. We should coalesce in the most important blocks
+        // first, so they get first pick at forming virtual registers.
+        for &ebb in domtree.cfg_postorder() {
+            let preds = cfg.get_predecessors(ebb);
+            if !preds.is_empty() { // EBBs without predecessor branches are skipped.
+                for argnum in 0..context.func.dfg.num_ebb_args(ebb) {
+                    context.coalesce_ebb_arg(ebb, argnum, preds)
+                }
+            }
+        }
+    }
+}
+
+impl<'a> Context<'a> {
+    /// Coalesce the `argnum`'th argument to `ebb` with the matching branch arguments in `preds`.
+    fn coalesce_ebb_arg(&mut self, ebb: Ebb, argnum: usize, preds: &[BasicBlock]) {
+        self.split_values.clear();
+        let mut succ_val = self.func.dfg.ebb_args(ebb)[argnum];
+        dbg!("Processing {}/{}: {}", ebb, argnum, succ_val);
+
+        // We want to merge the virtual register for `succ_val` with the virtual registers for
+        // the branch arguments in the predecessors. This may not be possible if any live
+        // ranges interfere, so we can insert copies to break interferences:
+        //
+        // pred:
+        //     jump ebb1(v1)
+        //
+        // ebb1(v10: i32):
+        //      ...
+        //
+        // In the predecessor:
+        //
+        //     v2 = copy v1
+        //     jump ebb1(v2)
+        //
+        // A predecessor copy is always required if the branch argument virtual register is
+        // live into the successor.
+        //
+        // In the successor:
+        //
+        // ebb1(v11: i32):
+        //     v10 = copy v11
+        //
+        // A successor copy is always required if the `succ_val` virtual register is live at
+        // any predecessor branch.
+
+        while let Some(bad_value) = self.try_coalesce(argnum, succ_val, preds) {
+            dbg!("Isolating interfering value {}", bad_value);
+            // The bad value has some conflict that can only be reconciled by excluding its
+            // congruence class from the new virtual register.
+            //
+            // Try to catch infinite splitting loops. The values created by splitting should never
+            // have irreconcilable interferences.
+            assert!(!self.split_values.contains(&bad_value),
+                    "{} was already isolated",
+                    bad_value);
+            let split_len = self.split_values.len();
+
+            // The bad value can be both the successor value and a predecessor value at the same
+            // time, so both checks below are performed unconditionally.
+            if self.virtregs.same_class(bad_value, succ_val) {
+                succ_val = self.split_succ(ebb, succ_val);
+            }
+
+            // Check the predecessors, splitting every branch argument in the bad value's class.
+            for &(pred_ebb, pred_inst) in preds {
+                let pred_val = self.func.dfg.inst_variable_args(pred_inst)[argnum];
+                if self.virtregs.same_class(bad_value, pred_val) {
+                    self.split_pred(pred_inst, pred_ebb, argnum, pred_val);
+                }
+            }
+
+            // Second loop check: this iteration must have created at least one split value.
+            assert_ne!(split_len,
+                       self.split_values.len(),
+                       "Couldn't isolate {}",
+                       bad_value);
+        }
+
+        let vreg = self.virtregs.unify(self.values); // `try_coalesce` left the merged list here.
+        dbg!("Coalesced {} arg {} into {} = {}",
+             ebb,
+             argnum,
+             vreg,
+             DisplayList(self.virtregs.values(vreg)));
+    }
+
+    /// Reset `self.values` to just the split values, sorted by `rpo_cmp` of their definitions.
+    fn reset_values(&mut self) {
+        self.values.clear();
+        self.values.extend_from_slice(self.split_values);
+        let domtree = &self.domtree; // Borrow the fields individually for the sort closure.
+        let func = &self.func;
+        self.values
+            .sort_by(|&a, &b| {
+                         domtree.rpo_cmp(func.dfg.value_def(a), func.dfg.value_def(b), &func.layout)
+                     });
+    }
+
+    /// Try coalescing predecessors with `succ_val`.
+    ///
+    /// Returns a value from a congruence class that needs to be split before starting over, or
+    /// `None` if everything was successfully coalesced into `self.values`.
+    fn try_coalesce(&mut self,
+                    argnum: usize,
+                    succ_val: Value,
+                    preds: &[BasicBlock])
+                    -> Option<Value> {
+        // Initialize the value list with the split values. These are guaranteed to be
+        // interference free, and anything that interferes with them must be split away.
+        self.reset_values();
+        dbg!("Trying {} with split values: {:?}", succ_val, self.values);
+
+        // Start by adding `succ_val` so we can determine if it interferes with any of the new
+        // split values. If it does, we must split it.
+        if self.add_class(succ_val).is_err() {
+            return Some(succ_val);
+        }
+
+        for &(pred_ebb, pred_inst) in preds {
+            let pred_val = self.func.dfg.inst_variable_args(pred_inst)[argnum];
+            dbg!("Checking {}: {}: {}",
+                 pred_val,
+                 pred_ebb,
+                 self.func.dfg.display_inst(pred_inst));
+            if let Err((a, b)) = self.add_class(pred_val) {
+                dbg!("Found conflict between {} and {}", a, b);
+                // We have a conflict between the already merged value `a` and one of the new
+                // values `b`.
+                //
+                // Check if the `a` live range is fundamentally incompatible with `pred_inst`.
+                if self.liveness
+                       .get(a)
+                       .expect("No live range for interfering value")
+                       .reaches_use(pred_inst, pred_ebb, &self.func.layout) {
+                    // Splitting at `pred_inst` wouldn't resolve the interference, so we need to
+                    // start over after the caller has isolated the class of `a`.
+                    return Some(a);
+                }
+
+                // The local conflict could be avoided by splitting at this predecessor, so try
+                // that. This split is not necessarily required, but it allows us to make progress.
+                let new_val = self.split_pred(pred_inst, pred_ebb, argnum, pred_val);
+                assert!(self.add_class(new_val).is_ok(),
+                        "Splitting didn't resolve conflict.");
+            }
+        }
+
+        None
+    }
+
+    /// Try merging the congruence class for `value` into `self.values`.
+    ///
+    /// Leave `self.values` unchanged on failure, returning the `(a, b)` pair from `try_merge`.
+    fn add_class(&mut self, value: Value) -> Result<(), (Value, Value)> {
+        self.forest
+            .try_merge(&self.values,
+                       self.virtregs.congruence_class(&value),
+                       &self.func.dfg,
+                       &self.func.layout,
+                       self.domtree,
+                       self.liveness)?; // On interference, return before the swap below.
+        self.forest.swap(&mut self.values);
+        Ok(())
+    }
+
+    /// Split the congruence class for the `argnum` argument to `pred_inst` by inserting a copy.
+    fn split_pred(&mut self,
+                  pred_inst: Inst,
+                  pred_ebb: Ebb,
+                  argnum: usize,
+                  pred_val: Value)
+                  -> Value {
+        let copy;
+        {
+            let mut pos = Cursor::new(&mut self.func.layout);
+            pos.goto_inst(pred_inst);
+            copy = self.func.dfg.ins(&mut pos).copy(pred_val);
+        }
+        let inst = self.func.dfg.value_def(copy).unwrap_inst();
+        let ty = self.func.dfg.value_type(copy);
+
+        dbg!("Inserted {}, before {}: {}",
+             self.func.dfg.display_inst(inst),
+             pred_ebb,
+             self.func.dfg.display_inst(pred_inst));
+
+        // Give the new copy instruction an encoding.
+        let encoding = self.isa
+            .encode(&self.func.dfg, &self.func.dfg[inst], ty)
+            .expect("Can't encode copy");
+        *self.func.encodings.ensure(inst) = encoding;
+
+        // Create a live range for the new value, from the copy up to its use at `pred_inst`.
+        let affinity = Affinity::new(&self.encinfo
+                                          .operand_constraints(encoding)
+                                          .expect("Bad copy encoding")
+                                          .outs
+                                          [0]);
+        self.liveness.create_dead(copy, inst, affinity);
+        self.liveness
+            .extend_locally(copy, pred_ebb, pred_inst, &self.func.layout);
+
+        self.func.dfg.inst_variable_args_mut(pred_inst)[argnum] = copy; // Branch now passes `copy`.
+        self.split_values.push(copy);
+        copy
+    }
+
+    /// Split the congruence class for the successor EBB value itself; returns the new EBB value.
+    fn split_succ(&mut self, ebb: Ebb, succ_val: Value) -> Value {
+        let ty = self.func.dfg.value_type(succ_val);
+        let new_val = self.func.dfg.replace_ebb_arg(succ_val, ty);
+
+        // Insert a copy instruction at the top of ebb which redefines `succ_val` from `new_val`.
+        {
+            let mut pos = Cursor::new(&mut self.func.layout);
+            pos.goto_top(ebb);
+            pos.next_inst();
+            self.func
+                .dfg
+                .ins(&mut pos)
+                .with_result(succ_val)
+                .copy(new_val);
+        }
+        let inst = self.func.dfg.value_def(succ_val).unwrap_inst();
+        self.liveness.move_def_locally(succ_val, inst); // `succ_val` is now defined by the copy.
+
+        dbg!("Inserted {}, following {}({}: {})",
+             self.func.dfg.display_inst(inst),
+             ebb,
+             new_val,
+             ty);
+
+        // Give the new copy instruction an encoding.
+        let encoding = self.isa
+            .encode(&self.func.dfg, &self.func.dfg[inst], ty)
+            .expect("Can't encode copy");
+        *self.func.encodings.ensure(inst) = encoding;
+
+        // Create a live range for the new value, from the EBB header to its use by the copy.
+        let affinity = Affinity::new(&self.encinfo
+                                          .operand_constraints(encoding)
+                                          .expect("Bad copy encoding")
+                                          .outs
+                                          [0]);
+        self.liveness.create_dead(new_val, ebb, affinity);
+        self.liveness
+            .extend_locally(new_val, ebb, inst, &self.func.layout);
+
+        self.split_values.push(new_val);
+        new_val
+    }
+}