From 16df1f1cf521c95c16b1a5f8b9294216b60868bd Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen <jolesen@mozilla.com>
Date: Fri, 2 Jun 2017 15:08:02 -0700
Subject: [PATCH] Compute a CFG post-order when building the dominator tree.

The DominatorTree has existing DomNodes per EBB that can be used in lieu
of expensive HastSets for the depth-first traversal of the CFG.

Make the computed and cached post-order available for other passes
through the `cfg_postorder()` method which returns a slice.

The post-order algorithm is essentially the same as the one in
ControlFlowGraph::postorder_ebbs(), except it will never push a
successor node that has already been visited once. This is more
efficient, but it generates a different post-order.

Change the cfg_traversal tests to check this new algorithm.
---
 cranelift/tests/cfg_traversal.rs   |  37 +++++-----
 lib/cretonne/src/dominator_tree.rs | 115 +++++++++++++++++++++++------
 2 files changed, 111 insertions(+), 41 deletions(-)
diff --git a/cranelift/tests/cfg_traversal.rs b/cranelift/tests/cfg_traversal.rs
index 903b1a1ec1..21e2a086b2 100644
--- a/cranelift/tests/cfg_traversal.rs
+++ b/cranelift/tests/cfg_traversal.rs
@@ -1,30 +1,27 @@
 extern crate cretonne;
 extern crate cton_reader;
 
-use self::cretonne::entity_map::EntityMap;
 use self::cretonne::flowgraph::ControlFlowGraph;
+use self::cretonne::dominator_tree::DominatorTree;
 use self::cretonne::ir::Ebb;
 use self::cton_reader::parse_functions;
 
 fn test_reverse_postorder_traversal(function_source: &str, ebb_order: Vec<u32>) {
     let func = &parse_functions(function_source).unwrap()[0];
     let cfg = ControlFlowGraph::with_function(&func);
-    let ebbs = ebb_order
+    let domtree = DominatorTree::with_function(&func, &cfg);
+
+    let got = domtree
+        .cfg_postorder()
         .iter()
-        .map(|n| Ebb::with_number(*n).unwrap())
-        .collect::<Vec<Ebb>>();
-
-    let mut postorder_ebbs = cfg.postorder_ebbs();
-    let mut postorder_map = EntityMap::with_capacity(postorder_ebbs.len());
-    for (i, ebb) in postorder_ebbs.iter().enumerate() {
-        postorder_map[ebb.clone()] = i + 1;
-    }
-    postorder_ebbs.reverse();
-
-    assert_eq!(postorder_ebbs.len(), ebbs.len());
-    for ebb in postorder_ebbs {
-        assert_eq!(ebb, ebbs[ebbs.len() - postorder_map[ebb]]);
-    }
+        .rev()
+        .cloned()
+        .collect::<Vec<_>>();
+    let want = ebb_order
+        .iter()
+        .map(|&n| Ebb::with_number(n).unwrap())
+        .collect::<Vec<_>>();
+    assert_eq!(got, want);
 }
 
 #[test]
@@ -53,7 +50,7 @@ fn simple_traversal() {
                 trap
         }
     ",
-                                     vec![0, 2, 1, 3, 4, 5]);
+                                     vec![0, 1, 3, 2, 4, 5]);
 }
 
 #[test]
@@ -71,7 +68,7 @@ fn loops_one() {
                 return
         }
     ",
-                                     vec![0, 1, 2, 3]);
+                                     vec![0, 1, 3, 2]);
 }
 
 #[test]
@@ -96,7 +93,7 @@ fn loops_two() {
                 return
         }
     ",
-                                     vec![0, 1, 2, 5, 4, 3]);
+                                     vec![0, 1, 2, 4, 3, 5]);
 }
 
 #[test]
@@ -126,7 +123,7 @@ fn loops_three() {
                 return
         }
     ",
-                                     vec![0, 1, 2, 5, 4, 3, 6, 7]);
+                                     vec![0, 1, 2, 4, 3, 6, 7, 5]);
 }
 
 #[test]
diff --git a/lib/cretonne/src/dominator_tree.rs b/lib/cretonne/src/dominator_tree.rs
index ed6aa4dbd6..6d42cefb36 100644
--- a/lib/cretonne/src/dominator_tree.rs
+++ b/lib/cretonne/src/dominator_tree.rs
@@ -25,6 +25,12 @@ struct DomNode {
 /// The dominator tree for a single function.
 pub struct DominatorTree {
     nodes: EntityMap<Ebb, DomNode>,
+
+    // CFG post-order of all reachable EBBs.
+    postorder: Vec<Ebb>,
+
+    // Scratch memory used by `compute_postorder()`.
+    stack: Vec<Ebb>,
 }
 
 /// Methods for querying the dominator tree.
@@ -34,6 +40,14 @@ impl DominatorTree {
         self.nodes[ebb].rpo_number != 0
     }
 
+    /// Get the CFG post-order of EBBs that was used to compute the dominator tree.
+    ///
+    /// Note that this post-order is not updated automatically when the CFG is modified. It is
+    /// computed from scratch and cached by `compute()`.
+    pub fn cfg_postorder(&self) -> &[Ebb] {
+        &self.postorder
+    }
+
     /// Returns the immediate dominator of `ebb`.
     ///
     /// The immediate dominator of an extended basic block is a basic block which we represent by
@@ -134,7 +148,11 @@ impl DominatorTree {
     /// Allocate a new blank dominator tree. Use `compute` to compute the dominator tree for a
     /// function.
     pub fn new() -> DominatorTree {
-        DominatorTree { nodes: EntityMap::new() }
+        DominatorTree {
+            nodes: EntityMap::new(),
+            postorder: Vec::new(),
+            stack: Vec::new(),
+        }
     }
 
     /// Allocate and compute a dominator tree.
@@ -144,39 +162,91 @@ impl DominatorTree {
         domtree
     }
 
-    /// Build a dominator tree from a control flow graph using Keith D. Cooper's
-    /// "Simple, Fast Dominator Algorithm."
+    /// Reset and compute a CFG post-order and dominator tree.
     pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph) {
+        self.compute_postorder(func, cfg);
+        self.compute_domtree(func, cfg);
+    }
+
+    /// Reset all internal data structures and compute a post-order for `cfg`.
+    ///
+    /// This leaves `rpo_number == 1` for all reachable EBBs, 0 for unreachable ones.
+    fn compute_postorder(&mut self, func: &Function, cfg: &ControlFlowGraph) {
         self.nodes.clear();
         self.nodes.resize(func.dfg.num_ebbs());
+        self.postorder.clear();
+        assert!(self.stack.is_empty());
 
-        // We'll be iterating over a reverse post-order of the CFG.
-        // This vector only contains reachable EBBs.
-        let mut postorder = cfg.postorder_ebbs();
+        // During this algorithm only, use `rpo_number` to hold the following state:
+        //
+        // 0: EBB never reached.
+        // 2: EBB has been pushed once, so it shouldn't be pushed again.
+        // 1: EBB has already been popped once, and should be added to the post-order next time.
+        const SEEN: u32 = 2;
+        const DONE: u32 = 1;
 
-        // Remove the entry block, and abort if the function is empty.
-        // The last block visited in a post-order traversal must be the entry block.
-        let entry_block = match postorder.pop() {
-            Some(ebb) => ebb,
+        match func.layout.entry_block() {
+            Some(ebb) => {
+                self.nodes[ebb].rpo_number = SEEN;
+                self.stack.push(ebb)
+            }
+            None => return,
+        }
+
+        while let Some(ebb) = self.stack.pop() {
+            match self.nodes[ebb].rpo_number {
+                // This is the first time we visit `ebb`, forming a pre-order.
+                SEEN => {
+                    // Mark it as done and re-queue it to be visited after its children.
+                    self.nodes[ebb].rpo_number = DONE;
+                    self.stack.push(ebb);
+                    for &succ in cfg.get_successors(ebb) {
+                        // Only push children that haven't been seen before.
+                        if self.nodes[succ].rpo_number == 0 {
+                            self.nodes[succ].rpo_number = SEEN;
+                            self.stack.push(succ);
+                        }
+                    }
+                }
+                // This is the second time we popped `ebb`, so all its children have been visited.
+                // This is the post-order.
+                DONE => self.postorder.push(ebb),
+                _ => panic!("Inconsistent stack rpo_number"),
+            }
+        }
+    }
+
+    /// Build a dominator tree from a control flow graph using Keith D. Cooper's
+    /// "Simple, Fast Dominator Algorithm."
+    fn compute_domtree(&mut self, func: &Function, cfg: &ControlFlowGraph) {
+        // During this algorithm, `rpo_number` has the following values:
+        //
+        // 0: EBB is not reachable.
+        // 1: EBB is reachable, but has not yet been visited during the first pass. This is set by
+        // `compute_postorder`.
+        // 2+: EBB is reachable and has an assigned RPO number.
+
+        // We'll be iterating over a reverse post-order of the CFG, skipping the entry block.
+        let (entry_block, postorder) = match self.postorder.as_slice().split_last() {
+            Some((&eb, rest)) => (eb, rest),
             None => return,
         };
-        assert_eq!(Some(entry_block), func.layout.entry_block());
+        debug_assert_eq!(Some(entry_block), func.layout.entry_block());
 
         // Do a first pass where we assign RPO numbers to all reachable nodes.
-        self.nodes[entry_block].rpo_number = 1;
+        self.nodes[entry_block].rpo_number = 2;
         for (rpo_idx, &ebb) in postorder.iter().rev().enumerate() {
             // Update the current node and give it an RPO number.
-            // The entry block got 1, the rest start at 2.
+            // The entry block got 2, the rest start at 3.
             //
-            // Nodes do not appear as reachable until the have an assigned RPO number, and
-            // `compute_idom` will only look at reachable nodes. This means that the function will
-            // never see an uninitialized predecessor.
+            // Since `compute_idom` will only look at nodes with an assigned RPO number, the
+            // function will never see an uninitialized predecessor.
             //
             // Due to the nature of the post-order traversal, every node we visit will have at
             // least one predecessor that has previously been visited during this RPO.
             self.nodes[ebb] = DomNode {
                 idom: self.compute_idom(ebb, cfg, &func.layout).into(),
-                rpo_number: rpo_idx as u32 + 2,
+                rpo_number: rpo_idx as u32 + 3,
             }
         }
 
@@ -200,13 +270,13 @@ impl DominatorTree {
     // Compute the immediate dominator for `ebb` using the current `idom` states for the reachable
     // nodes.
     fn compute_idom(&self, ebb: Ebb, cfg: &ControlFlowGraph, layout: &Layout) -> Inst {
-        // Get an iterator with just the reachable predecessors to `ebb`.
-        // Note that during the first pass, `is_reachable` returns false for blocks that haven't
-        // been visited yet.
+        // Get an iterator with just the reachable, already visited predecessors to `ebb`.
+        // Note that during the first pass, `rpo_number` is 1 for reachable blocks that haven't
+        // been visited yet, 0 for unreachable blocks.
         let mut reachable_preds = cfg.get_predecessors(ebb)
             .iter()
             .cloned()
-            .filter(|&(ebb, _)| self.is_reachable(ebb));
+            .filter(|&(pred, _)| self.nodes[pred].rpo_number > 1);
 
         // The RPO must visit at least one predecessor before this node.
         let mut idom = reachable_preds
@@ -233,6 +303,7 @@ mod test {
         let cfg = ControlFlowGraph::with_function(&func);
         let dtree = DominatorTree::with_function(&func, &cfg);
         assert_eq!(0, dtree.nodes.keys().count());
+        assert_eq!(dtree.cfg_postorder(), &[]);
     }
 
     #[test]
@@ -277,5 +348,7 @@ mod test {
         assert!(dt.dominates(br_ebb1_ebb0, br_ebb1_ebb0, &func.layout));
         assert!(!dt.dominates(br_ebb1_ebb0, jmp_ebb3_ebb1, &func.layout));
         assert!(dt.dominates(jmp_ebb3_ebb1, br_ebb1_ebb0, &func.layout));
+
+        assert_eq!(dt.cfg_postorder(), &[ebb2, ebb0, ebb1, ebb3]);
     }
 }