From cf45afa1e77eb0e192391d01c72278edc33b0d1f Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen <jolesen@mozilla.com>
Date: Tue, 21 Nov 2017 09:45:52 -0800
Subject: [PATCH] Avoid the CFG get_successors() when computing a post-order.

The control flow graph does not guarantee any particular ordering for
its successor lists, and the post-order we are computing for building
the dominator tree needs to be "split-invariant".

See #146 for details.

- Discover EBB successors directly from the EBB instruction sequence to
  guarantee that the post-order we compute is canonical/split-invariant.
- Use an alternative graph DFS algorithm which doesn't require indexing
  into a slice of successors.

This changes cfg_postorder in some cases because the edge pruning when
converting the (DAG) CFG to a tree for the DFT is different.
---
 cranelift/tests/cfg_traversal.rs   |  76 ++++++++++++++-
 lib/cretonne/src/dominator_tree.rs | 149 +++++++++++++++++++++++++----
 lib/cretonne/src/loop_analysis.rs  |  12 +--
 3 files changed, 206 insertions(+), 31 deletions(-)
diff --git a/cranelift/tests/cfg_traversal.rs b/cranelift/tests/cfg_traversal.rs
index 73b4b9fad4..6709da2e9a 100644
--- a/cranelift/tests/cfg_traversal.rs
+++ b/cranelift/tests/cfg_traversal.rs
@@ -26,6 +26,30 @@ fn test_reverse_postorder_traversal(function_source: &str, ebb_order: Vec<u32>)
 
 #[test]
 fn simple_traversal() {
+    // Fall-through-first, prune-at-source DFT:
+    //
+    //  ebb0 {
+    //      ebb0:brz v0, ebb1 {
+    //          ebb0:jump ebb2 {
+    //              ebb2 {
+    //                  ebb2:brz v2, ebb2 -
+    //                  ebb2:brz v3, ebb1 -
+    //                  ebb2:brz v4, ebb4 {
+    //                      ebb2: jump ebb5 {
+    //                          ebb5 {}
+    //                      }
+    //                      ebb4 {}
+    //                  }
+    //              } ebb2
+    //          }
+    //          ebb1 {
+    //              ebb1:jump ebb3 {
+    //                  ebb3 {}
+    //              }
+    //          } ebb1
+    //      }
+    //  } ebb0
+
     test_reverse_postorder_traversal(
         "
         function %test(i32) native {
@@ -51,12 +75,28 @@ fn simple_traversal() {
                 trap user0
         }
     ",
-        vec![0, 2, 5, 4, 1, 3],
+        vec![0, 1, 3, 2, 4, 5],
     );
 }
 
 #[test]
 fn loops_one() {
+    // Fall-through-first, prune-at-source DFT:
+    // ebb0 {
+    //     ebb0:jump ebb1 {
+    //         ebb1 {
+    //             ebb1:brnz v0, ebb3 {
+    //                 ebb1:jump ebb2 {
+    //                     ebb2 {
+    //                         ebb2:jump ebb3 -
+    //                     } ebb2
+    //                 }
+    //                 ebb3 {}
+    //             }
+    //         } ebb1
+    //     }
+    // } ebb0
+
     test_reverse_postorder_traversal(
         "
         function %test(i32) native {
@@ -71,12 +111,40 @@ fn loops_one() {
                 return
         }
     ",
-        vec![0, 1, 2, 3],
+        vec![0, 1, 3, 2],
     );
 }
 
 #[test]
 fn loops_two() {
+    // Fall-through-first, prune-at-source DFT:
+    // ebb0 {
+    //     ebb0:brz v0, ebb1 {
+    //         ebb0:jump ebb2 {
+    //             ebb2 {
+    //                 ebb2:brz v0, ebb4 {
+    //                     ebb2:jump ebb5 {
+    //                         ebb5 {
+    //                             brz v0, ebb4 -
+    //                         } ebb5
+    //                     }
+    //                     ebb4 {
+    //                         ebb4:brz v0, ebb3 {
+    //                             ebb4:jump ebb5 -
+    //                             ebb3 {
+    //                                 ebb3:jump ebb4 -
+    //                             } ebb3
+    //                         }
+    //                     } ebb4
+    //                 }
+    //             } ebb2
+    //         }
+    //		   ebb1 {
+    //             ebb1:jump ebb3 -
+    //		   } ebb1
+    //     }
+    // } ebb0
+
     test_reverse_postorder_traversal(
         "
         function %test(i32) native {
@@ -98,7 +166,7 @@ fn loops_two() {
                 return
         }
     ",
-        vec![0, 2, 1, 3, 4, 5],
+        vec![0, 1, 2, 4, 3, 5],
     );
 }
 
@@ -130,7 +198,7 @@ fn loops_three() {
                 return
         }
     ",
-        vec![0, 2, 1, 3, 4, 6, 7, 5],
+        vec![0, 1, 2, 4, 3, 6, 7, 5],
     );
 }
 
diff --git a/lib/cretonne/src/dominator_tree.rs b/lib/cretonne/src/dominator_tree.rs
index 2c2fd1b48c..2507fd25e4 100644
--- a/lib/cretonne/src/dominator_tree.rs
+++ b/lib/cretonne/src/dominator_tree.rs
@@ -3,6 +3,7 @@
 use entity::EntityMap;
 use flowgraph::{ControlFlowGraph, BasicBlock};
 use ir::{Ebb, Inst, Function, Layout, ProgramOrder, ExpandedProgramPoint};
+use ir::instructions::BranchInfo;
 use packed_option::PackedOption;
 
 use std::cmp::Ordering;
@@ -11,6 +12,10 @@ use std::cmp::Ordering;
 // room for modifications of the dominator tree.
 const STRIDE: u32 = 4;
 
+// Special RPO numbers used during `compute_postorder`.
+const DONE: u32 = 1;
+const SEEN: u32 = 2;
+
 // Dominator tree node. We keep one of these per EBB.
 #[derive(Clone, Default)]
 struct DomNode {
@@ -36,7 +41,7 @@ pub struct DominatorTree {
     postorder: Vec<Ebb>,
 
     // Scratch memory used by `compute_postorder()`.
-    stack: Vec<(Ebb, usize)>,
+    stack: Vec<Ebb>,
 
     valid: bool,
 }
@@ -223,7 +228,7 @@ impl DominatorTree {
     /// Reset and compute a CFG post-order and dominator tree.
     pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph) {
         debug_assert!(cfg.is_valid());
-        self.compute_postorder(func, cfg);
+        self.compute_postorder(func);
         self.compute_domtree(func, cfg);
         self.valid = true;
     }
@@ -246,36 +251,108 @@ impl DominatorTree {
         self.valid
     }
 
-    /// Reset all internal data structures and compute a post-order for `cfg`.
+    /// Reset all internal data structures and compute a post-order of the control flow graph.
     ///
     /// This leaves `rpo_number == 1` for all reachable EBBs, 0 for unreachable ones.
-    fn compute_postorder(&mut self, func: &Function, cfg: &ControlFlowGraph) {
+    fn compute_postorder(&mut self, func: &Function) {
         self.clear();
         self.nodes.resize(func.dfg.num_ebbs());
 
+        // This algorithm is a depth first traversal (DFT) of the control flow graph, computing a
+        // post-order of the EBBs that are reachable form the entry block. A DFT post-order is not
+        // unique. The specific order we get is controlled by two factors:
+        //
+        // 1. The order each node's children are visited, and
+        // 2. The method used for pruning graph edges to get a tree.
+        //
+        // There are two ways of viewing the CFG as a graph:
+        //
+        // 1. Each EBB is a node, with outgoing edges for all the branches in the EBB>
+        // 2. Each basic block is a node, with outgoing edges for the single branch at the end of
+        //    the BB. (An EBB is a linear sequence of basic blocks).
+        //
+        // The first graph is a contraction of the second one. We want to compute an EBB post-order
+        // that is compatible both graph interpretations. That is, if you compute a BB post-order
+        // and then remove those BBs that do not correspond to EBB headers, you get a post-order of
+        // the EBB graph.
+        //
+        // Node child order:
+        //
+        //     In the BB graph, we always go down the fall-through path first and follow the branch
+        //     destination second.
+        //
+        //     In the EBB graph, this is equivalent to visiting EBB successors in a bottom-up
+        //     order, starting from the destination of the EBB's terminating jump, ending at the
+        //     destination of the first branch in the EBB.
+        //
+        // Edge pruning:
+        //
+        //     In the BB graph, we keep an edge to an EBB the first time we visit the *source* side
+        //     of the edge. Any subsequent edges to the same EBB are pruned.
+        //
+        //     The equivalent tree is reached in the EBB graph by keeping the first edge to an EBB
+        //     in a top-down traversal of the successors. (And then visiting edges in a bottom-up
+        //     order).
+        //
+        // This pruning method makes it possible to compute the DFT without storing lots of
+        // information about the progress through an EBB.
+
         // During this algorithm only, use `rpo_number` to hold the following state:
         //
-        // 0: EBB is not yet on the stack.
-        // 1: EBB is on the stack or in postorder.
-        const SEEN: u32 = 1;
+        //   0:    EBB has not yet been reached in the pre-order.
+        //   SEEN: EBB has been pushed on the stack but successors not yet pushed.
+        //   DONE: Successors pushed.
 
         match func.layout.entry_block() {
             Some(ebb) => {
-                self.stack.push((ebb, 0));
+                self.stack.push(ebb);
                 self.nodes[ebb].rpo_number = SEEN;
             }
             None => return,
         }
 
-        while let Some((ebb, succ_index)) = self.stack.pop() {
-            if let Some(&succ) = cfg.get_successors(ebb).get(succ_index) {
-                self.stack.push((ebb, succ_index + 1));
-                if self.nodes[succ].rpo_number == 0 {
-                    self.stack.push((succ, 0));
-                    self.nodes[succ].rpo_number = SEEN;
+        while let Some(ebb) = self.stack.pop() {
+            match self.nodes[ebb].rpo_number {
+                SEEN => {
+                    // This is the first time we pop the EBB, so we need to scan its successors and
+                    // then revisit it.
+                    self.nodes[ebb].rpo_number = DONE;
+                    self.stack.push(ebb);
+                    self.push_successors(func, ebb);
                 }
-            } else {
-                self.postorder.push(ebb);
+                DONE => {
+                    // This is the second time we pop the EBB, so all successors have been
+                    // processed.
+                    self.postorder.push(ebb);
+                }
+                _ => unreachable!(),
+            }
+        }
+    }
+
+    /// Push `ebb` successors onto `self.stack`, filtering out those that have already been seen.
+    ///
+    /// The successors are pushed in program order which is important to get a split-invariant
+    /// post-order. Split-invariant means that if an EBB is split in two, we get the same
+    /// post-order except for the insertion of the new EBB header at the split point.
+    fn push_successors(&mut self, func: &Function, ebb: Ebb) {
+        for inst in func.layout.ebb_insts(ebb) {
+            match func.dfg[inst].analyze_branch(&func.dfg.value_lists) {
+                BranchInfo::SingleDest(succ, _) => {
+                    if self.nodes[succ].rpo_number == 0 {
+                        self.nodes[succ].rpo_number = SEEN;
+                        self.stack.push(succ);
+                    }
+                }
+                BranchInfo::Table(jt) => {
+                    for (_, succ) in func.jump_tables[jt].entries() {
+                        if self.nodes[succ].rpo_number == 0 {
+                            self.nodes[succ].rpo_number = SEEN;
+                            self.stack.push(succ);
+                        }
+                    }
+                }
+                BranchInfo::NotABranch => {}
             }
         }
     }
@@ -458,6 +535,18 @@ mod test {
 
         let cfg = ControlFlowGraph::with_function(cur.func);
         let dt = DominatorTree::with_function(cur.func, &cfg);
+
+        // Fall-through-first, prune-at-source DFT:
+        //
+        // ebb0 {
+        //   brnz ebb2 {
+        //     trap
+        //     ebb2 {
+        //       return
+        //     } ebb2
+        // } ebb0
+        assert_eq!(dt.cfg_postorder(), &[ebb2, ebb0]);
+
         let v2_def = cur.func.dfg.value_def(v2).unwrap_inst();
         assert!(!dt.dominates(v2_def, ebb0, &cur.func.layout));
         assert!(!dt.dominates(ebb0, v2_def, &cur.func.layout));
@@ -466,11 +555,11 @@ mod test {
     #[test]
     fn non_zero_entry_block() {
         let mut func = Function::new();
-        let ebb3 = func.dfg.make_ebb();
-        let cond = func.dfg.append_ebb_param(ebb3, I32);
+        let ebb0 = func.dfg.make_ebb();
         let ebb1 = func.dfg.make_ebb();
         let ebb2 = func.dfg.make_ebb();
-        let ebb0 = func.dfg.make_ebb();
+        let ebb3 = func.dfg.make_ebb();
+        let cond = func.dfg.append_ebb_param(ebb3, I32);
 
         let mut cur = FuncCursor::new(&mut func);
 
@@ -489,6 +578,26 @@ mod test {
         let cfg = ControlFlowGraph::with_function(cur.func);
         let dt = DominatorTree::with_function(cur.func, &cfg);
 
+        // Fall-through-first, prune-at-source DFT:
+        //
+        // ebb3 {
+        //   ebb3:jump ebb1 {
+        //     ebb1 {
+        //       ebb1:brnz ebb0 {
+        //         ebb1:jump ebb2 {
+        //           ebb2 {
+        //             ebb2:jump ebb0 (seen)
+        //           } ebb2
+        //         } ebb1:jump ebb2
+        //         ebb0 {
+        //         } ebb0
+        //       } ebb1:brnz ebb0
+        //     } ebb1
+        //   } ebb3:jump ebb1
+        // } ebb3
+
+        assert_eq!(dt.cfg_postorder(), &[ebb2, ebb0, ebb1, ebb3]);
+
         assert_eq!(cur.func.layout.entry_block().unwrap(), ebb3);
         assert_eq!(dt.idom(ebb3), None);
         assert_eq!(dt.idom(ebb1).unwrap(), jmp_ebb3_ebb1);
@@ -509,8 +618,6 @@ mod test {
             dt.rpo_cmp(jmp_ebb3_ebb1, jmp_ebb1_ebb2, &cur.func.layout),
             Ordering::Less
         );
-
-        assert_eq!(dt.cfg_postorder(), &[ebb0, ebb2, ebb1, ebb3]);
     }
 
     #[test]
diff --git a/lib/cretonne/src/loop_analysis.rs b/lib/cretonne/src/loop_analysis.rs
index ad3239f5e5..b44bd77427 100644
--- a/lib/cretonne/src/loop_analysis.rs
+++ b/lib/cretonne/src/loop_analysis.rs
@@ -326,16 +326,16 @@ mod test {
         let loops = loop_analysis.loops().collect::<Vec<Loop>>();
         assert_eq!(loops.len(), 3);
         assert_eq!(loop_analysis.loop_header(loops[0]), ebb0);
-        assert_eq!(loop_analysis.loop_header(loops[1]), ebb3);
-        assert_eq!(loop_analysis.loop_header(loops[2]), ebb1);
+        assert_eq!(loop_analysis.loop_header(loops[1]), ebb1);
+        assert_eq!(loop_analysis.loop_header(loops[2]), ebb3);
         assert_eq!(loop_analysis.loop_parent(loops[1]), Some(loops[0]));
         assert_eq!(loop_analysis.loop_parent(loops[2]), Some(loops[0]));
         assert_eq!(loop_analysis.loop_parent(loops[0]), None);
         assert_eq!(loop_analysis.is_in_loop(ebb0, loops[0]), true);
-        assert_eq!(loop_analysis.is_in_loop(ebb3, loops[1]), true);
-        assert_eq!(loop_analysis.is_in_loop(ebb4, loops[1]), true);
-        assert_eq!(loop_analysis.is_in_loop(ebb1, loops[2]), true);
-        assert_eq!(loop_analysis.is_in_loop(ebb2, loops[2]), true);
+        assert_eq!(loop_analysis.is_in_loop(ebb1, loops[1]), true);
+        assert_eq!(loop_analysis.is_in_loop(ebb2, loops[1]), true);
+        assert_eq!(loop_analysis.is_in_loop(ebb3, loops[2]), true);
+        assert_eq!(loop_analysis.is_in_loop(ebb4, loops[2]), true);
         assert_eq!(loop_analysis.is_in_loop(ebb5, loops[0]), true);
     }
 }