Avoid the CFG get_successors() when computing a post-order.

The control flow graph does not guarantee any particular ordering for its successor lists, and the post-order we are computing for building the dominator tree needs to be "split-invariant". See #146 for details. - Discover EBB successors directly from the EBB instruction sequence to guarantee that the post-order we compute is canonical/split-invariant. - Use an alternative graph DFS algorithm which doesn't require indexing into a slice of successors. This changes cfg_postorder in some cases because the edge pruning when converting the (DAG) CFG to a tree for the DFT is different.
2017-11-21 09:45:52 -08:00
parent 2e0b931590
commit cf45afa1e7
3 changed files with 206 additions and 31 deletions
--- a/cranelift/tests/cfg_traversal.rs
+++ b/cranelift/tests/cfg_traversal.rs
@@ -26,6 +26,30 @@ fn test_reverse_postorder_traversal(function_source: &str, ebb_order: Vec<u32>)
 #[test]
 fn simple_traversal() {
    // Fall-through-first, prune-at-source DFT:
    //
    //  ebb0 {
    //      ebb0:brz v0, ebb1 {
    //          ebb0:jump ebb2 {
    //              ebb2 {
    //                  ebb2:brz v2, ebb2 -
    //                  ebb2:brz v3, ebb1 -
    //                  ebb2:brz v4, ebb4 {
    //                      ebb2: jump ebb5 {
    //                          ebb5 {}
    //                      }
    //                      ebb4 {}
    //                  }
    //              } ebb2
    //          }
    //          ebb1 {
    //              ebb1:jump ebb3 {
    //                  ebb3 {}
    //              }
    //          } ebb1
    //      }
    //  } ebb0
    test_reverse_postorder_traversal(
        "
        function %test(i32) native {
@@ -51,12 +75,28 @@ fn simple_traversal() {
                trap user0
        }
    ",
-        vec![0, 2, 5, 4, 1, 3],
+        vec![0, 1, 3, 2, 4, 5],
    );
 }
 #[test]
 fn loops_one() {
    // Fall-through-first, prune-at-source DFT:
    // ebb0 {
    //     ebb0:jump ebb1 {
    //         ebb1 {
    //             ebb1:brnz v0, ebb3 {
    //                 ebb1:jump ebb2 {
    //                     ebb2 {
    //                         ebb2:jump ebb3 -
    //                     } ebb2
    //                 }
    //                 ebb3 {}
    //             }
    //         } ebb1
    //     }
    // } ebb0
    test_reverse_postorder_traversal(
        "
        function %test(i32) native {
@@ -71,12 +111,40 @@ fn loops_one() {
                return
        }
    ",
-        vec![0, 1, 2, 3],
+        vec![0, 1, 3, 2],
    );
 }
 #[test]
 fn loops_two() {
    // Fall-through-first, prune-at-source DFT:
    // ebb0 {
    //     ebb0:brz v0, ebb1 {
    //         ebb0:jump ebb2 {
    //             ebb2 {
    //                 ebb2:brz v0, ebb4 {
    //                     ebb2:jump ebb5 {
    //                         ebb5 {
    //                             brz v0, ebb4 -
    //                         } ebb5
    //                     }
    //                     ebb4 {
    //                         ebb4:brz v0, ebb3 {
    //                             ebb4:jump ebb5 -
    //                             ebb3 {
    //                                 ebb3:jump ebb4 -
    //                             } ebb3
    //                         }
    //                     } ebb4
    //                 }
    //             } ebb2
    //         }
    //		   ebb1 {
    //             ebb1:jump ebb3 -
    //		   } ebb1
    //     }
    // } ebb0
    test_reverse_postorder_traversal(
        "
        function %test(i32) native {
@@ -98,7 +166,7 @@ fn loops_two() {
                return
        }
    ",
-        vec![0, 2, 1, 3, 4, 5],
+        vec![0, 1, 2, 4, 3, 5],
    );
 }
@@ -130,7 +198,7 @@ fn loops_three() {
                return
        }
    ",
-        vec![0, 2, 1, 3, 4, 6, 7, 5],
+        vec![0, 1, 2, 4, 3, 6, 7, 5],
    );
 }
--- a/lib/cretonne/src/dominator_tree.rs
+++ b/lib/cretonne/src/dominator_tree.rs
@@ -3,6 +3,7 @@
 use entity::EntityMap;
 use flowgraph::{ControlFlowGraph, BasicBlock};
 use ir::{Ebb, Inst, Function, Layout, ProgramOrder, ExpandedProgramPoint};
 use ir::instructions::BranchInfo;
 use packed_option::PackedOption;
 use std::cmp::Ordering;
@@ -11,6 +12,10 @@ use std::cmp::Ordering;
 // room for modifications of the dominator tree.
 const STRIDE: u32 = 4;
 // Special RPO numbers used during `compute_postorder`.
 const DONE: u32 = 1;
 const SEEN: u32 = 2;
 // Dominator tree node. We keep one of these per EBB.
 #[derive(Clone, Default)]
 struct DomNode {
@@ -36,7 +41,7 @@ pub struct DominatorTree {
    postorder: Vec<Ebb>,
    // Scratch memory used by `compute_postorder()`.
-    stack: Vec<(Ebb, usize)>,
+    stack: Vec<Ebb>,
    valid: bool,
 }
@@ -223,7 +228,7 @@ impl DominatorTree {
    /// Reset and compute a CFG post-order and dominator tree.
    pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph) {
        debug_assert!(cfg.is_valid());
-        self.compute_postorder(func, cfg);
+        self.compute_postorder(func);
        self.compute_domtree(func, cfg);
        self.valid = true;
    }
@@ -246,36 +251,108 @@ impl DominatorTree {
        self.valid
    }
-    /// Reset all internal data structures and compute a post-order for `cfg`.
+    /// Reset all internal data structures and compute a post-order of the control flow graph.
    ///
    /// This leaves `rpo_number == 1` for all reachable EBBs, 0 for unreachable ones.
-    fn compute_postorder(&mut self, func: &Function, cfg: &ControlFlowGraph) {
+    fn compute_postorder(&mut self, func: &Function) {
        self.clear();
        self.nodes.resize(func.dfg.num_ebbs());
        // This algorithm is a depth first traversal (DFT) of the control flow graph, computing a
        // post-order of the EBBs that are reachable form the entry block. A DFT post-order is not
        // unique. The specific order we get is controlled by two factors:
        //
        // 1. The order each node's children are visited, and
        // 2. The method used for pruning graph edges to get a tree.
        //
        // There are two ways of viewing the CFG as a graph:
        //
        // 1. Each EBB is a node, with outgoing edges for all the branches in the EBB>
        // 2. Each basic block is a node, with outgoing edges for the single branch at the end of
        //    the BB. (An EBB is a linear sequence of basic blocks).
        //
        // The first graph is a contraction of the second one. We want to compute an EBB post-order
        // that is compatible both graph interpretations. That is, if you compute a BB post-order
        // and then remove those BBs that do not correspond to EBB headers, you get a post-order of
        // the EBB graph.
        //
        // Node child order:
        //
        //     In the BB graph, we always go down the fall-through path first and follow the branch
        //     destination second.
        //
        //     In the EBB graph, this is equivalent to visiting EBB successors in a bottom-up
        //     order, starting from the destination of the EBB's terminating jump, ending at the
        //     destination of the first branch in the EBB.
        //
        // Edge pruning:
        //
        //     In the BB graph, we keep an edge to an EBB the first time we visit the *source* side
        //     of the edge. Any subsequent edges to the same EBB are pruned.
        //
        //     The equivalent tree is reached in the EBB graph by keeping the first edge to an EBB
        //     in a top-down traversal of the successors. (And then visiting edges in a bottom-up
        //     order).
        //
        // This pruning method makes it possible to compute the DFT without storing lots of
        // information about the progress through an EBB.
        // During this algorithm only, use `rpo_number` to hold the following state:
        //
-        // 0: EBB is not yet on the stack.
+        //   0:    EBB has not yet been reached in the pre-order.
-        // 1: EBB is on the stack or in postorder.
+        //   SEEN: EBB has been pushed on the stack but successors not yet pushed.
-        const SEEN: u32 = 1;
+        //   DONE: Successors pushed.
        match func.layout.entry_block() {
            Some(ebb) => {
-                self.stack.push((ebb, 0));
+                self.stack.push(ebb);
                self.nodes[ebb].rpo_number = SEEN;
            }
            None => return,
        }
-        while let Some((ebb, succ_index)) = self.stack.pop() {
+        while let Some(ebb) = self.stack.pop() {
-            if let Some(&succ) = cfg.get_successors(ebb).get(succ_index) {
+            match self.nodes[ebb].rpo_number {
-                self.stack.push((ebb, succ_index + 1));
+                SEEN => {
-                if self.nodes[succ].rpo_number == 0 {
+                    // This is the first time we pop the EBB, so we need to scan its successors and
-                    self.stack.push((succ, 0));
+                    // then revisit it.
-                    self.nodes[succ].rpo_number = SEEN;
+                    self.nodes[ebb].rpo_number = DONE;
                    self.stack.push(ebb);
                    self.push_successors(func, ebb);
                }
-            } else {
+                DONE => {
-                self.postorder.push(ebb);
+                    // This is the second time we pop the EBB, so all successors have been
                    // processed.
                    self.postorder.push(ebb);
                }
                _ => unreachable!(),
            }
        }
    }
    /// Push `ebb` successors onto `self.stack`, filtering out those that have already been seen.
    ///
    /// The successors are pushed in program order which is important to get a split-invariant
    /// post-order. Split-invariant means that if an EBB is split in two, we get the same
    /// post-order except for the insertion of the new EBB header at the split point.
    fn push_successors(&mut self, func: &Function, ebb: Ebb) {
        for inst in func.layout.ebb_insts(ebb) {
            match func.dfg[inst].analyze_branch(&func.dfg.value_lists) {
                BranchInfo::SingleDest(succ, _) => {
                    if self.nodes[succ].rpo_number == 0 {
                        self.nodes[succ].rpo_number = SEEN;
                        self.stack.push(succ);
                    }
                }
                BranchInfo::Table(jt) => {
                    for (_, succ) in func.jump_tables[jt].entries() {
                        if self.nodes[succ].rpo_number == 0 {
                            self.nodes[succ].rpo_number = SEEN;
                            self.stack.push(succ);
                        }
                    }
                }
                BranchInfo::NotABranch => {}
            }
        }
    }
@@ -458,6 +535,18 @@ mod test {
        let cfg = ControlFlowGraph::with_function(cur.func);
        let dt = DominatorTree::with_function(cur.func, &cfg);
        // Fall-through-first, prune-at-source DFT:
        //
        // ebb0 {
        //   brnz ebb2 {
        //     trap
        //     ebb2 {
        //       return
        //     } ebb2
        // } ebb0
        assert_eq!(dt.cfg_postorder(), &[ebb2, ebb0]);
        let v2_def = cur.func.dfg.value_def(v2).unwrap_inst();
        assert!(!dt.dominates(v2_def, ebb0, &cur.func.layout));
        assert!(!dt.dominates(ebb0, v2_def, &cur.func.layout));
@@ -466,11 +555,11 @@ mod test {
    #[test]
    fn non_zero_entry_block() {
        let mut func = Function::new();
-        let ebb3 = func.dfg.make_ebb();
+        let ebb0 = func.dfg.make_ebb();
        let cond = func.dfg.append_ebb_param(ebb3, I32);
        let ebb1 = func.dfg.make_ebb();
        let ebb2 = func.dfg.make_ebb();
-        let ebb0 = func.dfg.make_ebb();
+        let ebb3 = func.dfg.make_ebb();
        let cond = func.dfg.append_ebb_param(ebb3, I32);
        let mut cur = FuncCursor::new(&mut func);
@@ -489,6 +578,26 @@ mod test {
        let cfg = ControlFlowGraph::with_function(cur.func);
        let dt = DominatorTree::with_function(cur.func, &cfg);
        // Fall-through-first, prune-at-source DFT:
        //
        // ebb3 {
        //   ebb3:jump ebb1 {
        //     ebb1 {
        //       ebb1:brnz ebb0 {
        //         ebb1:jump ebb2 {
        //           ebb2 {
        //             ebb2:jump ebb0 (seen)
        //           } ebb2
        //         } ebb1:jump ebb2
        //         ebb0 {
        //         } ebb0
        //       } ebb1:brnz ebb0
        //     } ebb1
        //   } ebb3:jump ebb1
        // } ebb3
        assert_eq!(dt.cfg_postorder(), &[ebb2, ebb0, ebb1, ebb3]);
        assert_eq!(cur.func.layout.entry_block().unwrap(), ebb3);
        assert_eq!(dt.idom(ebb3), None);
        assert_eq!(dt.idom(ebb1).unwrap(), jmp_ebb3_ebb1);
@@ -509,8 +618,6 @@ mod test {
            dt.rpo_cmp(jmp_ebb3_ebb1, jmp_ebb1_ebb2, &cur.func.layout),
            Ordering::Less
        );
        assert_eq!(dt.cfg_postorder(), &[ebb0, ebb2, ebb1, ebb3]);
    }
    #[test]
--- a/lib/cretonne/src/loop_analysis.rs
+++ b/lib/cretonne/src/loop_analysis.rs
@@ -326,16 +326,16 @@ mod test {
        let loops = loop_analysis.loops().collect::<Vec<Loop>>();
        assert_eq!(loops.len(), 3);
        assert_eq!(loop_analysis.loop_header(loops[0]), ebb0);
-        assert_eq!(loop_analysis.loop_header(loops[1]), ebb3);
+        assert_eq!(loop_analysis.loop_header(loops[1]), ebb1);
-        assert_eq!(loop_analysis.loop_header(loops[2]), ebb1);
+        assert_eq!(loop_analysis.loop_header(loops[2]), ebb3);
        assert_eq!(loop_analysis.loop_parent(loops[1]), Some(loops[0]));
        assert_eq!(loop_analysis.loop_parent(loops[2]), Some(loops[0]));
        assert_eq!(loop_analysis.loop_parent(loops[0]), None);
        assert_eq!(loop_analysis.is_in_loop(ebb0, loops[0]), true);
-        assert_eq!(loop_analysis.is_in_loop(ebb3, loops[1]), true);
+        assert_eq!(loop_analysis.is_in_loop(ebb1, loops[1]), true);
-        assert_eq!(loop_analysis.is_in_loop(ebb4, loops[1]), true);
+        assert_eq!(loop_analysis.is_in_loop(ebb2, loops[1]), true);
-        assert_eq!(loop_analysis.is_in_loop(ebb1, loops[2]), true);
+        assert_eq!(loop_analysis.is_in_loop(ebb3, loops[2]), true);
-        assert_eq!(loop_analysis.is_in_loop(ebb2, loops[2]), true);
+        assert_eq!(loop_analysis.is_in_loop(ebb4, loops[2]), true);
        assert_eq!(loop_analysis.is_in_loop(ebb5, loops[0]), true);
    }
 }