Avoid the CFG get_successors() when computing a post-order.

The control flow graph does not guarantee any particular ordering for
its successor lists, and the post-order we are computing for building
the dominator tree needs to be "split-invariant".

See #146 for details.

- Discover EBB successors directly from the EBB instruction sequence to
  guarantee that the post-order we compute is canonical/split-invariant.
- Use an alternative graph DFS algorithm which doesn't require indexing
  into a slice of successors.

This changes cfg_postorder in some cases because the edge pruning when
converting the (DAG) CFG to a tree for the DFT is different.
This commit is contained in:
Jakob Stoklund Olesen
2017-11-21 09:45:52 -08:00
parent 2e0b931590
commit cf45afa1e7
3 changed files with 206 additions and 31 deletions

View File

@@ -26,6 +26,30 @@ fn test_reverse_postorder_traversal(function_source: &str, ebb_order: Vec<u32>)
#[test] #[test]
fn simple_traversal() { fn simple_traversal() {
// Fall-through-first, prune-at-source DFT:
//
// ebb0 {
// ebb0:brz v0, ebb1 {
// ebb0:jump ebb2 {
// ebb2 {
// ebb2:brz v2, ebb2 -
// ebb2:brz v3, ebb1 -
// ebb2:brz v4, ebb4 {
// ebb2: jump ebb5 {
// ebb5 {}
// }
// ebb4 {}
// }
// } ebb2
// }
// ebb1 {
// ebb1:jump ebb3 {
// ebb3 {}
// }
// } ebb1
// }
// } ebb0
test_reverse_postorder_traversal( test_reverse_postorder_traversal(
" "
function %test(i32) native { function %test(i32) native {
@@ -51,12 +75,28 @@ fn simple_traversal() {
trap user0 trap user0
} }
", ",
vec![0, 2, 5, 4, 1, 3], vec![0, 1, 3, 2, 4, 5],
); );
} }
#[test] #[test]
fn loops_one() { fn loops_one() {
// Fall-through-first, prune-at-source DFT:
// ebb0 {
// ebb0:jump ebb1 {
// ebb1 {
// ebb1:brnz v0, ebb3 {
// ebb1:jump ebb2 {
// ebb2 {
// ebb2:jump ebb3 -
// } ebb2
// }
// ebb3 {}
// }
// } ebb1
// }
// } ebb0
test_reverse_postorder_traversal( test_reverse_postorder_traversal(
" "
function %test(i32) native { function %test(i32) native {
@@ -71,12 +111,40 @@ fn loops_one() {
return return
} }
", ",
vec![0, 1, 2, 3], vec![0, 1, 3, 2],
); );
} }
#[test] #[test]
fn loops_two() { fn loops_two() {
// Fall-through-first, prune-at-source DFT:
// ebb0 {
// ebb0:brz v0, ebb1 {
// ebb0:jump ebb2 {
// ebb2 {
// ebb2:brz v0, ebb4 {
// ebb2:jump ebb5 {
// ebb5 {
// brz v0, ebb4 -
// } ebb5
// }
// ebb4 {
// ebb4:brz v0, ebb3 {
// ebb4:jump ebb5 -
// ebb3 {
// ebb3:jump ebb4 -
// } ebb3
// }
// } ebb4
// }
// } ebb2
// }
// ebb1 {
// ebb1:jump ebb3 -
// } ebb1
// }
// } ebb0
test_reverse_postorder_traversal( test_reverse_postorder_traversal(
" "
function %test(i32) native { function %test(i32) native {
@@ -98,7 +166,7 @@ fn loops_two() {
return return
} }
", ",
vec![0, 2, 1, 3, 4, 5], vec![0, 1, 2, 4, 3, 5],
); );
} }
@@ -130,7 +198,7 @@ fn loops_three() {
return return
} }
", ",
vec![0, 2, 1, 3, 4, 6, 7, 5], vec![0, 1, 2, 4, 3, 6, 7, 5],
); );
} }

View File

@@ -3,6 +3,7 @@
use entity::EntityMap; use entity::EntityMap;
use flowgraph::{ControlFlowGraph, BasicBlock}; use flowgraph::{ControlFlowGraph, BasicBlock};
use ir::{Ebb, Inst, Function, Layout, ProgramOrder, ExpandedProgramPoint}; use ir::{Ebb, Inst, Function, Layout, ProgramOrder, ExpandedProgramPoint};
use ir::instructions::BranchInfo;
use packed_option::PackedOption; use packed_option::PackedOption;
use std::cmp::Ordering; use std::cmp::Ordering;
@@ -11,6 +12,10 @@ use std::cmp::Ordering;
// room for modifications of the dominator tree. // room for modifications of the dominator tree.
const STRIDE: u32 = 4; const STRIDE: u32 = 4;
// Special RPO numbers used during `compute_postorder`.
const DONE: u32 = 1;
const SEEN: u32 = 2;
// Dominator tree node. We keep one of these per EBB. // Dominator tree node. We keep one of these per EBB.
#[derive(Clone, Default)] #[derive(Clone, Default)]
struct DomNode { struct DomNode {
@@ -36,7 +41,7 @@ pub struct DominatorTree {
postorder: Vec<Ebb>, postorder: Vec<Ebb>,
// Scratch memory used by `compute_postorder()`. // Scratch memory used by `compute_postorder()`.
stack: Vec<(Ebb, usize)>, stack: Vec<Ebb>,
valid: bool, valid: bool,
} }
@@ -223,7 +228,7 @@ impl DominatorTree {
/// Reset and compute a CFG post-order and dominator tree. /// Reset and compute a CFG post-order and dominator tree.
pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph) { pub fn compute(&mut self, func: &Function, cfg: &ControlFlowGraph) {
debug_assert!(cfg.is_valid()); debug_assert!(cfg.is_valid());
self.compute_postorder(func, cfg); self.compute_postorder(func);
self.compute_domtree(func, cfg); self.compute_domtree(func, cfg);
self.valid = true; self.valid = true;
} }
@@ -246,37 +251,109 @@ impl DominatorTree {
self.valid self.valid
} }
/// Reset all internal data structures and compute a post-order for `cfg`. /// Reset all internal data structures and compute a post-order of the control flow graph.
/// ///
/// This leaves `rpo_number == 1` for all reachable EBBs, 0 for unreachable ones. /// This leaves `rpo_number == 1` for all reachable EBBs, 0 for unreachable ones.
fn compute_postorder(&mut self, func: &Function, cfg: &ControlFlowGraph) { fn compute_postorder(&mut self, func: &Function) {
self.clear(); self.clear();
self.nodes.resize(func.dfg.num_ebbs()); self.nodes.resize(func.dfg.num_ebbs());
// This algorithm is a depth first traversal (DFT) of the control flow graph, computing a
// post-order of the EBBs that are reachable from the entry block. A DFT post-order is not
// unique. The specific order we get is controlled by two factors:
//
// 1. The order each node's children are visited, and
// 2. The method used for pruning graph edges to get a tree.
//
// There are two ways of viewing the CFG as a graph:
//
// 1. Each EBB is a node, with outgoing edges for all the branches in the EBB.
// 2. Each basic block is a node, with outgoing edges for the single branch at the end of
// the BB. (An EBB is a linear sequence of basic blocks).
//
// The first graph is a contraction of the second one. We want to compute an EBB post-order
// that is compatible with both graph interpretations. That is, if you compute a BB
// post-order and then remove those BBs that do not correspond to EBB headers, you get a
// post-order of the EBB graph.
//
// Node child order:
//
// In the BB graph, we always go down the fall-through path first and follow the branch
// destination second.
//
// In the EBB graph, this is equivalent to visiting EBB successors in a bottom-up
// order, starting from the destination of the EBB's terminating jump, ending at the
// destination of the first branch in the EBB.
//
// Edge pruning:
//
// In the BB graph, we keep an edge to an EBB the first time we visit the *source* side
// of the edge. Any subsequent edges to the same EBB are pruned.
//
// The equivalent tree is reached in the EBB graph by keeping the first edge to an EBB
// in a top-down traversal of the successors. (And then visiting edges in a bottom-up
// order).
//
// This pruning method makes it possible to compute the DFT without storing lots of
// information about the progress through an EBB.
// During this algorithm only, use `rpo_number` to hold the following state: // During this algorithm only, use `rpo_number` to hold the following state:
// //
// 0: EBB is not yet on the stack. // 0: EBB has not yet been reached in the pre-order.
// 1: EBB is on the stack or in postorder. // SEEN: EBB has been pushed on the stack but successors not yet pushed.
const SEEN: u32 = 1; // DONE: Successors pushed.
match func.layout.entry_block() { match func.layout.entry_block() {
Some(ebb) => { Some(ebb) => {
self.stack.push((ebb, 0)); self.stack.push(ebb);
self.nodes[ebb].rpo_number = SEEN; self.nodes[ebb].rpo_number = SEEN;
} }
None => return, None => return,
} }
while let Some((ebb, succ_index)) = self.stack.pop() { while let Some(ebb) = self.stack.pop() {
if let Some(&succ) = cfg.get_successors(ebb).get(succ_index) { match self.nodes[ebb].rpo_number {
self.stack.push((ebb, succ_index + 1)); SEEN => {
if self.nodes[succ].rpo_number == 0 { // This is the first time we pop the EBB, so we need to scan its successors and
self.stack.push((succ, 0)); // then revisit it.
self.nodes[succ].rpo_number = SEEN; self.nodes[ebb].rpo_number = DONE;
self.stack.push(ebb);
self.push_successors(func, ebb);
} }
} else { DONE => {
// This is the second time we pop the EBB, so all successors have been
// processed.
self.postorder.push(ebb); self.postorder.push(ebb);
} }
_ => unreachable!(),
}
}
}
/// Push `ebb` successors onto `self.stack`, filtering out those that have already been seen.
///
/// The successors are pushed in program order which is important to get a split-invariant
/// post-order. Split-invariant means that if an EBB is split in two, we get the same
/// post-order except for the insertion of the new EBB header at the split point.
///
/// A successor is pushed only while its `rpo_number` is still 0, and it is marked `SEEN` at
/// the moment it is pushed, so later branches to the same EBB do not push it again.
fn push_successors(&mut self, func: &Function, ebb: Ebb) {
// Scan every instruction in `ebb` and pick out the ones that are branches.
for inst in func.layout.ebb_insts(ebb) {
match func.dfg[inst].analyze_branch(&func.dfg.value_lists) {
// Branch with a single destination EBB.
BranchInfo::SingleDest(succ, _) => {
// Only push EBBs that have not been reached before.
if self.nodes[succ].rpo_number == 0 {
self.nodes[succ].rpo_number = SEEN;
self.stack.push(succ);
}
}
// Jump-table branch: every table entry is a potential successor.
BranchInfo::Table(jt) => {
for (_, succ) in func.jump_tables[jt].entries() {
// Same filtering as for a single destination.
if self.nodes[succ].rpo_number == 0 {
self.nodes[succ].rpo_number = SEEN;
self.stack.push(succ);
}
}
}
// Instructions that are not branches contribute no successors.
BranchInfo::NotABranch => {}
}
} }
} }
@@ -458,6 +535,18 @@ mod test {
let cfg = ControlFlowGraph::with_function(cur.func); let cfg = ControlFlowGraph::with_function(cur.func);
let dt = DominatorTree::with_function(cur.func, &cfg); let dt = DominatorTree::with_function(cur.func, &cfg);
// Fall-through-first, prune-at-source DFT:
//
// ebb0 {
// brnz ebb2 {
// trap
// ebb2 {
// return
// } ebb2
// } ebb0
assert_eq!(dt.cfg_postorder(), &[ebb2, ebb0]);
let v2_def = cur.func.dfg.value_def(v2).unwrap_inst(); let v2_def = cur.func.dfg.value_def(v2).unwrap_inst();
assert!(!dt.dominates(v2_def, ebb0, &cur.func.layout)); assert!(!dt.dominates(v2_def, ebb0, &cur.func.layout));
assert!(!dt.dominates(ebb0, v2_def, &cur.func.layout)); assert!(!dt.dominates(ebb0, v2_def, &cur.func.layout));
@@ -466,11 +555,11 @@ mod test {
#[test] #[test]
fn non_zero_entry_block() { fn non_zero_entry_block() {
let mut func = Function::new(); let mut func = Function::new();
let ebb3 = func.dfg.make_ebb(); let ebb0 = func.dfg.make_ebb();
let cond = func.dfg.append_ebb_param(ebb3, I32);
let ebb1 = func.dfg.make_ebb(); let ebb1 = func.dfg.make_ebb();
let ebb2 = func.dfg.make_ebb(); let ebb2 = func.dfg.make_ebb();
let ebb0 = func.dfg.make_ebb(); let ebb3 = func.dfg.make_ebb();
let cond = func.dfg.append_ebb_param(ebb3, I32);
let mut cur = FuncCursor::new(&mut func); let mut cur = FuncCursor::new(&mut func);
@@ -489,6 +578,26 @@ mod test {
let cfg = ControlFlowGraph::with_function(cur.func); let cfg = ControlFlowGraph::with_function(cur.func);
let dt = DominatorTree::with_function(cur.func, &cfg); let dt = DominatorTree::with_function(cur.func, &cfg);
// Fall-through-first, prune-at-source DFT:
//
// ebb3 {
// ebb3:jump ebb1 {
// ebb1 {
// ebb1:brnz ebb0 {
// ebb1:jump ebb2 {
// ebb2 {
// ebb2:jump ebb0 (seen)
// } ebb2
// } ebb1:jump ebb2
// ebb0 {
// } ebb0
// } ebb1:brnz ebb0
// } ebb1
// } ebb3:jump ebb1
// } ebb3
assert_eq!(dt.cfg_postorder(), &[ebb2, ebb0, ebb1, ebb3]);
assert_eq!(cur.func.layout.entry_block().unwrap(), ebb3); assert_eq!(cur.func.layout.entry_block().unwrap(), ebb3);
assert_eq!(dt.idom(ebb3), None); assert_eq!(dt.idom(ebb3), None);
assert_eq!(dt.idom(ebb1).unwrap(), jmp_ebb3_ebb1); assert_eq!(dt.idom(ebb1).unwrap(), jmp_ebb3_ebb1);
@@ -509,8 +618,6 @@ mod test {
dt.rpo_cmp(jmp_ebb3_ebb1, jmp_ebb1_ebb2, &cur.func.layout), dt.rpo_cmp(jmp_ebb3_ebb1, jmp_ebb1_ebb2, &cur.func.layout),
Ordering::Less Ordering::Less
); );
assert_eq!(dt.cfg_postorder(), &[ebb0, ebb2, ebb1, ebb3]);
} }
#[test] #[test]

View File

@@ -326,16 +326,16 @@ mod test {
let loops = loop_analysis.loops().collect::<Vec<Loop>>(); let loops = loop_analysis.loops().collect::<Vec<Loop>>();
assert_eq!(loops.len(), 3); assert_eq!(loops.len(), 3);
assert_eq!(loop_analysis.loop_header(loops[0]), ebb0); assert_eq!(loop_analysis.loop_header(loops[0]), ebb0);
assert_eq!(loop_analysis.loop_header(loops[1]), ebb3); assert_eq!(loop_analysis.loop_header(loops[1]), ebb1);
assert_eq!(loop_analysis.loop_header(loops[2]), ebb1); assert_eq!(loop_analysis.loop_header(loops[2]), ebb3);
assert_eq!(loop_analysis.loop_parent(loops[1]), Some(loops[0])); assert_eq!(loop_analysis.loop_parent(loops[1]), Some(loops[0]));
assert_eq!(loop_analysis.loop_parent(loops[2]), Some(loops[0])); assert_eq!(loop_analysis.loop_parent(loops[2]), Some(loops[0]));
assert_eq!(loop_analysis.loop_parent(loops[0]), None); assert_eq!(loop_analysis.loop_parent(loops[0]), None);
assert_eq!(loop_analysis.is_in_loop(ebb0, loops[0]), true); assert_eq!(loop_analysis.is_in_loop(ebb0, loops[0]), true);
assert_eq!(loop_analysis.is_in_loop(ebb3, loops[1]), true); assert_eq!(loop_analysis.is_in_loop(ebb1, loops[1]), true);
assert_eq!(loop_analysis.is_in_loop(ebb4, loops[1]), true); assert_eq!(loop_analysis.is_in_loop(ebb2, loops[1]), true);
assert_eq!(loop_analysis.is_in_loop(ebb1, loops[2]), true); assert_eq!(loop_analysis.is_in_loop(ebb3, loops[2]), true);
assert_eq!(loop_analysis.is_in_loop(ebb2, loops[2]), true); assert_eq!(loop_analysis.is_in_loop(ebb4, loops[2]), true);
assert_eq!(loop_analysis.is_in_loop(ebb5, loops[0]), true); assert_eq!(loop_analysis.is_in_loop(ebb5, loops[0]), true);
} }
} }