* Rework the blockorder module to reuse the dom tree's cfg postorder * Update domtree tests * Treat br_table with an empty jump table as multiple block exits * Bless tests * Change branch_idx to succ_idx and fix the comment
455 lines
17 KiB
Rust
455 lines
17 KiB
Rust
//! Computation of basic block order in emitted code.
//!
//! This module handles the translation from CLIF BBs to VCode BBs.
//!
//! The basic idea is that we compute a sequence of "lowered blocks" that
//! correspond to one or more blocks in the graph: (CLIF CFG) `union` (implicit
//! block on *every* edge). Conceptually, the lowering pipeline wants to insert
//! moves for phi-nodes on every block-to-block transfer; these blocks always
//! conceptually exist, but may be merged with an "original" CLIF block (and
//! hence not actually exist; this is equivalent to inserting the blocks only on
//! critical edges).
//!
//! In other words, starting from a CFG like this (where each "CLIF block" and
//! "(edge N->M)" is a separate basic block):
//!
//! ```plain
//!
//!              CLIF block 0
//!              /          \
//!     (edge 0->1)        (edge 0->2)
//!          |                  |
//!     CLIF block 1       CLIF block 2
//!           \                /
//!        (edge 1->3)  (edge 2->3)
//!                \      /
//!              CLIF block 3
//! ```
//!
//! We can produce a CFG of lowered blocks like so:
//!
//! ```plain
//!            +--------------+
//!            | CLIF block 0 |
//!            +--------------+
//!               /       \
//!     +--------------+     +--------------+
//!     | (edge 0->1)  |     | (edge 0->2)  |
//!     | CLIF block 1 |     | CLIF block 2 |
//!     | (edge 1->3)  |     | (edge 2->3)  |
//!     +--------------+     +--------------+
//!                \           /
//!                 \         /
//!                +------------+
//!                |CLIF block 3|
//!                +------------+
//! ```
//!
//! Each `LoweredBlock` names just an original CLIF block, or just an edge block.
//!
//! To compute this lowering, we do a DFS over the CLIF-plus-edge-block graph
//! (never actually materialized, just defined by a "successors" function), and
//! compute the reverse postorder.
//!
//! This algorithm isn't perfect w.r.t. generated code quality: we don't, for
//! example, consider any information about whether edge blocks will actually
//! have content, because this computation happens as part of lowering *before*
//! regalloc, and regalloc may or may not insert moves/spills/reloads on any
//! particular edge. But it works relatively well and is conceptually simple.
//! Furthermore, the [MachBuffer] machine-code sink performs final peephole-like
//! branch editing that in practice elides empty blocks and simplifies some of
//! the other redundancies that this scheme produces.
|
|
|
|
use crate::dominator_tree::DominatorTree;
|
|
use crate::entity::SecondaryMap;
|
|
use crate::fx::{FxHashMap, FxHashSet};
|
|
use crate::inst_predicates::visit_block_succs;
|
|
use crate::ir::{Block, Function, Inst, Opcode};
|
|
use crate::{machinst::*, trace};
|
|
|
|
use smallvec::SmallVec;
|
|
|
|
/// Mapping from CLIF BBs to VCode BBs.
|
|
#[derive(Debug)]
|
|
pub struct BlockLoweringOrder {
|
|
/// Lowered blocks, in BlockIndex order. Each block is some combination of
|
|
/// (i) a CLIF block, and (ii) inserted crit-edge blocks before or after;
|
|
/// see [LoweredBlock] for details.
|
|
lowered_order: Vec<LoweredBlock>,
|
|
/// BlockIndex values for successors for all lowered blocks, indexing `lowered_order`.
|
|
lowered_succ_indices: Vec<BlockIndex>,
|
|
/// Ranges in `lowered_succ_indices` giving the successor lists for each lowered
|
|
/// block. Indexed by lowering-order index (`BlockIndex`).
|
|
lowered_succ_ranges: Vec<(Option<Inst>, std::ops::Range<usize>)>,
|
|
/// Cold blocks. These blocks are not reordered in the
|
|
/// `lowered_order` above; the lowered order must respect RPO
|
|
/// (uses after defs) in order for lowering to be
|
|
/// correct. Instead, this set is used to provide `is_cold()`,
|
|
/// which is used by VCode emission to sink the blocks at the last
|
|
/// moment (when we actually emit bytes into the MachBuffer).
|
|
cold_blocks: FxHashSet<BlockIndex>,
|
|
/// Lowered blocks that are indirect branch targets.
|
|
indirect_branch_targets: FxHashSet<BlockIndex>,
|
|
}
|
|
|
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
|
pub enum LoweredBlock {
|
|
/// Block in original CLIF.
|
|
Orig {
|
|
/// Original CLIF block.
|
|
block: Block,
|
|
},
|
|
|
|
/// Critical edge between two CLIF blocks.
|
|
CriticalEdge {
|
|
/// The predecessor block.
|
|
pred: Block,
|
|
|
|
/// The successor block.
|
|
succ: Block,
|
|
|
|
/// The index of this branch in the successor edges from `pred`, following the same
|
|
/// indexing order as `inst_predicates::visit_block_succs`. This is used to distinguish
|
|
/// multiple edges between the same CLIF blocks.
|
|
succ_idx: u32,
|
|
},
|
|
}
|
|
|
|
impl LoweredBlock {
|
|
/// Unwrap an `Orig` block.
|
|
pub fn orig_block(&self) -> Option<Block> {
|
|
match self {
|
|
&LoweredBlock::Orig { block } => Some(block),
|
|
&LoweredBlock::CriticalEdge { .. } => None,
|
|
}
|
|
}
|
|
|
|
/// The associated in-edge predecessor, if this is a critical edge.
|
|
#[cfg(test)]
|
|
pub fn in_edge(&self) -> Option<Block> {
|
|
match self {
|
|
&LoweredBlock::CriticalEdge { pred, .. } => Some(pred),
|
|
&LoweredBlock::Orig { .. } => None,
|
|
}
|
|
}
|
|
|
|
/// The associated out-edge successor, if this is a critical edge.
|
|
#[cfg(test)]
|
|
pub fn out_edge(&self) -> Option<Block> {
|
|
match self {
|
|
&LoweredBlock::CriticalEdge { succ, .. } => Some(succ),
|
|
&LoweredBlock::Orig { .. } => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl BlockLoweringOrder {
|
|
/// Compute and return a lowered block order for `f`.
|
|
pub fn new(f: &Function, domtree: &DominatorTree) -> BlockLoweringOrder {
|
|
trace!("BlockLoweringOrder: function body {:?}", f);
|
|
|
|
// Step 1: compute the in-edge and out-edge count of every block.
|
|
let mut block_in_count = SecondaryMap::with_default(0);
|
|
let mut block_out_count = SecondaryMap::with_default(0);
|
|
|
|
// Block successors are stored as `LoweredBlocks` to simplify the construction of
|
|
// `lowered_succs` in the final result. Initially, all entries are `Orig` values, and are
|
|
// updated to be `CriticalEdge` when those cases are identified in step 2 below.
|
|
let mut block_succs: SmallVec<[LoweredBlock; 128]> = SmallVec::new();
|
|
let mut block_succ_range = SecondaryMap::with_default(0..0);
|
|
|
|
let mut indirect_branch_target_clif_blocks = FxHashSet::default();
|
|
|
|
for block in f.layout.blocks() {
|
|
let start = block_succs.len();
|
|
visit_block_succs(f, block, |_, succ, from_table| {
|
|
block_out_count[block] += 1;
|
|
block_in_count[succ] += 1;
|
|
block_succs.push(LoweredBlock::Orig { block: succ });
|
|
|
|
if from_table {
|
|
indirect_branch_target_clif_blocks.insert(succ);
|
|
}
|
|
});
|
|
|
|
// Ensure that blocks terminated by br_table instructions with an empty jump table are
|
|
// still treated like conditional blocks from the point of view of critical edge
|
|
// splitting.
|
|
if let Some(inst) = f.layout.last_inst(block) {
|
|
if Opcode::BrTable == f.dfg.insts[inst].opcode() {
|
|
block_out_count[block] = block_out_count[block].max(2);
|
|
}
|
|
}
|
|
|
|
let end = block_succs.len();
|
|
block_succ_range[block] = start..end;
|
|
}
|
|
|
|
// Step 2: walk the postorder from the domtree in reverse to produce our desired node
|
|
// lowering order, identifying critical edges to split along the way.
|
|
|
|
let mut lb_to_bindex = FxHashMap::default();
|
|
let mut lowered_order = Vec::new();
|
|
|
|
for &block in domtree.cfg_postorder().iter().rev() {
|
|
let lb = LoweredBlock::Orig { block };
|
|
let bindex = BlockIndex::new(lowered_order.len());
|
|
lb_to_bindex.insert(lb.clone(), bindex);
|
|
lowered_order.push(lb);
|
|
|
|
if block_out_count[block] > 1 {
|
|
let range = block_succ_range[block].clone();
|
|
for (succ_ix, lb) in block_succs[range].iter_mut().enumerate() {
|
|
let succ = lb.orig_block().unwrap();
|
|
if block_in_count[succ] > 1 {
|
|
// Mutate the successor to be a critical edge, as `block` has multiple
|
|
// edges leaving it, and `succ` has multiple edges entering it.
|
|
*lb = LoweredBlock::CriticalEdge {
|
|
pred: block,
|
|
succ,
|
|
succ_idx: succ_ix as u32,
|
|
};
|
|
let bindex = BlockIndex::new(lowered_order.len());
|
|
lb_to_bindex.insert(*lb, bindex);
|
|
lowered_order.push(*lb);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Step 3: build the successor tables given the lowering order. We can't perform this step
|
|
// during the creation of `lowering_order`, as we need `lb_to_bindex` to be fully populated
|
|
// first.
|
|
let mut lowered_succ_indices = Vec::new();
|
|
let mut cold_blocks = FxHashSet::default();
|
|
let mut indirect_branch_targets = FxHashSet::default();
|
|
let lowered_succ_ranges =
|
|
Vec::from_iter(lowered_order.iter().enumerate().map(|(ix, lb)| {
|
|
let bindex = BlockIndex::new(ix);
|
|
let start = lowered_succ_indices.len();
|
|
let opt_inst = match lb {
|
|
// Block successors are pulled directly over, as they'll have been mutated when
|
|
// determining the block order already.
|
|
&LoweredBlock::Orig { block } => {
|
|
let range = block_succ_range[block].clone();
|
|
lowered_succ_indices
|
|
.extend(block_succs[range].iter().map(|lb| lb_to_bindex[lb]));
|
|
|
|
if f.layout.is_cold(block) {
|
|
cold_blocks.insert(bindex);
|
|
}
|
|
|
|
if indirect_branch_target_clif_blocks.contains(&block) {
|
|
indirect_branch_targets.insert(bindex);
|
|
}
|
|
|
|
let last = f.layout.last_inst(block).unwrap();
|
|
let opcode = f.dfg.insts[last].opcode();
|
|
|
|
assert!(opcode.is_terminator());
|
|
|
|
opcode.is_branch().then_some(last)
|
|
}
|
|
|
|
// Critical edges won't have successor information in block_succ_range, but
|
|
// they only have a single known successor to record anyway.
|
|
&LoweredBlock::CriticalEdge { succ, .. } => {
|
|
let succ_index = lb_to_bindex[&LoweredBlock::Orig { block: succ }];
|
|
lowered_succ_indices.push(succ_index);
|
|
|
|
// Edges inherit indirect branch and cold block metadata from their
|
|
// successor.
|
|
|
|
if f.layout.is_cold(succ) {
|
|
cold_blocks.insert(bindex);
|
|
}
|
|
|
|
if indirect_branch_target_clif_blocks.contains(&succ) {
|
|
indirect_branch_targets.insert(bindex);
|
|
}
|
|
|
|
None
|
|
}
|
|
};
|
|
let end = lowered_succ_indices.len();
|
|
(opt_inst, start..end)
|
|
}));
|
|
|
|
let result = BlockLoweringOrder {
|
|
lowered_order,
|
|
lowered_succ_indices,
|
|
lowered_succ_ranges,
|
|
cold_blocks,
|
|
indirect_branch_targets,
|
|
};
|
|
|
|
trace!("BlockLoweringOrder: {:#?}", result);
|
|
result
|
|
}
|
|
|
|
/// Get the lowered order of blocks.
|
|
pub fn lowered_order(&self) -> &[LoweredBlock] {
|
|
&self.lowered_order[..]
|
|
}
|
|
|
|
/// Get the successor indices for a lowered block.
|
|
pub fn succ_indices(&self, block: BlockIndex) -> (Option<Inst>, &[BlockIndex]) {
|
|
let (opt_inst, range) = &self.lowered_succ_ranges[block.index()];
|
|
(opt_inst.clone(), &self.lowered_succ_indices[range.clone()])
|
|
}
|
|
|
|
/// Determine whether the given lowered-block index is cold.
|
|
pub fn is_cold(&self, block: BlockIndex) -> bool {
|
|
self.cold_blocks.contains(&block)
|
|
}
|
|
|
|
/// Determine whether the given lowered block index is an indirect branch
|
|
/// target.
|
|
pub fn is_indirect_branch_target(&self, block: BlockIndex) -> bool {
|
|
self.indirect_branch_targets.contains(&block)
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod test {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::flowgraph::ControlFlowGraph;
    use crate::ir::types::*;
    use crate::ir::UserFuncName;
    use crate::ir::{AbiParam, Function, InstBuilder, Signature};
    use crate::isa::CallConv;

    /// Build a function with `n_blocks` blocks wired up according to `edges`
    /// and return its lowering order.
    ///
    /// `edges` is a list of `(from, to)` block numbers, grouped by `from` in
    /// ascending order. A block with no out-edges returns, one out-edge becomes
    /// a `jump`, and two become a `brif`; more than two panics.
    fn build_test_func(n_blocks: usize, edges: &[(usize, usize)]) -> BlockLoweringOrder {
        assert!(n_blocks > 0);

        let name = UserFuncName::testcase("test0");
        let mut sig = Signature::new(CallConv::SystemV);
        sig.params.push(AbiParam::new(I32));
        let mut func = Function::with_name_signature(name, sig);
        let blocks = (0..n_blocks)
            .map(|i| {
                let bb = func.dfg.make_block();
                assert!(bb.as_u32() == i as u32);
                bb
            })
            .collect::<Vec<_>>();

        let arg0 = func.dfg.append_block_param(blocks[0], I32);

        let mut pos = FuncCursor::new(&mut func);

        // Edges not yet consumed; each block takes the leading run that starts at it.
        let mut remaining = edges;
        for (i, &block) in blocks.iter().enumerate() {
            pos.insert_block(block);

            let n_out = remaining
                .iter()
                .take_while(|&&(from, _)| from == i)
                .count();
            let (outgoing, rest) = remaining.split_at(n_out);
            remaining = rest;

            match *outgoing {
                [] => {
                    pos.ins().return_(&[arg0]);
                }
                [(_, to)] => {
                    pos.ins().jump(blocks[to], &[]);
                }
                [(_, then_to), (_, else_to)] => {
                    pos.ins()
                        .brif(arg0, blocks[then_to], &[], blocks[else_to], &[]);
                }
                _ => panic!("Too many successors"),
            }
        }

        let mut cfg = ControlFlowGraph::new();
        cfg.compute(&func);
        let dom_tree = DominatorTree::with_function(&func, &cfg);

        BlockLoweringOrder::new(&func, &dom_tree)
    }

    #[test]
    fn test_blockorder_diamond() {
        let order = build_test_func(4, &[(0, 1), (0, 2), (1, 3), (2, 3)]);

        // This test case doesn't need to introduce any critical edges, as all regalloc allocations
        // can sit on either the entry or exit of blocks 1 and 2.
        assert_eq!(order.lowered_order.len(), 4);
    }

    #[test]
    fn test_blockorder_critedge() {
        //          0
        //        /   \
        //       1     2
        //      / \     \
        //     3   4    |
        //     |\  _|____|
        //     | \/ |
        //     | /\ |
        //     5    6
        //
        // (3 -> 5, and 3 -> 6 are critical edges and must be split)
        //
        let order = build_test_func(
            7,
            &[
                (0, 1),
                (0, 2),
                (1, 3),
                (1, 4),
                (2, 5),
                (3, 5),
                (3, 6),
                (4, 6),
            ],
        );

        assert_eq!(order.lowered_order.len(), 9);
        println!("ordered = {:?}", order.lowered_order);

        // Expected (orig block, in-edge pred, out-edge succ) per lowered index.
        let expected: [(Option<u32>, Option<u32>, Option<u32>); 9] = [
            (Some(0), None, None),    // block 0
            (Some(2), None, None),    // block 2
            (Some(1), None, None),    // block 1
            (Some(4), None, None),    // block 4
            (Some(3), None, None),    // block 3
            (None, Some(3), Some(5)), // critical edge 3 -> 5
            (None, Some(3), Some(6)), // critical edge 3 -> 6
            (Some(6), None, None),    // block 6
            (Some(5), None, None),    // block 5
        ];

        for (ix, (lb, &(orig, pred, succ))) in
            order.lowered_order.iter().zip(&expected).enumerate()
        {
            assert_eq!(
                lb.orig_block().map(|b| b.as_u32()),
                orig,
                "orig block at lowered index {}",
                ix
            );
            assert_eq!(
                lb.in_edge().map(|b| b.as_u32()),
                pred,
                "in-edge at lowered index {}",
                ix
            );
            assert_eq!(
                lb.out_edge().map(|b| b.as_u32()),
                succ,
                "out-edge at lowered index {}",
                ix
            );
        }
    }
}
|