Bypass state machine for single-predecessor chains (#4955)

In the common case where there is a chain of sealed blocks that each
have exactly one predecessor, we can keep track of any sub-sequence of
those blocks in O(1) space. So there's no need to use the state machine
stack to propagate variable definitions back along the chain.

Instead, we can do one loop to find which block to stop at, then either
get the variable definition from that block or introduce a block
parameter there, and finally do one more loop to update variable
definitions in all the intervening blocks.

The existing implementation already had to do a graph traversal to
propagate variable definitions correctly, so this doesn't visit any more
blocks than before. However, this change also makes it possible to
integrate cycle detection with the graph traversal. That eliminates the
need for the in_predecessor_cycle flags, and any possibility of spiky
performance profiles in maintaining those flags.

As far as performance goes, this is all pretty much a wash: Changes to
CPU time and CPU cycles are within noise, according to hyperfine and
Sightglass/perf. But it's a substantially simpler implementation, with
fewer invisible interactions between functions.
This commit is contained in:
Jamey Sharp
2022-09-28 17:05:08 -07:00
committed by GitHub
parent 2e954668c7
commit 6c8620b688

View File

@@ -12,7 +12,7 @@ use alloc::vec::Vec;
use core::convert::TryInto;
use core::mem;
use cranelift_codegen::cursor::{Cursor, FuncCursor};
use cranelift_codegen::entity::SecondaryMap;
use cranelift_codegen::entity::{EntitySet, SecondaryMap};
use cranelift_codegen::ir::immediates::{Ieee32, Ieee64};
use cranelift_codegen::ir::instructions::BranchInfo;
use cranelift_codegen::ir::types::{F32, F64};
@@ -52,6 +52,9 @@ pub struct SSABuilder {
/// Side effects accumulated in the `use_var`/`predecessors_lookup` state machine.
side_effects: SideEffects,
/// Reused storage for cycle-detection.
visited: EntitySet<Block>,
}
/// Side effects of a `use_var` or a `seal_block` method call.
@@ -99,13 +102,6 @@ struct SSABlockData {
predecessors: PredBlockSmallVec,
// A block is sealed if all of its predecessors have been declared.
sealed: bool,
// A predecessor-cycle occurs if this block is reachable by following only single-predecessor
// edges. This is used in `use_var_nonlocal`, which can avoid adding block parameters as long
// as it can find a unique block to get a variable's definition from.
in_predecessor_cycle: bool,
// A block is sealed with respect to predecessor cycles if its `in_predecessor_cycle` flag
// can't change.
sealed_predecessors: bool,
// List of current Block arguments for which an earlier def has not been found yet.
undef_variables: Vec<(Variable, Value)>,
}
@@ -124,12 +120,10 @@ impl SSABlockData {
.expect("the predecessor you are trying to remove is not declared");
self.predecessors.swap_remove(pred).block
}
}
#[derive(Eq, PartialEq)]
enum Action {
Add,
Remove,
fn has_one_predecessor(&self) -> bool {
self.sealed && self.predecessors.len() == 1
}
}
impl SSABuilder {
@@ -141,6 +135,7 @@ impl SSABuilder {
calls: Vec::new(),
results: Vec::new(),
side_effects: SideEffects::new(),
visited: EntitySet::new(),
}
}
@@ -172,18 +167,9 @@ enum ZeroOneOrMore<T> {
More,
}
/// Cases used internally by `use_var_nonlocal()` for avoiding the borrow checker.
#[derive(Debug)]
enum UseVarCases {
Unsealed(Value),
SealedOnePredecessor(Block),
SealedMultiplePredecessors(Value, Block),
}
/// States for the `use_var`/`predecessors_lookup` state machine.
enum Call {
UseVar(Block),
FinishSealedOnePredecessor(Block),
FinishPredecessorsLookup(Value, Block),
}
@@ -289,56 +275,99 @@ impl SSABuilder {
/// Resolve the minimal SSA Value of `var` in `block` by traversing predecessors.
///
/// This function sets up state for `run_state_machine()` but does not execute it.
fn use_var_nonlocal(&mut self, func: &mut Function, var: Variable, ty: Type, block: Block) {
// This function is split into two parts to appease the borrow checker.
// Part 1: With a mutable borrow of self, update the DataFlowGraph if necessary.
let data = &mut self.ssa_blocks[block];
let case = if data.sealed {
// Optimize the common case of one predecessor: no param needed. However, if following
// single-predecessor edges would go through a cycle ending up back here, then this
// optimization is unsafe, so fall back to adding a param anyway.
if data.predecessors.len() == 1 && !data.in_predecessor_cycle {
UseVarCases::SealedOnePredecessor(data.predecessors[0].block)
} else {
// Break potential cycles by eagerly adding an operandless param.
let val = func.dfg.append_block_param(block, ty);
UseVarCases::SealedMultiplePredecessors(val, block)
}
} else {
let val = func.dfg.append_block_param(block, ty);
data.undef_variables.push((var, val));
UseVarCases::Unsealed(val)
};
fn use_var_nonlocal(&mut self, func: &mut Function, var: Variable, ty: Type, mut block: Block) {
// Find the most recent definition of `var`, and the block the definition comes from.
let (val, from) = self.find_var(func, var, ty, block);
// Part 2: Prepare SSABuilder state for run_state_machine().
match case {
UseVarCases::SealedOnePredecessor(pred) => {
// Get the Value directly from the single predecessor.
self.calls.push(Call::FinishSealedOnePredecessor(block));
self.calls.push(Call::UseVar(pred));
// The `from` block returned from `find_var` is guaranteed to be on the path we follow by
// traversing only single-predecessor edges. It might be equal to `block` if there is no
// such path, but in that case `find_var` ensures that the variable is defined in this block
// by a new block parameter. It also might be somewhere in a cycle, but even then this loop
// will terminate the first time it encounters that block, rather than continuing around the
// cycle forever.
//
// Why is it okay to copy the definition to all intervening blocks? For the initial block,
// this may not be the final definition of this variable within this block, but if we've
// gotten here then we know there is no earlier definition in the block already.
//
// For the remaining blocks: Recall that a block is only allowed to be set as a predecessor
// after all its instructions have already been filled in, so when we follow a predecessor
// edge to a block, we know there will never be any more local variable definitions added to
// that block. We also know that `find_var` didn't find a definition for this variable in
// any of the blocks before `from`.
//
// So in either case there is no definition in these blocks yet and we can blindly set one.
let var_defs = &mut self.variables[var];
while block != from {
let data = &self.ssa_blocks[block];
debug_assert!(data.sealed);
debug_assert_eq!(data.predecessors.len(), 1);
debug_assert!(var_defs[block].is_none());
var_defs[block] = PackedOption::from(val);
block = data.predecessors[0].block;
}
}
UseVarCases::Unsealed(val) => {
// Define the operandless param added above to prevent lookup cycles.
self.def_var(var, val, block);
// Nothing more can be known at this point.
/// Find the most recent definition of this variable, returning both the definition and the
/// block in which it was found. If we can't find a definition that's provably the right one for
/// all paths to the current block, then append a block parameter to some block and use that as
/// the definition. Either way, also arrange that the definition will be on the `results` stack
/// when `run_state_machine` is done processing the current step.
///
/// If a block has exactly one predecessor, and the block is sealed so we know its predecessors
/// will never change, then its definition for this variable is the same as the definition from
/// that one predecessor. In this case it's easy to see that no block parameter is necessary,
/// but we need to look at the predecessor to see if a block parameter might be needed there.
/// That holds transitively across any chain of sealed blocks with exactly one predecessor each.
///
/// This runs into a problem, though, if such a chain has a cycle: Blindly following a cyclic
/// chain that never defines this variable would lead to an infinite loop in the compiler. It
/// doesn't really matter what code we generate in that case. Since each block in the cycle has
/// exactly one predecessor, there's no way to enter the cycle from the function's entry block;
/// and since all blocks in the cycle are sealed, the entire cycle is permanently dead code. But
/// we still have to prevent the possibility of an infinite loop.
///
/// To break cycles, we can pick any block within the cycle as the one where we'll add a block
/// parameter. It's convenient to pick the block at which we entered the cycle, because that's
/// the first place where we can detect that we just followed a cycle. Adding a block parameter
/// gives us a definition we can reuse throughout the rest of the cycle.
fn find_var(
&mut self,
func: &mut Function,
var: Variable,
ty: Type,
mut block: Block,
) -> (Value, Block) {
// Try to find an existing definition along single-predecessor edges first.
let var_defs = &mut self.variables[var];
self.visited.clear();
while self.ssa_blocks[block].has_one_predecessor() && self.visited.insert(block) {
block = self.ssa_blocks[block].predecessors[0].block;
if let Some(val) = var_defs[block].expand() {
self.results.push(val);
return (val, block);
}
}
// We've promised to return the most recent block where `var` was defined, but we didn't
// find a usable definition. So create one.
let val = func.dfg.append_block_param(block, ty);
var_defs[block] = PackedOption::from(val);
// Now every predecessor needs to pass its definition of this variable to the newly added
// block parameter. To do that we have to "recursively" call `use_var`, but there are two
// problems with doing that. First, we need to keep a fixed bound on stack depth, so we
// can't actually recurse; instead we defer to `run_state_machine`. Second, if we don't
// know all our predecessors yet, we have to defer this work until the block gets sealed.
if self.ssa_blocks[block].sealed {
// Once all the `calls` added here complete, this leaves either `val` or an equivalent
// definition on the `results` stack.
self.begin_predecessors_lookup(val, block);
} else {
self.ssa_blocks[block].undef_variables.push((var, val));
self.results.push(val);
}
UseVarCases::SealedMultiplePredecessors(val, block) => {
// Define the operandless param added above to prevent lookup cycles.
self.def_var(var, val, block);
// Look up a use_var for each precessor.
self.begin_predecessors_lookup(val, block);
}
}
}
/// For blocks with a single predecessor, once we've determined the value,
/// record a local def for it for future queries to find.
fn finish_sealed_one_predecessor(&mut self, var: Variable, block: Block) {
let val = *self.results.last().unwrap();
self.def_var(var, val, block);
(val, block)
}
/// Declares a new basic block to construct corresponding data for SSA construction.
@@ -359,7 +388,6 @@ impl SSABuilder {
/// of a jump table.
pub fn declare_block_predecessor(&mut self, block: Block, pred: Block, inst: Inst) {
debug_assert!(!self.is_sealed(block));
self.update_predecessor_cycle(block, pred, Action::Add);
self.ssa_blocks[block].add_predecessor(pred, inst)
}
@@ -369,88 +397,7 @@ impl SSABuilder {
/// Note: use only when you know what you are doing, this might break the SSA building problem
pub fn remove_block_predecessor(&mut self, block: Block, inst: Inst) -> Block {
debug_assert!(!self.is_sealed(block));
let pred = self.ssa_blocks[block].remove_predecessor(inst);
self.update_predecessor_cycle(block, pred, Action::Remove);
pred
}
/// Call this when `pred` is not in the predecessors of `block`: before adding, or after
/// removing.
fn update_predecessor_cycle(&mut self, block: Block, mut pred: Block, action: Action) {
let block_ref = &self.ssa_blocks[block];
// Not counting `pred`, how many predecessors does `block` have? And will it have exactly
// one after the caller is done changing things?
let will_have_one_pred = match block_ref.predecessors.as_slice() {
// None: `pred` might reach `block` in a cycle. There'll be one predecessor afterward
// if we're adding `pred`.
[] => action == Action::Add,
// One: the other predecessor might reach `block` in a cycle. There'll be one
// predecessor afterward if we're removing `pred`.
[other] => {
pred = other.block;
action == Action::Remove
}
// Two or more: there is definitely no cycle through `block`, either before or after.
_ => {
debug_assert!(!block_ref.in_predecessor_cycle);
return;
}
};
if will_have_one_pred {
// This block is about to have exactly one predecessor. Is there a path from that block
// to this one, following only single-predecessor edges?
debug_assert!(!block_ref.in_predecessor_cycle);
// Shadow `pred` so we can check for a cycle without forgetting where it started.
let mut pred = pred;
// This loop terminates even if it encounters a cycle in the control-flow graph. Any
// path through blocks which have exactly one predecessor each must eventually reach
// one of three kinds of blocks:
// 1. The block that we started from, so it is part of a cycle; or
// 2. A block that does not have exactly one predecessor, so there's no cycle; or
// 3. A block which we've previously flagged as being part of a cycle.
// In case 3, the block we're currently updating is not part of that existing
// cycle and can't be part of any other cycle.
// Case 1 is the loop termination condition.
while pred != block {
let pred_ref = &self.ssa_blocks[pred];
if pred_ref.sealed_predecessors {
// We're changing `block`'s predecessors, and we've reached a block whose
// single-predecessor path can't change, so `block` must not be on that path.
return;
}
// In cases 2 and 3, we aren't forming a cycle so there's nothing to update.
if pred_ref.in_predecessor_cycle || pred_ref.predecessors.len() != 1 {
return;
}
pred = pred_ref.predecessors[0].block;
}
} else if !block_ref.in_predecessor_cycle {
// This block was not in a predecessor cycle before, and won't have exactly one
// predecessor afterward, so it still isn't in one. Nothing to update.
return;
}
// At this point we know that if `block`'s only predecessor were `pred`, then both blocks
// would be part of a predecessor cycle. Either `will_have_one_pred` is true and we're
// forming a new cycle, or there was an existing cycle that we're now breaking. In either
// case, there's a path of single-predecessor edges going from `pred` to `block` and we
// just need to flip the `in_predecessor_cycle` flag on every block in that path.
self.ssa_blocks[block].in_predecessor_cycle = will_have_one_pred;
while pred != block {
let pred_ref = &mut self.ssa_blocks[pred];
debug_assert_ne!(pred_ref.in_predecessor_cycle, will_have_one_pred);
debug_assert_eq!(pred_ref.predecessors.len(), 1);
pred_ref.in_predecessor_cycle = will_have_one_pred;
pred = pred_ref.predecessors[0].block;
}
self.ssa_blocks[block].remove_predecessor(inst)
}
/// Completes the global value numbering for a `Block`, all of its predecessors having been
@@ -508,29 +455,10 @@ impl SSABuilder {
/// Set the `sealed` flag for `block`.
fn mark_block_sealed(&mut self, block: Block) {
// This sealed block's `in_predecessor_cycle` flag can't change once either:
let sealed_predecessors = match &self.ssa_blocks[block].predecessors[..] {
// It has exactly one predecessor where the flag also can't change;
[pred] => pred.block == block || self.ssa_blocks[pred.block].sealed_predecessors,
// Or we can tell locally that it isn't part of a predecessor cycle.
_ => {
debug_assert!(!self.ssa_blocks[block].in_predecessor_cycle);
true
}
};
// Note that we only update `sealed_predecessors` at the time that this block is sealed,
// and using information from one predecessor at most. If this block is sealed before its
// predecessor, then we conservatively assume that `in_predecessor_cycle` could still
// change at any time. So `update_predecessor_cycle` will do more work than it would if we
// computed this precisely, but this function remains constant-time. Frontends which seal
// every block as soon as possible, like cranelift-wasm, shouldn't see much difference.
let block_data = &mut self.ssa_blocks[block];
debug_assert!(!block_data.sealed);
debug_assert!(!block_data.sealed_predecessors);
debug_assert!(block_data.undef_variables.is_empty());
block_data.sealed = true;
block_data.sealed_predecessors = sealed_predecessors;
// We could call data.predecessors.shrink_to_fit() here, if
// important, because no further predecessors will be added
@@ -809,9 +737,6 @@ impl SSABuilder {
}
self.use_var_nonlocal(func, var, ty, ssa_block);
}
Call::FinishSealedOnePredecessor(ssa_block) => {
self.finish_sealed_one_predecessor(var, ssa_block);
}
Call::FinishPredecessorsLookup(sentinel, dest_block) => {
self.finish_predecessors_lookup(func, sentinel, var, dest_block);
}