wasmtime/cranelift/codegen/src/remove_constant_phis.rs

//! A Constant-Phi-Node removal pass.

use crate::dominator_tree::DominatorTree;
use crate::entity::EntityList;
use crate::fx::FxHashMap;
use crate::fx::FxHashSet;
use crate::ir::instructions::BranchInfo;
use crate::ir::Function;
use crate::ir::{Block, Inst, Value};
use crate::timing;

use smallvec::{smallvec, SmallVec};
use std::vec::Vec;

// A note on notation.  For the sake of clarity, this file uses the phrase
// "formal parameters" to mean the `Value`s listed in the block head, and
// "actual parameters" to mean the `Value`s passed in a branch or a jump:
//
// block4(v16: i32, v18: i32):    <-- formal parameters
//   ...
//   brnz v27, block7(v22, v24)   <-- actual parameters
//   jump block6

// This transformation pass (conceptually) partitions all values in the
// function into two groups:
//
// * Group A: values defined by block formal parameters, except for the entry block.
//
// * Group B: All other values: that is, values defined by instructions,
//   and the formals of the entry block.
//
// For each value in Group A, it attempts to establish whether it will have
// the value of exactly one member of Group B.  If so, the formal parameter is
// deleted, all corresponding actual parameters (in jumps/branches to the
// defining block) are deleted, and a rename is inserted.
//
// The entry block is special-cased because (1) we don't know what values flow
// to its formals and (2) in any case we can't change its formals.
//
// Work proceeds in three phases.
//
// * Phase 1: examine all instructions.  For each block, make up a useful
//   grab-bag of information, `BlockSummary`, that summarises the block's
//   formals and jump/branch instruction.  This is used by Phases 2 and 3.
//
// * Phase 2: for each value in Group A, try to find a single Group B value
//   that flows to it.  This is done using a classical iterative forward
//   dataflow analysis over a simple constant-propagation style lattice.  It
//   converges quickly in practice -- I have seen at most 4 iterations.  This
//   is relatively cheap because the iteration is done over the
//   `BlockSummary`s, and does not visit each instruction.  The resulting
//   fixed point is stored in a `SolverState`.
//
// * Phase 3: using the `SolverState` and `BlockSummary`, edit the function to
//   remove redundant formals and actuals, and to insert suitable renames.
//
// Note that the effectiveness of the analysis depends on on the fact that
// there are no copy instructions in Cranelift's IR.  If there were, the
// computation of `actual_absval` in Phase 2 would have to be extended to
// chase through such copies.
//
// For large functions, the analysis cost using the new AArch64 backend is about
// 0.6% of the non-optimising compile time, as measured by instruction counts.
// This transformation usually pays for itself several times over, though, by
// reducing the isel/regalloc cost downstream.  Gains of up to 7% have been
// seen for large functions.

// The `Value`s (Group B) that can flow to a formal parameter (Group A).
#[derive(Clone, Copy, Debug, PartialEq)]
enum AbstractValue {
    // Two or more values flow to this formal.
    Many,
    // Exactly one value, as stated, flows to this formal.  The `Value`s that
    // can appear here are exactly: `Value`s defined by `Inst`s, plus the
    // `Value`s defined by the formals of the entry block.  Note that this is
    // exactly the set of `Value`s that are *not* tracked in the solver below
    // (see `SolverState`).
    One(Value /*Group B*/),
    // No value flows to this formal.
    None,
}

impl AbstractValue {
    fn join(self, other: AbstractValue) -> AbstractValue {
        match (self, other) {
            // Joining with `None` has no effect
            (AbstractValue::None, p2) => p2,
            (p1, AbstractValue::None) => p1,
            // Joining with `Many` produces `Many`
            (AbstractValue::Many, _p2) => AbstractValue::Many,
            (_p1, AbstractValue::Many) => AbstractValue::Many,
            // The only interesting case
            (AbstractValue::One(v1), AbstractValue::One(v2)) => {
                if v1 == v2 {
                    AbstractValue::One(v1)
                } else {
                    AbstractValue::Many
                }
            }
        }
    }
    fn is_one(self) -> bool {
        if let AbstractValue::One(_) = self {
            true
        } else {
            false
        }
    }
}

// For some block, a useful bundle of info.  The `Block` itself is not stored
// here since it will be the key in the associated `FxHashMap` -- see
// `summaries` below.  For the `SmallVec` tuning params: most blocks have
// few parameters, hence `4`.  And almost all blocks have either one or two
// successors, hence `2`.
#[derive(Debug)]
struct BlockSummary {
    // Formal parameters for this `Block`
    formals: SmallVec<[Value; 4] /*Group A*/>,
    // For each `Inst` in this block that transfers to another block: the
    // `Inst` itself, the destination `Block`, and the actual parameters
    // passed.  We don't bother to include transfers that pass zero parameters
    // since that makes more work for the solver for no purpose.
    dests: SmallVec<[(Inst, Block, SmallVec<[Value; 4] /*both Groups A and B*/>); 2]>,
}
impl BlockSummary {
    fn new(formals: SmallVec<[Value; 4]>) -> Self {
        Self {
            formals,
            dests: smallvec![],
        }
    }
}

// Solver state.  This holds a AbstractValue for each formal parameter, except
// for those from the entry block.
struct SolverState {
    absvals: FxHashMap<Value /*Group A*/, AbstractValue>,
}
impl SolverState {
    fn new() -> Self {
        Self {
            absvals: FxHashMap::default(),
        }
    }
    fn get(&self, actual: Value) -> AbstractValue {
        match self.absvals.get(&actual) {
            Some(lp) => *lp,
            None => panic!("SolverState::get: formal param {:?} is untracked?!", actual),
        }
    }
    fn maybe_get(&self, actual: Value) -> Option<&AbstractValue> {
        self.absvals.get(&actual)
    }
    fn set(&mut self, actual: Value, lp: AbstractValue) {
        match self.absvals.insert(actual, lp) {
            Some(_old_lp) => {}
            None => panic!("SolverState::set: formal param {:?} is untracked?!", actual),
        }
    }
}

/// Detect phis in `func` that will only ever produce one value, using a
/// classic forward dataflow analysis.  Then remove them.
#[inline(never)]
pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) {
    let _tt = timing::remove_constant_phis();
    debug_assert!(domtree.is_valid());

    // Get the blocks, in reverse postorder
    let mut blocks_reverse_postorder = Vec::<Block>::new();
    for block in domtree.cfg_postorder() {
        blocks_reverse_postorder.push(*block);
    }
    blocks_reverse_postorder.reverse();

    // Phase 1 of 3: for each block, make a summary containing all relevant
    // info.  The solver will iterate over the summaries, rather than having
    // to inspect each instruction in each block.
    let mut summaries = FxHashMap::<Block, BlockSummary>::default();

    for b in &blocks_reverse_postorder {
        let formals = func.dfg.block_params(*b);
        let mut summary = BlockSummary::new(SmallVec::from(formals));

        for inst in func.layout.block_insts(*b) {
            let idetails = &func.dfg[inst];
            // Note that multi-dest transfers (i.e., branch tables) don't
            // carry parameters in our IR, so we only have to care about
            // `SingleDest` here.
            if let BranchInfo::SingleDest(dest, _) = idetails.analyze_branch(&func.dfg.value_lists)
            {
                let inst_var_args = func.dfg.inst_variable_args(inst);
                // Skip branches/jumps that carry no params.
                if inst_var_args.len() > 0 {
                    let mut actuals = SmallVec::<[Value; 4]>::new();
                    for arg in inst_var_args {
                        let arg = func.dfg.resolve_aliases(*arg);
                        actuals.push(arg);
                    }
                    summary.dests.push((inst, dest, actuals));
                }
            }
        }

        // Ensure the invariant that all blocks (except for the entry) appear
        // in the summary, *unless* they have neither formals nor any
        // param-carrying branches/jumps.
        if formals.len() > 0 || summary.dests.len() > 0 {
            summaries.insert(*b, summary);
        }
    }

    // Phase 2 of 3: iterate over the summaries in reverse postorder,
    // computing new `AbstractValue`s for each tracked `Value`.  The set of
    // tracked `Value`s is exactly Group A as described above.

    let entry_block = func
        .layout
        .entry_block()
        .expect("remove_constant_phis: entry block unknown");

    // Set up initial solver state
    let mut state = SolverState::new();

    for b in &blocks_reverse_postorder {
        // For each block, get the formals
        if *b == entry_block {
            continue;
        }
        let formals: &[Value] = func.dfg.block_params(*b);
        for formal in formals {
            let mb_old_absval = state.absvals.insert(*formal, AbstractValue::None);
            assert!(mb_old_absval.is_none());
        }
    }

    // Solve: repeatedly traverse the blocks in reverse postorder, until there
    // are no changes.
    let mut iter_no = 0;
    loop {
        iter_no += 1;
        let mut changed = false;

        for src in &blocks_reverse_postorder {
            let mb_src_summary = summaries.get(src);
            // The src block might have no summary.  This means it has no
            // branches/jumps that carry parameters *and* it doesn't take any
            // parameters itself.  Phase 1 ensures this.  So we can ignore it.
            if mb_src_summary.is_none() {
                continue;
            }
            let src_summary = mb_src_summary.unwrap();
            for (_inst, dst, src_actuals) in &src_summary.dests {
                assert!(*dst != entry_block);
                // By contrast, the dst block must have a summary.  Phase 1
                // will have only included an entry in `src_summary.dests` if
                // that branch/jump carried at least one parameter.  So the
                // dst block does take parameters, so it must have a summary.
                let dst_summary = summaries
                    .get(dst)
                    .expect("remove_constant_phis: dst block has no summary");
                let dst_formals = &dst_summary.formals;
                assert!(src_actuals.len() == dst_formals.len());
                for (formal, actual) in dst_formals.iter().zip(src_actuals.iter()) {
                    // Find the abstract value for `actual`.  If it is a block
                    // formal parameter then the most recent abstract value is
                    // to be found in the solver state.  If not, then it's a
                    // real value defining point (not a phi), in which case
                    // return it itself.
                    let actual_absval = match state.maybe_get(*actual) {
                        Some(pt) => *pt,
                        None => AbstractValue::One(*actual),
                    };

                    // And `join` the new value with the old.
                    let formal_absval_old = state.get(*formal);
                    let formal_absval_new = formal_absval_old.join(actual_absval);
                    if formal_absval_new != formal_absval_old {
                        changed = true;
                        state.set(*formal, formal_absval_new);
                    }
                }
            }
        }

        if !changed {
            break;
        }
    }
    let mut n_consts = 0;
    for absval in state.absvals.values() {
        if absval.is_one() {
            n_consts += 1;
        }
    }

    // Phase 3 of 3: edit the function to remove constant formals, using the
    // summaries and the final solver state as a guide.

    // Make up a set of blocks that need editing.
    let mut need_editing = FxHashSet::<Block>::default();
    for (block, summary) in &summaries {
        if *block == entry_block {
            continue;
        }
        for formal in &summary.formals {
            let formal_absval = state.get(*formal);
            if formal_absval.is_one() {
                need_editing.insert(*block);
                break;
            }
        }
    }

    // Firstly, deal with the formals.  For each formal which is redundant,
    // remove it, and also add a reroute from it to the constant value which
    // it we know it to be.
    for b in &need_editing {
        let mut del_these = SmallVec::<[(Value, Value); 32]>::new();
        let formals: &[Value] = func.dfg.block_params(*b);
        for formal in formals {
            // The state must give an absval for `formal`.
            if let AbstractValue::One(replacement_val) = state.get(*formal) {
                del_these.push((*formal, replacement_val));
            }
        }
        // We can delete the formals in any order.  However,
        // `remove_block_param` works by sliding backwards all arguments to
        // the right of the it is asked to delete.  Hence when removing more
        // than one formal, it is significantly more efficient to ask it to
        // remove the rightmost formal first, and hence this `reverse`.
        del_these.reverse();
        for (redundant_formal, replacement_val) in del_these {
            func.dfg.remove_block_param(redundant_formal);
            func.dfg.change_to_alias(redundant_formal, replacement_val);
        }
    }

    // Secondly, visit all branch insns.  If the destination has had its
    // formals changed, change the actuals accordingly.  Don't scan all insns,
    // rather just visit those as listed in the summaries we prepared earlier.
    for (_src_block, summary) in &summaries {
        for (inst, dst_block, _src_actuals) in &summary.dests {
            if !need_editing.contains(dst_block) {
                continue;
            }

            let old_actuals = func.dfg[*inst].take_value_list().unwrap();
            let num_old_actuals = old_actuals.len(&func.dfg.value_lists);
            let num_fixed_actuals = func.dfg[*inst]
                .opcode()
                .constraints()
                .num_fixed_value_arguments();
            let dst_summary = summaries.get(&dst_block).unwrap();

            // Check that the numbers of arguments make sense.
            assert!(num_fixed_actuals <= num_old_actuals);
            assert!(num_fixed_actuals + dst_summary.formals.len() == num_old_actuals);

            // Create a new value list.
            let mut new_actuals = EntityList::<Value>::new();
            // Copy the fixed args to the new list
            for i in 0..num_fixed_actuals {
                let val = old_actuals.get(i, &func.dfg.value_lists).unwrap();
                new_actuals.push(val, &mut func.dfg.value_lists);
            }

            // Copy the variable args (the actual block params) to the new
            // list, filtering out redundant ones.
            for i in 0..dst_summary.formals.len() {
                let actual_i = old_actuals
                    .get(num_fixed_actuals + i, &func.dfg.value_lists)
                    .unwrap();
                let formal_i = dst_summary.formals[i];
                let is_redundant = state.get(formal_i).is_one();
                if !is_redundant {
                    new_actuals.push(actual_i, &mut func.dfg.value_lists);
                }
            }
            func.dfg[*inst].put_value_list(new_actuals);
        }
    }

    log::debug!(
        "do_remove_constant_phis: done, {} iters.   {} formals, of which {} const.",
        iter_no,
        state.absvals.len(),
        n_consts
    );
}