Files
wasmtime/cranelift/codegen/src/egraph.rs
Trevor Elliott 25bf8e0e67 Make DataFlowGraph::insts public, but restricted (#5450)
We have some operations defined on DataFlowGraph purely to work around borrow-checker issues with InstructionData and other data on DataFlowGraph. Part of the problem is that indexing the DFG directly hides the fact that we're only indexing the insts field of the DFG.

This PR makes the insts field of the DFG public, but wraps it in a newtype that only allows indexing. This means that the borrow checker is better able to tell when operations on memory held by the DFG won't conflict, which comes up frequently when mutating ValueLists held by InstructionData.
2022-12-16 10:46:09 -08:00

570 lines
23 KiB
Rust

//! Support for egraphs represented in the DataFlowGraph.
use crate::alias_analysis::{AliasAnalysis, LastStores};
use crate::ctxhash::{CtxEq, CtxHash, CtxHashMap};
use crate::cursor::{Cursor, CursorPosition, FuncCursor};
use crate::dominator_tree::DominatorTree;
use crate::egraph::domtree::DomTreeWithChildren;
use crate::egraph::elaborate::Elaborator;
use crate::fx::FxHashSet;
use crate::inst_predicates::is_pure_for_egraph;
use crate::ir::{
DataFlowGraph, Function, Inst, InstructionData, Type, Value, ValueDef, ValueListPool,
};
use crate::loop_analysis::LoopAnalysis;
use crate::opts::generated_code::ContextIter;
use crate::opts::IsleContext;
use crate::trace;
use crate::unionfind::UnionFind;
use cranelift_entity::packed_option::ReservedValue;
use cranelift_entity::SecondaryMap;
use std::hash::Hasher;
mod cost;
mod domtree;
mod elaborate;
/// Pass over a Function that runs the full aegraph pipeline (driven
/// by `run`).
///
/// - Removes non-skeleton nodes from the Layout.
/// - Performs a GVN-and-rule-application pass over all Values
/// reachable from the skeleton, potentially creating new Union
/// nodes (i.e., an aegraph) so that some values have multiple
/// representations.
/// - Does "extraction" on the aegraph: selects the best value out of
/// the tree-of-Union nodes for each used value.
/// - Does "scoped elaboration" on the aegraph: chooses one or more
/// locations for pure nodes to become instructions again in the
/// layout, as forced by the skeleton.
///
/// At the beginning and end of this pass, the CLIF should be in a
/// state that passes the verifier and, additionally, has no Union
/// nodes. During the pass, Union nodes may exist, and instructions in
/// the layout may refer to results of instructions that are not
/// placed in the layout.
pub struct EgraphPass<'a> {
/// The function we're operating on.
func: &'a mut Function,
/// Dominator tree, used for the elaboration pass and to build
/// `domtree_children`.
domtree: &'a DominatorTree,
/// Alias analysis, used during optimization: it supplies the
/// per-block starting state and processes skeleton instructions.
alias_analysis: &'a mut AliasAnalysis<'a>,
/// "Domtree with children": like `domtree`, but with an explicit
/// list of children, rather than just parent pointers. Drives the
/// domtree-preorder block traversal in the optimization pass.
domtree_children: DomTreeWithChildren,
/// Loop analysis results, used for built-in LICM during
/// elaboration.
loop_analysis: &'a LoopAnalysis,
/// Which canonical Values do we want to rematerialize in each
/// block where they're used?
///
/// (A canonical Value is the *oldest* Value in an eclass,
/// i.e. tree of union value-nodes).
remat_values: FxHashSet<Value>,
/// Stats collected while we run this pass.
pub(crate) stats: Stats,
/// Union-find that maps all members of a Union tree (eclass) back
/// to the *oldest* (lowest-numbered) `Value`.
eclasses: UnionFind<Value>,
}
/// Context passed through node insertion and optimization.
///
/// A fresh context is built for every instruction visited by
/// `EgraphPass::remove_pure_and_optimize`; it bundles disjoint borrows
/// of the pass state so that optimization methods can mutate them
/// while the function itself is borrowed through a cursor.
pub(crate) struct OptimizeCtx<'opt, 'analysis>
where
'analysis: 'opt,
{
// Borrowed from EgraphPass, or from locals of
// `EgraphPass::remove_pure_and_optimize` (the value map and the
// GVN map):
/// The function being optimized.
pub(crate) func: &'opt mut Function,
/// Map from each original value to its latest optimized value.
pub(crate) value_to_opt_value: &'opt mut SecondaryMap<Value, Value>,
/// GVN deduplication map, keyed by controlling type and
/// instruction data.
pub(crate) gvn_map: &'opt mut CtxHashMap<(Type, InstructionData), Value>,
/// Union-find over values; used to canonicalize instruction
/// arguments when hashing and comparing GVN keys.
pub(crate) eclasses: &'opt mut UnionFind<Value>,
/// Set of values to rematerialize; not modified in this file.
pub(crate) remat_values: &'opt mut FxHashSet<Value>,
/// Statistics counters updated as optimization proceeds.
pub(crate) stats: &'opt mut Stats,
/// Alias analysis used to process skeleton instructions.
pub(crate) alias_analysis: &'opt mut AliasAnalysis<'analysis>,
/// Alias-analysis state for the block currently being processed.
pub(crate) alias_analysis_state: &'opt mut LastStores,
// Held locally during optimization of one node (recursively):
/// Current nesting depth of rewrite-rule invocations; bounded in
/// `optimize_pure_enode`.
pub(crate) rewrite_depth: usize,
/// Values that, when returned by the rewrite rules, replace the
/// original value outright instead of being unioned with it.
pub(crate) subsume_values: FxHashSet<Value>,
}
/// For passing to `insert_pure_enode`. Sometimes the enode already
/// exists as an Inst (from the original CLIF), and sometimes we're in
/// the middle of creating it and want to avoid inserting it if
/// possible until we know we need it.
pub(crate) enum NewOrExistingInst {
/// An instruction that has not been added to the DFG yet: its
/// data, plus the type passed as the controlling type variable
/// when its result is created.
New(InstructionData, Type),
/// An instruction that already exists in the DFG.
Existing(Inst),
}
impl NewOrExistingInst {
    /// Build the `(type, instruction data)` key under which this
    /// instruction is looked up in the GVN map.
    ///
    /// For a `New` instruction the key is simply the carried type and
    /// data. For an `Existing` instruction, the controlling type
    /// variable and the instruction data are read from `dfg`.
    ///
    /// `InstructionData` is `Copy`, so the key is produced by plain
    /// copies; no explicit `clone()` or lifetime annotations are
    /// needed (the returned tuple borrows nothing).
    fn get_inst_key(&self, dfg: &DataFlowGraph) -> (Type, InstructionData) {
        match *self {
            NewOrExistingInst::New(data, ty) => (ty, data),
            NewOrExistingInst::Existing(inst) => (dfg.ctrl_typevar(inst), dfg.insts[inst]),
        }
    }
}
impl<'opt, 'analysis> OptimizeCtx<'opt, 'analysis>
where
'analysis: 'opt,
{
/// Insert a pure instruction into the egraph, deduplicating and
/// optimizing it, and return the value that now represents it.
///
/// Steps:
/// - Look the instruction up in the GVN map. The lookup hashes and
///   compares instructions with their arguments mapped to
///   *canonical* eclass IDs (the oldest `Value` in each eclass), so
///   nodes that use newer "versions" of an equal value still
///   deduplicate against older ones.
/// - On a hit, an already-existing instruction has its result
///   redirected to, and unioned with, the earlier result; a
///   not-yet-inserted instruction is simply dropped in favor of the
///   earlier result.
/// - On a miss, materialize the instruction if needed, run the
///   rewrite rules on it (see `optimize_pure_enode`), then record the
///   optimized value in both the GVN map and the value-to-opt-value
///   map.
pub(crate) fn insert_pure_enode(&mut self, inst: NewOrExistingInst) -> Value {
    self.stats.pure_inst += 1;
    if matches!(inst, NewOrExistingInst::New(..)) {
        self.stats.new_inst += 1;
    }

    // The GVN map needs external context (union-find and value
    // lists) to hash and compare instructions, so that the keys
    // themselves need not carry those references.
    let key = inst.get_inst_key(&self.func.dfg);
    let lookup_ctx = GVNContext {
        union_find: self.eclasses,
        value_lists: &self.func.dfg.value_lists,
    };

    // Hit: reuse the earlier result rather than optimizing again.
    if let Some(&orig_result) = self.gvn_map.get(&key, &lookup_ctx) {
        self.stats.pure_inst_deduped += 1;
        return match inst {
            NewOrExistingInst::Existing(inst) => {
                debug_assert_eq!(self.func.dfg.inst_results(inst).len(), 1);
                let result = self.func.dfg.first_result(inst);
                self.value_to_opt_value[result] = orig_result;
                self.eclasses.union(result, orig_result);
                self.stats.union += 1;
                result
            }
            NewOrExistingInst::New(..) => orig_result,
        };
    }

    // Miss: make sure the instruction and its single result exist
    // in the DFG.
    let (inst, result, ty) = match inst {
        NewOrExistingInst::Existing(inst) => {
            let result = self.func.dfg.first_result(inst);
            let ty = self.func.dfg.ctrl_typevar(inst);
            (inst, result, ty)
        }
        NewOrExistingInst::New(data, typevar) => {
            let inst = self.func.dfg.make_inst(data);
            // TODO: reuse return value?
            self.func.dfg.make_inst_results(inst, typevar);
            let result = self.func.dfg.first_result(inst);
            // A brand-new value must be registered in the union-find.
            self.eclasses.add(result);
            (inst, result, typevar)
        }
    };

    let opt_value = self.optimize_pure_enode(inst);

    // Re-read the instruction data for the key, and rebuild the
    // context: optimization needed mutable access to the state the
    // earlier context borrowed.
    let insert_key = (ty, self.func.dfg.insts[inst].clone());
    let insert_ctx = GVNContext {
        union_find: self.eclasses,
        value_lists: &self.func.dfg.value_lists,
    };
    self.gvn_map.insert(insert_key, opt_value, &insert_ctx);
    self.value_to_opt_value[result] = opt_value;
    opt_value
}
/// Run the mid-end rewrite rules on the pure instruction `inst` and
/// return the `Value` representing the eclass of everything found
/// equivalent to its result.
///
/// Each alternative produced by the rules is unioned with the
/// running eclass value, unless it is marked as subsuming, in which
/// case it alone is kept and iteration stops. If the rewrite depth
/// limit has been exceeded, the original result is returned
/// unchanged.
fn optimize_pure_enode(&mut self, inst: Inst) -> Value {
    // Rules may create new nodes, which are themselves optimized
    // eagerly as they are created, so several ISLE invocations can
    // be on the stack at once. (That eagerness lets a rule's
    // right-hand side, built bottom-up, use the "latest" optimized
    // form of each constituent.) Bound the nesting with a small
    // constant to avoid infinite or problematic recursion.
    const REWRITE_LIMIT: usize = 5;

    // A pure node always has exactly one result.
    let orig_value = self.func.dfg.first_result(inst);

    if self.rewrite_depth > REWRITE_LIMIT {
        self.stats.rewrite_depth_limit += 1;
        return orig_value;
    }
    self.rewrite_depth += 1;

    trace!("Calling into ISLE with original value {}", orig_value);
    self.stats.rewrite_rule_invoked += 1;

    // Invoke the ISLE toplevel constructor; it yields every value
    // found to be equivalent to `orig_value`.
    let mut isle_ctx = IsleContext { ctx: self };
    let mut candidates =
        crate::opts::generated_code::constructor_simplify(&mut isle_ctx, orig_value);

    // Root of the union tree built so far.
    let mut eclass_root = orig_value;
    while let Some(candidate) = candidates.next(&mut isle_ctx) {
        trace!(
            "Returned from ISLE for {}, got {:?}",
            orig_value,
            candidate
        );
        if candidate == orig_value {
            trace!(" -> same as orig value; skipping");
            continue;
        }

        let ctx = &mut *isle_ctx.ctx;
        if ctx.subsume_values.contains(&candidate) {
            // Still merge in the union-find so canonicalization
            // keeps working, but keep *only* the subsuming value.
            ctx.eclasses.union(candidate, eclass_root);
            eclass_root = candidate;
            break;
        }

        // Ordinary alternative: create a union node joining it with
        // what we have so far, and mirror that in the union-find.
        let previous_root = eclass_root;
        eclass_root = ctx.func.dfg.union(previous_root, candidate);
        ctx.stats.union += 1;
        trace!(" -> union: now {}", eclass_root);
        ctx.eclasses.add(eclass_root);
        ctx.eclasses.union(previous_root, candidate);
        ctx.eclasses.union(previous_root, eclass_root);
    }

    isle_ctx.ctx.rewrite_depth -= 1;
    eclass_root
}
/// Process one "skeleton" (non-pure) instruction.
///
/// Returns `true` when alias analysis supplied a replacement value
/// for the instruction's result, meaning the caller should remove
/// the instruction from the layout; returns `false` when the
/// instruction must stay.
fn optimize_skeleton_inst(&mut self, inst: Inst) -> bool {
    self.stats.skeleton_inst += 1;

    // Not pure, but it may still be a load or store that alias
    // analysis can optimize.
    let replacement =
        self.alias_analysis
            .process_inst(self.func, self.alias_analysis_state, inst);

    match replacement {
        Some(new_result) => {
            self.stats.alias_analysis_removed += 1;
            let result = self.func.dfg.first_result(inst);
            self.value_to_opt_value[result] = new_result;
            true
        }
        None => {
            // The instruction stays: each result maps to itself and
            // gets its own entry in the union-find.
            for &result in self.func.dfg.inst_results(inst) {
                self.value_to_opt_value[result] = result;
                self.eclasses.add(result);
            }
            false
        }
    }
}
}
impl<'a> EgraphPass<'a> {
/// Build an `EgraphPass` over `func` using the supplied analyses.
///
/// The union-find is pre-sized to the function's current number of
/// values, and the children-list form of the dominator tree is
/// computed up front.
pub fn new(
    func: &'a mut Function,
    domtree: &'a DominatorTree,
    loop_analysis: &'a LoopAnalysis,
    alias_analysis: &'a mut AliasAnalysis<'a>,
) -> Self {
    // Both of these read `func`, so compute them before it is moved
    // into the struct.
    let eclasses = UnionFind::with_capacity(func.dfg.num_values());
    let domtree_children = DomTreeWithChildren::new(func, domtree);

    Self {
        func,
        domtree,
        alias_analysis,
        domtree_children,
        loop_analysis,
        remat_values: FxHashSet::default(),
        stats: Stats::default(),
        eclasses,
    }
}
/// Run the whole pass: first strip and optimize the pure nodes, then
/// elaborate the result back into the layout. The intermediate
/// egraph and the statistics are emitted through `trace!`.
pub fn run(&mut self) {
    self.remove_pure_and_optimize();

    trace!("egraph built:\n{}\n", self.func.display());
    if cfg!(feature = "trace-log") {
        for (value, def) in self.func.dfg.values_and_defs() {
            trace!(" -> {} = {:?}", value, def);
            // For the first result of an instruction, also show the
            // instruction itself.
            if let ValueDef::Result(i, 0) = def {
                trace!(" -> {} = {:?}", i, self.func.dfg.insts[i]);
            }
        }
    }
    trace!("stats: {:?}", self.stats);

    self.elaborate();
}
/// First stage of egraph processing: strip pure nodes out of the
/// function's `Layout` so that only the "side-effect skeleton"
/// remains, optimizing the pure nodes along the way. The result is
/// a CFG skeleton tied together by a sea of (optimized) nodes.
///
/// Optimization rules are applied eagerly as the code is walked. At
/// any point, each `Value` of the original program has a "latest
/// version": the `Value` at the root of a union-tree holding its
/// eclass of known representations. A map from original values to
/// these optimized values is maintained, and the arguments of every
/// instruction (pure or skeleton) are rewritten through it when the
/// instruction is visited. This has to happen during the walk
/// rather than afterwards with a finished map: eclasses keep
/// growing, and each use must refer only to the subset that exists
/// at that point in order to maintain acyclicity.
fn remove_pure_and_optimize(&mut self) {
    let mut cursor = FuncCursor::new(self.func);
    let mut value_to_opt_value: SecondaryMap<Value, Value> =
        SecondaryMap::with_default(Value::reserved_value());
    let mut gvn_map: CtxHashMap<(Type, InstructionData), Value> =
        CtxHashMap::with_capacity(cursor.func.dfg.num_values());

    // Visit blocks in domtree preorder. (TODO: factor out an
    // iterator from this and elaborator.)
    let mut pending_blocks = vec![self.domtree_children.root()];
    while let Some(block) = pending_blocks.pop() {
        // Queue the children right away, then handle this block.
        pending_blocks.extend(self.domtree_children.children(block));

        trace!("Processing block {}", block);
        cursor.set_position(CursorPosition::Before(block));

        let mut alias_analysis_state = self.alias_analysis.block_starting_state(block);

        for &param in cursor.func.dfg.block_params(block) {
            trace!("creating initial singleton eclass for blockparam {}", param);
            self.eclasses.add(param);
            value_to_opt_value[param] = param;
        }

        while let Some(inst) = cursor.next_inst() {
            trace!("Processing inst {}", inst);

            // Every result value starts out in its own singleton
            // eclass.
            for &result in cursor.func.dfg.inst_results(inst) {
                trace!("creating initial singleton eclass for {}", result);
                self.eclasses.add(result);
            }

            // Rewrite the args of *all* instructions through the
            // value-to-opt-value map.
            cursor.func.dfg.resolve_aliases_in_arguments(inst);
            for arg in cursor.func.dfg.inst_args_mut(inst) {
                let new_value = value_to_opt_value[*arg];
                trace!("rewriting arg {} of inst {} to {}", arg, inst, new_value);
                debug_assert_ne!(new_value, Value::reserved_value());
                *arg = new_value;
            }

            // `self.func` is mutably borrowed through the cursor, so
            // methods on `self` are off-limits here; hand the
            // individual pieces of state to an optimization context
            // instead.
            let mut ctx = OptimizeCtx {
                func: cursor.func,
                value_to_opt_value: &mut value_to_opt_value,
                gvn_map: &mut gvn_map,
                eclasses: &mut self.eclasses,
                rewrite_depth: 0,
                subsume_values: FxHashSet::default(),
                remat_values: &mut self.remat_values,
                stats: &mut self.stats,
                alias_analysis: self.alias_analysis,
                alias_analysis_state: &mut alias_analysis_state,
            };

            let remove_from_layout = if is_pure_for_egraph(ctx.func, inst) {
                // Insert into the GVN map and optimize any new nodes
                // (recursively, for whatever the rules produce). All
                // uses have been, or will be, rewritten, and the
                // instruction now lives on as a pure enode, so it
                // always leaves the layout.
                ctx.insert_pure_enode(NewOrExistingInst::Existing(inst));
                true
            } else {
                ctx.optimize_skeleton_inst(inst)
            };

            if remove_from_layout {
                cursor.remove_inst_and_step_back();
            }
        }
    }
}
/// Scoped elaboration: decide the final order in which operations
/// are computed in each block and rewrite the function body
/// accordingly. Afterwards every Inst with a used result is placed
/// in the layout again (possibly duplicated, if the code-motion
/// logic decides that is the best option).
///
/// The work is done by `Elaborator`, in concert with the domtree: a
/// preorder traversal of the domtree tracks a scoped map from Id to
/// (new) Value, with one scope per domtree level.
///
/// Within each block, the side-effecting eclasses are visited in
/// forward order; their argument eclasses are generated recursively
/// and then the ops themselves are emitted.
///
/// Using an eclass in a block means first consulting the scoped
/// map. If a Value is already present it is reused; otherwise the
/// extracted enode for the eclass is emitted after its args have
/// been generated recursively. Eclasses are therefore computed "as
/// late as possible", then memoized in the Id-to-Value map and made
/// available to the rest of the block and to all dominated blocks.
/// (This subsumes GVN.)
///
/// Once elaboration is done, `check_post_egraph` validates the
/// result (a no-op unless debug assertions are enabled).
fn elaborate(&mut self) {
    // The elaborator is a temporary, so its borrows of the pass
    // state end before the post-check below.
    Elaborator::new(
        self.func,
        self.domtree,
        &self.domtree_children,
        self.loop_analysis,
        &mut self.remat_values,
        &mut self.eclasses,
        &mut self.stats,
    )
    .elaborate();

    self.check_post_egraph();
}
/// Debug-only sanity check run after elaboration: for every
/// argument of every instruction in the layout, panic if it is
/// defined by a union node, and assert that an instruction defining
/// it is itself placed in the layout.
#[cfg(debug_assertions)]
fn check_post_egraph(&self) {
    let func = &*self.func;
    for block in func.layout.blocks() {
        for inst in func.layout.block_insts(block) {
            for &arg in func.dfg.inst_args(inst) {
                match func.dfg.value_def(arg) {
                    ValueDef::Union(..) => {
                        panic!("egraph union node {} still reachable at {}!", arg, inst);
                    }
                    ValueDef::Result(def_inst, _) => {
                        debug_assert!(func.layout.inst_block(def_inst).is_some());
                    }
                    _ => {}
                }
            }
        }
    }
}

/// Release-build counterpart of the check above: does nothing.
#[cfg(not(debug_assertions))]
fn check_post_egraph(&self) {}
}
/// Implementation of external-context equality and hashing on
/// InstructionData. This allows us to deduplicate instructions given
/// some context that lets us see its value lists and the mapping from
/// any value to "canonical value" (in an eclass).
struct GVNContext<'a> {
/// Pool holding the value lists referenced by instruction data.
value_lists: &'a ValueListPool,
/// Union-find used to map each value to its canonical value before
/// it is hashed or compared.
union_find: &'a UnionFind<Value>,
}
impl<'a> CtxEq<(Type, InstructionData), (Type, InstructionData)> for GVNContext<'a> {
fn ctx_eq(
&self,
(a_ty, a_inst): &(Type, InstructionData),
(b_ty, b_inst): &(Type, InstructionData),
) -> bool {
a_ty == b_ty
&& a_inst.eq(b_inst, self.value_lists, |value| {
self.union_find.find(value)
})
}
}
impl<'a> CtxHash<(Type, InstructionData)> for GVNContext<'a> {
fn ctx_hash<H: Hasher>(&self, state: &mut H, (ty, inst): &(Type, InstructionData)) {
std::hash::Hash::hash(&ty, state);
inst.hash(state, self.value_lists, |value| self.union_find.find(value));
}
}
/// Statistics collected during egraph-based processing.
///
/// All counters start at zero (via `Default`). The `elaborate_*`
/// counters, `subsume` and `remat` are not incremented in this file;
/// they are presumably maintained by the code that receives this
/// struct (the elaborator and the rewrite-rule context) -- confirm
/// there.
#[derive(Clone, Debug, Default)]
pub(crate) struct Stats {
/// Calls to `insert_pure_enode`.
pub(crate) pure_inst: u64,
/// Pure instructions found already present in the GVN map.
pub(crate) pure_inst_deduped: u64,
/// Skeleton instructions processed.
pub(crate) skeleton_inst: u64,
/// Skeleton instructions for which alias analysis supplied a
/// replacement value.
pub(crate) alias_analysis_removed: u64,
/// Not-yet-inserted instructions passed to `insert_pure_enode`.
pub(crate) new_inst: u64,
/// Unions performed: GVN hits on existing instructions, plus union
/// nodes created while merging rewrite results.
pub(crate) union: u64,
pub(crate) subsume: u64,
pub(crate) remat: u64,
/// Invocations of the ISLE rewrite entry point.
pub(crate) rewrite_rule_invoked: u64,
/// Times rewriting was skipped because the depth limit was hit.
pub(crate) rewrite_depth_limit: u64,
pub(crate) elaborate_visit_node: u64,
pub(crate) elaborate_memoize_hit: u64,
pub(crate) elaborate_memoize_miss: u64,
pub(crate) elaborate_memoize_miss_remat: u64,
pub(crate) elaborate_licm_hoist: u64,
pub(crate) elaborate_func: u64,
pub(crate) elaborate_func_pre_insts: u64,
pub(crate) elaborate_func_post_insts: u64,
}