* Optimizations to egraph framework:

  - Save elaborated results by canonical value, not latest value (union
    value). Previously we were artificially skipping and re-elaborating
    some values we already had because we were not finding them in the
    map.

  - Make some changes to handling of icmp results: when icmp became
    I8-typed (when bools went away), many uses became
    `(uextend $I32 (icmp $I8 ...))`, and so patterns in lowering backends
    were no longer matching. This PR includes an x64-specific change to
    match `(brz (uextend (icmp ...)))` and similarly for `brnz`, but it
    also takes advantage of the ability to write rules easily in the
    egraph mid-end to rewrite selects with icmp inputs appropriately.

  - Extend constprop to understand selects in the egraph mid-end.

  With these changes, bz2.wasm sees a ~1% speedup, and spidermonkey.wasm
  with a fib.js input sees a 16.8% speedup:

  ```
  $ time taskset 1 target/release/wasmtime run --allow-precompiled --dir=. ./spidermonkey.base.cwasm ./fib.js
  1346269
  taskset 1 target/release/wasmtime run --allow-precompiled --dir=. ./fib.js  2.14s user 0.01s system 99% cpu 2.148 total
  $ time taskset 1 target/release/wasmtime run --allow-precompiled --dir=. ./spidermonkey.egraphs.cwasm ./fib.js
  1346269
  taskset 1 target/release/wasmtime run --allow-precompiled --dir=. ./fib.js  1.78s user 0.01s system 99% cpu 1.788 total
  ```

* Review feedback.

//! Elaboration phase: lowers EGraph back to sequences of operations
//! in CFG nodes.

use super::cost::{pure_op_cost, Cost};
use super::domtree::DomTreeWithChildren;
use super::Stats;
use crate::dominator_tree::DominatorTree;
use crate::fx::FxHashSet;
use crate::ir::ValueDef;
use crate::ir::{Block, Function, Inst, Value};
use crate::loop_analysis::{Loop, LoopAnalysis, LoopLevel};
use crate::scoped_hash_map::ScopedHashMap;
use crate::trace;
use crate::unionfind::UnionFind;
use alloc::vec::Vec;
use cranelift_entity::{packed_option::ReservedValue, SecondaryMap};
use smallvec::{smallvec, SmallVec};
use std::ops::Add;

pub(crate) struct Elaborator<'a> {
    func: &'a mut Function,
    domtree: &'a DominatorTree,
    domtree_children: &'a DomTreeWithChildren,
    loop_analysis: &'a LoopAnalysis,
    eclasses: &'a mut UnionFind<Value>,
    /// Map from Value that is produced by a pure Inst (and was thus
    /// not in the side-effecting skeleton) to the value produced by
    /// an elaborated inst (placed in the layout) to whose results we
    /// refer in the final code.
    ///
    /// The first time we use some result of an instruction during
    /// elaboration, we can place it and insert an identity map (inst
    /// results to that same inst's results) in this scoped
    /// map. Within that block and its dom-tree children, that mapping
    /// is visible and we can continue to use it. This allows us to
    /// avoid cloning the instruction. However, if we pop that scope
    /// and use it somewhere else as well, we will need to
    /// duplicate. We detect this case by checking, when a value that
    /// we want is not present in this map, whether the producing inst
    /// is already placed in the Layout. If so, we duplicate, and
    /// insert non-identity mappings from the original inst's results
    /// to the cloned inst's results.
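    ///
    /// Illustrative scenario (hypothetical values): a pure `v3 = iadd
    /// v1, v2` is first demanded in block `B`, so the inst is placed in
    /// `B` and an identity mapping `v3 -> v3` is recorded at `B`'s scope
    /// depth; uses in `B`'s dom-tree children see that mapping and reuse
    /// the placed inst. If a block not dominated by `B` later demands
    /// `v3`, `B`'s scope has been popped, the lookup misses, and since
    /// the inst is already in the layout we clone it and map `v3` to the
    /// clone's result.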
    value_to_elaborated_value: ScopedHashMap<Value, ElaboratedValue>,
    /// Map from Value to the best (lowest-cost) Value in its eclass
    /// (tree of union value-nodes).
    value_to_best_value: SecondaryMap<Value, (Cost, Value)>,
    /// Stack of blocks and loops in current elaboration path.
    loop_stack: SmallVec<[LoopStackEntry; 8]>,
    /// The current block into which we are elaborating.
    cur_block: Block,
    /// Values that opt rules have indicated should be rematerialized
    /// in every block they are used (e.g., immediates or other
    /// "cheap-to-compute" ops).
    remat_values: &'a FxHashSet<Value>,
    /// Explicitly-unrolled value elaboration stack.
    elab_stack: Vec<ElabStackEntry>,
    /// Results from the elab stack.
    elab_result_stack: Vec<ElaboratedValue>,
    /// Explicitly-unrolled block elaboration stack.
    block_stack: Vec<BlockStackEntry>,
    /// Stats for various events during egraph processing, to help
    /// with optimization of this infrastructure.
    stats: &'a mut Stats,
}

#[derive(Clone, Copy, Debug)]
struct ElaboratedValue {
    in_block: Block,
    value: Value,
}

#[derive(Clone, Debug)]
struct LoopStackEntry {
    /// The loop identifier.
    lp: Loop,
    /// The hoist point: a block that immediately dominates this
    /// loop. May not be an immediate predecessor, but will be a valid
    /// point to place all loop-invariant ops: they must depend only
    /// on inputs that dominate the loop, so are available at (the end
    /// of) this block.
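    ///
    /// In `start_block` this is set to the loop header's immediate
    /// dominator, which serves as a preheader-like placement point for
    /// LICM during elaboration.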
    hoist_block: Block,
    /// The depth in the scope map.
    scope_depth: u32,
}

#[derive(Clone, Debug)]
enum ElabStackEntry {
    /// Next action is to resolve this value into an elaborated inst
    /// (placed into the layout) that produces the value, and
    /// recursively elaborate the insts that produce its args.
    ///
    /// Any inserted ops should be inserted before `before`, which is
    /// the instruction demanding this value.
    Start { value: Value, before: Inst },
    /// Args have been pushed; waiting for results.
    PendingInst {
        inst: Inst,
        result_idx: usize,
        num_args: usize,
        remat: bool,
        before: Inst,
    },
}

#[derive(Clone, Debug)]
enum BlockStackEntry {
    Elaborate { block: Block, idom: Option<Block> },
    Pop,
}

impl<'a> Elaborator<'a> {
    pub(crate) fn new(
        func: &'a mut Function,
        domtree: &'a DominatorTree,
        domtree_children: &'a DomTreeWithChildren,
        loop_analysis: &'a LoopAnalysis,
        remat_values: &'a FxHashSet<Value>,
        eclasses: &'a mut UnionFind<Value>,
        stats: &'a mut Stats,
    ) -> Self {
        let num_values = func.dfg.num_values();
        let mut value_to_best_value =
            SecondaryMap::with_default((Cost::infinity(), Value::reserved_value()));
        value_to_best_value.resize(num_values);
        Self {
            func,
            domtree,
            domtree_children,
            loop_analysis,
            eclasses,
            value_to_elaborated_value: ScopedHashMap::with_capacity(num_values),
            value_to_best_value,
            loop_stack: smallvec![],
            cur_block: Block::reserved_value(),
            remat_values,
            elab_stack: vec![],
            elab_result_stack: vec![],
            block_stack: vec![],
            stats,
        }
    }

    fn start_block(&mut self, idom: Option<Block>, block: Block) {
        trace!(
            "start_block: block {:?} with idom {:?} at loop depth {:?} scope depth {}",
            block,
            idom,
            self.loop_stack.len(),
            self.value_to_elaborated_value.depth()
        );

        // Pop any loop levels we're no longer in.
        while let Some(inner_loop) = self.loop_stack.last() {
            if self.loop_analysis.is_in_loop(block, inner_loop.lp) {
                break;
            }
            self.loop_stack.pop();
        }

        // Note that if the *entry* block is a loop header, we will
        // not make note of the loop here because it will not have an
        // immediate dominator. We must disallow this case because we
        // will skip adding the `LoopStackEntry` here but our
        // `LoopAnalysis` will otherwise still make note of this loop
        // and loop depths will not match.
        if let Some(idom) = idom {
            if let Some(lp) = self.loop_analysis.is_loop_header(block) {
                self.loop_stack.push(LoopStackEntry {
                    lp,
                    // Any code hoisted out of this loop will be placed
                    // in `idom`, and will have def mappings inserted
                    // into the scoped hashmap at that block's level.
                    hoist_block: idom,
                    scope_depth: (self.value_to_elaborated_value.depth() - 1) as u32,
                });
                trace!(
                    " -> loop header, pushing; depth now {}",
                    self.loop_stack.len()
                );
            }
        } else {
            debug_assert!(
                self.loop_analysis.is_loop_header(block).is_none(),
                "Entry block (domtree root) cannot be a loop header!"
            );
        }

        trace!("block {}: loop stack is {:?}", block, self.loop_stack);

        self.cur_block = block;
    }

    fn compute_best_values(&mut self) {
        let best = &mut self.value_to_best_value;
        for (value, def) in self.func.dfg.values_and_defs() {
            trace!("computing best for value {:?} def {:?}", value, def);
            match def {
                ValueDef::Union(x, y) => {
                    // Pick the best of the two options based on
                    // min-cost. This works because each element of `best`
                    // is a `(cost, value)` tuple; `cost` comes first so
                    // the natural comparison works based on cost, and
                    // breaks ties based on value number.
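                    //
                    // For example (arbitrary values): comparing
                    // (Cost(2), v9) and (Cost(3), v4) picks (Cost(2), v9),
                    // while (Cost(2), v4) vs. (Cost(2), v9) picks
                    // (Cost(2), v4).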
                    trace!(" -> best of {:?} and {:?}", best[x], best[y]);
                    best[value] = std::cmp::min(best[x], best[y]);
                    trace!(" -> {:?}", best[value]);
                }
                ValueDef::Param(_, _) => {
                    best[value] = (Cost::zero(), value);
                }
                // If the Inst is inserted into the layout (which is,
                // at this point, only the side-effecting skeleton),
                // then it must be computed and thus we give it zero
                // cost.
                ValueDef::Result(inst, _) if self.func.layout.inst_block(inst).is_some() => {
                    best[value] = (Cost::zero(), value);
                }
                ValueDef::Result(inst, _) => {
                    trace!(" -> value {}: result, computing cost", value);
                    let inst_data = &self.func.dfg[inst];
                    let loop_level = self
                        .func
                        .layout
                        .inst_block(inst)
                        .map(|block| self.loop_analysis.loop_level(block))
                        .unwrap_or(LoopLevel::root());
                    // N.B.: at this point we know that the opcode is
                    // pure, so `pure_op_cost`'s precondition is
                    // satisfied.
                    let cost = pure_op_cost(inst_data.opcode()).at_level(loop_level.level())
                        + self
                            .func
                            .dfg
                            .inst_args(inst)
                            .iter()
                            .map(|value| best[*value].0)
                            // Can't use `.sum()` for `Cost` types; do
                            // an explicit reduce instead.
                            .fold(Cost::zero(), Cost::add);
                    best[value] = (cost, value);
                }
            };
            debug_assert_ne!(best[value].0, Cost::infinity());
            debug_assert_ne!(best[value].1, Value::reserved_value());
            trace!("best for eclass {:?}: {:?}", value, best[value]);
        }
    }

    /// Elaborate use of an eclass, inserting any needed new
    /// instructions before the given inst `before`. Should only be
    /// given values corresponding to results of instructions or
    /// blockparams.
    fn elaborate_eclass_use(&mut self, value: Value, before: Inst) -> ElaboratedValue {
        debug_assert_ne!(value, Value::reserved_value());

        // Kick off the process by requesting this result
        // value.
        self.elab_stack
            .push(ElabStackEntry::Start { value, before });

        // Now run the explicit-stack recursion until we reach
        // the root.
        self.process_elab_stack();
        debug_assert_eq!(self.elab_result_stack.len(), 1);
        self.elab_result_stack.pop().unwrap()
    }

    fn process_elab_stack(&mut self) {
        while let Some(entry) = self.elab_stack.last() {
            match entry {
                &ElabStackEntry::Start { value, before } => {
                    // We always replace the Start entry, so pop it now.
                    self.elab_stack.pop();

                    debug_assert_ne!(value, Value::reserved_value());
                    let value = self.func.dfg.resolve_aliases(value);

                    self.stats.elaborate_visit_node += 1;
                    let canonical_value = self.eclasses.find_and_update(value);
                    debug_assert_ne!(canonical_value, Value::reserved_value());
                    trace!(
                        "elaborate: value {} canonical {} before {}",
                        value,
                        canonical_value,
                        before
                    );

                    let remat = if let Some(elab_val) =
                        self.value_to_elaborated_value.get(&canonical_value)
                    {
                        // Value is available. Look at the defined
                        // block, and determine whether this node kind
                        // allows rematerialization if the value comes
                        // from another block. If so, ignore the hit
                        // and recompute below.
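                        // (Illustrative case: a constant flagged for
                        // remat that was already elaborated in a
                        // dominating block gets re-elaborated here in
                        // the current block instead of being reused
                        // from that other block.)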
                        let remat = elab_val.in_block != self.cur_block
                            && self.remat_values.contains(&canonical_value);
                        if !remat {
                            trace!("elaborate: value {} -> {:?}", value, elab_val);
                            self.stats.elaborate_memoize_hit += 1;
                            self.elab_result_stack.push(*elab_val);
                            continue;
                        }
                        trace!("elaborate: value {} -> remat", canonical_value);
                        self.stats.elaborate_memoize_miss_remat += 1;
                        // The op is pure at this point, so it is always valid to
                        // remove from this map.
                        self.value_to_elaborated_value.remove(&canonical_value);
                        true
                    } else {
                        // Value not available; but still look up
                        // whether it's been flagged for remat because
                        // this affects placement.
                        let remat = self.remat_values.contains(&canonical_value);
                        trace!(" -> not present in map; remat = {}", remat);
                        remat
                    };
                    self.stats.elaborate_memoize_miss += 1;

                    // Get the best option; we use `value` (latest
                    // value) here so we have a full view of the
                    // eclass.
                    trace!("looking up best value for {}", value);
                    let (_, best_value) = self.value_to_best_value[value];
                    debug_assert_ne!(best_value, Value::reserved_value());
                    trace!("elaborate: value {} -> best {}", value, best_value,);

                    // Now resolve the value to its definition to see
                    // how we can compute it.
                    let (inst, result_idx) = match self.func.dfg.value_def(best_value) {
                        ValueDef::Result(inst, result_idx) => {
                            trace!(
                                " -> value {} is result {} of {}",
                                best_value,
                                result_idx,
                                inst
                            );
                            (inst, result_idx)
                        }
                        ValueDef::Param(_, _) => {
                            // We don't need to do anything to compute
                            // this value; just push its result on the
                            // result stack (blockparams are already
                            // available).
                            trace!(" -> value {} is a blockparam", best_value);
                            self.elab_result_stack.push(ElaboratedValue {
                                in_block: self.cur_block,
                                value: best_value,
                            });
                            continue;
                        }
                        ValueDef::Union(_, _) => {
                            panic!("Should never have a Union value as the best value");
                        }
                    };

                    trace!(
                        " -> result {} of inst {:?}",
                        result_idx,
                        self.func.dfg[inst]
                    );

                    // We're going to need to use this instruction
                    // result, placing the instruction into the
                    // layout. First, enqueue all args to be
                    // elaborated. Push state to receive the results
                    // and later elab this inst.
                    let args = self.func.dfg.inst_args(inst);
                    let num_args = args.len();
                    self.elab_stack.push(ElabStackEntry::PendingInst {
                        inst,
                        result_idx,
                        num_args,
                        remat,
                        before,
                    });
                    // Push args in reverse order so we process the
                    // first arg first.
                    for &arg in args.iter().rev() {
                        debug_assert_ne!(arg, Value::reserved_value());
                        self.elab_stack
                            .push(ElabStackEntry::Start { value: arg, before });
                    }
                }

                &ElabStackEntry::PendingInst {
                    inst,
                    result_idx,
                    num_args,
                    remat,
                    before,
                } => {
                    self.elab_stack.pop();

                    trace!(
                        "PendingInst: {} result {} args {} remat {} before {}",
                        inst,
                        result_idx,
                        num_args,
                        remat,
                        before
                    );

                    // We should have all args resolved at this
                    // point. Grab them and drain them out, removing
                    // them.
                    let arg_idx = self.elab_result_stack.len() - num_args;
                    let arg_values = &self.elab_result_stack[arg_idx..];

                    // Compute max loop depth.
                    let loop_hoist_level = arg_values
                        .iter()
                        .map(|&value| {
                            // Find the outermost loop level at which
                            // the value's defining block *is not* a
                            // member. This is the loop-nest level
                            // whose hoist-block we hoist to.
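                            //
                            // Illustrative example: with `loop_stack` =
                            // [outer, inner], an arg defined outside
                            // both loops gives level 0 (hoistable to
                            // outer's hoist block), one defined only in
                            // `outer` gives level 1 (hoistable just out
                            // of `inner`), and one defined in `inner`
                            // gives `loop_stack.len()` (not hoistable).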
                            let hoist_level = self
                                .loop_stack
                                .iter()
                                .position(|loop_entry| {
                                    !self.loop_analysis.is_in_loop(value.in_block, loop_entry.lp)
                                })
                                .unwrap_or(self.loop_stack.len());
                            trace!(
                                " -> arg: elab_value {:?} hoist level {:?}",
                                value,
                                hoist_level
                            );
                            hoist_level
                        })
                        .max()
                        .unwrap_or(self.loop_stack.len());
                    trace!(
                        " -> loop hoist level: {:?}; cur loop depth: {:?}, loop_stack: {:?}",
                        loop_hoist_level,
                        self.loop_stack.len(),
                        self.loop_stack,
                    );

                    // We know that this is a pure inst, because
                    // non-pure roots have already been placed in the
                    // value-to-elab'd-value map and are never subject
                    // to remat, so they will not reach this stage of
                    // processing.
                    //
                    // We now must determine the location at which we
                    // place the instruction. This is the current
                    // block *unless* we hoist above a loop when all
                    // args are loop-invariant (and this op is pure).
                    let (scope_depth, before, insert_block) =
                        if loop_hoist_level == self.loop_stack.len() || remat {
                            // Depends on some value at the current
                            // loop depth, or remat forces it here:
                            // place it at the current location.
                            (
                                self.value_to_elaborated_value.depth(),
                                before,
                                self.func.layout.inst_block(before).unwrap(),
                            )
                        } else {
                            // Does not depend on any args at current
                            // loop depth: hoist out of loop.
                            self.stats.elaborate_licm_hoist += 1;
                            let data = &self.loop_stack[loop_hoist_level];
                            // `data.hoist_block` should dominate `before`'s block.
                            let before_block = self.func.layout.inst_block(before).unwrap();
                            debug_assert!(self.domtree.dominates(
                                data.hoist_block,
                                before_block,
                                &self.func.layout
                            ));
                            // Determine the instruction at which we
                            // insert in `data.hoist_block`.
                            let before = self
                                .func
                                .layout
                                .canonical_branch_inst(&self.func.dfg, data.hoist_block)
                                .unwrap();
                            (data.scope_depth as usize, before, data.hoist_block)
                        };

                    trace!(
                        " -> decided to place: before {} insert_block {}",
                        before,
                        insert_block
                    );

                    // Now we need to place `inst` at the computed
                    // location (just before `before`). Note that
                    // `inst` may already have been placed somewhere
                    // else, because a pure node may be elaborated at
                    // more than one place. In this case, we need to
                    // duplicate the instruction (and return the
                    // `Value`s for that duplicated instance
                    // instead).
                    trace!("need inst {} before {}", inst, before);
                    let inst = if self.func.layout.inst_block(inst).is_some() {
                        // Clone the inst!
                        let new_inst = self.func.dfg.clone_inst(inst);
                        trace!(
                            " -> inst {} already has a location; cloned to {}",
                            inst,
                            new_inst
                        );
                        // Create mappings in the
                        // value-to-elab'd-value map from original
                        // results to cloned results.
                        for (&result, &new_result) in self
                            .func
                            .dfg
                            .inst_results(inst)
                            .iter()
                            .zip(self.func.dfg.inst_results(new_inst).iter())
                        {
                            let elab_value = ElaboratedValue {
                                value: new_result,
                                in_block: insert_block,
                            };
                            let canonical_result = self.eclasses.find_and_update(result);
                            self.value_to_elaborated_value.insert_if_absent_with_depth(
                                canonical_result,
                                elab_value,
                                scope_depth,
                            );

                            self.eclasses.add(new_result);
                            self.eclasses.union(result, new_result);
                            self.value_to_best_value[new_result] = self.value_to_best_value[result];

                            trace!(
                                " -> cloned inst has new result {} for orig {}",
                                new_result,
                                result
                            );
                        }
                        new_inst
                    } else {
                        trace!(" -> no location; using original inst");
                        // Create identity mappings from result values
                        // to themselves in this scope, since we're
                        // using the original inst.
                        for &result in self.func.dfg.inst_results(inst) {
                            let elab_value = ElaboratedValue {
                                value: result,
                                in_block: insert_block,
                            };
                            let canonical_result = self.eclasses.find_and_update(result);
                            self.value_to_elaborated_value.insert_if_absent_with_depth(
                                canonical_result,
                                elab_value,
                                scope_depth,
                            );
                            trace!(" -> inserting identity mapping for {}", result);
                        }
                        inst
                    };
                    // Place the inst just before `before`.
                    self.func.layout.insert_inst(inst, before);

                    // Update the inst's arguments.
                    let args_dest = self.func.dfg.inst_args_mut(inst);
                    for (dest, val) in args_dest.iter_mut().zip(arg_values.iter()) {
                        *dest = val.value;
                    }

                    // Now that we've consumed the arg values, pop
                    // them off the stack.
                    self.elab_result_stack.truncate(arg_idx);

                    // Push the requested result index of the
                    // instruction onto the elab-results stack.
                    self.elab_result_stack.push(ElaboratedValue {
                        in_block: insert_block,
                        value: self.func.dfg.inst_results(inst)[result_idx],
                    });
                }
            }
        }
    }

    fn elaborate_block(&mut self, idom: Option<Block>, block: Block) {
        trace!("elaborate_block: block {}", block);
        self.start_block(idom, block);

        // Iterate over the side-effecting skeleton using the linked
        // list in Layout. We will insert instructions that are
        // elaborated *before* `inst`, so we can always use its
        // next-link to continue the iteration.
        let mut next_inst = self.func.layout.first_inst(block);
        let mut first_branch = None;
        while let Some(inst) = next_inst {
            trace!(
                "elaborating inst {} with results {:?}",
                inst,
                self.func.dfg.inst_results(inst)
            );
            // Record the first branch we see in the block; all
            // elaboration for args of *any* branch must be inserted
            // before the *first* branch, because the branch group
            // must remain contiguous at the end of the block.
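            // (For example, a conditional branch followed by an
            // unconditional jump forms one such group; the jump's args
            // must also be elaborated before the conditional branch.)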
            if self.func.dfg[inst].opcode().is_branch() && first_branch == None {
                first_branch = Some(inst);
            }

            // Determine where elaboration inserts insts.
            let before = first_branch.unwrap_or(inst);
            trace!(" -> inserting before {}", before);

            // For each arg of the inst, elaborate its value.
            for i in 0..self.func.dfg.inst_args(inst).len() {
                // Don't borrow across the below.
                let arg = self.func.dfg.inst_args(inst)[i];
                trace!(" -> arg {}", arg);
                // Elaborate the arg, placing any newly-inserted insts
                // before `before`. Get the updated value, which may
                // be different than the original.
                let arg = self.elaborate_eclass_use(arg, before);
                trace!(" -> rewrote arg to {:?}", arg);
                self.func.dfg.inst_args_mut(inst)[i] = arg.value;
            }

            // We need to put the results of this instruction in the
            // map now.
            for &result in self.func.dfg.inst_results(inst) {
                trace!(" -> result {}", result);
                let canonical_result = self.eclasses.find_and_update(result);
                self.value_to_elaborated_value.insert_if_absent(
                    canonical_result,
                    ElaboratedValue {
                        in_block: block,
                        value: result,
                    },
                );
            }

            next_inst = self.func.layout.next_inst(inst);
        }
    }

    fn elaborate_domtree(&mut self, domtree: &DomTreeWithChildren) {
        let root = domtree.root();
        self.block_stack.push(BlockStackEntry::Elaborate {
            block: root,
            idom: None,
        });
        while let Some(top) = self.block_stack.pop() {
            match top {
                BlockStackEntry::Elaborate { block, idom } => {
                    self.block_stack.push(BlockStackEntry::Pop);
                    self.value_to_elaborated_value.increment_depth();

                    self.elaborate_block(idom, block);

                    // Push children. We are doing a preorder
                    // traversal so we do this after processing this
                    // block above.
                    let block_stack_end = self.block_stack.len();
                    for child in domtree.children(block) {
                        self.block_stack.push(BlockStackEntry::Elaborate {
                            block: child,
                            idom: Some(block),
                        });
                    }
                    // Reverse what we just pushed so we elaborate in
                    // original block order. (The domtree iter is a
                    // single-ended iter over a singly-linked list so
                    // we can't `.rev()` above.)
                    self.block_stack[block_stack_end..].reverse();
                }
                BlockStackEntry::Pop => {
                    self.value_to_elaborated_value.decrement_depth();
                }
            }
        }
    }

    pub(crate) fn elaborate(&mut self) {
        self.stats.elaborate_func += 1;
        self.stats.elaborate_func_pre_insts += self.func.dfg.num_insts() as u64;
        self.compute_best_values();
        self.elaborate_domtree(&self.domtree_children);
        self.stats.elaborate_func_post_insts += self.func.dfg.num_insts() as u64;
    }
}