egraphs: a few miscellaneous compile-time optimizations. (#5072)
* egraphs: a few miscellaneous compile-time optimizations.

  These optimizations together are worth about a 2% compile-time reduction, as measured on one core with spidermonkey.wasm as an input, using `hyperfine` on `wasmtime compile`.

  The changes included are:

  - Some better pre-allocation (the blockparams and side-effects concatenated-list vecs);
  - Avoiding the indirection of storing a list of types for every Pure and Inst node, when almost all nodes produce only a single result; instead, store the arity and the single type if it exists, and allow result-projection nodes to fill in types otherwise;
  - Pack the `MemoryState` enum into one `u32` (this, together with the above removal of the type slice, allows `Node` to shrink from 48 bytes to 32 bytes);
  - Always-inline an accessor (`entry` on `CtxHashMap`) that wasn't being inlined (`#[inline(always)]` appears to be load-bearing, rather than just `#[inline]`);
  - Split the update-analysis path into two hot paths, one for the union case and one for the new-node case (the former can avoid recomputing the analysis for the contained node when replacing a node with a node-and-child eclass entry).

* Review feedback.
* Fix test build.
* Fix to lowering when unused output with invalid type is present.
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
//! Egraph-based mid-end optimization framework.
|
//! Egraph-based mid-end optimization framework.
|
||||||
|
|
||||||
use crate::dominator_tree::DominatorTree;
|
use crate::dominator_tree::DominatorTree;
|
||||||
|
use crate::egraph::stores::PackedMemoryState;
|
||||||
use crate::flowgraph::ControlFlowGraph;
|
use crate::flowgraph::ControlFlowGraph;
|
||||||
use crate::loop_analysis::{LoopAnalysis, LoopLevel};
|
use crate::loop_analysis::{LoopAnalysis, LoopLevel};
|
||||||
use crate::trace;
|
use crate::trace;
|
||||||
@@ -100,7 +101,9 @@ impl<'a> FuncEGraph<'a> {
|
|||||||
loop_analysis: &'a LoopAnalysis,
|
loop_analysis: &'a LoopAnalysis,
|
||||||
cfg: &ControlFlowGraph,
|
cfg: &ControlFlowGraph,
|
||||||
) -> FuncEGraph<'a> {
|
) -> FuncEGraph<'a> {
|
||||||
let node_count_estimate = func.dfg.num_values() * 2;
|
let num_values = func.dfg.num_values();
|
||||||
|
let num_blocks = func.dfg.num_blocks();
|
||||||
|
let node_count_estimate = num_values * 2;
|
||||||
let alias_analysis = AliasAnalysis::new(func, cfg);
|
let alias_analysis = AliasAnalysis::new(func, cfg);
|
||||||
let mut this = Self {
|
let mut this = Self {
|
||||||
domtree,
|
domtree,
|
||||||
@@ -108,16 +111,19 @@ impl<'a> FuncEGraph<'a> {
|
|||||||
alias_analysis,
|
alias_analysis,
|
||||||
egraph: EGraph::with_capacity(node_count_estimate, Some(Analysis)),
|
egraph: EGraph::with_capacity(node_count_estimate, Some(Analysis)),
|
||||||
node_ctx: NodeCtx::with_capacity_for_dfg(&func.dfg),
|
node_ctx: NodeCtx::with_capacity_for_dfg(&func.dfg),
|
||||||
side_effects: SecondaryMap::default(),
|
side_effects: SecondaryMap::with_capacity(num_blocks),
|
||||||
side_effect_ids: vec![],
|
side_effect_ids: Vec::with_capacity(node_count_estimate),
|
||||||
store_nodes: FxHashMap::default(),
|
store_nodes: FxHashMap::default(),
|
||||||
blockparams: SecondaryMap::default(),
|
blockparams: SecondaryMap::with_capacity(num_blocks),
|
||||||
blockparam_ids_tys: vec![],
|
blockparam_ids_tys: Vec::with_capacity(num_blocks * 10),
|
||||||
remat_ids: FxHashSet::default(),
|
remat_ids: FxHashSet::default(),
|
||||||
subsume_ids: FxHashSet::default(),
|
subsume_ids: FxHashSet::default(),
|
||||||
stats: Default::default(),
|
stats: Default::default(),
|
||||||
rewrite_depth: 0,
|
rewrite_depth: 0,
|
||||||
};
|
};
|
||||||
|
this.store_nodes.reserve(func.dfg.num_values() / 8);
|
||||||
|
this.remat_ids.reserve(func.dfg.num_values() / 4);
|
||||||
|
this.subsume_ids.reserve(func.dfg.num_values() / 4);
|
||||||
this.build(func);
|
this.build(func);
|
||||||
this
|
this
|
||||||
}
|
}
|
||||||
@@ -172,12 +178,11 @@ impl<'a> FuncEGraph<'a> {
|
|||||||
);
|
);
|
||||||
|
|
||||||
let results = func.dfg.inst_results(inst);
|
let results = func.dfg.inst_results(inst);
|
||||||
|
let ty = if results.len() == 1 {
|
||||||
let types = self
|
func.dfg.value_type(results[0])
|
||||||
.node_ctx
|
} else {
|
||||||
.types
|
crate::ir::types::INVALID
|
||||||
.from_iter(results.iter().map(|&val| func.dfg.value_type(val)));
|
};
|
||||||
let types = types.freeze(&mut self.node_ctx.types);
|
|
||||||
|
|
||||||
let load_mem_state = self.alias_analysis.get_state_for_load(inst);
|
let load_mem_state = self.alias_analysis.get_state_for_load(inst);
|
||||||
let is_readonly_load = match func.dfg[inst] {
|
let is_readonly_load = match func.dfg[inst] {
|
||||||
@@ -193,21 +198,26 @@ impl<'a> FuncEGraph<'a> {
|
|||||||
let op = InstructionImms::from(&func.dfg[inst]);
|
let op = InstructionImms::from(&func.dfg[inst]);
|
||||||
let opcode = op.opcode();
|
let opcode = op.opcode();
|
||||||
let srcloc = func.srclocs[inst];
|
let srcloc = func.srclocs[inst];
|
||||||
|
let arity = u16::try_from(results.len())
|
||||||
|
.expect("More than 2^16 results from an instruction");
|
||||||
|
|
||||||
let node = if is_readonly_load {
|
let node = if is_readonly_load {
|
||||||
self.stats.node_created += 1;
|
self.stats.node_created += 1;
|
||||||
self.stats.node_pure += 1;
|
self.stats.node_pure += 1;
|
||||||
Node::Pure { op, args, types }
|
Node::Pure {
|
||||||
|
op,
|
||||||
|
args,
|
||||||
|
ty,
|
||||||
|
arity,
|
||||||
|
}
|
||||||
} else if let Some(load_mem_state) = load_mem_state {
|
} else if let Some(load_mem_state) = load_mem_state {
|
||||||
let addr = args.as_slice(&self.node_ctx.args)[0];
|
let addr = args.as_slice(&self.node_ctx.args)[0];
|
||||||
let ty = types.as_slice(&self.node_ctx.types)[0];
|
|
||||||
trace!("load at inst {} has mem state {:?}", inst, load_mem_state);
|
trace!("load at inst {} has mem state {:?}", inst, load_mem_state);
|
||||||
self.stats.node_created += 1;
|
self.stats.node_created += 1;
|
||||||
self.stats.node_load += 1;
|
self.stats.node_load += 1;
|
||||||
Node::Load {
|
Node::Load {
|
||||||
op,
|
op,
|
||||||
ty,
|
ty,
|
||||||
inst,
|
|
||||||
addr,
|
addr,
|
||||||
mem_state: load_mem_state,
|
mem_state: load_mem_state,
|
||||||
srcloc,
|
srcloc,
|
||||||
@@ -217,16 +227,21 @@ impl<'a> FuncEGraph<'a> {
|
|||||||
self.stats.node_inst += 1;
|
self.stats.node_inst += 1;
|
||||||
Node::Inst {
|
Node::Inst {
|
||||||
op,
|
op,
|
||||||
inst,
|
|
||||||
args,
|
args,
|
||||||
types,
|
ty,
|
||||||
|
arity,
|
||||||
srcloc,
|
srcloc,
|
||||||
loop_level,
|
loop_level,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
self.stats.node_created += 1;
|
self.stats.node_created += 1;
|
||||||
self.stats.node_pure += 1;
|
self.stats.node_pure += 1;
|
||||||
Node::Pure { op, args, types }
|
Node::Pure {
|
||||||
|
op,
|
||||||
|
args,
|
||||||
|
ty,
|
||||||
|
arity,
|
||||||
|
}
|
||||||
};
|
};
|
||||||
let dedup_needed = self.node_ctx.needs_dedup(&node);
|
let dedup_needed = self.node_ctx.needs_dedup(&node);
|
||||||
let is_pure = matches!(node, Node::Pure { .. });
|
let is_pure = matches!(node, Node::Pure { .. });
|
||||||
|
|||||||
@@ -66,7 +66,11 @@ enum ElabStackEntry {
|
|||||||
},
|
},
|
||||||
/// Waiting for a result to return one projected value of a
|
/// Waiting for a result to return one projected value of a
|
||||||
/// multi-value result.
|
/// multi-value result.
|
||||||
PendingProjection { canonical: Id, index: usize },
|
PendingProjection {
|
||||||
|
canonical: Id,
|
||||||
|
index: usize,
|
||||||
|
ty: Type,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
@@ -189,15 +193,15 @@ impl<'a> Elaborator<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn add_node(&mut self, node: &Node, args: &[Value], to_block: Block) -> ValueList {
|
fn add_node(&mut self, node: &Node, args: &[Value], to_block: Block) -> ValueList {
|
||||||
let (instdata, result_tys) = match node {
|
let (instdata, result_ty, arity) = match node {
|
||||||
Node::Pure { op, types, .. } | Node::Inst { op, types, .. } => (
|
Node::Pure { op, ty, arity, .. } | Node::Inst { op, ty, arity, .. } => (
|
||||||
op.with_args(args, &mut self.func.dfg.value_lists),
|
op.with_args(args, &mut self.func.dfg.value_lists),
|
||||||
types.as_slice(&self.node_ctx.types),
|
*ty,
|
||||||
),
|
*arity,
|
||||||
Node::Load { op, ty, .. } => (
|
|
||||||
op.with_args(args, &mut self.func.dfg.value_lists),
|
|
||||||
std::slice::from_ref(ty),
|
|
||||||
),
|
),
|
||||||
|
Node::Load { op, ty, .. } => {
|
||||||
|
(op.with_args(args, &mut self.func.dfg.value_lists), *ty, 1)
|
||||||
|
}
|
||||||
_ => panic!("Cannot `add_node()` on block param or projection"),
|
_ => panic!("Cannot `add_node()` on block param or projection"),
|
||||||
};
|
};
|
||||||
let srcloc = match node {
|
let srcloc = match node {
|
||||||
@@ -237,8 +241,12 @@ impl<'a> Elaborator<'a> {
|
|||||||
let inst = self.func.dfg.make_inst(instdata);
|
let inst = self.func.dfg.make_inst(instdata);
|
||||||
self.func.srclocs[inst] = srcloc;
|
self.func.srclocs[inst] = srcloc;
|
||||||
|
|
||||||
for &ty in result_tys {
|
if arity == 1 {
|
||||||
self.func.dfg.append_result(inst, ty);
|
self.func.dfg.append_result(inst, result_ty);
|
||||||
|
} else {
|
||||||
|
for _ in 0..arity {
|
||||||
|
self.func.dfg.append_result(inst, crate::ir::types::INVALID);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if is_terminator_group_inst {
|
if is_terminator_group_inst {
|
||||||
@@ -371,11 +379,15 @@ impl<'a> Elaborator<'a> {
|
|||||||
// the value we are projecting a part of, then
|
// the value we are projecting a part of, then
|
||||||
// eventually return here (saving state with a
|
// eventually return here (saving state with a
|
||||||
// PendingProjection).
|
// PendingProjection).
|
||||||
if let Node::Result { value, result, .. } = node {
|
if let Node::Result {
|
||||||
|
value, result, ty, ..
|
||||||
|
} = node
|
||||||
|
{
|
||||||
trace!(" -> result; pushing arg value {}", value);
|
trace!(" -> result; pushing arg value {}", value);
|
||||||
self.elab_stack.push(ElabStackEntry::PendingProjection {
|
self.elab_stack.push(ElabStackEntry::PendingProjection {
|
||||||
index: *result,
|
index: *result,
|
||||||
canonical,
|
canonical,
|
||||||
|
ty: *ty,
|
||||||
});
|
});
|
||||||
self.elab_stack.push(ElabStackEntry::Start { id: *value });
|
self.elab_stack.push(ElabStackEntry::Start { id: *value });
|
||||||
continue;
|
continue;
|
||||||
@@ -493,7 +505,11 @@ impl<'a> Elaborator<'a> {
|
|||||||
// Push onto the elab-results stack.
|
// Push onto the elab-results stack.
|
||||||
self.elab_result_stack.push(result)
|
self.elab_result_stack.push(result)
|
||||||
}
|
}
|
||||||
&ElabStackEntry::PendingProjection { index, canonical } => {
|
&ElabStackEntry::PendingProjection {
|
||||||
|
ty,
|
||||||
|
index,
|
||||||
|
canonical,
|
||||||
|
} => {
|
||||||
self.elab_stack.pop();
|
self.elab_stack.pop();
|
||||||
|
|
||||||
// Grab the input from the elab-result stack.
|
// Grab the input from the elab-result stack.
|
||||||
@@ -511,10 +527,12 @@ impl<'a> Elaborator<'a> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
let values = values.as_slice(&self.func.dfg.value_lists);
|
let values = values.as_slice(&self.func.dfg.value_lists);
|
||||||
|
let value = values[index];
|
||||||
|
self.func.dfg.fill_in_value_type(value, ty);
|
||||||
let value = IdValue::Value {
|
let value = IdValue::Value {
|
||||||
depth,
|
depth,
|
||||||
block,
|
block,
|
||||||
value: values[index],
|
value,
|
||||||
};
|
};
|
||||||
self.id_to_value.insert_if_absent(canonical, value.clone());
|
self.id_to_value.insert_if_absent(canonical, value.clone());
|
||||||
|
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
//! Node definition for EGraph representation.
|
//! Node definition for EGraph representation.
|
||||||
|
|
||||||
use super::MemoryState;
|
use super::PackedMemoryState;
|
||||||
use crate::ir::{Block, DataFlowGraph, Inst, InstructionImms, Opcode, RelSourceLoc, Type};
|
use crate::ir::{Block, DataFlowGraph, InstructionImms, Opcode, RelSourceLoc, Type};
|
||||||
use crate::loop_analysis::LoopLevel;
|
use crate::loop_analysis::LoopLevel;
|
||||||
use cranelift_egraph::{BumpArena, BumpSlice, CtxEq, CtxHash, Id, Language, UnionFind};
|
use cranelift_egraph::{CtxEq, CtxHash, Id, Language, UnionFind};
|
||||||
use cranelift_entity::{EntityList, ListPool};
|
use cranelift_entity::{EntityList, ListPool};
|
||||||
use std::hash::{Hash, Hasher};
|
use std::hash::{Hash, Hasher};
|
||||||
|
|
||||||
@@ -31,8 +31,10 @@ pub enum Node {
|
|||||||
op: InstructionImms,
|
op: InstructionImms,
|
||||||
/// eclass arguments to the operator.
|
/// eclass arguments to the operator.
|
||||||
args: EntityList<Id>,
|
args: EntityList<Id>,
|
||||||
/// Types of results.
|
/// Type of result, if one.
|
||||||
types: BumpSlice<Type>,
|
ty: Type,
|
||||||
|
/// Number of results.
|
||||||
|
arity: u16,
|
||||||
},
|
},
|
||||||
/// A CLIF instruction that has side-effects or is otherwise not
|
/// A CLIF instruction that has side-effects or is otherwise not
|
||||||
/// representable by `Pure`.
|
/// representable by `Pure`.
|
||||||
@@ -41,15 +43,10 @@ pub enum Node {
|
|||||||
op: InstructionImms,
|
op: InstructionImms,
|
||||||
/// eclass arguments to the operator.
|
/// eclass arguments to the operator.
|
||||||
args: EntityList<Id>,
|
args: EntityList<Id>,
|
||||||
/// Types of results.
|
/// Type of result, if one.
|
||||||
types: BumpSlice<Type>,
|
ty: Type,
|
||||||
/// The index of the original instruction. We include this so
|
/// Number of results.
|
||||||
/// that the `Inst`s are not deduplicated: every instance is a
|
arity: u16,
|
||||||
/// logically separate and unique side-effect. However,
|
|
||||||
/// because we clear the DataFlowGraph before elaboration,
|
|
||||||
/// this `Inst` is *not* valid to fetch any details from the
|
|
||||||
/// original instruction.
|
|
||||||
inst: Inst,
|
|
||||||
/// The source location to preserve.
|
/// The source location to preserve.
|
||||||
srcloc: RelSourceLoc,
|
srcloc: RelSourceLoc,
|
||||||
/// The loop level of this Inst.
|
/// The loop level of this Inst.
|
||||||
@@ -83,14 +80,9 @@ pub enum Node {
|
|||||||
/// the key).
|
/// the key).
|
||||||
addr: Id,
|
addr: Id,
|
||||||
/// The abstract memory state that this load accesses.
|
/// The abstract memory state that this load accesses.
|
||||||
mem_state: MemoryState,
|
mem_state: PackedMemoryState,
|
||||||
|
|
||||||
// -- not included in dedup key:
|
// -- not included in dedup key:
|
||||||
/// The `Inst` we will use for a trap location for this
|
|
||||||
/// load. Excluded from Eq/Hash so that loads that are
|
|
||||||
/// identical except for the specific instance will dedup on
|
|
||||||
/// top of each other.
|
|
||||||
inst: Inst,
|
|
||||||
/// Source location, for traps. Not included in Eq/Hash.
|
/// Source location, for traps. Not included in Eq/Hash.
|
||||||
srcloc: RelSourceLoc,
|
srcloc: RelSourceLoc,
|
||||||
},
|
},
|
||||||
@@ -107,18 +99,14 @@ impl Node {
|
|||||||
|
|
||||||
/// Shared pools for type and id lists in nodes.
|
/// Shared pools for type and id lists in nodes.
|
||||||
pub struct NodeCtx {
|
pub struct NodeCtx {
|
||||||
/// Arena for result-type arrays.
|
|
||||||
pub types: BumpArena<Type>,
|
|
||||||
/// Arena for arg eclass-ID lists.
|
/// Arena for arg eclass-ID lists.
|
||||||
pub args: ListPool<Id>,
|
pub args: ListPool<Id>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl NodeCtx {
|
impl NodeCtx {
|
||||||
pub(crate) fn with_capacity_for_dfg(dfg: &DataFlowGraph) -> Self {
|
pub(crate) fn with_capacity_for_dfg(dfg: &DataFlowGraph) -> Self {
|
||||||
let n_types = dfg.num_values();
|
|
||||||
let n_args = dfg.value_lists.capacity();
|
let n_args = dfg.value_lists.capacity();
|
||||||
Self {
|
Self {
|
||||||
types: BumpArena::arena_with_capacity(n_types),
|
|
||||||
args: ListPool::with_capacity(n_args),
|
args: ListPool::with_capacity(n_args),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -168,26 +156,23 @@ impl CtxEq<Node, Node> for NodeCtx {
|
|||||||
&Node::Pure {
|
&Node::Pure {
|
||||||
ref op,
|
ref op,
|
||||||
ref args,
|
ref args,
|
||||||
ref types,
|
ty,
|
||||||
|
arity: _,
|
||||||
},
|
},
|
||||||
&Node::Pure {
|
&Node::Pure {
|
||||||
op: ref other_op,
|
op: ref other_op,
|
||||||
args: ref other_args,
|
args: ref other_args,
|
||||||
types: ref other_types,
|
ty: other_ty,
|
||||||
|
arity: _,
|
||||||
},
|
},
|
||||||
) => {
|
) => *op == *other_op && self.ids_eq(args, other_args, uf) && ty == other_ty,
|
||||||
*op == *other_op
|
|
||||||
&& self.ids_eq(args, other_args, uf)
|
|
||||||
&& types.as_slice(&self.types) == other_types.as_slice(&self.types)
|
|
||||||
}
|
|
||||||
(
|
(
|
||||||
&Node::Inst { inst, ref args, .. },
|
&Node::Inst { ref args, .. },
|
||||||
&Node::Inst {
|
&Node::Inst {
|
||||||
inst: other_inst,
|
|
||||||
args: ref other_args,
|
args: ref other_args,
|
||||||
..
|
..
|
||||||
},
|
},
|
||||||
) => inst == other_inst && self.ids_eq(args, other_args, uf),
|
) => self.ids_eq(args, other_args, uf),
|
||||||
(
|
(
|
||||||
&Node::Load {
|
&Node::Load {
|
||||||
ref op,
|
ref op,
|
||||||
@@ -249,16 +234,14 @@ impl CtxHash<Node> for NodeCtx {
|
|||||||
&Node::Pure {
|
&Node::Pure {
|
||||||
ref op,
|
ref op,
|
||||||
ref args,
|
ref args,
|
||||||
types: _,
|
ty,
|
||||||
|
arity: _,
|
||||||
} => {
|
} => {
|
||||||
op.hash(&mut state);
|
op.hash(&mut state);
|
||||||
self.hash_ids(args, &mut state, uf);
|
self.hash_ids(args, &mut state, uf);
|
||||||
// Don't hash `types`: it requires an indirection
|
ty.hash(&mut state);
|
||||||
// (hence cache misses), and result type *should* be
|
|
||||||
// fully determined by op and args.
|
|
||||||
}
|
}
|
||||||
&Node::Inst { inst, ref args, .. } => {
|
&Node::Inst { ref args, .. } => {
|
||||||
inst.hash(&mut state);
|
|
||||||
self.hash_ids(args, &mut state, uf);
|
self.hash_ids(args, &mut state, uf);
|
||||||
}
|
}
|
||||||
&Node::Load {
|
&Node::Load {
|
||||||
@@ -370,3 +353,14 @@ impl Language for NodeCtx {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
#[test]
|
||||||
|
#[cfg(target_pointer_width = "64")]
|
||||||
|
fn node_size() {
|
||||||
|
use super::*;
|
||||||
|
assert_eq!(std::mem::size_of::<InstructionImms>(), 16);
|
||||||
|
assert_eq!(std::mem::size_of::<Node>(), 32);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -62,7 +62,7 @@ use crate::fx::{FxHashMap, FxHashSet};
|
|||||||
use crate::inst_predicates::has_memory_fence_semantics;
|
use crate::inst_predicates::has_memory_fence_semantics;
|
||||||
use crate::ir::{Block, Function, Inst, InstructionData, MemFlags, Opcode};
|
use crate::ir::{Block, Function, Inst, InstructionData, MemFlags, Opcode};
|
||||||
use crate::trace;
|
use crate::trace;
|
||||||
use cranelift_entity::SecondaryMap;
|
use cranelift_entity::{EntityRef, SecondaryMap};
|
||||||
use smallvec::{smallvec, SmallVec};
|
use smallvec::{smallvec, SmallVec};
|
||||||
|
|
||||||
/// For a given program point, the vector of last-store instruction
|
/// For a given program point, the vector of last-store instruction
|
||||||
@@ -97,6 +97,32 @@ pub enum MemoryState {
|
|||||||
AfterInst(Inst),
|
AfterInst(Inst),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Memory state index, packed into a u32.
|
||||||
|
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||||
|
pub struct PackedMemoryState(u32);
|
||||||
|
|
||||||
|
impl From<MemoryState> for PackedMemoryState {
|
||||||
|
fn from(state: MemoryState) -> Self {
|
||||||
|
match state {
|
||||||
|
MemoryState::Entry => Self(0),
|
||||||
|
MemoryState::Store(i) => Self(1 | (i.index() as u32) << 2),
|
||||||
|
MemoryState::BeforeInst(i) => Self(2 | (i.index() as u32) << 2),
|
||||||
|
MemoryState::AfterInst(i) => Self(3 | (i.index() as u32) << 2),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PackedMemoryState {
|
||||||
|
/// Does this memory state refer to a specific store instruction?
|
||||||
|
pub fn as_store(&self) -> Option<Inst> {
|
||||||
|
if self.0 & 3 == 1 {
|
||||||
|
Some(Inst::from_bits(self.0 >> 2))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl LastStores {
|
impl LastStores {
|
||||||
fn update(&mut self, func: &Function, inst: Inst) {
|
fn update(&mut self, func: &Function, inst: Inst) {
|
||||||
let opcode = func.dfg[inst].opcode();
|
let opcode = func.dfg[inst].opcode();
|
||||||
@@ -148,7 +174,7 @@ impl LastStores {
|
|||||||
pub struct AliasAnalysis {
|
pub struct AliasAnalysis {
|
||||||
/// Last-store instruction (or none) for a given load. Use a hash map
|
/// Last-store instruction (or none) for a given load. Use a hash map
|
||||||
/// instead of a `SecondaryMap` because this is sparse.
|
/// instead of a `SecondaryMap` because this is sparse.
|
||||||
load_mem_state: FxHashMap<Inst, MemoryState>,
|
load_mem_state: FxHashMap<Inst, PackedMemoryState>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl AliasAnalysis {
|
impl AliasAnalysis {
|
||||||
@@ -165,7 +191,7 @@ impl AliasAnalysis {
|
|||||||
cfg: &ControlFlowGraph,
|
cfg: &ControlFlowGraph,
|
||||||
) -> SecondaryMap<Block, Option<LastStores>> {
|
) -> SecondaryMap<Block, Option<LastStores>> {
|
||||||
let mut block_input = SecondaryMap::with_capacity(func.dfg.num_blocks());
|
let mut block_input = SecondaryMap::with_capacity(func.dfg.num_blocks());
|
||||||
let mut worklist: SmallVec<[Block; 8]> = smallvec![];
|
let mut worklist: SmallVec<[Block; 16]> = smallvec![];
|
||||||
let mut worklist_set = FxHashSet::default();
|
let mut worklist_set = FxHashSet::default();
|
||||||
let entry = func.layout.entry_block().unwrap();
|
let entry = func.layout.entry_block().unwrap();
|
||||||
worklist.push(entry);
|
worklist.push(entry);
|
||||||
@@ -210,8 +236,9 @@ impl AliasAnalysis {
|
|||||||
fn compute_load_last_stores(
|
fn compute_load_last_stores(
|
||||||
func: &Function,
|
func: &Function,
|
||||||
block_input: SecondaryMap<Block, Option<LastStores>>,
|
block_input: SecondaryMap<Block, Option<LastStores>>,
|
||||||
) -> FxHashMap<Inst, MemoryState> {
|
) -> FxHashMap<Inst, PackedMemoryState> {
|
||||||
let mut load_mem_state = FxHashMap::default();
|
let mut load_mem_state = FxHashMap::default();
|
||||||
|
load_mem_state.reserve(func.dfg.num_insts() / 8);
|
||||||
|
|
||||||
for block in func.layout.blocks() {
|
for block in func.layout.blocks() {
|
||||||
let mut state = block_input[block].clone().unwrap();
|
let mut state = block_input[block].clone().unwrap();
|
||||||
@@ -249,7 +276,7 @@ impl AliasAnalysis {
|
|||||||
mem_state,
|
mem_state,
|
||||||
);
|
);
|
||||||
|
|
||||||
load_mem_state.insert(inst, mem_state);
|
load_mem_state.insert(inst, mem_state.into());
|
||||||
}
|
}
|
||||||
|
|
||||||
state.update(func, inst);
|
state.update(func, inst);
|
||||||
@@ -260,7 +287,7 @@ impl AliasAnalysis {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Get the state seen by a load, if any.
|
/// Get the state seen by a load, if any.
|
||||||
pub fn get_state_for_load(&self, inst: Inst) -> Option<MemoryState> {
|
pub fn get_state_for_load(&self, inst: Inst) -> Option<PackedMemoryState> {
|
||||||
self.load_mem_state.get(&inst).copied()
|
self.load_mem_state.get(&inst).copied()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -274,6 +274,12 @@ impl DataFlowGraph {
|
|||||||
self.values[v].ty()
|
self.values[v].ty()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fill in the type of a value, only if currently invalid (as a placeholder).
|
||||||
|
pub(crate) fn fill_in_value_type(&mut self, v: Value, ty: Type) {
|
||||||
|
debug_assert!(self.values[v].ty().is_invalid());
|
||||||
|
self.values[v].set_type(ty);
|
||||||
|
}
|
||||||
|
|
||||||
/// Get the definition of a value.
|
/// Get the definition of a value.
|
||||||
///
|
///
|
||||||
/// This is either the instruction that defined it or the Block that has the value as an
|
/// This is either the instruction that defined it or the Block that has the value as an
|
||||||
|
|||||||
@@ -381,7 +381,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
|
|||||||
for inst in f.layout.block_insts(bb) {
|
for inst in f.layout.block_insts(bb) {
|
||||||
for &result in f.dfg.inst_results(inst) {
|
for &result in f.dfg.inst_results(inst) {
|
||||||
let ty = f.dfg.value_type(result);
|
let ty = f.dfg.value_type(result);
|
||||||
if value_regs[result].is_invalid() {
|
if value_regs[result].is_invalid() && !ty.is_invalid() {
|
||||||
let regs = alloc_vregs(ty, &mut next_vreg, &mut vcode)?;
|
let regs = alloc_vregs(ty, &mut next_vreg, &mut vcode)?;
|
||||||
value_regs[result] = regs;
|
value_regs[result] = regs;
|
||||||
trace!(
|
trace!(
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
|
|
||||||
use crate::egraph::Analysis;
|
use crate::egraph::Analysis;
|
||||||
use crate::egraph::FuncEGraph;
|
use crate::egraph::FuncEGraph;
|
||||||
use crate::egraph::MemoryState;
|
|
||||||
pub use crate::egraph::{Node, NodeCtx};
|
pub use crate::egraph::{Node, NodeCtx};
|
||||||
use crate::ir::condcodes;
|
use crate::ir::condcodes;
|
||||||
pub use crate::ir::condcodes::{FloatCC, IntCC};
|
pub use crate::ir::condcodes::{FloatCC, IntCC};
|
||||||
@@ -97,35 +96,37 @@ pub(crate) fn store_to_load<'a>(id: Id, egraph: &mut FuncEGraph<'a>) -> Id {
|
|||||||
},
|
},
|
||||||
ty: load_ty,
|
ty: load_ty,
|
||||||
addr: load_addr,
|
addr: load_addr,
|
||||||
mem_state: MemoryState::Store(store_inst),
|
mem_state,
|
||||||
..
|
..
|
||||||
} = load_key.node(&egraph.egraph.nodes)
|
} = load_key.node(&egraph.egraph.nodes)
|
||||||
{
|
{
|
||||||
trace!(" -> got load op for id {}", id);
|
if let Some(store_inst) = mem_state.as_store() {
|
||||||
if let Some((store_ty, store_id)) = egraph.store_nodes.get(&store_inst) {
|
trace!(" -> got load op for id {}", id);
|
||||||
trace!(" -> got store id: {} ty: {}", store_id, store_ty);
|
if let Some((store_ty, store_id)) = egraph.store_nodes.get(&store_inst) {
|
||||||
let store_key = egraph.egraph.classes[*store_id].get_node().unwrap();
|
trace!(" -> got store id: {} ty: {}", store_id, store_ty);
|
||||||
if let Node::Inst {
|
let store_key = egraph.egraph.classes[*store_id].get_node().unwrap();
|
||||||
op:
|
if let Node::Inst {
|
||||||
InstructionImms::Store {
|
op:
|
||||||
opcode: Opcode::Store,
|
InstructionImms::Store {
|
||||||
offset: store_offset,
|
opcode: Opcode::Store,
|
||||||
..
|
offset: store_offset,
|
||||||
},
|
..
|
||||||
args: store_args,
|
},
|
||||||
..
|
args: store_args,
|
||||||
} = store_key.node(&egraph.egraph.nodes)
|
..
|
||||||
{
|
} = store_key.node(&egraph.egraph.nodes)
|
||||||
let store_args = store_args.as_slice(&egraph.node_ctx.args);
|
|
||||||
let store_data = store_args[0];
|
|
||||||
let store_addr = store_args[1];
|
|
||||||
if *load_offset == *store_offset
|
|
||||||
&& *load_ty == *store_ty
|
|
||||||
&& egraph.egraph.unionfind.equiv_id_mut(*load_addr, store_addr)
|
|
||||||
{
|
{
|
||||||
trace!(" -> same offset, type, address; forwarding");
|
let store_args = store_args.as_slice(&egraph.node_ctx.args);
|
||||||
egraph.stats.store_to_load_forward += 1;
|
let store_data = store_args[0];
|
||||||
return store_data;
|
let store_addr = store_args[1];
|
||||||
|
if *load_offset == *store_offset
|
||||||
|
&& *load_ty == *store_ty
|
||||||
|
&& egraph.egraph.unionfind.equiv_id_mut(*load_addr, store_addr)
|
||||||
|
{
|
||||||
|
trace!(" -> same offset, type, address; forwarding");
|
||||||
|
egraph.stats.store_to_load_forward += 1;
|
||||||
|
return store_data;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -155,12 +156,20 @@ where
|
|||||||
while let Some(node) = self.iter.next(&ctx.egraph.egraph) {
|
while let Some(node) = self.iter.next(&ctx.egraph.egraph) {
|
||||||
trace!("iter from root {}: node {:?}", self.root, node);
|
trace!("iter from root {}: node {:?}", self.root, node);
|
||||||
match node {
|
match node {
|
||||||
Node::Pure { op, args, types }
|
Node::Pure {
|
||||||
|
op,
|
||||||
|
args,
|
||||||
|
ty,
|
||||||
|
arity,
|
||||||
|
}
|
||||||
| Node::Inst {
|
| Node::Inst {
|
||||||
op, args, types, ..
|
op,
|
||||||
} if types.len() == 1 => {
|
args,
|
||||||
let ty = types.as_slice(&ctx.egraph.node_ctx.types)[0];
|
ty,
|
||||||
return Some((ty, op.clone(), args.clone()));
|
arity,
|
||||||
|
..
|
||||||
|
} if *arity == 1 => {
|
||||||
|
return Some((*ty, op.clone(), args.clone()));
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
@@ -176,8 +185,8 @@ impl<'a, 'b> generated_code::Context for IsleContext<'a, 'b> {
|
|||||||
let mut iter = self.egraph.egraph.enodes(eclass);
|
let mut iter = self.egraph.egraph.enodes(eclass);
|
||||||
while let Some(node) = iter.next(&self.egraph.egraph) {
|
while let Some(node) = iter.next(&self.egraph.egraph) {
|
||||||
match node {
|
match node {
|
||||||
&Node::Pure { types, .. } | &Node::Inst { types, .. } if types.len() == 1 => {
|
&Node::Pure { ty, arity, .. } | &Node::Inst { ty, arity, .. } if arity == 1 => {
|
||||||
return Some(types.as_slice(&self.egraph.node_ctx.types)[0]);
|
return Some(ty);
|
||||||
}
|
}
|
||||||
&Node::Load { ty, .. } => return Some(ty),
|
&Node::Load { ty, .. } => return Some(ty),
|
||||||
&Node::Result { ty, .. } => return Some(ty),
|
&Node::Result { ty, .. } => return Some(ty),
|
||||||
@@ -207,14 +216,16 @@ impl<'a, 'b> generated_code::Context for IsleContext<'a, 'b> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn pure_enode_ctor(&mut self, ty: Type, op: &InstructionImms, args: IdArray) -> Id {
|
fn pure_enode_ctor(&mut self, ty: Type, op: &InstructionImms, args: IdArray) -> Id {
|
||||||
let types = self.egraph.node_ctx.types.single(ty);
|
|
||||||
let types = types.freeze(&mut self.egraph.node_ctx.types);
|
|
||||||
let op = op.clone();
|
let op = op.clone();
|
||||||
match self
|
match self.egraph.egraph.add(
|
||||||
.egraph
|
Node::Pure {
|
||||||
.egraph
|
op,
|
||||||
.add(Node::Pure { op, args, types }, &mut self.egraph.node_ctx)
|
args,
|
||||||
{
|
ty,
|
||||||
|
arity: 1,
|
||||||
|
},
|
||||||
|
&mut self.egraph.node_ctx,
|
||||||
|
) {
|
||||||
NewOrExisting::New(id) => {
|
NewOrExisting::New(id) => {
|
||||||
self.egraph.stats.node_created += 1;
|
self.egraph.stats.node_created += 1;
|
||||||
self.egraph.stats.node_pure += 1;
|
self.egraph.stats.node_pure += 1;
|
||||||
|
|||||||
@@ -134,6 +134,7 @@ impl<K, V> CtxHashMap<K, V> {
|
|||||||
|
|
||||||
/// Return an Entry cursor on a given bucket for a key, allowing
|
/// Return an Entry cursor on a given bucket for a key, allowing
|
||||||
/// for fetching the current value or inserting a new one.
|
/// for fetching the current value or inserting a new one.
|
||||||
|
#[inline(always)]
|
||||||
pub fn entry<'a, Ctx: CtxEq<K, K> + CtxHash<K>>(
|
pub fn entry<'a, Ctx: CtxEq<K, K> + CtxHash<K>>(
|
||||||
&'a mut self,
|
&'a mut self,
|
||||||
k: K,
|
k: K,
|
||||||
|
|||||||
@@ -530,7 +530,7 @@ where
|
|||||||
|
|
||||||
// Update analysis.
|
// Update analysis.
|
||||||
let node_ctx = ctx.node_ctx;
|
let node_ctx = ctx.node_ctx;
|
||||||
self.update_analysis(node_ctx, eclass_id);
|
self.update_analysis_new(node_ctx, eclass_id, key);
|
||||||
|
|
||||||
NewOrExisting::New(eclass_id)
|
NewOrExisting::New(eclass_id)
|
||||||
}
|
}
|
||||||
@@ -568,7 +568,7 @@ where
|
|||||||
b
|
b
|
||||||
);
|
);
|
||||||
self.classes[a] = EClass::node_and_child(node, b);
|
self.classes[a] = EClass::node_and_child(node, b);
|
||||||
self.update_analysis(ctx, a);
|
self.update_analysis_union(ctx, a, a, b);
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -576,7 +576,7 @@ where
|
|||||||
self.unionfind.add(u);
|
self.unionfind.add(u);
|
||||||
self.unionfind.union(u, b);
|
self.unionfind.union(u, b);
|
||||||
trace!(" -> union id {} and id {} into id {}", a, b, u);
|
trace!(" -> union id {} and id {} into id {}", a, b, u);
|
||||||
self.update_analysis(ctx, u);
|
self.update_analysis_union(ctx, u, a, b);
|
||||||
u
|
u
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -605,26 +605,20 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Update analysis for a given eclass node.
|
/// Update analysis for a given eclass node (new-enode case).
|
||||||
fn update_analysis(&mut self, ctx: &L, eclass: Id) {
|
fn update_analysis_new(&mut self, ctx: &L, eclass: Id, node: NodeKey) {
|
||||||
if let Some((analysis, state)) = self.analysis.as_mut() {
|
if let Some((analysis, state)) = self.analysis.as_mut() {
|
||||||
let eclass_data = self.classes[eclass];
|
let node = node.node(&self.nodes);
|
||||||
let value = if let Some(node_key) = eclass_data.as_node() {
|
state[eclass] = analysis.for_node(ctx, node, state);
|
||||||
let node = node_key.node(&self.nodes);
|
}
|
||||||
analysis.for_node(ctx, node, state)
|
}
|
||||||
} else if let Some((node_key, child)) = eclass_data.as_node_and_child() {
|
|
||||||
let node = node_key.node(&self.nodes);
|
/// Update analysis for a given eclass node (union case).
|
||||||
let value = analysis.for_node(ctx, node, state);
|
fn update_analysis_union(&mut self, ctx: &L, eclass: Id, a: Id, b: Id) {
|
||||||
let child_value = &state[child];
|
if let Some((analysis, state)) = self.analysis.as_mut() {
|
||||||
analysis.meet(ctx, &value, child_value)
|
let a = &state[a];
|
||||||
} else if let Some((c1, c2)) = eclass_data.as_union() {
|
let b = &state[b];
|
||||||
let c1 = &state[c1];
|
state[eclass] = analysis.meet(ctx, a, b);
|
||||||
let c2 = &state[c2];
|
|
||||||
analysis.meet(ctx, c1, c2)
|
|
||||||
} else {
|
|
||||||
panic!("Invalid eclass node: {:?}", eclass_data);
|
|
||||||
};
|
|
||||||
state[eclass] = value;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -646,6 +640,7 @@ pub struct NodeIter<L: Language, A: Analysis<L = L>> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<L: Language, A: Analysis<L = L>> NodeIter<L, A> {
|
impl<L: Language, A: Analysis<L = L>> NodeIter<L, A> {
|
||||||
|
#[inline(always)]
|
||||||
pub fn next<'a>(&mut self, egraph: &'a EGraph<L, A>) -> Option<&'a L::Node> {
|
pub fn next<'a>(&mut self, egraph: &'a EGraph<L, A>) -> Option<&'a L::Node> {
|
||||||
while let Some(next) = self.stack.pop() {
|
while let Some(next) = self.stack.pop() {
|
||||||
let eclass = egraph.classes[next];
|
let eclass = egraph.classes[next];
|
||||||
|
|||||||
Reference in New Issue
Block a user