diff --git a/cranelift/codegen/src/egraph.rs b/cranelift/codegen/src/egraph.rs index f9216596a7..d8d625671b 100644 --- a/cranelift/codegen/src/egraph.rs +++ b/cranelift/codegen/src/egraph.rs @@ -1,6 +1,7 @@ //! Egraph-based mid-end optimization framework. use crate::dominator_tree::DominatorTree; +use crate::egraph::stores::PackedMemoryState; use crate::flowgraph::ControlFlowGraph; use crate::loop_analysis::{LoopAnalysis, LoopLevel}; use crate::trace; @@ -100,7 +101,9 @@ impl<'a> FuncEGraph<'a> { loop_analysis: &'a LoopAnalysis, cfg: &ControlFlowGraph, ) -> FuncEGraph<'a> { - let node_count_estimate = func.dfg.num_values() * 2; + let num_values = func.dfg.num_values(); + let num_blocks = func.dfg.num_blocks(); + let node_count_estimate = num_values * 2; let alias_analysis = AliasAnalysis::new(func, cfg); let mut this = Self { domtree, @@ -108,16 +111,19 @@ impl<'a> FuncEGraph<'a> { alias_analysis, egraph: EGraph::with_capacity(node_count_estimate, Some(Analysis)), node_ctx: NodeCtx::with_capacity_for_dfg(&func.dfg), - side_effects: SecondaryMap::default(), - side_effect_ids: vec![], + side_effects: SecondaryMap::with_capacity(num_blocks), + side_effect_ids: Vec::with_capacity(node_count_estimate), store_nodes: FxHashMap::default(), - blockparams: SecondaryMap::default(), - blockparam_ids_tys: vec![], + blockparams: SecondaryMap::with_capacity(num_blocks), + blockparam_ids_tys: Vec::with_capacity(num_blocks * 10), remat_ids: FxHashSet::default(), subsume_ids: FxHashSet::default(), stats: Default::default(), rewrite_depth: 0, }; + this.store_nodes.reserve(func.dfg.num_values() / 8); + this.remat_ids.reserve(func.dfg.num_values() / 4); + this.subsume_ids.reserve(func.dfg.num_values() / 4); this.build(func); this } @@ -172,12 +178,11 @@ impl<'a> FuncEGraph<'a> { ); let results = func.dfg.inst_results(inst); - - let types = self - .node_ctx - .types - .from_iter(results.iter().map(|&val| func.dfg.value_type(val))); - let types = types.freeze(&mut self.node_ctx.types); + let ty = if results.len() == 1 { + func.dfg.value_type(results[0]) + } else { + crate::ir::types::INVALID + }; let load_mem_state = self.alias_analysis.get_state_for_load(inst); let is_readonly_load = match func.dfg[inst] { @@ -193,21 +198,26 @@ impl<'a> FuncEGraph<'a> { let op = InstructionImms::from(&func.dfg[inst]); let opcode = op.opcode(); let srcloc = func.srclocs[inst]; + let arity = u16::try_from(results.len()) + .expect("More than 2^16 results from an instruction"); let node = if is_readonly_load { self.stats.node_created += 1; self.stats.node_pure += 1; - Node::Pure { op, args, types } + Node::Pure { + op, + args, + ty, + arity, + } } else if let Some(load_mem_state) = load_mem_state { let addr = args.as_slice(&self.node_ctx.args)[0]; - let ty = types.as_slice(&self.node_ctx.types)[0]; trace!("load at inst {} has mem state {:?}", inst, load_mem_state); self.stats.node_created += 1; self.stats.node_load += 1; Node::Load { op, ty, - inst, addr, mem_state: load_mem_state, srcloc, @@ -217,16 +227,21 @@ impl<'a> FuncEGraph<'a> { self.stats.node_inst += 1; Node::Inst { op, - inst, args, - types, + ty, + arity, srcloc, loop_level, } } else { self.stats.node_created += 1; self.stats.node_pure += 1; - Node::Pure { op, args, types } + Node::Pure { + op, + args, + ty, + arity, + } }; let dedup_needed = self.node_ctx.needs_dedup(&node); let is_pure = matches!(node, Node::Pure { .. }); diff --git a/cranelift/codegen/src/egraph/elaborate.rs b/cranelift/codegen/src/egraph/elaborate.rs index 47e6f40c32..ee465def22 100644 --- a/cranelift/codegen/src/egraph/elaborate.rs +++ b/cranelift/codegen/src/egraph/elaborate.rs @@ -66,7 +66,11 @@ enum ElabStackEntry { }, /// Waiting for a result to return one projected value of a /// multi-value result. - PendingProjection { canonical: Id, index: usize }, + PendingProjection { + canonical: Id, + index: usize, + ty: Type, + }, } #[derive(Clone, Debug)] @@ -189,15 +193,15 @@ impl<'a> Elaborator<'a> { } fn add_node(&mut self, node: &Node, args: &[Value], to_block: Block) -> ValueList { - let (instdata, result_tys) = match node { - Node::Pure { op, types, .. } | Node::Inst { op, types, .. } => ( + let (instdata, result_ty, arity) = match node { + Node::Pure { op, ty, arity, .. } | Node::Inst { op, ty, arity, .. } => ( op.with_args(args, &mut self.func.dfg.value_lists), - types.as_slice(&self.node_ctx.types), - ), - Node::Load { op, ty, .. } => ( - op.with_args(args, &mut self.func.dfg.value_lists), - std::slice::from_ref(ty), + *ty, + *arity, ), + Node::Load { op, ty, .. } => { + (op.with_args(args, &mut self.func.dfg.value_lists), *ty, 1) + } _ => panic!("Cannot `add_node()` on block param or projection"), }; let srcloc = match node { @@ -237,8 +241,12 @@ impl<'a> Elaborator<'a> { let inst = self.func.dfg.make_inst(instdata); self.func.srclocs[inst] = srcloc; - for &ty in result_tys { - self.func.dfg.append_result(inst, ty); + if arity == 1 { + self.func.dfg.append_result(inst, result_ty); + } else { + for _ in 0..arity { + self.func.dfg.append_result(inst, crate::ir::types::INVALID); + } } if is_terminator_group_inst { @@ -371,11 +379,15 @@ impl<'a> Elaborator<'a> { // the value we are projecting a part of, then // eventually return here (saving state with a // PendingProjection). - if let Node::Result { value, result, .. } = node { + if let Node::Result { + value, result, ty, .. + } = node + { trace!(" -> result; pushing arg value {}", value); self.elab_stack.push(ElabStackEntry::PendingProjection { index: *result, canonical, + ty: *ty, }); self.elab_stack.push(ElabStackEntry::Start { id: *value }); continue; @@ -493,7 +505,11 @@ impl<'a> Elaborator<'a> { // Push onto the elab-results stack. self.elab_result_stack.push(result) } - &ElabStackEntry::PendingProjection { index, canonical } => { + &ElabStackEntry::PendingProjection { + ty, + index, + canonical, + } => { self.elab_stack.pop(); // Grab the input from the elab-result stack. @@ -511,10 +527,12 @@ impl<'a> Elaborator<'a> { } }; let values = values.as_slice(&self.func.dfg.value_lists); + let value = values[index]; + self.func.dfg.fill_in_value_type(value, ty); let value = IdValue::Value { depth, block, - value: values[index], + value, }; self.id_to_value.insert_if_absent(canonical, value.clone()); diff --git a/cranelift/codegen/src/egraph/node.rs b/cranelift/codegen/src/egraph/node.rs index 3fb3502241..01d8e4128c 100644 --- a/cranelift/codegen/src/egraph/node.rs +++ b/cranelift/codegen/src/egraph/node.rs @@ -1,9 +1,9 @@ //! Node definition for EGraph representation. -use super::MemoryState; -use crate::ir::{Block, DataFlowGraph, Inst, InstructionImms, Opcode, RelSourceLoc, Type}; +use super::PackedMemoryState; +use crate::ir::{Block, DataFlowGraph, InstructionImms, Opcode, RelSourceLoc, Type}; use crate::loop_analysis::LoopLevel; -use cranelift_egraph::{BumpArena, BumpSlice, CtxEq, CtxHash, Id, Language, UnionFind}; +use cranelift_egraph::{CtxEq, CtxHash, Id, Language, UnionFind}; use cranelift_entity::{EntityList, ListPool}; use std::hash::{Hash, Hasher}; @@ -31,8 +31,10 @@ pub enum Node { op: InstructionImms, /// eclass arguments to the operator. args: EntityList, - /// Types of results. - types: BumpSlice, + /// Type of result, if one. + ty: Type, + /// Number of results. + arity: u16, }, /// A CLIF instruction that has side-effects or is otherwise not /// representable by `Pure`. @@ -41,15 +43,10 @@ pub enum Node { op: InstructionImms, /// eclass arguments to the operator. args: EntityList, - /// Types of results. - types: BumpSlice, - /// The index of the original instruction. We include this so - /// that the `Inst`s are not deduplicated: every instance is a - /// logically separate and unique side-effect. However, - /// because we clear the DataFlowGraph before elaboration, - /// this `Inst` is *not* valid to fetch any details from the - /// original instruction. - inst: Inst, + /// Type of result, if one. + ty: Type, + /// Number of results. + arity: u16, /// The source location to preserve. srcloc: RelSourceLoc, /// The loop level of this Inst. @@ -83,14 +80,9 @@ pub enum Node { /// the key). addr: Id, /// The abstract memory state that this load accesses. - mem_state: MemoryState, + mem_state: PackedMemoryState, // -- not included in dedup key: - /// The `Inst` we will use for a trap location for this - /// load. Excluded from Eq/Hash so that loads that are - /// identical except for the specific instance will dedup on - /// top of each other. - inst: Inst, /// Source location, for traps. Not included in Eq/Hash. srcloc: RelSourceLoc, }, @@ -107,18 +99,14 @@ impl Node { /// Shared pools for type and id lists in nodes. pub struct NodeCtx { - /// Arena for result-type arrays. - pub types: BumpArena, /// Arena for arg eclass-ID lists. pub args: ListPool, } impl NodeCtx { pub(crate) fn with_capacity_for_dfg(dfg: &DataFlowGraph) -> Self { - let n_types = dfg.num_values(); let n_args = dfg.value_lists.capacity(); Self { - types: BumpArena::arena_with_capacity(n_types), args: ListPool::with_capacity(n_args), } } @@ -168,26 +156,23 @@ impl CtxEq for NodeCtx { &Node::Pure { ref op, ref args, - ref types, + ty, + arity: _, }, &Node::Pure { op: ref other_op, args: ref other_args, - types: ref other_types, + ty: other_ty, + arity: _, }, - ) => { - *op == *other_op - && self.ids_eq(args, other_args, uf) - && types.as_slice(&self.types) == other_types.as_slice(&self.types) - } + ) => *op == *other_op && self.ids_eq(args, other_args, uf) && ty == other_ty, ( - &Node::Inst { inst, ref args, .. }, + &Node::Inst { ref args, .. }, &Node::Inst { - inst: other_inst, args: ref other_args, .. }, - ) => inst == other_inst && self.ids_eq(args, other_args, uf), + ) => self.ids_eq(args, other_args, uf), ( &Node::Load { ref op, @@ -249,16 +234,14 @@ impl CtxHash for NodeCtx { &Node::Pure { ref op, ref args, - types: _, + ty, + arity: _, } => { op.hash(&mut state); self.hash_ids(args, &mut state, uf); - // Don't hash `types`: it requires an indirection - // (hence cache misses), and result type *should* be - // fully determined by op and args. + ty.hash(&mut state); } - &Node::Inst { inst, ref args, .. } => { - inst.hash(&mut state); + &Node::Inst { ref args, .. } => { self.hash_ids(args, &mut state, uf); } &Node::Load { @@ -370,3 +353,14 @@ impl Language for NodeCtx { } } } + +#[cfg(test)] +mod test { + #[test] + #[cfg(target_pointer_width = "64")] + fn node_size() { + use super::*; + assert_eq!(std::mem::size_of::(), 16); + assert_eq!(std::mem::size_of::(), 32); + } +} diff --git a/cranelift/codegen/src/egraph/stores.rs b/cranelift/codegen/src/egraph/stores.rs index 8ca3bd6671..9746eba159 100644 --- a/cranelift/codegen/src/egraph/stores.rs +++ b/cranelift/codegen/src/egraph/stores.rs @@ -62,7 +62,7 @@ use crate::fx::{FxHashMap, FxHashSet}; use crate::inst_predicates::has_memory_fence_semantics; use crate::ir::{Block, Function, Inst, InstructionData, MemFlags, Opcode}; use crate::trace; -use cranelift_entity::SecondaryMap; +use cranelift_entity::{EntityRef, SecondaryMap}; use smallvec::{smallvec, SmallVec}; /// For a given program point, the vector of last-store instruction @@ -97,6 +97,32 @@ pub enum MemoryState { AfterInst(Inst), } +/// Memory state index, packed into a u32. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct PackedMemoryState(u32); + +impl From for PackedMemoryState { + fn from(state: MemoryState) -> Self { + match state { + MemoryState::Entry => Self(0), + MemoryState::Store(i) => Self(1 | (i.index() as u32) << 2), + MemoryState::BeforeInst(i) => Self(2 | (i.index() as u32) << 2), + MemoryState::AfterInst(i) => Self(3 | (i.index() as u32) << 2), + } + } +} + +impl PackedMemoryState { + /// Does this memory state refer to a specific store instruction? + pub fn as_store(&self) -> Option { + if self.0 & 3 == 1 { + Some(Inst::from_bits(self.0 >> 2)) + } else { + None + } + } +} + impl LastStores { fn update(&mut self, func: &Function, inst: Inst) { let opcode = func.dfg[inst].opcode(); @@ -148,7 +174,7 @@ impl LastStores { pub struct AliasAnalysis { /// Last-store instruction (or none) for a given load. Use a hash map /// instead of a `SecondaryMap` because this is sparse. - load_mem_state: FxHashMap, + load_mem_state: FxHashMap, } impl AliasAnalysis { @@ -165,7 +191,7 @@ impl AliasAnalysis { cfg: &ControlFlowGraph, ) -> SecondaryMap> { let mut block_input = SecondaryMap::with_capacity(func.dfg.num_blocks()); - let mut worklist: SmallVec<[Block; 8]> = smallvec![]; + let mut worklist: SmallVec<[Block; 16]> = smallvec![]; let mut worklist_set = FxHashSet::default(); let entry = func.layout.entry_block().unwrap(); worklist.push(entry); @@ -210,8 +236,9 @@ impl AliasAnalysis { fn compute_load_last_stores( func: &Function, block_input: SecondaryMap>, - ) -> FxHashMap { + ) -> FxHashMap { let mut load_mem_state = FxHashMap::default(); + load_mem_state.reserve(func.dfg.num_insts() / 8); for block in func.layout.blocks() { let mut state = block_input[block].clone().unwrap(); @@ -249,7 +276,7 @@ impl AliasAnalysis { mem_state, ); - load_mem_state.insert(inst, mem_state); + load_mem_state.insert(inst, mem_state.into()); } state.update(func, inst); @@ -260,7 +287,7 @@ impl AliasAnalysis { } /// Get the state seen by a load, if any. - pub fn get_state_for_load(&self, inst: Inst) -> Option { + pub fn get_state_for_load(&self, inst: Inst) -> Option { self.load_mem_state.get(&inst).copied() } } diff --git a/cranelift/codegen/src/ir/dfg.rs b/cranelift/codegen/src/ir/dfg.rs index 1da10598c8..26497ab679 100644 --- a/cranelift/codegen/src/ir/dfg.rs +++ b/cranelift/codegen/src/ir/dfg.rs @@ -274,6 +274,12 @@ impl DataFlowGraph { self.values[v].ty() } + /// Fill in the type of a value, only if currently invalid (as a placeholder). + pub(crate) fn fill_in_value_type(&mut self, v: Value, ty: Type) { + debug_assert!(self.values[v].ty().is_invalid()); + self.values[v].set_type(ty); + } + /// Get the definition of a value. /// /// This is either the instruction that defined it or the Block that has the value as an diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs index bccf40e116..551eccad94 100644 --- a/cranelift/codegen/src/machinst/lower.rs +++ b/cranelift/codegen/src/machinst/lower.rs @@ -381,7 +381,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> { for inst in f.layout.block_insts(bb) { for &result in f.dfg.inst_results(inst) { let ty = f.dfg.value_type(result); - if value_regs[result].is_invalid() { + if value_regs[result].is_invalid() && !ty.is_invalid() { let regs = alloc_vregs(ty, &mut next_vreg, &mut vcode)?; value_regs[result] = regs; trace!( diff --git a/cranelift/codegen/src/opts.rs b/cranelift/codegen/src/opts.rs index 61a2a3aebf..a64f32c0d3 100644 --- a/cranelift/codegen/src/opts.rs +++ b/cranelift/codegen/src/opts.rs @@ -2,7 +2,6 @@ use crate::egraph::Analysis; use crate::egraph::FuncEGraph; -use crate::egraph::MemoryState; pub use crate::egraph::{Node, NodeCtx}; use crate::ir::condcodes; pub use crate::ir::condcodes::{FloatCC, IntCC}; @@ -97,35 +96,37 @@ pub(crate) fn store_to_load<'a>(id: Id, egraph: &mut FuncEGraph<'a>) -> Id { }, ty: load_ty, addr: load_addr, - mem_state: MemoryState::Store(store_inst), + mem_state, .. } = load_key.node(&egraph.egraph.nodes) { - trace!(" -> got load op for id {}", id); - if let Some((store_ty, store_id)) = egraph.store_nodes.get(&store_inst) { - trace!(" -> got store id: {} ty: {}", store_id, store_ty); - let store_key = egraph.egraph.classes[*store_id].get_node().unwrap(); - if let Node::Inst { - op: - InstructionImms::Store { - opcode: Opcode::Store, - offset: store_offset, - .. - }, - args: store_args, - .. - } = store_key.node(&egraph.egraph.nodes) - { - let store_args = store_args.as_slice(&egraph.node_ctx.args); - let store_data = store_args[0]; - let store_addr = store_args[1]; - if *load_offset == *store_offset - && *load_ty == *store_ty - && egraph.egraph.unionfind.equiv_id_mut(*load_addr, store_addr) + if let Some(store_inst) = mem_state.as_store() { + trace!(" -> got load op for id {}", id); + if let Some((store_ty, store_id)) = egraph.store_nodes.get(&store_inst) { + trace!(" -> got store id: {} ty: {}", store_id, store_ty); + let store_key = egraph.egraph.classes[*store_id].get_node().unwrap(); + if let Node::Inst { + op: + InstructionImms::Store { + opcode: Opcode::Store, + offset: store_offset, + .. + }, + args: store_args, + .. + } = store_key.node(&egraph.egraph.nodes) { - trace!(" -> same offset, type, address; forwarding"); - egraph.stats.store_to_load_forward += 1; - return store_data; + let store_args = store_args.as_slice(&egraph.node_ctx.args); + let store_data = store_args[0]; + let store_addr = store_args[1]; + if *load_offset == *store_offset + && *load_ty == *store_ty + && egraph.egraph.unionfind.equiv_id_mut(*load_addr, store_addr) + { + trace!(" -> same offset, type, address; forwarding"); + egraph.stats.store_to_load_forward += 1; + return store_data; + } } } } @@ -155,12 +156,20 @@ where while let Some(node) = self.iter.next(&ctx.egraph.egraph) { trace!("iter from root {}: node {:?}", self.root, node); match node { - Node::Pure { op, args, types } + Node::Pure { + op, + args, + ty, + arity, + } | Node::Inst { - op, args, types, .. - } if types.len() == 1 => { - let ty = types.as_slice(&ctx.egraph.node_ctx.types)[0]; - return Some((ty, op.clone(), args.clone())); + op, + args, + ty, + arity, + .. + } if *arity == 1 => { + return Some((*ty, op.clone(), args.clone())); } _ => {} } @@ -176,8 +185,8 @@ impl<'a, 'b> generated_code::Context for IsleContext<'a, 'b> { let mut iter = self.egraph.egraph.enodes(eclass); while let Some(node) = iter.next(&self.egraph.egraph) { match node { - &Node::Pure { types, .. } | &Node::Inst { types, .. } if types.len() == 1 => { - return Some(types.as_slice(&self.egraph.node_ctx.types)[0]); + &Node::Pure { ty, arity, .. } | &Node::Inst { ty, arity, .. } if arity == 1 => { + return Some(ty); } &Node::Load { ty, .. } => return Some(ty), &Node::Result { ty, .. } => return Some(ty), @@ -207,14 +216,16 @@ impl<'a, 'b> generated_code::Context for IsleContext<'a, 'b> { } fn pure_enode_ctor(&mut self, ty: Type, op: &InstructionImms, args: IdArray) -> Id { - let types = self.egraph.node_ctx.types.single(ty); - let types = types.freeze(&mut self.egraph.node_ctx.types); let op = op.clone(); - match self - .egraph - .egraph - .add(Node::Pure { op, args, types }, &mut self.egraph.node_ctx) - { + match self.egraph.egraph.add( + Node::Pure { + op, + args, + ty, + arity: 1, + }, + &mut self.egraph.node_ctx, + ) { NewOrExisting::New(id) => { self.egraph.stats.node_created += 1; self.egraph.stats.node_pure += 1; diff --git a/cranelift/egraph/src/ctxhash.rs b/cranelift/egraph/src/ctxhash.rs index 467a3b62a2..f70086a68c 100644 --- a/cranelift/egraph/src/ctxhash.rs +++ b/cranelift/egraph/src/ctxhash.rs @@ -134,6 +134,7 @@ impl CtxHashMap { /// Return an Entry cursor on a given bucket for a key, allowing /// for fetching the current value or inserting a new one. + #[inline(always)] pub fn entry<'a, Ctx: CtxEq + CtxHash>( &'a mut self, k: K, diff --git a/cranelift/egraph/src/lib.rs b/cranelift/egraph/src/lib.rs index af0f1729fe..e94416c2f2 100644 --- a/cranelift/egraph/src/lib.rs +++ b/cranelift/egraph/src/lib.rs @@ -530,7 +530,7 @@ where // Update analysis. let node_ctx = ctx.node_ctx; - self.update_analysis(node_ctx, eclass_id); + self.update_analysis_new(node_ctx, eclass_id, key); NewOrExisting::New(eclass_id) } @@ -568,7 +568,7 @@ where b ); self.classes[a] = EClass::node_and_child(node, b); - self.update_analysis(ctx, a); + self.update_analysis_union(ctx, a, a, b); return a; } @@ -576,7 +576,7 @@ where self.unionfind.add(u); self.unionfind.union(u, b); trace!(" -> union id {} and id {} into id {}", a, b, u); - self.update_analysis(ctx, u); + self.update_analysis_union(ctx, u, a, b); u } @@ -605,26 +605,20 @@ where } } - /// Update analysis for a given eclass node. - fn update_analysis(&mut self, ctx: &L, eclass: Id) { + /// Update analysis for a given eclass node (new-enode case). + fn update_analysis_new(&mut self, ctx: &L, eclass: Id, node: NodeKey) { if let Some((analysis, state)) = self.analysis.as_mut() { - let eclass_data = self.classes[eclass]; - let value = if let Some(node_key) = eclass_data.as_node() { - let node = node_key.node(&self.nodes); - analysis.for_node(ctx, node, state) - } else if let Some((node_key, child)) = eclass_data.as_node_and_child() { - let node = node_key.node(&self.nodes); - let value = analysis.for_node(ctx, node, state); - let child_value = &state[child]; - analysis.meet(ctx, &value, child_value) - } else if let Some((c1, c2)) = eclass_data.as_union() { - let c1 = &state[c1]; - let c2 = &state[c2]; - analysis.meet(ctx, c1, c2) - } else { - panic!("Invalid eclass node: {:?}", eclass_data); - }; - state[eclass] = value; + let node = node.node(&self.nodes); + state[eclass] = analysis.for_node(ctx, node, state); + } + } + + /// Update analysis for a given eclass node (union case). + fn update_analysis_union(&mut self, ctx: &L, eclass: Id, a: Id, b: Id) { + if let Some((analysis, state)) = self.analysis.as_mut() { + let a = &state[a]; + let b = &state[b]; + state[eclass] = analysis.meet(ctx, a, b); } } @@ -646,6 +640,7 @@ pub struct NodeIter> { } impl> NodeIter { + #[inline(always)] pub fn next<'a>(&mut self, egraph: &'a EGraph) -> Option<&'a L::Node> { while let Some(next) = self.stack.pop() { let eclass = egraph.classes[next];