From ae7688059de8ec520376309880d2401254f0fae7 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Mon, 15 Aug 2022 14:36:01 -0700 Subject: [PATCH] Cranelift: Use bump allocation in `remove_constant_phis` pass (#4710) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Cranelift: Use bump allocation in `remove_constant_phis` pass This makes compilation 2-6% faster for Sightglass's bz2 benchmark: ``` compilation :: cycles :: benchmarks/bz2/benchmark.wasm Δ = 7290648.36 ± 4245152.07 (confidence = 99%) bump.so is 1.02x to 1.06x faster than main.so! [166388177 183238542.98 214732518] bump.so [172836648 190529191.34 217514271] main.so compilation :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm No difference in performance. [182220055 225793551.12 277857575] bump.so [193212613 227784078.61 277175335] main.so compilation :: cycles :: benchmarks/spidermonkey/benchmark.wasm No difference in performance. [3848442474 4295214144.37 4665127241] bump.so [3969505457 4262415290.10 4563869974] main.so ``` * Add audit for `bumpalo` * Add an audit of `arrayvec` version 0.7.2 * Remove unnecessary `collect` into `Vec` I wasn't able to measure any perf difference here, but its nice to do anyways. * Use a `SecondaryMap` for keeping track of summaries --- Cargo.lock | 8 + cranelift/codegen/Cargo.toml | 2 + cranelift/codegen/src/remove_constant_phis.rs | 154 +++++++++++------- supply-chain/audits.toml | 15 ++ 4 files changed, 116 insertions(+), 63 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 76caf07c2a..8af525b846 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -102,6 +102,12 @@ dependencies = [ "derive_arbitrary", ] +[[package]] +name = "arrayvec" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6" + [[package]] name = "async-trait" version = "0.1.53" @@ -529,7 +535,9 @@ dependencies = [ name = "cranelift-codegen" version = "0.88.0" dependencies = [ + "arrayvec", "bincode", + "bumpalo", "cranelift-bforest", "cranelift-codegen-meta", "cranelift-codegen-shared", diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index a0f3eb8136..59f8bedc58 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -13,6 +13,8 @@ build = "build.rs" edition = "2021" [dependencies] +arrayvec = "0.7" +bumpalo = "3" cranelift-codegen-shared = { path = "./shared", version = "0.88.0" } cranelift-entity = { path = "../entity", version = "0.88.0" } cranelift-bforest = { path = "../bforest", version = "0.88.0" } diff --git a/cranelift/codegen/src/remove_constant_phis.rs b/cranelift/codegen/src/remove_constant_phis.rs index bff86832a8..a6a8b073ff 100644 --- a/cranelift/codegen/src/remove_constant_phis.rs +++ b/cranelift/codegen/src/remove_constant_phis.rs @@ -4,13 +4,15 @@ use crate::dominator_tree::DominatorTree; use crate::entity::EntityList; use crate::fx::FxHashMap; use crate::fx::FxHashSet; +use crate::ir; use crate::ir::instructions::BranchInfo; use crate::ir::Function; use crate::ir::{Block, Inst, Value}; use crate::timing; - -use smallvec::{smallvec, SmallVec}; -use std::vec::Vec; +use arrayvec::ArrayVec; +use bumpalo::Bump; +use cranelift_entity::SecondaryMap; +use smallvec::SmallVec; // A note on notation. For the sake of clarity, this file uses the phrase // "formal parameters" to mean the `Value`s listed in the block head, and @@ -107,27 +109,73 @@ impl AbstractValue { } } +#[derive(Clone, Copy, Debug)] +struct OutEdge<'a> { + /// An instruction that transfers control. + inst: Inst, + /// The block that control is transferred to. + block: Block, + /// The arguments to that block. + /// + /// These values can be from both groups A and B. + args: &'a [Value], +} + +impl<'a> OutEdge<'a> { + /// Construct a new `OutEdge` for the given instruction. + /// + /// Returns `None` if this is an edge without any block arguments, which + /// means we can ignore it for this analysis's purposes. + #[inline] + fn new(bump: &'a Bump, dfg: &ir::DataFlowGraph, inst: Inst, block: Block) -> Option { + let inst_var_args = dfg.inst_variable_args(inst); + + // Skip edges without params. + if inst_var_args.is_empty() { + return None; + } + + Some(OutEdge { + inst, + block, + args: bump.alloc_slice_fill_iter( + inst_var_args + .iter() + .map(|value| dfg.resolve_aliases(*value)), + ), + }) + } +} + /// For some block, a useful bundle of info. The `Block` itself is not stored /// here since it will be the key in the associated `FxHashMap` -- see /// `summaries` below. For the `SmallVec` tuning params: most blocks have /// few parameters, hence `4`. And almost all blocks have either one or two /// successors, hence `2`. -#[derive(Debug)] -struct BlockSummary { - /// Formal parameters for this `Block` - formals: SmallVec<[Value; 4] /*Group A*/>, +#[derive(Clone, Debug, Default)] +struct BlockSummary<'a> { + /// Formal parameters for this `Block`. + /// + /// These values are from group A. + formals: &'a [Value], - /// For each `Inst` in this block that transfers to another block: the - /// `Inst` itself, the destination `Block`, and the actual parameters - /// passed. We don't bother to include transfers that pass zero parameters + /// Each outgoing edge from this block. + /// + /// We don't bother to include transfers that pass zero parameters /// since that makes more work for the solver for no purpose. - dests: SmallVec<[(Inst, Block, SmallVec<[Value; 4] /*both Groups A and B*/>); 2]>, + /// + /// Note that, because blocks used with `br_table`s cannot have block + /// arguments, there are at most two outgoing edges from these blocks. + dests: ArrayVec, 2>, } -impl BlockSummary { - fn new(formals: SmallVec<[Value; 4]>) -> Self { + +impl<'a> BlockSummary<'a> { + /// Construct a new `BlockSummary`, using `values` as its backing storage. + #[inline] + fn new(bump: &'a Bump, formals: &[Value]) -> Self { Self { - formals, - dests: smallvec![], + formals: bump.alloc_slice_copy(formals), + dests: Default::default(), } } } @@ -171,21 +219,17 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) let _tt = timing::remove_constant_phis(); debug_assert!(domtree.is_valid()); - // Get the blocks, in reverse postorder - let blocks_reverse_postorder = domtree - .cfg_postorder() - .into_iter() - .rev() - .collect::>(); - // Phase 1 of 3: for each block, make a summary containing all relevant // info. The solver will iterate over the summaries, rather than having // to inspect each instruction in each block. - let mut summaries = FxHashMap::::default(); + let bump = + Bump::with_capacity(domtree.cfg_postorder().len() * 4 * std::mem::size_of::()); + let mut summaries = + SecondaryMap::::with_capacity(domtree.cfg_postorder().len()); - for &&b in &blocks_reverse_postorder { + for b in domtree.cfg_postorder().iter().rev().copied() { let formals = func.dfg.block_params(b); - let mut summary = BlockSummary::new(SmallVec::from(formals)); + let mut summary = BlockSummary::new(&bump, formals); for inst in func.layout.block_insts(b) { let idetails = &func.dfg[inst]; @@ -194,15 +238,8 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) // `SingleDest` here. if let BranchInfo::SingleDest(dest, _) = idetails.analyze_branch(&func.dfg.value_lists) { - let inst_var_args = func.dfg.inst_variable_args(inst); - // Skip branches/jumps that carry no params. - if inst_var_args.len() > 0 { - let mut actuals = SmallVec::<[Value; 4]>::new(); - for arg in inst_var_args { - let arg = func.dfg.resolve_aliases(*arg); - actuals.push(arg); - } - summary.dests.push((inst, dest, actuals)); + if let Some(edge) = OutEdge::new(&bump, &func.dfg, inst, dest) { + summary.dests.push(edge); } } } @@ -211,7 +248,7 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) // in the summary, *unless* they have neither formals nor any // param-carrying branches/jumps. if formals.len() > 0 || summary.dests.len() > 0 { - summaries.insert(b, summary); + summaries[b] = summary; } } @@ -227,7 +264,7 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) // Set up initial solver state let mut state = SolverState::new(); - for &&b in &blocks_reverse_postorder { + for b in domtree.cfg_postorder().iter().rev().copied() { // For each block, get the formals if b == entry_block { continue; @@ -246,27 +283,18 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) iter_no += 1; let mut changed = false; - for &src in &blocks_reverse_postorder { - let mb_src_summary = summaries.get(src); - // The src block might have no summary. This means it has no - // branches/jumps that carry parameters *and* it doesn't take any - // parameters itself. Phase 1 ensures this. So we can ignore it. - if mb_src_summary.is_none() { - continue; - } - let src_summary = mb_src_summary.unwrap(); - for (_inst, dst, src_actuals) in &src_summary.dests { - assert!(*dst != entry_block); + for src in domtree.cfg_postorder().iter().rev().copied() { + let src_summary = &summaries[src]; + for edge in &src_summary.dests { + assert!(edge.block != entry_block); // By contrast, the dst block must have a summary. Phase 1 // will have only included an entry in `src_summary.dests` if // that branch/jump carried at least one parameter. So the // dst block does take parameters, so it must have a summary. - let dst_summary = summaries - .get(dst) - .expect("remove_constant_phis: dst block has no summary"); + let dst_summary = &summaries[edge.block]; let dst_formals = &dst_summary.formals; - assert_eq!(src_actuals.len(), dst_formals.len()); - for (formal, actual) in dst_formals.iter().zip(src_actuals.iter()) { + assert_eq!(edge.args.len(), dst_formals.len()); + for (formal, actual) in dst_formals.iter().zip(edge.args) { // Find the abstract value for `actual`. If it is a block // formal parameter then the most recent abstract value is // to be found in the solver state. If not, then it's a @@ -305,14 +333,14 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) // Make up a set of blocks that need editing. let mut need_editing = FxHashSet::::default(); - for (block, summary) in &summaries { - if *block == entry_block { + for (block, summary) in summaries.iter() { + if block == entry_block { continue; } - for formal in &summary.formals { + for formal in summary.formals { let formal_absval = state.get(*formal); if formal_absval.is_one() { - need_editing.insert(*block); + need_editing.insert(block); break; } } @@ -344,19 +372,19 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) // Secondly, visit all branch insns. If the destination has had its // formals changed, change the actuals accordingly. Don't scan all insns, // rather just visit those as listed in the summaries we prepared earlier. - for (_src_block, summary) in &summaries { - for (inst, dst_block, _src_actuals) in &summary.dests { - if !need_editing.contains(dst_block) { + for summary in summaries.values() { + for edge in &summary.dests { + if !need_editing.contains(&edge.block) { continue; } - let old_actuals = func.dfg[*inst].take_value_list().unwrap(); + let old_actuals = func.dfg[edge.inst].take_value_list().unwrap(); let num_old_actuals = old_actuals.len(&func.dfg.value_lists); - let num_fixed_actuals = func.dfg[*inst] + let num_fixed_actuals = func.dfg[edge.inst] .opcode() .constraints() .num_fixed_value_arguments(); - let dst_summary = summaries.get(&dst_block).unwrap(); + let dst_summary = &summaries[edge.block]; // Check that the numbers of arguments make sense. assert!(num_fixed_actuals <= num_old_actuals); @@ -384,7 +412,7 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree) new_actuals.push(actual_i, &mut func.dfg.value_lists); } } - func.dfg[*inst].put_value_list(new_actuals); + func.dfg[edge.inst].put_value_list(new_actuals); } } diff --git a/supply-chain/audits.toml b/supply-chain/audits.toml index c172585c19..841e41faef 100644 --- a/supply-chain/audits.toml +++ b/supply-chain/audits.toml @@ -1,12 +1,27 @@ # cargo-vet audits file +[[audits.arrayvec]] +who = "Nick Fitzgerald " +criteria = "safe-to-deploy" +version = "0.7.2" +notes = """ +Well documented invariants, good assertions for those invariants in unsafe code, +and tested with MIRI to boot. LGTM. +""" + [[audits.backtrace]] who = "Alex Crichton " criteria = "safe-to-deploy" version = "0.3.66" notes = "I am the author of this crate." +[[audits.bumpalo]] +who = "Nick Fitzgerald " +criteria = "safe-to-deploy" +version = "3.9.1" +notes = "I am the author of this crate." + [[audits.cc]] who = "Alex Crichton " criteria = "safe-to-deploy"