Cranelift: Use bump allocation in remove_constant_phis pass (#4710)

* Cranelift: Use bump allocation in `remove_constant_phis` pass

This makes compilation 2-6% faster for Sightglass's bz2 benchmark:

```
compilation :: cycles :: benchmarks/bz2/benchmark.wasm

  Δ = 7290648.36 ± 4245152.07 (confidence = 99%)

  bump.so is 1.02x to 1.06x faster than main.so!

  [166388177 183238542.98 214732518] bump.so
  [172836648 190529191.34 217514271] main.so

compilation :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm

  No difference in performance.

  [182220055 225793551.12 277857575] bump.so
  [193212613 227784078.61 277175335] main.so

compilation :: cycles :: benchmarks/spidermonkey/benchmark.wasm

  No difference in performance.

  [3848442474 4295214144.37 4665127241] bump.so
  [3969505457 4262415290.10 4563869974] main.so
```

* Add audit for `bumpalo`

* Add an audit of `arrayvec` version 0.7.2

* Remove unnecessary `collect` into `Vec`

I wasn't able to measure any perf difference here, but its nice to do anyways.

* Use a `SecondaryMap` for keeping track of summaries
This commit is contained in:
Nick Fitzgerald
2022-08-15 14:36:01 -07:00
committed by GitHub
parent cc955e4e7e
commit ae7688059d
4 changed files with 116 additions and 63 deletions

8
Cargo.lock generated
View File

@@ -102,6 +102,12 @@ dependencies = [
"derive_arbitrary", "derive_arbitrary",
] ]
[[package]]
name = "arrayvec"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]] [[package]]
name = "async-trait" name = "async-trait"
version = "0.1.53" version = "0.1.53"
@@ -529,7 +535,9 @@ dependencies = [
name = "cranelift-codegen" name = "cranelift-codegen"
version = "0.88.0" version = "0.88.0"
dependencies = [ dependencies = [
"arrayvec",
"bincode", "bincode",
"bumpalo",
"cranelift-bforest", "cranelift-bforest",
"cranelift-codegen-meta", "cranelift-codegen-meta",
"cranelift-codegen-shared", "cranelift-codegen-shared",

View File

@@ -13,6 +13,8 @@ build = "build.rs"
edition = "2021" edition = "2021"
[dependencies] [dependencies]
arrayvec = "0.7"
bumpalo = "3"
cranelift-codegen-shared = { path = "./shared", version = "0.88.0" } cranelift-codegen-shared = { path = "./shared", version = "0.88.0" }
cranelift-entity = { path = "../entity", version = "0.88.0" } cranelift-entity = { path = "../entity", version = "0.88.0" }
cranelift-bforest = { path = "../bforest", version = "0.88.0" } cranelift-bforest = { path = "../bforest", version = "0.88.0" }

View File

@@ -4,13 +4,15 @@ use crate::dominator_tree::DominatorTree;
use crate::entity::EntityList; use crate::entity::EntityList;
use crate::fx::FxHashMap; use crate::fx::FxHashMap;
use crate::fx::FxHashSet; use crate::fx::FxHashSet;
use crate::ir;
use crate::ir::instructions::BranchInfo; use crate::ir::instructions::BranchInfo;
use crate::ir::Function; use crate::ir::Function;
use crate::ir::{Block, Inst, Value}; use crate::ir::{Block, Inst, Value};
use crate::timing; use crate::timing;
use arrayvec::ArrayVec;
use smallvec::{smallvec, SmallVec}; use bumpalo::Bump;
use std::vec::Vec; use cranelift_entity::SecondaryMap;
use smallvec::SmallVec;
// A note on notation. For the sake of clarity, this file uses the phrase // A note on notation. For the sake of clarity, this file uses the phrase
// "formal parameters" to mean the `Value`s listed in the block head, and // "formal parameters" to mean the `Value`s listed in the block head, and
@@ -107,27 +109,73 @@ impl AbstractValue {
} }
} }
#[derive(Clone, Copy, Debug)]
struct OutEdge<'a> {
/// An instruction that transfers control.
inst: Inst,
/// The block that control is transferred to.
block: Block,
/// The arguments to that block.
///
/// These values can be from both groups A and B.
args: &'a [Value],
}
impl<'a> OutEdge<'a> {
/// Construct a new `OutEdge` for the given instruction.
///
/// Returns `None` if this is an edge without any block arguments, which
/// means we can ignore it for this analysis's purposes.
#[inline]
fn new(bump: &'a Bump, dfg: &ir::DataFlowGraph, inst: Inst, block: Block) -> Option<Self> {
let inst_var_args = dfg.inst_variable_args(inst);
// Skip edges without params.
if inst_var_args.is_empty() {
return None;
}
Some(OutEdge {
inst,
block,
args: bump.alloc_slice_fill_iter(
inst_var_args
.iter()
.map(|value| dfg.resolve_aliases(*value)),
),
})
}
}
/// For some block, a useful bundle of info. The `Block` itself is not stored /// For some block, a useful bundle of info. The `Block` itself is not stored
/// here since it will be the key in the associated `FxHashMap` -- see /// here since it will be the key in the associated `FxHashMap` -- see
/// `summaries` below. For the `SmallVec` tuning params: most blocks have /// `summaries` below. For the `SmallVec` tuning params: most blocks have
/// few parameters, hence `4`. And almost all blocks have either one or two /// few parameters, hence `4`. And almost all blocks have either one or two
/// successors, hence `2`. /// successors, hence `2`.
#[derive(Debug)] #[derive(Clone, Debug, Default)]
struct BlockSummary { struct BlockSummary<'a> {
/// Formal parameters for this `Block` /// Formal parameters for this `Block`.
formals: SmallVec<[Value; 4] /*Group A*/>, ///
/// These values are from group A.
formals: &'a [Value],
/// For each `Inst` in this block that transfers to another block: the /// Each outgoing edge from this block.
/// `Inst` itself, the destination `Block`, and the actual parameters ///
/// passed. We don't bother to include transfers that pass zero parameters /// We don't bother to include transfers that pass zero parameters
/// since that makes more work for the solver for no purpose. /// since that makes more work for the solver for no purpose.
dests: SmallVec<[(Inst, Block, SmallVec<[Value; 4] /*both Groups A and B*/>); 2]>, ///
/// Note that, because blocks used with `br_table`s cannot have block
/// arguments, there are at most two outgoing edges from these blocks.
dests: ArrayVec<OutEdge<'a>, 2>,
} }
impl BlockSummary {
fn new(formals: SmallVec<[Value; 4]>) -> Self { impl<'a> BlockSummary<'a> {
/// Construct a new `BlockSummary`, using `values` as its backing storage.
#[inline]
fn new(bump: &'a Bump, formals: &[Value]) -> Self {
Self { Self {
formals, formals: bump.alloc_slice_copy(formals),
dests: smallvec![], dests: Default::default(),
} }
} }
} }
@@ -171,21 +219,17 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
let _tt = timing::remove_constant_phis(); let _tt = timing::remove_constant_phis();
debug_assert!(domtree.is_valid()); debug_assert!(domtree.is_valid());
// Get the blocks, in reverse postorder
let blocks_reverse_postorder = domtree
.cfg_postorder()
.into_iter()
.rev()
.collect::<Vec<_>>();
// Phase 1 of 3: for each block, make a summary containing all relevant // Phase 1 of 3: for each block, make a summary containing all relevant
// info. The solver will iterate over the summaries, rather than having // info. The solver will iterate over the summaries, rather than having
// to inspect each instruction in each block. // to inspect each instruction in each block.
let mut summaries = FxHashMap::<Block, BlockSummary>::default(); let bump =
Bump::with_capacity(domtree.cfg_postorder().len() * 4 * std::mem::size_of::<Value>());
let mut summaries =
SecondaryMap::<Block, BlockSummary>::with_capacity(domtree.cfg_postorder().len());
for &&b in &blocks_reverse_postorder { for b in domtree.cfg_postorder().iter().rev().copied() {
let formals = func.dfg.block_params(b); let formals = func.dfg.block_params(b);
let mut summary = BlockSummary::new(SmallVec::from(formals)); let mut summary = BlockSummary::new(&bump, formals);
for inst in func.layout.block_insts(b) { for inst in func.layout.block_insts(b) {
let idetails = &func.dfg[inst]; let idetails = &func.dfg[inst];
@@ -194,15 +238,8 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
// `SingleDest` here. // `SingleDest` here.
if let BranchInfo::SingleDest(dest, _) = idetails.analyze_branch(&func.dfg.value_lists) if let BranchInfo::SingleDest(dest, _) = idetails.analyze_branch(&func.dfg.value_lists)
{ {
let inst_var_args = func.dfg.inst_variable_args(inst); if let Some(edge) = OutEdge::new(&bump, &func.dfg, inst, dest) {
// Skip branches/jumps that carry no params. summary.dests.push(edge);
if inst_var_args.len() > 0 {
let mut actuals = SmallVec::<[Value; 4]>::new();
for arg in inst_var_args {
let arg = func.dfg.resolve_aliases(*arg);
actuals.push(arg);
}
summary.dests.push((inst, dest, actuals));
} }
} }
} }
@@ -211,7 +248,7 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
// in the summary, *unless* they have neither formals nor any // in the summary, *unless* they have neither formals nor any
// param-carrying branches/jumps. // param-carrying branches/jumps.
if formals.len() > 0 || summary.dests.len() > 0 { if formals.len() > 0 || summary.dests.len() > 0 {
summaries.insert(b, summary); summaries[b] = summary;
} }
} }
@@ -227,7 +264,7 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
// Set up initial solver state // Set up initial solver state
let mut state = SolverState::new(); let mut state = SolverState::new();
for &&b in &blocks_reverse_postorder { for b in domtree.cfg_postorder().iter().rev().copied() {
// For each block, get the formals // For each block, get the formals
if b == entry_block { if b == entry_block {
continue; continue;
@@ -246,27 +283,18 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
iter_no += 1; iter_no += 1;
let mut changed = false; let mut changed = false;
for &src in &blocks_reverse_postorder { for src in domtree.cfg_postorder().iter().rev().copied() {
let mb_src_summary = summaries.get(src); let src_summary = &summaries[src];
// The src block might have no summary. This means it has no for edge in &src_summary.dests {
// branches/jumps that carry parameters *and* it doesn't take any assert!(edge.block != entry_block);
// parameters itself. Phase 1 ensures this. So we can ignore it.
if mb_src_summary.is_none() {
continue;
}
let src_summary = mb_src_summary.unwrap();
for (_inst, dst, src_actuals) in &src_summary.dests {
assert!(*dst != entry_block);
// By contrast, the dst block must have a summary. Phase 1 // By contrast, the dst block must have a summary. Phase 1
// will have only included an entry in `src_summary.dests` if // will have only included an entry in `src_summary.dests` if
// that branch/jump carried at least one parameter. So the // that branch/jump carried at least one parameter. So the
// dst block does take parameters, so it must have a summary. // dst block does take parameters, so it must have a summary.
let dst_summary = summaries let dst_summary = &summaries[edge.block];
.get(dst)
.expect("remove_constant_phis: dst block has no summary");
let dst_formals = &dst_summary.formals; let dst_formals = &dst_summary.formals;
assert_eq!(src_actuals.len(), dst_formals.len()); assert_eq!(edge.args.len(), dst_formals.len());
for (formal, actual) in dst_formals.iter().zip(src_actuals.iter()) { for (formal, actual) in dst_formals.iter().zip(edge.args) {
// Find the abstract value for `actual`. If it is a block // Find the abstract value for `actual`. If it is a block
// formal parameter then the most recent abstract value is // formal parameter then the most recent abstract value is
// to be found in the solver state. If not, then it's a // to be found in the solver state. If not, then it's a
@@ -305,14 +333,14 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
// Make up a set of blocks that need editing. // Make up a set of blocks that need editing.
let mut need_editing = FxHashSet::<Block>::default(); let mut need_editing = FxHashSet::<Block>::default();
for (block, summary) in &summaries { for (block, summary) in summaries.iter() {
if *block == entry_block { if block == entry_block {
continue; continue;
} }
for formal in &summary.formals { for formal in summary.formals {
let formal_absval = state.get(*formal); let formal_absval = state.get(*formal);
if formal_absval.is_one() { if formal_absval.is_one() {
need_editing.insert(*block); need_editing.insert(block);
break; break;
} }
} }
@@ -344,19 +372,19 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
// Secondly, visit all branch insns. If the destination has had its // Secondly, visit all branch insns. If the destination has had its
// formals changed, change the actuals accordingly. Don't scan all insns, // formals changed, change the actuals accordingly. Don't scan all insns,
// rather just visit those as listed in the summaries we prepared earlier. // rather just visit those as listed in the summaries we prepared earlier.
for (_src_block, summary) in &summaries { for summary in summaries.values() {
for (inst, dst_block, _src_actuals) in &summary.dests { for edge in &summary.dests {
if !need_editing.contains(dst_block) { if !need_editing.contains(&edge.block) {
continue; continue;
} }
let old_actuals = func.dfg[*inst].take_value_list().unwrap(); let old_actuals = func.dfg[edge.inst].take_value_list().unwrap();
let num_old_actuals = old_actuals.len(&func.dfg.value_lists); let num_old_actuals = old_actuals.len(&func.dfg.value_lists);
let num_fixed_actuals = func.dfg[*inst] let num_fixed_actuals = func.dfg[edge.inst]
.opcode() .opcode()
.constraints() .constraints()
.num_fixed_value_arguments(); .num_fixed_value_arguments();
let dst_summary = summaries.get(&dst_block).unwrap(); let dst_summary = &summaries[edge.block];
// Check that the numbers of arguments make sense. // Check that the numbers of arguments make sense.
assert!(num_fixed_actuals <= num_old_actuals); assert!(num_fixed_actuals <= num_old_actuals);
@@ -384,7 +412,7 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
new_actuals.push(actual_i, &mut func.dfg.value_lists); new_actuals.push(actual_i, &mut func.dfg.value_lists);
} }
} }
func.dfg[*inst].put_value_list(new_actuals); func.dfg[edge.inst].put_value_list(new_actuals);
} }
} }

View File

@@ -1,12 +1,27 @@
# cargo-vet audits file # cargo-vet audits file
[[audits.arrayvec]]
who = "Nick Fitzgerald <fitzgen@gmail.com>"
criteria = "safe-to-deploy"
version = "0.7.2"
notes = """
Well documented invariants, good assertions for those invariants in unsafe code,
and tested with MIRI to boot. LGTM.
"""
[[audits.backtrace]] [[audits.backtrace]]
who = "Alex Crichton <alex@alexcrichton.com>" who = "Alex Crichton <alex@alexcrichton.com>"
criteria = "safe-to-deploy" criteria = "safe-to-deploy"
version = "0.3.66" version = "0.3.66"
notes = "I am the author of this crate." notes = "I am the author of this crate."
[[audits.bumpalo]]
who = "Nick Fitzgerald <fitzgen@gmail.com>"
criteria = "safe-to-deploy"
version = "3.9.1"
notes = "I am the author of this crate."
[[audits.cc]] [[audits.cc]]
who = "Alex Crichton <alex@alexcrichton.com>" who = "Alex Crichton <alex@alexcrichton.com>"
criteria = "safe-to-deploy" criteria = "safe-to-deploy"