Cranelift: Use bump allocation in remove_constant_phis pass (#4710)
* Cranelift: Use bump allocation in `remove_constant_phis` pass This makes compilation 2-6% faster for Sightglass's bz2 benchmark: ``` compilation :: cycles :: benchmarks/bz2/benchmark.wasm Δ = 7290648.36 ± 4245152.07 (confidence = 99%) bump.so is 1.02x to 1.06x faster than main.so! [166388177 183238542.98 214732518] bump.so [172836648 190529191.34 217514271] main.so compilation :: cycles :: benchmarks/pulldown-cmark/benchmark.wasm No difference in performance. [182220055 225793551.12 277857575] bump.so [193212613 227784078.61 277175335] main.so compilation :: cycles :: benchmarks/spidermonkey/benchmark.wasm No difference in performance. [3848442474 4295214144.37 4665127241] bump.so [3969505457 4262415290.10 4563869974] main.so ``` * Add audit for `bumpalo` * Add an audit of `arrayvec` version 0.7.2 * Remove unnecessary `collect` into `Vec` I wasn't able to measure any perf difference here, but its nice to do anyways. * Use a `SecondaryMap` for keeping track of summaries
This commit is contained in:
8
Cargo.lock
generated
8
Cargo.lock
generated
@@ -102,6 +102,12 @@ dependencies = [
|
|||||||
"derive_arbitrary",
|
"derive_arbitrary",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "arrayvec"
|
||||||
|
version = "0.7.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-trait"
|
name = "async-trait"
|
||||||
version = "0.1.53"
|
version = "0.1.53"
|
||||||
@@ -529,7 +535,9 @@ dependencies = [
|
|||||||
name = "cranelift-codegen"
|
name = "cranelift-codegen"
|
||||||
version = "0.88.0"
|
version = "0.88.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"arrayvec",
|
||||||
"bincode",
|
"bincode",
|
||||||
|
"bumpalo",
|
||||||
"cranelift-bforest",
|
"cranelift-bforest",
|
||||||
"cranelift-codegen-meta",
|
"cranelift-codegen-meta",
|
||||||
"cranelift-codegen-shared",
|
"cranelift-codegen-shared",
|
||||||
|
|||||||
@@ -13,6 +13,8 @@ build = "build.rs"
|
|||||||
edition = "2021"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
arrayvec = "0.7"
|
||||||
|
bumpalo = "3"
|
||||||
cranelift-codegen-shared = { path = "./shared", version = "0.88.0" }
|
cranelift-codegen-shared = { path = "./shared", version = "0.88.0" }
|
||||||
cranelift-entity = { path = "../entity", version = "0.88.0" }
|
cranelift-entity = { path = "../entity", version = "0.88.0" }
|
||||||
cranelift-bforest = { path = "../bforest", version = "0.88.0" }
|
cranelift-bforest = { path = "../bforest", version = "0.88.0" }
|
||||||
|
|||||||
@@ -4,13 +4,15 @@ use crate::dominator_tree::DominatorTree;
|
|||||||
use crate::entity::EntityList;
|
use crate::entity::EntityList;
|
||||||
use crate::fx::FxHashMap;
|
use crate::fx::FxHashMap;
|
||||||
use crate::fx::FxHashSet;
|
use crate::fx::FxHashSet;
|
||||||
|
use crate::ir;
|
||||||
use crate::ir::instructions::BranchInfo;
|
use crate::ir::instructions::BranchInfo;
|
||||||
use crate::ir::Function;
|
use crate::ir::Function;
|
||||||
use crate::ir::{Block, Inst, Value};
|
use crate::ir::{Block, Inst, Value};
|
||||||
use crate::timing;
|
use crate::timing;
|
||||||
|
use arrayvec::ArrayVec;
|
||||||
use smallvec::{smallvec, SmallVec};
|
use bumpalo::Bump;
|
||||||
use std::vec::Vec;
|
use cranelift_entity::SecondaryMap;
|
||||||
|
use smallvec::SmallVec;
|
||||||
|
|
||||||
// A note on notation. For the sake of clarity, this file uses the phrase
|
// A note on notation. For the sake of clarity, this file uses the phrase
|
||||||
// "formal parameters" to mean the `Value`s listed in the block head, and
|
// "formal parameters" to mean the `Value`s listed in the block head, and
|
||||||
@@ -107,27 +109,73 @@ impl AbstractValue {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, Debug)]
|
||||||
|
struct OutEdge<'a> {
|
||||||
|
/// An instruction that transfers control.
|
||||||
|
inst: Inst,
|
||||||
|
/// The block that control is transferred to.
|
||||||
|
block: Block,
|
||||||
|
/// The arguments to that block.
|
||||||
|
///
|
||||||
|
/// These values can be from both groups A and B.
|
||||||
|
args: &'a [Value],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> OutEdge<'a> {
|
||||||
|
/// Construct a new `OutEdge` for the given instruction.
|
||||||
|
///
|
||||||
|
/// Returns `None` if this is an edge without any block arguments, which
|
||||||
|
/// means we can ignore it for this analysis's purposes.
|
||||||
|
#[inline]
|
||||||
|
fn new(bump: &'a Bump, dfg: &ir::DataFlowGraph, inst: Inst, block: Block) -> Option<Self> {
|
||||||
|
let inst_var_args = dfg.inst_variable_args(inst);
|
||||||
|
|
||||||
|
// Skip edges without params.
|
||||||
|
if inst_var_args.is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
Some(OutEdge {
|
||||||
|
inst,
|
||||||
|
block,
|
||||||
|
args: bump.alloc_slice_fill_iter(
|
||||||
|
inst_var_args
|
||||||
|
.iter()
|
||||||
|
.map(|value| dfg.resolve_aliases(*value)),
|
||||||
|
),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// For some block, a useful bundle of info. The `Block` itself is not stored
|
/// For some block, a useful bundle of info. The `Block` itself is not stored
|
||||||
/// here since it will be the key in the associated `FxHashMap` -- see
|
/// here since it will be the key in the associated `FxHashMap` -- see
|
||||||
/// `summaries` below. For the `SmallVec` tuning params: most blocks have
|
/// `summaries` below. For the `SmallVec` tuning params: most blocks have
|
||||||
/// few parameters, hence `4`. And almost all blocks have either one or two
|
/// few parameters, hence `4`. And almost all blocks have either one or two
|
||||||
/// successors, hence `2`.
|
/// successors, hence `2`.
|
||||||
#[derive(Debug)]
|
#[derive(Clone, Debug, Default)]
|
||||||
struct BlockSummary {
|
struct BlockSummary<'a> {
|
||||||
/// Formal parameters for this `Block`
|
/// Formal parameters for this `Block`.
|
||||||
formals: SmallVec<[Value; 4] /*Group A*/>,
|
///
|
||||||
|
/// These values are from group A.
|
||||||
|
formals: &'a [Value],
|
||||||
|
|
||||||
/// For each `Inst` in this block that transfers to another block: the
|
/// Each outgoing edge from this block.
|
||||||
/// `Inst` itself, the destination `Block`, and the actual parameters
|
///
|
||||||
/// passed. We don't bother to include transfers that pass zero parameters
|
/// We don't bother to include transfers that pass zero parameters
|
||||||
/// since that makes more work for the solver for no purpose.
|
/// since that makes more work for the solver for no purpose.
|
||||||
dests: SmallVec<[(Inst, Block, SmallVec<[Value; 4] /*both Groups A and B*/>); 2]>,
|
///
|
||||||
|
/// Note that, because blocks used with `br_table`s cannot have block
|
||||||
|
/// arguments, there are at most two outgoing edges from these blocks.
|
||||||
|
dests: ArrayVec<OutEdge<'a>, 2>,
|
||||||
}
|
}
|
||||||
impl BlockSummary {
|
|
||||||
fn new(formals: SmallVec<[Value; 4]>) -> Self {
|
impl<'a> BlockSummary<'a> {
|
||||||
|
/// Construct a new `BlockSummary`, using `values` as its backing storage.
|
||||||
|
#[inline]
|
||||||
|
fn new(bump: &'a Bump, formals: &[Value]) -> Self {
|
||||||
Self {
|
Self {
|
||||||
formals,
|
formals: bump.alloc_slice_copy(formals),
|
||||||
dests: smallvec![],
|
dests: Default::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -171,21 +219,17 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
|
|||||||
let _tt = timing::remove_constant_phis();
|
let _tt = timing::remove_constant_phis();
|
||||||
debug_assert!(domtree.is_valid());
|
debug_assert!(domtree.is_valid());
|
||||||
|
|
||||||
// Get the blocks, in reverse postorder
|
|
||||||
let blocks_reverse_postorder = domtree
|
|
||||||
.cfg_postorder()
|
|
||||||
.into_iter()
|
|
||||||
.rev()
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
// Phase 1 of 3: for each block, make a summary containing all relevant
|
// Phase 1 of 3: for each block, make a summary containing all relevant
|
||||||
// info. The solver will iterate over the summaries, rather than having
|
// info. The solver will iterate over the summaries, rather than having
|
||||||
// to inspect each instruction in each block.
|
// to inspect each instruction in each block.
|
||||||
let mut summaries = FxHashMap::<Block, BlockSummary>::default();
|
let bump =
|
||||||
|
Bump::with_capacity(domtree.cfg_postorder().len() * 4 * std::mem::size_of::<Value>());
|
||||||
|
let mut summaries =
|
||||||
|
SecondaryMap::<Block, BlockSummary>::with_capacity(domtree.cfg_postorder().len());
|
||||||
|
|
||||||
for &&b in &blocks_reverse_postorder {
|
for b in domtree.cfg_postorder().iter().rev().copied() {
|
||||||
let formals = func.dfg.block_params(b);
|
let formals = func.dfg.block_params(b);
|
||||||
let mut summary = BlockSummary::new(SmallVec::from(formals));
|
let mut summary = BlockSummary::new(&bump, formals);
|
||||||
|
|
||||||
for inst in func.layout.block_insts(b) {
|
for inst in func.layout.block_insts(b) {
|
||||||
let idetails = &func.dfg[inst];
|
let idetails = &func.dfg[inst];
|
||||||
@@ -194,15 +238,8 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
|
|||||||
// `SingleDest` here.
|
// `SingleDest` here.
|
||||||
if let BranchInfo::SingleDest(dest, _) = idetails.analyze_branch(&func.dfg.value_lists)
|
if let BranchInfo::SingleDest(dest, _) = idetails.analyze_branch(&func.dfg.value_lists)
|
||||||
{
|
{
|
||||||
let inst_var_args = func.dfg.inst_variable_args(inst);
|
if let Some(edge) = OutEdge::new(&bump, &func.dfg, inst, dest) {
|
||||||
// Skip branches/jumps that carry no params.
|
summary.dests.push(edge);
|
||||||
if inst_var_args.len() > 0 {
|
|
||||||
let mut actuals = SmallVec::<[Value; 4]>::new();
|
|
||||||
for arg in inst_var_args {
|
|
||||||
let arg = func.dfg.resolve_aliases(*arg);
|
|
||||||
actuals.push(arg);
|
|
||||||
}
|
|
||||||
summary.dests.push((inst, dest, actuals));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -211,7 +248,7 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
|
|||||||
// in the summary, *unless* they have neither formals nor any
|
// in the summary, *unless* they have neither formals nor any
|
||||||
// param-carrying branches/jumps.
|
// param-carrying branches/jumps.
|
||||||
if formals.len() > 0 || summary.dests.len() > 0 {
|
if formals.len() > 0 || summary.dests.len() > 0 {
|
||||||
summaries.insert(b, summary);
|
summaries[b] = summary;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -227,7 +264,7 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
|
|||||||
// Set up initial solver state
|
// Set up initial solver state
|
||||||
let mut state = SolverState::new();
|
let mut state = SolverState::new();
|
||||||
|
|
||||||
for &&b in &blocks_reverse_postorder {
|
for b in domtree.cfg_postorder().iter().rev().copied() {
|
||||||
// For each block, get the formals
|
// For each block, get the formals
|
||||||
if b == entry_block {
|
if b == entry_block {
|
||||||
continue;
|
continue;
|
||||||
@@ -246,27 +283,18 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
|
|||||||
iter_no += 1;
|
iter_no += 1;
|
||||||
let mut changed = false;
|
let mut changed = false;
|
||||||
|
|
||||||
for &src in &blocks_reverse_postorder {
|
for src in domtree.cfg_postorder().iter().rev().copied() {
|
||||||
let mb_src_summary = summaries.get(src);
|
let src_summary = &summaries[src];
|
||||||
// The src block might have no summary. This means it has no
|
for edge in &src_summary.dests {
|
||||||
// branches/jumps that carry parameters *and* it doesn't take any
|
assert!(edge.block != entry_block);
|
||||||
// parameters itself. Phase 1 ensures this. So we can ignore it.
|
|
||||||
if mb_src_summary.is_none() {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
let src_summary = mb_src_summary.unwrap();
|
|
||||||
for (_inst, dst, src_actuals) in &src_summary.dests {
|
|
||||||
assert!(*dst != entry_block);
|
|
||||||
// By contrast, the dst block must have a summary. Phase 1
|
// By contrast, the dst block must have a summary. Phase 1
|
||||||
// will have only included an entry in `src_summary.dests` if
|
// will have only included an entry in `src_summary.dests` if
|
||||||
// that branch/jump carried at least one parameter. So the
|
// that branch/jump carried at least one parameter. So the
|
||||||
// dst block does take parameters, so it must have a summary.
|
// dst block does take parameters, so it must have a summary.
|
||||||
let dst_summary = summaries
|
let dst_summary = &summaries[edge.block];
|
||||||
.get(dst)
|
|
||||||
.expect("remove_constant_phis: dst block has no summary");
|
|
||||||
let dst_formals = &dst_summary.formals;
|
let dst_formals = &dst_summary.formals;
|
||||||
assert_eq!(src_actuals.len(), dst_formals.len());
|
assert_eq!(edge.args.len(), dst_formals.len());
|
||||||
for (formal, actual) in dst_formals.iter().zip(src_actuals.iter()) {
|
for (formal, actual) in dst_formals.iter().zip(edge.args) {
|
||||||
// Find the abstract value for `actual`. If it is a block
|
// Find the abstract value for `actual`. If it is a block
|
||||||
// formal parameter then the most recent abstract value is
|
// formal parameter then the most recent abstract value is
|
||||||
// to be found in the solver state. If not, then it's a
|
// to be found in the solver state. If not, then it's a
|
||||||
@@ -305,14 +333,14 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
|
|||||||
|
|
||||||
// Make up a set of blocks that need editing.
|
// Make up a set of blocks that need editing.
|
||||||
let mut need_editing = FxHashSet::<Block>::default();
|
let mut need_editing = FxHashSet::<Block>::default();
|
||||||
for (block, summary) in &summaries {
|
for (block, summary) in summaries.iter() {
|
||||||
if *block == entry_block {
|
if block == entry_block {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
for formal in &summary.formals {
|
for formal in summary.formals {
|
||||||
let formal_absval = state.get(*formal);
|
let formal_absval = state.get(*formal);
|
||||||
if formal_absval.is_one() {
|
if formal_absval.is_one() {
|
||||||
need_editing.insert(*block);
|
need_editing.insert(block);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -344,19 +372,19 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
|
|||||||
// Secondly, visit all branch insns. If the destination has had its
|
// Secondly, visit all branch insns. If the destination has had its
|
||||||
// formals changed, change the actuals accordingly. Don't scan all insns,
|
// formals changed, change the actuals accordingly. Don't scan all insns,
|
||||||
// rather just visit those as listed in the summaries we prepared earlier.
|
// rather just visit those as listed in the summaries we prepared earlier.
|
||||||
for (_src_block, summary) in &summaries {
|
for summary in summaries.values() {
|
||||||
for (inst, dst_block, _src_actuals) in &summary.dests {
|
for edge in &summary.dests {
|
||||||
if !need_editing.contains(dst_block) {
|
if !need_editing.contains(&edge.block) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let old_actuals = func.dfg[*inst].take_value_list().unwrap();
|
let old_actuals = func.dfg[edge.inst].take_value_list().unwrap();
|
||||||
let num_old_actuals = old_actuals.len(&func.dfg.value_lists);
|
let num_old_actuals = old_actuals.len(&func.dfg.value_lists);
|
||||||
let num_fixed_actuals = func.dfg[*inst]
|
let num_fixed_actuals = func.dfg[edge.inst]
|
||||||
.opcode()
|
.opcode()
|
||||||
.constraints()
|
.constraints()
|
||||||
.num_fixed_value_arguments();
|
.num_fixed_value_arguments();
|
||||||
let dst_summary = summaries.get(&dst_block).unwrap();
|
let dst_summary = &summaries[edge.block];
|
||||||
|
|
||||||
// Check that the numbers of arguments make sense.
|
// Check that the numbers of arguments make sense.
|
||||||
assert!(num_fixed_actuals <= num_old_actuals);
|
assert!(num_fixed_actuals <= num_old_actuals);
|
||||||
@@ -384,7 +412,7 @@ pub fn do_remove_constant_phis(func: &mut Function, domtree: &mut DominatorTree)
|
|||||||
new_actuals.push(actual_i, &mut func.dfg.value_lists);
|
new_actuals.push(actual_i, &mut func.dfg.value_lists);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
func.dfg[*inst].put_value_list(new_actuals);
|
func.dfg[edge.inst].put_value_list(new_actuals);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,12 +1,27 @@
|
|||||||
|
|
||||||
# cargo-vet audits file
|
# cargo-vet audits file
|
||||||
|
|
||||||
|
[[audits.arrayvec]]
|
||||||
|
who = "Nick Fitzgerald <fitzgen@gmail.com>"
|
||||||
|
criteria = "safe-to-deploy"
|
||||||
|
version = "0.7.2"
|
||||||
|
notes = """
|
||||||
|
Well documented invariants, good assertions for those invariants in unsafe code,
|
||||||
|
and tested with MIRI to boot. LGTM.
|
||||||
|
"""
|
||||||
|
|
||||||
[[audits.backtrace]]
|
[[audits.backtrace]]
|
||||||
who = "Alex Crichton <alex@alexcrichton.com>"
|
who = "Alex Crichton <alex@alexcrichton.com>"
|
||||||
criteria = "safe-to-deploy"
|
criteria = "safe-to-deploy"
|
||||||
version = "0.3.66"
|
version = "0.3.66"
|
||||||
notes = "I am the author of this crate."
|
notes = "I am the author of this crate."
|
||||||
|
|
||||||
|
[[audits.bumpalo]]
|
||||||
|
who = "Nick Fitzgerald <fitzgen@gmail.com>"
|
||||||
|
criteria = "safe-to-deploy"
|
||||||
|
version = "3.9.1"
|
||||||
|
notes = "I am the author of this crate."
|
||||||
|
|
||||||
[[audits.cc]]
|
[[audits.cc]]
|
||||||
who = "Alex Crichton <alex@alexcrichton.com>"
|
who = "Alex Crichton <alex@alexcrichton.com>"
|
||||||
criteria = "safe-to-deploy"
|
criteria = "safe-to-deploy"
|
||||||
|
|||||||
Reference in New Issue
Block a user