cranelift-frontend: Reuse visited block sets in SSABuilder::can_optimize_var_lookup (#4536)
First, we switch from a `BTreeSet` to a `HashSet` because clearing a `BTreeSet` will deallocate the btree's nodes but clearing a `HashSet` will not deallocate the backing hash table, saving the space to reuse for future insertions. Then, we reuse the same set (and therefore the same allocation) across every call to `can_optimize_var_lookup`. This results in a 1.22x to 1.32x speed up on various Sightglass benchmarks: ``` compilation :: nanoseconds :: benchmarks/pulldown-cmark/benchmark.wasm Δ = 39478181.76 ± 3441880.32 (confidence = 99%) main.so is 0.75x to 0.79x faster than reuse-set.so! reuse-set.so is 1.27x to 1.32x faster than main.so! [160128343 172174751.09 213325968] main.so [115055695 132696569.33 200782128] reuse-set.so compilation :: nanoseconds :: benchmarks/bz2/benchmark.wasm Δ = 22576954.88 ± 1830771.68 (confidence = 99%) main.so is 0.77x to 0.81x faster than reuse-set.so! reuse-set.so is 1.25x to 1.29x faster than main.so! [100449245 106820149.65 118628066] main.so [77039172 84243194.77 128168647] reuse-set.so compilation :: nanoseconds :: benchmarks/spidermonkey/benchmark.wasm Δ = 664533554.97 ± 22109170.05 (confidence = 99%) main.so is 0.81x to 0.82x faster than reuse-set.so! reuse-set.so is 1.22x to 1.23x faster than main.so! [3549762523 3640587103.35 3798662501] main.so [2793335181 2976053548.38 3192950484] reuse-set.so ```
This commit is contained in:
@@ -8,7 +8,6 @@
|
||||
//! <https://link.springer.com/content/pdf/10.1007/978-3-642-37051-9_6.pdf>
|
||||
|
||||
use crate::Variable;
|
||||
use alloc::collections::BTreeSet;
|
||||
use alloc::vec::Vec;
|
||||
use core::convert::TryInto;
|
||||
use core::mem;
|
||||
@@ -22,6 +21,7 @@ use cranelift_codegen::ir::{
|
||||
};
|
||||
use cranelift_codegen::packed_option::PackedOption;
|
||||
use smallvec::SmallVec;
|
||||
use std::collections::HashSet;
|
||||
|
||||
/// Structure containing the data relevant the construction of SSA for a given function.
|
||||
///
|
||||
@@ -53,6 +53,10 @@ pub struct SSABuilder {
|
||||
|
||||
/// Side effects accumulated in the `use_var`/`predecessors_lookup` state machine.
|
||||
side_effects: SideEffects,
|
||||
|
||||
/// Reused allocation for blocks we've already visited in the
|
||||
/// `can_optimize_var_lookup` method.
|
||||
visited: HashSet<Block>,
|
||||
}
|
||||
|
||||
/// Side effects of a `use_var` or a `seal_block` method call.
|
||||
@@ -129,6 +133,7 @@ impl SSABuilder {
|
||||
calls: Vec::new(),
|
||||
results: Vec::new(),
|
||||
side_effects: SideEffects::new(),
|
||||
visited: Default::default(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -282,14 +287,14 @@ impl SSABuilder {
|
||||
/// marking visited blocks and aborting if we find a previously seen block.
|
||||
/// We stop the search if we find a block with multiple predecessors since the
|
||||
/// original algorithm can handle these cases.
|
||||
fn can_optimize_var_lookup(&self, block: Block) -> bool {
|
||||
fn can_optimize_var_lookup(&mut self, block: Block) -> bool {
|
||||
// Check that the initial block only has one predecessor. This is only a requirement
|
||||
// for the first block.
|
||||
if self.predecessors(block).len() != 1 {
|
||||
return false;
|
||||
}
|
||||
|
||||
let mut visited = BTreeSet::new();
|
||||
self.visited.clear();
|
||||
let mut current = block;
|
||||
loop {
|
||||
let predecessors = self.predecessors(current);
|
||||
@@ -307,11 +312,11 @@ impl SSABuilder {
|
||||
return true;
|
||||
}
|
||||
|
||||
if visited.contains(¤t) {
|
||||
let next_current = predecessors[0].block;
|
||||
if !self.visited.insert(current) {
|
||||
return false;
|
||||
}
|
||||
visited.insert(current);
|
||||
current = predecessors[0].block;
|
||||
current = next_current;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user