cranelift-frontend: Reuse visited block sets in SSABuilder::can_optimize_var_lookup (#4536)

First, we switch from a `BTreeSet` to a `HashSet` because clearing a `BTreeSet`
will deallocate the btree's nodes but clearing a `HashSet` will not deallocate
the backing hash table, saving the space to reuse for future insertions.

Then, we reuse the same set (and therefore the same allocation) across every
call to `can_optimize_var_lookup`.

This results in a 1.22x to 1.32x speed up on various Sightglass benchmarks:

```
compilation :: nanoseconds :: benchmarks/pulldown-cmark/benchmark.wasm

  Δ = 39478181.76 ± 3441880.32 (confidence = 99%)

  main.so is 0.75x to 0.79x faster than reuse-set.so!
  reuse-set.so is 1.27x to 1.32x faster than main.so!

  [160128343 172174751.09 213325968] main.so
  [115055695 132696569.33 200782128] reuse-set.so

compilation :: nanoseconds :: benchmarks/bz2/benchmark.wasm

  Δ = 22576954.88 ± 1830771.68 (confidence = 99%)

  main.so is 0.77x to 0.81x faster than reuse-set.so!
  reuse-set.so is 1.25x to 1.29x faster than main.so!

  [100449245 106820149.65 118628066] main.so
  [77039172 84243194.77 128168647] reuse-set.so

compilation :: nanoseconds :: benchmarks/spidermonkey/benchmark.wasm

  Δ = 664533554.97 ± 22109170.05 (confidence = 99%)

  main.so is 0.81x to 0.82x faster than reuse-set.so!
  reuse-set.so is 1.22x to 1.23x faster than main.so!

  [3549762523 3640587103.35 3798662501] main.so
  [2793335181 2976053548.38 3192950484] reuse-set.so
```
This commit is contained in:
Nick Fitzgerald
2022-07-26 18:38:24 -07:00
committed by GitHub
parent 0e6ffd0243
commit 50b9195882

View File

@@ -8,7 +8,6 @@
//! <https://link.springer.com/content/pdf/10.1007/978-3-642-37051-9_6.pdf>
use crate::Variable;
use alloc::collections::BTreeSet;
use alloc::vec::Vec;
use core::convert::TryInto;
use core::mem;
@@ -22,6 +21,7 @@ use cranelift_codegen::ir::{
};
use cranelift_codegen::packed_option::PackedOption;
use smallvec::SmallVec;
use std::collections::HashSet;
/// Structure containing the data relevant the construction of SSA for a given function.
///
@@ -53,6 +53,10 @@ pub struct SSABuilder {
/// Side effects accumulated in the `use_var`/`predecessors_lookup` state machine.
side_effects: SideEffects,
/// Reused allocation for blocks we've already visited in the
/// `can_optimize_var_lookup` method.
visited: HashSet<Block>,
}
/// Side effects of a `use_var` or a `seal_block` method call.
@@ -129,6 +133,7 @@ impl SSABuilder {
calls: Vec::new(),
results: Vec::new(),
side_effects: SideEffects::new(),
visited: Default::default(),
}
}
@@ -282,14 +287,14 @@ impl SSABuilder {
/// marking visited blocks and aborting if we find a previously seen block.
/// We stop the search if we find a block with multiple predecessors since the
/// original algorithm can handle these cases.
fn can_optimize_var_lookup(&self, block: Block) -> bool {
fn can_optimize_var_lookup(&mut self, block: Block) -> bool {
// Check that the initial block only has one predecessor. This is only a requirement
// for the first block.
if self.predecessors(block).len() != 1 {
return false;
}
let mut visited = BTreeSet::new();
self.visited.clear();
let mut current = block;
loop {
let predecessors = self.predecessors(current);
@@ -307,11 +312,11 @@ impl SSABuilder {
return true;
}
if visited.contains(&current) {
let next_current = predecessors[0].block;
if !self.visited.insert(current) {
return false;
}
visited.insert(current);
current = predecessors[0].block;
current = next_current;
}
}