From 50b919588294f918aa61f030f05115bfc797ab86 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Tue, 26 Jul 2022 18:38:24 -0700 Subject: [PATCH] cranelift-frontend: Reuse visited block sets in `SSABuilder::can_optimize_var_lookup` (#4536) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit First, we switch from a `BTreeSet` to a `HashSet` because clearing a `BTreeSet` will deallocate the btree's nodes but clearing a `HashSet` will not deallocate the backing hash table, saving the space to reuse for future insertions. Then, we reuse the same set (and therefore the same allocation) across every call to `can_optimize_var_lookup`. This results in a 1.22x to 1.32x speed up on various Sightglass benchmarks: ``` compilation :: nanoseconds :: benchmarks/pulldown-cmark/benchmark.wasm Δ = 39478181.76 ± 3441880.32 (confidence = 99%) main.so is 0.75x to 0.79x faster than reuse-set.so! reuse-set.so is 1.27x to 1.32x faster than main.so! [160128343 172174751.09 213325968] main.so [115055695 132696569.33 200782128] reuse-set.so compilation :: nanoseconds :: benchmarks/bz2/benchmark.wasm Δ = 22576954.88 ± 1830771.68 (confidence = 99%) main.so is 0.77x to 0.81x faster than reuse-set.so! reuse-set.so is 1.25x to 1.29x faster than main.so! [100449245 106820149.65 118628066] main.so [77039172 84243194.77 128168647] reuse-set.so compilation :: nanoseconds :: benchmarks/spidermonkey/benchmark.wasm Δ = 664533554.97 ± 22109170.05 (confidence = 99%) main.so is 0.81x to 0.82x faster than reuse-set.so! reuse-set.so is 1.22x to 1.23x faster than main.so! [3549762523 3640587103.35 3798662501] main.so [2793335181 2976053548.38 3192950484] reuse-set.so ``` --- cranelift/frontend/src/ssa.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/cranelift/frontend/src/ssa.rs b/cranelift/frontend/src/ssa.rs index ce03103176..086a44cc27 100644 --- a/cranelift/frontend/src/ssa.rs +++ b/cranelift/frontend/src/ssa.rs @@ -8,7 +8,6 @@ //! use crate::Variable; -use alloc::collections::BTreeSet; use alloc::vec::Vec; use core::convert::TryInto; use core::mem; @@ -22,6 +21,7 @@ use cranelift_codegen::ir::{ }; use cranelift_codegen::packed_option::PackedOption; use smallvec::SmallVec; +use std::collections::HashSet; /// Structure containing the data relevant the construction of SSA for a given function. /// @@ -53,6 +53,10 @@ pub struct SSABuilder { /// Side effects accumulated in the `use_var`/`predecessors_lookup` state machine. side_effects: SideEffects, + + /// Reused allocation for blocks we've already visited in the + /// `can_optimize_var_lookup` method. + visited: HashSet, } /// Side effects of a `use_var` or a `seal_block` method call. @@ -129,6 +133,7 @@ impl SSABuilder { calls: Vec::new(), results: Vec::new(), side_effects: SideEffects::new(), + visited: Default::default(), } } @@ -282,14 +287,14 @@ impl SSABuilder { /// marking visited blocks and aborting if we find a previously seen block. /// We stop the search if we find a block with multiple predecessors since the /// original algorithm can handle these cases. - fn can_optimize_var_lookup(&self, block: Block) -> bool { + fn can_optimize_var_lookup(&mut self, block: Block) -> bool { // Check that the initial block only has one predecessor. This is only a requirement // for the first block. if self.predecessors(block).len() != 1 { return false; } - let mut visited = BTreeSet::new(); + self.visited.clear(); let mut current = block; loop { let predecessors = self.predecessors(current); @@ -307,11 +312,11 @@ impl SSABuilder { return true; } - if visited.contains(¤t) { + let next_current = predecessors[0].block; + if !self.visited.insert(current) { return false; } - visited.insert(current); - current = predecessors[0].block; + current = next_current; } }