VirtRegs::find: use SmallVec instead of Vec for val_stack.

Pushing onto the `val_stack` vector is Cranelift's biggest source of calls to
malloc/realloc/free, by some margin.  It accounts for about 27.7% of all heap
blocks allocated when compiling wasm_lua_binarytrees.  This change removes
pretty much all of that dynamic allocation by changing to a
SmallVec<[Value; 8]> instead.  In testing, an inline capacity of 4 already
gets all the gains to be had, so 8 gives some safety margin and is harmless
from a stack-use perspective: 8 Values will occupy 32 bytes.

As a bonus, this change also reduces the compiler's dynamic instruction count
by about 0.5%.
This commit is contained in:
Julian Seward
2019-09-07 17:34:08 +02:00
committed by Benjamin Bouvier
parent 705bfacf10
commit 955cdd5f83
2 changed files with 3 additions and 1 deletion

View File

@@ -21,6 +21,7 @@ hashmap_core = { version = "0.1.9", optional = true }
target-lexicon = "0.8.1"
log = { version = "0.4.6", default-features = false }
serde = { version = "1.0.94", features = ["derive"], optional = true }
smallvec = { version = "0.6.10" }
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
# Please don't add any unless they are essential to the task of creating binary
# machine code. Integration tests that need external dependencies can be

View File

@@ -21,6 +21,7 @@ use crate::packed_option::PackedOption;
use crate::ref_slice::ref_slice;
use core::cmp::Ordering;
use core::fmt;
use smallvec::SmallVec;
use std::vec::Vec;
/// A virtual register reference.
@@ -292,7 +293,7 @@ impl VirtRegs {
/// Find the leader value and rank of the set containing `v`.
/// Compress the path if needed.
fn find(&mut self, mut val: Value) -> (Value, u32) {
let mut val_stack = vec![];
let mut val_stack = SmallVec::<[Value; 8]>::new();
let found = loop {
match UFEntry::decode(self.union_find[val]) {
UFEntry::Rank(rank) => break (val, rank),