From 07a5a88972d9a9dd6d263d9fa610968598aa07b5 Mon Sep 17 00:00:00 2001
From: Chris Fallin
Date: Thu, 6 May 2021 20:03:44 -0700
Subject: [PATCH] BitVec perf: use adaptive hybrid chunked small-array +
 FxHashMap.

---
 Cargo.toml     |   1 +
 src/bitvec.rs  | 262 ++++++++++++++++++++++++++++++-------------------
 src/ion/mod.rs |  31 +++---
 3 files changed, 177 insertions(+), 117 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index c54201c..802881b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,6 +10,7 @@ repository = "https://github.com/bytecodealliance/regalloc2"
 [dependencies]
 log = { version = "0.4.8", default-features = false }
 smallvec = "1.6.1"
+fxhash = "0.2.1"
 
 # The below are only needed for fuzzing.
 # Keep this in sync with libfuzzer_sys's crate version:
diff --git a/src/bitvec.rs b/src/bitvec.rs
index 5a1c949..7c0dbe2 100644
--- a/src/bitvec.rs
+++ b/src/bitvec.rs
@@ -5,158 +5,216 @@
 
 //! Bit vectors.
 
-use smallvec::{smallvec, SmallVec};
+use fxhash::FxHashMap;
+
+/// A hybrid large/small-mode sparse mapping from integer indices to elements.
+#[derive(Clone, Debug)]
+enum AdaptiveMap {
+    Small(u32, [u32; 4], [u64; 4]),
+    Large(FxHashMap<u32, u64>),
+}
+
+const INVALID: u32 = 0xffff_ffff;
+
+impl AdaptiveMap {
+    fn new() -> Self {
+        Self::Small(0, [INVALID, INVALID, INVALID, INVALID], [0, 0, 0, 0])
+    }
+    fn expand(&mut self) {
+        match self {
+            &mut Self::Small(len, ref keys, ref values) => {
+                let mut map = FxHashMap::default();
+                for i in 0..len {
+                    map.insert(keys[i as usize], values[i as usize]);
+                }
+                *self = Self::Large(map);
+            }
+            _ => {}
+        }
+    }
+    fn get_or_insert<'a>(&'a mut self, key: u32) -> &'a mut u64 {
+        let needs_expand = match self {
+            &mut Self::Small(len, ref keys, ..) => len == 4 && !keys.iter().any(|k| *k == key),
+            _ => false,
+        };
+        if needs_expand {
+            self.expand();
+        }
+
+        match self {
+            &mut Self::Small(ref mut len, ref mut keys, ref mut values) => {
+                for i in 0..*len {
+                    if keys[i as usize] == key {
+                        return &mut values[i as usize];
+                    }
+                }
+                assert!(*len < 4);
+                let idx = *len;
+                *len += 1;
+                keys[idx as usize] = key;
+                values[idx as usize] = 0;
+                &mut values[idx as usize]
+            }
+            &mut Self::Large(ref mut map) => map.entry(key).or_insert(0),
+        }
+    }
+    fn get_mut(&mut self, key: u32) -> Option<&mut u64> {
+        match self {
+            &mut Self::Small(len, ref keys, ref mut values) => {
+                for i in 0..len {
+                    if keys[i as usize] == key {
+                        return Some(&mut values[i as usize]);
+                    }
+                }
+                None
+            }
+            &mut Self::Large(ref mut map) => map.get_mut(&key),
+        }
+    }
+    fn get(&self, key: u32) -> Option<&u64> {
+        match self {
+            &Self::Small(len, ref keys, ref values) => {
+                for i in 0..len {
+                    if keys[i as usize] == key {
+                        return Some(&values[i as usize]);
+                    }
+                }
+                None
+            }
+            &Self::Large(ref map) => map.get(&key),
+        }
+    }
+    fn iter<'a>(&'a self) -> AdaptiveMapIter<'a> {
+        match self {
+            &Self::Small(len, ref keys, ref values) => {
+                AdaptiveMapIter::Small(&keys[0..len as usize], &values[0..len as usize])
+            }
+            &Self::Large(ref map) => AdaptiveMapIter::Large(map.iter()),
+        }
+    }
+}
+
+enum AdaptiveMapIter<'a> {
+    Small(&'a [u32], &'a [u64]),
+    Large(std::collections::hash_map::Iter<'a, u32, u64>),
+}
+
+impl<'a> std::iter::Iterator for AdaptiveMapIter<'a> {
+    type Item = (u32, u64);
+    fn next(&mut self) -> Option<Self::Item> {
+        match self {
+            &mut Self::Small(ref mut keys, ref mut values) => {
+                if keys.is_empty() {
+                    None
+                } else {
+                    let (k, v) = ((*keys)[0], (*values)[0]);
+                    *keys = &(*keys)[1..];
+                    *values = &(*values)[1..];
+                    Some((k, v))
+                }
+            }
+            &mut Self::Large(ref mut it) => it.next().map(|(&k, &v)| (k, v)),
+        }
+    }
+}
 
 /// A conceptually infinite-length bitvector that allows bitwise operations and
 /// iteration over set bits efficiently.
 #[derive(Clone, Debug)]
 pub struct BitVec {
-    bits: SmallVec<[u64; 2]>,
+    elems: AdaptiveMap,
 }
 
 const BITS_PER_WORD: usize = 64;
 
 impl BitVec {
     pub fn new() -> Self {
-        Self { bits: smallvec![] }
-    }
-
-    pub fn with_capacity(len: usize) -> Self {
-        let words = (len + BITS_PER_WORD - 1) / BITS_PER_WORD;
         Self {
-            bits: SmallVec::with_capacity(words),
+            elems: AdaptiveMap::new(),
         }
     }
 
-    #[inline(never)]
-    fn ensure_idx(&mut self, word: usize) {
-        let mut target_len = std::cmp::max(2, self.bits.len());
-        while word >= target_len {
-            target_len *= 2;
-        }
-        self.bits.resize(target_len, 0);
+    #[inline(always)]
+    fn elem(&mut self, bit_index: usize) -> &mut u64 {
+        let word_index = (bit_index / BITS_PER_WORD) as u32;
+        self.elems.get_or_insert(word_index)
+    }
+
+    #[inline(always)]
+    fn maybe_elem_mut(&mut self, bit_index: usize) -> Option<&mut u64> {
+        let word_index = (bit_index / BITS_PER_WORD) as u32;
+        self.elems.get_mut(word_index)
+    }
+
+    #[inline(always)]
+    fn maybe_elem(&self, bit_index: usize) -> Option<&u64> {
+        let word_index = (bit_index / BITS_PER_WORD) as u32;
+        self.elems.get(word_index)
     }
 
     #[inline(always)]
     pub fn set(&mut self, idx: usize, val: bool) {
-        let word = idx / BITS_PER_WORD;
         let bit = idx % BITS_PER_WORD;
         if val {
-            if word >= self.bits.len() {
-                self.ensure_idx(word);
-            }
-            self.bits[word] |= 1 << bit;
-        } else {
-            if word < self.bits.len() {
-                self.bits[word] &= !(1 << bit);
-            }
+            *self.elem(idx) |= 1 << bit;
+        } else if let Some(word) = self.maybe_elem_mut(idx) {
+            *word &= !(1 << bit);
         }
     }
 
     pub fn assign(&mut self, other: &Self) {
-        if other.bits.len() > 0 {
-            self.ensure_idx(other.bits.len() - 1);
-        }
-        for i in 0..other.bits.len() {
-            self.bits[i] = other.bits[i];
-        }
-        for i in other.bits.len()..self.bits.len() {
-            self.bits[i] = 0;
-        }
+        self.elems = other.elems.clone();
     }
 
     #[inline(always)]
     pub fn get(&self, idx: usize) -> bool {
-        let word = idx / BITS_PER_WORD;
         let bit = idx % BITS_PER_WORD;
-        if word >= self.bits.len() {
-            false
+        if let Some(word) = self.maybe_elem(idx) {
+            (*word & (1 << bit)) != 0
         } else {
-            (self.bits[word] & (1 << bit)) != 0
+            false
         }
     }
 
     pub fn or(&mut self, other: &Self) -> bool {
-        if other.bits.is_empty() {
-            return false;
-        }
-        let last_idx = other.bits.len() - 1;
-        self.ensure_idx(last_idx);
-
         let mut changed = 0;
-        for (self_word, other_word) in self.bits.iter_mut().zip(other.bits.iter()) {
-            if *other_word == 0 {
-                // Avoid cache misses in `self` if `other` is zeroes.
+        for (word_idx, bits) in other.elems.iter() {
+            if bits == 0 {
                 continue;
             }
-            changed |= *other_word & !*self_word;
-            *self_word |= *other_word;
+            let word_idx = word_idx as usize;
+            let self_word = self.elem(word_idx * BITS_PER_WORD);
+            changed |= bits & !*self_word;
+            *self_word |= bits;
         }
         changed != 0
     }
 
-    pub fn and(&mut self, other: &Self) {
-        if other.bits.len() < self.bits.len() {
-            self.bits.truncate(other.bits.len());
-        }
-
-        for (self_word, other_word) in self.bits.iter_mut().zip(other.bits.iter()) {
-            *self_word &= *other_word;
-        }
-    }
-
-    pub fn iter<'a>(&'a self) -> SetBitsIter<'a> {
-        let cur_word = if self.bits.len() > 0 { self.bits[0] } else { 0 };
-        SetBitsIter {
-            words: &self.bits[..],
-            word_idx: 0,
-            cur_word,
-        }
+    pub fn iter<'a>(&'a self) -> impl Iterator<Item = usize> + 'a {
+        self.elems.iter().flat_map(|(word_idx, bits)| {
+            let word_idx = word_idx as usize;
+            set_bits(bits).map(move |i| BITS_PER_WORD * word_idx + i)
+        })
     }
 }
 
-impl std::cmp::PartialEq for BitVec {
-    fn eq(&self, other: &Self) -> bool {
-        let limit = std::cmp::min(self.bits.len(), other.bits.len());
-        for i in 0..limit {
-            if self.bits[i] != other.bits[i] {
-                return false;
-            }
-        }
-        for i in limit..self.bits.len() {
-            if self.bits[i] != 0 {
-                return false;
-            }
-        }
-        for i in limit..other.bits.len() {
-            if other.bits[i] != 0 {
-                return false;
-            }
-        }
-        true
-    }
-}
-impl std::cmp::Eq for BitVec {}
-
-pub struct SetBitsIter<'a> {
-    words: &'a [u64],
-    word_idx: usize,
-    cur_word: u64,
+fn set_bits(bits: u64) -> impl Iterator<Item = usize> {
+    let iter = SetBitsIter(bits);
+    iter
 }
 
-impl<'a> Iterator for SetBitsIter<'a> {
+pub struct SetBitsIter(u64);
+
+impl Iterator for SetBitsIter {
     type Item = usize;
     fn next(&mut self) -> Option<Self::Item> {
-        while self.cur_word == 0 {
-            if self.word_idx + 1 >= self.words.len() {
-                return None;
-            }
-            self.word_idx += 1;
-            self.cur_word = self.words[self.word_idx];
+        if self.0 == 0 {
+            None
+        } else {
+            let bitidx = self.0.trailing_zeros();
+            self.0 &= !(1 << bitidx);
+            Some(bitidx as usize)
         }
-        let bitidx = self.cur_word.trailing_zeros();
-        self.cur_word &= !(1 << bitidx);
-        Some(self.word_idx * BITS_PER_WORD + bitidx as usize)
     }
 }
diff --git a/src/ion/mod.rs b/src/ion/mod.rs
index 0b9fc12..bca281d 100644
--- a/src/ion/mod.rs
+++ b/src/ion/mod.rs
@@ -1007,8 +1007,6 @@ impl<'a, F: Function> Env<'a, F> {
             self.liveins.push(BitVec::new());
         }
 
-        let num_vregs = self.func.num_vregs();
-
         let mut num_ranges = 0;
 
         // Create Uses and Defs referring to VRegs, and place the Uses
@@ -1040,7 +1038,7 @@ impl<'a, F: Function> Env<'a, F> {
 
             // Init live-set to union of liveins from successors
             // (excluding backedges; those are handled below).
-            let mut live = BitVec::with_capacity(num_vregs);
+            let mut live = BitVec::new();
             for &succ in self.func.block_succs(block) {
                 live.or(&self.liveins[succ.index()]);
             }
@@ -1655,18 +1653,21 @@ impl<'a, F: Function> Env<'a, F> {
             return false;
         }
 
-        // Sanity check: both bundles should contain only ranges with appropriate VReg classes.
-        let mut iter = self.bundles[from.index()].first_range;
-        while iter.is_valid() {
-            let vreg = self.ranges[iter.index()].vreg;
-            assert_eq!(rc, self.vregs[vreg.index()].reg.class());
-            iter = self.ranges[iter.index()].next_in_bundle;
-        }
-        let mut iter = self.bundles[to.index()].first_range;
-        while iter.is_valid() {
-            let vreg = self.ranges[iter.index()].vreg;
-            assert_eq!(rc, self.vregs[vreg.index()].reg.class());
-            iter = self.ranges[iter.index()].next_in_bundle;
+        #[cfg(debug_assertions)]
+        {
+            // Sanity check: both bundles should contain only ranges with appropriate VReg classes.
+            let mut iter = self.bundles[from.index()].first_range;
+            while iter.is_valid() {
+                let vreg = self.ranges[iter.index()].vreg;
+                assert_eq!(rc, self.vregs[vreg.index()].reg.class());
+                iter = self.ranges[iter.index()].next_in_bundle;
+            }
+            let mut iter = self.bundles[to.index()].first_range;
+            while iter.is_valid() {
+                let vreg = self.ranges[iter.index()].vreg;
+                assert_eq!(rc, self.vregs[vreg.index()].reg.class());
+                iter = self.ranges[iter.index()].next_in_bundle;
+            }
         }
 
         // Check for overlap in LiveRanges.
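
The heart of the patch is the small-to-large promotion in `AdaptiveMap::get_or_insert`, plus the `trailing_zeros`-based set-bit iterator that replaces the old word-indexed one. The standalone sketch below illustrates those two techniques in the same shape; it is not code from the patch: it substitutes std's `HashMap` for `FxHashMap`, and the names `Adaptive` and `set_bits` are invented for the sketch.

use std::collections::HashMap;

// Up to four (word-index, 64-bit-word) pairs stored inline; spill into a
// hash map only once a fifth distinct key appears.
enum Adaptive {
    Small { len: usize, keys: [u32; 4], vals: [u64; 4] },
    Large(HashMap<u32, u64>),
}

impl Adaptive {
    fn new() -> Self {
        Adaptive::Small { len: 0, keys: [0; 4], vals: [0; 4] }
    }

    fn get_or_insert(&mut self, key: u32) -> &mut u64 {
        // Promote first if the inline array is full and `key` is new; doing
        // this as a separate step (as the patch does) sidesteps borrow
        // conflicts between inspecting `self` and replacing it.
        let needs_promote = matches!(self,
            Adaptive::Small { len: 4, keys, .. } if !keys.contains(&key));
        if needs_promote {
            if let Adaptive::Small { keys, vals, .. } = self {
                let map: HashMap<u32, u64> =
                    keys.iter().copied().zip(vals.iter().copied()).collect();
                *self = Adaptive::Large(map);
            }
        }
        match self {
            Adaptive::Small { len, keys, vals } => {
                for i in 0..*len {
                    if keys[i] == key {
                        return &mut vals[i];
                    }
                }
                let i = *len; // promotion above guarantees a free slot
                *len += 1;
                keys[i] = key;
                vals[i] = 0;
                &mut vals[i]
            }
            Adaptive::Large(map) => map.entry(key).or_insert(0),
        }
    }
}

// The SetBitsIter idea in miniature: repeatedly strip the lowest set bit.
fn set_bits(mut w: u64) -> impl Iterator<Item = usize> {
    std::iter::from_fn(move || {
        if w == 0 {
            None
        } else {
            let b = w.trailing_zeros() as usize;
            w &= w - 1; // clear the lowest set bit
            Some(b)
        }
    })
}

fn main() {
    let mut m = Adaptive::new();
    for k in 0..8u32 {
        *m.get_or_insert(k) |= 1 << k; // the fifth distinct key promotes to Large
    }
    assert!(matches!(m, Adaptive::Large(_)));
    assert_eq!(set_bits(0b1010_0001).collect::<Vec<_>>(), vec![0, 5, 7]);
}

Keeping the first four words inline presumably covers the common case in liveness analysis, where a set touches only a handful of 64-bit chunks, so most operations never hash or allocate; only genuinely dense sets fall back to the FxHashMap.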