From 07a5a88972d9a9dd6d263d9fa610968598aa07b5 Mon Sep 17 00:00:00 2001
From: Chris Fallin
Date: Thu, 6 May 2021 20:03:44 -0700
Subject: [PATCH] BitVec perf: use adaptive hybrid chunked small-array +
 FxHashMap.

---
 Cargo.toml     |   1 +
 src/bitvec.rs  | 262 ++++++++++++++++++++++++++++++-------------------
 src/ion/mod.rs |  31 +++---
 3 files changed, 177 insertions(+), 117 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index c54201c..802881b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,6 +10,7 @@ repository = "https://github.com/bytecodealliance/regalloc2"
 [dependencies]
 log = { version = "0.4.8", default-features = false }
 smallvec = "1.6.1"
+fxhash = "0.2.1"
 
 # The below are only needed for fuzzing.
 # Keep this in sync with libfuzzer_sys's crate version:
diff --git a/src/bitvec.rs b/src/bitvec.rs
index 5a1c949..7c0dbe2 100644
--- a/src/bitvec.rs
+++ b/src/bitvec.rs
@@ -5,158 +5,216 @@
 
 //! Bit vectors.
 
-use smallvec::{smallvec, SmallVec};
+use fxhash::FxHashMap;
+
+/// A hybrid large/small-mode sparse mapping from integer indices to elements.
+#[derive(Clone, Debug)]
+enum AdaptiveMap {
+    Small(u32, [u32; 4], [u64; 4]),
+    Large(FxHashMap<u32, u64>),
+}
+
+const INVALID: u32 = 0xffff_ffff;
+
+impl AdaptiveMap {
+    fn new() -> Self {
+        Self::Small(0, [INVALID, INVALID, INVALID, INVALID], [0, 0, 0, 0])
+    }
+    fn expand(&mut self) {
+        match self {
+            &mut Self::Small(len, ref keys, ref values) => {
+                let mut map = FxHashMap::default();
+                for i in 0..len {
+                    map.insert(keys[i as usize], values[i as usize]);
+                }
+                *self = Self::Large(map);
+            }
+            _ => {}
+        }
+    }
+    fn get_or_insert<'a>(&'a mut self, key: u32) -> &'a mut u64 {
+        let needs_expand = match self {
+            &mut Self::Small(len, ref keys, ..) => len == 4 && !keys.iter().any(|k| *k == key),
+            _ => false,
+        };
+        if needs_expand {
+            self.expand();
+        }
+
+        match self {
+            &mut Self::Small(ref mut len, ref mut keys, ref mut values) => {
+                for i in 0..*len {
+                    if keys[i as usize] == key {
+                        return &mut values[i as usize];
+                    }
+                }
+                assert!(*len < 4);
+                let idx = *len;
+                *len += 1;
+                keys[idx as usize] = key;
+                values[idx as usize] = 0;
+                &mut values[idx as usize]
+            }
+            &mut Self::Large(ref mut map) => map.entry(key).or_insert(0),
+        }
+    }
+    fn get_mut(&mut self, key: u32) -> Option<&mut u64> {
+        match self {
+            &mut Self::Small(len, ref keys, ref mut values) => {
+                for i in 0..len {
+                    if keys[i as usize] == key {
+                        return Some(&mut values[i as usize]);
+                    }
+                }
+                None
+            }
+            &mut Self::Large(ref mut map) => map.get_mut(&key),
+        }
+    }
+    fn get(&self, key: u32) -> Option<&u64> {
+        match self {
+            &Self::Small(len, ref keys, ref values) => {
+                for i in 0..len {
+                    if keys[i as usize] == key {
+                        return Some(&values[i as usize]);
+                    }
+                }
+                None
+            }
+            &Self::Large(ref map) => map.get(&key),
+        }
+    }
+    fn iter<'a>(&'a self) -> AdaptiveMapIter<'a> {
+        match self {
+            &Self::Small(len, ref keys, ref values) => {
+                AdaptiveMapIter::Small(&keys[0..len as usize], &values[0..len as usize])
+            }
+            &Self::Large(ref map) => AdaptiveMapIter::Large(map.iter()),
+        }
+    }
+}
+
+enum AdaptiveMapIter<'a> {
+    Small(&'a [u32], &'a [u64]),
+    Large(std::collections::hash_map::Iter<'a, u32, u64>),
+}
+
+impl<'a> std::iter::Iterator for AdaptiveMapIter<'a> {
+    type Item = (u32, u64);
+    fn next(&mut self) -> Option<Self::Item> {
+        match self {
+            &mut Self::Small(ref mut keys, ref mut values) => {
+                if keys.is_empty() {
+                    None
+                } else {
+                    let (k, v) = ((*keys)[0], (*values)[0]);
+                    *keys = &(*keys)[1..];
+                    *values = &(*values)[1..];
+                    Some((k, v))
+                }
+            }
+            &mut Self::Large(ref mut it) => it.next().map(|(&k, &v)| (k, v)),
+        }
+    }
+}
 
 /// A conceptually infinite-length bitvector that allows bitwise operations and
 /// iteration over set bits efficiently.
 #[derive(Clone, Debug)]
 pub struct BitVec {
-    bits: SmallVec<[u64; 2]>,
+    elems: AdaptiveMap,
 }
 
 const BITS_PER_WORD: usize = 64;
 
 impl BitVec {
     pub fn new() -> Self {
-        Self { bits: smallvec![] }
-    }
-
-    pub fn with_capacity(len: usize) -> Self {
-        let words = (len + BITS_PER_WORD - 1) / BITS_PER_WORD;
         Self {
-            bits: SmallVec::with_capacity(words),
+            elems: AdaptiveMap::new(),
         }
     }
 
-    #[inline(never)]
-    fn ensure_idx(&mut self, word: usize) {
-        let mut target_len = std::cmp::max(2, self.bits.len());
-        while word >= target_len {
-            target_len *= 2;
-        }
-        self.bits.resize(target_len, 0);
+    #[inline(always)]
+    fn elem(&mut self, bit_index: usize) -> &mut u64 {
+        let word_index = (bit_index / BITS_PER_WORD) as u32;
+        self.elems.get_or_insert(word_index)
+    }
+
+    #[inline(always)]
+    fn maybe_elem_mut(&mut self, bit_index: usize) -> Option<&mut u64> {
+        let word_index = (bit_index / BITS_PER_WORD) as u32;
+        self.elems.get_mut(word_index)
+    }
+
+    #[inline(always)]
+    fn maybe_elem(&self, bit_index: usize) -> Option<&u64> {
+        let word_index = (bit_index / BITS_PER_WORD) as u32;
+        self.elems.get(word_index)
     }
 
     #[inline(always)]
     pub fn set(&mut self, idx: usize, val: bool) {
-        let word = idx / BITS_PER_WORD;
         let bit = idx % BITS_PER_WORD;
         if val {
-            if word >= self.bits.len() {
-                self.ensure_idx(word);
-            }
-            self.bits[word] |= 1 << bit;
-        } else {
-            if word < self.bits.len() {
-                self.bits[word] &= !(1 << bit);
-            }
+            *self.elem(idx) |= 1 << bit;
+        } else if let Some(word) = self.maybe_elem_mut(idx) {
+            *word &= !(1 << bit);
         }
     }
 
     pub fn assign(&mut self, other: &Self) {
-        if other.bits.len() > 0 {
-            self.ensure_idx(other.bits.len() - 1);
-        }
-        for i in 0..other.bits.len() {
-            self.bits[i] = other.bits[i];
-        }
-        for i in other.bits.len()..self.bits.len() {
-            self.bits[i] = 0;
-        }
+        self.elems = other.elems.clone();
     }
 
     #[inline(always)]
     pub fn get(&self, idx: usize) -> bool {
-        let word = idx / BITS_PER_WORD;
         let bit = idx % BITS_PER_WORD;
-        if word >= self.bits.len() {
-            false
+        if let Some(word) = self.maybe_elem(idx) {
+            (*word & (1 << bit)) != 0
         } else {
-            (self.bits[word] & (1 << bit)) != 0
+            false
         }
     }
 
     pub fn or(&mut self, other: &Self) -> bool {
-        if other.bits.is_empty() {
-            return false;
-        }
-        let last_idx = other.bits.len() - 1;
-        self.ensure_idx(last_idx);
-
         let mut changed = 0;
-        for (self_word, other_word) in self.bits.iter_mut().zip(other.bits.iter()) {
-            if *other_word == 0 {
-                // Avoid cache misses in `self` if `other` is zeroes.
+        for (word_idx, bits) in other.elems.iter() {
+            if bits == 0 {
                 continue;
             }
-            changed |= *other_word & !*self_word;
-            *self_word |= *other_word;
+            let word_idx = word_idx as usize;
+            let self_word = self.elem(word_idx * BITS_PER_WORD);
+            changed |= bits & !*self_word;
+            *self_word |= bits;
         }
         changed != 0
     }
 
-    pub fn and(&mut self, other: &Self) {
-        if other.bits.len() < self.bits.len() {
-            self.bits.truncate(other.bits.len());
-        }
-
-        for (self_word, other_word) in self.bits.iter_mut().zip(other.bits.iter()) {
-            *self_word &= *other_word;
-        }
-    }
-
-    pub fn iter<'a>(&'a self) -> SetBitsIter<'a> {
-        let cur_word = if self.bits.len() > 0 { self.bits[0] } else { 0 };
-        SetBitsIter {
-            words: &self.bits[..],
-            word_idx: 0,
-            cur_word,
-        }
+    pub fn iter<'a>(&'a self) -> impl Iterator<Item = usize> + 'a {
+        self.elems.iter().flat_map(|(word_idx, bits)| {
+            let word_idx = word_idx as usize;
+            set_bits(bits).map(move |i| BITS_PER_WORD * word_idx + i)
+        })
     }
 }
 
-impl std::cmp::PartialEq for BitVec {
-    fn eq(&self, other: &Self) -> bool {
-        let limit = std::cmp::min(self.bits.len(), other.bits.len());
-        for i in 0..limit {
-            if self.bits[i] != other.bits[i] {
-                return false;
-            }
-        }
-        for i in limit..self.bits.len() {
-            if self.bits[i] != 0 {
-                return false;
-            }
-        }
-        for i in limit..other.bits.len() {
-            if other.bits[i] != 0 {
-                return false;
-            }
-        }
-        true
-    }
-}
-impl std::cmp::Eq for BitVec {}
-
-pub struct SetBitsIter<'a> {
-    words: &'a [u64],
-    word_idx: usize,
-    cur_word: u64,
+fn set_bits(bits: u64) -> impl Iterator<Item = usize> {
+    let iter = SetBitsIter(bits);
+    iter
 }
 
-impl<'a> Iterator for SetBitsIter<'a> {
+pub struct SetBitsIter(u64);
+
+impl Iterator for SetBitsIter {
     type Item = usize;
     fn next(&mut self) -> Option<Self::Item> {
-        while self.cur_word == 0 {
-            if self.word_idx + 1 >= self.words.len() {
-                return None;
-            }
-            self.word_idx += 1;
-            self.cur_word = self.words[self.word_idx];
+        if self.0 == 0 {
+            None
+        } else {
+            let bitidx = self.0.trailing_zeros();
+            self.0 &= !(1 << bitidx);
+            Some(bitidx as usize)
         }
-        let bitidx = self.cur_word.trailing_zeros();
-        self.cur_word &= !(1 << bitidx);
-        Some(self.word_idx * BITS_PER_WORD + bitidx as usize)
     }
 }
diff --git a/src/ion/mod.rs b/src/ion/mod.rs
index 0b9fc12..bca281d 100644
--- a/src/ion/mod.rs
+++ b/src/ion/mod.rs
@@ -1007,8 +1007,6 @@ impl<'a, F: Function> Env<'a, F> {
             self.liveins.push(BitVec::new());
         }
 
-        let num_vregs = self.func.num_vregs();
-
         let mut num_ranges = 0;
 
         // Create Uses and Defs referring to VRegs, and place the Uses
@@ -1040,7 +1038,7 @@ impl<'a, F: Function> Env<'a, F> {
 
             // Init live-set to union of liveins from successors
             // (excluding backedges; those are handled below).
-            let mut live = BitVec::with_capacity(num_vregs);
+            let mut live = BitVec::new();
             for &succ in self.func.block_succs(block) {
                 live.or(&self.liveins[succ.index()]);
             }
@@ -1655,18 +1653,21 @@ impl<'a, F: Function> Env<'a, F> {
             return false;
         }
 
-        // Sanity check: both bundles should contain only ranges with appropriate VReg classes.
-        let mut iter = self.bundles[from.index()].first_range;
-        while iter.is_valid() {
-            let vreg = self.ranges[iter.index()].vreg;
-            assert_eq!(rc, self.vregs[vreg.index()].reg.class());
-            iter = self.ranges[iter.index()].next_in_bundle;
-        }
-        let mut iter = self.bundles[to.index()].first_range;
-        while iter.is_valid() {
-            let vreg = self.ranges[iter.index()].vreg;
-            assert_eq!(rc, self.vregs[vreg.index()].reg.class());
-            iter = self.ranges[iter.index()].next_in_bundle;
+        #[cfg(debug_assertions)]
+        {
+            // Sanity check: both bundles should contain only ranges with appropriate VReg classes.
+            let mut iter = self.bundles[from.index()].first_range;
+            while iter.is_valid() {
+                let vreg = self.ranges[iter.index()].vreg;
+                assert_eq!(rc, self.vregs[vreg.index()].reg.class());
+                iter = self.ranges[iter.index()].next_in_bundle;
+            }
+            let mut iter = self.bundles[to.index()].first_range;
+            while iter.is_valid() {
+                let vreg = self.ranges[iter.index()].vreg;
+                assert_eq!(rc, self.vregs[vreg.index()].reg.class());
+                iter = self.ranges[iter.index()].next_in_bundle;
+            }
         }
 
         // Check for overlap in LiveRanges.
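
The heart of the patch is the small-to-large promotion in `AdaptiveMap::get_or_insert`, plus the `trailing_zeros`-based set-bit iterator that replaces the old word-indexed one. The standalone sketch below illustrates those two techniques in the same shape; it is not code from the patch: it substitutes std's `HashMap` for `FxHashMap`, and the names `Adaptive` and `set_bits` are invented for the sketch.

use std::collections::HashMap;

// Up to four (word-index, 64-bit-word) pairs stored inline; spill into a
// hash map only once a fifth distinct key appears.
enum Adaptive {
    Small { len: usize, keys: [u32; 4], vals: [u64; 4] },
    Large(HashMap<u32, u64>),
}

impl Adaptive {
    fn new() -> Self {
        Adaptive::Small { len: 0, keys: [0; 4], vals: [0; 4] }
    }

    fn get_or_insert(&mut self, key: u32) -> &mut u64 {
        // Promote first if the inline array is full and `key` is new; doing
        // this as a separate step (as the patch does) sidesteps borrow
        // conflicts between inspecting `self` and replacing it.
        let needs_promote = matches!(self,
            Adaptive::Small { len: 4, keys, .. } if !keys.contains(&key));
        if needs_promote {
            if let Adaptive::Small { keys, vals, .. } = self {
                let map: HashMap<u32, u64> =
                    keys.iter().copied().zip(vals.iter().copied()).collect();
                *self = Adaptive::Large(map);
            }
        }
        match self {
            Adaptive::Small { len, keys, vals } => {
                for i in 0..*len {
                    if keys[i] == key {
                        return &mut vals[i];
                    }
                }
                let i = *len; // promotion above guarantees a free slot
                *len += 1;
                keys[i] = key;
                vals[i] = 0;
                &mut vals[i]
            }
            Adaptive::Large(map) => map.entry(key).or_insert(0),
        }
    }
}

// The SetBitsIter idea in miniature: repeatedly strip the lowest set bit.
fn set_bits(mut w: u64) -> impl Iterator<Item = usize> {
    std::iter::from_fn(move || {
        if w == 0 {
            None
        } else {
            let b = w.trailing_zeros() as usize;
            w &= w - 1; // clear the lowest set bit
            Some(b)
        }
    })
}

fn main() {
    let mut m = Adaptive::new();
    for k in 0..8u32 {
        *m.get_or_insert(k) |= 1 << k; // the fifth distinct key promotes to Large
    }
    assert!(matches!(m, Adaptive::Large(_)));
    assert_eq!(set_bits(0b1010_0001).collect::<Vec<_>>(), vec![0, 5, 7]);
}

Keeping the first four words inline presumably covers the common case in liveness analysis, where a set touches only a handful of 64-bit chunks, so most operations never hash or allocate; only genuinely dense sets fall back to the FxHashMap.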