Some performance optimizations in the liveness computation and the index-set implementation:

- Cache the most recent u64 chunk in the set to avoid some hashmap
  lookups;
- Defer unioning the live-set over the loop body until query time
  (instead, remember the set that would have been unioned in), and
  lazily propagate the liveness bit at query time, union-find style
  (see the sketch below);
- Do n-1 rather than n union operations for n successors (the first is
  a clone instead);
- Don't union in liveness sets from blocks we haven't visited yet (the
  loop-body/backedge handling covers these).
Author: Chris Fallin
Date:   2021-05-06 22:46:16 -07:00
Parent: a148dccac3
Commit: 2ff02b50a3

2 changed files with 97 additions and 24 deletions
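Before the diff, a minimal standalone sketch of the deferred-union idea. This is not the commit's code: `LiveIns` and the `HashSet`-based sets are simplified stand-ins for the `Env` fields and `BitVec`s changed below. A loop header is recorded as a "live-in parent" of each loop-body block instead of eagerly or'ing its live set into them; a later query pulls the bit from a parent and caches it locally, in the spirit of union-find path compression. The real version is the `is_live_in`/`livein_parents` change in the second file below.

```rust
use std::collections::HashSet;

/// Hypothetical, simplified stand-in for the per-block live-in state:
/// one set of live vreg indices per block, plus, per block, the list
/// of blocks whose live-ins are deferred-unioned into it.
struct LiveIns {
    sets: Vec<HashSet<u32>>,
    parents: Vec<Vec<usize>>,
}

impl LiveIns {
    fn new(num_blocks: usize) -> Self {
        LiveIns {
            sets: vec![HashSet::new(); num_blocks],
            parents: vec![Vec::new(); num_blocks],
        }
    }

    /// Instead of `sets[body] |= sets[header]` for every block in the
    /// loop body, just remember the header as a parent of `body`.
    fn defer_union(&mut self, body: usize, header: usize) {
        self.parents[body].push(header);
    }

    /// Query-time check: consult the block's own set, then each
    /// deferred parent; cache a hit locally (path-compression flavor)
    /// so repeated queries for the same bit stay cheap.
    fn is_live_in(&mut self, block: usize, vreg: u32) -> bool {
        if self.sets[block].contains(&vreg) {
            return true;
        }
        for i in 0..self.parents[block].len() {
            let parent = self.parents[block][i];
            if self.sets[parent].contains(&vreg) {
                self.sets[block].insert(vreg);
                return true;
            }
        }
        false
    }
}

fn main() {
    let mut liveins = LiveIns::new(3);
    liveins.sets[0].insert(42); // vreg 42 is live into block 0 (the loop header)
    liveins.defer_union(1, 0);  // block 1 is in block 0's loop body
    assert!(liveins.is_live_in(1, 42));  // found via the parent, then cached
    assert!(liveins.is_live_in(1, 42));  // now answered from block 1's own set
    assert!(!liveins.is_live_in(2, 42));
}
```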


@@ -3,34 +3,53 @@
* exception. See `LICENSE` for details.
*/
//! Bit vectors.
//! Index sets: sets of integers that represent indices into a space.
//!
//! For historical reasons this is called a `BitVec` but it is no
//! longer a dense bitvector; the chunked adaptive-sparse data
//! structure here has better performance.
use fxhash::FxHashMap;
use std::cell::Cell;
const SMALL_ELEMS: usize = 12;
/// A hybrid large/small-mode sparse mapping from integer indices to elements.
/// A hybrid large/small-mode sparse mapping from integer indices to
/// elements.
///
/// The trailing `(u32, u64)` element in each variant is a one-item
/// cache to allow fast access when streaming through.
#[derive(Clone, Debug)]
enum AdaptiveMap {
Small(u32, [u32; SMALL_ELEMS], [u64; SMALL_ELEMS]),
Large(FxHashMap<u32, u64>),
Small(
u32,
[u32; SMALL_ELEMS],
[u64; SMALL_ELEMS],
Cell<(u32, u64)>,
),
Large(FxHashMap<u32, u64>, Cell<(u32, u64)>),
}
const INVALID: u32 = 0xffff_ffff;
impl AdaptiveMap {
fn new() -> Self {
Self::Small(0, [INVALID; SMALL_ELEMS], [0; SMALL_ELEMS])
Self::Small(
0,
[INVALID; SMALL_ELEMS],
[0; SMALL_ELEMS],
Cell::new((INVALID, 0)),
)
}
#[inline(never)]
fn expand(&mut self) {
match self {
&mut Self::Small(len, ref keys, ref values) => {
&mut Self::Small(len, ref keys, ref values, ref cache) => {
let mut map = FxHashMap::default();
for i in 0..len {
map.insert(keys[i as usize], values[i as usize]);
}
*self = Self::Large(map);
*self = Self::Large(map, cache.clone());
}
_ => {}
}
@@ -48,7 +67,10 @@ impl AdaptiveMap {
}
match self {
&mut Self::Small(ref mut len, ref mut keys, ref mut values) => {
&mut Self::Small(ref mut len, ref mut keys, ref mut values, ref cached) => {
if cached.get().0 == key {
cached.set((INVALID, 0));
}
for i in 0..*len {
if keys[i as usize] == key {
return &mut values[i as usize];
@@ -61,13 +83,21 @@ impl AdaptiveMap {
values[idx as usize] = 0;
&mut values[idx as usize]
}
&mut Self::Large(ref mut map) => map.entry(key).or_insert(0),
&mut Self::Large(ref mut map, ref cached) => {
if cached.get().0 == key {
cached.set((INVALID, 0));
}
map.entry(key).or_insert(0)
}
}
}
#[inline(always)]
fn get_mut(&mut self, key: u32) -> Option<&mut u64> {
match self {
&mut Self::Small(len, ref keys, ref mut values) => {
&mut Self::Small(len, ref keys, ref mut values, ref cached) => {
if cached.get().0 == key {
cached.set((INVALID, 0));
}
for i in 0..len {
if keys[i as usize] == key {
return Some(&mut values[i as usize]);
@@ -75,29 +105,48 @@ impl AdaptiveMap {
}
None
}
&mut Self::Large(ref mut map) => map.get_mut(&key),
&mut Self::Large(ref mut map, ref cached) => {
if cached.get().0 == key {
cached.set((INVALID, 0));
}
map.get_mut(&key)
}
}
}
#[inline(always)]
fn get(&self, key: u32) -> Option<&u64> {
fn get(&self, key: u32) -> Option<u64> {
match self {
&Self::Small(len, ref keys, ref values) => {
&Self::Small(len, ref keys, ref values, ref cached) => {
if cached.get().0 == key {
return Some(cached.get().1);
}
for i in 0..len {
if keys[i as usize] == key {
return Some(&values[i as usize]);
let value = values[i as usize];
cached.set((key, value));
return Some(value);
}
}
None
}
&Self::Large(ref map) => map.get(&key),
&Self::Large(ref map, ref cached) => {
if cached.get().0 == key {
return Some(cached.get().1);
}
let value = map.get(&key).cloned();
if let Some(value) = value {
cached.set((key, value));
}
value
}
}
}
fn iter<'a>(&'a self) -> AdaptiveMapIter<'a> {
match self {
&Self::Small(len, ref keys, ref values) => {
&Self::Small(len, ref keys, ref values, ..) => {
AdaptiveMapIter::Small(&keys[0..len as usize], &values[0..len as usize])
}
&Self::Large(ref map) => AdaptiveMapIter::Large(map.iter()),
&Self::Large(ref map, ..) => AdaptiveMapIter::Large(map.iter()),
}
}
}
@@ -155,7 +204,7 @@ impl BitVec {
}
#[inline(always)]
fn maybe_elem(&self, bit_index: usize) -> Option<&u64> {
fn maybe_elem(&self, bit_index: usize) -> Option<u64> {
let word_index = (bit_index / BITS_PER_WORD) as u32;
self.elems.get(word_index)
}
@@ -178,7 +227,7 @@ impl BitVec {
pub fn get(&self, idx: usize) -> bool {
let bit = idx % BITS_PER_WORD;
if let Some(word) = self.maybe_elem(idx) {
(*word & (1 << bit)) != 0
(word & (1 << bit)) != 0
} else {
false
}


@@ -267,6 +267,7 @@ struct Env<'a, F: Function> {
env: &'a MachineEnv,
cfginfo: CFGInfo,
liveins: Vec<BitVec>,
livein_parents: Vec<Vec<Block>>,
/// Blockparam outputs: from-vreg, (end of) from-block, (start of)
/// to-block, to-vreg. The field order is significant: these are sorted so
/// that a scan over vregs, then blocks in each range, can scan in
@@ -663,6 +664,7 @@ impl<'a, F: Function> Env<'a, F> {
cfginfo,
liveins: vec![],
livein_parents: vec![],
blockparam_outs: vec![],
blockparam_ins: vec![],
blockparam_allocs: vec![],
@@ -1013,10 +1015,24 @@ impl<'a, F: Function> Env<'a, F> {
.insert(LiveRangeKey::from_range(&range), lr);
}
fn is_live_in(&mut self, block: Block, vreg: VRegIndex) -> bool {
if self.liveins[block.index()].get(vreg.index()) {
return true;
}
for &parent in &self.livein_parents[block.index()] {
if self.liveins[parent.index()].get(vreg.index()) {
self.liveins[block.index()].set(vreg.index(), true);
return true;
}
}
false
}
fn compute_liveness(&mut self) {
// Create initial LiveIn bitsets.
for _ in 0..self.func.blocks() {
self.liveins.push(BitVec::new());
self.livein_parents.push(vec![]);
}
let mut num_ranges = 0;
@@ -1050,10 +1066,18 @@ impl<'a, F: Function> Env<'a, F> {
// Init live-set to union of liveins from successors
// (excluding backedges; those are handled below).
let mut live = BitVec::new();
let mut live = None;
for &succ in self.func.block_succs(block) {
live.or(&self.liveins[succ.index()]);
if block_to_postorder[succ.index()].is_none() {
continue;
}
if live.is_none() {
live = Some(self.liveins[succ.index()].clone());
} else {
live.as_mut().unwrap().or(&self.liveins[succ.index()]);
}
}
let mut live = live.unwrap_or(BitVec::new());
// Initially, registers are assumed live for the whole block.
for vreg in live.iter() {
@@ -1404,7 +1428,7 @@ impl<'a, F: Function> Env<'a, F> {
);
log::debug!(" -> loop range {:?}", loop_range);
for &loopblock in loop_blocks {
self.liveins[loopblock.index()].or(&live);
self.livein_parents[loopblock.index()].push(block);
}
for vreg in live.iter() {
log::debug!(
@@ -3404,7 +3428,7 @@ impl<'a, F: Function> Env<'a, F> {
continue;
}
log::debug!(" -> out of this range, requires half-move if live");
if self.liveins[succ.index()].get(vreg.index()) {
if self.is_live_in(succ, vreg) {
log::debug!(" -> live at input to succ, adding halfmove");
half_moves.push(HalfMove {
key: half_move_key(block, succ, vreg, HalfMoveKind::Source),
@@ -3524,7 +3548,7 @@ impl<'a, F: Function> Env<'a, F> {
blockparam_in_idx += 1;
}
if !self.liveins[block.index()].get(vreg.index()) {
if !self.is_live_in(block, vreg) {
block = block.next();
continue;
}
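
To round out the last two commit-message items, a standalone sketch of the initial live-set construction from the `compute_liveness` hunk above (simplified: `HashSet` stands in for `BitVec`, a `visited` slice stands in for the `block_to_postorder` check, and `extend` plays the role of `or`).

```rust
use std::collections::HashSet;

/// Build the initial live set for a block from its successors' live-ins.
/// Unvisited successors (backedge targets) are skipped here and picked
/// up later by the loop-body handling instead.
fn initial_live_set(
    succs: &[usize],
    liveins: &[HashSet<u32>],
    visited: &[bool],
) -> HashSet<u32> {
    let mut live: Option<HashSet<u32>> = None;
    for &succ in succs {
        if !visited[succ] {
            continue;
        }
        if live.is_none() {
            // First visited successor: clone rather than union into an
            // empty set (n-1 unions instead of n).
            live = Some(liveins[succ].clone());
        } else {
            live.as_mut().unwrap().extend(liveins[succ].iter().copied());
        }
    }
    live.unwrap_or_default()
}

fn main() {
    let liveins = vec![
        HashSet::from([1, 2]),
        HashSet::from([2, 3]),
        HashSet::from([9]),
    ];
    let visited = vec![true, true, false];
    let live = initial_live_set(&[0, 1, 2], &liveins, &visited);
    assert_eq!(live, HashSet::from([1, 2, 3])); // unvisited block 2 is skipped
}
```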