Limit split count per original bundle with fallback 1-to-N split. (#59)

* Limit split count per original bundle with fallback 1-to-N split.

Right now, splitting a bundle produces two halves. Furthermore, it has
cost linear in the length of the bundle, because the resulting
half-bundles have their requirements recomputed with a new scan, and
because we copy half the use-list over to the tail end sub-bundle.

This works fine when a bundle has a handful of splits overall, but not
when an input has a systematic pattern of conflicts that will require
O(|bundle|) splits (e.g., every Use is constrained to a different fixed
register than the last one). In such a case, we get quadratic behavior.

This PR adds a per-spillset (so, per-original-bundle) counter for
splits, and when it reaches a preset threshold (10 for now), we instead
split directly into minimal bundles along the whole length of the
bundle, putting the regions without uses in the spill bundle.

This basically approximates what a non-splitting allocator would do: it
"spills" the whole bundle to possibly a stackslot, or a second-chance
register allocation at best, via the spill bundle; and then does minimal
reservations of registers just at uses/defs and moves the "spilled"
value into/out of them immediately.

Together with another small optimization, this PR results in a 4x
compilation speedup and 24x memory use reduction on one particularly bad
case with alternating conflicting requirements on a vreg (see
bytecodealliance/wasmtime#4291 for details).

* Review comments.
This commit is contained in:
Chris Fallin
2022-06-27 13:23:09 -07:00
committed by GitHub
parent 9733cb2227
commit 4eb2a2528b
4 changed files with 265 additions and 4 deletions

View File

@@ -252,8 +252,11 @@ pub struct SpillSet {
pub spill_bundle: LiveBundleIndex,
pub required: bool,
pub size: u8,
pub splits: u8,
}
pub(crate) const MAX_SPLITS_PER_SPILLSET: u8 = 10;
#[derive(Clone, Debug)]
pub struct VRegData {
pub ranges: LiveRangeList,

View File

@@ -291,6 +291,7 @@ impl<'a, F: Function> Env<'a, F> {
class: reg.class(),
reg_hint: PReg::invalid(),
spill_bundle: LiveBundleIndex::invalid(),
splits: 0,
});
self.bundles[bundle.index()].spillset = ssidx;
}

View File

@@ -15,18 +15,18 @@
use super::{
spill_weight_from_constraint, Env, LiveBundleIndex, LiveBundleVec, LiveRangeFlag,
LiveRangeIndex, LiveRangeKey, LiveRangeList, LiveRangeListEntry, PRegIndex, RegTraversalIter,
Requirement, SpillWeight, UseList,
Requirement, SpillWeight, UseList, VRegIndex,
};
use crate::{
ion::data_structures::{
CodeRange, BUNDLE_MAX_NORMAL_SPILL_WEIGHT, MINIMAL_BUNDLE_SPILL_WEIGHT,
MINIMAL_FIXED_BUNDLE_SPILL_WEIGHT,
CodeRange, BUNDLE_MAX_NORMAL_SPILL_WEIGHT, MAX_SPLITS_PER_SPILLSET,
MINIMAL_BUNDLE_SPILL_WEIGHT, MINIMAL_FIXED_BUNDLE_SPILL_WEIGHT,
},
Allocation, Function, Inst, InstPosition, OperandConstraint, OperandKind, PReg, ProgPoint,
RegAllocError,
};
use fxhash::FxHashSet;
use smallvec::smallvec;
use smallvec::{smallvec, SmallVec};
use std::fmt::Debug;
#[derive(Clone, Debug, PartialEq, Eq)]
@@ -410,6 +410,16 @@ impl<'a, F: Function> Env<'a, F> {
let spillset = self.bundles[bundle.index()].spillset;
// Have we reached the maximum split count? If so, fall back
// to a "minimal bundles and spill bundle" setup for this
// bundle. See the doc-comment on
// `split_into_minimal_bundles()` above for more.
if self.spillsets[spillset.index()].splits >= MAX_SPLITS_PER_SPILLSET {
self.split_into_minimal_bundles(bundle, reg_hint);
return;
}
self.spillsets[spillset.index()].splits += 1;
debug_assert!(!self.bundles[bundle.index()].ranges.is_empty());
// Split point *at* start is OK; this means we peel off
// exactly one use to create a minimal bundle.
@@ -511,6 +521,7 @@ impl<'a, F: Function> Env<'a, F> {
self.bundles[bundle.index()]
.ranges
.truncate(last_lr_in_old_bundle_idx + 1);
self.bundles[bundle.index()].ranges.shrink_to_fit();
// If the first entry in `new_lr_list` is a LR that is split
// down the middle, replace it with a new LR and chop off the
@@ -537,6 +548,7 @@ impl<'a, F: Function> Env<'a, F> {
.collect();
self.ranges[new_lr.index()].uses = rest_uses;
self.ranges[orig_lr.index()].uses.truncate(first_use);
self.ranges[orig_lr.index()].uses.shrink_to_fit();
self.recompute_range_properties(orig_lr);
self.recompute_range_properties(new_lr);
new_lr_list[0].index = new_lr;
@@ -728,6 +740,247 @@ impl<'a, F: Function> Env<'a, F> {
}
}
/// Splits the given bundle into minimal bundles per Use, falling
/// back onto the spill bundle. This must work for any bundle no
/// matter how many conflicts.
///
/// This is meant to solve a quadratic-cost problem that exists
/// with "normal" splitting as implemented above. With that
/// procedure, , splitting a bundle produces two
/// halves. Furthermore, it has cost linear in the length of the
/// bundle, because the resulting half-bundles have their
/// requirements recomputed with a new scan, and because we copy
/// half the use-list over to the tail end sub-bundle.
///
/// This works fine when a bundle has a handful of splits overall,
/// but not when an input has a systematic pattern of conflicts
/// that will require O(|bundle|) splits (e.g., every Use is
/// constrained to a different fixed register than the last
/// one). In such a case, we get quadratic behavior.
///
/// This method implements a direct split into minimal bundles
/// along the whole length of the bundle, putting the regions
/// without uses in the spill bundle. We do this once the number
/// of splits in an original bundle (tracked by spillset) reaches
/// a pre-determined limit.
///
/// This basically approximates what a non-splitting allocator
/// would do: it "spills" the whole bundle to possibly a
/// stackslot, or a second-chance register allocation at best, via
/// the spill bundle; and then does minimal reservations of
/// registers just at uses/defs and moves the "spilled" value
/// into/out of them immediately.
pub fn split_into_minimal_bundles(&mut self, bundle: LiveBundleIndex, reg_hint: PReg) {
let mut removed_lrs: FxHashSet<LiveRangeIndex> = FxHashSet::default();
let mut removed_lrs_vregs: FxHashSet<VRegIndex> = FxHashSet::default();
let mut new_lrs: SmallVec<[(VRegIndex, LiveRangeIndex); 16]> = smallvec![];
let mut new_bundles: SmallVec<[LiveBundleIndex; 16]> = smallvec![];
let spill = self
.get_or_create_spill_bundle(bundle, /* create_if_absent = */ true)
.unwrap();
trace!(
"Splitting bundle {:?} into minimal bundles with reg hint {}",
bundle,
reg_hint
);
let mut last_lr: Option<LiveRangeIndex> = None;
let mut last_bundle: Option<LiveBundleIndex> = None;
let mut last_inst: Option<Inst> = None;
let mut last_vreg: Option<VRegIndex> = None;
for entry_idx in 0..self.bundles[bundle.index()].ranges.len() {
// Iterate manually; don't borrow `self`.
let entry = self.bundles[bundle.index()].ranges[entry_idx];
removed_lrs.insert(entry.index);
let vreg = self.ranges[entry.index.index()].vreg;
removed_lrs_vregs.insert(vreg);
trace!(" -> removing old LR {:?} for vreg {:?}", entry.index, vreg);
let mut last_live_pos = entry.range.from;
for use_idx in 0..self.ranges[entry.index.index()].uses.len() {
let u = self.ranges[entry.index.index()].uses[use_idx];
trace!(" -> use {:?} (last_live_pos {:?})", u, last_live_pos);
// If we just created a LR for this inst at the last
// pos, add this use to the same LR.
if Some(u.pos.inst()) == last_inst && Some(vreg) == last_vreg {
self.ranges[last_lr.unwrap().index()].uses.push(u);
trace!(" -> appended to last LR {:?}", last_lr.unwrap());
continue;
}
// If the last bundle was at the same inst, add a new
// LR to the same bundle; otherwise, create a LR and a
// new bundle.
if Some(u.pos.inst()) == last_inst {
let cr = CodeRange {
from: u.pos,
to: ProgPoint::before(u.pos.inst().next()),
};
let lr = self.create_liverange(cr);
new_lrs.push((vreg, lr));
self.ranges[lr.index()].uses.push(u);
self.ranges[lr.index()].vreg = vreg;
trace!(
" -> created new LR {:?} but adding to existing bundle {:?}",
lr,
last_bundle.unwrap()
);
// Edit the previous LR to end mid-inst.
self.bundles[last_bundle.unwrap().index()]
.ranges
.last_mut()
.unwrap()
.range
.to = u.pos;
self.ranges[last_lr.unwrap().index()].range.to = u.pos;
// Add this LR to the bundle.
self.bundles[last_bundle.unwrap().index()]
.ranges
.push(LiveRangeListEntry {
range: cr,
index: lr,
});
self.ranges[lr.index()].bundle = last_bundle.unwrap();
last_live_pos = ProgPoint::before(u.pos.inst().next());
continue;
}
// Otherwise, create a new LR.
let pos = ProgPoint::before(u.pos.inst());
let cr = CodeRange {
from: pos,
to: ProgPoint::before(u.pos.inst().next()),
};
let lr = self.create_liverange(cr);
new_lrs.push((vreg, lr));
self.ranges[lr.index()].uses.push(u);
self.ranges[lr.index()].vreg = vreg;
// Create a new bundle that contains only this LR.
let new_bundle = self.create_bundle();
self.ranges[lr.index()].bundle = new_bundle;
self.bundles[new_bundle.index()].spillset = self.bundles[bundle.index()].spillset;
self.bundles[new_bundle.index()]
.ranges
.push(LiveRangeListEntry {
range: cr,
index: lr,
});
new_bundles.push(new_bundle);
// If this use was a Def, set the StartsAtDef flag for
// the new LR. (N.B.: *not* Mod, only Def, because Mod
// needs an input. This flag specifically indicates
// that the LR does not require the value to be moved
// into location at start because it (re)defines the
// value.)
if u.operand.kind() == OperandKind::Def {
self.ranges[lr.index()].set_flag(LiveRangeFlag::StartsAtDef);
}
trace!(
" -> created new LR {:?} range {:?} with new bundle {:?} for this use",
lr,
cr,
new_bundle
);
// If there was any intervening range in the LR not
// covered by the minimal new LR above, add it to the
// spillset.
if pos > last_live_pos {
let cr = CodeRange {
from: last_live_pos,
to: pos,
};
let spill_lr = self.create_liverange(cr);
self.ranges[spill_lr.index()].vreg = vreg;
self.ranges[spill_lr.index()].bundle = spill;
new_lrs.push((vreg, spill_lr));
self.bundles[spill.index()].ranges.push(LiveRangeListEntry {
range: cr,
index: spill_lr,
});
self.ranges[spill_lr.index()].bundle = spill;
trace!(
" -> put intervening range {:?} in new LR {:?} in spill bundle {:?}",
cr,
spill_lr,
spill
);
}
last_live_pos = ProgPoint::before(u.pos.inst().next());
last_lr = Some(lr);
last_bundle = Some(new_bundle);
last_inst = Some(u.pos.inst());
last_vreg = Some(vreg);
}
// Clear the use-list from the original LR.
self.ranges[entry.index.index()].uses = Default::default();
// If there is space from the last use to the end of the
// LR, put that in the spill bundle too.
if entry.range.to > last_live_pos {
let cr = CodeRange {
from: last_live_pos,
to: entry.range.to,
};
let spill_lr = self.create_liverange(cr);
self.ranges[spill_lr.index()].vreg = vreg;
self.ranges[spill_lr.index()].bundle = spill;
new_lrs.push((vreg, spill_lr));
self.bundles[spill.index()].ranges.push(LiveRangeListEntry {
range: cr,
index: spill_lr,
});
self.ranges[spill_lr.index()].bundle = spill;
trace!(
" -> put trailing range {:?} in new LR {:?} in spill bundle {:?}",
cr,
spill_lr,
spill
);
}
}
// Clear the LR list in the original bundle.
self.bundles[bundle.index()].ranges.clear();
self.bundles[bundle.index()].ranges.shrink_to_fit();
// Remove all of the removed LRs from respective vregs' lists.
for vreg in removed_lrs_vregs {
self.vregs[vreg.index()]
.ranges
.retain(|entry| !removed_lrs.contains(&entry.index));
}
// Add the new LRs to their respective vreg lists.
for (vreg, lr) in new_lrs {
let range = self.ranges[lr.index()].range;
let entry = LiveRangeListEntry { range, index: lr };
self.vregs[vreg.index()].ranges.push(entry);
}
// Recompute bundle properties for all new bundles and enqueue
// them.
for bundle in new_bundles {
if self.bundles[bundle.index()].ranges.len() > 0 {
self.recompute_bundle_properties(bundle);
let prio = self.bundles[bundle.index()].prio;
self.allocation_queue
.insert(bundle, prio as usize, reg_hint);
}
}
}
pub fn process_bundle(
&mut self,
bundle: LiveBundleIndex,

View File

@@ -25,6 +25,10 @@ impl<'a, F: Function> Env<'a, F> {
for i in 0..self.spilled_bundles.len() {
let bundle = self.spilled_bundles[i]; // don't borrow self
if self.bundles[bundle.index()].ranges.is_empty() {
continue;
}
let class = self.spillsets[self.bundles[bundle.index()].spillset.index()].class;
let hint = self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint;