Improve prog-move handling: no use/def records, just directly connect the LRs.

This also requires some metadata in the edit output to properly hook up
the checker in regalloc.rs, so it can track user moves without seeing
the original insts and their operands.
Author: Chris Fallin
Date:   2021-05-11 23:59:12 -07:00
Parent: 6066d02f6f
Commit: 37fa3ec763

4 changed files with 96 additions and 71 deletions
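The checker hookup itself is not part of this diff. As a rough illustration only, here is a minimal sketch of how a client could consume the new `to_vreg` metadata on move edits; the stand-in types and the `apply_edit` function are invented for the sketch and are not the actual regalloc.rs checker:

use std::collections::HashMap;

// Stand-ins for the crate's types, just to make the sketch self-contained.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct Allocation(u32);
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
struct VReg(u32);

enum Edit {
    Move {
        from: Allocation,
        to: Allocation,
        to_vreg: Option<VReg>,
    },
}

/// Track which vreg each allocation currently holds.
fn apply_edit(state: &mut HashMap<Allocation, VReg>, edit: &Edit) {
    match edit {
        &Edit::Move { from, to, to_vreg } => {
            // Data movement: `to` now holds whatever `from` held.
            if let Some(v) = state.get(&from).copied() {
                state.insert(to, v);
            }
            // Metadata: `to_vreg`, when present, marks the moved value as
            // a def of a new vreg. Such edits are emitted even when
            // `from == to`, so the rebinding stays visible although no
            // data actually moves.
            if let Some(v) = to_vreg {
                state.insert(to, v);
            }
        }
    }
}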

View File

@@ -41,7 +41,7 @@ fuzz_target!(|testcase: TestCase| {
let scratch = Allocation::reg(PReg::new(31, RegClass::Int));
let mut par = ParallelMoves::new(scratch);
for &(src, dst) in &testcase.moves {
- par.add(src, dst);
+ par.add(src, dst, ());
}
let moves = par.resolve();
@@ -59,7 +59,7 @@ fuzz_target!(|testcase: TestCase| {
for i in 0..32 {
regfile[i] = Some(i);
}
- for (src, dst) in moves {
+ for (src, dst, _) in moves {
if let (Some(preg_src), Some(preg_dst)) = (src.as_reg(), dst.as_reg()) {
let data = regfile[preg_src.hw_enc()];
regfile[preg_dst.hw_enc()] = data;

View File

@@ -144,30 +144,31 @@ struct LiveRange {
enum LiveRangeFlag {
Minimal = 1,
Fixed = 2,
+ StartsAtDef = 4,
}
impl LiveRange {
#[inline(always)]
pub fn set_flag(&mut self, flag: LiveRangeFlag) {
- self.uses_spill_weight_and_flags |= (flag as u32) << 30;
+ self.uses_spill_weight_and_flags |= (flag as u32) << 29;
}
#[inline(always)]
pub fn clear_flag(&mut self, flag: LiveRangeFlag) {
- self.uses_spill_weight_and_flags &= !((flag as u32) << 30);
+ self.uses_spill_weight_and_flags &= !((flag as u32) << 29);
}
#[inline(always)]
pub fn has_flag(&self, flag: LiveRangeFlag) -> bool {
- self.uses_spill_weight_and_flags & ((flag as u32) << 30) != 0
+ self.uses_spill_weight_and_flags & ((flag as u32) << 29) != 0
}
#[inline(always)]
pub fn uses_spill_weight(&self) -> u32 {
- self.uses_spill_weight_and_flags & 0x3fff_ffff
+ self.uses_spill_weight_and_flags & 0x1fff_ffff
}
#[inline(always)]
pub fn set_uses_spill_weight(&mut self, weight: u32) {
- assert!(weight < (1 << 30));
+ assert!(weight < (1 << 29));
self.uses_spill_weight_and_flags =
- (self.uses_spill_weight_and_flags & 0xc000_0000) | weight;
+ (self.uses_spill_weight_and_flags & 0xe000_0000) | weight;
}
}
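The flag field grows from two bits to three in this hunk to make room for `StartsAtDef`: the shift moves from 30 to 29, the weight mask shrinks from 0x3fff_ffff to 0x1fff_ffff, and the flag mask grows from 0xc000_0000 to 0xe000_0000. A sketch of the resulting packing (a restatement of the hunk above, not code from the commit):

// uses_spill_weight_and_flags: u32
//   bits 31..=29: LiveRangeFlag bits (Minimal = 1, Fixed = 2, StartsAtDef = 4)
//   bits 28..=0 : uses/spill-weight value, so weight must be < (1 << 29)
const FLAG_SHIFT: u32 = 29;
const WEIGHT_MASK: u32 = 0x1fff_ffff; // low 29 bits
const FLAG_MASK: u32 = 0xe000_0000; // high 3 bits

fn pack(weight: u32, flags: u32) -> u32 {
    assert!((weight & !WEIGHT_MASK) == 0);
    (flags << FLAG_SHIFT) | weight
}

fn unpack(word: u32) -> (u32, u32) {
    // Returns (weight, flags).
    (word & WEIGHT_MASK, (word & FLAG_MASK) >> FLAG_SHIFT)
}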
@@ -634,6 +635,7 @@ struct InsertedMove {
prio: InsertMovePrio,
from_alloc: Allocation,
to_alloc: Allocation,
+ to_vreg: Option<VReg>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -1333,18 +1335,6 @@ impl<'a, F: Function> Env<'a, F> {
// dead.
if src.vreg() != dst.vreg() {
log::debug!(" -> move inst{}: src {} -> dst {}", inst.index(), src, dst);
- if log::log_enabled!(log::Level::Debug) {
- self.annotate(
- ProgPoint::after(inst),
- format!(
- " prog-move v{} ({:?}) -> v{} ({:?})",
- src.vreg().vreg(),
- src.policy(),
- dst.vreg().vreg(),
- dst.policy(),
- ),
- );
- }
assert_eq!(src.class(), dst.class());
assert_eq!(src.kind(), OperandKind::Use);
@@ -1377,6 +1367,19 @@ impl<'a, F: Function> Env<'a, F> {
OperandPos::Before,
);
+ if log::log_enabled!(log::Level::Debug) {
+ self.annotate(
+ ProgPoint::after(inst),
+ format!(
+ " prog-move v{} ({:?}) -> v{} ({:?})",
+ src.vreg().vreg(),
+ src_policy,
+ dst.vreg().vreg(),
+ dst_policy,
+ ),
+ );
+ }
// N.B.: in order to integrate with the move
// resolution that joins LRs in general, we
// conceptually treat the move as happening
@@ -1422,15 +1425,11 @@ impl<'a, F: Function> Env<'a, F> {
log::debug!(" -> started at block start; trimming to {:?}", pos);
self.ranges_hot[dst_lr.index()].range.from = pos;
}
+ self.ranges[dst_lr.index()].set_flag(LiveRangeFlag::StartsAtDef);
live.set(dst.vreg().vreg(), false);
vreg_ranges[dst.vreg().vreg()] = LiveRangeIndex::invalid();
self.vreg_regs[dst.vreg().vreg()] = dst.vreg();
- let u = UseIndex::new(self.uses.len());
- self.uses
- .push(Use::new(dst, pos, UseIndex::invalid(), SLOT_NONE as u8));
- self.insert_use_into_liverange_and_update_stats(dst_lr, u);
// Handle the use w.r.t. liveranges: make it live
// and create an initial LR back to the start of
// the block.
@@ -1448,11 +1447,6 @@ impl<'a, F: Function> Env<'a, F> {
log::debug!(" -> src LR {:?}", src_lr);
- let u = UseIndex::new(self.uses.len());
- self.uses
- .push(Use::new(src, pos, UseIndex::invalid(), SLOT_NONE as u8));
- self.insert_use_into_liverange_and_update_stats(src_lr, u);
// Add to live-set.
let src_is_dead_after_move = !live.get(src.vreg().vreg());
live.set(src.vreg().vreg(), true);
@@ -1572,6 +1566,8 @@ impl<'a, F: Function> Env<'a, F> {
self.ranges_hot[lr.index()].range.from = pos;
}
+ self.ranges[lr.index()].set_flag(LiveRangeFlag::StartsAtDef);
// Remove from live-set.
live.set(operand.vreg().vreg(), false);
vreg_ranges[operand.vreg().vreg()] = LiveRangeIndex::invalid();
@@ -1981,8 +1977,9 @@ impl<'a, F: Function> Env<'a, F> {
self.annotate(
self.ranges_hot[iter0.index()].range.from,
format!(
" MERGE range{} from bundle{} to bundle{}",
" MERGE range{} v{} from bundle{} to bundle{}",
iter0.index(),
+ self.ranges[iter0.index()].vreg.index(),
from.index(),
to.index(),
),
@@ -2022,8 +2019,9 @@ impl<'a, F: Function> Env<'a, F> {
self.annotate(
self.ranges_hot[next.index()].range.from,
format!(
" MERGE range{} from bundle{} to bundle{}",
" MERGE range{} v{} from bundle{} to bundle{}",
next.index(),
+ self.ranges[next.index()].vreg.index(),
from.index(),
to.index(),
),
@@ -3611,6 +3609,7 @@ impl<'a, F: Function> Env<'a, F> {
prio: InsertMovePrio,
from_alloc: Allocation,
to_alloc: Allocation,
+ to_vreg: Option<VReg>,
) {
debug!(
"insert_move: pos {:?} prio {:?} from_alloc {:?} to_alloc {:?}",
@@ -3627,6 +3626,7 @@ impl<'a, F: Function> Env<'a, F> {
prio,
from_alloc,
to_alloc,
+ to_vreg,
});
}
@@ -3785,12 +3785,8 @@ impl<'a, F: Function> Env<'a, F> {
if prev.is_valid() {
let prev_alloc = self.get_alloc_for_range(prev);
let prev_range = self.ranges_hot[prev.index()].range;
- let first_use = self.ranges[iter.index()].first_use;
- let first_is_def = if first_use.is_valid() {
- self.uses[first_use.index()].operand.kind() == OperandKind::Def
- } else {
- false
- };
+ let first_is_def =
+ self.ranges[iter.index()].has_flag(LiveRangeFlag::StartsAtDef);
debug_assert!(prev_alloc != Allocation::none());
if prev_range.to == range.from
&& !self.is_start_of_block(range.from)
@@ -3805,7 +3801,13 @@ impl<'a, F: Function> Env<'a, F> {
vreg.index()
);
assert_eq!(range.from.pos(), InstPosition::Before);
- self.insert_move(range.from, InsertMovePrio::Regular, prev_alloc, alloc);
+ self.insert_move(
+ range.from,
+ InsertMovePrio::Regular,
+ prev_alloc,
+ alloc,
+ None,
+ );
}
}
@@ -4192,7 +4194,7 @@ impl<'a, F: Function> Env<'a, F> {
if last == Some(dest.alloc) {
continue;
}
- self.insert_move(insertion_point, prio, src.alloc, dest.alloc);
+ self.insert_move(insertion_point, prio, src.alloc, dest.alloc, None);
last = Some(dest.alloc);
}
}
@@ -4212,6 +4214,7 @@ impl<'a, F: Function> Env<'a, F> {
InsertMovePrio::MultiFixedReg,
Allocation::reg(self.pregs[from_preg.index()].reg),
Allocation::reg(self.pregs[to_preg.index()].reg),
+ None,
);
self.set_alloc(
progpoint.inst(),
@@ -4294,6 +4297,7 @@ impl<'a, F: Function> Env<'a, F> {
InsertMovePrio::ReusedInput,
input_alloc,
output_alloc,
+ None,
);
self.set_alloc(inst, input_idx, output_alloc);
}
@@ -4310,14 +4314,15 @@ impl<'a, F: Function> Env<'a, F> {
let prog_move_srcs = std::mem::replace(&mut self.prog_move_srcs, vec![]);
let prog_move_dsts = std::mem::replace(&mut self.prog_move_dsts, vec![]);
assert_eq!(prog_move_srcs.len(), prog_move_dsts.len());
- for (&((_, from_inst), from_alloc), &((_, to_inst), to_alloc)) in
+ for (&((_, from_inst), from_alloc), &((to_vreg, to_inst), to_alloc)) in
prog_move_srcs.iter().zip(prog_move_dsts.iter())
{
log::debug!(
"program move at inst {:?}: alloc {:?} -> {:?}",
"program move at inst {:?}: alloc {:?} -> {:?} (v{})",
from_inst,
from_alloc,
- to_alloc
+ to_alloc,
+ to_vreg.index(),
);
assert!(!from_alloc.is_none());
assert!(!to_alloc.is_none());
@@ -4332,6 +4337,7 @@ impl<'a, F: Function> Env<'a, F> {
InsertMovePrio::Regular,
from_alloc,
to_alloc,
+ Some(self.vreg_regs[to_vreg.index()]),
);
}
}
@@ -4389,17 +4395,25 @@ impl<'a, F: Function> Env<'a, F> {
));
log::debug!("parallel moves at pos {:?} prio {:?}", pos, prio);
for m in moves {
- if m.from_alloc != m.to_alloc {
+ if (m.from_alloc != m.to_alloc) || m.to_vreg.is_some() {
log::debug!(" {} -> {}", m.from_alloc, m.to_alloc,);
- parallel_moves.add(m.from_alloc, m.to_alloc);
+ parallel_moves.add(m.from_alloc, m.to_alloc, m.to_vreg);
}
}
let resolved = parallel_moves.resolve();
- for (src, dst) in resolved {
- log::debug!(" resolved: {} -> {}", src, dst);
- self.add_edit(pos, prio, Edit::Move { from: src, to: dst });
+ for (src, dst, to_vreg) in resolved {
+ log::debug!(" resolved: {} -> {} ({:?})", src, dst, to_vreg);
+ self.add_edit(
+ pos,
+ prio,
+ Edit::Move {
+ from: src,
+ to: dst,
+ to_vreg,
+ },
+ );
}
}
}
@@ -4446,10 +4460,10 @@ impl<'a, F: Function> Env<'a, F> {
for i in 0..self.edits.len() {
let &(pos, _, ref edit) = &self.edits[i];
match edit {
- &Edit::Move { from, to } => {
+ &Edit::Move { from, to, to_vreg } => {
self.annotate(
ProgPoint::from_index(pos),
format!("move {} -> {}", from, to),
format!("move {} -> {} ({:?})", from, to, to_vreg),
);
}
&Edit::BlockParams {
@@ -4466,8 +4480,8 @@ impl<'a, F: Function> Env<'a, F> {
fn add_edit(&mut self, pos: ProgPoint, prio: InsertMovePrio, edit: Edit) {
match &edit {
- &Edit::Move { from, to } if from == to => return,
- &Edit::Move { from, to } if from.is_reg() && to.is_reg() => {
+ &Edit::Move { from, to, to_vreg } if from == to && to_vreg.is_none() => return,
+ &Edit::Move { from, to, .. } if from.is_reg() && to.is_reg() => {
assert_eq!(from.as_reg().unwrap().class(), to.as_reg().unwrap().class());
}
_ => {}

View File

@@ -879,7 +879,18 @@ impl ProgPoint {
pub enum Edit {
/// Move one allocation to another. Each allocation may be a
/// register or a stack slot (spillslot).
- Move { from: Allocation, to: Allocation },
+ ///
+ /// `to_vreg`, if defined, is useful as metadata: it indicates
+ /// that the moved value is a def of a new vreg.
+ ///
+ /// `Move` edits will be generated even if src and dst allocation
+ /// are the same if the vreg changes; this allows proper metadata
+ /// tracking even when moves are elided.
+ Move {
+ from: Allocation,
+ to: Allocation,
+ to_vreg: Option<VReg>,
+ },
/// Define blockparams' locations. Note that this is not typically
/// turned into machine code, but can be useful metadata (e.g. for
/// the checker).
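As a concrete example of the elided-move case described in the new doc comment (a hypothetical scenario; the constructor names follow the crate's public API as of this commit, so treat the exact paths as an assumption): a program move `v1 -> v2` whose source and destination both land in the same physical register still produces an edit, recording that the register now carries a def of `v2`:

use regalloc2::{Allocation, Edit, PReg, RegClass, VReg};

fn example_elided_move() -> Edit {
    // Both sides of the user move landed in p0: no data moves, but the
    // edit still records that p0 now carries a def of v2.
    let p0 = PReg::new(0, RegClass::Int);
    let v2 = VReg::new(2, RegClass::Int);
    Edit::Move {
        from: Allocation::reg(p0),
        to: Allocation::reg(p0),
        to_vreg: Some(v2),
    }
}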

View File

@@ -6,7 +6,7 @@
use crate::Allocation;
use smallvec::{smallvec, SmallVec};
- pub type MoveVec = SmallVec<[(Allocation, Allocation); 16]>;
+ pub type MoveVec<T> = SmallVec<[(Allocation, Allocation, T); 16]>;
/// A `ParallelMoves` represents a list of alloc-to-alloc moves that
/// must happen in parallel -- i.e., all reads of sources semantically
@@ -14,12 +14,12 @@ pub type MoveVec = SmallVec<[(Allocation, Allocation); 16]>;
/// allowed to overwrite sources. It can compute a list of sequential
/// moves that will produce the equivalent data movement, possibly
/// using a scratch register if one is necessary.
- pub struct ParallelMoves {
- parallel_moves: MoveVec,
+ pub struct ParallelMoves<T: Clone + Copy + Default> {
+ parallel_moves: MoveVec<T>,
scratch: Allocation,
}
- impl ParallelMoves {
+ impl<T: Clone + Copy + Default> ParallelMoves<T> {
pub fn new(scratch: Allocation) -> Self {
Self {
parallel_moves: smallvec![],
@@ -27,16 +27,16 @@ impl ParallelMoves {
}
}
- pub fn add(&mut self, from: Allocation, to: Allocation) {
- self.parallel_moves.push((from, to));
+ pub fn add(&mut self, from: Allocation, to: Allocation, t: T) {
+ self.parallel_moves.push((from, to, t));
}
fn sources_overlap_dests(&self) -> bool {
// Assumes `parallel_moves` has already been sorted in `resolve()` below.
- for &(_, dst) in &self.parallel_moves {
+ for &(_, dst, _) in &self.parallel_moves {
if self
.parallel_moves
- .binary_search_by_key(&dst, |&(src, _)| src)
+ .binary_search_by_key(&dst, |&(src, _, _)| src)
.is_ok()
{
return true;
@@ -45,7 +45,7 @@ impl ParallelMoves {
false
}
- pub fn resolve(mut self) -> MoveVec {
+ pub fn resolve(mut self) -> MoveVec<T> {
// Easy case: zero or one move. Just return our vec.
if self.parallel_moves.len() <= 1 {
return self.parallel_moves;
@@ -53,7 +53,7 @@ impl ParallelMoves {
// Sort moves by source so that we can efficiently test for
// presence.
- self.parallel_moves.sort();
+ self.parallel_moves.sort_by_key(|&(src, dst, _)| (src, dst));
// Do any dests overlap sources? If not, we can also just
// return the list.
@@ -77,10 +77,10 @@ impl ParallelMoves {
// Sort moves by destination and check that each destination
// has only one writer.
- self.parallel_moves.sort_by_key(|&(_, dst)| dst);
+ self.parallel_moves.sort_by_key(|&(_, dst, _)| dst);
if cfg!(debug) {
let mut last_dst = None;
- for &(_, dst) in &self.parallel_moves {
+ for &(_, dst, _) in &self.parallel_moves {
if last_dst.is_some() {
assert!(last_dst.unwrap() != dst);
}
@@ -94,10 +94,10 @@ impl ParallelMoves {
// above so we can efficiently find such a move, if any.
let mut must_come_before: SmallVec<[Option<usize>; 16]> =
smallvec![None; self.parallel_moves.len()];
- for (i, &(src, _)) in self.parallel_moves.iter().enumerate() {
+ for (i, &(src, _, _)) in self.parallel_moves.iter().enumerate() {
if let Ok(move_to_dst_idx) = self
.parallel_moves
- .binary_search_by_key(&src, |&(_, dst)| dst)
+ .binary_search_by_key(&src, |&(_, dst, _)| dst)
{
must_come_before[i] = Some(move_to_dst_idx);
}
@@ -107,7 +107,7 @@ impl ParallelMoves {
// then reverse at the end for RPO. Unlike Tarjan's SCC
// algorithm, we can emit a cycle as soon as we find one, as
// noted above.
- let mut ret: MoveVec = smallvec![];
+ let mut ret: MoveVec<T> = smallvec![];
let mut stack: SmallVec<[usize; 16]> = smallvec![];
let mut visited: SmallVec<[bool; 16]> = smallvec![false; self.parallel_moves.len()];
let mut onstack: SmallVec<[bool; 16]> = smallvec![false; self.parallel_moves.len()];
@@ -176,14 +176,14 @@ impl ParallelMoves {
let mut scratch_src = None;
while let Some(move_idx) = stack.pop() {
onstack[move_idx] = false;
- let (mut src, dst) = self.parallel_moves[move_idx];
+ let (mut src, dst, dst_t) = self.parallel_moves[move_idx];
if last_dst.is_none() {
scratch_src = Some(src);
src = self.scratch;
} else {
assert_eq!(last_dst.unwrap(), src);
}
- ret.push((src, dst));
+ ret.push((src, dst, dst_t));
last_dst = Some(dst);
@@ -192,7 +192,7 @@ impl ParallelMoves {
}
}
if let Some(src) = scratch_src {
- ret.push((src, self.scratch));
+ ret.push((src, self.scratch, T::default()));
}
}
}
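A minimal standalone usage sketch of the generic interface, mirroring the fuzz target at the top of this commit: a two-move cycle (a swap) forces resolution through the scratch register, and each emitted move carries its payload. `ParallelMoves` is crate-internal here, so picture this as an in-tree test; the `Option<u32>` payload stands in for the allocator's `Option<VReg>`.

use crate::moves::ParallelMoves;
use crate::{Allocation, PReg, RegClass};

#[test]
fn swap_routes_through_scratch() {
    let reg = |i| Allocation::reg(PReg::new(i, RegClass::Int));
    let scratch = reg(31);
    let mut par = ParallelMoves::new(scratch);
    // r0 and r1 swap in parallel: a cycle, so the resolver must break
    // it by routing one value through the scratch register.
    par.add(reg(0), reg(1), Some(1u32));
    par.add(reg(1), reg(0), Some(0u32));
    for (src, dst, payload) in par.resolve() {
        // The final scratch-restoring move carries `T::default()`
        // (None); the others keep the payloads they were added with.
        println!("{} -> {} ({:?})", src, dst, payload);
    }
}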