regalloc2/src/moves.rs

Amanieu d'Antras, 2bd03256b3 (2023-03-09 11:25:59 -08:00): Make regalloc2 #![no_std] (#119)
* Make regalloc2 `#![no_std]`

This crate doesn't require any features from the standard library, so it
can be made `no_std` to allow it to be used in environments that can't
use the Rust standard library.

This PR mainly performs the following mechanical changes:
- `std::collections` is replaced with `alloc::collections`.
- `std::*` is replaced with `core::*`.
- `Vec`, `vec!`, `format!` and `ToString` are imported when needed since
  they are no longer in the prelude.
- `HashSet` and `HashMap` are taken from the `hashbrown` crate, which is
  the same implementation that the standard library uses.
- `FxHashSet` and `FxHashMap` are typedefs in `lib.rs` that are based on
  the `hashbrown` types.
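
  A minimal sketch of what the last two items describe (assuming `rustc_hash::FxHasher` as the hash function; the real definitions in `lib.rs` may differ in visibility or detail):

  ```rust
  use core::hash::BuildHasherDefault;
  use rustc_hash::FxHasher;

  // hashbrown provides the same hash-table implementation that
  // std's HashMap/HashSet use, but without requiring std.
  pub type FxHashMap<K, V> = hashbrown::HashMap<K, V, BuildHasherDefault<FxHasher>>;
  pub type FxHashSet<V> = hashbrown::HashSet<V, BuildHasherDefault<FxHasher>>;
  ```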

The only functional change is that `RegAllocError` no longer implements
the `Error` trait since that is not available in `core`.

Dependencies were adjusted to not require `std` and this is tested in CI
by building against the `thumbv6m-none-eabi` target that doesn't have
`std`.

* Add the Error trait impl back under a "std" feature
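
A minimal sketch of what that gating might look like (the exact impl body is assumed; `RegAllocError` is the crate's error type):

```rust
// Hypothetical shape: re-enable the Error impl only when the
// "std" feature is active, since the Error trait lives in std here.
#[cfg(feature = "std")]
impl std::error::Error for RegAllocError {}
```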


/*
* Released under the terms of the Apache 2.0 license with LLVM
* exception. See `LICENSE` for details.
*/
use crate::{ion::data_structures::u64_key, Allocation, PReg};
use core::fmt::Debug;
use smallvec::{smallvec, SmallVec};
/// A list of moves to be performed in sequence, with auxiliary data
/// attached to each.
pub type MoveVec<T> = SmallVec<[(Allocation, Allocation, T); 16]>;
/// A list of moves to be performed in sequence, like a
/// `MoveVec<T>`, except that an unchosen scratch space may occur as
/// well, represented by `Allocation::none()`.
#[derive(Clone, Debug)]
pub enum MoveVecWithScratch<T> {
/// No scratch was actually used.
NoScratch(MoveVec<T>),
/// A scratch space was used.
Scratch(MoveVec<T>),
}
/// A `ParallelMoves` represents a list of alloc-to-alloc moves that
/// must happen in parallel -- i.e., all reads of sources semantically
/// happen before all writes of destinations, and destinations are
/// allowed to overwrite sources. It can compute a list of sequential
/// moves that will produce the equivalent data movement, possibly
/// using a scratch register if one is necessary.
pub struct ParallelMoves<T: Clone + Copy + Default> {
parallel_moves: MoveVec<T>,
}
impl<T: Clone + Copy + Default> ParallelMoves<T> {
pub fn new() -> Self {
Self {
parallel_moves: smallvec![],
}
}
pub fn add(&mut self, from: Allocation, to: Allocation, t: T) {
self.parallel_moves.push((from, to, t));
}
fn sources_overlap_dests(&self) -> bool {
// Assumes `parallel_moves` has already been sorted in `resolve()` below.
for &(_, dst, _) in &self.parallel_moves {
if self
.parallel_moves
.binary_search_by_key(&dst, |&(src, _, _)| src)
.is_ok()
{
return true;
}
}
false
}
/// Resolve the parallel-moves problem to a sequence of separate
/// moves, such that the combined effect of the sequential moves
/// is as-if all of the moves added to this `ParallelMoves`
/// resolver happened in parallel.
///
/// Sometimes, if there is a cycle, a scratch register is
/// necessary to allow the moves to occur sequentially. In this
/// case, `Allocation::none()` appears in the returned move list
/// to stand for the scratch register. The caller may choose to
/// always keep a separate scratch register unused so that this
/// placeholder can be trivially rewritten; or it may dynamically
/// search for or create a free register as needed, if none are
/// available.
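///
/// A hedged usage sketch (a register-to-register swap, which forms a
/// cycle and so needs the scratch; constructors as in this crate):
///
/// ```ignore
/// let p0 = Allocation::reg(PReg::new(0, RegClass::Int));
/// let p1 = Allocation::reg(PReg::new(1, RegClass::Int));
/// let mut pm = ParallelMoves::<()>::new();
/// pm.add(p0, p1, ());
/// pm.add(p1, p0, ());
/// assert!(pm.resolve().needs_scratch());
/// ```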
pub fn resolve(mut self) -> MoveVecWithScratch<T> {
// Easy case: zero or one move. Just return our vec.
if self.parallel_moves.len() <= 1 {
return MoveVecWithScratch::NoScratch(self.parallel_moves);
}
// Sort moves by source so that we can efficiently test for
// presence.
self.parallel_moves
.sort_by_key(|&(src, dst, _)| u64_key(src.bits(), dst.bits()));
// Do any dests overlap sources? If not, we can also just
// return the list.
if !self.sources_overlap_dests() {
return MoveVecWithScratch::NoScratch(self.parallel_moves);
}
// General case: some moves overwrite dests that other moves
// read as sources. We'll use a general algorithm.
//
// *Important property*: because we expect that each register
// has only one writer (otherwise the effect of the parallel
// move is undefined), each move can only block one other move
// (with its one source corresponding to the one writer of
// that source). Thus, we *can only have simple cycles* (those
// that are a ring of nodes, i.e., with only one path from a
// node back to itself); there are no SCCs that are more
// complex than that. We leverage this fact below to avoid
// having to do a full Tarjan SCC DFS (with lowest-index
// computation, etc.): instead, as soon as we find a cycle, we
// know we have the full cycle and we can do a cyclic move
// sequence and continue.
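//
// For example, the parallel set { A := B, B := A } (a swap) is a
// simple 2-cycle: each move's source has exactly one writer, so
// the DFS below can emit the entire cycle the moment it reaches
// a node that is already on the stack.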
// Sort moves by destination and check that each destination
// has only one writer.
self.parallel_moves.sort_by_key(|&(_, dst, _)| dst);
if cfg!(debug_assertions) {
let mut last_dst = None;
for &(_, dst, _) in &self.parallel_moves {
if last_dst.is_some() {
debug_assert!(last_dst.unwrap() != dst);
}
last_dst = Some(dst);
}
}
// Construct a mapping from move indices to moves they must
// come before. Any given move must come before a move that
// overwrites its destination; we have moves sorted by dest
// above so we can efficiently find such a move, if any.
let mut must_come_before: SmallVec<[Option<usize>; 16]> =
smallvec![None; self.parallel_moves.len()];
for (i, &(src, _, _)) in self.parallel_moves.iter().enumerate() {
if let Ok(move_to_dst_idx) = self
.parallel_moves
.binary_search_by_key(&src, |&(_, dst, _)| dst)
{
must_come_before[i] = Some(move_to_dst_idx);
}
}
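// For example, given the moves [ A -> B, B -> C ], the move
// B -> C must read B before A -> B overwrites it, so
// must_come_before[B -> C] points at A -> B.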
// Do a simple stack-based DFS and emit moves in postorder,
// then reverse at the end for RPO. Unlike Tarjan's SCC
// algorithm, we can emit a cycle as soon as we find one, as
// noted above.
let mut ret: MoveVec<T> = smallvec![];
let mut stack: SmallVec<[usize; 16]> = smallvec![];
let mut visited: SmallVec<[bool; 16]> = smallvec![false; self.parallel_moves.len()];
let mut onstack: SmallVec<[bool; 16]> = smallvec![false; self.parallel_moves.len()];
let mut scratch_used = false;
stack.push(0);
onstack[0] = true;
loop {
if stack.is_empty() {
if let Some(next) = visited.iter().position(|&flag| !flag) {
stack.push(next);
onstack[next] = true;
} else {
break;
}
}
let top = *stack.last().unwrap();
visited[top] = true;
match must_come_before[top] {
None => {
ret.push(self.parallel_moves[top]);
onstack[top] = false;
stack.pop();
while let Some(top) = stack.pop() {
ret.push(self.parallel_moves[top]);
onstack[top] = false;
}
}
Some(next) if visited[next] && !onstack[next] => {
ret.push(self.parallel_moves[top]);
onstack[top] = false;
stack.pop();
while let Some(top) = stack.pop() {
ret.push(self.parallel_moves[top]);
onstack[top] = false;
}
}
Some(next) if !visited[next] && !onstack[next] => {
stack.push(next);
onstack[next] = true;
continue;
}
Some(next) => {
// Found a cycle -- emit a cyclic-move sequence
// for the cycle on the top of stack, then normal
// moves below it. Recall that these moves will be
// reversed in sequence, so from the original
// parallel move set
//
// { B := A, C := B, A := C }
//
// we will generate something like:
//
// A := scratch
// B := A
// C := B
// scratch := C
//
// which will become:
//
// scratch := C
// C := B
// B := A
// A := scratch
let mut last_dst = None;
let mut scratch_src = None;
while let Some(move_idx) = stack.pop() {
onstack[move_idx] = false;
let (mut src, dst, dst_t) = self.parallel_moves[move_idx];
if last_dst.is_none() {
scratch_src = Some(src);
src = Allocation::none();
scratch_used = true;
} else {
debug_assert_eq!(last_dst.unwrap(), src);
}
ret.push((src, dst, dst_t));
last_dst = Some(dst);
if move_idx == next {
break;
}
}
if let Some(src) = scratch_src {
ret.push((src, Allocation::none(), T::default()));
}
}
}
}
ret.reverse();
if scratch_used {
MoveVecWithScratch::Scratch(ret)
} else {
MoveVecWithScratch::NoScratch(ret)
}
}
}
impl<T> MoveVecWithScratch<T> {
/// Fills in the scratch space, if needed, with the given
/// register/allocation and returns a final list of moves. The
/// scratch register must not occur anywhere in the parallel-move
/// problem given to the resolver that produced this
/// `MoveVecWithScratch`.
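///
/// A hedged sketch (`resolved` is a `MoveVecWithScratch` from
/// `resolve()`; the register chosen here is assumed to be free at
/// this program point):
///
/// ```ignore
/// let scratch = Allocation::reg(PReg::new(15, RegClass::Int));
/// let moves = resolved.with_scratch(scratch);
/// ```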
pub fn with_scratch(self, scratch: Allocation) -> MoveVec<T> {
match self {
MoveVecWithScratch::NoScratch(moves) => moves,
MoveVecWithScratch::Scratch(mut moves) => {
for (src, dst, _) in &mut moves {
debug_assert!(
*src != scratch && *dst != scratch,
"Scratch register should not also be an actual source or dest of moves"
);
debug_assert!(
!(src.is_none() && dst.is_none()),
"Move resolution should not have produced a scratch-to-scratch move"
);
if src.is_none() {
*src = scratch;
}
if dst.is_none() {
*dst = scratch;
}
}
moves
}
}
}
/// Unwrap without a scratch register.
pub fn without_scratch(self) -> Option<MoveVec<T>> {
match self {
MoveVecWithScratch::NoScratch(moves) => Some(moves),
MoveVecWithScratch::Scratch(..) => None,
}
}
/// Do we need a scratch register?
pub fn needs_scratch(&self) -> bool {
match self {
MoveVecWithScratch::NoScratch(..) => false,
MoveVecWithScratch::Scratch(..) => true,
}
}
/// Do any moves go from stack to stack?
pub fn stack_to_stack(&self, is_stack_alloc: impl Fn(Allocation) -> bool) -> bool {
match self {
MoveVecWithScratch::NoScratch(moves) | MoveVecWithScratch::Scratch(moves) => moves
.iter()
.any(|&(src, dst, _)| is_stack_alloc(src) && is_stack_alloc(dst)),
}
}
}
/// Final stage of move resolution: finding or using scratch
/// registers, creating them if necessary by using stackslots, and
/// ensuring that the final list of moves contains no stack-to-stack
/// moves.
///
/// The resolved list of moves may need one or two scratch registers,
/// and maybe a stackslot, to ensure these conditions. Our general
/// strategy has two steps.
///
/// First, we find a scratch register, so we only have to worry about
/// a list of moves, all with real locations as src and dest. If we're
/// lucky and there are any registers not allocated at this
/// program-point, we can use a real register. Otherwise, we use an
/// extra stackslot. This is fine, because at this step,
/// stack-to-stack moves are OK.
///
/// Then, we resolve stack-to-stack moves into stack-to-reg /
/// reg-to-stack pairs. For this, we try to allocate a second free
/// register. If unavailable, we create another scratch stackslot, and
/// we pick a "victim" register in the appropriate class, and we
/// resolve into: victim -> extra-stackslot; stack-src -> victim;
/// victim -> stack-dst; extra-stackslot -> victim.
///
/// Sometimes move elision will be able to clean this up a bit. But,
/// for simplicity reasons, let's keep the concerns separated! So we
/// always do the full expansion above.
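///
/// A hedged usage sketch (`free_regs`, `alloc_stackslot`, and `is_stack`
/// are assumed caller-side helpers; `victim` is a statically chosen PReg):
///
/// ```ignore
/// let resolver = MoveAndScratchResolver::new(
///     || free_regs.pop(),   // GetReg: a free register, if any
///     || alloc_stackslot(), // GetStackSlot: always succeeds
///     |a| is_stack(a),      // IsStackAlloc
///     victim,               // last-ditch register to evict
/// );
/// let final_moves = resolver.compute(parallel_moves.resolve());
/// ```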
pub struct MoveAndScratchResolver<GetReg, GetStackSlot, IsStackAlloc>
where
GetReg: FnMut() -> Option<Allocation>,
GetStackSlot: FnMut() -> Allocation,
IsStackAlloc: Fn(Allocation) -> bool,
{
/// Scratch register for stack-to-stack move expansion.
stack_stack_scratch_reg: Option<Allocation>,
/// Stackslot into which we need to save the stack-to-stack
/// scratch reg before doing any stack-to-stack moves, if we stole
/// the reg.
stack_stack_scratch_reg_save: Option<Allocation>,
/// Closure that finds us a PReg at the current location.
find_free_reg: GetReg,
/// Closure that gets us a stackslot, if needed.
get_stackslot: GetStackSlot,
/// Closure to determine whether an `Allocation` refers to a stack slot.
is_stack_alloc: IsStackAlloc,
/// The victim PReg to evict to another stackslot at every
/// stack-to-stack move if a free PReg is not otherwise
/// available. Provided by caller and statically chosen. This is a
/// very last-ditch option, so static choice is OK.
victim: PReg,
}
impl<GetReg, GetStackSlot, IsStackAlloc> MoveAndScratchResolver<GetReg, GetStackSlot, IsStackAlloc>
where
GetReg: FnMut() -> Option<Allocation>,
GetStackSlot: FnMut() -> Allocation,
IsStackAlloc: Fn(Allocation) -> bool,
{
pub fn new(
find_free_reg: GetReg,
get_stackslot: GetStackSlot,
is_stack_alloc: IsStackAlloc,
victim: PReg,
) -> Self {
Self {
stack_stack_scratch_reg: None,
stack_stack_scratch_reg_save: None,
find_free_reg,
get_stackslot,
is_stack_alloc,
victim,
}
}
pub fn compute<T: Debug + Copy>(mut self, moves: MoveVecWithScratch<T>) -> MoveVec<T> {
// First, do we have a vec with no stack-to-stack moves or use
// of a scratch register? Fast return if so.
if !moves.needs_scratch() && !moves.stack_to_stack(&self.is_stack_alloc) {
return moves.without_scratch().unwrap();
}
let mut result = smallvec![];
// Now, find a scratch allocation in order to resolve cycles.
let scratch = (self.find_free_reg)().unwrap_or_else(|| (self.get_stackslot)());
trace!("scratch resolver: scratch alloc {:?}", scratch);
let moves = moves.with_scratch(scratch);
for &(src, dst, data) in &moves {
// Do we have a stack-to-stack move? If so, resolve.
if (self.is_stack_alloc)(src) && (self.is_stack_alloc)(dst) {
trace!("scratch resolver: stack to stack: {:?} -> {:?}", src, dst);
// Lazily allocate a stack-to-stack scratch.
if self.stack_stack_scratch_reg.is_none() {
if let Some(reg) = (self.find_free_reg)() {
trace!(
"scratch resolver: have free stack-to-stack scratch preg: {:?}",
reg
);
self.stack_stack_scratch_reg = Some(reg);
} else {
self.stack_stack_scratch_reg = Some(Allocation::reg(self.victim));
self.stack_stack_scratch_reg_save = Some((self.get_stackslot)());
trace!("scratch resolver: stack-to-stack using victim {:?} with save stackslot {:?}",
self.stack_stack_scratch_reg,
self.stack_stack_scratch_reg_save);
}
}
// If we have a "victimless scratch", then do a
// stack-to-scratch / scratch-to-stack sequence.
if self.stack_stack_scratch_reg_save.is_none() {
result.push((src, self.stack_stack_scratch_reg.unwrap(), data));
result.push((self.stack_stack_scratch_reg.unwrap(), dst, data));
}
// Otherwise, save the current value in the
// stack-to-stack scratch reg (which is our victim) to
// the extra stackslot, then do the stack-to-scratch /
// scratch-to-stack sequence, then restore it.
else {
result.push((
self.stack_stack_scratch_reg.unwrap(),
self.stack_stack_scratch_reg_save.unwrap(),
data,
));
result.push((src, self.stack_stack_scratch_reg.unwrap(), data));
result.push((self.stack_stack_scratch_reg.unwrap(), dst, data));
result.push((
self.stack_stack_scratch_reg_save.unwrap(),
self.stack_stack_scratch_reg.unwrap(),
data,
));
}
} else {
// Normal move.
result.push((src, dst, data));
}
}
trace!("scratch resolver: got {:?}", result);
result
}
}