diff --git a/fuzz/fuzz_targets/moves.rs b/fuzz/fuzz_targets/moves.rs
index e62342f..b150016 100644
--- a/fuzz/fuzz_targets/moves.rs
+++ b/fuzz/fuzz_targets/moves.rs
@@ -7,30 +7,56 @@
 use libfuzzer_sys::arbitrary::{Arbitrary, Result, Unstructured};
 use libfuzzer_sys::fuzz_target;
-use regalloc2::fuzzing::moves::ParallelMoves;
-use regalloc2::{Allocation, PReg, RegClass};
-use std::collections::HashSet;
+use regalloc2::fuzzing::moves::{MoveAndScratchResolver, ParallelMoves};
+use regalloc2::{Allocation, PReg, RegClass, SpillSlot};
+use std::collections::{HashMap, HashSet};

 #[derive(Clone, Debug)]
 struct TestCase {
     moves: Vec<(Allocation, Allocation)>,
+    available_pregs: Vec<Allocation>,
 }

 impl Arbitrary for TestCase {
     fn arbitrary(u: &mut Unstructured) -> Result<Self> {
-        let mut ret = TestCase { moves: vec![] };
+        let mut ret = TestCase {
+            moves: vec![],
+            available_pregs: vec![],
+        };
         let mut written = HashSet::new();
+        // An arbitrary sequence of moves between registers 0 to 29
+        // inclusive.
         while bool::arbitrary(u)? {
-            let reg1 = u.int_in_range(0..=30)?;
-            let reg2 = u.int_in_range(0..=30)?;
-            if written.contains(&reg2) {
+            let src = if bool::arbitrary(u)? {
+                let reg = u.int_in_range(0..=29)?;
+                Allocation::reg(PReg::new(reg, RegClass::Int))
+            } else {
+                let slot = u.int_in_range(0..=31)?;
+                Allocation::stack(SpillSlot::new(slot, RegClass::Int))
+            };
+            let dst = if bool::arbitrary(u)? {
+                let reg = u.int_in_range(0..=29)?;
+                Allocation::reg(PReg::new(reg, RegClass::Int))
+            } else {
+                let slot = u.int_in_range(0..=31)?;
+                Allocation::stack(SpillSlot::new(slot, RegClass::Int))
+            };
+
+            // Stop if we are going to write a reg more than once:
+            // that creates an invalid parallel move set.
+            if written.contains(&dst) {
                 break;
             }
-            written.insert(reg2);
-            ret.moves.push((
-                Allocation::reg(PReg::new(reg1, RegClass::Int)),
-                Allocation::reg(PReg::new(reg2, RegClass::Int)),
-            ));
+            written.insert(dst);
+
+            ret.moves.push((src, dst));
+        }
+
+        // We might have some unallocated registers free for scratch
+        // space...
+        for i in 0..u.int_in_range(0..=2)? {
+            let reg = PReg::new(30 + i, RegClass::Int);
+            ret.available_pregs.push(Allocation::reg(reg));
         }
         Ok(ret)
     }
@@ -38,44 +64,64 @@ impl Arbitrary for TestCase {

 fuzz_target!(|testcase: TestCase| {
     let _ = env_logger::try_init();
-    let scratch = Allocation::reg(PReg::new(31, RegClass::Int));
-    let mut par = ParallelMoves::new(scratch);
+    let mut par = ParallelMoves::new();
     for &(src, dst) in &testcase.moves {
         par.add(src, dst, ());
     }
+
     let moves = par.resolve();
+    log::trace!("raw resolved moves: {:?}", moves);
+
+    // Resolve uses of scratch reg and stack-to-stack moves with the
+    // scratch resolver.
+    let mut avail = testcase.available_pregs.clone();
+    let get_reg = || avail.pop();
+    let mut next_slot = 32;
+    let get_stackslot = || {
+        let slot = next_slot;
+        next_slot += 1;
+        Allocation::stack(SpillSlot::new(slot, RegClass::Int))
+    };
+    let preferred_victim = PReg::new(0, RegClass::Int);
+    let scratch_resolver = MoveAndScratchResolver::new(get_reg, get_stackslot, preferred_victim);
+    let moves = scratch_resolver.compute(moves);
+    log::trace!("resolved moves: {:?}", moves);

     // Compute the final source reg for each dest reg in the original
     // parallel-move set.
-    let mut final_src_per_dest: Vec<Option<usize>> = vec![None; 32];
+    let mut final_src_per_dest: HashMap<Allocation, Allocation> = HashMap::new();
     for &(src, dst) in &testcase.moves {
-        if let (Some(preg_src), Some(preg_dst)) = (src.as_reg(), dst.as_reg()) {
-            final_src_per_dest[preg_dst.hw_enc()] = Some(preg_src.hw_enc());
-        }
+        final_src_per_dest.insert(dst, src);
     }
+    log::trace!("expected final state: {:?}", final_src_per_dest);

     // Simulate the sequence of moves.
-    let mut regfile: Vec<Option<usize>> = vec![None; 32];
-    for i in 0..32 {
-        regfile[i] = Some(i);
-    }
+    let mut locations: HashMap<Allocation, Allocation> = HashMap::new();
     for (src, dst, _) in moves {
-        if let (Some(preg_src), Some(preg_dst)) = (src.as_reg(), dst.as_reg()) {
-            let data = regfile[preg_src.hw_enc()];
-            regfile[preg_dst.hw_enc()] = data;
-        } else {
-            panic!("Bad allocation in move list");
+        if src.is_stack() && dst.is_stack() {
+            panic!("Stack-to-stack move!");
         }
+
+        let data = locations.get(&src).cloned().unwrap_or(src);
+        locations.insert(dst, data);
     }
+    log::trace!("simulated final state: {:?}", locations);

     // Assert that the expected register-moves occurred.
-    // N.B.: range up to 31 (not 32) to skip scratch register.
-    for i in 0..31 {
-        if let Some(orig_src) = final_src_per_dest[i] {
-            assert_eq!(regfile[i], Some(orig_src));
+    for (reg, data) in locations {
+        if let Some(&expected_data) = final_src_per_dest.get(&reg) {
+            assert_eq!(expected_data, data);
         } else {
-            // Should be untouched.
-            assert_eq!(regfile[i], Some(i));
+            if data != reg {
+                // If not just the original value, then this location
+                // has been modified, but it was not part of the
+                // original parallel move. It must have been an
+                // available preg or a scratch stackslot.
+                assert!(
+                    testcase.available_pregs.contains(&reg)
+                        || (reg.is_stack() && reg.as_stack().unwrap().index() >= 32)
+                );
+            }
         }
     }
 });
diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs
index b0d7ac7..e924c3d 100644
--- a/src/fuzzing/func.rs
+++ b/src/fuzzing/func.rs
@@ -625,18 +625,15 @@ impl std::fmt::Debug for Func {
 }

 pub fn machine_env() -> MachineEnv {
-    // Reg 63 is the scratch reg.
     fn regs(r: std::ops::Range<usize>) -> Vec<PReg> {
         r.map(|i| PReg::new(i, RegClass::Int)).collect()
     }
     let preferred_regs_by_class: [Vec<PReg>; 2] = [regs(0..24), vec![]];
     let non_preferred_regs_by_class: [Vec<PReg>; 2] = [regs(24..32), vec![]];
-    let scratch_by_class: [PReg; 2] = [PReg::new(63, RegClass::Int), PReg::new(0, RegClass::Float)];
-    let fixed_stack_slots = regs(32..63);
+    let fixed_stack_slots = regs(32..64);
     MachineEnv {
         preferred_regs_by_class,
         non_preferred_regs_by_class,
-        scratch_by_class,
         fixed_stack_slots,
     }
 }
diff --git a/src/ion/data_structures.rs b/src/ion/data_structures.rs
index 89f7350..394c6f2 100644
--- a/src/ion/data_structures.rs
+++ b/src/ion/data_structures.rs
@@ -351,7 +351,8 @@ pub struct Env<'a, F: Function> {
     pub spillslots: Vec<SpillSlotData>,
     pub slots_by_size: Vec<SpillSlotList>,

-    pub extra_spillslot: Vec<Option<Allocation>>,
+    pub extra_spillslots_by_class: [SmallVec<[Allocation; 2]>; 2],
+    pub preferred_victim_by_class: [PReg; 2],

     // Program moves: these are moves in the provided program that we
     // handle with our internal machinery, in order to avoid the
diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs
index af99393..ae1b14e 100644
--- a/src/ion/liveranges.rs
+++ b/src/ion/liveranges.rs
@@ -109,6 +109,13 @@ impl<'a, F: Function> Env<'a, F> {
         for &preg in &self.env.fixed_stack_slots {
             self.pregs[preg.index()].is_stack = true;
         }
+        for class in 0..self.preferred_victim_by_class.len() {
+            self.preferred_victim_by_class[class] = self.env.non_preferred_regs_by_class[class]
+                .last()
+                .or(self.env.preferred_regs_by_class[class].last())
+                .cloned()
+                .unwrap_or(PReg::invalid());
+        }
         // Create VRegs from the vreg count.
         for idx in 0..self.func.num_vregs() {
             // We'll fill in the real details when we see the def.
diff --git a/src/ion/mod.rs b/src/ion/mod.rs
index 8b8ceb3..e7b7028 100644
--- a/src/ion/mod.rs
+++ b/src/ion/mod.rs
@@ -31,6 +31,7 @@ use liveranges::*;
 pub(crate) mod merge;
 pub(crate) mod process;
 use process::*;
+use smallvec::smallvec;
 pub(crate) mod dump;
 pub(crate) mod moves;
 pub(crate) mod spill;
@@ -66,7 +67,8 @@ impl<'a, F: Function> Env<'a, F> {
             slots_by_size: vec![],
             allocated_bundle_count: 0,

-            extra_spillslot: vec![None, None],
+            extra_spillslots_by_class: [smallvec![], smallvec![]],
+            preferred_victim_by_class: [PReg::invalid(), PReg::invalid()],

             prog_move_srcs: Vec::with_capacity(n / 2),
             prog_move_dsts: Vec::with_capacity(n / 2),
diff --git a/src/ion/moves.rs b/src/ion/moves.rs
index 9002997..15ceab4 100644
--- a/src/ion/moves.rs
+++ b/src/ion/moves.rs
@@ -16,12 +16,16 @@ use super::{
     Env, InsertMovePrio, InsertedMove, LiveRangeFlag, LiveRangeIndex, RedundantMoveEliminator,
     VRegIndex, SLOT_NONE,
 };
-use crate::ion::data_structures::{BlockparamIn, BlockparamOut, CodeRange, PosWithPrio};
-use crate::moves::ParallelMoves;
+use crate::ion::data_structures::{
+    BlockparamIn, BlockparamOut, CodeRange, LiveRangeKey, PosWithPrio,
+};
+use crate::ion::reg_traversal::RegTraversalIter;
+use crate::moves::{MoveAndScratchResolver, ParallelMoves};
 use crate::{
     Allocation, Block, Edit, Function, Inst, InstPosition, OperandConstraint, OperandKind,
-    OperandPos, PReg, ProgPoint, RegClass, VReg,
+    OperandPos, PReg, ProgPoint, RegClass, SpillSlot, VReg,
 };
+use fxhash::FxHashMap;
 use smallvec::{smallvec, SmallVec};
 use std::fmt::Debug;

@@ -965,8 +969,7 @@ impl<'a, F: Function> Env<'a, F> {
         // have two separate ParallelMove instances. They need to
         // be separate because moves between the two classes are
         // impossible. (We could enhance ParallelMoves to
-        // understand register classes and take multiple scratch
-        // regs, but this seems simpler.)
+        // understand register classes, but this seems simpler.)
         let mut int_moves: SmallVec<[InsertedMove; 8]> = smallvec![];
         let mut float_moves: SmallVec<[InsertedMove; 8]> = smallvec![];

@@ -993,8 +996,7 @@ impl<'a, F: Function> Env<'a, F> {
             // All moves in `moves` semantically happen in
             // parallel. Let's resolve these to a sequence of moves
             // that can be done one at a time.
-            let scratch = self.env.scratch_by_class[regclass as u8 as usize];
-            let mut parallel_moves = ParallelMoves::new(Allocation::reg(scratch));
+            let mut parallel_moves = ParallelMoves::new();
             trace!(
                 "parallel moves at pos {:?} prio {:?}",
                 pos_prio.pos,
@@ -1008,59 +1010,79 @@ impl<'a, F: Function> Env<'a, F> {
             }

             let resolved = parallel_moves.resolve();
-
-            // If (i) the scratch register is used, and (ii) a
-            // stack-to-stack move exists, then we need to
-            // allocate an additional scratch spillslot to which
-            // we can temporarily spill the scratch reg when we
-            // lower the stack-to-stack move to a
-            // stack-to-scratch-to-stack sequence.
-            let scratch_used = resolved.iter().any(|&(src, dst, _)| {
-                src == Allocation::reg(scratch) || dst == Allocation::reg(scratch)
+            let mut scratch_iter = RegTraversalIter::new(
+                self.env,
+                regclass,
+                PReg::invalid(),
+                PReg::invalid(),
+                0,
+                None,
+            );
+            let key = LiveRangeKey::from_range(&CodeRange {
+                from: pos_prio.pos,
+                to: pos_prio.pos.next(),
             });
-            let stack_stack_move = resolved.iter().any(|&(src, dst, _)| {
-                self.allocation_is_stack(src) && self.allocation_is_stack(dst)
-            });
-            let extra_slot = if scratch_used && stack_stack_move {
-                if self.extra_spillslot[regclass as u8 as usize].is_none() {
-                    let slot = self.allocate_spillslot(regclass);
-                    self.extra_spillslot[regclass as u8 as usize] = Some(slot);
+            let get_reg = || {
+                while let Some(preg) = scratch_iter.next() {
+                    if !self.pregs[preg.index()]
+                        .allocations
+                        .btree
+                        .contains_key(&key)
+                    {
+                        let alloc = Allocation::reg(preg);
+                        if moves
+                            .iter()
+                            .any(|m| m.from_alloc == alloc || m.to_alloc == alloc)
+                        {
+                            // Skip pregs used by moves in this
+                            // parallel move set, even if not
+                            // marked used at progpoint: edge move
+                            // liveranges meet but don't overlap
+                            // so otherwise we may incorrectly
+                            // overwrite a source reg.
+                            continue;
+                        }
+                        return Some(alloc);
+                    }
                 }
-                self.extra_spillslot[regclass as u8 as usize]
-            } else {
                 None
             };
+            let mut stackslot_idx = 0;
+            let get_stackslot = || {
+                let idx = stackslot_idx;
+                stackslot_idx += 1;
+                // We can't borrow `self` as mutable, so we create
+                // these placeholders then allocate the actual
+                // slots if needed with `self.allocate_spillslot`
+                // below.
+                Allocation::stack(SpillSlot::new(SpillSlot::MAX - idx, regclass))
+            };
+            let preferred_victim = self.preferred_victim_by_class[regclass as usize];
+
+            let scratch_resolver =
+                MoveAndScratchResolver::new(get_reg, get_stackslot, preferred_victim);
+
+            let resolved = scratch_resolver.compute(resolved);
+
+            let mut rewrites = FxHashMap::default();
+            for i in 0..stackslot_idx {
+                if i >= self.extra_spillslots_by_class[regclass as usize].len() {
+                    let slot = self.allocate_spillslot(regclass);
+                    self.extra_spillslots_by_class[regclass as usize].push(slot);
+                }
+                rewrites.insert(
+                    Allocation::stack(SpillSlot::new(SpillSlot::MAX - i, regclass)),
+                    self.extra_spillslots_by_class[regclass as usize][i],
+                );
+            }

-            let mut scratch_used_yet = false;
             for (src, dst, to_vreg) in resolved {
+                let src = rewrites.get(&src).cloned().unwrap_or(src);
+                let dst = rewrites.get(&dst).cloned().unwrap_or(dst);
                 trace!("  resolved: {} -> {} ({:?})", src, dst, to_vreg);
                 let action = redundant_moves.process_move(src, dst, to_vreg);
                 if !action.elide {
-                    if dst == Allocation::reg(scratch) {
-                        scratch_used_yet = true;
-                    }
-                    if self.allocation_is_stack(src) && self.allocation_is_stack(dst) {
-                        if !scratch_used_yet {
-                            self.add_move_edit(pos_prio, src, Allocation::reg(scratch));
-                            self.add_move_edit(pos_prio, Allocation::reg(scratch), dst);
-                        } else {
-                            debug_assert!(extra_slot.is_some());
-                            self.add_move_edit(
-                                pos_prio,
-                                Allocation::reg(scratch),
-                                extra_slot.unwrap(),
-                            );
-                            self.add_move_edit(pos_prio, src, Allocation::reg(scratch));
-                            self.add_move_edit(pos_prio, Allocation::reg(scratch), dst);
-                            self.add_move_edit(
-                                pos_prio,
-                                extra_slot.unwrap(),
-                                Allocation::reg(scratch),
-                            );
-                        }
-                    } else {
-                        self.add_move_edit(pos_prio, src, dst);
-                    }
+                    self.add_move_edit(pos_prio, src, dst);
                 } else {
                     trace!("  -> redundant move elided");
                 }
@@ -1081,7 +1103,7 @@ impl<'a, F: Function> Env<'a, F> {
             let &(pos_prio, ref edit) = &self.edits[i];
             match edit {
                 &Edit::Move { from, to } => {
-                    self.annotate(pos_prio.pos, format!("move {} -> {})", from, to));
+                    self.annotate(pos_prio.pos, format!("move {} -> {}", from, to));
                 }
             }
         }
diff --git a/src/lib.rs b/src/lib.rs
index 5c778f2..95ca1e5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -248,10 +248,13 @@ pub struct SpillSlot {
 }

 impl SpillSlot {
+    /// The maximum spillslot index.
+    pub const MAX: usize = (1 << 24) - 1;
+
     /// Create a new SpillSlot of a given class.
     #[inline(always)]
     pub fn new(slot: usize, class: RegClass) -> Self {
-        debug_assert!(slot < (1 << 24));
+        debug_assert!(slot <= Self::MAX);
         SpillSlot {
             bits: (slot as u32) | (class as u8 as u32) << 24,
         }
@@ -1250,25 +1253,11 @@ pub struct MachineEnv {
     /// but still better than spilling.
     pub non_preferred_regs_by_class: [Vec<PReg>; 2],

-    /// One scratch register per class. This is needed to perform
-    /// moves between registers when cyclic move patterns occur. The
-    /// register should not be placed in either the preferred or
-    /// non-preferred list (i.e., it is not otherwise allocatable).
-    ///
-    /// Note that the register allocator will freely use this register
-    /// between instructions, but *within* the machine code generated
-    /// by a single (regalloc-level) instruction, the client is free
-    /// to use the scratch register. E.g., if one "instruction" causes
-    /// the emission of two machine-code instructions, this lowering
-    /// can use the scratch register between them.
-    pub scratch_by_class: [PReg; 2],
-
     /// Some `PReg`s can be designated as locations on the stack rather than
     /// actual registers. These can be used to tell the register allocator about
     /// pre-defined stack slots used for function arguments and return values.
     ///
-    /// `PReg`s in this list cannot be used as a scratch register or as an
-    /// allocatable regsiter.
+    /// `PReg`s in this list cannot be used as an allocatable register.
     pub fixed_stack_slots: Vec<PReg>,
 }
diff --git a/src/moves.rs b/src/moves.rs
index 3828f1b..26d5d55 100644
--- a/src/moves.rs
+++ b/src/moves.rs
@@ -3,11 +3,25 @@
  * exception. See `LICENSE` for details.
  */

-use crate::{ion::data_structures::u64_key, Allocation};
+use crate::{ion::data_structures::u64_key, Allocation, PReg};
 use smallvec::{smallvec, SmallVec};
+use std::fmt::Debug;

+/// A list of moves to be performed in sequence, with auxiliary data
+/// attached to each.
 pub type MoveVec<T> = SmallVec<[(Allocation, Allocation, T); 16]>;

+/// A list of moves to be performed in sequence, like a
+/// `MoveVec<T>`, except that an unchosen scratch space may occur as
+/// well, represented by `Allocation::none()`.
+#[derive(Clone, Debug)]
+pub enum MoveVecWithScratch<T> {
+    /// No scratch was actually used.
+    NoScratch(MoveVec<T>),
+    /// A scratch space was used.
+    Scratch(MoveVec<T>),
+}
+
 /// A `ParallelMoves` represents a list of alloc-to-alloc moves that
 /// must happen in parallel -- i.e., all reads of sources semantically
 /// happen before all writes of destinations, and destinations are
@@ -16,14 +30,12 @@ pub type MoveVec<T> = SmallVec<[(Allocation, Allocation, T); 16]>;
 /// using a scratch register if one is necessary.
 pub struct ParallelMoves<T: Clone + Copy + Default> {
     parallel_moves: MoveVec<T>,
-    scratch: Allocation,
 }

 impl<T: Clone + Copy + Default> ParallelMoves<T> {
-    pub fn new(scratch: Allocation) -> Self {
+    pub fn new() -> Self {
         Self {
             parallel_moves: smallvec![],
-            scratch,
         }
     }

@@ -45,10 +57,22 @@ impl<T: Clone + Copy + Default> ParallelMoves<T> {
         false
     }

-    pub fn resolve(mut self) -> MoveVec<T> {
+    /// Resolve the parallel-moves problem to a sequence of separate
+    /// moves, such that the combined effect of the sequential moves
+    /// is as-if all of the moves added to this `ParallelMoves`
+    /// resolver happened in parallel.
+    ///
+    /// Sometimes, if there is a cycle, a scratch register is
+    /// necessary to allow the moves to occur sequentially. In this
+    /// case, `Allocation::none()` is returned to represent the
+    /// scratch register. The caller may choose to always hold a
+    /// separate scratch register unused to allow this to be trivially
+    /// rewritten; or may dynamically search for or create a free
+    /// register as needed, if none are available.
+    pub fn resolve(mut self) -> MoveVecWithScratch<T> {
         // Easy case: zero or one move. Just return our vec.
         if self.parallel_moves.len() <= 1 {
-            return self.parallel_moves;
+            return MoveVecWithScratch::NoScratch(self.parallel_moves);
         }

         // Sort moves by source so that we can efficiently test for
@@ -59,7 +83,7 @@ impl<T: Clone + Copy + Default> ParallelMoves<T> {

         // Do any dests overlap sources? If not, we can also just
         // return the list.
         if !self.sources_overlap_dests() {
-            return self.parallel_moves;
+            return MoveVecWithScratch::NoScratch(self.parallel_moves);
         }

         // General case: some moves overwrite dests that other moves
@@ -114,6 +138,7 @@ impl<T: Clone + Copy + Default> ParallelMoves<T> {
         let mut stack: SmallVec<[usize; 16]> = smallvec![];
         let mut visited: SmallVec<[bool; 16]> = smallvec![false; self.parallel_moves.len()];
         let mut onstack: SmallVec<[bool; 16]> = smallvec![false; self.parallel_moves.len()];
+        let mut scratch_used = false;

         stack.push(0);
         onstack[0] = true;
@@ -182,7 +207,8 @@ impl<T: Clone + Copy + Default> ParallelMoves<T> {
                     let (mut src, dst, dst_t) = self.parallel_moves[move_idx];
                     if last_dst.is_none() {
                         scratch_src = Some(src);
-                        src = self.scratch;
+                        src = Allocation::none();
+                        scratch_used = true;
                     } else {
                         debug_assert_eq!(last_dst.unwrap(), src);
                     }
@@ -195,13 +221,208 @@ impl<T: Clone + Copy + Default> ParallelMoves<T> {
                     }
                 }
                 if let Some(src) = scratch_src {
-                    ret.push((src, self.scratch, T::default()));
+                    ret.push((src, Allocation::none(), T::default()));
                 }
             }
         }

         ret.reverse();
-        ret
+
+        if scratch_used {
+            MoveVecWithScratch::Scratch(ret)
+        } else {
+            MoveVecWithScratch::NoScratch(ret)
+        }
     }
 }
+
+impl<T> MoveVecWithScratch<T> {
+    /// Fills in the scratch space, if needed, with the given
+    /// register/allocation and returns a final list of moves. The
+    /// scratch register must not occur anywhere in the parallel-move
+    /// problem given to the resolver that produced this
+    /// `MoveVecWithScratch`.
+    pub fn with_scratch(self, scratch: Allocation) -> MoveVec<T> {
+        match self {
+            MoveVecWithScratch::NoScratch(moves) => moves,
+            MoveVecWithScratch::Scratch(mut moves) => {
+                for (src, dst, _) in &mut moves {
+                    debug_assert!(
+                        *src != scratch && *dst != scratch,
+                        "Scratch register should not also be an actual source or dest of moves"
+                    );
+                    debug_assert!(
+                        !(src.is_none() && dst.is_none()),
+                        "Move resolution should not have produced a scratch-to-scratch move"
+                    );
+                    if src.is_none() {
+                        *src = scratch;
+                    }
+                    if dst.is_none() {
+                        *dst = scratch;
+                    }
+                }
+                moves
+            }
+        }
+    }
+
+    /// Unwrap without a scratch register.
+    pub fn without_scratch(self) -> Option<MoveVec<T>> {
+        match self {
+            MoveVecWithScratch::NoScratch(moves) => Some(moves),
+            MoveVecWithScratch::Scratch(..) => None,
+        }
+    }
+
+    /// Do we need a scratch register?
+    pub fn needs_scratch(&self) -> bool {
+        match self {
+            MoveVecWithScratch::NoScratch(..) => false,
+            MoveVecWithScratch::Scratch(..) => true,
+        }
+    }
+
+    /// Do any moves go from stack to stack?
+    pub fn stack_to_stack(&self) -> bool {
+        match self {
+            MoveVecWithScratch::NoScratch(moves) | MoveVecWithScratch::Scratch(moves) => moves
+                .iter()
+                .any(|(src, dst, _)| src.is_stack() && dst.is_stack()),
+        }
+    }
+}
+
+/// Final stage of move resolution: finding or using scratch
+/// registers, creating them if necessary by using stackslots, and
+/// ensuring that the final list of moves contains no stack-to-stack
+/// moves.
+///
+/// The resolved list of moves may need one or two scratch registers,
+/// and maybe a stackslot, to ensure these conditions. Our general
+/// strategy is in two steps.
+///
+/// First, we find a scratch register, so we only have to worry about
+/// a list of moves, all with real locations as src and dest. If we're
+/// lucky and there are any registers not allocated at this
+/// program-point, we can use a real register. Otherwise, we use an
+/// extra stackslot. This is fine, because at this step,
+/// stack-to-stack moves are OK.
+///
+/// Then, we resolve stack-to-stack moves into stack-to-reg /
+/// reg-to-stack pairs. For this, we try to allocate a second free
+/// register. If unavailable, we create another scratch stackslot, and
+/// we pick a "victim" register in the appropriate class, and we
+/// resolve into: victim -> extra-stackslot; stack-src -> victim;
+/// victim -> stack-dst; extra-stackslot -> victim.
+///
+/// Sometimes move elision will be able to clean this up a bit. But,
+/// for simplicity reasons, let's keep the concerns separated! So we
+/// always do the full expansion above.
+pub struct MoveAndScratchResolver<GetReg, GetStackSlot>
+where
+    GetReg: FnMut() -> Option<Allocation>,
+    GetStackSlot: FnMut() -> Allocation,
+{
+    /// Scratch register for stack-to-stack move expansion.
+    stack_stack_scratch_reg: Option<Allocation>,
+    /// Stackslot into which we need to save the stack-to-stack
+    /// scratch reg before doing any stack-to-stack moves, if we stole
+    /// the reg.
+    stack_stack_scratch_reg_save: Option<Allocation>,
+    /// Closure that finds us a PReg at the current location.
+    find_free_reg: GetReg,
+    /// Closure that gets us a stackslot, if needed.
+    get_stackslot: GetStackSlot,
+    /// The victim PReg to evict to another stackslot at every
+    /// stack-to-stack move if a free PReg is not otherwise
+    /// available. Provided by caller and statically chosen. This is a
+    /// very last-ditch option, so static choice is OK.
+    victim: PReg,
+}
+
+impl<GetReg, GetStackSlot> MoveAndScratchResolver<GetReg, GetStackSlot>
+where
+    GetReg: FnMut() -> Option<Allocation>,
+    GetStackSlot: FnMut() -> Allocation,
+{
+    pub fn new(find_free_reg: GetReg, get_stackslot: GetStackSlot, victim: PReg) -> Self {
+        Self {
+            stack_stack_scratch_reg: None,
+            stack_stack_scratch_reg_save: None,
+            find_free_reg,
+            get_stackslot,
+            victim,
+        }
+    }
+
+    pub fn compute<T: Copy + Debug>(mut self, moves: MoveVecWithScratch<T>) -> MoveVec<T> {
+        // First, do we have a vec with no stack-to-stack moves or use
+        // of a scratch register? Fast return if so.
+        if !moves.needs_scratch() && !moves.stack_to_stack() {
+            return moves.without_scratch().unwrap();
+        }
+
+        let mut result = smallvec![];
+
+        // Now, find a scratch allocation in order to resolve cycles.
+        let scratch = (self.find_free_reg)().unwrap_or_else(|| (self.get_stackslot)());
+        log::trace!("scratch resolver: scratch alloc {:?}", scratch);
+
+        let moves = moves.with_scratch(scratch);
+        for &(src, dst, data) in &moves {
+            // Do we have a stack-to-stack move? If so, resolve.
+            if src.is_stack() && dst.is_stack() {
+                log::trace!("scratch resolver: stack to stack: {:?} -> {:?}", src, dst);
+                // Lazily allocate a stack-to-stack scratch.
+                if self.stack_stack_scratch_reg.is_none() {
+                    if let Some(reg) = (self.find_free_reg)() {
+                        log::trace!(
+                            "scratch resolver: have free stack-to-stack scratch preg: {:?}",
+                            reg
+                        );
+                        self.stack_stack_scratch_reg = Some(reg);
+                    } else {
+                        self.stack_stack_scratch_reg = Some(Allocation::reg(self.victim));
+                        self.stack_stack_scratch_reg_save = Some((self.get_stackslot)());
+                        log::trace!("scratch resolver: stack-to-stack using victim {:?} with save stackslot {:?}",
+                                    self.stack_stack_scratch_reg,
+                                    self.stack_stack_scratch_reg_save);
+                    }
+                }
+
+                // If we have a "victimless scratch", then do a
+                // stack-to-scratch / scratch-to-stack sequence.
+                if self.stack_stack_scratch_reg_save.is_none() {
+                    result.push((src, self.stack_stack_scratch_reg.unwrap(), data));
+                    result.push((self.stack_stack_scratch_reg.unwrap(), dst, data));
+                }
+                // Otherwise, save the current value in the
+                // stack-to-stack scratch reg (which is our victim) to
+                // the extra stackslot, then do the stack-to-scratch /
+                // scratch-to-stack sequence, then restore it.
+                else {
+                    result.push((
+                        self.stack_stack_scratch_reg.unwrap(),
+                        self.stack_stack_scratch_reg_save.unwrap(),
+                        data,
+                    ));
+                    result.push((src, self.stack_stack_scratch_reg.unwrap(), data));
+                    result.push((self.stack_stack_scratch_reg.unwrap(), dst, data));
+                    result.push((
+                        self.stack_stack_scratch_reg_save.unwrap(),
+                        self.stack_stack_scratch_reg.unwrap(),
+                        data,
+                    ));
+                }
+            } else {
+                // Normal move.
+                result.push((src, dst, data));
+            }
+        }
+
+        log::trace!("scratch resolver: got {:?}", result);
+        result
+    }
+}
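
The new two-stage API is exercised end-to-end by the fuzz target above: resolve the parallel moves first, then feed the result through MoveAndScratchResolver with caller-provided closures for free registers and fresh stackslots. A minimal sketch of that same flow follows; the helper name `sequentialize`, the closure bodies, and the register/slot numbers are placeholders for illustration, and `regalloc2::fuzzing::moves` is the feature-gated re-export that the fuzz target itself imports (inside the crate the module is `crate::moves`).

    use regalloc2::fuzzing::moves::{MoveAndScratchResolver, ParallelMoves};
    use regalloc2::{Allocation, PReg, RegClass, SpillSlot};

    // Turn a set of parallel moves into an equivalent sequential list with
    // no stack-to-stack moves, breaking any cycle via scratch space.
    fn sequentialize(pairs: &[(Allocation, Allocation)]) -> Vec<(Allocation, Allocation, ())> {
        let mut par = ParallelMoves::new();
        for &(src, dst) in pairs {
            par.add(src, dst, ());
        }
        // May contain `Allocation::none()` placeholders if a cycle needed scratch.
        let with_scratch = par.resolve();

        // The caller supplies scratch space: a free preg if one is known,
        // otherwise freshly numbered stackslots, plus a statically chosen victim.
        let mut free = vec![Allocation::reg(PReg::new(30, RegClass::Int))];
        let get_reg = || free.pop();
        let mut next_slot = 32;
        let get_stackslot = || {
            let slot = next_slot;
            next_slot += 1;
            Allocation::stack(SpillSlot::new(slot, RegClass::Int))
        };
        let victim = PReg::new(0, RegClass::Int);
        let resolver = MoveAndScratchResolver::new(get_reg, get_stackslot, victim);
        resolver.compute(with_scratch).into_iter().collect()
    }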