diff --git a/src/fuzzing/func.rs b/src/fuzzing/func.rs index 258c0b1..d89df60 100644 --- a/src/fuzzing/func.rs +++ b/src/fuzzing/func.rs @@ -146,7 +146,7 @@ impl Function for Func { self.num_vregs } - fn spillslot_size(&self, regclass: RegClass, _: VReg) -> usize { + fn spillslot_size(&self, regclass: RegClass) -> usize { match regclass { RegClass::Int => 1, RegClass::Float => 2, diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 7325aba..33c584d 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -351,6 +351,8 @@ struct Env<'a, F: Function> { spillslots: Vec<SpillSlotData>, slots_by_size: Vec<SpillSlotList>, + extra_spillslot: Vec<Option<Allocation>>, + // Program moves: these are moves in the provided program that we // handle with our internal machinery, in order to avoid the // overhead of ordinary operand processing. We expect the client @@ -399,7 +401,6 @@ struct Env<'a, F: Function> { struct SpillSlotData { ranges: LiveRangeSet, class: RegClass, - size: u32, alloc: Allocation, next_spillslot: SpillSlotIndex, } @@ -969,6 +970,8 @@ impl<'a, F: Function> Env<'a, F> { spillslots: vec![], slots_by_size: vec![], + extra_spillslot: vec![None, None], + prog_move_srcs: Vec::with_capacity(n / 2), prog_move_dsts: Vec::with_capacity(n / 2), prog_move_merges: Vec::with_capacity(n / 2), @@ -2402,7 +2405,7 @@ impl<'a, F: Function> Env<'a, F> { // Create a spillslot for this bundle. 
let ssidx = SpillSetIndex::new(self.spillsets.len()); let reg = self.vreg_regs[vreg.index()]; - let size = self.func.spillslot_size(reg.class(), reg) as u8; + let size = self.func.spillslot_size(reg.class()) as u8; self.spillsets.push(SpillSet { vregs: smallvec![vreg], slot: SpillSlotIndex::invalid(), @@ -3791,7 +3794,6 @@ impl<'a, F: Function> Env<'a, F> { self.spillslots.push(SpillSlotData { ranges: LiveRangeSet::new(), next_spillslot: next, - size: size as u32, alloc: Allocation::none(), class: self.spillsets[spillset.index()].class, }); @@ -3805,24 +3807,29 @@ impl<'a, F: Function> Env<'a, F> { } // Assign actual slot indices to spillslots. - let mut offset: u32 = 0; - for data in &mut self.spillslots { - // Align up to `size`. - debug_assert!(data.size.is_power_of_two()); - offset = (offset + data.size - 1) & !(data.size - 1); - let slot = if self.func.multi_spillslot_named_by_last_slot() { - offset + data.size - 1 - } else { - offset - }; - data.alloc = Allocation::stack(SpillSlot::new(slot as usize, data.class)); - offset += data.size; + for i in 0..self.spillslots.len() { + self.spillslots[i].alloc = self.allocate_spillslot(self.spillslots[i].class); } - self.num_spillslots = offset; log::debug!("spillslot allocator done"); } + fn allocate_spillslot(&mut self, class: RegClass) -> Allocation { + let size = self.func.spillslot_size(class) as u32; + let mut offset = self.num_spillslots; + // Align up to `size`. 
+ debug_assert!(size.is_power_of_two()); + offset = (offset + size - 1) & !(size - 1); + let slot = if self.func.multi_spillslot_named_by_last_slot() { + offset + size - 1 + } else { + offset + }; + offset += size; + self.num_spillslots = offset; + Allocation::stack(SpillSlot::new(slot as usize, class)) + } + fn is_start_of_block(&self, pos: ProgPoint) -> bool { let block = self.cfginfo.insn_block[pos.inst().index()]; pos == self.cfginfo.block_entry[block.index()] @@ -4740,9 +4747,8 @@ impl<'a, F: Function> Env<'a, F> { // All moves in `moves` semantically happen in // parallel. Let's resolve these to a sequence of moves // that can be done one at a time. - let mut parallel_moves = ParallelMoves::new(Allocation::reg( - self.env.scratch_by_class[regclass as u8 as usize], - )); + let scratch = self.env.scratch_by_class[regclass as u8 as usize]; + let mut parallel_moves = ParallelMoves::new(Allocation::reg(scratch)); log::debug!("parallel moves at pos {:?} prio {:?}", pos, prio); for m in moves { if (m.from_alloc != m.to_alloc) || m.to_vreg.is_some() { @@ -4753,19 +4759,106 @@ impl<'a, F: Function> Env<'a, F> { let resolved = parallel_moves.resolve(); + // If (i) the scratch register is used, and (ii) a + // stack-to-stack move exists, then we need to + // allocate an additional scratch spillslot to which + // we can temporarily spill the scratch reg when we + // lower the stack-to-stack move to a + // stack-to-scratch-to-stack sequence. 
+ let scratch_used = resolved.iter().any(|&(src, dst, _)| { + src == Allocation::reg(scratch) || dst == Allocation::reg(scratch) + }); + let stack_stack_move = resolved + .iter() + .any(|&(src, dst, _)| src.is_stack() && dst.is_stack()); + let extra_slot = if scratch_used && stack_stack_move { + if self.extra_spillslot[regclass as u8 as usize].is_none() { + let slot = self.allocate_spillslot(regclass); + self.extra_spillslot[regclass as u8 as usize] = Some(slot); + } + self.extra_spillslot[regclass as u8 as usize] + } else { + None + }; + + let mut scratch_used_yet = false; for (src, dst, to_vreg) in resolved { log::debug!(" resolved: {} -> {} ({:?})", src, dst, to_vreg); let action = redundant_moves.process_move(src, dst, to_vreg); if !action.elide { - self.add_edit( - pos, - prio, - Edit::Move { - from: src, - to: dst, - to_vreg, - }, - ); + if dst == Allocation::reg(scratch) { + scratch_used_yet = true; + } + if src.is_stack() && dst.is_stack() { + if !scratch_used_yet { + self.add_edit( + pos, + prio, + Edit::Move { + from: src, + to: Allocation::reg(scratch), + to_vreg, + }, + ); + self.add_edit( + pos, + prio, + Edit::Move { + from: Allocation::reg(scratch), + to: dst, + to_vreg, + }, + ); + } else { + assert!(extra_slot.is_some()); + self.add_edit( + pos, + prio, + Edit::Move { + from: Allocation::reg(scratch), + to: extra_slot.unwrap(), + to_vreg: None, + }, + ); + self.add_edit( + pos, + prio, + Edit::Move { + from: src, + to: Allocation::reg(scratch), + to_vreg, + }, + ); + self.add_edit( + pos, + prio, + Edit::Move { + from: Allocation::reg(scratch), + to: dst, + to_vreg, + }, + ); + self.add_edit( + pos, + prio, + Edit::Move { + from: extra_slot.unwrap(), + to: Allocation::reg(scratch), + to_vreg: None, + }, + ); + } + } else { + self.add_edit( + pos, + prio, + Edit::Move { + from: src, + to: dst, + to_vreg, + }, + ); + } } else { log::debug!(" -> redundant move elided"); } diff --git a/src/lib.rs b/src/lib.rs index 7941ebe..7d55624 100644 --- 
a/src/lib.rs +++ b/src/lib.rs @@ -797,16 +797,9 @@ pub trait Function { /// 128-bit vector value will require two slots. The regalloc will always /// align on this size. /// - /// This passes the associated virtual register to the client as well, - /// because the way in which we spill a real register may depend on the - /// value that we are using it for. E.g., if a machine has V128 registers - /// but we also use them for F32 and F64 values, we may use a different - /// store-slot size and smaller-operand store/load instructions for an F64 - /// than for a true V128. - /// /// (This trait method's design and doc text derives from /// regalloc.rs' trait of the same name.) - fn spillslot_size(&self, regclass: RegClass, for_vreg: VReg) -> usize; + fn spillslot_size(&self, regclass: RegClass) -> usize; /// When providing a spillslot number for a multi-slot spillslot, /// do we provide the first or the last? This is usually related