Avoid stack-to-stack moves by allocating an extra spillslot and re-using the scratch reg instead.
This commit is contained in:
@@ -146,7 +146,7 @@ impl Function for Func {
|
|||||||
self.num_vregs
|
self.num_vregs
|
||||||
}
|
}
|
||||||
|
|
||||||
fn spillslot_size(&self, regclass: RegClass, _: VReg) -> usize {
|
fn spillslot_size(&self, regclass: RegClass) -> usize {
|
||||||
match regclass {
|
match regclass {
|
||||||
RegClass::Int => 1,
|
RegClass::Int => 1,
|
||||||
RegClass::Float => 2,
|
RegClass::Float => 2,
|
||||||
|
|||||||
125
src/ion/mod.rs
125
src/ion/mod.rs
@@ -351,6 +351,8 @@ struct Env<'a, F: Function> {
|
|||||||
spillslots: Vec<SpillSlotData>,
|
spillslots: Vec<SpillSlotData>,
|
||||||
slots_by_size: Vec<SpillSlotList>,
|
slots_by_size: Vec<SpillSlotList>,
|
||||||
|
|
||||||
|
extra_spillslot: Vec<Option<Allocation>>,
|
||||||
|
|
||||||
// Program moves: these are moves in the provided program that we
|
// Program moves: these are moves in the provided program that we
|
||||||
// handle with our internal machinery, in order to avoid the
|
// handle with our internal machinery, in order to avoid the
|
||||||
// overhead of ordinary operand processing. We expect the client
|
// overhead of ordinary operand processing. We expect the client
|
||||||
@@ -399,7 +401,6 @@ struct Env<'a, F: Function> {
|
|||||||
struct SpillSlotData {
|
struct SpillSlotData {
|
||||||
ranges: LiveRangeSet,
|
ranges: LiveRangeSet,
|
||||||
class: RegClass,
|
class: RegClass,
|
||||||
size: u32,
|
|
||||||
alloc: Allocation,
|
alloc: Allocation,
|
||||||
next_spillslot: SpillSlotIndex,
|
next_spillslot: SpillSlotIndex,
|
||||||
}
|
}
|
||||||
@@ -969,6 +970,8 @@ impl<'a, F: Function> Env<'a, F> {
|
|||||||
spillslots: vec![],
|
spillslots: vec![],
|
||||||
slots_by_size: vec![],
|
slots_by_size: vec![],
|
||||||
|
|
||||||
|
extra_spillslot: vec![None, None],
|
||||||
|
|
||||||
prog_move_srcs: Vec::with_capacity(n / 2),
|
prog_move_srcs: Vec::with_capacity(n / 2),
|
||||||
prog_move_dsts: Vec::with_capacity(n / 2),
|
prog_move_dsts: Vec::with_capacity(n / 2),
|
||||||
prog_move_merges: Vec::with_capacity(n / 2),
|
prog_move_merges: Vec::with_capacity(n / 2),
|
||||||
@@ -2402,7 +2405,7 @@ impl<'a, F: Function> Env<'a, F> {
|
|||||||
// Create a spillslot for this bundle.
|
// Create a spillslot for this bundle.
|
||||||
let ssidx = SpillSetIndex::new(self.spillsets.len());
|
let ssidx = SpillSetIndex::new(self.spillsets.len());
|
||||||
let reg = self.vreg_regs[vreg.index()];
|
let reg = self.vreg_regs[vreg.index()];
|
||||||
let size = self.func.spillslot_size(reg.class(), reg) as u8;
|
let size = self.func.spillslot_size(reg.class()) as u8;
|
||||||
self.spillsets.push(SpillSet {
|
self.spillsets.push(SpillSet {
|
||||||
vregs: smallvec![vreg],
|
vregs: smallvec![vreg],
|
||||||
slot: SpillSlotIndex::invalid(),
|
slot: SpillSlotIndex::invalid(),
|
||||||
@@ -3791,7 +3794,6 @@ impl<'a, F: Function> Env<'a, F> {
|
|||||||
self.spillslots.push(SpillSlotData {
|
self.spillslots.push(SpillSlotData {
|
||||||
ranges: LiveRangeSet::new(),
|
ranges: LiveRangeSet::new(),
|
||||||
next_spillslot: next,
|
next_spillslot: next,
|
||||||
size: size as u32,
|
|
||||||
alloc: Allocation::none(),
|
alloc: Allocation::none(),
|
||||||
class: self.spillsets[spillset.index()].class,
|
class: self.spillsets[spillset.index()].class,
|
||||||
});
|
});
|
||||||
@@ -3805,22 +3807,27 @@ impl<'a, F: Function> Env<'a, F> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Assign actual slot indices to spillslots.
|
// Assign actual slot indices to spillslots.
|
||||||
let mut offset: u32 = 0;
|
for i in 0..self.spillslots.len() {
|
||||||
for data in &mut self.spillslots {
|
self.spillslots[i].alloc = self.allocate_spillslot(self.spillslots[i].class);
|
||||||
|
}
|
||||||
|
|
||||||
|
log::debug!("spillslot allocator done");
|
||||||
|
}
|
||||||
|
|
||||||
|
fn allocate_spillslot(&mut self, class: RegClass) -> Allocation {
|
||||||
|
let size = self.func.spillslot_size(class) as u32;
|
||||||
|
let mut offset = self.num_spillslots;
|
||||||
// Align up to `size`.
|
// Align up to `size`.
|
||||||
debug_assert!(data.size.is_power_of_two());
|
debug_assert!(size.is_power_of_two());
|
||||||
offset = (offset + data.size - 1) & !(data.size - 1);
|
offset = (offset + size - 1) & !(size - 1);
|
||||||
let slot = if self.func.multi_spillslot_named_by_last_slot() {
|
let slot = if self.func.multi_spillslot_named_by_last_slot() {
|
||||||
offset + data.size - 1
|
offset + size - 1
|
||||||
} else {
|
} else {
|
||||||
offset
|
offset
|
||||||
};
|
};
|
||||||
data.alloc = Allocation::stack(SpillSlot::new(slot as usize, data.class));
|
offset += size;
|
||||||
offset += data.size;
|
|
||||||
}
|
|
||||||
self.num_spillslots = offset;
|
self.num_spillslots = offset;
|
||||||
|
Allocation::stack(SpillSlot::new(slot as usize, class))
|
||||||
log::debug!("spillslot allocator done");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_start_of_block(&self, pos: ProgPoint) -> bool {
|
fn is_start_of_block(&self, pos: ProgPoint) -> bool {
|
||||||
@@ -4740,9 +4747,8 @@ impl<'a, F: Function> Env<'a, F> {
|
|||||||
// All moves in `moves` semantically happen in
|
// All moves in `moves` semantically happen in
|
||||||
// parallel. Let's resolve these to a sequence of moves
|
// parallel. Let's resolve these to a sequence of moves
|
||||||
// that can be done one at a time.
|
// that can be done one at a time.
|
||||||
let mut parallel_moves = ParallelMoves::new(Allocation::reg(
|
let scratch = self.env.scratch_by_class[regclass as u8 as usize];
|
||||||
self.env.scratch_by_class[regclass as u8 as usize],
|
let mut parallel_moves = ParallelMoves::new(Allocation::reg(scratch));
|
||||||
));
|
|
||||||
log::debug!("parallel moves at pos {:?} prio {:?}", pos, prio);
|
log::debug!("parallel moves at pos {:?} prio {:?}", pos, prio);
|
||||||
for m in moves {
|
for m in moves {
|
||||||
if (m.from_alloc != m.to_alloc) || m.to_vreg.is_some() {
|
if (m.from_alloc != m.to_alloc) || m.to_vreg.is_some() {
|
||||||
@@ -4753,10 +4759,96 @@ impl<'a, F: Function> Env<'a, F> {
|
|||||||
|
|
||||||
let resolved = parallel_moves.resolve();
|
let resolved = parallel_moves.resolve();
|
||||||
|
|
||||||
|
// If (i) the scratch register is used, and (ii) a
|
||||||
|
// stack-to-stack move exists, then we need to
|
||||||
|
// allocate an additional scratch spillslot to which
|
||||||
|
// we can temporarily spill the scratch reg when we
|
||||||
|
// lower the stack-to-stack move to a
|
||||||
|
// stack-to-scratch-to-stack sequence.
|
||||||
|
let scratch_used = resolved.iter().any(|&(src, dst, _)| {
|
||||||
|
src == Allocation::reg(scratch) || dst == Allocation::reg(scratch)
|
||||||
|
});
|
||||||
|
let stack_stack_move = resolved
|
||||||
|
.iter()
|
||||||
|
.any(|&(src, dst, _)| src.is_stack() && dst.is_stack());
|
||||||
|
let extra_slot = if scratch_used && stack_stack_move {
|
||||||
|
if self.extra_spillslot[regclass as u8 as usize].is_none() {
|
||||||
|
let slot = self.allocate_spillslot(regclass);
|
||||||
|
self.extra_spillslot[regclass as u8 as usize] = Some(slot);
|
||||||
|
}
|
||||||
|
self.extra_spillslot[regclass as u8 as usize]
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut scratch_used_yet = false;
|
||||||
for (src, dst, to_vreg) in resolved {
|
for (src, dst, to_vreg) in resolved {
|
||||||
log::debug!(" resolved: {} -> {} ({:?})", src, dst, to_vreg);
|
log::debug!(" resolved: {} -> {} ({:?})", src, dst, to_vreg);
|
||||||
let action = redundant_moves.process_move(src, dst, to_vreg);
|
let action = redundant_moves.process_move(src, dst, to_vreg);
|
||||||
if !action.elide {
|
if !action.elide {
|
||||||
|
if dst == Allocation::reg(scratch) {
|
||||||
|
scratch_used_yet = true;
|
||||||
|
}
|
||||||
|
if src.is_stack() && dst.is_stack() {
|
||||||
|
if !scratch_used_yet {
|
||||||
|
self.add_edit(
|
||||||
|
pos,
|
||||||
|
prio,
|
||||||
|
Edit::Move {
|
||||||
|
from: src,
|
||||||
|
to: Allocation::reg(scratch),
|
||||||
|
to_vreg,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
self.add_edit(
|
||||||
|
pos,
|
||||||
|
prio,
|
||||||
|
Edit::Move {
|
||||||
|
from: Allocation::reg(scratch),
|
||||||
|
to: dst,
|
||||||
|
to_vreg,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
assert!(extra_slot.is_some());
|
||||||
|
self.add_edit(
|
||||||
|
pos,
|
||||||
|
prio,
|
||||||
|
Edit::Move {
|
||||||
|
from: Allocation::reg(scratch),
|
||||||
|
to: extra_slot.unwrap(),
|
||||||
|
to_vreg: None,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
self.add_edit(
|
||||||
|
pos,
|
||||||
|
prio,
|
||||||
|
Edit::Move {
|
||||||
|
from: src,
|
||||||
|
to: Allocation::reg(scratch),
|
||||||
|
to_vreg,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
self.add_edit(
|
||||||
|
pos,
|
||||||
|
prio,
|
||||||
|
Edit::Move {
|
||||||
|
from: Allocation::reg(scratch),
|
||||||
|
to: dst,
|
||||||
|
to_vreg,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
self.add_edit(
|
||||||
|
pos,
|
||||||
|
prio,
|
||||||
|
Edit::Move {
|
||||||
|
from: extra_slot.unwrap(),
|
||||||
|
to: Allocation::reg(scratch),
|
||||||
|
to_vreg: None,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
self.add_edit(
|
self.add_edit(
|
||||||
pos,
|
pos,
|
||||||
prio,
|
prio,
|
||||||
@@ -4766,6 +4858,7 @@ impl<'a, F: Function> Env<'a, F> {
|
|||||||
to_vreg,
|
to_vreg,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
log::debug!(" -> redundant move elided");
|
log::debug!(" -> redundant move elided");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -797,16 +797,9 @@ pub trait Function {
|
|||||||
/// 128-bit vector value will require two slots. The regalloc will always
|
/// 128-bit vector value will require two slots. The regalloc will always
|
||||||
/// align on this size.
|
/// align on this size.
|
||||||
///
|
///
|
||||||
/// This passes the associated virtual register to the client as well,
|
|
||||||
/// because the way in which we spill a real register may depend on the
|
|
||||||
/// value that we are using it for. E.g., if a machine has V128 registers
|
|
||||||
/// but we also use them for F32 and F64 values, we may use a different
|
|
||||||
/// store-slot size and smaller-operand store/load instructions for an F64
|
|
||||||
/// than for a true V128.
|
|
||||||
///
|
|
||||||
/// (This trait method's design and doc text derives from
|
/// (This trait method's design and doc text derives from
|
||||||
/// regalloc.rs' trait of the same name.)
|
/// regalloc.rs' trait of the same name.)
|
||||||
fn spillslot_size(&self, regclass: RegClass, for_vreg: VReg) -> usize;
|
fn spillslot_size(&self, regclass: RegClass) -> usize;
|
||||||
|
|
||||||
/// When providing a spillslot number for a multi-slot spillslot,
|
/// When providing a spillslot number for a multi-slot spillslot,
|
||||||
/// do we provide the first or the last? This is usually related
|
/// do we provide the first or the last? This is usually related
|
||||||
|
|||||||
Reference in New Issue
Block a user