Rearrange some struct fields to work better with u64_key/u128_key
This allows the compiler to load the whole key with 1 or 2 64-bit accesses, assuming little-endian ordering. Improves instruction count by ~1%.
This commit is contained in:
@@ -277,6 +277,56 @@ pub struct MultiFixedRegFixup {
|
||||
pub vreg: VRegIndex,
|
||||
}
|
||||
|
||||
/// A blockparam output: from-vreg, (end of) from-block, (start of)
/// to-block, to-vreg.
///
/// The field order is significant: these are sorted so that a
|
||||
/// scan over vregs, then blocks in each range, can scan in
|
||||
/// order through this (sorted) list and add allocs to the
|
||||
/// half-move list.
|
||||
///
|
||||
/// The fields in this struct are reversed in sort order so that the entire
|
||||
/// struct can be treated as a u128 for sorting purposes.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
// `#[repr(C)]` keeps the fields in declaration order in memory; the intent is
// that the sort key can be loaded with one or two 64-bit accesses on
// little-endian targets.
#[repr(C)]
|
||||
pub struct BlockparamOut {
|
||||
/// Destination vreg; the last (fourth) argument to `u128_key` in `key()`.
pub to_vreg: VRegIndex,
|
||||
/// (Start of) destination block; the third argument to `u128_key` in `key()`.
pub to_block: Block,
|
||||
/// (End of) source block; the second argument to `u128_key` in `key()`.
pub from_block: Block,
|
||||
/// Source vreg; the first argument to `u128_key` in `key()`.
pub from_vreg: VRegIndex,
|
||||
}
|
||||
impl BlockparamOut {
|
||||
/// Returns the sort key for this entry.
///
/// The raw `u32` values of `from_vreg`, `from_block`, `to_block` and
/// `to_vreg` are passed to `u128_key` in that order, i.e. the reverse of
/// the struct's declared field order.
// NOTE(review): `u128_key` is defined elsewhere; presumably its first
// argument is the most significant part of the key — confirm.
#[inline(always)]
|
||||
pub fn key(&self) -> u128 {
|
||||
u128_key(
|
||||
self.from_vreg.raw_u32(),
|
||||
self.from_block.raw_u32(),
|
||||
self.to_block.raw_u32(),
|
||||
self.to_vreg.raw_u32(),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// A blockparam input: to-vreg, (start of) to-block, (end of) from-block.
///
/// As for `BlockparamOut` above, field order is significant.
|
||||
///
|
||||
/// The fields in this struct are reversed in sort order so that the entire
|
||||
/// struct can be treated as a u128 for sorting purposes.
|
||||
// NOTE(review): `key()` passes only these three fields plus a constant 0 (as
// the last argument) to `u128_key`, so the struct presumably does not fill a
// whole u128 by itself — confirm the "treated as a u128" wording.
#[derive(Clone, Debug)]
|
||||
// `#[repr(C)]` keeps the fields in declaration order in memory.
#[repr(C)]
|
||||
pub struct BlockparamIn {
|
||||
/// (End of) source block; the third argument to `u128_key` in `key()`.
pub from_block: Block,
|
||||
/// (Start of) destination block; the second argument to `u128_key` in `key()`.
pub to_block: Block,
|
||||
/// Destination vreg; the first argument to `u128_key` in `key()`.
pub to_vreg: VRegIndex,
|
||||
}
|
||||
impl BlockparamIn {
|
||||
/// Returns the sort key for this entry.
///
/// The raw `u32` values of `to_vreg`, `to_block` and `from_block` are
/// passed to `u128_key` in that order, followed by a constant `0` as the
/// fourth argument.
// NOTE(review): `u128_key` is defined elsewhere; presumably its first
// argument is the most significant part of the key — confirm.
#[inline(always)]
|
||||
pub fn key(&self) -> u128 {
|
||||
u128_key(
|
||||
self.to_vreg.raw_u32(),
|
||||
self.to_block.raw_u32(),
|
||||
self.from_block.raw_u32(),
|
||||
0,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Env<'a, F: Function> {
|
||||
pub func: &'a F,
|
||||
@@ -284,16 +334,8 @@ pub struct Env<'a, F: Function> {
|
||||
pub cfginfo: CFGInfo,
|
||||
pub liveins: Vec<IndexSet>,
|
||||
pub liveouts: Vec<IndexSet>,
|
||||
/// Blockparam outputs: from-vreg, (end of) from-block, (start of)
|
||||
/// to-block, to-vreg. The field order is significant: these are sorted so
|
||||
/// that a scan over vregs, then blocks in each range, can scan in
|
||||
/// order through this (sorted) list and add allocs to the
|
||||
/// half-move list.
|
||||
pub blockparam_outs: Vec<(VRegIndex, Block, Block, VRegIndex)>,
|
||||
/// Blockparam inputs: to-vreg, (start of) to-block, (end of)
|
||||
/// from-block. As above for `blockparam_outs`, field order is
|
||||
/// significant.
|
||||
pub blockparam_ins: Vec<(VRegIndex, Block, Block)>,
|
||||
pub blockparam_outs: Vec<BlockparamOut>,
|
||||
pub blockparam_ins: Vec<BlockparamIn>,
|
||||
/// Blockparam allocs: block, idx, vreg, alloc. Info to describe
|
||||
/// blockparam locations at block entry, for metadata purposes
|
||||
/// (e.g. for the checker).
|
||||
@@ -343,7 +385,7 @@ pub struct Env<'a, F: Function> {
|
||||
pub inserted_moves: Vec<InsertedMove>,
|
||||
|
||||
// Output:
|
||||
pub edits: Vec<(u32, InsertMovePrio, Edit)>,
|
||||
pub edits: Vec<(PosWithPrio, Edit)>,
|
||||
pub allocs: Vec<Allocation>,
|
||||
pub inst_alloc_offsets: Vec<u32>,
|
||||
pub num_spillslots: u32,
|
||||
@@ -487,8 +529,7 @@ impl LiveRangeSet {
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct InsertedMove {
|
||||
pub pos: ProgPoint,
|
||||
pub prio: InsertMovePrio,
|
||||
pub pos_prio: PosWithPrio,
|
||||
pub from_alloc: Allocation,
|
||||
pub to_alloc: Allocation,
|
||||
pub to_vreg: Option<VReg>,
|
||||
@@ -505,6 +546,21 @@ pub enum InsertMovePrio {
|
||||
OutEdgeMoves,
|
||||
}
|
||||
|
||||
/// A program point paired with a priority value; `key()` combines the two
/// into a single `u64` sort key.
///
/// The fields in this struct are reversed in sort order so that the entire
|
||||
/// struct can be treated as a u64 for sorting purposes.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
// `#[repr(C)]` keeps the fields in declaration order in memory.
#[repr(C)]
|
||||
pub struct PosWithPrio {
|
||||
/// Priority as a raw `u32`; the second argument to `u64_key` in `key()`.
pub prio: u32,
|
||||
/// Program point; its `to_index()` is the first argument to `u64_key` in `key()`.
pub pos: ProgPoint,
|
||||
}
|
||||
|
||||
impl PosWithPrio {
|
||||
/// Returns the sort key for this position/priority pair: `pos.to_index()`
/// followed by `prio`, combined by `u64_key`.
// NOTE(review): `u64_key` is defined elsewhere; presumably its first
// argument is the most significant part of the key — confirm.
pub fn key(self) -> u64 {
|
||||
u64_key(self.pos.to_index(), self.prio)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default)]
|
||||
pub struct Stats {
|
||||
pub livein_blocks: usize,
|
||||
|
||||
@@ -18,7 +18,7 @@ use super::{
|
||||
SpillSetIndex, Use, VRegData, VRegIndex, SLOT_NONE,
|
||||
};
|
||||
use crate::indexset::IndexSet;
|
||||
use crate::ion::data_structures::{u128_key, MultiFixedRegFixup};
|
||||
use crate::ion::data_structures::{BlockparamIn, BlockparamOut, MultiFixedRegFixup};
|
||||
use crate::{
|
||||
Allocation, Block, Function, Inst, InstPosition, Operand, OperandConstraint, OperandKind,
|
||||
OperandPos, PReg, ProgPoint, RegAllocError, VReg,
|
||||
@@ -430,8 +430,12 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
{
|
||||
let blockparam_out = VRegIndex::new(blockparam_out.vreg());
|
||||
let blockparam_in = VRegIndex::new(blockparam_in.vreg());
|
||||
self.blockparam_outs
|
||||
.push((blockparam_out, block, succ, blockparam_in));
|
||||
self.blockparam_outs.push(BlockparamOut {
|
||||
to_vreg: blockparam_in,
|
||||
to_block: succ,
|
||||
from_block: block,
|
||||
from_vreg: blockparam_out,
|
||||
});
|
||||
|
||||
// Include outgoing blockparams in the initial live set.
|
||||
live.set(blockparam_out.index(), true);
|
||||
@@ -1050,7 +1054,11 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
// add `blockparam_ins` entries.
|
||||
let vreg_idx = VRegIndex::new(vreg.vreg());
|
||||
for &pred in self.func.block_preds(block) {
|
||||
self.blockparam_ins.push((vreg_idx, block, pred));
|
||||
self.blockparam_ins.push(BlockparamIn {
|
||||
to_vreg: vreg_idx,
|
||||
to_block: block,
|
||||
from_block: pred,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1141,24 +1149,8 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
}
|
||||
}
|
||||
|
||||
self.blockparam_ins
|
||||
.sort_unstable_by_key(|(to_vreg, to_block, from_block)| {
|
||||
u128_key(
|
||||
to_vreg.raw_u32(),
|
||||
to_block.raw_u32(),
|
||||
from_block.raw_u32(),
|
||||
0,
|
||||
)
|
||||
});
|
||||
self.blockparam_outs
|
||||
.sort_unstable_by_key(|(from_vreg, from_block, to_block, to_vreg)| {
|
||||
u128_key(
|
||||
from_vreg.raw_u32(),
|
||||
from_block.raw_u32(),
|
||||
to_block.raw_u32(),
|
||||
to_vreg.raw_u32(),
|
||||
)
|
||||
});
|
||||
self.blockparam_ins.sort_unstable_by_key(|x| x.key());
|
||||
self.blockparam_outs.sort_unstable_by_key(|x| x.key());
|
||||
self.prog_move_srcs.sort_unstable_by_key(|(pos, _)| *pos);
|
||||
self.prog_move_dsts.sort_unstable_by_key(|(pos, _)| *pos);
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ use super::{
|
||||
Env, LiveBundleIndex, LiveRangeIndex, LiveRangeKey, SpillSet, SpillSetIndex, SpillSlotIndex,
|
||||
VRegIndex,
|
||||
};
|
||||
use crate::{Function, Inst, OperandConstraint, PReg};
|
||||
use crate::{ion::data_structures::BlockparamOut, Function, Inst, OperandConstraint, PReg};
|
||||
use smallvec::smallvec;
|
||||
|
||||
impl<'a, F: Function> Env<'a, F> {
|
||||
@@ -332,7 +332,9 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
|
||||
// Attempt to merge blockparams with their inputs.
|
||||
for i in 0..self.blockparam_outs.len() {
|
||||
let (from_vreg, _, _, to_vreg) = self.blockparam_outs[i];
|
||||
let BlockparamOut {
|
||||
from_vreg, to_vreg, ..
|
||||
} = self.blockparam_outs[i];
|
||||
log::trace!(
|
||||
"trying to merge blockparam v{} with input v{}",
|
||||
to_vreg.index(),
|
||||
|
||||
@@ -133,7 +133,7 @@ pub fn run<F: Function>(
|
||||
edits: env
|
||||
.edits
|
||||
.into_iter()
|
||||
.map(|(pos, _, edit)| (ProgPoint::from_index(pos), edit))
|
||||
.map(|(pos_prio, edit)| (pos_prio.pos, edit))
|
||||
.collect(),
|
||||
allocs: env.allocs,
|
||||
inst_alloc_offsets: env.inst_alloc_offsets,
|
||||
|
||||
101
src/ion/moves.rs
101
src/ion/moves.rs
@@ -17,7 +17,7 @@ use super::{
|
||||
VRegIndex, SLOT_NONE,
|
||||
};
|
||||
|
||||
use crate::ion::data_structures::u64_key;
|
||||
use crate::ion::data_structures::{BlockparamIn, BlockparamOut, PosWithPrio};
|
||||
use crate::moves::ParallelMoves;
|
||||
use crate::{
|
||||
Allocation, Block, Edit, Function, Inst, InstPosition, OperandConstraint, OperandKind,
|
||||
@@ -68,8 +68,10 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
_ => {}
|
||||
}
|
||||
self.inserted_moves.push(InsertedMove {
|
||||
pos,
|
||||
prio,
|
||||
pos_prio: PosWithPrio {
|
||||
pos,
|
||||
prio: prio as u32,
|
||||
},
|
||||
from_alloc,
|
||||
to_alloc,
|
||||
to_vreg,
|
||||
@@ -334,8 +336,12 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
blockparam_out_idx,
|
||||
);
|
||||
while blockparam_out_idx < self.blockparam_outs.len() {
|
||||
let (from_vreg, from_block, to_block, to_vreg) =
|
||||
self.blockparam_outs[blockparam_out_idx];
|
||||
let BlockparamOut {
|
||||
from_vreg,
|
||||
from_block,
|
||||
to_block,
|
||||
to_vreg,
|
||||
} = self.blockparam_outs[blockparam_out_idx];
|
||||
if (from_vreg, from_block) > (vreg, block) {
|
||||
break;
|
||||
}
|
||||
@@ -399,8 +405,11 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
blockparam_in_idx
|
||||
);
|
||||
while blockparam_in_idx < self.blockparam_ins.len() {
|
||||
let (to_vreg, to_block, from_block) =
|
||||
self.blockparam_ins[blockparam_in_idx];
|
||||
let BlockparamIn {
|
||||
from_block,
|
||||
to_block,
|
||||
to_vreg,
|
||||
} = self.blockparam_ins[blockparam_in_idx];
|
||||
if (to_vreg, to_block) > (vreg, block) {
|
||||
break;
|
||||
}
|
||||
@@ -855,7 +864,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
// resolve (see cases below).
|
||||
let mut i = 0;
|
||||
self.inserted_moves
|
||||
.sort_unstable_by_key(|m| u64_key(m.pos.to_index(), m.prio as u32));
|
||||
.sort_unstable_by_key(|m| m.pos_prio.key());
|
||||
|
||||
// Redundant-move elimination state tracker.
|
||||
let mut redundant_moves = RedundantMoveEliminator::default();
|
||||
@@ -912,18 +921,14 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
|
||||
while i < self.inserted_moves.len() {
|
||||
let start = i;
|
||||
let pos = self.inserted_moves[i].pos;
|
||||
let prio = self.inserted_moves[i].prio;
|
||||
while i < self.inserted_moves.len()
|
||||
&& self.inserted_moves[i].pos == pos
|
||||
&& self.inserted_moves[i].prio == prio
|
||||
{
|
||||
let pos_prio = self.inserted_moves[i].pos_prio;
|
||||
while i < self.inserted_moves.len() && self.inserted_moves[i].pos_prio == pos_prio {
|
||||
i += 1;
|
||||
}
|
||||
let moves = &self.inserted_moves[start..i];
|
||||
|
||||
redundant_move_process_side_effects(self, &mut redundant_moves, last_pos, pos);
|
||||
last_pos = pos;
|
||||
redundant_move_process_side_effects(self, &mut redundant_moves, last_pos, pos_prio.pos);
|
||||
last_pos = pos_prio.pos;
|
||||
|
||||
// Gather all the moves with Int class and Float class
|
||||
// separately. These cannot interact, so it is safe to
|
||||
@@ -966,7 +971,11 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
// that can be done one at a time.
|
||||
let scratch = self.env.scratch_by_class[regclass as u8 as usize];
|
||||
let mut parallel_moves = ParallelMoves::new(Allocation::reg(scratch));
|
||||
log::trace!("parallel moves at pos {:?} prio {:?}", pos, prio);
|
||||
log::trace!(
|
||||
"parallel moves at pos {:?} prio {:?}",
|
||||
pos_prio.pos,
|
||||
pos_prio.prio
|
||||
);
|
||||
for m in moves {
|
||||
if (m.from_alloc != m.to_alloc) || m.to_vreg.is_some() {
|
||||
log::trace!(" {} -> {}", m.from_alloc, m.to_alloc,);
|
||||
@@ -1009,15 +1018,13 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
if self.allocation_is_stack(src) && self.allocation_is_stack(dst) {
|
||||
if !scratch_used_yet {
|
||||
self.add_move_edit(
|
||||
pos,
|
||||
prio,
|
||||
pos_prio,
|
||||
src,
|
||||
Allocation::reg(scratch),
|
||||
to_vreg,
|
||||
);
|
||||
self.add_move_edit(
|
||||
pos,
|
||||
prio,
|
||||
pos_prio,
|
||||
Allocation::reg(scratch),
|
||||
dst,
|
||||
to_vreg,
|
||||
@@ -1025,36 +1032,32 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
} else {
|
||||
debug_assert!(extra_slot.is_some());
|
||||
self.add_move_edit(
|
||||
pos,
|
||||
prio,
|
||||
pos_prio,
|
||||
Allocation::reg(scratch),
|
||||
extra_slot.unwrap(),
|
||||
None,
|
||||
);
|
||||
self.add_move_edit(
|
||||
pos,
|
||||
prio,
|
||||
pos_prio,
|
||||
src,
|
||||
Allocation::reg(scratch),
|
||||
to_vreg,
|
||||
);
|
||||
self.add_move_edit(
|
||||
pos,
|
||||
prio,
|
||||
pos_prio,
|
||||
Allocation::reg(scratch),
|
||||
dst,
|
||||
to_vreg,
|
||||
);
|
||||
self.add_move_edit(
|
||||
pos,
|
||||
prio,
|
||||
pos_prio,
|
||||
extra_slot.unwrap(),
|
||||
Allocation::reg(scratch),
|
||||
None,
|
||||
);
|
||||
}
|
||||
} else {
|
||||
self.add_move_edit(pos, prio, src, dst, to_vreg);
|
||||
self.add_move_edit(pos_prio, src, dst, to_vreg);
|
||||
}
|
||||
} else {
|
||||
log::trace!(" -> redundant move elided");
|
||||
@@ -1066,8 +1069,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
alloc,
|
||||
vreg
|
||||
);
|
||||
self.edits
|
||||
.push((pos.to_index(), prio, Edit::DefAlloc { alloc, vreg }));
|
||||
self.edits.push((pos_prio, Edit::DefAlloc { alloc, vreg }));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1076,8 +1078,8 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
for m in &self_moves {
|
||||
log::trace!(
|
||||
"self move at pos {:?} prio {:?}: {} -> {} to_vreg {:?}",
|
||||
pos,
|
||||
prio,
|
||||
pos_prio.pos,
|
||||
pos_prio.prio,
|
||||
m.from_alloc,
|
||||
m.to_alloc,
|
||||
m.to_vreg
|
||||
@@ -1086,8 +1088,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
debug_assert!(action.elide);
|
||||
if let Some((alloc, vreg)) = action.def_alloc {
|
||||
log::trace!(" -> DefAlloc: alloc {} vreg {}", alloc, vreg);
|
||||
self.edits
|
||||
.push((pos.to_index(), prio, Edit::DefAlloc { alloc, vreg }));
|
||||
self.edits.push((pos_prio, Edit::DefAlloc { alloc, vreg }));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1096,6 +1097,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
{
|
||||
// Add edits to describe blockparam locations too. This is
|
||||
// required by the checker. This comes after any edge-moves.
|
||||
use crate::ion::data_structures::u64_key;
|
||||
self.blockparam_allocs
|
||||
.sort_unstable_by_key(|&(block, idx, _, _)| u64_key(block.raw_u32(), idx));
|
||||
self.stats.blockparam_allocs_count = self.blockparam_allocs.len();
|
||||
@@ -1119,8 +1121,10 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
debug_assert_eq!(allocs.len(), self.func.block_params(block).len());
|
||||
for (vreg, alloc) in vregs.into_iter().zip(allocs.into_iter()) {
|
||||
self.edits.push((
|
||||
self.cfginfo.block_entry[block.index()].to_index(),
|
||||
InsertMovePrio::BlockParam,
|
||||
PosWithPrio {
|
||||
pos: self.cfginfo.block_entry[block.index()],
|
||||
prio: InsertMovePrio::BlockParam as u32,
|
||||
},
|
||||
Edit::DefAlloc { alloc, vreg },
|
||||
));
|
||||
}
|
||||
@@ -1131,24 +1135,20 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
// be a stable sort! We have to keep the order produced by the
|
||||
// parallel-move resolver for all moves within a single sort
|
||||
// key.
|
||||
self.edits
|
||||
.sort_by_key(|&(pos, prio, _)| u64_key(pos, prio as u32));
|
||||
self.edits.sort_by_key(|&(pos_prio, _)| pos_prio.key());
|
||||
self.stats.edits_count = self.edits.len();
|
||||
|
||||
// Add debug annotations.
|
||||
if self.annotations_enabled {
|
||||
for i in 0..self.edits.len() {
|
||||
let &(pos, _, ref edit) = &self.edits[i];
|
||||
let &(pos_prio, ref edit) = &self.edits[i];
|
||||
match edit {
|
||||
&Edit::Move { from, to } => {
|
||||
self.annotate(
|
||||
ProgPoint::from_index(pos),
|
||||
format!("move {} -> {})", from, to),
|
||||
);
|
||||
self.annotate(pos_prio.pos, format!("move {} -> {})", from, to));
|
||||
}
|
||||
&Edit::DefAlloc { alloc, vreg } => {
|
||||
let s = format!("defalloc {:?} := {:?}", alloc, vreg);
|
||||
self.annotate(ProgPoint::from_index(pos), s);
|
||||
self.annotate(pos_prio.pos, s);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1157,8 +1157,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
|
||||
pub fn add_move_edit(
|
||||
&mut self,
|
||||
pos: ProgPoint,
|
||||
prio: InsertMovePrio,
|
||||
pos_prio: PosWithPrio,
|
||||
from: Allocation,
|
||||
to: Allocation,
|
||||
_to_vreg: Option<VReg>,
|
||||
@@ -1167,15 +1166,13 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
if from.is_reg() && to.is_reg() {
|
||||
debug_assert_eq!(from.as_reg().unwrap().class(), to.as_reg().unwrap().class());
|
||||
}
|
||||
self.edits
|
||||
.push((pos.to_index(), prio, Edit::Move { from, to }));
|
||||
self.edits.push((pos_prio, Edit::Move { from, to }));
|
||||
}
|
||||
|
||||
#[cfg(feature = "checker")]
|
||||
if let Some(to_vreg) = _to_vreg {
|
||||
self.edits.push((
|
||||
pos.to_index(),
|
||||
prio,
|
||||
pos_prio,
|
||||
Edit::DefAlloc {
|
||||
alloc: to,
|
||||
vreg: to_vreg,
|
||||
|
||||
Reference in New Issue
Block a user