Rearrange some struct fields to work better with u64_key/u128_key

This allows the compiler to load the whole key with 1 or 2 64-bit
accesses, assuming little-endian ordering.

Improves instruction count by ~1%.
This commit is contained in:
Amanieu d'Antras
2022-01-11 12:48:28 +00:00
parent 693fb6a975
commit 2d9d5dd82b
5 changed files with 137 additions and 90 deletions

View File

@@ -277,6 +277,56 @@ pub struct MultiFixedRegFixup {
pub vreg: VRegIndex,
}
/// The field order is significant: these are sorted so that a
/// scan over vregs, then blocks in each range, can scan in
/// order through this (sorted) list and add allocs to the
/// half-move list.
///
/// The fields in this struct are reversed in sort order so that the entire
/// struct can be treated as a u128 for sorting purposes: on a little-endian
/// target the first-declared field occupies the least-significant bytes, so
/// declaring the least-significant sort key first makes the in-memory
/// representation match `key()` (allowing 1-2 plain 64-bit loads).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(C)]
pub struct BlockparamOut {
    /// Destination vreg (the blockparam being defined); least-significant sort key.
    pub to_vreg: VRegIndex,
    /// Successor block that receives the blockparam.
    pub to_block: Block,
    /// Predecessor block providing the value.
    pub from_block: Block,
    /// Vreg carrying the value out of `from_block`; most-significant sort key.
    pub from_vreg: VRegIndex,
}
impl BlockparamOut {
    /// Sort key: orders by `from_vreg`, then `from_block`, then `to_block`,
    /// then `to_vreg` (most- to least-significant) — i.e. the reverse of the
    /// struct's declared field order, so the key equals the struct's
    /// little-endian bit pattern read as a u128.
    #[inline(always)]
    pub fn key(&self) -> u128 {
        u128_key(
            self.from_vreg.raw_u32(),
            self.from_block.raw_u32(),
            self.to_block.raw_u32(),
            self.to_vreg.raw_u32(),
        )
    }
}
/// As above for `BlockparamOut`, field order is significant.
///
/// The fields in this struct are reversed in sort order so that the entire
/// struct can be treated as a u128 for sorting purposes. (This struct has
/// only three 32-bit fields; the remaining key word is zero — see `key()`.)
#[derive(Clone, Debug)]
#[repr(C)]
pub struct BlockparamIn {
    /// Predecessor block providing the value; least-significant sort key.
    pub from_block: Block,
    /// Block whose blockparam receives the value.
    pub to_block: Block,
    /// The blockparam vreg being defined; most-significant sort key.
    pub to_vreg: VRegIndex,
}
impl BlockparamIn {
    /// Sort key: orders by `to_vreg`, then `to_block`, then `from_block`
    /// (most- to least-significant). The struct has only three fields, so
    /// the fourth (least-significant) 32-bit word of the key is a constant
    /// zero; relative ordering is unaffected.
    #[inline(always)]
    pub fn key(&self) -> u128 {
        u128_key(
            self.to_vreg.raw_u32(),
            self.to_block.raw_u32(),
            self.from_block.raw_u32(),
            0,
        )
    }
}
#[derive(Clone, Debug)]
pub struct Env<'a, F: Function> {
pub func: &'a F,
@@ -284,16 +334,8 @@ pub struct Env<'a, F: Function> {
pub cfginfo: CFGInfo,
pub liveins: Vec<IndexSet>,
pub liveouts: Vec<IndexSet>,
/// Blockparam outputs: from-vreg, (end of) from-block, (start of)
/// to-block, to-vreg. The field order is significant: these are sorted so
/// that a scan over vregs, then blocks in each range, can scan in
/// order through this (sorted) list and add allocs to the
/// half-move list.
pub blockparam_outs: Vec<(VRegIndex, Block, Block, VRegIndex)>,
/// Blockparam inputs: to-vreg, (start of) to-block, (end of)
/// from-block. As above for `blockparam_outs`, field order is
/// significant.
pub blockparam_ins: Vec<(VRegIndex, Block, Block)>,
pub blockparam_outs: Vec<BlockparamOut>,
pub blockparam_ins: Vec<BlockparamIn>,
/// Blockparam allocs: block, idx, vreg, alloc. Info to describe
/// blockparam locations at block entry, for metadata purposes
/// (e.g. for the checker).
@@ -343,7 +385,7 @@ pub struct Env<'a, F: Function> {
pub inserted_moves: Vec<InsertedMove>,
// Output:
pub edits: Vec<(u32, InsertMovePrio, Edit)>,
pub edits: Vec<(PosWithPrio, Edit)>,
pub allocs: Vec<Allocation>,
pub inst_alloc_offsets: Vec<u32>,
pub num_spillslots: u32,
@@ -487,8 +529,7 @@ impl LiveRangeSet {
#[derive(Clone, Debug)]
pub struct InsertedMove {
pub pos: ProgPoint,
pub prio: InsertMovePrio,
pub pos_prio: PosWithPrio,
pub from_alloc: Allocation,
pub to_alloc: Allocation,
pub to_vreg: Option<VReg>,
@@ -505,6 +546,21 @@ pub enum InsertMovePrio {
OutEdgeMoves,
}
/// The fields in this struct are reversed in sort order so that the entire
/// struct can be treated as a u64 for sorting purposes: on a little-endian
/// target `prio` fills the low 32 bits and `pos` the high 32 bits, matching
/// the value produced by `key()`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[repr(C)]
pub struct PosWithPrio {
    /// Insertion priority (an `InsertMovePrio` stored as its raw `u32`
    /// discriminant); secondary sort key.
    pub prio: u32,
    /// Program point at which the edit/move is inserted; primary sort key.
    pub pos: ProgPoint,
}

impl PosWithPrio {
    /// Sort key: `pos` in the high 32 bits, `prio` in the low 32 bits, so
    /// edits sort by program point first, then by insertion priority.
    pub fn key(self) -> u64 {
        u64_key(self.pos.to_index(), self.prio)
    }
}
#[derive(Clone, Copy, Debug, Default)]
pub struct Stats {
pub livein_blocks: usize,

View File

@@ -18,7 +18,7 @@ use super::{
SpillSetIndex, Use, VRegData, VRegIndex, SLOT_NONE,
};
use crate::indexset::IndexSet;
use crate::ion::data_structures::{u128_key, MultiFixedRegFixup};
use crate::ion::data_structures::{BlockparamIn, BlockparamOut, MultiFixedRegFixup};
use crate::{
Allocation, Block, Function, Inst, InstPosition, Operand, OperandConstraint, OperandKind,
OperandPos, PReg, ProgPoint, RegAllocError, VReg,
@@ -430,8 +430,12 @@ impl<'a, F: Function> Env<'a, F> {
{
let blockparam_out = VRegIndex::new(blockparam_out.vreg());
let blockparam_in = VRegIndex::new(blockparam_in.vreg());
self.blockparam_outs
.push((blockparam_out, block, succ, blockparam_in));
self.blockparam_outs.push(BlockparamOut {
to_vreg: blockparam_in,
to_block: succ,
from_block: block,
from_vreg: blockparam_out,
});
// Include outgoing blockparams in the initial live set.
live.set(blockparam_out.index(), true);
@@ -1050,7 +1054,11 @@ impl<'a, F: Function> Env<'a, F> {
// add `blockparam_ins` entries.
let vreg_idx = VRegIndex::new(vreg.vreg());
for &pred in self.func.block_preds(block) {
self.blockparam_ins.push((vreg_idx, block, pred));
self.blockparam_ins.push(BlockparamIn {
to_vreg: vreg_idx,
to_block: block,
from_block: pred,
});
}
}
}
@@ -1141,24 +1149,8 @@ impl<'a, F: Function> Env<'a, F> {
}
}
self.blockparam_ins
.sort_unstable_by_key(|(to_vreg, to_block, from_block)| {
u128_key(
to_vreg.raw_u32(),
to_block.raw_u32(),
from_block.raw_u32(),
0,
)
});
self.blockparam_outs
.sort_unstable_by_key(|(from_vreg, from_block, to_block, to_vreg)| {
u128_key(
from_vreg.raw_u32(),
from_block.raw_u32(),
to_block.raw_u32(),
to_vreg.raw_u32(),
)
});
self.blockparam_ins.sort_unstable_by_key(|x| x.key());
self.blockparam_outs.sort_unstable_by_key(|x| x.key());
self.prog_move_srcs.sort_unstable_by_key(|(pos, _)| *pos);
self.prog_move_dsts.sort_unstable_by_key(|(pos, _)| *pos);

View File

@@ -16,7 +16,7 @@ use super::{
Env, LiveBundleIndex, LiveRangeIndex, LiveRangeKey, SpillSet, SpillSetIndex, SpillSlotIndex,
VRegIndex,
};
use crate::{Function, Inst, OperandConstraint, PReg};
use crate::{ion::data_structures::BlockparamOut, Function, Inst, OperandConstraint, PReg};
use smallvec::smallvec;
impl<'a, F: Function> Env<'a, F> {
@@ -332,7 +332,9 @@ impl<'a, F: Function> Env<'a, F> {
// Attempt to merge blockparams with their inputs.
for i in 0..self.blockparam_outs.len() {
let (from_vreg, _, _, to_vreg) = self.blockparam_outs[i];
let BlockparamOut {
from_vreg, to_vreg, ..
} = self.blockparam_outs[i];
log::trace!(
"trying to merge blockparam v{} with input v{}",
to_vreg.index(),

View File

@@ -133,7 +133,7 @@ pub fn run<F: Function>(
edits: env
.edits
.into_iter()
.map(|(pos, _, edit)| (ProgPoint::from_index(pos), edit))
.map(|(pos_prio, edit)| (pos_prio.pos, edit))
.collect(),
allocs: env.allocs,
inst_alloc_offsets: env.inst_alloc_offsets,

View File

@@ -17,7 +17,7 @@ use super::{
VRegIndex, SLOT_NONE,
};
use crate::ion::data_structures::u64_key;
use crate::ion::data_structures::{BlockparamIn, BlockparamOut, PosWithPrio};
use crate::moves::ParallelMoves;
use crate::{
Allocation, Block, Edit, Function, Inst, InstPosition, OperandConstraint, OperandKind,
@@ -68,8 +68,10 @@ impl<'a, F: Function> Env<'a, F> {
_ => {}
}
self.inserted_moves.push(InsertedMove {
pos,
prio,
pos_prio: PosWithPrio {
pos,
prio: prio as u32,
},
from_alloc,
to_alloc,
to_vreg,
@@ -334,8 +336,12 @@ impl<'a, F: Function> Env<'a, F> {
blockparam_out_idx,
);
while blockparam_out_idx < self.blockparam_outs.len() {
let (from_vreg, from_block, to_block, to_vreg) =
self.blockparam_outs[blockparam_out_idx];
let BlockparamOut {
from_vreg,
from_block,
to_block,
to_vreg,
} = self.blockparam_outs[blockparam_out_idx];
if (from_vreg, from_block) > (vreg, block) {
break;
}
@@ -399,8 +405,11 @@ impl<'a, F: Function> Env<'a, F> {
blockparam_in_idx
);
while blockparam_in_idx < self.blockparam_ins.len() {
let (to_vreg, to_block, from_block) =
self.blockparam_ins[blockparam_in_idx];
let BlockparamIn {
from_block,
to_block,
to_vreg,
} = self.blockparam_ins[blockparam_in_idx];
if (to_vreg, to_block) > (vreg, block) {
break;
}
@@ -855,7 +864,7 @@ impl<'a, F: Function> Env<'a, F> {
// resolve (see cases below).
let mut i = 0;
self.inserted_moves
.sort_unstable_by_key(|m| u64_key(m.pos.to_index(), m.prio as u32));
.sort_unstable_by_key(|m| m.pos_prio.key());
// Redundant-move elimination state tracker.
let mut redundant_moves = RedundantMoveEliminator::default();
@@ -912,18 +921,14 @@ impl<'a, F: Function> Env<'a, F> {
while i < self.inserted_moves.len() {
let start = i;
let pos = self.inserted_moves[i].pos;
let prio = self.inserted_moves[i].prio;
while i < self.inserted_moves.len()
&& self.inserted_moves[i].pos == pos
&& self.inserted_moves[i].prio == prio
{
let pos_prio = self.inserted_moves[i].pos_prio;
while i < self.inserted_moves.len() && self.inserted_moves[i].pos_prio == pos_prio {
i += 1;
}
let moves = &self.inserted_moves[start..i];
redundant_move_process_side_effects(self, &mut redundant_moves, last_pos, pos);
last_pos = pos;
redundant_move_process_side_effects(self, &mut redundant_moves, last_pos, pos_prio.pos);
last_pos = pos_prio.pos;
// Gather all the moves with Int class and Float class
// separately. These cannot interact, so it is safe to
@@ -966,7 +971,11 @@ impl<'a, F: Function> Env<'a, F> {
// that can be done one at a time.
let scratch = self.env.scratch_by_class[regclass as u8 as usize];
let mut parallel_moves = ParallelMoves::new(Allocation::reg(scratch));
log::trace!("parallel moves at pos {:?} prio {:?}", pos, prio);
log::trace!(
"parallel moves at pos {:?} prio {:?}",
pos_prio.pos,
pos_prio.prio
);
for m in moves {
if (m.from_alloc != m.to_alloc) || m.to_vreg.is_some() {
log::trace!(" {} -> {}", m.from_alloc, m.to_alloc,);
@@ -1009,15 +1018,13 @@ impl<'a, F: Function> Env<'a, F> {
if self.allocation_is_stack(src) && self.allocation_is_stack(dst) {
if !scratch_used_yet {
self.add_move_edit(
pos,
prio,
pos_prio,
src,
Allocation::reg(scratch),
to_vreg,
);
self.add_move_edit(
pos,
prio,
pos_prio,
Allocation::reg(scratch),
dst,
to_vreg,
@@ -1025,36 +1032,32 @@ impl<'a, F: Function> Env<'a, F> {
} else {
debug_assert!(extra_slot.is_some());
self.add_move_edit(
pos,
prio,
pos_prio,
Allocation::reg(scratch),
extra_slot.unwrap(),
None,
);
self.add_move_edit(
pos,
prio,
pos_prio,
src,
Allocation::reg(scratch),
to_vreg,
);
self.add_move_edit(
pos,
prio,
pos_prio,
Allocation::reg(scratch),
dst,
to_vreg,
);
self.add_move_edit(
pos,
prio,
pos_prio,
extra_slot.unwrap(),
Allocation::reg(scratch),
None,
);
}
} else {
self.add_move_edit(pos, prio, src, dst, to_vreg);
self.add_move_edit(pos_prio, src, dst, to_vreg);
}
} else {
log::trace!(" -> redundant move elided");
@@ -1066,8 +1069,7 @@ impl<'a, F: Function> Env<'a, F> {
alloc,
vreg
);
self.edits
.push((pos.to_index(), prio, Edit::DefAlloc { alloc, vreg }));
self.edits.push((pos_prio, Edit::DefAlloc { alloc, vreg }));
}
}
}
@@ -1076,8 +1078,8 @@ impl<'a, F: Function> Env<'a, F> {
for m in &self_moves {
log::trace!(
"self move at pos {:?} prio {:?}: {} -> {} to_vreg {:?}",
pos,
prio,
pos_prio.pos,
pos_prio.prio,
m.from_alloc,
m.to_alloc,
m.to_vreg
@@ -1086,8 +1088,7 @@ impl<'a, F: Function> Env<'a, F> {
debug_assert!(action.elide);
if let Some((alloc, vreg)) = action.def_alloc {
log::trace!(" -> DefAlloc: alloc {} vreg {}", alloc, vreg);
self.edits
.push((pos.to_index(), prio, Edit::DefAlloc { alloc, vreg }));
self.edits.push((pos_prio, Edit::DefAlloc { alloc, vreg }));
}
}
}
@@ -1096,6 +1097,7 @@ impl<'a, F: Function> Env<'a, F> {
{
// Add edits to describe blockparam locations too. This is
// required by the checker. This comes after any edge-moves.
use crate::ion::data_structures::u64_key;
self.blockparam_allocs
.sort_unstable_by_key(|&(block, idx, _, _)| u64_key(block.raw_u32(), idx));
self.stats.blockparam_allocs_count = self.blockparam_allocs.len();
@@ -1119,8 +1121,10 @@ impl<'a, F: Function> Env<'a, F> {
debug_assert_eq!(allocs.len(), self.func.block_params(block).len());
for (vreg, alloc) in vregs.into_iter().zip(allocs.into_iter()) {
self.edits.push((
self.cfginfo.block_entry[block.index()].to_index(),
InsertMovePrio::BlockParam,
PosWithPrio {
pos: self.cfginfo.block_entry[block.index()],
prio: InsertMovePrio::BlockParam as u32,
},
Edit::DefAlloc { alloc, vreg },
));
}
@@ -1131,24 +1135,20 @@ impl<'a, F: Function> Env<'a, F> {
// be a stable sort! We have to keep the order produced by the
// parallel-move resolver for all moves within a single sort
// key.
self.edits
.sort_by_key(|&(pos, prio, _)| u64_key(pos, prio as u32));
self.edits.sort_by_key(|&(pos_prio, _)| pos_prio.key());
self.stats.edits_count = self.edits.len();
// Add debug annotations.
if self.annotations_enabled {
for i in 0..self.edits.len() {
let &(pos, _, ref edit) = &self.edits[i];
let &(pos_prio, ref edit) = &self.edits[i];
match edit {
&Edit::Move { from, to } => {
self.annotate(
ProgPoint::from_index(pos),
format!("move {} -> {})", from, to),
);
self.annotate(pos_prio.pos, format!("move {} -> {})", from, to));
}
&Edit::DefAlloc { alloc, vreg } => {
let s = format!("defalloc {:?} := {:?}", alloc, vreg);
self.annotate(ProgPoint::from_index(pos), s);
self.annotate(pos_prio.pos, s);
}
}
}
@@ -1157,8 +1157,7 @@ impl<'a, F: Function> Env<'a, F> {
pub fn add_move_edit(
&mut self,
pos: ProgPoint,
prio: InsertMovePrio,
pos_prio: PosWithPrio,
from: Allocation,
to: Allocation,
_to_vreg: Option<VReg>,
@@ -1167,15 +1166,13 @@ impl<'a, F: Function> Env<'a, F> {
if from.is_reg() && to.is_reg() {
debug_assert_eq!(from.as_reg().unwrap().class(), to.as_reg().unwrap().class());
}
self.edits
.push((pos.to_index(), prio, Edit::Move { from, to }));
self.edits.push((pos_prio, Edit::Move { from, to }));
}
#[cfg(feature = "checker")]
if let Some(to_vreg) = _to_vreg {
self.edits.push((
pos.to_index(),
prio,
pos_prio,
Edit::DefAlloc {
alloc: to,
vreg: to_vreg,