Combine sort keys into u64/u128
This allows the compiler to perform branch-less comparisons, which are more efficient. This results in ~5% fewer instructions executed.
This commit is contained in:
@@ -543,3 +543,17 @@ pub struct Stats {
|
||||
pub halfmoves_count: usize,
|
||||
pub edits_count: usize,
|
||||
}
|
||||
|
||||
// Helper functions for generating sorting keys. The order of arguments is from
|
||||
// the most significant field to the least significant one.
|
||||
//
|
||||
// These work best when the fields are stored in reverse order in memory so that
|
||||
// they can be loaded with a single u64 load.
|
||||
/// Packs two `u32` fields into a single `u64` sort key.
///
/// Arguments are listed from the most significant field to the least
/// significant one: `b` occupies the upper 32 bits and `a` the lower 32 bits.
/// Comparing two keys as plain integers therefore gives the same ordering as
/// comparing the tuples `(b, a)` lexicographically.
#[inline(always)]
pub fn u64_key(b: u32, a: u32) -> u64 {
    // Lossless widening; the two fields cannot overlap after the shift.
    (u64::from(b) << 32) | u64::from(a)
}
|
||||
/// Packs four `u32` fields into a single `u128` sort key.
///
/// Arguments are listed from the most significant field to the least
/// significant one: `d` occupies bits 96..128, `c` bits 64..96, `b` bits
/// 32..64 and `a` bits 0..32. Comparing two keys as plain integers therefore
/// gives the same ordering as comparing the tuples `(d, c, b, a)`
/// lexicographically.
#[inline(always)]
pub fn u128_key(d: u32, c: u32, b: u32, a: u32) -> u128 {
    // Lossless widening; each field lands in its own disjoint 32-bit lane.
    (u128::from(d) << 96) | (u128::from(c) << 64) | (u128::from(b) << 32) | u128::from(a)
}
|
||||
|
||||
@@ -18,7 +18,7 @@ use super::{
|
||||
SpillSetIndex, Use, VRegData, VRegIndex, SLOT_NONE,
|
||||
};
|
||||
use crate::indexset::IndexSet;
|
||||
use crate::ion::data_structures::MultiFixedRegFixup;
|
||||
use crate::ion::data_structures::{u128_key, MultiFixedRegFixup};
|
||||
use crate::{
|
||||
Allocation, Block, Function, Inst, InstPosition, Operand, OperandConstraint, OperandKind,
|
||||
OperandPos, PReg, ProgPoint, RegAllocError, VReg,
|
||||
@@ -1141,8 +1141,24 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
}
|
||||
}
|
||||
|
||||
self.blockparam_ins.sort_unstable();
|
||||
self.blockparam_outs.sort_unstable();
|
||||
self.blockparam_ins
|
||||
.sort_unstable_by_key(|(to_vreg, to_block, from_block)| {
|
||||
u128_key(
|
||||
to_vreg.raw_u32(),
|
||||
to_block.raw_u32(),
|
||||
from_block.raw_u32(),
|
||||
0,
|
||||
)
|
||||
});
|
||||
self.blockparam_outs
|
||||
.sort_unstable_by_key(|(from_vreg, from_block, to_block, to_vreg)| {
|
||||
u128_key(
|
||||
from_vreg.raw_u32(),
|
||||
from_block.raw_u32(),
|
||||
to_block.raw_u32(),
|
||||
to_vreg.raw_u32(),
|
||||
)
|
||||
});
|
||||
self.prog_move_srcs.sort_unstable_by_key(|(pos, _)| *pos);
|
||||
self.prog_move_dsts.sort_unstable_by_key(|(pos, _)| *pos);
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ use super::{
|
||||
VRegIndex, SLOT_NONE,
|
||||
};
|
||||
|
||||
use crate::ion::data_structures::u64_key;
|
||||
use crate::moves::ParallelMoves;
|
||||
use crate::{
|
||||
Allocation, Block, Edit, Function, Inst, InstPosition, OperandConstraint, OperandKind,
|
||||
@@ -850,7 +851,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
// resolve (see cases below).
|
||||
let mut i = 0;
|
||||
self.inserted_moves
|
||||
.sort_unstable_by_key(|m| (m.pos.to_index(), m.prio));
|
||||
.sort_unstable_by_key(|m| u64_key(m.pos.to_index(), m.prio as u32));
|
||||
|
||||
// Redundant-move elimination state tracker.
|
||||
let mut redundant_moves = RedundantMoveEliminator::default();
|
||||
@@ -1104,7 +1105,7 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
// Add edits to describe blockparam locations too. This is
|
||||
// required by the checker. This comes after any edge-moves.
|
||||
self.blockparam_allocs
|
||||
.sort_unstable_by_key(|&(block, idx, _, _)| (block, idx));
|
||||
.sort_unstable_by_key(|&(block, idx, _, _)| u64_key(block.raw_u32(), idx));
|
||||
self.stats.blockparam_allocs_count = self.blockparam_allocs.len();
|
||||
let mut i = 0;
|
||||
while i < self.blockparam_allocs.len() {
|
||||
@@ -1137,7 +1138,8 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
// be a stable sort! We have to keep the order produced by the
|
||||
// parallel-move resolver for all moves within a single sort
|
||||
// key.
|
||||
self.edits.sort_by_key(|&(pos, prio, _)| (pos, prio));
|
||||
self.edits
|
||||
.sort_by_key(|&(pos, prio, _)| u64_key(pos, prio as u32));
|
||||
self.stats.edits_count = self.edits.len();
|
||||
|
||||
// Add debug annotations.
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
//! Stackmap computation.
|
||||
|
||||
use super::{Env, ProgPoint, VRegIndex};
|
||||
use crate::Function;
|
||||
use crate::{ion::data_structures::u64_key, Function};
|
||||
|
||||
impl<'a, F: Function> Env<'a, F> {
|
||||
pub fn compute_stackmaps(&mut self) {
|
||||
@@ -64,7 +64,8 @@ impl<'a, F: Function> Env<'a, F> {
|
||||
}
|
||||
}
|
||||
|
||||
self.safepoint_slots.sort_unstable();
|
||||
self.safepoint_slots
|
||||
.sort_unstable_by_key(|(progpoint, slot)| u64_key(progpoint.to_index(), slot.bits()));
|
||||
log::trace!("final safepoint slots info: {:?}", self.safepoint_slots);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
* exception. See `LICENSE` for details.
|
||||
*/
|
||||
|
||||
use crate::Allocation;
|
||||
use crate::{ion::data_structures::u64_key, Allocation};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
|
||||
pub type MoveVec<T> = SmallVec<[(Allocation, Allocation, T); 16]>;
|
||||
@@ -53,7 +53,8 @@ impl<T: Clone + Copy + Default> ParallelMoves<T> {
|
||||
|
||||
// Sort moves by source so that we can efficiently test for
|
||||
// presence.
|
||||
self.parallel_moves.sort_by_key(|&(src, dst, _)| (src, dst));
|
||||
self.parallel_moves
|
||||
.sort_by_key(|&(src, dst, _)| u64_key(src.bits(), dst.bits()));
|
||||
|
||||
// Do any dests overlap sources? If not, we can also just
|
||||
// return the list.
|
||||
|
||||
Reference in New Issue
Block a user