Combine sort keys into u64/u128

This allows the compiler to perform branchless comparisons, which are
more efficient.

This results in ~5% fewer instructions executed.
This commit is contained in:
Amanieu d'Antras
2022-01-11 10:38:50 +00:00
parent 053375f049
commit d95a9d9399
5 changed files with 44 additions and 10 deletions

View File

@@ -543,3 +543,17 @@ pub struct Stats {
pub halfmoves_count: usize,
pub edits_count: usize,
}
// Builds a 64-bit composite sort key from two u32 fields. Arguments are
// ordered from the most significant field to the least significant one, so
// comparing the returned keys orders first by `b`, then by `a`.
//
// These work best when the fields are stored in reverse order in memory so
// that they can be loaded with a single u64 load.
#[inline(always)]
pub fn u64_key(b: u32, a: u32) -> u64 {
    (u64::from(b) << 32) | u64::from(a)
}
// Builds a 128-bit composite sort key from four u32 fields, ordered from the
// most significant (`d`) down to the least significant (`a`), so key
// comparison sorts lexicographically on (d, c, b, a).
//
// Works best when the fields are stored in reverse order in memory so that
// they can be loaded with a single u128 load.
#[inline(always)]
pub fn u128_key(d: u32, c: u32, b: u32, a: u32) -> u128 {
    let lo = u128::from(a) | (u128::from(b) << 32);
    let hi = (u128::from(c) << 64) | (u128::from(d) << 96);
    hi | lo
}

View File

@@ -18,7 +18,7 @@ use super::{
SpillSetIndex, Use, VRegData, VRegIndex, SLOT_NONE,
};
use crate::indexset::IndexSet;
use crate::ion::data_structures::MultiFixedRegFixup;
use crate::ion::data_structures::{u128_key, MultiFixedRegFixup};
use crate::{
Allocation, Block, Function, Inst, InstPosition, Operand, OperandConstraint, OperandKind,
OperandPos, PReg, ProgPoint, RegAllocError, VReg,
@@ -1141,8 +1141,24 @@ impl<'a, F: Function> Env<'a, F> {
}
}
self.blockparam_ins.sort_unstable();
self.blockparam_outs.sort_unstable();
self.blockparam_ins
.sort_unstable_by_key(|(to_vreg, to_block, from_block)| {
u128_key(
to_vreg.raw_u32(),
to_block.raw_u32(),
from_block.raw_u32(),
0,
)
});
self.blockparam_outs
.sort_unstable_by_key(|(from_vreg, from_block, to_block, to_vreg)| {
u128_key(
from_vreg.raw_u32(),
from_block.raw_u32(),
to_block.raw_u32(),
to_vreg.raw_u32(),
)
});
self.prog_move_srcs.sort_unstable_by_key(|(pos, _)| *pos);
self.prog_move_dsts.sort_unstable_by_key(|(pos, _)| *pos);

View File

@@ -17,6 +17,7 @@ use super::{
VRegIndex, SLOT_NONE,
};
use crate::ion::data_structures::u64_key;
use crate::moves::ParallelMoves;
use crate::{
Allocation, Block, Edit, Function, Inst, InstPosition, OperandConstraint, OperandKind,
@@ -850,7 +851,7 @@ impl<'a, F: Function> Env<'a, F> {
// resolve (see cases below).
let mut i = 0;
self.inserted_moves
.sort_unstable_by_key(|m| (m.pos.to_index(), m.prio));
.sort_unstable_by_key(|m| u64_key(m.pos.to_index(), m.prio as u32));
// Redundant-move elimination state tracker.
let mut redundant_moves = RedundantMoveEliminator::default();
@@ -1104,7 +1105,7 @@ impl<'a, F: Function> Env<'a, F> {
// Add edits to describe blockparam locations too. This is
// required by the checker. This comes after any edge-moves.
self.blockparam_allocs
.sort_unstable_by_key(|&(block, idx, _, _)| (block, idx));
.sort_unstable_by_key(|&(block, idx, _, _)| u64_key(block.raw_u32(), idx));
self.stats.blockparam_allocs_count = self.blockparam_allocs.len();
let mut i = 0;
while i < self.blockparam_allocs.len() {
@@ -1137,7 +1138,8 @@ impl<'a, F: Function> Env<'a, F> {
// be a stable sort! We have to keep the order produced by the
// parallel-move resolver for all moves within a single sort
// key.
self.edits.sort_by_key(|&(pos, prio, _)| (pos, prio));
self.edits
.sort_by_key(|&(pos, prio, _)| u64_key(pos, prio as u32));
self.stats.edits_count = self.edits.len();
// Add debug annotations.

View File

@@ -13,7 +13,7 @@
//! Stackmap computation.
use super::{Env, ProgPoint, VRegIndex};
use crate::Function;
use crate::{ion::data_structures::u64_key, Function};
impl<'a, F: Function> Env<'a, F> {
pub fn compute_stackmaps(&mut self) {
@@ -64,7 +64,8 @@ impl<'a, F: Function> Env<'a, F> {
}
}
self.safepoint_slots.sort_unstable();
self.safepoint_slots
.sort_unstable_by_key(|(progpoint, slot)| u64_key(progpoint.to_index(), slot.bits()));
log::trace!("final safepoint slots info: {:?}", self.safepoint_slots);
}
}

View File

@@ -3,7 +3,7 @@
* exception. See `LICENSE` for details.
*/
use crate::Allocation;
use crate::{ion::data_structures::u64_key, Allocation};
use smallvec::{smallvec, SmallVec};
pub type MoveVec<T> = SmallVec<[(Allocation, Allocation, T); 16]>;
@@ -53,7 +53,8 @@ impl<T: Clone + Copy + Default> ParallelMoves<T> {
// Sort moves by source so that we can efficiently test for
// presence.
self.parallel_moves.sort_by_key(|&(src, dst, _)| (src, dst));
self.parallel_moves
.sort_by_key(|&(src, dst, _)| u64_key(src.bits(), dst.bits()));
// Do any dests overlap sources? If not, we can also just
// return the list.