Combine sort keys into u64/u128
This allows the compiler to perform branch-less comparisons, which are more efficient. This results in ~5% fewer instructions executed.
This commit is contained in:
@@ -543,3 +543,17 @@ pub struct Stats {
|
|||||||
pub halfmoves_count: usize,
|
pub halfmoves_count: usize,
|
||||||
pub edits_count: usize,
|
pub edits_count: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Helper functions for generating sorting keys. The order of arguments is from
// the most significant field to the least significant one.
//
// These work best when the fields are stored in reverse order in memory (least
// significant field first) so that, on a little-endian target, they can be
// loaded with a single u64 or u128 load.
|
||||||
|
/// Packs two `u32` fields into a single `u64` sort key.
///
/// Arguments are listed from the most significant field to the least
/// significant one: `b` fills the upper 32 bits and `a` the lower 32 bits.
/// Comparing two keys as plain integers therefore orders by `b` first and
/// then by `a`, exactly like comparing the tuples `(b, a)`, but with a
/// single integer comparison.
#[inline(always)]
pub fn u64_key(b: u32, a: u32) -> u64 {
    // Widening with `From` is lossless; the parentheses make the intended
    // shift-then-or grouping explicit.
    (u64::from(b) << 32) | u64::from(a)
}
|
||||||
|
/// Packs four `u32` fields into a single `u128` sort key.
///
/// Arguments are listed from the most significant field to the least
/// significant one: `d` fills bits 96..128, `c` bits 64..96, `b` bits 32..64
/// and `a` bits 0..32. Comparing two keys as plain integers therefore gives
/// the same order as comparing the tuples `(d, c, b, a)`, but with a single
/// integer comparison.
#[inline(always)]
pub fn u128_key(d: u32, c: u32, b: u32, a: u32) -> u128 {
    // Widening with `From` is lossless; the parentheses make the intended
    // shift-then-or grouping explicit.
    (u128::from(d) << 96) | (u128::from(c) << 64) | (u128::from(b) << 32) | u128::from(a)
}
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ use super::{
|
|||||||
SpillSetIndex, Use, VRegData, VRegIndex, SLOT_NONE,
|
SpillSetIndex, Use, VRegData, VRegIndex, SLOT_NONE,
|
||||||
};
|
};
|
||||||
use crate::indexset::IndexSet;
|
use crate::indexset::IndexSet;
|
||||||
use crate::ion::data_structures::MultiFixedRegFixup;
|
use crate::ion::data_structures::{u128_key, MultiFixedRegFixup};
|
||||||
use crate::{
|
use crate::{
|
||||||
Allocation, Block, Function, Inst, InstPosition, Operand, OperandConstraint, OperandKind,
|
Allocation, Block, Function, Inst, InstPosition, Operand, OperandConstraint, OperandKind,
|
||||||
OperandPos, PReg, ProgPoint, RegAllocError, VReg,
|
OperandPos, PReg, ProgPoint, RegAllocError, VReg,
|
||||||
@@ -1141,8 +1141,24 @@ impl<'a, F: Function> Env<'a, F> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
self.blockparam_ins.sort_unstable();
|
self.blockparam_ins
|
||||||
self.blockparam_outs.sort_unstable();
|
.sort_unstable_by_key(|(to_vreg, to_block, from_block)| {
|
||||||
|
u128_key(
|
||||||
|
to_vreg.raw_u32(),
|
||||||
|
to_block.raw_u32(),
|
||||||
|
from_block.raw_u32(),
|
||||||
|
0,
|
||||||
|
)
|
||||||
|
});
|
||||||
|
self.blockparam_outs
|
||||||
|
.sort_unstable_by_key(|(from_vreg, from_block, to_block, to_vreg)| {
|
||||||
|
u128_key(
|
||||||
|
from_vreg.raw_u32(),
|
||||||
|
from_block.raw_u32(),
|
||||||
|
to_block.raw_u32(),
|
||||||
|
to_vreg.raw_u32(),
|
||||||
|
)
|
||||||
|
});
|
||||||
self.prog_move_srcs.sort_unstable_by_key(|(pos, _)| *pos);
|
self.prog_move_srcs.sort_unstable_by_key(|(pos, _)| *pos);
|
||||||
self.prog_move_dsts.sort_unstable_by_key(|(pos, _)| *pos);
|
self.prog_move_dsts.sort_unstable_by_key(|(pos, _)| *pos);
|
||||||
|
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ use super::{
|
|||||||
VRegIndex, SLOT_NONE,
|
VRegIndex, SLOT_NONE,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
use crate::ion::data_structures::u64_key;
|
||||||
use crate::moves::ParallelMoves;
|
use crate::moves::ParallelMoves;
|
||||||
use crate::{
|
use crate::{
|
||||||
Allocation, Block, Edit, Function, Inst, InstPosition, OperandConstraint, OperandKind,
|
Allocation, Block, Edit, Function, Inst, InstPosition, OperandConstraint, OperandKind,
|
||||||
@@ -850,7 +851,7 @@ impl<'a, F: Function> Env<'a, F> {
|
|||||||
// resolve (see cases below).
|
// resolve (see cases below).
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
self.inserted_moves
|
self.inserted_moves
|
||||||
.sort_unstable_by_key(|m| (m.pos.to_index(), m.prio));
|
.sort_unstable_by_key(|m| u64_key(m.pos.to_index(), m.prio as u32));
|
||||||
|
|
||||||
// Redundant-move elimination state tracker.
|
// Redundant-move elimination state tracker.
|
||||||
let mut redundant_moves = RedundantMoveEliminator::default();
|
let mut redundant_moves = RedundantMoveEliminator::default();
|
||||||
@@ -1104,7 +1105,7 @@ impl<'a, F: Function> Env<'a, F> {
|
|||||||
// Add edits to describe blockparam locations too. This is
|
// Add edits to describe blockparam locations too. This is
|
||||||
// required by the checker. This comes after any edge-moves.
|
// required by the checker. This comes after any edge-moves.
|
||||||
self.blockparam_allocs
|
self.blockparam_allocs
|
||||||
.sort_unstable_by_key(|&(block, idx, _, _)| (block, idx));
|
.sort_unstable_by_key(|&(block, idx, _, _)| u64_key(block.raw_u32(), idx));
|
||||||
self.stats.blockparam_allocs_count = self.blockparam_allocs.len();
|
self.stats.blockparam_allocs_count = self.blockparam_allocs.len();
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
while i < self.blockparam_allocs.len() {
|
while i < self.blockparam_allocs.len() {
|
||||||
@@ -1137,7 +1138,8 @@ impl<'a, F: Function> Env<'a, F> {
|
|||||||
// be a stable sort! We have to keep the order produced by the
|
// be a stable sort! We have to keep the order produced by the
|
||||||
// parallel-move resolver for all moves within a single sort
|
// parallel-move resolver for all moves within a single sort
|
||||||
// key.
|
// key.
|
||||||
self.edits.sort_by_key(|&(pos, prio, _)| (pos, prio));
|
self.edits
|
||||||
|
.sort_by_key(|&(pos, prio, _)| u64_key(pos, prio as u32));
|
||||||
self.stats.edits_count = self.edits.len();
|
self.stats.edits_count = self.edits.len();
|
||||||
|
|
||||||
// Add debug annotations.
|
// Add debug annotations.
|
||||||
|
|||||||
@@ -13,7 +13,7 @@
|
|||||||
//! Stackmap computation.
|
//! Stackmap computation.
|
||||||
|
|
||||||
use super::{Env, ProgPoint, VRegIndex};
|
use super::{Env, ProgPoint, VRegIndex};
|
||||||
use crate::Function;
|
use crate::{ion::data_structures::u64_key, Function};
|
||||||
|
|
||||||
impl<'a, F: Function> Env<'a, F> {
|
impl<'a, F: Function> Env<'a, F> {
|
||||||
pub fn compute_stackmaps(&mut self) {
|
pub fn compute_stackmaps(&mut self) {
|
||||||
@@ -64,7 +64,8 @@ impl<'a, F: Function> Env<'a, F> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
self.safepoint_slots.sort_unstable();
|
self.safepoint_slots
|
||||||
|
.sort_unstable_by_key(|(progpoint, slot)| u64_key(progpoint.to_index(), slot.bits()));
|
||||||
log::trace!("final safepoint slots info: {:?}", self.safepoint_slots);
|
log::trace!("final safepoint slots info: {:?}", self.safepoint_slots);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
* exception. See `LICENSE` for details.
|
* exception. See `LICENSE` for details.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use crate::Allocation;
|
use crate::{ion::data_structures::u64_key, Allocation};
|
||||||
use smallvec::{smallvec, SmallVec};
|
use smallvec::{smallvec, SmallVec};
|
||||||
|
|
||||||
pub type MoveVec<T> = SmallVec<[(Allocation, Allocation, T); 16]>;
|
pub type MoveVec<T> = SmallVec<[(Allocation, Allocation, T); 16]>;
|
||||||
@@ -53,7 +53,8 @@ impl<T: Clone + Copy + Default> ParallelMoves<T> {
|
|||||||
|
|
||||||
// Sort moves by source so that we can efficiently test for
|
// Sort moves by source so that we can efficiently test for
|
||||||
// presence.
|
// presence.
|
||||||
self.parallel_moves.sort_by_key(|&(src, dst, _)| (src, dst));
|
self.parallel_moves
|
||||||
|
.sort_by_key(|&(src, dst, _)| u64_key(src.bits(), dst.bits()));
|
||||||
|
|
||||||
// Do any dests overlap sources? If not, we can also just
|
// Do any dests overlap sources? If not, we can also just
|
||||||
// return the list.
|
// return the list.
|
||||||
|
|||||||
Reference in New Issue
Block a user