From d95a9d9399036fb26fe17837084530a1d8839ccb Mon Sep 17 00:00:00 2001 From: Amanieu d'Antras Date: Tue, 11 Jan 2022 10:38:50 +0000 Subject: [PATCH] Combine sort keys into u64/u128 This allows the compiler to perform branch-less comparisons, which are more efficient. This results in ~5% fewer instructions executed. --- src/ion/data_structures.rs | 14 ++++++++++++++ src/ion/liveranges.rs | 22 +++++++++++++++++++--- src/ion/moves.rs | 8 +++++--- src/ion/stackmap.rs | 5 +++-- src/moves.rs | 5 +++-- 5 files changed, 44 insertions(+), 10 deletions(-) diff --git a/src/ion/data_structures.rs b/src/ion/data_structures.rs index b47c5c3..e798d04 100644 --- a/src/ion/data_structures.rs +++ b/src/ion/data_structures.rs @@ -543,3 +543,17 @@ pub struct Stats { pub halfmoves_count: usize, pub edits_count: usize, } + +// Helper function for generating sorting keys. The order of arguments is from +// the most significant field to the least significant one. +// +// These work best when the fields are stored in reverse order in memory so that +// they can be loaded with a single u64 load. +#[inline(always)] +pub fn u64_key(b: u32, a: u32) -> u64 { + a as u64 | (b as u64) << 32 +} +#[inline(always)] +pub fn u128_key(d: u32, c: u32, b: u32, a: u32) -> u128 { + a as u128 | (b as u128) << 32 | (c as u128) << 64 | (d as u128) << 96 +} diff --git a/src/ion/liveranges.rs b/src/ion/liveranges.rs index 7dd9d56..3213214 100644 --- a/src/ion/liveranges.rs +++ b/src/ion/liveranges.rs @@ -18,7 +18,7 @@ use super::{ SpillSetIndex, Use, VRegData, VRegIndex, SLOT_NONE, }; use crate::indexset::IndexSet; -use crate::ion::data_structures::MultiFixedRegFixup; +use crate::ion::data_structures::{u128_key, MultiFixedRegFixup}; use crate::{ Allocation, Block, Function, Inst, InstPosition, Operand, OperandConstraint, OperandKind, OperandPos, PReg, ProgPoint, RegAllocError, VReg, @@ -1141,8 +1141,24 @@ impl<'a, F: Function> Env<'a, F> { } } - self.blockparam_ins.sort_unstable(); - self.blockparam_outs.sort_unstable(); + self.blockparam_ins + .sort_unstable_by_key(|(to_vreg, to_block, from_block)| { + u128_key( + to_vreg.raw_u32(), + to_block.raw_u32(), + from_block.raw_u32(), + 0, + ) + }); + self.blockparam_outs + .sort_unstable_by_key(|(from_vreg, from_block, to_block, to_vreg)| { + u128_key( + from_vreg.raw_u32(), + from_block.raw_u32(), + to_block.raw_u32(), + to_vreg.raw_u32(), + ) + }); self.prog_move_srcs.sort_unstable_by_key(|(pos, _)| *pos); self.prog_move_dsts.sort_unstable_by_key(|(pos, _)| *pos); diff --git a/src/ion/moves.rs b/src/ion/moves.rs index b01bf59..30a8c89 100644 --- a/src/ion/moves.rs +++ b/src/ion/moves.rs @@ -17,6 +17,7 @@ use super::{ VRegIndex, SLOT_NONE, }; +use crate::ion::data_structures::u64_key; use crate::moves::ParallelMoves; use crate::{ Allocation, Block, Edit, Function, Inst, InstPosition, OperandConstraint, OperandKind, @@ -850,7 +851,7 @@ impl<'a, F: Function> Env<'a, F> { // resolve (see cases below). let mut i = 0; self.inserted_moves - .sort_unstable_by_key(|m| (m.pos.to_index(), m.prio)); + .sort_unstable_by_key(|m| u64_key(m.pos.to_index(), m.prio as u32)); // Redundant-move elimination state tracker. let mut redundant_moves = RedundantMoveEliminator::default(); @@ -1104,7 +1105,7 @@ impl<'a, F: Function> Env<'a, F> { // Add edits to describe blockparam locations too. This is // required by the checker. This comes after any edge-moves. self.blockparam_allocs - .sort_unstable_by_key(|&(block, idx, _, _)| (block, idx)); + .sort_unstable_by_key(|&(block, idx, _, _)| u64_key(block.raw_u32(), idx)); self.stats.blockparam_allocs_count = self.blockparam_allocs.len(); let mut i = 0; while i < self.blockparam_allocs.len() { @@ -1137,7 +1138,8 @@ impl<'a, F: Function> Env<'a, F> { // be a stable sort! We have to keep the order produced by the // parallel-move resolver for all moves within a single sort // key. - self.edits.sort_by_key(|&(pos, prio, _)| (pos, prio)); + self.edits + .sort_by_key(|&(pos, prio, _)| u64_key(pos, prio as u32)); self.stats.edits_count = self.edits.len(); // Add debug annotations. diff --git a/src/ion/stackmap.rs b/src/ion/stackmap.rs index 3bae749..f8cd471 100644 --- a/src/ion/stackmap.rs +++ b/src/ion/stackmap.rs @@ -13,7 +13,7 @@ //! Stackmap computation. use super::{Env, ProgPoint, VRegIndex}; -use crate::Function; +use crate::{ion::data_structures::u64_key, Function}; impl<'a, F: Function> Env<'a, F> { pub fn compute_stackmaps(&mut self) { @@ -64,7 +64,8 @@ impl<'a, F: Function> Env<'a, F> { } } - self.safepoint_slots.sort_unstable(); + self.safepoint_slots + .sort_unstable_by_key(|(progpoint, slot)| u64_key(progpoint.to_index(), slot.bits())); log::trace!("final safepoint slots info: {:?}", self.safepoint_slots); } } diff --git a/src/moves.rs b/src/moves.rs index e961654..3828f1b 100644 --- a/src/moves.rs +++ b/src/moves.rs @@ -3,7 +3,7 @@ * exception. See `LICENSE` for details. */ -use crate::Allocation; +use crate::{ion::data_structures::u64_key, Allocation}; use smallvec::{smallvec, SmallVec}; pub type MoveVec = SmallVec<[(Allocation, Allocation, T); 16]>; @@ -53,7 +53,8 @@ impl ParallelMoves { // Sort moves by source so that we can efficiently test for // presence. - self.parallel_moves.sort_by_key(|&(src, dst, _)| (src, dst)); + self.parallel_moves + .sort_by_key(|&(src, dst, _)| u64_key(src.bits(), dst.bits())); // Do any dests overlap sources? If not, we can also just // return the list.