From 1a7a0c5e3dd843a71cf8cbfb1a7d6a8bd1537933 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Thu, 6 May 2021 16:09:39 -0700 Subject: [PATCH] Some performance tweaks -- try to reduce register probe count with soem more hints. Also fix spill-weight caching. --- src/ion/mod.rs | 150 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 130 insertions(+), 20 deletions(-) diff --git a/src/ion/mod.rs b/src/ion/mod.rs index 138c06d..3a2820a 100644 --- a/src/ion/mod.rs +++ b/src/ion/mod.rs @@ -35,6 +35,26 @@ * - Rematerialization */ +/* + Performance ideas: + + - moves: don't consider as normal inst with uses/defs + - explicit list of src/dst pairs? elide completely if + remain in same bundle; otherwise only should appear + when generating halfmoves + - sorted list of (inst, src-vreg, dst-vreg) and + (vreg, inst) + - ignore inst during all passes over code + - when same bundle at both ends, ignore during resolution + - otherwise, fill in move-list alloc slots during scans + + - conflict hints? (note on one bundle that it definitely conflicts + with another, so avoid probing the other's alloc) + + - partial allocation -- place one LR, split rest off into separate + bundle, in one pass? + */ + #![allow(dead_code, unused_imports)] use crate::bitvec::BitVec; @@ -118,6 +138,9 @@ struct LiveRange { next_in_bundle: LiveRangeIndex, next_in_reg: LiveRangeIndex, + + reg_hint: Option, // if a bundle partly fits, this is used to + // record LRs that do fit } #[derive(Clone, Copy, Debug, PartialEq, Eq)] @@ -202,7 +225,7 @@ impl LiveBundle { #[inline(always)] fn cached_spill_weight(&self) -> u32 { - self.spill_weight_and_props & !((1 << 30) - 1) + self.spill_weight_and_props & ((1 << 30) - 1) } } @@ -521,6 +544,8 @@ pub struct Stats { process_bundle_count: usize, process_bundle_reg_probes_fixed: usize, process_bundle_reg_success_fixed: usize, + process_bundle_bounding_range_probes_any: usize, + process_bundle_bounding_range_success_any: usize, process_bundle_reg_probes_any: usize, process_bundle_reg_success_any: usize, evict_bundle_event: usize, @@ -558,7 +583,8 @@ pub struct Stats { struct RegTraversalIter<'a> { env: &'a MachineEnv, class: usize, - hint_reg: Option, + hints: [Option; 2], + hint_idx: usize, pref_idx: usize, non_pref_idx: usize, offset: usize, @@ -568,13 +594,20 @@ impl<'a> RegTraversalIter<'a> { pub fn new( env: &'a MachineEnv, class: RegClass, - hint_reg: Option, + mut hint_reg: Option, + mut hint2_reg: Option, offset: usize, ) -> Self { + if hint_reg.is_none() { + hint_reg = hint2_reg; + hint2_reg = None; + } + let hints = [hint_reg, hint2_reg]; Self { env, class: class as u8 as usize, - hint_reg, + hints, + hint_idx: 0, pref_idx: 0, non_pref_idx: 0, offset, @@ -586,19 +619,27 @@ impl<'a> std::iter::Iterator for RegTraversalIter<'a> { type Item = PReg; fn next(&mut self) -> Option { - if let Some(preg) = self.hint_reg.take() { - return Some(preg); + if self.hint_idx < 2 && self.hints[self.hint_idx].is_some() { + let h = self.hints[self.hint_idx]; + self.hint_idx += 1; + return h; } - if self.pref_idx < self.env.preferred_regs_by_class[self.class].len() { + while self.pref_idx < self.env.preferred_regs_by_class[self.class].len() { let arr = &self.env.preferred_regs_by_class[self.class][..]; let r = arr[(self.pref_idx + self.offset) % arr.len()]; self.pref_idx += 1; + if Some(r) == self.hints[0] || Some(r) == self.hints[1] { + continue; + } return Some(r); } - if self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].len() { + while self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].len() { let arr = &self.env.non_preferred_regs_by_class[self.class][..]; let r = arr[(self.non_pref_idx + self.offset) % arr.len()]; self.non_pref_idx += 1; + if Some(r) == self.hints[0] || Some(r) == self.hints[1] { + continue; + } return Some(r); } None @@ -699,6 +740,7 @@ impl<'a, F: Function> Env<'a, F> { last_use: UseIndex::invalid(), next_in_bundle: LiveRangeIndex::invalid(), next_in_reg: LiveRangeIndex::invalid(), + reg_hint: None, }); LiveRangeIndex::new(idx) } @@ -1889,6 +1931,26 @@ impl<'a, F: Function> Env<'a, F> { Some(needed) } + fn bundle_bounding_range_if_multiple(&self, bundle: LiveBundleIndex) -> Option { + let first_range = self.bundles[bundle.index()].first_range; + let last_range = self.bundles[bundle.index()].last_range; + if first_range.is_invalid() || first_range == last_range { + return None; + } + Some(CodeRange { + from: self.ranges[first_range.index()].range.from, + to: self.ranges[last_range.index()].range.to, + }) + } + + fn range_definitely_fits_in_reg(&self, range: CodeRange, reg: PRegIndex) -> bool { + self.pregs[reg.index()] + .allocations + .btree + .get(&LiveRangeKey::from_range(&range)) + .is_none() + } + fn try_to_allocate_bundle_to_reg( &mut self, bundle: LiveBundleIndex, @@ -1899,6 +1961,7 @@ impl<'a, F: Function> Env<'a, F> { let mut iter = self.bundles[bundle.index()].first_range; while iter.is_valid() { let range = &self.ranges[iter.index()]; + let next = range.next_in_bundle; log::debug!(" -> range {:?}", range); // Note that the comparator function here tests for *overlap*, so we // are checking whether the BTree contains any preg range that @@ -1923,8 +1986,10 @@ impl<'a, F: Function> Env<'a, F> { // range from a direct use of the PReg (due to clobber). return AllocRegResult::ConflictWithFixed; } + } else { + self.ranges[iter.index()].reg_hint = Some(self.pregs[reg.index()].reg); } - iter = range.next_in_bundle; + iter = next; } if conflicts.len() > 0 { @@ -1985,11 +2050,18 @@ impl<'a, F: Function> Env<'a, F> { } fn maximum_spill_weight_in_bundle_set(&self, bundles: &LiveBundleVec) -> u32 { - bundles + log::debug!("maximum_spill_weight_in_bundle_set: {:?}", bundles); + let m = bundles .iter() - .map(|&b| self.bundles[b.index()].cached_spill_weight()) + .map(|&b| { + let w = self.bundles[b.index()].cached_spill_weight(); + log::debug!("bundle{}: {}", b.index(), w); + w + }) .max() - .unwrap_or(0) + .unwrap_or(0); + log::debug!(" -> max: {}", m); + m } fn recompute_bundle_properties(&mut self, bundle: LiveBundleIndex) { @@ -2055,7 +2127,7 @@ impl<'a, F: Function> Env<'a, F> { ); total / self.bundles[bundle.index()].prio } else { - total + 0 } }; @@ -2577,13 +2649,19 @@ impl<'a, F: Function> Env<'a, F> { // Find any requirements: for every LR, for every def/use, gather // requirements (fixed-reg, any-reg, any) and merge them. let req = self.compute_requirement(bundle); - // Grab a hint from our spillset, if any. + // Grab a hint from our spillset, if any, and from the first LR, if any. let hint_reg = self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint; + let hint2_reg = if self.bundles[bundle.index()].first_range.is_valid() { + self.ranges[self.bundles[bundle.index()].first_range.index()].reg_hint + } else { + None + }; log::debug!( - "process_bundle: bundle {:?} requirement {:?} hint {:?}", + "process_bundle: bundle {:?} requirement {:?} hint {:?} hint2 {:?}", bundle, req, hint_reg, + hint2_reg ); // Try to allocate! @@ -2636,7 +2714,35 @@ impl<'a, F: Function> Env<'a, F> { .index() + bundle.index(); - for preg in RegTraversalIter::new(self.env, class, hint_reg, scan_offset) { + // If the bundle is more than one range, see if we + // can find a reg that the bounding range fits + // completely in first. Use that if so. Otherwise, + // do a detailed (liverange-by-liverange) probe of + // each reg in preference order. + let bounding_range = self.bundle_bounding_range_if_multiple(bundle); + if let Some(bounding_range) = bounding_range { + for preg in + RegTraversalIter::new(self.env, class, hint_reg, hint2_reg, scan_offset) + { + let preg_idx = PRegIndex::new(preg.index()); + self.stats.process_bundle_bounding_range_probes_any += 1; + if self.range_definitely_fits_in_reg(bounding_range, preg_idx) { + let result = self.try_to_allocate_bundle_to_reg(bundle, preg_idx); + self.stats.process_bundle_bounding_range_success_any += 1; + let alloc = match result { + AllocRegResult::Allocated(alloc) => alloc, + _ => panic!("Impossible result: {:?}", result), + }; + self.spillsets[self.bundles[bundle.index()].spillset.index()] + .reg_hint = Some(alloc.as_reg().unwrap()); + return; + } + } + } + + for preg in + RegTraversalIter::new(self.env, class, hint_reg, hint2_reg, scan_offset) + { self.stats.process_bundle_reg_probes_any += 1; let preg_idx = PRegIndex::new(preg.index()); match self.try_to_allocate_bundle_to_reg(bundle, preg_idx) { @@ -2709,9 +2815,13 @@ impl<'a, F: Function> Env<'a, F> { // If the maximum spill weight in the conflicting-bundles set is >= this bundle's spill // weight, then don't evict. - if self.maximum_spill_weight_in_bundle_set(&conflicting_bundles) - >= self.bundle_spill_weight(bundle) - { + let max_spill_weight = self.maximum_spill_weight_in_bundle_set(&conflicting_bundles); + log::debug!( + " -> max_spill_weight = {}; our spill weight {}", + max_spill_weight, + self.bundle_spill_weight(bundle) + ); + if max_spill_weight >= self.bundle_spill_weight(bundle) { log::debug!(" -> we're already the cheapest bundle to spill -- going to split"); break; } @@ -2748,7 +2858,7 @@ impl<'a, F: Function> Env<'a, F> { let class = any_vreg.class(); let mut success = false; self.stats.spill_bundle_reg_probes += 1; - for preg in RegTraversalIter::new(self.env, class, None, bundle.index()) { + for preg in RegTraversalIter::new(self.env, class, None, None, bundle.index()) { let preg_idx = PRegIndex::new(preg.index()); if let AllocRegResult::Allocated(_) = self.try_to_allocate_bundle_to_reg(bundle, preg_idx)