Optimizations: (i) range-summary array; (ii) early exit from btree probe loop (one conflict bundle is enough, empirically)

2021-05-07 17:03:44 -07:00
parent 0f3454b4d7
commit 3ddcf05fea
1 changed files with 190 additions and 27 deletions
--- a/src/ion/mod.rs
+++ b/src/ion/mod.rs
@@ -36,6 +36,10 @@
   - partial allocation -- place one LR, split rest off into separate
     bundle, in one pass?
   - coarse-grained "register contention" counters per fixed region;
     randomly sample these, adding up a vector of them, to choose
     register probe order?
 */
 #![allow(dead_code, unused_imports)]
@@ -122,8 +126,6 @@ struct LiveRange {
    next_in_bundle: LiveRangeIndex,
    next_in_reg: LiveRangeIndex,
    // if a bundle partly fits, this is used to record LRs that do fit
    reg_hint: PReg,
    merged_into: LiveRangeIndex,
 }
@@ -186,6 +188,7 @@ struct LiveBundle {
    allocation: Allocation,
    prio: u32, // recomputed after every bulk update
    spill_weight_and_props: u32,
    range_summary: RangeSummary,
 }
 impl LiveBundle {
@@ -212,6 +215,73 @@ impl LiveBundle {
    }
 }
 /// Compact description of the code ranges belonging to one bundle: a
 /// window into the dense `range_ranges` array plus an overall bound.
 #[derive(Clone, Debug)]
 struct RangeSummary {
    /// Index in `range_ranges` of the first packed `CodeRange` of the window.
    from: u32,
    /// Index in `range_ranges` one past the last `CodeRange` of the window.
    to: u32,
    /// Range that entries are clipped against when the summary is iterated.
    bound: CodeRange,
 }
 impl RangeSummary {
    /// Builds a summary with an empty window (`from == to == 0`) and a
    /// bound that starts and ends at program point 0.
    fn new() -> Self {
        let origin = CodeRange {
            from: ProgPoint::from_index(0),
            to: ProgPoint::from_index(0),
        };
        Self {
            from: 0,
            to: 0,
            bound: origin,
        }
    }
    /// Returns an iterator over this summary's window of `range_array`.
    ///
    /// The iterator begins at index `from`, stops at index `to`, and
    /// carries a copy of `bound` along with the borrowed array.
    fn iter<'a>(&'a self, range_array: &'a [CodeRange]) -> RangeSummaryIter<'a> {
        let first = self.from as usize;
        let end = self.to as usize;
        RangeSummaryIter {
            idx: first,
            start: first,
            limit: end,
            bound: self.bound,
            arr: range_array,
        }
    }
 }
 /// Iterator over the window `[idx, limit)` of a `CodeRange` array,
 /// yielding each entry clipped to `bound`.
 #[derive(Clone, Copy, Debug)]
 struct RangeSummaryIter<'a> {
    /// Index of the next array entry to examine.
    idx: usize,
    /// Index at which iteration began (recorded; not consulted by `next`).
    start: usize,
    /// Index one past the last entry of the window.
    limit: usize,
    /// Range that yielded entries are clipped against.
    bound: CodeRange,
    /// Backing array of code ranges.
    arr: &'a [CodeRange],
 }
 impl<'a> std::iter::Iterator for RangeSummaryIter<'a> {
    type Item = CodeRange;
    /// Returns the next entry of the window that overlaps `bound`,
    /// clipped to `bound`, or `None` once the window is exhausted or an
    /// entry starts at or after `bound.to`.
    fn next(&mut self) -> Option<Self::Item> {
        // Skip entries that end at or before the start of the bound.
        while self.idx < self.limit && self.arr[self.idx].to <= self.bound.from {
            self.idx += 1;
        }
        // The skip loop may have consumed the whole window; stop here
        // rather than indexing at `limit`, which would either panic or
        // read an entry that lies outside this summary's window.
        if self.idx >= self.limit {
            return None;
        }
        let mut cur = self.arr[self.idx];
        if cur.from >= self.bound.to {
            // Entry begins past the bound: mark the iterator exhausted so
            // later calls return `None` immediately.
            self.idx = self.limit;
            return None;
        }
        // Clip the entry to the bound on both sides.
        if cur.from < self.bound.from {
            cur.from = self.bound.from;
        }
        if cur.to > self.bound.to {
            cur.to = self.bound.to;
        }
        self.idx += 1;
        Some(cur)
    }
 }
 #[derive(Clone, Debug)]
 struct SpillSet {
    bundles: LiveBundleVec,
@@ -285,6 +355,7 @@ struct Env<'a, F: Function> {
    blockparam_allocs: Vec<(Block, u32, VRegIndex, Allocation)>,
    ranges: Vec<LiveRange>,
    range_ranges: Vec<CodeRange>,
    bundles: Vec<LiveBundle>,
    spillsets: Vec<SpillSet>,
    uses: Vec<Use>,
@@ -382,6 +453,7 @@ struct LiveRangeKey {
 }
 impl LiveRangeKey {
    #[inline(always)]
    fn from_range(range: &CodeRange) -> Self {
        Self {
            from: range.from.to_index(),
@@ -550,8 +622,10 @@ pub struct Stats {
    process_bundle_count: usize,
    process_bundle_reg_probes_fixed: usize,
    process_bundle_reg_success_fixed: usize,
    process_bundle_bounding_range_probe_start_any: usize,
    process_bundle_bounding_range_probes_any: usize,
    process_bundle_bounding_range_success_any: usize,
    process_bundle_reg_probe_start_any: usize,
    process_bundle_reg_probes_any: usize,
    process_bundle_reg_success_any: usize,
    evict_bundle_event: usize,
@@ -677,6 +751,7 @@ impl<'a, F: Function> Env<'a, F> {
            blockparam_allocs: vec![],
            bundles: vec![],
            ranges: vec![],
            range_ranges: vec![],
            spillsets: vec![],
            uses: vec![],
            vregs: vec![],
@@ -766,7 +841,6 @@ impl<'a, F: Function> Env<'a, F> {
            last_use: UseIndex::invalid(),
            next_in_bundle: LiveRangeIndex::invalid(),
            next_in_reg: LiveRangeIndex::invalid(),
            reg_hint: PReg::invalid(),
            merged_into: LiveRangeIndex::invalid(),
        });
        LiveRangeIndex::new(idx)
@@ -1617,6 +1691,7 @@ impl<'a, F: Function> Env<'a, F> {
            spillset: SpillSetIndex::invalid(),
            prio: 0,
            spill_weight_and_props: 0,
            range_summary: RangeSummary::new(),
        });
        LiveBundleIndex::new(bundle)
    }
@@ -1872,6 +1947,35 @@ impl<'a, F: Function> Env<'a, F> {
            self.merge_bundles(/* from */ dest_bundle, /* to */ src_bundle);
        }
        // Now create range summaries for all bundles.
        for bundle in 0..self.bundles.len() {
            let bundle = LiveBundleIndex::new(bundle);
            let mut iter = self.bundles[bundle.index()].first_range;
            let start_idx = self.range_ranges.len();
            let start_pos = if iter.is_valid() {
                self.ranges[iter.index()].range.from
            } else {
                ProgPoint::from_index(0)
            };
            let mut end_pos = start_pos;
            while iter.is_valid() {
                let range = self.ranges[iter.index()].range;
                end_pos = range.to;
                self.range_ranges.push(range);
                iter = self.ranges[iter.index()].next_in_bundle;
            }
            let end_idx = self.range_ranges.len();
            let bound = CodeRange {
                from: start_pos,
                to: end_pos,
            };
            self.bundles[bundle.index()].range_summary = RangeSummary {
                from: start_idx as u32,
                to: end_idx as u32,
                bound,
            };
        }
        log::debug!("done merging bundles");
    }
@@ -2060,18 +2164,21 @@ impl<'a, F: Function> Env<'a, F> {
    ) -> AllocRegResult {
        log::debug!("try_to_allocate_bundle_to_reg: {:?} -> {:?}", bundle, reg);
        let mut conflicts = smallvec![];
-        let mut iter = self.bundles[bundle.index()].first_range;
+        // Use the range-summary array; this allows fast streaming
-        while iter.is_valid() {
+        // access to CodeRanges (which are just two u32s packed
-            let range = &self.ranges[iter.index()];
+        // together) which is important for this hot loop.
-            let next = range.next_in_bundle;
+        let iter = self.bundles[bundle.index()]
            .range_summary
            .iter(&self.range_ranges[..]);
        for range in iter {
            log::debug!(" -> range {:?}", range);
            // Note that the comparator function here tests for *overlap*, so we
            // are checking whether the BTree contains any preg range that
-            // *overlaps* with range `iter`, not literally the range `iter`.
+            // *overlaps* with range `range`, not literally the range `range`.
            if let Some(preg_range) = self.pregs[reg.index()]
                .allocations
                .btree
-                .get(&LiveRangeKey::from_range(&range.range))
+                .get(&LiveRangeKey::from_range(&range))
            {
                log::debug!(" -> btree contains range {:?} that overlaps", preg_range);
                if self.ranges[preg_range.index()].vreg.is_valid() {
@@ -2083,15 +2190,25 @@ impl<'a, F: Function> Env<'a, F> {
                    if !conflicts.iter().any(|b| *b == conflict_bundle) {
                        conflicts.push(conflict_bundle);
                    }
                    // Empirically, it seems to be essentially as good
                    // to return only one conflicting bundle as all of
                    // them; it is very rare that the combination of
                    // all conflicting bundles yields a maximum spill
                    // weight that is enough to keep them in place
                    // when a single conflict does not. It is also a
                    // quite significant compile-time win to *stop
                    // scanning* as soon as we have a conflict. To
                    // experiment with this, however, just remove this
                    // `break`; the rest of the code will do the right
                    // thing.
                    break;
                } else {
                    log::debug!("   -> conflict with fixed reservation");
                    // range from a direct use of the PReg (due to clobber).
                    return AllocRegResult::ConflictWithFixed;
                }
            } else {
                self.ranges[iter.index()].reg_hint = self.pregs[reg.index()].reg;
            }
            iter = next;
        }
        if conflicts.len() > 0 {
@@ -2567,6 +2684,7 @@ impl<'a, F: Function> Env<'a, F> {
        let mut iter = self.bundles[bundle.index()].first_range;
        self.bundles[bundle.index()].first_range = LiveRangeIndex::invalid();
        self.bundles[bundle.index()].last_range = LiveRangeIndex::invalid();
        let mut range_summary_idx = self.bundles[bundle.index()].range_summary.from;
        while iter.is_valid() {
            // Read `next` link now and then clear it -- we rebuild the list below.
            let next = self.ranges[iter.index()].next_in_bundle;
@@ -2587,6 +2705,7 @@ impl<'a, F: Function> Env<'a, F> {
                self.bundles[cur_bundle.index()].spillset = self.bundles[bundle.index()].spillset;
                new_bundles.push(cur_bundle);
                split_idx += 1;
                self.bundles[cur_bundle.index()].range_summary.from = range_summary_idx;
            }
            while split_idx < split_points.len() && split_points[split_idx] <= range.from {
                split_idx += 1;
@@ -2720,7 +2839,10 @@ impl<'a, F: Function> Env<'a, F> {
                // Create a new bundle to hold the rest-range.
                let rest_bundle = self.create_bundle();
                self.bundles[cur_bundle.index()].range_summary.to = range_summary_idx + 1;
                cur_bundle = rest_bundle;
                self.bundles[cur_bundle.index()].range_summary.from = range_summary_idx;
                self.bundles[cur_bundle.index()].range_summary.to = range_summary_idx + 1;
                new_bundles.push(rest_bundle);
                self.bundles[rest_bundle.index()].first_range = rest_lr;
                self.bundles[rest_bundle.index()].last_range = rest_lr;
@@ -2732,6 +2854,13 @@ impl<'a, F: Function> Env<'a, F> {
            }
            iter = next;
            range_summary_idx += 1;
            self.bundles[cur_bundle.index()].range_summary.to = range_summary_idx;
        }
        self.fixup_range_summary_bound(bundle);
        for &b in &new_bundles {
            self.fixup_range_summary_bound(b);
        }
        // Enqueue all split-bundles on the allocation queue.
@@ -2739,7 +2868,7 @@ impl<'a, F: Function> Env<'a, F> {
        self.bundles[bundle.index()].prio = prio;
        self.recompute_bundle_properties(bundle);
        self.allocation_queue.insert(bundle, prio as usize);
-        for b in new_bundles {
+        for &b in &new_bundles {
            let prio = self.compute_bundle_prio(b);
            self.bundles[b.index()].prio = prio;
            self.recompute_bundle_properties(b);
@@ -2747,23 +2876,47 @@ impl<'a, F: Function> Env<'a, F> {
        }
    }
    /// Recomputes the `bound` of `bundle`'s range summary from the
    /// bundle's live-range list: it spans from the start of the first
    /// range to the end of the last range, with program point 0 used for
    /// whichever end has no valid range.
    ///
    /// In debug builds, also asserts that iterating the range summary
    /// yields exactly the ranges found by walking the bundle's
    /// `next_in_bundle` chain.
    fn fixup_range_summary_bound(&mut self, bundle: LiveBundleIndex) {
        let (head, tail) = {
            let data = &self.bundles[bundle.index()];
            (data.first_range, data.last_range)
        };
        let from = match head.is_valid() {
            true => self.ranges[head.index()].range.from,
            false => ProgPoint::from_index(0),
        };
        let to = match tail.is_valid() {
            true => self.ranges[tail.index()].range.to,
            false => ProgPoint::from_index(0),
        };
        self.bundles[bundle.index()].range_summary.bound = CodeRange { from, to };

        #[cfg(debug_assertions)]
        {
            // Cross-check the summary against the linked list of ranges:
            // both must produce the same sequence and end together.
            let mut summarized = self.bundles[bundle.index()]
                .range_summary
                .iter(&self.range_ranges[..]);
            let mut lr = self.bundles[bundle.index()].first_range;
            while lr.is_valid() {
                assert_eq!(summarized.next(), Some(self.ranges[lr.index()].range));
                lr = self.ranges[lr.index()].next_in_bundle;
            }
            assert_eq!(summarized.next(), None);
        }
    }
    fn process_bundle(&mut self, bundle: LiveBundleIndex) {
        // Find any requirements: for every LR, for every def/use, gather
        // requirements (fixed-reg, any-reg, any) and merge them.
        let req = self.compute_requirement(bundle);
-        // Grab a hint from our spillset, if any, and from the first LR, if any.
+        // Grab a hint from our spillset, if any.
        let hint_reg = self.spillsets[self.bundles[bundle.index()].spillset.index()].reg_hint;
        let hint2_reg = if self.bundles[bundle.index()].first_range.is_valid() {
            self.ranges[self.bundles[bundle.index()].first_range.index()].reg_hint
        } else {
            PReg::invalid()
        };
        log::debug!(
-            "process_bundle: bundle {:?} requirement {:?} hint {:?} hint2 {:?}",
+            "process_bundle: bundle {:?} requirement {:?} hint {:?}",
            bundle,
            req,
            hint_reg,
            hint2_reg
        );
        // Try to allocate!
@@ -2830,9 +2983,14 @@ impl<'a, F: Function> Env<'a, F> {
                    let bounding_range = self.bundle_bounding_range_if_multiple(bundle);
                    if let Some(bounding_range) = bounding_range {
                        log::debug!("initial scan with bounding range {:?}", bounding_range);
-                        for preg in
+                        self.stats.process_bundle_bounding_range_probe_start_any += 1;
-                            RegTraversalIter::new(self.env, class, hint_reg, hint2_reg, scan_offset)
+                        for preg in RegTraversalIter::new(
-                        {
+                            self.env,
                            class,
                            hint_reg,
                            PReg::invalid(),
                            scan_offset,
                        ) {
                            let preg_idx = PRegIndex::new(preg.index());
                            log::debug!("trying preg {:?}", preg_idx);
                            self.stats.process_bundle_bounding_range_probes_any += 1;
@@ -2851,9 +3009,14 @@ impl<'a, F: Function> Env<'a, F> {
                        }
                    }
-                    for preg in
+                    self.stats.process_bundle_reg_probe_start_any += 1;
-                        RegTraversalIter::new(self.env, class, hint_reg, hint2_reg, scan_offset)
+                    for preg in RegTraversalIter::new(
-                    {
+                        self.env,
                        class,
                        hint_reg,
                        PReg::invalid(),
                        scan_offset,
                    ) {
                        self.stats.process_bundle_reg_probes_any += 1;
                        let preg_idx = PRegIndex::new(preg.index());
                        log::debug!("trying preg {:?}", preg_idx);