Misc usability and functionality enhancements:

- Support preferred and non-preferred subsets of a register class. This
  allows allocating, e.g., caller-saved registers before callee-saved
  registers.
- Allow branch blockparam args to start at a certain offset in branch
  operands; this allows branches to have other operands too (e.g.,
  conditional-branch inputs).
- Allow `OperandOrAllocation` to be constructed from an `Allocation` and
  `OperandKind` as well (i.e., an allocation with a use/def bit).
Author: Chris Fallin
Date:   2021-04-30 21:14:09 -07:00
Commit: 49c54b6144 (parent 414f3f828d)
3 changed files with 104 additions and 44 deletions
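
To make the first two points concrete, the sketch below shows the embedder-side view, assuming the crate is pulled in as `regalloc2`. The 31-register integer file, the 16/15 caller-/callee-saved split, and the single-input conditional branch are hypothetical; only `PReg`, `RegClass`, and the `MachineEnv` field names mentioned in the comments come from the diff below.

```rust
use regalloc2::{PReg, RegClass};

fn main() {
    // Hypothetical integer register file: r0..=r15 caller-saved,
    // r16..=r30 callee-saved, r31 reserved as the scratch register.
    let int_regs: Vec<PReg> = (0..31).map(|i| PReg::new(i, RegClass::Int)).collect();

    // Caller-saved registers go in the preferred list: the allocator tries
    // these first, so values that do not live across calls avoid forcing
    // callee-save spills in the prologue/epilogue. Callee-saved registers
    // go in the non-preferred list and are tried only once the preferred
    // set is exhausted. These two per-class lists are what populate
    // `MachineEnv::preferred_regs_by_class` and
    // `MachineEnv::non_preferred_regs_by_class`.
    let preferred: Vec<PReg> = int_regs.iter().cloned().take(16).collect();
    let non_preferred: Vec<PReg> = int_regs.iter().cloned().skip(16).collect();
    assert_eq!(preferred.len() + non_preferred.len(), int_regs.len());

    // Hypothetical branch layout for `Function::branch_blockparam_arg_offset()`:
    // a conditional branch carries its condition input as operand 0, so its
    // outgoing blockparam args start at operand index 1; an unconditional
    // jump carries only blockparam args, which start at index 0.
    let blockparam_arg_offset = |is_conditional: bool| -> usize {
        if is_conditional { 1 } else { 0 }
    };
    assert_eq!(blockparam_arg_offset(true), 1);
    assert_eq!(blockparam_arg_offset(false), 0);
}
```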

@@ -116,6 +116,12 @@ impl Function for Func {
self.insts[insn.index()].op == InstOpcode::Branch
}
fn branch_blockparam_arg_offset(&self, _: Block, _: Inst) -> usize {
// Branch blockparam args always start at zero for this
// Function implementation.
0
}
fn is_safepoint(&self, insn: Inst) -> bool {
self.insts[insn.index()].is_safepoint
}
@@ -576,12 +582,16 @@ impl std::fmt::Debug for Func {
pub fn machine_env() -> MachineEnv {
// Reg 31 is the scratch reg.
let regs: Vec<PReg> = (0..31).map(|i| PReg::new(i, RegClass::Int)).collect();
let regs_by_class: Vec<Vec<PReg>> = vec![regs.clone(), vec![]];
let preferred_regs_by_class: Vec<Vec<PReg>> =
vec![regs.iter().cloned().take(24).collect(), vec![]];
let non_preferred_regs_by_class: Vec<Vec<PReg>> =
vec![regs.iter().cloned().skip(24).collect(), vec![]];
let scratch_by_class: Vec<PReg> =
vec![PReg::new(31, RegClass::Int), PReg::new(0, RegClass::Float)];
MachineEnv {
regs,
regs_by_class,
preferred_regs_by_class,
non_preferred_regs_by_class,
scratch_by_class,
}
}

@@ -560,6 +560,69 @@ pub struct Stats {
edits_count: usize,
}
/// This iterator represents a traversal through all allocatable
/// registers of a given class, in a certain order designed to
/// minimize allocation contention.
///
/// The order in which we try registers is somewhat complex:
/// - First, if there is a hint, we try that.
/// - Then, we try registers in a traversal order that is based on an
/// "offset" (usually the bundle index), spreading pressure evenly
/// among registers to reduce commitment-map contention.
/// - Within that scan, we try registers in two groups: first,
/// preferred registers; then, non-preferred registers. (In normal
/// usage, these consist of caller-save and callee-save registers
/// respectively, to minimize clobber-saves; but they need not.)
struct RegTraversalIter<'a> {
env: &'a MachineEnv,
class: usize,
hint_reg: Option<PReg>,
pref_idx: usize,
non_pref_idx: usize,
offset: usize,
}
impl<'a> RegTraversalIter<'a> {
pub fn new(
env: &'a MachineEnv,
class: RegClass,
hint_reg: Option<PReg>,
offset: usize,
) -> Self {
Self {
env,
class: class as u8 as usize,
hint_reg,
pref_idx: 0,
non_pref_idx: 0,
offset,
}
}
}
impl<'a> std::iter::Iterator for RegTraversalIter<'a> {
type Item = PReg;
fn next(&mut self) -> Option<PReg> {
if let Some(preg) = self.hint_reg.take() {
return Some(preg);
}
if self.pref_idx < self.env.preferred_regs_by_class[self.class].len() {
let arr = &self.env.preferred_regs_by_class[self.class][..];
let r = arr[(self.pref_idx + self.offset) % arr.len()];
self.pref_idx += 1;
return Some(r);
}
if self.non_pref_idx < self.env.non_preferred_regs_by_class[self.class].len() {
let arr = &self.env.non_preferred_regs_by_class[self.class][..];
let r = arr[(self.non_pref_idx + self.offset) % arr.len()];
self.non_pref_idx += 1;
return Some(r);
}
None
}
}
impl<'a, F: Function> Env<'a, F> {
pub(crate) fn new(func: &'a F, env: &'a MachineEnv, cfginfo: CFGInfo) -> Self {
Self {
@@ -987,7 +1050,7 @@ impl<'a, F: Function> Env<'a, F> {
// return), create blockparam_out entries.
if self.func.is_branch(insns.last()) {
let operands = self.func.inst_operands(insns.last());
let mut i = 0;
let mut i = self.func.branch_blockparam_arg_offset(block, insns.last());
for &succ in self.func.block_succs(block) {
for &blockparam in self.func.block_params(succ) {
let from_vreg = VRegIndex::new(operands[i].vreg().vreg());
@@ -2671,12 +2734,7 @@ impl<'a, F: Function> Env<'a, F> {
Requirement::Register(class) => {
// Scan all pregs and attempt to allocate.
let mut lowest_cost_conflict_set: Option<LiveBundleVec> = None;
let n_regs = self.env.regs_by_class[class as u8 as usize].len();
let loop_count = if hint_reg.is_some() {
n_regs + 1
} else {
n_regs
};
// Heuristic: start the scan for an available
// register at an offset influenced both by our
// location in the code and by the bundle we're
@@ -2688,35 +2746,8 @@ impl<'a, F: Function> Env<'a, F> {
.inst
.index()
+ bundle.index();
for i in 0..loop_count {
// The order in which we try registers is somewhat complex:
// - First, if there is a hint, we try that.
// - Then, we try registers in a traversal
// order that is based on the bundle index,
// spreading pressure evenly among registers
// to reduce commitment-map
// contention. (TODO: account for
// caller-save vs. callee-saves here too.)
// Note that we avoid retrying the hint_reg;
// this is why the loop count is n_regs + 1
// if there is a hint reg, because we always
// skip one iteration.
let preg = match (i, hint_reg) {
(0, Some(hint_reg)) => hint_reg,
(i, Some(hint_reg)) => {
let reg = self.env.regs_by_class[class as u8 as usize]
[(i - 1 + scan_offset) % n_regs];
if reg == hint_reg {
continue;
}
reg
}
(i, None) => {
self.env.regs_by_class[class as u8 as usize]
[(i + scan_offset) % n_regs]
}
};
for preg in RegTraversalIter::new(self.env, class, hint_reg, scan_offset) {
self.stats.process_bundle_reg_probes_any += 1;
let preg_idx = PRegIndex::new(preg.index());
match self.try_to_allocate_bundle_to_reg(bundle, preg_idx) {
@@ -2828,10 +2859,7 @@ impl<'a, F: Function> Env<'a, F> {
let class = any_vreg.class();
let mut success = false;
self.stats.spill_bundle_reg_probes += 1;
let nregs = self.env.regs_by_class[class as u8 as usize].len();
for i in 0..nregs {
let i = (i + bundle.index()) % nregs;
let preg = self.env.regs_by_class[class as u8 as usize][i]; // don't borrow self
for preg in RegTraversalIter::new(self.env, class, None, bundle.index()) {
let preg_idx = PRegIndex::new(preg.index());
if let AllocRegResult::Allocated(_) =
self.try_to_allocate_bundle_to_reg(bundle, preg_idx)

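The probe order documented on `RegTraversalIter` above can be illustrated with a small standalone sketch. It does not use the crate's private iterator; the register names, list sizes, hint, and offset are invented for the example.

```rust
/// Mirror of the probe order implemented by `RegTraversalIter`: the hint
/// first (if any), then the preferred list, then the non-preferred list,
/// with each list rotated by `offset` so that different bundles start their
/// scan at different registers and spread commitment-map contention.
fn probe_order(
    preferred: &[&'static str],
    non_preferred: &[&'static str],
    hint: Option<&'static str>,
    offset: usize,
) -> Vec<&'static str> {
    let mut order = Vec::new();
    order.extend(hint);
    for i in 0..preferred.len() {
        order.push(preferred[(i + offset) % preferred.len()]);
    }
    for i in 0..non_preferred.len() {
        order.push(non_preferred[(i + offset) % non_preferred.len()]);
    }
    order
}

fn main() {
    // Hypothetical class: r0..r3 preferred (caller-saved), r4..r5 non-preferred.
    let pref = ["r0", "r1", "r2", "r3"];
    let non_pref = ["r4", "r5"];
    // A bundle with scan offset 2 and a hint of r5 probes the hint first and
    // then both rotated lists. (As in the iterator above, the hint is not
    // skipped when the scan reaches it again.)
    assert_eq!(
        probe_order(&pref, &non_pref, Some("r5"), 2),
        ["r5", "r2", "r3", "r0", "r1", "r4", "r5"]
    );
}
```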
@@ -579,6 +579,15 @@ impl OperandOrAllocation {
debug_assert!(alloc.bits() >> 29 >= 5);
Self { bits: alloc.bits() }
}
pub fn from_alloc_and_kind(alloc: Allocation, kind: OperandKind) -> Self {
debug_assert!(alloc.bits() >> 29 >= 5);
let bits = alloc.bits()
| match kind {
OperandKind::Def => 0,
OperandKind::Use => 1 << 28,
};
Self { bits }
}
pub fn is_operand(&self) -> bool {
(self.bits >> 29) <= 4
}
@@ -659,10 +668,22 @@ pub trait Function {
fn is_ret(&self, insn: Inst) -> bool;
/// Determine whether an instruction is the end-of-block
/// branch. If so, its operands *must* be the block parameters for
/// each of its block's `block_succs` successor blocks, in order.
/// branch. If so, its operands at the indices given by
/// `branch_blockparam_arg_offset()` below *must* be the block
/// parameters for each of its block's `block_succs` successor
/// blocks, in order.
fn is_branch(&self, insn: Inst) -> bool;
/// If `insn` is a branch at the end of `block`, returns the
/// operand index at which outgoing blockparam arguments are
/// found. Starting at this index, the arguments for each successor
/// block's blockparams must appear, in order.
///
/// It is an error if `self.inst_operands(insn).len() -
/// self.branch_blockparam_arg_offset(block, insn)` is not exactly equal
/// to the sum of blockparam counts for all successor blocks.
fn branch_blockparam_arg_offset(&self, block: Block, insn: Inst) -> usize;
/// Determine whether an instruction is a safepoint and requires a stackmap.
fn is_safepoint(&self, _: Inst) -> bool {
false
@@ -842,7 +863,8 @@ pub enum Edit {
#[derive(Clone, Debug)]
pub struct MachineEnv {
regs: Vec<PReg>,
regs_by_class: Vec<Vec<PReg>>,
preferred_regs_by_class: Vec<Vec<PReg>>,
non_preferred_regs_by_class: Vec<Vec<PReg>>,
scratch_by_class: Vec<PReg>,
}
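
Finally, a sketch of the invariant stated in the `branch_blockparam_arg_offset()` documentation above: everything past the returned offset must line up, in order, with the blockparams of the branch's successors. The helper below is not part of the crate; it assumes only the `Function` trait methods used in this commit (`is_branch`, `inst_operands`, `block_succs`, `block_params`, `branch_blockparam_arg_offset`) and that they return slices, as their use in the diff suggests.

```rust
use regalloc2::{Block, Function, Inst};

/// Debug-check the blockparam-argument layout of a branch: starting at
/// `branch_blockparam_arg_offset(block, insn)`, the remaining operands must
/// be exactly the concatenated blockparams of each successor, in order, so
/// the operand count past the offset must equal the sum of the successors'
/// blockparam counts.
fn check_branch_blockparam_args<F: Function>(func: &F, block: Block, insn: Inst) {
    if !func.is_branch(insn) {
        return;
    }
    let offset = func.branch_blockparam_arg_offset(block, insn);
    let expected: usize = func
        .block_succs(block)
        .iter()
        .map(|&succ| func.block_params(succ).len())
        .sum();
    debug_assert_eq!(
        func.inst_operands(insn).len() - offset,
        expected,
        "blockparam args after the offset must match the successors' params"
    );
}
```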