Implement a move resolver for the register constraint solver.

After finding a register solution, it needs to be executed as a sequence
of regmove instructions. This often requires a topological ordering of
the moves so they don't conflict.

When the solution contains cycles, try to grab an available scratch
register to implement the copies. Panic if that fails (later, we'll
implement emergency spilling in this case).

Make sure we handle odd aliasing in the arm32 floating point register
bank. Not everything is a simple cycle in that case, so make sure we
don't assume so.
This commit is contained in:
Jakob Stoklund Olesen
2017-05-10 15:36:14 -07:00
parent b521254149
commit 9be262e878
3 changed files with 302 additions and 8 deletions

View File

@@ -164,6 +164,12 @@ impl fmt::Display for RegClassData {
} }
} }
/// Debug output for a register class is just its name.
impl fmt::Debug for RegClassData {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Emit the class name verbatim; no extra decoration is added.
        write!(f, "{}", self.name)
    }
}
/// A small reference to a register class. /// A small reference to a register class.
/// ///
/// Use this when storing register classes in compact data structures. The `RegInfo::rc()` method /// Use this when storing register classes in compact data structures. The `RegInfo::rc()` method

View File

@@ -34,6 +34,7 @@
use entity_map::EntityMap; use entity_map::EntityMap;
use dominator_tree::DominatorTree; use dominator_tree::DominatorTree;
use ir::{Ebb, Inst, Value, Function, Cursor, ValueLoc, DataFlowGraph, Signature, ArgumentLoc}; use ir::{Ebb, Inst, Value, Function, Cursor, ValueLoc, DataFlowGraph, Signature, ArgumentLoc};
use ir::InstBuilder;
use isa::{TargetIsa, Encoding, EncInfo, OperandConstraint, ConstraintKind}; use isa::{TargetIsa, Encoding, EncInfo, OperandConstraint, ConstraintKind};
use isa::{RegUnit, RegClass, RegInfo, regs_overlap}; use isa::{RegUnit, RegClass, RegInfo, regs_overlap};
use regalloc::affinity::Affinity; use regalloc::affinity::Affinity;
@@ -302,7 +303,7 @@ impl<'a> Context<'a> {
fn visit_inst(&mut self, fn visit_inst(&mut self,
inst: Inst, inst: Inst,
encoding: Encoding, encoding: Encoding,
_pos: &mut Cursor, pos: &mut Cursor,
dfg: &mut DataFlowGraph, dfg: &mut DataFlowGraph,
tracker: &mut LiveValueTracker, tracker: &mut LiveValueTracker,
regs: &mut AllocatableSet, regs: &mut AllocatableSet,
@@ -356,6 +357,11 @@ impl<'a> Context<'a> {
.quick_solve() .quick_solve()
.unwrap_or_else(|_| self.iterate_solution()); .unwrap_or_else(|_| self.iterate_solution());
// The solution and/or fixed input constraints may require us to shuffle the set of live
// registers around.
self.shuffle_inputs(pos, dfg, regs);
// Apply the solution to the defs. // Apply the solution to the defs.
for v in self.solver.vars().iter().filter(|&v| v.is_define()) { for v in self.solver.vars().iter().filter(|&v| v.is_define()) {
*locations.ensure(v.value) = ValueLoc::Reg(v.solution); *locations.ensure(v.value) = ValueLoc::Reg(v.solution);
@@ -497,4 +503,23 @@ impl<'a> Context<'a> {
fn iterate_solution(&self) -> AllocatableSet { fn iterate_solution(&self) -> AllocatableSet {
unimplemented!(); unimplemented!();
} }
/// Insert the `regmove` instructions needed to get the live registers into place ahead of the
/// instruction, and record every move in `self.divert`.
///
/// The `pos` cursor is expected to point at the instruction; the register moves are inserted
/// in front of it.
///
/// `regs` is passed on to the solver as the set of available registers so that it can schedule
/// the moves before any of them are inserted.
fn shuffle_inputs(&mut self,
                  pos: &mut Cursor,
                  dfg: &mut DataFlowGraph,
                  regs: &mut AllocatableSet) {
    // Scheduling comes first: it computes the ordered move list that is read below. The
    // value returned by the solver is not used here.
    let _ = self.solver.schedule_moves(regs);

    for step in self.solver.moves().iter() {
        let (value, from, to) = (step.value, step.from, step.to);
        // Keep the diversion tracker in sync with the instruction we are about to insert.
        self.divert.regmove(value, from, to);
        dfg.ins(pos).regmove(value, from, to);
    }
}
} }

View File

@@ -221,12 +221,12 @@ impl fmt::Display for Variable {
} }
} }
#[derive(Clone, Debug)]
struct Assignment { pub struct Assignment {
value: Value, pub value: Value,
from: RegUnit, pub from: RegUnit,
to: RegUnit, pub to: RegUnit,
rc: RegClass, pub rc: RegClass,
} }
impl SparseMapValue<Value> for Assignment { impl SparseMapValue<Value> for Assignment {
@@ -235,6 +235,14 @@ impl SparseMapValue<Value> for Assignment {
} }
} }
/// Test-only equality for `Assignment`.
///
/// Two assignments are equal when they move the same value between the same registers and
/// their register classes have the same `index`.
#[cfg(test)]
impl PartialEq for Assignment {
    fn eq(&self, other: &Assignment) -> bool {
        let same_value = self.value == other.value;
        let same_route = self.from == other.from && self.to == other.to;
        // Register classes are compared through their `index` field.
        let same_class = self.rc.index == other.rc.index;
        same_value && same_route && same_class
    }
}
/// Constraint solver for register allocation around a single instruction. /// Constraint solver for register allocation around a single instruction.
/// ///
/// Start by programming in the instruction constraints. /// Start by programming in the instruction constraints.
@@ -296,6 +304,11 @@ pub struct Solver {
/// - Live-through variables are marked as available. /// - Live-through variables are marked as available.
/// ///
regs_out: AllocatableSet, regs_out: AllocatableSet,
/// List of register moves scheduled to avoid conflicts.
///
/// This is used as working space by the `schedule_moves()` function.
moves: Vec<Assignment>,
} }
/// Interface for programming the constraints into the solver. /// Interface for programming the constraints into the solver.
@@ -308,6 +321,7 @@ impl Solver {
inputs_done: false, inputs_done: false,
regs_in: AllocatableSet::new(), regs_in: AllocatableSet::new(),
regs_out: AllocatableSet::new(), regs_out: AllocatableSet::new(),
moves: Vec::new(),
} }
} }
@@ -508,7 +522,7 @@ impl Solver {
/// This is expected to succeed for most instructions since the constraint problem is almost /// This is expected to succeed for most instructions since the constraint problem is almost
/// always trivial. /// always trivial.
/// ///
/// Returns `true` is a solution was found. /// Returns `Ok(regs)` if a solution was found.
pub fn quick_solve(&mut self) -> Result<AllocatableSet, RegClass> { pub fn quick_solve(&mut self) -> Result<AllocatableSet, RegClass> {
self.find_solution() self.find_solution()
} }
@@ -547,3 +561,252 @@ impl Solver {
&self.vars &self.vars
} }
} }
/// Interface for working with parallel copies once a solution has been found.
impl Solver {
    /// Collect all the register moves we need to execute.
    ///
    /// The result replaces the previous contents of `self.moves`: first the moves implied by
    /// the chosen solution for every variable that has a `from` register, then the entries of
    /// `self.assignments`.
    ///
    /// Moves whose source and destination are the same register are left out. Such a move
    /// needs no instruction, and because its destination is occupied by its own source the
    /// scheduler could never execute it directly: it would be mistaken for a cycle and either
    /// be routed through a scratch register or trigger the "not enough registers" panic.
    fn collect_moves(&mut self) {
        self.moves.clear();

        // Collect moves from the chosen solution for all non-define variables.
        for v in &self.vars {
            if let Some(from) = v.from {
                // Omit variable solutions that don't require the value to be moved.
                if from != v.solution {
                    self.moves
                        .push(Assignment {
                                  value: v.value,
                                  from,
                                  to: v.solution,
                                  rc: v.constraint,
                              });
                }
            }
        }

        // Add the recorded assignments, again skipping the ones that are already in the right
        // register.
        self.moves
            .extend(self.assignments
                        .values()
                        .filter(|a| a.from != a.to)
                        .cloned());
    }

    /// Try to schedule a sequence of `regmove` instructions that will shuffle registers into
    /// place.
    ///
    /// `regs` is the set of registers available before any move is executed. It is cloned, not
    /// modified. On return, `self.moves` holds the moves in an order in which every
    /// destination is available at the time its move executes.
    ///
    /// This may require the use of additional available registers, and it can fail if no
    /// additional registers are available.
    ///
    /// TODO: Handle failure by generating a sequence of register swaps, or by temporarily spilling
    /// a register.
    ///
    /// Returns the number of spills that had to be emitted. Spilling is not implemented yet, so
    /// this is currently always 0.
    ///
    /// # Panics
    ///
    /// Panics if only cycles remain and there is no available register in the register class
    /// of the move picked to break the cycle.
    pub fn schedule_moves(&mut self, regs: &AllocatableSet) -> usize {
        self.collect_moves();

        // Working copy of the available registers, updated as moves are scheduled.
        let mut avail = regs.clone();

        // `moves[..i]` is the already scheduled prefix; `moves[i..]` is still pending.
        let mut i = 0;
        while i < self.moves.len() {
            // Find the first move that can be executed now.
            if let Some(j) = self.moves[i..]
                   .iter()
                   .position(|m| avail.is_avail(m.rc, m.to)) {
                // This move can be executed now.
                self.moves.swap(i, i + j);
                let m = &self.moves[i];
                // Executing the move occupies its destination and releases its source.
                avail.take(m.rc, m.to);
                avail.free(m.rc, m.from);
                i += 1;
                continue;
            }

            // When we get here, none of the `moves[i..]` can be executed. This means there are
            // only cycles remaining. The cycles can be broken in a few ways:
            //
            // 1. Grab an available register and use it to break a cycle.
            // 2. Move a value temporarily into a stack slot instead of a register.
            // 3. Use swap instructions.
            //
            // TODO: So far we only implement 1.

            // Pick an assignment with the largest possible width. This is more likely to break up
            // a cycle than an assignment with fewer register units. For example, it may be
            // necessary to move two arm32 S-registers out of the way before a D-register can move
            // into place.
            //
            // We use `min_by_key` and `!` instead of `max_by_key` because it preserves the
            // existing order of moves with the same width.
            //
            // The `unwrap()` cannot fail: `i < self.moves.len()`, so the slice is non-empty.
            let j = self.moves[i..]
                .iter()
                .enumerate()
                .min_by_key(|&(_, m)| !m.rc.width)
                .unwrap()
                .0;
            self.moves.swap(i, i + j);

            let m = self.moves[i].clone();
            if let Some(reg) = avail.iter(m.rc).next() {
                // Alter the move so it is guaranteed to be picked up when we loop. It is important
                // that this move is scheduled immediately, otherwise we would have multiple moves
                // of the same value, and they would not be commutable.
                self.moves[i].to = reg;

                // Append a fixup move so we end up in the right place. This move will be scheduled
                // later. That's ok because it is the single remaining move of `m.value` after the
                // next iteration.
                self.moves
                    .push(Assignment {
                              value: m.value,
                              rc: m.rc,
                              from: reg,
                              to: m.to,
                          });

                // TODO: What if allocating an extra register is not enough to break a cycle? This
                // can happen when there are registers of different widths in a cycle. For ARM, we
                // may have to move two S-registers out of the way before we can resolve a cycle
                // involving a D-register.
            } else {
                panic!("Not enough registers in {} to schedule moves", m.rc);
            }
        }

        // Spilling not implemented yet.
        0
    }

    /// Borrow the scheduled set of register moves that was computed by `schedule_moves()`.
    pub fn moves(&self) -> &[Assignment] {
        &self.moves
    }
}
// Unit tests for the move scheduler (`Solver::schedule_moves()` / `Solver::moves()`).
//
// Every scenario follows the same recipe: mark the occupied registers in `regs`, reset the
// solver with that set, program the required reassignments, solve, and finally compare the
// scheduled move sequence against the exact expected order.
#[cfg(test)]
mod tests {
use entity_map::EntityRef;
use ir::Value;
use isa::{TargetIsa, RegClass, RegUnit};
use regalloc::AllocatableSet;
use std::borrow::Borrow;
use super::{Solver, Assignment};
// Make an arm32 `TargetIsa`, if possible.
//
// The ISA is built from the default shared settings. The result is `None` when
// `isa::lookup("arm32")` does not yield a builder.
fn arm32() -> Option<Box<TargetIsa>> {
use settings;
use isa;
let shared_builder = settings::builder();
let shared_flags = settings::Flags::new(&shared_builder);
isa::lookup("arm32").map(|b| b.finish(shared_flags))
}
// Get a register class by name.
//
// Searches the register classes of `isa` for one whose `name` equals `name` and panics if
// there is no such class.
fn rc_by_name(isa: &TargetIsa, name: &str) -> RegClass {
isa.register_info()
.classes
.iter()
.find(|rc| rc.name == name)
.expect("Can't find named register class.")
}
// Construct a move.
//
// Shorthand for spelling out the expected `Assignment` entries in the assertions below.
fn mov(value: Value, rc: RegClass, from: RegUnit, to: RegUnit) -> Assignment {
Assignment {
value,
rc,
from,
to,
}
}
// Moves within a single register class (GPR): a lone move, a chain of two moves, and a
// two-register swap.
#[test]
fn simple_moves() {
let isa = arm32().expect("This test requires arm32 support");
let isa = isa.borrow();
let gpr = rc_by_name(isa, "GPR");
let r0 = gpr.unit(0);
let r1 = gpr.unit(1);
let r2 = gpr.unit(2);
let mut regs = AllocatableSet::new();
let mut solver = Solver::new();
let v10 = Value::new(10);
let v11 = Value::new(11);
// As simple as it gets: Value is in r1, we want r0.
regs.take(gpr, r1);
solver.reset(&regs);
solver.reassign_in(v10, gpr, r1, r0);
solver.inputs_done();
assert!(solver.quick_solve().is_ok());
assert_eq!(solver.schedule_moves(&regs), 0);
assert_eq!(solver.moves(), &[mov(v10, gpr, r1, r0)]);
// A bit harder: r0, r1 need to go in r1, r2.
//
// `regs` still has r1 taken from the previous case; r0 is taken in addition. The expected
// order moves r1 out of the way before r0 is moved into it.
regs.take(gpr, r0);
solver.reset(&regs);
solver.reassign_in(v10, gpr, r0, r1);
solver.reassign_in(v11, gpr, r1, r2);
solver.inputs_done();
assert!(solver.quick_solve().is_ok());
assert_eq!(solver.schedule_moves(&regs), 0);
assert_eq!(solver.moves(),
&[mov(v11, gpr, r1, r2), mov(v10, gpr, r0, r1)]);
// Swap r0 and r1 in three moves using r2 as a scratch.
//
// `regs` is unchanged from the previous case (r0 and r1 taken).
solver.reset(&regs);
solver.reassign_in(v10, gpr, r0, r1);
solver.reassign_in(v11, gpr, r1, r0);
solver.inputs_done();
assert!(solver.quick_solve().is_ok());
assert_eq!(solver.schedule_moves(&regs), 0);
assert_eq!(solver.moves(),
&[mov(v10, gpr, r0, r2),
mov(v11, gpr, r1, r0),
mov(v10, gpr, r2, r1)]);
}
// Cycles that mix the arm32 "S" and "D" register classes, where one D-register move is
// entangled with two S-register moves.
#[test]
fn harder_move_cycles() {
let isa = arm32().expect("This test requires arm32 support");
let isa = isa.borrow();
let s = rc_by_name(isa, "S");
let d = rc_by_name(isa, "D");
let d0 = d.unit(0);
let d1 = d.unit(1);
let d2 = d.unit(2);
let s0 = s.unit(0);
let s1 = s.unit(1);
let s2 = s.unit(2);
let s3 = s.unit(3);
let mut regs = AllocatableSet::new();
let mut solver = Solver::new();
let v10 = Value::new(10);
let v11 = Value::new(11);
let v12 = Value::new(12);
// Not a simple cycle: Swap d0 <-> (s2, s3)
//
// Expected schedule: the D value is first diverted to d2, then both S moves run, and
// finally the D value is moved from d2 to its destination d1.
regs.take(d, d0);
regs.take(d, d1);
solver.reset(&regs);
solver.reassign_in(v10, d, d0, d1);
solver.reassign_in(v11, s, s2, s0);
solver.reassign_in(v12, s, s3, s1);
solver.inputs_done();
assert!(solver.quick_solve().is_ok());
assert_eq!(solver.schedule_moves(&regs), 0);
assert_eq!(solver.moves(),
&[mov(v10, d, d0, d2),
mov(v11, s, s2, s0),
mov(v12, s, s3, s1),
mov(v10, d, d2, d1)]);
// Same problem in the other direction: Swap (s0, s1) <-> d1.
//
// If we divert the moves in order, we will need to allocate *two* temporary S registers. A
// trivial algorithm might assume that allocating a single temp is enough.
//
// The expected schedule diverts the D value (v10) through d2 even though its move was
// programmed last.
solver.reset(&regs);
solver.reassign_in(v11, s, s0, s2);
solver.reassign_in(v12, s, s1, s3);
solver.reassign_in(v10, d, d1, d0);
solver.inputs_done();
assert!(solver.quick_solve().is_ok());
assert_eq!(solver.schedule_moves(&regs), 0);
assert_eq!(solver.moves(),
&[mov(v10, d, d1, d2),
mov(v12, s, s1, s3),
mov(v11, s, s0, s2),
mov(v10, d, d2, d0)]);
}
}