Implement coloring::iterate_solution().

It can happen that the currently live registers are blocking a smaller
register class completely, so the only way of solving the allocation
problem is to turn some of the live-through registers into solver
variables.

When the quick_solve attempt fails, try to free up registers in the
critical register class by turning live-through values into solver
variables.
This commit is contained in:
Jakob Stoklund Olesen
2017-09-29 14:38:30 -07:00
parent 86e22e7de5
commit 51a6901a7f
5 changed files with 181 additions and 6 deletions

View File

@@ -0,0 +1,107 @@
test compile
set is_64bit
isa intel haswell
; Compile-only test function. Several integer and float values defined in ebb0 stay
; live across the branches below and are only consumed by the fadd chain in ebb7.
; NOTE(review) presumably this is the reduced case where quick_solve failed and
; iterate_solution became necessary, confirm against the commit description.
function #00000009(i64 [%rdi], f32 [%xmm0], f64 [%xmm1], i32 [%rsi], i32 [%rdx], i64 vmctx [%r14]) -> i64 [%rax] spiderwasm {
ebb0(v0: i64, v1: f32, v2: f64, v3: i32, v4: i32, v5: i64):
; Materialize constants. Float constants are built by bitcasting an integer constant.
v32 = iconst.i32 0
v6 = bitcast.f32 v32
v7 = iconst.i64 0
v33 = iconst.i64 0
v8 = bitcast.f64 v33
v34 = iconst.i32 0xbe99_999a
v9 = bitcast.f32 v34
v10 = iconst.i32 40
v11 = iconst.i32 -7
v35 = iconst.i32 0x40b0_0000
v12 = bitcast.f32 v35
v13 = iconst.i64 6
v36 = iconst.i64 0x4020_0000_0000_0000
v14 = bitcast.f64 v36
; Convert v0 to f64. A v0 that is negative as a signed value takes the ebb2 path.
v44 = iconst.i64 0
v37 = icmp slt v0, v44
brnz v37, ebb2
v38 = fcvt_from_sint.f64 v0
jump ebb3(v38)
ebb2:
; Shift v0 right by one, OR its low bit back in, convert, then double the result.
v45 = iconst.i32 1
v39 = ushr.i64 v0, v45
v40 = band_imm.i64 v0, 1
v41 = bor v39, v40
v42 = fcvt_from_sint.f64 v41
v43 = fadd v42, v42
jump ebb3(v43)
ebb3(v15: f64):
; Widen and convert the remaining constants to f64.
v16 = fpromote.f64 v9
v46 = uextend.i64 v10
v17 = fcvt_from_sint.f64 v46
v18 = fcvt_from_sint.f64 v11
v19 = fpromote.f64 v12
; Same two-path conversion as above, this time applied to v13.
v54 = iconst.i64 0
v47 = icmp.i64 slt v13, v54
brnz v47, ebb4
v48 = fcvt_from_sint.f64 v13
jump ebb5(v48)
ebb4:
v55 = iconst.i32 1
v49 = ushr.i64 v13, v55
v50 = band_imm.i64 v13, 1
v51 = bor v49, v50
v52 = fcvt_from_sint.f64 v51
v53 = fadd v52, v52
jump ebb5(v53)
ebb5(v20: f64):
; Same two-path conversion again, applied to v7.
v63 = iconst.i64 0
v56 = icmp.i64 slt v7, v63
brnz v56, ebb6
v57 = fcvt_from_sint.f64 v7
jump ebb7(v57)
ebb6:
v64 = iconst.i32 1
v58 = ushr.i64 v7, v64
v59 = band_imm.i64 v7, 1
v60 = bor v58, v59
v61 = fcvt_from_sint.f64 v60
v62 = fadd v61, v61
jump ebb7(v62)
ebb7(v21: f64):
; Add up the f64 values collected so far, then truncate the total to i64.
v22 = fadd v21, v14
v23 = fadd.f64 v20, v22
v24 = fadd.f64 v19, v23
v25 = fadd.f64 v18, v24
v26 = fadd.f64 v17, v25
v27 = fadd.f64 v2, v26
v28 = fadd.f64 v16, v27
v29 = fadd.f64 v15, v28
v30 = x86_cvtt2si.i64 v29
; Any result other than 0x8000_0000_0000_0000 skips the checks below.
v69 = iconst.i64 0x8000_0000_0000_0000
v65 = icmp ne v30, v69
brnz v65, ebb8
; v29 compared unordered with itself means NaN, which traps with bad_toint.
v66 = fcmp uno v29, v29
brz v66, ebb9
trap bad_toint
ebb9:
; Trap with int_ovf when the bitcast bound v67 is greater than v29.
v70 = iconst.i64 0xc3e0_0000_0000_0000
v67 = bitcast.f64 v70
v68 = fcmp gt v67, v29
brz v68, ebb10
trap int_ovf
ebb10:
jump ebb8
ebb8:
jump ebb1(v30)
ebb1(v31: i64):
return v31
}

View File

@@ -119,6 +119,7 @@ enc_i32_i64(x86.udivmodx, r.div, 0xf7, rrr=6)
# Register copy and regmove encodings. Every call below passes 0x89 as its last
# argument (presumably the opcode byte -- confirm against the helper definitions).
# NOTE(review): the b1 regmove is registered through enc_flt while the other two use
# enc_i32_i64; both helpers are defined outside this view, so verify that routing.
enc_i32_i64(base.copy, r.umr, 0x89)
enc_i32_i64(base.regmove, r.rmov, 0x89)
enc_flt(base.regmove.b1, r.rmov, 0x89)
# Immediate instructions with sign-extended 8-bit and 32-bit immediate.
for inst, rrr in [

View File

@@ -167,7 +167,7 @@ impl RegClassData {
}
/// Returns true if `other` is a subclass of this register class.
///
/// A register class is considered to be a subclass of itself. The check tests the bit
/// numbered by `other`'s class index in the `subclasses` mask.
pub fn has_subclass<RCI: Into<RegClassIndex>>(&self, other: RCI) -> bool {
    let index: RegClassIndex = other.into();
    // One bit per register class; select the bit belonging to `other`.
    let mask = 1 << index.0;
    (self.subclasses & mask) != 0
}
@@ -276,6 +276,11 @@ impl RegInfo {
pub fn rc(&self, idx: RegClassIndex) -> RegClass {
// Direct table lookup: `idx.index()` is used as the position in `classes`, so an
// index past the end of the table panics.
&self.classes[idx.index()]
}
/// Get the top-level register class containing `rc`.
///
/// The result is looked up in `classes` using the `toprc` field stored on `rc`.
pub fn toprc(&self, rc: RegClass) -> RegClass {
&self.classes[rc.toprc as usize]
}
}
/// Temporary object that holds enough information to print a register unit.

View File

@@ -367,9 +367,10 @@ impl<'a> Context<'a> {
// Finally, we've fully programmed the constraint solver.
// We expect a quick solution in most cases.
let mut output_regs = self.solver.quick_solve().unwrap_or_else(
|_| self.iterate_solution(),
);
let mut output_regs = self.solver.quick_solve().unwrap_or_else(|rc| {
dbg!("quick_solve needs more registers in {}", rc);
self.iterate_solution(throughs, locations)
});
// The solution and/or fixed input constraints may require us to shuffle the set of live
@@ -731,8 +732,52 @@ impl<'a> Context<'a> {
///
/// We may need to move more registers around before a solution is possible. Use an iterative
/// algorithm that adds one more variable until a solution can be found.
fn iterate_solution(&self) -> AllocatableSet {
unimplemented!();
fn iterate_solution(
    &mut self,
    throughs: &[LiveValue],
    locations: &mut ValueLocations,
) -> AllocatableSet {
    // Each round asks the solver for a complete solution. When that fails, the solver
    // reports the register class that ran dry, and one live-through value holding a
    // register in that class is converted into a solver variable before retrying.
    loop {
        dbg!("real_solve for {} variables", self.solver.vars().len());
        match self.solver.real_solve() {
            Ok(regs) => return regs,
            Err(starved) => {
                // With no live-through candidate left to free up, give up loudly.
                if !self.try_add_var(starved, throughs, locations) {
                    panic!("Ran out of registers in {}", starved);
                }
            }
        }
    }
}
/// Try to turn one live-through value into a solver variable to relieve `rc`.
///
/// Scans `throughs` in order and picks the first value with a register affinity whose
/// current register belongs to `rc` and which the solver accepts as a new variable.
/// Returns `true` as soon as one value was added, `false` if no candidate was found.
fn try_add_var(
    &mut self,
    rc: RegClass,
    throughs: &[LiveValue],
    locations: &mut ValueLocations,
) -> bool {
    dbg!("Trying to add a {} reg from {} values", rc, throughs.len());

    for lv in throughs {
        // Only values with a register affinity can give up a register.
        let rci = match lv.affinity {
            Affinity::Reg(rci) => rci,
            _ => continue,
        };

        let value_rc = self.reginfo.rc(rci);
        let current = self.divert.reg(lv.value, locations);
        if !rc.contains(current) || !self.solver.can_add_var(lv.value, value_rc, current) {
            continue;
        }

        // The instruction places no constraint on this value; it only has to get out
        // of the way, so let it move anywhere within its top-level register class.
        let roam_rc = self.reginfo.toprc(value_rc);
        self.solver.add_var(lv.value, roam_rc, current, &self.reginfo);
        return true;
    }

    false
}
/// Emit `regmove` instructions as needed to move the live registers into place before the

View File

@@ -590,6 +590,18 @@ impl Solver {
self.find_solution()
}
/// Try harder to find a solution.
///
/// Call this method after `quick_solve()` fails.
///
/// On failure, the `Err` value is the register class that has run out of registers. If
/// registers can be freed up in that starving class, this method can be called again
/// after adding variables for the freed registers.
///
/// At present this simply delegates to `find_solution()`; the variable ordering
/// described in the TODO below is not implemented yet.
pub fn real_solve(&mut self) -> Result<AllocatableSet, RegClass> {
// TODO: Sort variables to assign smallest register classes first.
self.find_solution()
}
/// Search for a solution with the current list of variables.
///
/// If a solution was found, returns `Ok(regs)` with the set of available registers on the
@@ -623,6 +635,11 @@ impl Solver {
pub fn vars(&self) -> &[Variable] {
// Read-only view of the solver's current variable list.
&self.vars
}
/// Check if `value` can be added as a variable to help find a solution.
///
/// Returns `true` when `regs_in.is_avail(constraint, from)` reports that `from` is
/// *not* available. The `_value` argument is currently unused.
///
/// NOTE(review): presumably "not available" means the unit is still occupied on the
/// input side, so turning its value into a variable would free it -- confirm against
/// the `is_avail` implementation.
pub fn can_add_var(&mut self, _value: Value, constraint: RegClass, from: RegUnit) -> bool {
!self.regs_in.is_avail(constraint, from)
}
}
/// Interface for working with parallel copies once a solution has been found.