Implement coloring::iterate_solution().
It can happen that the currently live registers are blocking a smaller register class completely, so the only way of solving the allocation problem is to turn some of the live-through registers into solver variables. When the quick_solve attempt fails, try to free up registers in the critical register class by turning live-through values into solver variables.
This commit is contained in:
107
cranelift/filetests/regalloc/iterate.cton
Normal file
107
cranelift/filetests/regalloc/iterate.cton
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
test compile
|
||||||
|
set is_64bit
|
||||||
|
isa intel haswell
|
||||||
|
|
||||||
|
function #00000009(i64 [%rdi], f32 [%xmm0], f64 [%xmm1], i32 [%rsi], i32 [%rdx], i64 vmctx [%r14]) -> i64 [%rax] spiderwasm {
|
||||||
|
ebb0(v0: i64, v1: f32, v2: f64, v3: i32, v4: i32, v5: i64):
|
||||||
|
v32 = iconst.i32 0
|
||||||
|
v6 = bitcast.f32 v32
|
||||||
|
v7 = iconst.i64 0
|
||||||
|
v33 = iconst.i64 0
|
||||||
|
v8 = bitcast.f64 v33
|
||||||
|
v34 = iconst.i32 0xbe99_999a
|
||||||
|
v9 = bitcast.f32 v34
|
||||||
|
v10 = iconst.i32 40
|
||||||
|
v11 = iconst.i32 -7
|
||||||
|
v35 = iconst.i32 0x40b0_0000
|
||||||
|
v12 = bitcast.f32 v35
|
||||||
|
v13 = iconst.i64 6
|
||||||
|
v36 = iconst.i64 0x4020_0000_0000_0000
|
||||||
|
v14 = bitcast.f64 v36
|
||||||
|
v44 = iconst.i64 0
|
||||||
|
v37 = icmp slt v0, v44
|
||||||
|
brnz v37, ebb2
|
||||||
|
v38 = fcvt_from_sint.f64 v0
|
||||||
|
jump ebb3(v38)
|
||||||
|
|
||||||
|
ebb2:
|
||||||
|
v45 = iconst.i32 1
|
||||||
|
v39 = ushr.i64 v0, v45
|
||||||
|
v40 = band_imm.i64 v0, 1
|
||||||
|
v41 = bor v39, v40
|
||||||
|
v42 = fcvt_from_sint.f64 v41
|
||||||
|
v43 = fadd v42, v42
|
||||||
|
jump ebb3(v43)
|
||||||
|
|
||||||
|
ebb3(v15: f64):
|
||||||
|
v16 = fpromote.f64 v9
|
||||||
|
v46 = uextend.i64 v10
|
||||||
|
v17 = fcvt_from_sint.f64 v46
|
||||||
|
v18 = fcvt_from_sint.f64 v11
|
||||||
|
v19 = fpromote.f64 v12
|
||||||
|
v54 = iconst.i64 0
|
||||||
|
v47 = icmp.i64 slt v13, v54
|
||||||
|
brnz v47, ebb4
|
||||||
|
v48 = fcvt_from_sint.f64 v13
|
||||||
|
jump ebb5(v48)
|
||||||
|
|
||||||
|
ebb4:
|
||||||
|
v55 = iconst.i32 1
|
||||||
|
v49 = ushr.i64 v13, v55
|
||||||
|
v50 = band_imm.i64 v13, 1
|
||||||
|
v51 = bor v49, v50
|
||||||
|
v52 = fcvt_from_sint.f64 v51
|
||||||
|
v53 = fadd v52, v52
|
||||||
|
jump ebb5(v53)
|
||||||
|
|
||||||
|
ebb5(v20: f64):
|
||||||
|
v63 = iconst.i64 0
|
||||||
|
v56 = icmp.i64 slt v7, v63
|
||||||
|
brnz v56, ebb6
|
||||||
|
v57 = fcvt_from_sint.f64 v7
|
||||||
|
jump ebb7(v57)
|
||||||
|
|
||||||
|
ebb6:
|
||||||
|
v64 = iconst.i32 1
|
||||||
|
v58 = ushr.i64 v7, v64
|
||||||
|
v59 = band_imm.i64 v7, 1
|
||||||
|
v60 = bor v58, v59
|
||||||
|
v61 = fcvt_from_sint.f64 v60
|
||||||
|
v62 = fadd v61, v61
|
||||||
|
jump ebb7(v62)
|
||||||
|
|
||||||
|
ebb7(v21: f64):
|
||||||
|
v22 = fadd v21, v14
|
||||||
|
v23 = fadd.f64 v20, v22
|
||||||
|
v24 = fadd.f64 v19, v23
|
||||||
|
v25 = fadd.f64 v18, v24
|
||||||
|
v26 = fadd.f64 v17, v25
|
||||||
|
v27 = fadd.f64 v2, v26
|
||||||
|
v28 = fadd.f64 v16, v27
|
||||||
|
v29 = fadd.f64 v15, v28
|
||||||
|
v30 = x86_cvtt2si.i64 v29
|
||||||
|
v69 = iconst.i64 0x8000_0000_0000_0000
|
||||||
|
v65 = icmp ne v30, v69
|
||||||
|
brnz v65, ebb8
|
||||||
|
v66 = fcmp uno v29, v29
|
||||||
|
brz v66, ebb9
|
||||||
|
trap bad_toint
|
||||||
|
|
||||||
|
ebb9:
|
||||||
|
v70 = iconst.i64 0xc3e0_0000_0000_0000
|
||||||
|
v67 = bitcast.f64 v70
|
||||||
|
v68 = fcmp gt v67, v29
|
||||||
|
brz v68, ebb10
|
||||||
|
trap int_ovf
|
||||||
|
|
||||||
|
ebb10:
|
||||||
|
jump ebb8
|
||||||
|
|
||||||
|
ebb8:
|
||||||
|
jump ebb1(v30)
|
||||||
|
|
||||||
|
ebb1(v31: i64):
|
||||||
|
return v31
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -119,6 +119,7 @@ enc_i32_i64(x86.udivmodx, r.div, 0xf7, rrr=6)
|
|||||||
|
|
||||||
enc_i32_i64(base.copy, r.umr, 0x89)
|
enc_i32_i64(base.copy, r.umr, 0x89)
|
||||||
enc_i32_i64(base.regmove, r.rmov, 0x89)
|
enc_i32_i64(base.regmove, r.rmov, 0x89)
|
||||||
|
enc_flt(base.regmove.b1, r.rmov, 0x89)
|
||||||
|
|
||||||
# Immediate instructions with sign-extended 8-bit and 32-bit immediate.
|
# Immediate instructions with sign-extended 8-bit and 32-bit immediate.
|
||||||
for inst, rrr in [
|
for inst, rrr in [
|
||||||
|
|||||||
@@ -167,7 +167,7 @@ impl RegClassData {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if `other` is a subclass of this register class.
|
/// Returns true if `other` is a subclass of this register class.
|
||||||
/// A register class is considerd to be a subclass of itself.
|
/// A register class is considered to be a subclass of itself.
|
||||||
pub fn has_subclass<RCI: Into<RegClassIndex>>(&self, other: RCI) -> bool {
|
pub fn has_subclass<RCI: Into<RegClassIndex>>(&self, other: RCI) -> bool {
|
||||||
self.subclasses & (1 << other.into().0) != 0
|
self.subclasses & (1 << other.into().0) != 0
|
||||||
}
|
}
|
||||||
@@ -276,6 +276,11 @@ impl RegInfo {
|
|||||||
pub fn rc(&self, idx: RegClassIndex) -> RegClass {
|
pub fn rc(&self, idx: RegClassIndex) -> RegClass {
|
||||||
&self.classes[idx.index()]
|
&self.classes[idx.index()]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get the top-level register class containing `rc`.
|
||||||
|
pub fn toprc(&self, rc: RegClass) -> RegClass {
|
||||||
|
&self.classes[rc.toprc as usize]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Temporary object that holds enough information to print a register unit.
|
/// Temporary object that holds enough information to print a register unit.
|
||||||
|
|||||||
@@ -367,9 +367,10 @@ impl<'a> Context<'a> {
|
|||||||
|
|
||||||
// Finally, we've fully programmed the constraint solver.
|
// Finally, we've fully programmed the constraint solver.
|
||||||
// We expect a quick solution in most cases.
|
// We expect a quick solution in most cases.
|
||||||
let mut output_regs = self.solver.quick_solve().unwrap_or_else(
|
let mut output_regs = self.solver.quick_solve().unwrap_or_else(|rc| {
|
||||||
|_| self.iterate_solution(),
|
dbg!("quick_solve needs more registers in {}", rc);
|
||||||
);
|
self.iterate_solution(throughs, locations)
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
// The solution and/or fixed input constraints may require us to shuffle the set of live
|
// The solution and/or fixed input constraints may require us to shuffle the set of live
|
||||||
@@ -731,8 +732,52 @@ impl<'a> Context<'a> {
|
|||||||
///
|
///
|
||||||
/// We may need to move more registers around before a solution is possible. Use an iterative
|
/// We may need to move more registers around before a solution is possible. Use an iterative
|
||||||
/// algorithm that adds one more variable until a solution can be found.
|
/// algorithm that adds one more variable until a solution can be found.
|
||||||
fn iterate_solution(&self) -> AllocatableSet {
|
fn iterate_solution(
|
||||||
unimplemented!();
|
&mut self,
|
||||||
|
throughs: &[LiveValue],
|
||||||
|
locations: &mut ValueLocations,
|
||||||
|
) -> AllocatableSet {
|
||||||
|
loop {
|
||||||
|
dbg!("real_solve for {} variables", self.solver.vars().len());
|
||||||
|
let rc = match self.solver.real_solve() {
|
||||||
|
Ok(regs) => return regs,
|
||||||
|
Err(rc) => rc,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Do we have any live-through `rc` registers that are not already variables?
|
||||||
|
assert!(
|
||||||
|
self.try_add_var(rc, throughs, locations),
|
||||||
|
"Ran out of registers in {}",
|
||||||
|
rc
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Try to add an `rc` variable to the solver from the `throughs` set.
|
||||||
|
fn try_add_var(
|
||||||
|
&mut self,
|
||||||
|
rc: RegClass,
|
||||||
|
throughs: &[LiveValue],
|
||||||
|
locations: &mut ValueLocations,
|
||||||
|
) -> bool {
|
||||||
|
dbg!("Trying to add a {} reg from {} values", rc, throughs.len());
|
||||||
|
|
||||||
|
for lv in throughs {
|
||||||
|
if let Affinity::Reg(rci) = lv.affinity {
|
||||||
|
let rc2 = self.reginfo.rc(rci);
|
||||||
|
let reg2 = self.divert.reg(lv.value, locations);
|
||||||
|
if rc.contains(reg2) && self.solver.can_add_var(lv.value, rc2, reg2) {
|
||||||
|
// The new variable gets to roam the whole top-level register class because
|
||||||
|
// it is not actually constrained by the instruction. We just want it out
|
||||||
|
// of the way.
|
||||||
|
let toprc = self.reginfo.toprc(rc2);
|
||||||
|
self.solver.add_var(lv.value, toprc, reg2, &self.reginfo);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
false
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Emit `regmove` instructions as needed to move the live registers into place before the
|
/// Emit `regmove` instructions as needed to move the live registers into place before the
|
||||||
|
|||||||
@@ -590,6 +590,18 @@ impl Solver {
|
|||||||
self.find_solution()
|
self.find_solution()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Try harder to find a solution.
|
||||||
|
///
|
||||||
|
/// Call this method after `quick_solve()` fails.
|
||||||
|
///
|
||||||
|
/// This may return an error with a register class that has run out of registers. If registers
|
||||||
|
/// can be freed up in the starving class, this method can be called again after adding
|
||||||
|
/// variables for the freed registers.
|
||||||
|
pub fn real_solve(&mut self) -> Result<AllocatableSet, RegClass> {
|
||||||
|
// TODO: Sort variables to assign smallest register classes first.
|
||||||
|
self.find_solution()
|
||||||
|
}
|
||||||
|
|
||||||
/// Search for a solution with the current list of variables.
|
/// Search for a solution with the current list of variables.
|
||||||
///
|
///
|
||||||
/// If a solution was found, returns `Ok(regs)` with the set of available registers on the
|
/// If a solution was found, returns `Ok(regs)` with the set of available registers on the
|
||||||
@@ -623,6 +635,11 @@ impl Solver {
|
|||||||
pub fn vars(&self) -> &[Variable] {
|
pub fn vars(&self) -> &[Variable] {
|
||||||
&self.vars
|
&self.vars
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Check if `value` can be added as a variable to help find a solution.
|
||||||
|
pub fn can_add_var(&mut self, _value: Value, constraint: RegClass, from: RegUnit) -> bool {
|
||||||
|
!self.regs_in.is_avail(constraint, from)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Interface for working with parallel copies once a solution has been found.
|
/// Interface for working with parallel copies once a solution has been found.
|
||||||
|
|||||||
Reference in New Issue
Block a user