From 51a6901a7fbccf99065c901181c1c9e43bed7099 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Fri, 29 Sep 2017 14:38:30 -0700 Subject: [PATCH] Implement coloring::iterate_solution(). It can happen that the currently live registers are blocking a smaller register class completely, so the only way of solving the allocation problem is to turn some of the live-through registers into solver variables. When the quick_solve attempt fails, try to free up registers in the critical register class by turning live-through values into solver variables. --- cranelift/filetests/regalloc/iterate.cton | 107 ++++++++++++++++++++++ lib/cretonne/meta/isa/intel/encodings.py | 1 + lib/cretonne/src/isa/registers.rs | 7 +- lib/cretonne/src/regalloc/coloring.rs | 55 ++++++++++- lib/cretonne/src/regalloc/solver.rs | 17 ++++ 5 files changed, 181 insertions(+), 6 deletions(-) create mode 100644 cranelift/filetests/regalloc/iterate.cton diff --git a/cranelift/filetests/regalloc/iterate.cton b/cranelift/filetests/regalloc/iterate.cton new file mode 100644 index 0000000000..cacd9a4318 --- /dev/null +++ b/cranelift/filetests/regalloc/iterate.cton @@ -0,0 +1,107 @@ +test compile +set is_64bit +isa intel haswell + +function #00000009(i64 [%rdi], f32 [%xmm0], f64 [%xmm1], i32 [%rsi], i32 [%rdx], i64 vmctx [%r14]) -> i64 [%rax] spiderwasm { +ebb0(v0: i64, v1: f32, v2: f64, v3: i32, v4: i32, v5: i64): + v32 = iconst.i32 0 + v6 = bitcast.f32 v32 + v7 = iconst.i64 0 + v33 = iconst.i64 0 + v8 = bitcast.f64 v33 + v34 = iconst.i32 0xbe99_999a + v9 = bitcast.f32 v34 + v10 = iconst.i32 40 + v11 = iconst.i32 -7 + v35 = iconst.i32 0x40b0_0000 + v12 = bitcast.f32 v35 + v13 = iconst.i64 6 + v36 = iconst.i64 0x4020_0000_0000_0000 + v14 = bitcast.f64 v36 + v44 = iconst.i64 0 + v37 = icmp slt v0, v44 + brnz v37, ebb2 + v38 = fcvt_from_sint.f64 v0 + jump ebb3(v38) + +ebb2: + v45 = iconst.i32 1 + v39 = ushr.i64 v0, v45 + v40 = band_imm.i64 v0, 1 + v41 = bor v39, v40 + v42 = fcvt_from_sint.f64 v41 + v43 = 
fadd v42, v42 + jump ebb3(v43) + +ebb3(v15: f64): + v16 = fpromote.f64 v9 + v46 = uextend.i64 v10 + v17 = fcvt_from_sint.f64 v46 + v18 = fcvt_from_sint.f64 v11 + v19 = fpromote.f64 v12 + v54 = iconst.i64 0 + v47 = icmp.i64 slt v13, v54 + brnz v47, ebb4 + v48 = fcvt_from_sint.f64 v13 + jump ebb5(v48) + +ebb4: + v55 = iconst.i32 1 + v49 = ushr.i64 v13, v55 + v50 = band_imm.i64 v13, 1 + v51 = bor v49, v50 + v52 = fcvt_from_sint.f64 v51 + v53 = fadd v52, v52 + jump ebb5(v53) + +ebb5(v20: f64): + v63 = iconst.i64 0 + v56 = icmp.i64 slt v7, v63 + brnz v56, ebb6 + v57 = fcvt_from_sint.f64 v7 + jump ebb7(v57) + +ebb6: + v64 = iconst.i32 1 + v58 = ushr.i64 v7, v64 + v59 = band_imm.i64 v7, 1 + v60 = bor v58, v59 + v61 = fcvt_from_sint.f64 v60 + v62 = fadd v61, v61 + jump ebb7(v62) + +ebb7(v21: f64): + v22 = fadd v21, v14 + v23 = fadd.f64 v20, v22 + v24 = fadd.f64 v19, v23 + v25 = fadd.f64 v18, v24 + v26 = fadd.f64 v17, v25 + v27 = fadd.f64 v2, v26 + v28 = fadd.f64 v16, v27 + v29 = fadd.f64 v15, v28 + v30 = x86_cvtt2si.i64 v29 + v69 = iconst.i64 0x8000_0000_0000_0000 + v65 = icmp ne v30, v69 + brnz v65, ebb8 + v66 = fcmp uno v29, v29 + brz v66, ebb9 + trap bad_toint + +ebb9: + v70 = iconst.i64 0xc3e0_0000_0000_0000 + v67 = bitcast.f64 v70 + v68 = fcmp gt v67, v29 + brz v68, ebb10 + trap int_ovf + +ebb10: + jump ebb8 + +ebb8: + jump ebb1(v30) + +ebb1(v31: i64): + return v31 +} + + diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index 9dee014771..eb4cee439e 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -119,6 +119,7 @@ enc_i32_i64(x86.udivmodx, r.div, 0xf7, rrr=6) enc_i32_i64(base.copy, r.umr, 0x89) enc_i32_i64(base.regmove, r.rmov, 0x89) +enc_flt(base.regmove.b1, r.rmov, 0x89) # Immediate instructions with sign-extended 8-bit and 32-bit immediate. 
for inst, rrr in [ diff --git a/lib/cretonne/src/isa/registers.rs b/lib/cretonne/src/isa/registers.rs index a43d5cfd62..a1de76d97e 100644 --- a/lib/cretonne/src/isa/registers.rs +++ b/lib/cretonne/src/isa/registers.rs @@ -167,7 +167,7 @@ impl RegClassData { } /// Returns true if `other` is a subclass of this register class. - /// A register class is considerd to be a subclass of itself. + /// A register class is considered to be a subclass of itself. pub fn has_subclass>(&self, other: RCI) -> bool { self.subclasses & (1 << other.into().0) != 0 } @@ -276,6 +276,11 @@ impl RegInfo { pub fn rc(&self, idx: RegClassIndex) -> RegClass { &self.classes[idx.index()] } + + /// Get the top-level register class containing `rc`. + pub fn toprc(&self, rc: RegClass) -> RegClass { + &self.classes[rc.toprc as usize] + } } /// Temporary object that holds enough information to print a register unit. diff --git a/lib/cretonne/src/regalloc/coloring.rs b/lib/cretonne/src/regalloc/coloring.rs index e46c95c101..ce6e22f756 100644 --- a/lib/cretonne/src/regalloc/coloring.rs +++ b/lib/cretonne/src/regalloc/coloring.rs @@ -367,9 +367,10 @@ impl<'a> Context<'a> { // Finally, we've fully programmed the constraint solver. // We expect a quick solution in most cases. - let mut output_regs = self.solver.quick_solve().unwrap_or_else( - |_| self.iterate_solution(), - ); + let mut output_regs = self.solver.quick_solve().unwrap_or_else(|rc| { + dbg!("quick_solve needs more registers in {}", rc); + self.iterate_solution(throughs, locations) + }); // The solution and/or fixed input constraints may require us to shuffle the set of live @@ -731,8 +732,52 @@ impl<'a> Context<'a> { /// /// We may need to move more registers around before a solution is possible. Use an iterative /// algorithm that adds one more variable until a solution can be found. 
- fn iterate_solution(&self) -> AllocatableSet { - unimplemented!(); + fn iterate_solution( + &mut self, + throughs: &[LiveValue], + locations: &mut ValueLocations, + ) -> AllocatableSet { + loop { + dbg!("real_solve for {} variables", self.solver.vars().len()); + let rc = match self.solver.real_solve() { + Ok(regs) => return regs, + Err(rc) => rc, + }; + + // Do we have any live-through `rc` registers that are not already variables? + assert!( + self.try_add_var(rc, throughs, locations), + "Ran out of registers in {}", + rc + ); + } + } + + /// Try to add an `rc` variable to the solver from the `throughs` set. + fn try_add_var( + &mut self, + rc: RegClass, + throughs: &[LiveValue], + locations: &mut ValueLocations, + ) -> bool { + dbg!("Trying to add a {} reg from {} values", rc, throughs.len()); + + for lv in throughs { + if let Affinity::Reg(rci) = lv.affinity { + let rc2 = self.reginfo.rc(rci); + let reg2 = self.divert.reg(lv.value, locations); + if rc.contains(reg2) && self.solver.can_add_var(lv.value, rc2, reg2) { + // The new variable gets to roam the whole top-level register class because + // it is not actually constrained by the instruction. We just want it out + // of the way. + let toprc = self.reginfo.toprc(rc2); + self.solver.add_var(lv.value, toprc, reg2, &self.reginfo); + return true; + } + } + } + + false } /// Emit `regmove` instructions as needed to move the live registers into place before the diff --git a/lib/cretonne/src/regalloc/solver.rs b/lib/cretonne/src/regalloc/solver.rs index c9215f4ebb..bfa6d22eb3 100644 --- a/lib/cretonne/src/regalloc/solver.rs +++ b/lib/cretonne/src/regalloc/solver.rs @@ -590,6 +590,18 @@ impl Solver { self.find_solution() } + /// Try harder to find a solution. + /// + /// Call this method after `quick_solve()` fails. + /// + /// This may return an error with a register class that has run out of registers. 
If registers + /// can be freed up in the starving class, this method can be called again after adding + /// variables for the freed registers. + pub fn real_solve(&mut self) -> Result<AllocatableSet, RegClass> { + // TODO: Sort variables to assign smallest register classes first. + self.find_solution() + } + /// Search for a solution with the current list of variables. /// /// If a solution was found, returns `Ok(regs)` with the set of available registers on the @@ -623,6 +635,11 @@ impl Solver { pub fn vars(&self) -> &[Variable] { &self.vars } + + /// Check if `value` can be added as a variable to help find a solution. + pub fn can_add_var(&mut self, _value: Value, constraint: RegClass, from: RegUnit) -> bool { + !self.regs_in.is_avail(constraint, from) + } } /// Interface for working with parallel copies once a solution has been found.