Cranelift: implement redundant fill removal on tree-shaped CFG regions. Mozilla bug 1570584. (#906)

This commit is contained in:
julian-seward1
2019-08-25 19:37:34 +02:00
committed by GitHub
parent cc57e84cbd
commit b8fb52446c
19 changed files with 1262 additions and 24 deletions

View File

@@ -45,7 +45,7 @@
use crate::cursor::{Cursor, EncCursor};
use crate::dominator_tree::DominatorTree;
use crate::ir::{AbiParam, ArgumentLoc, InstBuilder, ValueDef};
use crate::ir::{Ebb, Function, Inst, Layout, SigRef, Value, ValueLoc};
use crate::ir::{Ebb, Function, Inst, InstructionData, Layout, Opcode, SigRef, Value, ValueLoc};
use crate::isa::{regs_overlap, RegClass, RegInfo, RegUnit};
use crate::isa::{ConstraintKind, EncInfo, OperandConstraint, RecipeConstraints, TargetIsa};
use crate::packed_option::PackedOption;
@@ -428,10 +428,26 @@ impl<'a> Context<'a> {
// Finally, we've fully programmed the constraint solver.
// We expect a quick solution in most cases.
let output_regs = self.solver.quick_solve(&regs.global).unwrap_or_else(|_| {
debug!("quick_solve failed for {}", self.solver);
self.iterate_solution(throughs, &regs.global, &mut replace_global_defines)
});
let is_reload = match &self.cur.func.dfg[inst] {
InstructionData::Unary {
opcode: Opcode::Fill,
arg: _,
} => true,
_ => false,
};
let output_regs = self
.solver
.quick_solve(&regs.global, is_reload)
.unwrap_or_else(|_| {
debug!("quick_solve failed for {}", self.solver);
self.iterate_solution(
throughs,
&regs.global,
&mut replace_global_defines,
is_reload,
)
});
// The solution and/or fixed input constraints may require us to shuffle the set of live
// registers around.
@@ -847,12 +863,13 @@ impl<'a> Context<'a> {
throughs: &[LiveValue],
global_regs: &RegisterSet,
replace_global_defines: &mut bool,
is_reload: bool,
) -> RegisterSet {
// Make sure `try_add_var()` below doesn't create a variable with too loose constraints.
self.program_complete_input_constraints();
loop {
match self.solver.real_solve(global_regs) {
match self.solver.real_solve(global_regs, is_reload) {
Ok(regs) => return regs,
Err(SolverError::Divert(rc)) => {
// Do we have any live-through `rc` registers that are not already variables?

View File

@@ -126,6 +126,7 @@ impl RegisterSet {
}
/// Iterator over available registers in a register class.
///
/// The iterator is `Clone`, so a caller can snapshot it and walk the same remaining units
/// more than once (the unit tests do this to compare forward and reverse traversal).
#[derive(Clone)]
pub struct RegSetIter {
// Mask of register units still to be yielded; `rnext` clears a unit's bit when it returns it.
regs: RegUnitMask,
}
@@ -161,6 +162,31 @@ impl Iterator for RegSetIter {
}
}
impl RegSetIter {
    /// Remove and return the highest-numbered register unit still present in the mask.
    ///
    /// The mask words are scanned from the last one down to the first, and within the first
    /// non-zero word found the most significant set bit is taken. That bit is cleared before
    /// returning, so successive calls yield units in strictly descending order.
    ///
    /// Returns `None` once every word of the mask is zero.
    pub fn rnext(&mut self) -> Option<RegUnit> {
        let word_bits = 8 * size_of_val(&self.regs[0]);
        for (index, bits) in self.regs.iter_mut().enumerate().rev() {
            if *bits == 0 {
                // Nothing left in this word; keep looking in lower-numbered words.
                continue;
            }
            // Position of the most significant set bit within this word.
            let top = word_bits - 1 - bits.leading_zeros() as usize;
            // Drop it from the mask so the next call moves on to the following unit.
            *bits &= !(1 << top);
            return Some((index * word_bits + top) as RegUnit);
        }
        // The whole mask is zero.
        None
    }
}
// Marker impl with no overridden methods: it makes `len()` available on `RegSetIter`.
// NOTE(review): this relies on the `Iterator` impl reporting an exact `size_hint`; that impl is
// not visible here, so confirm before changing either.
impl ExactSizeIterator for RegSetIter {}
/// Displaying a `RegisterSet` correctly requires the associated `RegInfo` from the target ISA.
@@ -261,6 +287,45 @@ mod tests {
classes: &[],
};
// Fixtures for the reverse-iteration test. Each one is a three-word register mask.

// An arbitrary mixed pattern of set and clear bits.
const RSI_1: RegSetIter = RegSetIter {
regs: [0x31415927, 0x27182818, 0x14141356],
};
// The empty mask: no bits set in any word.
const RSI_2: RegSetIter = RegSetIter {
regs: [0x00000000, 0x00000000, 0x00000000],
};
// The full mask: every bit of every word set.
const RSI_3: RegSetIter = RegSetIter {
regs: [0xffffffff, 0xffffffff, 0xffffffff],
};
/// Assert that draining `rsi` with `rnext()` yields exactly the units that forward iteration
/// yields, in the opposite order.
///
/// Both traversals run on independent clones, so `rsi` itself is left untouched.
fn reverse_regset_iteration_work(rsi: &RegSetIter) {
    // Units as produced by the ordinary `Iterator` implementation.
    let forward: Vec<RegUnit> = rsi.clone().collect();

    // Units as produced by repeatedly taking from the back.
    let mut backward = Vec::<RegUnit>::new();
    let mut from_back = rsi.clone();
    while let Some(unit) = from_back.rnext() {
        backward.push(unit);
    }

    // Same number of units, and the reverse sequence read backwards equals the forward one.
    assert_eq!(forward.len(), backward.len());
    for (fwd, bwd) in forward.iter().zip(backward.iter().rev()) {
        assert_eq!(fwd, bwd);
    }
}
#[test]
fn reverse_regset_iteration() {
    // Exercise a mixed bit pattern, the empty mask and the fully populated mask, in that order.
    for rsi in [RSI_1, RSI_2, RSI_3].iter() {
        reverse_regset_iteration_work(rsi);
    }
}
#[test]
fn put_and_take() {
let mut regs = RegisterSet::new();

View File

@@ -852,8 +852,12 @@ impl Solver {
/// always trivial.
///
/// Returns `Ok(regs)` if a solution was found.
pub fn quick_solve(&mut self, global_regs: &RegisterSet) -> Result<RegisterSet, SolverError> {
self.find_solution(global_regs)
pub fn quick_solve(
&mut self,
global_regs: &RegisterSet,
// Passed straight through to `find_solution`, which uses it to pick the last available
// register instead of the first one when assigning a variable.
is_reload: bool,
) -> Result<RegisterSet, SolverError> {
// No preprocessing of the variables here: go directly to the search.
self.find_solution(global_regs, is_reload)
}
/// Try harder to find a solution.
@@ -863,7 +867,11 @@ impl Solver {
/// This may return an error with a register class that has run out of registers. If registers
/// can be freed up in the starving class, this method can be called again after adding
/// variables for the freed registers.
pub fn real_solve(&mut self, global_regs: &RegisterSet) -> Result<RegisterSet, SolverError> {
pub fn real_solve(
&mut self,
global_regs: &RegisterSet,
is_reload: bool,
) -> Result<RegisterSet, SolverError> {
// Compute domain sizes for all the variables given the current register sets.
for v in &mut self.vars {
let d = v.iter(&self.regs_in, &self.regs_out, global_regs).len();
@@ -901,7 +909,7 @@ impl Solver {
});
debug!("real_solve for {}", self);
self.find_solution(global_regs)
self.find_solution(global_regs, is_reload)
}
/// Search for a solution with the current list of variables.
@@ -909,7 +917,11 @@ impl Solver {
/// If a solution was found, returns `Ok(regs)` with the set of available registers on the
/// output side after the solution. If no solution could be found, returns `Err(rc)` with the
/// constraint register class that needs more available registers.
fn find_solution(&mut self, global_regs: &RegisterSet) -> Result<RegisterSet, SolverError> {
fn find_solution(
&mut self,
global_regs: &RegisterSet,
is_reload: bool,
) -> Result<RegisterSet, SolverError> {
// Available registers on the input and output sides respectively.
let mut iregs = self.regs_in.clone();
let mut oregs = self.regs_out.clone();
@@ -917,7 +929,20 @@ impl Solver {
for v in &mut self.vars {
let rc = v.constraint;
let reg = match v.iter(&iregs, &oregs, &gregs).next() {
// Decide which register to assign. In order to try and keep registers holding
// reloaded values separate from all other registers to the extent possible, we choose
// the first available register in the normal case, but the last available one in the
// case of a reload. See "A side note on register choice heuristics" in
// src/redundant_reload_remover.rs for further details.
let mut reg_set_iter = v.iter(&iregs, &oregs, &gregs);
let maybe_reg = if is_reload {
reg_set_iter.rnext()
} else {
reg_set_iter.next()
};
let reg = match maybe_reg {
Some(reg) => reg,
None => {
// If `v` must avoid global interference, there is no point in requesting
@@ -1207,7 +1232,7 @@ mod tests {
solver.reset(&regs);
solver.reassign_in(v10, gpr, r1, r0);
solver.inputs_done();
assert!(solver.quick_solve(&gregs).is_ok());
assert!(solver.quick_solve(&gregs, false).is_ok());
assert_eq!(solver.schedule_moves(&regs), 0);
assert_eq!(solver.moves(), &[mov(v10, gpr, r1, r0)]);
@@ -1217,7 +1242,7 @@ mod tests {
solver.reassign_in(v10, gpr, r0, r1);
solver.reassign_in(v11, gpr, r1, r2);
solver.inputs_done();
assert!(solver.quick_solve(&gregs).is_ok());
assert!(solver.quick_solve(&gregs, false).is_ok());
assert_eq!(solver.schedule_moves(&regs), 0);
assert_eq!(
solver.moves(),
@@ -1229,7 +1254,7 @@ mod tests {
solver.reassign_in(v10, gpr, r0, r1);
solver.reassign_in(v11, gpr, r1, r0);
solver.inputs_done();
assert!(solver.quick_solve(&gregs).is_ok());
assert!(solver.quick_solve(&gregs, false).is_ok());
assert_eq!(solver.schedule_moves(&regs), 0);
assert_eq!(
solver.moves(),
@@ -1269,7 +1294,7 @@ mod tests {
solver.reassign_in(v11, s, s2, s0);
solver.reassign_in(v12, s, s3, s1);
solver.inputs_done();
assert!(solver.quick_solve(&gregs).is_ok());
assert!(solver.quick_solve(&gregs, false).is_ok());
assert_eq!(solver.schedule_moves(&regs), 0);
assert_eq!(
solver.moves(),
@@ -1290,7 +1315,7 @@ mod tests {
solver.reassign_in(v12, s, s1, s3);
solver.reassign_in(v10, d, d1, d0);
solver.inputs_done();
assert!(solver.quick_solve(&gregs).is_ok());
assert!(solver.quick_solve(&gregs, false).is_ok());
assert_eq!(solver.schedule_moves(&regs), 0);
assert_eq!(
solver.moves(),
@@ -1335,7 +1360,7 @@ mod tests {
solver.reassign_in(v11, gpr, r1, r2);
solver.reassign_in(v12, gpr, r2, r0);
solver.inputs_done();
assert!(solver.quick_solve(&gregs).is_ok());
assert!(solver.quick_solve(&gregs, false).is_ok());
assert_eq!(solver.schedule_moves(&regs), 1);
assert_eq!(
solver.moves(),
@@ -1359,7 +1384,7 @@ mod tests {
solver.reassign_in(v15, gpr, r5, r3);
solver.inputs_done();
assert!(solver.quick_solve(&gregs).is_ok());
assert!(solver.quick_solve(&gregs, false).is_ok());
// We resolve two cycles with one spill.
assert_eq!(solver.schedule_moves(&regs), 1);
assert_eq!(