Merge pull request #1846 from julian-seward1/better-phis

Rewrite `lower_edge` to produce better phi-translations:
This commit is contained in:
Chris Fallin
2020-06-09 09:56:52 -07:00
committed by GitHub

View File

@@ -412,112 +412,100 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
fn lower_edge(&mut self, pred: Block, inst: Inst, succ: Block) -> CodegenResult<()> { fn lower_edge(&mut self, pred: Block, inst: Inst, succ: Block) -> CodegenResult<()> {
debug!("lower_edge: pred {} succ {}", pred, succ); debug!("lower_edge: pred {} succ {}", pred, succ);
let mut src_regs: SmallVec<[Option<Reg>; 16]> = SmallVec::new(); let num_args = self.f.dfg.block_params(succ).len();
let mut src_consts: SmallVec<[Option<u64>; 16]> = SmallVec::new(); debug_assert!(num_args == self.f.dfg.inst_variable_args(inst).len());
let mut dst_regs: SmallVec<[Writable<Reg>; 16]> = SmallVec::new();
fn overlap(a: &[Option<Reg>], b: &[Writable<Reg>]) -> bool { // Most blocks have no params, so skip all the hoop-jumping below and make an early exit.
let mut set = FxHashSet::default(); if num_args == 0 {
for &maybe_reg in a { return Ok(());
if let Some(r) = maybe_reg {
set.insert(r);
}
}
for &reg in b {
if set.contains(&reg.to_reg()) {
return true;
}
}
false
} }
// Create a temporary for each block parameter. // Make up two vectors of info:
let phi_classes: SmallVec<[Type; 16]> = self //
// * one for dsts which are to be assigned constants. We'll deal with those second, so
// as to minimise live ranges.
//
// * one for dsts whose sources are non-constants.
let mut const_bundles = SmallVec::<[(Type, Writable<Reg>, u64); 16]>::new();
let mut var_bundles = SmallVec::<[(Type, Writable<Reg>, Reg); 16]>::new();
let mut i = 0;
for (dst_val, src_val) in self
.f .f
.dfg .dfg
.block_params(succ) .block_params(succ)
.iter() .iter()
.map(|p| self.f.dfg.value_type(*p)) .zip(self.f.dfg.inst_variable_args(inst).iter())
.collect(); {
let src_val = self.f.dfg.resolve_aliases(*src_val);
let ty = self.f.dfg.value_type(src_val);
debug_assert!(ty == self.f.dfg.value_type(*dst_val));
let dst_reg = self.value_regs[*dst_val];
let input = self.get_input_for_val(inst, src_val);
debug!("jump arg {} is {}, reg {:?}", i, src_val, input.reg);
i += 1;
// Create all of the phi uses (reads) from jump args to temps.
// Round up all the source and destination regs
for (i, arg) in self.f.dfg.inst_variable_args(inst).iter().enumerate() {
let arg = self.f.dfg.resolve_aliases(*arg);
let input = self.get_input_for_val(inst, arg);
debug!("jump arg {} is {}, reg {:?}", i, arg, input.reg);
if let Some(c) = input.constant { if let Some(c) = input.constant {
src_consts.push(Some(c)); const_bundles.push((ty, Writable::from_reg(dst_reg), c));
src_regs.push(None);
} else { } else {
self.use_input_reg(input); self.use_input_reg(input);
src_regs.push(Some(input.reg)); let src_reg = input.reg;
src_consts.push(None); // Skip self-assignments. Not only are they pointless, they falsely trigger the
// overlap-check below and hence can cause a lot of unnecessary copying through
// temporaries.
if dst_reg != src_reg {
var_bundles.push((ty, Writable::from_reg(dst_reg), src_reg));
}
} }
} }
for (i, param) in self.f.dfg.block_params(succ).iter().enumerate() {
debug!("bb arg {} is {}", i, param);
dst_regs.push(Writable::from_reg(self.value_regs[*param]));
}
debug_assert!(src_regs.len() == dst_regs.len());
debug_assert!(src_consts.len() == dst_regs.len());
debug_assert!(phi_classes.len() == dst_regs.len());
debug!(
"src_regs = {:?} src_consts = {:?} dst_regs = {:?}",
src_regs, src_consts, dst_regs
);
// If, as is mostly the case, the source and destination register // Deal first with the moves whose sources are variables.
// sets are non overlapping, then we can copy directly, so as to
// save the register allocator work. // FIXME: use regalloc.rs' SparseSetU here. This would avoid all heap allocation
if !overlap(&src_regs[..], &dst_regs[..]) { // for cases of up to circa 16 args. Currently not possible because regalloc.rs
for i in 0..dst_regs.len() { // does not export it.
let src_reg = src_regs[i]; let mut src_reg_set = FxHashSet::<Reg>::default();
let src_const = src_consts[i]; for (_, _, src_reg) in &var_bundles {
let dst_reg = dst_regs[i]; src_reg_set.insert(*src_reg);
let ty = phi_classes[i]; }
if let Some(r) = src_reg { let mut overlaps = false;
self.emit(I::gen_move(dst_reg, r, ty)); for (_, dst_reg, _) in &var_bundles {
} else { if src_reg_set.contains(&dst_reg.to_reg()) {
// Generate constants fresh in phi-edge to avoid long overlaps = true;
// live-ranges. Note that these are also excluded from the break;
// overlap check, which increases the chance that we don't }
// have to do a two-stage copy. }
for inst in I::gen_constant(dst_reg, src_const.unwrap(), ty).into_iter() {
self.emit(inst); // If, as is mostly the case, the source and destination register sets are non
} // overlapping, then we can copy directly, so as to save the register allocator work.
} if !overlaps {
for (ty, dst_reg, src_reg) in &var_bundles {
self.emit(I::gen_move(*dst_reg, *src_reg, *ty));
} }
} else { } else {
// There's some overlap, so play safe and copy via temps. // There's some overlap, so play safe and copy via temps.
let mut tmp_regs: SmallVec<[Writable<Reg>; 16]> = SmallVec::new(); let mut tmp_regs = SmallVec::<[Writable<Reg>; 16]>::new();
for &ty in &phi_classes { for (ty, _, _) in &var_bundles {
tmp_regs.push(self.alloc_tmp(I::rc_for_type(ty)?, ty)); tmp_regs.push(self.alloc_tmp(I::rc_for_type(*ty)?, *ty));
} }
for ((ty, _, src_reg), tmp_reg) in var_bundles.iter().zip(tmp_regs.iter()) {
debug!("phi_temps = {:?}", tmp_regs); self.emit(I::gen_move(*tmp_reg, *src_reg, *ty));
debug_assert!(tmp_regs.len() == src_regs.len());
for i in 0..dst_regs.len() {
let src_reg = src_regs[i];
let tmp_reg = tmp_regs[i];
let ty = phi_classes[i];
let src_const = src_consts[i];
if let Some(src_reg) = src_reg {
self.emit(I::gen_move(tmp_reg, src_reg, ty));
} else {
for inst in I::gen_constant(tmp_reg, src_const.unwrap(), ty).into_iter() {
self.emit(inst);
}
}
} }
for i in 0..dst_regs.len() { for ((ty, dst_reg, _), tmp_reg) in var_bundles.iter().zip(tmp_regs.iter()) {
let tmp_reg = tmp_regs[i].to_reg(); self.emit(I::gen_move(*dst_reg, (*tmp_reg).to_reg(), *ty));
let dst_reg = dst_regs[i];
let ty = phi_classes[i];
self.emit(I::gen_move(dst_reg, tmp_reg, ty));
} }
} }
// Now, finally, deal with the moves whose sources are constants.
for (ty, dst_reg, const_u64) in &const_bundles {
for inst in I::gen_constant(*dst_reg, *const_u64, *ty).into_iter() {
self.emit(inst);
}
}
Ok(()) Ok(())
} }