Switch Cranelift over to regalloc2. (#3989)
This PR switches Cranelift over to the new register allocator, regalloc2. See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801) for a summary of the design changes. This switchover has implications for core VCode/MachInst types and the lowering pass. Overall, this change brings improvements to both compile time and speed of generated code (runtime), as reported in #3942: ``` Benchmark Compilation (wallclock) Execution (wallclock) blake3-scalar 25% faster 28% faster blake3-simd no diff no diff meshoptimizer 19% faster 17% faster pulldown-cmark 17% faster no diff bz2 15% faster no diff SpiderMonkey, 21% faster 2% faster fib(30) clang.wasm 42% faster N/A ```
This commit is contained in:
@@ -1,8 +1,7 @@
|
||||
use crate::ir::{types, Inst, Value, ValueList};
|
||||
use crate::machinst::{get_output_reg, InsnOutput, LowerCtx, MachInst, RegRenamer};
|
||||
use crate::machinst::{get_output_reg, InsnOutput, LowerCtx, Reg, Writable};
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use regalloc::{Reg, Writable};
|
||||
use smallvec::SmallVec;
|
||||
use std::cell::Cell;
|
||||
|
||||
@@ -107,7 +106,8 @@ macro_rules! isle_prelude_methods {
|
||||
|
||||
#[inline]
|
||||
fn invalid_reg(&mut self) -> Reg {
|
||||
Reg::invalid()
|
||||
use crate::machinst::valueregs::InvalidSentinel;
|
||||
Reg::invalid_sentinel()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@@ -467,7 +467,6 @@ where
|
||||
pub lower_ctx: &'a mut C,
|
||||
pub flags: &'a F,
|
||||
pub isa_flags: &'a I,
|
||||
pub emitted_insts: SmallVec<[(C::I, bool); N]>,
|
||||
}
|
||||
|
||||
/// Shared lowering code amongst all backends for doing ISLE-based lowering.
|
||||
@@ -482,7 +481,6 @@ pub(crate) fn lower_common<C, F, I, IF, const N: usize>(
|
||||
outputs: &[InsnOutput],
|
||||
inst: Inst,
|
||||
isle_lower: IF,
|
||||
map_regs: fn(&mut C::I, &RegRenamer),
|
||||
) -> Result<(), ()>
|
||||
where
|
||||
C: LowerCtx,
|
||||
@@ -495,7 +493,6 @@ where
|
||||
lower_ctx,
|
||||
flags,
|
||||
isa_flags,
|
||||
emitted_insts: SmallVec::new(),
|
||||
};
|
||||
|
||||
let temp_regs = isle_lower(&mut isle_ctx, inst).ok_or(())?;
|
||||
@@ -514,10 +511,15 @@ where
|
||||
}
|
||||
|
||||
// The ISLE generated code emits its own registers to define the
|
||||
// instruction's lowered values in. We rename those registers to the
|
||||
// registers they were assigned when their value was used as an operand in
|
||||
// earlier lowerings.
|
||||
let mut renamer = RegRenamer::default();
|
||||
// instruction's lowered values in. However, other instructions
|
||||
// that use this SSA value will be lowered assuming that the value
|
||||
// is generated into a pre-assigned, different, register.
|
||||
//
|
||||
// To connect the two, we set up "aliases" in the VCodeBuilder
|
||||
// that apply when it is building the Operand table for the
|
||||
// regalloc to use. These aliases effectively rewrite any use of
|
||||
// the pre-assigned register to the register that was returned by
|
||||
// the ISLE lowering logic.
|
||||
for i in 0..outputs.len() {
|
||||
let regs = temp_regs[i];
|
||||
let dsts = get_output_reg(isle_ctx.lower_ctx, outputs[i]);
|
||||
@@ -528,41 +530,11 @@ where
|
||||
// Flags values do not occupy any registers.
|
||||
assert!(regs.len() == 0);
|
||||
} else {
|
||||
let (_, tys) = <C::I>::rc_for_type(ty).unwrap();
|
||||
assert!(regs.len() == tys.len());
|
||||
assert!(regs.len() == dsts.len());
|
||||
for ((dst, temp), ty) in dsts.regs().iter().zip(regs.regs().iter()).zip(tys) {
|
||||
renamer.add_rename(*temp, dst.to_reg(), *ty);
|
||||
for (dst, temp) in dsts.regs().iter().zip(regs.regs().iter()) {
|
||||
isle_ctx.lower_ctx.set_vreg_alias(dst.to_reg(), *temp);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (inst, _) in isle_ctx.emitted_insts.iter_mut() {
|
||||
map_regs(inst, &renamer);
|
||||
}
|
||||
|
||||
// If any renamed register wasn't actually defined in the ISLE-generated
|
||||
// instructions then what we're actually doing is "renaming" an input to a
|
||||
// new name which requires manually inserting a `mov` instruction. Note that
|
||||
// this typically doesn't happen and is only here for cases where the input
|
||||
// is sometimes passed through unmodified to the output, such as
|
||||
// zero-extending a 64-bit input to a 128-bit output which doesn't actually
|
||||
// change the input and simply produces another zero'd register.
|
||||
for (old, new, ty) in renamer.unmapped_defs() {
|
||||
isle_ctx
|
||||
.lower_ctx
|
||||
.emit(<C::I>::gen_move(Writable::from_reg(new), old, ty));
|
||||
}
|
||||
|
||||
// Once everything is remapped we forward all emitted instructions to the
|
||||
// `lower_ctx`. Note that this happens after the synthetic mov's above in
|
||||
// case any of these instruction use those movs.
|
||||
for (inst, is_safepoint) in isle_ctx.emitted_insts {
|
||||
if is_safepoint {
|
||||
lower_ctx.emit_safepoint(inst);
|
||||
} else {
|
||||
lower_ctx.emit(inst);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user