Switch Cranelift over to regalloc2. (#3989)
This PR switches Cranelift over to the new register allocator, regalloc2. See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801) for a summary of the design changes. This switchover has implications for core VCode/MachInst types and the lowering pass. Overall, this change brings improvements to both compile time and speed of generated code (runtime), as reported in #3942: ``` Benchmark Compilation (wallclock) Execution (wallclock) blake3-scalar 25% faster 28% faster blake3-simd no diff no diff meshoptimizer 19% faster 17% faster pulldown-cmark 17% faster no diff bz2 15% faster no diff SpiderMonkey, 21% faster 2% faster fib(30) clang.wasm 42% faster N/A ```
This commit is contained in:
@@ -2,12 +2,11 @@
|
||||
|
||||
use crate::ir::Function;
|
||||
use crate::isa::TargetIsa;
|
||||
use crate::log::DeferredDisplay;
|
||||
use crate::machinst::*;
|
||||
use crate::settings;
|
||||
use crate::timing;
|
||||
|
||||
use regalloc::{allocate_registers_with_opts, Algorithm, Options, PrettyPrint};
|
||||
use regalloc2::RegallocOptions;
|
||||
use regalloc2::{self, MachineEnv};
|
||||
|
||||
/// Compile the given function down to VCode with allocated registers, ready
|
||||
/// for binary emission.
|
||||
@@ -15,100 +14,38 @@ pub fn compile<B: LowerBackend + TargetIsa>(
|
||||
f: &Function,
|
||||
b: &B,
|
||||
abi: Box<dyn ABICallee<I = B::MInst>>,
|
||||
reg_universe: &RealRegUniverse,
|
||||
machine_env: &MachineEnv,
|
||||
emit_info: <B::MInst as MachInstEmit>::Info,
|
||||
) -> CodegenResult<VCode<B::MInst>>
|
||||
where
|
||||
B::MInst: PrettyPrint,
|
||||
{
|
||||
) -> CodegenResult<(VCode<B::MInst>, regalloc2::Output)> {
|
||||
// Compute lowered block order.
|
||||
let block_order = BlockLoweringOrder::new(f);
|
||||
// Build the lowering context.
|
||||
let lower = Lower::new(f, abi, emit_info, block_order)?;
|
||||
// Lower the IR.
|
||||
let (mut vcode, stack_map_request_info) = {
|
||||
let vcode = {
|
||||
let _tt = timing::vcode_lower();
|
||||
lower.lower(b)?
|
||||
};
|
||||
|
||||
// Creating the vcode string representation may be costly for large functions, so defer its
|
||||
// rendering.
|
||||
log::trace!(
|
||||
"vcode from lowering: \n{}",
|
||||
DeferredDisplay::new(|| vcode.show_rru(Some(reg_universe)))
|
||||
);
|
||||
log::trace!("vcode from lowering: \n{:?}", vcode);
|
||||
|
||||
// Perform register allocation.
|
||||
let (run_checker, algorithm) = match vcode.flags().regalloc() {
|
||||
settings::Regalloc::Backtracking => (false, Algorithm::Backtracking(Default::default())),
|
||||
settings::Regalloc::BacktrackingChecked => {
|
||||
(true, Algorithm::Backtracking(Default::default()))
|
||||
}
|
||||
settings::Regalloc::ExperimentalLinearScan => {
|
||||
(false, Algorithm::LinearScan(Default::default()))
|
||||
}
|
||||
settings::Regalloc::ExperimentalLinearScanChecked => {
|
||||
(true, Algorithm::LinearScan(Default::default()))
|
||||
}
|
||||
};
|
||||
|
||||
#[cfg(feature = "regalloc-snapshot")]
|
||||
{
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
if let Some(path) = std::env::var("SERIALIZE_REGALLOC").ok() {
|
||||
let snapshot = regalloc::IRSnapshot::from_function(&vcode, reg_universe);
|
||||
let serialized = bincode::serialize(&snapshot).expect("couldn't serialize snapshot");
|
||||
|
||||
let file_path = Path::new(&path).join(Path::new(&format!("ir{}.bin", f.name)));
|
||||
fs::write(file_path, &serialized).expect("couldn't write IR snapshot file");
|
||||
}
|
||||
}
|
||||
|
||||
// If either there are no reference-typed values, or else there are
|
||||
// but there are no safepoints at which we need to know about them,
|
||||
// then we don't need stack maps.
|
||||
let sri = if stack_map_request_info.reftyped_vregs.len() > 0
|
||||
&& stack_map_request_info.safepoint_insns.len() > 0
|
||||
{
|
||||
Some(&stack_map_request_info)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let result = {
|
||||
let regalloc_result = {
|
||||
let _tt = timing::regalloc();
|
||||
allocate_registers_with_opts(
|
||||
&mut vcode,
|
||||
reg_universe,
|
||||
sri,
|
||||
Options {
|
||||
run_checker,
|
||||
algorithm,
|
||||
},
|
||||
)
|
||||
.map_err(|err| {
|
||||
log::error!(
|
||||
"Register allocation error for vcode\n{}\nError: {:?}",
|
||||
vcode.show_rru(Some(reg_universe)),
|
||||
let mut options = RegallocOptions::default();
|
||||
options.verbose_log = log::log_enabled!(log::Level::Trace);
|
||||
regalloc2::run(&vcode, machine_env, &options)
|
||||
.map_err(|err| {
|
||||
log::error!(
|
||||
"Register allocation error for vcode\n{:?}\nError: {:?}\nCLIF for error:\n{:?}",
|
||||
vcode,
|
||||
err,
|
||||
f,
|
||||
);
|
||||
err
|
||||
);
|
||||
err
|
||||
})
|
||||
.expect("register allocation")
|
||||
})
|
||||
.expect("register allocation")
|
||||
};
|
||||
|
||||
// Reorder vcode into final order and copy out final instruction sequence
|
||||
// all at once. This also inserts prologues/epilogues.
|
||||
{
|
||||
let _tt = timing::vcode_post_ra();
|
||||
vcode.replace_insns_from_regalloc(result);
|
||||
}
|
||||
|
||||
log::trace!(
|
||||
"vcode after regalloc: final version:\n{}",
|
||||
DeferredDisplay::new(|| vcode.show_rru(Some(reg_universe)))
|
||||
);
|
||||
|
||||
Ok(vcode)
|
||||
Ok((vcode, regalloc_result))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user