Switch Cranelift over to regalloc2. (#3989)

This PR switches Cranelift over to the new register allocator, regalloc2.

See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801)
for a summary of the design changes. This switchover has implications for
core VCode/MachInst types and the lowering pass.

Overall, this change brings improvements to both compile time and speed of
generated code (runtime), as reported in #3942:

```
Benchmark       Compilation (wallclock)     Execution (wallclock)
blake3-scalar   25% faster                  28% faster
blake3-simd     no diff                     no diff
meshoptimizer   19% faster                  17% faster
pulldown-cmark  17% faster                  no diff
bz2             15% faster                  no diff
SpiderMonkey,   21% faster                  2% faster
  fib(30)
clang.wasm      42% faster                  N/A
```
This commit is contained in:
Chris Fallin
2022-04-14 10:28:21 -07:00
committed by GitHub
parent bfae6384aa
commit a0318f36f0
181 changed files with 16887 additions and 21587 deletions

View File

@@ -11,7 +11,7 @@ use crate::result::CodegenResult;
use crate::settings as shared_settings;
use alloc::{boxed::Box, vec::Vec};
use core::fmt;
use regalloc::{PrettyPrint, RealRegUniverse};
use regalloc2::MachineEnv;
use target_lexicon::{Aarch64Architecture, Architecture, Triple};
// New backend:
@@ -21,7 +21,7 @@ mod lower;
mod lower_inst;
mod settings;
use inst::create_reg_universe;
use inst::create_reg_env;
use self::inst::EmitInfo;
@@ -30,7 +30,7 @@ pub struct AArch64Backend {
triple: Triple,
flags: shared_settings::Flags,
isa_flags: aarch64_settings::Flags,
reg_universe: RealRegUniverse,
machine_env: MachineEnv,
}
impl AArch64Backend {
@@ -40,12 +40,12 @@ impl AArch64Backend {
flags: shared_settings::Flags,
isa_flags: aarch64_settings::Flags,
) -> AArch64Backend {
let reg_universe = create_reg_universe(&flags);
let machine_env = create_reg_env(&flags);
AArch64Backend {
triple,
flags,
isa_flags,
reg_universe,
machine_env,
}
}
@@ -55,10 +55,10 @@ impl AArch64Backend {
&self,
func: &Function,
flags: shared_settings::Flags,
) -> CodegenResult<VCode<inst::Inst>> {
) -> CodegenResult<(VCode<inst::Inst>, regalloc2::Output)> {
let emit_info = EmitInfo::new(flags.clone());
let abi = Box::new(abi::AArch64ABICallee::new(func, flags, self.isa_flags())?);
compile::compile::<AArch64Backend>(func, self, abi, &self.reg_universe, emit_info)
compile::compile::<AArch64Backend>(func, self, abi, &self.machine_env, emit_info)
}
}
@@ -69,28 +69,27 @@ impl TargetIsa for AArch64Backend {
want_disasm: bool,
) -> CodegenResult<MachCompileResult> {
let flags = self.flags();
let vcode = self.compile_vcode(func, flags.clone())?;
let (vcode, regalloc_result) = self.compile_vcode(func, flags.clone())?;
let (buffer, bb_starts, bb_edges) = vcode.emit();
let frame_size = vcode.frame_size();
let stackslot_offsets = vcode.stackslot_offsets().clone();
let want_disasm = want_disasm || log::log_enabled!(log::Level::Debug);
let emit_result = vcode.emit(&regalloc_result, want_disasm, flags.machine_code_cfg_info());
let frame_size = emit_result.frame_size;
let value_labels_ranges = emit_result.value_labels_ranges;
let buffer = emit_result.buffer.finish();
let stackslot_offsets = emit_result.stackslot_offsets;
let disasm = if want_disasm {
Some(vcode.show_rru(Some(&create_reg_universe(flags))))
} else {
None
};
let buffer = buffer.finish();
if let Some(disasm) = emit_result.disasm.as_ref() {
log::debug!("disassembly:\n{}", disasm);
}
Ok(MachCompileResult {
buffer,
frame_size,
disasm,
value_labels_ranges: Default::default(),
disasm: emit_result.disasm,
value_labels_ranges,
stackslot_offsets,
bb_starts,
bb_edges,
bb_starts: emit_result.bb_offsets,
bb_edges: emit_result.bb_edges,
})
}
@@ -218,11 +217,11 @@ mod test {
let buffer = backend.compile_function(&mut func, false).unwrap().buffer;
let code = buffer.data();
// mov x1, #0x1234
// add w0, w0, w1
// mov x3, #0x1234
// add w0, w0, w3
// ret
let golden = vec![
0x81, 0x46, 0x82, 0xd2, 0x00, 0x00, 0x01, 0x0b, 0xc0, 0x03, 0x5f, 0xd6,
0x83, 0x46, 0x82, 0xd2, 0x00, 0x00, 0x03, 0x0b, 0xc0, 0x03, 0x5f, 0xd6,
];
assert_eq!(code, &golden[..]);
@@ -273,23 +272,24 @@ mod test {
.unwrap();
let code = result.buffer.data();
// mov x1, #0x1234 // #4660
// add w0, w0, w1
// mov w1, w0
// cbnz x1, 0x28
// mov x1, #0x1234 // #4660
// add w1, w0, w1
// mov w1, w1
// cbnz x1, 0x18
// mov w1, w0
// cbnz x1, 0x18
// mov x1, #0x1234 // #4660
// sub w0, w0, w1
// mov x10, #0x1234 // #4660
// add w12, w0, w10
// mov w11, w12
// cbnz x11, 0x20
// mov x13, #0x1234 // #4660
// add w15, w12, w13
// mov w14, w15
// cbnz x14, 0x10
// mov w1, w12
// cbnz x1, 0x10
// mov x2, #0x1234 // #4660
// sub w0, w12, w2
// ret
let golden = vec![
129, 70, 130, 210, 0, 0, 1, 11, 225, 3, 0, 42, 161, 0, 0, 181, 129, 70, 130, 210, 1, 0,
1, 11, 225, 3, 1, 42, 161, 255, 255, 181, 225, 3, 0, 42, 97, 255, 255, 181, 129, 70,
130, 210, 0, 0, 1, 75, 192, 3, 95, 214,
138, 70, 130, 210, 12, 0, 10, 11, 235, 3, 12, 42, 171, 0, 0, 181, 141, 70, 130, 210,
143, 1, 13, 11, 238, 3, 15, 42, 174, 255, 255, 181, 225, 3, 12, 42, 97, 255, 255, 181,
130, 70, 130, 210, 128, 1, 2, 75, 192, 3, 95, 214,
];
assert_eq!(code, &golden[..]);