Switch Cranelift over to regalloc2. (#3989)

This PR switches Cranelift over to the new register allocator, regalloc2.

See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801)
for a summary of the design changes. This switchover has implications for
core VCode/MachInst types and the lowering pass.

Overall, this change brings improvements to both compile time and speed of
generated code (runtime), as reported in #3942:

```
Benchmark       Compilation (wallclock)     Execution (wallclock)
blake3-scalar   25% faster                  28% faster
blake3-simd     no diff                     no diff
meshoptimizer   19% faster                  17% faster
pulldown-cmark  17% faster                  no diff
bz2             15% faster                  no diff
SpiderMonkey,   21% faster                  2% faster
  fib(30)
clang.wasm      42% faster                  N/A
```
This commit is contained in:
Chris Fallin
2022-04-14 10:28:21 -07:00
committed by GitHub
parent bfae6384aa
commit a0318f36f0
181 changed files with 16887 additions and 21587 deletions

View File

@@ -125,6 +125,7 @@
use super::abi::*;
use crate::binemit::StackMap;
use crate::fx::FxHashSet;
use crate::ir::types::*;
use crate::ir::{ArgumentExtension, ArgumentPurpose, StackSlot};
use crate::machinst::*;
@@ -132,7 +133,6 @@ use crate::settings;
use crate::CodegenResult;
use crate::{ir, isa};
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
use smallvec::{smallvec, SmallVec};
use std::convert::TryFrom;
use std::marker::PhantomData;
@@ -257,16 +257,6 @@ pub enum ArgsOrRets {
Rets,
}
/// Is an instruction returned by an ABI machine-specific backend a safepoint,
/// or not?
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InstIsSafepoint {
/// The instruction is a safepoint.
Yes,
/// The instruction is not a safepoint.
No,
}
/// Abstract location for a machine-specific ABI impl to translate into the
/// appropriate addressing mode.
#[derive(Clone, Copy, Debug)]
@@ -319,11 +309,7 @@ pub trait ABIMachineSpec {
/// Returns word register class.
fn word_reg_class() -> RegClass {
match Self::word_bits() {
32 => RegClass::I32,
64 => RegClass::I64,
_ => unreachable!(),
}
RegClass::Int
}
/// Returns required stack alignment in bytes.
@@ -366,7 +352,7 @@ pub trait ABIMachineSpec {
) -> Self::I;
/// Generate a return instruction.
fn gen_ret() -> Self::I;
fn gen_ret(rets: Vec<Reg>) -> Self::I;
/// Generate an "epilogue placeholder" instruction, recognized by lowering
/// when using the Baldrdash ABI.
@@ -442,7 +428,7 @@ pub trait ABIMachineSpec {
/// contains the registers in a sorted order.
fn get_clobbered_callee_saves(
call_conv: isa::CallConv,
regs: &Set<Writable<RealReg>>,
regs: &[Writable<RealReg>],
) -> Vec<Writable<RealReg>>;
/// Determine whether it is necessary to generate the usual frame-setup
@@ -466,7 +452,7 @@ pub trait ABIMachineSpec {
call_conv: isa::CallConv,
setup_frame: bool,
flags: &settings::Flags,
clobbered_callee_saves: &Vec<Writable<RealReg>>,
clobbered_callee_saves: &[Writable<RealReg>],
fixed_frame_storage_size: u32,
outgoing_args_size: u32,
) -> (u64, SmallVec<[Self::I; 16]>);
@@ -478,7 +464,7 @@ pub trait ABIMachineSpec {
fn gen_clobber_restore(
call_conv: isa::CallConv,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
clobbers: &[Writable<RealReg>],
fixed_frame_storage_size: u32,
outgoing_args_size: u32,
) -> SmallVec<[Self::I; 16]>;
@@ -493,7 +479,7 @@ pub trait ABIMachineSpec {
tmp: Writable<Reg>,
callee_conv: isa::CallConv,
callee_conv: isa::CallConv,
) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>;
) -> SmallVec<[Self::I; 2]>;
/// Generate a memcpy invocation. Used to set up struct args. May clobber
/// caller-save registers; we only memcpy before we start to set up args for
@@ -530,6 +516,7 @@ pub trait ABIMachineSpec {
}
/// ABI information shared between body (callee) and caller.
#[derive(Clone)]
struct ABISig {
/// Argument locations (regs or stack slots). Stack offsets are relative to
/// SP on entry to function.
@@ -604,7 +591,7 @@ pub struct ABICalleeImpl<M: ABIMachineSpec> {
/// Stack size to be reserved for outgoing arguments.
outgoing_args_size: u32,
/// Clobbered registers, from regalloc.
clobbered: Set<Writable<RealReg>>,
clobbered: Vec<Writable<RealReg>>,
/// Total number of spillslots, from regalloc.
spillslots: Option<usize>,
/// Storage allocated for the fixed part of the stack frame. This is
@@ -655,24 +642,13 @@ fn get_special_purpose_param_register(
let idx = f.signature.special_param_index(purpose)?;
match &abi.args[idx] {
&ABIArg::Slots { ref slots, .. } => match &slots[0] {
&ABIArgSlot::Reg { reg, .. } => Some(reg.to_reg()),
&ABIArgSlot::Reg { reg, .. } => Some(reg.into()),
_ => None,
},
_ => None,
}
}
fn ty_from_class(class: RegClass) -> Type {
match class {
RegClass::I32 => I32,
RegClass::I64 => I64,
RegClass::F32 => F32,
RegClass::F64 => F64,
RegClass::V128 => I8X16,
_ => panic!("Unknown regclass: {:?}", class),
}
}
impl<M: ABIMachineSpec> ABICalleeImpl<M> {
/// Create a new body ABI instance.
pub fn new(
@@ -739,7 +715,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
stackslots,
stackslots_size: stack_offset,
outgoing_args_size: 0,
clobbered: Set::empty(),
clobbered: vec![],
spillslots: None,
fixed_frame_storage_size: 0,
total_frame_size: None,
@@ -961,34 +937,6 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
self.sig.call_conv
}
fn liveins(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for arg in &self.sig.args {
if let &ABIArg::Slots { ref slots, .. } = arg {
for slot in slots {
if let ABIArgSlot::Reg { reg, .. } = slot {
set.insert(*reg);
}
}
}
}
set
}
fn liveouts(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for ret in &self.sig.rets {
if let &ABIArg::Slots { ref slots, .. } = ret {
for slot in slots {
if let ABIArgSlot::Reg { reg, .. } = slot {
set.insert(*reg);
}
}
}
}
set
}
fn num_args(&self) -> usize {
self.sig.args.len()
}
@@ -1019,7 +967,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
// Extension mode doesn't matter (we're copying out, not in; we
// ignore high bits by convention).
&ABIArgSlot::Reg { reg, ty, .. } => {
insts.push(M::gen_move(*into_reg, reg.to_reg(), ty));
insts.push(M::gen_move(*into_reg, reg.into(), ty));
}
&ABIArgSlot::Stack { offset, ty, .. } => {
insts.push(M::gen_load_stack(
@@ -1069,20 +1017,21 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
match &self.sig.rets[idx] {
&ABIArg::Slots { ref slots, .. } => {
assert_eq!(from_regs.len(), slots.len());
for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
for (slot, &from_reg) in slots.iter().zip(from_regs.regs().iter()) {
match slot {
&ABIArgSlot::Reg {
reg, ty, extension, ..
} => {
let from_bits = ty_bits(ty) as u8;
let ext = M::get_ext_mode(self.sig.call_conv, extension);
let reg: Writable<Reg> = Writable::from_reg(Reg::from(reg));
match (ext, from_bits) {
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
if n < word_bits =>
{
let signed = ext == ArgumentExtension::Sext;
ret.push(M::gen_extend(
Writable::from_reg(reg.to_reg()),
reg,
from_reg.to_reg(),
signed,
from_bits,
@@ -1090,11 +1039,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
));
}
_ => {
ret.push(M::gen_move(
Writable::from_reg(reg.to_reg()),
from_reg.to_reg(),
ty,
));
ret.push(M::gen_move(reg, from_reg.to_reg(), ty));
}
};
}
@@ -1118,7 +1063,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
if n < word_bits =>
{
assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class());
assert_eq!(M::word_reg_class(), from_reg.to_reg().class());
let signed = ext == ArgumentExtension::Sext;
ret.push(M::gen_extend(
Writable::from_reg(from_reg.to_reg()),
@@ -1166,7 +1111,22 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
}
fn gen_ret(&self) -> Self::I {
M::gen_ret()
let mut rets = vec![];
for ret in &self.sig.rets {
match ret {
ABIArg::Slots { slots, .. } => {
for slot in slots {
match slot {
ABIArgSlot::Reg { reg, .. } => rets.push(Reg::from(*reg)),
_ => {}
}
}
}
_ => {}
}
}
M::gen_ret(rets)
}
fn gen_epilogue_placeholder(&self) -> Self::I {
@@ -1177,7 +1137,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
self.spillslots = Some(slots);
}
fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>) {
fn set_clobbered(&mut self, clobbered: Vec<Writable<RealReg>>) {
self.clobbered = clobbered;
}
@@ -1198,7 +1158,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
into_regs: ValueRegs<Writable<Reg>>,
) -> SmallInstVec<Self::I> {
// Offset from beginning of spillslot area, which is at nominal SP + stackslots_size.
let islot = slot.get() as i64;
let islot = slot.index() as i64;
let spill_off = islot * M::word_bytes() as i64;
let sp_off = self.stackslots_size as i64 + spill_off;
log::trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
@@ -1214,7 +1174,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
from_regs: ValueRegs<Reg>,
) -> SmallInstVec<Self::I> {
// Offset from beginning of spillslot area, which is at nominal SP + stackslots_size.
let islot = slot.get() as i64;
let islot = slot.index() as i64;
let spill_off = islot * M::word_bytes() as i64;
let sp_off = self.stackslots_size as i64 + spill_off;
log::trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
@@ -1245,7 +1205,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
let first_spillslot_word =
((self.stackslots_size + virtual_sp_offset as u32) / bytes) as usize;
for &slot in slots {
let slot = slot.get() as usize;
let slot = slot.index();
bits[first_spillslot_word + slot] = true;
}
@@ -1347,7 +1307,10 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
insts.extend(M::gen_epilogue_frame_restore(&self.flags));
}
insts.push(M::gen_ret());
// This `ret` doesn't need any return registers attached
// because we are post-regalloc and don't need to
// represent the implicit uses anymore.
insts.push(M::gen_ret(vec![]));
}
log::trace!("Epilogue: {:?}", insts);
@@ -1368,19 +1331,19 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
}
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I {
let ty = ty_from_class(from_reg.to_reg().get_class());
self.store_spillslot(to_slot, ty, ValueRegs::one(from_reg.to_reg()))
let ty = Self::I::canonical_type_for_rc(Reg::from(from_reg).class());
self.store_spillslot(to_slot, ty, ValueRegs::one(Reg::from(from_reg)))
.into_iter()
.next()
.unwrap()
}
fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot) -> Self::I {
let ty = ty_from_class(to_reg.to_reg().get_class());
let ty = Self::I::canonical_type_for_rc(to_reg.to_reg().class());
self.load_spillslot(
from_slot,
ty,
writable_value_regs(ValueRegs::one(to_reg.to_reg().to_reg())),
writable_value_regs(ValueRegs::one(Reg::from(to_reg.to_reg()))),
)
.into_iter()
.next()
@@ -1390,13 +1353,13 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Writable<Reg>>) {
// Compute uses: all arg regs.
let mut uses = Vec::new();
let mut uses = FxHashSet::default();
for arg in &sig.args {
if let &ABIArg::Slots { ref slots, .. } = arg {
for slot in slots {
match slot {
&ABIArgSlot::Reg { reg, .. } => {
uses.push(reg.to_reg());
uses.insert(Reg::from(reg));
}
_ => {}
}
@@ -1405,13 +1368,15 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
}
// Compute defs: all retval regs, and all caller-save (clobbered) regs.
let mut defs = M::get_regs_clobbered_by_call(sig.call_conv);
let mut defs: FxHashSet<_> = M::get_regs_clobbered_by_call(sig.call_conv)
.into_iter()
.collect();
for ret in &sig.rets {
if let &ABIArg::Slots { ref slots, .. } = ret {
for slot in slots {
match slot {
&ABIArgSlot::Reg { reg, .. } => {
defs.push(Writable::from_reg(reg.to_reg()));
defs.insert(Writable::from_reg(Reg::from(reg)));
}
_ => {}
}
@@ -1419,6 +1384,11 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
}
}
let mut uses = uses.into_iter().collect::<Vec<_>>();
let mut defs = defs.into_iter().collect::<Vec<_>>();
uses.sort_unstable();
defs.sort_unstable();
(uses, defs)
}
@@ -1567,14 +1537,14 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
} => {
let ext = M::get_ext_mode(self.sig.call_conv, extension);
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
assert_eq!(word_rc, reg.get_class());
assert_eq!(word_rc, reg.class());
let signed = match ext {
ir::ArgumentExtension::Uext => false,
ir::ArgumentExtension::Sext => true,
_ => unreachable!(),
};
ctx.emit(M::gen_extend(
Writable::from_reg(reg.to_reg()),
Writable::from_reg(Reg::from(reg)),
*from_reg,
signed,
ty_bits(ty) as u8,
@@ -1582,7 +1552,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
));
} else {
ctx.emit(M::gen_move(
Writable::from_reg(reg.to_reg()),
Writable::from_reg(Reg::from(reg)),
*from_reg,
ty,
));
@@ -1597,7 +1567,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
let mut ty = ty;
let ext = M::get_ext_mode(self.sig.call_conv, extension);
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
assert_eq!(word_rc, from_reg.get_class());
assert_eq!(word_rc, from_reg.class());
let signed = match ext {
ir::ArgumentExtension::Uext => false,
ir::ArgumentExtension::Sext => true,
@@ -1680,7 +1650,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
// Extension mode doesn't matter because we're copying out, not in,
// and we ignore high bits in our own registers by convention.
&ABIArgSlot::Reg { reg, ty, .. } => {
ctx.emit(M::gen_move(*into_reg, reg.to_reg(), ty));
ctx.emit(M::gen_move(*into_reg, Reg::from(reg), ty));
}
&ABIArgSlot::Stack { offset, ty, .. } => {
let ret_area_base = self.sig.stack_arg_space;
@@ -1716,7 +1686,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
self.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(rd.to_reg()));
}
let tmp = ctx.alloc_tmp(word_type).only_reg().unwrap();
for (is_safepoint, inst) in M::gen_call(
for inst in M::gen_call(
&self.dest,
uses,
defs,
@@ -1727,10 +1697,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
)
.into_iter()
{
match is_safepoint {
InstIsSafepoint::Yes => ctx.emit_safepoint(inst),
InstIsSafepoint::No => ctx.emit(inst),
}
ctx.emit(inst);
}
}
}