Switch Cranelift over to regalloc2. (#3989)
This PR switches Cranelift over to the new register allocator, regalloc2. See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801) for a summary of the design changes. This switchover has implications for core VCode/MachInst types and the lowering pass. Overall, this change brings improvements to both compile time and speed of generated code (runtime), as reported in #3942: ``` Benchmark Compilation (wallclock) Execution (wallclock) blake3-scalar 25% faster 28% faster blake3-simd no diff no diff meshoptimizer 19% faster 17% faster pulldown-cmark 17% faster no diff bz2 15% faster no diff SpiderMonkey, 21% faster 2% faster fib(30) clang.wasm 42% faster N/A ```
This commit is contained in:
@@ -125,6 +125,7 @@
|
||||
|
||||
use super::abi::*;
|
||||
use crate::binemit::StackMap;
|
||||
use crate::fx::FxHashSet;
|
||||
use crate::ir::types::*;
|
||||
use crate::ir::{ArgumentExtension, ArgumentPurpose, StackSlot};
|
||||
use crate::machinst::*;
|
||||
@@ -132,7 +133,6 @@ use crate::settings;
|
||||
use crate::CodegenResult;
|
||||
use crate::{ir, isa};
|
||||
use alloc::vec::Vec;
|
||||
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
|
||||
use smallvec::{smallvec, SmallVec};
|
||||
use std::convert::TryFrom;
|
||||
use std::marker::PhantomData;
|
||||
@@ -257,16 +257,6 @@ pub enum ArgsOrRets {
|
||||
Rets,
|
||||
}
|
||||
|
||||
/// Is an instruction returned by an ABI machine-specific backend a safepoint,
|
||||
/// or not?
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum InstIsSafepoint {
|
||||
/// The instruction is a safepoint.
|
||||
Yes,
|
||||
/// The instruction is not a safepoint.
|
||||
No,
|
||||
}
|
||||
|
||||
/// Abstract location for a machine-specific ABI impl to translate into the
|
||||
/// appropriate addressing mode.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
@@ -319,11 +309,7 @@ pub trait ABIMachineSpec {
|
||||
|
||||
/// Returns word register class.
|
||||
fn word_reg_class() -> RegClass {
|
||||
match Self::word_bits() {
|
||||
32 => RegClass::I32,
|
||||
64 => RegClass::I64,
|
||||
_ => unreachable!(),
|
||||
}
|
||||
RegClass::Int
|
||||
}
|
||||
|
||||
/// Returns required stack alignment in bytes.
|
||||
@@ -366,7 +352,7 @@ pub trait ABIMachineSpec {
|
||||
) -> Self::I;
|
||||
|
||||
/// Generate a return instruction.
|
||||
fn gen_ret() -> Self::I;
|
||||
fn gen_ret(rets: Vec<Reg>) -> Self::I;
|
||||
|
||||
/// Generate an "epilogue placeholder" instruction, recognized by lowering
|
||||
/// when using the Baldrdash ABI.
|
||||
@@ -442,7 +428,7 @@ pub trait ABIMachineSpec {
|
||||
/// contains the registers in a sorted order.
|
||||
fn get_clobbered_callee_saves(
|
||||
call_conv: isa::CallConv,
|
||||
regs: &Set<Writable<RealReg>>,
|
||||
regs: &[Writable<RealReg>],
|
||||
) -> Vec<Writable<RealReg>>;
|
||||
|
||||
/// Determine whether it is necessary to generate the usual frame-setup
|
||||
@@ -466,7 +452,7 @@ pub trait ABIMachineSpec {
|
||||
call_conv: isa::CallConv,
|
||||
setup_frame: bool,
|
||||
flags: &settings::Flags,
|
||||
clobbered_callee_saves: &Vec<Writable<RealReg>>,
|
||||
clobbered_callee_saves: &[Writable<RealReg>],
|
||||
fixed_frame_storage_size: u32,
|
||||
outgoing_args_size: u32,
|
||||
) -> (u64, SmallVec<[Self::I; 16]>);
|
||||
@@ -478,7 +464,7 @@ pub trait ABIMachineSpec {
|
||||
fn gen_clobber_restore(
|
||||
call_conv: isa::CallConv,
|
||||
flags: &settings::Flags,
|
||||
clobbers: &Set<Writable<RealReg>>,
|
||||
clobbers: &[Writable<RealReg>],
|
||||
fixed_frame_storage_size: u32,
|
||||
outgoing_args_size: u32,
|
||||
) -> SmallVec<[Self::I; 16]>;
|
||||
@@ -493,7 +479,7 @@ pub trait ABIMachineSpec {
|
||||
tmp: Writable<Reg>,
|
||||
callee_conv: isa::CallConv,
|
||||
callee_conv: isa::CallConv,
|
||||
) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>;
|
||||
) -> SmallVec<[Self::I; 2]>;
|
||||
|
||||
/// Generate a memcpy invocation. Used to set up struct args. May clobber
|
||||
/// caller-save registers; we only memcpy before we start to set up args for
|
||||
@@ -530,6 +516,7 @@ pub trait ABIMachineSpec {
|
||||
}
|
||||
|
||||
/// ABI information shared between body (callee) and caller.
|
||||
#[derive(Clone)]
|
||||
struct ABISig {
|
||||
/// Argument locations (regs or stack slots). Stack offsets are relative to
|
||||
/// SP on entry to function.
|
||||
@@ -604,7 +591,7 @@ pub struct ABICalleeImpl<M: ABIMachineSpec> {
|
||||
/// Stack size to be reserved for outgoing arguments.
|
||||
outgoing_args_size: u32,
|
||||
/// Clobbered registers, from regalloc.
|
||||
clobbered: Set<Writable<RealReg>>,
|
||||
clobbered: Vec<Writable<RealReg>>,
|
||||
/// Total number of spillslots, from regalloc.
|
||||
spillslots: Option<usize>,
|
||||
/// Storage allocated for the fixed part of the stack frame. This is
|
||||
@@ -655,24 +642,13 @@ fn get_special_purpose_param_register(
|
||||
let idx = f.signature.special_param_index(purpose)?;
|
||||
match &abi.args[idx] {
|
||||
&ABIArg::Slots { ref slots, .. } => match &slots[0] {
|
||||
&ABIArgSlot::Reg { reg, .. } => Some(reg.to_reg()),
|
||||
&ABIArgSlot::Reg { reg, .. } => Some(reg.into()),
|
||||
_ => None,
|
||||
},
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn ty_from_class(class: RegClass) -> Type {
|
||||
match class {
|
||||
RegClass::I32 => I32,
|
||||
RegClass::I64 => I64,
|
||||
RegClass::F32 => F32,
|
||||
RegClass::F64 => F64,
|
||||
RegClass::V128 => I8X16,
|
||||
_ => panic!("Unknown regclass: {:?}", class),
|
||||
}
|
||||
}
|
||||
|
||||
impl<M: ABIMachineSpec> ABICalleeImpl<M> {
|
||||
/// Create a new body ABI instance.
|
||||
pub fn new(
|
||||
@@ -739,7 +715,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
|
||||
stackslots,
|
||||
stackslots_size: stack_offset,
|
||||
outgoing_args_size: 0,
|
||||
clobbered: Set::empty(),
|
||||
clobbered: vec![],
|
||||
spillslots: None,
|
||||
fixed_frame_storage_size: 0,
|
||||
total_frame_size: None,
|
||||
@@ -961,34 +937,6 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
self.sig.call_conv
|
||||
}
|
||||
|
||||
fn liveins(&self) -> Set<RealReg> {
|
||||
let mut set: Set<RealReg> = Set::empty();
|
||||
for arg in &self.sig.args {
|
||||
if let &ABIArg::Slots { ref slots, .. } = arg {
|
||||
for slot in slots {
|
||||
if let ABIArgSlot::Reg { reg, .. } = slot {
|
||||
set.insert(*reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
set
|
||||
}
|
||||
|
||||
fn liveouts(&self) -> Set<RealReg> {
|
||||
let mut set: Set<RealReg> = Set::empty();
|
||||
for ret in &self.sig.rets {
|
||||
if let &ABIArg::Slots { ref slots, .. } = ret {
|
||||
for slot in slots {
|
||||
if let ABIArgSlot::Reg { reg, .. } = slot {
|
||||
set.insert(*reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
set
|
||||
}
|
||||
|
||||
fn num_args(&self) -> usize {
|
||||
self.sig.args.len()
|
||||
}
|
||||
@@ -1019,7 +967,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
// Extension mode doesn't matter (we're copying out, not in; we
|
||||
// ignore high bits by convention).
|
||||
&ABIArgSlot::Reg { reg, ty, .. } => {
|
||||
insts.push(M::gen_move(*into_reg, reg.to_reg(), ty));
|
||||
insts.push(M::gen_move(*into_reg, reg.into(), ty));
|
||||
}
|
||||
&ABIArgSlot::Stack { offset, ty, .. } => {
|
||||
insts.push(M::gen_load_stack(
|
||||
@@ -1069,20 +1017,21 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
match &self.sig.rets[idx] {
|
||||
&ABIArg::Slots { ref slots, .. } => {
|
||||
assert_eq!(from_regs.len(), slots.len());
|
||||
for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
|
||||
for (slot, &from_reg) in slots.iter().zip(from_regs.regs().iter()) {
|
||||
match slot {
|
||||
&ABIArgSlot::Reg {
|
||||
reg, ty, extension, ..
|
||||
} => {
|
||||
let from_bits = ty_bits(ty) as u8;
|
||||
let ext = M::get_ext_mode(self.sig.call_conv, extension);
|
||||
let reg: Writable<Reg> = Writable::from_reg(Reg::from(reg));
|
||||
match (ext, from_bits) {
|
||||
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
|
||||
if n < word_bits =>
|
||||
{
|
||||
let signed = ext == ArgumentExtension::Sext;
|
||||
ret.push(M::gen_extend(
|
||||
Writable::from_reg(reg.to_reg()),
|
||||
reg,
|
||||
from_reg.to_reg(),
|
||||
signed,
|
||||
from_bits,
|
||||
@@ -1090,11 +1039,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
));
|
||||
}
|
||||
_ => {
|
||||
ret.push(M::gen_move(
|
||||
Writable::from_reg(reg.to_reg()),
|
||||
from_reg.to_reg(),
|
||||
ty,
|
||||
));
|
||||
ret.push(M::gen_move(reg, from_reg.to_reg(), ty));
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -1118,7 +1063,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
|
||||
if n < word_bits =>
|
||||
{
|
||||
assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class());
|
||||
assert_eq!(M::word_reg_class(), from_reg.to_reg().class());
|
||||
let signed = ext == ArgumentExtension::Sext;
|
||||
ret.push(M::gen_extend(
|
||||
Writable::from_reg(from_reg.to_reg()),
|
||||
@@ -1166,7 +1111,22 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
}
|
||||
|
||||
fn gen_ret(&self) -> Self::I {
|
||||
M::gen_ret()
|
||||
let mut rets = vec![];
|
||||
for ret in &self.sig.rets {
|
||||
match ret {
|
||||
ABIArg::Slots { slots, .. } => {
|
||||
for slot in slots {
|
||||
match slot {
|
||||
ABIArgSlot::Reg { reg, .. } => rets.push(Reg::from(*reg)),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
M::gen_ret(rets)
|
||||
}
|
||||
|
||||
fn gen_epilogue_placeholder(&self) -> Self::I {
|
||||
@@ -1177,7 +1137,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
self.spillslots = Some(slots);
|
||||
}
|
||||
|
||||
fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>) {
|
||||
fn set_clobbered(&mut self, clobbered: Vec<Writable<RealReg>>) {
|
||||
self.clobbered = clobbered;
|
||||
}
|
||||
|
||||
@@ -1198,7 +1158,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
into_regs: ValueRegs<Writable<Reg>>,
|
||||
) -> SmallInstVec<Self::I> {
|
||||
// Offset from beginning of spillslot area, which is at nominal SP + stackslots_size.
|
||||
let islot = slot.get() as i64;
|
||||
let islot = slot.index() as i64;
|
||||
let spill_off = islot * M::word_bytes() as i64;
|
||||
let sp_off = self.stackslots_size as i64 + spill_off;
|
||||
log::trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
|
||||
@@ -1214,7 +1174,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
from_regs: ValueRegs<Reg>,
|
||||
) -> SmallInstVec<Self::I> {
|
||||
// Offset from beginning of spillslot area, which is at nominal SP + stackslots_size.
|
||||
let islot = slot.get() as i64;
|
||||
let islot = slot.index() as i64;
|
||||
let spill_off = islot * M::word_bytes() as i64;
|
||||
let sp_off = self.stackslots_size as i64 + spill_off;
|
||||
log::trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
|
||||
@@ -1245,7 +1205,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
let first_spillslot_word =
|
||||
((self.stackslots_size + virtual_sp_offset as u32) / bytes) as usize;
|
||||
for &slot in slots {
|
||||
let slot = slot.get() as usize;
|
||||
let slot = slot.index();
|
||||
bits[first_spillslot_word + slot] = true;
|
||||
}
|
||||
|
||||
@@ -1347,7 +1307,10 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
insts.extend(M::gen_epilogue_frame_restore(&self.flags));
|
||||
}
|
||||
|
||||
insts.push(M::gen_ret());
|
||||
// This `ret` doesn't need any return registers attached
|
||||
// because we are post-regalloc and don't need to
|
||||
// represent the implicit uses anymore.
|
||||
insts.push(M::gen_ret(vec![]));
|
||||
}
|
||||
|
||||
log::trace!("Epilogue: {:?}", insts);
|
||||
@@ -1368,19 +1331,19 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
}
|
||||
|
||||
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I {
|
||||
let ty = ty_from_class(from_reg.to_reg().get_class());
|
||||
self.store_spillslot(to_slot, ty, ValueRegs::one(from_reg.to_reg()))
|
||||
let ty = Self::I::canonical_type_for_rc(Reg::from(from_reg).class());
|
||||
self.store_spillslot(to_slot, ty, ValueRegs::one(Reg::from(from_reg)))
|
||||
.into_iter()
|
||||
.next()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot) -> Self::I {
|
||||
let ty = ty_from_class(to_reg.to_reg().get_class());
|
||||
let ty = Self::I::canonical_type_for_rc(to_reg.to_reg().class());
|
||||
self.load_spillslot(
|
||||
from_slot,
|
||||
ty,
|
||||
writable_value_regs(ValueRegs::one(to_reg.to_reg().to_reg())),
|
||||
writable_value_regs(ValueRegs::one(Reg::from(to_reg.to_reg()))),
|
||||
)
|
||||
.into_iter()
|
||||
.next()
|
||||
@@ -1390,13 +1353,13 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
|
||||
|
||||
fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Writable<Reg>>) {
|
||||
// Compute uses: all arg regs.
|
||||
let mut uses = Vec::new();
|
||||
let mut uses = FxHashSet::default();
|
||||
for arg in &sig.args {
|
||||
if let &ABIArg::Slots { ref slots, .. } = arg {
|
||||
for slot in slots {
|
||||
match slot {
|
||||
&ABIArgSlot::Reg { reg, .. } => {
|
||||
uses.push(reg.to_reg());
|
||||
uses.insert(Reg::from(reg));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
@@ -1405,13 +1368,15 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
|
||||
}
|
||||
|
||||
// Compute defs: all retval regs, and all caller-save (clobbered) regs.
|
||||
let mut defs = M::get_regs_clobbered_by_call(sig.call_conv);
|
||||
let mut defs: FxHashSet<_> = M::get_regs_clobbered_by_call(sig.call_conv)
|
||||
.into_iter()
|
||||
.collect();
|
||||
for ret in &sig.rets {
|
||||
if let &ABIArg::Slots { ref slots, .. } = ret {
|
||||
for slot in slots {
|
||||
match slot {
|
||||
&ABIArgSlot::Reg { reg, .. } => {
|
||||
defs.push(Writable::from_reg(reg.to_reg()));
|
||||
defs.insert(Writable::from_reg(Reg::from(reg)));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
@@ -1419,6 +1384,11 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
|
||||
}
|
||||
}
|
||||
|
||||
let mut uses = uses.into_iter().collect::<Vec<_>>();
|
||||
let mut defs = defs.into_iter().collect::<Vec<_>>();
|
||||
uses.sort_unstable();
|
||||
defs.sort_unstable();
|
||||
|
||||
(uses, defs)
|
||||
}
|
||||
|
||||
@@ -1567,14 +1537,14 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
||||
} => {
|
||||
let ext = M::get_ext_mode(self.sig.call_conv, extension);
|
||||
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
|
||||
assert_eq!(word_rc, reg.get_class());
|
||||
assert_eq!(word_rc, reg.class());
|
||||
let signed = match ext {
|
||||
ir::ArgumentExtension::Uext => false,
|
||||
ir::ArgumentExtension::Sext => true,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
ctx.emit(M::gen_extend(
|
||||
Writable::from_reg(reg.to_reg()),
|
||||
Writable::from_reg(Reg::from(reg)),
|
||||
*from_reg,
|
||||
signed,
|
||||
ty_bits(ty) as u8,
|
||||
@@ -1582,7 +1552,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
||||
));
|
||||
} else {
|
||||
ctx.emit(M::gen_move(
|
||||
Writable::from_reg(reg.to_reg()),
|
||||
Writable::from_reg(Reg::from(reg)),
|
||||
*from_reg,
|
||||
ty,
|
||||
));
|
||||
@@ -1597,7 +1567,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
||||
let mut ty = ty;
|
||||
let ext = M::get_ext_mode(self.sig.call_conv, extension);
|
||||
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
|
||||
assert_eq!(word_rc, from_reg.get_class());
|
||||
assert_eq!(word_rc, from_reg.class());
|
||||
let signed = match ext {
|
||||
ir::ArgumentExtension::Uext => false,
|
||||
ir::ArgumentExtension::Sext => true,
|
||||
@@ -1680,7 +1650,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
||||
// Extension mode doesn't matter because we're copying out, not in,
|
||||
// and we ignore high bits in our own registers by convention.
|
||||
&ABIArgSlot::Reg { reg, ty, .. } => {
|
||||
ctx.emit(M::gen_move(*into_reg, reg.to_reg(), ty));
|
||||
ctx.emit(M::gen_move(*into_reg, Reg::from(reg), ty));
|
||||
}
|
||||
&ABIArgSlot::Stack { offset, ty, .. } => {
|
||||
let ret_area_base = self.sig.stack_arg_space;
|
||||
@@ -1716,7 +1686,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
||||
self.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(rd.to_reg()));
|
||||
}
|
||||
let tmp = ctx.alloc_tmp(word_type).only_reg().unwrap();
|
||||
for (is_safepoint, inst) in M::gen_call(
|
||||
for inst in M::gen_call(
|
||||
&self.dest,
|
||||
uses,
|
||||
defs,
|
||||
@@ -1727,10 +1697,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
|
||||
)
|
||||
.into_iter()
|
||||
{
|
||||
match is_safepoint {
|
||||
InstIsSafepoint::Yes => ctx.emit_safepoint(inst),
|
||||
InstIsSafepoint::No => ctx.emit(inst),
|
||||
}
|
||||
ctx.emit(inst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user