Switch Cranelift over to regalloc2. (#3989)
This PR switches Cranelift over to the new register allocator, regalloc2. See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801) for a summary of the design changes. This switchover has implications for core VCode/MachInst types and the lowering pass. Overall, this change brings improvements to both compile time and speed of generated code (runtime), as reported in #3942:

```
Benchmark        Compilation (wallclock)   Execution (wallclock)
blake3-scalar    25% faster                28% faster
blake3-simd      no diff                   no diff
meshoptimizer    19% faster                17% faster
pulldown-cmark   17% faster                no diff
bz2              15% faster                no diff
SpiderMonkey,    21% faster                2% faster
  fib(30)
clang.wasm       42% faster                N/A
```
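For orientation before the per-file diffs: the heart of the switchover is visible in the `machinst/compile.rs` hunk below, where the regalloc.rs entry point (`allocate_registers_with_opts`, which rewrote the `VCode` in place) is replaced by `regalloc2::run`, which leaves the `VCode` untouched and returns a `regalloc2::Output` for emission to consume. A condensed sketch of the new flow, simplified from that hunk (error handling and logging trimmed; the `VCode`, `Lower`, and `ABICallee` types are the cranelift-codegen ones, so this is the shape of the pipeline rather than a drop-in implementation):

```rust
use regalloc2::{self, MachineEnv, RegallocOptions};

// Condensed from the machinst/compile.rs hunk below; a sketch of the new
// pipeline shape, not a standalone implementation.
pub fn compile<B: LowerBackend + TargetIsa>(
    f: &Function,
    b: &B,
    abi: Box<dyn ABICallee<I = B::MInst>>,
    machine_env: &MachineEnv,
    emit_info: <B::MInst as MachInstEmit>::Info,
) -> CodegenResult<(VCode<B::MInst>, regalloc2::Output)> {
    // Lower CLIF to VCode in the computed block order.
    let block_order = BlockLoweringOrder::new(f);
    let vcode = Lower::new(f, abi, emit_info, block_order)?.lower(b)?;

    // regalloc2 does not rewrite the VCode in place; it returns an Output
    // describing allocations, spills, and inserted moves.
    let mut options = RegallocOptions::default();
    options.verbose_log = log::log_enabled!(log::Level::Trace);
    let regalloc_result =
        regalloc2::run(&vcode, machine_env, &options).expect("register allocation");

    Ok((vcode, regalloc_result))
}
```

Note how the caller now receives both the unmodified `VCode` and the `regalloc2::Output`: the post-regalloc rewrite step (`replace_insns_from_regalloc`) disappears, and later emission reads allocations out of the `Output` instead.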
@@ -5,7 +5,6 @@ use crate::ir::{Signature, StackSlot};
use crate::isa::CallConv;
use crate::machinst::*;
use crate::settings;
use regalloc::{Reg, Set, SpillSlot, Writable};
use smallvec::SmallVec;

/// A small vector of instructions (with some reasonable size); appropriate for
@@ -42,12 +41,6 @@ pub trait ABICallee {
/// Get the calling convention implemented by this ABI object.
fn call_conv(&self) -> CallConv;

/// Get the liveins of the function.
fn liveins(&self) -> Set<RealReg>;

/// Get the liveouts of the function.
fn liveouts(&self) -> Set<RealReg>;

/// Number of arguments.
fn num_args(&self) -> usize;

@@ -106,7 +99,7 @@ pub trait ABICallee {
fn set_num_spillslots(&mut self, slots: usize);

/// Update with the clobbered registers, post-regalloc.
fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>);
fn set_clobbered(&mut self, clobbered: Vec<Writable<RealReg>>);

/// Get the address of a stackslot.
fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable<Reg>) -> Self::I;

@@ -125,6 +125,7 @@

use super::abi::*;
use crate::binemit::StackMap;
use crate::fx::FxHashSet;
use crate::ir::types::*;
use crate::ir::{ArgumentExtension, ArgumentPurpose, StackSlot};
use crate::machinst::*;
@@ -132,7 +133,6 @@ use crate::settings;
use crate::CodegenResult;
use crate::{ir, isa};
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
use smallvec::{smallvec, SmallVec};
use std::convert::TryFrom;
use std::marker::PhantomData;
@@ -257,16 +257,6 @@ pub enum ArgsOrRets {
Rets,
}

/// Is an instruction returned by an ABI machine-specific backend a safepoint,
/// or not?
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InstIsSafepoint {
/// The instruction is a safepoint.
Yes,
/// The instruction is not a safepoint.
No,
}

/// Abstract location for a machine-specific ABI impl to translate into the
/// appropriate addressing mode.
#[derive(Clone, Copy, Debug)]
@@ -319,11 +309,7 @@ pub trait ABIMachineSpec {

/// Returns word register class.
fn word_reg_class() -> RegClass {
match Self::word_bits() {
32 => RegClass::I32,
64 => RegClass::I64,
_ => unreachable!(),
}
RegClass::Int
}

/// Returns required stack alignment in bytes.
@@ -366,7 +352,7 @@ pub trait ABIMachineSpec {
) -> Self::I;

/// Generate a return instruction.
fn gen_ret() -> Self::I;
fn gen_ret(rets: Vec<Reg>) -> Self::I;

/// Generate an "epilogue placeholder" instruction, recognized by lowering
/// when using the Baldrdash ABI.
@@ -442,7 +428,7 @@ pub trait ABIMachineSpec {
/// contains the registers in a sorted order.
fn get_clobbered_callee_saves(
call_conv: isa::CallConv,
regs: &Set<Writable<RealReg>>,
regs: &[Writable<RealReg>],
) -> Vec<Writable<RealReg>>;

/// Determine whether it is necessary to generate the usual frame-setup
@@ -466,7 +452,7 @@ pub trait ABIMachineSpec {
call_conv: isa::CallConv,
setup_frame: bool,
flags: &settings::Flags,
clobbered_callee_saves: &Vec<Writable<RealReg>>,
clobbered_callee_saves: &[Writable<RealReg>],
fixed_frame_storage_size: u32,
outgoing_args_size: u32,
) -> (u64, SmallVec<[Self::I; 16]>);
@@ -478,7 +464,7 @@ pub trait ABIMachineSpec {
fn gen_clobber_restore(
call_conv: isa::CallConv,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
clobbers: &[Writable<RealReg>],
fixed_frame_storage_size: u32,
outgoing_args_size: u32,
) -> SmallVec<[Self::I; 16]>;
@@ -493,7 +479,7 @@ pub trait ABIMachineSpec {
tmp: Writable<Reg>,
callee_conv: isa::CallConv,
callee_conv: isa::CallConv,
) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>;
) -> SmallVec<[Self::I; 2]>;

/// Generate a memcpy invocation. Used to set up struct args. May clobber
/// caller-save registers; we only memcpy before we start to set up args for
@@ -530,6 +516,7 @@ pub trait ABIMachineSpec {
}

/// ABI information shared between body (callee) and caller.
#[derive(Clone)]
struct ABISig {
/// Argument locations (regs or stack slots). Stack offsets are relative to
/// SP on entry to function.
@@ -604,7 +591,7 @@ pub struct ABICalleeImpl<M: ABIMachineSpec> {
/// Stack size to be reserved for outgoing arguments.
outgoing_args_size: u32,
/// Clobbered registers, from regalloc.
clobbered: Set<Writable<RealReg>>,
clobbered: Vec<Writable<RealReg>>,
/// Total number of spillslots, from regalloc.
spillslots: Option<usize>,
/// Storage allocated for the fixed part of the stack frame. This is
@@ -655,24 +642,13 @@ fn get_special_purpose_param_register(
let idx = f.signature.special_param_index(purpose)?;
match &abi.args[idx] {
&ABIArg::Slots { ref slots, .. } => match &slots[0] {
&ABIArgSlot::Reg { reg, .. } => Some(reg.to_reg()),
&ABIArgSlot::Reg { reg, .. } => Some(reg.into()),
_ => None,
},
_ => None,
}
}

fn ty_from_class(class: RegClass) -> Type {
match class {
RegClass::I32 => I32,
RegClass::I64 => I64,
RegClass::F32 => F32,
RegClass::F64 => F64,
RegClass::V128 => I8X16,
_ => panic!("Unknown regclass: {:?}", class),
}
}

impl<M: ABIMachineSpec> ABICalleeImpl<M> {
/// Create a new body ABI instance.
pub fn new(
@@ -739,7 +715,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
stackslots,
stackslots_size: stack_offset,
outgoing_args_size: 0,
clobbered: Set::empty(),
clobbered: vec![],
spillslots: None,
fixed_frame_storage_size: 0,
total_frame_size: None,
@@ -961,34 +937,6 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
self.sig.call_conv
}

fn liveins(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for arg in &self.sig.args {
if let &ABIArg::Slots { ref slots, .. } = arg {
for slot in slots {
if let ABIArgSlot::Reg { reg, .. } = slot {
set.insert(*reg);
}
}
}
}
set
}

fn liveouts(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for ret in &self.sig.rets {
if let &ABIArg::Slots { ref slots, .. } = ret {
for slot in slots {
if let ABIArgSlot::Reg { reg, .. } = slot {
set.insert(*reg);
}
}
}
}
set
}

fn num_args(&self) -> usize {
self.sig.args.len()
}
@@ -1019,7 +967,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
// Extension mode doesn't matter (we're copying out, not in; we
// ignore high bits by convention).
&ABIArgSlot::Reg { reg, ty, .. } => {
insts.push(M::gen_move(*into_reg, reg.to_reg(), ty));
insts.push(M::gen_move(*into_reg, reg.into(), ty));
}
&ABIArgSlot::Stack { offset, ty, .. } => {
insts.push(M::gen_load_stack(
@@ -1069,20 +1017,21 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
match &self.sig.rets[idx] {
&ABIArg::Slots { ref slots, .. } => {
assert_eq!(from_regs.len(), slots.len());
for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
for (slot, &from_reg) in slots.iter().zip(from_regs.regs().iter()) {
match slot {
&ABIArgSlot::Reg {
reg, ty, extension, ..
} => {
let from_bits = ty_bits(ty) as u8;
let ext = M::get_ext_mode(self.sig.call_conv, extension);
let reg: Writable<Reg> = Writable::from_reg(Reg::from(reg));
match (ext, from_bits) {
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
if n < word_bits =>
{
let signed = ext == ArgumentExtension::Sext;
ret.push(M::gen_extend(
Writable::from_reg(reg.to_reg()),
reg,
from_reg.to_reg(),
signed,
from_bits,
@@ -1090,11 +1039,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
));
}
_ => {
ret.push(M::gen_move(
Writable::from_reg(reg.to_reg()),
from_reg.to_reg(),
ty,
));
ret.push(M::gen_move(reg, from_reg.to_reg(), ty));
}
};
}
@@ -1118,7 +1063,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
if n < word_bits =>
{
assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class());
assert_eq!(M::word_reg_class(), from_reg.to_reg().class());
let signed = ext == ArgumentExtension::Sext;
ret.push(M::gen_extend(
Writable::from_reg(from_reg.to_reg()),
@@ -1166,7 +1111,22 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
}

fn gen_ret(&self) -> Self::I {
M::gen_ret()
let mut rets = vec![];
for ret in &self.sig.rets {
match ret {
ABIArg::Slots { slots, .. } => {
for slot in slots {
match slot {
ABIArgSlot::Reg { reg, .. } => rets.push(Reg::from(*reg)),
_ => {}
}
}
}
_ => {}
}
}

M::gen_ret(rets)
}

fn gen_epilogue_placeholder(&self) -> Self::I {
@@ -1177,7 +1137,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
self.spillslots = Some(slots);
}

fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>) {
fn set_clobbered(&mut self, clobbered: Vec<Writable<RealReg>>) {
self.clobbered = clobbered;
}

@@ -1198,7 +1158,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
into_regs: ValueRegs<Writable<Reg>>,
) -> SmallInstVec<Self::I> {
// Offset from beginning of spillslot area, which is at nominal SP + stackslots_size.
let islot = slot.get() as i64;
let islot = slot.index() as i64;
let spill_off = islot * M::word_bytes() as i64;
let sp_off = self.stackslots_size as i64 + spill_off;
log::trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
@@ -1214,7 +1174,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
from_regs: ValueRegs<Reg>,
) -> SmallInstVec<Self::I> {
// Offset from beginning of spillslot area, which is at nominal SP + stackslots_size.
let islot = slot.get() as i64;
let islot = slot.index() as i64;
let spill_off = islot * M::word_bytes() as i64;
let sp_off = self.stackslots_size as i64 + spill_off;
log::trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
@@ -1245,7 +1205,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
let first_spillslot_word =
((self.stackslots_size + virtual_sp_offset as u32) / bytes) as usize;
for &slot in slots {
let slot = slot.get() as usize;
let slot = slot.index();
bits[first_spillslot_word + slot] = true;
}

@@ -1347,7 +1307,10 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
insts.extend(M::gen_epilogue_frame_restore(&self.flags));
}

insts.push(M::gen_ret());
// This `ret` doesn't need any return registers attached
// because we are post-regalloc and don't need to
// represent the implicit uses anymore.
insts.push(M::gen_ret(vec![]));
}

log::trace!("Epilogue: {:?}", insts);
@@ -1368,19 +1331,19 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
}

fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I {
let ty = ty_from_class(from_reg.to_reg().get_class());
self.store_spillslot(to_slot, ty, ValueRegs::one(from_reg.to_reg()))
let ty = Self::I::canonical_type_for_rc(Reg::from(from_reg).class());
self.store_spillslot(to_slot, ty, ValueRegs::one(Reg::from(from_reg)))
.into_iter()
.next()
.unwrap()
}

fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot) -> Self::I {
let ty = ty_from_class(to_reg.to_reg().get_class());
let ty = Self::I::canonical_type_for_rc(to_reg.to_reg().class());
self.load_spillslot(
from_slot,
ty,
writable_value_regs(ValueRegs::one(to_reg.to_reg().to_reg())),
writable_value_regs(ValueRegs::one(Reg::from(to_reg.to_reg()))),
)
.into_iter()
.next()
@@ -1390,13 +1353,13 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {

fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Writable<Reg>>) {
// Compute uses: all arg regs.
let mut uses = Vec::new();
let mut uses = FxHashSet::default();
for arg in &sig.args {
if let &ABIArg::Slots { ref slots, .. } = arg {
for slot in slots {
match slot {
&ABIArgSlot::Reg { reg, .. } => {
uses.push(reg.to_reg());
uses.insert(Reg::from(reg));
}
_ => {}
}
@@ -1405,13 +1368,15 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
}

// Compute defs: all retval regs, and all caller-save (clobbered) regs.
let mut defs = M::get_regs_clobbered_by_call(sig.call_conv);
let mut defs: FxHashSet<_> = M::get_regs_clobbered_by_call(sig.call_conv)
.into_iter()
.collect();
for ret in &sig.rets {
if let &ABIArg::Slots { ref slots, .. } = ret {
for slot in slots {
match slot {
&ABIArgSlot::Reg { reg, .. } => {
defs.push(Writable::from_reg(reg.to_reg()));
defs.insert(Writable::from_reg(Reg::from(reg)));
}
_ => {}
}
@@ -1419,6 +1384,11 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
}
}

let mut uses = uses.into_iter().collect::<Vec<_>>();
let mut defs = defs.into_iter().collect::<Vec<_>>();
uses.sort_unstable();
defs.sort_unstable();

(uses, defs)
}

@@ -1567,14 +1537,14 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
} => {
let ext = M::get_ext_mode(self.sig.call_conv, extension);
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
assert_eq!(word_rc, reg.get_class());
assert_eq!(word_rc, reg.class());
let signed = match ext {
ir::ArgumentExtension::Uext => false,
ir::ArgumentExtension::Sext => true,
_ => unreachable!(),
};
ctx.emit(M::gen_extend(
Writable::from_reg(reg.to_reg()),
Writable::from_reg(Reg::from(reg)),
*from_reg,
signed,
ty_bits(ty) as u8,
@@ -1582,7 +1552,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
));
} else {
ctx.emit(M::gen_move(
Writable::from_reg(reg.to_reg()),
Writable::from_reg(Reg::from(reg)),
*from_reg,
ty,
));
@@ -1597,7 +1567,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
let mut ty = ty;
let ext = M::get_ext_mode(self.sig.call_conv, extension);
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
assert_eq!(word_rc, from_reg.get_class());
assert_eq!(word_rc, from_reg.class());
let signed = match ext {
ir::ArgumentExtension::Uext => false,
ir::ArgumentExtension::Sext => true,
@@ -1680,7 +1650,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
// Extension mode doesn't matter because we're copying out, not in,
// and we ignore high bits in our own registers by convention.
&ABIArgSlot::Reg { reg, ty, .. } => {
ctx.emit(M::gen_move(*into_reg, reg.to_reg(), ty));
ctx.emit(M::gen_move(*into_reg, Reg::from(reg), ty));
}
&ABIArgSlot::Stack { offset, ty, .. } => {
let ret_area_base = self.sig.stack_arg_space;
@@ -1716,7 +1686,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
self.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(rd.to_reg()));
}
let tmp = ctx.alloc_tmp(word_type).only_reg().unwrap();
for (is_safepoint, inst) in M::gen_call(
for inst in M::gen_call(
&self.dest,
uses,
defs,
@@ -1727,10 +1697,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
)
.into_iter()
{
match is_safepoint {
InstIsSafepoint::Yes => ctx.emit_safepoint(inst),
InstIsSafepoint::No => ctx.emit(inst),
}
ctx.emit(inst);
}
}
}

@@ -127,6 +127,9 @@ pub enum LoweredBlock {
/// to the next, i.e., corresponding to the included edge-block. This
/// will be an instruction in `block`.
edge_inst: Inst,
/// The successor index in this edge, to distinguish multiple
/// edges between the same block pair.
succ_idx: usize,
/// The successor CLIF block.
succ: Block,
},
@@ -138,6 +141,9 @@ pub enum LoweredBlock {
/// The edge (jump) instruction corresponding to the included
/// edge-block. This will be an instruction in `pred`.
edge_inst: Inst,
/// The successor index in this edge, to distinguish multiple
/// edges between the same block pair.
succ_idx: usize,
/// The original CLIF block included in this lowered block.
block: Block,
},
@@ -150,6 +156,9 @@ pub enum LoweredBlock {
/// The edge (jump) instruction corresponding to this edge's transition.
/// This will be an instruction in `pred`.
edge_inst: Inst,
/// The successor index in this edge, to distinguish multiple
/// edges between the same block pair.
succ_idx: usize,
/// The successor CLIF block.
succ: Block,
},
@@ -168,29 +177,34 @@ impl LoweredBlock {
}

/// The associated in-edge, if any.
#[cfg(test)]
pub fn in_edge(self) -> Option<(Block, Inst, Block)> {
match self {
LoweredBlock::EdgeAndOrig {
pred,
edge_inst,
block,
..
} => Some((pred, edge_inst, block)),
_ => None,
}
}

/// The associated out-edge, if any. Also includes edge-only blocks.
#[cfg(test)]
pub fn out_edge(self) -> Option<(Block, Inst, Block)> {
match self {
LoweredBlock::OrigAndEdge {
block,
edge_inst,
succ,
..
} => Some((block, edge_inst, succ)),
LoweredBlock::Edge {
pred,
edge_inst,
succ,
..
} => Some((pred, edge_inst, succ)),
_ => None,
}
@@ -207,15 +221,17 @@ impl BlockLoweringOrder {
let mut block_out_count = SecondaryMap::with_default(0);

// Cache the block successors to avoid re-examining branches below.
let mut block_succs: SmallVec<[(Inst, Block); 128]> = SmallVec::new();
let mut block_succs: SmallVec<[(Inst, usize, Block); 128]> = SmallVec::new();
let mut block_succ_range = SecondaryMap::with_default((0, 0));
let mut fallthrough_return_block = None;
for block in f.layout.blocks() {
let block_succ_start = block_succs.len();
let mut succ_idx = 0;
visit_block_succs(f, block, |inst, succ| {
block_out_count[block] += 1;
block_in_count[succ] += 1;
block_succs.push((inst, succ));
block_succs.push((inst, succ_idx, succ));
succ_idx += 1;
});
let block_succ_end = block_succs.len();
block_succ_range[block] = (block_succ_start, block_succ_end);
@@ -262,13 +278,14 @@ impl BlockLoweringOrder {
// At an orig block; successors are always edge blocks,
// possibly with orig blocks following.
let range = block_succ_range[block];
for &(edge_inst, succ) in &block_succs[range.0..range.1] {
for &(edge_inst, succ_idx, succ) in &block_succs[range.0..range.1] {
if block_in_count[succ] == 1 {
ret.push((
edge_inst,
LoweredBlock::EdgeAndOrig {
pred: block,
edge_inst,
succ_idx,
block: succ,
},
));
@@ -278,6 +295,7 @@ impl BlockLoweringOrder {
LoweredBlock::Edge {
pred: block,
edge_inst,
succ_idx,
succ,
},
));
@@ -298,12 +316,13 @@ impl BlockLoweringOrder {
// implicit return succ).
if range.1 - range.0 > 0 {
debug_assert!(range.1 - range.0 == 1);
let (succ_edge_inst, succ_succ) = block_succs[range.0];
let (succ_edge_inst, succ_succ_idx, succ_succ) = block_succs[range.0];
ret.push((
edge_inst,
LoweredBlock::OrigAndEdge {
block: succ,
edge_inst: succ_edge_inst,
succ_idx: succ_succ_idx,
succ: succ_succ,
},
));
@@ -395,7 +414,7 @@ impl BlockLoweringOrder {
let mut lowered_succ_ranges = vec![];
let mut lb_to_bindex = FxHashMap::default();
for (block, succ_range) in rpo.into_iter() {
let index = lowered_order.len() as BlockIndex;
let index = BlockIndex::new(lowered_order.len());
lb_to_bindex.insert(block, index);
lowered_order.push(block);
lowered_succ_ranges.push(succ_range);
@@ -416,7 +435,7 @@ impl BlockLoweringOrder {

let mut orig_map = SecondaryMap::with_default(None);
for (i, lb) in lowered_order.iter().enumerate() {
let i = i as BlockIndex;
let i = BlockIndex::new(i);
if let Some(b) = lb.orig_block() {
orig_map[b] = Some(i);
}
@@ -441,7 +460,7 @@ impl BlockLoweringOrder {

/// Get the successor indices for a lowered block.
pub fn succ_indices(&self, block: BlockIndex) -> &[(Inst, BlockIndex)] {
let range = self.lowered_succ_ranges[block as usize];
let range = self.lowered_succ_ranges[block.index()];
&self.lowered_succ_indices[range.0..range.1]
}

@@ -269,7 +269,7 @@ impl MachLabel {
/// Get a label for a block. (The first N MachLabels are always reserved for
/// the N blocks in the vcode.)
pub fn from_block(bindex: BlockIndex) -> MachLabel {
MachLabel(bindex)
MachLabel(bindex.index() as u32)
}

/// Get the numeric label index.
@@ -334,7 +334,7 @@ impl<I: VCodeInst> MachBuffer<I> {
/// times, e.g. after calling `add_{cond,uncond}_branch()` and
/// before emitting branch bytes.
fn check_label_branch_invariants(&self) {
if !cfg!(debug_assertions) || cfg!(fuzzing) {
if !cfg!(fuzzing) {
return;
}
let cur_off = self.cur_offset();
@@ -489,12 +489,11 @@ impl<I: VCodeInst> MachBuffer<I> {
}

/// Reserve the first N MachLabels for blocks.
pub fn reserve_labels_for_blocks(&mut self, blocks: BlockIndex) {
pub fn reserve_labels_for_blocks(&mut self, blocks: usize) {
trace!("MachBuffer: first {} labels are for blocks", blocks);
debug_assert!(self.label_offsets.is_empty());
self.label_offsets
.resize(blocks as usize, UNKNOWN_LABEL_OFFSET);
self.label_aliases.resize(blocks as usize, UNKNOWN_LABEL);
self.label_offsets.resize(blocks, UNKNOWN_LABEL_OFFSET);
self.label_aliases.resize(blocks, UNKNOWN_LABEL);

// Post-invariant: as for `get_label()`.
}
@@ -1599,14 +1598,14 @@ impl MachBranch {
/// resolving labels internally in the buffer.
pub struct MachTextSectionBuilder<I: VCodeInst> {
buf: MachBuffer<I>,
next_func: u32,
next_func: usize,
force_veneers: bool,
}

impl<I: VCodeInst> MachTextSectionBuilder<I> {
pub fn new(num_funcs: u32) -> MachTextSectionBuilder<I> {
let mut buf = MachBuffer::new();
buf.reserve_labels_for_blocks(num_funcs);
buf.reserve_labels_for_blocks(num_funcs as usize);
MachTextSectionBuilder {
buf,
next_func: 0,
@@ -1627,7 +1626,8 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> {
self.buf.align_to(align.unwrap_or(I::LabelUse::ALIGN));
let pos = self.buf.cur_offset();
if named {
self.buf.bind_label(MachLabel::from_block(self.next_func));
self.buf
.bind_label(MachLabel::from_block(BlockIndex::new(self.next_func)));
self.next_func += 1;
}
self.buf.put_data(func);
@@ -1635,7 +1635,7 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> {
}

fn resolve_reloc(&mut self, offset: u64, reloc: Reloc, addend: Addend, target: u32) -> bool {
let label = MachLabel::from_block(target);
let label = MachLabel::from_block(BlockIndex::new(target as usize));
let offset = u32::try_from(offset).unwrap();
match I::LabelUse::from_reloc(reloc, addend) {
Some(label_use) => {
@@ -1652,7 +1652,7 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> {

fn finish(&mut self) -> Vec<u8> {
// Double-check all functions were pushed.
assert_eq!(self.next_func, self.buf.label_offsets.len() as u32);
assert_eq!(self.next_func, self.buf.label_offsets.len());

// Finish up any veneers, if necessary.
self.buf
@@ -1675,7 +1675,7 @@ mod test {
use std::vec::Vec;

fn label(n: u32) -> MachLabel {
MachLabel::from_block(n)
MachLabel::from_block(BlockIndex::new(n as usize))
}
fn target(n: u32) -> BranchTarget {
BranchTarget::Label(label(n))
@@ -1690,7 +1690,7 @@ mod test {
buf.reserve_labels_for_blocks(2);
buf.bind_label(label(0));
let inst = Inst::Jump { dest: target(1) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(1));
let buf = buf.finish();
assert_eq!(0, buf.total_size());
@@ -1710,15 +1710,15 @@ mod test {
taken: target(1),
not_taken: target(2),
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(1));
let inst = Inst::Jump { dest: target(3) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(2));
let inst = Inst::Jump { dest: target(3) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(3));

@@ -1740,17 +1740,17 @@ mod test {
taken: target(1),
not_taken: target(2),
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(1));
let inst = Inst::Udf {
trap_code: TrapCode::Interrupt,
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(2));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(3));

@@ -1762,9 +1762,9 @@ mod test {
kind: CondBrKind::NotZero(xreg(0)),
trap_code: TrapCode::Interrupt,
};
inst.emit(&mut buf2, &info, &mut state);
inst.emit(&[], &mut buf2, &info, &mut state);
let inst = Inst::Nop4;
inst.emit(&mut buf2, &info, &mut state);
inst.emit(&[], &mut buf2, &info, &mut state);

let buf2 = buf2.finish();

@@ -1785,7 +1785,7 @@ mod test {
taken: target(2),
not_taken: target(3),
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(1));
while buf.cur_offset() < 2000000 {
@@ -1793,16 +1793,16 @@ mod test {
buf.emit_island(0);
}
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
}

buf.bind_label(label(2));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(3));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

let buf = buf.finish();

@@ -1831,7 +1831,7 @@ mod test {
// go directly to the target.
not_taken: BranchTarget::ResolvedOffset(2000000 + 4 - 4),
};
inst.emit(&mut buf2, &info, &mut state);
inst.emit(&[], &mut buf2, &info, &mut state);

let buf2 = buf2.finish();

@@ -1848,16 +1848,16 @@ mod test {

buf.bind_label(label(0));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(1));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(2));
while buf.cur_offset() < 2000000 {
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
}

buf.bind_label(label(3));
@@ -1866,7 +1866,7 @@ mod test {
taken: target(0),
not_taken: target(1),
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

let buf = buf.finish();

@@ -1879,11 +1879,11 @@ mod test {
taken: BranchTarget::ResolvedOffset(8),
not_taken: BranchTarget::ResolvedOffset(4 - (2000000 + 4)),
};
inst.emit(&mut buf2, &info, &mut state);
inst.emit(&[], &mut buf2, &info, &mut state);
let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(-(2000000 + 8)),
};
inst.emit(&mut buf2, &info, &mut state);
inst.emit(&[], &mut buf2, &info, &mut state);

let buf2 = buf2.finish();

@@ -1937,38 +1937,38 @@ mod test {
taken: target(1),
not_taken: target(2),
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(1));
let inst = Inst::Jump { dest: target(3) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(2));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
let inst = Inst::Jump { dest: target(0) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(3));
let inst = Inst::Jump { dest: target(4) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(4));
let inst = Inst::Jump { dest: target(5) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(5));
let inst = Inst::Jump { dest: target(7) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(6));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(7));
let inst = Inst::Ret;
inst.emit(&mut buf, &info, &mut state);
let inst = Inst::Ret { rets: vec![] };
inst.emit(&[], &mut buf, &info, &mut state);

let buf = buf.finish();

@@ -2009,23 +2009,23 @@ mod test {

buf.bind_label(label(0));
let inst = Inst::Jump { dest: target(1) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(1));
let inst = Inst::Jump { dest: target(2) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(2));
let inst = Inst::Jump { dest: target(3) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(3));
let inst = Inst::Jump { dest: target(4) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

buf.bind_label(label(4));
let inst = Inst::Jump { dest: target(1) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);

let buf = buf.finish();

@@ -2,12 +2,11 @@

use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::log::DeferredDisplay;
use crate::machinst::*;
use crate::settings;
use crate::timing;

use regalloc::{allocate_registers_with_opts, Algorithm, Options, PrettyPrint};
use regalloc2::RegallocOptions;
use regalloc2::{self, MachineEnv};

/// Compile the given function down to VCode with allocated registers, ready
/// for binary emission.
@@ -15,100 +14,38 @@ pub fn compile<B: LowerBackend + TargetIsa>(
f: &Function,
b: &B,
abi: Box<dyn ABICallee<I = B::MInst>>,
reg_universe: &RealRegUniverse,
machine_env: &MachineEnv,
emit_info: <B::MInst as MachInstEmit>::Info,
) -> CodegenResult<VCode<B::MInst>>
where
B::MInst: PrettyPrint,
{
) -> CodegenResult<(VCode<B::MInst>, regalloc2::Output)> {
// Compute lowered block order.
let block_order = BlockLoweringOrder::new(f);
// Build the lowering context.
let lower = Lower::new(f, abi, emit_info, block_order)?;
// Lower the IR.
let (mut vcode, stack_map_request_info) = {
let vcode = {
let _tt = timing::vcode_lower();
lower.lower(b)?
};

// Creating the vcode string representation may be costly for large functions, so defer its
// rendering.
log::trace!(
"vcode from lowering: \n{}",
DeferredDisplay::new(|| vcode.show_rru(Some(reg_universe)))
);
log::trace!("vcode from lowering: \n{:?}", vcode);

// Perform register allocation.
let (run_checker, algorithm) = match vcode.flags().regalloc() {
settings::Regalloc::Backtracking => (false, Algorithm::Backtracking(Default::default())),
settings::Regalloc::BacktrackingChecked => {
(true, Algorithm::Backtracking(Default::default()))
}
settings::Regalloc::ExperimentalLinearScan => {
(false, Algorithm::LinearScan(Default::default()))
}
settings::Regalloc::ExperimentalLinearScanChecked => {
(true, Algorithm::LinearScan(Default::default()))
}
};

#[cfg(feature = "regalloc-snapshot")]
{
use std::fs;
use std::path::Path;
if let Some(path) = std::env::var("SERIALIZE_REGALLOC").ok() {
let snapshot = regalloc::IRSnapshot::from_function(&vcode, reg_universe);
let serialized = bincode::serialize(&snapshot).expect("couldn't serialize snapshot");

let file_path = Path::new(&path).join(Path::new(&format!("ir{}.bin", f.name)));
fs::write(file_path, &serialized).expect("couldn't write IR snapshot file");
}
}

// If either there are no reference-typed values, or else there are
// but there are no safepoints at which we need to know about them,
// then we don't need stack maps.
let sri = if stack_map_request_info.reftyped_vregs.len() > 0
&& stack_map_request_info.safepoint_insns.len() > 0
{
Some(&stack_map_request_info)
} else {
None
};

let result = {
let regalloc_result = {
let _tt = timing::regalloc();
allocate_registers_with_opts(
&mut vcode,
reg_universe,
sri,
Options {
run_checker,
algorithm,
},
)
.map_err(|err| {
log::error!(
"Register allocation error for vcode\n{}\nError: {:?}",
vcode.show_rru(Some(reg_universe)),
let mut options = RegallocOptions::default();
options.verbose_log = log::log_enabled!(log::Level::Trace);
regalloc2::run(&vcode, machine_env, &options)
.map_err(|err| {
log::error!(
"Register allocation error for vcode\n{:?}\nError: {:?}\nCLIF for error:\n{:?}",
vcode,
err,
f,
);
err
);
err
})
.expect("register allocation")
})
.expect("register allocation")
};

// Reorder vcode into final order and copy out final instruction sequence
// all at once. This also inserts prologues/epilogues.
{
let _tt = timing::vcode_post_ra();
vcode.replace_insns_from_regalloc(result);
}

log::trace!(
"vcode after regalloc: final version:\n{}",
DeferredDisplay::new(|| vcode.show_rru(Some(reg_universe)))
);

Ok(vcode)
Ok((vcode, regalloc_result))
}

@@ -1,525 +0,0 @@
//! Debug info analysis: computes value-label ranges from value-label markers in
//! generated VCode.
//!
//! We "reverse-engineer" debug info like this because it is far more reliable
//! than generating it while emitting code and keeping it in sync.
//!
//! This works by (i) observing "value-label marker" instructions, which are
//! semantically just an assignment from a register to a "value label" (which
//! one can think of as another register; they represent, e.g., Wasm locals) at
//! a certain point in the code, and (ii) observing loads and stores to the
//! stack and register moves.
//!
//! We track, at every program point, the correspondence between each value
//! label and *all* locations in which it resides. E.g., if it is stored to the
//! stack, we remember that it is in both a register and the stack slot; but if
//! the register is later overwritten, then we have it just in the stack slot.
//! This allows us to avoid false-positives observing loads/stores that we think
//! are spillslots but really aren't.
//!
//! We do a standard forward dataflow analysis to compute this info.

use crate::ir::ValueLabel;
use crate::machinst::*;
use crate::value_label::{LabelValueLoc, ValueLabelsRanges, ValueLocRange};
use log::trace;
use regalloc::{Reg, RegUsageCollector};
use std::collections::{HashMap, HashSet};
use std::hash::Hash;

/// Location of a labeled value: in a register or in a stack slot. Note that a
/// value may live in more than one location; `AnalysisInfo` maps each
/// value-label to multiple `ValueLoc`s.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum ValueLoc {
Reg(Reg),
/// Nominal-SP offset.
Stack(i64),
}

impl From<ValueLoc> for LabelValueLoc {
fn from(v: ValueLoc) -> Self {
match v {
ValueLoc::Reg(r) => LabelValueLoc::Reg(r),
ValueLoc::Stack(off) => LabelValueLoc::SPOffset(off),
}
}
}

impl ValueLoc {
fn is_reg(self) -> bool {
match self {
ValueLoc::Reg(_) => true,
_ => false,
}
}
fn is_stack(self) -> bool {
match self {
ValueLoc::Stack(_) => true,
_ => false,
}
}
}

/// Mappings at one program point.
#[derive(Clone, Debug)]
struct AnalysisInfo {
/// Nominal SP relative to real SP. If `None`, then the offset is
/// indeterminate (i.e., we merged to the lattice 'bottom' element). This
/// should not happen in well-formed code.
nominal_sp_offset: Option<i64>,
/// Forward map from labeled values to sets of locations.
label_to_locs: HashMap<ValueLabel, HashSet<ValueLoc>>,
/// Reverse map for each register indicating the value it holds, if any.
reg_to_label: HashMap<Reg, ValueLabel>,
/// Reverse map for each stack offset indicating the value it holds, if any.
stack_to_label: HashMap<i64, ValueLabel>,
}

/// Get the registers written (mod'd or def'd) by a machine instruction.
fn get_inst_writes<M: MachInst>(m: &M) -> Vec<Reg> {
// TODO: expose this part of regalloc.rs's interface publicly.
let mut vecs = RegUsageCollector::get_empty_reg_vecs_test_framework_only(false);
let mut coll = RegUsageCollector::new(&mut vecs);
m.get_regs(&mut coll);
vecs.defs.extend(vecs.mods.into_iter());
vecs.defs
}

impl AnalysisInfo {
/// Create a new analysis state. This is the "top" lattice element at which
/// the fixpoint dataflow analysis starts.
fn new() -> Self {
AnalysisInfo {
nominal_sp_offset: Some(0),
label_to_locs: HashMap::new(),
reg_to_label: HashMap::new(),
stack_to_label: HashMap::new(),
}
}

/// Remove all locations for a given labeled value. Used when the labeled
/// value is redefined (so old values become stale).
fn clear_label(&mut self, label: ValueLabel) {
if let Some(locs) = self.label_to_locs.remove(&label) {
for loc in locs {
match loc {
ValueLoc::Reg(r) => {
self.reg_to_label.remove(&r);
}
ValueLoc::Stack(off) => {
self.stack_to_label.remove(&off);
}
}
}
}
}

/// Remove a label from a register, if any. Used, e.g., if the register is
/// overwritten.
fn clear_reg(&mut self, reg: Reg) {
if let Some(label) = self.reg_to_label.remove(&reg) {
if let Some(locs) = self.label_to_locs.get_mut(&label) {
locs.remove(&ValueLoc::Reg(reg));
}
}
}

/// Remove a label from a stack offset, if any. Used, e.g., when the stack
/// slot is overwritten.
fn clear_stack_off(&mut self, off: i64) {
if let Some(label) = self.stack_to_label.remove(&off) {
if let Some(locs) = self.label_to_locs.get_mut(&label) {
locs.remove(&ValueLoc::Stack(off));
}
}
}

/// Indicate that a labeled value is newly defined and its new value is in
/// `reg`.
fn def_label_at_reg(&mut self, label: ValueLabel, reg: Reg) {
self.clear_label(label);
self.label_to_locs
.entry(label)
.or_insert_with(|| HashSet::new())
.insert(ValueLoc::Reg(reg));
self.reg_to_label.insert(reg, label);
}

/// Process a store from a register to a stack slot (offset).
fn store_reg(&mut self, reg: Reg, off: i64) {
self.clear_stack_off(off);
if let Some(label) = self.reg_to_label.get(&reg) {
if let Some(locs) = self.label_to_locs.get_mut(label) {
locs.insert(ValueLoc::Stack(off));
}
self.stack_to_label.insert(off, *label);
}
}

/// Process a load from a stack slot (offset) to a register.
fn load_reg(&mut self, reg: Reg, off: i64) {
self.clear_reg(reg);
if let Some(&label) = self.stack_to_label.get(&off) {
if let Some(locs) = self.label_to_locs.get_mut(&label) {
locs.insert(ValueLoc::Reg(reg));
}
self.reg_to_label.insert(reg, label);
}
}

/// Process a move from one register to another.
fn move_reg(&mut self, to: Reg, from: Reg) {
self.clear_reg(to);
if let Some(&label) = self.reg_to_label.get(&from) {
if let Some(locs) = self.label_to_locs.get_mut(&label) {
locs.insert(ValueLoc::Reg(to));
}
self.reg_to_label.insert(to, label);
}
}

/// Update the analysis state w.r.t. an instruction's effects. Given the
/// state just before `inst`, this method updates `self` to be the state
/// just after `inst`.
fn step<M: MachInst>(&mut self, inst: &M) {
for write in get_inst_writes(inst) {
self.clear_reg(write);
}
if let Some((label, reg)) = inst.defines_value_label() {
self.def_label_at_reg(label, reg);
}
match inst.stack_op_info() {
Some(MachInstStackOpInfo::LoadNomSPOff(reg, offset)) => {
self.load_reg(reg, offset + self.nominal_sp_offset.unwrap());
}
Some(MachInstStackOpInfo::StoreNomSPOff(reg, offset)) => {
self.store_reg(reg, offset + self.nominal_sp_offset.unwrap());
}
Some(MachInstStackOpInfo::NomSPAdj(offset)) => {
if self.nominal_sp_offset.is_some() {
self.nominal_sp_offset = Some(self.nominal_sp_offset.unwrap() + offset);
}
}
_ => {}
}
if let Some((to, from)) = inst.is_move() {
let to = to.to_reg();
self.move_reg(to, from);
}
}
}

/// Trait used to implement the dataflow analysis' meet (intersect) function
|
||||
/// onthe `AnalysisInfo` components. For efficiency, this is implemented as a
|
||||
/// mutation on the LHS, rather than a pure functional operation.
|
||||
trait IntersectFrom {
|
||||
fn intersect_from(&mut self, other: &Self) -> IntersectResult;
|
||||
}
|
||||
|
||||
/// Result of an intersection operation. Indicates whether the mutated LHS
|
||||
/// (which becomes the intersection result) differs from the original LHS. Also
|
||||
/// indicates if the value has become "empty" and should be removed from a
|
||||
/// parent container, if any.
|
||||
struct IntersectResult {
|
||||
/// Did the intersection change the LHS input (the one that was mutated into
|
||||
/// the result)? This is needed to drive the fixpoint loop; when no more
|
||||
/// changes occur, then we have converted.
|
||||
changed: bool,
|
||||
/// Is the resulting value "empty"? This can be used when a container, such
|
||||
/// as a map, holds values of this (intersection result) type; when
|
||||
/// `is_empty` is true for the merge of the values at a particular key, we
|
||||
/// can remove that key from the merged (intersected) result. This is not
|
||||
/// necessary for analysis correctness but reduces the memory and runtime
|
||||
/// cost of the fixpoint loop.
|
||||
is_empty: bool,
|
||||
}
|
||||
|
||||
impl IntersectFrom for AnalysisInfo {
|
||||
fn intersect_from(&mut self, other: &Self) -> IntersectResult {
|
||||
let mut changed = false;
|
||||
changed |= self
|
||||
.nominal_sp_offset
|
||||
.intersect_from(&other.nominal_sp_offset)
|
||||
.changed;
|
||||
changed |= self
|
||||
.label_to_locs
|
||||
.intersect_from(&other.label_to_locs)
|
||||
.changed;
|
||||
changed |= self
|
||||
.reg_to_label
|
||||
.intersect_from(&other.reg_to_label)
|
||||
.changed;
|
||||
changed |= self
|
||||
.stack_to_label
|
||||
.intersect_from(&other.stack_to_label)
|
||||
.changed;
|
||||
IntersectResult {
|
||||
changed,
|
||||
is_empty: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<K, V> IntersectFrom for HashMap<K, V>
|
||||
where
|
||||
K: Copy + Eq + Hash,
|
||||
V: IntersectFrom,
|
||||
{
|
||||
/// Intersection for hashmap: remove keys that are not in both inputs;
|
||||
/// recursively intersect values for keys in common.
|
||||
fn intersect_from(&mut self, other: &Self) -> IntersectResult {
|
||||
let mut changed = false;
|
||||
let mut remove_keys = vec![];
|
||||
for k in self.keys() {
|
||||
if !other.contains_key(k) {
|
||||
remove_keys.push(*k);
|
||||
}
|
||||
}
|
||||
for k in &remove_keys {
|
||||
changed = true;
|
||||
self.remove(k);
|
||||
}
|
||||
|
||||
remove_keys.clear();
|
||||
for k in other.keys() {
|
||||
if let Some(v) = self.get_mut(k) {
|
||||
let result = v.intersect_from(other.get(k).unwrap());
|
||||
changed |= result.changed;
|
||||
if result.is_empty {
|
||||
remove_keys.push(*k);
|
||||
}
|
||||
}
|
||||
}
|
||||
for k in &remove_keys {
|
||||
changed = true;
|
||||
self.remove(k);
|
||||
}
|
||||
|
||||
IntersectResult {
|
||||
changed,
|
||||
is_empty: self.len() == 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<T> IntersectFrom for HashSet<T>
|
||||
where
|
||||
T: Copy + Eq + Hash,
|
||||
{
|
||||
/// Intersection for hashset: just take the set intersection.
|
||||
fn intersect_from(&mut self, other: &Self) -> IntersectResult {
|
||||
let mut changed = false;
|
||||
let mut remove = vec![];
|
||||
for val in self.iter() {
|
||||
if !other.contains(val) {
|
||||
remove.push(*val);
|
||||
}
|
||||
}
|
||||
for val in remove {
|
||||
changed = true;
|
||||
self.remove(&val);
|
||||
}
|
||||
|
||||
IntersectResult {
|
||||
changed,
|
||||
is_empty: self.len() == 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl IntersectFrom for ValueLabel {
|
||||
// Intersection for labeled value: remove if not equal. This is equivalent
|
||||
// to a three-level lattice with top, bottom, and unordered set of
|
||||
// individual labels in between.
|
||||
fn intersect_from(&mut self, other: &Self) -> IntersectResult {
|
||||
IntersectResult {
|
||||
changed: false,
|
||||
is_empty: *self != *other,
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<T> IntersectFrom for Option<T>
|
||||
where
|
||||
T: Copy + Eq,
|
||||
{
|
||||
/// Intersectino for Option<T>: recursively intersect if both `Some`, else
|
||||
/// `None`.
|
||||
fn intersect_from(&mut self, other: &Self) -> IntersectResult {
|
||||
let mut changed = false;
|
||||
if !(self.is_some() && other.is_some() && self == other) {
|
||||
changed = true;
|
||||
*self = None;
|
||||
}
|
||||
IntersectResult {
|
||||
changed,
|
||||
is_empty: self.is_none(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the value-label ranges (locations for program-point ranges for
|
||||
/// labeled values) from a given `VCode` compilation result.
|
||||
///
|
||||
/// In order to compute this information, we perform a dataflow analysis on the
|
||||
/// machine code. To do so, and translate the results into a form usable by the
|
||||
/// debug-info consumers, we need to know two additional things:
|
||||
///
|
||||
/// - The machine-code layout (code offsets) of the instructions. DWARF is
|
||||
/// encoded in terms of instruction *ends* (and we reason about value
|
||||
/// locations at program points *after* instructions, to match this), so we
|
||||
/// take an array `inst_ends`, giving us code offsets for each instruction's
|
||||
/// end-point. (Note that this is one *past* the last byte; so a 4-byte
|
||||
/// instruction at offset 0 has an end offset of 4.)
|
||||
///
|
||||
/// - The locations of the labels to which branches will jump. Branches can tell
|
||||
/// us about their targets in terms of `MachLabel`s, but we don't know where
|
||||
/// those `MachLabel`s will be placed in the linear array of instructions. We
/// take the array `label_inst_indices` to provide this info: for a label with
/// index `l`, `label_inst_indices[l]` is the index of the instruction before
/// which that label is bound.
pub(crate) fn compute<I: VCodeInst>(
    insts: &[I],
    layout_info: &InstsLayoutInfo,
) -> ValueLabelsRanges {
    let inst_start = |idx: usize| {
        if idx == 0 {
            0
        } else {
            layout_info.inst_end_offsets[idx - 1]
        }
    };

    trace!("compute: insts =");
    for i in 0..insts.len() {
        trace!(
            "  #{} end: {} -> {:?}",
            i,
            layout_info.inst_end_offsets[i],
            insts[i]
        );
    }
    trace!("label_inst_indices: {:?}", layout_info.label_inst_indices);

    // Info at each block head, indexed by label.
    let mut block_starts: HashMap<u32, AnalysisInfo> = HashMap::new();

    // Initialize state at entry.
    block_starts.insert(0, AnalysisInfo::new());

    // Worklist: label indices for basic blocks.
    let mut worklist = Vec::new();
    let mut worklist_set = HashSet::new();
    worklist.push(0);
    worklist_set.insert(0);

    while !worklist.is_empty() {
        let block = worklist.pop().unwrap();
        worklist_set.remove(&block);

        let mut state = block_starts.get(&block).unwrap().clone();
        trace!("at block {} -> state: {:?}", block, state);
        // Iterate for each instruction in the block (we break at the first
        // terminator we see).
        let mut index = layout_info.label_inst_indices[block as usize];
        while index < insts.len() as u32 {
            state.step(&insts[index as usize]);
            trace!(" -> inst #{}: {:?}", index, insts[index as usize]);
            trace!("    --> state: {:?}", state);

            let term = insts[index as usize].is_term();
            if term.is_term() {
                for succ in term.get_succs() {
                    trace!("    SUCCESSOR block {}", succ.get());
                    if let Some(succ_state) = block_starts.get_mut(&succ.get()) {
                        trace!("       orig state: {:?}", succ_state);
                        if succ_state.intersect_from(&state).changed {
                            if worklist_set.insert(succ.get()) {
                                worklist.push(succ.get());
                            }
                            trace!("       (changed)");
                        }
                        trace!("       new state: {:?}", succ_state);
                    } else {
                        // First time seeing this block.
                        block_starts.insert(succ.get(), state.clone());
                        worklist.push(succ.get());
                        worklist_set.insert(succ.get());
                    }
                }
                break;
            }

            index += 1;
        }
    }

    // Now iterate over blocks one last time, collecting
    // value-label locations.

    let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
    for block in 0..layout_info.label_inst_indices.len() {
        let start_index = layout_info.label_inst_indices[block];
        let end_index = if block == layout_info.label_inst_indices.len() - 1 {
            insts.len() as u32
        } else {
            layout_info.label_inst_indices[block + 1]
        };
        let block = block as u32;
        let mut state = block_starts.get(&block).unwrap().clone();
        for index in start_index..end_index {
            let offset = inst_start(index as usize);
            let end = layout_info.inst_end_offsets[index as usize];

            // Cold blocks cause instructions to occur out-of-order wrt
            // others. We rely on the monotonic mapping from instruction
            // index to offset in machine code for this analysis to work,
            // so we just skip debuginfo for cold blocks. This should be
            // generally fine, as cold blocks generally constitute
            // slowpaths for expansions of particular ops, rather than
            // user-written code.
            if layout_info.start_of_cold_code.is_some()
                && offset >= layout_info.start_of_cold_code.unwrap()
            {
                continue;
            }

            assert!(offset <= end);
            state.step(&insts[index as usize]);

            for (label, locs) in &state.label_to_locs {
                trace!("   inst {} has label {:?} -> locs {:?}", index, label, locs);
                // Find an appropriate loc: a register if possible,
                // otherwise pick the first stack loc.
                let reg = locs.iter().cloned().find(|l| l.is_reg());
                let loc = reg.or_else(|| locs.iter().cloned().find(|l| l.is_stack()));
                if let Some(loc) = loc {
                    let loc = LabelValueLoc::from(loc);
                    let list = value_labels_ranges.entry(*label).or_insert_with(|| vec![]);
                    // If the existing location list for this value-label is
                    // either empty, or has an end location that does not extend
                    // to the current offset, then we have to append a new
                    // entry. Otherwise, we can extend the current entry.
                    //
                    // Note that `end` is one past the end of the instruction;
                    // it appears that `end` is exclusive, so a mapping valid at
                    // offset 5 will have start = 5, end = 6.
                    if list
                        .last()
                        .map(|last| last.end <= offset || last.loc != loc)
                        .unwrap_or(true)
                    {
                        list.push(ValueLocRange {
                            loc,
                            start: end,
                            end: end + 1,
                        });
                    } else {
                        list.last_mut().unwrap().end = end + 1;
                    }
                }
            }
        }
    }

    trace!("ret: {:?}", value_labels_ranges);
    value_labels_ranges
}
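For orientation, the function above is a standard forward-dataflow fixpoint: propagate a per-block state along successor edges, re-enqueueing a block whenever its merged in-state changes. Here is a minimal, self-contained sketch of that worklist pattern. This is not the PR's code; the `State` type and `transfer` callback are hypothetical stand-ins for `AnalysisInfo` and its `step` over instructions.

```
use std::collections::hash_map::Entry;
use std::collections::{HashMap, HashSet};

// Hypothetical per-block state; `intersect_from` merges a predecessor's
// out-state into this block's in-state and reports whether anything changed,
// mirroring `AnalysisInfo::intersect_from` above.
#[derive(Clone, Default)]
struct State(HashSet<u32>);

impl State {
    fn intersect_from(&mut self, other: &State) -> bool {
        let before = self.0.len();
        self.0 = self.0.intersection(&other.0).cloned().collect();
        self.0.len() != before
    }
}

// Generic worklist fixpoint over a CFG: `succs` maps a block to its
// successors; `transfer` steps the in-state through the block's body.
fn fixpoint(
    entry: u32,
    succs: &HashMap<u32, Vec<u32>>,
    transfer: impl Fn(u32, &State) -> State,
) -> HashMap<u32, State> {
    let mut block_starts: HashMap<u32, State> = HashMap::new();
    block_starts.insert(entry, State::default());
    let mut worklist = vec![entry];
    let mut worklist_set: HashSet<u32> = worklist.iter().cloned().collect();

    while let Some(block) = worklist.pop() {
        worklist_set.remove(&block);
        let out = transfer(block, &block_starts[&block]);
        for &succ in succs.get(&block).into_iter().flatten() {
            let changed = match block_starts.entry(succ) {
                // Seen before: meet the states and note any change.
                Entry::Occupied(mut e) => e.get_mut().intersect_from(&out),
                // First time seeing this block: seed it and revisit.
                Entry::Vacant(e) => {
                    e.insert(out.clone());
                    true
                }
            };
            if changed && worklist_set.insert(succ) {
                worklist.push(succ);
            }
        }
    }
    block_starts
}
```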
@@ -1,8 +1,8 @@
//! Miscellaneous helpers for machine backends.

use super::{InsnOutput, LowerCtx, VCodeInst, ValueRegs};
use super::{Reg, Writable};
use crate::ir::Type;
use regalloc::{Reg, Writable};
use std::ops::{Add, BitAnd, Not, Sub};

/// Returns the size (in bits) of a given type.

@@ -1,8 +1,7 @@
use crate::ir::{types, Inst, Value, ValueList};
use crate::machinst::{get_output_reg, InsnOutput, LowerCtx, MachInst, RegRenamer};
use crate::machinst::{get_output_reg, InsnOutput, LowerCtx, Reg, Writable};
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{Reg, Writable};
use smallvec::SmallVec;
use std::cell::Cell;

@@ -107,7 +106,8 @@ macro_rules! isle_prelude_methods {

        #[inline]
        fn invalid_reg(&mut self) -> Reg {
            Reg::invalid()
            use crate::machinst::valueregs::InvalidSentinel;
            Reg::invalid_sentinel()
        }

        #[inline]
@@ -467,7 +467,6 @@ where
    pub lower_ctx: &'a mut C,
    pub flags: &'a F,
    pub isa_flags: &'a I,
    pub emitted_insts: SmallVec<[(C::I, bool); N]>,
}

/// Shared lowering code amongst all backends for doing ISLE-based lowering.
@@ -482,7 +481,6 @@ pub(crate) fn lower_common<C, F, I, IF, const N: usize>(
    outputs: &[InsnOutput],
    inst: Inst,
    isle_lower: IF,
    map_regs: fn(&mut C::I, &RegRenamer),
) -> Result<(), ()>
where
    C: LowerCtx,
@@ -495,7 +493,6 @@ where
        lower_ctx,
        flags,
        isa_flags,
        emitted_insts: SmallVec::new(),
    };

    let temp_regs = isle_lower(&mut isle_ctx, inst).ok_or(())?;
@@ -514,10 +511,15 @@ where
    }

    // The ISLE generated code emits its own registers to define the
    // instruction's lowered values in. We rename those registers to the
    // registers they were assigned when their value was used as an operand in
    // earlier lowerings.
    let mut renamer = RegRenamer::default();
    // instruction's lowered values in. However, other instructions
    // that use this SSA value will be lowered assuming that the value
    // is generated into a pre-assigned, different, register.
    //
    // To connect the two, we set up "aliases" in the VCodeBuilder
    // that apply when it is building the Operand table for the
    // regalloc to use. These aliases effectively rewrite any use of
    // the pre-assigned register to the register that was returned by
    // the ISLE lowering logic.
    for i in 0..outputs.len() {
        let regs = temp_regs[i];
        let dsts = get_output_reg(isle_ctx.lower_ctx, outputs[i]);
@@ -528,41 +530,11 @@ where
            // Flags values do not occupy any registers.
            assert!(regs.len() == 0);
        } else {
            let (_, tys) = <C::I>::rc_for_type(ty).unwrap();
            assert!(regs.len() == tys.len());
            assert!(regs.len() == dsts.len());
            for ((dst, temp), ty) in dsts.regs().iter().zip(regs.regs().iter()).zip(tys) {
                renamer.add_rename(*temp, dst.to_reg(), *ty);
            for (dst, temp) in dsts.regs().iter().zip(regs.regs().iter()) {
                isle_ctx.lower_ctx.set_vreg_alias(dst.to_reg(), *temp);
            }
        }
    }
    for (inst, _) in isle_ctx.emitted_insts.iter_mut() {
        map_regs(inst, &renamer);
    }

    // If any renamed register wasn't actually defined in the ISLE-generated
    // instructions then what we're actually doing is "renaming" an input to a
    // new name which requires manually inserting a `mov` instruction. Note that
    // this typically doesn't happen and is only here for cases where the input
    // is sometimes passed through unmodified to the output, such as
    // zero-extending a 64-bit input to a 128-bit output which doesn't actually
    // change the input and simply produces another zero'd register.
    for (old, new, ty) in renamer.unmapped_defs() {
        isle_ctx
            .lower_ctx
            .emit(<C::I>::gen_move(Writable::from_reg(new), old, ty));
    }

    // Once everything is remapped we forward all emitted instructions to the
    // `lower_ctx`. Note that this happens after the synthetic mov's above in
    // case any of these instructions use those movs.
    for (inst, is_safepoint) in isle_ctx.emitted_insts {
        if is_safepoint {
            lower_ctx.emit_safepoint(inst);
        } else {
            lower_ctx.emit(inst);
        }
    }

    Ok(())
}
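To make the aliasing idea in the comment above concrete, here is a small sketch of an alias table with fixpoint resolution. The names are invented for illustration; the real mechanism lives inside `VCodeBuilder` and is applied while it builds the operand table for regalloc2.

```
use std::collections::HashMap;

// Hypothetical alias table: `from -> to` means any use of `from` should be
// rewritten to `to` when operands are collected for register allocation.
#[derive(Default)]
struct VRegAliases {
    aliases: HashMap<u32, u32>,
}

impl VRegAliases {
    fn set_alias(&mut self, from: u32, to: u32) {
        self.aliases.insert(from, to);
    }

    // Chase the alias chain to a fixpoint, since an alias target may itself
    // be aliased by a later lowering. Assumes chains are acyclic.
    fn resolve(&self, mut vreg: u32) -> u32 {
        while let Some(&next) = self.aliases.get(&vreg) {
            vreg = next;
        }
        vreg
    }
}

// Usage: the pre-assigned output v10 becomes an alias of the register v42
// actually produced by the ISLE-generated code.
// let mut a = VRegAliases::default();
// a.set_alias(10, 42);
// assert_eq!(a.resolve(10), 42);
```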
@@ -18,17 +18,19 @@ use crate::ir::{
};
use crate::machinst::{
    non_writable_value_regs, writable_value_regs, ABICallee, BlockIndex, BlockLoweringOrder,
    LoweredBlock, MachLabel, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants,
    VCodeInst, ValueRegs,
    LoweredBlock, MachLabel, Reg, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData,
    VCodeConstants, VCodeInst, ValueRegs, Writable,
};
use crate::CodegenResult;
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::convert::TryInto;
use regalloc::{Reg, StackmapRequestInfo, Writable};
use regalloc2::VReg;
use smallvec::{smallvec, SmallVec};
use std::fmt::Debug;

use super::{first_user_vreg_index, VCodeBuildDirection};

/// An "instruction color" partitions CLIF instructions by side-effecting ops.
/// All instructions with the same "color" are guaranteed not to be separated by
/// any side-effecting op (for this purpose, loads are also considered
@@ -160,8 +162,6 @@ pub trait LowerCtx {
    fn alloc_tmp(&mut self, ty: Type) -> ValueRegs<Writable<Reg>>;
    /// Emit a machine instruction.
    fn emit(&mut self, mach_inst: Self::I);
    /// Emit a machine instruction that is a safepoint.
    fn emit_safepoint(&mut self, mach_inst: Self::I);
    /// Indicate that the side-effect of an instruction has been sunk to the
    /// current scan location. This should only be done when the instruction's
    /// original results are not used (i.e., `put_input_in_regs` is not invoked
@@ -178,6 +178,9 @@ pub trait LowerCtx {
    /// Cause the value in `reg` to be in a virtual reg, by copying it into a new virtual reg
    /// if `reg` is a real reg. `ty` describes the type of the value in `reg`.
    fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg;

    /// Note that one vreg is to be treated as an alias of another.
    fn set_vreg_alias(&mut self, from: Reg, to: Reg);
}

/// A representation of all of the ways in which a value is available, aside
@@ -232,14 +235,6 @@ pub trait LowerBackend {
    }
}

/// A pending instruction to insert and auxiliary information about it: its source location and
/// whether it is a safepoint.
struct InstTuple<I: VCodeInst> {
    loc: SourceLoc,
    is_safepoint: bool,
    inst: I,
}

/// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence
/// from original Inst to MachInsts.
pub struct Lower<'func, I: VCodeInst> {
@@ -287,20 +282,10 @@ pub struct Lower<'func, I: VCodeInst> {
    inst_sunk: FxHashSet<Inst>,

    /// Next virtual register number to allocate.
    next_vreg: u32,

    /// Insts in reverse block order, before final copy to vcode.
    block_insts: Vec<InstTuple<I>>,

    /// Ranges in `block_insts` constituting BBs.
    block_ranges: Vec<(usize, usize)>,

    /// Instructions collected for the BB in progress, in reverse order, with
    /// source-locs attached.
    bb_insts: Vec<InstTuple<I>>,
    next_vreg: usize,

    /// Instructions collected for the CLIF inst in progress, in forward order.
    ir_insts: Vec<InstTuple<I>>,
    ir_insts: Vec<I>,

    /// The register to use for GetPinnedReg, if any, on this architecture.
    pinned_reg: Option<Reg>,
@@ -324,22 +309,22 @@ pub enum RelocDistance {

fn alloc_vregs<I: VCodeInst>(
    ty: Type,
    next_vreg: &mut u32,
    next_vreg: &mut usize,
    vcode: &mut VCodeBuilder<I>,
) -> CodegenResult<ValueRegs<Reg>> {
    let v = *next_vreg;
    let (regclasses, tys) = I::rc_for_type(ty)?;
    *next_vreg += regclasses.len() as u32;
    let regs = match regclasses {
        &[rc0] => ValueRegs::one(Reg::new_virtual(rc0, v)),
        &[rc0, rc1] => ValueRegs::two(Reg::new_virtual(rc0, v), Reg::new_virtual(rc1, v + 1)),
    *next_vreg += regclasses.len();
    let regs: ValueRegs<Reg> = match regclasses {
        &[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
        &[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
        // We can extend this if/when we support 32-bit targets; e.g.,
        // an i128 on a 32-bit machine will need up to four machine regs
        // for a `Value`.
        _ => panic!("Value must reside in 1 or 2 registers"),
    };
    for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
        vcode.set_vreg_type(reg.to_virtual_reg(), reg_ty);
        vcode.set_vreg_type(reg.to_virtual_reg().unwrap(), reg_ty);
    }
    Ok(regs)
}
@@ -358,9 +343,15 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
        block_order: BlockLoweringOrder,
    ) -> CodegenResult<Lower<'func, I>> {
        let constants = VCodeConstants::with_capacity(f.dfg.constants.len());
        let mut vcode = VCodeBuilder::new(abi, emit_info, block_order, constants);
        let mut vcode = VCodeBuilder::new(
            abi,
            emit_info,
            block_order,
            constants,
            VCodeBuildDirection::Backward,
        );

        let mut next_vreg: u32 = 0;
        let mut next_vreg: usize = first_user_vreg_index();

        let mut value_regs = SecondaryMap::with_default(ValueRegs::invalid());

@@ -381,10 +372,11 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
            let regs = alloc_vregs(ty, &mut next_vreg, &mut vcode)?;
            value_regs[result] = regs;
            log::trace!(
                "bb {} inst {} ({:?}): result regs {:?}",
                "bb {} inst {} ({:?}): result {} regs {:?}",
                bb,
                inst,
                f.dfg[inst],
                result,
                regs,
            );
        }
@@ -459,9 +451,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
            inst_sunk: FxHashSet::default(),
            cur_scan_entry_color: None,
            cur_inst: None,
            block_insts: vec![],
            block_ranges: vec![],
            bb_insts: vec![],
            ir_insts: vec![],
            pinned_reg: None,
            vm_context,
@@ -475,6 +464,12 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
            entry_bb,
            self.f.dfg.block_params(entry_bb)
        );

        // Make the vmctx available in debuginfo.
        if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
            self.emit_value_label_marks_for_value(vmctx_val);
        }

        for (i, param) in self.f.dfg.block_params(entry_bb).iter().enumerate() {
            if !self.vcode.abi().arg_is_needed_in_body(i) {
                continue;
@@ -509,14 +504,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
    }

    fn gen_retval_setup(&mut self, gen_ret_inst: GenerateReturn) {
        // Hack: to keep `vmctx` alive, if it exists, we emit a value label here
        // for it if debug info is requested. This ensures that it exists either
        // in a register or spillslot throughout the entire function body, and
        // allows for a better debugging experience.
        if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
            self.emit_value_label_marks_for_value(vmctx_val);
        }

        let retval_regs = self.retval_regs.clone();
        for (i, regs) in retval_regs.into_iter().enumerate() {
            let regs = writable_value_regs(regs);
@@ -534,141 +521,16 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
            GenerateReturn::No => self.vcode.abi().gen_epilogue_placeholder(),
        };
        self.emit(inst);
    }

    fn lower_edge(&mut self, pred: Block, inst: Inst, succ: Block) -> CodegenResult<()> {
        log::trace!("lower_edge: pred {} succ {}", pred, succ);

        let num_args = self.f.dfg.block_params(succ).len();
        debug_assert!(num_args == self.f.dfg.inst_variable_args(inst).len());

        // Most blocks have no params, so skip all the hoop-jumping below and make an early exit.
        if num_args == 0 {
            return Ok(());
        }

        self.cur_inst = Some(inst);

        // Make up two vectors of info:
        //
        // * one for dsts which are to be assigned constants. We'll deal with those second, so
        //   as to minimise live ranges.
        //
        // * one for dsts whose sources are non-constants.

        let mut const_bundles: SmallVec<[_; 16]> = SmallVec::new();
        let mut var_bundles: SmallVec<[_; 16]> = SmallVec::new();

        let mut i = 0;
        for (dst_val, src_val) in self
            .f
            .dfg
            .block_params(succ)
            .iter()
            .zip(self.f.dfg.inst_variable_args(inst).iter())
        {
            let src_val = self.f.dfg.resolve_aliases(*src_val);
            let ty = self.f.dfg.value_type(src_val);

            debug_assert!(ty == self.f.dfg.value_type(*dst_val));
            let dst_regs = self.value_regs[*dst_val];

            let input = self.get_value_as_source_or_const(src_val);
            log::trace!("jump arg {} is {}", i, src_val);
            i += 1;

            if let Some(c) = input.constant {
                log::trace!(" -> constant {}", c);
                const_bundles.push((ty, writable_value_regs(dst_regs), c));
            } else {
                let src_regs = self.put_value_in_regs(src_val);
                log::trace!(" -> reg {:?}", src_regs);
                // Skip self-assignments. Not only are they pointless, they falsely trigger the
                // overlap-check below and hence can cause a lot of unnecessary copying through
                // temporaries.
                if dst_regs != src_regs {
                    var_bundles.push((ty, writable_value_regs(dst_regs), src_regs));
                }
        // Hack: generate a virtual instruction that uses vmctx in
        // order to keep it alive for the duration of the function,
        // for the benefit of debuginfo.
        if self.f.dfg.values_labels.is_some() {
            if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
                let vmctx_reg = self.value_regs[vmctx_val].only_reg().unwrap();
                self.emit(I::gen_dummy_use(vmctx_reg));
            }
        }

        // Deal first with the moves whose sources are variables.

        // FIXME: use regalloc.rs' SparseSetU here. This would avoid all heap allocation
        // for cases of up to circa 16 args. Currently not possible because regalloc.rs
        // does not export it.
        let mut src_reg_set = FxHashSet::<Reg>::default();
        for (_, _, src_regs) in &var_bundles {
            for &reg in src_regs.regs() {
                src_reg_set.insert(reg);
            }
        }
        let mut overlaps = false;
        'outer: for (_, dst_regs, _) in &var_bundles {
            for &reg in dst_regs.regs() {
                if src_reg_set.contains(&reg.to_reg()) {
                    overlaps = true;
                    break 'outer;
                }
            }
        }

        // If, as is mostly the case, the source and destination register sets are
        // non-overlapping, then we can copy directly, so as to save the register allocator work.
        if !overlaps {
            for (ty, dst_regs, src_regs) in &var_bundles {
                let (_, reg_tys) = I::rc_for_type(*ty)?;
                for ((dst, src), reg_ty) in dst_regs
                    .regs()
                    .iter()
                    .zip(src_regs.regs().iter())
                    .zip(reg_tys.iter())
                {
                    self.emit(I::gen_move(*dst, *src, *reg_ty));
                }
            }
        } else {
            // There's some overlap, so play safe and copy via temps.
            let mut tmp_regs = SmallVec::<[ValueRegs<Writable<Reg>>; 16]>::new();
            for (ty, _, _) in &var_bundles {
                tmp_regs.push(self.alloc_tmp(*ty));
            }
            for ((ty, _, src_reg), tmp_reg) in var_bundles.iter().zip(tmp_regs.iter()) {
                let (_, reg_tys) = I::rc_for_type(*ty)?;
                for ((tmp, src), reg_ty) in tmp_reg
                    .regs()
                    .iter()
                    .zip(src_reg.regs().iter())
                    .zip(reg_tys.iter())
                {
                    self.emit(I::gen_move(*tmp, *src, *reg_ty));
                }
            }
            for ((ty, dst_reg, _), tmp_reg) in var_bundles.iter().zip(tmp_regs.iter()) {
                let (_, reg_tys) = I::rc_for_type(*ty)?;
                for ((dst, tmp), reg_ty) in dst_reg
                    .regs()
                    .iter()
                    .zip(tmp_reg.regs().iter())
                    .zip(reg_tys.iter())
                {
                    self.emit(I::gen_move(*dst, tmp.to_reg(), *reg_ty));
                }
            }
        }

        // Now, finally, deal with the moves whose sources are constants.
        for (ty, dst_reg, const_val) in &const_bundles {
            for inst in I::gen_constant(*dst_reg, *const_val as u128, *ty, |ty| {
                self.alloc_tmp(ty).only_reg().unwrap()
            })
            .into_iter()
            {
                self.emit(inst);
            }
        }

        Ok(())
    }

    /// Has this instruction been sunk to a use-site (i.e., away from its
@@ -694,21 +556,24 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
        self.cur_scan_entry_color = Some(self.block_end_colors[block]);
        // Lowering loop:
        // - For each non-branch instruction, in reverse order:
        //   - If side-effecting (load, store, branch/call/return, possible trap), or if
        //     used outside of this block, or if demanded by another inst, then lower.
        //   - If side-effecting (load, store, branch/call/return,
        //     possible trap), or if used outside of this block, or if
        //     demanded by another inst, then lower.
        //
        // That's it! Lowering of side-effecting ops will force all *needed*
        // (live) non-side-effecting ops to be lowered at the right places, via
        // the `use_input_reg()` callback on the `LowerCtx` (that's us). That's
        // because `use_input_reg()` sets the eager/demand bit for any insts
        // whose result registers are used.
        // That's it! Lowering of side-effecting ops will force all
        // *needed* (live) non-side-effecting ops to be lowered at the
        // right places, via the `use_input_reg()` callback on the
        // `LowerCtx` (that's us). That's because `use_input_reg()`
        // sets the eager/demand bit for any insts whose result
        // registers are used.
        //
        // We build up the BB in reverse instruction order in `bb_insts`.
        // Because the machine backend calls `ctx.emit()` in forward order, we
        // collect per-IR-inst lowered instructions in `ir_insts`, then reverse
        // these and append to `bb_insts` as we go backward through the block.
        // `bb_insts` are then reversed again and appended to the VCode at the
        // end of the BB (in the toplevel driver `lower()`).
        // We set the VCodeBuilder to "backward" mode, so we emit
        // blocks in reverse order wrt the BlockIndex sequence, and
        // emit instructions in reverse order within blocks. Because
        // the machine backend calls `ctx.emit()` in forward order, we
        // collect per-IR-inst lowered instructions in `ir_insts`,
        // then reverse these and append to the VCode at the end of
        // each IR instruction.
        for inst in self.f.layout.block_insts(block).rev() {
            let data = &self.f.dfg[inst];
            let has_side_effect = has_lowering_side_effect(self.f, inst);
@@ -750,9 +615,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
            if has_side_effect || value_needed {
                log::trace!("lowering: inst {}: {:?}", inst, self.f.dfg[inst]);
                backend.lower(self, inst)?;
                // Emit value-label markers if needed, to later recover debug
                // mappings.
                self.emit_value_label_markers_for_inst(inst);
            }
            if data.opcode().is_return() {
                // Return: handle specially, using ABI-appropriate sequence.
@@ -767,11 +629,33 @@ impl<'func, I: VCodeInst> Lower<'func, I> {

                let loc = self.srcloc(inst);
                self.finish_ir_inst(loc);

                // Emit value-label markers if needed, to later recover
                // debug mappings. This must happen before the instruction
                // (so after we emit, in bottom-to-top pass).
                self.emit_value_label_markers_for_inst(inst);
            }

        // Add the block params to this block.
        self.add_block_params(block)?;

        self.cur_scan_entry_color = None;
        Ok(())
    }
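To make the ordering discipline in the comment above concrete, here is a toy sketch (not this PR's code) of emitting forward per-IR-inst groups into a backward-built buffer and recovering forward order with a single final reversal:

```
// Instructions are pushed in reverse block order and reverse order within
// each IR inst's expansion; one final reversal restores forward order.
#[derive(Default)]
struct BackwardBuffer {
    insts: Vec<&'static str>,
}

impl BackwardBuffer {
    // `ir_insts` holds one CLIF instruction's lowering in forward order.
    fn finish_ir_inst(&mut self, ir_insts: &mut Vec<&'static str>) {
        for inst in ir_insts.drain(..).rev() {
            self.insts.push(inst);
        }
    }

    fn build(mut self) -> Vec<&'static str> {
        self.insts.reverse();
        self.insts
    }
}

fn main() {
    let mut buf = BackwardBuffer::default();
    // Lower the block bottom-up: the CLIF inst "y = add" comes after "x = load".
    buf.finish_ir_inst(&mut vec!["add_lo", "add_hi"]);
    buf.finish_ir_inst(&mut vec!["load"]);
    assert_eq!(buf.build(), vec!["load", "add_lo", "add_hi"]);
}
```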
    fn add_block_params(&mut self, block: Block) -> CodegenResult<()> {
        for &param in self.f.dfg.block_params(block) {
            let ty = self.f.dfg.value_type(param);
            let (_reg_rcs, reg_tys) = I::rc_for_type(ty)?;
            debug_assert_eq!(reg_tys.len(), self.value_regs[param].len());
            for (&reg, &rty) in self.value_regs[param].regs().iter().zip(reg_tys.iter()) {
                self.vcode
                    .add_block_param(reg.to_virtual_reg().unwrap(), rty);
            }
        }
        Ok(())
    }

    fn get_value_labels<'a>(&'a self, val: Value, depth: usize) -> Option<&'a [ValueLabelStart]> {
        if let Some(ref values_labels) = self.f.dfg.values_labels {
            log::trace!(
@@ -794,7 +678,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
    }

    fn emit_value_label_marks_for_value(&mut self, val: Value) {
        let mut markers: SmallVec<[I; 4]> = smallvec![];
        let regs = self.value_regs[val];
        if regs.len() > 1 {
            return;
@@ -813,12 +696,9 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
                reg,
                label,
            );
            markers.push(I::gen_value_label_marker(label, reg));
            self.vcode.add_value_label(reg, label);
        }
    }
        for marker in markers {
            self.emit(marker);
        }
    }

    fn emit_value_label_markers_for_inst(&mut self, inst: Inst) {
@@ -849,36 +729,17 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
    }

    fn finish_ir_inst(&mut self, loc: SourceLoc) {
        // `bb_insts` is kept in reverse order, so emit the instructions in
        // reverse order.
        for mut tuple in self.ir_insts.drain(..).rev() {
            tuple.loc = loc;
            self.bb_insts.push(tuple);
        self.vcode.set_srcloc(loc);
        // The VCodeBuilder builds in reverse order (and reverses at
        // the end), but `ir_insts` is in forward order, so reverse
        // it.
        for inst in self.ir_insts.drain(..).rev() {
            self.vcode.push(inst);
        }
    }

    fn finish_bb(&mut self) {
        let start = self.block_insts.len();
        for tuple in self.bb_insts.drain(..).rev() {
            self.block_insts.push(tuple);
        }
        let end = self.block_insts.len();
        self.block_ranges.push((start, end));
    }

    fn copy_bbs_to_vcode(&mut self) {
        for &(start, end) in self.block_ranges.iter().rev() {
            for &InstTuple {
                loc,
                is_safepoint,
                ref inst,
            } in &self.block_insts[start..end]
            {
                self.vcode.set_srcloc(loc);
                self.vcode.push(inst.clone(), is_safepoint);
            }
            self.vcode.end_bb();
        }
        self.vcode.end_bb();
    }

    fn lower_clif_branches<B: LowerBackend<MInst = I>>(
@@ -900,9 +761,28 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
        backend.lower_branch_group(self, branches, targets)?;
        let loc = self.srcloc(branches[0]);
        self.finish_ir_inst(loc);
        // Add block param outputs for current block.
        self.lower_branch_blockparam_args(block);
        Ok(())
    }

    fn lower_branch_blockparam_args(&mut self, block: Block) {
        visit_block_succs(self.f, block, |inst, _succ| {
            let branch_args = self.f.dfg.inst_variable_args(inst);
            let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];
            for &arg in branch_args {
                let arg = self.f.dfg.resolve_aliases(arg);
                let regs = self.put_value_in_regs(arg);
                for &vreg in regs.regs() {
                    let vreg = self.vcode.resolve_vreg_alias(vreg.into());
                    branch_arg_vregs.push(vreg.into());
                }
            }
            self.vcode.add_branch_args_for_succ(&branch_arg_vregs[..]);
        });
        self.finish_ir_inst(SourceLoc::default());
    }

    fn collect_branches_and_targets(
        &self,
        bindex: BlockIndex,
@@ -927,10 +807,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
    }

    /// Lower the function.
    pub fn lower<B: LowerBackend<MInst = I>>(
        mut self,
        backend: &B,
    ) -> CodegenResult<(VCode<I>, StackmapRequestInfo)> {
    pub fn lower<B: LowerBackend<MInst = I>>(mut self, backend: &B) -> CodegenResult<VCode<I>> {
        log::trace!("about to lower function: {:?}", self.f);

        // Initialize the ABI object, giving it a temp if requested.
@@ -945,7 +822,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
        // not the whole `Lower` impl).
        self.pinned_reg = backend.maybe_pinned_reg();

        self.vcode.set_entry(0);
        self.vcode.set_entry(BlockIndex::new(0));

        // Reused vectors for branch lowering.
        let mut branches: SmallVec<[Inst; 2]> = SmallVec::new();
@@ -963,7 +840,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {

        // Main lowering loop over lowered blocks.
        for (bindex, lb) in lowered_order.iter().enumerate().rev() {
            let bindex = bindex as BlockIndex;
            let bindex = BlockIndex::new(bindex);

            // Lower the block body in reverse order (see comment in
            // `lower_clif_block()` for rationale).
@@ -976,30 +853,41 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
                    self.finish_ir_inst(self.srcloc(branches[0]));
                }
            } else {
                // If no orig block, this must be a pure edge block; get the successor and
                // emit a jump.
                // If no orig block, this must be a pure edge block;
                // get the successor and emit a jump. Add block params
                // according to the one successor, and pass them
                // through; note that the successor must have an
                // original block.
                let (_, succ) = self.vcode.block_order().succ_indices(bindex)[0];

                let orig_succ = lowered_order[succ.index()];
                let orig_succ = orig_succ
                    .orig_block()
                    .expect("Edge block succ must be body block");

                let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];
                for ty in self.f.dfg.block_param_types(orig_succ) {
                    let regs = alloc_vregs(ty, &mut self.next_vreg, &mut self.vcode)?;
                    for &reg in regs.regs() {
                        branch_arg_vregs.push(reg);
                        let vreg = reg.to_virtual_reg().unwrap();
                        self.vcode
                            .add_block_param(vreg, self.vcode.get_vreg_type(vreg));
                    }
                }
                self.vcode.add_branch_args_for_succ(&branch_arg_vregs[..]);

                self.emit(I::gen_jump(MachLabel::from_block(succ)));
                self.finish_ir_inst(SourceLoc::default());
            }

            // Out-edge phi moves.
            if let Some((pred, inst, succ)) = lb.out_edge() {
                self.lower_edge(pred, inst, succ)?;
                self.finish_ir_inst(SourceLoc::default());
            }
            // Original block body.
            if let Some(bb) = lb.orig_block() {
                self.lower_clif_block(backend, bb)?;
                self.emit_value_label_markers_for_block_args(bb);
            }
            // In-edge phi moves.
            if let Some((pred, inst, succ)) = lb.in_edge() {
                self.lower_edge(pred, inst, succ)?;
                self.finish_ir_inst(SourceLoc::default());
            }

            if bindex == 0 {
            if bindex.index() == 0 {
                // Set up the function with arg vreg inits.
                self.gen_arg_setup();
                self.finish_ir_inst(SourceLoc::default());
@@ -1008,13 +896,12 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
            self.finish_bb();
        }

        self.copy_bbs_to_vcode();

        // Now that we've emitted all instructions into the VCodeBuilder, let's build the VCode.
        let (vcode, stack_map_info) = self.vcode.build();
        // Now that we've emitted all instructions into the
        // VCodeBuilder, let's build the VCode.
        let vcode = self.vcode.build();
        log::trace!("built vcode: {:?}", vcode);

        Ok((vcode, stack_map_info))
        Ok(vcode)
    }
}

@@ -1278,19 +1165,8 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
    }

    fn emit(&mut self, mach_inst: I) {
        self.ir_insts.push(InstTuple {
            loc: SourceLoc::default(),
            is_safepoint: false,
            inst: mach_inst,
        });
    }

    fn emit_safepoint(&mut self, mach_inst: I) {
        self.ir_insts.push(InstTuple {
            loc: SourceLoc::default(),
            is_safepoint: true,
            inst: mach_inst,
        });
        log::trace!("emit: {:?}", mach_inst);
        self.ir_insts.push(mach_inst);
    }

    fn sink_inst(&mut self, ir_inst: Inst) {
@@ -1336,7 +1212,7 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
    }

    fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg {
        if reg.is_virtual() {
        if reg.to_virtual_reg().is_some() {
            reg
        } else {
            let new_reg = self.alloc_tmp(ty).only_reg().unwrap();
@@ -1344,6 +1220,11 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
            new_reg.to_reg()
        }
    }

    fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
        log::trace!("set vreg alias: from {:?} to {:?}", from, to);
        self.vcode.set_vreg_alias(from, to);
    }
}

/// Visit all successors of a block with a given visitor closure.

@@ -8,14 +8,10 @@
//!
//! The container for machine instructions, at various stages of construction,
//! is the `VCode` struct. We refer to a sequence of machine instructions organized
//! into basic blocks as "vcode". This is short for "virtual-register code", though
//! it's a bit of a misnomer because near the end of the pipeline, vcode has all
//! real registers. Nevertheless, the name is catchy and we like it.
//! into basic blocks as "vcode". This is short for "virtual-register code".
//!
//! The compilation pipeline, from an `ir::Function` (already optimized as much as
//! you like by machine-independent optimization passes) onward, is as follows.
//! (N.B.: though we show the VCode separately at each stage, the passes
//! mutate the VCode in place; these are not separate copies of the code.)
//!
//! ```plain
//!
@@ -31,37 +27,25 @@
//!     |          with unknown offsets.
//!     |        - critical edges (actually all edges)
//!     |          are split.)
//!     | [regalloc]
//!     |
//! VCode<arch_backend::Inst>   (machine instructions:
//!     |        - all real registers.
//!     |        - new instruction sequence returned
//!     |          out-of-band in RegAllocResult.
//!     |        - instruction sequence has spills,
//!     |          reloads, and moves inserted.
//!     |        - other invariants same as above.)
//!     | [regalloc --> `regalloc2::Output`; VCode is unchanged]
//!     |
//!     | [preamble/postamble]
//!     | [binary emission via MachBuffer]
//!     |
//! VCode<arch_backend::Inst>   (machine instructions:
//!     |        - stack-frame size known.
//!     |        - out-of-band instruction sequence
//!     |          has preamble prepended to entry
//!     |          block, and postamble injected before
//!     |          every return instruction.
//!     |        - all symbolic stack references to
//!     |          stackslots and spillslots are resolved
//!     |          to concrete FP-offset mem addresses.)
//!     |
//!     | [binary emission via MachBuffer
//!     |  with streaming branch resolution/simplification]
//!     |
//! Vec<u8>   (machine code!)
//! Vec<u8>   (machine code:
//!     |        - two-dest branches resolved via
//!     |          streaming branch resolution/simplification.
//!     |        - regalloc `Allocation` results used directly
//!     |          by instruction emission code.
//!     |        - prologue and epilogue(s) built and emitted
//!     |          directly during emission.
//!     |        - nominal-SP-relative offsets resolved
//!     |          by tracking EmitState.)
//!
//! ```

use crate::binemit::{Addend, CodeInfo, CodeOffset, Reloc, StackMap};
use crate::ir::{SourceLoc, StackSlot, Type, ValueLabel};
use crate::ir::{SourceLoc, StackSlot, Type};
use crate::result::CodegenResult;
use crate::settings::Flags;
use crate::value_label::ValueLabelsRanges;
@@ -69,10 +53,7 @@ use alloc::boxed::Box;
use alloc::vec::Vec;
use core::fmt::Debug;
use cranelift_entity::PrimaryMap;
use regalloc::RegUsageCollector;
use regalloc::{
    RealReg, RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable,
};
use regalloc2::{Allocation, VReg};
use smallvec::{smallvec, SmallVec};
use std::string::String;

@@ -98,20 +79,15 @@ pub use helpers::*;
pub mod inst_common;
pub use inst_common::*;
pub mod valueregs;
pub use reg::*;
pub use valueregs::*;
pub mod debug;
pub use regmapping::*;
pub mod regmapping;
pub mod reg;

/// A machine instruction.
pub trait MachInst: Clone + Debug {
    /// Return the registers referenced by this machine instruction along with
    /// the modes of reference (use, def, modify).
    fn get_regs(&self, collector: &mut RegUsageCollector);

    /// Map virtual registers to physical registers using the given virt->phys
    /// maps corresponding to the program points prior to, and after, this instruction.
    fn map_regs<RUM: RegUsageMapper>(&mut self, maps: &RUM);
    fn get_operands<F: Fn(VReg) -> VReg>(&self, collector: &mut OperandCollector<'_, F>);

    /// If this is a simple move, return the (source, destination) tuple of registers.
    fn is_move(&self) -> Option<(Writable<Reg>, Reg)>;
@@ -128,11 +104,6 @@ pub trait MachInst: Clone + Debug {
        true
    }

    /// If this is a load or store to the stack, return that info.
    fn stack_op_info(&self) -> Option<MachInstStackOpInfo> {
        None
    }

    /// Generate a move.
    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;

@@ -144,10 +115,9 @@ pub trait MachInst: Clone + Debug {
        alloc_tmp: F,
    ) -> SmallVec<[Self; 4]>;

    /// Possibly operate on a value directly in a spill-slot rather than a
    /// register. Useful if the machine has register-memory instruction forms
    /// (e.g., add directly from or directly to memory), like x86.
    fn maybe_direct_reload(&self, reg: VirtualReg, slot: SpillSlot) -> Option<Self>;
    /// Generate a dummy instruction that will keep a value alive but
    /// has no other purpose.
    fn gen_dummy_use(reg: Reg) -> Self;

    /// Determine register class(es) to store the given Cranelift type, and the
    /// Cranelift type actually stored in the underlying register(s). May return
@@ -163,6 +133,13 @@ pub trait MachInst: Clone + Debug {
    /// generating spills and reloads for individual registers.
    fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])>;

    /// Get an appropriate type that can fully hold a value in a given
    /// register class. This may not be the only type that maps to
    /// that class, but when used with `gen_move()` or the ABI trait's
    /// load/spill constructors, it should produce instruction(s) that
    /// move the entire register contents.
    fn canonical_type_for_rc(rc: RegClass) -> Type;

    /// Generate a jump to another target. Used during lowering of
    /// control flow.
    fn gen_jump(target: MachLabel) -> Self;
@@ -187,16 +164,8 @@ pub trait MachInst: Clone + Debug {
    /// be dependent on compilation flags.
    fn ref_type_regclass(_flags: &Flags) -> RegClass;

    /// Does this instruction define a ValueLabel? Returns the `Reg` whose value
    /// becomes the new value of the `ValueLabel` after this instruction.
    fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> {
        None
    }

    /// Create a marker instruction that defines a value label.
    fn gen_value_label_marker(_label: ValueLabel, _reg: Reg) -> Self {
        Self::gen_nop(0)
    }
    /// Is this a safepoint?
    fn is_safepoint(&self) -> bool;

/// A label-use kind: a type that describes the types of label references that
/// can occur in an instruction.
@@ -266,35 +235,6 @@ pub enum MachTerminator<'a> {
    Indirect(&'a [MachLabel]),
}

impl<'a> MachTerminator<'a> {
    /// Get the successor labels named in a `MachTerminator`.
    pub fn get_succs(&self) -> SmallVec<[MachLabel; 2]> {
        let mut ret = smallvec![];
        match self {
            &MachTerminator::Uncond(l) => {
                ret.push(l);
            }
            &MachTerminator::Cond(l1, l2) => {
                ret.push(l1);
                ret.push(l2);
            }
            &MachTerminator::Indirect(ls) => {
                ret.extend(ls.iter().cloned());
            }
            _ => {}
        }
        ret
    }

    /// Is this a terminator?
    pub fn is_term(&self) -> bool {
        match self {
            MachTerminator::None => false,
            _ => true,
        }
    }
}

/// A trait describing the ability to encode a MachInst into binary machine code.
pub trait MachInstEmit: MachInst {
    /// Persistent state carried across `emit` invocations.
@@ -302,9 +242,15 @@ pub trait MachInstEmit: MachInst {
    /// Constant information used in `emit` invocations.
    type Info;
    /// Emit the instruction.
    fn emit(&self, code: &mut MachBuffer<Self>, info: &Self::Info, state: &mut Self::State);
    fn emit(
        &self,
        allocs: &[Allocation],
        code: &mut MachBuffer<Self>,
        info: &Self::Info,
        state: &mut Self::State,
    );
    /// Pretty-print the instruction.
    fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut Self::State) -> String;
    fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut Self::State) -> String;
}

/// A trait describing the emission state carried between MachInsts when
@@ -409,15 +355,3 @@ pub enum UnwindInfoKind {
    #[cfg(feature = "unwind")]
    Windows,
}

/// Info about an operation that loads or stores from/to the stack.
#[derive(Clone, Copy, Debug)]
pub enum MachInstStackOpInfo {
    /// Load from an offset from the nominal stack pointer into the given reg.
    LoadNomSPOff(Reg, i64),
    /// Store to an offset from the nominal stack pointer from the given reg.
    StoreNomSPOff(Reg, i64),
    /// Adjustment of nominal-SP up or down. This value is added to subsequent
    /// offsets in loads/stores above to produce real-SP offsets.
    NomSPAdj(i64),
}
cranelift/codegen/src/machinst/reg.rs (new file, 504 lines)
@@ -0,0 +1,504 @@
//! Definitions for registers, operands, etc. Provides a thin
//! interface over the register allocator so that we can more easily
//! swap it out or shim it when necessary.

use crate::machinst::MachInst;
use alloc::{string::String, vec::Vec};
use core::{fmt::Debug, hash::Hash};
use regalloc2::{Allocation, Operand, PReg, VReg};
use smallvec::{smallvec, SmallVec};

#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};

/// The first 128 vregs (64 int, 64 float/vec) are "pinned" to
/// physical registers: this means that they are always constrained to
/// the corresponding register at all use/mod/def sites.
///
/// Arbitrary vregs can also be constrained to physical registers at
/// particular use/def/mod sites, and this is preferable; but pinned
/// vregs allow us to migrate code that has been written using
/// RealRegs directly.
const PINNED_VREGS: usize = 128;

/// Convert a `VReg` to its pinned `PReg`, if any.
pub fn pinned_vreg_to_preg(vreg: VReg) -> Option<PReg> {
    if vreg.vreg() < PINNED_VREGS {
        Some(PReg::from_index(vreg.vreg()))
    } else {
        None
    }
}

/// Give the first available vreg for generated code (i.e., after all
/// pinned vregs).
pub fn first_user_vreg_index() -> usize {
    // This is just the constant defined above, but we keep the
    // constant private and expose only this helper function with the
    // specific name in order to ensure other parts of the code don't
    // open-code and depend on the index-space scheme.
    PINNED_VREGS
}
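A small illustration of the pinned-vreg index scheme: assuming the two helpers above are in scope, vreg indices below 128 round-trip to the corresponding physical register, while indices from `first_user_vreg_index()` upward are ordinary virtual registers.

```
use regalloc2::{PReg, RegClass, VReg};

fn main() {
    // Index 3 is within the pinned range, so it names the physical
    // register PReg::from_index(3).
    let pinned = VReg::new(3, RegClass::Int);
    assert_eq!(pinned_vreg_to_preg(pinned), Some(PReg::from_index(3)));

    // The first user vreg (index 128) is not pinned to any PReg.
    let user = VReg::new(first_user_vreg_index(), RegClass::Int);
    assert_eq!(pinned_vreg_to_preg(user), None);
}
```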

/// A register named in an instruction. This register can be either a
/// virtual register or a fixed physical register. It does not have
/// any constraints applied to it: those can be added later in
/// `MachInst::get_operands()` when the `Reg`s are converted to
/// `Operand`s.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Reg(VReg);

impl Reg {
    /// Get the physical register (`RealReg`), if this register is
    /// one.
    pub fn to_real_reg(self) -> Option<RealReg> {
        if pinned_vreg_to_preg(self.0).is_some() {
            Some(RealReg(self.0))
        } else {
            None
        }
    }

    /// Get the virtual (non-physical) register, if this register is
    /// one.
    pub fn to_virtual_reg(self) -> Option<VirtualReg> {
        if pinned_vreg_to_preg(self.0).is_none() {
            Some(VirtualReg(self.0))
        } else {
            None
        }
    }

    /// Get the class of this register.
    pub fn class(self) -> RegClass {
        self.0.class()
    }

    /// Is this a real (physical) reg?
    pub fn is_real(self) -> bool {
        self.to_real_reg().is_some()
    }

    /// Is this a virtual reg?
    pub fn is_virtual(self) -> bool {
        self.to_virtual_reg().is_some()
    }
}

impl std::fmt::Debug for Reg {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        if let Some(rreg) = self.to_real_reg() {
            let preg: PReg = rreg.into();
            write!(f, "{}", preg)
        } else if let Some(vreg) = self.to_virtual_reg() {
            let vreg: VReg = vreg.into();
            write!(f, "{}", vreg)
        } else {
            unreachable!()
        }
    }
}

/// A real (physical) register. This corresponds to one of the target
/// ISA's named registers and can be used as an instruction operand.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct RealReg(VReg);

impl RealReg {
    /// Get the class of this register.
    pub fn class(self) -> RegClass {
        self.0.class()
    }

    /// Get the hardware encoding of this register.
    pub fn hw_enc(self) -> u8 {
        PReg::from(self).hw_enc() as u8
    }
}

impl std::fmt::Debug for RealReg {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        Reg::from(*self).fmt(f)
    }
}

/// A virtual register. This can be allocated into a real (physical)
/// register of the appropriate register class, but which one is not
/// specified. Virtual registers are used when generating `MachInst`s,
/// before register allocation occurs, in order to allow us to name as
/// many register-carried values as necessary.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct VirtualReg(VReg);

impl VirtualReg {
    /// Get the class of this register.
    pub fn class(self) -> RegClass {
        self.0.class()
    }

    /// Get the index of this virtual register.
    pub fn index(self) -> usize {
        self.0.vreg()
    }
}

impl std::fmt::Debug for VirtualReg {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        Reg::from(*self).fmt(f)
    }
}

/// A type wrapper that indicates a register type is writable. The
/// underlying register can be extracted, and the type wrapper can be
/// built using an arbitrary register. Hence, this type-level wrapper
/// is not strictly a guarantee. However, "casting" to a writable
/// register is an explicit operation for which we can
/// audit. Ordinarily, internal APIs in the compiler backend should
/// take a `Writable<Reg>` whenever the register is written, and the
/// usual, frictionless way to get one of these is to allocate a new
/// temporary.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Writable<T: Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Hash> {
    reg: T,
}

impl<T: Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Hash> Writable<T> {
    /// Explicitly construct a `Writable<T>` from a `T`. As noted in
    /// the documentation for `Writable`, this is not hidden or
    /// disallowed from the outside; anyone can perform the "cast";
    /// but it is explicit so that we can audit the use sites.
    pub fn from_reg(reg: T) -> Writable<T> {
        Writable { reg }
    }

    /// Get the underlying register, which can be read.
    pub fn to_reg(self) -> T {
        self.reg
    }

    /// Map the underlying register to another value or type.
    pub fn map<U, F>(self, f: F) -> Writable<U>
    where
        U: Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Hash,
        F: Fn(T) -> U,
    {
        Writable { reg: f(self.reg) }
    }
}
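A quick illustration of the `Writable` discipline, using a hypothetical toy register type in place of `Reg`: `from_reg` is the single, auditable way to assert writability, and `to_reg` reads the register back out.

```
// Hypothetical register type standing in for `Reg` in this sketch.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
struct R(u32);

fn main() {
    // Mark a register as a write destination explicitly...
    let dst = Writable::from_reg(R(7));
    // ...and read it back out where needed.
    assert_eq!(dst.to_reg(), R(7));
    // `map` transforms the wrapped register while preserving writability.
    let dst2 = dst.map(|R(i)| R(i + 1));
    assert_eq!(dst2.to_reg(), R(8));
}
```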

// Conversions between regalloc2 types (VReg) and our types
// (VirtualReg, RealReg, Reg).

impl std::convert::From<regalloc2::VReg> for Reg {
    fn from(vreg: regalloc2::VReg) -> Reg {
        Reg(vreg)
    }
}

impl std::convert::From<regalloc2::VReg> for VirtualReg {
    fn from(vreg: regalloc2::VReg) -> VirtualReg {
        debug_assert!(pinned_vreg_to_preg(vreg).is_none());
        VirtualReg(vreg)
    }
}

impl std::convert::From<regalloc2::VReg> for RealReg {
    fn from(vreg: regalloc2::VReg) -> RealReg {
        debug_assert!(pinned_vreg_to_preg(vreg).is_some());
        RealReg(vreg)
    }
}

impl std::convert::From<Reg> for regalloc2::VReg {
    /// Extract the underlying `regalloc2::VReg`. Note that physical
    /// registers also map to particular (special) VRegs, so this
    /// method can be used either on virtual or physical `Reg`s.
    fn from(reg: Reg) -> regalloc2::VReg {
        reg.0
    }
}

impl std::convert::From<VirtualReg> for regalloc2::VReg {
    fn from(reg: VirtualReg) -> regalloc2::VReg {
        reg.0
    }
}

impl std::convert::From<RealReg> for regalloc2::VReg {
    fn from(reg: RealReg) -> regalloc2::VReg {
        reg.0
    }
}

impl std::convert::From<RealReg> for regalloc2::PReg {
    fn from(reg: RealReg) -> regalloc2::PReg {
        PReg::from_index(reg.0.vreg())
    }
}

impl std::convert::From<regalloc2::PReg> for RealReg {
    fn from(preg: regalloc2::PReg) -> RealReg {
        RealReg(VReg::new(preg.index(), preg.class()))
    }
}

impl std::convert::From<regalloc2::PReg> for Reg {
    fn from(preg: regalloc2::PReg) -> Reg {
        Reg(VReg::new(preg.index(), preg.class()))
    }
}

impl std::convert::From<RealReg> for Reg {
    fn from(reg: RealReg) -> Reg {
        Reg(reg.0)
    }
}

impl std::convert::From<VirtualReg> for Reg {
    fn from(reg: VirtualReg) -> Reg {
        Reg(reg.0)
    }
}
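For orientation, a sketch of how these conversions compose under the pinning scheme: a `PReg` round-trips through `RealReg` and `Reg` back to a `VReg` whose index equals the `PReg` index. This assumes the types and `From` impls above are in scope.

```
use regalloc2::{PReg, VReg};

fn main() {
    let preg = PReg::from_index(5);
    let rreg = RealReg::from(preg);
    let reg = Reg::from(rreg);
    // The underlying VReg index equals the PReg index for pinned vregs.
    let vreg = VReg::from(reg);
    assert_eq!(vreg.vreg(), 5);
    assert_eq!(PReg::from(rreg).index(), 5);
}
```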
|
||||
|
||||
/// A spill slot.
|
||||
pub type SpillSlot = regalloc2::SpillSlot;
|
||||
|
||||
/// A register class. Each register in the ISA has one class, and the
|
||||
/// classes are disjoint. Most modern ISAs will have just two classes:
|
||||
/// the integer/general-purpose registers (GPRs), and the float/vector
|
||||
/// registers (typically used for both).
|
||||
///
|
||||
/// Note that unlike some other compiler backend/register allocator
|
||||
/// designs, we do not allow for overlapping classes, i.e. registers
|
||||
/// that belong to more than one class, because doing so makes the
|
||||
/// allocation problem significantly more complex. Instead, when a
|
||||
/// register can be addressed under different names for different
|
||||
/// sizes (for example), the backend author should pick classes that
|
||||
/// denote some fundamental allocation unit that encompasses the whole
|
||||
/// register. For example, always allocate 128-bit vector registers
|
||||
/// `v0`..`vN`, even though `f32` and `f64` values may use only the
|
||||
/// low 32/64 bits of those registers and name them differently.
|
||||
pub type RegClass = regalloc2::RegClass;
|
||||
|
||||
/// An OperandCollector is a wrapper around a Vec of Operands
|
||||
/// (flattened array for a whole sequence of instructions) that
|
||||
/// gathers operands from a single instruction and provides the range
|
||||
/// in the flattened array.
|
||||
#[derive(Debug)]
|
||||
pub struct OperandCollector<'a, F: Fn(VReg) -> VReg> {
|
||||
operands: &'a mut Vec<Operand>,
|
||||
operands_start: usize,
|
||||
clobbers: Vec<PReg>,
|
||||
renamer: F,
|
||||
}
|
||||
|
||||
impl<'a, F: Fn(VReg) -> VReg> OperandCollector<'a, F> {
|
||||
/// Start gathering operands into one flattened operand array.
|
||||
pub fn new(operands: &'a mut Vec<Operand>, renamer: F) -> Self {
|
||||
let operands_start = operands.len();
|
||||
Self {
|
||||
operands,
|
||||
operands_start,
|
||||
clobbers: vec![],
|
||||
renamer,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add an operand.
|
||||
fn add_operand(&mut self, operand: Operand) {
|
||||
let vreg = (self.renamer)(operand.vreg());
|
||||
let operand = Operand::new(vreg, operand.constraint(), operand.kind(), operand.pos());
|
||||
self.operands.push(operand);
|
||||
}
|
||||
|
||||
/// Add a clobber.
|
    fn add_clobber(&mut self, clobber: PReg) {
        self.clobbers.push(clobber);
    }

    /// Finish the operand collection and return the tuple giving the
    /// range of indices in the flattened operand array, and the
    /// clobber array.
    pub fn finish(self) -> ((u32, u32), Vec<PReg>) {
        let start = self.operands_start as u32;
        let end = self.operands.len() as u32;
        ((start, end), self.clobbers)
    }

    /// Add a register use, at the start of the instruction (`Before`
    /// position).
    pub fn reg_use(&mut self, reg: Reg) {
        self.add_operand(Operand::reg_use(reg.into()));
    }

    /// Add multiple register uses.
    pub fn reg_uses(&mut self, regs: &[Reg]) {
        for &reg in regs {
            self.reg_use(reg);
        }
    }

    /// Add a register def, at the end of the instruction (`After`
    /// position). Use only when this def will be written after all
    /// uses are read.
    pub fn reg_def(&mut self, reg: Writable<Reg>) {
        self.add_operand(Operand::reg_def(reg.to_reg().into()));
    }

    /// Add multiple register defs.
    pub fn reg_defs(&mut self, regs: &[Writable<Reg>]) {
        for &reg in regs {
            self.reg_def(reg);
        }
    }

    /// Add a register "early def", which logically occurs at the
    /// beginning of the instruction, alongside all uses. Use this
    /// when the def may be written before all uses are read; the
    /// regalloc will ensure that it does not overwrite any uses.
    pub fn reg_early_def(&mut self, reg: Writable<Reg>) {
        self.add_operand(Operand::reg_def_at_start(reg.to_reg().into()));
    }

    /// Add a register "fixed use", which ties a vreg to a particular
    /// RealReg at this point.
    pub fn reg_fixed_use(&mut self, reg: Reg, rreg: Reg) {
        let rreg = rreg.to_real_reg().expect("fixed reg is not a RealReg");
        self.add_operand(Operand::reg_fixed_use(reg.into(), rreg.into()));
    }

    /// Add a register "fixed def", which ties a vreg to a particular
    /// RealReg at this point.
    pub fn reg_fixed_def(&mut self, reg: Writable<Reg>, rreg: Reg) {
        let rreg = rreg.to_real_reg().expect("fixed reg is not a RealReg");
        self.add_operand(Operand::reg_fixed_def(reg.to_reg().into(), rreg.into()));
    }

    /// Add a register def that reuses an earlier use-operand's
    /// allocation. The index of that earlier operand (relative to the
    /// current instruction's start of operands) must be known.
    pub fn reg_reuse_def(&mut self, reg: Writable<Reg>, idx: usize) {
        if reg.to_reg().to_virtual_reg().is_some() {
            self.add_operand(Operand::reg_reuse_def(reg.to_reg().into(), idx));
        } else {
            // Sometimes destination registers that reuse a source are
            // given with RealReg args. In this case, we assume the
            // creator of the instruction knows what they are doing
            // and just emit a normal def to the pinned vreg.
            self.add_operand(Operand::reg_def(reg.to_reg().into()));
        }
    }

    /// Add a register use+def, or "modify", where the reg must stay
    /// in the same register on the input and output side of the
    /// instruction.
    pub fn reg_mod(&mut self, reg: Writable<Reg>) {
        self.add_operand(Operand::new(
            reg.to_reg().into(),
            regalloc2::OperandConstraint::Reg,
            regalloc2::OperandKind::Mod,
            regalloc2::OperandPos::Early,
        ));
    }

    /// Add a register clobber. This is a register that is written by
    /// the instruction, so must be reserved (not used) for the whole
    /// instruction, but is not used afterward.
    #[allow(dead_code)] // FIXME: use clobbers rather than defs for calls!
    pub fn reg_clobber(&mut self, reg: Writable<RealReg>) {
        self.add_clobber(PReg::from(reg.to_reg()));
    }
}
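As a hedged illustration of how a backend might drive this collector (the instruction shape `dst = src1 op src2` and the `F: Fn(VReg) -> VReg` bound are assumptions for this sketch, not taken from any particular backend in this diff):

```rust
// Assumes the surrounding machinst module's imports (Reg, Writable,
// OperandCollector, VReg) are in scope.
fn collect_alu_operands<F: Fn(VReg) -> VReg>(
    dst: Writable<Reg>,
    src1: Reg,
    src2: Reg,
    collector: &mut OperandCollector<'_, F>,
) {
    collector.reg_use(src1); // operand 0 within this instruction's range
    collector.reg_use(src2); // operand 1
    // The def reuses operand 0's allocation (two-address style);
    // regalloc2 will insert a copy if `src1` must stay live past
    // this instruction.
    collector.reg_reuse_def(dst, 0);
}
```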

/// Use an OperandCollector to count the number of operands on an instruction.
pub fn count_operands<I: MachInst>(inst: &I) -> usize {
    let mut ops = vec![];
    let mut coll = OperandCollector::new(&mut ops, |vreg| vreg);
    inst.get_operands(&mut coll);
    let ((start, end), _) = coll.finish();
    debug_assert_eq!(0, start);
    end as usize
}

/// Pretty-print part of a disassembly, with knowledge of
/// operand/instruction size, and optionally with regalloc
/// results. This can be used, for example, to print a register as
/// either `rax` or `eax` on x86-64, depending on a 64- or 32-bit
/// context.
pub trait PrettyPrint {
    fn pretty_print(&self, size_bytes: u8, allocs: &mut AllocationConsumer<'_>) -> String;

    fn pretty_print_default(&self) -> String {
        self.pretty_print(0, &mut AllocationConsumer::new(&[]))
    }
}
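A minimal sketch of an implementor, assuming a hypothetical `SizedReg` wrapper and x86-64-style naming; neither the type nor the formatting scheme comes from this diff:

```rust
// Hypothetical wrapper that prints a register with a size-dependent
// name, the way an x86-64 backend might.
struct SizedReg {
    reg: Reg,
}

impl PrettyPrint for SizedReg {
    fn pretty_print(&self, size_bytes: u8, allocs: &mut AllocationConsumer<'_>) -> String {
        // Resolve to the post-regalloc RealReg if an allocation is
        // available; otherwise this prints the vreg as-is.
        let reg = allocs.next(self.reg);
        match size_bytes {
            4 => format!("{:?}.32", reg), // stand-in for a 32-bit name like `eax`
            _ => format!("{:?}", reg),    // stand-in for a 64-bit name like `rax`
        }
    }
}
```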

/// A consumer of an (optional) list of Allocations along with Regs
/// that provides RealRegs where available.
///
/// This is meant to be used during code emission or
/// pretty-printing. In at least the latter case, regalloc results may
/// or may not be available, so we may end up printing either vregs or
/// rregs. Even pre-regalloc, though, some registers may be RealRegs
/// that were provided when the instruction was created.
///
/// This struct should be used in a specific way: when matching on an
/// instruction, provide it the Regs in the same order as they were
/// provided to the OperandCollector.
#[derive(Clone)]
pub struct AllocationConsumer<'a> {
    allocs: std::slice::Iter<'a, Allocation>,
}

impl<'a> AllocationConsumer<'a> {
    pub fn new(allocs: &'a [Allocation]) -> Self {
        Self {
            allocs: allocs.iter(),
        }
    }

    pub fn next(&mut self, pre_regalloc_reg: Reg) -> Reg {
        let alloc = self.allocs.next();
        let alloc = alloc.map(|alloc| {
            Reg::from(
                alloc
                    .as_reg()
                    .expect("Should not have gotten a stack allocation"),
            )
        });

        match (pre_regalloc_reg.to_real_reg(), alloc) {
            (Some(rreg), None) => rreg.into(),
            (Some(rreg), Some(alloc)) => {
                debug_assert_eq!(Reg::from(rreg), alloc);
                alloc
            }
            (None, Some(alloc)) => alloc,
            _ => pre_regalloc_reg,
        }
    }

    pub fn next_writable(&mut self, pre_regalloc_reg: Writable<Reg>) -> Writable<Reg> {
        Writable::from_reg(self.next(pre_regalloc_reg.to_reg()))
    }

    pub fn next_n(&mut self, count: usize) -> SmallVec<[Allocation; 4]> {
        let mut allocs = smallvec![];
        for _ in 0..count {
            if let Some(next) = self.allocs.next() {
                allocs.push(*next);
            } else {
                return allocs;
            }
        }
        allocs
    }
}

impl<'a> std::default::Default for AllocationConsumer<'a> {
    fn default() -> Self {
        Self { allocs: [].iter() }
    }
}
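A hedged sketch of the consuming side, mirroring the collection order from the ALU example earlier; the instruction shape is again invented, but the `next`/`next_writable` calls follow the API above:

```rust
// The order of `next` calls must match the OperandCollector order
// exactly: src1 (use 0), src2 (use 1), then dst (the reuse-def).
fn emit_alu(
    dst: Writable<Reg>,
    src1: Reg,
    src2: Reg,
    allocs: &mut AllocationConsumer<'_>,
) {
    let src1 = allocs.next(src1);
    let src2 = allocs.next(src2);
    let dst = allocs.next_writable(dst);
    // ... encode the instruction using the now-concrete RealRegs ...
    let _ = (src1, src2, dst);
}
```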

@@ -1,108 +0,0 @@
use crate::ir::Type;
use regalloc::{Reg, RegUsageMapper, Writable};
use smallvec::SmallVec;
use std::cell::Cell;

// Define our own register-mapping trait so we can do arbitrary register
// renamings that are more free-form than what `regalloc` constrains us to
// with its `RegUsageMapper` trait definition.
pub trait RegMapper {
    fn get_use(&self, reg: Reg) -> Option<Reg>;
    fn get_def(&self, reg: Reg) -> Option<Reg>;
    fn get_mod(&self, reg: Reg) -> Option<Reg>;

    fn map_use(&self, r: &mut Reg) {
        if let Some(new) = self.get_use(*r) {
            *r = new;
        }
    }

    fn map_def(&self, r: &mut Writable<Reg>) {
        if let Some(new) = self.get_def(r.to_reg()) {
            *r = Writable::from_reg(new);
        }
    }

    fn map_mod(&self, r: &mut Writable<Reg>) {
        if let Some(new) = self.get_mod(r.to_reg()) {
            *r = Writable::from_reg(new);
        }
    }
}

impl<T> RegMapper for T
where
    T: RegUsageMapper,
{
    fn get_use(&self, reg: Reg) -> Option<Reg> {
        let v = reg.as_virtual_reg()?;
        self.get_use(v).map(|r| r.to_reg())
    }

    fn get_def(&self, reg: Reg) -> Option<Reg> {
        let v = reg.as_virtual_reg()?;
        self.get_def(v).map(|r| r.to_reg())
    }

    fn get_mod(&self, reg: Reg) -> Option<Reg> {
        let v = reg.as_virtual_reg()?;
        self.get_mod(v).map(|r| r.to_reg())
    }
}

#[derive(Debug, Default)]
pub struct RegRenamer {
    // Map of `(old, new, used, ty)` register names. Use a `SmallVec` because
    // we typically only have one or two renamings.
    //
    // The `used` flag indicates whether the mapping has been used for
    // `get_def`; it is consulted later, in `unmapped_defs`, to know what
    // moves need to be generated.
    renames: SmallVec<[(Reg, Reg, Cell<bool>, Type); 2]>,
}

impl RegRenamer {
    /// Adds a new mapping which means that the `old` reg should now be called
    /// `new`. The type of `old` is `ty`, as specified.
    pub fn add_rename(&mut self, old: Reg, new: Reg, ty: Type) {
        self.renames.push((old, new, Cell::new(false), ty));
    }

    fn get_rename(&self, reg: Reg, set_used_def: bool) -> Option<Reg> {
        let (_, new, used_def, _) = self.renames.iter().find(|(old, _, _, _)| reg == *old)?;
        used_def.set(used_def.get() || set_used_def);
        Some(*new)
    }

    /// Returns the list of register mappings, with their type, which were not
    /// actually mapped.
    ///
    /// This list is used because it means that the `old` name for the register
    /// was never actually defined, so to correctly rename this register the
    /// caller needs to move `old` into `new`.
    ///
    /// This yields tuples of `(old, new, ty)`.
    pub fn unmapped_defs(&self) -> impl Iterator<Item = (Reg, Reg, Type)> + '_ {
        self.renames.iter().filter_map(|(old, new, used_def, ty)| {
            if used_def.get() {
                None
            } else {
                Some((*old, *new, *ty))
            }
        })
    }
}

impl RegMapper for RegRenamer {
    fn get_use(&self, reg: Reg) -> Option<Reg> {
        self.get_rename(reg, false)
    }

    fn get_def(&self, reg: Reg) -> Option<Reg> {
        self.get_rename(reg, true)
    }

    fn get_mod(&self, reg: Reg) -> Option<Reg> {
        self.get_rename(reg, false)
    }
}

@@ -1,7 +1,9 @@
//! Data structure for tracking the (possibly multiple) registers that hold one
//! SSA `Value`.

use regalloc::{RealReg, Reg, VirtualReg, Writable};
use regalloc2::{PReg, VReg};

use super::{RealReg, Reg, VirtualReg, Writable};
use std::fmt::Debug;

const VALUE_REGS_PARTS: usize = 2;
@@ -35,17 +37,17 @@ pub trait InvalidSentinel: Copy + Eq {
}
impl InvalidSentinel for Reg {
    fn invalid_sentinel() -> Self {
        Reg::invalid()
        Reg::from(VReg::invalid())
    }
}
impl InvalidSentinel for VirtualReg {
    fn invalid_sentinel() -> Self {
        VirtualReg::invalid()
        VirtualReg::from(VReg::invalid())
    }
}
impl InvalidSentinel for RealReg {
    fn invalid_sentinel() -> Self {
        RealReg::invalid()
        RealReg::from(PReg::invalid())
    }
}
impl InvalidSentinel for Writable<Reg> {
File diff suppressed because it is too large