Switch Cranelift over to regalloc2. (#3989)

This PR switches Cranelift over to the new register allocator, regalloc2.

See [this document](https://gist.github.com/cfallin/08553421a91f150254fe878f67301801)
for a summary of the design changes. This switchover has implications for
core VCode/MachInst types and the lowering pass.

Overall, this change improves both compile time and the runtime performance of
generated code, as reported in #3942:

```
Benchmark       Compilation (wallclock)     Execution (wallclock)
blake3-scalar   25% faster                  28% faster
blake3-simd     no diff                     no diff
meshoptimizer   19% faster                  17% faster
pulldown-cmark  17% faster                  no diff
bz2             15% faster                  no diff
SpiderMonkey,   21% faster                  2% faster
  fib(30)
clang.wasm      42% faster                  N/A
```
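At a high level, the switchover changes `machinst::compile` to hand the lowered
VCode straight to regalloc2 and to return regalloc2's `Output` alongside the
`VCode` (see the `machinst::compile` hunk below). A minimal sketch of that call,
assuming only a container implementing `regalloc2::Function`; the helper name
`allocate` is illustrative and not part of this PR:

```rust
use regalloc2::{Function, MachineEnv, Output, RegallocOptions};

// Illustrative helper (not in the PR): run regalloc2 over an already-lowered
// code container. The caller later rewrites instructions into their final
// form using the returned `Output`, as the new `compile` does.
fn allocate<F: Function>(code: &F, machine_env: &MachineEnv) -> Output {
    let mut options = RegallocOptions::default();
    // Mirror the PR: turn on regalloc2's verbose logging only when trace
    // logging is enabled.
    options.verbose_log = log::log_enabled!(log::Level::Trace);
    regalloc2::run(code, machine_env, &options).expect("register allocation")
}
```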
Commit a0318f36f0 (parent bfae6384aa)
Author: Chris Fallin
Date: 2022-04-14 10:28:21 -07:00 (committed by GitHub)
181 changed files with 16887 additions and 21587 deletions


@@ -5,7 +5,6 @@ use crate::ir::{Signature, StackSlot};
use crate::isa::CallConv;
use crate::machinst::*;
use crate::settings;
use regalloc::{Reg, Set, SpillSlot, Writable};
use smallvec::SmallVec;
/// A small vector of instructions (with some reasonable size); appropriate for
@@ -42,12 +41,6 @@ pub trait ABICallee {
/// Get the calling convention implemented by this ABI object.
fn call_conv(&self) -> CallConv;
/// Get the liveins of the function.
fn liveins(&self) -> Set<RealReg>;
/// Get the liveouts of the function.
fn liveouts(&self) -> Set<RealReg>;
/// Number of arguments.
fn num_args(&self) -> usize;
@@ -106,7 +99,7 @@ pub trait ABICallee {
fn set_num_spillslots(&mut self, slots: usize);
/// Update with the clobbered registers, post-regalloc.
fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>);
fn set_clobbered(&mut self, clobbered: Vec<Writable<RealReg>>);
/// Get the address of a stackslot.
fn stackslot_addr(&self, slot: StackSlot, offset: u32, into_reg: Writable<Reg>) -> Self::I;


@@ -125,6 +125,7 @@
use super::abi::*;
use crate::binemit::StackMap;
use crate::fx::FxHashSet;
use crate::ir::types::*;
use crate::ir::{ArgumentExtension, ArgumentPurpose, StackSlot};
use crate::machinst::*;
@@ -132,7 +133,6 @@ use crate::settings;
use crate::CodegenResult;
use crate::{ir, isa};
use alloc::vec::Vec;
use regalloc::{RealReg, Reg, RegClass, Set, SpillSlot, Writable};
use smallvec::{smallvec, SmallVec};
use std::convert::TryFrom;
use std::marker::PhantomData;
@@ -257,16 +257,6 @@ pub enum ArgsOrRets {
Rets,
}
/// Is an instruction returned by an ABI machine-specific backend a safepoint,
/// or not?
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum InstIsSafepoint {
/// The instruction is a safepoint.
Yes,
/// The instruction is not a safepoint.
No,
}
/// Abstract location for a machine-specific ABI impl to translate into the
/// appropriate addressing mode.
#[derive(Clone, Copy, Debug)]
@@ -319,11 +309,7 @@ pub trait ABIMachineSpec {
/// Returns word register class.
fn word_reg_class() -> RegClass {
match Self::word_bits() {
32 => RegClass::I32,
64 => RegClass::I64,
_ => unreachable!(),
}
RegClass::Int
}
/// Returns required stack alignment in bytes.
@@ -366,7 +352,7 @@ pub trait ABIMachineSpec {
) -> Self::I;
/// Generate a return instruction.
fn gen_ret() -> Self::I;
fn gen_ret(rets: Vec<Reg>) -> Self::I;
/// Generate an "epilogue placeholder" instruction, recognized by lowering
/// when using the Baldrdash ABI.
@@ -442,7 +428,7 @@ pub trait ABIMachineSpec {
/// contains the registers in a sorted order.
fn get_clobbered_callee_saves(
call_conv: isa::CallConv,
regs: &Set<Writable<RealReg>>,
regs: &[Writable<RealReg>],
) -> Vec<Writable<RealReg>>;
/// Determine whether it is necessary to generate the usual frame-setup
@@ -466,7 +452,7 @@ pub trait ABIMachineSpec {
call_conv: isa::CallConv,
setup_frame: bool,
flags: &settings::Flags,
clobbered_callee_saves: &Vec<Writable<RealReg>>,
clobbered_callee_saves: &[Writable<RealReg>],
fixed_frame_storage_size: u32,
outgoing_args_size: u32,
) -> (u64, SmallVec<[Self::I; 16]>);
@@ -478,7 +464,7 @@ pub trait ABIMachineSpec {
fn gen_clobber_restore(
call_conv: isa::CallConv,
flags: &settings::Flags,
clobbers: &Set<Writable<RealReg>>,
clobbers: &[Writable<RealReg>],
fixed_frame_storage_size: u32,
outgoing_args_size: u32,
) -> SmallVec<[Self::I; 16]>;
@@ -493,7 +479,7 @@ pub trait ABIMachineSpec {
tmp: Writable<Reg>,
callee_conv: isa::CallConv,
callee_conv: isa::CallConv,
) -> SmallVec<[(InstIsSafepoint, Self::I); 2]>;
) -> SmallVec<[Self::I; 2]>;
/// Generate a memcpy invocation. Used to set up struct args. May clobber
/// caller-save registers; we only memcpy before we start to set up args for
@@ -530,6 +516,7 @@ pub trait ABIMachineSpec {
}
/// ABI information shared between body (callee) and caller.
#[derive(Clone)]
struct ABISig {
/// Argument locations (regs or stack slots). Stack offsets are relative to
/// SP on entry to function.
@@ -604,7 +591,7 @@ pub struct ABICalleeImpl<M: ABIMachineSpec> {
/// Stack size to be reserved for outgoing arguments.
outgoing_args_size: u32,
/// Clobbered registers, from regalloc.
clobbered: Set<Writable<RealReg>>,
clobbered: Vec<Writable<RealReg>>,
/// Total number of spillslots, from regalloc.
spillslots: Option<usize>,
/// Storage allocated for the fixed part of the stack frame. This is
@@ -655,24 +642,13 @@ fn get_special_purpose_param_register(
let idx = f.signature.special_param_index(purpose)?;
match &abi.args[idx] {
&ABIArg::Slots { ref slots, .. } => match &slots[0] {
&ABIArgSlot::Reg { reg, .. } => Some(reg.to_reg()),
&ABIArgSlot::Reg { reg, .. } => Some(reg.into()),
_ => None,
},
_ => None,
}
}
fn ty_from_class(class: RegClass) -> Type {
match class {
RegClass::I32 => I32,
RegClass::I64 => I64,
RegClass::F32 => F32,
RegClass::F64 => F64,
RegClass::V128 => I8X16,
_ => panic!("Unknown regclass: {:?}", class),
}
}
impl<M: ABIMachineSpec> ABICalleeImpl<M> {
/// Create a new body ABI instance.
pub fn new(
@@ -739,7 +715,7 @@ impl<M: ABIMachineSpec> ABICalleeImpl<M> {
stackslots,
stackslots_size: stack_offset,
outgoing_args_size: 0,
clobbered: Set::empty(),
clobbered: vec![],
spillslots: None,
fixed_frame_storage_size: 0,
total_frame_size: None,
@@ -961,34 +937,6 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
self.sig.call_conv
}
fn liveins(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for arg in &self.sig.args {
if let &ABIArg::Slots { ref slots, .. } = arg {
for slot in slots {
if let ABIArgSlot::Reg { reg, .. } = slot {
set.insert(*reg);
}
}
}
}
set
}
fn liveouts(&self) -> Set<RealReg> {
let mut set: Set<RealReg> = Set::empty();
for ret in &self.sig.rets {
if let &ABIArg::Slots { ref slots, .. } = ret {
for slot in slots {
if let ABIArgSlot::Reg { reg, .. } = slot {
set.insert(*reg);
}
}
}
}
set
}
fn num_args(&self) -> usize {
self.sig.args.len()
}
@@ -1019,7 +967,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
// Extension mode doesn't matter (we're copying out, not in; we
// ignore high bits by convention).
&ABIArgSlot::Reg { reg, ty, .. } => {
insts.push(M::gen_move(*into_reg, reg.to_reg(), ty));
insts.push(M::gen_move(*into_reg, reg.into(), ty));
}
&ABIArgSlot::Stack { offset, ty, .. } => {
insts.push(M::gen_load_stack(
@@ -1069,20 +1017,21 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
match &self.sig.rets[idx] {
&ABIArg::Slots { ref slots, .. } => {
assert_eq!(from_regs.len(), slots.len());
for (slot, from_reg) in slots.iter().zip(from_regs.regs().iter()) {
for (slot, &from_reg) in slots.iter().zip(from_regs.regs().iter()) {
match slot {
&ABIArgSlot::Reg {
reg, ty, extension, ..
} => {
let from_bits = ty_bits(ty) as u8;
let ext = M::get_ext_mode(self.sig.call_conv, extension);
let reg: Writable<Reg> = Writable::from_reg(Reg::from(reg));
match (ext, from_bits) {
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
if n < word_bits =>
{
let signed = ext == ArgumentExtension::Sext;
ret.push(M::gen_extend(
Writable::from_reg(reg.to_reg()),
reg,
from_reg.to_reg(),
signed,
from_bits,
@@ -1090,11 +1039,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
));
}
_ => {
ret.push(M::gen_move(
Writable::from_reg(reg.to_reg()),
from_reg.to_reg(),
ty,
));
ret.push(M::gen_move(reg, from_reg.to_reg(), ty));
}
};
}
@@ -1118,7 +1063,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
(ArgumentExtension::Uext, n) | (ArgumentExtension::Sext, n)
if n < word_bits =>
{
assert_eq!(M::word_reg_class(), from_reg.to_reg().get_class());
assert_eq!(M::word_reg_class(), from_reg.to_reg().class());
let signed = ext == ArgumentExtension::Sext;
ret.push(M::gen_extend(
Writable::from_reg(from_reg.to_reg()),
@@ -1166,7 +1111,22 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
}
fn gen_ret(&self) -> Self::I {
M::gen_ret()
let mut rets = vec![];
for ret in &self.sig.rets {
match ret {
ABIArg::Slots { slots, .. } => {
for slot in slots {
match slot {
ABIArgSlot::Reg { reg, .. } => rets.push(Reg::from(*reg)),
_ => {}
}
}
}
_ => {}
}
}
M::gen_ret(rets)
}
fn gen_epilogue_placeholder(&self) -> Self::I {
@@ -1177,7 +1137,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
self.spillslots = Some(slots);
}
fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>) {
fn set_clobbered(&mut self, clobbered: Vec<Writable<RealReg>>) {
self.clobbered = clobbered;
}
@@ -1198,7 +1158,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
into_regs: ValueRegs<Writable<Reg>>,
) -> SmallInstVec<Self::I> {
// Offset from beginning of spillslot area, which is at nominal SP + stackslots_size.
let islot = slot.get() as i64;
let islot = slot.index() as i64;
let spill_off = islot * M::word_bytes() as i64;
let sp_off = self.stackslots_size as i64 + spill_off;
log::trace!("load_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
@@ -1214,7 +1174,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
from_regs: ValueRegs<Reg>,
) -> SmallInstVec<Self::I> {
// Offset from beginning of spillslot area, which is at nominal SP + stackslots_size.
let islot = slot.get() as i64;
let islot = slot.index() as i64;
let spill_off = islot * M::word_bytes() as i64;
let sp_off = self.stackslots_size as i64 + spill_off;
log::trace!("store_spillslot: slot {:?} -> sp_off {}", slot, sp_off);
@@ -1245,7 +1205,7 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
let first_spillslot_word =
((self.stackslots_size + virtual_sp_offset as u32) / bytes) as usize;
for &slot in slots {
let slot = slot.get() as usize;
let slot = slot.index();
bits[first_spillslot_word + slot] = true;
}
@@ -1347,7 +1307,10 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
insts.extend(M::gen_epilogue_frame_restore(&self.flags));
}
insts.push(M::gen_ret());
// This `ret` doesn't need any return registers attached
// because we are post-regalloc and don't need to
// represent the implicit uses anymore.
insts.push(M::gen_ret(vec![]));
}
log::trace!("Epilogue: {:?}", insts);
@@ -1368,19 +1331,19 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
}
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg) -> Self::I {
let ty = ty_from_class(from_reg.to_reg().get_class());
self.store_spillslot(to_slot, ty, ValueRegs::one(from_reg.to_reg()))
let ty = Self::I::canonical_type_for_rc(Reg::from(from_reg).class());
self.store_spillslot(to_slot, ty, ValueRegs::one(Reg::from(from_reg)))
.into_iter()
.next()
.unwrap()
}
fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot) -> Self::I {
let ty = ty_from_class(to_reg.to_reg().get_class());
let ty = Self::I::canonical_type_for_rc(to_reg.to_reg().class());
self.load_spillslot(
from_slot,
ty,
writable_value_regs(ValueRegs::one(to_reg.to_reg().to_reg())),
writable_value_regs(ValueRegs::one(Reg::from(to_reg.to_reg()))),
)
.into_iter()
.next()
@@ -1390,13 +1353,13 @@ impl<M: ABIMachineSpec> ABICallee for ABICalleeImpl<M> {
fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Writable<Reg>>) {
// Compute uses: all arg regs.
let mut uses = Vec::new();
let mut uses = FxHashSet::default();
for arg in &sig.args {
if let &ABIArg::Slots { ref slots, .. } = arg {
for slot in slots {
match slot {
&ABIArgSlot::Reg { reg, .. } => {
uses.push(reg.to_reg());
uses.insert(Reg::from(reg));
}
_ => {}
}
@@ -1405,13 +1368,15 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
}
// Compute defs: all retval regs, and all caller-save (clobbered) regs.
let mut defs = M::get_regs_clobbered_by_call(sig.call_conv);
let mut defs: FxHashSet<_> = M::get_regs_clobbered_by_call(sig.call_conv)
.into_iter()
.collect();
for ret in &sig.rets {
if let &ABIArg::Slots { ref slots, .. } = ret {
for slot in slots {
match slot {
&ABIArgSlot::Reg { reg, .. } => {
defs.push(Writable::from_reg(reg.to_reg()));
defs.insert(Writable::from_reg(Reg::from(reg)));
}
_ => {}
}
@@ -1419,6 +1384,11 @@ fn abisig_to_uses_and_defs<M: ABIMachineSpec>(sig: &ABISig) -> (Vec<Reg>, Vec<Wr
}
}
let mut uses = uses.into_iter().collect::<Vec<_>>();
let mut defs = defs.into_iter().collect::<Vec<_>>();
uses.sort_unstable();
defs.sort_unstable();
(uses, defs)
}
@@ -1567,14 +1537,14 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
} => {
let ext = M::get_ext_mode(self.sig.call_conv, extension);
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
assert_eq!(word_rc, reg.get_class());
assert_eq!(word_rc, reg.class());
let signed = match ext {
ir::ArgumentExtension::Uext => false,
ir::ArgumentExtension::Sext => true,
_ => unreachable!(),
};
ctx.emit(M::gen_extend(
Writable::from_reg(reg.to_reg()),
Writable::from_reg(Reg::from(reg)),
*from_reg,
signed,
ty_bits(ty) as u8,
@@ -1582,7 +1552,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
));
} else {
ctx.emit(M::gen_move(
Writable::from_reg(reg.to_reg()),
Writable::from_reg(Reg::from(reg)),
*from_reg,
ty,
));
@@ -1597,7 +1567,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
let mut ty = ty;
let ext = M::get_ext_mode(self.sig.call_conv, extension);
if ext != ir::ArgumentExtension::None && ty_bits(ty) < word_bits {
assert_eq!(word_rc, from_reg.get_class());
assert_eq!(word_rc, from_reg.class());
let signed = match ext {
ir::ArgumentExtension::Uext => false,
ir::ArgumentExtension::Sext => true,
@@ -1680,7 +1650,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
// Extension mode doesn't matter because we're copying out, not in,
// and we ignore high bits in our own registers by convention.
&ABIArgSlot::Reg { reg, ty, .. } => {
ctx.emit(M::gen_move(*into_reg, reg.to_reg(), ty));
ctx.emit(M::gen_move(*into_reg, Reg::from(reg), ty));
}
&ABIArgSlot::Stack { offset, ty, .. } => {
let ret_area_base = self.sig.stack_arg_space;
@@ -1716,7 +1686,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
self.emit_copy_regs_to_arg(ctx, i, ValueRegs::one(rd.to_reg()));
}
let tmp = ctx.alloc_tmp(word_type).only_reg().unwrap();
for (is_safepoint, inst) in M::gen_call(
for inst in M::gen_call(
&self.dest,
uses,
defs,
@@ -1727,10 +1697,7 @@ impl<M: ABIMachineSpec> ABICaller for ABICallerImpl<M> {
)
.into_iter()
{
match is_safepoint {
InstIsSafepoint::Yes => ctx.emit_safepoint(inst),
InstIsSafepoint::No => ctx.emit(inst),
}
ctx.emit(inst);
}
}
}


@@ -127,6 +127,9 @@ pub enum LoweredBlock {
/// to the next, i.e., corresponding to the included edge-block. This
/// will be an instruction in `block`.
edge_inst: Inst,
/// The successor index in this edge, to distinguish multiple
/// edges between the same block pair.
succ_idx: usize,
/// The successor CLIF block.
succ: Block,
},
@@ -138,6 +141,9 @@ pub enum LoweredBlock {
/// The edge (jump) instruction corresponding to the included
/// edge-block. This will be an instruction in `pred`.
edge_inst: Inst,
/// The successor index in this edge, to distinguish multiple
/// edges between the same block pair.
succ_idx: usize,
/// The original CLIF block included in this lowered block.
block: Block,
},
@@ -150,6 +156,9 @@ pub enum LoweredBlock {
/// The edge (jump) instruction corresponding to this edge's transition.
/// This will be an instruction in `pred`.
edge_inst: Inst,
/// The successor index in this edge, to distinguish multiple
/// edges between the same block pair.
succ_idx: usize,
/// The successor CLIF block.
succ: Block,
},
@@ -168,29 +177,34 @@ impl LoweredBlock {
}
/// The associated in-edge, if any.
#[cfg(test)]
pub fn in_edge(self) -> Option<(Block, Inst, Block)> {
match self {
LoweredBlock::EdgeAndOrig {
pred,
edge_inst,
block,
..
} => Some((pred, edge_inst, block)),
_ => None,
}
}
/// The associated out-edge, if any. Also includes edge-only blocks.
#[cfg(test)]
pub fn out_edge(self) -> Option<(Block, Inst, Block)> {
match self {
LoweredBlock::OrigAndEdge {
block,
edge_inst,
succ,
..
} => Some((block, edge_inst, succ)),
LoweredBlock::Edge {
pred,
edge_inst,
succ,
..
} => Some((pred, edge_inst, succ)),
_ => None,
}
@@ -207,15 +221,17 @@ impl BlockLoweringOrder {
let mut block_out_count = SecondaryMap::with_default(0);
// Cache the block successors to avoid re-examining branches below.
let mut block_succs: SmallVec<[(Inst, Block); 128]> = SmallVec::new();
let mut block_succs: SmallVec<[(Inst, usize, Block); 128]> = SmallVec::new();
let mut block_succ_range = SecondaryMap::with_default((0, 0));
let mut fallthrough_return_block = None;
for block in f.layout.blocks() {
let block_succ_start = block_succs.len();
let mut succ_idx = 0;
visit_block_succs(f, block, |inst, succ| {
block_out_count[block] += 1;
block_in_count[succ] += 1;
block_succs.push((inst, succ));
block_succs.push((inst, succ_idx, succ));
succ_idx += 1;
});
let block_succ_end = block_succs.len();
block_succ_range[block] = (block_succ_start, block_succ_end);
@@ -262,13 +278,14 @@ impl BlockLoweringOrder {
// At an orig block; successors are always edge blocks,
// possibly with orig blocks following.
let range = block_succ_range[block];
for &(edge_inst, succ) in &block_succs[range.0..range.1] {
for &(edge_inst, succ_idx, succ) in &block_succs[range.0..range.1] {
if block_in_count[succ] == 1 {
ret.push((
edge_inst,
LoweredBlock::EdgeAndOrig {
pred: block,
edge_inst,
succ_idx,
block: succ,
},
));
@@ -278,6 +295,7 @@ impl BlockLoweringOrder {
LoweredBlock::Edge {
pred: block,
edge_inst,
succ_idx,
succ,
},
));
@@ -298,12 +316,13 @@ impl BlockLoweringOrder {
// implicit return succ).
if range.1 - range.0 > 0 {
debug_assert!(range.1 - range.0 == 1);
let (succ_edge_inst, succ_succ) = block_succs[range.0];
let (succ_edge_inst, succ_succ_idx, succ_succ) = block_succs[range.0];
ret.push((
edge_inst,
LoweredBlock::OrigAndEdge {
block: succ,
edge_inst: succ_edge_inst,
succ_idx: succ_succ_idx,
succ: succ_succ,
},
));
@@ -395,7 +414,7 @@ impl BlockLoweringOrder {
let mut lowered_succ_ranges = vec![];
let mut lb_to_bindex = FxHashMap::default();
for (block, succ_range) in rpo.into_iter() {
let index = lowered_order.len() as BlockIndex;
let index = BlockIndex::new(lowered_order.len());
lb_to_bindex.insert(block, index);
lowered_order.push(block);
lowered_succ_ranges.push(succ_range);
@@ -416,7 +435,7 @@ impl BlockLoweringOrder {
let mut orig_map = SecondaryMap::with_default(None);
for (i, lb) in lowered_order.iter().enumerate() {
let i = i as BlockIndex;
let i = BlockIndex::new(i);
if let Some(b) = lb.orig_block() {
orig_map[b] = Some(i);
}
@@ -441,7 +460,7 @@ impl BlockLoweringOrder {
/// Get the successor indices for a lowered block.
pub fn succ_indices(&self, block: BlockIndex) -> &[(Inst, BlockIndex)] {
let range = self.lowered_succ_ranges[block as usize];
let range = self.lowered_succ_ranges[block.index()];
&self.lowered_succ_indices[range.0..range.1]
}


@@ -269,7 +269,7 @@ impl MachLabel {
/// Get a label for a block. (The first N MachLabels are always reserved for
/// the N blocks in the vcode.)
pub fn from_block(bindex: BlockIndex) -> MachLabel {
MachLabel(bindex)
MachLabel(bindex.index() as u32)
}
/// Get the numeric label index.
@@ -334,7 +334,7 @@ impl<I: VCodeInst> MachBuffer<I> {
/// times, e.g. after calling `add_{cond,uncond}_branch()` and
/// before emitting branch bytes.
fn check_label_branch_invariants(&self) {
if !cfg!(debug_assertions) || cfg!(fuzzing) {
if !cfg!(fuzzing) {
return;
}
let cur_off = self.cur_offset();
@@ -489,12 +489,11 @@ impl<I: VCodeInst> MachBuffer<I> {
}
/// Reserve the first N MachLabels for blocks.
pub fn reserve_labels_for_blocks(&mut self, blocks: BlockIndex) {
pub fn reserve_labels_for_blocks(&mut self, blocks: usize) {
trace!("MachBuffer: first {} labels are for blocks", blocks);
debug_assert!(self.label_offsets.is_empty());
self.label_offsets
.resize(blocks as usize, UNKNOWN_LABEL_OFFSET);
self.label_aliases.resize(blocks as usize, UNKNOWN_LABEL);
self.label_offsets.resize(blocks, UNKNOWN_LABEL_OFFSET);
self.label_aliases.resize(blocks, UNKNOWN_LABEL);
// Post-invariant: as for `get_label()`.
}
@@ -1599,14 +1598,14 @@ impl MachBranch {
/// resolving labels internally in the buffer.
pub struct MachTextSectionBuilder<I: VCodeInst> {
buf: MachBuffer<I>,
next_func: u32,
next_func: usize,
force_veneers: bool,
}
impl<I: VCodeInst> MachTextSectionBuilder<I> {
pub fn new(num_funcs: u32) -> MachTextSectionBuilder<I> {
let mut buf = MachBuffer::new();
buf.reserve_labels_for_blocks(num_funcs);
buf.reserve_labels_for_blocks(num_funcs as usize);
MachTextSectionBuilder {
buf,
next_func: 0,
@@ -1627,7 +1626,8 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> {
self.buf.align_to(align.unwrap_or(I::LabelUse::ALIGN));
let pos = self.buf.cur_offset();
if named {
self.buf.bind_label(MachLabel::from_block(self.next_func));
self.buf
.bind_label(MachLabel::from_block(BlockIndex::new(self.next_func)));
self.next_func += 1;
}
self.buf.put_data(func);
@@ -1635,7 +1635,7 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> {
}
fn resolve_reloc(&mut self, offset: u64, reloc: Reloc, addend: Addend, target: u32) -> bool {
let label = MachLabel::from_block(target);
let label = MachLabel::from_block(BlockIndex::new(target as usize));
let offset = u32::try_from(offset).unwrap();
match I::LabelUse::from_reloc(reloc, addend) {
Some(label_use) => {
@@ -1652,7 +1652,7 @@ impl<I: VCodeInst> TextSectionBuilder for MachTextSectionBuilder<I> {
fn finish(&mut self) -> Vec<u8> {
// Double-check all functions were pushed.
assert_eq!(self.next_func, self.buf.label_offsets.len() as u32);
assert_eq!(self.next_func, self.buf.label_offsets.len());
// Finish up any veneers, if necessary.
self.buf
@@ -1675,7 +1675,7 @@ mod test {
use std::vec::Vec;
fn label(n: u32) -> MachLabel {
MachLabel::from_block(n)
MachLabel::from_block(BlockIndex::new(n as usize))
}
fn target(n: u32) -> BranchTarget {
BranchTarget::Label(label(n))
@@ -1690,7 +1690,7 @@ mod test {
buf.reserve_labels_for_blocks(2);
buf.bind_label(label(0));
let inst = Inst::Jump { dest: target(1) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(1));
let buf = buf.finish();
assert_eq!(0, buf.total_size());
@@ -1710,15 +1710,15 @@ mod test {
taken: target(1),
not_taken: target(2),
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(1));
let inst = Inst::Jump { dest: target(3) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(2));
let inst = Inst::Jump { dest: target(3) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(3));
@@ -1740,17 +1740,17 @@ mod test {
taken: target(1),
not_taken: target(2),
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(1));
let inst = Inst::Udf {
trap_code: TrapCode::Interrupt,
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(2));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(3));
@@ -1762,9 +1762,9 @@ mod test {
kind: CondBrKind::NotZero(xreg(0)),
trap_code: TrapCode::Interrupt,
};
inst.emit(&mut buf2, &info, &mut state);
inst.emit(&[], &mut buf2, &info, &mut state);
let inst = Inst::Nop4;
inst.emit(&mut buf2, &info, &mut state);
inst.emit(&[], &mut buf2, &info, &mut state);
let buf2 = buf2.finish();
@@ -1785,7 +1785,7 @@ mod test {
taken: target(2),
not_taken: target(3),
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(1));
while buf.cur_offset() < 2000000 {
@@ -1793,16 +1793,16 @@ mod test {
buf.emit_island(0);
}
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
}
buf.bind_label(label(2));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(3));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
let buf = buf.finish();
@@ -1831,7 +1831,7 @@ mod test {
// go directly to the target.
not_taken: BranchTarget::ResolvedOffset(2000000 + 4 - 4),
};
inst.emit(&mut buf2, &info, &mut state);
inst.emit(&[], &mut buf2, &info, &mut state);
let buf2 = buf2.finish();
@@ -1848,16 +1848,16 @@ mod test {
buf.bind_label(label(0));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(1));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(2));
while buf.cur_offset() < 2000000 {
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
}
buf.bind_label(label(3));
@@ -1866,7 +1866,7 @@ mod test {
taken: target(0),
not_taken: target(1),
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
let buf = buf.finish();
@@ -1879,11 +1879,11 @@ mod test {
taken: BranchTarget::ResolvedOffset(8),
not_taken: BranchTarget::ResolvedOffset(4 - (2000000 + 4)),
};
inst.emit(&mut buf2, &info, &mut state);
inst.emit(&[], &mut buf2, &info, &mut state);
let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(-(2000000 + 8)),
};
inst.emit(&mut buf2, &info, &mut state);
inst.emit(&[], &mut buf2, &info, &mut state);
let buf2 = buf2.finish();
@@ -1937,38 +1937,38 @@ mod test {
taken: target(1),
not_taken: target(2),
};
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(1));
let inst = Inst::Jump { dest: target(3) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(2));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
let inst = Inst::Jump { dest: target(0) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(3));
let inst = Inst::Jump { dest: target(4) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(4));
let inst = Inst::Jump { dest: target(5) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(5));
let inst = Inst::Jump { dest: target(7) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(6));
let inst = Inst::Nop4;
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(7));
let inst = Inst::Ret;
inst.emit(&mut buf, &info, &mut state);
let inst = Inst::Ret { rets: vec![] };
inst.emit(&[], &mut buf, &info, &mut state);
let buf = buf.finish();
@@ -2009,23 +2009,23 @@ mod test {
buf.bind_label(label(0));
let inst = Inst::Jump { dest: target(1) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(1));
let inst = Inst::Jump { dest: target(2) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(2));
let inst = Inst::Jump { dest: target(3) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(3));
let inst = Inst::Jump { dest: target(4) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
buf.bind_label(label(4));
let inst = Inst::Jump { dest: target(1) };
inst.emit(&mut buf, &info, &mut state);
inst.emit(&[], &mut buf, &info, &mut state);
let buf = buf.finish();


@@ -2,12 +2,11 @@
use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::log::DeferredDisplay;
use crate::machinst::*;
use crate::settings;
use crate::timing;
use regalloc::{allocate_registers_with_opts, Algorithm, Options, PrettyPrint};
use regalloc2::RegallocOptions;
use regalloc2::{self, MachineEnv};
/// Compile the given function down to VCode with allocated registers, ready
/// for binary emission.
@@ -15,100 +14,38 @@ pub fn compile<B: LowerBackend + TargetIsa>(
f: &Function,
b: &B,
abi: Box<dyn ABICallee<I = B::MInst>>,
reg_universe: &RealRegUniverse,
machine_env: &MachineEnv,
emit_info: <B::MInst as MachInstEmit>::Info,
) -> CodegenResult<VCode<B::MInst>>
where
B::MInst: PrettyPrint,
{
) -> CodegenResult<(VCode<B::MInst>, regalloc2::Output)> {
// Compute lowered block order.
let block_order = BlockLoweringOrder::new(f);
// Build the lowering context.
let lower = Lower::new(f, abi, emit_info, block_order)?;
// Lower the IR.
let (mut vcode, stack_map_request_info) = {
let vcode = {
let _tt = timing::vcode_lower();
lower.lower(b)?
};
// Creating the vcode string representation may be costly for large functions, so defer its
// rendering.
log::trace!(
"vcode from lowering: \n{}",
DeferredDisplay::new(|| vcode.show_rru(Some(reg_universe)))
);
log::trace!("vcode from lowering: \n{:?}", vcode);
// Perform register allocation.
let (run_checker, algorithm) = match vcode.flags().regalloc() {
settings::Regalloc::Backtracking => (false, Algorithm::Backtracking(Default::default())),
settings::Regalloc::BacktrackingChecked => {
(true, Algorithm::Backtracking(Default::default()))
}
settings::Regalloc::ExperimentalLinearScan => {
(false, Algorithm::LinearScan(Default::default()))
}
settings::Regalloc::ExperimentalLinearScanChecked => {
(true, Algorithm::LinearScan(Default::default()))
}
};
#[cfg(feature = "regalloc-snapshot")]
{
use std::fs;
use std::path::Path;
if let Some(path) = std::env::var("SERIALIZE_REGALLOC").ok() {
let snapshot = regalloc::IRSnapshot::from_function(&vcode, reg_universe);
let serialized = bincode::serialize(&snapshot).expect("couldn't serialize snapshot");
let file_path = Path::new(&path).join(Path::new(&format!("ir{}.bin", f.name)));
fs::write(file_path, &serialized).expect("couldn't write IR snapshot file");
}
}
// If either there are no reference-typed values, or else there are
// but there are no safepoints at which we need to know about them,
// then we don't need stack maps.
let sri = if stack_map_request_info.reftyped_vregs.len() > 0
&& stack_map_request_info.safepoint_insns.len() > 0
{
Some(&stack_map_request_info)
} else {
None
};
let result = {
let regalloc_result = {
let _tt = timing::regalloc();
allocate_registers_with_opts(
&mut vcode,
reg_universe,
sri,
Options {
run_checker,
algorithm,
},
)
.map_err(|err| {
log::error!(
"Register allocation error for vcode\n{}\nError: {:?}",
vcode.show_rru(Some(reg_universe)),
let mut options = RegallocOptions::default();
options.verbose_log = log::log_enabled!(log::Level::Trace);
regalloc2::run(&vcode, machine_env, &options)
.map_err(|err| {
log::error!(
"Register allocation error for vcode\n{:?}\nError: {:?}\nCLIF for error:\n{:?}",
vcode,
err,
f,
);
err
);
err
})
.expect("register allocation")
})
.expect("register allocation")
};
// Reorder vcode into final order and copy out final instruction sequence
// all at once. This also inserts prologues/epilogues.
{
let _tt = timing::vcode_post_ra();
vcode.replace_insns_from_regalloc(result);
}
log::trace!(
"vcode after regalloc: final version:\n{}",
DeferredDisplay::new(|| vcode.show_rru(Some(reg_universe)))
);
Ok(vcode)
Ok((vcode, regalloc_result))
}


@@ -1,525 +0,0 @@
//! Debug info analysis: computes value-label ranges from value-label markers in
//! generated VCode.
//!
//! We "reverse-engineer" debug info like this because it is far more reliable
//! than generating it while emitting code and keeping it in sync.
//!
//! This works by (i) observing "value-label marker" instructions, which are
//! semantically just an assignment from a register to a "value label" (which
//! one can think of as another register; they represent, e.g., Wasm locals) at
//! a certain point in the code, and (ii) observing loads and stores to the
//! stack and register moves.
//!
//! We track, at every program point, the correspondence between each value
//! label and *all* locations in which it resides. E.g., if it is stored to the
//! stack, we remember that it is in both a register and the stack slot; but if
//! the register is later overwritten, then we have it just in the stack slot.
//! This allows us to avoid false positives when observing loads/stores that we think
//! are spillslots but really aren't.
//!
//! We do a standard forward dataflow analysis to compute this info.
use crate::ir::ValueLabel;
use crate::machinst::*;
use crate::value_label::{LabelValueLoc, ValueLabelsRanges, ValueLocRange};
use log::trace;
use regalloc::{Reg, RegUsageCollector};
use std::collections::{HashMap, HashSet};
use std::hash::Hash;
/// Location of a labeled value: in a register or in a stack slot. Note that a
/// value may live in more than one location; `AnalysisInfo` maps each
/// value-label to multiple `ValueLoc`s.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
enum ValueLoc {
Reg(Reg),
/// Nominal-SP offset.
Stack(i64),
}
impl From<ValueLoc> for LabelValueLoc {
fn from(v: ValueLoc) -> Self {
match v {
ValueLoc::Reg(r) => LabelValueLoc::Reg(r),
ValueLoc::Stack(off) => LabelValueLoc::SPOffset(off),
}
}
}
impl ValueLoc {
fn is_reg(self) -> bool {
match self {
ValueLoc::Reg(_) => true,
_ => false,
}
}
fn is_stack(self) -> bool {
match self {
ValueLoc::Stack(_) => true,
_ => false,
}
}
}
/// Mappings at one program point.
#[derive(Clone, Debug)]
struct AnalysisInfo {
/// Nominal SP relative to real SP. If `None`, then the offset is
/// indeterminate (i.e., we merged to the lattice 'bottom' element). This
/// should not happen in well-formed code.
nominal_sp_offset: Option<i64>,
/// Forward map from labeled values to sets of locations.
label_to_locs: HashMap<ValueLabel, HashSet<ValueLoc>>,
/// Reverse map for each register indicating the value it holds, if any.
reg_to_label: HashMap<Reg, ValueLabel>,
/// Reverse map for each stack offset indicating the value it holds, if any.
stack_to_label: HashMap<i64, ValueLabel>,
}
/// Get the registers written (mod'd or def'd) by a machine instruction.
fn get_inst_writes<M: MachInst>(m: &M) -> Vec<Reg> {
// TODO: expose this part of regalloc.rs's interface publicly.
let mut vecs = RegUsageCollector::get_empty_reg_vecs_test_framework_only(false);
let mut coll = RegUsageCollector::new(&mut vecs);
m.get_regs(&mut coll);
vecs.defs.extend(vecs.mods.into_iter());
vecs.defs
}
impl AnalysisInfo {
/// Create a new analysis state. This is the "top" lattice element at which
/// the fixpoint dataflow analysis starts.
fn new() -> Self {
AnalysisInfo {
nominal_sp_offset: Some(0),
label_to_locs: HashMap::new(),
reg_to_label: HashMap::new(),
stack_to_label: HashMap::new(),
}
}
/// Remove all locations for a given labeled value. Used when the labeled
/// value is redefined (so old values become stale).
fn clear_label(&mut self, label: ValueLabel) {
if let Some(locs) = self.label_to_locs.remove(&label) {
for loc in locs {
match loc {
ValueLoc::Reg(r) => {
self.reg_to_label.remove(&r);
}
ValueLoc::Stack(off) => {
self.stack_to_label.remove(&off);
}
}
}
}
}
/// Remove a label from a register, if any. Used, e.g., if the register is
/// overwritten.
fn clear_reg(&mut self, reg: Reg) {
if let Some(label) = self.reg_to_label.remove(&reg) {
if let Some(locs) = self.label_to_locs.get_mut(&label) {
locs.remove(&ValueLoc::Reg(reg));
}
}
}
/// Remove a label from a stack offset, if any. Used, e.g., when the stack
/// slot is overwritten.
fn clear_stack_off(&mut self, off: i64) {
if let Some(label) = self.stack_to_label.remove(&off) {
if let Some(locs) = self.label_to_locs.get_mut(&label) {
locs.remove(&ValueLoc::Stack(off));
}
}
}
/// Indicate that a labeled value is newly defined and its new value is in
/// `reg`.
fn def_label_at_reg(&mut self, label: ValueLabel, reg: Reg) {
self.clear_label(label);
self.label_to_locs
.entry(label)
.or_insert_with(|| HashSet::new())
.insert(ValueLoc::Reg(reg));
self.reg_to_label.insert(reg, label);
}
/// Process a store from a register to a stack slot (offset).
fn store_reg(&mut self, reg: Reg, off: i64) {
self.clear_stack_off(off);
if let Some(label) = self.reg_to_label.get(&reg) {
if let Some(locs) = self.label_to_locs.get_mut(label) {
locs.insert(ValueLoc::Stack(off));
}
self.stack_to_label.insert(off, *label);
}
}
/// Process a load from a stack slot (offset) to a register.
fn load_reg(&mut self, reg: Reg, off: i64) {
self.clear_reg(reg);
if let Some(&label) = self.stack_to_label.get(&off) {
if let Some(locs) = self.label_to_locs.get_mut(&label) {
locs.insert(ValueLoc::Reg(reg));
}
self.reg_to_label.insert(reg, label);
}
}
/// Process a move from one register to another.
fn move_reg(&mut self, to: Reg, from: Reg) {
self.clear_reg(to);
if let Some(&label) = self.reg_to_label.get(&from) {
if let Some(locs) = self.label_to_locs.get_mut(&label) {
locs.insert(ValueLoc::Reg(to));
}
self.reg_to_label.insert(to, label);
}
}
/// Update the analysis state w.r.t. an instruction's effects. Given the
/// state just before `inst`, this method updates `self` to be the state
/// just after `inst`.
fn step<M: MachInst>(&mut self, inst: &M) {
for write in get_inst_writes(inst) {
self.clear_reg(write);
}
if let Some((label, reg)) = inst.defines_value_label() {
self.def_label_at_reg(label, reg);
}
match inst.stack_op_info() {
Some(MachInstStackOpInfo::LoadNomSPOff(reg, offset)) => {
self.load_reg(reg, offset + self.nominal_sp_offset.unwrap());
}
Some(MachInstStackOpInfo::StoreNomSPOff(reg, offset)) => {
self.store_reg(reg, offset + self.nominal_sp_offset.unwrap());
}
Some(MachInstStackOpInfo::NomSPAdj(offset)) => {
if self.nominal_sp_offset.is_some() {
self.nominal_sp_offset = Some(self.nominal_sp_offset.unwrap() + offset);
}
}
_ => {}
}
if let Some((to, from)) = inst.is_move() {
let to = to.to_reg();
self.move_reg(to, from);
}
}
}
/// Trait used to implement the dataflow analysis' meet (intersect) function
/// on the `AnalysisInfo` components. For efficiency, this is implemented as a
/// mutation on the LHS, rather than a pure functional operation.
trait IntersectFrom {
fn intersect_from(&mut self, other: &Self) -> IntersectResult;
}
/// Result of an intersection operation. Indicates whether the mutated LHS
/// (which becomes the intersection result) differs from the original LHS. Also
/// indicates if the value has become "empty" and should be removed from a
/// parent container, if any.
struct IntersectResult {
/// Did the intersection change the LHS input (the one that was mutated into
/// the result)? This is needed to drive the fixpoint loop; when no more
/// changes occur, we have converged.
changed: bool,
/// Is the resulting value "empty"? This can be used when a container, such
/// as a map, holds values of this (intersection result) type; when
/// `is_empty` is true for the merge of the values at a particular key, we
/// can remove that key from the merged (intersected) result. This is not
/// necessary for analysis correctness but reduces the memory and runtime
/// cost of the fixpoint loop.
is_empty: bool,
}
impl IntersectFrom for AnalysisInfo {
fn intersect_from(&mut self, other: &Self) -> IntersectResult {
let mut changed = false;
changed |= self
.nominal_sp_offset
.intersect_from(&other.nominal_sp_offset)
.changed;
changed |= self
.label_to_locs
.intersect_from(&other.label_to_locs)
.changed;
changed |= self
.reg_to_label
.intersect_from(&other.reg_to_label)
.changed;
changed |= self
.stack_to_label
.intersect_from(&other.stack_to_label)
.changed;
IntersectResult {
changed,
is_empty: false,
}
}
}
impl<K, V> IntersectFrom for HashMap<K, V>
where
K: Copy + Eq + Hash,
V: IntersectFrom,
{
/// Intersection for hashmap: remove keys that are not in both inputs;
/// recursively intersect values for keys in common.
fn intersect_from(&mut self, other: &Self) -> IntersectResult {
let mut changed = false;
let mut remove_keys = vec![];
for k in self.keys() {
if !other.contains_key(k) {
remove_keys.push(*k);
}
}
for k in &remove_keys {
changed = true;
self.remove(k);
}
remove_keys.clear();
for k in other.keys() {
if let Some(v) = self.get_mut(k) {
let result = v.intersect_from(other.get(k).unwrap());
changed |= result.changed;
if result.is_empty {
remove_keys.push(*k);
}
}
}
for k in &remove_keys {
changed = true;
self.remove(k);
}
IntersectResult {
changed,
is_empty: self.len() == 0,
}
}
}
impl<T> IntersectFrom for HashSet<T>
where
T: Copy + Eq + Hash,
{
/// Intersection for hashset: just take the set intersection.
fn intersect_from(&mut self, other: &Self) -> IntersectResult {
let mut changed = false;
let mut remove = vec![];
for val in self.iter() {
if !other.contains(val) {
remove.push(*val);
}
}
for val in remove {
changed = true;
self.remove(&val);
}
IntersectResult {
changed,
is_empty: self.len() == 0,
}
}
}
impl IntersectFrom for ValueLabel {
// Intersection for labeled value: remove if not equal. This is equivalent
// to a three-level lattice with top, bottom, and unordered set of
// individual labels in between.
fn intersect_from(&mut self, other: &Self) -> IntersectResult {
IntersectResult {
changed: false,
is_empty: *self != *other,
}
}
}
impl<T> IntersectFrom for Option<T>
where
T: Copy + Eq,
{
/// Intersection for Option<T>: recursively intersect if both `Some`, else
/// `None`.
fn intersect_from(&mut self, other: &Self) -> IntersectResult {
let mut changed = false;
if !(self.is_some() && other.is_some() && self == other) {
changed = true;
*self = None;
}
IntersectResult {
changed,
is_empty: self.is_none(),
}
}
}
/// Compute the value-label ranges (locations for program-point ranges for
/// labeled values) from a given `VCode` compilation result.
///
/// In order to compute this information, we perform a dataflow analysis on the
/// machine code. To do so, and translate the results into a form usable by the
/// debug-info consumers, we need to know two additional things:
///
/// - The machine-code layout (code offsets) of the instructions. DWARF is
/// encoded in terms of instruction *ends* (and we reason about value
/// locations at program points *after* instructions, to match this), so we
/// take an array `inst_ends`, giving us code offsets for each instruction's
/// end-point. (Note that this is one *past* the last byte; so a 4-byte
/// instruction at offset 0 has an end offset of 4.)
///
/// - The locations of the labels to which branches will jump. Branches can tell
/// us about their targets in terms of `MachLabel`s, but we don't know where
/// those `MachLabel`s will be placed in the linear array of instructions. We
/// take the array `label_insn_index` to provide this info: for a label with
/// index `l`, `label_insn_index[l]` is the index of the instruction before
/// which that label is bound.
pub(crate) fn compute<I: VCodeInst>(
insts: &[I],
layout_info: &InstsLayoutInfo,
) -> ValueLabelsRanges {
let inst_start = |idx: usize| {
if idx == 0 {
0
} else {
layout_info.inst_end_offsets[idx - 1]
}
};
trace!("compute: insts =");
for i in 0..insts.len() {
trace!(
" #{} end: {} -> {:?}",
i,
layout_info.inst_end_offsets[i],
insts[i]
);
}
trace!("label_insn_index: {:?}", layout_info.label_inst_indices);
// Info at each block head, indexed by label.
let mut block_starts: HashMap<u32, AnalysisInfo> = HashMap::new();
// Initialize state at entry.
block_starts.insert(0, AnalysisInfo::new());
// Worklist: label indices for basic blocks.
let mut worklist = Vec::new();
let mut worklist_set = HashSet::new();
worklist.push(0);
worklist_set.insert(0);
while !worklist.is_empty() {
let block = worklist.pop().unwrap();
worklist_set.remove(&block);
let mut state = block_starts.get(&block).unwrap().clone();
trace!("at block {} -> state: {:?}", block, state);
// Iterate for each instruction in the block (we break at the first
// terminator we see).
let mut index = layout_info.label_inst_indices[block as usize];
while index < insts.len() as u32 {
state.step(&insts[index as usize]);
trace!(" -> inst #{}: {:?}", index, insts[index as usize]);
trace!(" --> state: {:?}", state);
let term = insts[index as usize].is_term();
if term.is_term() {
for succ in term.get_succs() {
trace!(" SUCCESSOR block {}", succ.get());
if let Some(succ_state) = block_starts.get_mut(&succ.get()) {
trace!(" orig state: {:?}", succ_state);
if succ_state.intersect_from(&state).changed {
if worklist_set.insert(succ.get()) {
worklist.push(succ.get());
}
trace!(" (changed)");
}
trace!(" new state: {:?}", succ_state);
} else {
// First time seeing this block
block_starts.insert(succ.get(), state.clone());
worklist.push(succ.get());
worklist_set.insert(succ.get());
}
}
break;
}
index += 1;
}
}
// Now iterate over blocks one last time, collecting
// value-label locations.
let mut value_labels_ranges: ValueLabelsRanges = HashMap::new();
for block in 0..layout_info.label_inst_indices.len() {
let start_index = layout_info.label_inst_indices[block];
let end_index = if block == layout_info.label_inst_indices.len() - 1 {
insts.len() as u32
} else {
layout_info.label_inst_indices[block + 1]
};
let block = block as u32;
let mut state = block_starts.get(&block).unwrap().clone();
for index in start_index..end_index {
let offset = inst_start(index as usize);
let end = layout_info.inst_end_offsets[index as usize];
// Cold blocks cause instructions to occur out-of-order wrt
// others. We rely on the monotonic mapping from instruction
// index to offset in machine code for this analysis to work,
// so we just skip debuginfo for cold blocks. This should be
// generally fine, as cold blocks generally constitute
// slowpaths for expansions of particular ops, rather than
// user-written code.
if layout_info.start_of_cold_code.is_some()
&& offset >= layout_info.start_of_cold_code.unwrap()
{
continue;
}
assert!(offset <= end);
state.step(&insts[index as usize]);
for (label, locs) in &state.label_to_locs {
trace!(" inst {} has label {:?} -> locs {:?}", index, label, locs);
// Find an appropriate loc: a register if possible, otherwise pick the first stack
// loc.
let reg = locs.iter().cloned().find(|l| l.is_reg());
let loc = reg.or_else(|| locs.iter().cloned().find(|l| l.is_stack()));
if let Some(loc) = loc {
let loc = LabelValueLoc::from(loc);
let list = value_labels_ranges.entry(*label).or_insert_with(|| vec![]);
// If the existing location list for this value-label is
// either empty, or has an end location that does not extend
// to the current offset, then we have to append a new
// entry. Otherwise, we can extend the current entry.
//
// Note that `end` is one past the end of the instruction;
// it appears that `end` is exclusive, so a mapping valid at
// offset 5 will have start = 5, end = 6.
if list
.last()
.map(|last| last.end <= offset || last.loc != loc)
.unwrap_or(true)
{
list.push(ValueLocRange {
loc,
start: end,
end: end + 1,
});
} else {
list.last_mut().unwrap().end = end + 1;
}
}
}
}
}
trace!("ret: {:?}", value_labels_ranges);
value_labels_ranges
}
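(Aside on the removed module above: the meet operation it performs is plain
map/set intersection, applied recursively per value label. A standalone sketch
of that idea with ordinary `HashMap`s, not the module's `IntersectFrom` trait;
the key/value types here are illustrative only:)

```rust
use std::collections::{HashMap, HashSet};

// Standalone sketch of the dataflow meet used by the removed analysis: keep
// only keys present in both inputs, and intersect the location sets of keys
// that survive, dropping any that become empty (mirroring `is_empty` above).
fn meet(
    lhs: &HashMap<u32, HashSet<i64>>,
    rhs: &HashMap<u32, HashSet<i64>>,
) -> HashMap<u32, HashSet<i64>> {
    let mut out = HashMap::new();
    for (k, lv) in lhs {
        if let Some(rv) = rhs.get(k) {
            let common: HashSet<i64> = lv.intersection(rv).cloned().collect();
            if !common.is_empty() {
                out.insert(*k, common);
            }
        }
    }
    out
}
```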


@@ -1,8 +1,8 @@
//! Miscellaneous helpers for machine backends.
use super::{InsnOutput, LowerCtx, VCodeInst, ValueRegs};
use super::{Reg, Writable};
use crate::ir::Type;
use regalloc::{Reg, Writable};
use std::ops::{Add, BitAnd, Not, Sub};
/// Returns the size (in bits) of a given type.


@@ -1,8 +1,7 @@
use crate::ir::{types, Inst, Value, ValueList};
use crate::machinst::{get_output_reg, InsnOutput, LowerCtx, MachInst, RegRenamer};
use crate::machinst::{get_output_reg, InsnOutput, LowerCtx, Reg, Writable};
use alloc::boxed::Box;
use alloc::vec::Vec;
use regalloc::{Reg, Writable};
use smallvec::SmallVec;
use std::cell::Cell;
@@ -107,7 +106,8 @@ macro_rules! isle_prelude_methods {
#[inline]
fn invalid_reg(&mut self) -> Reg {
Reg::invalid()
use crate::machinst::valueregs::InvalidSentinel;
Reg::invalid_sentinel()
}
#[inline]
@@ -467,7 +467,6 @@ where
pub lower_ctx: &'a mut C,
pub flags: &'a F,
pub isa_flags: &'a I,
pub emitted_insts: SmallVec<[(C::I, bool); N]>,
}
/// Shared lowering code amongst all backends for doing ISLE-based lowering.
@@ -482,7 +481,6 @@ pub(crate) fn lower_common<C, F, I, IF, const N: usize>(
outputs: &[InsnOutput],
inst: Inst,
isle_lower: IF,
map_regs: fn(&mut C::I, &RegRenamer),
) -> Result<(), ()>
where
C: LowerCtx,
@@ -495,7 +493,6 @@ where
lower_ctx,
flags,
isa_flags,
emitted_insts: SmallVec::new(),
};
let temp_regs = isle_lower(&mut isle_ctx, inst).ok_or(())?;
@@ -514,10 +511,15 @@ where
}
// The ISLE generated code emits its own registers to define the
// instruction's lowered values in. We rename those registers to the
// registers they were assigned when their value was used as an operand in
// earlier lowerings.
let mut renamer = RegRenamer::default();
// instruction's lowered values in. However, other instructions
// that use this SSA value will be lowered assuming that the value
// is generated into a pre-assigned, different, register.
//
// To connect the two, we set up "aliases" in the VCodeBuilder
// that apply when it is building the Operand table for the
// regalloc to use. These aliases effectively rewrite any use of
// the pre-assigned register to the register that was returned by
// the ISLE lowering logic.
for i in 0..outputs.len() {
let regs = temp_regs[i];
let dsts = get_output_reg(isle_ctx.lower_ctx, outputs[i]);
@@ -528,41 +530,11 @@ where
// Flags values do not occupy any registers.
assert!(regs.len() == 0);
} else {
let (_, tys) = <C::I>::rc_for_type(ty).unwrap();
assert!(regs.len() == tys.len());
assert!(regs.len() == dsts.len());
for ((dst, temp), ty) in dsts.regs().iter().zip(regs.regs().iter()).zip(tys) {
renamer.add_rename(*temp, dst.to_reg(), *ty);
for (dst, temp) in dsts.regs().iter().zip(regs.regs().iter()) {
isle_ctx.lower_ctx.set_vreg_alias(dst.to_reg(), *temp);
}
}
}
for (inst, _) in isle_ctx.emitted_insts.iter_mut() {
map_regs(inst, &renamer);
}
// If any renamed register wasn't actually defined in the ISLE-generated
// instructions then what we're actually doing is "renaming" an input to a
// new name which requires manually inserting a `mov` instruction. Note that
// this typically doesn't happen and is only here for cases where the input
// is sometimes passed through unmodified to the output, such as
// zero-extending a 64-bit input to a 128-bit output which doesn't actually
// change the input and simply produces another zero'd register.
for (old, new, ty) in renamer.unmapped_defs() {
isle_ctx
.lower_ctx
.emit(<C::I>::gen_move(Writable::from_reg(new), old, ty));
}
// Once everything is remapped we forward all emitted instructions to the
// `lower_ctx`. Note that this happens after the synthetic mov's above in
// case any of these instruction use those movs.
for (inst, is_safepoint) in isle_ctx.emitted_insts {
if is_safepoint {
lower_ctx.emit_safepoint(inst);
} else {
lower_ctx.emit(inst);
}
}
Ok(())
}
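The vreg-alias scheme described in the comment above replaces the old
`RegRenamer` post-pass. A minimal sketch of the connection step, assuming a
`LowerCtx` with the `set_vreg_alias` method added in this PR; the helper name
`alias_isle_outputs` is illustrative only:

```rust
use crate::machinst::{LowerCtx, Reg, Writable};

// Illustrative helper (not in the PR): connect ISLE-produced temporaries to
// the pre-assigned output vregs by recording vreg aliases, so that the
// operand table handed to regalloc2 rewrites uses of the pre-assigned vreg
// into the ISLE temp.
fn alias_isle_outputs<C: LowerCtx>(ctx: &mut C, dsts: &[Writable<Reg>], temps: &[Reg]) {
    assert_eq!(dsts.len(), temps.len());
    for (dst, &temp) in dsts.iter().zip(temps.iter()) {
        // `set_vreg_alias(from, to)`: treat `from` as an alias of `to`.
        ctx.set_vreg_alias(dst.to_reg(), temp);
    }
}
```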


@@ -18,17 +18,19 @@ use crate::ir::{
};
use crate::machinst::{
non_writable_value_regs, writable_value_regs, ABICallee, BlockIndex, BlockLoweringOrder,
LoweredBlock, MachLabel, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData, VCodeConstants,
VCodeInst, ValueRegs,
LoweredBlock, MachLabel, Reg, VCode, VCodeBuilder, VCodeConstant, VCodeConstantData,
VCodeConstants, VCodeInst, ValueRegs, Writable,
};
use crate::CodegenResult;
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::convert::TryInto;
use regalloc::{Reg, StackmapRequestInfo, Writable};
use regalloc2::VReg;
use smallvec::{smallvec, SmallVec};
use std::fmt::Debug;
use super::{first_user_vreg_index, VCodeBuildDirection};
/// An "instruction color" partitions CLIF instructions by side-effecting ops.
/// All instructions with the same "color" are guaranteed not to be separated by
/// any side-effecting op (for this purpose, loads are also considered
@@ -160,8 +162,6 @@ pub trait LowerCtx {
fn alloc_tmp(&mut self, ty: Type) -> ValueRegs<Writable<Reg>>;
/// Emit a machine instruction.
fn emit(&mut self, mach_inst: Self::I);
/// Emit a machine instruction that is a safepoint.
fn emit_safepoint(&mut self, mach_inst: Self::I);
/// Indicate that the side-effect of an instruction has been sunk to the
/// current scan location. This should only be done when the instruction's
/// original results are not used (i.e., `put_input_in_regs` is not invoked
@@ -178,6 +178,9 @@ pub trait LowerCtx {
/// Cause the value in `reg` to be in a virtual reg, by copying it into a new virtual reg
/// if `reg` is a real reg. `ty` describes the type of the value in `reg`.
fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg;
/// Note that one vreg is to be treated as an alias of another.
fn set_vreg_alias(&mut self, from: Reg, to: Reg);
}
/// A representation of all of the ways in which a value is available, aside
@@ -232,14 +235,6 @@ pub trait LowerBackend {
}
}
/// A pending instruction to insert and auxiliary information about it: its source location and
/// whether it is a safepoint.
struct InstTuple<I: VCodeInst> {
loc: SourceLoc,
is_safepoint: bool,
inst: I,
}
/// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence
/// from original Inst to MachInsts.
pub struct Lower<'func, I: VCodeInst> {
@@ -287,20 +282,10 @@ pub struct Lower<'func, I: VCodeInst> {
inst_sunk: FxHashSet<Inst>,
/// Next virtual register number to allocate.
next_vreg: u32,
/// Insts in reverse block order, before final copy to vcode.
block_insts: Vec<InstTuple<I>>,
/// Ranges in `block_insts` constituting BBs.
block_ranges: Vec<(usize, usize)>,
/// Instructions collected for the BB in progress, in reverse order, with
/// source-locs attached.
bb_insts: Vec<InstTuple<I>>,
next_vreg: usize,
/// Instructions collected for the CLIF inst in progress, in forward order.
ir_insts: Vec<InstTuple<I>>,
ir_insts: Vec<I>,
/// The register to use for GetPinnedReg, if any, on this architecture.
pinned_reg: Option<Reg>,
@@ -324,22 +309,22 @@ pub enum RelocDistance {
fn alloc_vregs<I: VCodeInst>(
ty: Type,
next_vreg: &mut u32,
next_vreg: &mut usize,
vcode: &mut VCodeBuilder<I>,
) -> CodegenResult<ValueRegs<Reg>> {
let v = *next_vreg;
let (regclasses, tys) = I::rc_for_type(ty)?;
*next_vreg += regclasses.len() as u32;
let regs = match regclasses {
&[rc0] => ValueRegs::one(Reg::new_virtual(rc0, v)),
&[rc0, rc1] => ValueRegs::two(Reg::new_virtual(rc0, v), Reg::new_virtual(rc1, v + 1)),
*next_vreg += regclasses.len();
let regs: ValueRegs<Reg> = match regclasses {
&[rc0] => ValueRegs::one(VReg::new(v, rc0).into()),
&[rc0, rc1] => ValueRegs::two(VReg::new(v, rc0).into(), VReg::new(v + 1, rc1).into()),
// We can extend this if/when we support 32-bit targets; e.g.,
// an i128 on a 32-bit machine will need up to four machine regs
// for a `Value`.
_ => panic!("Value must reside in 1 or 2 registers"),
};
for (&reg_ty, &reg) in tys.iter().zip(regs.regs().iter()) {
vcode.set_vreg_type(reg.to_virtual_reg(), reg_ty);
vcode.set_vreg_type(reg.to_virtual_reg().unwrap(), reg_ty);
}
Ok(regs)
}
@@ -358,9 +343,15 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
block_order: BlockLoweringOrder,
) -> CodegenResult<Lower<'func, I>> {
let constants = VCodeConstants::with_capacity(f.dfg.constants.len());
let mut vcode = VCodeBuilder::new(abi, emit_info, block_order, constants);
let mut vcode = VCodeBuilder::new(
abi,
emit_info,
block_order,
constants,
VCodeBuildDirection::Backward,
);
let mut next_vreg: u32 = 0;
let mut next_vreg: usize = first_user_vreg_index();
let mut value_regs = SecondaryMap::with_default(ValueRegs::invalid());
@@ -381,10 +372,11 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
let regs = alloc_vregs(ty, &mut next_vreg, &mut vcode)?;
value_regs[result] = regs;
log::trace!(
"bb {} inst {} ({:?}): result regs {:?}",
"bb {} inst {} ({:?}): result {} regs {:?}",
bb,
inst,
f.dfg[inst],
result,
regs,
);
}
@@ -459,9 +451,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
inst_sunk: FxHashSet::default(),
cur_scan_entry_color: None,
cur_inst: None,
block_insts: vec![],
block_ranges: vec![],
bb_insts: vec![],
ir_insts: vec![],
pinned_reg: None,
vm_context,
@@ -475,6 +464,12 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
entry_bb,
self.f.dfg.block_params(entry_bb)
);
// Make the vmctx available in debuginfo.
if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
self.emit_value_label_marks_for_value(vmctx_val);
}
for (i, param) in self.f.dfg.block_params(entry_bb).iter().enumerate() {
if !self.vcode.abi().arg_is_needed_in_body(i) {
continue;
@@ -509,14 +504,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
}
fn gen_retval_setup(&mut self, gen_ret_inst: GenerateReturn) {
// Hack: to keep `vmctx` alive, if it exists, we emit a value label here
// for it if debug info is requested. This ensures that it exists either
// in a register or spillslot throughout the entire function body, and
// allows for a better debugging experience.
if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
self.emit_value_label_marks_for_value(vmctx_val);
}
let retval_regs = self.retval_regs.clone();
for (i, regs) in retval_regs.into_iter().enumerate() {
let regs = writable_value_regs(regs);
@@ -534,141 +521,16 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
GenerateReturn::No => self.vcode.abi().gen_epilogue_placeholder(),
};
self.emit(inst);
}
fn lower_edge(&mut self, pred: Block, inst: Inst, succ: Block) -> CodegenResult<()> {
log::trace!("lower_edge: pred {} succ {}", pred, succ);
let num_args = self.f.dfg.block_params(succ).len();
debug_assert!(num_args == self.f.dfg.inst_variable_args(inst).len());
// Most blocks have no params, so skip all the hoop-jumping below and make an early exit.
if num_args == 0 {
return Ok(());
}
self.cur_inst = Some(inst);
// Make up two vectors of info:
//
// * one for dsts which are to be assigned constants. We'll deal with those second, so
// as to minimise live ranges.
//
// * one for dsts whose sources are non-constants.
let mut const_bundles: SmallVec<[_; 16]> = SmallVec::new();
let mut var_bundles: SmallVec<[_; 16]> = SmallVec::new();
let mut i = 0;
for (dst_val, src_val) in self
.f
.dfg
.block_params(succ)
.iter()
.zip(self.f.dfg.inst_variable_args(inst).iter())
{
let src_val = self.f.dfg.resolve_aliases(*src_val);
let ty = self.f.dfg.value_type(src_val);
debug_assert!(ty == self.f.dfg.value_type(*dst_val));
let dst_regs = self.value_regs[*dst_val];
let input = self.get_value_as_source_or_const(src_val);
log::trace!("jump arg {} is {}", i, src_val);
i += 1;
if let Some(c) = input.constant {
log::trace!(" -> constant {}", c);
const_bundles.push((ty, writable_value_regs(dst_regs), c));
} else {
let src_regs = self.put_value_in_regs(src_val);
log::trace!(" -> reg {:?}", src_regs);
// Skip self-assignments. Not only are they pointless, they falsely trigger the
// overlap-check below and hence can cause a lot of unnecessary copying through
// temporaries.
if dst_regs != src_regs {
var_bundles.push((ty, writable_value_regs(dst_regs), src_regs));
}
// Hack: generate a virtual instruction that uses vmctx in
// order to keep it alive for the duration of the function,
// for the benefit of debuginfo.
if self.f.dfg.values_labels.is_some() {
if let Some(vmctx_val) = self.f.special_param(ArgumentPurpose::VMContext) {
let vmctx_reg = self.value_regs[vmctx_val].only_reg().unwrap();
self.emit(I::gen_dummy_use(vmctx_reg));
}
}
// Deal first with the moves whose sources are variables.
// FIXME: use regalloc.rs' SparseSetU here. This would avoid all heap allocation
// for cases of up to circa 16 args. Currently not possible because regalloc.rs
// does not export it.
let mut src_reg_set = FxHashSet::<Reg>::default();
for (_, _, src_regs) in &var_bundles {
for &reg in src_regs.regs() {
src_reg_set.insert(reg);
}
}
let mut overlaps = false;
'outer: for (_, dst_regs, _) in &var_bundles {
for &reg in dst_regs.regs() {
if src_reg_set.contains(&reg.to_reg()) {
overlaps = true;
break 'outer;
}
}
}
// If, as is mostly the case, the source and destination register sets are non
// overlapping, then we can copy directly, so as to save the register allocator work.
if !overlaps {
for (ty, dst_regs, src_regs) in &var_bundles {
let (_, reg_tys) = I::rc_for_type(*ty)?;
for ((dst, src), reg_ty) in dst_regs
.regs()
.iter()
.zip(src_regs.regs().iter())
.zip(reg_tys.iter())
{
self.emit(I::gen_move(*dst, *src, *reg_ty));
}
}
} else {
// There's some overlap, so play safe and copy via temps.
let mut tmp_regs = SmallVec::<[ValueRegs<Writable<Reg>>; 16]>::new();
for (ty, _, _) in &var_bundles {
tmp_regs.push(self.alloc_tmp(*ty));
}
for ((ty, _, src_reg), tmp_reg) in var_bundles.iter().zip(tmp_regs.iter()) {
let (_, reg_tys) = I::rc_for_type(*ty)?;
for ((tmp, src), reg_ty) in tmp_reg
.regs()
.iter()
.zip(src_reg.regs().iter())
.zip(reg_tys.iter())
{
self.emit(I::gen_move(*tmp, *src, *reg_ty));
}
}
for ((ty, dst_reg, _), tmp_reg) in var_bundles.iter().zip(tmp_regs.iter()) {
let (_, reg_tys) = I::rc_for_type(*ty)?;
for ((dst, tmp), reg_ty) in dst_reg
.regs()
.iter()
.zip(tmp_reg.regs().iter())
.zip(reg_tys.iter())
{
self.emit(I::gen_move(*dst, tmp.to_reg(), *reg_ty));
}
}
}
// Now, finally, deal with the moves whose sources are constants.
for (ty, dst_reg, const_val) in &const_bundles {
for inst in I::gen_constant(*dst_reg, *const_val as u128, *ty, |ty| {
self.alloc_tmp(ty).only_reg().unwrap()
})
.into_iter()
{
self.emit(inst);
}
}
Ok(())
}
/// Has this instruction been sunk to a use-site (i.e., away from its
@@ -694,21 +556,24 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
self.cur_scan_entry_color = Some(self.block_end_colors[block]);
// Lowering loop:
// - For each non-branch instruction, in reverse order:
// - If side-effecting (load, store, branch/call/return, possible trap), or if
// used outside of this block, or if demanded by another inst, then lower.
// - If side-effecting (load, store, branch/call/return,
// possible trap), or if used outside of this block, or if
// demanded by another inst, then lower.
//
// That's it! Lowering of side-effecting ops will force all *needed*
// (live) non-side-effecting ops to be lowered at the right places, via
// the `use_input_reg()` callback on the `LowerCtx` (that's us). That's
// because `use_input_reg()` sets the eager/demand bit for any insts
// whose result registers are used.
// That's it! Lowering of side-effecting ops will force all
// *needed* (live) non-side-effecting ops to be lowered at the
// right places, via the `use_input_reg()` callback on the
// `LowerCtx` (that's us). That's because `use_input_reg()`
// sets the eager/demand bit for any insts whose result
// registers are used.
//
// We build up the BB in reverse instruction order in `bb_insts`.
// Because the machine backend calls `ctx.emit()` in forward order, we
// collect per-IR-inst lowered instructions in `ir_insts`, then reverse
// these and append to `bb_insts` as we go backward through the block.
// `bb_insts` are then reversed again and appended to the VCode at the
// end of the BB (in the toplevel driver `lower()`).
// We set the VCodeBuilder to "backward" mode, so we emit
// blocks in reverse order wrt the BlockIndex sequence, and
// emit instructions in reverse order within blocks. Because
// the machine backend calls `ctx.emit()` in forward order, we
// collect per-IR-inst lowered instructions in `ir_insts`,
// then reverse these and append to the VCode at the end of
// each IR instruction.
for inst in self.f.layout.block_insts(block).rev() {
let data = &self.f.dfg[inst];
let has_side_effect = has_lowering_side_effect(self.f, inst);
@@ -750,9 +615,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
if has_side_effect || value_needed {
log::trace!("lowering: inst {}: {:?}", inst, self.f.dfg[inst]);
backend.lower(self, inst)?;
// Emit value-label markers if needed, to later recover debug
// mappings.
self.emit_value_label_markers_for_inst(inst);
}
if data.opcode().is_return() {
// Return: handle specially, using ABI-appropriate sequence.
@@ -767,11 +629,33 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
let loc = self.srcloc(inst);
self.finish_ir_inst(loc);
// Emit value-label markers if needed, to later recover
// debug mappings. This must happen before the instruction
// (so after we emit, in bottom-to-top pass).
self.emit_value_label_markers_for_inst(inst);
}
// Add the block params to this block.
self.add_block_params(block)?;
self.cur_scan_entry_color = None;
Ok(())
}
fn add_block_params(&mut self, block: Block) -> CodegenResult<()> {
for &param in self.f.dfg.block_params(block) {
let ty = self.f.dfg.value_type(param);
let (_reg_rcs, reg_tys) = I::rc_for_type(ty)?;
debug_assert_eq!(reg_tys.len(), self.value_regs[param].len());
for (&reg, &rty) in self.value_regs[param].regs().iter().zip(reg_tys.iter()) {
self.vcode
.add_block_param(reg.to_virtual_reg().unwrap(), rty);
}
}
Ok(())
}
fn get_value_labels<'a>(&'a self, val: Value, depth: usize) -> Option<&'a [ValueLabelStart]> {
if let Some(ref values_labels) = self.f.dfg.values_labels {
log::trace!(
@@ -794,7 +678,6 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
}
fn emit_value_label_marks_for_value(&mut self, val: Value) {
let mut markers: SmallVec<[I; 4]> = smallvec![];
let regs = self.value_regs[val];
if regs.len() > 1 {
return;
@@ -813,12 +696,9 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
reg,
label,
);
markers.push(I::gen_value_label_marker(label, reg));
self.vcode.add_value_label(reg, label);
}
}
for marker in markers {
self.emit(marker);
}
}
fn emit_value_label_markers_for_inst(&mut self, inst: Inst) {
@@ -849,36 +729,17 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
}
fn finish_ir_inst(&mut self, loc: SourceLoc) {
// `bb_insts` is kept in reverse order, so emit the instructions in
// reverse order.
for mut tuple in self.ir_insts.drain(..).rev() {
tuple.loc = loc;
self.bb_insts.push(tuple);
self.vcode.set_srcloc(loc);
// The VCodeBuilder builds in reverse order (and reverses at
// the end), but `ir_insts` is in forward order, so reverse
// it.
for inst in self.ir_insts.drain(..).rev() {
self.vcode.push(inst);
}
}
fn finish_bb(&mut self) {
let start = self.block_insts.len();
for tuple in self.bb_insts.drain(..).rev() {
self.block_insts.push(tuple);
}
let end = self.block_insts.len();
self.block_ranges.push((start, end));
}
fn copy_bbs_to_vcode(&mut self) {
for &(start, end) in self.block_ranges.iter().rev() {
for &InstTuple {
loc,
is_safepoint,
ref inst,
} in &self.block_insts[start..end]
{
self.vcode.set_srcloc(loc);
self.vcode.push(inst.clone(), is_safepoint);
}
self.vcode.end_bb();
}
self.vcode.end_bb();
}
fn lower_clif_branches<B: LowerBackend<MInst = I>>(
@@ -900,9 +761,28 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
backend.lower_branch_group(self, branches, targets)?;
let loc = self.srcloc(branches[0]);
self.finish_ir_inst(loc);
// Add block param outputs for current block.
self.lower_branch_blockparam_args(block);
Ok(())
}
fn lower_branch_blockparam_args(&mut self, block: Block) {
visit_block_succs(self.f, block, |inst, _succ| {
let branch_args = self.f.dfg.inst_variable_args(inst);
let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];
for &arg in branch_args {
let arg = self.f.dfg.resolve_aliases(arg);
let regs = self.put_value_in_regs(arg);
for &vreg in regs.regs() {
let vreg = self.vcode.resolve_vreg_alias(vreg.into());
branch_arg_vregs.push(vreg.into());
}
}
self.vcode.add_branch_args_for_succ(&branch_arg_vregs[..]);
});
self.finish_ir_inst(SourceLoc::default());
}
fn collect_branches_and_targets(
&self,
bindex: BlockIndex,
@@ -927,10 +807,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
}
/// Lower the function.
pub fn lower<B: LowerBackend<MInst = I>>(
mut self,
backend: &B,
) -> CodegenResult<(VCode<I>, StackmapRequestInfo)> {
pub fn lower<B: LowerBackend<MInst = I>>(mut self, backend: &B) -> CodegenResult<VCode<I>> {
log::trace!("about to lower function: {:?}", self.f);
// Initialize the ABI object, giving it a temp if requested.
@@ -945,7 +822,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
// not the whole `Lower` impl).
self.pinned_reg = backend.maybe_pinned_reg();
self.vcode.set_entry(0);
self.vcode.set_entry(BlockIndex::new(0));
// Reused vectors for branch lowering.
let mut branches: SmallVec<[Inst; 2]> = SmallVec::new();
@@ -963,7 +840,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
// Main lowering loop over lowered blocks.
for (bindex, lb) in lowered_order.iter().enumerate().rev() {
let bindex = bindex as BlockIndex;
let bindex = BlockIndex::new(bindex);
// Lower the block body in reverse order (see comment in
// `lower_clif_block()` for rationale).
@@ -976,30 +853,41 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
self.finish_ir_inst(self.srcloc(branches[0]));
}
} else {
// If no orig block, this must be a pure edge block; get the successor and
// emit a jump.
// If no orig block, this must be a pure edge block;
// get the successor and emit a jump. Add block params
// according to the one successor, and pass them
// through; note that the successor must have an
// original block.
let (_, succ) = self.vcode.block_order().succ_indices(bindex)[0];
let orig_succ = lowered_order[succ.index()];
let orig_succ = orig_succ
.orig_block()
.expect("Edge block succ must be body block");
let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];
for ty in self.f.dfg.block_param_types(orig_succ) {
let regs = alloc_vregs(ty, &mut self.next_vreg, &mut self.vcode)?;
for &reg in regs.regs() {
branch_arg_vregs.push(reg);
let vreg = reg.to_virtual_reg().unwrap();
self.vcode
.add_block_param(vreg, self.vcode.get_vreg_type(vreg));
}
}
self.vcode.add_branch_args_for_succ(&branch_arg_vregs[..]);
self.emit(I::gen_jump(MachLabel::from_block(succ)));
self.finish_ir_inst(SourceLoc::default());
}
// Out-edge phi moves.
if let Some((pred, inst, succ)) = lb.out_edge() {
self.lower_edge(pred, inst, succ)?;
self.finish_ir_inst(SourceLoc::default());
}
// Original block body.
if let Some(bb) = lb.orig_block() {
self.lower_clif_block(backend, bb)?;
self.emit_value_label_markers_for_block_args(bb);
}
// In-edge phi moves.
if let Some((pred, inst, succ)) = lb.in_edge() {
self.lower_edge(pred, inst, succ)?;
self.finish_ir_inst(SourceLoc::default());
}
if bindex == 0 {
if bindex.index() == 0 {
// Set up the function with arg vreg inits.
self.gen_arg_setup();
self.finish_ir_inst(SourceLoc::default());
@@ -1008,13 +896,12 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
self.finish_bb();
}
self.copy_bbs_to_vcode();
// Now that we've emitted all instructions into the VCodeBuilder, let's build the VCode.
let (vcode, stack_map_info) = self.vcode.build();
// Now that we've emitted all instructions into the
// VCodeBuilder, let's build the VCode.
let vcode = self.vcode.build();
log::trace!("built vcode: {:?}", vcode);
Ok((vcode, stack_map_info))
Ok(vcode)
}
}
@@ -1278,19 +1165,8 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
}
fn emit(&mut self, mach_inst: I) {
self.ir_insts.push(InstTuple {
loc: SourceLoc::default(),
is_safepoint: false,
inst: mach_inst,
});
}
fn emit_safepoint(&mut self, mach_inst: I) {
self.ir_insts.push(InstTuple {
loc: SourceLoc::default(),
is_safepoint: true,
inst: mach_inst,
});
log::trace!("emit: {:?}", mach_inst);
self.ir_insts.push(mach_inst);
}
fn sink_inst(&mut self, ir_inst: Inst) {
@@ -1336,7 +1212,7 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
}
fn ensure_in_vreg(&mut self, reg: Reg, ty: Type) -> Reg {
if reg.is_virtual() {
if reg.to_virtual_reg().is_some() {
reg
} else {
let new_reg = self.alloc_tmp(ty).only_reg().unwrap();
@@ -1344,6 +1220,11 @@ impl<'func, I: VCodeInst> LowerCtx for Lower<'func, I> {
new_reg.to_reg()
}
}
fn set_vreg_alias(&mut self, from: Reg, to: Reg) {
log::trace!("set vreg alias: from {:?} to {:?}", from, to);
self.vcode.set_vreg_alias(from, to);
}
}
/// Visit all successors of a block with a given visitor closure.

View File

@@ -8,14 +8,10 @@
//!
//! The container for machine instructions, at various stages of construction,
//! is the `VCode` struct. We refer to a sequence of machine instructions organized
//! into basic blocks as "vcode". This is short for "virtual-register code", though
//! it's a bit of a misnomer because near the end of the pipeline, vcode has all
//! real registers. Nevertheless, the name is catchy and we like it.
//! into basic blocks as "vcode". This is short for "virtual-register code".
//!
//! The compilation pipeline, from an `ir::Function` (already optimized as much as
//! you like by machine-independent optimization passes) onward, is as follows.
//! (N.B.: though we show the VCode separately at each stage, the passes
//! mutate the VCode in place; these are not separate copies of the code.)
//!
//! ```plain
//!
@@ -31,37 +27,25 @@
//! | with unknown offsets.
//! | - critical edges (actually all edges)
//! | are split.)
//! | [regalloc]
//! |
//! VCode<arch_backend::Inst> (machine instructions:
//! | - all real registers.
//! | - new instruction sequence returned
//! | out-of-band in RegAllocResult.
//! | - instruction sequence has spills,
//! | reloads, and moves inserted.
//! | - other invariants same as above.)
//! | [regalloc --> `regalloc2::Output`; VCode is unchanged]
//! |
//! | [preamble/postamble]
//! | [binary emission via MachBuffer]
//! |
//! VCode<arch_backend::Inst> (machine instructions:
//! | - stack-frame size known.
//! | - out-of-band instruction sequence
//! | has preamble prepended to entry
//! | block, and postamble injected before
//! | every return instruction.
//! | - all symbolic stack references to
//! | stackslots and spillslots are resolved
//! | to concrete FP-offset mem addresses.)
//! |
//! | [binary emission via MachBuffer
//! | with streaming branch resolution/simplification]
//! |
//! Vec<u8> (machine code!)
//! Vec<u8> (machine code:
//! | - two-dest branches resolved via
//! | streaming branch resolution/simplification.
//! | - regalloc `Allocation` results used directly
//! | by instruction emission code.
//! | - prologue and epilogue(s) built and emitted
//! | directly during emission.
//! | - nominal-SP-relative offsets resolved
//! | by tracking EmitState.)
//!
//! ```
use crate::binemit::{Addend, CodeInfo, CodeOffset, Reloc, StackMap};
use crate::ir::{SourceLoc, StackSlot, Type, ValueLabel};
use crate::ir::{SourceLoc, StackSlot, Type};
use crate::result::CodegenResult;
use crate::settings::Flags;
use crate::value_label::ValueLabelsRanges;
@@ -69,10 +53,7 @@ use alloc::boxed::Box;
use alloc::vec::Vec;
use core::fmt::Debug;
use cranelift_entity::PrimaryMap;
use regalloc::RegUsageCollector;
use regalloc::{
RealReg, RealRegUniverse, Reg, RegClass, RegUsageMapper, SpillSlot, VirtualReg, Writable,
};
use regalloc2::{Allocation, VReg};
use smallvec::{smallvec, SmallVec};
use std::string::String;
@@ -98,20 +79,15 @@ pub use helpers::*;
pub mod inst_common;
pub use inst_common::*;
pub mod valueregs;
pub use reg::*;
pub use valueregs::*;
pub mod debug;
pub use regmapping::*;
pub mod regmapping;
pub mod reg;
/// A machine instruction.
pub trait MachInst: Clone + Debug {
/// Return the registers referenced by this machine instruction along with
/// the modes of reference (use, def, modify).
fn get_regs(&self, collector: &mut RegUsageCollector);
/// Map virtual registers to physical registers using the given virt->phys
/// maps corresponding to the program points prior to, and after, this instruction.
fn map_regs<RUM: RegUsageMapper>(&mut self, maps: &RUM);
fn get_operands<F: Fn(VReg) -> VReg>(&self, collector: &mut OperandCollector<'_, F>);
/// If this is a simple move, return the (source, destination) tuple of registers.
fn is_move(&self) -> Option<(Writable<Reg>, Reg)>;
@@ -128,11 +104,6 @@ pub trait MachInst: Clone + Debug {
true
}
/// If this is a load or store to the stack, return that info.
fn stack_op_info(&self) -> Option<MachInstStackOpInfo> {
None
}
/// Generate a move.
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;
@@ -144,10 +115,9 @@ pub trait MachInst: Clone + Debug {
alloc_tmp: F,
) -> SmallVec<[Self; 4]>;
/// Possibly operate on a value directly in a spill-slot rather than a
/// register. Useful if the machine has register-memory instruction forms
/// (e.g., add directly from or directly to memory), like x86.
fn maybe_direct_reload(&self, reg: VirtualReg, slot: SpillSlot) -> Option<Self>;
/// Generate a dummy instruction that will keep a value alive but
/// has no other purpose.
fn gen_dummy_use(reg: Reg) -> Self;
/// Determine register class(es) to store the given Cranelift type, and the
/// Cranelift type actually stored in the underlying register(s). May return
@@ -163,6 +133,13 @@ pub trait MachInst: Clone + Debug {
/// generating spills and reloads for individual registers.
fn rc_for_type(ty: Type) -> CodegenResult<(&'static [RegClass], &'static [Type])>;
/// Get an appropriate type that can fully hold a value in a given
/// register class. This may not be the only type that maps to
/// that class, but when used with `gen_move()` or the ABI trait's
/// load/spill constructors, it should produce instruction(s) that
/// move the entire register contents.
fn canonical_type_for_rc(rc: RegClass) -> Type;
/// Generate a jump to another target. Used during lowering of
/// control flow.
fn gen_jump(target: MachLabel) -> Self;
@@ -187,16 +164,8 @@ pub trait MachInst: Clone + Debug {
/// be dependent on compilation flags.
fn ref_type_regclass(_flags: &Flags) -> RegClass;
/// Does this instruction define a ValueLabel? Returns the `Reg` whose value
/// becomes the new value of the `ValueLabel` after this instruction.
fn defines_value_label(&self) -> Option<(ValueLabel, Reg)> {
None
}
/// Create a marker instruction that defines a value label.
fn gen_value_label_marker(_label: ValueLabel, _reg: Reg) -> Self {
Self::gen_nop(0)
}
/// Is this a safepoint?
fn is_safepoint(&self) -> bool;
/// A label-use kind: a type that describes the types of label references that
/// can occur in an instruction.
@@ -266,35 +235,6 @@ pub enum MachTerminator<'a> {
Indirect(&'a [MachLabel]),
}
impl<'a> MachTerminator<'a> {
/// Get the successor labels named in a `MachTerminator`.
pub fn get_succs(&self) -> SmallVec<[MachLabel; 2]> {
let mut ret = smallvec![];
match self {
&MachTerminator::Uncond(l) => {
ret.push(l);
}
&MachTerminator::Cond(l1, l2) => {
ret.push(l1);
ret.push(l2);
}
&MachTerminator::Indirect(ls) => {
ret.extend(ls.iter().cloned());
}
_ => {}
}
ret
}
/// Is this a terminator?
pub fn is_term(&self) -> bool {
match self {
MachTerminator::None => false,
_ => true,
}
}
}
/// A trait describing the ability to encode a MachInst into binary machine code.
pub trait MachInstEmit: MachInst {
/// Persistent state carried across `emit` invocations.
@@ -302,9 +242,15 @@ pub trait MachInstEmit: MachInst {
/// Constant information used in `emit` invocations.
type Info;
/// Emit the instruction.
fn emit(&self, code: &mut MachBuffer<Self>, info: &Self::Info, state: &mut Self::State);
fn emit(
&self,
allocs: &[Allocation],
code: &mut MachBuffer<Self>,
info: &Self::Info,
state: &mut Self::State,
);
/// Pretty-print the instruction.
fn pretty_print(&self, mb_rru: Option<&RealRegUniverse>, state: &mut Self::State) -> String;
fn pretty_print_inst(&self, allocs: &[Allocation], state: &mut Self::State) -> String;
}
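// A minimal sketch (an assumption for illustration, not the actual emission
// driver in this change) of how the new `emit` signature is used: regalloc2's
// results stay out-of-band, and each instruction receives its own slice of
// `Allocation`s at emission time rather than having its registers rewritten
// in place.
fn emit_all<I: MachInstEmit>(
    insts: &[I],
    allocs_per_inst: &[&[Allocation]],
    buffer: &mut MachBuffer<I>,
    info: &I::Info,
    state: &mut I::State,
) {
    for (inst, allocs) in insts.iter().zip(allocs_per_inst.iter().copied()) {
        // Allocations are consumed in the same order the operands were
        // collected by `get_operands()`.
        inst.emit(allocs, buffer, info, state);
    }
}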
/// A trait describing the emission state carried between MachInsts when
@@ -409,15 +355,3 @@ pub enum UnwindInfoKind {
#[cfg(feature = "unwind")]
Windows,
}
/// Info about an operation that loads or stores from/to the stack.
#[derive(Clone, Copy, Debug)]
pub enum MachInstStackOpInfo {
/// Load from an offset from the nominal stack pointer into the given reg.
LoadNomSPOff(Reg, i64),
/// Store to an offset from the nominal stack pointer from the given reg.
StoreNomSPOff(Reg, i64),
/// Adjustment of nominal-SP up or down. This value is added to subsequent
/// offsets in loads/stores above to produce real-SP offsets.
NomSPAdj(i64),
}

View File

@@ -0,0 +1,504 @@
//! Definitions for registers, operands, etc. Provides a thin
//! interface over the register allocator so that we can more easily
//! swap it out or shim it when necessary.
use crate::machinst::MachInst;
use alloc::{string::String, vec::Vec};
use core::{fmt::Debug, hash::Hash};
use regalloc2::{Allocation, Operand, PReg, VReg};
use smallvec::{smallvec, SmallVec};
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// The first 128 vregs (64 int, 64 float/vec) are "pinned" to
/// physical registers: this means that they are always constrained to
/// the corresponding register at all use/mod/def sites.
///
/// Arbitrary vregs can also be constrained to physical registers at
/// particular use/def/mod sites, and this is preferable; but pinned
/// vregs allow us to migrate code that has been written using
/// RealRegs directly.
const PINNED_VREGS: usize = 128;
/// Convert a `VReg` to its pinned `PReg`, if any.
pub fn pinned_vreg_to_preg(vreg: VReg) -> Option<PReg> {
if vreg.vreg() < PINNED_VREGS {
Some(PReg::from_index(vreg.vreg()))
} else {
None
}
}
/// Give the index of the first available vreg for generated code
/// (i.e., after all pinned vregs).
pub fn first_user_vreg_index() -> usize {
// This is just the constant defined above, but we keep the
// constant private and expose only this helper function with the
// specific name in order to ensure other parts of the code don't
// open-code and depend on the index-space scheme.
PINNED_VREGS
}
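// A small illustrative check (not part of this change) of the index-space
// scheme described above: vreg indices below the pinned range name physical
// registers directly, while lowering hands out fresh vregs starting at
// `first_user_vreg_index()`.
fn pinned_index_space_example() {
    // Index 3 is within the pinned range, so it names a physical register.
    let pinned = VReg::new(3, RegClass::Int);
    assert!(pinned_vreg_to_preg(pinned).is_some());
    // The first user index is not pinned; it is an ordinary virtual register.
    let user = VReg::new(first_user_vreg_index(), RegClass::Int);
    assert!(pinned_vreg_to_preg(user).is_none());
}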
/// A register named in an instruction. This register can be either a
/// virtual register or a fixed physical register. It does not have
/// any constraints applied to it: those can be added later in
/// `MachInst::get_operands()` when the `Reg`s are converted to
/// `Operand`s.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Reg(VReg);
impl Reg {
/// Get the physical register (`RealReg`), if this register is
/// one.
pub fn to_real_reg(self) -> Option<RealReg> {
if pinned_vreg_to_preg(self.0).is_some() {
Some(RealReg(self.0))
} else {
None
}
}
/// Get the virtual (non-physical) register, if this register is
/// one.
pub fn to_virtual_reg(self) -> Option<VirtualReg> {
if pinned_vreg_to_preg(self.0).is_none() {
Some(VirtualReg(self.0))
} else {
None
}
}
/// Get the class of this register.
pub fn class(self) -> RegClass {
self.0.class()
}
/// Is this a real (physical) reg?
pub fn is_real(self) -> bool {
self.to_real_reg().is_some()
}
/// Is this a virtual reg?
pub fn is_virtual(self) -> bool {
self.to_virtual_reg().is_some()
}
}
impl std::fmt::Debug for Reg {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
if let Some(rreg) = self.to_real_reg() {
let preg: PReg = rreg.into();
write!(f, "{}", preg)
} else if let Some(vreg) = self.to_virtual_reg() {
let vreg: VReg = vreg.into();
write!(f, "{}", vreg)
} else {
unreachable!()
}
}
}
/// A real (physical) register. This corresponds to one of the target
/// ISA's named registers and can be used as an instruction operand.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct RealReg(VReg);
impl RealReg {
/// Get the class of this register.
pub fn class(self) -> RegClass {
self.0.class()
}
/// Get the hardware encoding of this register, as used in machine-code
/// encodings by the target ISA.
pub fn hw_enc(self) -> u8 {
PReg::from(self).hw_enc() as u8
}
}
impl std::fmt::Debug for RealReg {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
Reg::from(*self).fmt(f)
}
}
/// A virtual register. This can be allocated into a real (physical)
/// register of the appropriate register class, but which one is not
/// specified. Virtual registers are used when generating `MachInst`s,
/// before register allocation occurs, in order to allow us to name as
/// many register-carried values as necessary.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct VirtualReg(VReg);
impl VirtualReg {
/// Get the class of this register.
pub fn class(self) -> RegClass {
self.0.class()
}
/// Get this virtual register's index in the vreg index space.
pub fn index(self) -> usize {
self.0.vreg()
}
}
impl std::fmt::Debug for VirtualReg {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
Reg::from(*self).fmt(f)
}
}
/// A type wrapper that indicates a register type is writable. The
/// underlying register can be extracted, and the type wrapper can be
/// built using an arbitrary register. Hence, this type-level wrapper
/// is not strictly a guarantee. However, "casting" to a writable
/// register is an explicit operation for which we can
/// audit. Ordinarily, internal APIs in the compiler backend should
/// take a `Writable<Reg>` whenever the register is written, and the
/// usual, frictionless way to get one of these is to allocate a new
/// temporary.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Writable<T: Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Hash> {
reg: T,
}
impl<T: Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Hash> Writable<T> {
/// Explicitly construct a `Writable<T>` from a `T`. As noted in
/// the documentation for `Writable`, this is not hidden or
/// disallowed from the outside; anyone can perform the "cast";
/// but it is explicit so that we can audit the use sites.
pub fn from_reg(reg: T) -> Writable<T> {
Writable { reg }
}
/// Get the underlying register, which can be read.
pub fn to_reg(self) -> T {
self.reg
}
/// Map the underlying register to another value or type.
pub fn map<U, F>(self, f: F) -> Writable<U>
where
U: Clone + Copy + Debug + PartialEq + Eq + PartialOrd + Ord + Hash,
F: Fn(T) -> U,
{
Writable { reg: f(self.reg) }
}
}
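// A tiny illustration (not from this change) of the convention described
// above: outputs are passed around as `Writable<Reg>`, built explicitly with
// `from_reg`, and read back with `to_reg`.
fn writable_example(tmp: Reg) -> Reg {
    let dst: Writable<Reg> = Writable::from_reg(tmp);
    // Instruction constructors would take `dst` for their written operands.
    dst.to_reg()
}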
// Conversions between regalloc2 types (VReg) and our types
// (VirtualReg, RealReg, Reg).
impl std::convert::From<regalloc2::VReg> for Reg {
fn from(vreg: regalloc2::VReg) -> Reg {
Reg(vreg)
}
}
impl std::convert::From<regalloc2::VReg> for VirtualReg {
fn from(vreg: regalloc2::VReg) -> VirtualReg {
debug_assert!(pinned_vreg_to_preg(vreg).is_none());
VirtualReg(vreg)
}
}
impl std::convert::From<regalloc2::VReg> for RealReg {
fn from(vreg: regalloc2::VReg) -> RealReg {
debug_assert!(pinned_vreg_to_preg(vreg).is_some());
RealReg(vreg)
}
}
impl std::convert::From<Reg> for regalloc2::VReg {
/// Extract the underlying `regalloc2::VReg`. Note that physical
/// registers also map to particular (special) VRegs, so this
/// method can be used either on virtual or physical `Reg`s.
fn from(reg: Reg) -> regalloc2::VReg {
reg.0
}
}
impl std::convert::From<VirtualReg> for regalloc2::VReg {
fn from(reg: VirtualReg) -> regalloc2::VReg {
reg.0
}
}
impl std::convert::From<RealReg> for regalloc2::VReg {
fn from(reg: RealReg) -> regalloc2::VReg {
reg.0
}
}
impl std::convert::From<RealReg> for regalloc2::PReg {
fn from(reg: RealReg) -> regalloc2::PReg {
PReg::from_index(reg.0.vreg())
}
}
impl std::convert::From<regalloc2::PReg> for RealReg {
fn from(preg: regalloc2::PReg) -> RealReg {
RealReg(VReg::new(preg.index(), preg.class()))
}
}
impl std::convert::From<regalloc2::PReg> for Reg {
fn from(preg: regalloc2::PReg) -> Reg {
Reg(VReg::new(preg.index(), preg.class()))
}
}
impl std::convert::From<RealReg> for Reg {
fn from(reg: RealReg) -> Reg {
Reg(reg.0)
}
}
impl std::convert::From<VirtualReg> for Reg {
fn from(reg: VirtualReg) -> Reg {
Reg(reg.0)
}
}
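// A brief illustration (not part of this change) of the conversions above: a
// `PReg` round-trips through `RealReg` and `Reg`, and the pinned-vreg scheme
// lets the generic `Reg` predicates recognize it as a real register.
fn conversion_example() {
    let preg = PReg::from_index(3);
    let rreg = RealReg::from(preg);
    let reg = Reg::from(rreg);
    assert!(reg.is_real() && !reg.is_virtual());
    assert_eq!(PReg::from(reg.to_real_reg().unwrap()), preg);
}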
/// A spill slot.
pub type SpillSlot = regalloc2::SpillSlot;
/// A register class. Each register in the ISA has one class, and the
/// classes are disjoint. Most modern ISAs will have just two classes:
/// the integer/general-purpose registers (GPRs), and the float/vector
/// registers (typically used for both).
///
/// Note that unlike some other compiler backend/register allocator
/// designs, we do not allow for overlapping classes, i.e. registers
/// that belong to more than one class, because doing so makes the
/// allocation problem significantly more complex. Instead, when a
/// register can be addressed under different names for different
/// sizes (for example), the backend author should pick classes that
/// denote some fundamental allocation unit that encompasses the whole
/// register. For example, always allocate 128-bit vector registers
/// `v0`..`vN`, even though `f32` and `f64` values may use only the
/// low 32/64 bits of those registers and name them differently.
pub type RegClass = regalloc2::RegClass;
/// An OperandCollector is a wrapper around a Vec of Operands
/// (flattened array for a whole sequence of instructions) that
/// gathers operands from a single instruction and provides the range
/// in the flattened array.
#[derive(Debug)]
pub struct OperandCollector<'a, F: Fn(VReg) -> VReg> {
operands: &'a mut Vec<Operand>,
operands_start: usize,
clobbers: Vec<PReg>,
renamer: F,
}
impl<'a, F: Fn(VReg) -> VReg> OperandCollector<'a, F> {
/// Start gathering operands into one flattened operand array.
pub fn new(operands: &'a mut Vec<Operand>, renamer: F) -> Self {
let operands_start = operands.len();
Self {
operands,
operands_start,
clobbers: vec![],
renamer,
}
}
/// Add an operand.
fn add_operand(&mut self, operand: Operand) {
let vreg = (self.renamer)(operand.vreg());
let operand = Operand::new(vreg, operand.constraint(), operand.kind(), operand.pos());
self.operands.push(operand);
}
/// Add a clobber.
fn add_clobber(&mut self, clobber: PReg) {
self.clobbers.push(clobber);
}
/// Finish the operand collection and return the tuple giving the
/// range of indices in the flattened operand array, and the
/// clobber array.
pub fn finish(self) -> ((u32, u32), Vec<PReg>) {
let start = self.operands_start as u32;
let end = self.operands.len() as u32;
((start, end), self.clobbers)
}
/// Add a register use, at the start of the instruction (`Before`
/// position).
pub fn reg_use(&mut self, reg: Reg) {
self.add_operand(Operand::reg_use(reg.into()));
}
/// Add multiple register uses.
pub fn reg_uses(&mut self, regs: &[Reg]) {
for &reg in regs {
self.reg_use(reg);
}
}
/// Add a register def, at the end of the instruction (`After`
/// position). Use only when this def will be written after all
/// uses are read.
pub fn reg_def(&mut self, reg: Writable<Reg>) {
self.add_operand(Operand::reg_def(reg.to_reg().into()));
}
/// Add multiple register defs.
pub fn reg_defs(&mut self, regs: &[Writable<Reg>]) {
for &reg in regs {
self.reg_def(reg);
}
}
/// Add a register "early def", which logically occurs at the
/// beginning of the instruction, alongside all uses. Use this
/// when the def may be written before all uses are read; the
/// regalloc will ensure that it does not overwrite any uses.
pub fn reg_early_def(&mut self, reg: Writable<Reg>) {
self.add_operand(Operand::reg_def_at_start(reg.to_reg().into()));
}
/// Add a register "fixed use", which ties a vreg to a particular
/// RealReg at this point.
pub fn reg_fixed_use(&mut self, reg: Reg, rreg: Reg) {
let rreg = rreg.to_real_reg().expect("fixed reg is not a RealReg");
self.add_operand(Operand::reg_fixed_use(reg.into(), rreg.into()));
}
/// Add a register "fixed def", which ties a vreg to a particular
/// RealReg at this point.
pub fn reg_fixed_def(&mut self, reg: Writable<Reg>, rreg: Reg) {
let rreg = rreg.to_real_reg().expect("fixed reg is not a RealReg");
self.add_operand(Operand::reg_fixed_def(reg.to_reg().into(), rreg.into()));
}
/// Add a register def that reuses an earlier use-operand's
/// allocation. The index of that earlier operand (relative to the
/// current instruction's start of operands) must be known.
pub fn reg_reuse_def(&mut self, reg: Writable<Reg>, idx: usize) {
if reg.to_reg().to_virtual_reg().is_some() {
self.add_operand(Operand::reg_reuse_def(reg.to_reg().into(), idx));
} else {
// Sometimes destination registers that reuse a source are
// given with RealReg args. In this case, we assume the
// creator of the instruction knows what they are doing
// and just emit a normal def to the pinned vreg.
self.add_operand(Operand::reg_def(reg.to_reg().into()));
}
}
/// Add a register use+def, or "modify", where the reg must stay
/// in the same register on the input and output side of the
/// instruction.
pub fn reg_mod(&mut self, reg: Writable<Reg>) {
self.add_operand(Operand::new(
reg.to_reg().into(),
regalloc2::OperandConstraint::Reg,
regalloc2::OperandKind::Mod,
regalloc2::OperandPos::Early,
));
}
/// Add a register clobber. This is a register that is written by
/// the instruction, so must be reserved (not used) for the whole
/// instruction, but is not used afterward.
#[allow(dead_code)] // FIXME: use clobbers rather than defs for calls!
pub fn reg_clobber(&mut self, reg: Writable<RealReg>) {
self.add_clobber(PReg::from(reg.to_reg()));
}
}
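// A hypothetical example (not from this change) of how a backend's
// `MachInst::get_operands` implementation drives the collector for a simple
// three-address instruction `dst = src1 op src2`: sources are uses, the
// destination is a def, and the collector handles renaming and flattening
// into the per-function operand array.
fn collect_operands_example<F: Fn(VReg) -> VReg>(
    collector: &mut OperandCollector<'_, F>,
    dst: Writable<Reg>,
    src1: Reg,
    src2: Reg,
) {
    collector.reg_use(src1);
    collector.reg_use(src2);
    collector.reg_def(dst);
}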
/// Use an OperandCollector to count the number of operands on an instruction.
pub fn count_operands<I: MachInst>(inst: &I) -> usize {
let mut ops = vec![];
let mut coll = OperandCollector::new(&mut ops, |vreg| vreg);
inst.get_operands(&mut coll);
let ((start, end), _) = coll.finish();
debug_assert_eq!(0, start);
end as usize
}
/// Pretty-print part of a disassembly, with knowledge of
/// operand/instruction size, and optionally with regalloc
/// results. This can be used, for example, to print either `rax` or
/// `eax` for the register by those names on x86-64, depending on a
/// 64- or 32-bit context.
pub trait PrettyPrint {
fn pretty_print(&self, size_bytes: u8, allocs: &mut AllocationConsumer<'_>) -> String;
fn pretty_print_default(&self) -> String {
self.pretty_print(0, &mut AllocationConsumer::new(&[]))
}
}
/// A consumer of an (optional) list of Allocations along with Regs
/// that provides RealRegs where available.
///
/// This is meant to be used during code emission or
/// pretty-printing. In at least the latter case, regalloc results may
/// or may not be available, so we may end up printing either vregs or
/// rregs. Even pre-regalloc, though, some registers may be RealRegs
/// that were provided when the instruction was created.
///
/// This struct should be used in a specific way: when matching on an
/// instruction, provide it the Regs in the same order as they were
/// provided to the OperandCollector.
#[derive(Clone)]
pub struct AllocationConsumer<'a> {
allocs: std::slice::Iter<'a, Allocation>,
}
impl<'a> AllocationConsumer<'a> {
pub fn new(allocs: &'a [Allocation]) -> Self {
Self {
allocs: allocs.iter(),
}
}
/// Take the next allocation, if any, and convert it to a `Reg`; fall back
/// to the pre-regalloc register (which may itself be a `RealReg`) when no
/// allocation is available.
pub fn next(&mut self, pre_regalloc_reg: Reg) -> Reg {
let alloc = self.allocs.next();
let alloc = alloc.map(|alloc| {
Reg::from(
alloc
.as_reg()
.expect("Should not have gotten a stack allocation"),
)
});
match (pre_regalloc_reg.to_real_reg(), alloc) {
(Some(rreg), None) => rreg.into(),
(Some(rreg), Some(alloc)) => {
debug_assert_eq!(Reg::from(rreg), alloc);
alloc
}
(None, Some(alloc)) => alloc,
_ => pre_regalloc_reg,
}
}
pub fn next_writable(&mut self, pre_regalloc_reg: Writable<Reg>) -> Writable<Reg> {
Writable::from_reg(self.next(pre_regalloc_reg.to_reg()))
}
pub fn next_n(&mut self, count: usize) -> SmallVec<[Allocation; 4]> {
let mut allocs = smallvec![];
for _ in 0..count {
if let Some(next) = self.allocs.next() {
allocs.push(*next);
} else {
return allocs;
}
}
allocs
}
}
impl<'a> std::default::Default for AllocationConsumer<'a> {
fn default() -> Self {
Self { allocs: [].iter() }
}
}
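// A sketch (hypothetical, not part of this change) of consuming allocations
// while pretty-printing or emitting: registers are fed to the consumer in the
// same order they were given to the `OperandCollector`, so each pre-regalloc
// `Reg` is replaced by the real register chosen for it (or kept as-is when no
// allocation is available).
fn print_add_example(dst: Writable<Reg>, src1: Reg, src2: Reg, allocs: &[Allocation]) -> String {
    let mut allocs = AllocationConsumer::new(allocs);
    let src1 = allocs.next(src1);
    let src2 = allocs.next(src2);
    let dst = allocs.next_writable(dst);
    format!("add {:?}, {:?}, {:?}", dst.to_reg(), src1, src2)
}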

View File

@@ -1,108 +0,0 @@
use crate::ir::Type;
use regalloc::{Reg, RegUsageMapper, Writable};
use smallvec::SmallVec;
use std::cell::Cell;
// Define our own register-mapping trait so we can do arbitrary register
// renaming that are more free form than what `regalloc` constrains us to with
// its `RegUsageMapper` trait definition.
pub trait RegMapper {
fn get_use(&self, reg: Reg) -> Option<Reg>;
fn get_def(&self, reg: Reg) -> Option<Reg>;
fn get_mod(&self, reg: Reg) -> Option<Reg>;
fn map_use(&self, r: &mut Reg) {
if let Some(new) = self.get_use(*r) {
*r = new;
}
}
fn map_def(&self, r: &mut Writable<Reg>) {
if let Some(new) = self.get_def(r.to_reg()) {
*r = Writable::from_reg(new);
}
}
fn map_mod(&self, r: &mut Writable<Reg>) {
if let Some(new) = self.get_mod(r.to_reg()) {
*r = Writable::from_reg(new);
}
}
}
impl<T> RegMapper for T
where
T: RegUsageMapper,
{
fn get_use(&self, reg: Reg) -> Option<Reg> {
let v = reg.as_virtual_reg()?;
self.get_use(v).map(|r| r.to_reg())
}
fn get_def(&self, reg: Reg) -> Option<Reg> {
let v = reg.as_virtual_reg()?;
self.get_def(v).map(|r| r.to_reg())
}
fn get_mod(&self, reg: Reg) -> Option<Reg> {
let v = reg.as_virtual_reg()?;
self.get_mod(v).map(|r| r.to_reg())
}
}
#[derive(Debug, Default)]
pub struct RegRenamer {
// Map of `(old, new, used, ty)` register names. Use a `SmallVec` because
// we typically only have one or two renamings.
//
// The `used` flag indicates whether the mapping has been used for
// `get_def`, later used afterwards during `unmapped_defs` to know what
// moves need to be generated.
renames: SmallVec<[(Reg, Reg, Cell<bool>, Type); 2]>,
}
impl RegRenamer {
/// Adds a new mapping which means that `old` reg should now be called
/// `new`. The type of `old` is `ty` as specified.
pub fn add_rename(&mut self, old: Reg, new: Reg, ty: Type) {
self.renames.push((old, new, Cell::new(false), ty));
}
fn get_rename(&self, reg: Reg, set_used_def: bool) -> Option<Reg> {
let (_, new, used_def, _) = self.renames.iter().find(|(old, _, _, _)| reg == *old)?;
used_def.set(used_def.get() || set_used_def);
Some(*new)
}
/// Returns the list of register mappings, with their type, which were not
/// actually mapped.
///
/// This list is used because it means that the `old` name for the register
/// was never actually defined, so to correctly rename this register the
/// caller needs to move `old` into `new`.
///
/// This yields tuples of `(old, new, ty)`.
pub fn unmapped_defs(&self) -> impl Iterator<Item = (Reg, Reg, Type)> + '_ {
self.renames.iter().filter_map(|(old, new, used_def, ty)| {
if used_def.get() {
None
} else {
Some((*old, *new, *ty))
}
})
}
}
impl RegMapper for RegRenamer {
fn get_use(&self, reg: Reg) -> Option<Reg> {
self.get_rename(reg, false)
}
fn get_def(&self, reg: Reg) -> Option<Reg> {
self.get_rename(reg, true)
}
fn get_mod(&self, reg: Reg) -> Option<Reg> {
self.get_rename(reg, false)
}
}

View File

@@ -1,7 +1,9 @@
//! Data structure for tracking the (possibly multiple) registers that hold one
//! SSA `Value`.
use regalloc::{RealReg, Reg, VirtualReg, Writable};
use regalloc2::{PReg, VReg};
use super::{RealReg, Reg, VirtualReg, Writable};
use std::fmt::Debug;
const VALUE_REGS_PARTS: usize = 2;
@@ -35,17 +37,17 @@ pub trait InvalidSentinel: Copy + Eq {
}
impl InvalidSentinel for Reg {
fn invalid_sentinel() -> Self {
Reg::invalid()
Reg::from(VReg::invalid())
}
}
impl InvalidSentinel for VirtualReg {
fn invalid_sentinel() -> Self {
VirtualReg::invalid()
VirtualReg::from(VReg::invalid())
}
}
impl InvalidSentinel for RealReg {
fn invalid_sentinel() -> Self {
RealReg::invalid()
RealReg::from(PReg::invalid())
}
}
impl InvalidSentinel for Writable<Reg> {

File diff suppressed because it is too large