ARM64 backend, part 3 / 11: MachInst infrastructure.

This patch adds the MachInst, or Machine Instruction, infrastructure.
This is the machine-independent portion of the new backend design. It
contains the implementation of the "vcode" (virtual-registerized code)
container, the top-level lowering algorithm and compilation pipeline,
and the trait definitions that the machine backends will fill in.

This backend infrastructure is included in the compilation of the
`codegen` crate, but it is not yet tied into the public APIs; that patch
will come last, after all the other pieces are filled in.

This patch contains code written by Julian Seward <jseward@acm.org> and
Benjamin Bouvier <public@benj.me>, originally developed on a side-branch
before rebasing and condensing into this patch series. See the `arm64`
branch at `https://github.com/cfallin/wasmtime` for original development
history.

Co-authored-by: Julian Seward <jseward@acm.org>
Co-authored-by: Benjamin Bouvier <public@benj.me>
Chris Fallin
2020-04-09 12:27:26 -07:00
parent f80fe949c6
commit d83574261c
14 changed files with 2662 additions and 2 deletions

Cargo.lock (generated)

@@ -379,6 +379,7 @@ dependencies = [
"gimli",
"hashbrown 0.7.1",
"log",
"regalloc",
"serde",
"smallvec",
"target-lexicon",
@@ -1599,6 +1600,16 @@ dependencies = [
"rust-argon2",
]
[[package]]
name = "regalloc"
version = "0.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "89ce0cd835fa6e91bbf5d010beee19d0c2e97e4ad5e13c399a31122cfc83bdd6"
dependencies = [
"log",
"rustc-hash",
]
[[package]]
name = "regex"
version = "1.3.6"
@@ -1663,6 +1674,12 @@ version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783"
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "rustc_version"
version = "0.2.3"


@@ -24,6 +24,7 @@ gimli = { version = "0.20.0", default-features = false, features = ["write"], op
smallvec = { version = "1.0.0" }
thiserror = "1.0.4"
byteorder = { version = "1.3.2", default-features = false }
regalloc = "0.0.17"
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
# Please don't add any unless they are essential to the task of creating binary
# machine code. Integration tests that need external dependencies can be
@@ -33,7 +34,7 @@ byteorder = { version = "1.3.2", default-features = false }
cranelift-codegen-meta = { path = "meta", version = "0.62.0" }
[features]
default = ["std", "unwind"]
default = ["std", "unwind", "all-arch"]
# The "std" feature enables use of libstd. The "core" feature enables use
# of some minimal std-like replacement libraries. At least one of these two


@@ -55,9 +55,10 @@ pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef};
use crate::binemit;
use crate::flowgraph;
use crate::ir;
use crate::isa::enc_tables::Encodings;
pub use crate::isa::enc_tables::Encodings;
#[cfg(feature = "unwind")]
use crate::isa::fde::RegisterMappingError;
use crate::machinst::MachBackend;
use crate::regalloc;
use crate::result::CodegenResult;
use crate::settings;
@@ -400,6 +401,11 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
) {
// No-op by default
}
/// Get the new-style MachBackend, if this is an adapter around one.
fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
None
}
}
impl Debug for &dyn TargetIsa {


@@ -71,6 +71,7 @@ pub mod flowgraph;
pub mod ir;
pub mod isa;
pub mod loop_analysis;
pub mod machinst;
pub mod print_errors;
pub mod settings;
pub mod timing;
@@ -90,6 +91,7 @@ mod iterators;
mod legalizer;
mod licm;
mod nan_canonicalization;
mod num_uses;
mod partition_slice;
mod postopt;
mod predicates;


@@ -0,0 +1,142 @@
//! ABI definitions.
use crate::ir;
use crate::ir::StackSlot;
use crate::machinst::*;
use crate::settings;
use regalloc::{Reg, Set, SpillSlot, VirtualReg, Writable};
/// Trait implemented by an object that tracks ABI-related state (e.g., stack
/// layout) and can generate code while emitting the *body* of a function.
pub trait ABIBody<I: VCodeInst> {
/// Get the liveins of the function.
fn liveins(&self) -> Set<RealReg>;
/// Get the liveouts of the function.
fn liveouts(&self) -> Set<RealReg>;
/// Number of arguments.
fn num_args(&self) -> usize;
/// Number of return values.
fn num_retvals(&self) -> usize;
/// Number of stack slots (not spill slots).
fn num_stackslots(&self) -> usize;
/// Generate an instruction which copies an argument to a destination
/// register.
fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> I;
/// Generate an instruction which copies a source register to a return
/// value slot.
fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Reg) -> I;
/// Generate a return instruction.
fn gen_ret(&self) -> I;
/// Generate an epilogue placeholder.
fn gen_epilogue_placeholder(&self) -> I;
// -----------------------------------------------------------------
// Every function above this line may only be called pre-regalloc.
// Every function below this line may only be called post-regalloc.
// `spillslots()` must be called before any other post-regalloc
// function.
// ----------------------------------------------------------------
/// Update with the number of spillslots, post-regalloc.
fn set_num_spillslots(&mut self, slots: usize);
/// Update with the clobbered registers, post-regalloc.
fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>);
/// Load from a stackslot.
fn load_stackslot(
&self,
slot: StackSlot,
offset: usize,
ty: Type,
into_reg: Writable<Reg>,
) -> I;
/// Store to a stackslot.
fn store_stackslot(&self, slot: StackSlot, offset: usize, ty: Type, from_reg: Reg) -> I;
/// Load from a spillslot.
fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> I;
/// Store to a spillslot.
fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> I;
/// Generate a prologue, post-regalloc. This should include any stack
/// frame or other setup necessary to use the other methods (`load_arg`,
/// `store_retval`, and spillslot accesses). `self` is mutable so that we
/// can store information in it which will be useful when creating the
/// epilogue.
fn gen_prologue(&mut self, flags: &settings::Flags) -> Vec<I>;
/// Generate an epilogue, post-regalloc. Note that this must generate the
/// actual return instruction (rather than emitting this in the lowering
/// logic), because the epilogue code comes before the return and the two are
/// likely closely related.
fn gen_epilogue(&self, flags: &settings::Flags) -> Vec<I>;
/// Returns the full frame size for the given function, after prologue emission has run. This
/// comprises the spill space, incoming argument space, alignment padding, etc.
fn frame_size(&self) -> u32;
/// Get the spill-slot size.
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32;
/// Generate a spill.
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> I;
/// Generate a reload (fill).
fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, ty: Type) -> I;
}
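// Illustrative sketch (not part of this patch): the post-regalloc calling
// protocol spelled out in the comment block above. The function name is a
// placeholder, and the assumption is that `spill_count` and `clobbers`
// come from the register allocator's result.
fn finish_frame_sketch<I: VCodeInst>(
    abi: &mut dyn ABIBody<I>,
    spill_count: usize,
    clobbers: Set<Writable<RealReg>>,
    flags: &settings::Flags,
) -> (Vec<I>, Vec<I>) {
    // `set_num_spillslots()` must precede every other post-regalloc call.
    abi.set_num_spillslots(spill_count);
    abi.set_clobbered(clobbers);
    let prologue = abi.gen_prologue(flags);
    let epilogue = abi.gen_epilogue(flags);
    (prologue, epilogue)
}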
/// Trait implemented by an object that tracks ABI-related state and can
/// generate code while emitting a *call* to a function.
///
/// An instance of this trait returns information for a *particular*
/// callsite. It will usually be computed from the called function's
/// signature.
///
/// Unlike `ABIBody` above, methods on this trait are not invoked directly
/// by the machine-independent code. Rather, the machine-specific lowering
/// code will typically create an `ABICall` when creating machine instructions
/// for an IR call instruction inside `lower()`, directly emit the arg and
/// retval copies, and attach the register use/def info to the call.
///
/// This trait is thus provided for convenience to the backends.
pub trait ABICall<I: VCodeInst> {
/// Get the number of arguments expected.
fn num_args(&self) -> usize;
/// Copy an argument value from a source register, prior to the call.
fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> I;
/// Copy a return value into a destination register, after the call returns.
fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> I;
/// Pre-adjust the stack, prior to argument copies and call.
fn gen_stack_pre_adjust(&self) -> Vec<I>;
/// Post-adjust the stack, after call return and return-value copies.
fn gen_stack_post_adjust(&self) -> Vec<I>;
/// Generate the call itself.
///
/// The returned instruction should have proper use- and def-sets according
/// to the argument registers, return-value registers, and clobbered
/// registers for this function signature in this ABI.
///
/// (Arg registers are uses, and retval registers are defs. Clobbered
/// registers are also logically defs, but should never be read; their
/// values are "defined" (to the regalloc) but "undefined" in every other
/// sense.)
fn gen_call(&self) -> Vec<I>;
}
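// Illustrative sketch (not part of this patch): the callsite-lowering
// sequence described in the comment above, driving an `ABICall` through a
// `LowerCtx`. The function name and the pre-resolved `args`/`retvals`
// register slices are assumptions made for illustration.
fn lower_call_sketch<I: VCodeInst, C: LowerCtx<I>, A: ABICall<I>>(
    ctx: &mut C,
    abi: &A,
    args: &[Reg],
    retvals: &[Writable<Reg>],
) {
    for insn in abi.gen_stack_pre_adjust() {
        ctx.emit(insn);
    }
    for (i, &arg) in args.iter().enumerate() {
        ctx.emit(abi.gen_copy_reg_to_arg(i, arg));
    }
    // The call instruction(s) carry the use/def info for the arg, retval,
    // and clobbered registers.
    for insn in abi.gen_call() {
        ctx.emit(insn);
    }
    for (i, &ret) in retvals.iter().enumerate() {
        ctx.emit(abi.gen_copy_retval_to_reg(i, ret));
    }
    for insn in abi.gen_stack_post_adjust() {
        ctx.emit(insn);
    }
}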


@@ -0,0 +1,123 @@
//! Adapter for a `MachBackend` to implement the `TargetIsa` trait.
use crate::binemit;
use crate::ir;
use crate::isa::{EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa};
use crate::machinst::*;
use crate::regalloc::{RegDiversions, RegisterSet};
use crate::settings::Flags;
use std::borrow::Cow;
use std::fmt;
use target_lexicon::Triple;
/// A wrapper around a `MachBackend` that provides a `TargetIsa` impl.
pub struct TargetIsaAdapter {
backend: Box<dyn MachBackend + Send + Sync + 'static>,
triple: Triple,
}
impl TargetIsaAdapter {
/// Create a new `TargetIsa` wrapper around a `MachBackend`.
pub fn new<B: MachBackend + Send + Sync + 'static>(backend: B) -> TargetIsaAdapter {
let triple = backend.triple();
TargetIsaAdapter {
backend: Box::new(backend),
triple,
}
}
}
impl fmt::Display for TargetIsaAdapter {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "MachBackend")
}
}
impl TargetIsa for TargetIsaAdapter {
fn name(&self) -> &'static str {
self.backend.name()
}
fn triple(&self) -> &Triple {
&self.triple
}
fn flags(&self) -> &Flags {
self.backend.flags()
}
fn register_info(&self) -> RegInfo {
// Called from function's Display impl, so we need a stub here.
RegInfo {
banks: &[],
classes: &[],
}
}
fn legal_encodings<'a>(
&'a self,
_func: &'a ir::Function,
_inst: &'a ir::InstructionData,
_ctrl_typevar: ir::Type,
) -> Encodings<'a> {
panic!("Should not be called when new-style backend is available!")
}
fn encode(
&self,
_func: &ir::Function,
_inst: &ir::InstructionData,
_ctrl_typevar: ir::Type,
) -> Result<Encoding, Legalize> {
panic!("Should not be called when new-style backend is available!")
}
fn encoding_info(&self) -> EncInfo {
panic!("Should not be called when new-style backend is available!")
}
fn legalize_signature(&self, _sig: &mut Cow<ir::Signature>, _current: bool) {
panic!("Should not be called when new-style backend is available!")
}
fn regclass_for_abi_type(&self, _ty: ir::Type) -> RegClass {
panic!("Should not be called when new-style backend is available!")
}
fn allocatable_registers(&self, _func: &ir::Function) -> RegisterSet {
panic!("Should not be called when new-style backend is available!")
}
fn prologue_epilogue(&self, _func: &mut ir::Function) -> CodegenResult<()> {
panic!("Should not be called when new-style backend is available!")
}
#[cfg(feature = "testing_hooks")]
fn emit_inst(
&self,
_func: &ir::Function,
_inst: ir::Inst,
_divert: &mut RegDiversions,
_sink: &mut dyn binemit::CodeSink,
) {
panic!("Should not be called when new-style backend is available!")
}
/// Emit a whole function into memory.
fn emit_function_to_memory(&self, _func: &ir::Function, _sink: &mut binemit::MemoryCodeSink) {
panic!("Should not be called when new-style backend is available!")
}
fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
Some(&*self.backend)
}
fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
self.backend.unsigned_add_overflow_condition()
}
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
self.backend.unsigned_sub_overflow_condition()
}
}
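// Illustrative sketch (not part of this patch): handing a new-style backend
// to code that expects a `TargetIsa`. The function name and the boxing are
// assumptions made for illustration.
fn wrap_backend_sketch<B: MachBackend + Send + Sync + 'static>(backend: B) -> Box<dyn TargetIsa> {
    let isa = TargetIsaAdapter::new(backend);
    // Callers aware of the new pipeline can recover the backend and avoid
    // the panicking legacy entry points above.
    debug_assert!(isa.get_mach_backend().is_some());
    Box::new(isa)
}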


@@ -0,0 +1,59 @@
//! Computation of basic block order in emitted code.
use crate::machinst::*;
/// Simple reverse postorder-based block order emission.
///
/// TODO: use a proper algorithm, such as the bottom-up straight-line-section
/// construction algorithm.
struct BlockRPO {
visited: Vec<bool>,
postorder: Vec<BlockIndex>,
deferred_last: Option<BlockIndex>,
}
impl BlockRPO {
fn new<I: VCodeInst>(vcode: &VCode<I>) -> BlockRPO {
BlockRPO {
visited: vec![false; vcode.num_blocks()],
postorder: vec![],
deferred_last: None,
}
}
fn visit<I: VCodeInst>(&mut self, vcode: &VCode<I>, block: BlockIndex) {
self.visited[block as usize] = true;
for succ in vcode.succs(block) {
if !self.visited[*succ as usize] {
self.visit(vcode, *succ);
}
}
let (start, end) = &vcode.block_ranges[block as usize];
for i in *start..*end {
if vcode.insts[i as usize].is_epilogue_placeholder() {
debug_assert!(self.deferred_last.is_none());
self.deferred_last = Some(block);
return;
}
}
self.postorder.push(block);
}
fn rpo(self) -> Vec<BlockIndex> {
let mut rpo = self.postorder;
rpo.reverse();
if let Some(block) = self.deferred_last {
rpo.push(block);
}
rpo
}
}
/// Compute the final block order.
pub fn compute_final_block_order<I: VCodeInst>(vcode: &VCode<I>) -> Vec<BlockIndex> {
let mut rpo = BlockRPO::new(vcode);
rpo.visit(vcode, vcode.entry());
rpo.rpo()
}
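// Worked example (illustrative, not part of this patch): for a diamond CFG
// `entry -> {A, B} -> ret` in which `ret` contains the epilogue
// placeholder, the visit pushes A and B (in some order) and then `entry`
// onto the postorder list while holding `ret` back in `deferred_last`.
// Reversing gives e.g. [entry, B, A], and `ret` is appended afterwards, so
// the epilogue block always lands at the end of the emitted function.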


@@ -0,0 +1,76 @@
//! Compilation backend pipeline: optimized IR to VCode / binemit.
use crate::ir::Function;
use crate::machinst::*;
use crate::settings;
use crate::timing;
use log::debug;
use regalloc::{allocate_registers, RegAllocAlgorithm};
use std::env;
/// Compile the given function down to VCode with allocated registers, ready
/// for binary emission.
pub fn compile<B: LowerBackend>(
f: &mut Function,
b: &B,
abi: Box<dyn ABIBody<B::MInst>>,
flags: &settings::Flags,
) -> VCode<B::MInst>
where
B::MInst: ShowWithRRU,
{
// This lowers the CL IR.
let mut vcode = Lower::new(f, abi).lower(b);
let universe = &B::MInst::reg_universe();
debug!("vcode from lowering: \n{}", vcode.show_rru(Some(universe)));
// Perform register allocation.
let algorithm = match env::var("REGALLOC") {
Ok(str) => match str.as_str() {
"lsrac" => RegAllocAlgorithm::LinearScanChecked,
"lsra" => RegAllocAlgorithm::LinearScan,
// to wit: btc doesn't mean "bitcoin" here
"btc" => RegAllocAlgorithm::BacktrackingChecked,
_ => RegAllocAlgorithm::Backtracking,
},
// By default use backtracking, which is the fastest.
Err(_) => RegAllocAlgorithm::Backtracking,
};
let result = {
let _tt = timing::regalloc();
allocate_registers(
&mut vcode, algorithm, universe, /*request_block_annotations=*/ false,
)
.map_err(|err| {
debug!(
"Register allocation error for vcode\n{}\nError: {:?}",
vcode.show_rru(Some(universe)),
err
);
err
})
.expect("register allocation")
};
// Reorder vcode into final order and copy out final instruction sequence
// all at once. This also inserts prologues/epilogues.
vcode.replace_insns_from_regalloc(result, flags);
vcode.remove_redundant_branches();
// Do final passes over code to finalize branches.
vcode.finalize_branches();
debug!(
"vcode after regalloc: final version:\n{}",
vcode.show_rru(Some(universe))
);
//println!("{}\n", vcode.show_rru(Some(&B::MInst::reg_universe())));
vcode
}
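// Note (illustrative, not part of this patch): the allocator chosen above
// can be overridden at runtime via the REGALLOC environment variable:
// "lsra" selects linear scan, "lsrac" its checked variant, and "btc" the
// checked backtracking allocator; any other value, or an unset variable,
// uses the plain backtracking allocator.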


@@ -0,0 +1,723 @@
//! This module implements lowering (instruction selection) from Cranelift IR
//! to machine instructions with virtual registers. This is *almost* the final
//! machine code, except for register allocation.
use crate::binemit::CodeSink;
use crate::dce::has_side_effect;
use crate::entity::SecondaryMap;
use crate::ir::{
Block, ExternalName, Function, GlobalValueData, Inst, InstructionData, MemFlags, Opcode,
Signature, SourceLoc, Type, Value, ValueDef,
};
use crate::isa::registers::RegUnit;
use crate::machinst::{
ABIBody, BlockIndex, MachInst, MachInstEmit, VCode, VCodeBuilder, VCodeInst,
};
use crate::num_uses::NumUses;
use regalloc::Function as RegallocFunction;
use regalloc::{RealReg, Reg, RegClass, Set, VirtualReg, Writable};
use alloc::boxed::Box;
use alloc::vec::Vec;
use log::debug;
use smallvec::SmallVec;
use std::collections::VecDeque;
use std::ops::Range;
/// A context that machine-specific lowering code can use to emit lowered instructions. This is the
/// view of the machine-independent per-function lowering context that is seen by the machine
/// backend.
pub trait LowerCtx<I> {
/// Get the instdata for a given IR instruction.
fn data(&self, ir_inst: Inst) -> &InstructionData;
/// Get the controlling type for a polymorphic IR instruction.
fn ty(&self, ir_inst: Inst) -> Type;
/// Emit a machine instruction.
fn emit(&mut self, mach_inst: I);
/// Indicate that an IR instruction has been merged, and so one of its
/// uses is gone (replaced by uses of the instruction's inputs). This
/// helps the lowering algorithm to perform on-the-fly DCE, skipping over
/// unused instructions (such as immediates incorporated directly).
fn merged(&mut self, from_inst: Inst);
/// Get the producing instruction, if any, and output number, for the `idx`th input to the
/// given IR instruction
fn input_inst(&self, ir_inst: Inst, idx: usize) -> Option<(Inst, usize)>;
/// Map a Value to its associated writable (probably virtual) Reg.
fn value_to_writable_reg(&self, val: Value) -> Writable<Reg>;
/// Map a Value to its associated (probably virtual) Reg.
fn value_to_reg(&self, val: Value) -> Reg;
/// Get the `idx`th input to the given IR instruction as a virtual register.
fn input(&self, ir_inst: Inst, idx: usize) -> Reg;
/// Get the `idx`th output of the given IR instruction as a virtual register.
fn output(&self, ir_inst: Inst, idx: usize) -> Writable<Reg>;
/// Get the number of inputs to the given IR instruction.
fn num_inputs(&self, ir_inst: Inst) -> usize;
/// Get the number of outputs to the given IR instruction.
fn num_outputs(&self, ir_inst: Inst) -> usize;
/// Get the type for an instruction's input.
fn input_ty(&self, ir_inst: Inst, idx: usize) -> Type;
/// Get the type for an instruction's output.
fn output_ty(&self, ir_inst: Inst, idx: usize) -> Type;
/// Get a new temp.
fn tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg>;
/// Get the number of block params.
fn num_bb_params(&self, bb: Block) -> usize;
/// Get the register for a block param.
fn bb_param(&self, bb: Block, idx: usize) -> Reg;
/// Get the register for a return value.
fn retval(&self, idx: usize) -> Writable<Reg>;
/// Get the target for a call instruction, as an `ExternalName`.
fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<&'b ExternalName>;
/// Get the signature for a call or call-indirect instruction.
fn call_sig<'b>(&'b self, ir_inst: Inst) -> Option<&'b Signature>;
/// Get the symbol name and offset for a symbol_value instruction.
fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, i64)>;
/// Returns the memory flags of a given memory access.
fn memflags(&self, ir_inst: Inst) -> Option<MemFlags>;
/// Get the source location for a given instruction.
fn srcloc(&self, ir_inst: Inst) -> SourceLoc;
}
/// A machine backend.
pub trait LowerBackend {
/// The machine instruction type.
type MInst: VCodeInst;
/// Lower a single instruction. Instructions are lowered in reverse order.
/// This function need not handle branches; those are always passed to
/// `lower_branch_group` below.
fn lower<C: LowerCtx<Self::MInst>>(&self, ctx: &mut C, inst: Inst);
/// Lower a block-terminating group of branches (which together can be seen as one
/// N-way branch), given a vcode BlockIndex for each target.
fn lower_branch_group<C: LowerCtx<Self::MInst>>(
&self,
ctx: &mut C,
insts: &[Inst],
targets: &[BlockIndex],
fallthrough: Option<BlockIndex>,
);
}
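// Illustrative sketch (not part of this patch): the shape of a typical
// single-instruction lowering, using only the `LowerCtx` methods above.
// `lower_iadd_sketch` and the `make_add` constructor are placeholder names,
// not APIs defined in this patch.
fn lower_iadd_sketch<I, C: LowerCtx<I>>(
    ctx: &mut C,
    ir_inst: Inst,
    make_add: impl Fn(Writable<Reg>, Reg, Reg) -> I,
) {
    // Fetch vregs for the two inputs and the single output, then emit one
    // machine instruction. A real backend could also inspect
    // `ctx.input_inst()` to fold e.g. an immediate-producing input, calling
    // `ctx.merged()` on the producer so that it can be skipped if it dies.
    let lhs = ctx.input(ir_inst, 0);
    let rhs = ctx.input(ir_inst, 1);
    let dst = ctx.output(ir_inst, 0);
    ctx.emit(make_add(dst, lhs, rhs));
}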
/// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence
/// from original Inst to MachInsts.
pub struct Lower<'a, I: VCodeInst> {
// The function to lower.
f: &'a Function,
// Lowered machine instructions.
vcode: VCodeBuilder<I>,
// Number of active uses (minus `dec_use()` calls by backend) of each instruction.
num_uses: SecondaryMap<Inst, u32>,
// Mapping from `Value` (SSA value in IR) to virtual register.
value_regs: SecondaryMap<Value, Reg>,
// Return-value vregs.
retval_regs: Vec<Reg>,
// Next virtual register number to allocate.
next_vreg: u32,
}
fn alloc_vreg(
value_regs: &mut SecondaryMap<Value, Reg>,
regclass: RegClass,
value: Value,
next_vreg: &mut u32,
) -> VirtualReg {
if value_regs[value].get_index() == 0 {
// default value in map.
let v = *next_vreg;
*next_vreg += 1;
value_regs[value] = Reg::new_virtual(regclass, v);
}
value_regs[value].as_virtual_reg().unwrap()
}
enum GenerateReturn {
Yes,
No,
}
impl<'a, I: VCodeInst> Lower<'a, I> {
/// Prepare a new lowering context for the given IR function.
pub fn new(f: &'a Function, abi: Box<dyn ABIBody<I>>) -> Lower<'a, I> {
let mut vcode = VCodeBuilder::new(abi);
let num_uses = NumUses::compute(f).take_uses();
let mut next_vreg: u32 = 1;
// Default register should never be seen, but the `value_regs` map needs a default and we
// don't want to push `Option` everywhere. All values will be assigned registers by the
// loops over block parameters and instruction results below.
//
// We do not use vreg 0 so that we can detect any unassigned register that leaks through.
let default_register = Reg::new_virtual(RegClass::I32, 0);
let mut value_regs = SecondaryMap::with_default(default_register);
// Assign a vreg to each value.
for bb in f.layout.blocks() {
for param in f.dfg.block_params(bb) {
let vreg = alloc_vreg(
&mut value_regs,
I::rc_for_type(f.dfg.value_type(*param)),
*param,
&mut next_vreg,
);
vcode.set_vreg_type(vreg, f.dfg.value_type(*param));
}
for inst in f.layout.block_insts(bb) {
for result in f.dfg.inst_results(inst) {
let vreg = alloc_vreg(
&mut value_regs,
I::rc_for_type(f.dfg.value_type(*result)),
*result,
&mut next_vreg,
);
vcode.set_vreg_type(vreg, f.dfg.value_type(*result));
}
}
}
// Assign a vreg to each return value.
let mut retval_regs = vec![];
for ret in &f.signature.returns {
let v = next_vreg;
next_vreg += 1;
let regclass = I::rc_for_type(ret.value_type);
let vreg = Reg::new_virtual(regclass, v);
retval_regs.push(vreg);
vcode.set_vreg_type(vreg.as_virtual_reg().unwrap(), ret.value_type);
}
Lower {
f,
vcode,
num_uses,
value_regs,
retval_regs,
next_vreg,
}
}
fn gen_arg_setup(&mut self) {
if let Some(entry_bb) = self.f.layout.entry_block() {
debug!(
"gen_arg_setup: entry BB {} args are:\n{:?}",
entry_bb,
self.f.dfg.block_params(entry_bb)
);
for (i, param) in self.f.dfg.block_params(entry_bb).iter().enumerate() {
let reg = Writable::from_reg(self.value_regs[*param]);
let insn = self.vcode.abi().gen_copy_arg_to_reg(i, reg);
self.vcode.push(insn);
}
}
}
fn gen_retval_setup(&mut self, gen_ret_inst: GenerateReturn) {
for (i, reg) in self.retval_regs.iter().enumerate() {
let insn = self.vcode.abi().gen_copy_reg_to_retval(i, *reg);
self.vcode.push(insn);
}
let inst = match gen_ret_inst {
GenerateReturn::Yes => self.vcode.abi().gen_ret(),
GenerateReturn::No => self.vcode.abi().gen_epilogue_placeholder(),
};
self.vcode.push(inst);
}
fn find_reachable_bbs(&self) -> SmallVec<[Block; 16]> {
if let Some(entry) = self.f.layout.entry_block() {
let mut ret = SmallVec::new();
let mut queue = VecDeque::new();
let mut visited = SecondaryMap::with_default(false);
queue.push_back(entry);
visited[entry] = true;
while !queue.is_empty() {
let b = queue.pop_front().unwrap();
ret.push(b);
let mut succs: SmallVec<[Block; 16]> = SmallVec::new();
for inst in self.f.layout.block_insts(b) {
if self.f.dfg[inst].opcode().is_branch() {
succs.extend(branch_targets(self.f, b, inst).into_iter());
}
}
for succ in succs.into_iter() {
if !visited[succ] {
queue.push_back(succ);
visited[succ] = true;
}
}
}
ret
} else {
SmallVec::new()
}
}
/// Lower the function.
pub fn lower<B: LowerBackend<MInst = I>>(mut self, backend: &B) -> VCode<I> {
// Find all reachable blocks.
let mut bbs = self.find_reachable_bbs();
// Work backward (reverse block order, reverse through each block), skipping insns with zero
// uses.
bbs.reverse();
// This records a Block-to-BlockIndex map so that branch targets can be resolved.
let mut next_bindex = self.vcode.init_bb_map(&bbs[..]);
// Allocate a separate BlockIndex for each control-flow instruction so that we can create
// the edge blocks later. Each entry for a control-flow inst is the edge block; the list
// has (cf-inst, edge block, orig block) tuples.
let mut edge_blocks_by_inst: SecondaryMap<Inst, Vec<BlockIndex>> =
SecondaryMap::with_default(vec![]);
let mut edge_blocks: Vec<(Inst, BlockIndex, Block)> = vec![];
debug!("about to lower function: {:?}", self.f);
debug!("bb map: {:?}", self.vcode.blocks_by_bb());
for bb in bbs.iter() {
for inst in self.f.layout.block_insts(*bb) {
let op = self.f.dfg[inst].opcode();
if op.is_branch() {
// Find the original target.
let mut add_succ = |next_bb| {
let edge_block = next_bindex;
next_bindex += 1;
edge_blocks_by_inst[inst].push(edge_block);
edge_blocks.push((inst, edge_block, next_bb));
};
for succ in branch_targets(self.f, *bb, inst).into_iter() {
add_succ(succ);
}
}
}
}
for bb in bbs.iter() {
debug!("lowering bb: {}", bb);
// If this is a return block, produce the return value setup.
let last_insn = self.f.layout.block_insts(*bb).last().unwrap();
let last_insn_opcode = self.f.dfg[last_insn].opcode();
if last_insn_opcode.is_return() {
let gen_ret = if last_insn_opcode == Opcode::Return {
GenerateReturn::Yes
} else {
debug_assert!(last_insn_opcode == Opcode::FallthroughReturn);
GenerateReturn::No
};
self.gen_retval_setup(gen_ret);
self.vcode.end_ir_inst();
}
// Find the branches at the end first, and process those, if any.
let mut branches: SmallVec<[Inst; 2]> = SmallVec::new();
let mut targets: SmallVec<[BlockIndex; 2]> = SmallVec::new();
for inst in self.f.layout.block_insts(*bb).rev() {
debug!("lower: inst {}", inst);
if edge_blocks_by_inst[inst].len() > 0 {
branches.push(inst);
for target in edge_blocks_by_inst[inst].iter().rev().cloned() {
targets.push(target);
}
} else {
// We've reached the end of the branches -- process all as a group, first.
if branches.len() > 0 {
let fallthrough = self.f.layout.next_block(*bb);
let fallthrough = fallthrough.map(|bb| self.vcode.bb_to_bindex(bb));
branches.reverse();
targets.reverse();
debug!(
"lower_branch_group: targets = {:?} branches = {:?}",
targets, branches
);
backend.lower_branch_group(
&mut self,
&branches[..],
&targets[..],
fallthrough,
);
self.vcode.end_ir_inst();
branches.clear();
targets.clear();
}
// Only codegen an instruction if it either has a side
// effect, or has at least one use of one of its results.
let num_uses = self.num_uses[inst];
let side_effect = has_side_effect(self.f, inst);
if side_effect || num_uses > 0 {
backend.lower(&mut self, inst);
self.vcode.end_ir_inst();
} else {
// If we're skipping the instruction, we need to dec-ref
// its arguments.
for arg in self.f.dfg.inst_args(inst) {
let val = self.f.dfg.resolve_aliases(*arg);
match self.f.dfg.value_def(val) {
ValueDef::Result(src_inst, _) => {
self.dec_use(src_inst);
}
_ => {}
}
}
}
}
}
// There are possibly some branches left if the block contained only branches.
if branches.len() > 0 {
let fallthrough = self.f.layout.next_block(*bb);
let fallthrough = fallthrough.map(|bb| self.vcode.bb_to_bindex(bb));
branches.reverse();
targets.reverse();
debug!(
"lower_branch_group: targets = {:?} branches = {:?}",
targets, branches
);
backend.lower_branch_group(&mut self, &branches[..], &targets[..], fallthrough);
self.vcode.end_ir_inst();
branches.clear();
targets.clear();
}
// If this is the entry block, produce the argument setup.
if Some(*bb) == self.f.layout.entry_block() {
self.gen_arg_setup();
self.vcode.end_ir_inst();
}
let vcode_bb = self.vcode.end_bb();
debug!("finished building bb: BlockIndex {}", vcode_bb);
debug!("bb_to_bindex map says: {}", self.vcode.bb_to_bindex(*bb));
assert!(vcode_bb == self.vcode.bb_to_bindex(*bb));
if Some(*bb) == self.f.layout.entry_block() {
self.vcode.set_entry(vcode_bb);
}
}
// Now create the edge blocks, with phi lowering (block parameter copies).
for (inst, edge_block, orig_block) in edge_blocks.into_iter() {
debug!(
"creating edge block: inst {}, edge_block {}, orig_block {}",
inst, edge_block, orig_block
);
// Create a temporary for each block parameter.
let phi_classes: Vec<(Type, RegClass)> = self
.f
.dfg
.block_params(orig_block)
.iter()
.map(|p| self.f.dfg.value_type(*p))
.map(|ty| (ty, I::rc_for_type(ty)))
.collect();
// FIXME sewardj 2020Feb29: use SmallVec
let mut src_regs = vec![];
let mut dst_regs = vec![];
// Create all of the phi uses (reads) from jump args to temps.
// Round up all the source and destination regs
for (i, arg) in self.f.dfg.inst_variable_args(inst).iter().enumerate() {
let arg = self.f.dfg.resolve_aliases(*arg);
debug!("jump arg {} is {}", i, arg);
src_regs.push(self.value_regs[arg]);
}
for (i, param) in self.f.dfg.block_params(orig_block).iter().enumerate() {
debug!("bb arg {} is {}", i, param);
dst_regs.push(Writable::from_reg(self.value_regs[*param]));
}
debug_assert!(src_regs.len() == dst_regs.len());
debug_assert!(phi_classes.len() == dst_regs.len());
// If, as is mostly the case, the source and destination register
// sets are non-overlapping, then we can copy directly, so as to
// save the register allocator work.
if !Set::<Reg>::from_vec(src_regs.clone()).intersects(&Set::<Reg>::from_vec(
dst_regs.iter().map(|r| r.to_reg()).collect(),
)) {
for (dst_reg, (src_reg, (ty, _))) in
dst_regs.iter().zip(src_regs.iter().zip(phi_classes))
{
self.vcode.push(I::gen_move(*dst_reg, *src_reg, ty));
}
} else {
// There's some overlap, so play safe and copy via temps.
let tmp_regs: Vec<Writable<Reg>> = phi_classes
.iter()
.map(|&(ty, rc)| self.tmp(rc, ty)) // borrows `self` mutably.
.collect();
debug!("phi_temps = {:?}", tmp_regs);
debug_assert!(tmp_regs.len() == src_regs.len());
for (tmp_reg, (src_reg, &(ty, _))) in
tmp_regs.iter().zip(src_regs.iter().zip(phi_classes.iter()))
{
self.vcode.push(I::gen_move(*tmp_reg, *src_reg, ty));
}
for (dst_reg, (tmp_reg, &(ty, _))) in
dst_regs.iter().zip(tmp_regs.iter().zip(phi_classes.iter()))
{
self.vcode.push(I::gen_move(*dst_reg, tmp_reg.to_reg(), ty));
}
}
// Create the unconditional jump to the original target block.
self.vcode
.push(I::gen_jump(self.vcode.bb_to_bindex(orig_block)));
// End the IR inst and block. (We lower this as if it were one IR instruction so that
// we can emit machine instructions in forward order.)
self.vcode.end_ir_inst();
let blocknum = self.vcode.end_bb();
assert!(blocknum == edge_block);
}
// Now that we've emitted all instructions into the VCodeBuilder, let's build the VCode.
self.vcode.build()
}
/// Reduce the use-count of an IR instruction. Use this when, e.g., isel incorporates the
/// computation of an input instruction directly, so that input instruction has one
/// fewer use.
fn dec_use(&mut self, ir_inst: Inst) {
assert!(self.num_uses[ir_inst] > 0);
self.num_uses[ir_inst] -= 1;
debug!(
"incref: ir_inst {} now has {} uses",
ir_inst, self.num_uses[ir_inst]
);
}
/// Increase the use-count of an IR instruction. Use this when, e.g., isel incorporates
/// the computation of an input instruction directly, so that input instruction's
/// inputs are now used directly by the merged instruction.
fn inc_use(&mut self, ir_inst: Inst) {
self.num_uses[ir_inst] += 1;
debug!(
"decref: ir_inst {} now has {} uses",
ir_inst, self.num_uses[ir_inst]
);
}
}
impl<'a, I: VCodeInst> LowerCtx<I> for Lower<'a, I> {
/// Get the instdata for a given IR instruction.
fn data(&self, ir_inst: Inst) -> &InstructionData {
&self.f.dfg[ir_inst]
}
/// Get the controlling type for a polymorphic IR instruction.
fn ty(&self, ir_inst: Inst) -> Type {
self.f.dfg.ctrl_typevar(ir_inst)
}
/// Emit a machine instruction.
fn emit(&mut self, mach_inst: I) {
self.vcode.push(mach_inst);
}
/// Indicate that a merge has occurred.
fn merged(&mut self, from_inst: Inst) {
debug!("merged: inst {}", from_inst);
// First, inc-ref all inputs of `from_inst`, because they are now used
// directly by `into_inst`.
for arg in self.f.dfg.inst_args(from_inst) {
let arg = self.f.dfg.resolve_aliases(*arg);
match self.f.dfg.value_def(arg) {
ValueDef::Result(src_inst, _) => {
debug!(" -> inc-reffing src inst {}", src_inst);
self.inc_use(src_inst);
}
_ => {}
}
}
// Then, dec-ref the merged instruction itself. It still retains references
// to its arguments (inc-ref'd above). If its refcount has reached zero,
// it will be skipped during emission and its args will be dec-ref'd at that
// time.
self.dec_use(from_inst);
}
/// Get the producing instruction, if any, and output number, for the `idx`th input to the
/// given IR instruction.
fn input_inst(&self, ir_inst: Inst, idx: usize) -> Option<(Inst, usize)> {
let val = self.f.dfg.inst_args(ir_inst)[idx];
let val = self.f.dfg.resolve_aliases(val);
match self.f.dfg.value_def(val) {
ValueDef::Result(src_inst, result_idx) => Some((src_inst, result_idx)),
_ => None,
}
}
/// Map a Value to its associated writable (probably virtual) Reg.
fn value_to_writable_reg(&self, val: Value) -> Writable<Reg> {
let val = self.f.dfg.resolve_aliases(val);
Writable::from_reg(self.value_regs[val])
}
/// Map a Value to its associated (probably virtual) Reg.
fn value_to_reg(&self, val: Value) -> Reg {
let val = self.f.dfg.resolve_aliases(val);
self.value_regs[val]
}
/// Get the `idx`th input to the given IR instruction as a virtual register.
fn input(&self, ir_inst: Inst, idx: usize) -> Reg {
let val = self.f.dfg.inst_args(ir_inst)[idx];
let val = self.f.dfg.resolve_aliases(val);
self.value_to_reg(val)
}
/// Get the `idx`th output of the given IR instruction as a virtual register.
fn output(&self, ir_inst: Inst, idx: usize) -> Writable<Reg> {
let val = self.f.dfg.inst_results(ir_inst)[idx];
self.value_to_writable_reg(val)
}
/// Get a new temp.
fn tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg> {
let v = self.next_vreg;
self.next_vreg += 1;
let vreg = Reg::new_virtual(rc, v);
self.vcode.set_vreg_type(vreg.as_virtual_reg().unwrap(), ty);
Writable::from_reg(vreg)
}
/// Get the number of inputs for the given IR instruction.
fn num_inputs(&self, ir_inst: Inst) -> usize {
self.f.dfg.inst_args(ir_inst).len()
}
/// Get the number of outputs for the given IR instruction.
fn num_outputs(&self, ir_inst: Inst) -> usize {
self.f.dfg.inst_results(ir_inst).len()
}
/// Get the type for an instruction's input.
fn input_ty(&self, ir_inst: Inst, idx: usize) -> Type {
let val = self.f.dfg.inst_args(ir_inst)[idx];
let val = self.f.dfg.resolve_aliases(val);
self.f.dfg.value_type(val)
}
/// Get the type for an instruction's output.
fn output_ty(&self, ir_inst: Inst, idx: usize) -> Type {
self.f.dfg.value_type(self.f.dfg.inst_results(ir_inst)[idx])
}
/// Get the number of block params.
fn num_bb_params(&self, bb: Block) -> usize {
self.f.dfg.block_params(bb).len()
}
/// Get the register for a block param.
fn bb_param(&self, bb: Block, idx: usize) -> Reg {
let val = self.f.dfg.block_params(bb)[idx];
self.value_regs[val]
}
/// Get the register for a return value.
fn retval(&self, idx: usize) -> Writable<Reg> {
Writable::from_reg(self.retval_regs[idx])
}
/// Get the target for a call instruction, as an `ExternalName`.
fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<&'b ExternalName> {
match &self.f.dfg[ir_inst] {
&InstructionData::Call { func_ref, .. }
| &InstructionData::FuncAddr { func_ref, .. } => {
let funcdata = &self.f.dfg.ext_funcs[func_ref];
Some(&funcdata.name)
}
_ => None,
}
}
/// Get the signature for a call or call-indirect instruction.
fn call_sig<'b>(&'b self, ir_inst: Inst) -> Option<&'b Signature> {
match &self.f.dfg[ir_inst] {
&InstructionData::Call { func_ref, .. } => {
let funcdata = &self.f.dfg.ext_funcs[func_ref];
Some(&self.f.dfg.signatures[funcdata.signature])
}
&InstructionData::CallIndirect { sig_ref, .. } => Some(&self.f.dfg.signatures[sig_ref]),
_ => None,
}
}
/// Get the symbol name and offset for a symbol_value instruction.
fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, i64)> {
match &self.f.dfg[ir_inst] {
&InstructionData::UnaryGlobalValue { global_value, .. } => {
let gvdata = &self.f.global_values[global_value];
match gvdata {
&GlobalValueData::Symbol {
ref name,
ref offset,
..
} => {
let offset = offset.bits();
Some((name, offset))
}
_ => None,
}
}
_ => None,
}
}
/// Returns the memory flags of a given memory access.
fn memflags(&self, ir_inst: Inst) -> Option<MemFlags> {
match &self.f.dfg[ir_inst] {
&InstructionData::Load { flags, .. }
| &InstructionData::LoadComplex { flags, .. }
| &InstructionData::Store { flags, .. }
| &InstructionData::StoreComplex { flags, .. } => Some(flags),
_ => None,
}
}
/// Get the source location for a given instruction.
fn srcloc(&self, ir_inst: Inst) -> SourceLoc {
self.f.srclocs[ir_inst]
}
}
fn branch_targets(f: &Function, block: Block, inst: Inst) -> SmallVec<[Block; 16]> {
let mut ret = SmallVec::new();
if f.dfg[inst].opcode() == Opcode::Fallthrough {
ret.push(f.layout.next_block(block).unwrap());
} else {
match &f.dfg[inst] {
&InstructionData::Jump { destination, .. }
| &InstructionData::Branch { destination, .. }
| &InstructionData::BranchInt { destination, .. }
| &InstructionData::BranchIcmp { destination, .. }
| &InstructionData::BranchFloat { destination, .. } => {
ret.push(destination);
}
&InstructionData::BranchTable {
destination, table, ..
} => {
ret.push(destination);
for dest in f.jump_tables[table].as_slice() {
ret.push(*dest);
}
}
_ => {}
}
}
ret
}


@@ -0,0 +1,288 @@
//! This module exposes the machine-specific backend definition pieces.
//!
//! The MachInst infrastructure is the compiler backend, from CLIF
//! (ir::Function) to machine code. The purpose of this infrastructure is, at a
//! high level, to do instruction selection/lowering (to machine instructions),
//! register allocation, and then perform all the fixups to branches, constant
//! data references, etc., needed to actually generate machine code.
//!
//! The container for machine instructions, at various stages of construction,
//! is the `VCode` struct. We refer to a sequence of machine instructions organized
//! into basic blocks as "vcode". This is short for "virtual-register code", though
//! it's a bit of a misnomer because near the end of the pipeline, vcode has all
//! real registers. Nevertheless, the name is catchy and we like it.
//!
//! The compilation pipeline, from an `ir::Function` (already optimized as much as
//! you like by machine-independent optimization passes) onward, is as follows.
//! (N.B.: though we show the VCode separately at each stage, the passes
//! mutate the VCode in place; these are not separate copies of the code.)
//!
//! | ir::Function (SSA IR, machine-independent opcodes)
//! | |
//! | | [lower]
//! | |
//! | VCode<arch_backend::Inst> (machine instructions:
//! | | - mostly virtual registers.
//! | | - cond branches in two-target form.
//! | | - branch targets are block indices.
//! | | - in-memory constants held by insns,
//! | | with unknown offsets.
//! | | - critical edges (actually all edges)
//! | | are split.)
//! | | [regalloc]
//! | |
//! | VCode<arch_backend::Inst> (machine instructions:
//! | | - all real registers.
//! | | - new instruction sequence returned
//! | | out-of-band in RegAllocResult.
//! | | - instruction sequence has spills,
//! | | reloads, and moves inserted.
//! | | - other invariants same as above.)
//! | |
//! | | [preamble/postamble]
//! | |
//! | VCode<arch_backend::Inst> (machine instructions:
//! | | - stack-frame size known.
//! | | - out-of-band instruction sequence
//! | | has preamble prepended to entry
//! | | block, and postamble injected before
//! | | every return instruction.
//! | | - all symbolic stack references to
//! | | stackslots and spillslots are resolved
//! | | to concrete FP-offset mem addresses.)
//! | | [block/insn ordering]
//! | |
//! | VCode<arch_backend::Inst> (machine instructions:
//! | | - vcode.final_block_order is filled in.
//! | | - new insn sequence from regalloc is
//! | | placed back into vcode and block
//! | | boundaries are updated.)
//! | | [redundant branch/block
//! | | removal]
//! | |
//! | VCode<arch_backend::Inst> (machine instructions:
//! | | - all blocks that were just an
//! | | unconditional branch are removed.)
//! | |
//! | | [branch finalization
//! | | (fallthroughs)]
//! | |
//! | VCode<arch_backend::Inst> (machine instructions:
//! | | - all branches are in lowered one-
//! | | target form, but targets are still
//! | | block indices.)
//! | |
//! | | [branch finalization
//! | | (offsets)]
//! | |
//! | VCode<arch_backend::Inst> (machine instructions:
//! | | - all branch offsets from start of
//! | | function are known, and all branches
//! | | have resolved-offset targets.)
//! | |
//! | | [MemArg finalization]
//! | |
//! | VCode<arch_backend::Inst> (machine instructions:
//! | | - all MemArg references to the constant
//! | | pool are replaced with offsets.
//! | | - all constant-pool data is collected
//! | | in the VCode.)
//! | |
//! | | [binary emission]
//! | |
//! | Vec<u8> (machine code!)
//! |
#![allow(unused_imports)]
use crate::binemit::{
CodeInfo, CodeOffset, CodeSink, MemoryCodeSink, RelocSink, StackmapSink, TrapSink,
};
use crate::entity::EntityRef;
use crate::entity::SecondaryMap;
use crate::ir::condcodes::IntCC;
use crate::ir::ValueLocations;
use crate::ir::{DataFlowGraph, Function, Inst, Opcode, Type, Value};
use crate::isa::RegUnit;
use crate::result::CodegenResult;
use crate::settings::Flags;
use crate::HashMap;
use alloc::boxed::Box;
use alloc::vec::Vec;
use core::fmt::Debug;
use core::iter::Sum;
use regalloc::Map as RegallocMap;
use regalloc::RegUsageCollector;
use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
use smallvec::SmallVec;
use std::hash::Hash;
use std::string::String;
use target_lexicon::Triple;
pub mod lower;
pub use lower::*;
pub mod vcode;
pub use vcode::*;
pub mod compile;
pub use compile::*;
pub mod blockorder;
pub use blockorder::*;
pub mod abi;
pub use abi::*;
pub mod pp;
pub use pp::*;
pub mod sections;
pub use sections::*;
pub mod adapter;
pub use adapter::*;
/// A machine instruction.
pub trait MachInst: Clone + Debug {
/// Return the registers referenced by this machine instruction along with
/// the modes of reference (use, def, modify).
fn get_regs(&self, collector: &mut RegUsageCollector);
/// Map virtual registers to physical registers using the given virt->phys
/// maps corresponding to the program points prior to, and after, this instruction.
fn map_regs(
&mut self,
pre_map: &RegallocMap<VirtualReg, RealReg>,
post_map: &RegallocMap<VirtualReg, RealReg>,
);
/// If this is a simple move, return the (source, destination) tuple of registers.
fn is_move(&self) -> Option<(Writable<Reg>, Reg)>;
/// Is this a terminator (branch or ret)? If so, return its type
/// (ret/uncond/cond) and target if applicable.
fn is_term<'a>(&'a self) -> MachTerminator<'a>;
/// Returns true if the instruction is an epilogue placeholder.
fn is_epilogue_placeholder(&self) -> bool;
/// Generate a move.
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;
/// Generate a zero-length no-op.
fn gen_zero_len_nop() -> Self;
/// Possibly operate on a value directly in a spill-slot rather than a
/// register. Useful if the machine has register-memory instruction forms
/// (e.g., add directly from or directly to memory), like x86.
fn maybe_direct_reload(&self, reg: VirtualReg, slot: SpillSlot) -> Option<Self>;
/// Determine a register class to store the given Cranelift type.
fn rc_for_type(ty: Type) -> RegClass;
/// Generate a jump to another target. Used during lowering of
/// control flow.
fn gen_jump(target: BlockIndex) -> Self;
/// Generate a NOP. The `preferred_size` parameter allows the caller to
/// request a NOP of that size, or as close to it as possible. The machine
/// backend may return a NOP whose binary encoding is smaller than the
/// preferred size, but must not return a NOP that is larger. However,
/// the instruction must have a nonzero size.
fn gen_nop(preferred_size: usize) -> Self;
/// Rewrite block targets using the block-target map.
fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]);
/// Finalize branches once the block order (fallthrough) is known.
fn with_fallthrough_block(&mut self, fallthrough_block: Option<BlockIndex>);
/// Update instruction once block offsets are known. These offsets are
/// relative to the beginning of the function. `targets` is indexed by
/// BlockIndex.
fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]);
/// Get the register universe for this backend.
fn reg_universe() -> RealRegUniverse;
/// Align a basic block offset (from start of function). By default, no
/// alignment occurs.
fn align_basic_block(offset: CodeOffset) -> CodeOffset {
offset
}
}
/// Describes a block terminator (not call) in the vcode, when its branches
/// have not yet been finalized (so a branch may have two targets).
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum MachTerminator<'a> {
/// Not a terminator.
None,
/// A return instruction.
Ret,
/// An unconditional branch to another block.
Uncond(BlockIndex),
/// A conditional branch to one of two other blocks.
Cond(BlockIndex, BlockIndex),
/// An indirect branch with known possible targets.
Indirect(&'a [BlockIndex]),
}
/// A trait describing the ability to encode a MachInst into binary machine code.
pub trait MachInstEmit<O: MachSectionOutput> {
/// Emit the instruction.
fn emit(&self, code: &mut O);
}
/// The result of a `MachBackend::compile_function()` call. Contains machine
/// code (as bytes) and a disassembly, if requested.
pub struct MachCompileResult {
/// Machine code.
pub sections: MachSections,
/// Size of stack frame, in bytes.
pub frame_size: u32,
/// Disassembly, if requested.
pub disasm: Option<String>,
}
impl MachCompileResult {
/// Get a `CodeInfo` describing section sizes from this compilation result.
pub fn code_info(&self) -> CodeInfo {
let code_size = self.sections.total_size();
CodeInfo {
code_size,
jumptables_size: 0,
rodata_size: 0,
total_size: code_size,
}
}
}
/// Top-level machine backend trait, which wraps all monomorphized code and
/// allows a virtual call from the machine-independent `Function::compile()`.
pub trait MachBackend {
/// Compile the given function. Consumes the function.
fn compile_function(
&self,
func: Function,
want_disasm: bool,
) -> CodegenResult<MachCompileResult>;
/// Return flags for this backend.
fn flags(&self) -> &Flags;
/// Return triple for this backend.
fn triple(&self) -> Triple;
/// Return name for this backend.
fn name(&self) -> &'static str;
/// Return the register universe for this backend.
fn reg_universe(&self) -> RealRegUniverse;
/// Machine-specific condcode info needed by TargetIsa.
fn unsigned_add_overflow_condition(&self) -> IntCC {
// TODO: this is what x86 specifies. Is this right for arm64?
IntCC::UnsignedLessThan
}
/// Machine-specific condcode info needed by TargetIsa.
fn unsigned_sub_overflow_condition(&self) -> IntCC {
// TODO: this is what x86 specifies. Is this right for arm64?
IntCC::UnsignedLessThan
}
}
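// Illustrative sketch (not part of this patch): the top-level flow a caller
// would follow once a `MachBackend` exists for a target. The function name
// is a placeholder.
fn compile_with_backend_sketch(
    backend: &dyn MachBackend,
    func: Function,
) -> CodegenResult<CodeInfo> {
    // Runs the whole pipeline sketched in the module comment above
    // (lowering, regalloc, branch finalization, emission into sections).
    let result = backend.compile_function(func, /* want_disasm = */ false)?;
    Ok(result.code_info())
}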


@@ -0,0 +1,66 @@
//! Pretty-printing for machine code (virtual-registerized or final).
use regalloc::{RealRegUniverse, Reg, Writable};
use std::fmt::Debug;
use std::hash::Hash;
use std::string::{String, ToString};
// FIXME: Should this go into regalloc.rs instead?
/// A trait for printing instruction bits and pieces, with the ability to
/// take a contextualising RealRegUniverse that is used to give proper names to
/// registers.
pub trait ShowWithRRU {
/// Return a string that shows the implementing object in context of the
/// given `RealRegUniverse`, if provided.
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String;
/// The same as |show_rru|, but with an optional hint giving a size in
/// bytes. Its interpretation is object-dependent, and it is intended to
/// pass around enough information to facilitate printing sub-parts of
/// real registers correctly. Objects may ignore size hints that are
/// irrelevant to them.
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, _size: u8) -> String {
// Default implementation is to ignore the hint.
self.show_rru(mb_rru)
}
}
impl ShowWithRRU for Reg {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
if self.is_real() {
if let Some(rru) = mb_rru {
let reg_ix = self.get_index();
if reg_ix < rru.regs.len() {
return rru.regs[reg_ix].1.to_string();
} else {
// We have a real reg which isn't listed in the universe.
// Per the regalloc.rs interface requirements, this is
// Totally Not Allowed. Print it generically anyway, so
// we have something to debug.
return format!("!!{:?}!!", self);
}
}
}
// The reg is virtual, or we have no universe. Be generic.
format!("%{:?}", self)
}
fn show_rru_sized(&self, _mb_rru: Option<&RealRegUniverse>, _size: u8) -> String {
// For the specific case of Reg, we demand not to have a size hint,
// since interpretation of the size is target specific, but this code
// is used by all targets.
panic!("Reg::show_rru_sized: impossible to implement");
}
}
impl<R: ShowWithRRU + Copy + Ord + Hash + Eq + Debug> ShowWithRRU for Writable<R> {
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
self.to_reg().show_rru(mb_rru)
}
fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
self.to_reg().show_rru_sized(mb_rru, size)
}
}


@@ -0,0 +1,351 @@
//! In-memory representation of compiled machine code, in multiple sections
//! (text, constant pool / rodata, etc). Emission occurs into multiple sections
//! simultaneously, so we buffer the result in memory and hand off to the
//! caller at the end of compilation.
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc, RelocSink, StackmapSink, TrapSink};
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};
use alloc::vec::Vec;
/// A collection of sections with defined start-offsets.
pub struct MachSections {
/// Sections, in offset order.
pub sections: Vec<MachSection>,
}
impl MachSections {
/// New, empty set of sections.
pub fn new() -> MachSections {
MachSections { sections: vec![] }
}
/// Add a section with a known offset and size. Returns the index.
pub fn add_section(&mut self, start: CodeOffset, length: CodeOffset) -> usize {
let idx = self.sections.len();
self.sections.push(MachSection::new(start, length));
idx
}
/// Mutably borrow the given section by index.
pub fn get_section<'a>(&'a mut self, idx: usize) -> &'a mut MachSection {
&mut self.sections[idx]
}
/// Get mutable borrows of two sections simultaneously. Used during
/// instruction emission to provide references to the .text and .rodata
/// (constant pool) sections.
pub fn two_sections<'a>(
&'a mut self,
idx1: usize,
idx2: usize,
) -> (&'a mut MachSection, &'a mut MachSection) {
assert!(idx1 < idx2);
assert!(idx1 < self.sections.len());
assert!(idx2 < self.sections.len());
let (first, rest) = self.sections.split_at_mut(idx2);
(&mut first[idx1], &mut rest[0])
}
/// Emit this set of sections to a set of sinks for the code,
/// relocations, traps, and stackmap.
pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
// N.B.: we emit every section into the .text section as far as
// the `CodeSink` is concerned; we do not bother to segregate
// the contents into the actual program text, the jumptable and the
// rodata (constant pool). This allows us to generate code assuming
// that these will not be relocated relative to each other, and avoids
// having to designate each section as belonging in one of the three
// fixed categories defined by `CodeSink`. If this becomes a problem
// later (e.g. because of memory permissions or similar), we can
// add this designation and segregate the output; take care, however,
// to add the appropriate relocations in this case.
for section in &self.sections {
if section.data.len() > 0 {
while sink.offset() < section.start_offset {
sink.put1(0);
}
section.emit(sink);
}
}
sink.begin_jumptables();
sink.begin_rodata();
sink.end_codegen();
}
/// Get the total required size for these sections.
pub fn total_size(&self) -> CodeOffset {
if self.sections.len() == 0 {
0
} else {
// Find the last non-empty section.
self.sections
.iter()
.rev()
.find(|s| s.data.len() > 0)
.map(|s| s.cur_offset_from_start())
.unwrap_or(0)
}
}
}
/// An abstraction over MachSection and MachSectionSize: some
/// receiver of section data.
pub trait MachSectionOutput {
/// Get the current offset from the start of all sections.
fn cur_offset_from_start(&self) -> CodeOffset;
/// Get the start offset of this section.
fn start_offset(&self) -> CodeOffset;
/// Add 1 byte to the section.
fn put1(&mut self, _: u8);
/// Add 2 bytes to the section.
fn put2(&mut self, value: u16) {
self.put1((value & 0xff) as u8);
self.put1(((value >> 8) & 0xff) as u8);
}
/// Add 4 bytes to the section.
fn put4(&mut self, value: u32) {
self.put1((value & 0xff) as u8);
self.put1(((value >> 8) & 0xff) as u8);
self.put1(((value >> 16) & 0xff) as u8);
self.put1(((value >> 24) & 0xff) as u8);
}
/// Add 8 bytes to the section.
fn put8(&mut self, value: u64) {
self.put1((value & 0xff) as u8);
self.put1(((value >> 8) & 0xff) as u8);
self.put1(((value >> 16) & 0xff) as u8);
self.put1(((value >> 24) & 0xff) as u8);
self.put1(((value >> 32) & 0xff) as u8);
self.put1(((value >> 40) & 0xff) as u8);
self.put1(((value >> 48) & 0xff) as u8);
self.put1(((value >> 56) & 0xff) as u8);
}
/// Add a slice of bytes to the section.
fn put_data(&mut self, data: &[u8]);
/// Add a relocation at the current offset.
fn add_reloc(&mut self, loc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend);
/// Add a trap record at the current offset.
fn add_trap(&mut self, loc: SourceLoc, code: TrapCode);
/// Add a call return address record at the current offset.
fn add_call_site(&mut self, loc: SourceLoc, opcode: Opcode);
/// Align up to the given alignment.
fn align_to(&mut self, align_to: CodeOffset) {
assert!(align_to.is_power_of_two());
while self.cur_offset_from_start() & (align_to - 1) != 0 {
self.put1(0);
}
}
}
/// A section of output to be emitted to a CodeSink / RelocSink in bulk.
/// Multiple sections may be created with known start offsets in advance; the
/// usual use-case is to create the .text (code) and .rodata (constant pool) at
/// once, after computing the length of the code, so that constant references
/// can use known offsets as instructions are emitted.
pub struct MachSection {
/// The starting offset of this section.
pub start_offset: CodeOffset,
/// The limit of this section, defined by the start of the next section.
pub length_limit: CodeOffset,
/// The section contents, as raw bytes.
pub data: Vec<u8>,
/// Any relocations referring to this section.
pub relocs: Vec<MachReloc>,
/// Any trap records referring to this section.
pub traps: Vec<MachTrap>,
/// Any call site records referring to this section.
pub call_sites: Vec<MachCallSite>,
}
impl MachSection {
/// Create a new section, known to start at `start_offset` and with a size limited to `length_limit`.
pub fn new(start_offset: CodeOffset, length_limit: CodeOffset) -> MachSection {
MachSection {
start_offset,
length_limit,
data: vec![],
relocs: vec![],
traps: vec![],
call_sites: vec![],
}
}
/// Emit this section to the CodeSink and other associated sinks. The
/// current offset of the CodeSink must match the starting offset of this
/// section.
pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
assert!(sink.offset() == self.start_offset);
let mut next_reloc = 0;
let mut next_trap = 0;
let mut next_call_site = 0;
for (idx, byte) in self.data.iter().enumerate() {
if next_reloc < self.relocs.len() {
let reloc = &self.relocs[next_reloc];
if reloc.offset == idx as CodeOffset {
sink.reloc_external(reloc.srcloc, reloc.kind, &reloc.name, reloc.addend);
next_reloc += 1;
}
}
if next_trap < self.traps.len() {
let trap = &self.traps[next_trap];
if trap.offset == idx as CodeOffset {
sink.trap(trap.code, trap.srcloc);
next_trap += 1;
}
}
if next_call_site < self.call_sites.len() {
let call_site = &self.call_sites[next_call_site];
if call_site.ret_addr == idx as CodeOffset {
sink.add_call_site(call_site.opcode, call_site.srcloc);
next_call_site += 1;
}
}
sink.put1(*byte);
}
}
}
impl MachSectionOutput for MachSection {
fn cur_offset_from_start(&self) -> CodeOffset {
self.start_offset + self.data.len() as CodeOffset
}
fn start_offset(&self) -> CodeOffset {
self.start_offset
}
fn put1(&mut self, value: u8) {
assert!(((self.data.len() + 1) as CodeOffset) <= self.length_limit);
self.data.push(value);
}
fn put_data(&mut self, data: &[u8]) {
assert!(((self.data.len() + data.len()) as CodeOffset) <= self.length_limit);
self.data.extend_from_slice(data);
}
fn add_reloc(&mut self, srcloc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend) {
let name = name.clone();
self.relocs.push(MachReloc {
offset: self.data.len() as CodeOffset,
srcloc,
kind,
name,
addend,
});
}
fn add_trap(&mut self, srcloc: SourceLoc, code: TrapCode) {
self.traps.push(MachTrap {
offset: self.data.len() as CodeOffset,
srcloc,
code,
});
}
fn add_call_site(&mut self, srcloc: SourceLoc, opcode: Opcode) {
self.call_sites.push(MachCallSite {
ret_addr: self.data.len() as CodeOffset,
srcloc,
opcode,
});
}
}
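// Illustrative sketch (not part of this patch): offsets recorded by
// `add_reloc`/`add_trap`/`add_call_site` are relative to the section itself,
// while `cur_offset_from_start` includes the section's start offset. The
// `ExternalName::testcase` helper and a default `SourceLoc` are assumed here
// purely for illustration.
#[cfg(test)]
#[test]
fn reloc_offsets_are_section_relative() {
    // A section that will eventually live at offset 0x100 of the output.
    let mut sec = MachSection::new(0x100, 0x100);
    sec.put_data(&[0; 4]);
    sec.add_reloc(
        SourceLoc::default(),
        Reloc::Abs8,
        &ExternalName::testcase("callee"),
        0,
    );
    // The record refers to offset 4 within this section...
    assert_eq!(sec.relocs[0].offset, 4);
    // ...while the running offset seen by `MachSectionOutput` users is absolute.
    assert_eq!(sec.cur_offset_from_start(), 0x104);
}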
/// A MachSectionOutput implementation that records only size.
pub struct MachSectionSize {
/// The starting offset of this section.
pub start_offset: CodeOffset,
/// The current offset of this section.
pub offset: CodeOffset,
}
impl MachSectionSize {
/// Create a new size-counting dummy section.
pub fn new(start_offset: CodeOffset) -> MachSectionSize {
MachSectionSize {
start_offset,
offset: start_offset,
}
}
/// Return the size this section would take if emitted with a real sink.
pub fn size(&self) -> CodeOffset {
self.offset - self.start_offset
}
}
impl MachSectionOutput for MachSectionSize {
fn cur_offset_from_start(&self) -> CodeOffset {
// All size-counting sections conceptually start at offset 0; this doesn't
// matter when counting code size.
self.offset
}
fn start_offset(&self) -> CodeOffset {
self.start_offset
}
fn put1(&mut self, _: u8) {
self.offset += 1;
}
fn put_data(&mut self, data: &[u8]) {
self.offset += data.len() as CodeOffset;
}
fn add_reloc(&mut self, _: SourceLoc, _: Reloc, _: &ExternalName, _: Addend) {}
fn add_trap(&mut self, _: SourceLoc, _: TrapCode) {}
fn add_call_site(&mut self, _: SourceLoc, _: Opcode) {}
}
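// Illustrative sketch (not part of this patch): because `MachSectionSize`
// implements the same `MachSectionOutput` interface, the same emission code
// can run twice, once to measure and once to write bytes, which is how block
// offsets and code size are known before real emission. `emit_twice` is a
// hypothetical helper; `MachInstEmit` is the per-ISA emission trait used
// elsewhere in this patch.
#[allow(dead_code)]
fn emit_twice<I>(insts: &[I]) -> MachSection
where
    I: crate::machinst::MachInstEmit<MachSection> + crate::machinst::MachInstEmit<MachSectionSize>,
{
    // Pass 1: measure.
    let mut size = MachSectionSize::new(0);
    for i in insts {
        i.emit(&mut size);
    }
    // Pass 2: emit for real, using the measured size as the section limit.
    let mut sec = MachSection::new(0, size.size());
    for i in insts {
        i.emit(&mut sec);
    }
    sec
}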
/// A relocation resulting from a compilation.
pub struct MachReloc {
/// The offset at which the relocation applies, *relative to the
/// containing section*.
pub offset: CodeOffset,
/// The original source location.
pub srcloc: SourceLoc,
/// The kind of relocation.
pub kind: Reloc,
/// The external symbol / name to which this relocation refers.
pub name: ExternalName,
/// The addend to add to the symbol value.
pub addend: i64,
}
/// A trap record resulting from a compilation.
pub struct MachTrap {
/// The offset at which the trap instruction occurs, *relative to the
/// containing section*.
pub offset: CodeOffset,
/// The original source location.
pub srcloc: SourceLoc,
/// The trap code.
pub code: TrapCode,
}
/// A call site record resulting from a compilation.
pub struct MachCallSite {
/// The offset of the call's return address, *relative to the containing section*.
pub ret_addr: CodeOffset,
/// The original source location.
pub srcloc: SourceLoc,
/// The call's opcode.
pub opcode: Opcode,
}

View File

@@ -0,0 +1,738 @@
//! This implements the VCode container: a CFG of Insts that have been lowered.
//!
//! VCode is virtual-register code. An instruction in VCode is almost a machine
//! instruction; however, its register slots can refer to virtual registers in
//! addition to real machine registers.
//!
//! VCode is structured with traditional basic blocks, and
//! each block must be terminated by an unconditional branch (one target), a
//! conditional branch (two targets), or a return (no targets). Note that this
//! slightly differs from the machine code of most ISAs: in most ISAs, a
//! conditional branch has one target (and the not-taken case falls through).
//! However, we expect that machine backends will elide branches to the following
//! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
//! branch-uncond pair if *both* targets are not fallthrough. This allows us to
//! play with layout prior to final binary emission, as well, if we want.
//!
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.
use crate::binemit::Reloc;
use crate::ir;
use crate::machinst::*;
use crate::settings;
use regalloc::Function as RegallocFunction;
use regalloc::Set as RegallocSet;
use regalloc::{BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector};
use alloc::boxed::Box;
use alloc::vec::Vec;
use log::debug;
use smallvec::SmallVec;
use std::fmt;
use std::iter;
use std::ops::Index;
use std::string::String;
/// Index referring to an instruction in VCode.
pub type InsnIndex = u32;
/// Index referring to a basic block in VCode.
pub type BlockIndex = u32;
/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
/// a `MachInst` and it must be able to emit itself both to a real `MachSection`
/// and to a size-counting `MachSectionSize`.
pub trait VCodeInst: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize> {}
impl<I: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize>> VCodeInst for I {}
/// A function in "VCode" (virtualized-register code) form, after lowering.
/// This is essentially a standard CFG of basic blocks, where each basic block
/// consists of lowered instructions produced by the machine-specific backend.
pub struct VCode<I: VCodeInst> {
/// Function liveins.
liveins: RegallocSet<RealReg>,
/// Function liveouts.
liveouts: RegallocSet<RealReg>,
/// VReg IR-level types.
vreg_types: Vec<Type>,
/// Lowered machine instructions in order corresponding to the original IR.
pub insts: Vec<I>,
/// Entry block.
entry: BlockIndex,
/// Block instruction indices.
pub block_ranges: Vec<(InsnIndex, InsnIndex)>,
/// Block successors: index range in the successor-list below.
block_succ_range: Vec<(usize, usize)>,
/// Block successor lists, concatenated into one Vec. The `block_succ_range`
/// list of tuples above gives (start, end) ranges within this list that
/// correspond to each basic block's successors.
block_succs: Vec<BlockIndex>,
/// Block indices by IR block.
block_by_bb: SecondaryMap<ir::Block, BlockIndex>,
/// IR block for each VCode Block. The length of this Vec will likely be
/// less than the total number of Blocks, because new Blocks (for edge
/// splits, for example) are appended during lowering.
bb_by_block: Vec<ir::Block>,
/// Order of block IDs in final generated code.
final_block_order: Vec<BlockIndex>,
/// Final block offsets. Computed during branch finalization and used
/// during emission.
final_block_offsets: Vec<CodeOffset>,
/// Size of code, accounting for block layout / alignment.
code_size: CodeOffset,
/// ABI object.
abi: Box<dyn ABIBody<I>>,
}
/// A builder for a VCode function body. This builder is designed for the
/// lowering approach that we take: we traverse basic blocks in forward
/// (original IR) order, but within each basic block, we generate code from
/// bottom to top; and within each IR instruction that we visit in this reverse
/// order, we emit machine instructions in *forward* order again.
///
/// Hence, to produce the final instructions in proper order, we perform two
/// reversals. First, the machine instructions (`I` instances) are produced in
/// forward order for an individual IR instruction. Then these are *reversed*
/// and concatenated to `bb_insns` at the end of the IR instruction lowering.
/// The `bb_insns` vec will thus contain all machine instructions for a basic
/// block, in reverse order. Finally, when we're done with a basic block, we
/// reverse the whole block's vec of instructions again, and concatenate onto
/// the VCode's insts.
pub struct VCodeBuilder<I: VCodeInst> {
/// In-progress VCode.
vcode: VCode<I>,
/// Current basic block instructions, in reverse order (because blocks are
/// built bottom-to-top).
bb_insns: SmallVec<[I; 32]>,
/// Current IR-inst instructions, in forward order.
ir_inst_insns: SmallVec<[I; 4]>,
/// Start of succs for the current block in the concatenated succs list.
succ_start: usize,
}
impl<I: VCodeInst> VCodeBuilder<I> {
/// Create a new VCodeBuilder.
pub fn new(abi: Box<dyn ABIBody<I>>) -> VCodeBuilder<I> {
let vcode = VCode::new(abi);
VCodeBuilder {
vcode,
bb_insns: SmallVec::new(),
ir_inst_insns: SmallVec::new(),
succ_start: 0,
}
}
/// Access the ABI object.
pub fn abi(&mut self) -> &mut dyn ABIBody<I> {
&mut *self.vcode.abi
}
/// Set the type of a VReg.
pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
while self.vcode.vreg_types.len() <= vreg.get_index() {
self.vcode.vreg_types.push(ir::types::I8); // Default type.
}
self.vcode.vreg_types[vreg.get_index()] = ty;
}
/// Return the underlying bb-to-BlockIndex map.
pub fn blocks_by_bb(&self) -> &SecondaryMap<ir::Block, BlockIndex> {
&self.vcode.block_by_bb
}
/// Initialize the bb-to-BlockIndex map. Returns the first free
/// BlockIndex.
pub fn init_bb_map(&mut self, blocks: &[ir::Block]) -> BlockIndex {
let mut bindex: BlockIndex = 0;
for bb in blocks.iter() {
self.vcode.block_by_bb[*bb] = bindex;
self.vcode.bb_by_block.push(*bb);
bindex += 1;
}
bindex
}
/// Get the BlockIndex for an IR block.
pub fn bb_to_bindex(&self, bb: ir::Block) -> BlockIndex {
self.vcode.block_by_bb[bb]
}
/// Set the current block as the entry block.
pub fn set_entry(&mut self, block: BlockIndex) {
self.vcode.entry = block;
}
/// End the current IR instruction. Must be called after pushing any
/// instructions and prior to ending the basic block.
pub fn end_ir_inst(&mut self) {
while let Some(i) = self.ir_inst_insns.pop() {
self.bb_insns.push(i);
}
}
/// End the current basic block. Must be called after emitting vcode insts
/// for IR insts and prior to ending the function (building the VCode).
pub fn end_bb(&mut self) -> BlockIndex {
assert!(self.ir_inst_insns.is_empty());
let block_num = self.vcode.block_ranges.len() as BlockIndex;
// Push the instructions.
let start_idx = self.vcode.insts.len() as InsnIndex;
while let Some(i) = self.bb_insns.pop() {
self.vcode.insts.push(i);
}
let end_idx = self.vcode.insts.len() as InsnIndex;
// Add the instruction index range to the list of blocks.
self.vcode.block_ranges.push((start_idx, end_idx));
// End the successors list.
let succ_end = self.vcode.block_succs.len();
self.vcode
.block_succ_range
.push((self.succ_start, succ_end));
self.succ_start = succ_end;
block_num
}
/// Push an instruction for the current BB and current IR inst within the BB.
pub fn push(&mut self, insn: I) {
match insn.is_term() {
MachTerminator::None | MachTerminator::Ret => {}
MachTerminator::Uncond(target) => {
self.vcode.block_succs.push(target);
}
MachTerminator::Cond(true_branch, false_branch) => {
self.vcode.block_succs.push(true_branch);
self.vcode.block_succs.push(false_branch);
}
MachTerminator::Indirect(targets) => {
for target in targets {
self.vcode.block_succs.push(*target);
}
}
}
self.ir_inst_insns.push(insn);
}
/// Build the final VCode.
pub fn build(self) -> VCode<I> {
assert!(self.ir_inst_insns.is_empty());
assert!(self.bb_insns.is_empty());
self.vcode
}
}
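// Illustrative sketch (not part of this patch): the double reversal described
// in the `VCodeBuilder` comment above, acted out on plain `Vec`s. IR insts are
// visited bottom-up, each producing machine insts in forward order; after the
// per-inst and per-block reversals, the block reads in forward order again.
#[cfg(test)]
#[test]
fn builder_double_reversal_order() {
    let mut bb_insns: Vec<&str> = vec![];
    let mut block: Vec<&str> = vec![];
    // Lowering visits the *later* IR inst first (["c", "d"]), then the earlier
    // one (["a", "b"]); each group is in forward order, like `ir_inst_insns`.
    for group in &[vec!["c", "d"], vec!["a", "b"]] {
        let mut ir_inst_insns = group.clone();
        // `end_ir_inst()`: pop (i.e., reverse) onto the per-block list.
        while let Some(i) = ir_inst_insns.pop() {
            bb_insns.push(i);
        }
    }
    // `end_bb()`: pop (i.e., reverse) the per-block list into the final order.
    while let Some(i) = bb_insns.pop() {
        block.push(i);
    }
    assert_eq!(block, vec!["a", "b", "c", "d"]);
}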
fn block_ranges(indices: &[InstIx], len: usize) -> Vec<(usize, usize)> {
let v = indices
.iter()
.map(|iix| iix.get() as usize)
.chain(iter::once(len))
.collect::<Vec<usize>>();
v.windows(2).map(|p| (p[0], p[1])).collect()
}
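// Illustrative sketch (not part of this patch): `block_ranges` converts the
// regalloc's per-block starting instruction indices, plus the total length,
// into half-open (start, end) ranges.
#[cfg(test)]
#[test]
fn block_ranges_example() {
    let starts = [InstIx::new(0), InstIx::new(2), InstIx::new(5)];
    assert_eq!(block_ranges(&starts, 7), vec![(0, 2), (2, 5), (5, 7)]);
}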
fn is_redundant_move<I: VCodeInst>(insn: &I) -> bool {
if let Some((to, from)) = insn.is_move() {
to.to_reg() == from
} else {
false
}
}
fn is_trivial_jump_block<I: VCodeInst>(vcode: &VCode<I>, block: BlockIndex) -> Option<BlockIndex> {
let range = vcode.block_insns(BlockIx::new(block));
debug!(
"is_trivial_jump_block: block {} has len {}",
block,
range.len()
);
if range.len() != 1 {
return None;
}
let insn = range.first();
debug!(
" -> only insn is: {:?} with terminator {:?}",
vcode.get_insn(insn),
vcode.get_insn(insn).is_term()
);
match vcode.get_insn(insn).is_term() {
MachTerminator::Uncond(target) => Some(target),
_ => None,
}
}
impl<I: VCodeInst> VCode<I> {
/// New empty VCode.
fn new(abi: Box<dyn ABIBody<I>>) -> VCode<I> {
VCode {
liveins: abi.liveins(),
liveouts: abi.liveouts(),
vreg_types: vec![],
insts: vec![],
entry: 0,
block_ranges: vec![],
block_succ_range: vec![],
block_succs: vec![],
block_by_bb: SecondaryMap::with_default(0),
bb_by_block: vec![],
final_block_order: vec![],
final_block_offsets: vec![],
code_size: 0,
abi,
}
}
/// Get the IR-level type of a VReg.
pub fn vreg_type(&self, vreg: VirtualReg) -> Type {
self.vreg_types[vreg.get_index()]
}
/// Get the entry block.
pub fn entry(&self) -> BlockIndex {
self.entry
}
    /// Get the number of blocks. Block indices will be in the range `0 ..
    /// self.num_blocks()`.
pub fn num_blocks(&self) -> usize {
self.block_ranges.len()
}
/// Stack frame size for the full function's body.
pub fn frame_size(&self) -> u32 {
self.abi.frame_size()
}
/// Get the successors for a block.
pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] {
let (start, end) = self.block_succ_range[block as usize];
&self.block_succs[start..end]
}
    /// Take the results of register allocation, which provide a new instruction
    /// sequence with fill/reload/move instructions spliced in, and replace this
    /// VCode's instruction list with it.
pub fn replace_insns_from_regalloc(
&mut self,
result: RegAllocResult<Self>,
flags: &settings::Flags,
) {
self.final_block_order = compute_final_block_order(self);
// Record the spillslot count and clobbered registers for the ABI/stack
// setup code.
self.abi.set_num_spillslots(result.num_spill_slots as usize);
self.abi
.set_clobbered(result.clobbered_registers.map(|r| Writable::from_reg(*r)));
// We want to move instructions over in final block order, using the new
// block-start map given by the regalloc.
let block_ranges: Vec<(usize, usize)> =
block_ranges(result.target_map.elems(), result.insns.len());
let mut final_insns = vec![];
let mut final_block_ranges = vec![(0, 0); self.num_blocks()];
for block in &self.final_block_order {
let (start, end) = block_ranges[*block as usize];
let final_start = final_insns.len() as InsnIndex;
if *block == self.entry {
// Start with the prologue.
final_insns.extend(self.abi.gen_prologue(flags).into_iter());
}
for i in start..end {
let insn = &result.insns[i];
// Elide redundant moves at this point (we only know what is
// redundant once registers are allocated).
if is_redundant_move(insn) {
continue;
}
// Whenever encountering a return instruction, replace it
// with the epilogue.
let is_ret = insn.is_term() == MachTerminator::Ret;
if is_ret {
final_insns.extend(self.abi.gen_epilogue(flags).into_iter());
} else {
final_insns.push(insn.clone());
}
}
let final_end = final_insns.len() as InsnIndex;
final_block_ranges[*block as usize] = (final_start, final_end);
}
self.insts = final_insns;
self.block_ranges = final_block_ranges;
}
/// Removes redundant branches, rewriting targets to point directly to the
/// ultimate block at the end of a chain of trivial one-target jumps.
pub fn remove_redundant_branches(&mut self) {
// For each block, compute the actual target block, looking through up to one
// block with single-target jumps (this will remove empty edge blocks inserted
// by phi-lowering).
let block_rewrites: Vec<BlockIndex> = (0..self.num_blocks() as u32)
.map(|bix| is_trivial_jump_block(self, bix).unwrap_or(bix))
.collect();
let mut refcounts: Vec<usize> = vec![0; self.num_blocks()];
debug!(
"remove_redundant_branches: block_rewrites = {:?}",
block_rewrites
);
refcounts[self.entry as usize] = 1;
for block in 0..self.num_blocks() as u32 {
for insn in self.block_insns(BlockIx::new(block)) {
self.get_insn_mut(insn)
.with_block_rewrites(&block_rewrites[..]);
match self.get_insn(insn).is_term() {
MachTerminator::Uncond(bix) => {
refcounts[bix as usize] += 1;
}
MachTerminator::Cond(bix1, bix2) => {
refcounts[bix1 as usize] += 1;
refcounts[bix2 as usize] += 1;
}
MachTerminator::Indirect(blocks) => {
for block in blocks {
refcounts[*block as usize] += 1;
}
}
_ => {}
}
}
}
let deleted: Vec<bool> = refcounts.iter().map(|r| *r == 0).collect();
let block_order = std::mem::replace(&mut self.final_block_order, vec![]);
self.final_block_order = block_order
.into_iter()
.filter(|b| !deleted[*b as usize])
.collect();
// Rewrite successor information based on the block-rewrite map.
for succ in &mut self.block_succs {
let new_succ = block_rewrites[*succ as usize];
*succ = new_succ;
}
}
/// Mutate branch instructions to (i) lower two-way condbrs to one-way,
/// depending on fallthrough; and (ii) use concrete offsets.
pub fn finalize_branches(&mut self)
where
I: MachInstEmit<MachSectionSize>,
{
// Compute fallthrough block, indexed by block.
let num_final_blocks = self.final_block_order.len();
let mut block_fallthrough: Vec<Option<BlockIndex>> = vec![None; self.num_blocks()];
for i in 0..(num_final_blocks - 1) {
let from = self.final_block_order[i];
let to = self.final_block_order[i + 1];
block_fallthrough[from as usize] = Some(to);
}
// Pass over VCode instructions and finalize two-way branches into
// one-way branches with fallthrough.
for block in 0..self.num_blocks() {
let next_block = block_fallthrough[block];
let (start, end) = self.block_ranges[block];
for iix in start..end {
let insn = &mut self.insts[iix as usize];
insn.with_fallthrough_block(next_block);
}
}
// Compute block offsets.
let mut code_section = MachSectionSize::new(0);
let mut block_offsets = vec![0; self.num_blocks()];
for block in &self.final_block_order {
code_section.offset = I::align_basic_block(code_section.offset);
block_offsets[*block as usize] = code_section.offset;
let (start, end) = self.block_ranges[*block as usize];
for iix in start..end {
self.insts[iix as usize].emit(&mut code_section);
}
}
// We now have the section layout.
self.final_block_offsets = block_offsets;
self.code_size = code_section.size();
// Update branches with known block offsets. This looks like the
// traversal above, but (i) does not update block_offsets, rather uses
// it (so forward references are now possible), and (ii) mutates the
// instructions.
let mut code_section = MachSectionSize::new(0);
for block in &self.final_block_order {
code_section.offset = I::align_basic_block(code_section.offset);
let (start, end) = self.block_ranges[*block as usize];
for iix in start..end {
self.insts[iix as usize]
.with_block_offsets(code_section.offset, &self.final_block_offsets[..]);
self.insts[iix as usize].emit(&mut code_section);
}
}
}
/// Emit the instructions to a list of sections.
pub fn emit(&self) -> MachSections
where
I: MachInstEmit<MachSection>,
{
let mut sections = MachSections::new();
let code_idx = sections.add_section(0, self.code_size);
let code_section = sections.get_section(code_idx);
for block in &self.final_block_order {
let new_offset = I::align_basic_block(code_section.cur_offset_from_start());
while new_offset > code_section.cur_offset_from_start() {
// Pad with NOPs up to the aligned block offset.
let nop = I::gen_nop((new_offset - code_section.cur_offset_from_start()) as usize);
nop.emit(code_section);
}
assert_eq!(code_section.cur_offset_from_start(), new_offset);
let (start, end) = self.block_ranges[*block as usize];
for iix in start..end {
self.insts[iix as usize].emit(code_section);
}
}
sections
}
/// Get the IR block for a BlockIndex, if one exists.
pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
if (block as usize) < self.bb_by_block.len() {
Some(self.bb_by_block[block as usize])
} else {
None
}
}
}
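// Illustrative sketch (not part of this patch): the intended order of the
// post-regalloc passes above. `finish_compilation` is a hypothetical helper
// name; the real driver lives elsewhere in this patch series.
#[allow(dead_code)]
fn finish_compilation<I: VCodeInst>(
    vcode: &mut VCode<I>,
    result: RegAllocResult<VCode<I>>,
    flags: &settings::Flags,
) -> MachSections {
    // Splice in the allocator's spills/reloads/moves, plus prologue/epilogue.
    vcode.replace_insns_from_regalloc(result, flags);
    // Drop empty edge blocks and retarget branches that jumped through them.
    vcode.remove_redundant_branches();
    // Lower two-way branches against the final layout and fix up offsets.
    vcode.finalize_branches();
    // Write out the final machine code.
    vcode.emit()
}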
impl<I: VCodeInst> RegallocFunction for VCode<I> {
type Inst = I;
fn insns(&self) -> &[I] {
&self.insts[..]
}
fn insns_mut(&mut self) -> &mut [I] {
&mut self.insts[..]
}
fn get_insn(&self, insn: InstIx) -> &I {
&self.insts[insn.get() as usize]
}
fn get_insn_mut(&mut self, insn: InstIx) -> &mut I {
&mut self.insts[insn.get() as usize]
}
fn blocks(&self) -> Range<BlockIx> {
Range::new(BlockIx::new(0), self.block_ranges.len())
}
fn entry_block(&self) -> BlockIx {
BlockIx::new(self.entry)
}
fn block_insns(&self, block: BlockIx) -> Range<InstIx> {
let (start, end) = self.block_ranges[block.get() as usize];
Range::new(InstIx::new(start), (end - start) as usize)
}
fn block_succs(&self, block: BlockIx) -> Vec<BlockIx> {
let (start, end) = self.block_succ_range[block.get() as usize];
self.block_succs[start..end]
.iter()
.cloned()
.map(BlockIx::new)
.collect()
}
fn is_ret(&self, insn: InstIx) -> bool {
match self.insts[insn.get() as usize].is_term() {
MachTerminator::Ret => true,
_ => false,
}
}
fn get_regs(insn: &I, collector: &mut RegUsageCollector) {
insn.get_regs(collector)
}
fn map_regs(
insn: &mut I,
pre_map: &RegallocMap<VirtualReg, RealReg>,
post_map: &RegallocMap<VirtualReg, RealReg>,
) {
insn.map_regs(pre_map, post_map);
}
fn is_move(&self, insn: &I) -> Option<(Writable<Reg>, Reg)> {
insn.is_move()
}
fn get_spillslot_size(&self, regclass: RegClass, vreg: VirtualReg) -> u32 {
let ty = self.vreg_type(vreg);
self.abi.get_spillslot_size(regclass, ty)
}
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: VirtualReg) -> I {
let ty = self.vreg_type(vreg);
self.abi.gen_spill(to_slot, from_reg, ty)
}
fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, vreg: VirtualReg) -> I {
let ty = self.vreg_type(vreg);
self.abi.gen_reload(to_reg, from_slot, ty)
}
fn gen_move(&self, to_reg: Writable<RealReg>, from_reg: RealReg, vreg: VirtualReg) -> I {
let ty = self.vreg_type(vreg);
I::gen_move(to_reg.map(|r| r.to_reg()), from_reg.to_reg(), ty)
}
fn gen_zero_len_nop(&self) -> I {
I::gen_zero_len_nop()
}
fn maybe_direct_reload(&self, insn: &I, reg: VirtualReg, slot: SpillSlot) -> Option<I> {
insn.maybe_direct_reload(reg, slot)
}
fn func_liveins(&self) -> RegallocSet<RealReg> {
self.liveins.clone()
}
fn func_liveouts(&self) -> RegallocSet<RealReg> {
self.liveouts.clone()
}
}
// N.B.: Debug impl assumes that VCode has already been through all compilation
// passes, and so has a final block order and offsets.
impl<I: VCodeInst> fmt::Debug for VCode<I> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
writeln!(f, "VCode_Debug {{")?;
writeln!(f, " Entry block: {}", self.entry)?;
writeln!(f, " Final block order: {:?}", self.final_block_order)?;
for block in 0..self.num_blocks() {
writeln!(f, "Block {}:", block,)?;
for succ in self.succs(block as BlockIndex) {
writeln!(f, " (successor: Block {})", succ)?;
}
let (start, end) = self.block_ranges[block];
writeln!(f, " (instruction range: {} .. {})", start, end)?;
for inst in start..end {
writeln!(f, " Inst {}: {:?}", inst, self.insts[inst as usize])?;
}
}
writeln!(f, "}}")?;
Ok(())
}
}
// Pretty-printing with `RealRegUniverse` context.
impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        // Calculate an order in which to display the blocks. This is the same
        // as final_block_order, but also includes blocks which are in the
        // representation but not in final_block_order.
        let mut display_order = Vec::<usize>::new();
        // First display blocks in `final_block_order`.
        for bix in &self.final_block_order {
            assert!((*bix as usize) < self.num_blocks());
            display_order.push(*bix as usize);
        }
        // Now also take care of those not listed in `final_block_order`.
        // This is quadratic, but it's also debug-only code.
        for bix in 0..self.num_blocks() {
            if display_order.contains(&bix) {
                continue;
            }
            display_order.push(bix);
        }
        let mut s = String::new();
        s += "VCode_ShowWithRRU {{\n";
        s += &format!(" Entry block: {}\n", self.entry);
        s += &format!(" Final block order: {:?}\n", self.final_block_order);
        for i in 0..self.num_blocks() {
            let block = display_order[i];
            let omitted =
                if !self.final_block_order.is_empty() && i >= self.final_block_order.len() {
                    "** OMITTED **"
                } else {
                    ""
                };
            s += &format!("Block {}: {}\n", block, omitted);
            if let Some(bb) = self.bindex_to_bb(block as BlockIndex) {
                s += &format!(" (original IR block: {})\n", bb);
            }
            for succ in self.succs(block as BlockIndex) {
                s += &format!(" (successor: Block {})\n", succ);
            }
            let (start, end) = self.block_ranges[block];
            s += &format!(" (instruction range: {} .. {})\n", start, end);
            for inst in start..end {
                s += &format!(
                    " Inst {}: {}\n",
                    inst,
                    self.insts[inst as usize].show_rru(mb_rru)
                );
            }
        }
        s += "}}\n";
        s
    }
}

View File

@@ -0,0 +1,68 @@
//! A pass that computes the number of uses of any given instruction.
#![allow(dead_code)]
#![allow(unused_imports)]
use crate::cursor::{Cursor, FuncCursor};
use crate::dce::has_side_effect;
use crate::entity::SecondaryMap;
use crate::ir::dfg::ValueDef;
use crate::ir::instructions::InstructionData;
use crate::ir::Value;
use crate::ir::{DataFlowGraph, Function, Inst, Opcode};
/// Auxiliary data structure that counts the number of uses of any given
/// instruction in a Function. This is used during instruction selection
/// to essentially do incremental DCE: when an instruction is no longer
/// needed because its computation has been isel'd into another machine
/// instruction at every use site, we can skip it.
#[derive(Clone, Debug)]
pub struct NumUses {
uses: SecondaryMap<Inst, u32>,
}
impl NumUses {
fn new() -> NumUses {
NumUses {
uses: SecondaryMap::with_default(0),
}
}
/// Compute the NumUses analysis result for a function.
pub fn compute(func: &Function) -> NumUses {
let mut uses = NumUses::new();
for bb in func.layout.blocks() {
for inst in func.layout.block_insts(bb) {
for arg in func.dfg.inst_args(inst) {
let v = func.dfg.resolve_aliases(*arg);
uses.add_value(&func.dfg, v);
}
}
}
uses
}
fn add_value(&mut self, dfg: &DataFlowGraph, v: Value) {
match dfg.value_def(v) {
ValueDef::Result(inst, _) => {
self.uses[inst] += 1;
}
_ => {}
}
}
/// How many times is an instruction used?
pub fn use_count(&self, i: Inst) -> usize {
self.uses[i] as usize
}
/// Is an instruction used at all?
pub fn is_used(&self, i: Inst) -> bool {
self.use_count(i) > 0
}
/// Take the complete uses map, consuming this analysis result.
pub fn take_uses(self) -> SecondaryMap<Inst, u32> {
self.uses
}
}
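// Illustrative sketch (not part of this patch): how a lowering pass might
// consult this analysis. `should_lower` is a hypothetical helper, and the
// `has_side_effect` flag here is just a boolean supplied by the caller, not
// the `dce` helper imported above.
#[allow(dead_code)]
fn should_lower(num_uses: &NumUses, inst: Inst, has_side_effect: bool) -> bool {
    // Instructions with side effects must always be lowered; pure instructions
    // only need lowering if some other instruction still consumes a result.
    has_side_effect || num_uses.is_used(inst)
}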