ARM64 backend, part 3 / 11: MachInst infrastructure.
This patch adds the MachInst, or Machine Instruction, infrastructure. This is the machine-independent portion of the new backend design. It contains the implementation of the "vcode" (virtual-registerized code) container, the top-level lowering algorithm and compilation pipeline, and the trait definitions that the machine backends will fill in.

This backend infrastructure is included in the compilation of the `codegen` crate, but it is not yet tied into the public APIs; that patch will come last, after all the other pieces are filled in.

This patch contains code written by Julian Seward <jseward@acm.org> and Benjamin Bouvier <public@benj.me>, originally developed on a side branch before rebasing and condensing into this patch series. See the `arm64` branch at https://github.com/cfallin/wasmtime for original development history.

Co-authored-by: Julian Seward <jseward@acm.org>
Co-authored-by: Benjamin Bouvier <public@benj.me>
Cargo.lock | 17 (generated)
@@ -379,6 +379,7 @@ dependencies = [
  "gimli",
  "hashbrown 0.7.1",
  "log",
+ "regalloc",
  "serde",
  "smallvec",
  "target-lexicon",
@@ -1599,6 +1600,16 @@ dependencies = [
  "rust-argon2",
 ]

+[[package]]
+name = "regalloc"
+version = "0.0.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "89ce0cd835fa6e91bbf5d010beee19d0c2e97e4ad5e13c399a31122cfc83bdd6"
+dependencies = [
+ "log",
+ "rustc-hash",
+]
+
 [[package]]
 name = "regex"
 version = "1.3.6"
@@ -1663,6 +1674,12 @@ version = "0.1.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783"

+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
 [[package]]
 name = "rustc_version"
 version = "0.2.3"
cranelift/codegen/Cargo.toml
@@ -24,6 +24,7 @@ gimli = { version = "0.20.0", default-features = false, features = ["write"], op
 smallvec = { version = "1.0.0" }
 thiserror = "1.0.4"
 byteorder = { version = "1.3.2", default-features = false }
+regalloc = "0.0.17"
 # It is a goal of the cranelift-codegen crate to have minimal external dependencies.
 # Please don't add any unless they are essential to the task of creating binary
 # machine code. Integration tests that need external dependencies can be
@@ -33,7 +34,7 @@ byteorder = { version = "1.3.2", default-features = false }
 cranelift-codegen-meta = { path = "meta", version = "0.62.0" }

 [features]
-default = ["std", "unwind"]
+default = ["std", "unwind", "all-arch"]

 # The "std" feature enables use of libstd. The "core" feature enables use
 # of some minimal std-like replacement libraries. At least one of these two
cranelift/codegen/src/isa/mod.rs
@@ -55,9 +55,10 @@ pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef};
 use crate::binemit;
 use crate::flowgraph;
 use crate::ir;
-use crate::isa::enc_tables::Encodings;
+pub use crate::isa::enc_tables::Encodings;
 #[cfg(feature = "unwind")]
 use crate::isa::fde::RegisterMappingError;
+use crate::machinst::MachBackend;
 use crate::regalloc;
 use crate::result::CodegenResult;
 use crate::settings;
@@ -400,6 +401,11 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
     ) {
         // No-op by default
     }
+
+    /// Get the new-style MachBackend, if this is an adapter around one.
+    fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
+        None
+    }
 }

 impl Debug for &dyn TargetIsa {
cranelift/codegen/src/lib.rs
@@ -71,6 +71,7 @@ pub mod flowgraph;
 pub mod ir;
 pub mod isa;
 pub mod loop_analysis;
+pub mod machinst;
 pub mod print_errors;
 pub mod settings;
 pub mod timing;
@@ -90,6 +91,7 @@ mod iterators;
 mod legalizer;
 mod licm;
 mod nan_canonicalization;
+mod num_uses;
 mod partition_slice;
 mod postopt;
 mod predicates;
cranelift/codegen/src/machinst/abi.rs | 142 (new file)
@@ -0,0 +1,142 @@
//! ABI definitions.

use crate::ir;
use crate::ir::StackSlot;
use crate::machinst::*;
use crate::settings;

use regalloc::{Reg, Set, SpillSlot, VirtualReg, Writable};

/// Trait implemented by an object that tracks ABI-related state (e.g., stack
/// layout) and can generate code while emitting the *body* of a function.
pub trait ABIBody<I: VCodeInst> {
    /// Get the liveins of the function.
    fn liveins(&self) -> Set<RealReg>;

    /// Get the liveouts of the function.
    fn liveouts(&self) -> Set<RealReg>;

    /// Number of arguments.
    fn num_args(&self) -> usize;

    /// Number of return values.
    fn num_retvals(&self) -> usize;

    /// Number of stack slots (not spill slots).
    fn num_stackslots(&self) -> usize;

    /// Generate an instruction which copies an argument to a destination
    /// register.
    fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> I;

    /// Generate an instruction which copies a source register to a return
    /// value slot.
    fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Reg) -> I;

    /// Generate a return instruction.
    fn gen_ret(&self) -> I;

    /// Generate an epilogue placeholder.
    fn gen_epilogue_placeholder(&self) -> I;

    // -----------------------------------------------------------------
    // Every function above this line may only be called pre-regalloc.
    // Every function below this line may only be called post-regalloc.
    // `set_num_spillslots()` must be called before any other post-regalloc
    // function.
    // -----------------------------------------------------------------

    /// Update with the number of spillslots, post-regalloc.
    fn set_num_spillslots(&mut self, slots: usize);

    /// Update with the clobbered registers, post-regalloc.
    fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>);

    /// Load from a stackslot.
    fn load_stackslot(
        &self,
        slot: StackSlot,
        offset: usize,
        ty: Type,
        into_reg: Writable<Reg>,
    ) -> I;

    /// Store to a stackslot.
    fn store_stackslot(&self, slot: StackSlot, offset: usize, ty: Type, from_reg: Reg) -> I;

    /// Load from a spillslot.
    fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> I;

    /// Store to a spillslot.
    fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> I;

    /// Generate a prologue, post-regalloc. This should include any stack
    /// frame or other setup necessary to use the other methods (`load_arg`,
    /// `store_retval`, and spillslot accesses.) |self| is mutable so that we
    /// can store information in it which will be useful when creating the
    /// epilogue.
    fn gen_prologue(&mut self, flags: &settings::Flags) -> Vec<I>;

    /// Generate an epilogue, post-regalloc. Note that this must generate the
    /// actual return instruction (rather than emitting this in the lowering
    /// logic), because the epilogue code comes before the return and the two
    /// are likely closely related.
    fn gen_epilogue(&self, flags: &settings::Flags) -> Vec<I>;

    /// Returns the full frame size for the given function, after prologue
    /// emission has run. This comprises the spill space, incoming argument
    /// space, alignment padding, etc.
    fn frame_size(&self) -> u32;

    /// Get the spill-slot size.
    fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32;

    /// Generate a spill.
    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> I;

    /// Generate a reload (fill).
    fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, ty: Type) -> I;
}

/// Trait implemented by an object that tracks ABI-related state and can
/// generate code while emitting a *call* to a function.
///
/// An instance of this trait returns information for a *particular*
/// callsite. It will usually be computed from the called function's
/// signature.
///
/// Unlike `ABIBody` above, methods on this trait are not invoked directly
/// by the machine-independent code. Rather, the machine-specific lowering
/// code will typically create an `ABICall` when creating machine instructions
/// for an IR call instruction inside `lower()`, directly emit the arg and
/// retval copies, and attach the register use/def info to the call.
///
/// This trait is thus provided for convenience to the backends.
pub trait ABICall<I: VCodeInst> {
    /// Get the number of arguments expected.
    fn num_args(&self) -> usize;

    /// Copy an argument value from a source register, prior to the call.
    fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> I;

    /// Copy a return value into a destination register, after the call returns.
    fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> I;

    /// Pre-adjust the stack, prior to argument copies and call.
    fn gen_stack_pre_adjust(&self) -> Vec<I>;

    /// Post-adjust the stack, after call return and return-value copies.
    fn gen_stack_post_adjust(&self) -> Vec<I>;

    /// Generate the call itself.
    ///
    /// The returned instruction should have proper use- and def-sets according
    /// to the argument registers, return-value registers, and clobbered
    /// registers for this function signature in this ABI.
    ///
    /// (Arg registers are uses, and retval registers are defs. Clobbered
    /// registers are also logically defs, but should never be read; their
    /// values are "defined" (to the regalloc) but "undefined" in every other
    /// sense.)
    fn gen_call(&self) -> Vec<I>;
}
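The call-lowering sequence this trait supports looks roughly as follows — a minimal sketch, not part of this patch; `lower_call_sketch`, `arg_regs`, and `retval_regs` are hypothetical names, and a real backend would pull the registers out of the `LowerCtx` (see lower.rs below):

```rust
// Sketch of a backend driving an ABICall impl for one IR call instruction.
fn lower_call_sketch<I: VCodeInst, C: LowerCtx<I>, A: ABICall<I>>(
    ctx: &mut C,
    abi: &A,
    arg_regs: &[Reg],
    retval_regs: &[Writable<Reg>],
) {
    // 1. Make room on the stack for outgoing arguments, if needed.
    for inst in abi.gen_stack_pre_adjust() {
        ctx.emit(inst);
    }
    // 2. Copy argument values into their ABI-designated locations.
    for (i, &arg) in arg_regs.iter().enumerate() {
        ctx.emit(abi.gen_copy_reg_to_arg(i, arg));
    }
    // 3. The call itself carries the use/def/clobber sets for regalloc.
    for inst in abi.gen_call() {
        ctx.emit(inst);
    }
    // 4. Copy return values out of their ABI-designated locations.
    for (i, &retval) in retval_regs.iter().enumerate() {
        ctx.emit(abi.gen_copy_retval_to_reg(i, retval));
    }
    // 5. Restore the stack pointer.
    for inst in abi.gen_stack_post_adjust() {
        ctx.emit(inst);
    }
}
```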
cranelift/codegen/src/machinst/adapter.rs | 123 (new file)
@@ -0,0 +1,123 @@
//! Adapter for a `MachBackend` to implement the `TargetIsa` trait.

use crate::binemit;
use crate::ir;
use crate::isa::{EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa};
use crate::machinst::*;
use crate::regalloc::{RegDiversions, RegisterSet};
use crate::settings::Flags;

use std::borrow::Cow;
use std::fmt;
use target_lexicon::Triple;

/// A wrapper around a `MachBackend` that provides a `TargetIsa` impl.
pub struct TargetIsaAdapter {
    backend: Box<dyn MachBackend + Send + Sync + 'static>,
    triple: Triple,
}

impl TargetIsaAdapter {
    /// Create a new `TargetIsa` wrapper around a `MachBackend`.
    pub fn new<B: MachBackend + Send + Sync + 'static>(backend: B) -> TargetIsaAdapter {
        let triple = backend.triple();
        TargetIsaAdapter {
            backend: Box::new(backend),
            triple,
        }
    }
}

impl fmt::Display for TargetIsaAdapter {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "MachBackend")
    }
}

impl TargetIsa for TargetIsaAdapter {
    fn name(&self) -> &'static str {
        self.backend.name()
    }

    fn triple(&self) -> &Triple {
        &self.triple
    }

    fn flags(&self) -> &Flags {
        self.backend.flags()
    }

    fn register_info(&self) -> RegInfo {
        // Called from function's Display impl, so we need a stub here.
        RegInfo {
            banks: &[],
            classes: &[],
        }
    }

    fn legal_encodings<'a>(
        &'a self,
        _func: &'a ir::Function,
        _inst: &'a ir::InstructionData,
        _ctrl_typevar: ir::Type,
    ) -> Encodings<'a> {
        panic!("Should not be called when new-style backend is available!")
    }

    fn encode(
        &self,
        _func: &ir::Function,
        _inst: &ir::InstructionData,
        _ctrl_typevar: ir::Type,
    ) -> Result<Encoding, Legalize> {
        panic!("Should not be called when new-style backend is available!")
    }

    fn encoding_info(&self) -> EncInfo {
        panic!("Should not be called when new-style backend is available!")
    }

    fn legalize_signature(&self, _sig: &mut Cow<ir::Signature>, _current: bool) {
        panic!("Should not be called when new-style backend is available!")
    }

    fn regclass_for_abi_type(&self, _ty: ir::Type) -> RegClass {
        panic!("Should not be called when new-style backend is available!")
    }

    fn allocatable_registers(&self, _func: &ir::Function) -> RegisterSet {
        panic!("Should not be called when new-style backend is available!")
    }

    fn prologue_epilogue(&self, _func: &mut ir::Function) -> CodegenResult<()> {
        panic!("Should not be called when new-style backend is available!")
    }

    #[cfg(feature = "testing_hooks")]
    fn emit_inst(
        &self,
        _func: &ir::Function,
        _inst: ir::Inst,
        _divert: &mut RegDiversions,
        _sink: &mut dyn binemit::CodeSink,
    ) {
        panic!("Should not be called when new-style backend is available!")
    }

    /// Emit a whole function into memory.
    fn emit_function_to_memory(&self, _func: &ir::Function, _sink: &mut binemit::MemoryCodeSink) {
        panic!("Should not be called when new-style backend is available!")
    }

    fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
        Some(&*self.backend)
    }

    fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
        self.backend.unsigned_add_overflow_condition()
    }

    fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
        self.backend.unsigned_sub_overflow_condition()
    }
}
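A sketch of how a finished machine backend would plug into the existing `TargetIsa`-based embedding APIs via this adapter — the `Arm64Backend` type is hypothetical (it arrives later in this patch series); only `TargetIsaAdapter` is real:

```rust
fn make_isa() -> Box<dyn TargetIsa> {
    // Hypothetical new-style backend for this series' target.
    let backend = Arm64Backend::new();
    // The adapter makes it look like any legacy TargetIsa; callers that know
    // about the new world can recover it via `get_mach_backend()`.
    Box::new(TargetIsaAdapter::new(backend))
}
```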
cranelift/codegen/src/machinst/blockorder.rs | 59 (new file)
@@ -0,0 +1,59 @@
//! Computation of basic block order in emitted code.

use crate::machinst::*;

/// Simple reverse postorder-based block order emission.
///
/// TODO: use a proper algorithm, such as the bottom-up straight-line-section
/// construction algorithm.
struct BlockRPO {
    visited: Vec<bool>,
    postorder: Vec<BlockIndex>,
    deferred_last: Option<BlockIndex>,
}

impl BlockRPO {
    fn new<I: VCodeInst>(vcode: &VCode<I>) -> BlockRPO {
        BlockRPO {
            visited: vec![false; vcode.num_blocks()],
            postorder: vec![],
            deferred_last: None,
        }
    }

    fn visit<I: VCodeInst>(&mut self, vcode: &VCode<I>, block: BlockIndex) {
        self.visited[block as usize] = true;
        for succ in vcode.succs(block) {
            if !self.visited[*succ as usize] {
                self.visit(vcode, *succ);
            }
        }

        // Defer the block containing the epilogue placeholder so that it is
        // emitted last: the epilogue and return then fall at the end of the
        // function.
        let (start, end) = &vcode.block_ranges[block as usize];
        for i in *start..*end {
            if vcode.insts[i as usize].is_epilogue_placeholder() {
                debug_assert!(self.deferred_last.is_none());
                self.deferred_last = Some(block);
                return;
            }
        }

        self.postorder.push(block);
    }

    fn rpo(self) -> Vec<BlockIndex> {
        let mut rpo = self.postorder;
        rpo.reverse();
        if let Some(block) = self.deferred_last {
            rpo.push(block);
        }
        rpo
    }
}

/// Compute the final block order.
pub fn compute_final_block_order<I: VCodeInst>(vcode: &VCode<I>) -> Vec<BlockIndex> {
    let mut rpo = BlockRPO::new(vcode);
    rpo.visit(vcode, vcode.entry());
    rpo.rpo()
}
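For a standalone feel of the traversal above, here is the same depth-first postorder-plus-reverse on a plain adjacency list — a toy, independent of `VCode`, with an invented diamond-shaped CFG:

```rust
// Toy reverse-postorder computation mirroring BlockRPO. Block 0 is the entry.
fn rpo(succs: &[Vec<usize>]) -> Vec<usize> {
    fn visit(succs: &[Vec<usize>], b: usize, seen: &mut Vec<bool>, post: &mut Vec<usize>) {
        seen[b] = true;
        for &s in &succs[b] {
            if !seen[s] {
                visit(succs, s, seen, post);
            }
        }
        post.push(b); // a block is pushed only after all its successors
    }
    let mut seen = vec![false; succs.len()];
    let mut post = vec![];
    visit(succs, 0, &mut seen, &mut post);
    post.reverse();
    post
}

fn main() {
    // Diamond: 0 -> {1, 2}, 1 -> 3, 2 -> 3.
    let cfg = vec![vec![1, 2], vec![3], vec![3], vec![]];
    assert_eq!(rpo(&cfg), vec![0, 2, 1, 3]); // entry first, join block last
}
```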
cranelift/codegen/src/machinst/compile.rs | 76 (new file)
@@ -0,0 +1,76 @@
//! Compilation backend pipeline: optimized IR to VCode / binemit.

use crate::ir::Function;
use crate::machinst::*;
use crate::settings;
use crate::timing;

use log::debug;
use regalloc::{allocate_registers, RegAllocAlgorithm};
use std::env;

/// Compile the given function down to VCode with allocated registers, ready
/// for binary emission.
pub fn compile<B: LowerBackend>(
    f: &mut Function,
    b: &B,
    abi: Box<dyn ABIBody<B::MInst>>,
    flags: &settings::Flags,
) -> VCode<B::MInst>
where
    B::MInst: ShowWithRRU,
{
    // This lowers the CL IR.
    let mut vcode = Lower::new(f, abi).lower(b);

    let universe = &B::MInst::reg_universe();

    debug!("vcode from lowering: \n{}", vcode.show_rru(Some(universe)));

    // Perform register allocation.
    let algorithm = match env::var("REGALLOC") {
        Ok(str) => match str.as_str() {
            "lsrac" => RegAllocAlgorithm::LinearScanChecked,
            "lsra" => RegAllocAlgorithm::LinearScan,
            // to wit: btc doesn't mean "bitcoin" here
            "btc" => RegAllocAlgorithm::BacktrackingChecked,
            _ => RegAllocAlgorithm::Backtracking,
        },
        // By default use backtracking, which is the fastest.
        Err(_) => RegAllocAlgorithm::Backtracking,
    };

    let result = {
        let _tt = timing::regalloc();
        allocate_registers(
            &mut vcode, algorithm, universe, /*request_block_annotations=*/ false,
        )
        .map_err(|err| {
            debug!(
                "Register allocation error for vcode\n{}\nError: {:?}",
                vcode.show_rru(Some(universe)),
                err
            );
            err
        })
        .expect("register allocation")
    };

    // Reorder vcode into final order and copy out final instruction sequence
    // all at once. This also inserts prologues/epilogues.
    vcode.replace_insns_from_regalloc(result, flags);

    vcode.remove_redundant_branches();

    // Do final passes over code to finalize branches.
    vcode.finalize_branches();

    debug!(
        "vcode after regalloc: final version:\n{}",
        vcode.show_rru(Some(universe))
    );

    //println!("{}\n", vcode.show_rru(Some(&B::MInst::reg_universe())));

    vcode
}
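As the match above shows, the allocator can be switched at run time through the `REGALLOC` environment variable. A usage sketch (the variable must be set before `compile()` runs; the surrounding harness is elided):

```rust
fn main() {
    // Use the checked linear-scan allocator, e.g. while debugging a
    // suspected regalloc issue. Unrecognized values (and an unset variable)
    // fall back to the default backtracking allocator.
    std::env::set_var("REGALLOC", "lsrac");
    // ... construct the function, backend, and ABI body, then call
    // machinst::compile::compile(...) as usual.
}
```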
cranelift/codegen/src/machinst/lower.rs | 723 (new file)
@@ -0,0 +1,723 @@
//! This module implements lowering (instruction selection) from Cranelift IR
//! to machine instructions with virtual registers. This is *almost* the final
//! machine code, except for register allocation.

use crate::binemit::CodeSink;
use crate::dce::has_side_effect;
use crate::entity::SecondaryMap;
use crate::ir::{
    Block, ExternalName, Function, GlobalValueData, Inst, InstructionData, MemFlags, Opcode,
    Signature, SourceLoc, Type, Value, ValueDef,
};
use crate::isa::registers::RegUnit;
use crate::machinst::{
    ABIBody, BlockIndex, MachInst, MachInstEmit, VCode, VCodeBuilder, VCodeInst,
};
use crate::num_uses::NumUses;

use regalloc::Function as RegallocFunction;
use regalloc::{RealReg, Reg, RegClass, Set, VirtualReg, Writable};

use alloc::boxed::Box;
use alloc::vec::Vec;
use log::debug;
use smallvec::SmallVec;
use std::collections::VecDeque;
use std::ops::Range;

/// A context that machine-specific lowering code can use to emit lowered
/// instructions. This is the view of the machine-independent per-function
/// lowering context that is seen by the machine backend.
pub trait LowerCtx<I> {
    /// Get the instdata for a given IR instruction.
    fn data(&self, ir_inst: Inst) -> &InstructionData;
    /// Get the controlling type for a polymorphic IR instruction.
    fn ty(&self, ir_inst: Inst) -> Type;
    /// Emit a machine instruction.
    fn emit(&mut self, mach_inst: I);
    /// Indicate that an IR instruction has been merged, and so one of its
    /// uses is gone (replaced by uses of the instruction's inputs). This
    /// helps the lowering algorithm to perform on-the-fly DCE, skipping over
    /// unused instructions (such as immediates incorporated directly).
    fn merged(&mut self, from_inst: Inst);
    /// Get the producing instruction, if any, and output number, for the
    /// `idx`th input to the given IR instruction.
    fn input_inst(&self, ir_inst: Inst, idx: usize) -> Option<(Inst, usize)>;
    /// Map a Value to its associated writable (probably virtual) Reg.
    fn value_to_writable_reg(&self, val: Value) -> Writable<Reg>;
    /// Map a Value to its associated (probably virtual) Reg.
    fn value_to_reg(&self, val: Value) -> Reg;
    /// Get the `idx`th input to the given IR instruction as a virtual register.
    fn input(&self, ir_inst: Inst, idx: usize) -> Reg;
    /// Get the `idx`th output of the given IR instruction as a virtual register.
    fn output(&self, ir_inst: Inst, idx: usize) -> Writable<Reg>;
    /// Get the number of inputs to the given IR instruction.
    fn num_inputs(&self, ir_inst: Inst) -> usize;
    /// Get the number of outputs to the given IR instruction.
    fn num_outputs(&self, ir_inst: Inst) -> usize;
    /// Get the type for an instruction's input.
    fn input_ty(&self, ir_inst: Inst, idx: usize) -> Type;
    /// Get the type for an instruction's output.
    fn output_ty(&self, ir_inst: Inst, idx: usize) -> Type;
    /// Get a new temp.
    fn tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg>;
    /// Get the number of block params.
    fn num_bb_params(&self, bb: Block) -> usize;
    /// Get the register for a block param.
    fn bb_param(&self, bb: Block, idx: usize) -> Reg;
    /// Get the register for a return value.
    fn retval(&self, idx: usize) -> Writable<Reg>;
    /// Get the target for a call instruction, as an `ExternalName`.
    fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<&'b ExternalName>;
    /// Get the signature for a call or call-indirect instruction.
    fn call_sig<'b>(&'b self, ir_inst: Inst) -> Option<&'b Signature>;
    /// Get the symbol name and offset for a symbol_value instruction.
    fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, i64)>;
    /// Returns the memory flags of a given memory access.
    fn memflags(&self, ir_inst: Inst) -> Option<MemFlags>;
    /// Get the source location for a given instruction.
    fn srcloc(&self, ir_inst: Inst) -> SourceLoc;
}

/// A machine backend.
pub trait LowerBackend {
    /// The machine instruction type.
    type MInst: VCodeInst;

    /// Lower a single instruction. Instructions are lowered in reverse order.
    /// This function need not handle branches; those are always passed to
    /// `lower_branch_group` below.
    fn lower<C: LowerCtx<Self::MInst>>(&self, ctx: &mut C, inst: Inst);

    /// Lower a block-terminating group of branches (which together can be
    /// seen as one N-way branch), given a vcode BlockIndex for each target.
    fn lower_branch_group<C: LowerCtx<Self::MInst>>(
        &self,
        ctx: &mut C,
        insts: &[Inst],
        targets: &[BlockIndex],
        fallthrough: Option<BlockIndex>,
    );
}

/// Machine-independent lowering driver / machine-instruction container.
/// Maintains a correspondence from original Inst to MachInsts.
pub struct Lower<'a, I: VCodeInst> {
    // The function to lower.
    f: &'a Function,

    // Lowered machine instructions.
    vcode: VCodeBuilder<I>,

    // Number of active uses (minus `dec_use()` calls by backend) of each
    // instruction.
    num_uses: SecondaryMap<Inst, u32>,

    // Mapping from `Value` (SSA value in IR) to virtual register.
    value_regs: SecondaryMap<Value, Reg>,

    // Return-value vregs.
    retval_regs: Vec<Reg>,

    // Next virtual register number to allocate.
    next_vreg: u32,
}

fn alloc_vreg(
    value_regs: &mut SecondaryMap<Value, Reg>,
    regclass: RegClass,
    value: Value,
    next_vreg: &mut u32,
) -> VirtualReg {
    if value_regs[value].get_index() == 0 {
        // default value in map.
        let v = *next_vreg;
        *next_vreg += 1;
        value_regs[value] = Reg::new_virtual(regclass, v);
    }
    value_regs[value].as_virtual_reg().unwrap()
}

enum GenerateReturn {
    Yes,
    No,
}

impl<'a, I: VCodeInst> Lower<'a, I> {
    /// Prepare a new lowering context for the given IR function.
    pub fn new(f: &'a Function, abi: Box<dyn ABIBody<I>>) -> Lower<'a, I> {
        let mut vcode = VCodeBuilder::new(abi);

        let num_uses = NumUses::compute(f).take_uses();

        let mut next_vreg: u32 = 1;

        // Default register should never be seen, but the `value_regs` map
        // needs a default and we don't want to push `Option` everywhere. All
        // values will be assigned registers by the loops over block parameters
        // and instruction results below.
        //
        // We do not use vreg 0 so that we can detect any unassigned register
        // that leaks through.
        let default_register = Reg::new_virtual(RegClass::I32, 0);
        let mut value_regs = SecondaryMap::with_default(default_register);

        // Assign a vreg to each value.
        for bb in f.layout.blocks() {
            for param in f.dfg.block_params(bb) {
                let vreg = alloc_vreg(
                    &mut value_regs,
                    I::rc_for_type(f.dfg.value_type(*param)),
                    *param,
                    &mut next_vreg,
                );
                vcode.set_vreg_type(vreg, f.dfg.value_type(*param));
            }
            for inst in f.layout.block_insts(bb) {
                for result in f.dfg.inst_results(inst) {
                    let vreg = alloc_vreg(
                        &mut value_regs,
                        I::rc_for_type(f.dfg.value_type(*result)),
                        *result,
                        &mut next_vreg,
                    );
                    vcode.set_vreg_type(vreg, f.dfg.value_type(*result));
                }
            }
        }

        // Assign a vreg to each return value.
        let mut retval_regs = vec![];
        for ret in &f.signature.returns {
            let v = next_vreg;
            next_vreg += 1;
            let regclass = I::rc_for_type(ret.value_type);
            let vreg = Reg::new_virtual(regclass, v);
            retval_regs.push(vreg);
            vcode.set_vreg_type(vreg.as_virtual_reg().unwrap(), ret.value_type);
        }

        Lower {
            f,
            vcode,
            num_uses,
            value_regs,
            retval_regs,
            next_vreg,
        }
    }

    fn gen_arg_setup(&mut self) {
        if let Some(entry_bb) = self.f.layout.entry_block() {
            debug!(
                "gen_arg_setup: entry BB {} args are:\n{:?}",
                entry_bb,
                self.f.dfg.block_params(entry_bb)
            );
            for (i, param) in self.f.dfg.block_params(entry_bb).iter().enumerate() {
                let reg = Writable::from_reg(self.value_regs[*param]);
                let insn = self.vcode.abi().gen_copy_arg_to_reg(i, reg);
                self.vcode.push(insn);
            }
        }
    }

    fn gen_retval_setup(&mut self, gen_ret_inst: GenerateReturn) {
        for (i, reg) in self.retval_regs.iter().enumerate() {
            let insn = self.vcode.abi().gen_copy_reg_to_retval(i, *reg);
            self.vcode.push(insn);
        }
        let inst = match gen_ret_inst {
            GenerateReturn::Yes => self.vcode.abi().gen_ret(),
            GenerateReturn::No => self.vcode.abi().gen_epilogue_placeholder(),
        };
        self.vcode.push(inst);
    }

    fn find_reachable_bbs(&self) -> SmallVec<[Block; 16]> {
        if let Some(entry) = self.f.layout.entry_block() {
            let mut ret = SmallVec::new();
            let mut queue = VecDeque::new();
            let mut visited = SecondaryMap::with_default(false);
            queue.push_back(entry);
            visited[entry] = true;
            while !queue.is_empty() {
                let b = queue.pop_front().unwrap();
                ret.push(b);
                let mut succs: SmallVec<[Block; 16]> = SmallVec::new();
                for inst in self.f.layout.block_insts(b) {
                    if self.f.dfg[inst].opcode().is_branch() {
                        succs.extend(branch_targets(self.f, b, inst).into_iter());
                    }
                }
                for succ in succs.into_iter() {
                    if !visited[succ] {
                        queue.push_back(succ);
                        visited[succ] = true;
                    }
                }
            }

            ret
        } else {
            SmallVec::new()
        }
    }

    /// Lower the function.
    pub fn lower<B: LowerBackend<MInst = I>>(mut self, backend: &B) -> VCode<I> {
        // Find all reachable blocks.
        let mut bbs = self.find_reachable_bbs();
        // Work backward (reverse block order, reverse through each block),
        // skipping insns with zero uses.
        bbs.reverse();

        // This records a Block-to-BlockIndex map so that branch targets can
        // be resolved.
        let mut next_bindex = self.vcode.init_bb_map(&bbs[..]);

        // Allocate a separate BlockIndex for each control-flow instruction so
        // that we can create the edge blocks later. Each entry for a
        // control-flow inst is the edge block; the list has (cf-inst, edge
        // block, orig block) tuples.
        let mut edge_blocks_by_inst: SecondaryMap<Inst, Vec<BlockIndex>> =
            SecondaryMap::with_default(vec![]);
        let mut edge_blocks: Vec<(Inst, BlockIndex, Block)> = vec![];

        debug!("about to lower function: {:?}", self.f);
        debug!("bb map: {:?}", self.vcode.blocks_by_bb());

        for bb in bbs.iter() {
            for inst in self.f.layout.block_insts(*bb) {
                let op = self.f.dfg[inst].opcode();
                if op.is_branch() {
                    // Find the original target.
                    let mut add_succ = |next_bb| {
                        let edge_block = next_bindex;
                        next_bindex += 1;
                        edge_blocks_by_inst[inst].push(edge_block);
                        edge_blocks.push((inst, edge_block, next_bb));
                    };
                    for succ in branch_targets(self.f, *bb, inst).into_iter() {
                        add_succ(succ);
                    }
                }
            }
        }

        for bb in bbs.iter() {
            debug!("lowering bb: {}", bb);

            // If this is a return block, produce the return value setup.
            let last_insn = self.f.layout.block_insts(*bb).last().unwrap();
            let last_insn_opcode = self.f.dfg[last_insn].opcode();
            if last_insn_opcode.is_return() {
                let gen_ret = if last_insn_opcode == Opcode::Return {
                    GenerateReturn::Yes
                } else {
                    debug_assert!(last_insn_opcode == Opcode::FallthroughReturn);
                    GenerateReturn::No
                };
                self.gen_retval_setup(gen_ret);
                self.vcode.end_ir_inst();
            }

            // Find the branches at the end first, and process those, if any.
            let mut branches: SmallVec<[Inst; 2]> = SmallVec::new();
            let mut targets: SmallVec<[BlockIndex; 2]> = SmallVec::new();

            for inst in self.f.layout.block_insts(*bb).rev() {
                debug!("lower: inst {}", inst);
                if edge_blocks_by_inst[inst].len() > 0 {
                    branches.push(inst);
                    for target in edge_blocks_by_inst[inst].iter().rev().cloned() {
                        targets.push(target);
                    }
                } else {
                    // We've reached the end of the branches -- process all as
                    // a group, first.
                    if branches.len() > 0 {
                        let fallthrough = self.f.layout.next_block(*bb);
                        let fallthrough = fallthrough.map(|bb| self.vcode.bb_to_bindex(bb));
                        branches.reverse();
                        targets.reverse();
                        debug!(
                            "lower_branch_group: targets = {:?} branches = {:?}",
                            targets, branches
                        );
                        backend.lower_branch_group(
                            &mut self,
                            &branches[..],
                            &targets[..],
                            fallthrough,
                        );
                        self.vcode.end_ir_inst();
                        branches.clear();
                        targets.clear();
                    }

                    // Only codegen an instruction if it either has a side
                    // effect, or has at least one use of one of its results.
                    let num_uses = self.num_uses[inst];
                    let side_effect = has_side_effect(self.f, inst);
                    if side_effect || num_uses > 0 {
                        backend.lower(&mut self, inst);
                        self.vcode.end_ir_inst();
                    } else {
                        // If we're skipping the instruction, we need to
                        // dec-ref its arguments.
                        for arg in self.f.dfg.inst_args(inst) {
                            let val = self.f.dfg.resolve_aliases(*arg);
                            match self.f.dfg.value_def(val) {
                                ValueDef::Result(src_inst, _) => {
                                    self.dec_use(src_inst);
                                }
                                _ => {}
                            }
                        }
                    }
                }
            }

            // There are possibly some branches left if the block contained
            // only branches.
            if branches.len() > 0 {
                let fallthrough = self.f.layout.next_block(*bb);
                let fallthrough = fallthrough.map(|bb| self.vcode.bb_to_bindex(bb));
                branches.reverse();
                targets.reverse();
                debug!(
                    "lower_branch_group: targets = {:?} branches = {:?}",
                    targets, branches
                );
                backend.lower_branch_group(&mut self, &branches[..], &targets[..], fallthrough);
                self.vcode.end_ir_inst();
                branches.clear();
                targets.clear();
            }

            // If this is the entry block, produce the argument setup.
            if Some(*bb) == self.f.layout.entry_block() {
                self.gen_arg_setup();
                self.vcode.end_ir_inst();
            }

            let vcode_bb = self.vcode.end_bb();
            debug!("finished building bb: BlockIndex {}", vcode_bb);
            debug!("bb_to_bindex map says: {}", self.vcode.bb_to_bindex(*bb));
            assert!(vcode_bb == self.vcode.bb_to_bindex(*bb));
            if Some(*bb) == self.f.layout.entry_block() {
                self.vcode.set_entry(vcode_bb);
            }
        }

        // Now create the edge blocks, with phi lowering (block parameter copies).
        for (inst, edge_block, orig_block) in edge_blocks.into_iter() {
            debug!(
                "creating edge block: inst {}, edge_block {}, orig_block {}",
                inst, edge_block, orig_block
            );

            // Create a temporary for each block parameter.
            let phi_classes: Vec<(Type, RegClass)> = self
                .f
                .dfg
                .block_params(orig_block)
                .iter()
                .map(|p| self.f.dfg.value_type(*p))
                .map(|ty| (ty, I::rc_for_type(ty)))
                .collect();

            // FIXME sewardj 2020Feb29: use SmallVec
            let mut src_regs = vec![];
            let mut dst_regs = vec![];

            // Create all of the phi uses (reads) from jump args to temps.

            // Round up all the source and destination regs.
            for (i, arg) in self.f.dfg.inst_variable_args(inst).iter().enumerate() {
                let arg = self.f.dfg.resolve_aliases(*arg);
                debug!("jump arg {} is {}", i, arg);
                src_regs.push(self.value_regs[arg]);
            }
            for (i, param) in self.f.dfg.block_params(orig_block).iter().enumerate() {
                debug!("bb arg {} is {}", i, param);
                dst_regs.push(Writable::from_reg(self.value_regs[*param]));
            }
            debug_assert!(src_regs.len() == dst_regs.len());
            debug_assert!(phi_classes.len() == dst_regs.len());

            // If, as is mostly the case, the source and destination register
            // sets are non-overlapping, then we can copy directly, so as to
            // save the register allocator work.
            if !Set::<Reg>::from_vec(src_regs.clone()).intersects(&Set::<Reg>::from_vec(
                dst_regs.iter().map(|r| r.to_reg()).collect(),
            )) {
                for (dst_reg, (src_reg, (ty, _))) in
                    dst_regs.iter().zip(src_regs.iter().zip(phi_classes))
                {
                    self.vcode.push(I::gen_move(*dst_reg, *src_reg, ty));
                }
            } else {
                // There's some overlap, so play safe and copy via temps.

                let tmp_regs: Vec<Writable<Reg>> = phi_classes
                    .iter()
                    .map(|&(ty, rc)| self.tmp(rc, ty)) // borrows `self` mutably.
                    .collect();

                debug!("phi_temps = {:?}", tmp_regs);
                debug_assert!(tmp_regs.len() == src_regs.len());

                for (tmp_reg, (src_reg, &(ty, _))) in
                    tmp_regs.iter().zip(src_regs.iter().zip(phi_classes.iter()))
                {
                    self.vcode.push(I::gen_move(*tmp_reg, *src_reg, ty));
                }
                for (dst_reg, (tmp_reg, &(ty, _))) in
                    dst_regs.iter().zip(tmp_regs.iter().zip(phi_classes.iter()))
                {
                    self.vcode.push(I::gen_move(*dst_reg, tmp_reg.to_reg(), ty));
                }
            }

            // Create the unconditional jump to the original target block.
            self.vcode
                .push(I::gen_jump(self.vcode.bb_to_bindex(orig_block)));

            // End the IR inst and block. (We lower this as if it were one IR
            // instruction so that we can emit machine instructions in forward
            // order.)
            self.vcode.end_ir_inst();
            let blocknum = self.vcode.end_bb();
            assert!(blocknum == edge_block);
        }

        // Now that we've emitted all instructions into the VCodeBuilder,
        // let's build the VCode.
        self.vcode.build()
    }

    /// Reduce the use-count of an IR instruction. Use this when, e.g., isel
    /// incorporates the computation of an input instruction directly, so that
    /// input instruction has one fewer use.
    fn dec_use(&mut self, ir_inst: Inst) {
        assert!(self.num_uses[ir_inst] > 0);
        self.num_uses[ir_inst] -= 1;
        debug!(
            "dec_use: ir_inst {} now has {} uses",
            ir_inst, self.num_uses[ir_inst]
        );
    }

    /// Increase the use-count of an IR instruction. Use this when, e.g., isel
    /// incorporates the computation of an input instruction directly, so that
    /// input instruction's inputs are now used directly by the merged
    /// instruction.
    fn inc_use(&mut self, ir_inst: Inst) {
        self.num_uses[ir_inst] += 1;
        debug!(
            "inc_use: ir_inst {} now has {} uses",
            ir_inst, self.num_uses[ir_inst]
        );
    }
}

impl<'a, I: VCodeInst> LowerCtx<I> for Lower<'a, I> {
    /// Get the instdata for a given IR instruction.
    fn data(&self, ir_inst: Inst) -> &InstructionData {
        &self.f.dfg[ir_inst]
    }

    /// Get the controlling type for a polymorphic IR instruction.
    fn ty(&self, ir_inst: Inst) -> Type {
        self.f.dfg.ctrl_typevar(ir_inst)
    }

    /// Emit a machine instruction.
    fn emit(&mut self, mach_inst: I) {
        self.vcode.push(mach_inst);
    }

    /// Indicate that a merge has occurred.
    fn merged(&mut self, from_inst: Inst) {
        debug!("merged: inst {}", from_inst);
        // First, inc-ref all inputs of `from_inst`, because they are now used
        // directly by the instruction into which `from_inst` was merged.
        for arg in self.f.dfg.inst_args(from_inst) {
            let arg = self.f.dfg.resolve_aliases(*arg);
            match self.f.dfg.value_def(arg) {
                ValueDef::Result(src_inst, _) => {
                    debug!(" -> inc-reffing src inst {}", src_inst);
                    self.inc_use(src_inst);
                }
                _ => {}
            }
        }
        // Then, dec-ref the merged instruction itself. It still retains
        // references to its arguments (inc-ref'd above). If its refcount has
        // reached zero, it will be skipped during emission and its args will
        // be dec-ref'd at that time.
        self.dec_use(from_inst);
    }

    /// Get the producing instruction, if any, and output number, for the
    /// `idx`th input to the given IR instruction.
    fn input_inst(&self, ir_inst: Inst, idx: usize) -> Option<(Inst, usize)> {
        let val = self.f.dfg.inst_args(ir_inst)[idx];
        let val = self.f.dfg.resolve_aliases(val);
        match self.f.dfg.value_def(val) {
            ValueDef::Result(src_inst, result_idx) => Some((src_inst, result_idx)),
            _ => None,
        }
    }

    /// Map a Value to its associated writable (probably virtual) Reg.
    fn value_to_writable_reg(&self, val: Value) -> Writable<Reg> {
        let val = self.f.dfg.resolve_aliases(val);
        Writable::from_reg(self.value_regs[val])
    }

    /// Map a Value to its associated (probably virtual) Reg.
    fn value_to_reg(&self, val: Value) -> Reg {
        let val = self.f.dfg.resolve_aliases(val);
        self.value_regs[val]
    }

    /// Get the `idx`th input to the given IR instruction as a virtual register.
    fn input(&self, ir_inst: Inst, idx: usize) -> Reg {
        let val = self.f.dfg.inst_args(ir_inst)[idx];
        let val = self.f.dfg.resolve_aliases(val);
        self.value_to_reg(val)
    }

    /// Get the `idx`th output of the given IR instruction as a virtual register.
    fn output(&self, ir_inst: Inst, idx: usize) -> Writable<Reg> {
        let val = self.f.dfg.inst_results(ir_inst)[idx];
        self.value_to_writable_reg(val)
    }

    /// Get a new temp.
    fn tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg> {
        let v = self.next_vreg;
        self.next_vreg += 1;
        let vreg = Reg::new_virtual(rc, v);
        self.vcode.set_vreg_type(vreg.as_virtual_reg().unwrap(), ty);
        Writable::from_reg(vreg)
    }

    /// Get the number of inputs for the given IR instruction.
    fn num_inputs(&self, ir_inst: Inst) -> usize {
        self.f.dfg.inst_args(ir_inst).len()
    }

    /// Get the number of outputs for the given IR instruction.
    fn num_outputs(&self, ir_inst: Inst) -> usize {
        self.f.dfg.inst_results(ir_inst).len()
    }

    /// Get the type for an instruction's input.
    fn input_ty(&self, ir_inst: Inst, idx: usize) -> Type {
        let val = self.f.dfg.inst_args(ir_inst)[idx];
        let val = self.f.dfg.resolve_aliases(val);
        self.f.dfg.value_type(val)
    }

    /// Get the type for an instruction's output.
    fn output_ty(&self, ir_inst: Inst, idx: usize) -> Type {
        self.f.dfg.value_type(self.f.dfg.inst_results(ir_inst)[idx])
    }

    /// Get the number of block params.
    fn num_bb_params(&self, bb: Block) -> usize {
        self.f.dfg.block_params(bb).len()
    }

    /// Get the register for a block param.
    fn bb_param(&self, bb: Block, idx: usize) -> Reg {
        let val = self.f.dfg.block_params(bb)[idx];
        self.value_regs[val]
    }

    /// Get the register for a return value.
    fn retval(&self, idx: usize) -> Writable<Reg> {
        Writable::from_reg(self.retval_regs[idx])
    }

    /// Get the target for a call instruction, as an `ExternalName`.
    fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<&'b ExternalName> {
        match &self.f.dfg[ir_inst] {
            &InstructionData::Call { func_ref, .. }
            | &InstructionData::FuncAddr { func_ref, .. } => {
                let funcdata = &self.f.dfg.ext_funcs[func_ref];
                Some(&funcdata.name)
            }
            _ => None,
        }
    }

    /// Get the signature for a call or call-indirect instruction.
    fn call_sig<'b>(&'b self, ir_inst: Inst) -> Option<&'b Signature> {
        match &self.f.dfg[ir_inst] {
            &InstructionData::Call { func_ref, .. } => {
                let funcdata = &self.f.dfg.ext_funcs[func_ref];
                Some(&self.f.dfg.signatures[funcdata.signature])
            }
            &InstructionData::CallIndirect { sig_ref, .. } => Some(&self.f.dfg.signatures[sig_ref]),
            _ => None,
        }
    }

    /// Get the symbol name and offset for a symbol_value instruction.
    fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, i64)> {
        match &self.f.dfg[ir_inst] {
            &InstructionData::UnaryGlobalValue { global_value, .. } => {
                let gvdata = &self.f.global_values[global_value];
                match gvdata {
                    &GlobalValueData::Symbol {
                        ref name,
                        ref offset,
                        ..
                    } => {
                        let offset = offset.bits();
                        Some((name, offset))
                    }
                    _ => None,
                }
            }
            _ => None,
        }
    }

    /// Returns the memory flags of a given memory access.
    fn memflags(&self, ir_inst: Inst) -> Option<MemFlags> {
        match &self.f.dfg[ir_inst] {
            &InstructionData::Load { flags, .. }
            | &InstructionData::LoadComplex { flags, .. }
            | &InstructionData::Store { flags, .. }
            | &InstructionData::StoreComplex { flags, .. } => Some(flags),
            _ => None,
        }
    }

    /// Get the source location for a given instruction.
    fn srcloc(&self, ir_inst: Inst) -> SourceLoc {
        self.f.srclocs[ir_inst]
    }
}

fn branch_targets(f: &Function, block: Block, inst: Inst) -> SmallVec<[Block; 16]> {
    let mut ret = SmallVec::new();
    if f.dfg[inst].opcode() == Opcode::Fallthrough {
        ret.push(f.layout.next_block(block).unwrap());
    } else {
        match &f.dfg[inst] {
            &InstructionData::Jump { destination, .. }
            | &InstructionData::Branch { destination, .. }
            | &InstructionData::BranchInt { destination, .. }
            | &InstructionData::BranchIcmp { destination, .. }
            | &InstructionData::BranchFloat { destination, .. } => {
                ret.push(destination);
            }
            &InstructionData::BranchTable {
                destination, table, ..
            } => {
                ret.push(destination);
                for dest in f.jump_tables[table].as_slice() {
                    ret.push(*dest);
                }
            }
            _ => {}
        }
    }
    ret
}
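To make the `LowerCtx`/`merged()` protocol concrete, here is a sketch of what a backend's `lower()` might do for an integer add whose second operand comes from an `iconst` — not part of this patch; `MyInst`, its `Add`/`AddImm` variants, and `imm_from_instdata` are hypothetical machine-specific pieces:

```rust
fn lower_iadd<C: LowerCtx<MyInst>>(ctx: &mut C, ir_inst: Inst) {
    let dst = ctx.output(ir_inst, 0);
    let lhs = ctx.input(ir_inst, 0);
    // If input 1 is produced by an `iconst`, fold it into an immediate form.
    if let Some((src_inst, _)) = ctx.input_inst(ir_inst, 1) {
        if let Some(imm) = imm_from_instdata(ctx.data(src_inst)) {
            ctx.emit(MyInst::AddImm { dst, lhs, imm });
            // Tell the driver one use of the iconst is gone; if its use-count
            // reaches zero, the iconst is skipped entirely (on-the-fly DCE).
            ctx.merged(src_inst);
            return;
        }
    }
    let rhs = ctx.input(ir_inst, 1);
    ctx.emit(MyInst::Add { dst, lhs, rhs });
}
```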
cranelift/codegen/src/machinst/mod.rs | 288 (new file)
@@ -0,0 +1,288 @@
|
||||
//! This module exposes the machine-specific backend definition pieces.
|
||||
//!
|
||||
//! The MachInst infrastructure is the compiler backend, from CLIF
|
||||
//! (ir::Function) to machine code. The purpose of this infrastructure is, at a
|
||||
//! high level, to do instruction selection/lowering (to machine instructions),
|
||||
//! register allocation, and then perform all the fixups to branches, constant
|
||||
//! data references, etc., needed to actually generate machine code.
|
||||
//!
|
||||
//! The container for machine instructions, at various stages of construction,
|
||||
//! is the `VCode` struct. We refer to a sequence of machine instructions organized
|
||||
//! into basic blocks as "vcode". This is short for "virtual-register code", though
|
||||
//! it's a bit of a misnomer because near the end of the pipeline, vcode has all
|
||||
//! real registers. Nevertheless, the name is catchy and we like it.
|
||||
//!
|
||||
//! The compilation pipeline, from an `ir::Function` (already optimized as much as
|
||||
//! you like by machine-independent optimization passes) onward, is as follows.
|
||||
//! (N.B.: though we show the VCode separately at each stage, the passes
|
||||
//! mutate the VCode in place; these are not separate copies of the code.)
|
||||
//!
|
||||
//! | ir::Function (SSA IR, machine-independent opcodes)
|
||||
//! | |
|
||||
//! | | [lower]
|
||||
//! | |
|
||||
//! | VCode<arch_backend::Inst> (machine instructions:
|
||||
//! | | - mostly virtual registers.
|
||||
//! | | - cond branches in two-target form.
|
||||
//! | | - branch targets are block indices.
|
||||
//! | | - in-memory constants held by insns,
|
||||
//! | | with unknown offsets.
|
||||
//! | | - critical edges (actually all edges)
|
||||
//! | | are split.)
|
||||
//! | | [regalloc]
|
||||
//! | |
|
||||
//! | VCode<arch_backend::Inst> (machine instructions:
|
||||
//! | | - all real registers.
|
||||
//! | | - new instruction sequence returned
|
||||
//! | | out-of-band in RegAllocResult.
|
||||
//! | | - instruction sequence has spills,
|
||||
//! | | reloads, and moves inserted.
|
||||
//! | | - other invariants same as above.)
|
||||
//! | |
|
||||
//! | | [preamble/postamble]
|
||||
//! | |
|
||||
//! | VCode<arch_backend::Inst> (machine instructions:
|
||||
//! | | - stack-frame size known.
|
||||
//! | | - out-of-band instruction sequence
|
||||
//! | | has preamble prepended to entry
|
||||
//! | | block, and postamble injected before
|
||||
//! | | every return instruction.
|
||||
//! | | - all symbolic stack references to
|
||||
//! | | stackslots and spillslots are resolved
|
||||
//! | | to concrete FP-offset mem addresses.)
|
||||
//! | | [block/insn ordering]
|
||||
//! | |
|
||||
//! | VCode<arch_backend::Inst> (machine instructions:
|
||||
//! | | - vcode.final_block_order is filled in.
|
||||
//! | | - new insn sequence from regalloc is
|
||||
//! | | placed back into vcode and block
|
||||
//! | | boundaries are updated.)
|
||||
//! | | [redundant branch/block
|
||||
//! | | removal]
|
||||
//! | |
|
||||
//! | VCode<arch_backend::Inst> (machine instructions:
|
||||
//! | | - all blocks that were just an
|
||||
//! | | unconditional branch are removed.)
|
||||
//! | |
|
||||
//! | | [branch finalization
|
||||
//! | | (fallthroughs)]
|
||||
//! | |
|
||||
//! | VCode<arch_backend::Inst> (machine instructions:
|
||||
//! | | - all branches are in lowered one-
|
||||
//! | | target form, but targets are still
|
||||
//! | | block indices.)
|
||||
//! | |
|
||||
//! | | [branch finalization
|
||||
//! | | (offsets)]
|
||||
//! | |
|
||||
//! | VCode<arch_backend::Inst> (machine instructions:
|
||||
//! | | - all branch offsets from start of
|
||||
//! | | function are known, and all branches
|
||||
//! | | have resolved-offset targets.)
|
||||
//! | |
|
||||
//! | | [MemArg finalization]
|
||||
//! | |
|
||||
//! | VCode<arch_backend::Inst> (machine instructions:
|
||||
//! | | - all MemArg references to the constant
|
||||
//! | | pool are replaced with offsets.
|
||||
//! | | - all constant-pool data is collected
|
||||
//! | | in the VCode.)
|
||||
//! | |
|
||||
//! | | [binary emission]
|
||||
//! | |
|
||||
//! | Vec<u8> (machine code!)
|
||||
//! |
|
||||
|
||||
#![allow(unused_imports)]
|
||||
|
||||
use crate::binemit::{
|
||||
CodeInfo, CodeOffset, CodeSink, MemoryCodeSink, RelocSink, StackmapSink, TrapSink,
|
||||
};
|
||||
use crate::entity::EntityRef;
|
||||
use crate::entity::SecondaryMap;
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::ValueLocations;
|
||||
use crate::ir::{DataFlowGraph, Function, Inst, Opcode, Type, Value};
|
||||
use crate::isa::RegUnit;
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings::Flags;
|
||||
use crate::HashMap;
|
||||
use alloc::boxed::Box;
|
||||
use alloc::vec::Vec;
|
||||
use core::fmt::Debug;
|
||||
use core::iter::Sum;
|
||||
use regalloc::Map as RegallocMap;
|
||||
use regalloc::RegUsageCollector;
|
||||
use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
|
||||
use smallvec::SmallVec;
|
||||
use std::hash::Hash;
|
||||
use std::string::String;
|
||||
use target_lexicon::Triple;
|
||||
|
||||
pub mod lower;
|
||||
pub use lower::*;
|
||||
pub mod vcode;
|
||||
pub use vcode::*;
|
||||
pub mod compile;
|
||||
pub use compile::*;
|
||||
pub mod blockorder;
|
||||
pub use blockorder::*;
|
||||
pub mod abi;
|
||||
pub use abi::*;
|
||||
pub mod pp;
|
||||
pub use pp::*;
|
||||
pub mod sections;
|
||||
pub use sections::*;
|
||||
pub mod adapter;
|
||||
pub use adapter::*;
|
||||
|
||||
/// A machine instruction.
|
||||
pub trait MachInst: Clone + Debug {
|
||||
/// Return the registers referenced by this machine instruction along with
|
||||
/// the modes of reference (use, def, modify).
|
||||
fn get_regs(&self, collector: &mut RegUsageCollector);
|
||||
|
||||
/// Map virtual registers to physical registers using the given virt->phys
|
||||
/// maps corresponding to the program points prior to, and after, this instruction.
|
||||
fn map_regs(
|
||||
&mut self,
|
||||
pre_map: &RegallocMap<VirtualReg, RealReg>,
|
||||
post_map: &RegallocMap<VirtualReg, RealReg>,
|
||||
);
|
||||
|
||||
/// If this is a simple move, return the (source, destination) tuple of registers.
|
||||
fn is_move(&self) -> Option<(Writable<Reg>, Reg)>;
|
||||
|
||||
/// Is this a terminator (branch or ret)? If so, return its type
|
||||
/// (ret/uncond/cond) and target if applicable.
|
||||
fn is_term<'a>(&'a self) -> MachTerminator<'a>;
|
||||
|
||||
/// Returns true if the instruction is an epilogue placeholder.
|
||||
fn is_epilogue_placeholder(&self) -> bool;
|
||||
|
||||
/// Generate a move.
|
||||
fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;
|
||||
|
||||
/// Generate a zero-length no-op.
|
||||
fn gen_zero_len_nop() -> Self;
|
||||
|
||||
/// Possibly operate on a value directly in a spill-slot rather than a
|
||||
/// register. Useful if the machine has register-memory instruction forms
|
||||
/// (e.g., add directly from or directly to memory), like x86.
|
||||
fn maybe_direct_reload(&self, reg: VirtualReg, slot: SpillSlot) -> Option<Self>;
|
||||
|
||||
/// Determine a register class to store the given CraneLift type.
|
||||
fn rc_for_type(ty: Type) -> RegClass;
|
||||
|
||||
/// Generate a jump to another target. Used during lowering of
|
||||
/// control flow.
|
||||
fn gen_jump(target: BlockIndex) -> Self;
|
||||
|
||||
/// Generate a NOP. The `preferred_size` parameter allows the caller to
|
||||
/// request a NOP of that size, or as close to it as possible. The machine
|
||||
/// backend may return a NOP whose binary encoding is smaller than the
|
||||
/// preferred size, but must not return a NOP that is larger. However,
|
||||
/// the instruction must have a nonzero size.
|
||||
fn gen_nop(preferred_size: usize) -> Self;
|
||||
|
||||
/// Rewrite block targets using the block-target map.
|
||||
fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]);
|
||||
|
||||
/// Finalize branches once the block order (fallthrough) is known.
|
||||
fn with_fallthrough_block(&mut self, fallthrough_block: Option<BlockIndex>);
|
||||
|
||||
/// Update instruction once block offsets are known. These offsets are
|
||||
/// relative to the beginning of the function. `targets` is indexed by
|
||||
/// BlockIndex.
|
||||
fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]);
|
||||
|
||||
/// Get the register universe for this backend.
|
||||
fn reg_universe() -> RealRegUniverse;
|
||||
|
||||
/// Align a basic block offset (from start of function). By default, no
|
||||
/// alignment occurs.
|
||||
fn align_basic_block(offset: CodeOffset) -> CodeOffset {
|
||||
offset
|
||||
}
|
||||
}
|
||||
|
||||
/// Describes a block terminator (not call) in the vcode, when its branches
|
||||
/// have not yet been finalized (so a branch may have two targets).
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub enum MachTerminator<'a> {
|
||||
/// Not a terminator.
|
||||
None,
|
||||
/// A return instruction.
|
||||
Ret,
|
||||
/// An unconditional branch to another block.
|
||||
Uncond(BlockIndex),
|
||||
/// A conditional branch to one of two other blocks.
|
||||
Cond(BlockIndex, BlockIndex),
|
||||
/// An indirect branch with known possible targets.
|
||||
Indirect(&'a [BlockIndex]),
|
||||
}
|
||||
|
||||
/// A trait describing the ability to encode a MachInst into binary machine code.
|
||||
pub trait MachInstEmit<O: MachSectionOutput> {
|
||||
/// Emit the instruction.
|
||||
fn emit(&self, code: &mut O);
|
||||
}

/// The result of a `MachBackend::compile_function()` call. Contains machine
/// code (as bytes) and a disassembly, if requested.
pub struct MachCompileResult {
    /// Machine code.
    pub sections: MachSections,
    /// Size of stack frame, in bytes.
    pub frame_size: u32,
    /// Disassembly, if requested.
    pub disasm: Option<String>,
}

impl MachCompileResult {
    /// Get a `CodeInfo` describing section sizes from this compilation result.
    pub fn code_info(&self) -> CodeInfo {
        let code_size = self.sections.total_size();
        CodeInfo {
            code_size,
            jumptables_size: 0,
            rodata_size: 0,
            total_size: code_size,
        }
    }
}

/// Top-level machine backend trait, which wraps all monomorphized code and
/// allows a virtual call from the machine-independent `Function::compile()`.
pub trait MachBackend {
    /// Compile the given function. Consumes the function.
    fn compile_function(
        &self,
        func: Function,
        want_disasm: bool,
    ) -> CodegenResult<MachCompileResult>;

    /// Return flags for this backend.
    fn flags(&self) -> &Flags;

    /// Return the triple for this backend.
    fn triple(&self) -> Triple;

    /// Return the name for this backend.
    fn name(&self) -> &'static str;

    /// Return the register universe for this backend.
    fn reg_universe(&self) -> RealRegUniverse;

    /// Machine-specific condcode info needed by TargetIsa: the condition for
    /// unsigned-add overflow.
    fn unsigned_add_overflow_condition(&self) -> IntCC {
        // TODO: this is what x86 specifies. Is this right for arm64?
        IntCC::UnsignedLessThan
    }

    /// Machine-specific condcode info needed by TargetIsa: the condition for
    /// unsigned-sub overflow.
    fn unsigned_sub_overflow_condition(&self) -> IntCC {
        // TODO: this is what x86 specifies. Is this right for arm64?
        IntCC::UnsignedLessThan
    }
}
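
// For orientation, the machine-independent side will drive this trait
// roughly as follows. A sketch only: `compile_with` is not part of this
// patch, and the real entry point (wired up in a later patch) lives behind
// `TargetIsa`.
fn compile_with(backend: &dyn MachBackend, func: Function) -> CodegenResult<()> {
    // Request a disassembly alongside the machine code.
    let result = backend.compile_function(func, /* want_disasm = */ true)?;
    let info = result.code_info();
    println!("{}: {} bytes of code", backend.name(), info.total_size);
    if let Some(disasm) = &result.disasm {
        println!("{}", disasm);
    }
    Ok(())
}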

66
cranelift/codegen/src/machinst/pp.rs
Normal file
@@ -0,0 +1,66 @@
//! Pretty-printing for machine code (virtual-registerized or final).

use regalloc::{RealRegUniverse, Reg, Writable};

use std::fmt::Debug;
use std::hash::Hash;
use std::string::{String, ToString};

// FIXME: Should this go into regalloc.rs instead?

/// A trait for printing instruction bits and pieces, with the ability to
/// take a contextualising RealRegUniverse that is used to give proper names to
/// registers.
pub trait ShowWithRRU {
    /// Return a string that shows the implementing object in context of the
    /// given `RealRegUniverse`, if provided.
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String;

    /// The same as |show_rru|, but with an optional hint giving a size in
    /// bytes. Its interpretation is object-dependent, and it is intended to
    /// pass around enough information to facilitate printing sub-parts of
    /// real registers correctly. Objects may ignore size hints that are
    /// irrelevant to them.
    fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, _size: u8) -> String {
        // The default implementation ignores the hint.
        self.show_rru(mb_rru)
    }
}

impl ShowWithRRU for Reg {
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        if self.is_real() {
            if let Some(rru) = mb_rru {
                let reg_ix = self.get_index();
                if reg_ix < rru.regs.len() {
                    return rru.regs[reg_ix].1.to_string();
                } else {
                    // We have a real reg which isn't listed in the universe.
                    // Per the regalloc.rs interface requirements, this is
                    // Totally Not Allowed. Print it generically anyway, so
                    // we have something to debug.
                    return format!("!!{:?}!!", self);
                }
            }
        }
        // The reg is virtual, or we have no universe. Be generic.
        format!("%{:?}", self)
    }

    fn show_rru_sized(&self, _mb_rru: Option<&RealRegUniverse>, _size: u8) -> String {
        // For the specific case of Reg, we demand not to have a size hint,
        // since interpretation of the size is target specific, but this code
        // is used by all targets.
        panic!("Reg::show_rru_sized: impossible to implement");
    }
}

impl<R: ShowWithRRU + Copy + Ord + Hash + Eq + Debug> ShowWithRRU for Writable<R> {
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        self.to_reg().show_rru(mb_rru)
    }

    fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
        self.to_reg().show_rru_sized(mb_rru, size)
    }
}
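
// To make the trait's intent concrete: an instruction type typically
// implements `ShowWithRRU` by delegating register printing to its operands.
// `ToyMove` is hypothetical, for illustration only.
struct ToyMove {
    dst: Writable<Reg>,
    src: Reg,
}

impl ShowWithRRU for ToyMove {
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        // Real registers print with their universe names; virtual registers
        // fall back to the generic `%vN` form.
        format!("mov {}, {}", self.dst.show_rru(mb_rru), self.src.show_rru(mb_rru))
    }
}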

351
cranelift/codegen/src/machinst/sections.rs
Normal file
@@ -0,0 +1,351 @@
//! In-memory representation of compiled machine code, in multiple sections
//! (text, constant pool / rodata, etc). Emission occurs into multiple sections
//! simultaneously, so we buffer the result in memory and hand it off to the
//! caller at the end of compilation.

use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc, RelocSink, StackmapSink, TrapSink};
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};

use alloc::vec::Vec;

/// A collection of sections with defined start-offsets.
pub struct MachSections {
    /// Sections, in offset order.
    pub sections: Vec<MachSection>,
}

impl MachSections {
    /// New, empty set of sections.
    pub fn new() -> MachSections {
        MachSections { sections: vec![] }
    }

    /// Add a section with a known offset and size. Returns the index.
    pub fn add_section(&mut self, start: CodeOffset, length: CodeOffset) -> usize {
        let idx = self.sections.len();
        self.sections.push(MachSection::new(start, length));
        idx
    }

    /// Mutably borrow the given section by index.
    pub fn get_section<'a>(&'a mut self, idx: usize) -> &'a mut MachSection {
        &mut self.sections[idx]
    }

    /// Get mutable borrows of two sections simultaneously. Used during
    /// instruction emission to provide references to the .text and .rodata
    /// (constant pool) sections.
    pub fn two_sections<'a>(
        &'a mut self,
        idx1: usize,
        idx2: usize,
    ) -> (&'a mut MachSection, &'a mut MachSection) {
        assert!(idx1 < idx2);
        assert!(idx1 < self.sections.len());
        assert!(idx2 < self.sections.len());
        let (first, rest) = self.sections.split_at_mut(idx2);
        (&mut first[idx1], &mut rest[0])
    }

    /// Emit this set of sections to a set of sinks for the code,
    /// relocations, traps, and stackmaps.
    pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
        // N.B.: we emit every section into the .text section as far as
        // the `CodeSink` is concerned; we do not bother to segregate
        // the contents into the actual program text, the jumptable and the
        // rodata (constant pool). This allows us to generate code assuming
        // that these will not be relocated relative to each other, and avoids
        // having to designate each section as belonging in one of the three
        // fixed categories defined by `CodeSink`. If this becomes a problem
        // later (e.g. because of memory permissions or similar), we can
        // add this designation and segregate the output; take care, however,
        // to add the appropriate relocations in this case.

        for section in &self.sections {
            if section.data.len() > 0 {
                while sink.offset() < section.start_offset {
                    sink.put1(0);
                }
                section.emit(sink);
            }
        }
        sink.begin_jumptables();
        sink.begin_rodata();
        sink.end_codegen();
    }

    /// Get the total required size for these sections.
    pub fn total_size(&self) -> CodeOffset {
        if self.sections.len() == 0 {
            0
        } else {
            // Find the last non-empty section.
            self.sections
                .iter()
                .rev()
                .find(|s| s.data.len() > 0)
                .map(|s| s.cur_offset_from_start())
                .unwrap_or(0)
        }
    }
}
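
// A typical use, assuming the code and constant-pool sizes have already
// been measured (see `MachSectionSize` below): lay out .text at offset 0
// and .rodata after it, then borrow both during emission. A sketch with
// assumed sizes and example byte values.
fn layout_sections(code_size: CodeOffset, rodata_size: CodeOffset) -> MachSections {
    let mut sections = MachSections::new();
    let code_idx = sections.add_section(0, code_size);
    let rodata_idx = sections.add_section(code_size, rodata_size);
    {
        // Borrow .text and .rodata simultaneously during emission.
        let (code, rodata) = sections.two_sections(code_idx, rodata_idx);
        code.put4(0xd503_201f); // e.g., an AArch64 NOP word
        rodata.put8(0x3ff0_0000_0000_0000); // e.g., the f64 1.0 bit pattern
    }
    sections
}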

/// An abstraction over MachSection and MachSectionSize: some
/// receiver of section data.
pub trait MachSectionOutput {
    /// Get the current offset from the start of all sections.
    fn cur_offset_from_start(&self) -> CodeOffset;

    /// Get the start offset of this section.
    fn start_offset(&self) -> CodeOffset;

    /// Add 1 byte to the section.
    fn put1(&mut self, _: u8);

    /// Add 2 bytes to the section, little-endian.
    fn put2(&mut self, value: u16) {
        self.put1((value & 0xff) as u8);
        self.put1(((value >> 8) & 0xff) as u8);
    }

    /// Add 4 bytes to the section, little-endian.
    fn put4(&mut self, value: u32) {
        self.put1((value & 0xff) as u8);
        self.put1(((value >> 8) & 0xff) as u8);
        self.put1(((value >> 16) & 0xff) as u8);
        self.put1(((value >> 24) & 0xff) as u8);
    }

    /// Add 8 bytes to the section, little-endian.
    fn put8(&mut self, value: u64) {
        self.put1((value & 0xff) as u8);
        self.put1(((value >> 8) & 0xff) as u8);
        self.put1(((value >> 16) & 0xff) as u8);
        self.put1(((value >> 24) & 0xff) as u8);
        self.put1(((value >> 32) & 0xff) as u8);
        self.put1(((value >> 40) & 0xff) as u8);
        self.put1(((value >> 48) & 0xff) as u8);
        self.put1(((value >> 56) & 0xff) as u8);
    }

    /// Add a slice of bytes to the section.
    fn put_data(&mut self, data: &[u8]);

    /// Add a relocation at the current offset.
    fn add_reloc(&mut self, loc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend);

    /// Add a trap record at the current offset.
    fn add_trap(&mut self, loc: SourceLoc, code: TrapCode);

    /// Add a call-return-address record at the current offset.
    fn add_call_site(&mut self, loc: SourceLoc, opcode: Opcode);

    /// Align up to the given alignment, which must be a power of two.
    fn align_to(&mut self, align_to: CodeOffset) {
        assert!(align_to.is_power_of_two());
        while self.cur_offset_from_start() & (align_to - 1) != 0 {
            self.put1(0);
        }
    }
}
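
// Note that the default `put2`/`put4`/`put8` implementations above are
// little-endian by construction, which matches both x86-64 and AArch64.
// For instance:
fn demo_little_endian() {
    let mut section = MachSection::new(0, 4);
    section.put4(0x1122_3344);
    // Bytes are appended low-order first.
    assert_eq!(section.data, vec![0x44, 0x33, 0x22, 0x11]);
}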

/// A section of output to be emitted to a CodeSink / RelocSink in bulk.
/// Multiple sections may be created with known start offsets in advance; the
/// usual use-case is to create the .text (code) and .rodata (constant pool)
/// sections at once, after computing the length of the code, so that constant
/// references can use known offsets as instructions are emitted.
pub struct MachSection {
    /// The starting offset of this section.
    pub start_offset: CodeOffset,
    /// The limit of this section, defined by the start of the next section.
    pub length_limit: CodeOffset,
    /// The section contents, as raw bytes.
    pub data: Vec<u8>,
    /// Any relocations referring to this section.
    pub relocs: Vec<MachReloc>,
    /// Any trap records referring to this section.
    pub traps: Vec<MachTrap>,
    /// Any call site records referring to this section.
    pub call_sites: Vec<MachCallSite>,
}

impl MachSection {
    /// Create a new section, known to start at `start_offset` and with a size limited to `length_limit`.
    pub fn new(start_offset: CodeOffset, length_limit: CodeOffset) -> MachSection {
        MachSection {
            start_offset,
            length_limit,
            data: vec![],
            relocs: vec![],
            traps: vec![],
            call_sites: vec![],
        }
    }

    /// Emit this section to the CodeSink and other associated sinks. The
    /// current offset of the CodeSink must match the starting offset of this
    /// section.
    pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
        assert!(sink.offset() == self.start_offset);

        // Walk the bytes in order, interleaving the (offset-sorted) reloc,
        // trap, and call-site records at the offsets where they apply.
        let mut next_reloc = 0;
        let mut next_trap = 0;
        let mut next_call_site = 0;
        for (idx, byte) in self.data.iter().enumerate() {
            if next_reloc < self.relocs.len() {
                let reloc = &self.relocs[next_reloc];
                if reloc.offset == idx as CodeOffset {
                    sink.reloc_external(reloc.srcloc, reloc.kind, &reloc.name, reloc.addend);
                    next_reloc += 1;
                }
            }
            if next_trap < self.traps.len() {
                let trap = &self.traps[next_trap];
                if trap.offset == idx as CodeOffset {
                    sink.trap(trap.code, trap.srcloc);
                    next_trap += 1;
                }
            }
            if next_call_site < self.call_sites.len() {
                let call_site = &self.call_sites[next_call_site];
                if call_site.ret_addr == idx as CodeOffset {
                    sink.add_call_site(call_site.opcode, call_site.srcloc);
                    next_call_site += 1;
                }
            }
            sink.put1(*byte);
        }
    }
}

impl MachSectionOutput for MachSection {
    fn cur_offset_from_start(&self) -> CodeOffset {
        self.start_offset + self.data.len() as CodeOffset
    }

    fn start_offset(&self) -> CodeOffset {
        self.start_offset
    }

    fn put1(&mut self, value: u8) {
        assert!(((self.data.len() + 1) as CodeOffset) <= self.length_limit);
        self.data.push(value);
    }

    fn put_data(&mut self, data: &[u8]) {
        assert!(((self.data.len() + data.len()) as CodeOffset) <= self.length_limit);
        self.data.extend_from_slice(data);
    }

    fn add_reloc(&mut self, srcloc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend) {
        let name = name.clone();
        self.relocs.push(MachReloc {
            offset: self.data.len() as CodeOffset,
            srcloc,
            kind,
            name,
            addend,
        });
    }

    fn add_trap(&mut self, srcloc: SourceLoc, code: TrapCode) {
        self.traps.push(MachTrap {
            offset: self.data.len() as CodeOffset,
            srcloc,
            code,
        });
    }

    fn add_call_site(&mut self, srcloc: SourceLoc, opcode: Opcode) {
        self.call_sites.push(MachCallSite {
            ret_addr: self.data.len() as CodeOffset,
            srcloc,
            opcode,
        });
    }
}

/// A MachSectionOutput implementation that records only the size.
pub struct MachSectionSize {
    /// The starting offset of this section.
    pub start_offset: CodeOffset,
    /// The current offset of this section.
    pub offset: CodeOffset,
}

impl MachSectionSize {
    /// Create a new size-counting dummy section.
    pub fn new(start_offset: CodeOffset) -> MachSectionSize {
        MachSectionSize {
            start_offset,
            offset: start_offset,
        }
    }

    /// Return the size this section would take if emitted with a real sink.
    pub fn size(&self) -> CodeOffset {
        self.offset - self.start_offset
    }
}

impl MachSectionOutput for MachSectionSize {
    fn cur_offset_from_start(&self) -> CodeOffset {
        // All size-counting sections conceptually start at offset 0; this
        // doesn't matter when counting code size.
        self.offset
    }

    fn start_offset(&self) -> CodeOffset {
        self.start_offset
    }

    fn put1(&mut self, _: u8) {
        self.offset += 1;
    }

    fn put_data(&mut self, data: &[u8]) {
        self.offset += data.len() as CodeOffset;
    }

    fn add_reloc(&mut self, _: SourceLoc, _: Reloc, _: &ExternalName, _: Addend) {}

    fn add_trap(&mut self, _: SourceLoc, _: TrapCode) {}

    fn add_call_site(&mut self, _: SourceLoc, _: Opcode) {}
}
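
// Together, `MachSection` and `MachSectionSize` enable the backend's
// two-pass emission strategy: measure first with the size-only sink, then
// emit for real into a buffer of exactly that size. A sketch, generic over
// any instruction implementing both emission traits:
fn measure_and_emit<I>(insts: &[I]) -> MachSection
where
    I: MachInstEmit<MachSectionSize> + MachInstEmit<MachSection>,
{
    // Pass 1: count bytes only.
    let mut size = MachSectionSize::new(0);
    for inst in insts {
        inst.emit(&mut size);
    }
    // Pass 2: emit into a real buffer sized by pass 1.
    let mut section = MachSection::new(0, size.size());
    for inst in insts {
        inst.emit(&mut section);
    }
    section
}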

/// A relocation resulting from a compilation.
pub struct MachReloc {
    /// The offset at which the relocation applies, *relative to the
    /// containing section*.
    pub offset: CodeOffset,
    /// The original source location.
    pub srcloc: SourceLoc,
    /// The kind of relocation.
    pub kind: Reloc,
    /// The external symbol / name to which this relocation refers.
    pub name: ExternalName,
    /// The addend to add to the symbol value.
    pub addend: i64,
}

/// A trap record resulting from a compilation.
pub struct MachTrap {
    /// The offset at which the trap instruction occurs, *relative to the
    /// containing section*.
    pub offset: CodeOffset,
    /// The original source location.
    pub srcloc: SourceLoc,
    /// The trap code.
    pub code: TrapCode,
}

/// A call site record resulting from a compilation.
pub struct MachCallSite {
    /// The offset of the call's return address, *relative to the containing section*.
    pub ret_addr: CodeOffset,
    /// The original source location.
    pub srcloc: SourceLoc,
    /// The call's opcode.
    pub opcode: Opcode,
}

738
cranelift/codegen/src/machinst/vcode.rs
Normal file
@@ -0,0 +1,738 @@
//! This implements the VCode container: a CFG of Insts that have been lowered.
//!
//! VCode is virtual-register code. An instruction in VCode is almost a machine
//! instruction; however, its register slots can refer to virtual registers in
//! addition to real machine registers.
//!
//! VCode is structured with traditional basic blocks, and each block must be
//! terminated by an unconditional branch (one target), a conditional branch
//! (two targets), or a return (no targets). Note that this slightly differs
//! from the machine code of most ISAs: in most ISAs, a conditional branch has
//! one target (and the not-taken case falls through). However, we expect that
//! machine backends will elide branches to the following block (i.e.,
//! zero-offset jumps), and will be able to codegen a branch-cond /
//! branch-uncond pair if *both* targets are not fallthrough. This allows us to
//! play with layout prior to final binary emission, as well, if we want.
//!
//! See the main module comment in `mod.rs` for more details on the VCode-based
//! backend pipeline.

use crate::binemit::Reloc;
use crate::ir;
use crate::machinst::*;
use crate::settings;

use regalloc::Function as RegallocFunction;
use regalloc::Set as RegallocSet;
use regalloc::{BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector};

use alloc::boxed::Box;
use alloc::vec::Vec;
use log::debug;
use smallvec::SmallVec;
use std::fmt;
use std::iter;
use std::ops::Index;
use std::string::String;

/// Index referring to an instruction in VCode.
pub type InsnIndex = u32;
/// Index referring to a basic block in VCode.
pub type BlockIndex = u32;

/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
/// a `MachInst` and it must be able to emit itself at least to a
/// `MachSectionSize` (the size-counting sink).
pub trait VCodeInst: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize> {}
impl<I: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize>> VCodeInst for I {}

/// A function in "VCode" (virtual-registerized code) form, after lowering.
/// This is essentially a standard CFG of basic blocks, where each basic block
/// consists of lowered instructions produced by the machine-specific backend.
pub struct VCode<I: VCodeInst> {
    /// Function liveins.
    liveins: RegallocSet<RealReg>,

    /// Function liveouts.
    liveouts: RegallocSet<RealReg>,

    /// VReg IR-level types.
    vreg_types: Vec<Type>,

    /// Lowered machine instructions in order corresponding to the original IR.
    pub insts: Vec<I>,

    /// Entry block.
    entry: BlockIndex,

    /// Block instruction indices.
    pub block_ranges: Vec<(InsnIndex, InsnIndex)>,

    /// Block successors: index range in the successor list below.
    block_succ_range: Vec<(usize, usize)>,

    /// Block successor lists, concatenated into one Vec. The `block_succ_range`
    /// list of tuples above gives (start, end) ranges within this list that
    /// correspond to each basic block's successors.
    block_succs: Vec<BlockIndex>,

    /// Block indices by IR block.
    block_by_bb: SecondaryMap<ir::Block, BlockIndex>,

    /// IR block for each VCode Block. The length of this Vec will likely be
    /// less than the total number of Blocks, because new Blocks (for edge
    /// splits, for example) are appended during lowering.
    bb_by_block: Vec<ir::Block>,

    /// Order of block IDs in final generated code.
    final_block_order: Vec<BlockIndex>,

    /// Final block offsets. Computed during branch finalization and used
    /// during emission.
    final_block_offsets: Vec<CodeOffset>,

    /// Size of code, accounting for block layout / alignment.
    code_size: CodeOffset,

    /// ABI object.
    abi: Box<dyn ABIBody<I>>,
}

/// A builder for a VCode function body. This builder is designed for the
/// lowering approach that we take: we traverse basic blocks in forward
/// (original IR) order, but within each basic block, we generate code from
/// bottom to top; and within each IR instruction that we visit in this reverse
/// order, we emit machine instructions in *forward* order again.
///
/// Hence, to produce the final instructions in proper order, we perform two
/// reversals. First, the machine instructions (`I` instances) are produced in
/// forward order for an individual IR instruction. Then these are *reversed*
/// and concatenated to `bb_insns` at the end of the IR instruction's lowering.
/// The `bb_insns` vec will thus contain all machine instructions for a basic
/// block, in reverse order. Finally, when we're done with a basic block, we
/// reverse the whole block's vec of instructions again, and concatenate onto
/// the VCode's insts.
pub struct VCodeBuilder<I: VCodeInst> {
    /// In-progress VCode.
    vcode: VCode<I>,

    /// Current basic block instructions, in reverse order (because blocks are
    /// built bottom-to-top).
    bb_insns: SmallVec<[I; 32]>,

    /// Current IR-inst instructions, in forward order.
    ir_inst_insns: SmallVec<[I; 4]>,

    /// Start of succs for the current block in the concatenated succs list.
    succ_start: usize,
}

impl<I: VCodeInst> VCodeBuilder<I> {
    /// Create a new VCodeBuilder.
    pub fn new(abi: Box<dyn ABIBody<I>>) -> VCodeBuilder<I> {
        let vcode = VCode::new(abi);
        VCodeBuilder {
            vcode,
            bb_insns: SmallVec::new(),
            ir_inst_insns: SmallVec::new(),
            succ_start: 0,
        }
    }

    /// Access the ABI object.
    pub fn abi(&mut self) -> &mut dyn ABIBody<I> {
        &mut *self.vcode.abi
    }

    /// Set the type of a VReg.
    pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
        while self.vcode.vreg_types.len() <= vreg.get_index() {
            self.vcode.vreg_types.push(ir::types::I8); // Default type.
        }
        self.vcode.vreg_types[vreg.get_index()] = ty;
    }

    /// Return the underlying bb-to-BlockIndex map.
    pub fn blocks_by_bb(&self) -> &SecondaryMap<ir::Block, BlockIndex> {
        &self.vcode.block_by_bb
    }

    /// Initialize the bb-to-BlockIndex map. Returns the first free
    /// BlockIndex.
    pub fn init_bb_map(&mut self, blocks: &[ir::Block]) -> BlockIndex {
        let mut bindex: BlockIndex = 0;
        for bb in blocks.iter() {
            self.vcode.block_by_bb[*bb] = bindex;
            self.vcode.bb_by_block.push(*bb);
            bindex += 1;
        }
        bindex
    }

    /// Get the BlockIndex for an IR block.
    pub fn bb_to_bindex(&self, bb: ir::Block) -> BlockIndex {
        self.vcode.block_by_bb[bb]
    }

    /// Set the given block as the entry block.
    pub fn set_entry(&mut self, block: BlockIndex) {
        self.vcode.entry = block;
    }

    /// End the current IR instruction. Must be called after pushing any
    /// instructions and prior to ending the basic block.
    pub fn end_ir_inst(&mut self) {
        while let Some(i) = self.ir_inst_insns.pop() {
            self.bb_insns.push(i);
        }
    }

    /// End the current basic block. Must be called after emitting vcode insts
    /// for IR insts and prior to ending the function (building the VCode).
    pub fn end_bb(&mut self) -> BlockIndex {
        assert!(self.ir_inst_insns.is_empty());
        let block_num = self.vcode.block_ranges.len() as BlockIndex;
        // Push the instructions.
        let start_idx = self.vcode.insts.len() as InsnIndex;
        while let Some(i) = self.bb_insns.pop() {
            self.vcode.insts.push(i);
        }
        let end_idx = self.vcode.insts.len() as InsnIndex;
        // Add the instruction index range to the list of blocks.
        self.vcode.block_ranges.push((start_idx, end_idx));
        // End the successors list.
        let succ_end = self.vcode.block_succs.len();
        self.vcode
            .block_succ_range
            .push((self.succ_start, succ_end));
        self.succ_start = succ_end;

        block_num
    }

    /// Push an instruction for the current BB and current IR inst within the BB.
    pub fn push(&mut self, insn: I) {
        match insn.is_term() {
            MachTerminator::None | MachTerminator::Ret => {}
            MachTerminator::Uncond(target) => {
                self.vcode.block_succs.push(target);
            }
            MachTerminator::Cond(true_branch, false_branch) => {
                self.vcode.block_succs.push(true_branch);
                self.vcode.block_succs.push(false_branch);
            }
            MachTerminator::Indirect(targets) => {
                for target in targets {
                    self.vcode.block_succs.push(*target);
                }
            }
        }
        self.ir_inst_insns.push(insn);
    }

    /// Build the final VCode.
    pub fn build(self) -> VCode<I> {
        assert!(self.ir_inst_insns.is_empty());
        assert!(self.bb_insns.is_empty());
        self.vcode
    }
}
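
// The intended call pattern during lowering, per basic block. A sketch:
// the per-IR-instruction machine code is assumed to come from the machine
// backend's lowering, which lands in a later patch of this series.
fn lower_block<I: VCodeInst>(
    builder: &mut VCodeBuilder<I>,
    // Machine instructions grouped per IR instruction, with the groups in
    // bottom-to-top IR order (last IR instruction first).
    lowered: Vec<Vec<I>>,
) -> BlockIndex {
    for ir_inst_group in lowered {
        for mach_inst in ir_inst_group {
            builder.push(mach_inst); // forward order within the group
        }
        builder.end_ir_inst(); // reverses the group onto the block
    }
    builder.end_bb() // reverses the block into final forward order
}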

fn block_ranges(indices: &[InstIx], len: usize) -> Vec<(usize, usize)> {
    // Convert a list of block-start indices (plus the total length) into
    // half-open (start, end) ranges: e.g., starts [0, 3, 7] with len 10
    // yield [(0, 3), (3, 7), (7, 10)].
    let v = indices
        .iter()
        .map(|iix| iix.get() as usize)
        .chain(iter::once(len))
        .collect::<Vec<usize>>();
    v.windows(2).map(|p| (p[0], p[1])).collect()
}

fn is_redundant_move<I: VCodeInst>(insn: &I) -> bool {
    if let Some((to, from)) = insn.is_move() {
        to.to_reg() == from
    } else {
        false
    }
}

fn is_trivial_jump_block<I: VCodeInst>(vcode: &VCode<I>, block: BlockIndex) -> Option<BlockIndex> {
    let range = vcode.block_insns(BlockIx::new(block));

    debug!(
        "is_trivial_jump_block: block {} has len {}",
        block,
        range.len()
    );

    if range.len() != 1 {
        return None;
    }
    let insn = range.first();

    debug!(
        " -> only insn is: {:?} with terminator {:?}",
        vcode.get_insn(insn),
        vcode.get_insn(insn).is_term()
    );

    match vcode.get_insn(insn).is_term() {
        MachTerminator::Uncond(target) => Some(target),
        _ => None,
    }
}

impl<I: VCodeInst> VCode<I> {
    /// New empty VCode.
    fn new(abi: Box<dyn ABIBody<I>>) -> VCode<I> {
        VCode {
            liveins: abi.liveins(),
            liveouts: abi.liveouts(),
            vreg_types: vec![],
            insts: vec![],
            entry: 0,
            block_ranges: vec![],
            block_succ_range: vec![],
            block_succs: vec![],
            block_by_bb: SecondaryMap::with_default(0),
            bb_by_block: vec![],
            final_block_order: vec![],
            final_block_offsets: vec![],
            code_size: 0,
            abi,
        }
    }

    /// Get the IR-level type of a VReg.
    pub fn vreg_type(&self, vreg: VirtualReg) -> Type {
        self.vreg_types[vreg.get_index()]
    }

    /// Get the entry block.
    pub fn entry(&self) -> BlockIndex {
        self.entry
    }

    /// Get the number of blocks. Block indices will be in the range `0 ..
    /// self.num_blocks()`.
    pub fn num_blocks(&self) -> usize {
        self.block_ranges.len()
    }

    /// Stack frame size for the full function's body.
    pub fn frame_size(&self) -> u32 {
        self.abi.frame_size()
    }

    /// Get the successors for a block.
    pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] {
        let (start, end) = self.block_succ_range[block as usize];
        &self.block_succs[start..end]
    }

    /// Take the results of register allocation, with a sequence of
    /// instructions including spliced fill/reload/move instructions, and
    /// replace the VCode's instructions with them.
    pub fn replace_insns_from_regalloc(
        &mut self,
        result: RegAllocResult<Self>,
        flags: &settings::Flags,
    ) {
        self.final_block_order = compute_final_block_order(self);

        // Record the spillslot count and clobbered registers for the ABI/stack
        // setup code.
        self.abi.set_num_spillslots(result.num_spill_slots as usize);
        self.abi
            .set_clobbered(result.clobbered_registers.map(|r| Writable::from_reg(*r)));

        // We want to move instructions over in final block order, using the new
        // block-start map given by the regalloc.
        let block_ranges: Vec<(usize, usize)> =
            block_ranges(result.target_map.elems(), result.insns.len());
        let mut final_insns = vec![];
        let mut final_block_ranges = vec![(0, 0); self.num_blocks()];

        for block in &self.final_block_order {
            let (start, end) = block_ranges[*block as usize];
            let final_start = final_insns.len() as InsnIndex;

            if *block == self.entry {
                // Start with the prologue.
                final_insns.extend(self.abi.gen_prologue(flags).into_iter());
            }

            for i in start..end {
                let insn = &result.insns[i];

                // Elide redundant moves at this point (we only know what is
                // redundant once registers are allocated).
                if is_redundant_move(insn) {
                    continue;
                }

                // Whenever we encounter a return instruction, replace it
                // with the epilogue.
                let is_ret = insn.is_term() == MachTerminator::Ret;
                if is_ret {
                    final_insns.extend(self.abi.gen_epilogue(flags).into_iter());
                } else {
                    final_insns.push(insn.clone());
                }
            }

            let final_end = final_insns.len() as InsnIndex;
            final_block_ranges[*block as usize] = (final_start, final_end);
        }

        self.insts = final_insns;
        self.block_ranges = final_block_ranges;
    }

    /// Removes redundant branches, rewriting targets to point directly to the
    /// ultimate block at the end of a chain of trivial one-target jumps.
    pub fn remove_redundant_branches(&mut self) {
        // For each block, compute the actual target block, looking through up
        // to one block with single-target jumps (this will remove empty edge
        // blocks inserted by phi-lowering).
        let block_rewrites: Vec<BlockIndex> = (0..self.num_blocks() as u32)
            .map(|bix| is_trivial_jump_block(self, bix).unwrap_or(bix))
            .collect();
        let mut refcounts: Vec<usize> = vec![0; self.num_blocks()];

        debug!(
            "remove_redundant_branches: block_rewrites = {:?}",
            block_rewrites
        );

        refcounts[self.entry as usize] = 1;

        for block in 0..self.num_blocks() as u32 {
            for insn in self.block_insns(BlockIx::new(block)) {
                self.get_insn_mut(insn)
                    .with_block_rewrites(&block_rewrites[..]);
                match self.get_insn(insn).is_term() {
                    MachTerminator::Uncond(bix) => {
                        refcounts[bix as usize] += 1;
                    }
                    MachTerminator::Cond(bix1, bix2) => {
                        refcounts[bix1 as usize] += 1;
                        refcounts[bix2 as usize] += 1;
                    }
                    MachTerminator::Indirect(blocks) => {
                        for block in blocks {
                            refcounts[*block as usize] += 1;
                        }
                    }
                    _ => {}
                }
            }
        }

        // Any block that is never referenced after rewriting is now dead.
        let deleted: Vec<bool> = refcounts.iter().map(|r| *r == 0).collect();

        let block_order = std::mem::replace(&mut self.final_block_order, vec![]);
        self.final_block_order = block_order
            .into_iter()
            .filter(|b| !deleted[*b as usize])
            .collect();

        // Rewrite successor information based on the block-rewrite map.
        for succ in &mut self.block_succs {
            let new_succ = block_rewrites[*succ as usize];
            *succ = new_succ;
        }
    }

    /// Mutate branch instructions to (i) lower two-way condbrs to one-way,
    /// depending on fallthrough; and (ii) use concrete offsets.
    pub fn finalize_branches(&mut self)
    where
        I: MachInstEmit<MachSectionSize>,
    {
        // Compute the fallthrough block, indexed by block.
        let num_final_blocks = self.final_block_order.len();
        let mut block_fallthrough: Vec<Option<BlockIndex>> = vec![None; self.num_blocks()];
        for i in 0..(num_final_blocks - 1) {
            let from = self.final_block_order[i];
            let to = self.final_block_order[i + 1];
            block_fallthrough[from as usize] = Some(to);
        }

        // Pass over VCode instructions and finalize two-way branches into
        // one-way branches with fallthrough.
        for block in 0..self.num_blocks() {
            let next_block = block_fallthrough[block];
            let (start, end) = self.block_ranges[block];

            for iix in start..end {
                let insn = &mut self.insts[iix as usize];
                insn.with_fallthrough_block(next_block);
            }
        }

        // Compute block offsets by dry-running emission into a size-only sink.
        let mut code_section = MachSectionSize::new(0);
        let mut block_offsets = vec![0; self.num_blocks()];
        for block in &self.final_block_order {
            code_section.offset = I::align_basic_block(code_section.offset);
            block_offsets[*block as usize] = code_section.offset;
            let (start, end) = self.block_ranges[*block as usize];
            for iix in start..end {
                self.insts[iix as usize].emit(&mut code_section);
            }
        }

        // We now have the section layout.
        self.final_block_offsets = block_offsets;
        self.code_size = code_section.size();

        // Update branches with known block offsets. This looks like the
        // traversal above, but (i) does not update block_offsets, rather uses
        // it (so forward references are now possible), and (ii) mutates the
        // instructions.
        let mut code_section = MachSectionSize::new(0);
        for block in &self.final_block_order {
            code_section.offset = I::align_basic_block(code_section.offset);
            let (start, end) = self.block_ranges[*block as usize];
            for iix in start..end {
                self.insts[iix as usize]
                    .with_block_offsets(code_section.offset, &self.final_block_offsets[..]);
                self.insts[iix as usize].emit(&mut code_section);
            }
        }
    }

    /// Emit the instructions to a list of sections.
    pub fn emit(&self) -> MachSections
    where
        I: MachInstEmit<MachSection>,
    {
        let mut sections = MachSections::new();
        let code_idx = sections.add_section(0, self.code_size);
        let code_section = sections.get_section(code_idx);

        for block in &self.final_block_order {
            let new_offset = I::align_basic_block(code_section.cur_offset_from_start());
            while new_offset > code_section.cur_offset_from_start() {
                // Pad with NOPs up to the aligned block offset.
                let nop = I::gen_nop((new_offset - code_section.cur_offset_from_start()) as usize);
                nop.emit(code_section);
            }
            assert_eq!(code_section.cur_offset_from_start(), new_offset);

            let (start, end) = self.block_ranges[*block as usize];
            for iix in start..end {
                self.insts[iix as usize].emit(code_section);
            }
        }

        sections
    }

    /// Get the IR block for a BlockIndex, if one exists.
    pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
        if (block as usize) < self.bb_by_block.len() {
            Some(self.bb_by_block[block as usize])
        } else {
            None
        }
    }
}
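
// Putting the pieces together: once register allocation has produced
// `result`, the machine-independent driver is expected to run roughly this
// sequence. A sketch of the intended pipeline, not code from this patch.
fn finish<I: VCodeInst>(
    mut vcode: VCode<I>,
    result: RegAllocResult<VCode<I>>,
    flags: &settings::Flags,
) -> MachSections {
    vcode.replace_insns_from_regalloc(result, flags);
    vcode.remove_redundant_branches();
    vcode.finalize_branches(); // lower two-way branches, compute offsets
    vcode.emit() // emit bytes at the now-known offsets
}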

impl<I: VCodeInst> RegallocFunction for VCode<I> {
    type Inst = I;

    fn insns(&self) -> &[I] {
        &self.insts[..]
    }

    fn insns_mut(&mut self) -> &mut [I] {
        &mut self.insts[..]
    }

    fn get_insn(&self, insn: InstIx) -> &I {
        &self.insts[insn.get() as usize]
    }

    fn get_insn_mut(&mut self, insn: InstIx) -> &mut I {
        &mut self.insts[insn.get() as usize]
    }

    fn blocks(&self) -> Range<BlockIx> {
        Range::new(BlockIx::new(0), self.block_ranges.len())
    }

    fn entry_block(&self) -> BlockIx {
        BlockIx::new(self.entry)
    }

    fn block_insns(&self, block: BlockIx) -> Range<InstIx> {
        let (start, end) = self.block_ranges[block.get() as usize];
        Range::new(InstIx::new(start), (end - start) as usize)
    }

    fn block_succs(&self, block: BlockIx) -> Vec<BlockIx> {
        let (start, end) = self.block_succ_range[block.get() as usize];
        self.block_succs[start..end]
            .iter()
            .cloned()
            .map(BlockIx::new)
            .collect()
    }

    fn is_ret(&self, insn: InstIx) -> bool {
        match self.insts[insn.get() as usize].is_term() {
            MachTerminator::Ret => true,
            _ => false,
        }
    }

    fn get_regs(insn: &I, collector: &mut RegUsageCollector) {
        insn.get_regs(collector)
    }

    fn map_regs(
        insn: &mut I,
        pre_map: &RegallocMap<VirtualReg, RealReg>,
        post_map: &RegallocMap<VirtualReg, RealReg>,
    ) {
        insn.map_regs(pre_map, post_map);
    }

    fn is_move(&self, insn: &I) -> Option<(Writable<Reg>, Reg)> {
        insn.is_move()
    }

    fn get_spillslot_size(&self, regclass: RegClass, vreg: VirtualReg) -> u32 {
        let ty = self.vreg_type(vreg);
        self.abi.get_spillslot_size(regclass, ty)
    }

    fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: VirtualReg) -> I {
        let ty = self.vreg_type(vreg);
        self.abi.gen_spill(to_slot, from_reg, ty)
    }

    fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, vreg: VirtualReg) -> I {
        let ty = self.vreg_type(vreg);
        self.abi.gen_reload(to_reg, from_slot, ty)
    }

    fn gen_move(&self, to_reg: Writable<RealReg>, from_reg: RealReg, vreg: VirtualReg) -> I {
        let ty = self.vreg_type(vreg);
        I::gen_move(to_reg.map(|r| r.to_reg()), from_reg.to_reg(), ty)
    }

    fn gen_zero_len_nop(&self) -> I {
        I::gen_zero_len_nop()
    }

    fn maybe_direct_reload(&self, insn: &I, reg: VirtualReg, slot: SpillSlot) -> Option<I> {
        insn.maybe_direct_reload(reg, slot)
    }

    fn func_liveins(&self) -> RegallocSet<RealReg> {
        self.liveins.clone()
    }

    fn func_liveouts(&self) -> RegallocSet<RealReg> {
        self.liveouts.clone()
    }
}

// N.B.: this Debug impl assumes that the VCode has already been through all
// compilation passes, and so has a final block order and offsets.

impl<I: VCodeInst> fmt::Debug for VCode<I> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        writeln!(f, "VCode_Debug {{")?;
        writeln!(f, "  Entry block: {}", self.entry)?;
        writeln!(f, "  Final block order: {:?}", self.final_block_order)?;

        for block in 0..self.num_blocks() {
            writeln!(f, "Block {}:", block)?;
            for succ in self.succs(block as BlockIndex) {
                writeln!(f, "  (successor: Block {})", succ)?;
            }
            let (start, end) = self.block_ranges[block];
            writeln!(f, "  (instruction range: {} .. {})", start, end)?;
            for inst in start..end {
                writeln!(f, "  Inst {}: {:?}", inst, self.insts[inst as usize])?;
            }
        }

        writeln!(f, "}}")?;
        Ok(())
    }
}

// Pretty-printing with `RealRegUniverse` context.
impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        use crate::alloc::string::ToString;
        use std::fmt::Write;

        // Calculate an order in which to display the blocks. This is the same
        // as final_block_order, but also includes blocks which are in the
        // representation but not in final_block_order.
        let mut display_order = Vec::<usize>::new();
        // First display blocks in |final_block_order|.
        for bix in &self.final_block_order {
            assert!((*bix as usize) < self.num_blocks());
            display_order.push(*bix as usize);
        }
        // Now also take care of those not listed in |final_block_order|.
        // This is quadratic, but it's also debug-only code.
        for bix in 0..self.num_blocks() {
            if display_order.contains(&bix) {
                continue;
            }
            display_order.push(bix);
        }

        let mut s = String::new();
        s = s + &format!("VCode_ShowWithRRU {{{{");
        s = s + &"\n".to_string();
        s = s + &format!("  Entry block: {}", self.entry);
        s = s + &"\n".to_string();
        s = s + &format!("  Final block order: {:?}", self.final_block_order);
        s = s + &"\n".to_string();

        for i in 0..self.num_blocks() {
            let block = display_order[i];

            let omitted =
                (if !self.final_block_order.is_empty() && i >= self.final_block_order.len() {
                    "** OMITTED **"
                } else {
                    ""
                })
                .to_string();

            s = s + &format!("Block {}: {}", block, omitted);
            s = s + &"\n".to_string();
            if let Some(bb) = self.bindex_to_bb(block as BlockIndex) {
                s = s + &format!("  (original IR block: {})\n", bb);
            }
            for succ in self.succs(block as BlockIndex) {
                s = s + &format!("  (successor: Block {})", succ);
                s = s + &"\n".to_string();
            }
            let (start, end) = self.block_ranges[block];
            s = s + &format!("  (instruction range: {} .. {})", start, end);
            s = s + &"\n".to_string();
            for inst in start..end {
                s = s + &format!(
                    "  Inst {}: {}",
                    inst,
                    self.insts[inst as usize].show_rru(mb_rru)
                );
                s = s + &"\n".to_string();
            }
        }

        s = s + &format!("}}}}");
        s = s + &"\n".to_string();

        s
    }
}

68
cranelift/codegen/src/num_uses.rs
Normal file
@@ -0,0 +1,68 @@
//! A pass that computes the number of uses of any given instruction.

#![allow(dead_code)]
#![allow(unused_imports)]

use crate::cursor::{Cursor, FuncCursor};
use crate::dce::has_side_effect;
use crate::entity::SecondaryMap;
use crate::ir::dfg::ValueDef;
use crate::ir::instructions::InstructionData;
use crate::ir::Value;
use crate::ir::{DataFlowGraph, Function, Inst, Opcode};

/// Auxiliary data structure that counts the number of uses of any given
/// instruction in a Function. This is used during instruction selection
/// to do incremental DCE, in essence: when an instruction is no longer
/// needed because its computation has been isel'd into another machine
/// instruction at every use site, we can skip it.
#[derive(Clone, Debug)]
pub struct NumUses {
    uses: SecondaryMap<Inst, u32>,
}

impl NumUses {
    fn new() -> NumUses {
        NumUses {
            uses: SecondaryMap::with_default(0),
        }
    }

    /// Compute the NumUses analysis result for a function.
    pub fn compute(func: &Function) -> NumUses {
        let mut uses = NumUses::new();
        for bb in func.layout.blocks() {
            for inst in func.layout.block_insts(bb) {
                for arg in func.dfg.inst_args(inst) {
                    let v = func.dfg.resolve_aliases(*arg);
                    uses.add_value(&func.dfg, v);
                }
            }
        }
        uses
    }

    fn add_value(&mut self, dfg: &DataFlowGraph, v: Value) {
        match dfg.value_def(v) {
            ValueDef::Result(inst, _) => {
                self.uses[inst] += 1;
            }
            _ => {}
        }
    }

    /// How many times is an instruction used?
    pub fn use_count(&self, i: Inst) -> usize {
        self.uses[i] as usize
    }

    /// Is an instruction used at all?
    pub fn is_used(&self, i: Inst) -> bool {
        self.use_count(i) > 0
    }

    /// Take the complete uses map, consuming this analysis result.
    pub fn take_uses(self) -> SecondaryMap<Inst, u32> {
        self.uses
    }
}
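
// During lowering, the analysis would be consumed along these lines; a
// sketch (the tree-matching instruction selector itself arrives in a later
// patch of this series, and the `has_side_effect` call is assumed from
// `dce.rs`):
fn should_skip(func: &Function, uses: &NumUses, inst: Inst) -> bool {
    // An instruction whose every use has been merged into another machine
    // instruction, and which has no side effects, can be skipped.
    !uses.is_used(inst) && !has_side_effect(func, inst)
}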