ARM64 backend, part 3 / 11: MachInst infrastructure.
This patch adds the MachInst, or Machine Instruction, infrastructure. This is the machine-independent portion of the new backend design. It contains the implementation of the "vcode" (virtual-registerized code) container, the top-level lowering algorithm and compilation pipeline, and the trait definitions that the machine backends will fill in. This backend infrastructure is included in the compilation of the `codegen` crate, but it is not yet tied into the public APIs; that patch will come last, after all the other pieces are filled in. This patch contains code written by Julian Seward <jseward@acm.org> and Benjamin Bouvier <public@benj.me>, originally developed on a side-branch before rebasing and condensing into this patch series. See the `arm64` branch at `https://github.com/cfallin/wasmtime` for original development history. Co-authored-by: Julian Seward <jseward@acm.org> Co-authored-by: Benjamin Bouvier <public@benj.me>
This commit is contained in:
17
Cargo.lock
generated
17
Cargo.lock
generated
@@ -379,6 +379,7 @@ dependencies = [
|
|||||||
"gimli",
|
"gimli",
|
||||||
"hashbrown 0.7.1",
|
"hashbrown 0.7.1",
|
||||||
"log",
|
"log",
|
||||||
|
"regalloc",
|
||||||
"serde",
|
"serde",
|
||||||
"smallvec",
|
"smallvec",
|
||||||
"target-lexicon",
|
"target-lexicon",
|
||||||
@@ -1599,6 +1600,16 @@ dependencies = [
|
|||||||
"rust-argon2",
|
"rust-argon2",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regalloc"
|
||||||
|
version = "0.0.17"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "89ce0cd835fa6e91bbf5d010beee19d0c2e97e4ad5e13c399a31122cfc83bdd6"
|
||||||
|
dependencies = [
|
||||||
|
"log",
|
||||||
|
"rustc-hash",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex"
|
name = "regex"
|
||||||
version = "1.3.6"
|
version = "1.3.6"
|
||||||
@@ -1663,6 +1674,12 @@ version = "0.1.16"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783"
|
checksum = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "rustc-hash"
|
||||||
|
version = "1.1.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustc_version"
|
name = "rustc_version"
|
||||||
version = "0.2.3"
|
version = "0.2.3"
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ gimli = { version = "0.20.0", default-features = false, features = ["write"], op
|
|||||||
smallvec = { version = "1.0.0" }
|
smallvec = { version = "1.0.0" }
|
||||||
thiserror = "1.0.4"
|
thiserror = "1.0.4"
|
||||||
byteorder = { version = "1.3.2", default-features = false }
|
byteorder = { version = "1.3.2", default-features = false }
|
||||||
|
regalloc = "0.0.17"
|
||||||
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
|
# It is a goal of the cranelift-codegen crate to have minimal external dependencies.
|
||||||
# Please don't add any unless they are essential to the task of creating binary
|
# Please don't add any unless they are essential to the task of creating binary
|
||||||
# machine code. Integration tests that need external dependencies can be
|
# machine code. Integration tests that need external dependencies can be
|
||||||
@@ -33,7 +34,7 @@ byteorder = { version = "1.3.2", default-features = false }
|
|||||||
cranelift-codegen-meta = { path = "meta", version = "0.62.0" }
|
cranelift-codegen-meta = { path = "meta", version = "0.62.0" }
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["std", "unwind"]
|
default = ["std", "unwind", "all-arch"]
|
||||||
|
|
||||||
# The "std" feature enables use of libstd. The "core" feature enables use
|
# The "std" feature enables use of libstd. The "core" feature enables use
|
||||||
# of some minimal std-like replacement libraries. At least one of these two
|
# of some minimal std-like replacement libraries. At least one of these two
|
||||||
|
|||||||
@@ -55,9 +55,10 @@ pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef};
|
|||||||
use crate::binemit;
|
use crate::binemit;
|
||||||
use crate::flowgraph;
|
use crate::flowgraph;
|
||||||
use crate::ir;
|
use crate::ir;
|
||||||
use crate::isa::enc_tables::Encodings;
|
pub use crate::isa::enc_tables::Encodings;
|
||||||
#[cfg(feature = "unwind")]
|
#[cfg(feature = "unwind")]
|
||||||
use crate::isa::fde::RegisterMappingError;
|
use crate::isa::fde::RegisterMappingError;
|
||||||
|
use crate::machinst::MachBackend;
|
||||||
use crate::regalloc;
|
use crate::regalloc;
|
||||||
use crate::result::CodegenResult;
|
use crate::result::CodegenResult;
|
||||||
use crate::settings;
|
use crate::settings;
|
||||||
@@ -400,6 +401,11 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
|
|||||||
) {
|
) {
|
||||||
// No-op by default
|
// No-op by default
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Get the new-style MachBackend, if this is an adapter around one.
|
||||||
|
fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Debug for &dyn TargetIsa {
|
impl Debug for &dyn TargetIsa {
|
||||||
|
|||||||
@@ -71,6 +71,7 @@ pub mod flowgraph;
|
|||||||
pub mod ir;
|
pub mod ir;
|
||||||
pub mod isa;
|
pub mod isa;
|
||||||
pub mod loop_analysis;
|
pub mod loop_analysis;
|
||||||
|
pub mod machinst;
|
||||||
pub mod print_errors;
|
pub mod print_errors;
|
||||||
pub mod settings;
|
pub mod settings;
|
||||||
pub mod timing;
|
pub mod timing;
|
||||||
@@ -90,6 +91,7 @@ mod iterators;
|
|||||||
mod legalizer;
|
mod legalizer;
|
||||||
mod licm;
|
mod licm;
|
||||||
mod nan_canonicalization;
|
mod nan_canonicalization;
|
||||||
|
mod num_uses;
|
||||||
mod partition_slice;
|
mod partition_slice;
|
||||||
mod postopt;
|
mod postopt;
|
||||||
mod predicates;
|
mod predicates;
|
||||||
|
|||||||
142
cranelift/codegen/src/machinst/abi.rs
Normal file
142
cranelift/codegen/src/machinst/abi.rs
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
//! ABI definitions.
|
||||||
|
|
||||||
|
use crate::ir;
|
||||||
|
use crate::ir::StackSlot;
|
||||||
|
use crate::machinst::*;
|
||||||
|
use crate::settings;
|
||||||
|
|
||||||
|
use regalloc::{Reg, Set, SpillSlot, VirtualReg, Writable};
|
||||||
|
|
||||||
|
/// Trait implemented by an object that tracks ABI-related state (e.g., stack
|
||||||
|
/// layout) and can generate code while emitting the *body* of a function.
|
||||||
|
pub trait ABIBody<I: VCodeInst> {
|
||||||
|
/// Get the liveins of the function.
|
||||||
|
fn liveins(&self) -> Set<RealReg>;
|
||||||
|
|
||||||
|
/// Get the liveouts of the function.
|
||||||
|
fn liveouts(&self) -> Set<RealReg>;
|
||||||
|
|
||||||
|
/// Number of arguments.
|
||||||
|
fn num_args(&self) -> usize;
|
||||||
|
|
||||||
|
/// Number of return values.
|
||||||
|
fn num_retvals(&self) -> usize;
|
||||||
|
|
||||||
|
/// Number of stack slots (not spill slots).
|
||||||
|
fn num_stackslots(&self) -> usize;
|
||||||
|
|
||||||
|
/// Generate an instruction which copies an argument to a destination
|
||||||
|
/// register.
|
||||||
|
fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> I;
|
||||||
|
|
||||||
|
/// Generate an instruction which copies a source register to a return
|
||||||
|
/// value slot.
|
||||||
|
fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Reg) -> I;
|
||||||
|
|
||||||
|
/// Generate a return instruction.
|
||||||
|
fn gen_ret(&self) -> I;
|
||||||
|
|
||||||
|
/// Generate an epilogue placeholder.
|
||||||
|
fn gen_epilogue_placeholder(&self) -> I;
|
||||||
|
|
||||||
|
// -----------------------------------------------------------------
|
||||||
|
// Every function above this line may only be called pre-regalloc.
|
||||||
|
// Every function below this line may only be called post-regalloc.
|
||||||
|
// `spillslots()` must be called before any other post-regalloc
|
||||||
|
// function.
|
||||||
|
// ----------------------------------------------------------------
|
||||||
|
|
||||||
|
/// Update with the number of spillslots, post-regalloc.
|
||||||
|
fn set_num_spillslots(&mut self, slots: usize);
|
||||||
|
|
||||||
|
/// Update with the clobbered registers, post-regalloc.
|
||||||
|
fn set_clobbered(&mut self, clobbered: Set<Writable<RealReg>>);
|
||||||
|
|
||||||
|
/// Load from a stackslot.
|
||||||
|
fn load_stackslot(
|
||||||
|
&self,
|
||||||
|
slot: StackSlot,
|
||||||
|
offset: usize,
|
||||||
|
ty: Type,
|
||||||
|
into_reg: Writable<Reg>,
|
||||||
|
) -> I;
|
||||||
|
|
||||||
|
/// Store to a stackslot.
|
||||||
|
fn store_stackslot(&self, slot: StackSlot, offset: usize, ty: Type, from_reg: Reg) -> I;
|
||||||
|
|
||||||
|
/// Load from a spillslot.
|
||||||
|
fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable<Reg>) -> I;
|
||||||
|
|
||||||
|
/// Store to a spillslot.
|
||||||
|
fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> I;
|
||||||
|
|
||||||
|
/// Generate a prologue, post-regalloc. This should include any stack
|
||||||
|
/// frame or other setup necessary to use the other methods (`load_arg`,
|
||||||
|
/// `store_retval`, and spillslot accesses.) |self| is mutable so that we
|
||||||
|
/// can store information in it which will be useful when creating the
|
||||||
|
/// epilogue.
|
||||||
|
fn gen_prologue(&mut self, flags: &settings::Flags) -> Vec<I>;
|
||||||
|
|
||||||
|
/// Generate an epilogue, post-regalloc. Note that this must generate the
|
||||||
|
/// actual return instruction (rather than emitting this in the lowering
|
||||||
|
/// logic), because the epilogue code comes before the return and the two are
|
||||||
|
/// likely closely related.
|
||||||
|
fn gen_epilogue(&self, flags: &settings::Flags) -> Vec<I>;
|
||||||
|
|
||||||
|
/// Returns the full frame size for the given function, after prologue emission has run. This
|
||||||
|
/// comprises the spill space, incoming argument space, alignment padding, etc.
|
||||||
|
fn frame_size(&self) -> u32;
|
||||||
|
|
||||||
|
/// Get the spill-slot size.
|
||||||
|
fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32;
|
||||||
|
|
||||||
|
/// Generate a spill.
|
||||||
|
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> I;
|
||||||
|
|
||||||
|
/// Generate a reload (fill).
|
||||||
|
fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, ty: Type) -> I;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Trait implemented by an object that tracks ABI-related state and can
|
||||||
|
/// generate code while emitting a *call* to a function.
|
||||||
|
///
|
||||||
|
/// An instance of this trait returns information for a *particular*
|
||||||
|
/// callsite. It will usually be computed from the called function's
|
||||||
|
/// signature.
|
||||||
|
///
|
||||||
|
/// Unlike `ABIBody` above, methods on this trait are not invoked directly
|
||||||
|
/// by the machine-independent code. Rather, the machine-specific lowering
|
||||||
|
/// code will typically create an `ABICall` when creating machine instructions
|
||||||
|
/// for an IR call instruction inside `lower()`, directly emit the arg and
|
||||||
|
/// and retval copies, and attach the register use/def info to the call.
|
||||||
|
///
|
||||||
|
/// This trait is thus provided for convenience to the backends.
|
||||||
|
pub trait ABICall<I: VCodeInst> {
|
||||||
|
/// Get the number of arguments expected.
|
||||||
|
fn num_args(&self) -> usize;
|
||||||
|
|
||||||
|
/// Save the clobbered registers.
|
||||||
|
/// Copy an argument value from a source register, prior to the call.
|
||||||
|
fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> I;
|
||||||
|
|
||||||
|
/// Copy a return value into a destination register, after the call returns.
|
||||||
|
fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable<Reg>) -> I;
|
||||||
|
|
||||||
|
/// Pre-adjust the stack, prior to argument copies and call.
|
||||||
|
fn gen_stack_pre_adjust(&self) -> Vec<I>;
|
||||||
|
|
||||||
|
/// Post-adjust the satck, after call return and return-value copies.
|
||||||
|
fn gen_stack_post_adjust(&self) -> Vec<I>;
|
||||||
|
|
||||||
|
/// Generate the call itself.
|
||||||
|
///
|
||||||
|
/// The returned instruction should have proper use- and def-sets according
|
||||||
|
/// to the argument registers, return-value registers, and clobbered
|
||||||
|
/// registers for this function signature in this ABI.
|
||||||
|
///
|
||||||
|
/// (Arg registers are uses, and retval registers are defs. Clobbered
|
||||||
|
/// registers are also logically defs, but should never be read; their
|
||||||
|
/// values are "defined" (to the regalloc) but "undefined" in every other
|
||||||
|
/// sense.)
|
||||||
|
fn gen_call(&self) -> Vec<I>;
|
||||||
|
}
|
||||||
123
cranelift/codegen/src/machinst/adapter.rs
Normal file
123
cranelift/codegen/src/machinst/adapter.rs
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
//! Adapter for a `MachBackend` to implement the `TargetIsa` trait.
|
||||||
|
|
||||||
|
use crate::binemit;
|
||||||
|
use crate::ir;
|
||||||
|
use crate::isa::{EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa};
|
||||||
|
use crate::machinst::*;
|
||||||
|
use crate::regalloc::{RegDiversions, RegisterSet};
|
||||||
|
use crate::settings::Flags;
|
||||||
|
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::fmt;
|
||||||
|
use target_lexicon::Triple;
|
||||||
|
|
||||||
|
/// A wrapper around a `MachBackend` that provides a `TargetIsa` impl.
|
||||||
|
pub struct TargetIsaAdapter {
|
||||||
|
backend: Box<dyn MachBackend + Send + Sync + 'static>,
|
||||||
|
triple: Triple,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TargetIsaAdapter {
|
||||||
|
/// Create a new `TargetIsa` wrapper around a `MachBackend`.
|
||||||
|
pub fn new<B: MachBackend + Send + Sync + 'static>(backend: B) -> TargetIsaAdapter {
|
||||||
|
let triple = backend.triple();
|
||||||
|
TargetIsaAdapter {
|
||||||
|
backend: Box::new(backend),
|
||||||
|
triple,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for TargetIsaAdapter {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
write!(f, "MachBackend")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TargetIsa for TargetIsaAdapter {
|
||||||
|
fn name(&self) -> &'static str {
|
||||||
|
self.backend.name()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn triple(&self) -> &Triple {
|
||||||
|
&self.triple
|
||||||
|
}
|
||||||
|
|
||||||
|
fn flags(&self) -> &Flags {
|
||||||
|
self.backend.flags()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn register_info(&self) -> RegInfo {
|
||||||
|
// Called from function's Display impl, so we need a stub here.
|
||||||
|
RegInfo {
|
||||||
|
banks: &[],
|
||||||
|
classes: &[],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn legal_encodings<'a>(
|
||||||
|
&'a self,
|
||||||
|
_func: &'a ir::Function,
|
||||||
|
_inst: &'a ir::InstructionData,
|
||||||
|
_ctrl_typevar: ir::Type,
|
||||||
|
) -> Encodings<'a> {
|
||||||
|
panic!("Should not be called when new-style backend is available!")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encode(
|
||||||
|
&self,
|
||||||
|
_func: &ir::Function,
|
||||||
|
_inst: &ir::InstructionData,
|
||||||
|
_ctrl_typevar: ir::Type,
|
||||||
|
) -> Result<Encoding, Legalize> {
|
||||||
|
panic!("Should not be called when new-style backend is available!")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn encoding_info(&self) -> EncInfo {
|
||||||
|
panic!("Should not be called when new-style backend is available!")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn legalize_signature(&self, _sig: &mut Cow<ir::Signature>, _current: bool) {
|
||||||
|
panic!("Should not be called when new-style backend is available!")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn regclass_for_abi_type(&self, _ty: ir::Type) -> RegClass {
|
||||||
|
panic!("Should not be called when new-style backend is available!")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn allocatable_registers(&self, _func: &ir::Function) -> RegisterSet {
|
||||||
|
panic!("Should not be called when new-style backend is available!")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn prologue_epilogue(&self, _func: &mut ir::Function) -> CodegenResult<()> {
|
||||||
|
panic!("Should not be called when new-style backend is available!")
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(feature = "testing_hooks")]
|
||||||
|
fn emit_inst(
|
||||||
|
&self,
|
||||||
|
_func: &ir::Function,
|
||||||
|
_inst: ir::Inst,
|
||||||
|
_divert: &mut RegDiversions,
|
||||||
|
_sink: &mut dyn binemit::CodeSink,
|
||||||
|
) {
|
||||||
|
panic!("Should not be called when new-style backend is available!")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Emit a whole function into memory.
|
||||||
|
fn emit_function_to_memory(&self, _func: &ir::Function, _sink: &mut binemit::MemoryCodeSink) {
|
||||||
|
panic!("Should not be called when new-style backend is available!")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
|
||||||
|
Some(&*self.backend)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
|
||||||
|
self.backend.unsigned_add_overflow_condition()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
|
||||||
|
self.backend.unsigned_sub_overflow_condition()
|
||||||
|
}
|
||||||
|
}
|
||||||
59
cranelift/codegen/src/machinst/blockorder.rs
Normal file
59
cranelift/codegen/src/machinst/blockorder.rs
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
//! Computation of basic block order in emitted code.
|
||||||
|
|
||||||
|
use crate::machinst::*;
|
||||||
|
|
||||||
|
/// Simple reverse postorder-based block order emission.
|
||||||
|
///
|
||||||
|
/// TODO: use a proper algorithm, such as the bottom-up straight-line-section
|
||||||
|
/// construction algorithm.
|
||||||
|
struct BlockRPO {
|
||||||
|
visited: Vec<bool>,
|
||||||
|
postorder: Vec<BlockIndex>,
|
||||||
|
deferred_last: Option<BlockIndex>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BlockRPO {
|
||||||
|
fn new<I: VCodeInst>(vcode: &VCode<I>) -> BlockRPO {
|
||||||
|
BlockRPO {
|
||||||
|
visited: vec![false; vcode.num_blocks()],
|
||||||
|
postorder: vec![],
|
||||||
|
deferred_last: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn visit<I: VCodeInst>(&mut self, vcode: &VCode<I>, block: BlockIndex) {
|
||||||
|
self.visited[block as usize] = true;
|
||||||
|
for succ in vcode.succs(block) {
|
||||||
|
if !self.visited[*succ as usize] {
|
||||||
|
self.visit(vcode, *succ);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let (start, end) = &vcode.block_ranges[block as usize];
|
||||||
|
for i in *start..*end {
|
||||||
|
if vcode.insts[i as usize].is_epilogue_placeholder() {
|
||||||
|
debug_assert!(self.deferred_last.is_none());
|
||||||
|
self.deferred_last = Some(block);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.postorder.push(block);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn rpo(self) -> Vec<BlockIndex> {
|
||||||
|
let mut rpo = self.postorder;
|
||||||
|
rpo.reverse();
|
||||||
|
if let Some(block) = self.deferred_last {
|
||||||
|
rpo.push(block);
|
||||||
|
}
|
||||||
|
rpo
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compute the final block order.
|
||||||
|
pub fn compute_final_block_order<I: VCodeInst>(vcode: &VCode<I>) -> Vec<BlockIndex> {
|
||||||
|
let mut rpo = BlockRPO::new(vcode);
|
||||||
|
rpo.visit(vcode, vcode.entry());
|
||||||
|
rpo.rpo()
|
||||||
|
}
|
||||||
76
cranelift/codegen/src/machinst/compile.rs
Normal file
76
cranelift/codegen/src/machinst/compile.rs
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
//! Compilation backend pipeline: optimized IR to VCode / binemit.
|
||||||
|
|
||||||
|
use crate::ir::Function;
|
||||||
|
use crate::machinst::*;
|
||||||
|
use crate::settings;
|
||||||
|
use crate::timing;
|
||||||
|
|
||||||
|
use log::debug;
|
||||||
|
use regalloc::{allocate_registers, RegAllocAlgorithm};
|
||||||
|
use std::env;
|
||||||
|
|
||||||
|
/// Compile the given function down to VCode with allocated registers, ready
|
||||||
|
/// for binary emission.
|
||||||
|
pub fn compile<B: LowerBackend>(
|
||||||
|
f: &mut Function,
|
||||||
|
b: &B,
|
||||||
|
abi: Box<dyn ABIBody<B::MInst>>,
|
||||||
|
flags: &settings::Flags,
|
||||||
|
) -> VCode<B::MInst>
|
||||||
|
where
|
||||||
|
B::MInst: ShowWithRRU,
|
||||||
|
{
|
||||||
|
// This lowers the CL IR.
|
||||||
|
let mut vcode = Lower::new(f, abi).lower(b);
|
||||||
|
|
||||||
|
let universe = &B::MInst::reg_universe();
|
||||||
|
|
||||||
|
debug!("vcode from lowering: \n{}", vcode.show_rru(Some(universe)));
|
||||||
|
|
||||||
|
// Perform register allocation.
|
||||||
|
let algorithm = match env::var("REGALLOC") {
|
||||||
|
Ok(str) => match str.as_str() {
|
||||||
|
"lsrac" => RegAllocAlgorithm::LinearScanChecked,
|
||||||
|
"lsra" => RegAllocAlgorithm::LinearScan,
|
||||||
|
// to wit: btc doesn't mean "bitcoin" here
|
||||||
|
"btc" => RegAllocAlgorithm::BacktrackingChecked,
|
||||||
|
_ => RegAllocAlgorithm::Backtracking,
|
||||||
|
},
|
||||||
|
// By default use backtracking, which is the fastest.
|
||||||
|
Err(_) => RegAllocAlgorithm::Backtracking,
|
||||||
|
};
|
||||||
|
|
||||||
|
let result = {
|
||||||
|
let _tt = timing::regalloc();
|
||||||
|
allocate_registers(
|
||||||
|
&mut vcode, algorithm, universe, /*request_block_annotations=*/ false,
|
||||||
|
)
|
||||||
|
.map_err(|err| {
|
||||||
|
debug!(
|
||||||
|
"Register allocation error for vcode\n{}\nError: {:?}",
|
||||||
|
vcode.show_rru(Some(universe)),
|
||||||
|
err
|
||||||
|
);
|
||||||
|
err
|
||||||
|
})
|
||||||
|
.expect("register allocation")
|
||||||
|
};
|
||||||
|
|
||||||
|
// Reorder vcode into final order and copy out final instruction sequence
|
||||||
|
// all at once. This also inserts prologues/epilogues.
|
||||||
|
vcode.replace_insns_from_regalloc(result, flags);
|
||||||
|
|
||||||
|
vcode.remove_redundant_branches();
|
||||||
|
|
||||||
|
// Do final passes over code to finalize branches.
|
||||||
|
vcode.finalize_branches();
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"vcode after regalloc: final version:\n{}",
|
||||||
|
vcode.show_rru(Some(universe))
|
||||||
|
);
|
||||||
|
|
||||||
|
//println!("{}\n", vcode.show_rru(Some(&B::MInst::reg_universe())));
|
||||||
|
|
||||||
|
vcode
|
||||||
|
}
|
||||||
723
cranelift/codegen/src/machinst/lower.rs
Normal file
723
cranelift/codegen/src/machinst/lower.rs
Normal file
@@ -0,0 +1,723 @@
|
|||||||
|
//! This module implements lowering (instruction selection) from Cranelift IR
|
||||||
|
//! to machine instructions with virtual registers. This is *almost* the final
|
||||||
|
//! machine code, except for register allocation.
|
||||||
|
|
||||||
|
use crate::binemit::CodeSink;
|
||||||
|
use crate::dce::has_side_effect;
|
||||||
|
use crate::entity::SecondaryMap;
|
||||||
|
use crate::ir::{
|
||||||
|
Block, ExternalName, Function, GlobalValueData, Inst, InstructionData, MemFlags, Opcode,
|
||||||
|
Signature, SourceLoc, Type, Value, ValueDef,
|
||||||
|
};
|
||||||
|
use crate::isa::registers::RegUnit;
|
||||||
|
use crate::machinst::{
|
||||||
|
ABIBody, BlockIndex, MachInst, MachInstEmit, VCode, VCodeBuilder, VCodeInst,
|
||||||
|
};
|
||||||
|
use crate::num_uses::NumUses;
|
||||||
|
|
||||||
|
use regalloc::Function as RegallocFunction;
|
||||||
|
use regalloc::{RealReg, Reg, RegClass, Set, VirtualReg, Writable};
|
||||||
|
|
||||||
|
use alloc::boxed::Box;
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
use log::debug;
|
||||||
|
use smallvec::SmallVec;
|
||||||
|
use std::collections::VecDeque;
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
|
/// A context that machine-specific lowering code can use to emit lowered instructions. This is the
|
||||||
|
/// view of the machine-independent per-function lowering context that is seen by the machine
|
||||||
|
/// backend.
|
||||||
|
pub trait LowerCtx<I> {
|
||||||
|
/// Get the instdata for a given IR instruction.
|
||||||
|
fn data(&self, ir_inst: Inst) -> &InstructionData;
|
||||||
|
/// Get the controlling type for a polymorphic IR instruction.
|
||||||
|
fn ty(&self, ir_inst: Inst) -> Type;
|
||||||
|
/// Emit a machine instruction.
|
||||||
|
fn emit(&mut self, mach_inst: I);
|
||||||
|
/// Indicate that an IR instruction has been merged, and so one of its
|
||||||
|
/// uses is gone (replaced by uses of the instruction's inputs). This
|
||||||
|
/// helps the lowering algorithm to perform on-the-fly DCE, skipping over
|
||||||
|
/// unused instructions (such as immediates incorporated directly).
|
||||||
|
fn merged(&mut self, from_inst: Inst);
|
||||||
|
/// Get the producing instruction, if any, and output number, for the `idx`th input to the
|
||||||
|
/// given IR instruction
|
||||||
|
fn input_inst(&self, ir_inst: Inst, idx: usize) -> Option<(Inst, usize)>;
|
||||||
|
/// Map a Value to its associated writable (probably virtual) Reg.
|
||||||
|
fn value_to_writable_reg(&self, val: Value) -> Writable<Reg>;
|
||||||
|
/// Map a Value to its associated (probably virtual) Reg.
|
||||||
|
fn value_to_reg(&self, val: Value) -> Reg;
|
||||||
|
/// Get the `idx`th input to the given IR instruction as a virtual register.
|
||||||
|
fn input(&self, ir_inst: Inst, idx: usize) -> Reg;
|
||||||
|
/// Get the `idx`th output of the given IR instruction as a virtual register.
|
||||||
|
fn output(&self, ir_inst: Inst, idx: usize) -> Writable<Reg>;
|
||||||
|
/// Get the number of inputs to the given IR instruction.
|
||||||
|
fn num_inputs(&self, ir_inst: Inst) -> usize;
|
||||||
|
/// Get the number of outputs to the given IR instruction.
|
||||||
|
fn num_outputs(&self, ir_inst: Inst) -> usize;
|
||||||
|
/// Get the type for an instruction's input.
|
||||||
|
fn input_ty(&self, ir_inst: Inst, idx: usize) -> Type;
|
||||||
|
/// Get the type for an instruction's output.
|
||||||
|
fn output_ty(&self, ir_inst: Inst, idx: usize) -> Type;
|
||||||
|
/// Get a new temp.
|
||||||
|
fn tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg>;
|
||||||
|
/// Get the number of block params.
|
||||||
|
fn num_bb_params(&self, bb: Block) -> usize;
|
||||||
|
/// Get the register for a block param.
|
||||||
|
fn bb_param(&self, bb: Block, idx: usize) -> Reg;
|
||||||
|
/// Get the register for a return value.
|
||||||
|
fn retval(&self, idx: usize) -> Writable<Reg>;
|
||||||
|
/// Get the target for a call instruction, as an `ExternalName`.
|
||||||
|
fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<&'b ExternalName>;
|
||||||
|
/// Get the signature for a call or call-indirect instruction.
|
||||||
|
fn call_sig<'b>(&'b self, ir_inst: Inst) -> Option<&'b Signature>;
|
||||||
|
/// Get the symbol name and offset for a symbol_value instruction.
|
||||||
|
fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, i64)>;
|
||||||
|
/// Returns the memory flags of a given memory access.
|
||||||
|
fn memflags(&self, ir_inst: Inst) -> Option<MemFlags>;
|
||||||
|
/// Get the source location for a given instruction.
|
||||||
|
fn srcloc(&self, ir_inst: Inst) -> SourceLoc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A machine backend.
|
||||||
|
pub trait LowerBackend {
|
||||||
|
/// The machine instruction type.
|
||||||
|
type MInst: VCodeInst;
|
||||||
|
|
||||||
|
/// Lower a single instruction. Instructions are lowered in reverse order.
|
||||||
|
/// This function need not handle branches; those are always passed to
|
||||||
|
/// `lower_branch_group` below.
|
||||||
|
fn lower<C: LowerCtx<Self::MInst>>(&self, ctx: &mut C, inst: Inst);
|
||||||
|
|
||||||
|
/// Lower a block-terminating group of branches (which together can be seen as one
|
||||||
|
/// N-way branch), given a vcode BlockIndex for each target.
|
||||||
|
fn lower_branch_group<C: LowerCtx<Self::MInst>>(
|
||||||
|
&self,
|
||||||
|
ctx: &mut C,
|
||||||
|
insts: &[Inst],
|
||||||
|
targets: &[BlockIndex],
|
||||||
|
fallthrough: Option<BlockIndex>,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence
|
||||||
|
/// from original Inst to MachInsts.
|
||||||
|
pub struct Lower<'a, I: VCodeInst> {
|
||||||
|
// The function to lower.
|
||||||
|
f: &'a Function,
|
||||||
|
|
||||||
|
// Lowered machine instructions.
|
||||||
|
vcode: VCodeBuilder<I>,
|
||||||
|
|
||||||
|
// Number of active uses (minus `dec_use()` calls by backend) of each instruction.
|
||||||
|
num_uses: SecondaryMap<Inst, u32>,
|
||||||
|
|
||||||
|
// Mapping from `Value` (SSA value in IR) to virtual register.
|
||||||
|
value_regs: SecondaryMap<Value, Reg>,
|
||||||
|
|
||||||
|
// Return-value vregs.
|
||||||
|
retval_regs: Vec<Reg>,
|
||||||
|
|
||||||
|
// Next virtual register number to allocate.
|
||||||
|
next_vreg: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn alloc_vreg(
|
||||||
|
value_regs: &mut SecondaryMap<Value, Reg>,
|
||||||
|
regclass: RegClass,
|
||||||
|
value: Value,
|
||||||
|
next_vreg: &mut u32,
|
||||||
|
) -> VirtualReg {
|
||||||
|
if value_regs[value].get_index() == 0 {
|
||||||
|
// default value in map.
|
||||||
|
let v = *next_vreg;
|
||||||
|
*next_vreg += 1;
|
||||||
|
value_regs[value] = Reg::new_virtual(regclass, v);
|
||||||
|
}
|
||||||
|
value_regs[value].as_virtual_reg().unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
|
enum GenerateReturn {
|
||||||
|
Yes,
|
||||||
|
No,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, I: VCodeInst> Lower<'a, I> {
|
||||||
|
/// Prepare a new lowering context for the given IR function.
|
||||||
|
pub fn new(f: &'a Function, abi: Box<dyn ABIBody<I>>) -> Lower<'a, I> {
|
||||||
|
let mut vcode = VCodeBuilder::new(abi);
|
||||||
|
|
||||||
|
let num_uses = NumUses::compute(f).take_uses();
|
||||||
|
|
||||||
|
let mut next_vreg: u32 = 1;
|
||||||
|
|
||||||
|
// Default register should never be seen, but the `value_regs` map needs a default and we
|
||||||
|
// don't want to push `Option` everywhere. All values will be assigned registers by the
|
||||||
|
// loops over block parameters and instruction results below.
|
||||||
|
//
|
||||||
|
// We do not use vreg 0 so that we can detect any unassigned register that leaks through.
|
||||||
|
let default_register = Reg::new_virtual(RegClass::I32, 0);
|
||||||
|
let mut value_regs = SecondaryMap::with_default(default_register);
|
||||||
|
|
||||||
|
// Assign a vreg to each value.
|
||||||
|
for bb in f.layout.blocks() {
|
||||||
|
for param in f.dfg.block_params(bb) {
|
||||||
|
let vreg = alloc_vreg(
|
||||||
|
&mut value_regs,
|
||||||
|
I::rc_for_type(f.dfg.value_type(*param)),
|
||||||
|
*param,
|
||||||
|
&mut next_vreg,
|
||||||
|
);
|
||||||
|
vcode.set_vreg_type(vreg, f.dfg.value_type(*param));
|
||||||
|
}
|
||||||
|
for inst in f.layout.block_insts(bb) {
|
||||||
|
for result in f.dfg.inst_results(inst) {
|
||||||
|
let vreg = alloc_vreg(
|
||||||
|
&mut value_regs,
|
||||||
|
I::rc_for_type(f.dfg.value_type(*result)),
|
||||||
|
*result,
|
||||||
|
&mut next_vreg,
|
||||||
|
);
|
||||||
|
vcode.set_vreg_type(vreg, f.dfg.value_type(*result));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assign a vreg to each return value.
|
||||||
|
let mut retval_regs = vec![];
|
||||||
|
for ret in &f.signature.returns {
|
||||||
|
let v = next_vreg;
|
||||||
|
next_vreg += 1;
|
||||||
|
let regclass = I::rc_for_type(ret.value_type);
|
||||||
|
let vreg = Reg::new_virtual(regclass, v);
|
||||||
|
retval_regs.push(vreg);
|
||||||
|
vcode.set_vreg_type(vreg.as_virtual_reg().unwrap(), ret.value_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
Lower {
|
||||||
|
f,
|
||||||
|
vcode,
|
||||||
|
num_uses,
|
||||||
|
value_regs,
|
||||||
|
retval_regs,
|
||||||
|
next_vreg,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn gen_arg_setup(&mut self) {
|
||||||
|
if let Some(entry_bb) = self.f.layout.entry_block() {
|
||||||
|
debug!(
|
||||||
|
"gen_arg_setup: entry BB {} args are:\n{:?}",
|
||||||
|
entry_bb,
|
||||||
|
self.f.dfg.block_params(entry_bb)
|
||||||
|
);
|
||||||
|
for (i, param) in self.f.dfg.block_params(entry_bb).iter().enumerate() {
|
||||||
|
let reg = Writable::from_reg(self.value_regs[*param]);
|
||||||
|
let insn = self.vcode.abi().gen_copy_arg_to_reg(i, reg);
|
||||||
|
self.vcode.push(insn);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn gen_retval_setup(&mut self, gen_ret_inst: GenerateReturn) {
|
||||||
|
for (i, reg) in self.retval_regs.iter().enumerate() {
|
||||||
|
let insn = self.vcode.abi().gen_copy_reg_to_retval(i, *reg);
|
||||||
|
self.vcode.push(insn);
|
||||||
|
}
|
||||||
|
let inst = match gen_ret_inst {
|
||||||
|
GenerateReturn::Yes => self.vcode.abi().gen_ret(),
|
||||||
|
GenerateReturn::No => self.vcode.abi().gen_epilogue_placeholder(),
|
||||||
|
};
|
||||||
|
self.vcode.push(inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn find_reachable_bbs(&self) -> SmallVec<[Block; 16]> {
|
||||||
|
if let Some(entry) = self.f.layout.entry_block() {
|
||||||
|
let mut ret = SmallVec::new();
|
||||||
|
let mut queue = VecDeque::new();
|
||||||
|
let mut visited = SecondaryMap::with_default(false);
|
||||||
|
queue.push_back(entry);
|
||||||
|
visited[entry] = true;
|
||||||
|
while !queue.is_empty() {
|
||||||
|
let b = queue.pop_front().unwrap();
|
||||||
|
ret.push(b);
|
||||||
|
let mut succs: SmallVec<[Block; 16]> = SmallVec::new();
|
||||||
|
for inst in self.f.layout.block_insts(b) {
|
||||||
|
if self.f.dfg[inst].opcode().is_branch() {
|
||||||
|
succs.extend(branch_targets(self.f, b, inst).into_iter());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for succ in succs.into_iter() {
|
||||||
|
if !visited[succ] {
|
||||||
|
queue.push_back(succ);
|
||||||
|
visited[succ] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ret
|
||||||
|
} else {
|
||||||
|
SmallVec::new()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lower the function.
///
/// Consumes `self` and returns the completed `VCode`. High-level algorithm:
///   1. Collect reachable blocks and reverse them, so lowering proceeds
///      backward; together with the backward scan inside each block, this
///      lets use-counts decide which pure instructions can be skipped.
///   2. Pre-allocate one fresh `BlockIndex` (an "edge block") per branch
///      successor, so every CFG edge is split.
///   3. Lower each block: return-value setup for return blocks, then a
///      backward scan that groups trailing branches for a single
///      `lower_branch_group` call and lowers ordinary instructions
///      (skipping those with no side effect and no uses).
///   4. Materialize the edge blocks with block-parameter (phi) copies and
///      an unconditional jump to the original target.
pub fn lower<B: LowerBackend<MInst = I>>(mut self, backend: &B) -> VCode<I> {
    // Find all reachable blocks.
    let mut bbs = self.find_reachable_bbs();
    // Work backward (reverse block order, reverse through each block), skipping insns with zero
    // uses.
    bbs.reverse();

    // This records a Block-to-BlockIndex map so that branch targets can be resolved.
    let mut next_bindex = self.vcode.init_bb_map(&bbs[..]);

    // Allocate a separate BlockIndex for each control-flow instruction so that we can create
    // the edge blocks later. Each entry for a control-flow inst is the edge block; the list
    // has (cf-inst, edge block, orig block) tuples.
    let mut edge_blocks_by_inst: SecondaryMap<Inst, Vec<BlockIndex>> =
        SecondaryMap::with_default(vec![]);
    let mut edge_blocks: Vec<(Inst, BlockIndex, Block)> = vec![];

    debug!("about to lower function: {:?}", self.f);
    debug!("bb map: {:?}", self.vcode.blocks_by_bb());

    // Pass 1: assign an edge-block index to every (branch inst, successor)
    // pair, in deterministic block/inst/successor order.
    for bb in bbs.iter() {
        for inst in self.f.layout.block_insts(*bb) {
            let op = self.f.dfg[inst].opcode();
            if op.is_branch() {
                // Find the original target.
                let mut add_succ = |next_bb| {
                    let edge_block = next_bindex;
                    next_bindex += 1;
                    edge_blocks_by_inst[inst].push(edge_block);
                    edge_blocks.push((inst, edge_block, next_bb));
                };
                for succ in branch_targets(self.f, *bb, inst).into_iter() {
                    add_succ(succ);
                }
            }
        }
    }

    // Pass 2: lower each block. The scan within a block is backward; the
    // VCode builder is responsible for re-establishing forward order.
    for bb in bbs.iter() {
        debug!("lowering bb: {}", bb);

        // If this is a return block, produce the return value setup.
        let last_insn = self.f.layout.block_insts(*bb).last().unwrap();
        let last_insn_opcode = self.f.dfg[last_insn].opcode();
        if last_insn_opcode.is_return() {
            let gen_ret = if last_insn_opcode == Opcode::Return {
                GenerateReturn::Yes
            } else {
                debug_assert!(last_insn_opcode == Opcode::FallthroughReturn);
                GenerateReturn::No
            };
            self.gen_retval_setup(gen_ret);
            self.vcode.end_ir_inst();
        }

        // Find the branches at the end first, and process those, if any.
        // Branches are accumulated (in reverse) until a non-branch inst is
        // seen, then handed to the backend as a single group.
        let mut branches: SmallVec<[Inst; 2]> = SmallVec::new();
        let mut targets: SmallVec<[BlockIndex; 2]> = SmallVec::new();

        for inst in self.f.layout.block_insts(*bb).rev() {
            debug!("lower: inst {}", inst);
            if edge_blocks_by_inst[inst].len() > 0 {
                branches.push(inst);
                for target in edge_blocks_by_inst[inst].iter().rev().cloned() {
                    targets.push(target);
                }
            } else {
                // We've reached the end of the branches -- process all as a group, first.
                if branches.len() > 0 {
                    let fallthrough = self.f.layout.next_block(*bb);
                    let fallthrough = fallthrough.map(|bb| self.vcode.bb_to_bindex(bb));
                    // Undo the reverse-order accumulation above.
                    branches.reverse();
                    targets.reverse();
                    debug!(
                        "lower_branch_group: targets = {:?} branches = {:?}",
                        targets, branches
                    );
                    backend.lower_branch_group(
                        &mut self,
                        &branches[..],
                        &targets[..],
                        fallthrough,
                    );
                    self.vcode.end_ir_inst();
                    branches.clear();
                    targets.clear();
                }

                // Only codegen an instruction if it either has a side
                // effect, or has at least one use of one of its results.
                let num_uses = self.num_uses[inst];
                let side_effect = has_side_effect(self.f, inst);
                if side_effect || num_uses > 0 {
                    backend.lower(&mut self, inst);
                    self.vcode.end_ir_inst();
                } else {
                    // If we're skipping the instruction, we need to dec-ref
                    // its arguments so that *their* producers may in turn
                    // become dead and be skipped.
                    for arg in self.f.dfg.inst_args(inst) {
                        let val = self.f.dfg.resolve_aliases(*arg);
                        match self.f.dfg.value_def(val) {
                            ValueDef::Result(src_inst, _) => {
                                self.dec_use(src_inst);
                            }
                            _ => {}
                        }
                    }
                }
            }
        }

        // There are possibly some branches left if the block contained only branches.
        if branches.len() > 0 {
            let fallthrough = self.f.layout.next_block(*bb);
            let fallthrough = fallthrough.map(|bb| self.vcode.bb_to_bindex(bb));
            branches.reverse();
            targets.reverse();
            debug!(
                "lower_branch_group: targets = {:?} branches = {:?}",
                targets, branches
            );
            backend.lower_branch_group(&mut self, &branches[..], &targets[..], fallthrough);
            self.vcode.end_ir_inst();
            branches.clear();
            targets.clear();
        }

        // If this is the entry block, produce the argument setup.
        // (Emitted last in the backward scan, hence first in forward order.)
        if Some(*bb) == self.f.layout.entry_block() {
            self.gen_arg_setup();
            self.vcode.end_ir_inst();
        }

        let vcode_bb = self.vcode.end_bb();
        debug!("finished building bb: BlockIndex {}", vcode_bb);
        debug!("bb_to_bindex map says: {}", self.vcode.bb_to_bindex(*bb));
        assert!(vcode_bb == self.vcode.bb_to_bindex(*bb));
        if Some(*bb) == self.f.layout.entry_block() {
            self.vcode.set_entry(vcode_bb);
        }
    }

    // Now create the edge blocks, with phi lowering (block parameter copies).
    for (inst, edge_block, orig_block) in edge_blocks.into_iter() {
        debug!(
            "creating edge block: inst {}, edge_block {}, orig_block {}",
            inst, edge_block, orig_block
        );

        // (Type, regclass) of each target-block parameter; temporaries are
        // only created below if the source/dest register sets overlap.
        let phi_classes: Vec<(Type, RegClass)> = self
            .f
            .dfg
            .block_params(orig_block)
            .iter()
            .map(|p| self.f.dfg.value_type(*p))
            .map(|ty| (ty, I::rc_for_type(ty)))
            .collect();

        // FIXME sewardj 2020Feb29: use SmallVec
        let mut src_regs = vec![];
        let mut dst_regs = vec![];

        // Round up all the source (jump args) and destination (block
        // params) regs for the phi copies.
        for (i, arg) in self.f.dfg.inst_variable_args(inst).iter().enumerate() {
            let arg = self.f.dfg.resolve_aliases(*arg);
            debug!("jump arg {} is {}", i, arg);
            src_regs.push(self.value_regs[arg]);
        }
        for (i, param) in self.f.dfg.block_params(orig_block).iter().enumerate() {
            debug!("bb arg {} is {}", i, param);
            dst_regs.push(Writable::from_reg(self.value_regs[*param]));
        }
        debug_assert!(src_regs.len() == dst_regs.len());
        debug_assert!(phi_classes.len() == dst_regs.len());

        // If, as is mostly the case, the source and destination register
        // sets are non overlapping, then we can copy directly, so as to
        // save the register allocator work.
        if !Set::<Reg>::from_vec(src_regs.clone()).intersects(&Set::<Reg>::from_vec(
            dst_regs.iter().map(|r| r.to_reg()).collect(),
        )) {
            for (dst_reg, (src_reg, (ty, _))) in
                dst_regs.iter().zip(src_regs.iter().zip(phi_classes))
            {
                self.vcode.push(I::gen_move(*dst_reg, *src_reg, ty));
            }
        } else {
            // There's some overlap, so play safe and copy via temps.
            // Two phases: all sources -> temps, then all temps -> dests,
            // so no destination is clobbered before it is read.

            let tmp_regs: Vec<Writable<Reg>> = phi_classes
                .iter()
                .map(|&(ty, rc)| self.tmp(rc, ty)) // borrows `self` mutably.
                .collect();

            debug!("phi_temps = {:?}", tmp_regs);
            debug_assert!(tmp_regs.len() == src_regs.len());

            for (tmp_reg, (src_reg, &(ty, _))) in
                tmp_regs.iter().zip(src_regs.iter().zip(phi_classes.iter()))
            {
                self.vcode.push(I::gen_move(*tmp_reg, *src_reg, ty));
            }
            for (dst_reg, (tmp_reg, &(ty, _))) in
                dst_regs.iter().zip(tmp_regs.iter().zip(phi_classes.iter()))
            {
                self.vcode.push(I::gen_move(*dst_reg, tmp_reg.to_reg(), ty));
            }
        }

        // Create the unconditional jump to the original target block.
        self.vcode
            .push(I::gen_jump(self.vcode.bb_to_bindex(orig_block)));

        // End the IR inst and block. (We lower this as if it were one IR instruction so that
        // we can emit machine instructions in forward order.)
        self.vcode.end_ir_inst();
        let blocknum = self.vcode.end_bb();
        assert!(blocknum == edge_block);
    }

    // Now that we've emitted all instructions into the VCodeBuilder, let's build the VCode.
    self.vcode.build()
}
|
||||||
|
|
||||||
|
/// Reduce the use-count of an IR instruction. Use this when, e.g., isel incorporates the
|
||||||
|
/// computation of an input instruction directly, so that input instruction has one
|
||||||
|
/// fewer use.
|
||||||
|
fn dec_use(&mut self, ir_inst: Inst) {
|
||||||
|
assert!(self.num_uses[ir_inst] > 0);
|
||||||
|
self.num_uses[ir_inst] -= 1;
|
||||||
|
debug!(
|
||||||
|
"incref: ir_inst {} now has {} uses",
|
||||||
|
ir_inst, self.num_uses[ir_inst]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Increase the use-count of an IR instruction. Use this when, e.g., isel incorporates
|
||||||
|
/// the computation of an input instruction directly, so that input instruction's
|
||||||
|
/// inputs are now used directly by the merged instruction.
|
||||||
|
fn inc_use(&mut self, ir_inst: Inst) {
|
||||||
|
self.num_uses[ir_inst] += 1;
|
||||||
|
debug!(
|
||||||
|
"decref: ir_inst {} now has {} uses",
|
||||||
|
ir_inst, self.num_uses[ir_inst]
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, I: VCodeInst> LowerCtx<I> for Lower<'a, I> {
|
||||||
|
/// Get the instdata for a given IR instruction.
|
||||||
|
fn data(&self, ir_inst: Inst) -> &InstructionData {
|
||||||
|
&self.f.dfg[ir_inst]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the controlling type for a polymorphic IR instruction.
|
||||||
|
fn ty(&self, ir_inst: Inst) -> Type {
|
||||||
|
self.f.dfg.ctrl_typevar(ir_inst)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Emit a machine instruction.
|
||||||
|
fn emit(&mut self, mach_inst: I) {
|
||||||
|
self.vcode.push(mach_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Indicate that a merge has occurred.
|
||||||
|
fn merged(&mut self, from_inst: Inst) {
|
||||||
|
debug!("merged: inst {}", from_inst);
|
||||||
|
// First, inc-ref all inputs of `from_inst`, because they are now used
|
||||||
|
// directly by `into_inst`.
|
||||||
|
for arg in self.f.dfg.inst_args(from_inst) {
|
||||||
|
let arg = self.f.dfg.resolve_aliases(*arg);
|
||||||
|
match self.f.dfg.value_def(arg) {
|
||||||
|
ValueDef::Result(src_inst, _) => {
|
||||||
|
debug!(" -> inc-reffing src inst {}", src_inst);
|
||||||
|
self.inc_use(src_inst);
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Then, dec-ref the merged instruction itself. It still retains references
|
||||||
|
// to its arguments (inc-ref'd above). If its refcount has reached zero,
|
||||||
|
// it will be skipped during emission and its args will be dec-ref'd at that
|
||||||
|
// time.
|
||||||
|
self.dec_use(from_inst);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the producing instruction, if any, and output number, for the `idx`th input to the
|
||||||
|
/// given IR instruction.
|
||||||
|
fn input_inst(&self, ir_inst: Inst, idx: usize) -> Option<(Inst, usize)> {
|
||||||
|
let val = self.f.dfg.inst_args(ir_inst)[idx];
|
||||||
|
let val = self.f.dfg.resolve_aliases(val);
|
||||||
|
match self.f.dfg.value_def(val) {
|
||||||
|
ValueDef::Result(src_inst, result_idx) => Some((src_inst, result_idx)),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Map a Value to its associated writable (probably virtual) Reg.
|
||||||
|
fn value_to_writable_reg(&self, val: Value) -> Writable<Reg> {
|
||||||
|
let val = self.f.dfg.resolve_aliases(val);
|
||||||
|
Writable::from_reg(self.value_regs[val])
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Map a Value to its associated (probably virtual) Reg.
|
||||||
|
fn value_to_reg(&self, val: Value) -> Reg {
|
||||||
|
let val = self.f.dfg.resolve_aliases(val);
|
||||||
|
self.value_regs[val]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the `idx`th input to the given IR instruction as a virtual register.
|
||||||
|
fn input(&self, ir_inst: Inst, idx: usize) -> Reg {
|
||||||
|
let val = self.f.dfg.inst_args(ir_inst)[idx];
|
||||||
|
let val = self.f.dfg.resolve_aliases(val);
|
||||||
|
self.value_to_reg(val)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the `idx`th output of the given IR instruction as a virtual register.
|
||||||
|
fn output(&self, ir_inst: Inst, idx: usize) -> Writable<Reg> {
|
||||||
|
let val = self.f.dfg.inst_results(ir_inst)[idx];
|
||||||
|
self.value_to_writable_reg(val)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get a new temp.
|
||||||
|
fn tmp(&mut self, rc: RegClass, ty: Type) -> Writable<Reg> {
|
||||||
|
let v = self.next_vreg;
|
||||||
|
self.next_vreg += 1;
|
||||||
|
let vreg = Reg::new_virtual(rc, v);
|
||||||
|
self.vcode.set_vreg_type(vreg.as_virtual_reg().unwrap(), ty);
|
||||||
|
Writable::from_reg(vreg)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the number of inputs for the given IR instruction.
|
||||||
|
fn num_inputs(&self, ir_inst: Inst) -> usize {
|
||||||
|
self.f.dfg.inst_args(ir_inst).len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the number of outputs for the given IR instruction.
|
||||||
|
fn num_outputs(&self, ir_inst: Inst) -> usize {
|
||||||
|
self.f.dfg.inst_results(ir_inst).len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the type for an instruction's input.
|
||||||
|
fn input_ty(&self, ir_inst: Inst, idx: usize) -> Type {
|
||||||
|
let val = self.f.dfg.inst_args(ir_inst)[idx];
|
||||||
|
let val = self.f.dfg.resolve_aliases(val);
|
||||||
|
self.f.dfg.value_type(val)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the type for an instruction's output.
|
||||||
|
fn output_ty(&self, ir_inst: Inst, idx: usize) -> Type {
|
||||||
|
self.f.dfg.value_type(self.f.dfg.inst_results(ir_inst)[idx])
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the number of block params.
|
||||||
|
fn num_bb_params(&self, bb: Block) -> usize {
|
||||||
|
self.f.dfg.block_params(bb).len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the register for a block param.
|
||||||
|
fn bb_param(&self, bb: Block, idx: usize) -> Reg {
|
||||||
|
let val = self.f.dfg.block_params(bb)[idx];
|
||||||
|
self.value_regs[val]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the register for a return value.
|
||||||
|
fn retval(&self, idx: usize) -> Writable<Reg> {
|
||||||
|
Writable::from_reg(self.retval_regs[idx])
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the target for a call instruction, as an `ExternalName`.
|
||||||
|
fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<&'b ExternalName> {
|
||||||
|
match &self.f.dfg[ir_inst] {
|
||||||
|
&InstructionData::Call { func_ref, .. }
|
||||||
|
| &InstructionData::FuncAddr { func_ref, .. } => {
|
||||||
|
let funcdata = &self.f.dfg.ext_funcs[func_ref];
|
||||||
|
Some(&funcdata.name)
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/// Get the signature for a call or call-indirect instruction.
|
||||||
|
fn call_sig<'b>(&'b self, ir_inst: Inst) -> Option<&'b Signature> {
|
||||||
|
match &self.f.dfg[ir_inst] {
|
||||||
|
&InstructionData::Call { func_ref, .. } => {
|
||||||
|
let funcdata = &self.f.dfg.ext_funcs[func_ref];
|
||||||
|
Some(&self.f.dfg.signatures[funcdata.signature])
|
||||||
|
}
|
||||||
|
&InstructionData::CallIndirect { sig_ref, .. } => Some(&self.f.dfg.signatures[sig_ref]),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the symbol name and offset for a symbol_value instruction.
|
||||||
|
fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, i64)> {
|
||||||
|
match &self.f.dfg[ir_inst] {
|
||||||
|
&InstructionData::UnaryGlobalValue { global_value, .. } => {
|
||||||
|
let gvdata = &self.f.global_values[global_value];
|
||||||
|
match gvdata {
|
||||||
|
&GlobalValueData::Symbol {
|
||||||
|
ref name,
|
||||||
|
ref offset,
|
||||||
|
..
|
||||||
|
} => {
|
||||||
|
let offset = offset.bits();
|
||||||
|
Some((name, offset))
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the memory flags of a given memory access.
|
||||||
|
fn memflags(&self, ir_inst: Inst) -> Option<MemFlags> {
|
||||||
|
match &self.f.dfg[ir_inst] {
|
||||||
|
&InstructionData::Load { flags, .. }
|
||||||
|
| &InstructionData::LoadComplex { flags, .. }
|
||||||
|
| &InstructionData::Store { flags, .. }
|
||||||
|
| &InstructionData::StoreComplex { flags, .. } => Some(flags),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the source location for a given instruction.
|
||||||
|
fn srcloc(&self, ir_inst: Inst) -> SourceLoc {
|
||||||
|
self.f.srclocs[ir_inst]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn branch_targets(f: &Function, block: Block, inst: Inst) -> SmallVec<[Block; 16]> {
|
||||||
|
let mut ret = SmallVec::new();
|
||||||
|
if f.dfg[inst].opcode() == Opcode::Fallthrough {
|
||||||
|
ret.push(f.layout.next_block(block).unwrap());
|
||||||
|
} else {
|
||||||
|
match &f.dfg[inst] {
|
||||||
|
&InstructionData::Jump { destination, .. }
|
||||||
|
| &InstructionData::Branch { destination, .. }
|
||||||
|
| &InstructionData::BranchInt { destination, .. }
|
||||||
|
| &InstructionData::BranchIcmp { destination, .. }
|
||||||
|
| &InstructionData::BranchFloat { destination, .. } => {
|
||||||
|
ret.push(destination);
|
||||||
|
}
|
||||||
|
&InstructionData::BranchTable {
|
||||||
|
destination, table, ..
|
||||||
|
} => {
|
||||||
|
ret.push(destination);
|
||||||
|
for dest in f.jump_tables[table].as_slice() {
|
||||||
|
ret.push(*dest);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ret
|
||||||
|
}
|
||||||
288
cranelift/codegen/src/machinst/mod.rs
Normal file
288
cranelift/codegen/src/machinst/mod.rs
Normal file
@@ -0,0 +1,288 @@
|
|||||||
|
//! This module exposes the machine-specific backend definition pieces.
|
||||||
|
//!
|
||||||
|
//! The MachInst infrastructure is the compiler backend, from CLIF
|
||||||
|
//! (ir::Function) to machine code. The purpose of this infrastructure is, at a
|
||||||
|
//! high level, to do instruction selection/lowering (to machine instructions),
|
||||||
|
//! register allocation, and then perform all the fixups to branches, constant
|
||||||
|
//! data references, etc., needed to actually generate machine code.
|
||||||
|
//!
|
||||||
|
//! The container for machine instructions, at various stages of construction,
|
||||||
|
//! is the `VCode` struct. We refer to a sequence of machine instructions organized
|
||||||
|
//! into basic blocks as "vcode". This is short for "virtual-register code", though
|
||||||
|
//! it's a bit of a misnomer because near the end of the pipeline, vcode has all
|
||||||
|
//! real registers. Nevertheless, the name is catchy and we like it.
|
||||||
|
//!
|
||||||
|
//! The compilation pipeline, from an `ir::Function` (already optimized as much as
|
||||||
|
//! you like by machine-independent optimization passes) onward, is as follows.
|
||||||
|
//! (N.B.: though we show the VCode separately at each stage, the passes
|
||||||
|
//! mutate the VCode in place; these are not separate copies of the code.)
|
||||||
|
//!
|
||||||
|
//! | ir::Function (SSA IR, machine-independent opcodes)
|
||||||
|
//! | |
|
||||||
|
//! | | [lower]
|
||||||
|
//! | |
|
||||||
|
//! | VCode<arch_backend::Inst> (machine instructions:
|
||||||
|
//! | | - mostly virtual registers.
|
||||||
|
//! | | - cond branches in two-target form.
|
||||||
|
//! | | - branch targets are block indices.
|
||||||
|
//! | | - in-memory constants held by insns,
|
||||||
|
//! | | with unknown offsets.
|
||||||
|
//! | | - critical edges (actually all edges)
|
||||||
|
//! | | are split.)
|
||||||
|
//! | | [regalloc]
|
||||||
|
//! | |
|
||||||
|
//! | VCode<arch_backend::Inst> (machine instructions:
|
||||||
|
//! | | - all real registers.
|
||||||
|
//! | | - new instruction sequence returned
|
||||||
|
//! | | out-of-band in RegAllocResult.
|
||||||
|
//! | | - instruction sequence has spills,
|
||||||
|
//! | | reloads, and moves inserted.
|
||||||
|
//! | | - other invariants same as above.)
|
||||||
|
//! | |
|
||||||
|
//! | | [preamble/postamble]
|
||||||
|
//! | |
|
||||||
|
//! | VCode<arch_backend::Inst> (machine instructions:
|
||||||
|
//! | | - stack-frame size known.
|
||||||
|
//! | | - out-of-band instruction sequence
|
||||||
|
//! | | has preamble prepended to entry
|
||||||
|
//! | | block, and postamble injected before
|
||||||
|
//! | | every return instruction.
|
||||||
|
//! | | - all symbolic stack references to
|
||||||
|
//! | | stackslots and spillslots are resolved
|
||||||
|
//! | | to concrete FP-offset mem addresses.)
|
||||||
|
//! | | [block/insn ordering]
|
||||||
|
//! | |
|
||||||
|
//! | VCode<arch_backend::Inst> (machine instructions:
|
||||||
|
//! | | - vcode.final_block_order is filled in.
|
||||||
|
//! | | - new insn sequence from regalloc is
|
||||||
|
//! | | placed back into vcode and block
|
||||||
|
//! | | boundaries are updated.)
|
||||||
|
//! | | [redundant branch/block
|
||||||
|
//! | | removal]
|
||||||
|
//! | |
|
||||||
|
//! | VCode<arch_backend::Inst> (machine instructions:
|
||||||
|
//! | | - all blocks that were just an
|
||||||
|
//! | | unconditional branch are removed.)
|
||||||
|
//! | |
|
||||||
|
//! | | [branch finalization
|
||||||
|
//! | | (fallthroughs)]
|
||||||
|
//! | |
|
||||||
|
//! | VCode<arch_backend::Inst> (machine instructions:
|
||||||
|
//! | | - all branches are in lowered one-
|
||||||
|
//! | | target form, but targets are still
|
||||||
|
//! | | block indices.)
|
||||||
|
//! | |
|
||||||
|
//! | | [branch finalization
|
||||||
|
//! | | (offsets)]
|
||||||
|
//! | |
|
||||||
|
//! | VCode<arch_backend::Inst> (machine instructions:
|
||||||
|
//! | | - all branch offsets from start of
|
||||||
|
//! | | function are known, and all branches
|
||||||
|
//! | | have resolved-offset targets.)
|
||||||
|
//! | |
|
||||||
|
//! | | [MemArg finalization]
|
||||||
|
//! | |
|
||||||
|
//! | VCode<arch_backend::Inst> (machine instructions:
|
||||||
|
//! | | - all MemArg references to the constant
|
||||||
|
//! | | pool are replaced with offsets.
|
||||||
|
//! | | - all constant-pool data is collected
|
||||||
|
//! | | in the VCode.)
|
||||||
|
//! | |
|
||||||
|
//! | | [binary emission]
|
||||||
|
//! | |
|
||||||
|
//! | Vec<u8> (machine code!)
|
||||||
|
//! |
|
||||||
|
|
||||||
|
#![allow(unused_imports)]
|
||||||
|
|
||||||
|
use crate::binemit::{
|
||||||
|
CodeInfo, CodeOffset, CodeSink, MemoryCodeSink, RelocSink, StackmapSink, TrapSink,
|
||||||
|
};
|
||||||
|
use crate::entity::EntityRef;
|
||||||
|
use crate::entity::SecondaryMap;
|
||||||
|
use crate::ir::condcodes::IntCC;
|
||||||
|
use crate::ir::ValueLocations;
|
||||||
|
use crate::ir::{DataFlowGraph, Function, Inst, Opcode, Type, Value};
|
||||||
|
use crate::isa::RegUnit;
|
||||||
|
use crate::result::CodegenResult;
|
||||||
|
use crate::settings::Flags;
|
||||||
|
use crate::HashMap;
|
||||||
|
use alloc::boxed::Box;
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
use core::fmt::Debug;
|
||||||
|
use core::iter::Sum;
|
||||||
|
use regalloc::Map as RegallocMap;
|
||||||
|
use regalloc::RegUsageCollector;
|
||||||
|
use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable};
|
||||||
|
use smallvec::SmallVec;
|
||||||
|
use std::hash::Hash;
|
||||||
|
use std::string::String;
|
||||||
|
use target_lexicon::Triple;
|
||||||
|
|
||||||
|
// Submodules making up the machine-backend infrastructure. Each is
// re-exported wholesale so that everything is reachable via `machinst::*`.
pub mod lower;
pub use lower::*;
pub mod vcode;
pub use vcode::*;
pub mod compile;
pub use compile::*;
pub mod blockorder;
pub use blockorder::*;
pub mod abi;
pub use abi::*;
pub mod pp;
pub use pp::*;
pub mod sections;
pub use sections::*;
pub mod adapter;
pub use adapter::*;
|
||||||
|
|
||||||
|
/// A machine instruction. Implemented by each architecture backend's
/// instruction enum; this is the interface the machine-independent
/// pipeline (lowering, regalloc glue, branch fixup, emission) works with.
pub trait MachInst: Clone + Debug {
    /// Return the registers referenced by this machine instruction along with
    /// the modes of reference (use, def, modify).
    fn get_regs(&self, collector: &mut RegUsageCollector);

    /// Map virtual registers to physical registers using the given virt->phys
    /// maps corresponding to the program points prior to, and after, this instruction.
    fn map_regs(
        &mut self,
        pre_map: &RegallocMap<VirtualReg, RealReg>,
        post_map: &RegallocMap<VirtualReg, RealReg>,
    );

    /// If this is a simple move, return the (destination, source) pair of
    /// registers — the `Writable` element is the destination.
    fn is_move(&self) -> Option<(Writable<Reg>, Reg)>;

    /// Is this a terminator (branch or ret)? If so, return its type
    /// (ret/uncond/cond) and target if applicable.
    fn is_term<'a>(&'a self) -> MachTerminator<'a>;

    /// Returns true if the instruction is an epilogue placeholder
    /// (emitted for fallthrough returns; replaced by the real epilogue later).
    fn is_epilogue_placeholder(&self) -> bool;

    /// Generate a move of a value of type `ty` from `from_reg` to `to_reg`.
    fn gen_move(to_reg: Writable<Reg>, from_reg: Reg, ty: Type) -> Self;

    /// Generate a zero-length no-op.
    fn gen_zero_len_nop() -> Self;

    /// Possibly operate on a value directly in a spill-slot rather than a
    /// register. Useful if the machine has register-memory instruction forms
    /// (e.g., add directly from or directly to memory), like x86.
    fn maybe_direct_reload(&self, reg: VirtualReg, slot: SpillSlot) -> Option<Self>;

    /// Determine a register class to store the given Cranelift type.
    fn rc_for_type(ty: Type) -> RegClass;

    /// Generate a jump to another target. Used during lowering of
    /// control flow.
    fn gen_jump(target: BlockIndex) -> Self;

    /// Generate a NOP. The `preferred_size` parameter allows the caller to
    /// request a NOP of that size, or as close to it as possible. The machine
    /// backend may return a NOP whose binary encoding is smaller than the
    /// preferred size, but must not return a NOP that is larger. However,
    /// the instruction must have a nonzero size.
    fn gen_nop(preferred_size: usize) -> Self;

    /// Rewrite block targets using the block-target map.
    fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]);

    /// Finalize branches once the block order (fallthrough) is known.
    fn with_fallthrough_block(&mut self, fallthrough_block: Option<BlockIndex>);

    /// Update instruction once block offsets are known. These offsets are
    /// relative to the beginning of the function. `targets` is indexed by
    /// BlockIndex.
    fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]);

    /// Get the register universe for this backend.
    fn reg_universe() -> RealRegUniverse;

    /// Align a basic block offset (from start of function). By default, no
    /// alignment occurs.
    fn align_basic_block(offset: CodeOffset) -> CodeOffset {
        offset
    }
}
|
||||||
|
|
||||||
|
/// Describes a block terminator (not call) in the vcode, when its branches
/// have not yet been finalized (so a branch may have two targets).
/// Returned by `MachInst::is_term()`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub enum MachTerminator<'a> {
    /// Not a terminator.
    None,
    /// A return instruction.
    Ret,
    /// An unconditional branch to another block.
    Uncond(BlockIndex),
    /// A conditional branch to one of two other blocks.
    Cond(BlockIndex, BlockIndex),
    /// An indirect branch with known possible targets.
    Indirect(&'a [BlockIndex]),
}
|
||||||
|
|
||||||
|
/// A trait describing the ability to encode a MachInst into binary machine code.
/// Parameterized on the output-section sink type.
pub trait MachInstEmit<O: MachSectionOutput> {
    /// Emit the instruction's binary encoding into `code`.
    fn emit(&self, code: &mut O);
}
|
||||||
|
|
||||||
|
/// The result of a `MachBackend::compile_function()` call. Contains machine
/// code (as bytes) and a disassembly, if requested.
pub struct MachCompileResult {
    /// Machine code.
    pub sections: MachSections,
    /// Size of stack frame, in bytes.
    pub frame_size: u32,
    /// Disassembly, if requested (via the `want_disasm` compile flag).
    pub disasm: Option<String>,
}
|
||||||
|
|
||||||
|
impl MachCompileResult {
|
||||||
|
/// Get a `CodeInfo` describing section sizes from this compilation result.
|
||||||
|
pub fn code_info(&self) -> CodeInfo {
|
||||||
|
let code_size = self.sections.total_size();
|
||||||
|
CodeInfo {
|
||||||
|
code_size,
|
||||||
|
jumptables_size: 0,
|
||||||
|
rodata_size: 0,
|
||||||
|
total_size: code_size,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Top-level machine backend trait, which wraps all monomorphized code and
/// allows a virtual call from the machine-independent `Function::compile()`.
pub trait MachBackend {
    /// Compile the given function. Consumes the function.
    ///
    /// When `want_disasm` is true, the result's `disasm` field is populated.
    fn compile_function(
        &self,
        func: Function,
        want_disasm: bool,
    ) -> CodegenResult<MachCompileResult>;

    /// Return flags for this backend.
    fn flags(&self) -> &Flags;

    /// Return triple for this backend.
    fn triple(&self) -> Triple;

    /// Return name for this backend.
    fn name(&self) -> &'static str;

    /// Return the register universe for this backend.
    fn reg_universe(&self) -> RealRegUniverse;

    /// Machine-specific condcode info needed by TargetIsa: the condition
    /// signalling overflow of an unsigned add.
    fn unsigned_add_overflow_condition(&self) -> IntCC {
        // TODO: this is what x86 specifies. Is this right for arm64?
        IntCC::UnsignedLessThan
    }

    /// Machine-specific condcode info needed by TargetIsa: the condition
    /// signalling overflow (borrow) of an unsigned subtract.
    fn unsigned_sub_overflow_condition(&self) -> IntCC {
        // TODO: this is what x86 specifies. Is this right for arm64?
        IntCC::UnsignedLessThan
    }
}
|
||||||
66
cranelift/codegen/src/machinst/pp.rs
Normal file
66
cranelift/codegen/src/machinst/pp.rs
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
//! Pretty-printing for machine code (virtual-registerized or final).
|
||||||
|
|
||||||
|
use regalloc::{RealRegUniverse, Reg, Writable};
|
||||||
|
|
||||||
|
use std::fmt::Debug;
|
||||||
|
use std::hash::Hash;
|
||||||
|
use std::string::{String, ToString};
|
||||||
|
|
||||||
|
// FIXME: Should this go into regalloc.rs instead?
|
||||||
|
|
||||||
|
/// A trait for printing instruction bits and pieces, with the ability to
/// take a contextualising RealRegUniverse that is used to give proper names to
/// registers.
pub trait ShowWithRRU {
    /// Return a string that shows the implementing object in context of the
    /// given `RealRegUniverse`, if provided.
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String;

    /// The same as `show_rru`, but with an optional hint giving a size in
    /// bytes. Its interpretation is object-dependent, and it is intended to
    /// pass around enough information to facilitate printing sub-parts of
    /// real registers correctly. Objects may ignore size hints that are
    /// irrelevant to them.
    fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, _size: u8) -> String {
        // Default implementation is to ignore the hint.
        self.show_rru(mb_rru)
    }
}
|
||||||
|
|
||||||
|
impl ShowWithRRU for Reg {
|
||||||
|
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||||
|
if self.is_real() {
|
||||||
|
if let Some(rru) = mb_rru {
|
||||||
|
let reg_ix = self.get_index();
|
||||||
|
if reg_ix < rru.regs.len() {
|
||||||
|
return rru.regs[reg_ix].1.to_string();
|
||||||
|
} else {
|
||||||
|
// We have a real reg which isn't listed in the universe.
|
||||||
|
// Per the regalloc.rs interface requirements, this is
|
||||||
|
// Totally Not Allowed. Print it generically anyway, so
|
||||||
|
// we have something to debug.
|
||||||
|
return format!("!!{:?}!!", self);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// The reg is virtual, or we have no universe. Be generic.
|
||||||
|
format!("%{:?}", self)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn show_rru_sized(&self, _mb_rru: Option<&RealRegUniverse>, _size: u8) -> String {
|
||||||
|
// For the specific case of Reg, we demand not to have a size hint,
|
||||||
|
// since interpretation of the size is target specific, but this code
|
||||||
|
// is used by all targets.
|
||||||
|
panic!("Reg::show_rru_sized: impossible to implement");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// A writable register prints exactly like the underlying register it wraps.
impl<R: ShowWithRRU + Copy + Ord + Hash + Eq + Debug> ShowWithRRU for Writable<R> {
    fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
        self.to_reg().show_rru(mb_rru)
    }

    fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String {
        self.to_reg().show_rru_sized(mb_rru, size)
    }
}
|
||||||
351
cranelift/codegen/src/machinst/sections.rs
Normal file
351
cranelift/codegen/src/machinst/sections.rs
Normal file
@@ -0,0 +1,351 @@
|
|||||||
|
//! In-memory representation of compiled machine code, in multiple sections
|
||||||
|
//! (text, constant pool / rodata, etc). Emission occurs into multiple sections
|
||||||
|
//! simultaneously, so we buffer the result in memory and hand off to the
|
||||||
|
//! caller at the end of compilation.
|
||||||
|
|
||||||
|
use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc, RelocSink, StackmapSink, TrapSink};
|
||||||
|
use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode};
|
||||||
|
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
|
||||||
|
/// A collection of sections with defined start-offsets.
pub struct MachSections {
    /// Sections, in offset order.
    pub sections: Vec<MachSection>,
}
|
||||||
|
|
||||||
|
impl MachSections {
|
||||||
|
/// New, empty set of sections.
|
||||||
|
pub fn new() -> MachSections {
|
||||||
|
MachSections { sections: vec![] }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a section with a known offset and size. Returns the index.
|
||||||
|
pub fn add_section(&mut self, start: CodeOffset, length: CodeOffset) -> usize {
|
||||||
|
let idx = self.sections.len();
|
||||||
|
self.sections.push(MachSection::new(start, length));
|
||||||
|
idx
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Mutably borrow the given section by index.
|
||||||
|
pub fn get_section<'a>(&'a mut self, idx: usize) -> &'a mut MachSection {
|
||||||
|
&mut self.sections[idx]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get mutable borrows of two sections simultaneously. Used during
|
||||||
|
/// instruction emission to provide references to the .text and .rodata
|
||||||
|
/// (constant pool) sections.
|
||||||
|
pub fn two_sections<'a>(
|
||||||
|
&'a mut self,
|
||||||
|
idx1: usize,
|
||||||
|
idx2: usize,
|
||||||
|
) -> (&'a mut MachSection, &'a mut MachSection) {
|
||||||
|
assert!(idx1 < idx2);
|
||||||
|
assert!(idx1 < self.sections.len());
|
||||||
|
assert!(idx2 < self.sections.len());
|
||||||
|
let (first, rest) = self.sections.split_at_mut(idx2);
|
||||||
|
(&mut first[idx1], &mut rest[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Emit this set of sections to a set of sinks for the code,
|
||||||
|
/// relocations, traps, and stackmap.
|
||||||
|
pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
|
||||||
|
// N.B.: we emit every section into the .text section as far as
|
||||||
|
// the `CodeSink` is concerned; we do not bother to segregate
|
||||||
|
// the contents into the actual program text, the jumptable and the
|
||||||
|
// rodata (constant pool). This allows us to generate code assuming
|
||||||
|
// that these will not be relocated relative to each other, and avoids
|
||||||
|
// having to designate each section as belonging in one of the three
|
||||||
|
// fixed categories defined by `CodeSink`. If this becomes a problem
|
||||||
|
// later (e.g. because of memory permissions or similar), we can
|
||||||
|
// add this designation and segregate the output; take care, however,
|
||||||
|
// to add the appropriate relocations in this case.
|
||||||
|
|
||||||
|
for section in &self.sections {
|
||||||
|
if section.data.len() > 0 {
|
||||||
|
while sink.offset() < section.start_offset {
|
||||||
|
sink.put1(0);
|
||||||
|
}
|
||||||
|
section.emit(sink);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sink.begin_jumptables();
|
||||||
|
sink.begin_rodata();
|
||||||
|
sink.end_codegen();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the total required size for these sections.
|
||||||
|
pub fn total_size(&self) -> CodeOffset {
|
||||||
|
if self.sections.len() == 0 {
|
||||||
|
0
|
||||||
|
} else {
|
||||||
|
// Find the last non-empty section.
|
||||||
|
self.sections
|
||||||
|
.iter()
|
||||||
|
.rev()
|
||||||
|
.find(|s| s.data.len() > 0)
|
||||||
|
.map(|s| s.cur_offset_from_start())
|
||||||
|
.unwrap_or(0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An abstraction over MachSection and MachSectionSize: some
|
||||||
|
/// receiver of section data.
|
||||||
|
pub trait MachSectionOutput {
|
||||||
|
/// Get the current offset from the start of all sections.
|
||||||
|
fn cur_offset_from_start(&self) -> CodeOffset;
|
||||||
|
|
||||||
|
/// Get the start offset of this section.
|
||||||
|
fn start_offset(&self) -> CodeOffset;
|
||||||
|
|
||||||
|
/// Add 1 byte to the section.
|
||||||
|
fn put1(&mut self, _: u8);
|
||||||
|
|
||||||
|
/// Add 2 bytes to the section.
|
||||||
|
fn put2(&mut self, value: u16) {
|
||||||
|
self.put1((value & 0xff) as u8);
|
||||||
|
self.put1(((value >> 8) & 0xff) as u8);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add 4 bytes to the section.
|
||||||
|
fn put4(&mut self, value: u32) {
|
||||||
|
self.put1((value & 0xff) as u8);
|
||||||
|
self.put1(((value >> 8) & 0xff) as u8);
|
||||||
|
self.put1(((value >> 16) & 0xff) as u8);
|
||||||
|
self.put1(((value >> 24) & 0xff) as u8);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add 8 bytes to the section.
|
||||||
|
fn put8(&mut self, value: u64) {
|
||||||
|
self.put1((value & 0xff) as u8);
|
||||||
|
self.put1(((value >> 8) & 0xff) as u8);
|
||||||
|
self.put1(((value >> 16) & 0xff) as u8);
|
||||||
|
self.put1(((value >> 24) & 0xff) as u8);
|
||||||
|
self.put1(((value >> 32) & 0xff) as u8);
|
||||||
|
self.put1(((value >> 40) & 0xff) as u8);
|
||||||
|
self.put1(((value >> 48) & 0xff) as u8);
|
||||||
|
self.put1(((value >> 56) & 0xff) as u8);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a slice of bytes to the section.
|
||||||
|
fn put_data(&mut self, data: &[u8]);
|
||||||
|
|
||||||
|
/// Add a relocation at the current offset.
|
||||||
|
fn add_reloc(&mut self, loc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend);
|
||||||
|
|
||||||
|
/// Add a trap record at the current offset.
|
||||||
|
fn add_trap(&mut self, loc: SourceLoc, code: TrapCode);
|
||||||
|
|
||||||
|
/// Add a call return address record at the current offset.
|
||||||
|
fn add_call_site(&mut self, loc: SourceLoc, opcode: Opcode);
|
||||||
|
|
||||||
|
/// Align up to the given alignment.
|
||||||
|
fn align_to(&mut self, align_to: CodeOffset) {
|
||||||
|
assert!(align_to.is_power_of_two());
|
||||||
|
while self.cur_offset_from_start() & (align_to - 1) != 0 {
|
||||||
|
self.put1(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A section of output to be emitted to a CodeSink / RelocSink in bulk.
/// Multiple sections may be created with known start offsets in advance; the
/// usual use-case is to create the .text (code) and .rodata (constant pool) at
/// once, after computing the length of the code, so that constant references
/// can use known offsets as instructions are emitted.
pub struct MachSection {
    /// The starting offset of this section.
    pub start_offset: CodeOffset,
    /// The limit of this section, defined by the start of the next section.
    pub length_limit: CodeOffset,
    /// The section contents, as raw bytes.
    pub data: Vec<u8>,
    /// Any relocations referring to this section.
    pub relocs: Vec<MachReloc>,
    /// Any trap records referring to this section.
    pub traps: Vec<MachTrap>,
    /// Any call site record referring to this section.
    pub call_sites: Vec<MachCallSite>,
}
|
||||||
|
|
||||||
|
impl MachSection {
    /// Create a new section, known to start at `start_offset` and with a size limited to `length_limit`.
    pub fn new(start_offset: CodeOffset, length_limit: CodeOffset) -> MachSection {
        MachSection {
            start_offset,
            length_limit,
            data: vec![],
            relocs: vec![],
            traps: vec![],
            call_sites: vec![],
        }
    }

    /// Emit this section to the CodeSink and other associated sinks. The
    /// current offset of the CodeSink must match the starting offset of this
    /// section.
    ///
    /// The reloc/trap/call-site lists are consumed with one cursor each as
    /// the bytes are streamed out; each record is emitted just before the
    /// byte at its recorded offset.
    /// NOTE(review): only one record of each kind is consumed per byte, so
    /// this assumes at most one reloc/trap/call-site per offset — confirm.
    pub fn emit<CS: CodeSink>(&self, sink: &mut CS) {
        assert!(sink.offset() == self.start_offset);

        let mut next_reloc = 0;
        let mut next_trap = 0;
        let mut next_call_site = 0;
        for (idx, byte) in self.data.iter().enumerate() {
            if next_reloc < self.relocs.len() {
                let reloc = &self.relocs[next_reloc];
                if reloc.offset == idx as CodeOffset {
                    sink.reloc_external(reloc.srcloc, reloc.kind, &reloc.name, reloc.addend);
                    next_reloc += 1;
                }
            }
            if next_trap < self.traps.len() {
                let trap = &self.traps[next_trap];
                if trap.offset == idx as CodeOffset {
                    sink.trap(trap.code, trap.srcloc);
                    next_trap += 1;
                }
            }
            if next_call_site < self.call_sites.len() {
                let call_site = &self.call_sites[next_call_site];
                if call_site.ret_addr == idx as CodeOffset {
                    sink.add_call_site(call_site.opcode, call_site.srcloc);
                    next_call_site += 1;
                }
            }
            sink.put1(*byte);
        }
    }
}
|
||||||
|
|
||||||
|
impl MachSectionOutput for MachSection {
|
||||||
|
fn cur_offset_from_start(&self) -> CodeOffset {
|
||||||
|
self.start_offset + self.data.len() as CodeOffset
|
||||||
|
}
|
||||||
|
|
||||||
|
fn start_offset(&self) -> CodeOffset {
|
||||||
|
self.start_offset
|
||||||
|
}
|
||||||
|
|
||||||
|
fn put1(&mut self, value: u8) {
|
||||||
|
assert!(((self.data.len() + 1) as CodeOffset) <= self.length_limit);
|
||||||
|
self.data.push(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn put_data(&mut self, data: &[u8]) {
|
||||||
|
assert!(((self.data.len() + data.len()) as CodeOffset) <= self.length_limit);
|
||||||
|
self.data.extend_from_slice(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_reloc(&mut self, srcloc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend) {
|
||||||
|
let name = name.clone();
|
||||||
|
self.relocs.push(MachReloc {
|
||||||
|
offset: self.data.len() as CodeOffset,
|
||||||
|
srcloc,
|
||||||
|
kind,
|
||||||
|
name,
|
||||||
|
addend,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_trap(&mut self, srcloc: SourceLoc, code: TrapCode) {
|
||||||
|
self.traps.push(MachTrap {
|
||||||
|
offset: self.data.len() as CodeOffset,
|
||||||
|
srcloc,
|
||||||
|
code,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_call_site(&mut self, srcloc: SourceLoc, opcode: Opcode) {
|
||||||
|
self.call_sites.push(MachCallSite {
|
||||||
|
ret_addr: self.data.len() as CodeOffset,
|
||||||
|
srcloc,
|
||||||
|
opcode,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A MachSectionOutput implementation that records only size.
///
/// Used to measure instruction sizes (e.g. for branch-offset computation)
/// without materializing any bytes.
pub struct MachSectionSize {
    /// The starting offset of this section.
    pub start_offset: CodeOffset,
    /// The current offset of this section.
    pub offset: CodeOffset,
}
|
||||||
|
|
||||||
|
impl MachSectionSize {
|
||||||
|
/// Create a new size-counting dummy section.
|
||||||
|
pub fn new(start_offset: CodeOffset) -> MachSectionSize {
|
||||||
|
MachSectionSize {
|
||||||
|
start_offset,
|
||||||
|
offset: start_offset,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the size this section would take if emitted with a real sink.
|
||||||
|
pub fn size(&self) -> CodeOffset {
|
||||||
|
self.offset - self.start_offset
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MachSectionOutput for MachSectionSize {
|
||||||
|
fn cur_offset_from_start(&self) -> CodeOffset {
|
||||||
|
// All size-counting sections conceptually start at offset 0; this doesn't
|
||||||
|
// matter when counting code size.
|
||||||
|
self.offset
|
||||||
|
}
|
||||||
|
|
||||||
|
fn start_offset(&self) -> CodeOffset {
|
||||||
|
self.start_offset
|
||||||
|
}
|
||||||
|
|
||||||
|
fn put1(&mut self, _: u8) {
|
||||||
|
self.offset += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn put_data(&mut self, data: &[u8]) {
|
||||||
|
self.offset += data.len() as CodeOffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_reloc(&mut self, _: SourceLoc, _: Reloc, _: &ExternalName, _: Addend) {}
|
||||||
|
|
||||||
|
fn add_trap(&mut self, _: SourceLoc, _: TrapCode) {}
|
||||||
|
|
||||||
|
fn add_call_site(&mut self, _: SourceLoc, _: Opcode) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A relocation resulting from a compilation.
pub struct MachReloc {
    /// The offset at which the relocation applies, *relative to the
    /// containing section*.
    pub offset: CodeOffset,
    /// The original source location.
    pub srcloc: SourceLoc,
    /// The kind of relocation.
    pub kind: Reloc,
    /// The external symbol / name to which this relocation refers.
    pub name: ExternalName,
    /// The addend to add to the symbol value.
    pub addend: i64,
}
|
||||||
|
|
||||||
|
/// A trap record resulting from a compilation.
pub struct MachTrap {
    /// The offset at which the trap instruction occurs, *relative to the
    /// containing section*.
    pub offset: CodeOffset,
    /// The original source location.
    pub srcloc: SourceLoc,
    /// The trap code.
    pub code: TrapCode,
}
|
||||||
|
|
||||||
|
/// A call site record resulting from a compilation.
pub struct MachCallSite {
    /// The offset of the call's return address, *relative to the containing section*.
    pub ret_addr: CodeOffset,
    /// The original source location.
    pub srcloc: SourceLoc,
    /// The call's opcode.
    pub opcode: Opcode,
}
|
||||||
738
cranelift/codegen/src/machinst/vcode.rs
Normal file
738
cranelift/codegen/src/machinst/vcode.rs
Normal file
@@ -0,0 +1,738 @@
|
|||||||
|
//! This implements the VCode container: a CFG of Insts that have been lowered.
|
||||||
|
//!
|
||||||
|
//! VCode is virtual-register code. An instruction in VCode is almost a machine
|
||||||
|
//! instruction; however, its register slots can refer to virtual registers in
|
||||||
|
//! addition to real machine registers.
|
||||||
|
//!
|
||||||
|
//! VCode is structured with traditional basic blocks, and
|
||||||
|
//! each block must be terminated by an unconditional branch (one target), a
|
||||||
|
//! conditional branch (two targets), or a return (no targets). Note that this
|
||||||
|
//! slightly differs from the machine code of most ISAs: in most ISAs, a
|
||||||
|
//! conditional branch has one target (and the not-taken case falls through).
|
||||||
|
//! However, we expect that machine backends will elide branches to the following
|
||||||
|
//! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond /
|
||||||
|
//! branch-uncond pair if *both* targets are not fallthrough. This allows us to
|
||||||
|
//! play with layout prior to final binary emission, as well, if we want.
|
||||||
|
//!
|
||||||
|
//! See the main module comment in `mod.rs` for more details on the VCode-based
|
||||||
|
//! backend pipeline.
|
||||||
|
|
||||||
|
use crate::binemit::Reloc;
|
||||||
|
use crate::ir;
|
||||||
|
use crate::machinst::*;
|
||||||
|
use crate::settings;
|
||||||
|
|
||||||
|
use regalloc::Function as RegallocFunction;
|
||||||
|
use regalloc::Set as RegallocSet;
|
||||||
|
use regalloc::{BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector};
|
||||||
|
|
||||||
|
use alloc::boxed::Box;
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
use log::debug;
|
||||||
|
use smallvec::SmallVec;
|
||||||
|
use std::fmt;
|
||||||
|
use std::iter;
|
||||||
|
use std::ops::Index;
|
||||||
|
use std::string::String;
|
||||||
|
|
||||||
|
/// Index referring to an instruction in VCode.
pub type InsnIndex = u32;
/// Index referring to a basic block in VCode.
pub type BlockIndex = u32;
|
||||||
|
|
||||||
|
/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be
/// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`.
pub trait VCodeInst: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize> {}
// Blanket impl: any type satisfying the bounds is automatically a VCodeInst.
impl<I: MachInst + MachInstEmit<MachSection> + MachInstEmit<MachSectionSize>> VCodeInst for I {}
|
||||||
|
|
||||||
|
/// A function in "VCode" (virtualized-register code) form, after lowering.
/// This is essentially a standard CFG of basic blocks, where each basic block
/// consists of lowered instructions produced by the machine-specific backend.
pub struct VCode<I: VCodeInst> {
    /// Function liveins.
    liveins: RegallocSet<RealReg>,

    /// Function liveouts.
    liveouts: RegallocSet<RealReg>,

    /// VReg IR-level types, indexed by virtual-register index.
    vreg_types: Vec<Type>,

    /// Lowered machine instructions in order corresponding to the original IR.
    pub insts: Vec<I>,

    /// Entry block.
    entry: BlockIndex,

    /// Block instruction indices: (start, end) ranges into `insts`, indexed
    /// by BlockIndex.
    pub block_ranges: Vec<(InsnIndex, InsnIndex)>,

    /// Block successors: index range in the successor-list below.
    block_succ_range: Vec<(usize, usize)>,

    /// Block successor lists, concatenated into one Vec. The `block_succ_range`
    /// list of tuples above gives (start, end) ranges within this list that
    /// correspond to each basic block's successors.
    block_succs: Vec<BlockIndex>,

    /// Block indices by IR block.
    block_by_bb: SecondaryMap<ir::Block, BlockIndex>,

    /// IR block for each VCode Block. The length of this Vec will likely be
    /// less than the total number of Blocks, because new Blocks (for edge
    /// splits, for example) are appended during lowering.
    bb_by_block: Vec<ir::Block>,

    /// Order of block IDs in final generated code.
    final_block_order: Vec<BlockIndex>,

    /// Final block offsets. Computed during branch finalization and used
    /// during emission.
    final_block_offsets: Vec<CodeOffset>,

    /// Size of code, accounting for block layout / alignment.
    code_size: CodeOffset,

    /// ABI object: target-specific argument/return/frame handling.
    abi: Box<dyn ABIBody<I>>,
}
|
||||||
|
|
||||||
|
/// A builder for a VCode function body. This builder is designed for the
/// lowering approach that we take: we traverse basic blocks in forward
/// (original IR) order, but within each basic block, we generate code from
/// bottom to top; and within each IR instruction that we visit in this reverse
/// order, we emit machine instructions in *forward* order again.
///
/// Hence, to produce the final instructions in proper order, we perform two
/// swaps. First, the machine instructions (`I` instances) are produced in
/// forward order for an individual IR instruction. Then these are *reversed*
/// and concatenated to `bb_insns` at the end of the IR instruction lowering.
/// The `bb_insns` vec will thus contain all machine instructions for a basic
/// block, in reverse order. Finally, when we're done with a basic block, we
/// reverse the whole block's vec of instructions again, and concatenate onto
/// the VCode's insts.
pub struct VCodeBuilder<I: VCodeInst> {
    /// In-progress VCode.
    vcode: VCode<I>,

    /// Current basic block instructions, in reverse order (because blocks are
    /// built bottom-to-top).
    bb_insns: SmallVec<[I; 32]>,

    /// Current IR-inst instructions, in forward order.
    ir_inst_insns: SmallVec<[I; 4]>,

    /// Start of succs for the current block in the concatenated succs list.
    succ_start: usize,
}
|
||||||
|
|
||||||
|
impl<I: VCodeInst> VCodeBuilder<I> {
|
||||||
|
/// Create a new VCodeBuilder.
|
||||||
|
pub fn new(abi: Box<dyn ABIBody<I>>) -> VCodeBuilder<I> {
|
||||||
|
let vcode = VCode::new(abi);
|
||||||
|
VCodeBuilder {
|
||||||
|
vcode,
|
||||||
|
bb_insns: SmallVec::new(),
|
||||||
|
ir_inst_insns: SmallVec::new(),
|
||||||
|
succ_start: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Access the ABI object.
|
||||||
|
pub fn abi(&mut self) -> &mut dyn ABIBody<I> {
|
||||||
|
&mut *self.vcode.abi
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the type of a VReg.
|
||||||
|
pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) {
|
||||||
|
while self.vcode.vreg_types.len() <= vreg.get_index() {
|
||||||
|
self.vcode.vreg_types.push(ir::types::I8); // Default type.
|
||||||
|
}
|
||||||
|
self.vcode.vreg_types[vreg.get_index()] = ty;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the underlying bb-to-BlockIndex map.
|
||||||
|
pub fn blocks_by_bb(&self) -> &SecondaryMap<ir::Block, BlockIndex> {
|
||||||
|
&self.vcode.block_by_bb
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Initialize the bb-to-BlockIndex map. Returns the first free
|
||||||
|
/// BlockIndex.
|
||||||
|
pub fn init_bb_map(&mut self, blocks: &[ir::Block]) -> BlockIndex {
|
||||||
|
let mut bindex: BlockIndex = 0;
|
||||||
|
for bb in blocks.iter() {
|
||||||
|
self.vcode.block_by_bb[*bb] = bindex;
|
||||||
|
self.vcode.bb_by_block.push(*bb);
|
||||||
|
bindex += 1;
|
||||||
|
}
|
||||||
|
bindex
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the BlockIndex for an IR block.
|
||||||
|
pub fn bb_to_bindex(&self, bb: ir::Block) -> BlockIndex {
|
||||||
|
self.vcode.block_by_bb[bb]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the current block as the entry block.
|
||||||
|
pub fn set_entry(&mut self, block: BlockIndex) {
|
||||||
|
self.vcode.entry = block;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// End the current IR instruction. Must be called after pushing any
|
||||||
|
/// instructions and prior to ending the basic block.
|
||||||
|
pub fn end_ir_inst(&mut self) {
|
||||||
|
while let Some(i) = self.ir_inst_insns.pop() {
|
||||||
|
self.bb_insns.push(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// End the current basic block. Must be called after emitting vcode insts
|
||||||
|
/// for IR insts and prior to ending the function (building the VCode).
|
||||||
|
pub fn end_bb(&mut self) -> BlockIndex {
|
||||||
|
assert!(self.ir_inst_insns.is_empty());
|
||||||
|
let block_num = self.vcode.block_ranges.len() as BlockIndex;
|
||||||
|
// Push the instructions.
|
||||||
|
let start_idx = self.vcode.insts.len() as InsnIndex;
|
||||||
|
while let Some(i) = self.bb_insns.pop() {
|
||||||
|
self.vcode.insts.push(i);
|
||||||
|
}
|
||||||
|
let end_idx = self.vcode.insts.len() as InsnIndex;
|
||||||
|
// Add the instruction index range to the list of blocks.
|
||||||
|
self.vcode.block_ranges.push((start_idx, end_idx));
|
||||||
|
// End the successors list.
|
||||||
|
let succ_end = self.vcode.block_succs.len();
|
||||||
|
self.vcode
|
||||||
|
.block_succ_range
|
||||||
|
.push((self.succ_start, succ_end));
|
||||||
|
self.succ_start = succ_end;
|
||||||
|
|
||||||
|
block_num
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Push an instruction for the current BB and current IR inst within the BB.
|
||||||
|
pub fn push(&mut self, insn: I) {
|
||||||
|
match insn.is_term() {
|
||||||
|
MachTerminator::None | MachTerminator::Ret => {}
|
||||||
|
MachTerminator::Uncond(target) => {
|
||||||
|
self.vcode.block_succs.push(target);
|
||||||
|
}
|
||||||
|
MachTerminator::Cond(true_branch, false_branch) => {
|
||||||
|
self.vcode.block_succs.push(true_branch);
|
||||||
|
self.vcode.block_succs.push(false_branch);
|
||||||
|
}
|
||||||
|
MachTerminator::Indirect(targets) => {
|
||||||
|
for target in targets {
|
||||||
|
self.vcode.block_succs.push(*target);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.ir_inst_insns.push(insn);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build the final VCode.
|
||||||
|
pub fn build(self) -> VCode<I> {
|
||||||
|
assert!(self.ir_inst_insns.is_empty());
|
||||||
|
assert!(self.bb_insns.is_empty());
|
||||||
|
self.vcode
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn block_ranges(indices: &[InstIx], len: usize) -> Vec<(usize, usize)> {
|
||||||
|
let v = indices
|
||||||
|
.iter()
|
||||||
|
.map(|iix| iix.get() as usize)
|
||||||
|
.chain(iter::once(len))
|
||||||
|
.collect::<Vec<usize>>();
|
||||||
|
v.windows(2).map(|p| (p[0], p[1])).collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_redundant_move<I: VCodeInst>(insn: &I) -> bool {
|
||||||
|
if let Some((to, from)) = insn.is_move() {
|
||||||
|
to.to_reg() == from
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_trivial_jump_block<I: VCodeInst>(vcode: &VCode<I>, block: BlockIndex) -> Option<BlockIndex> {
|
||||||
|
let range = vcode.block_insns(BlockIx::new(block));
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"is_trivial_jump_block: block {} has len {}",
|
||||||
|
block,
|
||||||
|
range.len()
|
||||||
|
);
|
||||||
|
|
||||||
|
if range.len() != 1 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let insn = range.first();
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
" -> only insn is: {:?} with terminator {:?}",
|
||||||
|
vcode.get_insn(insn),
|
||||||
|
vcode.get_insn(insn).is_term()
|
||||||
|
);
|
||||||
|
|
||||||
|
match vcode.get_insn(insn).is_term() {
|
||||||
|
MachTerminator::Uncond(target) => Some(target),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<I: VCodeInst> VCode<I> {
|
||||||
|
    /// New empty VCode.
    fn new(abi: Box<dyn ABIBody<I>>) -> VCode<I> {
        VCode {
            // Livein/liveout sets are determined by the ABI object.
            liveins: abi.liveins(),
            liveouts: abi.liveouts(),
            vreg_types: vec![],
            insts: vec![],
            // Block 0 is the entry unless `VCodeBuilder::set_entry` overrides it.
            entry: 0,
            block_ranges: vec![],
            block_succ_range: vec![],
            block_succs: vec![],
            block_by_bb: SecondaryMap::with_default(0),
            bb_by_block: vec![],
            final_block_order: vec![],
            final_block_offsets: vec![],
            code_size: 0,
            abi,
        }
    }
|
||||||
|
|
||||||
|
    /// Get the IR-level type of a VReg. Panics if the vreg's type was never
    /// recorded via `VCodeBuilder::set_vreg_type`.
    pub fn vreg_type(&self, vreg: VirtualReg) -> Type {
        self.vreg_types[vreg.get_index()]
    }
|
||||||
|
|
||||||
|
    /// Get the entry block.
    pub fn entry(&self) -> BlockIndex {
        self.entry
    }
|
||||||
|
|
||||||
|
/// Get the number of blocks. Block indices will be in the range `0 ..
|
||||||
|
/// (self.num_blocks() - 1)`.
|
||||||
|
pub fn num_blocks(&self) -> usize {
|
||||||
|
self.block_ranges.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stack frame size for the full function's body.
|
||||||
|
pub fn frame_size(&self) -> u32 {
|
||||||
|
self.abi.frame_size()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the successors for a block.
|
||||||
|
pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] {
|
||||||
|
let (start, end) = self.block_succ_range[block as usize];
|
||||||
|
&self.block_succs[start..end]
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Take the results of register allocation, with a sequence of
|
||||||
|
/// instructions including spliced fill/reload/move instructions, and replace
|
||||||
|
/// the VCode with them.
|
||||||
|
pub fn replace_insns_from_regalloc(
|
||||||
|
&mut self,
|
||||||
|
result: RegAllocResult<Self>,
|
||||||
|
flags: &settings::Flags,
|
||||||
|
) {
|
||||||
|
self.final_block_order = compute_final_block_order(self);
|
||||||
|
|
||||||
|
// Record the spillslot count and clobbered registers for the ABI/stack
|
||||||
|
// setup code.
|
||||||
|
self.abi.set_num_spillslots(result.num_spill_slots as usize);
|
||||||
|
self.abi
|
||||||
|
.set_clobbered(result.clobbered_registers.map(|r| Writable::from_reg(*r)));
|
||||||
|
|
||||||
|
// We want to move instructions over in final block order, using the new
|
||||||
|
// block-start map given by the regalloc.
|
||||||
|
let block_ranges: Vec<(usize, usize)> =
|
||||||
|
block_ranges(result.target_map.elems(), result.insns.len());
|
||||||
|
let mut final_insns = vec![];
|
||||||
|
let mut final_block_ranges = vec![(0, 0); self.num_blocks()];
|
||||||
|
|
||||||
|
for block in &self.final_block_order {
|
||||||
|
let (start, end) = block_ranges[*block as usize];
|
||||||
|
let final_start = final_insns.len() as InsnIndex;
|
||||||
|
|
||||||
|
if *block == self.entry {
|
||||||
|
// Start with the prologue.
|
||||||
|
final_insns.extend(self.abi.gen_prologue(flags).into_iter());
|
||||||
|
}
|
||||||
|
|
||||||
|
for i in start..end {
|
||||||
|
let insn = &result.insns[i];
|
||||||
|
|
||||||
|
// Elide redundant moves at this point (we only know what is
|
||||||
|
// redundant once registers are allocated).
|
||||||
|
if is_redundant_move(insn) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Whenever encountering a return instruction, replace it
|
||||||
|
// with the epilogue.
|
||||||
|
let is_ret = insn.is_term() == MachTerminator::Ret;
|
||||||
|
if is_ret {
|
||||||
|
final_insns.extend(self.abi.gen_epilogue(flags).into_iter());
|
||||||
|
} else {
|
||||||
|
final_insns.push(insn.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let final_end = final_insns.len() as InsnIndex;
|
||||||
|
final_block_ranges[*block as usize] = (final_start, final_end);
|
||||||
|
}
|
||||||
|
|
||||||
|
self.insts = final_insns;
|
||||||
|
self.block_ranges = final_block_ranges;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Removes redundant branches, rewriting targets to point directly to the
|
||||||
|
/// ultimate block at the end of a chain of trivial one-target jumps.
|
||||||
|
pub fn remove_redundant_branches(&mut self) {
|
||||||
|
// For each block, compute the actual target block, looking through up to one
|
||||||
|
// block with single-target jumps (this will remove empty edge blocks inserted
|
||||||
|
// by phi-lowering).
|
||||||
|
let block_rewrites: Vec<BlockIndex> = (0..self.num_blocks() as u32)
|
||||||
|
.map(|bix| is_trivial_jump_block(self, bix).unwrap_or(bix))
|
||||||
|
.collect();
|
||||||
|
let mut refcounts: Vec<usize> = vec![0; self.num_blocks()];
|
||||||
|
|
||||||
|
debug!(
|
||||||
|
"remove_redundant_branches: block_rewrites = {:?}",
|
||||||
|
block_rewrites
|
||||||
|
);
|
||||||
|
|
||||||
|
refcounts[self.entry as usize] = 1;
|
||||||
|
|
||||||
|
for block in 0..self.num_blocks() as u32 {
|
||||||
|
for insn in self.block_insns(BlockIx::new(block)) {
|
||||||
|
self.get_insn_mut(insn)
|
||||||
|
.with_block_rewrites(&block_rewrites[..]);
|
||||||
|
match self.get_insn(insn).is_term() {
|
||||||
|
MachTerminator::Uncond(bix) => {
|
||||||
|
refcounts[bix as usize] += 1;
|
||||||
|
}
|
||||||
|
MachTerminator::Cond(bix1, bix2) => {
|
||||||
|
refcounts[bix1 as usize] += 1;
|
||||||
|
refcounts[bix2 as usize] += 1;
|
||||||
|
}
|
||||||
|
MachTerminator::Indirect(blocks) => {
|
||||||
|
for block in blocks {
|
||||||
|
refcounts[*block as usize] += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let deleted: Vec<bool> = refcounts.iter().map(|r| *r == 0).collect();
|
||||||
|
|
||||||
|
let block_order = std::mem::replace(&mut self.final_block_order, vec![]);
|
||||||
|
self.final_block_order = block_order
|
||||||
|
.into_iter()
|
||||||
|
.filter(|b| !deleted[*b as usize])
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
// Rewrite successor information based on the block-rewrite map.
|
||||||
|
for succ in &mut self.block_succs {
|
||||||
|
let new_succ = block_rewrites[*succ as usize];
|
||||||
|
*succ = new_succ;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Mutate branch instructions to (i) lower two-way condbrs to one-way,
|
||||||
|
/// depending on fallthrough; and (ii) use concrete offsets.
|
||||||
|
pub fn finalize_branches(&mut self)
|
||||||
|
where
|
||||||
|
I: MachInstEmit<MachSectionSize>,
|
||||||
|
{
|
||||||
|
// Compute fallthrough block, indexed by block.
|
||||||
|
let num_final_blocks = self.final_block_order.len();
|
||||||
|
let mut block_fallthrough: Vec<Option<BlockIndex>> = vec![None; self.num_blocks()];
|
||||||
|
for i in 0..(num_final_blocks - 1) {
|
||||||
|
let from = self.final_block_order[i];
|
||||||
|
let to = self.final_block_order[i + 1];
|
||||||
|
block_fallthrough[from as usize] = Some(to);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pass over VCode instructions and finalize two-way branches into
|
||||||
|
// one-way branches with fallthrough.
|
||||||
|
for block in 0..self.num_blocks() {
|
||||||
|
let next_block = block_fallthrough[block];
|
||||||
|
let (start, end) = self.block_ranges[block];
|
||||||
|
|
||||||
|
for iix in start..end {
|
||||||
|
let insn = &mut self.insts[iix as usize];
|
||||||
|
insn.with_fallthrough_block(next_block);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute block offsets.
|
||||||
|
let mut code_section = MachSectionSize::new(0);
|
||||||
|
let mut block_offsets = vec![0; self.num_blocks()];
|
||||||
|
for block in &self.final_block_order {
|
||||||
|
code_section.offset = I::align_basic_block(code_section.offset);
|
||||||
|
block_offsets[*block as usize] = code_section.offset;
|
||||||
|
let (start, end) = self.block_ranges[*block as usize];
|
||||||
|
for iix in start..end {
|
||||||
|
self.insts[iix as usize].emit(&mut code_section);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We now have the section layout.
|
||||||
|
self.final_block_offsets = block_offsets;
|
||||||
|
self.code_size = code_section.size();
|
||||||
|
|
||||||
|
// Update branches with known block offsets. This looks like the
|
||||||
|
// traversal above, but (i) does not update block_offsets, rather uses
|
||||||
|
// it (so forward references are now possible), and (ii) mutates the
|
||||||
|
// instructions.
|
||||||
|
let mut code_section = MachSectionSize::new(0);
|
||||||
|
for block in &self.final_block_order {
|
||||||
|
code_section.offset = I::align_basic_block(code_section.offset);
|
||||||
|
let (start, end) = self.block_ranges[*block as usize];
|
||||||
|
for iix in start..end {
|
||||||
|
self.insts[iix as usize]
|
||||||
|
.with_block_offsets(code_section.offset, &self.final_block_offsets[..]);
|
||||||
|
self.insts[iix as usize].emit(&mut code_section);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Emit the instructions to a list of sections.
|
||||||
|
pub fn emit(&self) -> MachSections
|
||||||
|
where
|
||||||
|
I: MachInstEmit<MachSection>,
|
||||||
|
{
|
||||||
|
let mut sections = MachSections::new();
|
||||||
|
let code_idx = sections.add_section(0, self.code_size);
|
||||||
|
let code_section = sections.get_section(code_idx);
|
||||||
|
|
||||||
|
for block in &self.final_block_order {
|
||||||
|
let new_offset = I::align_basic_block(code_section.cur_offset_from_start());
|
||||||
|
while new_offset > code_section.cur_offset_from_start() {
|
||||||
|
// Pad with NOPs up to the aligned block offset.
|
||||||
|
let nop = I::gen_nop((new_offset - code_section.cur_offset_from_start()) as usize);
|
||||||
|
nop.emit(code_section);
|
||||||
|
}
|
||||||
|
assert_eq!(code_section.cur_offset_from_start(), new_offset);
|
||||||
|
|
||||||
|
let (start, end) = self.block_ranges[*block as usize];
|
||||||
|
for iix in start..end {
|
||||||
|
self.insts[iix as usize].emit(code_section);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sections
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the IR block for a BlockIndex, if one exists.
|
||||||
|
pub fn bindex_to_bb(&self, block: BlockIndex) -> Option<ir::Block> {
|
||||||
|
if (block as usize) < self.bb_by_block.len() {
|
||||||
|
Some(self.bb_by_block[block as usize])
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<I: VCodeInst> RegallocFunction for VCode<I> {
|
||||||
|
type Inst = I;
|
||||||
|
|
||||||
|
fn insns(&self) -> &[I] {
|
||||||
|
&self.insts[..]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn insns_mut(&mut self) -> &mut [I] {
|
||||||
|
&mut self.insts[..]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_insn(&self, insn: InstIx) -> &I {
|
||||||
|
&self.insts[insn.get() as usize]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_insn_mut(&mut self, insn: InstIx) -> &mut I {
|
||||||
|
&mut self.insts[insn.get() as usize]
|
||||||
|
}
|
||||||
|
|
||||||
|
fn blocks(&self) -> Range<BlockIx> {
|
||||||
|
Range::new(BlockIx::new(0), self.block_ranges.len())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn entry_block(&self) -> BlockIx {
|
||||||
|
BlockIx::new(self.entry)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn block_insns(&self, block: BlockIx) -> Range<InstIx> {
|
||||||
|
let (start, end) = self.block_ranges[block.get() as usize];
|
||||||
|
Range::new(InstIx::new(start), (end - start) as usize)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn block_succs(&self, block: BlockIx) -> Vec<BlockIx> {
|
||||||
|
let (start, end) = self.block_succ_range[block.get() as usize];
|
||||||
|
self.block_succs[start..end]
|
||||||
|
.iter()
|
||||||
|
.cloned()
|
||||||
|
.map(BlockIx::new)
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_ret(&self, insn: InstIx) -> bool {
|
||||||
|
match self.insts[insn.get() as usize].is_term() {
|
||||||
|
MachTerminator::Ret => true,
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_regs(insn: &I, collector: &mut RegUsageCollector) {
|
||||||
|
insn.get_regs(collector)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn map_regs(
|
||||||
|
insn: &mut I,
|
||||||
|
pre_map: &RegallocMap<VirtualReg, RealReg>,
|
||||||
|
post_map: &RegallocMap<VirtualReg, RealReg>,
|
||||||
|
) {
|
||||||
|
insn.map_regs(pre_map, post_map);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_move(&self, insn: &I) -> Option<(Writable<Reg>, Reg)> {
|
||||||
|
insn.is_move()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_spillslot_size(&self, regclass: RegClass, vreg: VirtualReg) -> u32 {
|
||||||
|
let ty = self.vreg_type(vreg);
|
||||||
|
self.abi.get_spillslot_size(regclass, ty)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: VirtualReg) -> I {
|
||||||
|
let ty = self.vreg_type(vreg);
|
||||||
|
self.abi.gen_spill(to_slot, from_reg, ty)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn gen_reload(&self, to_reg: Writable<RealReg>, from_slot: SpillSlot, vreg: VirtualReg) -> I {
|
||||||
|
let ty = self.vreg_type(vreg);
|
||||||
|
self.abi.gen_reload(to_reg, from_slot, ty)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn gen_move(&self, to_reg: Writable<RealReg>, from_reg: RealReg, vreg: VirtualReg) -> I {
|
||||||
|
let ty = self.vreg_type(vreg);
|
||||||
|
I::gen_move(to_reg.map(|r| r.to_reg()), from_reg.to_reg(), ty)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn gen_zero_len_nop(&self) -> I {
|
||||||
|
I::gen_zero_len_nop()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn maybe_direct_reload(&self, insn: &I, reg: VirtualReg, slot: SpillSlot) -> Option<I> {
|
||||||
|
insn.maybe_direct_reload(reg, slot)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn func_liveins(&self) -> RegallocSet<RealReg> {
|
||||||
|
self.liveins.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn func_liveouts(&self) -> RegallocSet<RealReg> {
|
||||||
|
self.liveouts.clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// N.B.: Debug impl assumes that VCode has already been through all compilation
|
||||||
|
// passes, and so has a final block order and offsets.
|
||||||
|
|
||||||
|
impl<I: VCodeInst> fmt::Debug for VCode<I> {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
writeln!(f, "VCode_Debug {{")?;
|
||||||
|
writeln!(f, " Entry block: {}", self.entry)?;
|
||||||
|
writeln!(f, " Final block order: {:?}", self.final_block_order)?;
|
||||||
|
|
||||||
|
for block in 0..self.num_blocks() {
|
||||||
|
writeln!(f, "Block {}:", block,)?;
|
||||||
|
for succ in self.succs(block as BlockIndex) {
|
||||||
|
writeln!(f, " (successor: Block {})", succ)?;
|
||||||
|
}
|
||||||
|
let (start, end) = self.block_ranges[block];
|
||||||
|
writeln!(f, " (instruction range: {} .. {})", start, end)?;
|
||||||
|
for inst in start..end {
|
||||||
|
writeln!(f, " Inst {}: {:?}", inst, self.insts[inst as usize])?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
writeln!(f, "}}")?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pretty-printing with `RealRegUniverse` context.
|
||||||
|
impl<I: VCodeInst + ShowWithRRU> ShowWithRRU for VCode<I> {
|
||||||
|
fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String {
|
||||||
|
use crate::alloc::string::ToString;
|
||||||
|
use std::fmt::Write;
|
||||||
|
|
||||||
|
// Calculate an order in which to display the blocks. This is the same
|
||||||
|
// as final_block_order, but also includes blocks which are in the
|
||||||
|
// representation but not in final_block_order.
|
||||||
|
let mut display_order = Vec::<usize>::new();
|
||||||
|
// First display blocks in |final_block_order|
|
||||||
|
for bix in &self.final_block_order {
|
||||||
|
assert!((*bix as usize) < self.num_blocks());
|
||||||
|
display_order.push(*bix as usize);
|
||||||
|
}
|
||||||
|
// Now also take care of those not listed in |final_block_order|.
|
||||||
|
// This is quadratic, but it's also debug-only code.
|
||||||
|
for bix in 0..self.num_blocks() {
|
||||||
|
if display_order.contains(&bix) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
display_order.push(bix);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut s = String::new();
|
||||||
|
s = s + &format!("VCode_ShowWithRRU {{{{");
|
||||||
|
s = s + &"\n".to_string();
|
||||||
|
s = s + &format!(" Entry block: {}", self.entry);
|
||||||
|
s = s + &"\n".to_string();
|
||||||
|
s = s + &format!(" Final block order: {:?}", self.final_block_order);
|
||||||
|
s = s + &"\n".to_string();
|
||||||
|
|
||||||
|
for i in 0..self.num_blocks() {
|
||||||
|
let block = display_order[i];
|
||||||
|
|
||||||
|
let omitted =
|
||||||
|
(if !self.final_block_order.is_empty() && i >= self.final_block_order.len() {
|
||||||
|
"** OMITTED **"
|
||||||
|
} else {
|
||||||
|
""
|
||||||
|
})
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
s = s + &format!("Block {}: {}", block, omitted);
|
||||||
|
s = s + &"\n".to_string();
|
||||||
|
if let Some(bb) = self.bindex_to_bb(block as BlockIndex) {
|
||||||
|
s = s + &format!(" (original IR block: {})\n", bb);
|
||||||
|
}
|
||||||
|
for succ in self.succs(block as BlockIndex) {
|
||||||
|
s = s + &format!(" (successor: Block {})", succ);
|
||||||
|
s = s + &"\n".to_string();
|
||||||
|
}
|
||||||
|
let (start, end) = self.block_ranges[block];
|
||||||
|
s = s + &format!(" (instruction range: {} .. {})", start, end);
|
||||||
|
s = s + &"\n".to_string();
|
||||||
|
for inst in start..end {
|
||||||
|
s = s + &format!(
|
||||||
|
" Inst {}: {}",
|
||||||
|
inst,
|
||||||
|
self.insts[inst as usize].show_rru(mb_rru)
|
||||||
|
);
|
||||||
|
s = s + &"\n".to_string();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
s = s + &format!("}}}}");
|
||||||
|
s = s + &"\n".to_string();
|
||||||
|
|
||||||
|
s
|
||||||
|
}
|
||||||
|
}
|
||||||
68
cranelift/codegen/src/num_uses.rs
Normal file
68
cranelift/codegen/src/num_uses.rs
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
//! A pass that computes the number of uses of any given instruction.
|
||||||
|
|
||||||
|
#![allow(dead_code)]
|
||||||
|
#![allow(unused_imports)]
|
||||||
|
|
||||||
|
use crate::cursor::{Cursor, FuncCursor};
|
||||||
|
use crate::dce::has_side_effect;
|
||||||
|
use crate::entity::SecondaryMap;
|
||||||
|
use crate::ir::dfg::ValueDef;
|
||||||
|
use crate::ir::instructions::InstructionData;
|
||||||
|
use crate::ir::Value;
|
||||||
|
use crate::ir::{DataFlowGraph, Function, Inst, Opcode};
|
||||||
|
|
||||||
|
/// Auxiliary data structure that counts the number of uses of any given
|
||||||
|
/// instruction in a Function. This is used during instruction selection
|
||||||
|
/// to essentially do incremental DCE: when an instruction is no longer
|
||||||
|
/// needed because its computation has been isel'd into another machine
|
||||||
|
/// instruction at every use site, we can skip it.
|
||||||
|
#[derive(Clone, Debug)]
pub struct NumUses {
    // Per-instruction use count. Instructions never recorded in the map have
    // the default count of zero.
    uses: SecondaryMap<Inst, u32>,
}
|
||||||
|
|
||||||
|
impl NumUses {
|
||||||
|
fn new() -> NumUses {
|
||||||
|
NumUses {
|
||||||
|
uses: SecondaryMap::with_default(0),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Compute the NumUses analysis result for a function.
|
||||||
|
pub fn compute(func: &Function) -> NumUses {
|
||||||
|
let mut uses = NumUses::new();
|
||||||
|
for bb in func.layout.blocks() {
|
||||||
|
for inst in func.layout.block_insts(bb) {
|
||||||
|
for arg in func.dfg.inst_args(inst) {
|
||||||
|
let v = func.dfg.resolve_aliases(*arg);
|
||||||
|
uses.add_value(&func.dfg, v);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uses
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_value(&mut self, dfg: &DataFlowGraph, v: Value) {
|
||||||
|
match dfg.value_def(v) {
|
||||||
|
ValueDef::Result(inst, _) => {
|
||||||
|
self.uses[inst] += 1;
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// How many times is an instruction used?
|
||||||
|
pub fn use_count(&self, i: Inst) -> usize {
|
||||||
|
self.uses[i] as usize
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Is an instruction used at all?
|
||||||
|
pub fn is_used(&self, i: Inst) -> bool {
|
||||||
|
self.use_count(i) > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Take the complete uses map, consuming this analysis result.
|
||||||
|
pub fn take_uses(self) -> SecondaryMap<Inst, u32> {
|
||||||
|
self.uses
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user