diff --git a/Cargo.lock b/Cargo.lock index b8c92cbc46..8d7d237b6e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -379,6 +379,7 @@ dependencies = [ "gimli", "hashbrown 0.7.1", "log", + "regalloc", "serde", "smallvec", "target-lexicon", @@ -1599,6 +1600,16 @@ dependencies = [ "rust-argon2", ] +[[package]] +name = "regalloc" +version = "0.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ce0cd835fa6e91bbf5d010beee19d0c2e97e4ad5e13c399a31122cfc83bdd6" +dependencies = [ + "log", + "rustc-hash", +] + [[package]] name = "regex" version = "1.3.6" @@ -1663,6 +1674,12 @@ version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc_version" version = "0.2.3" diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml index 148fcf9327..83219d42e6 100644 --- a/cranelift/codegen/Cargo.toml +++ b/cranelift/codegen/Cargo.toml @@ -24,6 +24,7 @@ gimli = { version = "0.20.0", default-features = false, features = ["write"], op smallvec = { version = "1.0.0" } thiserror = "1.0.4" byteorder = { version = "1.3.2", default-features = false } +regalloc = "0.0.17" # It is a goal of the cranelift-codegen crate to have minimal external dependencies. # Please don't add any unless they are essential to the task of creating binary # machine code. Integration tests that need external dependencies can be @@ -33,7 +34,7 @@ byteorder = { version = "1.3.2", default-features = false } cranelift-codegen-meta = { path = "meta", version = "0.62.0" } [features] -default = ["std", "unwind"] +default = ["std", "unwind", "all-arch"] # The "std" feature enables use of libstd. The "core" feature enables use # of some minimal std-like replacement libraries. At least one of these two diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index bad6fd7e79..c94707690a 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -55,9 +55,10 @@ pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef}; use crate::binemit; use crate::flowgraph; use crate::ir; -use crate::isa::enc_tables::Encodings; +pub use crate::isa::enc_tables::Encodings; #[cfg(feature = "unwind")] use crate::isa::fde::RegisterMappingError; +use crate::machinst::MachBackend; use crate::regalloc; use crate::result::CodegenResult; use crate::settings; @@ -400,6 +401,11 @@ pub trait TargetIsa: fmt::Display + Send + Sync { ) { // No-op by default } + + /// Get the new-style MachBackend, if this is an adapter around one. 
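+ ///
+ /// A hypothetical usage sketch (not part of this patch; `isa` and `func`
+ /// are assumed to exist at the call site):
+ ///
+ /// ```ignore
+ /// if let Some(backend) = isa.get_mach_backend() {
+ ///     // Bypass the legacy encoding tables and compile with the
+ ///     // new-style backend directly; this returns a
+ ///     // `CodegenResult<MachCompileResult>`.
+ ///     let result = backend.compile_function(func, /* want_disasm = */ false);
+ /// }
+ /// ```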
+ fn get_mach_backend(&self) -> Option<&dyn MachBackend> { + None + } } impl Debug for &dyn TargetIsa { diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 772562b916..2d6651a67e 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -71,6 +71,7 @@ pub mod flowgraph; pub mod ir; pub mod isa; pub mod loop_analysis; +pub mod machinst; pub mod print_errors; pub mod settings; pub mod timing; @@ -90,6 +91,7 @@ mod iterators; mod legalizer; mod licm; mod nan_canonicalization; +mod num_uses; mod partition_slice; mod postopt; mod predicates; diff --git a/cranelift/codegen/src/machinst/abi.rs b/cranelift/codegen/src/machinst/abi.rs new file mode 100644 index 0000000000..7aaa66fe14 --- /dev/null +++ b/cranelift/codegen/src/machinst/abi.rs @@ -0,0 +1,142 @@ +//! ABI definitions. + +use crate::ir; +use crate::ir::StackSlot; +use crate::machinst::*; +use crate::settings; + +use regalloc::{Reg, Set, SpillSlot, VirtualReg, Writable}; + +/// Trait implemented by an object that tracks ABI-related state (e.g., stack +/// layout) and can generate code while emitting the *body* of a function. +pub trait ABIBody { + /// Get the liveins of the function. + fn liveins(&self) -> Set; + + /// Get the liveouts of the function. + fn liveouts(&self) -> Set; + + /// Number of arguments. + fn num_args(&self) -> usize; + + /// Number of return values. + fn num_retvals(&self) -> usize; + + /// Number of stack slots (not spill slots). + fn num_stackslots(&self) -> usize; + + /// Generate an instruction which copies an argument to a destination + /// register. + fn gen_copy_arg_to_reg(&self, idx: usize, into_reg: Writable) -> I; + + /// Generate an instruction which copies a source register to a return + /// value slot. + fn gen_copy_reg_to_retval(&self, idx: usize, from_reg: Reg) -> I; + + /// Generate a return instruction. + fn gen_ret(&self) -> I; + + /// Generate an epilogue placeholder. + fn gen_epilogue_placeholder(&self) -> I; + + // ----------------------------------------------------------------- + // Every function above this line may only be called pre-regalloc. + // Every function below this line may only be called post-regalloc. + // `spillslots()` must be called before any other post-regalloc + // function. + // ---------------------------------------------------------------- + + /// Update with the number of spillslots, post-regalloc. + fn set_num_spillslots(&mut self, slots: usize); + + /// Update with the clobbered registers, post-regalloc. + fn set_clobbered(&mut self, clobbered: Set>); + + /// Load from a stackslot. + fn load_stackslot( + &self, + slot: StackSlot, + offset: usize, + ty: Type, + into_reg: Writable, + ) -> I; + + /// Store to a stackslot. + fn store_stackslot(&self, slot: StackSlot, offset: usize, ty: Type, from_reg: Reg) -> I; + + /// Load from a spillslot. + fn load_spillslot(&self, slot: SpillSlot, ty: Type, into_reg: Writable) -> I; + + /// Store to a spillslot. + fn store_spillslot(&self, slot: SpillSlot, ty: Type, from_reg: Reg) -> I; + + /// Generate a prologue, post-regalloc. This should include any stack + /// frame or other setup necessary to use the other methods (`load_arg`, + /// `store_retval`, and spillslot accesses.) |self| is mutable so that we + /// can store information in it which will be useful when creating the + /// epilogue. + fn gen_prologue(&mut self, flags: &settings::Flags) -> Vec; + + /// Generate an epilogue, post-regalloc. 
Note that this must generate the + /// actual return instruction (rather than emitting this in the lowering + /// logic), because the epilogue code comes before the return and the two are + /// likely closely related. + fn gen_epilogue(&self, flags: &settings::Flags) -> Vec; + + /// Returns the full frame size for the given function, after prologue emission has run. This + /// comprises the spill space, incoming argument space, alignment padding, etc. + fn frame_size(&self) -> u32; + + /// Get the spill-slot size. + fn get_spillslot_size(&self, rc: RegClass, ty: Type) -> u32; + + /// Generate a spill. + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, ty: Type) -> I; + + /// Generate a reload (fill). + fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot, ty: Type) -> I; +} + +/// Trait implemented by an object that tracks ABI-related state and can +/// generate code while emitting a *call* to a function. +/// +/// An instance of this trait returns information for a *particular* +/// callsite. It will usually be computed from the called function's +/// signature. +/// +/// Unlike `ABIBody` above, methods on this trait are not invoked directly +/// by the machine-independent code. Rather, the machine-specific lowering +/// code will typically create an `ABICall` when creating machine instructions +/// for an IR call instruction inside `lower()`, directly emit the arg and +/// and retval copies, and attach the register use/def info to the call. +/// +/// This trait is thus provided for convenience to the backends. +pub trait ABICall { + /// Get the number of arguments expected. + fn num_args(&self) -> usize; + + /// Save the clobbered registers. + /// Copy an argument value from a source register, prior to the call. + fn gen_copy_reg_to_arg(&self, idx: usize, from_reg: Reg) -> I; + + /// Copy a return value into a destination register, after the call returns. + fn gen_copy_retval_to_reg(&self, idx: usize, into_reg: Writable) -> I; + + /// Pre-adjust the stack, prior to argument copies and call. + fn gen_stack_pre_adjust(&self) -> Vec; + + /// Post-adjust the satck, after call return and return-value copies. + fn gen_stack_post_adjust(&self) -> Vec; + + /// Generate the call itself. + /// + /// The returned instruction should have proper use- and def-sets according + /// to the argument registers, return-value registers, and clobbered + /// registers for this function signature in this ABI. + /// + /// (Arg registers are uses, and retval registers are defs. Clobbered + /// registers are also logically defs, but should never be read; their + /// values are "defined" (to the regalloc) but "undefined" in every other + /// sense.) + fn gen_call(&self) -> Vec; +} diff --git a/cranelift/codegen/src/machinst/adapter.rs b/cranelift/codegen/src/machinst/adapter.rs new file mode 100644 index 0000000000..3f7c5b7b57 --- /dev/null +++ b/cranelift/codegen/src/machinst/adapter.rs @@ -0,0 +1,123 @@ +//! Adapter for a `MachBackend` to implement the `TargetIsa` trait. + +use crate::binemit; +use crate::ir; +use crate::isa::{EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa}; +use crate::machinst::*; +use crate::regalloc::{RegDiversions, RegisterSet}; +use crate::settings::Flags; + +use std::borrow::Cow; +use std::fmt; +use target_lexicon::Triple; + +/// A wrapper around a `MachBackend` that provides a `TargetIsa` impl. +pub struct TargetIsaAdapter { + backend: Box, + triple: Triple, +} + +impl TargetIsaAdapter { + /// Create a new `TargetIsa` wrapper around a `MachBackend`. 
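+ ///
+ /// Minimal usage sketch; `MyBackend` and its `new(shared_flags)` constructor
+ /// are hypothetical and not defined in this patch:
+ ///
+ /// ```ignore
+ /// let backend = MyBackend::new(shared_flags);
+ /// let isa = TargetIsaAdapter::new(backend);
+ /// // `isa` now implements `TargetIsa` and exposes the wrapped backend
+ /// // via `isa.get_mach_backend()`.
+ /// ```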
+ pub fn new(backend: B) -> TargetIsaAdapter { + let triple = backend.triple(); + TargetIsaAdapter { + backend: Box::new(backend), + triple, + } + } +} + +impl fmt::Display for TargetIsaAdapter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "MachBackend") + } +} + +impl TargetIsa for TargetIsaAdapter { + fn name(&self) -> &'static str { + self.backend.name() + } + + fn triple(&self) -> &Triple { + &self.triple + } + + fn flags(&self) -> &Flags { + self.backend.flags() + } + + fn register_info(&self) -> RegInfo { + // Called from function's Display impl, so we need a stub here. + RegInfo { + banks: &[], + classes: &[], + } + } + + fn legal_encodings<'a>( + &'a self, + _func: &'a ir::Function, + _inst: &'a ir::InstructionData, + _ctrl_typevar: ir::Type, + ) -> Encodings<'a> { + panic!("Should not be called when new-style backend is available!") + } + + fn encode( + &self, + _func: &ir::Function, + _inst: &ir::InstructionData, + _ctrl_typevar: ir::Type, + ) -> Result { + panic!("Should not be called when new-style backend is available!") + } + + fn encoding_info(&self) -> EncInfo { + panic!("Should not be called when new-style backend is available!") + } + + fn legalize_signature(&self, _sig: &mut Cow, _current: bool) { + panic!("Should not be called when new-style backend is available!") + } + + fn regclass_for_abi_type(&self, _ty: ir::Type) -> RegClass { + panic!("Should not be called when new-style backend is available!") + } + + fn allocatable_registers(&self, _func: &ir::Function) -> RegisterSet { + panic!("Should not be called when new-style backend is available!") + } + + fn prologue_epilogue(&self, _func: &mut ir::Function) -> CodegenResult<()> { + panic!("Should not be called when new-style backend is available!") + } + + #[cfg(feature = "testing_hooks")] + fn emit_inst( + &self, + _func: &ir::Function, + _inst: ir::Inst, + _divert: &mut RegDiversions, + _sink: &mut dyn binemit::CodeSink, + ) { + panic!("Should not be called when new-style backend is available!") + } + + /// Emit a whole function into memory. + fn emit_function_to_memory(&self, _func: &ir::Function, _sink: &mut binemit::MemoryCodeSink) { + panic!("Should not be called when new-style backend is available!") + } + + fn get_mach_backend(&self) -> Option<&dyn MachBackend> { + Some(&*self.backend) + } + + fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC { + self.backend.unsigned_add_overflow_condition() + } + + fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC { + self.backend.unsigned_sub_overflow_condition() + } +} diff --git a/cranelift/codegen/src/machinst/blockorder.rs b/cranelift/codegen/src/machinst/blockorder.rs new file mode 100644 index 0000000000..bfd4bf665a --- /dev/null +++ b/cranelift/codegen/src/machinst/blockorder.rs @@ -0,0 +1,59 @@ +//! Computation of basic block order in emitted code. + +use crate::machinst::*; + +/// Simple reverse postorder-based block order emission. +/// +/// TODO: use a proper algorithm, such as the bottom-up straight-line-section +/// construction algorithm. 
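+///
+/// Illustrative example: for a diamond CFG with edges 0->1, 0->2, 1->3 and
+/// 2->3, a depth-first postorder from entry block 0 is [3, 1, 2, 0]
+/// (successor-order dependent), so the emitted reverse postorder is
+/// [0, 2, 1, 3]: the entry comes first and every block precedes its
+/// successors along forward edges.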
+struct BlockRPO { + visited: Vec, + postorder: Vec, + deferred_last: Option, +} + +impl BlockRPO { + fn new(vcode: &VCode) -> BlockRPO { + BlockRPO { + visited: vec![false; vcode.num_blocks()], + postorder: vec![], + deferred_last: None, + } + } + + fn visit(&mut self, vcode: &VCode, block: BlockIndex) { + self.visited[block as usize] = true; + for succ in vcode.succs(block) { + if !self.visited[*succ as usize] { + self.visit(vcode, *succ); + } + } + + let (start, end) = &vcode.block_ranges[block as usize]; + for i in *start..*end { + if vcode.insts[i as usize].is_epilogue_placeholder() { + debug_assert!(self.deferred_last.is_none()); + self.deferred_last = Some(block); + return; + } + } + + self.postorder.push(block); + } + + fn rpo(self) -> Vec { + let mut rpo = self.postorder; + rpo.reverse(); + if let Some(block) = self.deferred_last { + rpo.push(block); + } + rpo + } +} + +/// Compute the final block order. +pub fn compute_final_block_order(vcode: &VCode) -> Vec { + let mut rpo = BlockRPO::new(vcode); + rpo.visit(vcode, vcode.entry()); + rpo.rpo() +} diff --git a/cranelift/codegen/src/machinst/compile.rs b/cranelift/codegen/src/machinst/compile.rs new file mode 100644 index 0000000000..458db9ea36 --- /dev/null +++ b/cranelift/codegen/src/machinst/compile.rs @@ -0,0 +1,76 @@ +//! Compilation backend pipeline: optimized IR to VCode / binemit. + +use crate::ir::Function; +use crate::machinst::*; +use crate::settings; +use crate::timing; + +use log::debug; +use regalloc::{allocate_registers, RegAllocAlgorithm}; +use std::env; + +/// Compile the given function down to VCode with allocated registers, ready +/// for binary emission. +pub fn compile( + f: &mut Function, + b: &B, + abi: Box>, + flags: &settings::Flags, +) -> VCode +where + B::MInst: ShowWithRRU, +{ + // This lowers the CL IR. + let mut vcode = Lower::new(f, abi).lower(b); + + let universe = &B::MInst::reg_universe(); + + debug!("vcode from lowering: \n{}", vcode.show_rru(Some(universe))); + + // Perform register allocation. + let algorithm = match env::var("REGALLOC") { + Ok(str) => match str.as_str() { + "lsrac" => RegAllocAlgorithm::LinearScanChecked, + "lsra" => RegAllocAlgorithm::LinearScan, + // to wit: btc doesn't mean "bitcoin" here + "btc" => RegAllocAlgorithm::BacktrackingChecked, + _ => RegAllocAlgorithm::Backtracking, + }, + // By default use backtracking, which is the fastest. + Err(_) => RegAllocAlgorithm::Backtracking, + }; + + let result = { + let _tt = timing::regalloc(); + allocate_registers( + &mut vcode, algorithm, universe, /*request_block_annotations=*/ false, + ) + .map_err(|err| { + debug!( + "Register allocation error for vcode\n{}\nError: {:?}", + vcode.show_rru(Some(universe)), + err + ); + err + }) + .expect("register allocation") + }; + + // Reorder vcode into final order and copy out final instruction sequence + // all at once. This also inserts prologues/epilogues. + vcode.replace_insns_from_regalloc(result, flags); + + vcode.remove_redundant_branches(); + + // Do final passes over code to finalize branches. + vcode.finalize_branches(); + + debug!( + "vcode after regalloc: final version:\n{}", + vcode.show_rru(Some(universe)) + ); + + //println!("{}\n", vcode.show_rru(Some(&B::MInst::reg_universe()))); + + vcode +} diff --git a/cranelift/codegen/src/machinst/lower.rs b/cranelift/codegen/src/machinst/lower.rs new file mode 100644 index 0000000000..2165416ebc --- /dev/null +++ b/cranelift/codegen/src/machinst/lower.rs @@ -0,0 +1,723 @@ +//! 
This module implements lowering (instruction selection) from Cranelift IR +//! to machine instructions with virtual registers. This is *almost* the final +//! machine code, except for register allocation. + +use crate::binemit::CodeSink; +use crate::dce::has_side_effect; +use crate::entity::SecondaryMap; +use crate::ir::{ + Block, ExternalName, Function, GlobalValueData, Inst, InstructionData, MemFlags, Opcode, + Signature, SourceLoc, Type, Value, ValueDef, +}; +use crate::isa::registers::RegUnit; +use crate::machinst::{ + ABIBody, BlockIndex, MachInst, MachInstEmit, VCode, VCodeBuilder, VCodeInst, +}; +use crate::num_uses::NumUses; + +use regalloc::Function as RegallocFunction; +use regalloc::{RealReg, Reg, RegClass, Set, VirtualReg, Writable}; + +use alloc::boxed::Box; +use alloc::vec::Vec; +use log::debug; +use smallvec::SmallVec; +use std::collections::VecDeque; +use std::ops::Range; + +/// A context that machine-specific lowering code can use to emit lowered instructions. This is the +/// view of the machine-independent per-function lowering context that is seen by the machine +/// backend. +pub trait LowerCtx { + /// Get the instdata for a given IR instruction. + fn data(&self, ir_inst: Inst) -> &InstructionData; + /// Get the controlling type for a polymorphic IR instruction. + fn ty(&self, ir_inst: Inst) -> Type; + /// Emit a machine instruction. + fn emit(&mut self, mach_inst: I); + /// Indicate that an IR instruction has been merged, and so one of its + /// uses is gone (replaced by uses of the instruction's inputs). This + /// helps the lowering algorithm to perform on-the-fly DCE, skipping over + /// unused instructions (such as immediates incorporated directly). + fn merged(&mut self, from_inst: Inst); + /// Get the producing instruction, if any, and output number, for the `idx`th input to the + /// given IR instruction + fn input_inst(&self, ir_inst: Inst, idx: usize) -> Option<(Inst, usize)>; + /// Map a Value to its associated writable (probably virtual) Reg. + fn value_to_writable_reg(&self, val: Value) -> Writable; + /// Map a Value to its associated (probably virtual) Reg. + fn value_to_reg(&self, val: Value) -> Reg; + /// Get the `idx`th input to the given IR instruction as a virtual register. + fn input(&self, ir_inst: Inst, idx: usize) -> Reg; + /// Get the `idx`th output of the given IR instruction as a virtual register. + fn output(&self, ir_inst: Inst, idx: usize) -> Writable; + /// Get the number of inputs to the given IR instruction. + fn num_inputs(&self, ir_inst: Inst) -> usize; + /// Get the number of outputs to the given IR instruction. + fn num_outputs(&self, ir_inst: Inst) -> usize; + /// Get the type for an instruction's input. + fn input_ty(&self, ir_inst: Inst, idx: usize) -> Type; + /// Get the type for an instruction's output. + fn output_ty(&self, ir_inst: Inst, idx: usize) -> Type; + /// Get a new temp. + fn tmp(&mut self, rc: RegClass, ty: Type) -> Writable; + /// Get the number of block params. + fn num_bb_params(&self, bb: Block) -> usize; + /// Get the register for a block param. + fn bb_param(&self, bb: Block, idx: usize) -> Reg; + /// Get the register for a return value. + fn retval(&self, idx: usize) -> Writable; + /// Get the target for a call instruction, as an `ExternalName`. + fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<&'b ExternalName>; + /// Get the signature for a call or call-indirect instruction. 
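+ ///
+ /// Sketch of intended use inside a backend's `lower()` (illustrative;
+ /// `ctx` is the `LowerCtx` passed to the backend and `ir_inst` is the
+ /// call instruction being lowered):
+ ///
+ /// ```ignore
+ /// if let Some(sig) = ctx.call_sig(ir_inst) {
+ ///     let num_rets = sig.returns.len();
+ ///     // Set up `num_rets` return-value registers for the call...
+ /// }
+ /// ```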
+ fn call_sig<'b>(&'b self, ir_inst: Inst) -> Option<&'b Signature>; + /// Get the symbol name and offset for a symbol_value instruction. + fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, i64)>; + /// Returns the memory flags of a given memory access. + fn memflags(&self, ir_inst: Inst) -> Option; + /// Get the source location for a given instruction. + fn srcloc(&self, ir_inst: Inst) -> SourceLoc; +} + +/// A machine backend. +pub trait LowerBackend { + /// The machine instruction type. + type MInst: VCodeInst; + + /// Lower a single instruction. Instructions are lowered in reverse order. + /// This function need not handle branches; those are always passed to + /// `lower_branch_group` below. + fn lower>(&self, ctx: &mut C, inst: Inst); + + /// Lower a block-terminating group of branches (which together can be seen as one + /// N-way branch), given a vcode BlockIndex for each target. + fn lower_branch_group>( + &self, + ctx: &mut C, + insts: &[Inst], + targets: &[BlockIndex], + fallthrough: Option, + ); +} + +/// Machine-independent lowering driver / machine-instruction container. Maintains a correspondence +/// from original Inst to MachInsts. +pub struct Lower<'a, I: VCodeInst> { + // The function to lower. + f: &'a Function, + + // Lowered machine instructions. + vcode: VCodeBuilder, + + // Number of active uses (minus `dec_use()` calls by backend) of each instruction. + num_uses: SecondaryMap, + + // Mapping from `Value` (SSA value in IR) to virtual register. + value_regs: SecondaryMap, + + // Return-value vregs. + retval_regs: Vec, + + // Next virtual register number to allocate. + next_vreg: u32, +} + +fn alloc_vreg( + value_regs: &mut SecondaryMap, + regclass: RegClass, + value: Value, + next_vreg: &mut u32, +) -> VirtualReg { + if value_regs[value].get_index() == 0 { + // default value in map. + let v = *next_vreg; + *next_vreg += 1; + value_regs[value] = Reg::new_virtual(regclass, v); + } + value_regs[value].as_virtual_reg().unwrap() +} + +enum GenerateReturn { + Yes, + No, +} + +impl<'a, I: VCodeInst> Lower<'a, I> { + /// Prepare a new lowering context for the given IR function. + pub fn new(f: &'a Function, abi: Box>) -> Lower<'a, I> { + let mut vcode = VCodeBuilder::new(abi); + + let num_uses = NumUses::compute(f).take_uses(); + + let mut next_vreg: u32 = 1; + + // Default register should never be seen, but the `value_regs` map needs a default and we + // don't want to push `Option` everywhere. All values will be assigned registers by the + // loops over block parameters and instruction results below. + // + // We do not use vreg 0 so that we can detect any unassigned register that leaks through. + let default_register = Reg::new_virtual(RegClass::I32, 0); + let mut value_regs = SecondaryMap::with_default(default_register); + + // Assign a vreg to each value. + for bb in f.layout.blocks() { + for param in f.dfg.block_params(bb) { + let vreg = alloc_vreg( + &mut value_regs, + I::rc_for_type(f.dfg.value_type(*param)), + *param, + &mut next_vreg, + ); + vcode.set_vreg_type(vreg, f.dfg.value_type(*param)); + } + for inst in f.layout.block_insts(bb) { + for result in f.dfg.inst_results(inst) { + let vreg = alloc_vreg( + &mut value_regs, + I::rc_for_type(f.dfg.value_type(*result)), + *result, + &mut next_vreg, + ); + vcode.set_vreg_type(vreg, f.dfg.value_type(*result)); + } + } + } + + // Assign a vreg to each return value. 
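+ // (Return-value vregs are drawn from the same counter as ordinary value
+ // vregs; vreg 0 stays reserved as the "unassigned" sentinel used for the
+ // `value_regs` default above.)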
+ let mut retval_regs = vec![]; + for ret in &f.signature.returns { + let v = next_vreg; + next_vreg += 1; + let regclass = I::rc_for_type(ret.value_type); + let vreg = Reg::new_virtual(regclass, v); + retval_regs.push(vreg); + vcode.set_vreg_type(vreg.as_virtual_reg().unwrap(), ret.value_type); + } + + Lower { + f, + vcode, + num_uses, + value_regs, + retval_regs, + next_vreg, + } + } + + fn gen_arg_setup(&mut self) { + if let Some(entry_bb) = self.f.layout.entry_block() { + debug!( + "gen_arg_setup: entry BB {} args are:\n{:?}", + entry_bb, + self.f.dfg.block_params(entry_bb) + ); + for (i, param) in self.f.dfg.block_params(entry_bb).iter().enumerate() { + let reg = Writable::from_reg(self.value_regs[*param]); + let insn = self.vcode.abi().gen_copy_arg_to_reg(i, reg); + self.vcode.push(insn); + } + } + } + + fn gen_retval_setup(&mut self, gen_ret_inst: GenerateReturn) { + for (i, reg) in self.retval_regs.iter().enumerate() { + let insn = self.vcode.abi().gen_copy_reg_to_retval(i, *reg); + self.vcode.push(insn); + } + let inst = match gen_ret_inst { + GenerateReturn::Yes => self.vcode.abi().gen_ret(), + GenerateReturn::No => self.vcode.abi().gen_epilogue_placeholder(), + }; + self.vcode.push(inst); + } + + fn find_reachable_bbs(&self) -> SmallVec<[Block; 16]> { + if let Some(entry) = self.f.layout.entry_block() { + let mut ret = SmallVec::new(); + let mut queue = VecDeque::new(); + let mut visited = SecondaryMap::with_default(false); + queue.push_back(entry); + visited[entry] = true; + while !queue.is_empty() { + let b = queue.pop_front().unwrap(); + ret.push(b); + let mut succs: SmallVec<[Block; 16]> = SmallVec::new(); + for inst in self.f.layout.block_insts(b) { + if self.f.dfg[inst].opcode().is_branch() { + succs.extend(branch_targets(self.f, b, inst).into_iter()); + } + } + for succ in succs.into_iter() { + if !visited[succ] { + queue.push_back(succ); + visited[succ] = true; + } + } + } + + ret + } else { + SmallVec::new() + } + } + + /// Lower the function. + pub fn lower>(mut self, backend: &B) -> VCode { + // Find all reachable blocks. + let mut bbs = self.find_reachable_bbs(); + // Work backward (reverse block order, reverse through each block), skipping insns with zero + // uses. + bbs.reverse(); + + // This records a Block-to-BlockIndex map so that branch targets can be resolved. + let mut next_bindex = self.vcode.init_bb_map(&bbs[..]); + + // Allocate a separate BlockIndex for each control-flow instruction so that we can create + // the edge blocks later. Each entry for a control-flow inst is the edge block; the list + // has (cf-inst, edge block, orig block) tuples. + let mut edge_blocks_by_inst: SecondaryMap> = + SecondaryMap::with_default(vec![]); + let mut edge_blocks: Vec<(Inst, BlockIndex, Block)> = vec![]; + + debug!("about to lower function: {:?}", self.f); + debug!("bb map: {:?}", self.vcode.blocks_by_bb()); + + for bb in bbs.iter() { + for inst in self.f.layout.block_insts(*bb) { + let op = self.f.dfg[inst].opcode(); + if op.is_branch() { + // Find the original target. + let mut add_succ = |next_bb| { + let edge_block = next_bindex; + next_bindex += 1; + edge_blocks_by_inst[inst].push(edge_block); + edge_blocks.push((inst, edge_block, next_bb)); + }; + for succ in branch_targets(self.f, *bb, inst).into_iter() { + add_succ(succ); + } + } + } + } + + for bb in bbs.iter() { + debug!("lowering bb: {}", bb); + + // If this is a return block, produce the return value setup. 
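+ // (Each block is lowered back-to-front, so the return-value setup and the
+ // return/epilogue placeholder are handled first here even though they
+ // terminate the block.)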
+ let last_insn = self.f.layout.block_insts(*bb).last().unwrap(); + let last_insn_opcode = self.f.dfg[last_insn].opcode(); + if last_insn_opcode.is_return() { + let gen_ret = if last_insn_opcode == Opcode::Return { + GenerateReturn::Yes + } else { + debug_assert!(last_insn_opcode == Opcode::FallthroughReturn); + GenerateReturn::No + }; + self.gen_retval_setup(gen_ret); + self.vcode.end_ir_inst(); + } + + // Find the branches at the end first, and process those, if any. + let mut branches: SmallVec<[Inst; 2]> = SmallVec::new(); + let mut targets: SmallVec<[BlockIndex; 2]> = SmallVec::new(); + + for inst in self.f.layout.block_insts(*bb).rev() { + debug!("lower: inst {}", inst); + if edge_blocks_by_inst[inst].len() > 0 { + branches.push(inst); + for target in edge_blocks_by_inst[inst].iter().rev().cloned() { + targets.push(target); + } + } else { + // We've reached the end of the branches -- process all as a group, first. + if branches.len() > 0 { + let fallthrough = self.f.layout.next_block(*bb); + let fallthrough = fallthrough.map(|bb| self.vcode.bb_to_bindex(bb)); + branches.reverse(); + targets.reverse(); + debug!( + "lower_branch_group: targets = {:?} branches = {:?}", + targets, branches + ); + backend.lower_branch_group( + &mut self, + &branches[..], + &targets[..], + fallthrough, + ); + self.vcode.end_ir_inst(); + branches.clear(); + targets.clear(); + } + + // Only codegen an instruction if it either has a side + // effect, or has at least one use of one of its results. + let num_uses = self.num_uses[inst]; + let side_effect = has_side_effect(self.f, inst); + if side_effect || num_uses > 0 { + backend.lower(&mut self, inst); + self.vcode.end_ir_inst(); + } else { + // If we're skipping the instruction, we need to dec-ref + // its arguments. + for arg in self.f.dfg.inst_args(inst) { + let val = self.f.dfg.resolve_aliases(*arg); + match self.f.dfg.value_def(val) { + ValueDef::Result(src_inst, _) => { + self.dec_use(src_inst); + } + _ => {} + } + } + } + } + } + + // There are possibly some branches left if the block contained only branches. + if branches.len() > 0 { + let fallthrough = self.f.layout.next_block(*bb); + let fallthrough = fallthrough.map(|bb| self.vcode.bb_to_bindex(bb)); + branches.reverse(); + targets.reverse(); + debug!( + "lower_branch_group: targets = {:?} branches = {:?}", + targets, branches + ); + backend.lower_branch_group(&mut self, &branches[..], &targets[..], fallthrough); + self.vcode.end_ir_inst(); + branches.clear(); + targets.clear(); + } + + // If this is the entry block, produce the argument setup. + if Some(*bb) == self.f.layout.entry_block() { + self.gen_arg_setup(); + self.vcode.end_ir_inst(); + } + + let vcode_bb = self.vcode.end_bb(); + debug!("finished building bb: BlockIndex {}", vcode_bb); + debug!("bb_to_bindex map says: {}", self.vcode.bb_to_bindex(*bb)); + assert!(vcode_bb == self.vcode.bb_to_bindex(*bb)); + if Some(*bb) == self.f.layout.entry_block() { + self.vcode.set_entry(vcode_bb); + } + } + + // Now create the edge blocks, with phi lowering (block parameter copies). + for (inst, edge_block, orig_block) in edge_blocks.into_iter() { + debug!( + "creating edge block: inst {}, edge_block {}, orig_block {}", + inst, edge_block, orig_block + ); + + // Create a temporary for each block parameter. 
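+ // First record each parameter's type and register class; actual
+ // temporaries are only materialized below if the source and destination
+ // register sets overlap.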
+ let phi_classes: Vec<(Type, RegClass)> = self + .f + .dfg + .block_params(orig_block) + .iter() + .map(|p| self.f.dfg.value_type(*p)) + .map(|ty| (ty, I::rc_for_type(ty))) + .collect(); + + // FIXME sewardj 2020Feb29: use SmallVec + let mut src_regs = vec![]; + let mut dst_regs = vec![]; + + // Create all of the phi uses (reads) from jump args to temps. + + // Round up all the source and destination regs + for (i, arg) in self.f.dfg.inst_variable_args(inst).iter().enumerate() { + let arg = self.f.dfg.resolve_aliases(*arg); + debug!("jump arg {} is {}", i, arg); + src_regs.push(self.value_regs[arg]); + } + for (i, param) in self.f.dfg.block_params(orig_block).iter().enumerate() { + debug!("bb arg {} is {}", i, param); + dst_regs.push(Writable::from_reg(self.value_regs[*param])); + } + debug_assert!(src_regs.len() == dst_regs.len()); + debug_assert!(phi_classes.len() == dst_regs.len()); + + // If, as is mostly the case, the source and destination register + // sets are non overlapping, then we can copy directly, so as to + // save the register allocator work. + if !Set::::from_vec(src_regs.clone()).intersects(&Set::::from_vec( + dst_regs.iter().map(|r| r.to_reg()).collect(), + )) { + for (dst_reg, (src_reg, (ty, _))) in + dst_regs.iter().zip(src_regs.iter().zip(phi_classes)) + { + self.vcode.push(I::gen_move(*dst_reg, *src_reg, ty)); + } + } else { + // There's some overlap, so play safe and copy via temps. + + let tmp_regs: Vec> = phi_classes + .iter() + .map(|&(ty, rc)| self.tmp(rc, ty)) // borrows `self` mutably. + .collect(); + + debug!("phi_temps = {:?}", tmp_regs); + debug_assert!(tmp_regs.len() == src_regs.len()); + + for (tmp_reg, (src_reg, &(ty, _))) in + tmp_regs.iter().zip(src_regs.iter().zip(phi_classes.iter())) + { + self.vcode.push(I::gen_move(*tmp_reg, *src_reg, ty)); + } + for (dst_reg, (tmp_reg, &(ty, _))) in + dst_regs.iter().zip(tmp_regs.iter().zip(phi_classes.iter())) + { + self.vcode.push(I::gen_move(*dst_reg, tmp_reg.to_reg(), ty)); + } + } + + // Create the unconditional jump to the original target block. + self.vcode + .push(I::gen_jump(self.vcode.bb_to_bindex(orig_block))); + + // End the IR inst and block. (We lower this as if it were one IR instruction so that + // we can emit machine instructions in forward order.) + self.vcode.end_ir_inst(); + let blocknum = self.vcode.end_bb(); + assert!(blocknum == edge_block); + } + + // Now that we've emitted all instructions into the VCodeBuilder, let's build the VCode. + self.vcode.build() + } + + /// Reduce the use-count of an IR instruction. Use this when, e.g., isel incorporates the + /// computation of an input instruction directly, so that input instruction has one + /// fewer use. + fn dec_use(&mut self, ir_inst: Inst) { + assert!(self.num_uses[ir_inst] > 0); + self.num_uses[ir_inst] -= 1; + debug!( + "incref: ir_inst {} now has {} uses", + ir_inst, self.num_uses[ir_inst] + ); + } + + /// Increase the use-count of an IR instruction. Use this when, e.g., isel incorporates + /// the computation of an input instruction directly, so that input instruction's + /// inputs are now used directly by the merged instruction. + fn inc_use(&mut self, ir_inst: Inst) { + self.num_uses[ir_inst] += 1; + debug!( + "decref: ir_inst {} now has {} uses", + ir_inst, self.num_uses[ir_inst] + ); + } +} + +impl<'a, I: VCodeInst> LowerCtx for Lower<'a, I> { + /// Get the instdata for a given IR instruction. 
+ fn data(&self, ir_inst: Inst) -> &InstructionData { + &self.f.dfg[ir_inst] + } + + /// Get the controlling type for a polymorphic IR instruction. + fn ty(&self, ir_inst: Inst) -> Type { + self.f.dfg.ctrl_typevar(ir_inst) + } + + /// Emit a machine instruction. + fn emit(&mut self, mach_inst: I) { + self.vcode.push(mach_inst); + } + + /// Indicate that a merge has occurred. + fn merged(&mut self, from_inst: Inst) { + debug!("merged: inst {}", from_inst); + // First, inc-ref all inputs of `from_inst`, because they are now used + // directly by `into_inst`. + for arg in self.f.dfg.inst_args(from_inst) { + let arg = self.f.dfg.resolve_aliases(*arg); + match self.f.dfg.value_def(arg) { + ValueDef::Result(src_inst, _) => { + debug!(" -> inc-reffing src inst {}", src_inst); + self.inc_use(src_inst); + } + _ => {} + } + } + // Then, dec-ref the merged instruction itself. It still retains references + // to its arguments (inc-ref'd above). If its refcount has reached zero, + // it will be skipped during emission and its args will be dec-ref'd at that + // time. + self.dec_use(from_inst); + } + + /// Get the producing instruction, if any, and output number, for the `idx`th input to the + /// given IR instruction. + fn input_inst(&self, ir_inst: Inst, idx: usize) -> Option<(Inst, usize)> { + let val = self.f.dfg.inst_args(ir_inst)[idx]; + let val = self.f.dfg.resolve_aliases(val); + match self.f.dfg.value_def(val) { + ValueDef::Result(src_inst, result_idx) => Some((src_inst, result_idx)), + _ => None, + } + } + + /// Map a Value to its associated writable (probably virtual) Reg. + fn value_to_writable_reg(&self, val: Value) -> Writable { + let val = self.f.dfg.resolve_aliases(val); + Writable::from_reg(self.value_regs[val]) + } + + /// Map a Value to its associated (probably virtual) Reg. + fn value_to_reg(&self, val: Value) -> Reg { + let val = self.f.dfg.resolve_aliases(val); + self.value_regs[val] + } + + /// Get the `idx`th input to the given IR instruction as a virtual register. + fn input(&self, ir_inst: Inst, idx: usize) -> Reg { + let val = self.f.dfg.inst_args(ir_inst)[idx]; + let val = self.f.dfg.resolve_aliases(val); + self.value_to_reg(val) + } + + /// Get the `idx`th output of the given IR instruction as a virtual register. + fn output(&self, ir_inst: Inst, idx: usize) -> Writable { + let val = self.f.dfg.inst_results(ir_inst)[idx]; + self.value_to_writable_reg(val) + } + + /// Get a new temp. + fn tmp(&mut self, rc: RegClass, ty: Type) -> Writable { + let v = self.next_vreg; + self.next_vreg += 1; + let vreg = Reg::new_virtual(rc, v); + self.vcode.set_vreg_type(vreg.as_virtual_reg().unwrap(), ty); + Writable::from_reg(vreg) + } + + /// Get the number of inputs for the given IR instruction. + fn num_inputs(&self, ir_inst: Inst) -> usize { + self.f.dfg.inst_args(ir_inst).len() + } + + /// Get the number of outputs for the given IR instruction. + fn num_outputs(&self, ir_inst: Inst) -> usize { + self.f.dfg.inst_results(ir_inst).len() + } + + /// Get the type for an instruction's input. + fn input_ty(&self, ir_inst: Inst, idx: usize) -> Type { + let val = self.f.dfg.inst_args(ir_inst)[idx]; + let val = self.f.dfg.resolve_aliases(val); + self.f.dfg.value_type(val) + } + + /// Get the type for an instruction's output. + fn output_ty(&self, ir_inst: Inst, idx: usize) -> Type { + self.f.dfg.value_type(self.f.dfg.inst_results(ir_inst)[idx]) + } + + /// Get the number of block params. 
+ fn num_bb_params(&self, bb: Block) -> usize { + self.f.dfg.block_params(bb).len() + } + + /// Get the register for a block param. + fn bb_param(&self, bb: Block, idx: usize) -> Reg { + let val = self.f.dfg.block_params(bb)[idx]; + self.value_regs[val] + } + + /// Get the register for a return value. + fn retval(&self, idx: usize) -> Writable { + Writable::from_reg(self.retval_regs[idx]) + } + + /// Get the target for a call instruction, as an `ExternalName`. + fn call_target<'b>(&'b self, ir_inst: Inst) -> Option<&'b ExternalName> { + match &self.f.dfg[ir_inst] { + &InstructionData::Call { func_ref, .. } + | &InstructionData::FuncAddr { func_ref, .. } => { + let funcdata = &self.f.dfg.ext_funcs[func_ref]; + Some(&funcdata.name) + } + _ => None, + } + } + /// Get the signature for a call or call-indirect instruction. + fn call_sig<'b>(&'b self, ir_inst: Inst) -> Option<&'b Signature> { + match &self.f.dfg[ir_inst] { + &InstructionData::Call { func_ref, .. } => { + let funcdata = &self.f.dfg.ext_funcs[func_ref]; + Some(&self.f.dfg.signatures[funcdata.signature]) + } + &InstructionData::CallIndirect { sig_ref, .. } => Some(&self.f.dfg.signatures[sig_ref]), + _ => None, + } + } + + /// Get the symbol name and offset for a symbol_value instruction. + fn symbol_value<'b>(&'b self, ir_inst: Inst) -> Option<(&'b ExternalName, i64)> { + match &self.f.dfg[ir_inst] { + &InstructionData::UnaryGlobalValue { global_value, .. } => { + let gvdata = &self.f.global_values[global_value]; + match gvdata { + &GlobalValueData::Symbol { + ref name, + ref offset, + .. + } => { + let offset = offset.bits(); + Some((name, offset)) + } + _ => None, + } + } + _ => None, + } + } + + /// Returns the memory flags of a given memory access. + fn memflags(&self, ir_inst: Inst) -> Option { + match &self.f.dfg[ir_inst] { + &InstructionData::Load { flags, .. } + | &InstructionData::LoadComplex { flags, .. } + | &InstructionData::Store { flags, .. } + | &InstructionData::StoreComplex { flags, .. } => Some(flags), + _ => None, + } + } + + /// Get the source location for a given instruction. + fn srcloc(&self, ir_inst: Inst) -> SourceLoc { + self.f.srclocs[ir_inst] + } +} + +fn branch_targets(f: &Function, block: Block, inst: Inst) -> SmallVec<[Block; 16]> { + let mut ret = SmallVec::new(); + if f.dfg[inst].opcode() == Opcode::Fallthrough { + ret.push(f.layout.next_block(block).unwrap()); + } else { + match &f.dfg[inst] { + &InstructionData::Jump { destination, .. } + | &InstructionData::Branch { destination, .. } + | &InstructionData::BranchInt { destination, .. } + | &InstructionData::BranchIcmp { destination, .. } + | &InstructionData::BranchFloat { destination, .. } => { + ret.push(destination); + } + &InstructionData::BranchTable { + destination, table, .. + } => { + ret.push(destination); + for dest in f.jump_tables[table].as_slice() { + ret.push(*dest); + } + } + _ => {} + } + } + ret +} diff --git a/cranelift/codegen/src/machinst/mod.rs b/cranelift/codegen/src/machinst/mod.rs new file mode 100644 index 0000000000..93c9126b32 --- /dev/null +++ b/cranelift/codegen/src/machinst/mod.rs @@ -0,0 +1,288 @@ +//! This module exposes the machine-specific backend definition pieces. +//! +//! The MachInst infrastructure is the compiler backend, from CLIF +//! (ir::Function) to machine code. The purpose of this infrastructure is, at a +//! high level, to do instruction selection/lowering (to machine instructions), +//! register allocation, and then perform all the fixups to branches, constant +//! 
data references, etc., needed to actually generate machine code. +//! +//! The container for machine instructions, at various stages of construction, +//! is the `VCode` struct. We refer to a sequence of machine instructions organized +//! into basic blocks as "vcode". This is short for "virtual-register code", though +//! it's a bit of a misnomer because near the end of the pipeline, vcode has all +//! real registers. Nevertheless, the name is catchy and we like it. +//! +//! The compilation pipeline, from an `ir::Function` (already optimized as much as +//! you like by machine-independent optimization passes) onward, is as follows. +//! (N.B.: though we show the VCode separately at each stage, the passes +//! mutate the VCode in place; these are not separate copies of the code.) +//! +//! | ir::Function (SSA IR, machine-independent opcodes) +//! | | +//! | | [lower] +//! | | +//! | VCode (machine instructions: +//! | | - mostly virtual registers. +//! | | - cond branches in two-target form. +//! | | - branch targets are block indices. +//! | | - in-memory constants held by insns, +//! | | with unknown offsets. +//! | | - critical edges (actually all edges) +//! | | are split.) +//! | | [regalloc] +//! | | +//! | VCode (machine instructions: +//! | | - all real registers. +//! | | - new instruction sequence returned +//! | | out-of-band in RegAllocResult. +//! | | - instruction sequence has spills, +//! | | reloads, and moves inserted. +//! | | - other invariants same as above.) +//! | | +//! | | [preamble/postamble] +//! | | +//! | VCode (machine instructions: +//! | | - stack-frame size known. +//! | | - out-of-band instruction sequence +//! | | has preamble prepended to entry +//! | | block, and postamble injected before +//! | | every return instruction. +//! | | - all symbolic stack references to +//! | | stackslots and spillslots are resolved +//! | | to concrete FP-offset mem addresses.) +//! | | [block/insn ordering] +//! | | +//! | VCode (machine instructions: +//! | | - vcode.final_block_order is filled in. +//! | | - new insn sequence from regalloc is +//! | | placed back into vcode and block +//! | | boundaries are updated.) +//! | | [redundant branch/block +//! | | removal] +//! | | +//! | VCode (machine instructions: +//! | | - all blocks that were just an +//! | | unconditional branch are removed.) +//! | | +//! | | [branch finalization +//! | | (fallthroughs)] +//! | | +//! | VCode (machine instructions: +//! | | - all branches are in lowered one- +//! | | target form, but targets are still +//! | | block indices.) +//! | | +//! | | [branch finalization +//! | | (offsets)] +//! | | +//! | VCode (machine instructions: +//! | | - all branch offsets from start of +//! | | function are known, and all branches +//! | | have resolved-offset targets.) +//! | | +//! | | [MemArg finalization] +//! | | +//! | VCode (machine instructions: +//! | | - all MemArg references to the constant +//! | | pool are replaced with offsets. +//! | | - all constant-pool data is collected +//! | | in the VCode.) +//! | | +//! | | [binary emission] +//! | | +//! | Vec (machine code!) +//! 
| + +#![allow(unused_imports)] + +use crate::binemit::{ + CodeInfo, CodeOffset, CodeSink, MemoryCodeSink, RelocSink, StackmapSink, TrapSink, +}; +use crate::entity::EntityRef; +use crate::entity::SecondaryMap; +use crate::ir::condcodes::IntCC; +use crate::ir::ValueLocations; +use crate::ir::{DataFlowGraph, Function, Inst, Opcode, Type, Value}; +use crate::isa::RegUnit; +use crate::result::CodegenResult; +use crate::settings::Flags; +use crate::HashMap; +use alloc::boxed::Box; +use alloc::vec::Vec; +use core::fmt::Debug; +use core::iter::Sum; +use regalloc::Map as RegallocMap; +use regalloc::RegUsageCollector; +use regalloc::{RealReg, RealRegUniverse, Reg, RegClass, SpillSlot, VirtualReg, Writable}; +use smallvec::SmallVec; +use std::hash::Hash; +use std::string::String; +use target_lexicon::Triple; + +pub mod lower; +pub use lower::*; +pub mod vcode; +pub use vcode::*; +pub mod compile; +pub use compile::*; +pub mod blockorder; +pub use blockorder::*; +pub mod abi; +pub use abi::*; +pub mod pp; +pub use pp::*; +pub mod sections; +pub use sections::*; +pub mod adapter; +pub use adapter::*; + +/// A machine instruction. +pub trait MachInst: Clone + Debug { + /// Return the registers referenced by this machine instruction along with + /// the modes of reference (use, def, modify). + fn get_regs(&self, collector: &mut RegUsageCollector); + + /// Map virtual registers to physical registers using the given virt->phys + /// maps corresponding to the program points prior to, and after, this instruction. + fn map_regs( + &mut self, + pre_map: &RegallocMap, + post_map: &RegallocMap, + ); + + /// If this is a simple move, return the (source, destination) tuple of registers. + fn is_move(&self) -> Option<(Writable, Reg)>; + + /// Is this a terminator (branch or ret)? If so, return its type + /// (ret/uncond/cond) and target if applicable. + fn is_term<'a>(&'a self) -> MachTerminator<'a>; + + /// Returns true if the instruction is an epilogue placeholder. + fn is_epilogue_placeholder(&self) -> bool; + + /// Generate a move. + fn gen_move(to_reg: Writable, from_reg: Reg, ty: Type) -> Self; + + /// Generate a zero-length no-op. + fn gen_zero_len_nop() -> Self; + + /// Possibly operate on a value directly in a spill-slot rather than a + /// register. Useful if the machine has register-memory instruction forms + /// (e.g., add directly from or directly to memory), like x86. + fn maybe_direct_reload(&self, reg: VirtualReg, slot: SpillSlot) -> Option; + + /// Determine a register class to store the given CraneLift type. + fn rc_for_type(ty: Type) -> RegClass; + + /// Generate a jump to another target. Used during lowering of + /// control flow. + fn gen_jump(target: BlockIndex) -> Self; + + /// Generate a NOP. The `preferred_size` parameter allows the caller to + /// request a NOP of that size, or as close to it as possible. The machine + /// backend may return a NOP whose binary encoding is smaller than the + /// preferred size, but must not return a NOP that is larger. However, + /// the instruction must have a nonzero size. + fn gen_nop(preferred_size: usize) -> Self; + + /// Rewrite block targets using the block-target map. + fn with_block_rewrites(&mut self, block_target_map: &[BlockIndex]); + + /// Finalize branches once the block order (fallthrough) is known. + fn with_fallthrough_block(&mut self, fallthrough_block: Option); + + /// Update instruction once block offsets are known. These offsets are + /// relative to the beginning of the function. `targets` is indexed by + /// BlockIndex. 
+ fn with_block_offsets(&mut self, my_offset: CodeOffset, targets: &[CodeOffset]); + + /// Get the register universe for this backend. + fn reg_universe() -> RealRegUniverse; + + /// Align a basic block offset (from start of function). By default, no + /// alignment occurs. + fn align_basic_block(offset: CodeOffset) -> CodeOffset { + offset + } +} + +/// Describes a block terminator (not call) in the vcode, when its branches +/// have not yet been finalized (so a branch may have two targets). +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum MachTerminator<'a> { + /// Not a terminator. + None, + /// A return instruction. + Ret, + /// An unconditional branch to another block. + Uncond(BlockIndex), + /// A conditional branch to one of two other blocks. + Cond(BlockIndex, BlockIndex), + /// An indirect branch with known possible targets. + Indirect(&'a [BlockIndex]), +} + +/// A trait describing the ability to encode a MachInst into binary machine code. +pub trait MachInstEmit { + /// Emit the instruction. + fn emit(&self, code: &mut O); +} + +/// The result of a `MachBackend::compile_function()` call. Contains machine +/// code (as bytes) and a disassembly, if requested. +pub struct MachCompileResult { + /// Machine code. + pub sections: MachSections, + /// Size of stack frame, in bytes. + pub frame_size: u32, + /// Disassembly, if requested. + pub disasm: Option, +} + +impl MachCompileResult { + /// Get a `CodeInfo` describing section sizes from this compilation result. + pub fn code_info(&self) -> CodeInfo { + let code_size = self.sections.total_size(); + CodeInfo { + code_size, + jumptables_size: 0, + rodata_size: 0, + total_size: code_size, + } + } +} + +/// Top-level machine backend trait, which wraps all monomorphized code and +/// allows a virtual call from the machine-independent `Function::compile()`. +pub trait MachBackend { + /// Compile the given function. Consumes the function. + fn compile_function( + &self, + func: Function, + want_disasm: bool, + ) -> CodegenResult; + + /// Return flags for this backend. + fn flags(&self) -> &Flags; + + /// Return triple for this backend. + fn triple(&self) -> Triple; + + /// Return name for this backend. + fn name(&self) -> &'static str; + + /// Return the register universe for this backend. + fn reg_universe(&self) -> RealRegUniverse; + + /// Machine-specific condcode info needed by TargetIsa. + fn unsigned_add_overflow_condition(&self) -> IntCC { + // TODO: this is what x86 specifies. Is this right for arm64? + IntCC::UnsignedLessThan + } + + /// Machine-specific condcode info needed by TargetIsa. + fn unsigned_sub_overflow_condition(&self) -> IntCC { + // TODO: this is what x86 specifies. Is this right for arm64? + IntCC::UnsignedLessThan + } +} diff --git a/cranelift/codegen/src/machinst/pp.rs b/cranelift/codegen/src/machinst/pp.rs new file mode 100644 index 0000000000..40e7c1b842 --- /dev/null +++ b/cranelift/codegen/src/machinst/pp.rs @@ -0,0 +1,66 @@ +//! Pretty-printing for machine code (virtual-registerized or final). + +use regalloc::{RealRegUniverse, Reg, Writable}; + +use std::fmt::Debug; +use std::hash::Hash; +use std::string::{String, ToString}; + +// FIXME: Should this go into regalloc.rs instead? + +/// A trait for printing instruction bits and pieces, with the the ability to +/// take a contextualising RealRegUniverse that is used to give proper names to +/// registers. +pub trait ShowWithRRU { + /// Return a string that shows the implementing object in context of the + /// given `RealRegUniverse`, if provided. 
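+ ///
+ /// For example, `compile.rs` prints a whole `VCode` this way (sketch):
+ ///
+ /// ```ignore
+ /// let universe = B::MInst::reg_universe();
+ /// debug!("vcode:\n{}", vcode.show_rru(Some(&universe)));
+ /// ```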
+ fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String; + + /// The same as |show_rru|, but with an optional hint giving a size in + /// bytes. Its interpretation is object-dependent, and it is intended to + /// pass around enough information to facilitate printing sub-parts of + /// real registers correctly. Objects may ignore size hints that are + /// irrelevant to them. + fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, _size: u8) -> String { + // Default implementation is to ignore the hint. + self.show_rru(mb_rru) + } +} + +impl ShowWithRRU for Reg { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + if self.is_real() { + if let Some(rru) = mb_rru { + let reg_ix = self.get_index(); + if reg_ix < rru.regs.len() { + return rru.regs[reg_ix].1.to_string(); + } else { + // We have a real reg which isn't listed in the universe. + // Per the regalloc.rs interface requirements, this is + // Totally Not Allowed. Print it generically anyway, so + // we have something to debug. + return format!("!!{:?}!!", self); + } + } + } + // The reg is virtual, or we have no universe. Be generic. + format!("%{:?}", self) + } + + fn show_rru_sized(&self, _mb_rru: Option<&RealRegUniverse>, _size: u8) -> String { + // For the specific case of Reg, we demand not to have a size hint, + // since interpretation of the size is target specific, but this code + // is used by all targets. + panic!("Reg::show_rru_sized: impossible to implement"); + } +} + +impl ShowWithRRU for Writable { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + self.to_reg().show_rru(mb_rru) + } + + fn show_rru_sized(&self, mb_rru: Option<&RealRegUniverse>, size: u8) -> String { + self.to_reg().show_rru_sized(mb_rru, size) + } +} diff --git a/cranelift/codegen/src/machinst/sections.rs b/cranelift/codegen/src/machinst/sections.rs new file mode 100644 index 0000000000..3e387239d0 --- /dev/null +++ b/cranelift/codegen/src/machinst/sections.rs @@ -0,0 +1,351 @@ +//! In-memory representation of compiled machine code, in multiple sections +//! (text, constant pool / rodata, etc). Emission occurs into multiple sections +//! simultaneously, so we buffer the result in memory and hand off to the +//! caller at the end of compilation. + +use crate::binemit::{Addend, CodeOffset, CodeSink, Reloc, RelocSink, StackmapSink, TrapSink}; +use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode}; + +use alloc::vec::Vec; + +/// A collection of sections with defined start-offsets. +pub struct MachSections { + /// Sections, in offset order. + pub sections: Vec, +} + +impl MachSections { + /// New, empty set of sections. + pub fn new() -> MachSections { + MachSections { sections: vec![] } + } + + /// Add a section with a known offset and size. Returns the index. + pub fn add_section(&mut self, start: CodeOffset, length: CodeOffset) -> usize { + let idx = self.sections.len(); + self.sections.push(MachSection::new(start, length)); + idx + } + + /// Mutably borrow the given section by index. + pub fn get_section<'a>(&'a mut self, idx: usize) -> &'a mut MachSection { + &mut self.sections[idx] + } + + /// Get mutable borrows of two sections simultaneously. Used during + /// instruction emission to provide references to the .text and .rodata + /// (constant pool) sections. 
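+ ///
+ /// Sketch of the intended use (the offsets and lengths, `code_len` and
+ /// `rodata_len`, are illustrative):
+ ///
+ /// ```ignore
+ /// let mut sections = MachSections::new();
+ /// let text_idx = sections.add_section(0, code_len);
+ /// let rodata_idx = sections.add_section(code_len, rodata_len);
+ /// let (text, rodata) = sections.two_sections(text_idx, rodata_idx);
+ /// ```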
+ pub fn two_sections<'a>( + &'a mut self, + idx1: usize, + idx2: usize, + ) -> (&'a mut MachSection, &'a mut MachSection) { + assert!(idx1 < idx2); + assert!(idx1 < self.sections.len()); + assert!(idx2 < self.sections.len()); + let (first, rest) = self.sections.split_at_mut(idx2); + (&mut first[idx1], &mut rest[0]) + } + + /// Emit this set of sections to a set of sinks for the code, + /// relocations, traps, and stackmap. + pub fn emit(&self, sink: &mut CS) { + // N.B.: we emit every section into the .text section as far as + // the `CodeSink` is concerned; we do not bother to segregate + // the contents into the actual program text, the jumptable and the + // rodata (constant pool). This allows us to generate code assuming + // that these will not be relocated relative to each other, and avoids + // having to designate each section as belonging in one of the three + // fixed categories defined by `CodeSink`. If this becomes a problem + // later (e.g. because of memory permissions or similar), we can + // add this designation and segregate the output; take care, however, + // to add the appropriate relocations in this case. + + for section in &self.sections { + if section.data.len() > 0 { + while sink.offset() < section.start_offset { + sink.put1(0); + } + section.emit(sink); + } + } + sink.begin_jumptables(); + sink.begin_rodata(); + sink.end_codegen(); + } + + /// Get the total required size for these sections. + pub fn total_size(&self) -> CodeOffset { + if self.sections.len() == 0 { + 0 + } else { + // Find the last non-empty section. + self.sections + .iter() + .rev() + .find(|s| s.data.len() > 0) + .map(|s| s.cur_offset_from_start()) + .unwrap_or(0) + } + } +} + +/// An abstraction over MachSection and MachSectionSize: some +/// receiver of section data. +pub trait MachSectionOutput { + /// Get the current offset from the start of all sections. + fn cur_offset_from_start(&self) -> CodeOffset; + + /// Get the start offset of this section. + fn start_offset(&self) -> CodeOffset; + + /// Add 1 byte to the section. + fn put1(&mut self, _: u8); + + /// Add 2 bytes to the section. + fn put2(&mut self, value: u16) { + self.put1((value & 0xff) as u8); + self.put1(((value >> 8) & 0xff) as u8); + } + + /// Add 4 bytes to the section. + fn put4(&mut self, value: u32) { + self.put1((value & 0xff) as u8); + self.put1(((value >> 8) & 0xff) as u8); + self.put1(((value >> 16) & 0xff) as u8); + self.put1(((value >> 24) & 0xff) as u8); + } + + /// Add 8 bytes to the section. + fn put8(&mut self, value: u64) { + self.put1((value & 0xff) as u8); + self.put1(((value >> 8) & 0xff) as u8); + self.put1(((value >> 16) & 0xff) as u8); + self.put1(((value >> 24) & 0xff) as u8); + self.put1(((value >> 32) & 0xff) as u8); + self.put1(((value >> 40) & 0xff) as u8); + self.put1(((value >> 48) & 0xff) as u8); + self.put1(((value >> 56) & 0xff) as u8); + } + + /// Add a slice of bytes to the section. + fn put_data(&mut self, data: &[u8]); + + /// Add a relocation at the current offset. + fn add_reloc(&mut self, loc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend); + + /// Add a trap record at the current offset. + fn add_trap(&mut self, loc: SourceLoc, code: TrapCode); + + /// Add a call return address record at the current offset. + fn add_call_site(&mut self, loc: SourceLoc, opcode: Opcode); + + /// Align up to the given alignment. 
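+ ///
+ /// For example, `section.align_to(16)` pads with zero bytes until the
+ /// current offset from the start of all sections is a multiple of 16; the
+ /// alignment must be a power of two.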
+ fn align_to(&mut self, align_to: CodeOffset) { + assert!(align_to.is_power_of_two()); + while self.cur_offset_from_start() & (align_to - 1) != 0 { + self.put1(0); + } + } +} + +/// A section of output to be emitted to a CodeSink / RelocSink in bulk. +/// Multiple sections may be created with known start offsets in advance; the +/// usual use-case is to create the .text (code) and .rodata (constant pool) at +/// once, after computing the length of the code, so that constant references +/// can use known offsets as instructions are emitted. +pub struct MachSection { + /// The starting offset of this section. + pub start_offset: CodeOffset, + /// The limit of this section, defined by the start of the next section. + pub length_limit: CodeOffset, + /// The section contents, as raw bytes. + pub data: Vec, + /// Any relocations referring to this section. + pub relocs: Vec, + /// Any trap records referring to this section. + pub traps: Vec, + /// Any call site record referring to this section. + pub call_sites: Vec, +} + +impl MachSection { + /// Create a new section, known to start at `start_offset` and with a size limited to `length_limit`. + pub fn new(start_offset: CodeOffset, length_limit: CodeOffset) -> MachSection { + MachSection { + start_offset, + length_limit, + data: vec![], + relocs: vec![], + traps: vec![], + call_sites: vec![], + } + } + + /// Emit this section to the CodeSink and other associated sinks. The + /// current offset of the CodeSink must match the starting offset of this + /// section. + pub fn emit(&self, sink: &mut CS) { + assert!(sink.offset() == self.start_offset); + + let mut next_reloc = 0; + let mut next_trap = 0; + let mut next_call_site = 0; + for (idx, byte) in self.data.iter().enumerate() { + if next_reloc < self.relocs.len() { + let reloc = &self.relocs[next_reloc]; + if reloc.offset == idx as CodeOffset { + sink.reloc_external(reloc.srcloc, reloc.kind, &reloc.name, reloc.addend); + next_reloc += 1; + } + } + if next_trap < self.traps.len() { + let trap = &self.traps[next_trap]; + if trap.offset == idx as CodeOffset { + sink.trap(trap.code, trap.srcloc); + next_trap += 1; + } + } + if next_call_site < self.call_sites.len() { + let call_site = &self.call_sites[next_call_site]; + if call_site.ret_addr == idx as CodeOffset { + sink.add_call_site(call_site.opcode, call_site.srcloc); + next_call_site += 1; + } + } + sink.put1(*byte); + } + } +} + +impl MachSectionOutput for MachSection { + fn cur_offset_from_start(&self) -> CodeOffset { + self.start_offset + self.data.len() as CodeOffset + } + + fn start_offset(&self) -> CodeOffset { + self.start_offset + } + + fn put1(&mut self, value: u8) { + assert!(((self.data.len() + 1) as CodeOffset) <= self.length_limit); + self.data.push(value); + } + + fn put_data(&mut self, data: &[u8]) { + assert!(((self.data.len() + data.len()) as CodeOffset) <= self.length_limit); + self.data.extend_from_slice(data); + } + + fn add_reloc(&mut self, srcloc: SourceLoc, kind: Reloc, name: &ExternalName, addend: Addend) { + let name = name.clone(); + self.relocs.push(MachReloc { + offset: self.data.len() as CodeOffset, + srcloc, + kind, + name, + addend, + }); + } + + fn add_trap(&mut self, srcloc: SourceLoc, code: TrapCode) { + self.traps.push(MachTrap { + offset: self.data.len() as CodeOffset, + srcloc, + code, + }); + } + + fn add_call_site(&mut self, srcloc: SourceLoc, opcode: Opcode) { + self.call_sites.push(MachCallSite { + ret_addr: self.data.len() as CodeOffset, + srcloc, + opcode, + }); + } +} + +/// A MachSectionOutput 
implementation that records only size. +pub struct MachSectionSize { + /// The starting offset of this section. + pub start_offset: CodeOffset, + /// The current offset of this section. + pub offset: CodeOffset, +} + +impl MachSectionSize { + /// Create a new size-counting dummy section. + pub fn new(start_offset: CodeOffset) -> MachSectionSize { + MachSectionSize { + start_offset, + offset: start_offset, + } + } + + /// Return the size this section would take if emitted with a real sink. + pub fn size(&self) -> CodeOffset { + self.offset - self.start_offset + } +} + +impl MachSectionOutput for MachSectionSize { + fn cur_offset_from_start(&self) -> CodeOffset { + // All size-counting sections conceptually start at offset 0; this doesn't + // matter when counting code size. + self.offset + } + + fn start_offset(&self) -> CodeOffset { + self.start_offset + } + + fn put1(&mut self, _: u8) { + self.offset += 1; + } + + fn put_data(&mut self, data: &[u8]) { + self.offset += data.len() as CodeOffset; + } + + fn add_reloc(&mut self, _: SourceLoc, _: Reloc, _: &ExternalName, _: Addend) {} + + fn add_trap(&mut self, _: SourceLoc, _: TrapCode) {} + + fn add_call_site(&mut self, _: SourceLoc, _: Opcode) {} +} + +/// A relocation resulting from a compilation. +pub struct MachReloc { + /// The offset at which the relocation applies, *relative to the + /// containing section*. + pub offset: CodeOffset, + /// The original source location. + pub srcloc: SourceLoc, + /// The kind of relocation. + pub kind: Reloc, + /// The external symbol / name to which this relocation refers. + pub name: ExternalName, + /// The addend to add to the symbol value. + pub addend: i64, +} + +/// A trap record resulting from a compilation. +pub struct MachTrap { + /// The offset at which the trap instruction occurs, *relative to the + /// containing section*. + pub offset: CodeOffset, + /// The original source location. + pub srcloc: SourceLoc, + /// The trap code. + pub code: TrapCode, +} + +/// A call site record resulting from a compilation. +pub struct MachCallSite { + /// The offset of the call's return address, *relative to the containing section*. + pub ret_addr: CodeOffset, + /// The original source location. + pub srcloc: SourceLoc, + /// The call's opcode. + pub opcode: Opcode, +} diff --git a/cranelift/codegen/src/machinst/vcode.rs b/cranelift/codegen/src/machinst/vcode.rs new file mode 100644 index 0000000000..64b1a4012a --- /dev/null +++ b/cranelift/codegen/src/machinst/vcode.rs @@ -0,0 +1,738 @@ +//! This implements the VCode container: a CFG of Insts that have been lowered. +//! +//! VCode is virtual-register code. An instruction in VCode is almost a machine +//! instruction; however, its register slots can refer to virtual registers in +//! addition to real machine registers. +//! +//! VCode is structured with traditional basic blocks, and +//! each block must be terminated by an unconditional branch (one target), a +//! conditional branch (two targets), or a return (no targets). Note that this +//! slightly differs from the machine code of most ISAs: in most ISAs, a +//! conditional branch has one target (and the not-taken case falls through). +//! However, we expect that machine backends will elide branches to the following +//! block (i.e., zero-offset jumps), and will be able to codegen a branch-cond / +//! branch-uncond pair if *both* targets are not fallthrough. This allows us to +//! play with layout prior to final binary emission, as well, if we want. +//! +//! 
See the main module comment in `mod.rs` for more details on the VCode-based +//! backend pipeline. + +use crate::binemit::Reloc; +use crate::ir; +use crate::machinst::*; +use crate::settings; + +use regalloc::Function as RegallocFunction; +use regalloc::Set as RegallocSet; +use regalloc::{BlockIx, InstIx, Range, RegAllocResult, RegClass, RegUsageCollector}; + +use alloc::boxed::Box; +use alloc::vec::Vec; +use log::debug; +use smallvec::SmallVec; +use std::fmt; +use std::iter; +use std::ops::Index; +use std::string::String; + +/// Index referring to an instruction in VCode. +pub type InsnIndex = u32; +/// Index referring to a basic block in VCode. +pub type BlockIndex = u32; + +/// VCodeInst wraps all requirements for a MachInst to be in VCode: it must be +/// a `MachInst` and it must be able to emit itself at least to a `SizeCodeSink`. +pub trait VCodeInst: MachInst + MachInstEmit + MachInstEmit {} +impl + MachInstEmit> VCodeInst for I {} + +/// A function in "VCode" (virtualized-register code) form, after lowering. +/// This is essentially a standard CFG of basic blocks, where each basic block +/// consists of lowered instructions produced by the machine-specific backend. +pub struct VCode { + /// Function liveins. + liveins: RegallocSet, + + /// Function liveouts. + liveouts: RegallocSet, + + /// VReg IR-level types. + vreg_types: Vec, + + /// Lowered machine instructions in order corresponding to the original IR. + pub insts: Vec, + + /// Entry block. + entry: BlockIndex, + + /// Block instruction indices. + pub block_ranges: Vec<(InsnIndex, InsnIndex)>, + + /// Block successors: index range in the successor-list below. + block_succ_range: Vec<(usize, usize)>, + + /// Block successor lists, concatenated into one Vec. The `block_succ_range` + /// list of tuples above gives (start, end) ranges within this list that + /// correspond to each basic block's successors. + block_succs: Vec, + + /// Block indices by IR block. + block_by_bb: SecondaryMap, + + /// IR block for each VCode Block. The length of this Vec will likely be + /// less than the total number of Blocks, because new Blocks (for edge + /// splits, for example) are appended during lowering. + bb_by_block: Vec, + + /// Order of block IDs in final generated code. + final_block_order: Vec, + + /// Final block offsets. Computed during branch finalization and used + /// during emission. + final_block_offsets: Vec, + + /// Size of code, accounting for block layout / alignment. + code_size: CodeOffset, + + /// ABI object. + abi: Box>, +} + +/// A builder for a VCode function body. This builder is designed for the +/// lowering approach that we take: we traverse basic blocks in forward +/// (original IR) order, but within each basic block, we generate code from +/// bottom to top; and within each IR instruction that we visit in this reverse +/// order, we emit machine instructions in *forward* order again. +/// +/// Hence, to produce the final instructions in proper order, we perform two +/// swaps. First, the machine instructions (`I` instances) are produced in +/// forward order for an individual IR instruction. Then these are *reversed* +/// and concatenated to `bb_insns` at the end of the IR instruction lowering. +/// The `bb_insns` vec will thus contain all machine instructions for a basic +/// block, in reverse order. Finally, when we're done with a basic block, we +/// reverse the whole block's vec of instructions again, and concatenate onto +/// the VCode's insts. +pub struct VCodeBuilder { + /// In-progress VCode. 
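// A small worked example of the ordering described above (illustrative only):
// suppose an IR block contains insts A then B, and lowering produces machine
// insts [a1, a2] for A and [b1, b2] for B. Visiting bottom-to-top, B's insts
// are pushed in forward order into `ir_inst_insns`, then popped (reversed)
// onto `bb_insns`, giving [b2, b1]; after A is processed, `bb_insns` is
// [b2, b1, a2, a1]. Ending the block pops (reverses) once more into the
// VCode, yielding the final forward order [a1, a2, b1, b2].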
+ vcode: VCode, + + /// Current basic block instructions, in reverse order (because blocks are + /// built bottom-to-top). + bb_insns: SmallVec<[I; 32]>, + + /// Current IR-inst instructions, in forward order. + ir_inst_insns: SmallVec<[I; 4]>, + + /// Start of succs for the current block in the concatenated succs list. + succ_start: usize, +} + +impl VCodeBuilder { + /// Create a new VCodeBuilder. + pub fn new(abi: Box>) -> VCodeBuilder { + let vcode = VCode::new(abi); + VCodeBuilder { + vcode, + bb_insns: SmallVec::new(), + ir_inst_insns: SmallVec::new(), + succ_start: 0, + } + } + + /// Access the ABI object. + pub fn abi(&mut self) -> &mut dyn ABIBody { + &mut *self.vcode.abi + } + + /// Set the type of a VReg. + pub fn set_vreg_type(&mut self, vreg: VirtualReg, ty: Type) { + while self.vcode.vreg_types.len() <= vreg.get_index() { + self.vcode.vreg_types.push(ir::types::I8); // Default type. + } + self.vcode.vreg_types[vreg.get_index()] = ty; + } + + /// Return the underlying bb-to-BlockIndex map. + pub fn blocks_by_bb(&self) -> &SecondaryMap { + &self.vcode.block_by_bb + } + + /// Initialize the bb-to-BlockIndex map. Returns the first free + /// BlockIndex. + pub fn init_bb_map(&mut self, blocks: &[ir::Block]) -> BlockIndex { + let mut bindex: BlockIndex = 0; + for bb in blocks.iter() { + self.vcode.block_by_bb[*bb] = bindex; + self.vcode.bb_by_block.push(*bb); + bindex += 1; + } + bindex + } + + /// Get the BlockIndex for an IR block. + pub fn bb_to_bindex(&self, bb: ir::Block) -> BlockIndex { + self.vcode.block_by_bb[bb] + } + + /// Set the current block as the entry block. + pub fn set_entry(&mut self, block: BlockIndex) { + self.vcode.entry = block; + } + + /// End the current IR instruction. Must be called after pushing any + /// instructions and prior to ending the basic block. + pub fn end_ir_inst(&mut self) { + while let Some(i) = self.ir_inst_insns.pop() { + self.bb_insns.push(i); + } + } + + /// End the current basic block. Must be called after emitting vcode insts + /// for IR insts and prior to ending the function (building the VCode). + pub fn end_bb(&mut self) -> BlockIndex { + assert!(self.ir_inst_insns.is_empty()); + let block_num = self.vcode.block_ranges.len() as BlockIndex; + // Push the instructions. + let start_idx = self.vcode.insts.len() as InsnIndex; + while let Some(i) = self.bb_insns.pop() { + self.vcode.insts.push(i); + } + let end_idx = self.vcode.insts.len() as InsnIndex; + // Add the instruction index range to the list of blocks. + self.vcode.block_ranges.push((start_idx, end_idx)); + // End the successors list. + let succ_end = self.vcode.block_succs.len(); + self.vcode + .block_succ_range + .push((self.succ_start, succ_end)); + self.succ_start = succ_end; + + block_num + } + + /// Push an instruction for the current BB and current IR inst within the BB. + pub fn push(&mut self, insn: I) { + match insn.is_term() { + MachTerminator::None | MachTerminator::Ret => {} + MachTerminator::Uncond(target) => { + self.vcode.block_succs.push(target); + } + MachTerminator::Cond(true_branch, false_branch) => { + self.vcode.block_succs.push(true_branch); + self.vcode.block_succs.push(false_branch); + } + MachTerminator::Indirect(targets) => { + for target in targets { + self.vcode.block_succs.push(*target); + } + } + } + self.ir_inst_insns.push(insn); + } + + /// Build the final VCode. 
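// Illustrative driver sketch, not part of the patch (`make_abi`,
// `lower_ir_inst`, and `func` are stand-ins): a backend's lowering pass would
// drive this builder roughly as follows before calling `build()`:
//
//     let mut b = VCodeBuilder::new(make_abi(func));
//     let blocks: Vec<ir::Block> = func.layout.blocks().collect();
//     b.init_bb_map(&blocks);
//     if let Some(entry) = func.layout.entry_block() {
//         b.set_entry(b.bb_to_bindex(entry));
//     }
//     for bb in &blocks {
//         // IR insts visited bottom-to-top; machine insts pushed forward.
//         for ir_inst in func.layout.block_insts(*bb).collect::<Vec<_>>().into_iter().rev() {
//             for mach_inst in lower_ir_inst(func, ir_inst) {
//                 b.push(mach_inst);
//             }
//             b.end_ir_inst();
//         }
//         b.end_bb();
//     }
//     let vcode = b.build();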
+ pub fn build(self) -> VCode { + assert!(self.ir_inst_insns.is_empty()); + assert!(self.bb_insns.is_empty()); + self.vcode + } +} + +fn block_ranges(indices: &[InstIx], len: usize) -> Vec<(usize, usize)> { + let v = indices + .iter() + .map(|iix| iix.get() as usize) + .chain(iter::once(len)) + .collect::>(); + v.windows(2).map(|p| (p[0], p[1])).collect() +} + +fn is_redundant_move(insn: &I) -> bool { + if let Some((to, from)) = insn.is_move() { + to.to_reg() == from + } else { + false + } +} + +fn is_trivial_jump_block(vcode: &VCode, block: BlockIndex) -> Option { + let range = vcode.block_insns(BlockIx::new(block)); + + debug!( + "is_trivial_jump_block: block {} has len {}", + block, + range.len() + ); + + if range.len() != 1 { + return None; + } + let insn = range.first(); + + debug!( + " -> only insn is: {:?} with terminator {:?}", + vcode.get_insn(insn), + vcode.get_insn(insn).is_term() + ); + + match vcode.get_insn(insn).is_term() { + MachTerminator::Uncond(target) => Some(target), + _ => None, + } +} + +impl VCode { + /// New empty VCode. + fn new(abi: Box>) -> VCode { + VCode { + liveins: abi.liveins(), + liveouts: abi.liveouts(), + vreg_types: vec![], + insts: vec![], + entry: 0, + block_ranges: vec![], + block_succ_range: vec![], + block_succs: vec![], + block_by_bb: SecondaryMap::with_default(0), + bb_by_block: vec![], + final_block_order: vec![], + final_block_offsets: vec![], + code_size: 0, + abi, + } + } + + /// Get the IR-level type of a VReg. + pub fn vreg_type(&self, vreg: VirtualReg) -> Type { + self.vreg_types[vreg.get_index()] + } + + /// Get the entry block. + pub fn entry(&self) -> BlockIndex { + self.entry + } + + /// Get the number of blocks. Block indices will be in the range `0 .. + /// (self.num_blocks() - 1)`. + pub fn num_blocks(&self) -> usize { + self.block_ranges.len() + } + + /// Stack frame size for the full function's body. + pub fn frame_size(&self) -> u32 { + self.abi.frame_size() + } + + /// Get the successors for a block. + pub fn succs(&self, block: BlockIndex) -> &[BlockIndex] { + let (start, end) = self.block_succ_range[block as usize]; + &self.block_succs[start..end] + } + + /// Take the results of register allocation, with a sequence of + /// instructions including spliced fill/reload/move instructions, and replace + /// the VCode with them. + pub fn replace_insns_from_regalloc( + &mut self, + result: RegAllocResult, + flags: &settings::Flags, + ) { + self.final_block_order = compute_final_block_order(self); + + // Record the spillslot count and clobbered registers for the ABI/stack + // setup code. + self.abi.set_num_spillslots(result.num_spill_slots as usize); + self.abi + .set_clobbered(result.clobbered_registers.map(|r| Writable::from_reg(*r))); + + // We want to move instructions over in final block order, using the new + // block-start map given by the regalloc. + let block_ranges: Vec<(usize, usize)> = + block_ranges(result.target_map.elems(), result.insns.len()); + let mut final_insns = vec![]; + let mut final_block_ranges = vec![(0, 0); self.num_blocks()]; + + for block in &self.final_block_order { + let (start, end) = block_ranges[*block as usize]; + let final_start = final_insns.len() as InsnIndex; + + if *block == self.entry { + // Start with the prologue. + final_insns.extend(self.abi.gen_prologue(flags).into_iter()); + } + + for i in start..end { + let insn = &result.insns[i]; + + // Elide redundant moves at this point (we only know what is + // redundant once registers are allocated). 
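// (A move is "redundant" when register allocation has assigned the same
// physical register to both its source and destination, for example a
// `v3 := copy v7` that becomes `x5 := copy x5`; this is exactly what
// `is_redundant_move` above checks via `to.to_reg() == from`.)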
+ if is_redundant_move(insn) { + continue; + } + + // Whenever encountering a return instruction, replace it + // with the epilogue. + let is_ret = insn.is_term() == MachTerminator::Ret; + if is_ret { + final_insns.extend(self.abi.gen_epilogue(flags).into_iter()); + } else { + final_insns.push(insn.clone()); + } + } + + let final_end = final_insns.len() as InsnIndex; + final_block_ranges[*block as usize] = (final_start, final_end); + } + + self.insts = final_insns; + self.block_ranges = final_block_ranges; + } + + /// Removes redundant branches, rewriting targets to point directly to the + /// ultimate block at the end of a chain of trivial one-target jumps. + pub fn remove_redundant_branches(&mut self) { + // For each block, compute the actual target block, looking through up to one + // block with single-target jumps (this will remove empty edge blocks inserted + // by phi-lowering). + let block_rewrites: Vec = (0..self.num_blocks() as u32) + .map(|bix| is_trivial_jump_block(self, bix).unwrap_or(bix)) + .collect(); + let mut refcounts: Vec = vec![0; self.num_blocks()]; + + debug!( + "remove_redundant_branches: block_rewrites = {:?}", + block_rewrites + ); + + refcounts[self.entry as usize] = 1; + + for block in 0..self.num_blocks() as u32 { + for insn in self.block_insns(BlockIx::new(block)) { + self.get_insn_mut(insn) + .with_block_rewrites(&block_rewrites[..]); + match self.get_insn(insn).is_term() { + MachTerminator::Uncond(bix) => { + refcounts[bix as usize] += 1; + } + MachTerminator::Cond(bix1, bix2) => { + refcounts[bix1 as usize] += 1; + refcounts[bix2 as usize] += 1; + } + MachTerminator::Indirect(blocks) => { + for block in blocks { + refcounts[*block as usize] += 1; + } + } + _ => {} + } + } + } + + let deleted: Vec = refcounts.iter().map(|r| *r == 0).collect(); + + let block_order = std::mem::replace(&mut self.final_block_order, vec![]); + self.final_block_order = block_order + .into_iter() + .filter(|b| !deleted[*b as usize]) + .collect(); + + // Rewrite successor information based on the block-rewrite map. + for succ in &mut self.block_succs { + let new_succ = block_rewrites[*succ as usize]; + *succ = new_succ; + } + } + + /// Mutate branch instructions to (i) lower two-way condbrs to one-way, + /// depending on fallthrough; and (ii) use concrete offsets. + pub fn finalize_branches(&mut self) + where + I: MachInstEmit, + { + // Compute fallthrough block, indexed by block. + let num_final_blocks = self.final_block_order.len(); + let mut block_fallthrough: Vec> = vec![None; self.num_blocks()]; + for i in 0..(num_final_blocks - 1) { + let from = self.final_block_order[i]; + let to = self.final_block_order[i + 1]; + block_fallthrough[from as usize] = Some(to); + } + + // Pass over VCode instructions and finalize two-way branches into + // one-way branches with fallthrough. + for block in 0..self.num_blocks() { + let next_block = block_fallthrough[block]; + let (start, end) = self.block_ranges[block]; + + for iix in start..end { + let insn = &mut self.insts[iix as usize]; + insn.with_fallthrough_block(next_block); + } + } + + // Compute block offsets. 
+ let mut code_section = MachSectionSize::new(0); + let mut block_offsets = vec![0; self.num_blocks()]; + for block in &self.final_block_order { + code_section.offset = I::align_basic_block(code_section.offset); + block_offsets[*block as usize] = code_section.offset; + let (start, end) = self.block_ranges[*block as usize]; + for iix in start..end { + self.insts[iix as usize].emit(&mut code_section); + } + } + + // We now have the section layout. + self.final_block_offsets = block_offsets; + self.code_size = code_section.size(); + + // Update branches with known block offsets. This looks like the + // traversal above, but (i) does not update block_offsets, rather uses + // it (so forward references are now possible), and (ii) mutates the + // instructions. + let mut code_section = MachSectionSize::new(0); + for block in &self.final_block_order { + code_section.offset = I::align_basic_block(code_section.offset); + let (start, end) = self.block_ranges[*block as usize]; + for iix in start..end { + self.insts[iix as usize] + .with_block_offsets(code_section.offset, &self.final_block_offsets[..]); + self.insts[iix as usize].emit(&mut code_section); + } + } + } + + /// Emit the instructions to a list of sections. + pub fn emit(&self) -> MachSections + where + I: MachInstEmit, + { + let mut sections = MachSections::new(); + let code_idx = sections.add_section(0, self.code_size); + let code_section = sections.get_section(code_idx); + + for block in &self.final_block_order { + let new_offset = I::align_basic_block(code_section.cur_offset_from_start()); + while new_offset > code_section.cur_offset_from_start() { + // Pad with NOPs up to the aligned block offset. + let nop = I::gen_nop((new_offset - code_section.cur_offset_from_start()) as usize); + nop.emit(code_section); + } + assert_eq!(code_section.cur_offset_from_start(), new_offset); + + let (start, end) = self.block_ranges[*block as usize]; + for iix in start..end { + self.insts[iix as usize].emit(code_section); + } + } + + sections + } + + /// Get the IR block for a BlockIndex, if one exists. + pub fn bindex_to_bb(&self, block: BlockIndex) -> Option { + if (block as usize) < self.bb_by_block.len() { + Some(self.bb_by_block[block as usize]) + } else { + None + } + } +} + +impl RegallocFunction for VCode { + type Inst = I; + + fn insns(&self) -> &[I] { + &self.insts[..] + } + + fn insns_mut(&mut self) -> &mut [I] { + &mut self.insts[..] 
+ } + + fn get_insn(&self, insn: InstIx) -> &I { + &self.insts[insn.get() as usize] + } + + fn get_insn_mut(&mut self, insn: InstIx) -> &mut I { + &mut self.insts[insn.get() as usize] + } + + fn blocks(&self) -> Range { + Range::new(BlockIx::new(0), self.block_ranges.len()) + } + + fn entry_block(&self) -> BlockIx { + BlockIx::new(self.entry) + } + + fn block_insns(&self, block: BlockIx) -> Range { + let (start, end) = self.block_ranges[block.get() as usize]; + Range::new(InstIx::new(start), (end - start) as usize) + } + + fn block_succs(&self, block: BlockIx) -> Vec { + let (start, end) = self.block_succ_range[block.get() as usize]; + self.block_succs[start..end] + .iter() + .cloned() + .map(BlockIx::new) + .collect() + } + + fn is_ret(&self, insn: InstIx) -> bool { + match self.insts[insn.get() as usize].is_term() { + MachTerminator::Ret => true, + _ => false, + } + } + + fn get_regs(insn: &I, collector: &mut RegUsageCollector) { + insn.get_regs(collector) + } + + fn map_regs( + insn: &mut I, + pre_map: &RegallocMap, + post_map: &RegallocMap, + ) { + insn.map_regs(pre_map, post_map); + } + + fn is_move(&self, insn: &I) -> Option<(Writable, Reg)> { + insn.is_move() + } + + fn get_spillslot_size(&self, regclass: RegClass, vreg: VirtualReg) -> u32 { + let ty = self.vreg_type(vreg); + self.abi.get_spillslot_size(regclass, ty) + } + + fn gen_spill(&self, to_slot: SpillSlot, from_reg: RealReg, vreg: VirtualReg) -> I { + let ty = self.vreg_type(vreg); + self.abi.gen_spill(to_slot, from_reg, ty) + } + + fn gen_reload(&self, to_reg: Writable, from_slot: SpillSlot, vreg: VirtualReg) -> I { + let ty = self.vreg_type(vreg); + self.abi.gen_reload(to_reg, from_slot, ty) + } + + fn gen_move(&self, to_reg: Writable, from_reg: RealReg, vreg: VirtualReg) -> I { + let ty = self.vreg_type(vreg); + I::gen_move(to_reg.map(|r| r.to_reg()), from_reg.to_reg(), ty) + } + + fn gen_zero_len_nop(&self) -> I { + I::gen_zero_len_nop() + } + + fn maybe_direct_reload(&self, insn: &I, reg: VirtualReg, slot: SpillSlot) -> Option { + insn.maybe_direct_reload(reg, slot) + } + + fn func_liveins(&self) -> RegallocSet { + self.liveins.clone() + } + + fn func_liveouts(&self) -> RegallocSet { + self.liveouts.clone() + } +} + +// N.B.: Debug impl assumes that VCode has already been through all compilation +// passes, and so has a final block order and offsets. + +impl fmt::Debug for VCode { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + writeln!(f, "VCode_Debug {{")?; + writeln!(f, " Entry block: {}", self.entry)?; + writeln!(f, " Final block order: {:?}", self.final_block_order)?; + + for block in 0..self.num_blocks() { + writeln!(f, "Block {}:", block,)?; + for succ in self.succs(block as BlockIndex) { + writeln!(f, " (successor: Block {})", succ)?; + } + let (start, end) = self.block_ranges[block]; + writeln!(f, " (instruction range: {} .. {})", start, end)?; + for inst in start..end { + writeln!(f, " Inst {}: {:?}", inst, self.insts[inst as usize])?; + } + } + + writeln!(f, "}}")?; + Ok(()) + } +} + +// Pretty-printing with `RealRegUniverse` context. +impl ShowWithRRU for VCode { + fn show_rru(&self, mb_rru: Option<&RealRegUniverse>) -> String { + use crate::alloc::string::ToString; + use std::fmt::Write; + + // Calculate an order in which to display the blocks. This is the same + // as final_block_order, but also includes blocks which are in the + // representation but not in final_block_order. 
+ let mut display_order = Vec::::new(); + // First display blocks in |final_block_order| + for bix in &self.final_block_order { + assert!((*bix as usize) < self.num_blocks()); + display_order.push(*bix as usize); + } + // Now also take care of those not listed in |final_block_order|. + // This is quadratic, but it's also debug-only code. + for bix in 0..self.num_blocks() { + if display_order.contains(&bix) { + continue; + } + display_order.push(bix); + } + + let mut s = String::new(); + s = s + &format!("VCode_ShowWithRRU {{{{"); + s = s + &"\n".to_string(); + s = s + &format!(" Entry block: {}", self.entry); + s = s + &"\n".to_string(); + s = s + &format!(" Final block order: {:?}", self.final_block_order); + s = s + &"\n".to_string(); + + for i in 0..self.num_blocks() { + let block = display_order[i]; + + let omitted = + (if !self.final_block_order.is_empty() && i >= self.final_block_order.len() { + "** OMITTED **" + } else { + "" + }) + .to_string(); + + s = s + &format!("Block {}: {}", block, omitted); + s = s + &"\n".to_string(); + if let Some(bb) = self.bindex_to_bb(block as BlockIndex) { + s = s + &format!(" (original IR block: {})\n", bb); + } + for succ in self.succs(block as BlockIndex) { + s = s + &format!(" (successor: Block {})", succ); + s = s + &"\n".to_string(); + } + let (start, end) = self.block_ranges[block]; + s = s + &format!(" (instruction range: {} .. {})", start, end); + s = s + &"\n".to_string(); + for inst in start..end { + s = s + &format!( + " Inst {}: {}", + inst, + self.insts[inst as usize].show_rru(mb_rru) + ); + s = s + &"\n".to_string(); + } + } + + s = s + &format!("}}}}"); + s = s + &"\n".to_string(); + + s + } +} diff --git a/cranelift/codegen/src/num_uses.rs b/cranelift/codegen/src/num_uses.rs new file mode 100644 index 0000000000..c08741020c --- /dev/null +++ b/cranelift/codegen/src/num_uses.rs @@ -0,0 +1,68 @@ +//! A pass that computes the number of uses of any given instruction. + +#![allow(dead_code)] +#![allow(unused_imports)] + +use crate::cursor::{Cursor, FuncCursor}; +use crate::dce::has_side_effect; +use crate::entity::SecondaryMap; +use crate::ir::dfg::ValueDef; +use crate::ir::instructions::InstructionData; +use crate::ir::Value; +use crate::ir::{DataFlowGraph, Function, Inst, Opcode}; + +/// Auxiliary data structure that counts the number of uses of any given +/// instruction in a Function. This is used during instruction selection +/// to essentially do incremental DCE: when an instruction is no longer +/// needed because its computation has been isel'd into another machine +/// instruction at every use site, we can skip it. +#[derive(Clone, Debug)] +pub struct NumUses { + uses: SecondaryMap, +} + +impl NumUses { + fn new() -> NumUses { + NumUses { + uses: SecondaryMap::with_default(0), + } + } + + /// Compute the NumUses analysis result for a function. + pub fn compute(func: &Function) -> NumUses { + let mut uses = NumUses::new(); + for bb in func.layout.blocks() { + for inst in func.layout.block_insts(bb) { + for arg in func.dfg.inst_args(inst) { + let v = func.dfg.resolve_aliases(*arg); + uses.add_value(&func.dfg, v); + } + } + } + uses + } + + fn add_value(&mut self, dfg: &DataFlowGraph, v: Value) { + match dfg.value_def(v) { + ValueDef::Result(inst, _) => { + self.uses[inst] += 1; + } + _ => {} + } + } + + /// How many times is an instruction used? + pub fn use_count(&self, i: Inst) -> usize { + self.uses[i] as usize + } + + /// Is an instruction used at all? 
+    pub fn is_used(&self, i: Inst) -> bool {
+        self.use_count(i) > 0
+    }
+
+    /// Take the complete uses map, consuming this analysis result.
+    pub fn take_uses(self) -> SecondaryMap<Inst, u32> {
+        self.uses
+    }
+}
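// Illustrative only, not part of the patch: a sketch of how a caller elsewhere
// in the crate might consume this analysis. `count_unused_results` is a made-up
// helper; a real instruction-selection pass would also have to check for side
// effects (see the struct documentation above) before skipping an unused
// instruction.

use crate::ir::Function;
use crate::num_uses::NumUses;

/// Count the instructions in `func` whose results are never referenced.
fn count_unused_results(func: &Function) -> usize {
    let num_uses = NumUses::compute(func);
    let mut unused = 0;
    for block in func.layout.blocks() {
        for inst in func.layout.block_insts(block) {
            if !num_uses.is_used(inst) {
                unused += 1;
            }
        }
    }
    unused
}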