diff --git a/cranelift/codegen/src/abi.rs b/cranelift/codegen/src/abi.rs
deleted file mode 100644
index 29fdbf4c86..0000000000
--- a/cranelift/codegen/src/abi.rs
+++ /dev/null
@@ -1,180 +0,0 @@
-//! Common helper code for ABI lowering.
-//!
-//! This module provides functions and data structures that are useful for implementing the
-//! `TargetIsa::legalize_signature()` method.
-
-use crate::ir::{AbiParam, ArgumentExtension, ArgumentLoc, Type};
-use core::cmp::Ordering;
-
-/// Legalization action to perform on a single argument or return value when converting a
-/// signature.
-///
-/// An argument may go through a sequence of legalization steps before it reaches the final
-/// `Assign` action.
-#[derive(Clone, Copy, Debug)]
-pub enum ArgAction {
-    /// Assign the argument to the given location.
-    Assign(ArgumentLoc),
-
-    /// Convert the argument, then call again.
-    ///
-    /// This action can split an integer type into two smaller integer arguments, or it can split a
-    /// SIMD vector into halves.
-    Convert(ValueConversion),
-}
-
-impl From<ArgumentLoc> for ArgAction {
-    fn from(x: ArgumentLoc) -> Self {
-        Self::Assign(x)
-    }
-}
-
-impl From<ValueConversion> for ArgAction {
-    fn from(x: ValueConversion) -> Self {
-        Self::Convert(x)
-    }
-}
-
-/// Legalization action to be applied to a value that is being passed to or from a legalized ABI.
-#[derive(Clone, Copy, Debug, PartialEq, Eq)]
-pub enum ValueConversion {
-    /// Split an integer type into low and high parts, using `isplit`.
-    IntSplit,
-
-    /// Split a vector type into halves with identical lane types, using `vsplit`.
-    VectorSplit,
-
-    /// Bit-cast to an integer type of the same size.
-    IntBits,
-
-    /// Sign-extend integer value to the required type.
-    Sext(Type),
-
-    /// Unsigned zero-extend value to the required type.
-    Uext(Type),
-
-    /// Pass value by pointer of given integer type.
-    Pointer(Type),
-}
-
-/// Common trait for assigning arguments to registers or stack locations.
-///
-/// This will be implemented by individual ISAs.
-pub trait ArgAssigner {
-    /// Pick an assignment action for function argument (or return value) `arg`.
-    fn assign(&mut self, arg: &AbiParam) -> ArgAction;
-}
-
-/// Determine the right action to take when passing a `have` value type to a call signature where
-/// the next argument is `arg` which has a different value type.
-///
-/// The signature legalization process in `legalize_args` above can replace a single argument value
-/// with multiple arguments of smaller types. It can also change the type of an integer argument to
-/// a larger integer type, requiring the smaller value to be sign- or zero-extended.
-///
-/// The legalizer needs to repair the values at all ABI boundaries:
-///
-/// - Incoming function arguments to the entry block.
-/// - Function arguments passed to a call.
-/// - Return values from a call.
-/// - Return values passed to a return instruction.
-///
-/// The `legalize_abi_value` function helps the legalizer with the process. When the legalizer
-/// needs to pass a pre-legalized `have` argument, but the ABI argument `arg` has a different value
-/// type, `legalize_abi_value(have, arg)` tells the legalizer how to create the needed value type
-/// for the argument.
-///
-/// It may be necessary to call `legalize_abi_value` more than once for a given argument before the
-/// desired argument type appears. This will happen when a vector or integer type needs to be split
-/// more than once, for example.
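The comment above says `legalize_abi_value` may need to be called more than once per argument. A minimal, self-contained model of that retry loop, using plain bit widths instead of cranelift types (the `Conversion` enum and the widths are illustrative, not this crate's API):

```rust
use std::cmp::Ordering;

// Stand-in for the subset of `ValueConversion` this sketch exercises.
enum Conversion {
    IntSplit,  // halve the value, as `isplit` would
    Uext(u32), // zero-extend to the ABI width
    Done,      // the types already match
}

// One legalization step: compare the width we have to the width the ABI wants.
fn step(have_bits: u32, arg_bits: u32) -> Conversion {
    match have_bits.cmp(&arg_bits) {
        Ordering::Less => Conversion::Uext(arg_bits),
        Ordering::Equal => Conversion::Done,
        Ordering::Greater => Conversion::IntSplit,
    }
}

fn main() {
    // A 128-bit value passed where the ABI argument is 32 bits wide: each
    // split halves the width, so the loop runs more than once.
    let (mut have, arg) = (128, 32);
    loop {
        match step(have, arg) {
            Conversion::IntSplit => {
                println!("isplit {} -> 2 x {}", have, have / 2);
                have /= 2;
            }
            Conversion::Uext(to) => {
                println!("uextend {} -> {}", have, to);
                have = to;
            }
            Conversion::Done => break,
        }
    }
}
```

The real `legalize_abi_value`, which this models, follows.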
-pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion { - let have_bits = have.bits(); - let arg_bits = arg.value_type.bits(); - - if arg.legalized_to_pointer { - return ValueConversion::Pointer(arg.value_type); - } - - match have_bits.cmp(&arg_bits) { - // We have fewer bits than the ABI argument. - Ordering::Less => { - debug_assert!( - have.is_int() && arg.value_type.is_int(), - "Can only extend integer values" - ); - match arg.extension { - ArgumentExtension::Uext => ValueConversion::Uext(arg.value_type), - ArgumentExtension::Sext => ValueConversion::Sext(arg.value_type), - _ => panic!("No argument extension specified"), - } - } - // We have the same number of bits as the argument. - Ordering::Equal => { - // This must be an integer vector that is split and then extended. - debug_assert!(arg.value_type.is_int()); - debug_assert!(have.is_vector(), "expected vector type, got {}", have); - ValueConversion::VectorSplit - } - // We have more bits than the argument. - Ordering::Greater => { - if have.is_vector() { - ValueConversion::VectorSplit - } else if have.is_float() { - // Convert a float to int so it can be split the next time. - // ARM would do this to pass an `f64` in two registers. - ValueConversion::IntBits - } else { - ValueConversion::IntSplit - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::ir::types; - use crate::ir::AbiParam; - - #[test] - fn legalize() { - let mut arg = AbiParam::new(types::I32); - - assert_eq!( - legalize_abi_value(types::I64X2, &arg), - ValueConversion::VectorSplit - ); - assert_eq!( - legalize_abi_value(types::I64, &arg), - ValueConversion::IntSplit - ); - - // Vector of integers is broken down, then sign-extended. - arg.extension = ArgumentExtension::Sext; - assert_eq!( - legalize_abi_value(types::I16X4, &arg), - ValueConversion::VectorSplit - ); - assert_eq!( - legalize_abi_value(types::I16.by(2).unwrap(), &arg), - ValueConversion::VectorSplit - ); - assert_eq!( - legalize_abi_value(types::I16, &arg), - ValueConversion::Sext(types::I32) - ); - - // 64-bit float is split as an integer. - assert_eq!( - legalize_abi_value(types::F64, &arg), - ValueConversion::IntBits - ); - - // Value is passed by reference - arg.legalized_to_pointer = true; - assert_eq!( - legalize_abi_value(types::F64, &arg), - ValueConversion::Pointer(types::I32) - ); - } -} diff --git a/cranelift/codegen/src/binemit/mod.rs b/cranelift/codegen/src/binemit/mod.rs index 8861d92ce2..28b75865bd 100644 --- a/cranelift/codegen/src/binemit/mod.rs +++ b/cranelift/codegen/src/binemit/mod.rs @@ -4,23 +4,18 @@ //! binary machine code. mod memorysink; -mod relaxation; -mod shrink; mod stack_map; pub use self::memorysink::{ MemoryCodeSink, NullRelocSink, NullStackMapSink, NullTrapSink, RelocSink, StackMapSink, TrapSink, }; -pub use self::relaxation::relax_branches; -pub use self::shrink::shrink_instructions; pub use self::stack_map::StackMap; use crate::ir::entities::Value; use crate::ir::{ ConstantOffset, ExternalName, Function, Inst, JumpTable, Opcode, SourceLoc, TrapCode, }; use crate::isa::TargetIsa; -pub use crate::regalloc::RegDiversions; use core::fmt; #[cfg(feature = "enable-serde")] use serde::{Deserialize, Serialize}; @@ -183,16 +178,6 @@ pub trait CodeSink { } } -/// Report a bad encoding error. -#[cold] -pub fn bad_encoding(func: &Function, inst: Inst) -> ! { - panic!( - "Bad encoding {} for {}", - func.encodings[inst], - func.dfg.display_inst(inst, None) - ); -} - /// Emit a function to `sink`, given an instruction emitter function. 
/// /// This function is called from the `TargetIsa::emit_function()` implementations with the @@ -200,14 +185,12 @@ pub fn bad_encoding(func: &Function, inst: Inst) -> ! { pub fn emit_function(func: &Function, emit_inst: EI, sink: &mut CS, isa: &dyn TargetIsa) where CS: CodeSink, - EI: Fn(&Function, Inst, &mut RegDiversions, &mut CS, &dyn TargetIsa), + EI: Fn(&Function, Inst, &mut CS, &dyn TargetIsa), { - let mut divert = RegDiversions::new(); for block in func.layout.blocks() { - divert.at_block(&func.entry_diversions, block); debug_assert_eq!(func.offsets[block], sink.offset()); for inst in func.layout.block_insts(block) { - emit_inst(func, inst, &mut divert, sink, isa); + emit_inst(func, inst, sink, isa); } } diff --git a/cranelift/codegen/src/binemit/relaxation.rs b/cranelift/codegen/src/binemit/relaxation.rs deleted file mode 100644 index 0657f878dd..0000000000 --- a/cranelift/codegen/src/binemit/relaxation.rs +++ /dev/null @@ -1,396 +0,0 @@ -//! Branch relaxation and offset computation. -//! -//! # block header offsets -//! -//! Before we can generate binary machine code for branch instructions, we need to know the final -//! offsets of all the block headers in the function. This information is encoded in the -//! `func.offsets` table. -//! -//! # Branch relaxation -//! -//! Branch relaxation is the process of ensuring that all branches in the function have enough -//! range to encode their destination. It is common to have multiple branch encodings in an ISA. -//! For example, x86 branches can have either an 8-bit or a 32-bit displacement. -//! -//! On RISC architectures, it can happen that conditional branches have a shorter range than -//! unconditional branches: -//! -//! ```clif -//! brz v1, block17 -//! ``` -//! -//! can be transformed into: -//! -//! ```clif -//! brnz v1, block23 -//! jump block17 -//! block23: -//! ``` - -use crate::binemit::{CodeInfo, CodeOffset}; -use crate::cursor::{Cursor, FuncCursor}; -use crate::dominator_tree::DominatorTree; -use crate::flowgraph::ControlFlowGraph; -use crate::ir::{Block, Function, Inst, InstructionData, Opcode, Value, ValueList}; -use crate::isa::{EncInfo, TargetIsa}; -use crate::iterators::IteratorExtras; -use crate::regalloc::RegDiversions; -use crate::timing; -use crate::CodegenResult; -use core::convert::TryFrom; - -/// Relax branches and compute the final layout of block headers in `func`. -/// -/// Fill in the `func.offsets` table so the function is ready for binary emission. -pub fn relax_branches( - func: &mut Function, - _cfg: &mut ControlFlowGraph, - _domtree: &mut DominatorTree, - isa: &dyn TargetIsa, -) -> CodegenResult { - let _tt = timing::relax_branches(); - - let encinfo = isa.encoding_info(); - - // Clear all offsets so we can recognize blocks that haven't been visited yet. - func.offsets.clear(); - func.offsets.resize(func.dfg.num_blocks()); - - // Start by removing redundant jumps. - fold_redundant_jumps(func, _cfg, _domtree); - - // Convert jumps to fallthrough instructions where possible. - fallthroughs(func); - - let mut offset = 0; - let mut divert = RegDiversions::new(); - - // First, compute initial offsets for every block. 
- { - let mut cur = FuncCursor::new(func); - while let Some(block) = cur.next_block() { - divert.at_block(&cur.func.entry_diversions, block); - cur.func.offsets[block] = offset; - while let Some(inst) = cur.next_inst() { - divert.apply(&cur.func.dfg[inst]); - let enc = cur.func.encodings[inst]; - offset += encinfo.byte_size(enc, inst, &divert, &cur.func); - } - } - } - - // Then, run the relaxation algorithm until it converges. - let mut go_again = true; - while go_again { - go_again = false; - offset = 0; - - // Visit all instructions in layout order. - let mut cur = FuncCursor::new(func); - while let Some(block) = cur.next_block() { - divert.at_block(&cur.func.entry_diversions, block); - - // Record the offset for `block` and make sure we iterate until offsets are stable. - if cur.func.offsets[block] != offset { - cur.func.offsets[block] = offset; - go_again = true; - } - - while let Some(inst) = cur.next_inst() { - divert.apply(&cur.func.dfg[inst]); - - let enc = cur.func.encodings[inst]; - - // See if this is a branch has a range and a destination, and if the target is in - // range. - if let Some(range) = encinfo.branch_range(enc) { - if let Some(dest) = cur.func.dfg[inst].branch_destination() { - let dest_offset = cur.func.offsets[dest]; - if !range.contains(offset, dest_offset) { - offset += - relax_branch(&mut cur, &divert, offset, dest_offset, &encinfo, isa); - continue; - } - } - } - - offset += encinfo.byte_size(enc, inst, &divert, &cur.func); - } - } - } - - let code_size = offset; - let jumptables = offset; - - for (jt, jt_data) in func.jump_tables.iter() { - func.jt_offsets[jt] = offset; - // TODO: this should be computed based on the min size needed to hold the furthest branch. - offset += jt_data.len() as u32 * 4; - } - - let jumptables_size = offset - jumptables; - let rodata = offset; - - for constant in func.dfg.constants.entries_mut() { - constant.set_offset(offset); - offset += - u32::try_from(constant.len()).expect("Constants must have a length that fits in a u32") - } - - let rodata_size = offset - rodata; - - Ok(CodeInfo { - code_size, - jumptables_size, - rodata_size, - total_size: offset, - }) -} - -/// Folds an instruction if it is a redundant jump. -/// Returns whether folding was performed (which invalidates the CFG). -fn try_fold_redundant_jump( - func: &mut Function, - cfg: &mut ControlFlowGraph, - block: Block, - first_inst: Inst, -) -> bool { - let first_dest = match func.dfg[first_inst].branch_destination() { - Some(block) => block, // The instruction was a single-target branch. - None => { - return false; // The instruction was either multi-target or not a branch. - } - }; - - // For the moment, only attempt to fold a branch to a block that is parameterless. - // These blocks are mainly produced by critical edge splitting. - // - // TODO: Allow folding blocks that define SSA values and function as phi nodes. - if func.dfg.num_block_params(first_dest) != 0 { - return false; - } - - // Look at the first instruction of the first branch's destination. - // If it is an unconditional branch, maybe the second jump can be bypassed. - let second_inst = func.layout.first_inst(first_dest).expect("Instructions"); - if func.dfg[second_inst].opcode() != Opcode::Jump { - return false; - } - - // Now we need to fix up first_inst's block parameters to match second_inst's, - // without changing the branch-specific arguments. 
- // - // The intermediary block is allowed to reference any SSA value that dominates it, - // but that SSA value may not necessarily also dominate the instruction that's - // being patched. - - // Get the arguments and parameters passed by the first branch. - let num_fixed = func.dfg[first_inst] - .opcode() - .constraints() - .num_fixed_value_arguments(); - let (first_args, first_params) = func.dfg[first_inst] - .arguments(&func.dfg.value_lists) - .split_at(num_fixed); - - // Get the parameters passed by the second jump. - let num_fixed = func.dfg[second_inst] - .opcode() - .constraints() - .num_fixed_value_arguments(); - let (_, second_params) = func.dfg[second_inst] - .arguments(&func.dfg.value_lists) - .split_at(num_fixed); - let mut second_params = second_params.to_vec(); // Clone for rewriting below. - - // For each parameter passed by the second jump, if any of those parameters - // was a block parameter, rewrite it to refer to the value that the first jump - // passed in its parameters. Otherwise, make sure it dominates first_inst. - // - // For example: if we `block0: jump block1(v1)` to `block1(v2): jump block2(v2)`, - // we want to rewrite the original jump to `jump block2(v1)`. - let block_params: &[Value] = func.dfg.block_params(first_dest); - debug_assert!(block_params.len() == first_params.len()); - - for value in second_params.iter_mut() { - if let Some((n, _)) = block_params.iter().enumerate().find(|(_, &p)| p == *value) { - // This value was the Nth parameter passed to the second_inst's block. - // Rewrite it as the Nth parameter passed by first_inst. - *value = first_params[n]; - } - } - - // Build a value list of first_args (unchanged) followed by second_params (rewritten). - let arguments_vec: alloc::vec::Vec<_> = first_args - .iter() - .chain(second_params.iter()) - .copied() - .collect(); - let value_list = ValueList::from_slice(&arguments_vec, &mut func.dfg.value_lists); - - func.dfg[first_inst].take_value_list(); // Drop the current list. - func.dfg[first_inst].put_value_list(value_list); // Put the new list. - - // Bypass the second jump. - // This can disconnect the Block containing `second_inst`, to be cleaned up later. - let second_dest = func.dfg[second_inst].branch_destination().expect("Dest"); - func.change_branch_destination(first_inst, second_dest); - cfg.recompute_block(func, block); - - // The previously-intermediary Block may now be unreachable. Update CFG. - if cfg.pred_iter(first_dest).count() == 0 { - // Remove all instructions from that block. - while let Some(inst) = func.layout.first_inst(first_dest) { - func.layout.remove_inst(inst); - } - - // Remove the block... - cfg.recompute_block(func, first_dest); // ...from predecessor lists. - func.layout.remove_block(first_dest); // ...from the layout. - } - - true -} - -/// Redirects `jump` instructions that point to other `jump` instructions to the final destination. -/// This transformation may orphan some blocks. -fn fold_redundant_jumps( - func: &mut Function, - cfg: &mut ControlFlowGraph, - domtree: &mut DominatorTree, -) { - let mut folded = false; - - // Postorder iteration guarantees that a chain of jumps is visited from - // the end of the chain to the start of the chain. - for &block in domtree.cfg_postorder() { - // Only proceed if the first terminator instruction is a single-target branch. - let first_inst = func - .layout - .last_inst(block) - .expect("Block has no terminator"); - folded |= try_fold_redundant_jump(func, cfg, block, first_inst); - - // Also try the previous instruction. 
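The rewrite described above, as a standalone sketch with strings standing in for SSA values (this is not the cranelift data model): each value the second jump passes is either a parameter of the intermediate block, in which case it is replaced by what the first jump passed for that position, or a value that must already dominate the first jump.

```rust
fn fold(first_params: &[&str], block_params: &[&str], second_params: &[&str]) -> Vec<String> {
    second_params
        .iter()
        .map(|v| match block_params.iter().position(|p| p == v) {
            // v was the n-th parameter of the intermediate block: substitute
            // the n-th argument the first jump passed.
            Some(n) => first_params[n].to_string(),
            // Otherwise v must already dominate the rewritten jump.
            None => v.to_string(),
        })
        .collect()
}

fn main() {
    // block0: jump block1(v1); block1(v2): jump block2(v2)
    // folds to: block0: jump block2(v1)
    assert_eq!(fold(&["v1"], &["v2"], &["v2"]), vec!["v1".to_string()]);
}
```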
- if let Some(prev_inst) = func.layout.prev_inst(first_inst) { - folded |= try_fold_redundant_jump(func, cfg, block, prev_inst); - } - } - - // Folding jumps invalidates the dominator tree. - if folded { - domtree.compute(func, cfg); - } -} - -/// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any -/// existing `fallthrough` instructions are correct. -fn fallthroughs(func: &mut Function) { - for (block, succ) in func.layout.blocks().adjacent_pairs() { - let term = func - .layout - .last_inst(block) - .expect("block has no terminator."); - if let InstructionData::Jump { - ref mut opcode, - destination, - .. - } = func.dfg[term] - { - match *opcode { - Opcode::Fallthrough => { - // Somebody used a fall-through instruction before the branch relaxation pass. - // Make sure it is correct, i.e. the destination is the layout successor. - debug_assert_eq!( - destination, succ, - "Illegal fallthrough from {} to {}, but {}'s successor is {}", - block, destination, block, succ - ) - } - Opcode::Jump => { - // If this is a jump to the successor block, change it to a fall-through. - if destination == succ { - *opcode = Opcode::Fallthrough; - func.encodings[term] = Default::default(); - } - } - _ => {} - } - } - } -} - -/// Relax the branch instruction at `cur` so it can cover the range `offset - dest_offset`. -/// -/// Return the size of the replacement instructions up to and including the location where `cur` is -/// left. -fn relax_branch( - cur: &mut FuncCursor, - divert: &RegDiversions, - offset: CodeOffset, - dest_offset: CodeOffset, - encinfo: &EncInfo, - isa: &dyn TargetIsa, -) -> CodeOffset { - let inst = cur.current_inst().unwrap(); - log::trace!( - "Relaxing [{}] {} for {:#x}-{:#x} range", - encinfo.display(cur.func.encodings[inst]), - cur.func.dfg.display_inst(inst, isa), - offset, - dest_offset - ); - - // Pick the smallest encoding that can handle the branch range. - let dfg = &cur.func.dfg; - let ctrl_type = dfg.ctrl_typevar(inst); - if let Some(enc) = isa - .legal_encodings(cur.func, &dfg[inst], ctrl_type) - .filter(|&enc| { - let range = encinfo.branch_range(enc).expect("Branch with no range"); - if !range.contains(offset, dest_offset) { - log::trace!(" trying [{}]: out of range", encinfo.display(enc)); - false - } else if encinfo.operand_constraints(enc) - != encinfo.operand_constraints(cur.func.encodings[inst]) - { - // Conservatively give up if the encoding has different constraints - // than the original, so that we don't risk picking a new encoding - // which the existing operands don't satisfy. We can't check for - // validity directly because we don't have a RegDiversions active so - // we don't know which registers are actually in use. - log::trace!(" trying [{}]: constraints differ", encinfo.display(enc)); - false - } else { - log::trace!(" trying [{}]: OK", encinfo.display(enc)); - true - } - }) - .min_by_key(|&enc| encinfo.byte_size(enc, inst, &divert, &cur.func)) - { - debug_assert!(enc != cur.func.encodings[inst]); - cur.func.encodings[inst] = enc; - return encinfo.byte_size(enc, inst, &divert, &cur.func); - } - - // Note: On some RISC ISAs, conditional branches have shorter range than unconditional - // branches, so one way of extending the range of a conditional branch is to invert its - // condition and make it branch over an unconditional jump which has the larger range. 
- // - // Splitting the block is problematic this late because there may be register diversions in - // effect across the conditional branch, and they can't survive the control flow edge to a new - // block. We have two options for handling that: - // - // 1. Set a flag on the new block that indicates it wants the preserve the register diversions of - // its layout predecessor, or - // 2. Use an encoding macro for the branch-over-jump pattern so we don't need to split the block. - // - // It seems that 1. would allow us to share code among RISC ISAs that need this. - // - // We can't allow register diversions to survive from the layout predecessor because the layout - // predecessor could contain kill points for some values that are live in this block, and - // diversions are not automatically cancelled when the live range of a value ends. - - // This assumes solution 2. above: - panic!("No branch in range for {:#x}-{:#x}", offset, dest_offset); -} diff --git a/cranelift/codegen/src/binemit/shrink.rs b/cranelift/codegen/src/binemit/shrink.rs deleted file mode 100644 index 1e961c9829..0000000000 --- a/cranelift/codegen/src/binemit/shrink.rs +++ /dev/null @@ -1,72 +0,0 @@ -//! Instruction shrinking. -//! -//! Sometimes there are multiple valid encodings for a given instruction. Cranelift often initially -//! chooses the largest one, because this typically provides the register allocator the most -//! flexibility. However, once register allocation is done, this is no longer important, and we -//! can switch to smaller encodings when possible. - -use crate::ir::instructions::InstructionData; -use crate::ir::Function; -use crate::isa::TargetIsa; -use crate::regalloc::RegDiversions; -use crate::timing; - -/// Pick the smallest valid encodings for instructions. -pub fn shrink_instructions(func: &mut Function, isa: &dyn TargetIsa) { - let _tt = timing::shrink_instructions(); - - let encinfo = isa.encoding_info(); - let mut divert = RegDiversions::new(); - - for block in func.layout.blocks() { - // Load diversions from predecessors. - divert.at_block(&func.entry_diversions, block); - - for inst in func.layout.block_insts(block) { - let enc = func.encodings[inst]; - if enc.is_legal() { - // regmove/regfill/regspill are special instructions with register immediates - // that represented as normal operands, so the normal predicates below don't - // handle them correctly. - // - // Also, they need to be presented to the `RegDiversions` to update the - // location tracking. - // - // TODO: Eventually, we want the register allocator to avoid leaving these special - // instructions behind, but for now, just temporarily avoid trying to shrink them. - let inst_data = &func.dfg[inst]; - match inst_data { - InstructionData::RegMove { .. } - | InstructionData::RegFill { .. } - | InstructionData::RegSpill { .. } => { - divert.apply(inst_data); - continue; - } - _ => (), - } - - let ctrl_type = func.dfg.ctrl_typevar(inst); - - // Pick the last encoding with constraints that are satisfied. 
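The selection below filters the legal encodings to those whose constraints still hold, then keeps the smallest. A reduced model with (name, byte size, constraints-satisfied) tuples standing in for the `EncInfo` queries:

```rust
fn shrink<'a>(candidates: &'a [(&'a str, u8, bool)]) -> Option<&'a str> {
    candidates
        .iter()
        .filter(|&&(_, _, ok)| ok) // drop encodings whose constraints fail
        .min_by_key(|&&(_, size, _)| size) // keep the smallest survivor
        .map(|&(name, _, _)| name)
}

fn main() {
    // An x86-style case: the 32-bit-displacement form was chosen before
    // register allocation; afterwards the 8-bit form is legal and smaller.
    let candidates = [("add_disp32", 6, true), ("add_disp8", 3, true)];
    assert_eq!(shrink(&candidates), Some("add_disp8"));
}
```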
- let best_enc = isa - .legal_encodings(func, &func.dfg[inst], ctrl_type) - .filter(|e| encinfo.constraints[e.recipe()].satisfied(inst, &divert, &func)) - .min_by_key(|e| encinfo.byte_size(*e, inst, &divert, &func)) - .unwrap(); - - if best_enc != enc { - func.encodings[inst] = best_enc; - - log::trace!( - "Shrunk [{}] to [{}] in {}, reducing the size from {} to {}", - encinfo.display(enc), - encinfo.display(best_enc), - func.dfg.display_inst(inst, isa), - encinfo.byte_size(enc, inst, &divert, &func), - encinfo.byte_size(best_enc, inst, &divert, &func) - ); - } - } - } - } -} diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index 8214fc7781..14d4eb88e4 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -9,24 +9,17 @@ //! contexts concurrently. Typically, you would have one context per compilation thread and only a //! single ISA instance. -use crate::binemit::{ - relax_branches, shrink_instructions, CodeInfo, MemoryCodeSink, RelocSink, StackMapSink, - TrapSink, -}; +use crate::binemit::{CodeInfo, MemoryCodeSink, RelocSink, StackMapSink, TrapSink}; use crate::dce::do_dce; use crate::dominator_tree::DominatorTree; use crate::flowgraph::ControlFlowGraph; use crate::ir::Function; use crate::isa::TargetIsa; -use crate::legalize_function; use crate::legalizer::simple_legalize; use crate::licm::do_licm; use crate::loop_analysis::LoopAnalysis; use crate::machinst::{MachCompileResult, MachStackMap}; use crate::nan_canonicalization::do_nan_canonicalization; -use crate::postopt::do_postopt; -use crate::redundant_reload_remover::RedundantReloadRemover; -use crate::regalloc; use crate::remove_constant_phis::do_remove_constant_phis; use crate::result::CodegenResult; use crate::settings::{FlagsOrIsa, OptLevel}; @@ -34,8 +27,7 @@ use crate::simple_gvn::do_simple_gvn; use crate::simple_preopt::do_preopt; use crate::timing; use crate::unreachable_code::eliminate_unreachable_code; -use crate::value_label::{build_value_labels_ranges, ComparableSourceLoc, ValueLabelsRanges}; -use crate::verifier::{verify_context, verify_locations, VerifierErrors, VerifierResult}; +use crate::verifier::{verify_context, VerifierErrors, VerifierResult}; #[cfg(feature = "souper-harvest")] use alloc::string::String; use alloc::vec::Vec; @@ -54,15 +46,9 @@ pub struct Context { /// Dominator tree for `func`. pub domtree: DominatorTree, - /// Register allocation context. - pub regalloc: regalloc::Context, - /// Loop analysis of `func`. pub loop_analysis: LoopAnalysis, - /// Redundant-reload remover context. - pub redundant_reload_remover: RedundantReloadRemover, - /// Result of MachBackend compilation, if computed. 
pub mach_compile_result: Option, @@ -88,9 +74,7 @@ impl Context { func, cfg: ControlFlowGraph::new(), domtree: DominatorTree::new(), - regalloc: regalloc::Context::new(), loop_analysis: LoopAnalysis::new(), - redundant_reload_remover: RedundantReloadRemover::new(), mach_compile_result: None, want_disasm: false, } @@ -101,9 +85,7 @@ impl Context { self.func.clear(); self.cfg.clear(); self.domtree.clear(); - self.regalloc.clear(); self.loop_analysis.clear(); - self.redundant_reload_remover.clear(); self.mach_compile_result = None; self.want_disasm = false; } @@ -137,13 +119,7 @@ impl Context { let old_len = mem.len(); mem.resize(old_len + info.total_size as usize, 0); let new_info = unsafe { - self.emit_to_memory( - isa, - mem.as_mut_ptr().add(old_len), - relocs, - traps, - stack_maps, - ) + self.emit_to_memory(mem.as_mut_ptr().add(old_len), relocs, traps, stack_maps) }; debug_assert!(new_info == info); Ok(info) @@ -177,7 +153,6 @@ impl Context { self.legalize(isa)?; if opt_level != OptLevel::None { - self.postopt(isa)?; self.compute_domtree(); self.compute_loop_analysis(); self.licm(isa)?; @@ -192,25 +167,12 @@ impl Context { self.remove_constant_phis(isa)?; - if let Some(backend) = isa.get_mach_backend() { - let result = backend.compile_function(&self.func, self.want_disasm)?; - let info = result.code_info(); - self.mach_compile_result = Some(result); - Ok(info) - } else { - self.regalloc(isa)?; - self.prologue_epilogue(isa)?; - if opt_level == OptLevel::Speed || opt_level == OptLevel::SpeedAndSize { - self.redundant_reload_remover(isa)?; - } - if opt_level == OptLevel::SpeedAndSize { - self.shrink_instructions(isa)?; - } - let result = self.relax_branches(isa); - - log::trace!("Compiled:\n{}", self.func.display(isa)); - result - } + // FIXME: make this non optional + let backend = isa.get_mach_backend().expect("only mach backends nowadays"); + let result = backend.compile_function(&self.func, self.want_disasm)?; + let info = result.code_info(); + self.mach_compile_result = Some(result); + Ok(info) } /// Emit machine code directly into raw memory. @@ -228,7 +190,6 @@ impl Context { /// Returns information about the emitted code and data. pub unsafe fn emit_to_memory( &self, - isa: &dyn TargetIsa, mem: *mut u8, relocs: &mut dyn RelocSink, traps: &mut dyn TrapSink, @@ -236,25 +197,24 @@ impl Context { ) -> CodeInfo { let _tt = timing::binemit(); let mut sink = MemoryCodeSink::new(mem, relocs, traps, stack_maps); - if let Some(ref result) = &self.mach_compile_result { - result.buffer.emit(&mut sink); - let info = sink.info; - // New backends do not emit StackMaps through the `CodeSink` because its interface - // requires `Value`s; instead, the `StackMap` objects are directly accessible via - // `result.buffer.stack_maps()`. - for &MachStackMap { - offset_end, - ref stack_map, - .. - } in result.buffer.stack_maps() - { - stack_maps.add_stack_map(offset_end, stack_map.clone()); - } - info - } else { - isa.emit_function_to_memory(&self.func, &mut sink); - sink.info + let result = self + .mach_compile_result + .as_ref() + .expect("only using mach backend now"); + result.buffer.emit(&mut sink); + let info = sink.info; + // New backends do not emit StackMaps through the `CodeSink` because its interface + // requires `Value`s; instead, the `StackMap` objects are directly accessible via + // `result.buffer.stack_maps()`. + for &MachStackMap { + offset_end, + ref stack_map, + .. 
+ } in result.buffer.stack_maps() + { + stack_maps.add_stack_map(offset_end, stack_map.clone()); } + info } /// If available, return information about the code layout in the @@ -314,26 +274,6 @@ impl Context { Ok(()) } - /// Run the locations verifier on the function. - pub fn verify_locations(&self, isa: &dyn TargetIsa) -> VerifierResult<()> { - let mut errors = VerifierErrors::default(); - let _ = verify_locations(isa, &self.func, &self.cfg, None, &mut errors); - - if errors.is_empty() { - Ok(()) - } else { - Err(errors) - } - } - - /// Run the locations verifier only if the `enable_verifier` setting is true. - pub fn verify_locations_if(&self, isa: &dyn TargetIsa) -> CodegenResult<()> { - if isa.flags().enable_verifier() { - self.verify_locations(isa)?; - } - Ok(()) - } - /// Perform dead-code elimination on the function. pub fn dce<'a, FOI: Into>>(&mut self, fisa: FOI) -> CodegenResult<()> { do_dce(&mut self.func, &mut self.domtree); @@ -370,22 +310,10 @@ impl Context { // TODO: Avoid doing this when legalization doesn't actually mutate the CFG. self.domtree.clear(); self.loop_analysis.clear(); - if isa.get_mach_backend().is_some() { - // Run some specific legalizations only. - simple_legalize(&mut self.func, &mut self.cfg, isa); - self.verify_if(isa) - } else { - legalize_function(&mut self.func, &mut self.cfg, isa); - log::trace!("Legalized:\n{}", self.func.display(isa)); - self.verify_if(isa) - } - } - /// Perform post-legalization rewrites on the function. - pub fn postopt(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { - do_postopt(&mut self.func, isa); - self.verify_if(isa)?; - Ok(()) + // Run some specific legalizations only. + simple_legalize(&mut self.func, &mut self.cfg, isa); + self.verify_if(isa) } /// Compute the control flow graph. @@ -437,58 +365,6 @@ impl Context { self.verify_if(fisa) } - /// Run the register allocator. - pub fn regalloc(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { - self.regalloc - .run(isa, &mut self.func, &mut self.cfg, &mut self.domtree) - } - - /// Insert prologue and epilogues after computing the stack frame layout. - pub fn prologue_epilogue(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { - isa.prologue_epilogue(&mut self.func)?; - self.verify_if(isa)?; - self.verify_locations_if(isa)?; - Ok(()) - } - - /// Do redundant-reload removal after allocation of both registers and stack slots. - pub fn redundant_reload_remover(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { - self.redundant_reload_remover - .run(isa, &mut self.func, &self.cfg); - self.verify_if(isa)?; - Ok(()) - } - - /// Run the instruction shrinking pass. - pub fn shrink_instructions(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> { - shrink_instructions(&mut self.func, isa); - self.verify_if(isa)?; - self.verify_locations_if(isa)?; - Ok(()) - } - - /// Run the branch relaxation pass and return information about the function's code and - /// read-only data. - pub fn relax_branches(&mut self, isa: &dyn TargetIsa) -> CodegenResult { - let info = relax_branches(&mut self.func, &mut self.cfg, &mut self.domtree, isa)?; - self.verify_if(isa)?; - self.verify_locations_if(isa)?; - Ok(info) - } - - /// Builds ranges and location for specified value labels. - pub fn build_value_labels_ranges( - &self, - isa: &dyn TargetIsa, - ) -> CodegenResult { - Ok(build_value_labels_ranges::( - &self.func, - &self.regalloc, - self.mach_compile_result.as_ref(), - isa, - )) - } - /// Harvest candidate left-hand sides for superoptimization with Souper. 
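After this change, a caller drives the pipeline roughly as below: `compile` always goes through the MachInst backend, and `emit_to_memory` no longer takes an ISA argument. This is a hedged sketch: the `Null*` sinks are the helpers re-exported from `binemit` in this diff, while `Context::for_function`, the root re-exports, and the sink constructor syntax are assumptions about the surrounding crate API.

```rust
use cranelift_codegen::binemit::{NullRelocSink, NullStackMapSink, NullTrapSink};
use cranelift_codegen::ir::Function;
use cranelift_codegen::isa::TargetIsa;
use cranelift_codegen::{CodegenResult, Context};

fn compile_to_vec(isa: &dyn TargetIsa, func: Function) -> CodegenResult<Vec<u8>> {
    let mut ctx = Context::for_function(func);
    let info = ctx.compile(isa)?;
    let mut code = vec![0u8; info.total_size as usize];
    // Safety: `code` provides exactly the `info.total_size` writable bytes
    // that `emit_to_memory` requires.
    unsafe {
        ctx.emit_to_memory(
            code.as_mut_ptr(),
            &mut NullRelocSink {},
            &mut NullTrapSink {},
            &mut NullStackMapSink {},
        );
    }
    Ok(code)
}
```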
#[cfg(feature = "souper-harvest")] pub fn souper_harvest( diff --git a/cranelift/codegen/src/cursor.rs b/cranelift/codegen/src/cursor.rs index c95aa5a3fb..f47ddf3d4e 100644 --- a/cranelift/codegen/src/cursor.rs +++ b/cranelift/codegen/src/cursor.rs @@ -634,7 +634,7 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut FuncCursor<'f> { &mut self.func.dfg } - fn insert_built_inst(self, inst: ir::Inst, _: ir::Type) -> &'c mut ir::DataFlowGraph { + fn insert_built_inst(self, inst: ir::Inst) -> &'c mut ir::DataFlowGraph { // TODO: Remove this assertion once #796 is fixed. #[cfg(debug_assertions)] { @@ -759,11 +759,7 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> { &mut self.func.dfg } - fn insert_built_inst( - self, - inst: ir::Inst, - ctrl_typevar: ir::Type, - ) -> &'c mut ir::DataFlowGraph { + fn insert_built_inst(self, inst: ir::Inst) -> &'c mut ir::DataFlowGraph { // TODO: Remove this assertion once #796 is fixed. #[cfg(debug_assertions)] { @@ -787,6 +783,7 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> { }; }; } + // Insert the instruction and remember the reference. self.insert_inst(inst); self.built_inst = Some(inst); @@ -795,21 +792,6 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> { self.func.srclocs[inst] = self.srcloc; } - // Skip the encoding update if we're using a new (MachInst) backend; encodings come later, - // during lowering. - if self.isa.get_mach_backend().is_none() { - // Assign an encoding. - // XXX Is there a way to describe this error to the user? - #[cfg_attr(feature = "cargo-clippy", allow(clippy::match_wild_err_arm))] - match self - .isa - .encode(&self.func, &self.func.dfg[inst], ctrl_typevar) - { - Ok(e) => self.func.encodings[inst] = e, - Err(_) => panic!("can't encode {}", self.display_inst(inst)), - } - } - &mut self.func.dfg } } diff --git a/cranelift/codegen/src/inst_predicates.rs b/cranelift/codegen/src/inst_predicates.rs index 2160c83e4f..518487af2b 100644 --- a/cranelift/codegen/src/inst_predicates.rs +++ b/cranelift/codegen/src/inst_predicates.rs @@ -78,10 +78,3 @@ pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option { _ => None, } } - -/// Is the given instruction a safepoint (i.e., potentially causes a GC, depending on the -/// embedding, and so requires reftyped values to be enumerated with a stack map)? -pub fn is_safepoint(func: &Function, inst: Inst) -> bool { - let op = func.dfg[inst].opcode(); - op.is_resumable_trap() || op.is_call() -} diff --git a/cranelift/codegen/src/ir/builder.rs b/cranelift/codegen/src/ir/builder.rs index 63054928f2..55611978e0 100644 --- a/cranelift/codegen/src/ir/builder.rs +++ b/cranelift/codegen/src/ir/builder.rs @@ -56,7 +56,7 @@ pub trait InstInserterBase<'f>: Sized { fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph; /// Insert a new instruction which belongs to the DFG. 
- fn insert_built_inst(self, inst: Inst, ctrl_typevar: Type) -> &'f mut DataFlowGraph; + fn insert_built_inst(self, inst: Inst) -> &'f mut DataFlowGraph; } use core::marker::PhantomData; @@ -129,7 +129,7 @@ impl<'f, IIB: InstInserterBase<'f>> InstBuilderBase<'f> for InsertBuilder<'f, II inst = dfg.make_inst(data); dfg.make_inst_results(inst, ctrl_typevar); } - (inst, self.inserter.insert_built_inst(inst, ctrl_typevar)) + (inst, self.inserter.insert_built_inst(inst)) } } @@ -166,7 +166,7 @@ where let ru = self.reuse.as_ref().iter().cloned(); dfg.make_inst_results_reusing(inst, ctrl_typevar, ru); } - (inst, self.inserter.insert_built_inst(inst, ctrl_typevar)) + (inst, self.inserter.insert_built_inst(inst)) } } diff --git a/cranelift/codegen/src/ir/function.rs b/cranelift/codegen/src/ir/function.rs index 7b1c957518..6f609b2a9a 100644 --- a/cranelift/codegen/src/ir/function.rs +++ b/cranelift/codegen/src/ir/function.rs @@ -3,7 +3,6 @@ //! The `Function` struct defined in this module owns all of its basic blocks and //! instructions. -use crate::binemit::CodeOffset; use crate::entity::{PrimaryMap, SecondaryMap}; use crate::ir; use crate::ir::{ @@ -11,11 +10,10 @@ use crate::ir::{ HeapData, Inst, InstructionData, JumpTable, JumpTableData, Opcode, SigRef, StackSlot, StackSlotData, Table, TableData, }; -use crate::ir::{BlockOffsets, InstEncodings, SourceLocs, StackSlots, ValueLocations}; +use crate::ir::{BlockOffsets, SourceLocs, StackSlots, ValueLocations}; use crate::ir::{DataFlowGraph, ExternalName, Layout, Signature}; use crate::ir::{JumpTableOffsets, JumpTables}; -use crate::isa::{CallConv, EncInfo, Encoding, Legalize, TargetIsa}; -use crate::regalloc::{EntryRegDiversions, RegDiversions}; +use crate::isa::{CallConv, TargetIsa}; use crate::value_label::ValueLabelsRanges; use crate::write::write_function; #[cfg(feature = "enable-serde")] @@ -106,19 +104,9 @@ pub struct Function { /// Layout of blocks and instructions in the function body. pub layout: Layout, - /// Encoding recipe and bits for the legal instructions. - /// Illegal instructions have the `Encoding::default()` value. - pub encodings: InstEncodings, - /// Location assigned to every value. pub locations: ValueLocations, - /// Non-default locations assigned to value at the entry of basic blocks. - /// - /// At the entry of each basic block, we might have values which are not in their default - /// ValueLocation. This field records these register-to-register moves as Diversions. - pub entry_diversions: EntryRegDiversions, - /// Code offsets of the block headers. /// /// This information is only transiently available after the `binemit::relax_branches` function @@ -168,9 +156,7 @@ impl Function { jump_tables: PrimaryMap::new(), dfg: DataFlowGraph::new(), layout: Layout::new(), - encodings: SecondaryMap::new(), locations: SecondaryMap::new(), - entry_diversions: EntryRegDiversions::new(), offsets: SecondaryMap::new(), jt_offsets: SecondaryMap::new(), srclocs: SecondaryMap::new(), @@ -190,9 +176,7 @@ impl Function { self.jump_tables.clear(); self.dfg.clear(); self.layout.clear(); - self.encodings.clear(); self.locations.clear(); - self.entry_diversions.clear(); self.offsets.clear(); self.jt_offsets.clear(); self.srclocs.clear(); @@ -268,51 +252,6 @@ impl Function { .map(|i| self.dfg.block_params(entry)[i]) } - /// Get an iterator over the instructions in `block`, including offsets and encoded instruction - /// sizes. 
- /// - /// The iterator returns `(offset, inst, size)` tuples, where `offset` if the offset in bytes - /// from the beginning of the function to the instruction, and `size` is the size of the - /// instruction in bytes, or 0 for unencoded instructions. - /// - /// This function can only be used after the code layout has been computed by the - /// `binemit::relax_branches()` function. - pub fn inst_offsets<'a>(&'a self, block: Block, encinfo: &EncInfo) -> InstOffsetIter<'a> { - assert!( - !self.offsets.is_empty(), - "Code layout must be computed first" - ); - let mut divert = RegDiversions::new(); - divert.at_block(&self.entry_diversions, block); - InstOffsetIter { - encinfo: encinfo.clone(), - func: self, - divert, - encodings: &self.encodings, - offset: self.offsets[block], - iter: self.layout.block_insts(block), - } - } - - /// Wrapper around `encode` which assigns `inst` the resulting encoding. - pub fn update_encoding(&mut self, inst: ir::Inst, isa: &dyn TargetIsa) -> Result<(), Legalize> { - if isa.get_mach_backend().is_some() { - Ok(()) - } else { - self.encode(inst, isa).map(|e| self.encodings[inst] = e) - } - } - - /// Wrapper around `TargetIsa::encode` for encoding an existing instruction - /// in the `Function`. - pub fn encode(&self, inst: ir::Inst, isa: &dyn TargetIsa) -> Result { - if isa.get_mach_backend().is_some() { - Ok(Encoding::new(0, 0)) - } else { - isa.encode(&self, &self.dfg[inst], self.dfg.ctrl_typevar(inst)) - } - } - /// Starts collection of debug information. pub fn collect_debug_info(&mut self) { self.dfg.collect_debug_info(); @@ -469,29 +408,3 @@ impl fmt::Debug for Function { write_function(fmt, self, &DisplayFunctionAnnotations::default()) } } - -/// Iterator returning instruction offsets and sizes: `(offset, inst, size)`. -pub struct InstOffsetIter<'a> { - encinfo: EncInfo, - divert: RegDiversions, - func: &'a Function, - encodings: &'a InstEncodings, - offset: CodeOffset, - iter: ir::layout::Insts<'a>, -} - -impl<'a> Iterator for InstOffsetIter<'a> { - type Item = (CodeOffset, ir::Inst, CodeOffset); - - fn next(&mut self) -> Option { - self.iter.next().map(|inst| { - self.divert.apply(&self.func.dfg[inst]); - let byte_size = - self.encinfo - .byte_size(self.encodings[inst], inst, &self.divert, self.func); - let offset = self.offset; - self.offset += byte_size; - (offset, inst, byte_size) - }) - } -} diff --git a/cranelift/codegen/src/ir/libcall.rs b/cranelift/codegen/src/ir/libcall.rs index 632b04a4c3..52360a9bf9 100644 --- a/cranelift/codegen/src/ir/libcall.rs +++ b/cranelift/codegen/src/ir/libcall.rs @@ -1,7 +1,7 @@ //! Naming well-known routines in the runtime library. use crate::ir::{ - types, AbiParam, ArgumentPurpose, ExtFuncData, ExternalName, FuncRef, Function, Inst, Opcode, + types, AbiParam, ArgumentPurpose, ExtFuncData, ExternalName, FuncRef, Function, Opcode, Signature, Type, }; use crate::isa::{CallConv, RegUnit, TargetIsa}; @@ -166,21 +166,6 @@ impl LibCall { } } -/// Get a function reference for `libcall` in `func`, following the signature -/// for `inst`. -/// -/// If there is an existing reference, use it, otherwise make a new one. -pub(crate) fn get_libcall_funcref( - libcall: LibCall, - call_conv: CallConv, - func: &mut Function, - inst: Inst, - isa: &dyn TargetIsa, -) -> FuncRef { - find_funcref(libcall, func) - .unwrap_or_else(|| make_funcref_for_inst(libcall, call_conv, func, inst, isa)) -} - /// Get a function reference for the probestack function in `func`. /// /// If there is an existing reference, use it, otherwise make a new one. 
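Both the removed `get_libcall_funcref` and the surviving probestack helper follow the same find-or-create pattern: reuse a matching function reference if the function already declares one, otherwise create it. A standalone sketch with a plain string table standing in for the import list:

```rust
fn find_or_make(imports: &mut Vec<String>, name: &str) -> usize {
    imports.iter().position(|n| n == name).unwrap_or_else(|| {
        imports.push(name.to_string()); // no existing reference: declare one
        imports.len() - 1
    })
}

fn main() {
    let mut imports = vec!["Memcpy".to_string()];
    assert_eq!(find_or_make(&mut imports, "Memcpy"), 0); // reused
    assert_eq!(find_or_make(&mut imports, "CeilF64"), 1); // newly created
}
```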
@@ -227,33 +212,6 @@ fn make_funcref_for_probestack( make_funcref(LibCall::Probestack, func, sig, isa) } -/// Create a funcref for `libcall` with a signature matching `inst`. -fn make_funcref_for_inst( - libcall: LibCall, - call_conv: CallConv, - func: &mut Function, - inst: Inst, - isa: &dyn TargetIsa, -) -> FuncRef { - let mut sig = Signature::new(call_conv); - for &v in func.dfg.inst_args(inst) { - sig.params.push(AbiParam::new(func.dfg.value_type(v))); - } - for &v in func.dfg.inst_results(inst) { - sig.returns.push(AbiParam::new(func.dfg.value_type(v))); - } - - if call_conv.extends_baldrdash() { - // Adds the special VMContext parameter to the signature. - sig.params.push(AbiParam::special( - isa.pointer_type(), - ArgumentPurpose::VMContext, - )); - } - - make_funcref(libcall, func, sig, isa) -} - /// Create a funcref for `libcall`. fn make_funcref( libcall: LibCall, diff --git a/cranelift/codegen/src/ir/mod.rs b/cranelift/codegen/src/ir/mod.rs index c075da6824..4b325d366c 100644 --- a/cranelift/codegen/src/ir/mod.rs +++ b/cranelift/codegen/src/ir/mod.rs @@ -63,7 +63,6 @@ pub use cranelift_codegen_shared::condcodes; use crate::binemit; use crate::entity::{entity_impl, PrimaryMap, SecondaryMap}; -use crate::isa; /// Map of value locations. pub type ValueLocations = SecondaryMap; @@ -71,9 +70,6 @@ pub type ValueLocations = SecondaryMap; /// Map of jump tables. pub type JumpTables = PrimaryMap; -/// Map of instruction encodings. -pub type InstEncodings = SecondaryMap; - /// Code offsets for blocks. pub type BlockOffsets = SecondaryMap; diff --git a/cranelift/codegen/src/isa/constraints.rs b/cranelift/codegen/src/isa/constraints.rs index c87c3bd9d4..21a7426ea0 100644 --- a/cranelift/codegen/src/isa/constraints.rs +++ b/cranelift/codegen/src/isa/constraints.rs @@ -8,9 +8,8 @@ //! are satisfied. use crate::binemit::CodeOffset; -use crate::ir::{Function, Inst, ValueLoc}; +use crate::ir::ValueLoc; use crate::isa::{RegClass, RegUnit}; -use crate::regalloc::RegDiversions; /// Register constraint for a single value operand or instruction result. #[derive(PartialEq, Debug)] @@ -87,69 +86,6 @@ pub enum ConstraintKind { Stack, } -/// Value operand constraints for an encoding recipe. -#[derive(PartialEq, Clone)] -pub struct RecipeConstraints { - /// Constraints for the instruction's fixed value operands. - /// - /// If the instruction takes a variable number of operands, the register constraints for those - /// operands must be computed dynamically. - /// - /// - For branches and jumps, block arguments must match the expectations of the destination block. - /// - For calls and returns, the calling convention ABI specifies constraints. - pub ins: &'static [OperandConstraint], - - /// Constraints for the instruction's fixed results. - /// - /// If the instruction produces a variable number of results, it's probably a call and the - /// constraints must be derived from the calling convention ABI. - pub outs: &'static [OperandConstraint], - - /// Are any of the input constraints `FixedReg` or `FixedTied`? - pub fixed_ins: bool, - - /// Are any of the output constraints `FixedReg` or `FixedTied`? - pub fixed_outs: bool, - - /// Are any of the input/output constraints `Tied` (but not `FixedTied`)? - pub tied_ops: bool, - - /// Does this instruction clobber the CPU flags? - /// - /// When true, SSA values of type `iflags` or `fflags` can not be live across the instruction. - pub clobbers_flags: bool, -} - -impl RecipeConstraints { - /// Check that these constraints are satisfied by the operands on `inst`. 
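A reduced model of the `satisfied` method below, covering only the location-kind test; the `Tied`-operand comparison and register diversions are omitted, and the `Loc`/`Constraint` enums are simplified stand-ins:

```rust
enum Loc {
    Reg(u8),
    Stack(i32),
}

enum Constraint {
    AnyReg,
    FixedReg(u8),
    Stack,
}

// A value location satisfies a constraint when its kind (and, for fixed
// constraints, its exact register) matches.
fn satisfied(loc: &Loc, c: &Constraint) -> bool {
    match (loc, c) {
        (Loc::Reg(_), Constraint::AnyReg) => true,
        (Loc::Reg(r), Constraint::FixedReg(want)) => r == want,
        (Loc::Stack(_), Constraint::Stack) => true,
        _ => false,
    }
}

fn main() {
    assert!(satisfied(&Loc::Reg(3), &Constraint::AnyReg));
    assert!(satisfied(&Loc::Stack(8), &Constraint::Stack));
    assert!(!satisfied(&Loc::Reg(3), &Constraint::FixedReg(0)));
}
```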
- pub fn satisfied(&self, inst: Inst, divert: &RegDiversions, func: &Function) -> bool { - for (&arg, constraint) in func.dfg.inst_args(inst).iter().zip(self.ins) { - let loc = divert.get(arg, &func.locations); - - if let ConstraintKind::Tied(out_index) = constraint.kind { - let out_val = func.dfg.inst_results(inst)[out_index as usize]; - let out_loc = func.locations[out_val]; - if loc != out_loc { - return false; - } - } - - if !constraint.satisfied(loc) { - return false; - } - } - - for (&arg, constraint) in func.dfg.inst_results(inst).iter().zip(self.outs) { - let loc = divert.get(arg, &func.locations); - if !constraint.satisfied(loc) { - return false; - } - } - - true - } -} - /// Constraints on the range of a branch instruction. /// /// A branch instruction usually encodes its destination as a signed n-bit offset from an origin. diff --git a/cranelift/codegen/src/isa/enc_tables.rs b/cranelift/codegen/src/isa/enc_tables.rs deleted file mode 100644 index b18479bee7..0000000000 --- a/cranelift/codegen/src/isa/enc_tables.rs +++ /dev/null @@ -1,221 +0,0 @@ -//! Support types for generated encoding tables. -//! -//! This module contains types and functions for working with the encoding tables generated by -//! `cranelift-codegen/meta/src/gen_encodings.rs`. - -use crate::constant_hash::Table; -use crate::ir::{Function, InstructionData, Opcode, Type}; -use crate::isa::{Encoding, Legalize}; -use crate::settings::PredicateView; - -/// A recipe predicate. -/// -/// This is a predicate function capable of testing ISA and instruction predicates simultaneously. -/// -/// A None predicate is always satisfied. -pub type RecipePredicate = Option bool>; - -/// An instruction predicate. -/// -/// This is a predicate function that needs to be tested in addition to the recipe predicate. It -/// can't depend on ISA settings. -pub type InstPredicate = fn(&Function, &InstructionData) -> bool; - -/// Legalization action to perform when no encoding can be found for an instruction. -/// -/// This is an index into an ISA-specific table of legalization actions. -pub type LegalizeCode = u8; - -/// Level 1 hash table entry. -/// -/// One level 1 hash table is generated per CPU mode. This table is keyed by the controlling type -/// variable, using `INVALID` for non-polymorphic instructions. -/// -/// The hash table values are references to level 2 hash tables, encoded as an offset in `LEVEL2` -/// where the table begins, and the binary logarithm of its length. All the level 2 hash tables -/// have a power-of-two size. -/// -/// Entries are generic over the offset type. It will typically be `u32` or `u16`, depending on the -/// size of the `LEVEL2` table. -/// -/// Empty entries are encoded with a `!0` value for `log2len` which will always be out of range. -/// Entries that have a `legalize` value but no level 2 table have an `offset` field that is out of -/// bounds. -pub struct Level1Entry + Copy> { - pub ty: Type, - pub log2len: u8, - pub legalize: LegalizeCode, - pub offset: OffT, -} - -impl + Copy> Table for [Level1Entry] { - fn len(&self) -> usize { - self.len() - } - - fn key(&self, idx: usize) -> Option { - if self[idx].log2len != !0 { - Some(self[idx].ty) - } else { - None - } - } -} - -/// Level 2 hash table entry. -/// -/// The second level hash tables are keyed by `Opcode`, and contain an offset into the `ENCLISTS` -/// table where the encoding recipes for the instruction are stored. -/// -/// Entries are generic over the offset type which depends on the size of `ENCLISTS`. 
A `u16` -/// offset allows the entries to be only 32 bits each. There is no benefit to dropping down to `u8` -/// for tiny ISAs. The entries won't shrink below 32 bits since the opcode is expected to be 16 -/// bits. -/// -/// Empty entries are encoded with a `NotAnOpcode` `opcode` field. -pub struct Level2Entry + Copy> { - pub opcode: Option, - pub offset: OffT, -} - -impl + Copy> Table for [Level2Entry] { - fn len(&self) -> usize { - self.len() - } - - fn key(&self, idx: usize) -> Option { - self[idx].opcode - } -} - -/// Encoding list entry. -/// -/// Encoding lists are represented as sequences of u16 words. -pub type EncListEntry = u16; - -/// Number of bits used to represent a predicate. c.f. `meta/src/gen_encodings.rs`. -const PRED_BITS: u8 = 12; -const PRED_MASK: usize = (1 << PRED_BITS) - 1; -/// First code word representing a predicate check. c.f. `meta/src/gen_encodings.rs`. -const PRED_START: usize = 0x1000; - -/// An iterator over legal encodings for the instruction. -pub struct Encodings<'a> { - // Current offset into `enclist`, or out of bounds after we've reached the end. - offset: usize, - // Legalization code to use of no encoding is found. - legalize: LegalizeCode, - inst: &'a InstructionData, - func: &'a Function, - enclist: &'static [EncListEntry], - legalize_actions: &'static [Legalize], - recipe_preds: &'static [RecipePredicate], - inst_preds: &'static [InstPredicate], - isa_preds: PredicateView<'a>, -} - -impl<'a> Encodings<'a> { - /// Creates a new instance of `Encodings`. - /// - /// This iterator provides search for encodings that applies to the given instruction. The - /// encoding lists are laid out such that first call to `next` returns valid entry in the list - /// or `None`. - pub fn new( - offset: usize, - legalize: LegalizeCode, - inst: &'a InstructionData, - func: &'a Function, - enclist: &'static [EncListEntry], - legalize_actions: &'static [Legalize], - recipe_preds: &'static [RecipePredicate], - inst_preds: &'static [InstPredicate], - isa_preds: PredicateView<'a>, - ) -> Self { - Encodings { - offset, - inst, - func, - legalize, - isa_preds, - recipe_preds, - inst_preds, - enclist, - legalize_actions, - } - } - - /// Get the legalization action that caused the enumeration of encodings to stop. - /// This can be the default legalization action for the type or a custom code for the - /// instruction. - /// - /// This method must only be called after the iterator returns `None`. - pub fn legalize(&self) -> Legalize { - debug_assert_eq!(self.offset, !0, "Premature Encodings::legalize()"); - self.legalize_actions[self.legalize as usize] - } - - /// Check if the `rpred` recipe predicate is satisfied. - fn check_recipe(&self, rpred: RecipePredicate) -> bool { - match rpred { - Some(p) => p(self.isa_preds, self.inst), - None => true, - } - } - - /// Check an instruction or isa predicate. - fn check_pred(&self, pred: usize) -> bool { - if let Some(&p) = self.inst_preds.get(pred) { - p(self.func, self.inst) - } else { - let pred = pred - self.inst_preds.len(); - self.isa_preds.test(pred) - } - } -} - -impl<'a> Iterator for Encodings<'a> { - type Item = Encoding; - - fn next(&mut self) -> Option { - while let Some(entryref) = self.enclist.get(self.offset) { - let entry = *entryref as usize; - - // Check for "recipe+bits". - let recipe = entry >> 1; - if let Some(&rpred) = self.recipe_preds.get(recipe) { - let bits = self.offset + 1; - if entry & 1 == 0 { - self.offset += 2; // Next entry. - } else { - self.offset = !0; // Stop. 
- } - if self.check_recipe(rpred) { - return Some(Encoding::new(recipe as u16, self.enclist[bits])); - } - continue; - } - - // Check for "stop with legalize". - if entry < PRED_START { - self.legalize = (entry - 2 * self.recipe_preds.len()) as LegalizeCode; - self.offset = !0; // Stop. - return None; - } - - // Finally, this must be a predicate entry. - let pred_entry = entry - PRED_START; - let skip = pred_entry >> PRED_BITS; - let pred = pred_entry & PRED_MASK; - - if self.check_pred(pred) { - self.offset += 1; - } else if skip == 0 { - self.offset = !0; // Stop. - return None; - } else { - self.offset += 1 + skip; - } - } - None - } -} diff --git a/cranelift/codegen/src/isa/encoding.rs b/cranelift/codegen/src/isa/encoding.rs deleted file mode 100644 index 84001c5d36..0000000000 --- a/cranelift/codegen/src/isa/encoding.rs +++ /dev/null @@ -1,167 +0,0 @@ -//! The `Encoding` struct. - -use crate::binemit::CodeOffset; -use crate::ir::{Function, Inst}; -use crate::isa::constraints::{BranchRange, RecipeConstraints}; -use crate::regalloc::RegDiversions; -use core::fmt; - -#[cfg(feature = "enable-serde")] -use serde::{Deserialize, Serialize}; - -/// Bits needed to encode an instruction as binary machine code. -/// -/// The encoding consists of two parts, both specific to the target ISA: An encoding *recipe*, and -/// encoding *bits*. The recipe determines the native instruction format and the mapping of -/// operands to encoded bits. The encoding bits provide additional information to the recipe, -/// typically parts of the opcode. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] -pub struct Encoding { - recipe: u16, - bits: u16, -} - -impl Encoding { - /// Create a new `Encoding` containing `(recipe, bits)`. - pub fn new(recipe: u16, bits: u16) -> Self { - Self { recipe, bits } - } - - /// Get the recipe number in this encoding. - pub fn recipe(self) -> usize { - self.recipe as usize - } - - /// Get the recipe-specific encoding bits. - pub fn bits(self) -> u16 { - self.bits - } - - /// Is this a legal encoding, or the default placeholder? - pub fn is_legal(self) -> bool { - self != Self::default() - } -} - -/// The default encoding is the illegal one. -impl Default for Encoding { - fn default() -> Self { - Self::new(0xffff, 0xffff) - } -} - -/// ISA-independent display of an encoding. -impl fmt::Display for Encoding { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if self.is_legal() { - write!(f, "{}#{:02x}", self.recipe, self.bits) - } else { - write!(f, "-") - } - } -} - -/// Temporary object that holds enough context to properly display an encoding. -/// This is meant to be created by `EncInfo::display()`. -pub struct DisplayEncoding { - pub encoding: Encoding, - pub recipe_names: &'static [&'static str], -} - -impl fmt::Display for DisplayEncoding { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if self.encoding.is_legal() { - write!( - f, - "{}#{:02x}", - self.recipe_names[self.encoding.recipe()], - self.encoding.bits - ) - } else { - write!(f, "-") - } - } -} - -type SizeCalculatorFn = fn(&RecipeSizing, Encoding, Inst, &RegDiversions, &Function) -> u8; - -/// Returns the base size of the Recipe, assuming it's fixed. This is the default for most -/// encodings; others can be variable and longer than this base size, depending on the registers -/// they're using and use a different function, specific per platform. 
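Taken together, the tables above made encoding lookup a two-level dispatch: level 1 keyed by controlling type variable, level 2 by opcode, yielding an offset into the encoding lists. A self-contained model in which linear scans stand in for the generated hash tables:

```rust
#[derive(PartialEq, Clone, Copy)]
enum Ty { I32, I64 }

#[derive(PartialEq, Clone, Copy)]
enum Op { Iadd, Isub }

// Level 1 maps a controlling type to a level-2 table; level 2 maps an
// opcode to an offset into the encoding lists. Offsets are made up.
const LEVEL1: &[(Ty, &[(Op, usize)])] = &[
    (Ty::I32, &[(Op::Iadd, 0), (Op::Isub, 4)]),
    (Ty::I64, &[(Op::Iadd, 8)]),
];

fn enclist_offset(ty: Ty, op: Op) -> Option<usize> {
    let level2 = LEVEL1.iter().find(|&&(t, _)| t == ty)?.1;
    level2.iter().find(|&&(o, _)| o == op).map(|&(_, off)| off)
}

fn main() {
    assert_eq!(enclist_offset(Ty::I32, Op::Isub), Some(4));
    // A miss means there is no encoding and the instruction must legalize.
    assert_eq!(enclist_offset(Ty::I64, Op::Isub), None);
}
```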
-pub fn base_size( - sizing: &RecipeSizing, - _: Encoding, - _: Inst, - _: &RegDiversions, - _: &Function, -) -> u8 { - sizing.base_size -} - -/// Code size information for an encoding recipe. -/// -/// Encoding recipes may have runtime-determined instruction size. -pub struct RecipeSizing { - /// Minimum size in bytes of instructions encoded with this recipe. - pub base_size: u8, - - /// Method computing the instruction's real size, given inputs and outputs. - pub compute_size: SizeCalculatorFn, - - /// Allowed branch range in this recipe, if any. - /// - /// All encoding recipes for branches have exact branch range information. - pub branch_range: Option, -} - -/// Information about all the encodings in this ISA. -#[derive(Clone)] -pub struct EncInfo { - /// Constraints on value operands per recipe. - pub constraints: &'static [RecipeConstraints], - - /// Code size information per recipe. - pub sizing: &'static [RecipeSizing], - - /// Names of encoding recipes. - pub names: &'static [&'static str], -} - -impl EncInfo { - /// Get the value operand constraints for `enc` if it is a legal encoding. - pub fn operand_constraints(&self, enc: Encoding) -> Option<&'static RecipeConstraints> { - self.constraints.get(enc.recipe()) - } - - /// Create an object that can display an ISA-dependent encoding properly. - pub fn display(&self, enc: Encoding) -> DisplayEncoding { - DisplayEncoding { - encoding: enc, - recipe_names: self.names, - } - } - - /// Get the size in bytes of `inst`, if it were encoded with `enc`. - /// - /// Returns 0 for illegal encodings. - pub fn byte_size( - &self, - enc: Encoding, - inst: Inst, - divert: &RegDiversions, - func: &Function, - ) -> CodeOffset { - self.sizing.get(enc.recipe()).map_or(0, |s| { - let compute_size = s.compute_size; - CodeOffset::from(compute_size(&s, enc, inst, divert, func)) - }) - } - - /// Get the branch range that is supported by `enc`, if any. - /// - /// This will never return `None` for a legal branch encoding. - pub fn branch_range(&self, enc: Encoding) -> Option { - self.sizing.get(enc.recipe()).and_then(|s| s.branch_range) - } -} diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs index 8de000409f..45aa5ad045 100644 --- a/cranelift/codegen/src/isa/mod.rs +++ b/cranelift/codegen/src/isa/mod.rs @@ -44,26 +44,19 @@ //! concurrent function compilations. 
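The three kinds of `u16` entries walked by the `Encodings` iterator in the deleted `enc_tables.rs` above ("recipe+bits" pairs, "stop with legalize" codes, and predicate checks with a skip count) are easier to see in a stripped-down model. The sketch below is not the Cranelift API: the table contents, the `pred_ok` callback, and the `0` default legalize code are hypothetical stand-ins, and recipe predicates are elided.

```rust
const PRED_BITS: u8 = 12;
const PRED_MASK: usize = (1 << PRED_BITS) - 1;
const PRED_START: usize = 0x1000;

/// Walk `enclist` from `offset`, returning `Ok((recipe, bits))` for the first
/// entry whose predicates pass, or `Err(legalize_code)` when a stop entry is hit.
fn walk_enclist(
    enclist: &[u16],
    mut offset: usize,
    num_recipes: usize,
    pred_ok: impl Fn(usize) -> bool,
) -> Result<(u16, u16), u16> {
    while let Some(&raw) = enclist.get(offset) {
        let entry = raw as usize;
        if entry < PRED_START {
            let recipe = entry >> 1;
            if recipe < num_recipes {
                // A "recipe+bits" pair; the low bit of `entry` tells the real
                // iterator whether the list continues past this pair.
                return Ok((recipe as u16, enclist[offset + 1]));
            }
            // "Stop with legalize": the remaining code selects an action.
            return Err((entry - 2 * num_recipes) as u16);
        }
        // A predicate entry: the low 12 bits pick the predicate, the upper
        // bits say how many words to skip when the check fails.
        let pred_entry = entry - PRED_START;
        let skip = pred_entry >> PRED_BITS;
        if pred_ok(pred_entry & PRED_MASK) {
            offset += 1;
        } else if skip == 0 {
            return Err(0); // No alternative left; 0 is a stand-in default.
        } else {
            offset += 1 + skip;
        }
    }
    Err(0)
}
```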
pub use crate::isa::call_conv::CallConv; -pub use crate::isa::constraints::{ - BranchRange, ConstraintKind, OperandConstraint, RecipeConstraints, -}; -pub use crate::isa::enc_tables::Encodings; -pub use crate::isa::encoding::{base_size, EncInfo, Encoding}; +pub use crate::isa::constraints::{BranchRange, ConstraintKind, OperandConstraint}; pub use crate::isa::registers::{regs_overlap, RegClass, RegClassIndex, RegInfo, RegUnit}; pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef}; -use crate::binemit; use crate::flowgraph; use crate::ir; #[cfg(feature = "unwind")] use crate::isa::unwind::systemv::RegisterMappingError; use crate::machinst::{MachBackend, UnwindInfoKind}; -use crate::regalloc; use crate::result::CodegenResult; use crate::settings; use crate::settings::SetResult; -use crate::timing; -use alloc::{borrow::Cow, boxed::Box, vec::Vec}; +use alloc::{boxed::Box, vec::Vec}; use core::any::Any; use core::fmt; use core::fmt::{Debug, Formatter}; @@ -88,8 +81,6 @@ pub mod unwind; mod call_conv; mod constraints; -mod enc_tables; -mod encoding; pub mod registers; mod stack; @@ -329,125 +320,6 @@ pub trait TargetIsa: fmt::Display + Send + Sync { Err(RegisterMappingError::UnsupportedArchitecture) } - /// Returns an iterator over legal encodings for the instruction. - fn legal_encodings<'a>( - &'a self, - func: &'a ir::Function, - inst: &'a ir::InstructionData, - ctrl_typevar: ir::Type, - ) -> Encodings<'a>; - - /// Encode an instruction after determining it is legal. - /// - /// If `inst` can legally be encoded in this ISA, produce the corresponding `Encoding` object. - /// Otherwise, return a `Legalize` action. - /// - /// This is also the main entry point for determining if an instruction is legal. - fn encode( - &self, - func: &ir::Function, - inst: &ir::InstructionData, - ctrl_typevar: ir::Type, - ) -> Result<Encoding, Legalize> { - let mut iter = self.legal_encodings(func, inst, ctrl_typevar); - iter.next().ok_or_else(|| iter.legalize()) - } - - /// Get a data structure describing the instruction encodings in this ISA. - fn encoding_info(&self) -> EncInfo; - - /// Legalize a function signature. - /// - /// This is used to legalize both the signature of the function being compiled and any called - /// functions. The signature should be modified by adding `ArgumentLoc` annotations to all - /// arguments and return values. - /// - /// Arguments with types that are not supported by the ABI can be expanded into multiple - /// arguments: - /// - /// - Integer types that are too large to fit in a register can be broken into multiple - /// arguments of a smaller integer type. - /// - Floating point types can be bit-cast to an integer type of the same size, and possibly - /// broken into smaller integer types. - /// - Vector types can be bit-cast and broken down into smaller vectors or scalars. - /// - /// The legalizer will adapt argument and return values as necessary at all ABI boundaries. - /// - /// When this function is called to legalize the signature of the function currently being - /// compiled, `current` is true. The legalized signature can then also contain special purpose - /// arguments and return values such as: - /// - /// - A `link` argument representing the link register on RISC architectures that don't push - /// the return address on the stack. - /// - A `link` return value which will receive the value that was passed to the `link` - /// argument. - /// - An `sret` argument can be added if one wasn't present already.
This is necessary if the - /// signature returns more values than there are registers available for return values. - /// - An `sret` return value can be added if the ABI requires a function to return its `sret` - /// argument in a register. - /// - /// Arguments and return values for the caller's frame pointer and other callee-saved registers - /// should not be added by this function. These arguments are not added until after register - /// allocation. - fn legalize_signature(&self, sig: &mut Cow<ir::Signature>, current: bool); - - /// Get the register class that should be used to represent an ABI argument or return value of - /// type `ty`. This should be the top-level register class that contains the argument - /// registers. - /// - /// This function can assume that it will only be asked to provide register classes for types - /// that `legalize_signature()` produces in `ArgumentLoc::Reg` entries. - fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass; - - /// Get the set of allocatable registers that can be used when compiling `func`. - /// - /// This set excludes reserved registers like the stack pointer and other special-purpose - /// registers. - fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet; - - /// Compute the stack layout and insert prologue and epilogue code into `func`. - /// - /// Return an error if the stack frame is too large. - fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> { - let _tt = timing::prologue_epilogue(); - // This default implementation is unlikely to be good enough. - use crate::ir::stackslot::{StackOffset, StackSize}; - use crate::stack_layout::layout_stack; - - let word_size = StackSize::from(self.pointer_bytes()); - - // Account for the SpiderMonkey standard prologue pushes. - if func.signature.call_conv.extends_baldrdash() { - let bytes = StackSize::from(self.flags().baldrdash_prologue_words()) * word_size; - let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes); - ss.offset = Some(-(bytes as StackOffset)); - func.stack_slots.push(ss); - } - - let is_leaf = func.is_leaf(); - layout_stack(&mut func.stack_slots, is_leaf, word_size)?; - Ok(()) - } - - /// Emit binary machine code for a single instruction into the `sink` trait object. - /// - /// Note that this will call `put*` methods on the `sink` trait object via its vtable, which - /// is not the fastest way of emitting code. - /// - /// This function is under the "testing_hooks" feature, and is only suitable for use by - /// test harnesses. It increases code size, and is inefficient. - #[cfg(feature = "testing_hooks")] - fn emit_inst( - &self, - func: &ir::Function, - inst: ir::Inst, - divert: &mut regalloc::RegDiversions, - sink: &mut dyn binemit::CodeSink, - ); - - /// Emit a whole function into memory. - fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut binemit::MemoryCodeSink); - /// IntCC condition for Unsigned Addition Overflow (Carry). fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC; diff --git a/cranelift/codegen/src/legalizer/boundary.rs b/cranelift/codegen/src/legalizer/boundary.rs deleted file mode 100644 index 3b51bc5b57..0000000000 --- a/cranelift/codegen/src/legalizer/boundary.rs +++ /dev/null @@ -1,1174 +0,0 @@ -//! Legalize ABI boundaries. -//! -//! This legalizer sub-module contains code for dealing with ABI boundaries: -//! -//! - Function arguments passed to the entry block. -//! - Function arguments passed to call instructions. -//! - Return values from call instructions. -//!
- Return values passed to return instructions. -//! -//! The ABI boundary legalization happens in two phases: -//! -//! 1. The `legalize_signatures` function rewrites all the preamble signatures with ABI information -//! and possibly new argument types. It also rewrites the entry block arguments to match. -//! 2. The `handle_call_abi` and `handle_return_abi` functions rewrite call and return instructions -//! to match the new ABI signatures. -//! -//! Between the two phases, preamble signatures and call/return arguments don't match. This -//! intermediate state doesn't type check. - -use crate::abi::{legalize_abi_value, ValueConversion}; -use crate::cursor::{Cursor, FuncCursor}; -use crate::flowgraph::ControlFlowGraph; -use crate::ir::instructions::CallInfo; -use crate::ir::{ - AbiParam, ArgumentLoc, ArgumentPurpose, Block, DataFlowGraph, ExtFuncData, ExternalName, - Function, Inst, InstBuilder, LibCall, MemFlags, SigRef, Signature, StackSlotData, - StackSlotKind, Type, Value, ValueLoc, -}; -use crate::isa::TargetIsa; -use crate::legalizer::split::{isplit, vsplit}; -use alloc::borrow::Cow; -use alloc::vec::Vec; -use core::mem; -use cranelift_entity::EntityList; - -/// Legalize all the function signatures in `func`. -/// -/// This changes all signatures to be ABI-compliant with full `ArgumentLoc` annotations. It doesn't -/// change the entry block arguments, calls, or return instructions, so this can leave the function -/// in a state with type discrepancies. -pub fn legalize_signatures(func: &mut Function, isa: &dyn TargetIsa) { - if let Some(new) = legalize_signature(&func.signature, true, isa) { - let old = mem::replace(&mut func.signature, new); - func.old_signature = Some(old); - } - - for (sig_ref, sig_data) in func.dfg.signatures.iter_mut() { - if let Some(new) = legalize_signature(sig_data, false, isa) { - let old = mem::replace(sig_data, new); - func.dfg.old_signatures[sig_ref] = Some(old); - } - } - - if let Some(entry) = func.layout.entry_block() { - legalize_entry_params(func, entry); - spill_entry_params(func, entry); - } -} - -/// Legalize the libcall signature, which we may generate on the fly after -/// `legalize_signatures` has been called. -pub fn legalize_libcall_signature(signature: &mut Signature, isa: &dyn TargetIsa) { - if let Some(s) = legalize_signature(signature, false, isa) { - *signature = s; - } -} - -/// Legalize the given signature. -/// -/// `current` is true if this is the signature for the current function. -fn legalize_signature( - signature: &Signature, - current: bool, - isa: &dyn TargetIsa, -) -> Option<Signature> { - let mut cow = Cow::Borrowed(signature); - isa.legalize_signature(&mut cow, current); - match cow { - Cow::Borrowed(_) => None, - Cow::Owned(s) => Some(s), - } -} - -/// Legalize the entry block parameters after `func`'s signature has been legalized. -/// -/// The legalized signature may contain more parameters than the original signature, and the -/// parameter types have been changed. This function goes through the parameters of the entry block -/// and replaces them with parameters of the right type for the ABI. -/// -/// The original entry block parameters are computed from the new ABI parameters by code inserted at -/// the top of the entry block. -fn legalize_entry_params(func: &mut Function, entry: Block) { - let mut has_sret = false; - let mut has_link = false; - let mut has_vmctx = false; - let mut has_sigid = false; - let mut has_stack_limit = false; - - // Insert position for argument conversion code.
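One detail worth isolating from `legalize_signature` above is the `Cow` dance: the ISA gets a borrowed signature, and only if it actually writes a replacement does the caller pay for swapping signatures and remembering the old one. A standalone sketch of that pattern, with `String`/`str` standing in for `Signature` and the `rewrite` closure standing in for `TargetIsa::legalize_signature`:

```rust
use std::borrow::Cow;

fn maybe_rewrite(sig: &str, rewrite: impl Fn(&mut Cow<str>)) -> Option<String> {
    let mut cow = Cow::Borrowed(sig);
    rewrite(&mut cow);
    match cow {
        Cow::Borrowed(_) => None, // Unchanged: caller keeps the original.
        Cow::Owned(s) => Some(s), // Changed: caller stores the new version.
    }
}

fn main() {
    // A toy "legalizer" that only rewrites signatures mentioning i128.
    let legalize = |c: &mut Cow<str>| {
        if c.contains("i128") {
            *c = Cow::Owned(c.replace("i128", "i64, i64"));
        }
    };
    assert_eq!(maybe_rewrite("fn(i32)", legalize), None);
    assert_eq!(
        maybe_rewrite("fn(i128)", legalize).as_deref(),
        Some("fn(i64, i64)")
    );
}
```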
- // We want to insert instructions before the first instruction in the entry block. - // If the entry block is empty, append instructions to it instead. - let mut pos = FuncCursor::new(func).at_first_inst(entry); - - // Keep track of the argument types in the ABI-legalized signature. - let mut abi_arg = 0; - - // Process the block parameters one at a time, possibly replacing one argument with multiple new - // ones. We do this by detaching the entry block parameters first. - let block_params = pos.func.dfg.detach_block_params(entry); - let mut old_arg = 0; - while let Some(arg) = block_params.get(old_arg, &pos.func.dfg.value_lists) { - old_arg += 1; - - let abi_type = pos.func.signature.params[abi_arg]; - let arg_type = pos.func.dfg.value_type(arg); - if let ArgumentPurpose::StructArgument(size) = abi_type.purpose { - let offset = if let ArgumentLoc::Stack(offset) = abi_type.location { - offset - } else { - unreachable!("StructArgument must already have a Stack ArgumentLoc assigned"); - }; - let ss = pos.func.stack_slots.make_incoming_arg(size, offset); - let struct_arg = pos.ins().stack_addr(arg_type, ss, 0); - pos.func.dfg.change_to_alias(arg, struct_arg); - let dummy = pos - .func - .dfg - .append_block_param(entry, crate::ir::types::SARG_T); - pos.func.locations[dummy] = ValueLoc::Stack(ss); - abi_arg += 1; - continue; - } - - if arg_type == abi_type.value_type { - // No value translation is necessary, this argument matches the ABI type. - // Just use the original block argument value. This is the most common case. - pos.func.dfg.attach_block_param(entry, arg); - match abi_type.purpose { - ArgumentPurpose::Normal => {} - ArgumentPurpose::StructArgument(_) => unreachable!("Handled above"), - ArgumentPurpose::FramePointer => {} - ArgumentPurpose::CalleeSaved => {} - ArgumentPurpose::StructReturn => { - debug_assert!(!has_sret, "Multiple sret arguments found"); - has_sret = true; - } - ArgumentPurpose::VMContext => { - debug_assert!(!has_vmctx, "Multiple vmctx arguments found"); - has_vmctx = true; - } - ArgumentPurpose::SignatureId => { - debug_assert!(!has_sigid, "Multiple sigid arguments found"); - has_sigid = true; - } - ArgumentPurpose::StackLimit => { - debug_assert!(!has_stack_limit, "Multiple stack_limit arguments found"); - has_stack_limit = true; - } - ArgumentPurpose::Link => panic!("Unexpected link arg {}", abi_type), - ArgumentPurpose::CallerTLS | ArgumentPurpose::CalleeTLS => {} - } - abi_arg += 1; - } else { - // Compute the value we want for `arg` from the legalized ABI parameters. - let mut get_arg = |func: &mut Function, ty| { - let abi_type = func.signature.params[abi_arg]; - debug_assert_eq!( - abi_type.purpose, - ArgumentPurpose::Normal, - "Can't legalize special-purpose argument" - ); - if ty == abi_type.value_type { - abi_arg += 1; - Ok(func.dfg.append_block_param(entry, ty)) - } else { - Err(abi_type) - } - }; - let converted = convert_from_abi(&mut pos, arg_type, Some(arg), &mut get_arg); - // The old `arg` is no longer an attached block argument, but there are probably still - // uses of the value. - debug_assert_eq!(pos.func.dfg.resolve_aliases(arg), converted); - } - } - - // The legalized signature may contain additional parameters representing special-purpose - // registers. - for &arg in &pos.func.signature.params[abi_arg..] { - match arg.purpose { - // Any normal parameters should have been processed above. 
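The `get_arg` closure above follows a small protocol that reappears in `legalize_inst_results` and `convert_from_abi` below: consume the next ABI parameter and return `Ok` when the requested type matches, or return `Err` carrying the ABI type so the caller can keep converting. The same protocol in isolation, with `&str` standing in for `Type` and an index standing in for the appended block parameter (all stand-ins, not the Cranelift API):

```rust
fn get_arg(
    abi_params: &[&'static str],
    next: &mut usize,
    want: &str,
) -> Result<usize, &'static str> {
    let have = abi_params[*next];
    if want == have {
        *next += 1;   // Consume the ABI parameter...
        Ok(*next - 1) // ...and hand back "the new block param".
    } else {
        Err(have) // Mismatch: the caller must convert from `have` toward `want`.
    }
}

fn main() {
    let params = ["i64", "i64"];
    let mut next = 0;
    // Asking for an i128 when the ABI has i64s forces a conversion step.
    assert_eq!(get_arg(&params, &mut next, "i128"), Err("i64"));
    assert_eq!(get_arg(&params, &mut next, "i64"), Ok(0));
    assert_eq!(get_arg(&params, &mut next, "i64"), Ok(1));
}
```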
- ArgumentPurpose::Normal | ArgumentPurpose::StructArgument(_) => { - panic!("Leftover arg: {}", arg); - } - // The callee-save parameters should not appear until after register allocation is - // done. - ArgumentPurpose::FramePointer | ArgumentPurpose::CalleeSaved => { - panic!("Premature callee-saved arg {}", arg); - } - // These can be meaningfully added by `legalize_signature()`. - ArgumentPurpose::Link => { - debug_assert!(!has_link, "Multiple link parameters found"); - has_link = true; - } - ArgumentPurpose::StructReturn => { - debug_assert!(!has_sret, "Multiple sret parameters found"); - has_sret = true; - } - ArgumentPurpose::VMContext => { - debug_assert!(!has_vmctx, "Multiple vmctx parameters found"); - has_vmctx = true; - } - ArgumentPurpose::SignatureId => { - debug_assert!(!has_sigid, "Multiple sigid parameters found"); - has_sigid = true; - } - ArgumentPurpose::StackLimit => { - debug_assert!(!has_stack_limit, "Multiple stack_limit parameters found"); - has_stack_limit = true; - } - ArgumentPurpose::CallerTLS | ArgumentPurpose::CalleeTLS => {} - } - - // Just create entry block values to match here. We will use them in `handle_return_abi()` - // below. - pos.func.dfg.append_block_param(entry, arg.value_type); - } -} - -/// Legalize the results returned from a call instruction to match the ABI signature. -/// -/// The cursor `pos` points to a call instruction with at least one return value. The cursor will -/// be left pointing after the instructions inserted to convert the return values. -/// -/// This function is very similar to the `legalize_entry_params` function above. -/// -/// Returns the possibly new instruction representing the call. -fn legalize_inst_results<ResType>(pos: &mut FuncCursor, mut get_abi_type: ResType) -> Inst -where - ResType: FnMut(&Function, usize) -> AbiParam, -{ - let call = pos - .current_inst() - .expect("Cursor must point to a call instruction"); - - // We theoretically allow for call instructions that return a number of fixed results before - // the call return values. In practice, it doesn't happen. - debug_assert_eq!( - pos.func.dfg[call] - .opcode() - .constraints() - .num_fixed_results(), - 0, - "Fixed results on calls not supported" - ); - - let results = pos.func.dfg.detach_results(call); - let mut next_res = 0; - let mut abi_res = 0; - - // Point immediately after the call. - pos.next_inst(); - - while let Some(res) = results.get(next_res, &pos.func.dfg.value_lists) { - next_res += 1; - - let res_type = pos.func.dfg.value_type(res); - if res_type == get_abi_type(pos.func, abi_res).value_type { - // No value translation is necessary, this result matches the ABI type. - pos.func.dfg.attach_result(call, res); - abi_res += 1; - } else { - let mut get_res = |func: &mut Function, ty| { - let abi_type = get_abi_type(func, abi_res); - if ty == abi_type.value_type { - let last_res = func.dfg.append_result(call, ty); - abi_res += 1; - Ok(last_res) - } else { - Err(abi_type) - } - }; - let v = convert_from_abi(pos, res_type, Some(res), &mut get_res); - debug_assert_eq!(pos.func.dfg.resolve_aliases(res), v); - } - } - - call -} - -fn assert_is_valid_sret_legalization( - old_ret_list: &EntityList<Value>, - old_sig: &Signature, - new_sig: &Signature, - pos: &FuncCursor, -) { - debug_assert_eq!( - old_sig.returns.len(), - old_ret_list.len(&pos.func.dfg.value_lists) - ); - - // Assert that the only difference in special parameters is that there - // is an appended struct return pointer parameter.
- let old_special_params: Vec<_> = old_sig - .params - .iter() - .filter(|r| r.purpose != ArgumentPurpose::Normal) - .collect(); - let new_special_params: Vec<_> = new_sig - .params - .iter() - .filter(|r| r.purpose != ArgumentPurpose::Normal) - .collect(); - debug_assert_eq!(old_special_params.len() + 1, new_special_params.len()); - debug_assert!(old_special_params - .iter() - .zip(&new_special_params) - .all(|(old, new)| old.purpose == new.purpose)); - debug_assert_eq!( - new_special_params.last().unwrap().purpose, - ArgumentPurpose::StructReturn - ); - - // If the special returns have changed at all, then the only change - // should be that the struct return pointer is returned back out of the - // function, so that callers don't have to load its stack address again. - let old_special_returns: Vec<_> = old_sig - .returns - .iter() - .filter(|r| r.purpose != ArgumentPurpose::Normal) - .collect(); - let new_special_returns: Vec<_> = new_sig - .returns - .iter() - .filter(|r| r.purpose != ArgumentPurpose::Normal) - .collect(); - debug_assert!(old_special_returns - .iter() - .zip(&new_special_returns) - .all(|(old, new)| old.purpose == new.purpose)); - debug_assert!( - old_special_returns.len() == new_special_returns.len() - || (old_special_returns.len() + 1 == new_special_returns.len() - && new_special_returns.last().unwrap().purpose == ArgumentPurpose::StructReturn) - ); -} - -fn legalize_sret_call(isa: &dyn TargetIsa, pos: &mut FuncCursor, sig_ref: SigRef, call: Inst) { - let old_ret_list = pos.func.dfg.detach_results(call); - let old_sig = pos.func.dfg.old_signatures[sig_ref] - .take() - .expect("must have an old signature when using an `sret` parameter"); - - // We make a bunch of assumptions about the shape of the old, multi-return - // signature and the new, sret-using signature in this legalization - // function. Assert that these assumptions hold true in debug mode. - if cfg!(debug_assertions) { - assert_is_valid_sret_legalization( - &old_ret_list, - &old_sig, - &pos.func.dfg.signatures[sig_ref], - &pos, - ); - } - - // Go through and remove all normal return values from the `call` - // instruction's returns list. These will be stored into the stack slot that - // the sret points to. At the same time, calculate the size of the sret - // stack slot. - let mut sret_slot_size = 0; - for (i, ret) in old_sig.returns.iter().enumerate() { - let v = old_ret_list.get(i, &pos.func.dfg.value_lists).unwrap(); - let ty = pos.func.dfg.value_type(v); - if ret.purpose == ArgumentPurpose::Normal { - debug_assert_eq!(ret.location, ArgumentLoc::Unassigned); - let ty = legalized_type_for_sret(ty); - let size = ty.bytes(); - sret_slot_size = round_up_to_multiple_of_type_align(sret_slot_size, ty) + size; - } else { - let new_v = pos.func.dfg.append_result(call, ty); - pos.func.dfg.change_to_alias(v, new_v); - } - } - - let stack_slot = pos.func.stack_slots.push(StackSlotData { - kind: StackSlotKind::StructReturnSlot, - size: sret_slot_size, - offset: None, - }); - - // Append the sret pointer to the `call` instruction's arguments. - let ptr_type = Type::triple_pointer_type(isa.triple()); - let sret_arg = pos.ins().stack_addr(ptr_type, stack_slot, 0); - pos.func.dfg.append_inst_arg(call, sret_arg); - - // The sret pointer might be returned by the signature as well. If so, we - // need to add it to the `call` instruction's results list. 
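The slot sizing loop a few lines up amounts to a classic struct-layout computation: each normal return value is placed at the next offset aligned for it, under the assumption (made explicit later in this file) that types are size-aligned and sizes are powers of two. A standalone sketch of just that arithmetic, with plain byte sizes standing in for IR types:

```rust
fn sret_slot_size(return_sizes: &[u32]) -> u32 {
    let mut size = 0;
    for &bytes in return_sizes {
        debug_assert!(bytes.is_power_of_two());
        let align = bytes; // Types are assumed size-aligned.
        size = (size + align - 1) & !(align - 1); // Round up to the alignment.
        size += bytes;
    }
    size
}

fn main() {
    // Returns (i32, i64, i8) land at offsets 0, 8, 16: 17 bytes in total.
    assert_eq!(sret_slot_size(&[4, 8, 1]), 17);
}
```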
- // - // Additionally, when the sret is explicitly returned in this calling - // convention, use it when loading the sret returns back into SSA - // values to avoid keeping the original `sret_arg` live and potentially - // having to do spills and fills. - let sret = - if pos.func.dfg.signatures[sig_ref].uses_special_return(ArgumentPurpose::StructReturn) { - pos.func.dfg.append_result(call, ptr_type) - } else { - sret_arg - }; - - // Finally, load each of the call's return values out of the sret stack - // slot. - pos.goto_after_inst(call); - let mut offset = 0; - for i in 0..old_ret_list.len(&pos.func.dfg.value_lists) { - if old_sig.returns[i].purpose != ArgumentPurpose::Normal { - continue; - } - - let old_v = old_ret_list.get(i, &pos.func.dfg.value_lists).unwrap(); - let ty = pos.func.dfg.value_type(old_v); - let mut legalized_ty = legalized_type_for_sret(ty); - - offset = round_up_to_multiple_of_type_align(offset, legalized_ty); - - let new_legalized_v = - pos.ins() - .load(legalized_ty, MemFlags::trusted(), sret, offset as i32); - - // "Illegalize" the loaded value from the legalized type back to its - // original `ty`. This is basically the opposite of - // `legalize_type_for_sret_store`. - let mut new_v = new_legalized_v; - if ty.is_bool() { - legalized_ty = legalized_ty.as_bool_pedantic(); - new_v = pos.ins().raw_bitcast(legalized_ty, new_v); - - if ty.bits() < legalized_ty.bits() { - legalized_ty = ty; - new_v = pos.ins().breduce(legalized_ty, new_v); - } - } - - pos.func.dfg.change_to_alias(old_v, new_v); - - offset += legalized_ty.bytes(); - } - - pos.func.dfg.old_signatures[sig_ref] = Some(old_sig); -} - -/// Compute the original value of type `ty` from the legalized ABI arguments. -/// -/// The conversion is recursive, controlled by the `get_arg` closure which is called to retrieve an -/// ABI argument. It returns: -/// -/// - `Ok(arg)` if the requested type matches the next ABI argument. -/// - `Err(arg_type)` if further conversions are needed from the ABI argument `arg_type`. -/// -/// If the `into_result` value is provided, the converted result will be written into that value. -fn convert_from_abi<GetArg>( - pos: &mut FuncCursor, - ty: Type, - into_result: Option<Value>, - get_arg: &mut GetArg, -) -> Value -where - GetArg: FnMut(&mut Function, Type) -> Result<Value, AbiParam>, -{ - // Terminate the recursion when we get the desired type. - let arg_type = match get_arg(pos.func, ty) { - Ok(v) => { - debug_assert_eq!(pos.func.dfg.value_type(v), ty); - debug_assert_eq!(into_result, None); - return v; - } - Err(t) => t, - }; - - // Reconstruct how `ty` was legalized into the `arg_type` argument. - let conversion = legalize_abi_value(ty, &arg_type); - - log::trace!("convert_from_abi({}): {:?}", ty, conversion); - - // The conversion describes the value-to-ABI-argument direction. We implement the reverse - // conversion here. - match conversion { - // Construct a `ty` by concatenating two ABI integers. - ValueConversion::IntSplit => { - let abi_ty = ty.half_width().expect("Invalid type for conversion"); - let lo = convert_from_abi(pos, abi_ty, None, get_arg); - let hi = convert_from_abi(pos, abi_ty, None, get_arg); - log::trace!( - "intsplit {}: {}, {}: {}", - lo, - pos.func.dfg.value_type(lo), - hi, - pos.func.dfg.value_type(hi) - ); - pos.ins().with_results([into_result]).iconcat(lo, hi) - } - // Construct a `ty` by concatenating two halves of a vector.
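Before moving on to the vector case below, the `IntSplit` recursion above is worth seeing in isolation: recurse for the low half first, then the high half, then concatenate. A toy model, with a three-type enum standing in for IR types and a counter standing in for the ABI argument stream (all names hypothetical, not the Cranelift API):

```rust
#[derive(Clone, Copy, PartialEq, Debug)]
enum Ty { I32, I64, I128 }

impl Ty {
    fn half_width(self) -> Option<Ty> {
        match self {
            Ty::I128 => Some(Ty::I64),
            Ty::I64 => Some(Ty::I32),
            Ty::I32 => None,
        }
    }
}

/// Materialize a `ty` from a stream of `abi_ty` arguments: consume the low
/// half, then the high half, then "concatenate" (modeled as a Vec here).
fn convert_from_abi(ty: Ty, abi_ty: Ty, next_arg: &mut impl FnMut() -> u8) -> Vec<u8> {
    if ty == abi_ty {
        return vec![next_arg()];
    }
    let half = ty.half_width().expect("only integer splits in this sketch");
    let mut parts = convert_from_abi(half, abi_ty, next_arg); // low half
    parts.extend(convert_from_abi(half, abi_ty, next_arg)); // high half
    parts // Stands in for `iconcat lo, hi`.
}

fn main() {
    let mut n = 0;
    let mut next = || { n += 1; n };
    // An i128 on a 32-bit ABI needs two levels of splitting: four arguments,
    // consumed in order from the low part to the high part.
    assert_eq!(convert_from_abi(Ty::I128, Ty::I32, &mut next), vec![1, 2, 3, 4]);
}
```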
- ValueConversion::VectorSplit => { - let abi_ty = ty.half_vector().expect("Invalid type for conversion"); - let lo = convert_from_abi(pos, abi_ty, None, get_arg); - let hi = convert_from_abi(pos, abi_ty, None, get_arg); - pos.ins().with_results([into_result]).vconcat(lo, hi) - } - // Construct a `ty` by bit-casting from an integer type. - ValueConversion::IntBits => { - debug_assert!(!ty.is_int()); - let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion"); - let arg = convert_from_abi(pos, abi_ty, None, get_arg); - pos.ins().with_results([into_result]).bitcast(ty, arg) - } - // ABI argument is a sign-extended version of the value we want. - ValueConversion::Sext(abi_ty) => { - let arg = convert_from_abi(pos, abi_ty, None, get_arg); - // TODO: Currently, we don't take advantage of the ABI argument being sign-extended. - // We could insert an `assert_sreduce` which would fold with a following `sextend` of - // this value. - pos.ins().with_results([into_result]).ireduce(ty, arg) - } - ValueConversion::Uext(abi_ty) => { - let arg = convert_from_abi(pos, abi_ty, None, get_arg); - // TODO: Currently, we don't take advantage of the ABI argument being zero-extended. - // We could insert an `assert_ureduce` which would fold with a following `uextend` of - // this value. - pos.ins().with_results([into_result]).ireduce(ty, arg) - } - // ABI argument is a pointer to the value we want. - ValueConversion::Pointer(abi_ty) => { - let arg = convert_from_abi(pos, abi_ty, None, get_arg); - pos.ins() - .with_results([into_result]) - .load(ty, MemFlags::new(), arg, 0) - } - } -} - -/// Convert `value` to match an ABI signature by inserting instructions at `pos`. -/// -/// This may require expanding the value to multiple ABI arguments. The conversion process is -/// recursive and controlled by the `put_arg` closure. When a candidate argument value is presented -/// to the closure, it will perform one of two actions: -/// -/// 1. If the suggested argument has an acceptable value type, consume it by adding it to the list -/// of arguments and return `Ok(())`. -/// 2. If the suggested argument doesn't have the right value type, don't change anything, but -/// return the `Err(AbiParam)` that is needed. -/// -fn convert_to_abi<PutArg>( - pos: &mut FuncCursor, - cfg: &ControlFlowGraph, - value: Value, - put_arg: &mut PutArg, -) where - PutArg: FnMut(&mut Function, Value) -> Result<(), AbiParam>, -{ - // Start by invoking the closure to either terminate the recursion or get the argument type - // we're trying to match.
- let arg_type = match put_arg(pos.func, value) { - Ok(_) => return, - Err(t) => t, - }; - - let ty = pos.func.dfg.value_type(value); - match legalize_abi_value(ty, &arg_type) { - ValueConversion::IntSplit => { - let curpos = pos.position(); - let srcloc = pos.srcloc(); - let (lo, hi) = isplit(&mut pos.func, cfg, curpos, srcloc, value); - convert_to_abi(pos, cfg, lo, put_arg); - convert_to_abi(pos, cfg, hi, put_arg); - } - ValueConversion::VectorSplit => { - let curpos = pos.position(); - let srcloc = pos.srcloc(); - let (lo, hi) = vsplit(&mut pos.func, cfg, curpos, srcloc, value); - convert_to_abi(pos, cfg, lo, put_arg); - convert_to_abi(pos, cfg, hi, put_arg); - } - ValueConversion::IntBits => { - debug_assert!(!ty.is_int()); - let abi_ty = Type::int(ty.bits()).expect("Invalid type for conversion"); - let arg = pos.ins().bitcast(abi_ty, value); - convert_to_abi(pos, cfg, arg, put_arg); - } - ValueConversion::Sext(abi_ty) => { - let arg = pos.ins().sextend(abi_ty, value); - convert_to_abi(pos, cfg, arg, put_arg); - } - ValueConversion::Uext(abi_ty) => { - let arg = pos.ins().uextend(abi_ty, value); - convert_to_abi(pos, cfg, arg, put_arg); - } - ValueConversion::Pointer(abi_ty) => { - // Note: This conversion can only happen for call arguments, - // so we can allocate the value on stack safely. - let stack_slot = pos.func.create_stack_slot(StackSlotData { - kind: StackSlotKind::ExplicitSlot, - size: ty.bytes(), - offset: None, - }); - let arg = pos.ins().stack_addr(abi_ty, stack_slot, 0); - pos.ins().store(MemFlags::new(), value, arg, 0); - convert_to_abi(pos, cfg, arg, put_arg); - } - } -} - -/// Check if a sequence of arguments match a desired sequence of argument types. -fn check_arg_types(dfg: &DataFlowGraph, args: &[Value], types: &[AbiParam]) -> bool { - args.len() == types.len() - && args.iter().zip(types.iter()).all(|(v, at)| { - if let ArgumentPurpose::StructArgument(_) = at.purpose { - true - } else { - dfg.value_type(*v) == at.value_type - } - }) -} - -/// Check if the arguments of the call `inst` match the signature. -/// -/// Returns `Ok(())` if the signature matches and no changes are needed, or `Err(sig_ref)` if the -/// signature doesn't match. -fn check_call_signature(dfg: &DataFlowGraph, inst: Inst) -> Result<(), SigRef> { - // Extract the signature and argument values. - let (sig_ref, args) = match dfg[inst].analyze_call(&dfg.value_lists) { - CallInfo::Direct(func, args) => (dfg.ext_funcs[func].signature, args), - CallInfo::Indirect(sig_ref, args) => (sig_ref, args), - CallInfo::NotACall => panic!("Expected call, got {:?}", dfg[inst]), - }; - let sig = &dfg.signatures[sig_ref]; - - if check_arg_types(dfg, args, &sig.params[..]) - && check_arg_types(dfg, dfg.inst_results(inst), &sig.returns[..]) - { - // All types check out. - Ok(()) - } else { - // Call types need fixing. - Err(sig_ref) - } -} - -/// Check if the arguments of the return `inst` match the signature. -fn check_return_signature(dfg: &DataFlowGraph, inst: Inst, sig: &Signature) -> bool { - check_arg_types(dfg, dfg.inst_variable_args(inst), &sig.returns) -} - -/// Insert ABI conversion code for the arguments to the call or return instruction at `pos`. -/// -/// - `abi_args` is the number of arguments that the ABI signature requires. -/// - `get_abi_type` is a closure that can provide the desired `AbiParam` for a given ABI -/// argument number in `0..abi_args`. 
-/// -fn legalize_inst_arguments<ArgType>( - pos: &mut FuncCursor, - cfg: &ControlFlowGraph, - abi_args: usize, - mut get_abi_type: ArgType, -) where - ArgType: FnMut(&Function, usize) -> AbiParam, -{ - let inst = pos - .current_inst() - .expect("Cursor must point to a call instruction"); - - // Lift the value list out of the call instruction so we can modify it. - let mut vlist = pos.func.dfg[inst] - .take_value_list() - .expect("Call must have a value list"); - - // The value list contains all arguments to the instruction, including the callee on an - // indirect call which isn't part of the call arguments that must match the ABI signature. - // Figure out how many fixed values are at the front of the list. We won't touch those. - let num_fixed_values = pos.func.dfg[inst] - .opcode() - .constraints() - .num_fixed_value_arguments(); - let have_args = vlist.len(&pos.func.dfg.value_lists) - num_fixed_values; - if abi_args < have_args { - // This happens with multiple return values after we've legalized the - // signature but haven't legalized the return instruction yet. This - // legalization is handled in `handle_return_abi`. - pos.func.dfg[inst].put_value_list(vlist); - return; - } - - // Grow the value list to the right size and shift all the existing arguments to the right. - // This lets us write the new argument values into the list without overwriting the old - // arguments. - // - // Before: - // - // <--> fixed_values - // <-----------> have_args - // [FFFFOOOOOOOOOOOOO] - // - // After grow_at(): - // - // <--> fixed_values - // <-----------> have_args - // <------------------> abi_args - // [FFFF-------OOOOOOOOOOOOO] - // ^ - // old_arg_offset - // - // After writing the new arguments: - // - // <--> fixed_values - // <------------------> abi_args - // [FFFFNNNNNNNNNNNNNNNNNNNN] - // - vlist.grow_at( - num_fixed_values, - abi_args - have_args, - &mut pos.func.dfg.value_lists, - ); - let old_arg_offset = num_fixed_values + abi_args - have_args; - - let mut abi_arg = 0; - for old_arg in 0..have_args { - let old_value = vlist - .get(old_arg_offset + old_arg, &pos.func.dfg.value_lists) - .unwrap(); - let mut put_arg = |func: &mut Function, arg| { - let abi_type = get_abi_type(func, abi_arg); - let struct_argument = matches!(abi_type.purpose, ArgumentPurpose::StructArgument(_)); - if func.dfg.value_type(arg) == abi_type.value_type || struct_argument { - // This is the argument type we need. - vlist.as_mut_slice(&mut func.dfg.value_lists)[num_fixed_values + abi_arg] = arg; - abi_arg += 1; - Ok(()) - } else { - Err(abi_type) - } - }; - convert_to_abi(pos, cfg, old_value, &mut put_arg); - } - - // Put the modified value list back. - pos.func.dfg[inst].put_value_list(vlist); -} - -/// Ensure that the `ty` being returned is a type that can be loaded and stored -/// (potentially after another narrowing legalization) from memory, since it -/// will go into the `sret` space. -fn legalized_type_for_sret(ty: Type) -> Type { - if ty.is_bool() { - let bits = std::cmp::max(8, ty.bits()); - Type::int(bits).unwrap() - } else { - ty - } -} - -/// Insert any legalization code required to ensure that `val` can be stored -/// into the `sret` memory. Returns the (potentially new, potentially -/// unmodified) legalized value and its type.
-fn legalize_type_for_sret_store(pos: &mut FuncCursor, val: Value, ty: Type) -> (Value, Type) { - if ty.is_bool() { - let bits = std::cmp::max(8, ty.bits()); - let ty = Type::int(bits).unwrap(); - let val = pos.ins().bint(ty, val); - (val, ty) - } else { - (val, ty) - } -} - -/// Insert ABI conversion code before and after the call instruction at `pos`. -/// -/// Instructions inserted before the call will compute the appropriate ABI values for the -/// callee's new ABI-legalized signature. The function call arguments are rewritten in place to -/// match the new signature. -/// -/// Instructions will be inserted after the call to convert returned ABI values back to the -/// original return values. The call's result values will be adapted to match the new signature. -/// -/// Returns `true` if any instructions were inserted. -pub fn handle_call_abi( - isa: &dyn TargetIsa, - mut inst: Inst, - func: &mut Function, - cfg: &ControlFlowGraph, -) -> bool { - let pos = &mut FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - // Start by checking if the argument types already match the signature. - let sig_ref = match check_call_signature(&pos.func.dfg, inst) { - Ok(_) => return spill_call_arguments(pos, isa), - Err(s) => s, - }; - - let sig = &pos.func.dfg.signatures[sig_ref]; - let old_sig = &pos.func.dfg.old_signatures[sig_ref]; - - if sig.uses_struct_return_param() - && old_sig - .as_ref() - .map_or(false, |s| !s.uses_struct_return_param()) - { - legalize_sret_call(isa, pos, sig_ref, inst); - } else { - if !pos.func.dfg.signatures[sig_ref].returns.is_empty() { - inst = legalize_inst_results(pos, |func, abi_res| { - func.dfg.signatures[sig_ref].returns[abi_res] - }); - } - } - - // Go back and fix the call arguments to match the ABI signature. - pos.goto_inst(inst); - let abi_args = pos.func.dfg.signatures[sig_ref].params.len(); - legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| { - func.dfg.signatures[sig_ref].params[abi_arg] - }); - - debug_assert!( - check_call_signature(&pos.func.dfg, inst).is_ok(), - "Signature still wrong: {}, {}{}", - pos.func.dfg.display_inst(inst, None), - sig_ref, - pos.func.dfg.signatures[sig_ref] - ); - - // Go back and insert spills for any stack arguments. - pos.goto_inst(inst); - spill_call_arguments(pos, isa); - - // Yes, we changed stuff. - true -} - -/// Insert ABI conversion code before and after the return instruction at `inst`. -/// -/// Return `true` if any instructions were inserted. -pub fn handle_return_abi(inst: Inst, func: &mut Function, cfg: &ControlFlowGraph) -> bool { - // Check if the returned types already match the signature. - if check_return_signature(&func.dfg, inst, &func.signature) { - return false; - } - - // Count the special-purpose return values (`link`, `sret`, and `vmctx`) that were appended to - // the legalized signature. 
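The counting that follows scans the legalized returns from the back, because special-purpose values are always appended after the normal ones. The same pattern in isolation, with a toy `Purpose` enum standing in for `ArgumentPurpose`:

```rust
enum Purpose {
    Normal,
    Link,
    StructReturn,
    VMContext,
}

/// Count trailing special-purpose returns, walking backward until the first
/// normal return value is seen.
fn count_special_returns(returns: &[Purpose]) -> usize {
    returns
        .iter()
        .rev()
        .take_while(|p| matches!(**p, Purpose::Link | Purpose::StructReturn | Purpose::VMContext))
        .count()
}

fn main() {
    use Purpose::*;
    // Two normal returns, then an appended `link` and `sret`.
    assert_eq!(count_special_returns(&[Normal, Normal, Link, StructReturn]), 2);
}
```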
- let special_args = func - .signature - .returns - .iter() - .rev() - .take_while(|&rt| { - rt.purpose == ArgumentPurpose::Link - || rt.purpose == ArgumentPurpose::StructReturn - || rt.purpose == ArgumentPurpose::VMContext - }) - .count(); - let abi_args = func.signature.returns.len() - special_args; - - let pos = &mut FuncCursor::new(func).at_inst(inst); - pos.use_srcloc(inst); - - legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| { - let arg = func.signature.returns[abi_arg]; - debug_assert!( - !arg.legalized_to_pointer, - "Return value cannot be legalized to pointer" - ); - arg - }); - // Append special return arguments for any `sret`, `link`, and `vmctx` return values added to - // the legalized signature. These values should simply be propagated from the entry block - // arguments. - if special_args > 0 { - log::trace!( - "Adding {} special-purpose arguments to {}", - special_args, - pos.func.dfg.display_inst(inst, None) - ); - let mut vlist = pos.func.dfg[inst].take_value_list().unwrap(); - let mut sret = None; - - for arg in &pos.func.signature.returns[abi_args..] { - match arg.purpose { - ArgumentPurpose::Link - | ArgumentPurpose::StructReturn - | ArgumentPurpose::VMContext => {} - ArgumentPurpose::Normal => panic!("unexpected return value {}", arg), - _ => panic!("Unsupported special purpose return value {}", arg), - } - // A `link`/`sret`/`vmctx` return value can only appear in a signature that has a - // unique matching argument. They are appended at the end, so search the signature from - // the end. - let idx = pos - .func - .signature - .params - .iter() - .rposition(|t| t.purpose == arg.purpose) - .expect("No matching special purpose argument."); - // Get the corresponding entry block value and add it to the return instruction's - // arguments. - let val = pos - .func - .dfg - .block_params(pos.func.layout.entry_block().unwrap())[idx]; - debug_assert_eq!(pos.func.dfg.value_type(val), arg.value_type); - vlist.push(val, &mut pos.func.dfg.value_lists); - - if let ArgumentPurpose::StructReturn = arg.purpose { - sret = Some(val); - } - } - - // Store all the regular returns into the retptr space and remove them - // from the `return` instruction's value list. - if let Some(sret) = sret { - let mut offset = 0; - let num_regular_rets = vlist.len(&pos.func.dfg.value_lists) - special_args; - for i in 0..num_regular_rets { - debug_assert_eq!( - pos.func.old_signature.as_ref().unwrap().returns[i].purpose, - ArgumentPurpose::Normal, - ); - - // The next return value to process is always at `0`, since the - // list is emptied as we iterate. - let v = vlist.get(0, &pos.func.dfg.value_lists).unwrap(); - let ty = pos.func.dfg.value_type(v); - let (v, ty) = legalize_type_for_sret_store(pos, v, ty); - - let size = ty.bytes(); - offset = round_up_to_multiple_of_type_align(offset, ty); - - pos.ins().store(MemFlags::trusted(), v, sret, offset as i32); - vlist.remove(0, &mut pos.func.dfg.value_lists); - - offset += size; - } - } - pos.func.dfg[inst].put_value_list(vlist); - } - - debug_assert_eq!( - pos.func.dfg.inst_variable_args(inst).len(), - abi_args + special_args - ); - debug_assert!( - check_return_signature(&pos.func.dfg, inst, &pos.func.signature), - "Signature still wrong: {} / signature {}", - pos.func.dfg.display_inst(inst, None), - pos.func.signature - ); - - // Yes, we changed stuff. - true -} - -fn round_up_to_multiple_of_type_align(bytes: u32, ty: Type) -> u32 { - // We don't have a dedicated alignment for types, so assume they are - // size-aligned. 
- let align = ty.bytes(); - round_up_to_multiple_of_pow2(bytes, align) -} - -/// Round `n` up to the next multiple of `to` that is greater than or equal to -/// `n`. -/// -/// `to` must be a power of two and greater than zero. -/// -/// This is useful for rounding an offset or pointer up to some type's required -/// alignment. -fn round_up_to_multiple_of_pow2(n: u32, to: u32) -> u32 { - debug_assert!(to > 0); - debug_assert!(to.is_power_of_two()); - - // The simple version of this function is - // - // (n + to - 1) / to * to - // - // Consider the numerator: `n + to - 1`. This is ensuring that if there is - // any remainder for `n / to`, then the result of the division is one - // greater than `n / to`, and that otherwise we get exactly the same result - // as `n / to` due to integer division rounding off the remainder. In other - // words, we only round up if `n` is not aligned to `to`. - // - // However, we know `to` is a power of two, and therefore `anything / to` is - // equivalent to `anything >> log2(to)` and `anything * to` is equivalent to - // `anything << log2(to)`. We can therefore rewrite our simplified function - // into the following: - // - // (n + to - 1) >> log2(to) << log2(to) - // - // But shifting a value right by some number of bits `b` and then shifting - // it left by that same number of bits `b` is equivalent to clearing the - // bottom `b` bits of the number. We can clear the bottom `b` bits of a - // number by bit-wise and'ing the number with the bit-wise not of `2^b - 1`. - // Plugging this into our function and simplifying, we get: - // - // (n + to - 1) >> log2(to) << log2(to) - // = (n + to - 1) & !(2^log2(to) - 1) - // = (n + to - 1) & !(to - 1) - // - // And now we have the final version of this function! - - (n + to - 1) & !(to - 1) -} - -/// Assign stack slots to incoming function parameters on the stack. -/// -/// Values that are passed into the function on the stack must be assigned to an `IncomingArg` -/// stack slot already during legalization. -fn spill_entry_params(func: &mut Function, entry: Block) { - for (abi, &arg) in func - .signature - .params - .iter() - .zip(func.dfg.block_params(entry)) - { - if let ArgumentPurpose::StructArgument(_) = abi.purpose { - // Location has already been assigned during legalization. - } else if let ArgumentLoc::Stack(offset) = abi.location { - let ss = func - .stack_slots - .make_incoming_arg(abi.value_type.bytes(), offset); - func.locations[arg] = ValueLoc::Stack(ss); - } - } -} - -/// Assign stack slots to outgoing function arguments on the stack. -/// -/// Values that are passed to a called function on the stack must be assigned to a matching -/// `OutgoingArg` stack slot. The assignment must happen immediately before the call. -/// -/// TODO: The outgoing stack slots can be written a bit earlier, as long as there are no branches -/// or calls between writing the stack slots and the call instruction. Writing the slots earlier -/// could help reduce register pressure before the call. -fn spill_call_arguments(pos: &mut FuncCursor, isa: &dyn TargetIsa) -> bool { - let inst = pos - .current_inst() - .expect("Cursor must point to a call instruction"); - let sig_ref = pos - .func - .dfg - .call_signature(inst) - .expect("Call instruction expected."); - - // Start by building a list of stack slots and arguments to be replaced. - // This requires borrowing `pos.func.dfg`, so we can't change anything. 
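Stepping back to the `round_up_to_multiple_of_pow2` derivation above, a quick property check confirms that the bit trick agrees with the naive divide-then-multiply formula for power-of-two alignments. This is purely illustrative, a standalone re-statement of the two forms from that comment:

```rust
fn round_up_naive(n: u32, to: u32) -> u32 {
    (n + to - 1) / to * to
}

fn round_up_pow2(n: u32, to: u32) -> u32 {
    debug_assert!(to > 0 && to.is_power_of_two());
    (n + to - 1) & !(to - 1)
}

fn main() {
    for &to in &[1u32, 2, 4, 8, 16, 32] {
        for n in 0..1000 {
            assert_eq!(round_up_pow2(n, to), round_up_naive(n, to));
        }
    }
}
```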
- let arglist = { - let locations = &pos.func.locations; - let stack_slots = &mut pos.func.stack_slots; - pos.func - .dfg - .inst_variable_args(inst) - .iter() - .zip(&pos.func.dfg.signatures[sig_ref].params) - .enumerate() - .filter_map(|(idx, (&arg, abi))| { - match abi.location { - ArgumentLoc::Stack(offset) => { - // Assign `arg` to a new stack slot, unless it's already in the correct - // slot. The legalization needs to be idempotent, so we should see a - // correct outgoing slot on the second pass. - let (ss, size) = match abi.purpose { - ArgumentPurpose::StructArgument(size) => { - (stack_slots.get_outgoing_arg(size, offset), Some(size)) - } - _ => ( - stack_slots.get_outgoing_arg(abi.value_type.bytes(), offset), - None, - ), - }; - if locations[arg] != ValueLoc::Stack(ss) { - Some((idx, arg, ss, size)) - } else { - None - } - } - _ => None, - } - }) - .collect::<Vec<_>>() - }; - - if arglist.is_empty() { - return false; - } - - let mut libc_memcpy = None; - let mut import_memcpy = |func: &mut Function, pointer_type| { - if let Some(libc_memcpy) = libc_memcpy { - return libc_memcpy; - } - - let signature = { - let mut s = Signature::new(isa.default_call_conv()); - s.params.push(AbiParam::new(pointer_type)); - s.params.push(AbiParam::new(pointer_type)); - // The last argument of `memcpy` is a `size_t`. This is the same size as a pointer on - // all architectures we are interested in. - s.params.push(AbiParam::new(pointer_type)); - legalize_libcall_signature(&mut s, isa); - func.import_signature(s) - }; - - let func = func.import_function(ExtFuncData { - name: ExternalName::LibCall(LibCall::Memcpy), - signature, - colocated: false, - }); - libc_memcpy = Some(func); - func - }; - - // Insert the spill instructions and rewrite call arguments. - for (idx, arg, ss, size) in arglist { - let stack_val = if let Some(size) = size { - // Struct argument - let pointer_type = pos.func.dfg.value_type(arg); - let src = arg; - let dest = pos.ins().stack_addr(pointer_type, ss, 0); - let size = pos.ins().iconst(pointer_type, i64::from(size)); - - let libc_memcpy = import_memcpy(pos.func, pointer_type); - pos.ins().call(libc_memcpy, &[dest, src, size]); - pos.ins().dummy_sarg_t() - } else { - // Non-struct argument - pos.ins().spill(arg) - }; - pos.func.locations[stack_val] = ValueLoc::Stack(ss); - pos.func.dfg.inst_variable_args_mut(inst)[idx] = stack_val; - } - - // We changed stuff. - true -} - -#[cfg(test)] -mod tests { - use super::round_up_to_multiple_of_pow2; - - #[test] - fn round_up_to_multiple_of_pow2_works() { - for (n, to, expected) in vec![ - (0, 1, 0), - (1, 1, 1), - (2, 1, 2), - (0, 2, 0), - (1, 2, 2), - (2, 2, 2), - (3, 2, 4), - (0, 4, 0), - (1, 4, 4), - (2, 4, 4), - (3, 4, 4), - (4, 4, 4), - (5, 4, 8), - ] { - let actual = round_up_to_multiple_of_pow2(n, to); - assert_eq!( - actual, expected, - "round_up_to_multiple_of_pow2(n = {}, to = {}) = {} (expected {})", - n, to, actual, expected - ); - } - } -} diff --git a/cranelift/codegen/src/legalizer/libcall.rs b/cranelift/codegen/src/legalizer/libcall.rs deleted file mode 100644 index 0973422a24..0000000000 --- a/cranelift/codegen/src/legalizer/libcall.rs +++ /dev/null @@ -1,40 +0,0 @@ -//! Expanding instructions as runtime library calls. - -use crate::ir; -use crate::ir::{libcall::get_libcall_funcref, InstBuilder}; -use crate::isa::{CallConv, TargetIsa}; -use crate::legalizer::boundary::legalize_libcall_signature; -use alloc::vec::Vec; - -/// Try to expand `inst` as a library call, returning true if successful.
-pub fn expand_as_libcall(inst: ir::Inst, func: &mut ir::Function, isa: &dyn TargetIsa) -> bool { - // Does the opcode/ctrl_type combo even have a well-known runtime library name. - let libcall = match ir::LibCall::for_inst(func.dfg[inst].opcode(), func.dfg.ctrl_typevar(inst)) - { - Some(lc) => lc, - None => return false, - }; - - // Now we convert `inst` to a call. First save the arguments. - let mut args = Vec::new(); - args.extend_from_slice(func.dfg.inst_args(inst)); - - let call_conv = CallConv::for_libcall(isa.flags(), isa.default_call_conv()); - if call_conv.extends_baldrdash() { - let vmctx = func - .special_param(ir::ArgumentPurpose::VMContext) - .expect("Missing vmctx parameter for baldrdash libcall"); - args.push(vmctx); - } - - // The replace builder will preserve the instruction result values. - let funcref = get_libcall_funcref(libcall, call_conv, func, inst, isa); - func.dfg.replace(inst).call(funcref, &args); - - // Ask the ISA to legalize the signature. - let fn_data = &func.dfg.ext_funcs[funcref]; - let sig_data = &mut func.dfg.signatures[fn_data.signature]; - legalize_libcall_signature(sig_data, isa); - - true -} diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs index a5a248c0e7..ad97e06fc3 100644 --- a/cranelift/codegen/src/legalizer/mod.rs +++ b/cranelift/codegen/src/legalizer/mod.rs @@ -19,179 +19,14 @@ use crate::ir::types::I32; use crate::ir::{self, InstBuilder, MemFlags}; use crate::isa::TargetIsa; -use crate::timing; -use alloc::collections::BTreeSet; - -mod boundary; mod globalvalue; mod heap; -mod libcall; -mod split; mod table; use self::globalvalue::expand_global_value; use self::heap::expand_heap_addr; -pub(crate) use self::libcall::expand_as_libcall; use self::table::expand_table_addr; -enum LegalizeInstResult { - Done, - Legalized, - SplitLegalizePending, -} - -/// Legalize `inst` for `isa`. -fn legalize_inst( - inst: ir::Inst, - pos: &mut FuncCursor, - cfg: &mut ControlFlowGraph, - isa: &dyn TargetIsa, -) -> LegalizeInstResult { - let opcode = pos.func.dfg[inst].opcode(); - - // Check for ABI boundaries that need to be converted to the legalized signature. - if opcode.is_call() { - if boundary::handle_call_abi(isa, inst, pos.func, cfg) { - return LegalizeInstResult::Legalized; - } - } else if opcode.is_return() { - if boundary::handle_return_abi(inst, pos.func, cfg) { - return LegalizeInstResult::Legalized; - } - } else if opcode.is_branch() { - split::simplify_branch_arguments(&mut pos.func.dfg, inst); - } else if opcode == ir::Opcode::Isplit { - pos.use_srcloc(inst); - - let arg = match pos.func.dfg[inst] { - ir::InstructionData::Unary { arg, .. } => pos.func.dfg.resolve_aliases(arg), - _ => panic!("Expected isplit: {}", pos.func.dfg.display_inst(inst, None)), - }; - - match pos.func.dfg.value_def(arg) { - ir::ValueDef::Result(inst, _num) => { - if let ir::InstructionData::Binary { - opcode: ir::Opcode::Iconcat, - .. - } = pos.func.dfg[inst] - { - // `arg` was created by an `iconcat` instruction. - } else { - // `arg` was not created by an `iconcat` instruction. Don't try to resolve it, - // as otherwise `split::isplit` will re-insert the original `isplit`, causing - // an endless loop. 
- return LegalizeInstResult::SplitLegalizePending; - } - } - ir::ValueDef::Param(_block, _num) => {} - } - - let res = pos.func.dfg.inst_results(inst).to_vec(); - assert_eq!(res.len(), 2); - let (resl, resh) = (res[0], res[1]); // Prevent borrowck error - - // Remove old isplit - pos.func.dfg.clear_results(inst); - pos.remove_inst(); - - let curpos = pos.position(); - let srcloc = pos.srcloc(); - let (xl, xh) = split::isplit(pos.func, cfg, curpos, srcloc, arg); - - pos.func.dfg.change_to_alias(resl, xl); - pos.func.dfg.change_to_alias(resh, xh); - - return LegalizeInstResult::Legalized; - } - - match pos.func.update_encoding(inst, isa) { - Ok(()) => LegalizeInstResult::Done, - Err(action) => { - // We should transform the instruction into legal equivalents. - // If the current instruction was replaced, we need to double back and revisit - // the expanded sequence. This is both to assign encodings and possible to - // expand further. - // There's a risk of infinite looping here if the legalization patterns are - // unsound. Should we attempt to detect that? - if action(inst, pos.func, cfg, isa) { - return LegalizeInstResult::Legalized; - } - - // We don't have any pattern expansion for this instruction either. - // Try converting it to a library call as a last resort. - if expand_as_libcall(inst, pos.func, isa) { - LegalizeInstResult::Legalized - } else { - LegalizeInstResult::Done - } - } - } -} - -/// Legalize `func` for `isa`. -/// -/// - Transform any instructions that don't have a legal representation in `isa`. -/// - Fill out `func.encodings`. -/// -pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) { - let _tt = timing::legalize(); - debug_assert!(cfg.is_valid()); - - boundary::legalize_signatures(func, isa); - - func.encodings.resize(func.dfg.num_insts()); - - let mut pos = FuncCursor::new(func); - let func_begin = pos.position(); - - // Split block params before trying to legalize instructions, so that the newly introduced - // isplit instructions get legalized. - while let Some(block) = pos.next_block() { - split::split_block_params(pos.func, cfg, block); - } - - pos.set_position(func_begin); - - // This must be a set to prevent trying to legalize `isplit` and `vsplit` twice in certain cases. - let mut pending_splits = BTreeSet::new(); - - // Process blocks in layout order. Some legalization actions may split the current block or append - // new ones to the end. We need to make sure we visit those new blocks too. - while let Some(_block) = pos.next_block() { - // Keep track of the cursor position before the instruction being processed, so we can - // double back when replacing instructions. - let mut prev_pos = pos.position(); - - while let Some(inst) = pos.next_inst() { - match legalize_inst(inst, &mut pos, cfg, isa) { - // Remember this position in case we need to double back. - LegalizeInstResult::Done => prev_pos = pos.position(), - - // Go back and legalize the inserted return value conversion instructions. - LegalizeInstResult::Legalized => pos.set_position(prev_pos), - - // The argument of a `isplit` or `vsplit` instruction didn't resolve to a - // `iconcat` or `vconcat` instruction. Try again after legalizing the rest of - // the instructions. - LegalizeInstResult::SplitLegalizePending => { - pending_splits.insert(inst); - } - } - } - } - - // Try legalizing `isplit` and `vsplit` instructions, which could not previously be legalized. 
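Before the pending-split pass below, note the control pattern the main loop above relies on: remember the position just before the current instruction, and jump back whenever legalization changed anything so the replacement sequence gets revisited. A stripped-down model of that double-back loop over a plain `Vec`, where the toy "legalizer" expands even numbers into two halves (everything here is a hypothetical stand-in for the cursor-based driver):

```rust
/// Repeatedly apply `step` at index `i`; when it rewrites the code, stay in
/// place so the rewritten sequence is revisited, as the legalizer does by
/// restoring `prev_pos`.
fn drive(insts: &mut Vec<u32>, mut step: impl FnMut(&mut Vec<u32>, usize) -> bool) {
    let mut i = 0;
    while i < insts.len() {
        if step(insts, i) {
            continue; // Something changed: double back and revisit.
        }
        i += 1;
    }
}

fn main() {
    let mut code = vec![4u32, 3];
    drive(&mut code, |insts, i| {
        if insts[i] > 1 && insts[i] % 2 == 0 {
            let half = insts[i] / 2;
            insts[i] = half;
            insts.insert(i + 1, half); // "Expand" in place.
            true
        } else {
            false
        }
    });
    // The expansion of 4 was itself expanded before the loop moved on.
    assert_eq!(code, vec![1, 1, 1, 1, 3]);
}
```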
- for inst in pending_splits { - pos.goto_inst(inst); - legalize_inst(inst, &mut pos, cfg, isa); - } - - // Now that we've lowered all br_tables, we don't need the jump tables anymore. - if !isa.flags().enable_jump_tables() { - pos.func.jump_tables.clear(); - } -} - /// Perform a simple legalization by expansion of the function, without /// platform-specific transforms. pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) { diff --git a/cranelift/codegen/src/legalizer/split.rs b/cranelift/codegen/src/legalizer/split.rs deleted file mode 100644 index 7576926142..0000000000 --- a/cranelift/codegen/src/legalizer/split.rs +++ /dev/null @@ -1,405 +0,0 @@ -//! Value splitting. -//! -//! Some value types are too large to fit in registers, so they need to be split into smaller parts -//! that the ISA can operate on. There are two dimensions of splitting, represented by two -//! complementary instruction pairs: -//! -//! - `isplit` and `iconcat` for splitting integer types into smaller integers. -//! - `vsplit` and `vconcat` for splitting vector types into smaller vector types with the same -//! lane types. -//! -//! There is no floating point splitting. If an ISA doesn't support `f64` values, they probably -//! have to be bit-cast to `i64` and possibly split into two `i32` values that fit in registers. -//! This breakdown is handled by the ABI lowering. -//! -//! When legalizing a single instruction, it is wrapped in splits and concatenations: -//! -//! ```clif -//! v1 = bxor.i64 v2, v3 -//! ``` -//! -//! becomes: -//! -//! ```clif -//! v20, v21 = isplit v2 -//! v30, v31 = isplit v3 -//! v10 = bxor.i32 v20, v30 -//! v11 = bxor.i32 v21, v31 -//! v1 = iconcat v10, v11 -//! ``` -//! -//! This local expansion approach still leaves the original `i64` values in the code as operands on -//! the `split` and `concat` instructions. It also creates a lot of redundant code to clean up as -//! values are constantly split and concatenated. -//! -//! # Optimized splitting -//! -//! We can eliminate a lot of the splitting code quite easily. Whenever we need to split a value, -//! first check if the value is defined by the corresponding concatenation. If so, then just use -//! the two concatenation inputs directly: -//! -//! ```clif -//! v4 = iadd_imm.i64 v1, 1 -//! ``` -//! -//! becomes, using the expanded code from above: -//! -//! ```clif -//! v40, v5 = iadd_imm_cout.i32 v10, 1 -//! v6 = bint.i32 v5 -//! v41 = iadd.i32 v11, v6 -//! v4 = iconcat v40, v41 -//! ``` -//! -//! This means that the `iconcat` instructions defining `v1` and `v4` end up with no uses, so they -//! can be trivially deleted by a dead code elimination pass. -//! -//! # block arguments -//! -//! If all instructions that produce an `i64` value are legalized as above, we will eventually end -//! up with no `i64` values anywhere, except for block arguments. We can work around this by -//! iteratively splitting block arguments too. That should leave us with no illegal value types -//! anywhere. -//! -//! It is possible to have circular dependencies of block arguments that are never used by any real -//! instructions. These loops will remain in the program. - -use crate::cursor::{Cursor, CursorPosition, FuncCursor}; -use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; -use crate::ir::{self, Block, Inst, InstBuilder, InstructionData, Opcode, Type, Value, ValueDef}; -use alloc::vec::Vec; -use core::iter; -use smallvec::SmallVec; - -/// Split `value` into two values using the `isplit` semantics.
Do this by reusing existing values -/// if possible. -pub fn isplit( - func: &mut ir::Function, - cfg: &ControlFlowGraph, - pos: CursorPosition, - srcloc: ir::SourceLoc, - value: Value, -) -> (Value, Value) { - split_any(func, cfg, pos, srcloc, value, Opcode::Iconcat) -} - -/// Split `value` into halves using the `vsplit` semantics. Do this by reusing existing values if -/// possible. -pub fn vsplit( - func: &mut ir::Function, - cfg: &ControlFlowGraph, - pos: CursorPosition, - srcloc: ir::SourceLoc, - value: Value, -) -> (Value, Value) { - split_any(func, cfg, pos, srcloc, value, Opcode::Vconcat) -} - -/// After splitting a block argument, we need to go back and fix up all of the predecessor -/// instructions. This is potentially a recursive operation, but we don't implement it recursively -/// since that could use up too much stack. -/// -/// Instead, the repairs are deferred and placed on a work list in stack form. -struct Repair { - concat: Opcode, - // The argument type after splitting. - split_type: Type, - // The destination block whose arguments have been split. - block: Block, - // Number of the original block argument which has been replaced by the low part. - num: usize, - // Number of the new block argument which represents the high part after the split. - hi_num: usize, -} - -/// Generic version of `isplit` and `vsplit` controlled by the `concat` opcode. -fn split_any( - func: &mut ir::Function, - cfg: &ControlFlowGraph, - pos: CursorPosition, - srcloc: ir::SourceLoc, - value: Value, - concat: Opcode, -) -> (Value, Value) { - let mut repairs = Vec::new(); - let pos = &mut FuncCursor::new(func).at_position(pos).with_srcloc(srcloc); - let result = split_value(pos, value, concat, &mut repairs); - - perform_repairs(pos, cfg, repairs); - - result -} - -pub fn split_block_params(func: &mut ir::Function, cfg: &ControlFlowGraph, block: Block) { - let pos = &mut FuncCursor::new(func).at_top(block); - let block_params = pos.func.dfg.block_params(block); - - // Add further splittable types here. - fn type_requires_splitting(ty: Type) -> bool { - ty == ir::types::I128 - } - - // A shortcut. If none of the param types require splitting, exit now. This helps because - // the loop below necessarily has to copy the block params into a new vector, so it's better to - // avoid doing so when possible. - if !block_params - .iter() - .any(|block_param| type_requires_splitting(pos.func.dfg.value_type(*block_param))) - { - return; - } - - let mut repairs = Vec::new(); - for (num, block_param) in block_params.to_vec().into_iter().enumerate() { - if !type_requires_splitting(pos.func.dfg.value_type(block_param)) { - continue; - } - - split_block_param(pos, block, num, block_param, Opcode::Iconcat, &mut repairs); - } - - perform_repairs(pos, cfg, repairs); -} - -fn perform_repairs(pos: &mut FuncCursor, cfg: &ControlFlowGraph, mut repairs: Vec<Repair>) { - // We have split the value requested, and now we may need to fix some block predecessors. - while let Some(repair) = repairs.pop() { - for BlockPredecessor { inst, ..
} in cfg.pred_iter(repair.block) { - let branch_opc = pos.func.dfg[inst].opcode(); - debug_assert!( - branch_opc.is_branch(), - "Predecessor not a branch: {}", - pos.func.dfg.display_inst(inst, None) - ); - let num_fixed_args = branch_opc.constraints().num_fixed_value_arguments(); - let mut args = pos.func.dfg[inst] - .take_value_list() - .expect("Branches must have value lists."); - let num_args = args.len(&pos.func.dfg.value_lists); - // Get the old value passed to the block argument we're repairing. - let old_arg = args - .get(num_fixed_args + repair.num, &pos.func.dfg.value_lists) - .expect("Too few branch arguments"); - - // It's possible that the CFG's predecessor list has duplicates. Detect them here. - if pos.func.dfg.value_type(old_arg) == repair.split_type { - pos.func.dfg[inst].put_value_list(args); - continue; - } - - // Split the old argument, possibly causing more repairs to be scheduled. - pos.goto_inst(inst); - - let inst_block = pos.func.layout.inst_block(inst).expect("inst in block"); - - // Insert split values prior to the terminal branch group. - let canonical = pos - .func - .layout - .canonical_branch_inst(&pos.func.dfg, inst_block); - if let Some(first_branch) = canonical { - pos.goto_inst(first_branch); - } - - let (lo, hi) = split_value(pos, old_arg, repair.concat, &mut repairs); - - // The `lo` part replaces the original argument. - *args - .get_mut(num_fixed_args + repair.num, &mut pos.func.dfg.value_lists) - .unwrap() = lo; - - // The `hi` part goes at the end. Since multiple repairs may have been scheduled to the - // same block, there could be multiple arguments missing. - if num_args > num_fixed_args + repair.hi_num { - *args - .get_mut( - num_fixed_args + repair.hi_num, - &mut pos.func.dfg.value_lists, - ) - .unwrap() = hi; - } else { - // We need to append one or more arguments. If we're adding more than one argument, - // there must be pending repairs on the stack that will fill in the correct values - // instead of `hi`. - args.extend( - iter::repeat(hi).take(1 + num_fixed_args + repair.hi_num - num_args), - &mut pos.func.dfg.value_lists, - ); - } - - // Put the value list back after manipulating it. - pos.func.dfg[inst].put_value_list(args); - } - } -} - -/// Split a single value using the integer or vector semantics given by the `concat` opcode. -/// -/// If the value is defined by a `concat` instruction, just reuse the operand values of that -/// instruction. -/// -/// Return the two new values representing the parts of `value`. -fn split_value( - pos: &mut FuncCursor, - value: Value, - concat: Opcode, - repairs: &mut Vec, -) -> (Value, Value) { - let value = pos.func.dfg.resolve_aliases(value); - let mut reuse = None; - - match pos.func.dfg.value_def(value) { - ValueDef::Result(inst, num) => { - // This is an instruction result. See if the value was created by a `concat` - // instruction. - if let InstructionData::Binary { opcode, args, .. } = pos.func.dfg[inst] { - debug_assert_eq!(num, 0); - if opcode == concat { - reuse = Some((args[0], args[1])); - } - } - } - ValueDef::Param(block, num) => { - // This is a block parameter. - // We can split the parameter value unless this is the entry block. - if pos.func.layout.entry_block() != Some(block) { - reuse = Some(split_block_param(pos, block, num, value, concat, repairs)); - } - } - } - - // Did the code above succeed in finding values we can reuse? - if let Some(pair) = reuse { - pair - } else { - // No, we'll just have to insert the requested split instruction at `pos`. 
Note that `pos` - // has not been moved by the block argument code above when `reuse` is `None`. - match concat { - Opcode::Iconcat => pos.ins().isplit(value), - Opcode::Vconcat => pos.ins().vsplit(value), - _ => panic!("Unhandled concat opcode: {}", concat), - } - } -} - -fn split_block_param( - pos: &mut FuncCursor, - block: Block, - param_num: usize, - value: Value, - concat: Opcode, - repairs: &mut Vec<Repair>, -) -> (Value, Value) { - // We are going to replace the parameter at `num` with two new arguments. - // Determine the new value types. - let ty = pos.func.dfg.value_type(value); - let split_type = match concat { - Opcode::Iconcat => ty.half_width().expect("Invalid type for isplit"), - Opcode::Vconcat => ty.half_vector().expect("Invalid type for vsplit"), - _ => panic!("Unhandled concat opcode: {}", concat), - }; - - // Since the `repairs` stack potentially contains other parameter numbers for - // `block`, avoid shifting and renumbering block parameters. It could invalidate other - // `repairs` entries. - // - // Replace the original `value` with the low part, and append the high part at the - // end of the argument list. - let lo = pos.func.dfg.replace_block_param(value, split_type); - let hi_num = pos.func.dfg.num_block_params(block); - let hi = pos.func.dfg.append_block_param(block, split_type); - - // Now the original value is dangling. Insert a concatenation instruction that can - // compute it from the two new parameters. This also serves as a record of what we - // did so a future call to this function doesn't have to redo the work. - // - // Note that it is safe to move `pos` here since `reuse` was set above, so we don't - // need to insert a split instruction before returning. - pos.goto_first_inst(block); - pos.ins() - .with_result(value) - .Binary(concat, split_type, lo, hi); - - // Finally, splitting the block parameter is not enough. We also have to repair all - // of the predecessor instructions that branch here. - add_repair(concat, split_type, block, param_num, hi_num, repairs); - - (lo, hi) -} - -// Add a repair entry to the work list. -fn add_repair( - concat: Opcode, - split_type: Type, - block: Block, - num: usize, - hi_num: usize, - repairs: &mut Vec<Repair>, -) { - repairs.push(Repair { - concat, - split_type, - block, - num, - hi_num, - }); -} - -/// Strip concat-split chains. Return a simpler way of computing the same value. -/// -/// Given this input: -/// -/// ```clif -/// v10 = iconcat v1, v2 -/// v11, v12 = isplit v10 -/// ``` -/// -/// This function resolves `v11` to `v1` and `v12` to `v2`. -fn resolve_splits(dfg: &ir::DataFlowGraph, value: Value) -> Value { - let value = dfg.resolve_aliases(value); - - // Deconstruct a split instruction. - let split_res; - let concat_opc; - let split_arg; - if let ValueDef::Result(inst, num) = dfg.value_def(value) { - split_res = num; - concat_opc = match dfg[inst].opcode() { - Opcode::Isplit => Opcode::Iconcat, - Opcode::Vsplit => Opcode::Vconcat, - _ => return value, - }; - split_arg = dfg.inst_args(inst)[0]; - } else { - return value; - } - - // See if split_arg is defined by a concatenation instruction. - if let ValueDef::Result(inst, _) = dfg.value_def(split_arg) { - if dfg[inst].opcode() == concat_opc { - return dfg.inst_args(inst)[split_res]; - } - } - - value -} - -/// Simplify the arguments to a branch *after* the instructions leading up to the branch have been -/// legalized. -/// -/// The branch argument repairs performed by `split_any()` above may be performed on branches that -/// have not yet been legalized.
The repaired arguments can be defined by actual split -/// instructions in that case. -/// -/// After legalizing the instructions computing the value that was split, it is likely that we can -/// avoid depending on the split instruction. Its input probably comes from a concatenation. -pub fn simplify_branch_arguments(dfg: &mut ir::DataFlowGraph, branch: Inst) { - let mut new_args = SmallVec::<[Value; 32]>::new(); - - for &arg in dfg.inst_args(branch) { - let new_arg = resolve_splits(dfg, arg); - new_args.push(new_arg); - } - - dfg.inst_args_mut(branch).copy_from_slice(&new_args); -} diff --git a/cranelift/codegen/src/lib.rs b/cranelift/codegen/src/lib.rs index 25f1e6902d..571017910c 100644 --- a/cranelift/codegen/src/lib.rs +++ b/cranelift/codegen/src/lib.rs @@ -59,7 +59,6 @@ use hashbrown::{hash_map, HashMap, HashSet}; use std::collections::{hash_map, HashMap, HashSet}; pub use crate::context::Context; -pub use crate::legalizer::legalize_function; pub use crate::value_label::{ValueLabelsRanges, ValueLocRange}; pub use crate::verifier::verify_function; pub use crate::write::write_function; @@ -87,7 +86,6 @@ pub use crate::entity::packed_option; pub use crate::machinst::buffer::MachSrcLoc; pub use crate::machinst::TextSectionBuilder; -mod abi; mod bitset; mod constant_hash; mod context; @@ -101,18 +99,12 @@ mod licm; mod log; mod machinst; mod nan_canonicalization; -mod partition_slice; -mod postopt; mod predicates; -mod redundant_reload_remover; -mod regalloc; mod remove_constant_phis; mod result; mod scoped_hash_map; mod simple_gvn; mod simple_preopt; -mod stack_layout; -mod topo_order; mod unreachable_code; mod value_label; diff --git a/cranelift/codegen/src/machinst/adapter.rs b/cranelift/codegen/src/machinst/adapter.rs index 543084a0b5..20e198bdce 100644 --- a/cranelift/codegen/src/machinst/adapter.rs +++ b/cranelift/codegen/src/machinst/adapter.rs @@ -1,20 +1,14 @@ //! Adapter for a `MachBackend` to implement the `TargetIsa` trait. 
-use crate::binemit; use crate::ir; -use crate::isa::{EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa}; +use crate::isa::{RegInfo, TargetIsa}; use crate::machinst::*; -use crate::regalloc::RegisterSet; use crate::settings::{self, Flags}; -#[cfg(feature = "testing_hooks")] -use crate::regalloc::RegDiversions; - #[cfg(feature = "unwind")] use crate::isa::unwind::systemv::RegisterMappingError; use core::any::Any; -use std::borrow::Cow; use std::fmt; use target_lexicon::Triple; @@ -74,60 +68,6 @@ impl TargetIsa for TargetIsaAdapter { } } - fn legal_encodings<'a>( - &'a self, - _func: &'a ir::Function, - _inst: &'a ir::InstructionData, - _ctrl_typevar: ir::Type, - ) -> Encodings<'a> { - panic!("Should not be called when new-style backend is available!") - } - - fn encode( - &self, - _func: &ir::Function, - _inst: &ir::InstructionData, - _ctrl_typevar: ir::Type, - ) -> Result<Encoding, Legalize> { - panic!("Should not be called when new-style backend is available!") - } - - fn encoding_info(&self) -> EncInfo { - panic!("Should not be called when new-style backend is available!") - } - - fn legalize_signature(&self, _sig: &mut Cow<ir::Signature>, _current: bool) { - panic!("Should not be called when new-style backend is available!") - } - - fn regclass_for_abi_type(&self, _ty: ir::Type) -> RegClass { - panic!("Should not be called when new-style backend is available!") - } - - fn allocatable_registers(&self, _func: &ir::Function) -> RegisterSet { - panic!("Should not be called when new-style backend is available!") - } - - fn prologue_epilogue(&self, _func: &mut ir::Function) -> CodegenResult<()> { - panic!("Should not be called when new-style backend is available!") - } - - #[cfg(feature = "testing_hooks")] - fn emit_inst( - &self, - _func: &ir::Function, - _inst: ir::Inst, - _divert: &mut RegDiversions, - _sink: &mut dyn binemit::CodeSink, - ) { - panic!("Should not be called when new-style backend is available!") - } - - /// Emit a whole function into memory. - fn emit_function_to_memory(&self, _func: &ir::Function, _sink: &mut binemit::MemoryCodeSink) { - panic!("Should not be called when new-style backend is available!") - } - fn get_mach_backend(&self) -> Option<&dyn MachBackend> { Some(&*self.backend) } diff --git a/cranelift/codegen/src/partition_slice.rs b/cranelift/codegen/src/partition_slice.rs deleted file mode 100644 index 959f8c1102..0000000000 --- a/cranelift/codegen/src/partition_slice.rs +++ /dev/null @@ -1,97 +0,0 @@ -//! Rearrange the elements in a slice according to a predicate. - -use core::mem; - -/// Rearrange the elements of the mutable slice `s` such that elements where `p(t)` is true precede -/// the elements where `p(t)` is false. -/// -/// The order of elements is not preserved, unless the slice is already partitioned. -/// -/// Returns the number of elements where `p(t)` is true. -pub fn partition_slice<T, F>(s: &mut [T], mut p: F) -> usize -where - F: FnMut(&T) -> bool, -{ - // The iterator works like a deque which we can pop from both ends. - let mut i = s.iter_mut(); - - // Number of elements for which the predicate is known to be true. - let mut pos = 0; - - loop { - // Find the first element for which the predicate fails. - let head = loop { - match i.next() { - Some(head) => { - if !p(&head) { - break head; - } - } - None => return pos, - } - pos += 1; - }; - - // Find the last element for which the predicate succeeds.
- let tail = loop { - match i.next_back() { - Some(tail) => { - if p(&tail) { - break tail; - } - } - None => return pos, - } - }; - - // Swap the two elements into the right order. - mem::swap(head, tail); - pos += 1; - } -} - -#[cfg(test)] -mod tests { - use super::partition_slice; - use alloc::vec::Vec; - - fn check(x: &[u32], want: &[u32]) { - assert_eq!(x.len(), want.len()); - let want_count = want.iter().cloned().filter(|&x| x % 10 == 0).count(); - let mut v = Vec::new(); - v.extend(x.iter().cloned()); - let count = partition_slice(&mut v[..], |&x| x % 10 == 0); - assert_eq!(v, want); - assert_eq!(count, want_count); - } - - #[test] - fn empty() { - check(&[], &[]); - } - - #[test] - fn singles() { - check(&[0], &[0]); - check(&[1], &[1]); - check(&[10], &[10]); - } - - #[test] - fn doubles() { - check(&[0, 0], &[0, 0]); - check(&[0, 5], &[0, 5]); - check(&[5, 0], &[0, 5]); - check(&[5, 4], &[5, 4]); - } - - #[test] - fn longer() { - check(&[1, 2, 3], &[1, 2, 3]); - check(&[1, 2, 10], &[10, 2, 1]); // Note: 2, 1 order not required. - check(&[1, 10, 2], &[10, 1, 2]); // Note: 1, 2 order not required. - check(&[1, 20, 10], &[10, 20, 1]); // Note: 10, 20 order not required. - check(&[1, 20, 3, 10], &[10, 20, 3, 1]); - check(&[20, 3, 10, 1], &[20, 10, 3, 1]); - } -} diff --git a/cranelift/codegen/src/postopt.rs b/cranelift/codegen/src/postopt.rs deleted file mode 100644 index ada14e1ff8..0000000000 --- a/cranelift/codegen/src/postopt.rs +++ /dev/null @@ -1,427 +0,0 @@ -//! A post-legalization rewriting pass. - -#![allow(non_snake_case)] - -use crate::cursor::{Cursor, EncCursor}; -use crate::ir::condcodes::{CondCode, FloatCC, IntCC}; -use crate::ir::dfg::ValueDef; -use crate::ir::immediates::{Imm64, Offset32}; -use crate::ir::instructions::{Opcode, ValueList}; -use crate::ir::{Block, Function, Inst, InstBuilder, InstructionData, MemFlags, Type, Value}; -use crate::isa::TargetIsa; -use crate::timing; - -/// Information collected about a compare+branch sequence. -struct CmpBrInfo { - /// The branch instruction. - br_inst: Inst, - /// The icmp, icmp_imm, or fcmp instruction. - cmp_inst: Inst, - /// The destination of the branch. - destination: Block, - /// The arguments of the branch. - args: ValueList, - /// The first argument to the comparison. The second is in the `kind` field. - cmp_arg: Value, - /// If the branch is `brz` rather than `brnz`, we need to invert the condition - /// before the branch. - invert_branch_cond: bool, - /// The kind of comparison, and the second argument. - kind: CmpBrKind, -} - -enum CmpBrKind { - Icmp { cond: IntCC, arg: Value }, - IcmpImm { cond: IntCC, imm: Imm64 }, - Fcmp { cond: FloatCC, arg: Value }, -} - -/// Optimize comparisons to use flags values, to avoid materializing conditions -/// in integer registers. -/// -/// For example, optimize icmp/fcmp brz/brnz sequences into ifcmp/ffcmp brif/brff -/// sequences. -fn optimize_cpu_flags( - pos: &mut EncCursor, - inst: Inst, - last_flags_clobber: Option, - isa: &dyn TargetIsa, -) { - // Look for compare and branch patterns. - // This code could be considerably simplified with non-lexical lifetimes. 
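-    // As an illustrative sketch (value and block numbers are hypothetical, and
-    // the CLIF syntax is approximate), the rewrite performed below turns
-    //
-    //   v2 = icmp eq v0, v1
-    //   brz v2, block3
-    //
-    // into
-    //
-    //   v3 = ifcmp v0, v1
-    //   v2 = trueif eq v3
-    //   brif ne v3, block3
-    //
-    // where the branch condition is inverted because `brz` takes the branch when
-    // the comparison result is false.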
- let info = match pos.func.dfg[inst] { - InstructionData::Branch { - opcode, - destination, - ref args, - } => { - let first_arg = args.first(&pos.func.dfg.value_lists).unwrap(); - let invert_branch_cond = match opcode { - Opcode::Brz => true, - Opcode::Brnz => false, - _ => panic!(), - }; - if let ValueDef::Result(cond_inst, _) = pos.func.dfg.value_def(first_arg) { - match pos.func.dfg[cond_inst] { - InstructionData::IntCompare { - cond, - args: cmp_args, - .. - } => CmpBrInfo { - br_inst: inst, - cmp_inst: cond_inst, - destination, - args: args.clone(), - cmp_arg: cmp_args[0], - invert_branch_cond, - kind: CmpBrKind::Icmp { - cond, - arg: cmp_args[1], - }, - }, - InstructionData::IntCompareImm { - cond, - arg: cmp_arg, - imm: cmp_imm, - .. - } => CmpBrInfo { - br_inst: inst, - cmp_inst: cond_inst, - destination, - args: args.clone(), - cmp_arg, - invert_branch_cond, - kind: CmpBrKind::IcmpImm { cond, imm: cmp_imm }, - }, - InstructionData::FloatCompare { - cond, - args: cmp_args, - .. - } => CmpBrInfo { - br_inst: inst, - cmp_inst: cond_inst, - destination, - args: args.clone(), - cmp_arg: cmp_args[0], - invert_branch_cond, - kind: CmpBrKind::Fcmp { - cond, - arg: cmp_args[1], - }, - }, - _ => return, - } - } else { - return; - } - } - // TODO: trapif, trueif, selectif, and their ff counterparts. - _ => return, - }; - - // If any instructions clobber the flags between the comparison and the branch, - // don't optimize them. - if last_flags_clobber != Some(info.cmp_inst) { - return; - } - - // We found a compare+branch pattern. Transform it to use flags. - let args = info.args.as_slice(&pos.func.dfg.value_lists)[1..].to_vec(); - pos.goto_inst(info.cmp_inst); - pos.use_srcloc(info.cmp_inst); - match info.kind { - CmpBrKind::Icmp { mut cond, arg } => { - let flags = pos.ins().ifcmp(info.cmp_arg, arg); - pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags); - if info.invert_branch_cond { - cond = cond.inverse(); - } - pos.func - .dfg - .replace(info.br_inst) - .brif(cond, flags, info.destination, &args); - } - CmpBrKind::IcmpImm { mut cond, imm } => { - let flags = pos.ins().ifcmp_imm(info.cmp_arg, imm); - pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags); - if info.invert_branch_cond { - cond = cond.inverse(); - } - pos.func - .dfg - .replace(info.br_inst) - .brif(cond, flags, info.destination, &args); - } - CmpBrKind::Fcmp { mut cond, arg } => { - let flags = pos.ins().ffcmp(info.cmp_arg, arg); - pos.func.dfg.replace(info.cmp_inst).trueff(cond, flags); - if info.invert_branch_cond { - cond = cond.inverse(); - } - pos.func - .dfg - .replace(info.br_inst) - .brff(cond, flags, info.destination, &args); - } - } - let ok = pos.func.update_encoding(info.cmp_inst, isa).is_ok(); - debug_assert!(ok); - let ok = pos.func.update_encoding(info.br_inst, isa).is_ok(); - debug_assert!(ok); -} - -struct MemOpInfo { - opcode: Opcode, - itype: Type, - arg: Value, - st_arg: Option, - flags: MemFlags, - offset: Offset32, -} - -fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetIsa) { - // Look for simple loads and stores we can optimize. 
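-    // For example (an illustrative sketch; the CLIF syntax is approximate), an
-    // address produced by an `iadd` is folded into a complex addressing mode:
-    //
-    //   v2 = iadd v0, v1
-    //   v3 = load.i64 v2+8        =>   v3 = load_complex.i64 v0+v1+8
-    //
-    // while an `iadd_imm` is folded into the offset of the load or store:
-    //
-    //   v2 = iadd_imm v0, 16
-    //   v3 = load.i64 v2+8        =>   v3 = load.i64 v0+24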
- let info = match pos.func.dfg[inst] { - InstructionData::Load { - opcode, - arg, - flags, - offset, - } => MemOpInfo { - opcode, - itype: pos.func.dfg.ctrl_typevar(inst), - arg, - st_arg: None, - flags, - offset, - }, - InstructionData::Store { - opcode, - args, - flags, - offset, - } => MemOpInfo { - opcode, - itype: pos.func.dfg.ctrl_typevar(inst), - arg: args[1], - st_arg: Some(args[0]), - flags, - offset, - }, - _ => return, - }; - - // Examine the instruction that defines the address operand. - if let ValueDef::Result(result_inst, _) = pos.func.dfg.value_def(info.arg) { - match pos.func.dfg[result_inst] { - InstructionData::Binary { - opcode: Opcode::Iadd, - args, - } => match info.opcode { - // Operand is an iadd. Fold it into a memory address with a complex address mode. - Opcode::Load => { - pos.func.dfg.replace(inst).load_complex( - info.itype, - info.flags, - &args, - info.offset, - ); - } - Opcode::Uload8 => { - pos.func.dfg.replace(inst).uload8_complex( - info.itype, - info.flags, - &args, - info.offset, - ); - } - Opcode::Sload8 => { - pos.func.dfg.replace(inst).sload8_complex( - info.itype, - info.flags, - &args, - info.offset, - ); - } - Opcode::Uload16 => { - pos.func.dfg.replace(inst).uload16_complex( - info.itype, - info.flags, - &args, - info.offset, - ); - } - Opcode::Sload16 => { - pos.func.dfg.replace(inst).sload16_complex( - info.itype, - info.flags, - &args, - info.offset, - ); - } - Opcode::Uload32 => { - pos.func - .dfg - .replace(inst) - .uload32_complex(info.flags, &args, info.offset); - } - Opcode::Sload32 => { - pos.func - .dfg - .replace(inst) - .sload32_complex(info.flags, &args, info.offset); - } - Opcode::Uload8x8 => { - pos.func - .dfg - .replace(inst) - .uload8x8_complex(info.flags, &args, info.offset); - } - Opcode::Sload8x8 => { - pos.func - .dfg - .replace(inst) - .sload8x8_complex(info.flags, &args, info.offset); - } - Opcode::Uload16x4 => { - pos.func - .dfg - .replace(inst) - .uload16x4_complex(info.flags, &args, info.offset); - } - Opcode::Sload16x4 => { - pos.func - .dfg - .replace(inst) - .sload16x4_complex(info.flags, &args, info.offset); - } - Opcode::Uload32x2 => { - pos.func - .dfg - .replace(inst) - .uload32x2_complex(info.flags, &args, info.offset); - } - Opcode::Sload32x2 => { - pos.func - .dfg - .replace(inst) - .sload32x2_complex(info.flags, &args, info.offset); - } - Opcode::Store => { - pos.func.dfg.replace(inst).store_complex( - info.flags, - info.st_arg.unwrap(), - &args, - info.offset, - ); - } - Opcode::Istore8 => { - pos.func.dfg.replace(inst).istore8_complex( - info.flags, - info.st_arg.unwrap(), - &args, - info.offset, - ); - } - Opcode::Istore16 => { - pos.func.dfg.replace(inst).istore16_complex( - info.flags, - info.st_arg.unwrap(), - &args, - info.offset, - ); - } - Opcode::Istore32 => { - pos.func.dfg.replace(inst).istore32_complex( - info.flags, - info.st_arg.unwrap(), - &args, - info.offset, - ); - } - _ => panic!("Unsupported load or store opcode"), - }, - InstructionData::BinaryImm64 { - opcode: Opcode::IaddImm, - arg, - imm, - } => match pos.func.dfg[inst] { - // Operand is an iadd_imm. Fold the immediate into the offset if possible. - InstructionData::Load { - arg: ref mut load_arg, - ref mut offset, - .. - } => { - if let Some(imm) = offset.try_add_i64(imm.into()) { - *load_arg = arg; - *offset = imm; - } else { - // Overflow. - return; - } - } - InstructionData::Store { - args: ref mut store_args, - ref mut offset, - .. 
- } => { - if let Some(imm) = offset.try_add_i64(imm.into()) { - store_args[1] = arg; - *offset = imm; - } else { - // Overflow. - return; - } - } - _ => panic!(), - }, - _ => { - // Address value is defined by some other kind of instruction. - return; - } - } - } else { - // Address value is not the result of an instruction. - return; - } - - let ok = pos.func.update_encoding(inst, isa).is_ok(); - debug_assert!( - ok, - "failed to update encoding for `{}`", - pos.func.dfg.display_inst(inst, isa) - ); -} - -//---------------------------------------------------------------------- -// -// The main post-opt pass. - -pub fn do_postopt(func: &mut Function, isa: &dyn TargetIsa) { - let _tt = timing::postopt(); - let mut pos = EncCursor::new(func, isa); - let is_mach_backend = isa.get_mach_backend().is_some(); - while let Some(_block) = pos.next_block() { - let mut last_flags_clobber = None; - while let Some(inst) = pos.next_inst() { - if !is_mach_backend && isa.uses_cpu_flags() { - // Optimize instructions to make use of flags. - optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa); - - // Track the most recently seen instruction that clobbers the flags. - if let Some(constraints) = isa - .encoding_info() - .operand_constraints(pos.func.encodings[inst]) - { - if constraints.clobbers_flags { - last_flags_clobber = Some(inst) - } - } - } - - if isa.uses_complex_addresses() { - optimize_complex_addresses(&mut pos, inst, isa); - } - } - } -} diff --git a/cranelift/codegen/src/redundant_reload_remover.rs b/cranelift/codegen/src/redundant_reload_remover.rs deleted file mode 100644 index 501c67ab6b..0000000000 --- a/cranelift/codegen/src/redundant_reload_remover.rs +++ /dev/null @@ -1,904 +0,0 @@ -//! This module implements a late-stage redundant-reload remover, which runs after registers have -//! been allocated and stack slots have been given specific offsets. - -use crate::cursor::{Cursor, CursorPosition, EncCursor, FuncCursor}; -use crate::entity::EntitySet; -use crate::flowgraph::ControlFlowGraph; -use crate::ir::dfg::DataFlowGraph; -use crate::ir::instructions::BranchInfo; -use crate::ir::stackslot::{StackSlotKind, StackSlots}; -use crate::ir::{ - Block, Function, Inst, InstBuilder, InstructionData, Opcode, StackSlotData, Type, Value, - ValueLoc, -}; -use crate::isa::{RegInfo, RegUnit, TargetIsa}; -use crate::regalloc::RegDiversions; -use alloc::vec::Vec; -use core::convert::TryInto; -use cranelift_entity::{PrimaryMap, SecondaryMap}; - -// ============================================================================================= -// A description of the redundant-fill-removal algorithm -// -// -// The algorithm works forwards through each Block. It carries along and updates a table, -// AvailEnv, with which it tracks registers that are known to have the same value as some stack -// slot. The actions on encountering an instruction depend on the instruction, as follows: -// -// ss1 = spill r0: update the AvailEnv so as to note that slot `ss1` and register `r0` -// have the same value. -// -// r1 = fill ss0: look in the AvailEnv. If it tells us that register `r1` and slot `ss0` -// have the same value, then delete the instruction by converting it to a -// `fill_nop`. -// -// If it tells us that some other register `r2` has the same value as -// slot `ss0`, convert the instruction into a copy from `r2` to `r1`. -// -// any other insn: remove from the AvailEnv any bindings associated with registers -// written by this instruction, since they will be invalidated by it.
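-//
-// For illustration, here is a hypothetical trace (not taken from the sources)
-// showing how the rules above interact:
-//
-//   ss0 = spill r1     -- AvailEnv now records that r1 and ss0 agree
-//   ...                -- assume nothing here writes r1
-//   r2 = fill ss0      -- inexact binding (r1, not r2): converted into a copy
-//                         from r1 to r2; r2 now also agrees with ss0
-//   r1 = fill ss0      -- exact binding: deleted (converted to a fill_nop)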
-// -// Tracking the effects of `copy` instructions in AvailEnv for the case when both source and -// destination are registers does not cause any more fills to be removed or converted to copies. -// It's not clear why. -// -// There are various other instruction-handling cases in `visit_inst`, which are documented -// in-line, and do not change the core algorithm, so are not described here. -// -// The registers tracked by AvailEnv are the post-diversion registers that are really used by the -// code; they are not the pre-diversion names associated with each SSA `Value`. The second -// `fill` case above opportunistically copies values from registers that may have been diversion -// targets in some predecessor block, and so are no longer associated with any specific SSA-level -// name at the point the copy is made. Hence those copies (from `r2` to `r1`) cannot be done -// with an ordinary `copy` instruction. Instead they have to be done using a new `copy_to_ssa` -// instruction, which copies from an arbitrary register to a register-resident `Value` (that is, -// "back to" SSA-world). -// -// That completes the description of the core algorithm. -// -// In the case where a block `A` jumps to `B` and `A` is the only predecessor of `B`, the -// AvailEnv at the end of `A` will still be valid at the entry to `B`. In such a case, we can -// profitably transform `B` using the AvailEnv "inherited" from `A`. In order to take full -// advantage of this, this module partitions the function's CFG into tree-shaped groups of -// blocks, and processes each tree as described above. So the AvailEnv is only initialised to -// empty at the start of blocks that form the root of each tree; that is, for blocks which have -// two or more predecessors. - -// ============================================================================================= -// Top level algorithm structure -// -// The overall algorithm, for a function, starts like this: -// -// * (once per function): finds Blocks that have two or more predecessors, since they will be the -// roots of Block trees. Also, the entry node for the function is considered to be a root. -// -// It then continues with a loop that first finds a tree of Blocks ("discovery") and then removes -// redundant fills as described above ("processing"): -// -// * (discovery; once per tree): for each root, performs a depth first search to find all the Blocks -// in the tree, guided by RedundantReloadRemover::discovery_stack. -// -// * (processing; once per tree): the just-discovered tree is then processed as described above, -// guided by RedundantReloadRemover::processing_stack. -// -// In this way, all Blocks reachable from the function's entry point are eventually processed. Note -// that each tree is processed as soon as it has been discovered, so the algorithm never creates a -// list of trees for the function. -// -// The running state is stored in `RedundantReloadRemover`. This is allocated once and can be -// reused for multiple functions so as to minimise heap turnover. 
The fields are, roughly: -// -// num_regunits -- constant for the whole function; used by the tree processing phase -// num_preds_per_block -- constant for the whole function; used by the tree discovery process -// -// discovery_stack -- used to guide the tree discovery process -// nodes_in_tree -- the discovered nodes are recorded here -// -// processing_stack -- used to guide the tree processing process -// nodes_already_visited -- used to ensure the tree processing logic terminates in the case -// where a tree has a branch back to its root node. -// -// There is further documentation in line below, as appropriate. - -// ============================================================================================= -// A side note on register choice heuristics - -// The core algorithm opportunistically replaces fill instructions when it knows of a register -// that already holds the required value. How effective this is largely depends on how long -// reloaded values happen to stay alive before the relevant register is overwritten. And that -// depends on the register allocator's register choice heuristics. The worst case is when the -// register allocator reuses registers as soon as possible after they become free. Unfortunately, -// that was indeed the selection scheme prior to the development of this pass. -// -// As part of this work, the register selection scheme has been changed as follows: for registers -// written by any instruction other than a fill, use the lowest numbered available register. But -// for registers written by a fill instruction, use the highest numbered available register. The -// aim is to try and keep reload- and non-reload registers disjoint to the extent possible. -// Several other schemes were tried, but this one is simple and can be worth an extra 2% of -// performance in some cases. -// -// The relevant change is more or less a one-line change in the solver. - -// ============================================================================================= -// Data structures used for discovery of trees - -// `ZeroOneOrMany` is used to record the number of predecessors a Block has. The `Zero` case -// is included so as to cleanly handle the case where the incoming graph has unreachable Blocks. - -#[derive(Clone, PartialEq)] -enum ZeroOneOrMany { - Zero, - One, - Many, -} - -// ============================================================================================= -// Data structures used for processing of trees - -// `SlotInfo` describes a spill slot in the obvious way. Note that it doesn't indicate which -// register(s) are currently associated with the slot. That job is done by `AvailEnv` instead. -// -// In the CL framework, stack slots are partitioned into disjoint sets, one for each -// `StackSlotKind`. The offset and size only give a unique identity within any particular -// `StackSlotKind`. So, to uniquely identify a stack slot, all three fields are necessary. - -#[derive(Clone, Copy)] -struct SlotInfo { - kind: StackSlotKind, - offset: i32, - size: u32, -} - -// `AvailEnv` maps each possible register to a stack slot that holds the same value. The index -// space of `AvailEnv::map` is exactly the set of registers available on the current target. If -// (as is mostly the case) a register is not known to have the same value as a stack slot, then -// its entry is `None` rather than `Some(..)`.
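-//
-// As a concrete (hypothetical) illustration: after
-//
-//   ss2 = spill r5
-//
-// where ss2 is a tracked spill slot with offset 16 and size 8, the environment
-// would hold map[5] = Some(SlotInfo { kind: SpillSlot, offset: 16, size: 8 }),
-// with every other entry left as None.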
-// -// Invariants for AvailEnv: -// -// AvailEnv may have multiple different registers bound to the same stack slot -- that is, `(kind, -// offset, size)` triple. That's OK, and reflects the reality that those two registers contain -// the same value. This could happen, for example, in the case -// -// ss1 = spill r0 -// .. -// r2 = fill ss1 -// -// Then both `r0` and `r2` will have the same value as `ss1`, provided that ".." doesn't write to -// `r0`. -// -// To say that two different registers may be bound to the same stack slot is the same as saying -// that it is allowed to have two different entries in AvailEnv with the same `(kind, offset, -// size)` triple. What is *not* allowed is to have partial overlaps. That is, if two SlotInfos -// have the same `kind` field and have `offset` and `size` fields that overlap, then their -// `offset` and `size` fields must be identical. This is so as to make the algorithm safe against -// situations where, for example, a 64 bit register is spilled, but then only the bottom 32 bits -// are reloaded from the slot. -// -// That said, in such a case the Cranelift IR would most likely be ill-typed, so this case -// probably cannot occur in practice. - -#[derive(Clone)] -struct AvailEnv { - map: Vec<Option<SlotInfo>>, -} - -// `ProcessingStackElem` combines AvailEnv with contextual information needed to "navigate" within -// a Block. -// -// A ProcessingStackElem conceptually has the lifetime of exactly one Block: once the current Block is -// completed, the ProcessingStackElem will be abandoned. In practice the top level state, -// RedundantReloadRemover, caches them, so as to avoid heap turnover. -// -// Note that ProcessingStackElem must contain a CursorPosition. The CursorPosition, which -// indicates where we are in the current Block, cannot be implicitly maintained by looping over all -// the instructions in a Block in turn, because we may choose to suspend processing the current Block -// at a side exit, continue by processing the subtree reached via the side exit, and only later -// resume the current Block. - -struct ProcessingStackElem { - /// Indicates the AvailEnv at the current point in the Block. - avail_env: AvailEnv, - - /// Shows where we currently are inside the Block. - cursor: CursorPosition, - - /// Indicates the currently active register diversions at the current point. - diversions: RegDiversions, -} - -// ============================================================================================= -// The top level data structure - -// `RedundantReloadRemover` contains data structures for the two passes: discovery of tree shaped -// regions, and processing of them. These are allocated once and stay alive for the entire -// function, even though they are cleared out for each new tree shaped region. It also caches -// `num_regunits` and `num_preds_per_block`, which are computed at the start of each function and -// then remain constant. - -/// The redundant reload remover's state. -pub struct RedundantReloadRemover { - /// The total number of RegUnits available on this architecture. This is unknown when the - /// RedundantReloadRemover is created. It becomes known at the beginning of processing of a - /// function. - num_regunits: Option<u16>, - - /// This stores, for each Block, a characterisation of the number of predecessors it has. - num_preds_per_block: PrimaryMap<Block, ZeroOneOrMany>, - - /// The stack used for the first phase (discovery). There is one element on the discovery - /// stack for each currently unexplored Block in the tree being searched.
- discovery_stack: Vec<Block>, - - /// The nodes in the discovered tree are inserted here. - nodes_in_tree: EntitySet<Block>, - - /// The stack used during the second phase (transformation). There is one element on the - /// processing stack for each currently-open node in the tree being transformed. - processing_stack: Vec<ProcessingStackElem>, - - /// Used in the second phase to avoid visiting nodes more than once. - nodes_already_visited: EntitySet<Block>, -} - -// ============================================================================================= -// Miscellaneous small helper functions - -// Is this a kind of stack slot that is safe to track in AvailEnv? This is probably overly -// conservative, but tracking only the SpillSlot and IncomingArgument kinds catches almost all -// available redundancy in practice. -fn is_slot_kind_tracked(kind: StackSlotKind) -> bool { - match kind { - StackSlotKind::SpillSlot | StackSlotKind::IncomingArg => true, - _ => false, - } -} - -// Find out if the range `[offset, +size)` overlaps with the range in `si`. -fn overlaps(si: &SlotInfo, offset: i32, size: u32) -> bool { - let a_offset = si.offset as i64; - let a_size = si.size as i64; - let b_offset = offset as i64; - let b_size = size as i64; - let no_overlap = a_offset + a_size <= b_offset || b_offset + b_size <= a_offset; - !no_overlap -} - -// Find, in `reginfo`, the register bank that `reg` lives in, and return the lower limit and size -// of the bank. This is so the caller can conveniently iterate over all RegUnits in the bank that -// `reg` lives in. -fn find_bank_limits(reginfo: &RegInfo, reg: RegUnit) -> (RegUnit, u16) { - if let Some(bank) = reginfo.bank_containing_regunit(reg) { - return (bank.first_unit, bank.units); - } - // We should never get here, since `reg` must come from *some* RegBank. - panic!("find_bank_limits: reg not found"); -} - -// Returns the register that `v` is allocated to. Assumes that `v` actually resides in a -// register. -fn reg_of_value(locations: &SecondaryMap<Value, ValueLoc>, v: Value) -> RegUnit { - match locations[v] { - ValueLoc::Reg(ru) => ru, - _ => panic!("reg_of_value: value isn't in a reg"), - } -} - -// Returns the stack slot that `v` is allocated to. Assumes that `v` actually resides in a stack -// slot. -fn slot_of_value<'s>( - locations: &SecondaryMap<Value, ValueLoc>, - stack_slots: &'s StackSlots, - v: Value, -) -> &'s StackSlotData { - match locations[v] { - ValueLoc::Stack(slot) => &stack_slots[slot], - _ => panic!("slot_of_value: value isn't in a stack slot"), - } -} - -// ============================================================================================= -// Top level: discovery of tree shaped regions - -impl RedundantReloadRemover { - // A helper for `add_nodes_to_tree` below. - fn discovery_stack_push_successors_of(&mut self, cfg: &ControlFlowGraph, node: Block) { - for successor in cfg.succ_iter(node) { - self.discovery_stack.push(successor); - } - } - - // Visit the tree of Blocks rooted at `starting_point` and add them to `self.nodes_in_tree`. - // `self.num_preds_per_block` guides the process, ensuring we don't leave the tree-ish region - // and indirectly ensuring that the process will terminate in the presence of cycles in the - // graph. `self.discovery_stack` holds the search state in this function. - fn add_nodes_to_tree(&mut self, cfg: &ControlFlowGraph, starting_point: Block) { - // One might well ask why this doesn't loop forever when it encounters cycles in the
The reason is that any cycle in the graph that is reachable from - // anywhere outside the cycle -- in particular, that is reachable from the function's - // entry node -- must have at least one node that has two or more predecessors. So the - // logic below won't follow into it, because it regards any such node as the root of some - // other tree. - debug_assert!(self.discovery_stack.is_empty()); - debug_assert!(self.nodes_in_tree.is_empty()); - - self.nodes_in_tree.insert(starting_point); - self.discovery_stack_push_successors_of(cfg, starting_point); - - while let Some(node) = self.discovery_stack.pop() { - match self.num_preds_per_block[node] { - // We arrived at a node with multiple predecessors, so it's a new root. Ignore it. - ZeroOneOrMany::Many => {} - // This node has just one predecessor, so we should incorporate it in the tree and - // immediately transition into searching from it instead. - ZeroOneOrMany::One => { - self.nodes_in_tree.insert(node); - self.discovery_stack_push_successors_of(cfg, node); - } - // This is meaningless. We arrived at a node that doesn't point back at where we - // came from. - ZeroOneOrMany::Zero => panic!("add_nodes_to_tree: inconsistent graph"), - } - } - } -} - -// ============================================================================================= -// Operations relating to `AvailEnv` - -impl AvailEnv { - // Create a new one. - fn new(size: usize) -> Self { - let mut env = Self { - map: Vec::<Option<SlotInfo>>::new(), - }; - env.map.resize(size, None); - env - } - - // Debug only: checks (some of) the required AvailEnv invariants. - #[cfg(debug_assertions)] - fn check_invariants(&self) -> bool { - // Check that any overlapping entries overlap exactly. This is super lame (quadratic), - // but it's only used in debug builds. - for i in 0..self.map.len() { - if let Some(si) = self.map[i] { - for j in i + 1..self.map.len() { - if let Some(sj) = self.map[j] { - // "si and sj overlap, but not exactly" - if si.kind == sj.kind - && overlaps(&si, sj.offset, sj.size) - && !(si.offset == sj.offset && si.size == sj.size) - { - return false; - } - } - } - } - } - true - } - - // Invalidates the binding associated with `reg`. Note that by construction of AvailEnv, - // `reg` can only be associated with one binding at once. - fn invalidate_by_reg(&mut self, reg: RegUnit) { - self.map[reg as usize] = None; - } - - // Invalidates any binding that has any overlap with `(kind, offset, size)`. - fn invalidate_by_offset(&mut self, kind: StackSlotKind, offset: i32, size: u32) { - debug_assert!(is_slot_kind_tracked(kind)); - for i in 0..self.map.len() { - if let Some(si) = &self.map[i] { - if si.kind == kind && overlaps(&si, offset, size) { - self.map[i] = None; - } - } - } - } - - // Invalidates all bindings. - fn invalidate_all(&mut self) { - for i in 0..self.map.len() { - self.map[i] = None; - } - } - - // Updates AvailEnv to track the effect of a `regmove` instruction. - fn copy_reg(&mut self, src: RegUnit, dst: RegUnit) { - self.map[dst as usize] = self.map[src as usize]; - } - - // Does `env` have the exact binding characterised by `(reg, kind, offset, size)`? - fn has_exact_binding(&self, reg: RegUnit, kind: StackSlotKind, offset: i32, size: u32) -> bool { - debug_assert!(is_slot_kind_tracked(kind)); - if let Some(si) = &self.map[reg as usize] { - return si.kind == kind && si.offset == offset && si.size == size; - } - // No such binding.
- false - } - - // Does `env` have a binding characterised by `(kind, offset, size)` but to a register, let's - // call it `other_reg`, that isn't `reg`? If so, return `other_reg`. Note that `other_reg` - // will have the same bank as `reg`. It is a checked error to call this function with a - // binding matching all four of `(reg, kind, offset, size)`. - fn has_inexact_binding( - &self, - reginfo: &RegInfo, - reg: RegUnit, - kind: StackSlotKind, - offset: i32, - size: u32, - ) -> Option<RegUnit> { - debug_assert!(is_slot_kind_tracked(kind)); - // Find the range of RegUnit numbers for the bank that contains `reg`, and use that as our - // search space. This is so as to guarantee that any match is restricted to the same bank - // as `reg`. - let (first_unit, num_units) = find_bank_limits(reginfo, reg); - for other_reg in first_unit..first_unit + num_units { - if let Some(si) = &self.map[other_reg as usize] { - if si.kind == kind && si.offset == offset && si.size == size { - if other_reg == reg { - panic!("has_inexact_binding: binding *is* exact!"); - } - return Some(other_reg); - } - } - } - // No such binding. - None - } - - // Create the binding `(reg, kind, offset, size)` in `env`, and throw away any previous - // binding associated with either `reg` or the `(kind, offset, size)` triple. - fn bind(&mut self, reg: RegUnit, kind: StackSlotKind, offset: i32, size: u32) { - debug_assert!(is_slot_kind_tracked(kind)); - self.invalidate_by_offset(kind, offset, size); - self.map[reg as usize] = Some(SlotInfo { kind, offset, size }); - } -} - -// Invalidates in `avail_env` any binding associated with a regunit that is written by `inst`. -fn invalidate_regs_written_by_inst( - locations: &SecondaryMap<Value, ValueLoc>, - diversions: &RegDiversions, - dfg: &DataFlowGraph, - avail_env: &mut AvailEnv, - inst: Inst, -) { - for v in dfg.inst_results(inst).iter() { - if let ValueLoc::Reg(ru) = locations[*v] { - // This must be true. It would be meaningless for an SSA value to be diverted before - // the point where it is defined. - debug_assert!(diversions.reg(*v, locations) == ru); - avail_env.invalidate_by_reg(ru); - } - } -} - -// ============================================================================================= -// Processing of individual instructions - -impl RedundantReloadRemover { - // Process `inst`, possibly changing it into a different instruction, and possibly changing - // `self.avail_env` and `func.dfg`. - fn visit_inst( - &mut self, - func: &mut Function, - reginfo: &RegInfo, - isa: &dyn TargetIsa, - inst: Inst, - ) { - // Get hold of the top-of-stack work item. This is the state that we will mutate during - // processing of this instruction. - debug_assert!(!self.processing_stack.is_empty()); - let ProcessingStackElem { - avail_env, - diversions, - .. - } = self.processing_stack.last_mut().unwrap(); - - #[cfg(debug_assertions)] - debug_assert!( - avail_env.check_invariants(), - "visit_inst: env invariants not ok" - ); - - let dfg = &mut func.dfg; - let locations = &func.locations; - let stack_slots = &func.stack_slots; - - // To avoid difficulties with the borrow checker, do this in two stages. First, examine - // the instruction to see if it can be deleted or modified, and park the relevant - // information in `transform`. Update `self.avail_env` too. Later, use `transform` to - // actually do the transformation if necessary.
- enum Transform { - NoChange, - ChangeToNopFill(Value), // delete this insn entirely - ChangeToCopyToSSA(Type, RegUnit), // change it into a copy from the specified reg - } - let mut transform = Transform::NoChange; - - // In this match { .. } statement, either we must treat the instruction specially, or we - // must call `invalidate_regs_written_by_inst` on it. - match &dfg[inst] { - InstructionData::Unary { - opcode: Opcode::Spill, - arg: src_value, - } => { - // Extract: (src_reg, kind, offset, size) - // Invalidate: (kind, offset, size) - // Add new binding: {src_reg -> (kind, offset, size)} - // Don't forget that src_value might be diverted, so we have to deref it. - let slot = slot_of_value(locations, stack_slots, dfg.inst_results(inst)[0]); - let src_reg = diversions.reg(*src_value, locations); - let kind = slot.kind; - if is_slot_kind_tracked(kind) { - let offset = slot.offset.expect("visit_inst: spill with no offset"); - let size = slot.size; - avail_env.bind(src_reg, kind, offset, size); - } else { - // We don't expect this insn to write any regs. But to be consistent with the - // rule above, do this anyway. - invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst); - } - } - InstructionData::Unary { - opcode: Opcode::Fill, - arg: src_value, - } => { - // Extract: (dst_reg, kind, offset, size) - // Invalidate: (kind, offset, size) - // Add new: {dst_reg -> (dst_value, kind, offset, size)} - let slot = slot_of_value(locations, stack_slots, *src_value); - let dst_value = dfg.inst_results(inst)[0]; - let dst_reg = reg_of_value(locations, dst_value); - // This must be true. It would be meaningless for an SSA value to be diverted - // before it was defined. - debug_assert!(dst_reg == diversions.reg(dst_value, locations)); - let kind = slot.kind; - if is_slot_kind_tracked(kind) { - let offset = slot.offset.expect("visit_inst: fill with no offset"); - let size = slot.size; - if avail_env.has_exact_binding(dst_reg, kind, offset, size) { - // This instruction is an exact copy of a fill we saw earlier, and the - // loaded value is still valid. So we'll schedule this instruction for - // deletion (below). No need to make any changes to `avail_env`. - transform = Transform::ChangeToNopFill(*src_value); - } else if let Some(other_reg) = - avail_env.has_inexact_binding(reginfo, dst_reg, kind, offset, size) - { - // This fill is from the required slot, but into a different register - // `other_reg`. So replace it with a copy from `other_reg` to `dst_reg` - // and update `dst_reg`s binding to make it the same as `other_reg`'s, so - // as to maximise the chances of future matches after this instruction. - debug_assert!(other_reg != dst_reg); - transform = - Transform::ChangeToCopyToSSA(dfg.value_type(dst_value), other_reg); - avail_env.copy_reg(other_reg, dst_reg); - } else { - // This fill creates some new binding we don't know about. Update - // `avail_env` to track it. - avail_env.bind(dst_reg, kind, offset, size); - } - } else { - // Else it's "just another instruction that writes a reg", so we'd better - // treat it as such, just as we do below for instructions that we don't handle - // specially. - invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst); - } - } - InstructionData::RegMove { src, dst, .. } => { - // These happen relatively rarely, but just frequently enough that it's worth - // tracking the copy (at the machine level, it's really a copy) in `avail_env`. - avail_env.copy_reg(*src, *dst); - } - InstructionData::RegSpill { .. 
} - | InstructionData::RegFill { .. } - | InstructionData::Call { .. } - | InstructionData::CallIndirect { .. } - | InstructionData::StackLoad { .. } - | InstructionData::StackStore { .. } - | InstructionData::Unary { - opcode: Opcode::AdjustSpDown, - .. - } - | InstructionData::UnaryImm { - opcode: Opcode::AdjustSpUpImm, - .. - } - | InstructionData::UnaryImm { - opcode: Opcode::AdjustSpDownImm, - .. - } => { - // All of these change, or might change, the memory-register bindings tracked in - // `avail_env` in some way we don't know about, or at least, we might be able to - // track, but for which the effort-to-benefit ratio seems too low to bother. So - // play safe: forget everything we know. - // - // For Call/CallIndirect, we could do better when compiling for calling - // conventions that have callee-saved registers, since bindings for them would - // remain valid across the call. - avail_env.invalidate_all(); - } - _ => { - // Invalidate: any `avail_env` entry associated with a reg written by `inst`. - invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst); - } - } - - // Actually do the transformation. - match transform { - Transform::NoChange => {} - Transform::ChangeToNopFill(arg) => { - // Load is completely redundant. Convert it to a no-op. - dfg.replace(inst).fill_nop(arg); - let ok = func.update_encoding(inst, isa).is_ok(); - debug_assert!( - ok, - "fill_nop encoding missing for this type: `{}`", - func.dfg.display_inst(inst, isa) - ); - } - Transform::ChangeToCopyToSSA(ty, reg) => { - // We already have the relevant value in some other register. Convert the - // load into a reg-reg copy. - dfg.replace(inst).copy_to_ssa(ty, reg); - let ok = func.update_encoding(inst, isa).is_ok(); - debug_assert!(ok, "copy_to_ssa encoding missing for type {}", ty); - } - } - } -} - -// ============================================================================================= -// Top level: processing of tree shaped regions - -impl RedundantReloadRemover { - // Push a clone of the top-of-stack ProcessingStackElem. This will be used to process exactly - // one Block. The diversions are created new, rather than cloned, to reflect the fact - // that diversions are local to each Block. - fn processing_stack_push(&mut self, cursor: CursorPosition) { - let avail_env = if let Some(stack_top) = self.processing_stack.last() { - stack_top.avail_env.clone() - } else { - AvailEnv::new( - self.num_regunits - .expect("processing_stack_push: num_regunits unknown!") - as usize, - ) - }; - self.processing_stack.push(ProcessingStackElem { - avail_env, - cursor, - diversions: RegDiversions::new(), - }); - } - - // This pushes the node `dst` onto the processing stack, and sets up the new - // ProcessingStackElem accordingly. But it does all that only if `dst` is part of the current - // tree *and* we haven't yet visited it. - fn processing_stack_maybe_push(&mut self, dst: Block) { - if self.nodes_in_tree.contains(dst) && !self.nodes_already_visited.contains(dst) { - if !self.processing_stack.is_empty() { - // If this isn't the outermost node in the tree (that is, the root), then it must - // have exactly one predecessor. Nodes with no predecessors are dead and not - // incorporated in any tree. Nodes with two or more predecessors are the root of - // some other tree, and visiting them as if they were part of the current tree - // would be a serious error. 
- debug_assert!(self.num_preds_per_block[dst] == ZeroOneOrMany::One); - } - self.processing_stack_push(CursorPosition::Before(dst)); - self.nodes_already_visited.insert(dst); - } - } - - // Perform redundant-reload removal on the tree shaped region of graph defined by `root` and - // `self.nodes_in_tree`. The following state is modified: `self.processing_stack`, - // `self.nodes_already_visited`, and `func.dfg`. - fn process_tree( - &mut self, - func: &mut Function, - reginfo: &RegInfo, - isa: &dyn TargetIsa, - root: Block, - ) { - debug_assert!(self.nodes_in_tree.contains(root)); - debug_assert!(self.processing_stack.is_empty()); - debug_assert!(self.nodes_already_visited.is_empty()); - - // Create the initial work item - self.processing_stack_maybe_push(root); - - while !self.processing_stack.is_empty() { - // It seems somewhat ridiculous to construct a whole new FuncCursor just so we can do - // next_inst() on it once, and then copy the resulting position back out. But use of - // a function-global FuncCursor, or of the EncCursor in struct Context, leads to - // borrow checker problems, as does including FuncCursor directly in - // ProcessingStackElem. In any case this is not as bad as it looks, since profiling - // shows that the build-insert-step-extract work is reduced to just 8 machine - // instructions in an optimised x86_64 build, presumably because rustc can inline and - // then optimise out almost all the work. - let tos = self.processing_stack.len() - 1; - let mut pos = FuncCursor::new(func).at_position(self.processing_stack[tos].cursor); - let maybe_inst = pos.next_inst(); - self.processing_stack[tos].cursor = pos.position(); - - if let Some(inst) = maybe_inst { - // Deal with this insn, possibly changing it, possibly updating the top item of - // `self.processing_stack`. - self.visit_inst(func, reginfo, isa, inst); - - // Update diversions after the insn. - self.processing_stack[tos].diversions.apply(&func.dfg[inst]); - - // If the insn can branch outside this Block, push work items on the stack for all - // target Blocks that are part of the same tree and that we haven't yet visited. - // The next iteration of this instruction-processing loop will immediately start - // work on the most recently pushed Block, and will eventually continue in this Block - // when those new items have been removed from the stack. - match func.dfg.analyze_branch(inst) { - BranchInfo::NotABranch => (), - BranchInfo::SingleDest(dst, _) => { - self.processing_stack_maybe_push(dst); - } - BranchInfo::Table(jt, default) => { - func.jump_tables[jt] - .iter() - .for_each(|dst| self.processing_stack_maybe_push(*dst)); - if let Some(dst) = default { - self.processing_stack_maybe_push(dst); - } - } - } - } else { - // We've come to the end of the current work-item (Block). We'll already have - // processed the fallthrough/continuation/whatever for it using the logic above. - // Pop it off the stack and resume work on its parent. - self.processing_stack.pop(); - } - } - } -} - -// ============================================================================================= -// Top level: perform redundant fill removal for a complete function - -impl RedundantReloadRemover { - /// Create a new remover state. 
-
-// =============================================================================================
-// Top level: perform redundant fill removal for a complete function
-
-impl RedundantReloadRemover {
-    /// Create a new remover state.
-    pub fn new() -> Self {
-        Self {
-            num_regunits: None,
-            num_preds_per_block: PrimaryMap::<Block, ZeroOneOrMany>::with_capacity(8),
-            discovery_stack: Vec::<Block>::with_capacity(16),
-            nodes_in_tree: EntitySet::<Block>::new(),
-            processing_stack: Vec::<ProcessingStackElem>::with_capacity(8),
-            nodes_already_visited: EntitySet::<Block>::new(),
-        }
-    }
-
-    /// Clear the state of the remover.
-    pub fn clear(&mut self) {
-        self.clear_for_new_function();
-    }
-
-    fn clear_for_new_function(&mut self) {
-        self.num_preds_per_block.clear();
-        self.clear_for_new_tree();
-    }
-
-    fn clear_for_new_tree(&mut self) {
-        self.discovery_stack.clear();
-        self.nodes_in_tree.clear();
-        self.processing_stack.clear();
-        self.nodes_already_visited.clear();
-    }
-
-    #[inline(never)]
-    fn do_redundant_fill_removal_on_function(
-        &mut self,
-        func: &mut Function,
-        reginfo: &RegInfo,
-        isa: &dyn TargetIsa,
-        cfg: &ControlFlowGraph,
-    ) {
-        // Fail in an obvious way if there are more than (2^32)-1 Blocks in this function.
-        let num_blocks: u32 = func.dfg.num_blocks().try_into().unwrap();
-
-        // Clear out per-function state.
-        self.clear_for_new_function();
-
-        // Create a PrimaryMap that summarises the number of predecessors for each block, as 0, 1
-        // or "many", and that also claims the entry block as having "many" predecessors.
-        self.num_preds_per_block.clear();
-        self.num_preds_per_block.reserve(num_blocks as usize);
-
-        for i in 0..num_blocks {
-            let mut pi = cfg.pred_iter(Block::from_u32(i));
-            let mut n_pi = ZeroOneOrMany::Zero;
-            if pi.next().is_some() {
-                n_pi = ZeroOneOrMany::One;
-                if pi.next().is_some() {
-                    n_pi = ZeroOneOrMany::Many;
-                    // We don't care if there are more than two preds, so stop counting now.
-                }
-            }
-            self.num_preds_per_block.push(n_pi);
-        }
-        debug_assert!(self.num_preds_per_block.len() == num_blocks as usize);
-
-        // The entry block must be the root of some tree, so set up the state to reflect that.
-        let entry_block = func
-            .layout
-            .entry_block()
-            .expect("do_redundant_fill_removal_on_function: entry block unknown");
-        debug_assert!(self.num_preds_per_block[entry_block] == ZeroOneOrMany::Zero);
-        self.num_preds_per_block[entry_block] = ZeroOneOrMany::Many;
-
-        // Now build and process trees.
-        for root_ix in 0..self.num_preds_per_block.len() {
-            let root = Block::from_u32(root_ix as u32);
-
-            // Build a tree for each node that has two or more preds, and ignore all other nodes.
-            if self.num_preds_per_block[root] != ZeroOneOrMany::Many {
-                continue;
-            }
-
-            // Clear out per-tree state.
-            self.clear_for_new_tree();
-
-            // Discovery phase: build the tree, as `root` and `self.nodes_in_tree`.
-            self.add_nodes_to_tree(cfg, root);
-            debug_assert!(self.nodes_in_tree.cardinality() > 0);
-            debug_assert!(self.num_preds_per_block[root] == ZeroOneOrMany::Many);
-
-            // Processing phase: do redundant-reload-removal.
-            self.process_tree(func, reginfo, isa, root);
-            debug_assert!(
-                self.nodes_in_tree.cardinality() == self.nodes_already_visited.cardinality()
-            );
-        }
-    }
-}
-
-// =============================================================================================
-// Top level: the external interface
-
-struct Context<'a> {
-    // Current instruction as well as reference to function and ISA.
-    cur: EncCursor<'a>,
-
-    // Cached ISA information. We save it here to avoid frequent virtual function calls on the
-    // `TargetIsa` trait object.
-    reginfo: RegInfo,
-
-    // References to contextual data structures we need.
-    cfg: &'a ControlFlowGraph,
-
-    // The running state.
- state: &'a mut RedundantReloadRemover, -} - -impl RedundantReloadRemover { - /// Run the remover. - pub fn run(&mut self, isa: &dyn TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) { - let ctx = Context { - cur: EncCursor::new(func, isa), - reginfo: isa.register_info(), - cfg, - state: self, - }; - let mut total_regunits = 0; - for rb in isa.register_info().banks { - total_regunits += rb.units; - } - ctx.state.num_regunits = Some(total_regunits); - ctx.state.do_redundant_fill_removal_on_function( - ctx.cur.func, - &ctx.reginfo, - ctx.cur.isa, - &ctx.cfg, - ); - } -} diff --git a/cranelift/codegen/src/regalloc/affinity.rs b/cranelift/codegen/src/regalloc/affinity.rs deleted file mode 100644 index efcc4dabfa..0000000000 --- a/cranelift/codegen/src/regalloc/affinity.rs +++ /dev/null @@ -1,126 +0,0 @@ -//! Value affinity for register allocation. -//! -//! An SSA value's affinity is a hint used to guide the register allocator. It specifies the class -//! of allocation that is likely to cause the least amount of fixup moves in order to satisfy -//! instruction operand constraints. -//! -//! For values that want to be in registers, the affinity hint includes a register class or -//! subclass. This is just a hint, and the register allocator is allowed to pick a register from a -//! larger register class instead. - -use crate::ir::{AbiParam, ArgumentLoc}; -use crate::isa::{ConstraintKind, OperandConstraint, RegClassIndex, RegInfo, TargetIsa}; -use core::fmt; - -/// Preferred register allocation for an SSA value. -#[derive(Clone, Copy, Debug)] -pub enum Affinity { - /// No affinity. - /// - /// This indicates a value that is not defined or used by any real instructions. It is a ghost - /// value that won't appear in the final program. - Unassigned, - - /// This value should be placed in a spill slot on the stack. - Stack, - - /// This value prefers a register from the given register class. - Reg(RegClassIndex), -} - -impl Default for Affinity { - fn default() -> Self { - Self::Unassigned - } -} - -impl Affinity { - /// Create an affinity that satisfies a single constraint. - /// - /// This will never create an `Affinity::Unassigned`. - /// Use the `Default` implementation for that. - pub fn new(constraint: &OperandConstraint) -> Self { - if constraint.kind == ConstraintKind::Stack { - Self::Stack - } else { - Self::Reg(constraint.regclass.into()) - } - } - - /// Create an affinity that matches an ABI argument for `isa`. - pub fn abi(arg: &AbiParam, isa: &dyn TargetIsa) -> Self { - match arg.location { - ArgumentLoc::Unassigned => Self::Unassigned, - ArgumentLoc::Reg(_) => Self::Reg(isa.regclass_for_abi_type(arg.value_type).into()), - ArgumentLoc::Stack(_) => Self::Stack, - } - } - - /// Is this the `Unassigned` affinity? - pub fn is_unassigned(self) -> bool { - match self { - Self::Unassigned => true, - _ => false, - } - } - - /// Is this the `Reg` affinity? - pub fn is_reg(self) -> bool { - match self { - Self::Reg(_) => true, - _ => false, - } - } - - /// Is this the `Stack` affinity? - pub fn is_stack(self) -> bool { - match self { - Self::Stack => true, - _ => false, - } - } - - /// Merge an operand constraint into this affinity. - /// - /// Note that this does not guarantee that the register allocator will pick a register that - /// satisfies the constraint. 
-    pub fn merge(&mut self, constraint: &OperandConstraint, reginfo: &RegInfo) {
-        match *self {
-            Self::Unassigned => *self = Self::new(constraint),
-            Self::Reg(rc) => {
-                // If the preferred register class is a subclass of the constraint, there's no need
-                // to change anything.
-                if constraint.kind != ConstraintKind::Stack && !constraint.regclass.has_subclass(rc)
-                {
-                    // If the register classes overlap, try to shrink our preferred register class.
-                    if let Some(subclass) = constraint.regclass.intersect_index(reginfo.rc(rc)) {
-                        *self = Self::Reg(subclass);
-                    }
-                }
-            }
-            Self::Stack => {}
-        }
-    }
-
-    /// Return an object that can display this value affinity, using the register info from the
-    /// target ISA.
-    pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayAffinity<'a> {
-        DisplayAffinity(self, regs.into())
-    }
-}
-
-/// Displaying an `Affinity` correctly requires the associated `RegInfo` from the target ISA.
-pub struct DisplayAffinity<'a>(Affinity, Option<&'a RegInfo>);
-
-impl<'a> fmt::Display for DisplayAffinity<'a> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        match self.0 {
-            Affinity::Unassigned => write!(f, "unassigned"),
-            Affinity::Stack => write!(f, "stack"),
-            Affinity::Reg(rci) => match self.1 {
-                Some(regs) => write!(f, "{}", regs.rc(rci)),
-                None => write!(f, "{}", rci),
-            },
-        }
-    }
-}
diff --git a/cranelift/codegen/src/regalloc/branch_splitting.rs b/cranelift/codegen/src/regalloc/branch_splitting.rs
deleted file mode 100644
index 4e9a159f3e..0000000000
--- a/cranelift/codegen/src/regalloc/branch_splitting.rs
+++ /dev/null
@@ -1,169 +0,0 @@
-//! Split the outgoing edges of conditional branches that pass parameters.
-//!
-//! One of the reasons for splitting edges is to be able to insert `copy` and `regmove`
-//! instructions between a conditional branch and the following terminator.
-use alloc::vec::Vec;
-
-use crate::cursor::{Cursor, EncCursor};
-use crate::dominator_tree::DominatorTree;
-use crate::flowgraph::ControlFlowGraph;
-use crate::ir::{Block, Function, Inst, InstBuilder, InstructionData, Opcode, ValueList};
-use crate::isa::TargetIsa;
-use crate::topo_order::TopoOrder;
-
-pub fn run(
-    isa: &dyn TargetIsa,
-    func: &mut Function,
-    cfg: &mut ControlFlowGraph,
-    domtree: &mut DominatorTree,
-    topo: &mut TopoOrder,
-) {
-    let mut ctx = Context {
-        has_new_blocks: false,
-        cur: EncCursor::new(func, isa),
-        domtree,
-        topo,
-        cfg,
-    };
-    ctx.run()
-}
-
-struct Context<'a> {
-    /// True if new blocks were inserted.
-    has_new_blocks: bool,
-
-    /// Current instruction as well as reference to function and ISA.
-    cur: EncCursor<'a>,
-
-    /// References to contextual data structures we need.
-    domtree: &'a mut DominatorTree,
-    topo: &'a mut TopoOrder,
-    cfg: &'a mut ControlFlowGraph,
-}
-
-impl<'a> Context<'a> {
-    fn run(&mut self) {
-        // Any block order will do.
-        self.topo.reset(self.cur.func.layout.blocks());
-        while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) {
-            // Branches can only be at the last or second to last position in an extended basic
-            // block.
-            self.cur.goto_last_inst(block);
-            let terminator_inst = self.cur.current_inst().expect("terminator");
-            if let Some(inst) = self.cur.prev_inst() {
-                let opcode = self.cur.func.dfg[inst].opcode();
-                if opcode.is_branch() {
-                    self.visit_conditional_branch(inst, opcode);
-                    self.cur.goto_inst(terminator_inst);
-                    self.visit_terminator_branch(terminator_inst);
-                }
-            }
-        }
-
-        // If blocks were added the cfg and domtree are inconsistent and must be recomputed.
-        if self.has_new_blocks {
-            self.cfg.compute(&self.cur.func);
-            self.domtree.compute(&self.cur.func, self.cfg);
-        }
-    }
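The edge split performed by `visit_conditional_branch` below can be sketched on a toy CFG. All types and names here are hypothetical stand-ins for the real IR; the essential move is retargeting the conditional branch at a fresh block whose only instruction forwards the original arguments.

```rust
// A schematic of the edge-splitting rewrite, on a toy CFG.
#[derive(Debug)]
enum Terminator {
    CondBranch { target: usize, args: Vec<u32> },
    Jump { target: usize, args: Vec<u32> },
}

// Split the edge carried by `branch = (block, inst)`: retarget it at a new
// empty block whose only instruction jumps on to the original target with the
// original arguments.
fn split_edge(blocks: &mut Vec<Vec<Terminator>>, branch: (usize, usize)) {
    let (block, inst) = branch;
    let new_block = blocks.len();
    let forwarded = match &mut blocks[block][inst] {
        Terminator::CondBranch { target, args } => {
            let old = (*target, std::mem::take(args));
            *target = new_block;
            Some(old)
        }
        _ => None,
    };
    if let Some((old_target, old_args)) = forwarded {
        blocks.push(vec![Terminator::Jump { target: old_target, args: old_args }]);
    }
}

fn main() {
    let mut blocks = vec![vec![Terminator::CondBranch { target: 1, args: vec![10] }], vec![]];
    split_edge(&mut blocks, (0, 0));
    // The branch now targets block 2, which jumps to block 1 with [10].
    println!("{:?}", blocks);
}
```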
-
-    fn visit_conditional_branch(&mut self, branch: Inst, opcode: Opcode) {
-        // TODO: target = dfg[branch].branch_destination().expect("conditional branch");
-        let target = match self.cur.func.dfg[branch] {
-            InstructionData::Branch { destination, .. }
-            | InstructionData::BranchIcmp { destination, .. }
-            | InstructionData::BranchInt { destination, .. }
-            | InstructionData::BranchFloat { destination, .. } => destination,
-            _ => panic!("Unexpected instruction in visit_conditional_branch"),
-        };
-
-        // If there are any parameters, split the edge.
-        if self.should_split_edge(target) {
-            // Create the block the branch will jump to.
-            let new_block = self.cur.func.dfg.make_block();
-
-            // Insert the new block before the destination, such that it can fallthrough in the
-            // target block.
-            assert_ne!(Some(target), self.cur.layout().entry_block());
-            self.cur.layout_mut().insert_block(new_block, target);
-            self.has_new_blocks = true;
-
-            // Extract the arguments of the branch instruction, and split them into the fixed
-            // branch arguments and the Block parameters.
-            let num_fixed = opcode.constraints().num_fixed_value_arguments();
-            let dfg = &mut self.cur.func.dfg;
-            let old_args: Vec<_> = {
-                let args = dfg[branch].take_value_list().expect("block parameters");
-                args.as_slice(&dfg.value_lists).iter().copied().collect()
-            };
-            let (branch_args, block_params) = old_args.split_at(num_fixed);
-
-            // Replace the branch destination with the new Block created with no parameters, and
-            // restore the branch arguments, without the original Block parameters.
-            {
-                let branch_args = ValueList::from_slice(branch_args, &mut dfg.value_lists);
-                let data = &mut dfg[branch];
-                *data.branch_destination_mut().expect("branch") = new_block;
-                data.put_value_list(branch_args);
-            }
-            let ok = self.cur.func.update_encoding(branch, self.cur.isa).is_ok();
-            debug_assert!(ok);
-
-            // Insert a jump to the original target with its arguments into the new block.
-            self.cur.goto_first_insertion_point(new_block);
-            self.cur.ins().jump(target, block_params);
-
-            // Reset the cursor to point to the branch.
-            self.cur.goto_inst(branch);
-        }
-    }
-
-    fn visit_terminator_branch(&mut self, inst: Inst) {
-        let inst_data = &self.cur.func.dfg[inst];
-        let opcode = inst_data.opcode();
-        if opcode != Opcode::Jump && opcode != Opcode::Fallthrough {
-            // This opcode is ignored as it does not have any block parameters.
-            if opcode != Opcode::IndirectJumpTableBr {
-                debug_assert!(!opcode.is_branch())
-            }
-            return;
-        }
-
-        let target = match inst_data {
-            InstructionData::Jump { destination, .. } => destination,
-            _ => panic!(
-                "Unexpected instruction {} in visit_terminator_branch",
-                self.cur.display_inst(inst)
-            ),
-        };
-        debug_assert!(self.cur.func.dfg[inst].opcode().is_terminator());
-
-        // If there are any parameters, split the edge.
-        if self.should_split_edge(*target) {
-            // Create the block the branch will jump to.
-            let new_block = self.cur.func.dfg.make_block();
-            self.has_new_blocks = true;
-
-            // Split the current block before its terminator, and insert a new jump instruction
-            // to jump to it.
-            let jump = self.cur.ins().jump(new_block, &[]);
-            self.cur.insert_block(new_block);
-
-            // Reset the cursor to point to the new terminator of the old block.
-            self.cur.goto_inst(jump);
-        }
-    }
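`visit_terminator_branch` above splits the block rather than the edge: the terminator moves into a fresh block and the old block ends in a plain jump. In miniature, with instructions as opaque strings (a hypothetical helper):

```rust
// Everything from the terminator onward moves into a fresh block, and the old
// block is terminated by a plain jump to it.
fn split_before_terminator(blocks: &mut Vec<Vec<String>>, block: usize) -> usize {
    let new_block = blocks.len();
    let term = blocks[block].pop().expect("block must have a terminator");
    blocks[block].push(format!("jump block{}", new_block));
    blocks.push(vec![term]);
    new_block
}

fn main() {
    let mut blocks = vec![vec!["v1 = iadd v2, v3".to_string(), "jump block9(v1)".to_string()]];
    split_before_terminator(&mut blocks, 0);
    assert_eq!(blocks[0], vec!["v1 = iadd v2, v3".to_string(), "jump block1".to_string()]);
    assert_eq!(blocks[1], vec!["jump block9(v1)".to_string()]);
}
```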
-
-    /// Returns whether we should introduce a new branch.
-    fn should_split_edge(&self, target: Block) -> bool {
-        // We should split the edge if the target has any parameters.
-        if !self.cur.func.dfg.block_params(target).is_empty() {
-            return true;
-        };
-
-        // Or, if the target has more than one block reaching it.
-        debug_assert!(self.cfg.pred_iter(target).next() != None);
-
-        self.cfg.pred_iter(target).nth(1).is_some()
-    }
-}
diff --git a/cranelift/codegen/src/regalloc/coalescing.rs b/cranelift/codegen/src/regalloc/coalescing.rs
deleted file mode 100644
index 512d77da0b..0000000000
--- a/cranelift/codegen/src/regalloc/coalescing.rs
+++ /dev/null
@@ -1,1106 +0,0 @@
-//! Constructing Conventional SSA form.
-//!
-//! Conventional SSA (CSSA) form is a subset of SSA form where any (transitively) phi-related
-//! values do not interfere. We construct CSSA by building virtual registers that are as large as
-//! possible and inserting copies where necessary such that all argument values passed to a block
-//! parameter will belong to the same virtual register as the block parameter value itself.
-
-use crate::cursor::{Cursor, EncCursor};
-use crate::dbg::DisplayList;
-use crate::dominator_tree::{DominatorTree, DominatorTreePreorder};
-use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
-use crate::fx::FxHashMap;
-use crate::ir::{self, InstBuilder, ProgramOrder};
-use crate::ir::{Block, ExpandedProgramPoint, Function, Inst, Value};
-use crate::isa::{EncInfo, TargetIsa};
-use crate::regalloc::affinity::Affinity;
-use crate::regalloc::liveness::Liveness;
-use crate::regalloc::virtregs::{VirtReg, VirtRegs};
-use crate::timing;
-use alloc::vec::Vec;
-use core::cmp;
-use core::fmt;
-use core::iter;
-use core::slice;
-
-// # Implementation
-//
-// The coalescing algorithm implemented follows this paper fairly closely:
-//
-//     Budimlic, Z., Cooper, K. D., Harvey, T. J., et al. (2002). Fast copy coalescing and
-//     live-range identification (Vol. 37, pp. 25–32). ACM. https://doi.org/10.1145/543552.512534
-//
-// We use a more efficient dominator forest representation (a linear stack) described here:
-//
-//     Boissinot, B., Darte, A., & Rastello, F. (2009). Revisiting out-of-SSA translation for
-//     correctness, code quality and efficiency.
-//
-// The algorithm has two main phases:
-//
-// Phase 1: Union-find.
-//
-// We use the union-find support in `VirtRegs` to build virtual registers such that block parameter
-// values always belong to the same virtual register as their corresponding block arguments at the
-// predecessor branches. Trivial interferences between parameter and argument value live ranges are
-// detected and resolved before unioning congruence classes, but non-trivial interferences between
-// values that end up in the same congruence class are possible.
-//
-// Phase 2: Dominator forests.
-//
-// The virtual registers formed in phase 1 can contain interferences that we need to detect and
-// eliminate. By ordering the values in a virtual register according to a dominator tree pre-order,
-// we can identify all interferences in the virtual register in linear time.
-//
-// Interfering values are isolated and virtual registers rebuilt.
-
-/// Data structures to be used by the coalescing pass.
-pub struct Coalescing {
-    preorder: DominatorTreePreorder,
-    forest: DomForest,
-    vcopies: VirtualCopies,
-    values: Vec<Value>,
-    predecessors: Vec<Inst>,
-    backedges: Vec<Inst>,
-}
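Phase 1 leans on the union-find support in `VirtRegs`. A minimal stand-in with path compression makes the congruence-class construction concrete (this is a sketch, not the `VirtRegs` API):

```rust
// Values are indices; unioning a block parameter with each predecessor's
// argument builds the congruence classes.
struct UnionFind {
    parent: Vec<usize>,
}

impl UnionFind {
    fn new(n: usize) -> Self {
        Self { parent: (0..n).collect() }
    }

    fn find(&mut self, x: usize) -> usize {
        if self.parent[x] != x {
            let root = self.find(self.parent[x]);
            self.parent[x] = root; // path compression
        }
        self.parent[x]
    }

    fn union(&mut self, a: usize, b: usize) {
        let (ra, rb) = (self.find(a), self.find(b));
        if ra != rb {
            self.parent[rb] = ra;
        }
    }
}

fn main() {
    // v3 is a block parameter; v1 and v2 are the arguments passed to it from
    // two predecessor branches: all three end up in one congruence class.
    let mut uf = UnionFind::new(4);
    uf.union(3, 1);
    uf.union(3, 2);
    assert_eq!(uf.find(1), uf.find(2));
}
```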
-
-/// One-shot context created once per invocation.
-struct Context<'a> {
-    isa: &'a dyn TargetIsa,
-    encinfo: EncInfo,
-
-    func: &'a mut Function,
-    cfg: &'a ControlFlowGraph,
-    domtree: &'a DominatorTree,
-    preorder: &'a DominatorTreePreorder,
-    liveness: &'a mut Liveness,
-    virtregs: &'a mut VirtRegs,
-
-    forest: &'a mut DomForest,
-    vcopies: &'a mut VirtualCopies,
-    values: &'a mut Vec<Value>,
-    predecessors: &'a mut Vec<Inst>,
-    backedges: &'a mut Vec<Inst>,
-}
-
-impl Coalescing {
-    /// Create a new coalescing pass.
-    pub fn new() -> Self {
-        Self {
-            forest: DomForest::new(),
-            preorder: DominatorTreePreorder::new(),
-            vcopies: VirtualCopies::new(),
-            values: Vec::new(),
-            predecessors: Vec::new(),
-            backedges: Vec::new(),
-        }
-    }
-
-    /// Clear all data structures in this coalescing pass.
-    pub fn clear(&mut self) {
-        self.forest.clear();
-        self.vcopies.clear();
-        self.values.clear();
-        self.predecessors.clear();
-        self.backedges.clear();
-    }
-
-    /// Convert `func` to Conventional SSA form and build virtual registers in the process.
-    pub fn conventional_ssa(
-        &mut self,
-        isa: &dyn TargetIsa,
-        func: &mut Function,
-        cfg: &ControlFlowGraph,
-        domtree: &DominatorTree,
-        liveness: &mut Liveness,
-        virtregs: &mut VirtRegs,
-    ) {
-        let _tt = timing::ra_cssa();
-        log::trace!("Coalescing for:\n{}", func.display(isa));
-        self.preorder.compute(domtree, &func.layout);
-        let mut context = Context {
-            isa,
-            encinfo: isa.encoding_info(),
-            func,
-            cfg,
-            domtree,
-            preorder: &self.preorder,
-            liveness,
-            virtregs,
-            forest: &mut self.forest,
-            vcopies: &mut self.vcopies,
-            values: &mut self.values,
-            predecessors: &mut self.predecessors,
-            backedges: &mut self.backedges,
-        };
-
-        // Run phase 1 (union-find) of the coalescing algorithm on the current function.
-        for &block in domtree.cfg_postorder() {
-            context.union_find_block(block);
-        }
-        context.finish_union_find();
-
-        // Run phase 2 (dominator forests) on the current function.
-        context.process_vregs();
-    }
-}
-
-/// Phase 1: Union-find.
-///
-/// The two entry points for phase 1 are `union_find_block()` and `finish_union_find`.
-impl<'a> Context<'a> {
-    /// Run the union-find algorithm on the parameter values of `block`.
-    ///
-    /// This ensures that all block parameters will belong to the same virtual register as their
-    /// corresponding arguments at all predecessor branches.
-    pub fn union_find_block(&mut self, block: Block) {
-        let num_params = self.func.dfg.num_block_params(block);
-        if num_params == 0 {
-            return;
-        }
-
-        self.isolate_conflicting_params(block, num_params);
-
-        for i in 0..num_params {
-            self.union_pred_args(block, i);
-        }
-    }
-
-    // Identify block parameter values that are live at one of the predecessor branches.
-    //
-    // Such a parameter value will conflict with any argument value at the predecessor branch, so
-    // it must be isolated by inserting a copy.
-    fn isolate_conflicting_params(&mut self, block: Block, num_params: usize) {
-        debug_assert_eq!(num_params, self.func.dfg.num_block_params(block));
-        // The only way a parameter value can interfere with a predecessor branch is if the block
-        // is dominating the predecessor branch. That is, we are looking for loop back-edges.
-        for BlockPredecessor {
-            block: pred_block,
-            inst: pred_inst,
-        } in self.cfg.pred_iter(block)
-        {
-            // The quick pre-order dominance check is accurate because the block parameter is
-            // defined at the top of the block before any branches.
-            if !self.preorder.dominates(block, pred_block) {
-                continue;
-            }
-
-            log::trace!(
-                " - checking {} params at back-edge {}: {}",
-                num_params,
-                pred_block,
-                self.func.dfg.display_inst(pred_inst, self.isa)
-            );
-
-            // Now `pred_inst` is known to be a back-edge, so it is possible for parameter values
-            // to be live at the use.
-            for i in 0..num_params {
-                let param = self.func.dfg.block_params(block)[i];
-                if self.liveness[param].reaches_use(pred_inst, pred_block, &self.func.layout) {
-                    self.isolate_param(block, param);
-                }
-            }
-        }
-    }
-
-    // Union block parameter value `num` with the corresponding block arguments on the predecessor
-    // branches.
-    //
-    // Detect cases where the argument value is live-in to `block` so it conflicts with any block
-    // parameter. Isolate the argument in those cases before unioning it with the parameter value.
-    fn union_pred_args(&mut self, block: Block, argnum: usize) {
-        let param = self.func.dfg.block_params(block)[argnum];
-
-        for BlockPredecessor {
-            block: pred_block,
-            inst: pred_inst,
-        } in self.cfg.pred_iter(block)
-        {
-            let arg = self.func.dfg.inst_variable_args(pred_inst)[argnum];
-
-            // Never coalesce incoming function parameters on the stack. These parameters are
-            // pre-spilled, and the rest of the virtual register would be forced to spill to the
-            // `incoming_arg` stack slot too.
-            if let ir::ValueDef::Param(def_block, def_num) = self.func.dfg.value_def(arg) {
-                if Some(def_block) == self.func.layout.entry_block()
-                    && self.func.signature.params[def_num].location.is_stack()
-                {
-                    log::trace!("-> isolating function stack parameter {}", arg);
-                    let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg);
-                    self.virtregs.union(param, new_arg);
-                    continue;
-                }
-            }
-
-            // Check for basic interference: If `arg` overlaps a value defined at the entry to
-            // `block`, it can never be used as a block argument.
-            let interference = {
-                let lr = &self.liveness[arg];
-
-                // There are two ways the argument value can interfere with `block`:
-                //
-                // 1. It is defined in a dominating block and live-in to `block`.
-                // 2. It is itself a parameter value for `block`. This case should already have
-                //    been eliminated by `isolate_conflicting_params()`.
-                debug_assert!(
-                    lr.def() != block.into(),
-                    "{} parameter {} was missed by isolate_conflicting_params()",
-                    block,
-                    arg
-                );
-
-                // The only other possibility is that `arg` is live-in to `block`.
-                lr.is_livein(block, &self.func.layout)
-            };
-
-            if interference {
-                let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg);
-                self.virtregs.union(param, new_arg);
-            } else {
-                self.virtregs.union(param, arg);
-            }
-        }
-    }
-
-    // Isolate block parameter value `param` on `block`.
-    //
-    // When `param=v10`:
-    //
-    //     block1(v10: i32):
-    //         foo
-    //
-    // becomes:
-    //
-    //     block1(v11: i32):
-    //         v10 = copy v11
-    //         foo
-    //
-    // This function inserts the copy and updates the live ranges of the old and new parameter
-    // values. Returns the new parameter value.
-    fn isolate_param(&mut self, block: Block, param: Value) -> Value {
-        debug_assert_eq!(
-            self.func.dfg.value_def(param).pp(),
-            ExpandedProgramPoint::Block(block)
-        );
-        let ty = self.func.dfg.value_type(param);
-        let new_val = self.func.dfg.replace_block_param(param, ty);
-
-        // Insert a copy instruction at the top of `block`.
-        let mut pos = EncCursor::new(self.func, self.isa).at_first_inst(block);
-        if let Some(inst) = pos.current_inst() {
-            pos.use_srcloc(inst);
-        }
-        pos.ins().with_result(param).copy(new_val);
-        let inst = pos.built_inst();
-        self.liveness.move_def_locally(param, inst);
-
-        log::trace!(
-            "-> inserted {}, following {}({}: {})",
-            pos.display_inst(inst),
-            block,
-            new_val,
-            ty
-        );
-
-        // Create a live range for the new value.
-        // TODO: Should we handle ghost values?
-        let affinity = Affinity::new(
-            &self
-                .encinfo
-                .operand_constraints(pos.func.encodings[inst])
-                .expect("Bad copy encoding")
-                .outs[0],
-        );
-        self.liveness.create_dead(new_val, block, affinity);
-        self.liveness
-            .extend_locally(new_val, block, inst, &pos.func.layout);
-
-        new_val
-    }
-
-    // Isolate the block argument `pred_val` from the predecessor `(pred_block, pred_inst)`.
-    //
-    // It is assumed that `pred_inst` is a branch instruction in `pred_block` whose `argnum`'th
-    // block argument is `pred_val`. Since the argument value interferes with the corresponding
-    // block parameter at the destination, a copy is used instead:
-    //
-    //     brnz v1, block2(v10)
-    //
-    // Becomes:
-    //
-    //     v11 = copy v10
-    //     brnz v1, block2(v11)
-    //
-    // This way the interference with the block parameter is avoided.
-    //
-    // A live range for the new value is created while the live range for `pred_val` is left
-    // unaltered.
-    //
-    // The new argument value is returned.
-    fn isolate_arg(
-        &mut self,
-        pred_block: Block,
-        pred_inst: Inst,
-        argnum: usize,
-        pred_val: Value,
-    ) -> Value {
-        let mut pos = EncCursor::new(self.func, self.isa).at_inst(pred_inst);
-        pos.use_srcloc(pred_inst);
-        let copy = pos.ins().copy(pred_val);
-        let inst = pos.built_inst();
-
-        // Create a live range for the new value.
-        // TODO: Handle affinity for ghost values.
-        let affinity = Affinity::new(
-            &self
-                .encinfo
-                .operand_constraints(pos.func.encodings[inst])
-                .expect("Bad copy encoding")
-                .outs[0],
-        );
-        self.liveness.create_dead(copy, inst, affinity);
-        self.liveness
-            .extend_locally(copy, pred_block, pred_inst, &pos.func.layout);
-
-        pos.func.dfg.inst_variable_args_mut(pred_inst)[argnum] = copy;
-
-        log::trace!(
-            "-> inserted {}, before {}: {}",
-            pos.display_inst(inst),
-            pred_block,
-            pos.display_inst(pred_inst)
-        );
-
-        copy
-    }
-
-    /// Finish the union-find part of the coalescing algorithm.
-    ///
-    /// This builds the initial set of virtual registers as the transitive/reflexive/symmetric
-    /// closure of the relation formed by block parameter-argument pairs found by
-    /// `union_find_block()`.
-    fn finish_union_find(&mut self) {
-        self.virtregs.finish_union_find(None);
-        log::trace!("After union-find phase:{}", self.virtregs);
-    }
-}
-
-/// Phase 2: Dominator forests.
-///
-/// The main entry point is `process_vregs()`.
-impl<'a> Context<'a> {
-    /// Check all virtual registers for interference and fix conflicts.
-    pub fn process_vregs(&mut self) {
-        for vreg in self.virtregs.all_virtregs() {
-            self.process_vreg(vreg);
-        }
-    }
-
-    // Check `vreg` for interferences and fix conflicts.
-    fn process_vreg(&mut self, vreg: VirtReg) {
-        if !self.check_vreg(vreg) {
-            self.synthesize_vreg(vreg);
-        }
-    }
-
-    // Check `vreg` for interferences.
-    //
-    // We use a Budimlic dominator forest to check for interferences between the values in `vreg`
-    // and identify values that should be isolated.
-    //
-    // Returns true if `vreg` is free of interference.
- fn check_vreg(&mut self, vreg: VirtReg) -> bool { - // Order the values according to the dominator pre-order of their definition. - let values = self.virtregs.sort_values(vreg, self.func, self.preorder); - log::trace!("Checking {} = {}", vreg, DisplayList(values)); - - // Now push the values in order to the dominator forest. - // This gives us the closest dominating value def for each of the values. - self.forest.clear(); - for &value in values { - let node = Node::value(value, 0, self.func); - - // Push this value and get the nearest dominating def back. - let parent = match self - .forest - .push_node(node, self.func, self.domtree, self.preorder) - { - None => continue, - Some(n) => n, - }; - - // Check for interference between `parent` and `value`. Since `parent` dominates - // `value`, we only have to check if it overlaps the definition. - if self.liveness[parent.value].overlaps_def(node.def, node.block, &self.func.layout) { - // The two values are interfering, so they can't be in the same virtual register. - log::trace!("-> interference: {} overlaps def of {}", parent, value); - return false; - } - } - - // No interference found. - true - } - - /// Destroy and rebuild `vreg` by iterative coalescing. - /// - /// When detecting that a virtual register formed in phase 1 contains interference, we have to - /// start over in a more careful way. We'll split the vreg into individual values and then - /// reassemble virtual registers using an iterative algorithm of pairwise merging. - /// - /// It is possible to recover multiple large virtual registers this way while still avoiding - /// a lot of copies. - fn synthesize_vreg(&mut self, vreg: VirtReg) { - self.vcopies.initialize( - self.virtregs.values(vreg), - self.func, - self.cfg, - self.preorder, - ); - log::trace!( - "Synthesizing {} from {} branches and params {}", - vreg, - self.vcopies.branches.len(), - DisplayList(&self.vcopies.params) - ); - self.virtregs.remove(vreg); - - while let Some(param) = self.vcopies.next_param() { - self.merge_param(param); - self.vcopies.merged_param(param, self.func); - } - } - - /// Merge block parameter value `param` with virtual registers at its predecessors. - fn merge_param(&mut self, param: Value) { - let (block, argnum) = match self.func.dfg.value_def(param) { - ir::ValueDef::Param(e, n) => (e, n), - ir::ValueDef::Result(_, _) => panic!("Expected parameter"), - }; - - // Collect all the predecessors and rearrange them. - // - // The order we process the predecessors matters because once one predecessor's virtual - // register is merged, it can cause interference with following merges. This means that the - // first predecessors processed are more likely to be copy-free. We want an ordering that - // is a) good for performance and b) as stable as possible. The pred_iter() iterator uses - // instruction numbers which is not great for reproducible test cases. - // - // First merge loop back-edges in layout order, on the theory that shorter back-edges are - // more sensitive to inserted copies. - // - // Second everything else in reverse layout order. Again, short forward branches get merged - // first. There can also be backwards branches mixed in here, though, as long as they are - // not loop backedges. 
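The merge order described above can be restated on plain integers standing in for layout positions; this is a hypothetical sketch of the ordering trick used just below:

```rust
// Back-edges are handled first in layout order, everything else afterwards in
// reverse layout order; the worklist is arranged so popping from the back
// yields exactly that sequence.
fn build_worklist(mut backedges: Vec<u32>, mut others: Vec<u32>) -> Vec<u32> {
    backedges.sort_unstable_by(|a, b| b.cmp(a)); // reversed: pops give layout order
    others.sort_unstable(); // forward: pops give reverse layout order
    others.extend_from_slice(&backedges);
    others
}

fn main() {
    let mut worklist = build_worklist(vec![30, 10], vec![5, 25, 15]);
    let mut order = Vec::new();
    while let Some(p) = worklist.pop() {
        order.push(p);
    }
    // Back-edges 10 and 30 first (layout order), then 25, 15, 5 (reverse).
    assert_eq!(order, vec![10, 30, 25, 15, 5]);
}
```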
-        debug_assert!(self.predecessors.is_empty());
-        debug_assert!(self.backedges.is_empty());
-        for BlockPredecessor {
-            block: pred_block,
-            inst: pred_inst,
-        } in self.cfg.pred_iter(block)
-        {
-            if self.preorder.dominates(block, pred_block) {
-                self.backedges.push(pred_inst);
-            } else {
-                self.predecessors.push(pred_inst);
-            }
-        }
-        // Order instructions in reverse order so we can pop them off the back.
-        {
-            let l = &self.func.layout;
-            self.backedges.sort_unstable_by(|&a, &b| l.cmp(b, a));
-            self.predecessors.sort_unstable_by(|&a, &b| l.cmp(a, b));
-            self.predecessors.extend_from_slice(&self.backedges);
-            self.backedges.clear();
-        }
-
-        while let Some(pred_inst) = self.predecessors.pop() {
-            let arg = self.func.dfg.inst_variable_args(pred_inst)[argnum];
-
-            // We want to merge the vreg containing `param` with the vreg containing `arg`.
-            if self.try_merge_vregs(param, arg) {
-                continue;
-            }
-
-            // Can't merge because of interference. Insert a copy instead.
-            let pred_block = self.func.layout.pp_block(pred_inst);
-            let new_arg = self.isolate_arg(pred_block, pred_inst, argnum, arg);
-            self.virtregs
-                .insert_single(param, new_arg, self.func, self.preorder);
-        }
-    }
-
-    /// Merge the virtual registers containing `param` and `arg` if possible.
-    ///
-    /// Use self.vcopies to check for virtual copy interference too.
-    ///
-    /// Returns true if the virtual registers are successfully merged.
-    fn try_merge_vregs(&mut self, param: Value, arg: Value) -> bool {
-        if self.virtregs.same_class(param, arg) {
-            return true;
-        }
-
-        if !self.can_merge_vregs(param, arg) {
-            return false;
-        }
-
-        let _vreg = self.virtregs.unify(self.values);
-        log::trace!("-> merged into {} = {}", _vreg, DisplayList(self.values));
-        true
-    }
-
-    /// Check if it is possible to merge two virtual registers.
-    ///
-    /// Also leave `self.values` with the ordered list of values in the merged vreg.
-    fn can_merge_vregs(&mut self, param: Value, arg: Value) -> bool {
-        // We only need an immutable function reference.
-        let func = &*self.func;
-        let domtree = self.domtree;
-        let preorder = self.preorder;
-
-        // Restrict the virtual copy nodes we look at and key the `set_id` and `value` properties
-        // of the nodes. Set_id 0 will be `param` and set_id 1 will be `arg`.
-        self.vcopies
-            .set_filter([param, arg], func, self.virtregs, preorder);
-
-        // Now create an ordered sequence of dom-forest nodes from three sources: The two virtual
-        // registers and the filtered virtual copies.
-        let v0 = self.virtregs.congruence_class(&param);
-        let v1 = self.virtregs.congruence_class(&arg);
-        log::trace!(
-            " - set 0: {}\n - set 1: {}",
-            DisplayList(v0),
-            DisplayList(v1)
-        );
-        let nodes = MergeNodes::new(
-            func,
-            preorder,
-            MergeNodes::new(
-                func,
-                preorder,
-                v0.iter().map(|&value| Node::value(value, 0, func)),
-                v1.iter().map(|&value| Node::value(value, 1, func)),
-            ),
-            self.vcopies.iter(func),
-        );
-
-        // Now push the values in order to the dominator forest.
-        // This gives us the closest dominating value def for each of the values.
-        self.forest.clear();
-        self.values.clear();
-        for node in nodes {
-            // Accumulate ordered values for the new vreg.
-            if node.is_value() {
-                self.values.push(node.value);
-            }
-
-            // Push this value and get the nearest dominating def back.
-            let parent = match self.forest.push_node(node, func, domtree, preorder) {
-                None => {
-                    if node.is_vcopy {
-                        self.forest.pop_last();
-                    }
-                    continue;
-                }
-                Some(n) => n,
-            };
-
-            if node.is_vcopy {
-                // Vcopy nodes don't represent interference if they are copies of the parent
-                // value. In that case, the node must be removed because the parent value can
-                // still be live beyond the vcopy.
-                if parent.is_vcopy || node.value == parent.value {
-                    self.forest.pop_last();
-                    continue;
-                }
-
-                // Check if the parent value interferes with the virtual copy.
-                let inst = node.def.unwrap_inst();
-                if node.set_id != parent.set_id
-                    && self.liveness[parent.value].reaches_use(inst, node.block, &self.func.layout)
-                {
-                    log::trace!(
-                        " - interference: {} overlaps vcopy at {}:{}",
-                        parent,
-                        node.block,
-                        self.func.dfg.display_inst(inst, self.isa)
-                    );
-                    return false;
-                }
-
-                // Keep this vcopy on the stack. It will save us a few interference checks.
-                continue;
-            }
-
-            // Parent vcopies never represent any interference. We only keep them on the stack to
-            // avoid an interference check against a value higher up.
-            if parent.is_vcopy {
-                continue;
-            }
-
-            // Both node and parent are values, so check for interference.
-            debug_assert!(node.is_value() && parent.is_value());
-            if node.set_id != parent.set_id
-                && self.liveness[parent.value].overlaps_def(node.def, node.block, &self.func.layout)
-            {
-                // The two values are interfering.
-                log::trace!(" - interference: {} overlaps def of {}", parent, node.value);
-                return false;
-            }
-        }
-
-        // The values vector should receive all values.
-        debug_assert_eq!(v0.len() + v1.len(), self.values.len());
-
-        // No interference found.
-        true
-    }
-}
-
-/// Dominator forest.
-///
-/// This is a utility type used for detecting interference in virtual registers, where each
-/// virtual register is a list of values ordered according to the dominator tree pre-order.
-///
-/// The idea of a dominator forest was introduced in the Budimlic paper and the linear stack
-/// representation in the Boissinot paper. Our version of the linear stack is slightly modified
-/// because we have a pre-order of the dominator tree at the block granularity, not basic block
-/// granularity.
-///
-/// Values are pushed in dominator tree pre-order of their definitions, and for each value pushed,
-/// `push_node` will return the nearest previously pushed value that dominates the definition.
-#[allow(dead_code)]
-struct DomForest {
-    // Stack representing the rightmost edge of the dominator forest so far, ending in the last
-    // element of `values`.
-    //
-    // At all times, the block of each element in the stack dominates the block of the next one.
-    stack: Vec<Node>,
-}
-
-/// A node in the dominator forest.
-#[derive(Clone, Copy, Debug)]
-#[allow(dead_code)]
-struct Node {
-    /// The program point where the live range is defined.
-    def: ExpandedProgramPoint,
-    /// block containing `def`.
-    block: Block,
-    /// Is this a virtual copy or a value?
-    is_vcopy: bool,
-    /// Set identifier.
-    set_id: u8,
-    /// For a value node: The value defined at `def`.
-    /// For a vcopy node: The relevant branch argument at `def`.
-    value: Value,
-}
-
-impl Node {
-    /// Create a node representing `value`.
-    pub fn value(value: Value, set_id: u8, func: &Function) -> Self {
-        let def = func.dfg.value_def(value).pp();
-        let block = func.layout.pp_block(def);
-        Self {
-            def,
-            block,
-            is_vcopy: false,
-            set_id,
-            value,
-        }
-    }
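The linear-stack forest is easier to digest in a reduced model. The sketch below replaces the block-granular `last_dominator` refinement of the real `push_node` (defined just below) with plain preorder entry/exit intervals, so it is a simplification under that assumption, not the actual implementation:

```rust
// Dominance via classic preorder numbers: a dominates b iff
// a.tin <= b.tin && b.tout <= a.tout. All names are hypothetical.
#[derive(Clone, Copy, Debug, PartialEq)]
struct ForestNode {
    value: u32, // which SSA value this stands for
    tin: u32,   // preorder entry number of the defining block
    tout: u32,  // preorder exit number of the defining block
}

fn dominates(a: ForestNode, b: ForestNode) -> bool {
    a.tin <= b.tin && b.tout <= a.tout
}

// Push nodes in dominator-tree preorder of their definitions; returns the
// nearest previously pushed node whose block dominates the new node's block.
fn push_node(stack: &mut Vec<ForestNode>, node: ForestNode) -> Option<ForestNode> {
    while let Some(&top) = stack.last() {
        if dominates(top, node) {
            stack.push(node);
            return Some(top);
        }
        stack.pop();
    }
    stack.push(node);
    None
}

fn main() {
    let mut stack = Vec::new();
    let root = ForestNode { value: 1, tin: 0, tout: 10 };
    let left = ForestNode { value: 2, tin: 1, tout: 4 };
    let right = ForestNode { value: 3, tin: 5, tout: 9 };
    assert_eq!(push_node(&mut stack, root), None);
    assert_eq!(push_node(&mut stack, left), Some(root));
    // `right` is not dominated by `left`, so `left` is popped first.
    assert_eq!(push_node(&mut stack, right), Some(root));
}
```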
-
-    /// Create a node representing a virtual copy.
-    pub fn vcopy(branch: Inst, value: Value, set_id: u8, func: &Function) -> Self {
-        let def = branch.into();
-        let block = func.layout.pp_block(def);
-        Self {
-            def,
-            block,
-            is_vcopy: true,
-            set_id,
-            value,
-        }
-    }
-
-    /// Is this a value node?
-    pub fn is_value(&self) -> bool {
-        !self.is_vcopy
-    }
-}
-
-impl fmt::Display for Node {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        if self.is_vcopy {
-            write!(f, "{}:vcopy({})@{}", self.set_id, self.value, self.block)
-        } else {
-            write!(f, "{}:{}@{}", self.set_id, self.value, self.block)
-        }
-    }
-}
-
-impl DomForest {
-    /// Create a new empty dominator forest.
-    pub fn new() -> Self {
-        Self { stack: Vec::new() }
-    }
-
-    /// Clear all data structures in this dominator forest.
-    pub fn clear(&mut self) {
-        self.stack.clear();
-    }
-
-    /// Add a single node to the forest.
-    ///
-    /// Update the stack so its dominance invariants are preserved. Detect a parent node on the
-    /// stack which is the closest one dominating the new node and return it.
-    fn push_node(
-        &mut self,
-        node: Node,
-        func: &Function,
-        domtree: &DominatorTree,
-        preorder: &DominatorTreePreorder,
-    ) -> Option<Node> {
-        // The stack contains the current sequence of dominating defs. Pop elements until we
-        // find one whose block dominates `node.block`.
-        while let Some(top) = self.stack.pop() {
-            if preorder.dominates(top.block, node.block) {
-                // This is the right insertion spot for `node`.
-                self.stack.push(top);
-                self.stack.push(node);
-
-                // We know here that `top.block` dominates `node.block`, and thus `node.def`.
-                // This does not necessarily mean that `top.def` dominates `node.def`, though.
-                // The `top.def` program point may be below the last branch in `top.block` that
-                // dominates `node.def`.
-                //
-                // We do know, though, that if there is a nearest value dominating `node.def`, it
-                // will be on the stack. We just need to find the last stack entry that actually
-                // dominates.
-                let mut last_dom = node.def;
-                for &n in self.stack.iter().rev().skip(1) {
-                    // If the node is defined at the block header, it does in fact dominate
-                    // everything else pushed on the stack.
-                    let def_inst = match n.def {
-                        ExpandedProgramPoint::Block(_) => return Some(n),
-                        ExpandedProgramPoint::Inst(i) => i,
-                    };
-
-                    // We need to find the last program point in `n.block` to dominate `node.def`.
-                    last_dom = match domtree.last_dominator(n.block, last_dom, &func.layout) {
-                        None => n.block.into(),
-                        Some(inst) => {
-                            if func.layout.cmp(def_inst, inst) != cmp::Ordering::Greater {
-                                return Some(n);
-                            }
-                            inst.into()
-                        }
-                    };
-                }
-
-                // No real dominator found on the stack.
-                return None;
-            }
-        }
-
-        // No dominators, start a new tree in the forest.
-        self.stack.push(node);
-        None
-    }
-
-    pub fn pop_last(&mut self) {
-        self.stack.pop().expect("Stack is empty");
-    }
-}
-
-/// Virtual copies.
-///
-/// When building a full virtual register at once, like phase 1 does with union-find, it is good
-/// enough to check for interference between the values in the full virtual register like
-/// `check_vreg()` does. However, in phase 2 we are doing pairwise merges of partial virtual
-/// registers that don't represent the full transitive closure of the block argument-parameter
-/// relation. This means that just checking for interference between values is inadequate.
-///
-/// Example:
-///
-///     v1 = iconst.i32 1
-///     brnz v10, block1(v1)
-///     v2 = iconst.i32 2
-///     brnz v11, block1(v2)
-///     return v1
-///
-///     block1(v3: i32):
-///         v4 = iadd v3, v1
-///
-/// With just value interference checking, we could build the virtual register [v3, v1] since
-/// those two values don't interfere. We can't merge v2 into this virtual register because v1 and
-/// v2 interfere. However, we can't resolve that interference either by inserting a copy:
-///
-///     v1 = iconst.i32 1
-///     brnz v10, block1(v1)
-///     v2 = iconst.i32 2
-///     v20 = copy v2              <-- new value
-///     brnz v11, block1(v20)
-///     return v1
-///
-///     block1(v3: i32):
-///         v4 = iadd v3, v1
-///
-/// The new value v20 still interferes with v1 because v1 is live across the "brnz v11" branch. We
-/// shouldn't have placed v1 and v3 in the same virtual register to begin with.
-///
-/// LLVM detects this form of interference by inserting copies in the predecessors of all phi
-/// instructions, then attempting to delete the copies. This is quite expensive because it
-/// involves creating a large number of copies and values.
-///
-/// We'll detect this form of interference with *virtual copies*: Each block parameter value that
-/// hasn't yet been fully merged with its block argument values is given a set of virtual copies
-/// at the predecessors. Any candidate value to be merged is checked for interference against
-/// both the virtual register and the virtual copies.
-///
-/// In the general case, we're checking if two virtual registers can be merged, and both can
-/// contain incomplete block parameter values with associated virtual copies.
-///
-/// The `VirtualCopies` struct represents a set of incomplete parameters and their associated
-/// virtual copies. Given two virtual registers, it can produce an ordered sequence of nodes
-/// representing the virtual copies in both vregs.
-struct VirtualCopies {
-    // Incomplete block parameters. These don't need to belong to the same virtual register.
-    params: Vec<Value>,
-
-    // Set of `(branch, destination)` pairs. These are all the predecessor branches for the
-    // blocks whose parameters can be found in `params`.
-    //
-    // Ordered by dominator tree pre-order of the branch instructions.
-    branches: Vec<(Inst, Block)>,
-
-    // Filter for the currently active node iterator.
-    //
-    // A block => (set_id, num) entry means that branches to `block` are active in `set_id` with
-    // branch argument number `num`.
-    filter: FxHashMap<Block, (u8, usize)>,
-}
-
-impl VirtualCopies {
-    /// Create an empty VirtualCopies struct.
-    pub fn new() -> Self {
-        Self {
-            params: Vec::new(),
-            branches: Vec::new(),
-            filter: FxHashMap(),
-        }
-    }
-
-    /// Clear all state.
-    pub fn clear(&mut self) {
-        self.params.clear();
-        self.branches.clear();
-        self.filter.clear();
-    }
-
-    /// Initialize virtual copies from the (interfering) values in a union-find virtual register
-    /// that is going to be broken up and reassembled iteratively.
-    ///
-    /// The values are assumed to be in domtree pre-order.
-    ///
-    /// This will extract the block parameter values and associate virtual copies with all of
-    /// them.
-    pub fn initialize(
-        &mut self,
-        values: &[Value],
-        func: &Function,
-        cfg: &ControlFlowGraph,
-        preorder: &DominatorTreePreorder,
-    ) {
-        self.clear();
-
-        let mut last_block = None;
-        for &val in values {
-            if let ir::ValueDef::Param(block, _) = func.dfg.value_def(val) {
-                self.params.push(val);
-
-                // We may have multiple parameters from the same block, but we only need to
-                // collect predecessors once. Also verify the ordering of values.
-                if let Some(last) = last_block {
-                    match preorder.pre_cmp_block(last, block) {
-                        cmp::Ordering::Less => {}
-                        cmp::Ordering::Equal => continue,
-                        cmp::Ordering::Greater => panic!("values in wrong order"),
-                    }
-                }
-
-                // This block hasn't been seen before.
-                for BlockPredecessor {
-                    inst: pred_inst, ..
-                } in cfg.pred_iter(block)
-                {
-                    self.branches.push((pred_inst, block));
-                }
-                last_block = Some(block);
-            }
-        }
-
-        // Reorder the predecessor branches as required by the dominator forest.
-        self.branches
-            .sort_unstable_by(|&(a, _), &(b, _)| preorder.pre_cmp(a, b, &func.layout));
-    }
-
-    /// Get the next unmerged parameter value.
-    pub fn next_param(&self) -> Option<Value> {
-        self.params.last().cloned()
-    }
-
-    /// Indicate that `param` is now fully merged.
-    pub fn merged_param(&mut self, param: Value, func: &Function) {
-        let popped = self.params.pop();
-        debug_assert_eq!(popped, Some(param));
-
-        // The domtree pre-order in `self.params` guarantees that all parameters defined at the
-        // same block will be adjacent. This means we can see when all parameters at a block have
-        // been merged.
-        //
-        // We don't care about the last parameter - when that is merged we are done.
-        let last = match self.params.last() {
-            None => return,
-            Some(x) => *x,
-        };
-        let block = func.dfg.value_def(param).unwrap_block();
-        if func.dfg.value_def(last).unwrap_block() == block {
-            // We're not done with `block` parameters yet.
-            return;
-        }
-
-        // Alright, we know there are no remaining `block` parameters in `self.params`. This
-        // means we can get rid of the `block` predecessors in `self.branches`. We don't have to,
-        // the `VCopyIter` will just skip them, but this reduces its workload.
-        self.branches.retain(|&(_, dest)| dest != block);
-    }
-
-    /// Set a filter for the virtual copy nodes we're generating.
-    ///
-    /// Only generate nodes for parameter values that are in the same congruence class as
-    /// `reprs`. Assign a set_id to each node corresponding to the index into `reprs` of the
-    /// parameter's congruence class.
-    pub fn set_filter(
-        &mut self,
-        reprs: [Value; 2],
-        func: &Function,
-        virtregs: &VirtRegs,
-        preorder: &DominatorTreePreorder,
-    ) {
-        self.filter.clear();
-
-        // Parameters in `self.params` are ordered according to the domtree pre-order, and they
-        // are removed from the back once they are fully merged. This means we can stop looking
-        // for parameters once we're beyond the last one.
-        let last_param = *self.params.last().expect("No more parameters");
-        let limit = func.dfg.value_def(last_param).unwrap_block();
-
-        for (set_id, repr) in reprs.iter().enumerate() {
-            let set_id = set_id as u8;
-            for &value in virtregs.congruence_class(repr) {
-                if let ir::ValueDef::Param(block, num) = func.dfg.value_def(value) {
-                    if preorder.pre_cmp_block(block, limit) == cmp::Ordering::Greater {
-                        // Stop once we're outside the bounds of `self.params`.
-                        break;
-                    }
-                    self.filter.insert(block, (set_id, num));
-                }
-            }
-        }
-    }
-
-    /// Look up the set_id and argument number for `block` in the current filter.
-    ///
-    /// Returns `None` if none of the currently active parameters are defined at `block`.
-    /// Otherwise returns `(set_id, argnum)` for an active parameter defined at `block`.
-    fn lookup(&self, block: Block) -> Option<(u8, usize)> {
-        self.filter.get(&block).cloned()
-    }
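The `filter` map consulted by `lookup` above is what drives the virtual-copy iterator defined next. A sketch using std's `HashMap` in place of `FxHashMap`, with `u32` ids standing in for entity references (all names hypothetical):

```rust
use std::collections::HashMap;

// Map a destination block to (set_id, argnum) so that each recorded branch
// can be turned into a dom-forest node, skipping inactive destinations.
fn vcopy_nodes(
    branches: &[(u32, u32)], // (branch inst, destination block)
    filter: &HashMap<u32, (u8, usize)>,
) -> Vec<(u32, u8, usize)> {
    branches
        .iter()
        .filter_map(|&(branch, dest)| {
            filter
                .get(&dest)
                .map(|&(set_id, argnum)| (branch, set_id, argnum))
        })
        .collect()
}

fn main() {
    let branches = [(100, 1), (101, 2), (102, 1)];
    let mut filter = HashMap::new();
    filter.insert(1, (0u8, 0usize)); // only block 1's parameter is active
    assert_eq!(vcopy_nodes(&branches, &filter), vec![(100, 0, 0), (102, 0, 0)]);
}
```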
-
-    /// Get an iterator of dom-forest nodes corresponding to the current filter.
-    pub fn iter<'a>(&'a self, func: &'a Function) -> VCopyIter<'a> {
-        VCopyIter {
-            func,
-            vcopies: self,
-            branches: self.branches.iter(),
-        }
-    }
-}
-
-/// Virtual copy iterator.
-///
-/// This iterator produces dom-forest nodes corresponding to the current filter in the virtual
-/// copies container.
-struct VCopyIter<'a> {
-    func: &'a Function,
-    vcopies: &'a VirtualCopies,
-    branches: slice::Iter<'a, (Inst, Block)>,
-}
-
-impl<'a> Iterator for VCopyIter<'a> {
-    type Item = Node;
-
-    fn next(&mut self) -> Option<Node> {
-        while let Some(&(branch, dest)) = self.branches.next() {
-            if let Some((set_id, argnum)) = self.vcopies.lookup(dest) {
-                let arg = self.func.dfg.inst_variable_args(branch)[argnum];
-                return Some(Node::vcopy(branch, arg, set_id, self.func));
-            }
-        }
-        None
-    }
-}
-
-/// Node-merging iterator.
-///
-/// Given two ordered sequences of nodes, yield an ordered sequence containing all of them.
-struct MergeNodes<'a, IA, IB>
-where
-    IA: Iterator<Item = Node>,
-    IB: Iterator<Item = Node>,
-{
-    a: iter::Peekable<IA>,
-    b: iter::Peekable<IB>,
-    layout: &'a ir::Layout,
-    preorder: &'a DominatorTreePreorder,
-}
-
-impl<'a, IA, IB> MergeNodes<'a, IA, IB>
-where
-    IA: Iterator<Item = Node>,
-    IB: Iterator<Item = Node>,
-{
-    pub fn new(func: &'a Function, preorder: &'a DominatorTreePreorder, a: IA, b: IB) -> Self {
-        MergeNodes {
-            a: a.peekable(),
-            b: b.peekable(),
-            layout: &func.layout,
-            preorder,
-        }
-    }
-}
-
-impl<'a, IA, IB> Iterator for MergeNodes<'a, IA, IB>
-where
-    IA: Iterator<Item = Node>,
-    IB: Iterator<Item = Node>,
-{
-    type Item = Node;
-
-    fn next(&mut self) -> Option<Node> {
-        let ord = match (self.a.peek(), self.b.peek()) {
-            (Some(a), Some(b)) => {
-                let layout = self.layout;
-                self.preorder
-                    .pre_cmp_block(a.block, b.block)
-                    .then_with(|| layout.cmp(a.def, b.def))
-            }
-            (Some(_), None) => cmp::Ordering::Less,
-            (None, Some(_)) => cmp::Ordering::Greater,
-            (None, None) => return None,
-        };
-        // When the nodes compare equal, prefer the `a` side.
-        if ord != cmp::Ordering::Greater {
-            self.a.next()
-        } else {
-            self.b.next()
-        }
-    }
-}
diff --git a/cranelift/codegen/src/regalloc/coloring.rs b/cranelift/codegen/src/regalloc/coloring.rs
deleted file mode 100644
index 2226784b25..0000000000
--- a/cranelift/codegen/src/regalloc/coloring.rs
+++ /dev/null
@@ -1,1323 +0,0 @@
-//! Register allocator coloring pass.
-//!
-//! The coloring pass assigns a physical register to every SSA value with a register affinity,
-//! under the assumption that the register pressure has been lowered sufficiently by spilling and
-//! splitting.
-//!
-//! # Preconditions
-//!
-//! The coloring pass doesn't work on arbitrary code. Certain preconditions must be satisfied:
-//!
-//! 1. All instructions must be legalized and assigned an encoding. The encoding recipe guides
-//!    the register assignments and provides exact constraints.
-//!
-//! 2. Instructions with tied operands must be in a coloring-friendly state. Specifically, the
-//!    values used by the tied operands must be killed by the instruction. This can be achieved
-//!    by inserting a `copy` to a new value immediately before the two-address instruction.
-//!
-//! 3. If a value is bound to more than one operand on the same instruction, the operand
-//!    constraints must be compatible. This can also be achieved by inserting copies so the
-//!    incompatible operands get different values.
-//!
-//! 4. The register pressure must be lowered sufficiently by inserting spill code. Register
-//!    operands are allowed to read spilled values, but each such instance must be counted as
-//!    using a register.
-//!
-//! 5.
The code must be in Conventional SSA form. Among other things, this means that values passed -//! as arguments when branching to a block must belong to the same virtual register as the -//! corresponding block argument value. -//! -//! # Iteration order -//! -//! The SSA property guarantees that whenever the live range of two values overlap, one of the -//! values will be live at the definition point of the other value. If we visit the instructions in -//! a topological order relative to the dominance relation, we can assign colors to the values -//! defined by the instruction and only consider the colors of other values that are live at the -//! instruction. -//! -//! The first time we see a branch to a block, the block's argument values are colored to match the -//! registers currently holding branch argument values passed to the predecessor branch. By -//! visiting blocks in a CFG topological order, we guarantee that at least one predecessor branch has -//! been visited before the destination block. Therefore, the block's arguments are already colored. -//! -//! The exception is the entry block whose arguments are colored from the ABI requirements. - -use crate::cursor::{Cursor, EncCursor}; -use crate::dominator_tree::DominatorTree; -use crate::flowgraph::ControlFlowGraph; -use crate::ir::{ArgumentLoc, InstBuilder, ValueDef}; -use crate::ir::{Block, Function, Inst, InstructionData, Layout, Opcode, SigRef, Value, ValueLoc}; -use crate::isa::{regs_overlap, RegClass, RegInfo, RegUnit}; -use crate::isa::{ConstraintKind, EncInfo, OperandConstraint, RecipeConstraints, TargetIsa}; -use crate::packed_option::PackedOption; -use crate::regalloc::affinity::Affinity; -use crate::regalloc::diversion::RegDiversions; -use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker}; -use crate::regalloc::liveness::Liveness; -use crate::regalloc::liverange::LiveRange; -use crate::regalloc::register_set::RegisterSet; -use crate::regalloc::solver::{Solver, SolverError}; -use crate::timing; -use core::mem; - -/// Data structures for the coloring pass. -/// -/// These are scratch space data structures that can be reused between invocations. -pub struct Coloring { - divert: RegDiversions, - solver: Solver, -} - -/// Kinds of ABI parameters. -enum AbiParams { - Parameters(SigRef), - Returns, -} - -/// Bundle of references that the coloring algorithm needs. -/// -/// Some of the needed mutable references are passed around as explicit function arguments so we -/// can avoid many fights with the borrow checker over mutable borrows of `self`. This includes the -/// `Function` and `LiveValueTracker` references. -/// -/// Immutable context information and mutable references that don't need to be borrowed across -/// method calls should go in this struct. -struct Context<'a> { - // Current instruction as well as reference to function and ISA. - cur: EncCursor<'a>, - - // Cached ISA information. - // We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object. - reginfo: RegInfo, - encinfo: EncInfo, - - // References to contextual data structures we need. - cfg: &'a ControlFlowGraph, - domtree: &'a DominatorTree, - liveness: &'a mut Liveness, - - // References to working set data structures. - // If we need to borrow out of a data structure across a method call, it must be passed as a - // function argument instead, see the `LiveValueTracker` arguments. - divert: &'a mut RegDiversions, - solver: &'a mut Solver, - - // Pristine set of registers that the allocator can use. 
- // This set remains immutable, we make clones. - usable_regs: RegisterSet, - - uses_pinned_reg: bool, -} - -impl Coloring { - /// Allocate scratch space data structures for the coloring pass. - pub fn new() -> Self { - Self { - divert: RegDiversions::new(), - solver: Solver::new(), - } - } - - /// Clear all data structures in this coloring pass. - pub fn clear(&mut self) { - self.divert.clear(); - self.solver.clear(); - } - - /// Run the coloring algorithm over `func`. - pub fn run( - &mut self, - isa: &dyn TargetIsa, - func: &mut Function, - cfg: &ControlFlowGraph, - domtree: &DominatorTree, - liveness: &mut Liveness, - tracker: &mut LiveValueTracker, - ) { - let _tt = timing::ra_coloring(); - log::trace!("Coloring for:\n{}", func.display(isa)); - let mut ctx = Context { - usable_regs: isa.allocatable_registers(func), - uses_pinned_reg: isa.flags().enable_pinned_reg(), - cur: EncCursor::new(func, isa), - reginfo: isa.register_info(), - encinfo: isa.encoding_info(), - cfg, - domtree, - liveness, - divert: &mut self.divert, - solver: &mut self.solver, - }; - ctx.run(tracker) - } -} - -impl<'a> Context<'a> { - /// Is the pinned register usage enabled, and is this register the pinned register? - #[inline] - fn is_pinned_reg(&self, rc: RegClass, reg: RegUnit) -> bool { - rc.is_pinned_reg(self.uses_pinned_reg, reg) - } - - /// Run the coloring algorithm. - fn run(&mut self, tracker: &mut LiveValueTracker) { - self.cur - .func - .locations - .resize(self.cur.func.dfg.num_values()); - - // Visit blocks in reverse post-order. We need to ensure that at least one predecessor has - // been visited before each block. That guarantees that the block arguments have been colored. - for &block in self.domtree.cfg_postorder().iter().rev() { - self.visit_block(block, tracker); - } - } - - /// Visit `block`, assuming that the immediate dominator has already been visited. - fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) { - log::trace!("Coloring {}:", block); - let mut regs = self.visit_block_header(block, tracker); - tracker.drop_dead_params(); - - // Now go through the instructions in `block` and color the values they define. - self.cur.goto_top(block); - while let Some(inst) = self.cur.next_inst() { - self.cur.use_srcloc(inst); - let opcode = self.cur.func.dfg[inst].opcode(); - if !opcode.is_ghost() { - // This is an instruction which either has an encoding or carries ABI-related - // register allocation constraints. - let enc = self.cur.func.encodings[inst]; - let constraints = self.encinfo.operand_constraints(enc); - if self.visit_inst(inst, constraints, tracker, &mut regs) { - self.replace_global_defines(inst, tracker); - // Restore cursor location after `replace_global_defines` moves it. - // We want to revisit the copy instructions it inserted. - self.cur.goto_inst(inst); - } - } else { - // This is a ghost instruction with no encoding and no extra constraints. - let (_throughs, kills) = tracker.process_ghost(inst); - self.process_ghost_kills(kills, &mut regs); - } - tracker.drop_dead(inst); - - // We are not able to insert any regmove for diversion or un-diversion after the first - // branch. Instead, we record the diversion to be restored at the entry of the next block, - // which should have a single predecessor. - if opcode.is_branch() { - // The next instruction is necessarily an unconditional branch. 
- if let Some(branch) = self.cur.next_inst() { - log::trace!( - "Skip coloring {}\n from {}\n with diversions {}", - self.cur.display_inst(branch), - regs.input.display(&self.reginfo), - self.divert.display(&self.reginfo) - ); - use crate::ir::instructions::BranchInfo::*; - let target = match self.cur.func.dfg.analyze_branch(branch) { - NotABranch | Table(_, _) => panic!( - "unexpected instruction {} after a conditional branch", - self.cur.display_inst(branch) - ), - SingleDest(block, _) => block, - }; - - // We have a single branch with a single target, and a block with a single - // predecessor. Thus we can forward the diversion set to the next block. - if self.cfg.pred_iter(target).count() == 1 { - // Transfer the diversion to the next block. - self.divert - .save_for_block(&mut self.cur.func.entry_diversions, target); - log::trace!( - "Set entry-diversion for {} to\n {}", - target, - self.divert.display(&self.reginfo) - ); - } else { - debug_assert!( - self.divert.is_empty(), - "Divert set is non-empty after the terminator." - ); - } - assert_eq!( - self.cur.next_inst(), - None, - "Unexpected instruction after a branch group." - ); - } else { - assert!(opcode.is_terminator()); - } - } - } - } - - /// Visit the `block` header. - /// - /// Initialize the set of live registers and color the arguments to `block`. - fn visit_block_header( - &mut self, - block: Block, - tracker: &mut LiveValueTracker, - ) -> AvailableRegs { - // Reposition the live value tracker and deal with the block arguments. - tracker.block_top( - block, - &self.cur.func.dfg, - self.liveness, - &self.cur.func.layout, - self.domtree, - ); - - // Copy the content of the registered diversions to be reused at the - // entry of this basic block. - self.divert.at_block(&self.cur.func.entry_diversions, block); - log::trace!( - "Start {} with entry-diversion set to\n {}", - block, - self.divert.display(&self.reginfo) - ); - - if self.cur.func.layout.entry_block() == Some(block) { - // Parameters on the entry block have ABI constraints. - self.color_entry_params(tracker.live()) - } else { - // The live-ins and parameters of a non-entry block have already been assigned a register. - // Reconstruct the allocatable set. - self.livein_regs(tracker.live()) - } - } - - /// Initialize a set of allocatable registers from the values that are live-in to a block. - /// These values must already be colored when the dominating blocks were processed. - /// - /// Also process the block arguments which were colored when the first predecessor branch was - /// encountered. - fn livein_regs(&self, live: &[LiveValue]) -> AvailableRegs { - // Start from the registers that are actually usable. We don't want to include any reserved - // registers in the set. 
-        let mut regs = AvailableRegs::new(&self.usable_regs);
-
-        for lv in live.iter().filter(|lv| !lv.is_dead) {
-            log::trace!(
-                "Live-in: {}:{} in {}",
-                lv.value,
-                lv.affinity.display(&self.reginfo),
-                self.divert
-                    .get(lv.value, &self.cur.func.locations)
-                    .display(&self.reginfo)
-            );
-            if let Affinity::Reg(rci) = lv.affinity {
-                let rc = self.reginfo.rc(rci);
-                let loc = self.cur.func.locations[lv.value];
-                let reg = match loc {
-                    ValueLoc::Reg(reg) => reg,
-                    ValueLoc::Unassigned => panic!("Live-in {} wasn't assigned", lv.value),
-                    ValueLoc::Stack(ss) => {
-                        panic!("Live-in {} is in {}, should be register", lv.value, ss)
-                    }
-                };
-                if lv.is_local {
-                    regs.take(rc, reg, lv.is_local);
-                } else {
-                    let loc = self.divert.get(lv.value, &self.cur.func.locations);
-                    let reg_divert = match loc {
-                        ValueLoc::Reg(reg) => reg,
-                        ValueLoc::Unassigned => {
-                            panic!("Diversion: Live-in {} wasn't assigned", lv.value)
-                        }
-                        ValueLoc::Stack(ss) => panic!(
-                            "Diversion: Live-in {} is in {}, should be register",
-                            lv.value, ss
-                        ),
-                    };
-                    regs.take_divert(rc, reg, reg_divert);
-                }
-            }
-        }
-
-        regs
-    }
-
-    /// Color the parameters on the entry block.
-    ///
-    /// These are function parameters that should already have assigned register units in the
-    /// function signature.
-    ///
-    /// Return the set of remaining allocatable registers after filtering out the dead arguments.
-    fn color_entry_params(&mut self, args: &[LiveValue]) -> AvailableRegs {
-        let sig = &self.cur.func.signature;
-        debug_assert_eq!(sig.params.len(), args.len());
-
-        let mut regs = AvailableRegs::new(&self.usable_regs);
-
-        for (lv, abi) in args.iter().zip(&sig.params) {
-            match lv.affinity {
-                Affinity::Reg(rci) => {
-                    let rc = self.reginfo.rc(rci);
-                    if let ArgumentLoc::Reg(reg) = abi.location {
-                        if !lv.is_dead {
-                            regs.take(rc, reg, lv.is_local);
-                        }
-                        self.cur.func.locations[lv.value] = ValueLoc::Reg(reg);
-                    } else {
-                        // This should have been fixed by the reload pass.
-                        panic!(
-                            "Entry arg {} has {} affinity, but ABI {}",
-                            lv.value,
-                            lv.affinity.display(&self.reginfo),
-                            abi.display(&self.reginfo)
-                        );
-                    }
-                }
-                // The spiller will have assigned an incoming stack slot already.
-                Affinity::Stack => debug_assert!(abi.location.is_stack()),
-                // This is a ghost value, unused in the function. Don't assign it to a location
-                // either.
-                Affinity::Unassigned => {}
-            }
-        }
-
-        regs
-    }
-
-    /// Program the input-side ABI constraints for `inst` into the constraint solver.
-    ///
-    /// ABI constraints are the fixed register assignments used for calls and returns.
-    fn program_input_abi(&mut self, inst: Inst, abi_params: AbiParams) {
-        let abi_types = match abi_params {
-            AbiParams::Parameters(sig) => &self.cur.func.dfg.signatures[sig].params,
-            AbiParams::Returns => &self.cur.func.signature.returns,
-        };
-
-        for (abi, &value) in abi_types
-            .iter()
-            .zip(self.cur.func.dfg.inst_variable_args(inst))
-        {
-            if let ArgumentLoc::Reg(reg) = abi.location {
-                if let Affinity::Reg(rci) = self
-                    .liveness
-                    .get(value)
-                    .expect("ABI register must have live range")
-                    .affinity
-                {
-                    let rc = self.reginfo.rc(rci);
-                    let cur_reg = self.divert.reg(value, &self.cur.func.locations);
-                    self.solver.reassign_in(value, rc, cur_reg, reg);
-                } else {
-                    panic!("ABI argument {} should be in a register", value);
-                }
-            }
-        }
-    }
-
-    /// Color the values defined by `inst` and insert any necessary shuffle code to satisfy
-    /// instruction constraints.
-    ///
-    /// Update `regs` to reflect the allocated registers after `inst`, including removing any dead
-    /// or killed values from the set.
-    ///
-    /// Returns true when the global values defined by `inst` must be replaced by local values.
-    fn visit_inst(
-        &mut self,
-        inst: Inst,
-        constraints: Option<&RecipeConstraints>,
-        tracker: &mut LiveValueTracker,
-        regs: &mut AvailableRegs,
-    ) -> bool {
-        log::trace!(
-            "Coloring {}\n from {}",
-            self.cur.display_inst(inst),
-            regs.input.display(&self.reginfo),
-        );
-
-        // Block whose arguments should be colored to match the current branch instruction's
-        // arguments.
-        let mut color_dest_args = None;
-
-        // Program the solver with register constraints for the input side.
-        self.solver.reset(&regs.input);
-
-        if let Some(constraints) = constraints {
-            self.program_input_constraints(inst, constraints.ins);
-        }
-
-        let call_sig = self.cur.func.dfg.call_signature(inst);
-        if let Some(sig) = call_sig {
-            self.program_input_abi(inst, AbiParams::Parameters(sig));
-        } else if self.cur.func.dfg[inst].opcode().is_return() {
-            self.program_input_abi(inst, AbiParams::Returns);
-        } else if self.cur.func.dfg[inst].opcode().is_branch() {
-            // This is a branch, so we need to make sure that globally live values are in their
-            // global registers. For blocks that take arguments, we also need to place the argument
-            // values in the expected registers.
-            if let Some(dest) = self.cur.func.dfg[inst].branch_destination() {
-                if self.program_block_arguments(inst, dest) {
-                    color_dest_args = Some(dest);
-                }
-            } else {
-                // This is a multi-way branch like `br_table`. We only support arguments on
-                // single-destination branches.
-                debug_assert_eq!(
-                    self.cur.func.dfg.inst_variable_args(inst).len(),
-                    0,
-                    "Can't handle block arguments: {}",
-                    self.cur.display_inst(inst)
-                );
-                self.undivert_regs(|lr, _| !lr.is_local());
-            }
-        }
-
-        if self.solver.has_fixed_input_conflicts() {
-            self.divert_fixed_input_conflicts(tracker.live());
-        }
-
-        self.solver.inputs_done();
-
-        // Update the live value tracker with this instruction.
-        let (throughs, kills, defs) = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
-
-        // Get rid of the killed values.
-        for lv in kills {
-            if let Affinity::Reg(rci) = lv.affinity {
-                let rc = self.reginfo.rc(rci);
-                let reg = self.divert.reg(lv.value, &self.cur.func.locations);
-
-                if self.is_pinned_reg(rc, reg) {
-                    // Don't kill the pinned reg, either in the local or global register sets.
-                    debug_assert!(lv.is_local, "pinned register SSA value can't be global");
-                    continue;
-                }
-
-                log::trace!(
-                    " kill {} in {} ({} {})",
-                    lv.value,
-                    self.reginfo.display_regunit(reg),
-                    if lv.is_local { "local" } else { "global" },
-                    rc
-                );
-                self.solver.add_kill(lv.value, rc, reg);
-
-                // Update the global register set which has no diversions.
-                if !lv.is_local {
-                    regs.global
-                        .free(rc, self.cur.func.locations[lv.value].unwrap_reg());
-                }
-            }
-        }
-
-        // This aligns with the " from" line at the top of the function.
-        log::trace!(" glob {}", regs.global.display(&self.reginfo));
-
-        // This flag is set when the solver failed to find a solution for the global defines that
-        // doesn't interfere with `regs.global`. We need to rewrite all of `inst`'s global defines
-        // as local defines followed by copies.
-        let mut replace_global_defines = false;
-
-        // Program the fixed output constraints before the general defines. This allows us to
-        // detect conflicts between fixed outputs and tied operands where the input value hasn't
-        // been converted to a solver variable.
-        if let Some(constraints) = constraints {
-            if constraints.fixed_outs {
-                self.program_fixed_outputs(
-                    constraints.outs,
-                    defs,
-                    throughs,
-                    &mut replace_global_defines,
-                    &regs.global,
-                );
-            }
-        }
-
-        if let Some(sig) = call_sig {
-            self.program_output_abi(
-                sig,
-                defs,
-                throughs,
-                &mut replace_global_defines,
-                &regs.global,
-            );
-        }
-
-        if let Some(constraints) = constraints {
-            self.program_output_constraints(
-                inst,
-                constraints.outs,
-                defs,
-                &mut replace_global_defines,
-                &regs.global,
-            );
-        }
-
-        // Finally, we've fully programmed the constraint solver.
-        // We expect a quick solution in most cases.
-        let is_reload = match &self.cur.func.dfg[inst] {
-            InstructionData::Unary {
-                opcode: Opcode::Fill,
-                ..
-            } => true,
-            _ => false,
-        };
-
-        let output_regs = self
-            .solver
-            .quick_solve(&regs.global, is_reload)
-            .unwrap_or_else(|_| {
-                log::trace!("quick_solve failed for {}", self.solver);
-                self.iterate_solution(
-                    throughs,
-                    &regs.global,
-                    &mut replace_global_defines,
-                    is_reload,
-                )
-            });
-
-        // The solution and/or fixed input constraints may require us to shuffle the set of live
-        // registers around.
-        self.shuffle_inputs(&mut regs.input);
-
-        // If this is the first time we branch to `dest`, color its arguments to match the current
-        // register state.
-        if let Some(dest) = color_dest_args {
-            self.color_block_params(inst, dest);
-        }
-
-        // Apply the solution to the defs.
-        for v in self.solver.vars().iter().filter(|&v| v.is_define()) {
-            self.cur.func.locations[v.value] = ValueLoc::Reg(v.solution);
-        }
-
-        // Tied defs are not part of the solution above.
-        // Copy register assignments from tied inputs to tied outputs.
-        if let Some(constraints) = constraints {
-            if constraints.tied_ops {
-                for (constraint, lv) in constraints.outs.iter().zip(defs) {
-                    if let ConstraintKind::Tied(num) = constraint.kind {
-                        let arg = self.cur.func.dfg.inst_args(inst)[num as usize];
-                        let reg = self.divert.reg(arg, &self.cur.func.locations);
-                        self.cur.func.locations[lv.value] = ValueLoc::Reg(reg);
-                    }
-                }
-            }
-        }
-
-        // Update `regs` for the next instruction.
-        regs.input = output_regs;
-        for lv in defs {
-            let loc = self.cur.func.locations[lv.value];
-            log::trace!(
-                " color {} -> {}{}",
-                lv.value,
-                loc.display(&self.reginfo),
-                if lv.is_local {
-                    ""
-                } else if replace_global_defines {
-                    " (global to be replaced)"
-                } else {
-                    " (global)"
-                }
-            );
-
-            if let Affinity::Reg(rci) = lv.affinity {
-                let rc = self.reginfo.rc(rci);
-                let reg = loc.unwrap_reg();
-
-                debug_assert!(
-                    !self.is_pinned_reg(rc, reg)
-                        || self.cur.func.dfg[inst].opcode() == Opcode::GetPinnedReg,
-                    "pinned register may not be part of outputs for '{}'.",
-                    self.cur.func.dfg[inst].opcode()
-                );
-
-                if self.is_pinned_reg(rc, reg) {
-                    continue;
-                }
-
-                // Remove the dead defs.
-                if lv.endpoint == inst {
-                    regs.input.free(rc, reg);
-                    debug_assert!(lv.is_local);
-                }
-
-                // Track globals in their undiverted locations.
-                if !lv.is_local && !replace_global_defines {
-                    regs.global.take(rc, reg);
-                }
-            }
-        }
-
-        self.forget_diverted(kills);
-
-        replace_global_defines
-    }
-
-    /// Program the input-side constraints for `inst` into the constraint solver.
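///
/// As a rough summary of the match below (a sketch of the existing behavior, not new logic):
///
///     FixedReg(r) | FixedTied(r) => reassign the input to `r`, even if it is already there
///     Tied(_) | Reg              => add a solver variable only if the current register is unsuitable
///     Stack                      => filtered out before the loop
///
/// The fixed cases are programmed unconditionally because an identity assignment pins the
/// value and prevents it from later being converted into a free-roaming solver variable.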
- fn program_input_constraints(&mut self, inst: Inst, constraints: &[OperandConstraint]) { - for (constraint, &arg_val) in constraints - .iter() - .zip(self.cur.func.dfg.inst_args(inst)) - .filter(|&(constraint, _)| constraint.kind != ConstraintKind::Stack) - { - // Reload pass is supposed to ensure that all arguments to register operands are - // already in a register. - let cur_reg = self.divert.reg(arg_val, &self.cur.func.locations); - match constraint.kind { - ConstraintKind::FixedReg(regunit) => { - // Add the fixed constraint even if `cur_reg == regunit`. - // It is possible that we will want to convert the value to a variable later, - // and this identity assignment prevents that from happening. - self.solver - .reassign_in(arg_val, constraint.regclass, cur_reg, regunit); - } - ConstraintKind::FixedTied(regunit) => { - // The pinned register may not be part of a fixed tied requirement. If this - // becomes the case, then it must be changed to a different register. - debug_assert!( - !self.is_pinned_reg(constraint.regclass, regunit), - "see comment above" - ); - // See comment right above. - self.solver - .reassign_in(arg_val, constraint.regclass, cur_reg, regunit); - } - ConstraintKind::Tied(_) => { - if self.is_pinned_reg(constraint.regclass, cur_reg) { - // Divert the pinned register; it shouldn't be reused for a tied input. - if self.solver.can_add_var(constraint.regclass, cur_reg) { - self.solver.add_var(arg_val, constraint.regclass, cur_reg); - } - } else if !constraint.regclass.contains(cur_reg) { - self.solver.add_var(arg_val, constraint.regclass, cur_reg); - } - } - ConstraintKind::Reg => { - if !constraint.regclass.contains(cur_reg) { - self.solver.add_var(arg_val, constraint.regclass, cur_reg); - } - } - ConstraintKind::Stack => unreachable!(), - } - } - } - - /// Program the complete set of input constraints into the solver. - /// - /// The `program_input_constraints()` function above will not tell the solver about any values - /// that are already assigned to appropriate registers. This is normally fine, but if we want - /// to add additional variables to help the solver, we need to make sure that they are - /// constrained properly. - /// - /// This function completes the work of `program_input_constraints()` by calling `add_var` for - /// all values used by the instruction. - fn program_complete_input_constraints(&mut self) { - let inst = self.cur.current_inst().expect("Not on an instruction"); - let constraints = self - .encinfo - .operand_constraints(self.cur.func.encodings[inst]) - .expect("Current instruction not encoded") - .ins; - - for (constraint, &arg_val) in constraints.iter().zip(self.cur.func.dfg.inst_args(inst)) { - match constraint.kind { - ConstraintKind::Reg | ConstraintKind::Tied(_) => { - let cur_reg = self.divert.reg(arg_val, &self.cur.func.locations); - - // This is the opposite condition of `program_input_constraints()`. The pinned - // register mustn't be added back as a variable. - if constraint.regclass.contains(cur_reg) - && !self.is_pinned_reg(constraint.regclass, cur_reg) - { - // This code runs after calling `solver.inputs_done()` so we must identify - // the new variable as killed or live-through. 
- let layout = &self.cur.func.layout; - if self.liveness[arg_val].killed_at(inst, layout.pp_block(inst), layout) { - self.solver - .add_killed_var(arg_val, constraint.regclass, cur_reg); - } else { - self.solver - .add_through_var(arg_val, constraint.regclass, cur_reg); - } - } - } - ConstraintKind::FixedReg(_) - | ConstraintKind::FixedTied(_) - | ConstraintKind::Stack => {} - } - } - } - - /// Prepare for a branch to `dest`. - /// - /// 1. Any values that are live-in to `dest` must be un-diverted so they live in their globally - /// assigned register. - /// 2. If the `dest` block takes arguments, reassign the branch argument values to the matching - /// registers. - /// - /// Returns true if this is the first time a branch to `dest` is seen, so the `dest` argument - /// values should be colored after `shuffle_inputs`. - fn program_block_arguments(&mut self, inst: Inst, dest: Block) -> bool { - // Find diverted registers that are live-in to `dest` and reassign them to their global - // home. - // - // Values with a global live range that are not live in to `dest` could appear as branch - // arguments, so they can't always be un-diverted. - self.undivert_regs(|lr, layout| lr.is_livein(dest, layout)); - - // Now handle the block arguments. - let br_args = self.cur.func.dfg.inst_variable_args(inst); - let dest_args = self.cur.func.dfg.block_params(dest); - debug_assert_eq!(br_args.len(), dest_args.len()); - for (&dest_arg, &br_arg) in dest_args.iter().zip(br_args) { - // The first time we encounter a branch to `dest`, we get to pick the location. The - // following times we see a branch to `dest`, we must follow suit. - match self.cur.func.locations[dest_arg] { - ValueLoc::Unassigned => { - // This is the first branch to `dest`, so we should color `dest_arg` instead of - // `br_arg`. However, we don't know where `br_arg` will end up until - // after `shuffle_inputs`. See `color_block_params` below. - // - // It is possible for `dest_arg` to have no affinity, and then it should simply - // be ignored. - if self.liveness[dest_arg].affinity.is_reg() { - return true; - } - } - ValueLoc::Reg(dest_reg) => { - // We've branched to `dest` before. Make sure we use the correct argument - // registers by reassigning `br_arg`. - if let Affinity::Reg(rci) = self.liveness[br_arg].affinity { - let rc = self.reginfo.rc(rci); - let br_reg = self.divert.reg(br_arg, &self.cur.func.locations); - self.solver.reassign_in(br_arg, rc, br_reg, dest_reg); - } else { - panic!("Branch argument {} is not in a register", br_arg); - } - } - ValueLoc::Stack(ss) => { - // The spiller should already have given us identical stack slots. - debug_assert_eq!(ValueLoc::Stack(ss), self.cur.func.locations[br_arg]); - } - } - } - - // No `dest` arguments need coloring. - false - } - - /// Knowing that we've never seen a branch to `dest` before, color its parameters to match our - /// register state. - /// - /// This function is only called when `program_block_arguments()` returned `true`. 
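///
/// For example (hypothetical values and registers): on the first branch seen to `block3`,
/// say
///
///     brnz v0, block3(v7)
///
/// where `v7` ends up in `%rdx` once `shuffle_inputs` has run, `block3`'s parameter is
/// simply colored `%rdx` here. Every later branch to `block3` then goes through the
/// `ValueLoc::Reg` arm of `program_block_arguments()` and must move its argument into `%rdx`.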
-    fn color_block_params(&mut self, inst: Inst, dest: Block) {
-        let br_args = self.cur.func.dfg.inst_variable_args(inst);
-        let dest_args = self.cur.func.dfg.block_params(dest);
-        debug_assert_eq!(br_args.len(), dest_args.len());
-        for (&dest_arg, &br_arg) in dest_args.iter().zip(br_args) {
-            match self.cur.func.locations[dest_arg] {
-                ValueLoc::Unassigned => {
-                    if self.liveness[dest_arg].affinity.is_reg() {
-                        let br_reg = self.divert.reg(br_arg, &self.cur.func.locations);
-                        self.cur.func.locations[dest_arg] = ValueLoc::Reg(br_reg);
-                    }
-                }
-                ValueLoc::Reg(_) => panic!("{} arg {} already colored", dest, dest_arg),
-                // Spilled value consistency is verified by `program_block_arguments()` above.
-                ValueLoc::Stack(_) => {}
-            }
-        }
-    }
-
-    /// Find all diverted registers where `pred` returns `true` and undo their diversion so they
-    /// are reallocated to their global register assignments.
-    fn undivert_regs<Pred>(&mut self, mut pred: Pred)
-    where
-        Pred: FnMut(&LiveRange, &Layout) -> bool,
-    {
-        for (&value, rdiv) in self.divert.iter() {
-            let lr = self
-                .liveness
-                .get(value)
-                .expect("Missing live range for diverted register");
-            if pred(lr, &self.cur.func.layout) {
-                if let Affinity::Reg(rci) = lr.affinity {
-                    let rc = self.reginfo.rc(rci);
-                    // Stack diversions should not be possible here. They only live transiently
-                    // during `shuffle_inputs()`.
-                    self.solver.reassign_in(
-                        value,
-                        rc,
-                        rdiv.to.unwrap_reg(),
-                        rdiv.from.unwrap_reg(),
-                    );
-                } else {
-                    panic!(
-                        "Diverted register {} with {} affinity",
-                        value,
-                        lr.affinity.display(&self.reginfo)
-                    );
-                }
-            }
-        }
-    }
-
-    /// Find existing live values that conflict with the fixed input register constraints programmed
-    /// into the constraint solver. Convert them to solver variables so they can be diverted.
-    fn divert_fixed_input_conflicts(&mut self, live: &[LiveValue]) {
-        for lv in live {
-            if let Affinity::Reg(rci) = lv.affinity {
-                let toprc = self.reginfo.toprc(rci);
-                let reg = self.divert.reg(lv.value, &self.cur.func.locations);
-                if self.solver.is_fixed_input_conflict(toprc, reg) {
-                    log::trace!(
-                        "adding var to divert fixed input conflict for {}",
-                        toprc.info.display_regunit(reg)
-                    );
-                    self.solver.add_var(lv.value, toprc, reg);
-                }
-            }
-        }
-    }
-
-    /// Program any fixed-register output constraints into the solver. This may also detect
-    /// conflicts between live-through registers and fixed output registers. These live-through
-    /// values need to be turned into solver variables so they can be reassigned.
-    fn program_fixed_outputs(
-        &mut self,
-        constraints: &[OperandConstraint],
-        defs: &[LiveValue],
-        throughs: &[LiveValue],
-        replace_global_defines: &mut bool,
-        global_regs: &RegisterSet,
-    ) {
-        for (constraint, lv) in constraints.iter().zip(defs) {
-            match constraint.kind {
-                ConstraintKind::FixedReg(reg) | ConstraintKind::FixedTied(reg) => {
-                    self.add_fixed_output(lv.value, constraint.regclass, reg, throughs);
-                    if !lv.is_local && !global_regs.is_avail(constraint.regclass, reg) {
-                        log::trace!(
-                            "Fixed output {} in {}:{} is not available in global regs",
-                            lv.value,
-                            constraint.regclass,
-                            self.reginfo.display_regunit(reg)
-                        );
-                        *replace_global_defines = true;
-                    }
-                }
-                ConstraintKind::Reg | ConstraintKind::Tied(_) | ConstraintKind::Stack => {}
-            }
-        }
-    }
-
-    /// Program the output-side ABI constraints for `inst` into the constraint solver.
-    ///
-    /// That means return values for a call instruction.
- fn program_output_abi( - &mut self, - sig: SigRef, - defs: &[LiveValue], - throughs: &[LiveValue], - replace_global_defines: &mut bool, - global_regs: &RegisterSet, - ) { - // It's technically possible for a call instruction to have fixed results before the - // variable list of results, but we have no known instances of that. - // Just assume all results are variable return values. - debug_assert_eq!(defs.len(), self.cur.func.dfg.signatures[sig].returns.len()); - for (i, lv) in defs.iter().enumerate() { - let abi = self.cur.func.dfg.signatures[sig].returns[i]; - if let ArgumentLoc::Reg(reg) = abi.location { - if let Affinity::Reg(rci) = lv.affinity { - let rc = self.reginfo.rc(rci); - self.add_fixed_output(lv.value, rc, reg, throughs); - if !lv.is_local && !global_regs.is_avail(rc, reg) { - log::trace!( - "ABI output {} in {}:{} is not available in global regs", - lv.value, - rc, - self.reginfo.display_regunit(reg) - ); - *replace_global_defines = true; - } - } else { - panic!("ABI argument {} should be in a register", lv.value); - } - } - } - } - - /// Add a single fixed output value to the solver. - fn add_fixed_output( - &mut self, - value: Value, - rc: RegClass, - reg: RegUnit, - throughs: &[LiveValue], - ) { - // Pinned register is already unavailable in the solver, since it is copied in the - // available registers on entry. - if !self.is_pinned_reg(rc, reg) && !self.solver.add_fixed_output(rc, reg) { - // The fixed output conflicts with some of the live-through registers. - for lv in throughs { - if let Affinity::Reg(rci) = lv.affinity { - let toprc2 = self.reginfo.toprc(rci); - let reg2 = self.divert.reg(lv.value, &self.cur.func.locations); - if regs_overlap(rc, reg, toprc2, reg2) { - // This live-through value is interfering with the fixed output assignment. - // Convert it to a solver variable. - self.solver.add_through_var(lv.value, toprc2, reg2); - } - } - } - - let ok = self.solver.add_fixed_output(rc, reg); - debug_assert!(ok, "Couldn't clear fixed output interference for {}", value); - } - self.cur.func.locations[value] = ValueLoc::Reg(reg); - } - - /// Program the output-side constraints for `inst` into the constraint solver. - /// - /// It is assumed that all fixed outputs have already been handled. - fn program_output_constraints( - &mut self, - inst: Inst, - constraints: &[OperandConstraint], - defs: &[LiveValue], - replace_global_defines: &mut bool, - global_regs: &RegisterSet, - ) { - for (constraint, lv) in constraints.iter().zip(defs) { - match constraint.kind { - ConstraintKind::FixedReg(_) - | ConstraintKind::FixedTied(_) - | ConstraintKind::Stack => continue, - ConstraintKind::Reg => { - self.solver - .add_def(lv.value, constraint.regclass, !lv.is_local); - } - ConstraintKind::Tied(num) => { - // Find the input operand we're tied to. - // The solver doesn't care about the output value. - let arg = self.cur.func.dfg.inst_args(inst)[num as usize]; - let reg = self.divert.reg(arg, &self.cur.func.locations); - - if let Some(reg) = - self.solver - .add_tied_input(arg, constraint.regclass, reg, !lv.is_local) - { - // The value we're tied to has been assigned to a fixed register. - // We need to make sure that fixed output register is compatible with the - // global register set. 
- if !lv.is_local && !global_regs.is_avail(constraint.regclass, reg) { - log::trace!( - "Tied output {} in {}:{} is not available in global regs", - lv.value, - constraint.regclass, - self.reginfo.display_regunit(reg) - ); - *replace_global_defines = true; - } - } - } - } - } - } - - /// Try harder to find a solution to the constraint problem since `quick_solve()` failed. - /// - /// We may need to move more registers around before a solution is possible. Use an iterative - /// algorithm that adds one more variable until a solution can be found. - fn iterate_solution( - &mut self, - throughs: &[LiveValue], - global_regs: &RegisterSet, - replace_global_defines: &mut bool, - is_reload: bool, - ) -> RegisterSet { - // Make sure `try_add_var()` below doesn't create a variable with too loose constraints. - self.program_complete_input_constraints(); - - loop { - match self.solver.real_solve(global_regs, is_reload) { - Ok(regs) => return regs, - Err(SolverError::Divert(rc)) => { - // Do we have any live-through `rc` registers that are not already variables? - let added = self.try_add_var(rc, throughs); - debug_assert!(added, "Ran out of registers in {}", rc); - } - Err(SolverError::Global(_value)) => { - log::trace!( - "Not enough global registers for {}, trying as local", - _value - ); - // We'll clear the `is_global` flag on all solver variables and instead make a - // note to replace all global defines with local defines followed by a copy. - *replace_global_defines = true; - self.solver.clear_all_global_flags(); - } - }; - } - } - - /// Try to add an `rc` variable to the solver from the `throughs` set. - fn try_add_var(&mut self, rc: RegClass, throughs: &[LiveValue]) -> bool { - log::trace!("Trying to add a {} reg from {} values", rc, throughs.len()); - - for lv in throughs { - if let Affinity::Reg(rci) = lv.affinity { - // The new variable gets to roam the whole top-level register class because it is - // not actually constrained by the instruction. We just want it out of the way. - let toprc2 = self.reginfo.toprc(rci); - let reg2 = self.divert.reg(lv.value, &self.cur.func.locations); - if rc.contains(reg2) - && self.solver.can_add_var(toprc2, reg2) - && !self.is_live_on_outgoing_edge(lv.value) - { - self.solver.add_through_var(lv.value, toprc2, reg2); - return true; - } - } - } - - false - } - - /// Determine if `value` is live on a CFG edge from the current instruction. - /// - /// This means that the current instruction is a branch and `value` is live in to one of the - /// branch destinations. Branch arguments and block parameters are not considered live on the - /// edge. - fn is_live_on_outgoing_edge(&self, value: Value) -> bool { - use crate::ir::instructions::BranchInfo::*; - - let inst = self.cur.current_inst().expect("Not on an instruction"); - let layout = &self.cur.func.layout; - match self.cur.func.dfg.analyze_branch(inst) { - NotABranch => false, - SingleDest(block, _) => { - let lr = &self.liveness[value]; - lr.is_livein(block, layout) - } - Table(jt, block) => { - let lr = &self.liveness[value]; - !lr.is_local() - && (block.map_or(false, |block| lr.is_livein(block, layout)) - || self.cur.func.jump_tables[jt] - .iter() - .any(|block| lr.is_livein(*block, layout))) - } - } - } - - /// Emit `regmove` instructions as needed to move the live registers into place before the - /// instruction. Also update `self.divert` accordingly. - /// - /// The `self.cur` cursor is expected to point at the instruction. The register moves are - /// inserted before. 
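    ///
    /// For example (registers illustrative): if the solution calls for the cyclic shuffle
    /// `v3: %rax -> %rcx` and `v4: %rcx -> %rax` and no scratch register is free,
    /// `schedule_moves` breaks the cycle with an emergency spill slot, which materializes
    /// below as a `regspill`/`regfill` pair around the remaining `regmove`.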
-    ///
-    /// The solver needs to be reminded of the available registers before any moves are inserted.
-    fn shuffle_inputs(&mut self, regs: &mut RegisterSet) {
-        use crate::regalloc::solver::Move::*;
-
-        let spills = self.solver.schedule_moves(regs);
-
-        // The move operations returned by `schedule_moves` refer to emergency spill slots by
-        // consecutive indexes starting from 0. Map these to real stack slots.
-        // It is very unlikely (impossible?) that we would need more than one spill per top-level
-        // register class, so avoid allocation by using a fixed array here.
-        let mut slot = [PackedOption::default(); 8];
-        debug_assert!(spills <= slot.len(), "Too many spills ({})", spills);
-
-        for m in self.solver.moves() {
-            match *m {
-                Reg {
-                    value,
-                    from,
-                    to,
-                    rc,
-                } => {
-                    debug_assert!(
-                        !self.is_pinned_reg(rc, to),
-                        "pinned register used in a regmove"
-                    );
-                    self.divert.regmove(value, from, to);
-                    self.cur.ins().regmove(value, from, to);
-                }
-                Spill {
-                    value,
-                    from,
-                    to_slot,
-                    ..
-                } => {
-                    debug_assert_eq!(slot[to_slot].expand(), None, "Overwriting slot in use");
-                    let ss = self
-                        .cur
-                        .func
-                        .stack_slots
-                        .get_emergency_slot(self.cur.func.dfg.value_type(value), &slot[0..spills]);
-                    slot[to_slot] = ss.into();
-                    self.divert.regspill(value, from, ss);
-                    self.cur.ins().regspill(value, from, ss);
-                }
-                Fill {
-                    value,
-                    from_slot,
-                    to,
-                    rc,
-                } => {
-                    debug_assert!(
-                        !self.is_pinned_reg(rc, to),
-                        "pinned register used in a regfill"
-                    );
-                    // These slots are single use, so mark `ss` as available again.
-                    let ss = slot[from_slot].take().expect("Using unallocated slot");
-                    self.divert.regfill(value, ss, to);
-                    self.cur.ins().regfill(value, ss, to);
-                }
-            }
-        }
-    }
-
-    /// Forget about any register diversions in `kills`.
-    fn forget_diverted(&mut self, kills: &[LiveValue]) {
-        if self.divert.is_empty() {
-            return;
-        }
-
-        for lv in kills {
-            if lv.affinity.is_reg() {
-                self.divert.remove(lv.value);
-            }
-        }
-    }
-
-    /// Replace all global values defined by `inst` with local values that are then copied into the
-    /// global value:
-    ///
-    /// v1 = foo
-    ///
-    /// becomes:
-    ///
-    /// v20 = foo
-    /// v1 = copy v20
-    ///
-    /// This is sometimes necessary when there are no global registers available that can satisfy
-    /// the constraints on the instruction operands.
-    ///
-    fn replace_global_defines(&mut self, inst: Inst, tracker: &mut LiveValueTracker) {
-        log::trace!("Replacing global defs on {}", self.cur.display_inst(inst));
-
-        // We'll insert copies *after* `inst`. Our caller will move the cursor back.
-        self.cur.next_inst();
-
-        // The tracker keeps the defs from `inst` at the end. Any dead defs have already been
-        // removed, so it's not obvious how many defs to process.
-        for lv in tracker.live_mut().iter_mut().rev() {
-            // Keep going until we reach a value that is not defined by `inst`.
-            if match self.cur.func.dfg.value_def(lv.value) {
-                ValueDef::Result(i, _) => i != inst,
-                _ => true,
-            } {
-                break;
-            }
-            if lv.is_local || !lv.affinity.is_reg() {
-                continue;
-            }
-
-            // Now `lv.value` is globally live and defined by `inst`. Replace it with a local live
-            // range that is copied after `inst`.
-            let ty = self.cur.func.dfg.value_type(lv.value);
-            let local = self.cur.func.dfg.replace_result(lv.value, ty);
-            self.cur.ins().with_result(lv.value).copy(local);
-            let copy = self.cur.built_inst();
-
-            // Create a live range for `local: inst -> copy`.
-            self.liveness.create_dead(local, inst, lv.affinity);
-            self.liveness.extend_locally(
-                local,
-                self.cur.func.layout.pp_block(inst),
-                copy,
-                &self.cur.func.layout,
-            );
-
-            // Move the definition of the global `lv.value`.
-            self.liveness.move_def_locally(lv.value, copy);
-
-            // Transfer the register coloring to `local`.
-            let loc = mem::replace(&mut self.cur.func.locations[lv.value], ValueLoc::default());
-            self.cur.func.locations[local] = loc;
-
-            // Update `lv` to reflect the new `local` live range.
-            lv.value = local;
-            lv.endpoint = copy;
-            lv.is_local = true;
-
-            log::trace!(
-                " + {} with {} in {}",
-                self.cur.display_inst(copy),
-                local,
-                loc.display(&self.reginfo)
-            );
-        }
-        log::trace!("Done: {}", self.cur.display_inst(inst));
-    }
-
-    /// Process kills on a ghost instruction.
-    /// - Forget diversions.
-    /// - Free killed registers.
-    fn process_ghost_kills(&mut self, kills: &[LiveValue], regs: &mut AvailableRegs) {
-        for lv in kills {
-            if let Affinity::Reg(rci) = lv.affinity {
-                let rc = self.reginfo.rc(rci);
-                let loc = match self.divert.remove(lv.value) {
-                    Some(loc) => loc,
-                    None => self.cur.func.locations[lv.value],
-                };
-                regs.input.free(rc, loc.unwrap_reg());
-                if !lv.is_local {
-                    regs.global
-                        .free(rc, self.cur.func.locations[lv.value].unwrap_reg());
-                }
-            }
-        }
-    }
-}
-
-/// Keep track of the set of available registers in two interference domains: all registers
-/// considering diversions and global registers not considering diversions.
-struct AvailableRegs {
-    /// The exact set of registers available on the input side of the current instruction. This
-    /// takes into account register diversions, and it includes both local and global live ranges.
-    input: RegisterSet,
-
-    /// Registers available for allocating globally live values. This set ignores any local values,
-    /// and it does not account for register diversions.
-    ///
-    /// Global values must be allocated out of this set because conflicts with other global values
-    /// can't be resolved with local diversions.
-    global: RegisterSet,
-}
-
-impl AvailableRegs {
-    /// Initialize both the input and global sets from `regs`.
-    pub fn new(regs: &RegisterSet) -> Self {
-        Self {
-            input: regs.clone(),
-            global: regs.clone(),
-        }
-    }
-
-    /// Take an un-diverted register from one or both sets.
-    pub fn take(&mut self, rc: RegClass, reg: RegUnit, is_local: bool) {
-        self.input.take(rc, reg);
-        if !is_local {
-            self.global.take(rc, reg);
-        }
-    }
-
-    /// Take a diverted register from both sets for a non-local allocation.
-    pub fn take_divert(&mut self, rc: RegClass, reg: RegUnit, reg_divert: RegUnit) {
-        self.input.take(rc, reg_divert);
-        self.global.take(rc, reg);
-    }
-}
diff --git a/cranelift/codegen/src/regalloc/context.rs b/cranelift/codegen/src/regalloc/context.rs
deleted file mode 100644
index 505b1d127a..0000000000
--- a/cranelift/codegen/src/regalloc/context.rs
+++ /dev/null
@@ -1,252 +0,0 @@
-//! Register allocator context.
-//!
-//! The `Context` struct contains data structures that should be preserved across invocations of
-//! the register allocator algorithm. This doesn't preserve any data between functions, but it
-//! avoids allocating data structures independently for each function being compiled.
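//!
//! A minimal sketch of the intended driving pattern (the loop and variable names here are
//! assumptions for illustration, not code from this crate):
//!
//! ```ignore
//! let mut ra = Context::new();
//! for func in funcs.iter_mut() {
//!     // `cfg` and `domtree` come from earlier analysis passes and must be up to date.
//!     ra.run(isa, func, &mut cfg, &mut domtree)?;
//! }
//! ```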
-
-use crate::dominator_tree::DominatorTree;
-use crate::flowgraph::ControlFlowGraph;
-use crate::ir::Function;
-use crate::isa::TargetIsa;
-use crate::regalloc::branch_splitting;
-use crate::regalloc::coalescing::Coalescing;
-use crate::regalloc::coloring::Coloring;
-use crate::regalloc::live_value_tracker::LiveValueTracker;
-use crate::regalloc::liveness::Liveness;
-use crate::regalloc::reload::Reload;
-use crate::regalloc::safepoint::emit_stack_maps;
-use crate::regalloc::spilling::Spilling;
-use crate::regalloc::virtregs::VirtRegs;
-use crate::result::CodegenResult;
-use crate::timing;
-use crate::topo_order::TopoOrder;
-use crate::verifier::{
-    verify_context, verify_cssa, verify_liveness, verify_locations, VerifierErrors,
-};
-
-/// Persistent memory allocations for register allocation.
-pub struct Context {
-    liveness: Liveness,
-    virtregs: VirtRegs,
-    coalescing: Coalescing,
-    topo: TopoOrder,
-    tracker: LiveValueTracker,
-    spilling: Spilling,
-    reload: Reload,
-    coloring: Coloring,
-}
-
-impl Context {
-    /// Create a new context for register allocation.
-    ///
-    /// This context should be reused for multiple functions in order to avoid repeated memory
-    /// allocations.
-    pub fn new() -> Self {
-        Self {
-            liveness: Liveness::new(),
-            virtregs: VirtRegs::new(),
-            coalescing: Coalescing::new(),
-            topo: TopoOrder::new(),
-            tracker: LiveValueTracker::new(),
-            spilling: Spilling::new(),
-            reload: Reload::new(),
-            coloring: Coloring::new(),
-        }
-    }
-
-    /// Clear all data structures in this context.
-    pub fn clear(&mut self) {
-        self.liveness.clear();
-        self.virtregs.clear();
-        self.coalescing.clear();
-        self.topo.clear();
-        self.tracker.clear();
-        self.spilling.clear();
-        self.reload.clear();
-        self.coloring.clear();
-    }
-
-    /// Current values liveness state.
-    pub fn liveness(&self) -> &Liveness {
-        &self.liveness
-    }
-
-    /// Allocate registers in `func`.
-    ///
-    /// After register allocation, all values in `func` have been assigned to a register or stack
-    /// location that is consistent with instruction encoding constraints.
-    pub fn run(
-        &mut self,
-        isa: &dyn TargetIsa,
-        func: &mut Function,
-        cfg: &mut ControlFlowGraph,
-        domtree: &mut DominatorTree,
-    ) -> CodegenResult<()> {
-        let _tt = timing::regalloc();
-        debug_assert!(domtree.is_valid());
-
-        let mut errors = VerifierErrors::default();
-
-        // `Liveness` and `Coloring` are self-clearing.
-        self.virtregs.clear();
-
-        // Tracker state (dominator live sets) is actually reused between the spilling and coloring
-        // phases.
-        self.tracker.clear();
-
-        // Pass: Split branches, making room to insert copy and regmove instructions.
-        branch_splitting::run(isa, func, cfg, domtree, &mut self.topo);
-
-        // Pass: Liveness analysis.
-        self.liveness.compute(isa, func, cfg);
-
-        if isa.flags().enable_verifier() {
-            let ok = verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok();
-
-            if !ok {
-                return Err(errors.into());
-            }
-        }
-
-        // Pass: Coalesce and create Conventional SSA form.
-        self.coalescing.conventional_ssa(
-            isa,
-            func,
-            cfg,
-            domtree,
-            &mut self.liveness,
-            &mut self.virtregs,
-        );
-
-        if isa.flags().enable_verifier() {
-            let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
-                && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
-                && verify_cssa(
-                    func,
-                    cfg,
-                    domtree,
-                    &self.liveness,
-                    &self.virtregs,
-                    &mut errors,
-                )
-                .is_ok();
-
-            if !ok {
-                return Err(errors.into());
-            }
-        }
-
-        // Pass: Spilling.
-        self.spilling.run(
-            isa,
-            func,
-            domtree,
-            &mut self.liveness,
-            &self.virtregs,
-            &mut self.topo,
-            &mut self.tracker,
-        );
-
-        if isa.flags().enable_verifier() {
-            let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
-                && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
-                && verify_cssa(
-                    func,
-                    cfg,
-                    domtree,
-                    &self.liveness,
-                    &self.virtregs,
-                    &mut errors,
-                )
-                .is_ok();
-
-            if !ok {
-                return Err(errors.into());
-            }
-        }
-
-        // Pass: Reload.
-        self.reload.run(
-            isa,
-            func,
-            domtree,
-            &mut self.liveness,
-            &mut self.topo,
-            &mut self.tracker,
-        );
-
-        if isa.flags().enable_verifier() {
-            let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
-                && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
-                && verify_cssa(
-                    func,
-                    cfg,
-                    domtree,
-                    &self.liveness,
-                    &self.virtregs,
-                    &mut errors,
-                )
-                .is_ok();
-
-            if !ok {
-                return Err(errors.into());
-            }
-        }
-
-        // Pass: Coloring.
-        self.coloring.run(
-            isa,
-            func,
-            cfg,
-            domtree,
-            &mut self.liveness,
-            &mut self.tracker,
-        );
-
-        // If there are any reference types used, encode safepoints and emit
-        // stack maps.
-        //
-        // This function runs after register allocation has taken place, meaning
-        // values have locations assigned already, which is necessary for
-        // creating the stack maps.
-        let safepoints_enabled = isa.flags().enable_safepoints();
-        for val in func.dfg.values() {
-            let ty = func.dfg.value_type(val);
-            if ty.lane_type().is_ref() {
-                assert!(
-                    safepoints_enabled,
-                    "reference types were found but safepoints were not enabled"
-                );
-                emit_stack_maps(func, domtree, &self.liveness, &mut self.tracker, isa);
-                break;
-            }
-        }
-
-        if isa.flags().enable_verifier() {
-            let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
-                && verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
-                && verify_locations(isa, func, cfg, Some(&self.liveness), &mut errors).is_ok()
-                && verify_cssa(
-                    func,
-                    cfg,
-                    domtree,
-                    &self.liveness,
-                    &self.virtregs,
-                    &mut errors,
-                )
-                .is_ok();
-
-            if !ok {
-                return Err(errors.into());
-            }
-        }
-
-        // Even if we arrive here, (non-fatal) errors might have been reported, so we
-        // must make sure absolutely nothing is wrong.
-        if errors.is_empty() {
-            Ok(())
-        } else {
-            Err(errors.into())
-        }
-    }
-}
diff --git a/cranelift/codegen/src/regalloc/diversion.rs b/cranelift/codegen/src/regalloc/diversion.rs
deleted file mode 100644
index ba91df14a5..0000000000
--- a/cranelift/codegen/src/regalloc/diversion.rs
+++ /dev/null
@@ -1,322 +0,0 @@
-//! Register diversions.
-//!
-//! Normally, a value is assigned to a single register or stack location by the register allocator.
-//! Sometimes, it is necessary to move register values to a different register in order to satisfy
-//! instruction constraints.
-//!
-//! These register diversions are local to a block. No values can be diverted when entering a new
-//! block.
-
-use crate::fx::FxHashMap;
-use crate::hash_map::{Entry, Iter};
-use crate::ir::{Block, StackSlot, Value, ValueLoc, ValueLocations};
-use crate::ir::{InstructionData, Opcode};
-use crate::isa::{RegInfo, RegUnit};
-use core::fmt;
-use cranelift_entity::{SparseMap, SparseMapValue};
-
-#[cfg(feature = "enable-serde")]
-use serde::{Deserialize, Serialize};
-
-/// A diversion of a value from its original location to a new register or stack location.
-///
-/// In IR, a diversion is represented by a `regmove` instruction, possibly a chain of them for the
-/// same value.
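///
/// For example (registers illustrative), the chain
///
///     regmove v1, %rax -> %rcx
///     regmove v1, %rcx -> %rdx
///
/// is tracked as the single diversion `{ from: %rax, to: %rdx }`, and a final
/// `regmove v1, %rdx -> %rax` removes the entry again.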
-///
-/// When tracking diversions, the `from` field is the original assigned value location, and `to` is
-/// the current one.
-#[derive(Clone, Copy, Debug, PartialEq, Eq)]
-#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
-pub struct Diversion {
-    /// The original value location.
-    pub from: ValueLoc,
-    /// The current value location.
-    pub to: ValueLoc,
-}
-
-impl Diversion {
-    /// Make a new diversion.
-    pub fn new(from: ValueLoc, to: ValueLoc) -> Self {
-        debug_assert!(from.is_assigned() && to.is_assigned());
-        Self { from, to }
-    }
-}
-
-/// Keep track of diversions in a block.
-#[derive(Clone)]
-#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
-pub struct RegDiversions {
-    current: FxHashMap<Value, Diversion>,
-}
-
-/// Keep track of diversions at the entry of a block.
-#[derive(Clone)]
-#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
-struct EntryRegDiversionsValue {
-    key: Block,
-    divert: RegDiversions,
-}
-
-/// Map each block to its matching `RegDiversions` at basic-block entry.
-#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
-pub struct EntryRegDiversions {
-    map: SparseMap<Block, EntryRegDiversionsValue>,
-}
-
-impl RegDiversions {
-    /// Create a new empty diversion tracker.
-    pub fn new() -> Self {
-        Self {
-            current: FxHashMap::default(),
-        }
-    }
-
-    /// Clear the content of the diversions, to reset the state of the compiler.
-    pub fn clear(&mut self) {
-        self.current.clear()
-    }
-
-    /// Are there any diversions?
-    pub fn is_empty(&self) -> bool {
-        self.current.is_empty()
-    }
-
-    /// Get the current diversion of `value`, if any.
-    pub fn diversion(&self, value: Value) -> Option<&Diversion> {
-        self.current.get(&value)
-    }
-
-    /// Get all current diversions.
-    pub fn iter(&self) -> Iter<'_, Value, Diversion> {
-        self.current.iter()
-    }
-
-    /// Get the current location for `value`. Fall back to the assignment map for non-diverted
-    /// values.
-    pub fn get(&self, value: Value, locations: &ValueLocations) -> ValueLoc {
-        match self.diversion(value) {
-            Some(d) => d.to,
-            None => locations[value],
-        }
-    }
-
-    /// Get the current register location for `value`, or panic if `value` isn't in a register.
-    pub fn reg(&self, value: Value, locations: &ValueLocations) -> RegUnit {
-        self.get(value, locations).unwrap_reg()
-    }
-
-    /// Get the current stack location for `value`, or panic if `value` isn't in a stack slot.
-    pub fn stack(&self, value: Value, locations: &ValueLocations) -> StackSlot {
-        self.get(value, locations).unwrap_stack()
-    }
-
-    /// Record any kind of move.
-    ///
-    /// The `from` location must match an existing `to` location, if any.
-    fn divert(&mut self, value: Value, from: ValueLoc, to: ValueLoc) {
-        debug_assert!(from.is_assigned() && to.is_assigned());
-        match self.current.entry(value) {
-            Entry::Occupied(mut e) => {
-                // TODO: non-lexical lifetimes should allow removal of the scope and early return.
-                {
-                    let d = e.get_mut();
-                    debug_assert_eq!(d.to, from, "Bad regmove chain for {}", value);
-                    if d.from != to {
-                        d.to = to;
-                        return;
-                    }
-                }
-                e.remove();
-            }
-            Entry::Vacant(e) => {
-                e.insert(Diversion::new(from, to));
-            }
-        }
-    }
-
-    /// Record a register -> register move.
-    pub fn regmove(&mut self, value: Value, from: RegUnit, to: RegUnit) {
-        self.divert(value, ValueLoc::Reg(from), ValueLoc::Reg(to));
-    }
-
-    /// Record a register -> stack move.
-    pub fn regspill(&mut self, value: Value, from: RegUnit, to: StackSlot) {
-        self.divert(value, ValueLoc::Reg(from), ValueLoc::Stack(to));
-    }
-
-    /// Record a stack -> register move.
-    pub fn regfill(&mut self, value: Value, from: StackSlot, to: RegUnit) {
-        self.divert(value, ValueLoc::Stack(from), ValueLoc::Reg(to));
-    }
-
-    /// Apply the effect of `inst`.
-    ///
-    /// If `inst` is a `regmove`, `regfill`, or `regspill` instruction, update the diversions to
-    /// match.
-    pub fn apply(&mut self, inst: &InstructionData) {
-        match *inst {
-            InstructionData::RegMove {
-                opcode: Opcode::Regmove,
-                arg,
-                src,
-                dst,
-            } => self.regmove(arg, src, dst),
-            InstructionData::RegSpill {
-                opcode: Opcode::Regspill,
-                arg,
-                src,
-                dst,
-            } => self.regspill(arg, src, dst),
-            InstructionData::RegFill {
-                opcode: Opcode::Regfill,
-                arg,
-                src,
-                dst,
-            } => self.regfill(arg, src, dst),
-            _ => {}
-        }
-    }
-
-    /// Drop any recorded move for `value`.
-    ///
-    /// Returns the `to` location of the removed diversion.
-    pub fn remove(&mut self, value: Value) -> Option<ValueLoc> {
-        self.current.remove(&value).map(|d| d.to)
-    }
-
-    /// Resets the state of the current diversions to the recorded diversions at the entry of the
-    /// given `block`. The recorded diversions are available after coloring in the
-    /// `func.entry_diversions` field.
-    pub fn at_block(&mut self, entry_diversions: &EntryRegDiversions, block: Block) {
-        self.clear();
-        if let Some(entry_divert) = entry_diversions.map.get(block) {
-            let iter = entry_divert.divert.current.iter();
-            self.current.extend(iter);
-        }
-    }
-
-    /// Copy the current state of the diversions, and save it for the entry of the `block` given as
-    /// argument.
-    ///
-    /// Note: This function can only be called once on a `Block` with a given `entry_diversions`
-    /// argument, otherwise it will panic.
-    pub fn save_for_block(&mut self, entry_diversions: &mut EntryRegDiversions, target: Block) {
-        // No need to save anything if there are no diversions to be recorded.
-        if self.is_empty() {
-            return;
-        }
-        debug_assert!(!entry_diversions.map.contains_key(target));
-        let iter = self.current.iter();
-        let mut entry_divert = Self::new();
-        entry_divert.current.extend(iter);
-        entry_diversions.map.insert(EntryRegDiversionsValue {
-            key: target,
-            divert: entry_divert,
-        });
-    }
-
-    /// Check that the recorded entry for a given `block` matches what is recorded in the
-    /// `entry_diversions`.
-    pub fn check_block_entry(&self, entry_diversions: &EntryRegDiversions, target: Block) -> bool {
-        let entry_divert = match entry_diversions.map.get(target) {
-            Some(entry_divert) => entry_divert,
-            None => return self.is_empty(),
-        };
-
-        if entry_divert.divert.current.len() != self.current.len() {
-            return false;
-        }
-
-        for (val, _) in entry_divert.divert.current.iter() {
-            if !self.current.contains_key(val) {
-                return false;
-            }
-        }
-        true
-    }
-
-    /// Return an object that can display the diversions.
-    pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayDiversions<'a> {
-        DisplayDiversions(&self, regs.into())
-    }
-}
-
-impl EntryRegDiversions {
-    /// Create a new empty entry diversion, to associate diversions to each block entry.
-    pub fn new() -> Self {
-        Self {
-            map: SparseMap::new(),
-        }
-    }
-
-    pub fn clear(&mut self) {
-        self.map.clear();
-    }
-}
-
-impl Clone for EntryRegDiversions {
-    /// The `Clone` trait is required by `ir::Function`.
-    fn clone(&self) -> Self {
-        let mut tmp = Self::new();
-        for v in self.map.values() {
-            tmp.map.insert(v.clone());
-        }
-        tmp
-    }
-}
-
-/// Implement `SparseMapValue`, as required to make use of a `SparseMap` for mapping the entry
-/// diversions for each block.
-impl SparseMapValue<Block> for EntryRegDiversionsValue {
-    fn key(&self) -> Block {
-        self.key
-    }
-}
-
-/// Object that displays register diversions.
-pub struct DisplayDiversions<'a>(&'a RegDiversions, Option<&'a RegInfo>);
-
-impl<'a> fmt::Display for DisplayDiversions<'a> {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{{")?;
-        for (value, div) in self.0.current.iter() {
-            write!(
-                f,
-                " {}: {} -> {}",
-                value,
-                div.from.display(self.1),
-                div.to.display(self.1)
-            )?
-        }
-        write!(f, " }}")
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::entity::EntityRef;
-    use crate::ir::Value;
-
-    #[test]
-    fn inserts() {
-        let mut divs = RegDiversions::new();
-        let v1 = Value::new(1);
-        let v2 = Value::new(2);
-
-        divs.regmove(v1, 10, 12);
-        assert_eq!(
-            divs.diversion(v1),
-            Some(&Diversion {
-                from: ValueLoc::Reg(10),
-                to: ValueLoc::Reg(12),
-            })
-        );
-        assert_eq!(divs.diversion(v2), None);
-
-        divs.regmove(v1, 12, 11);
-        assert_eq!(divs.diversion(v1).unwrap().to, ValueLoc::Reg(11));
-        divs.regmove(v1, 11, 10);
-        assert_eq!(divs.diversion(v1), None);
-    }
-}
diff --git a/cranelift/codegen/src/regalloc/live_value_tracker.rs b/cranelift/codegen/src/regalloc/live_value_tracker.rs
deleted file mode 100644
index ae33a15f4d..0000000000
--- a/cranelift/codegen/src/regalloc/live_value_tracker.rs
+++ /dev/null
@@ -1,344 +0,0 @@
-//! Track which values are live in a block with instruction granularity.
-//!
-//! The `LiveValueTracker` keeps track of the set of live SSA values at each instruction in a block.
-//! The sets of live values are computed on the fly as the tracker is moved from instruction to
-//! instruction, starting at the block header.
-
-use crate::dominator_tree::DominatorTree;
-use crate::entity::{EntityList, ListPool};
-use crate::fx::FxHashMap;
-use crate::ir::{Block, DataFlowGraph, ExpandedProgramPoint, Inst, Layout, Value};
-use crate::partition_slice::partition_slice;
-use crate::regalloc::affinity::Affinity;
-use crate::regalloc::liveness::Liveness;
-use crate::regalloc::liverange::LiveRange;
-use alloc::vec::Vec;
-
-type ValueList = EntityList<Value>;
-
-/// Compute and track live values throughout a block.
-pub struct LiveValueTracker {
-    /// The set of values that are live at the current program point.
-    live: LiveValueVec,
-
-    /// Saved set of live values for every jump and branch that can potentially be an immediate
-    /// dominator of a block.
-    ///
-    /// This is the set of values that are live *before* the branch.
-    idom_sets: FxHashMap<Inst, ValueList>,
-
-    /// Memory pool for the live sets.
-    idom_pool: ListPool<Value>,
-}
-
-/// Information about a value that is live at the current program point.
-#[derive(Debug)]
-pub struct LiveValue {
-    /// The live value.
-    pub value: Value,
-
-    /// The local ending point of the live range in the current block, as returned by
-    /// `LiveRange::def_local_end()` or `LiveRange::livein_local_end()`.
-    pub endpoint: Inst,
-
-    /// The affinity of the value as represented in its `LiveRange`.
-    ///
-    /// This value is simply a copy of the affinity stored in the live range. We copy it because
-    /// almost all users of `LiveValue` need to look at it.
-    pub affinity: Affinity,
-
-    /// The live range for this value never leaves its block.
-    pub is_local: bool,
-
-    /// This value is dead - the live range ends immediately.
-    pub is_dead: bool,
-}
-
-struct LiveValueVec {
-    /// The set of values that are live at the current program point.
-    values: Vec<LiveValue>,
-
-    /// How many values at the front of `values` are known to be live after `inst`?
-    ///
-    /// This is used to pass a much smaller slice to `partition_slice` when it's called a second
-    /// time for the same instruction.
-    live_prefix: Option<(Inst, usize)>,
-}
-
-impl LiveValueVec {
-    fn new() -> Self {
-        Self {
-            values: Vec::new(),
-            live_prefix: None,
-        }
-    }
-
-    /// Add a new live value to `values`. Copy some properties from `lr`.
-    fn push(&mut self, value: Value, endpoint: Inst, lr: &LiveRange) {
-        self.values.push(LiveValue {
-            value,
-            endpoint,
-            affinity: lr.affinity,
-            is_local: lr.is_local(),
-            is_dead: lr.is_dead(),
-        });
-    }
-
-    /// Remove all elements.
-    fn clear(&mut self) {
-        self.values.clear();
-        self.live_prefix = None;
-    }
-
-    /// Make sure that the values killed by `next_inst` are moved to the end of the `values`
-    /// vector.
-    ///
-    /// Returns the number of values that will be live after `next_inst`.
-    fn live_after(&mut self, next_inst: Inst) -> usize {
-        // How many values at the front of the vector are already known to survive `next_inst`?
-        // We don't need to pass this prefix to `partition_slice()`.
-        let keep = match self.live_prefix {
-            Some((i, prefix)) if i == next_inst => prefix,
-            _ => 0,
-        };
-
-        // Move the remaining surviving values to the front partition of the vector.
-        let prefix = keep + partition_slice(&mut self.values[keep..], |v| v.endpoint != next_inst);
-
-        // Remember the new prefix length in case we get called again for the same `next_inst`.
-        self.live_prefix = Some((next_inst, prefix));
-        prefix
-    }
-
-    /// Remove the values killed by `next_inst`.
-    fn remove_kill_values(&mut self, next_inst: Inst) {
-        let keep = self.live_after(next_inst);
-        self.values.truncate(keep);
-    }
-
-    /// Remove any dead values.
-    fn remove_dead_values(&mut self) {
-        self.values.retain(|v| !v.is_dead);
-        self.live_prefix = None;
-    }
-}
-
-impl LiveValueTracker {
-    /// Create a new blank tracker.
-    pub fn new() -> Self {
-        Self {
-            live: LiveValueVec::new(),
-            idom_sets: FxHashMap::default(),
-            idom_pool: ListPool::new(),
-        }
-    }
-
-    /// Clear all cached information.
-    pub fn clear(&mut self) {
-        self.live.clear();
-        self.idom_sets.clear();
-        self.idom_pool.clear();
-    }
-
-    /// Get the set of currently live values.
-    ///
-    /// Between calls to `process_inst()` and `drop_dead()`, this includes both values killed and
-    /// defined by the current instruction.
-    pub fn live(&self) -> &[LiveValue] {
-        &self.live.values
-    }
-
-    /// Get a mutable set of currently live values.
-    ///
-    /// Use with care and don't move entries around.
-    pub fn live_mut(&mut self) -> &mut [LiveValue] {
-        &mut self.live.values
-    }
-
-    /// Move the current position to the top of `block`.
-    ///
-    /// This depends on the stored live value set at `block`'s immediate dominator, so that must have
-    /// been visited first.
-    ///
-    /// Returns `(liveins, args)` as a pair of slices. The first slice is the set of live-in values
-    /// from the immediate dominator. The second slice is the set of `block` parameters.
-    ///
-    /// Dead parameters with no uses are included in `args`. Call `drop_dead_params()` to remove them.
-    pub fn block_top(
-        &mut self,
-        block: Block,
-        dfg: &DataFlowGraph,
-        liveness: &Liveness,
-        layout: &Layout,
-        domtree: &DominatorTree,
-    ) -> (&[LiveValue], &[LiveValue]) {
-        // Start over, compute the set of live values at the top of the block from two sources:
-        //
-        // 1. Values that were live before `block`'s immediate dominator, filtered for those that are
-        //    actually live-in.
-        // 2. Arguments to `block` that are not dead.
-        //
-        self.live.clear();
-
-        // Compute the live-in values. Start by filtering the set of values that were live before
-        // the immediate dominator. Just use the empty set if there's no immediate dominator
-        // (i.e., the entry block or an unreachable block).
-        if let Some(idom) = domtree.idom(block) {
-            // If the immediate dominator exists, we must have a stored list for it. This is a
-            // requirement on the order in which blocks are visited: all dominators must have
-            // been processed before the current block.
-            let idom_live_list = self
-                .idom_sets
-                .get(&idom)
-                .expect("No stored live set for dominator");
-            // Get just the values that are live-in to `block`.
-            for &value in idom_live_list.as_slice(&self.idom_pool) {
-                let lr = liveness
-                    .get(value)
-                    .expect("Immediate dominator value has no live range");
-
-                // Check if this value is live-in here.
-                if let Some(endpoint) = lr.livein_local_end(block, layout) {
-                    self.live.push(value, endpoint, lr);
-                }
-            }
-        }
-
-        // Now add all the live parameters to `block`.
-        let first_arg = self.live.values.len();
-        for &value in dfg.block_params(block) {
-            let lr = &liveness[value];
-            debug_assert_eq!(lr.def(), block.into());
-            match lr.def_local_end().into() {
-                ExpandedProgramPoint::Inst(endpoint) => {
-                    self.live.push(value, endpoint, lr);
-                }
-                ExpandedProgramPoint::Block(local_block) => {
-                    // This is a dead block parameter which is not even live into the first
-                    // instruction in the block.
-                    debug_assert_eq!(
-                        local_block, block,
-                        "block parameter live range ends at wrong block header"
-                    );
-                    // Give this value a fake endpoint that is the first instruction in the
-                    // block. We expect it to be removed by calling `drop_dead_params()`.
-                    self.live
-                        .push(value, layout.first_inst(block).expect("Empty block"), lr);
-                }
-            }
-        }
-
-        self.live.values.split_at(first_arg)
-    }
-
-    /// Prepare to move past `inst`.
-    ///
-    /// Determine the set of already live values that are killed by `inst`, and add the new
-    /// defined values to the tracked set.
-    ///
-    /// Returns `(throughs, kills, defs)` as a tuple of slices:
-    ///
-    /// 1. The `throughs` slice is the set of live-through values that are neither defined nor
-    ///    killed by the instruction.
-    /// 2. The `kills` slice is the set of values that were live before the instruction and are
-    ///    killed at the instruction. This does not include dead defs.
-    /// 3. The `defs` slice is guaranteed to be in the same order as `inst`'s results, and
-    ///    includes dead defines.
-    ///
-    /// The order of `throughs` and `kills` is arbitrary.
-    ///
-    /// The `drop_dead()` method must be called next to actually remove the dead values from the
-    /// tracked set after the two returned slices are no longer needed.
-    pub fn process_inst(
-        &mut self,
-        inst: Inst,
-        dfg: &DataFlowGraph,
-        liveness: &Liveness,
-    ) -> (&[LiveValue], &[LiveValue], &[LiveValue]) {
-        // Save a copy of the live values before any branches or jumps that could be somebody's
-        // immediate dominator.
-        if dfg[inst].opcode().is_branch() {
-            self.save_idom_live_set(inst);
-        }
-
-        // Move killed values to the end of the vector.
-        // Don't remove them yet, `drop_dead()` will do that.
-        let first_kill = self.live.live_after(inst);
-
-        // Add the values defined by `inst`.
-        let first_def = self.live.values.len();
-        for &value in dfg.inst_results(inst) {
-            let lr = &liveness[value];
-            debug_assert_eq!(lr.def(), inst.into());
-            match lr.def_local_end().into() {
-                ExpandedProgramPoint::Inst(endpoint) => {
-                    self.live.push(value, endpoint, lr);
-                }
-                ExpandedProgramPoint::Block(block) => {
-                    panic!("Instruction result live range can't end at {}", block);
-                }
-            }
-        }
-
-        (
-            &self.live.values[0..first_kill],
-            &self.live.values[first_kill..first_def],
-            &self.live.values[first_def..],
-        )
-    }
-
-    /// Prepare to move past a ghost instruction.
-    ///
-    /// This is like `process_inst`, except any defs are ignored.
-    ///
-    /// Returns `(throughs, kills)`.
-    pub fn process_ghost(&mut self, inst: Inst) -> (&[LiveValue], &[LiveValue]) {
-        let first_kill = self.live.live_after(inst);
-        self.live.values.as_slice().split_at(first_kill)
-    }
-
-    /// Drop the values that are now dead after moving past `inst`.
-    ///
-    /// This removes both live values that were killed by `inst` and dead defines on `inst`
-    /// itself.
-    ///
-    /// This must be called after `process_inst(inst)` and before proceeding to the next
-    /// instruction.
-    pub fn drop_dead(&mut self, inst: Inst) {
-        // Remove both live values that were killed by `inst` and dead defines from `inst`.
-        self.live.remove_kill_values(inst);
-    }
-
-    /// Drop any values that are marked as `is_dead`.
-    ///
-    /// Use this after calling `block_top` to clean out dead block parameters.
-    pub fn drop_dead_params(&mut self) {
-        self.live.remove_dead_values();
-    }
-
-    /// Process new spills.
-    ///
-    /// Any values where `f` returns true are spilled and will be treated as if their affinity
-    /// was `Stack`.
-    pub fn process_spills<F>(&mut self, mut f: F)
-    where
-        F: FnMut(Value) -> bool,
-    {
-        for lv in &mut self.live.values {
-            if f(lv.value) {
-                lv.affinity = Affinity::Stack;
-            }
-        }
-    }
-
-    /// Save the current set of live values so it is associated with `idom`.
-    fn save_idom_live_set(&mut self, idom: Inst) {
-        let values = self.live.values.iter().map(|lv| lv.value);
-        let pool = &mut self.idom_pool;
-        // If there already is a set saved for `idom`, just keep it.
-        self.idom_sets.entry(idom).or_insert_with(|| {
-            let mut list = ValueList::default();
-            list.extend(values, pool);
-            list
-        });
-    }
-}
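As a rough usage sketch (not part of the deleted code), a pass would drive the tracker over one
block like this, assuming `func`, `liveness`, and `domtree` have already been computed; the
helper name is illustrative only:

    fn walk_block(
        tracker: &mut LiveValueTracker,
        block: Block,
        func: &Function,
        liveness: &Liveness,
        domtree: &DominatorTree,
    ) {
        // Live-ins from the immediate dominator plus the block's own parameters.
        let (_liveins, _params) =
            tracker.block_top(block, &func.dfg, liveness, &func.layout, domtree);
        // Remove parameters that are dead on arrival.
        tracker.drop_dead_params();

        for inst in func.layout.block_insts(block) {
            // Partition the tracked set into live-throughs, kills, and new defs...
            let (_throughs, _kills, _defs) = tracker.process_inst(inst, &func.dfg, liveness);
            // ...then drop the kills and dead defs before moving on.
            tracker.drop_dead(inst);
        }
    }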
diff --git a/cranelift/codegen/src/regalloc/liveness.rs b/cranelift/codegen/src/regalloc/liveness.rs
deleted file mode 100644
index 2e9c5015bd..0000000000
--- a/cranelift/codegen/src/regalloc/liveness.rs
+++ /dev/null
@@ -1,443 +0,0 @@
-//! Liveness analysis for SSA values.
-//!
-//! This module computes the live range of all the SSA values in a function and produces a
-//! `LiveRange` instance for each.
-//!
-//!
-//! # Liveness consumers
-//!
-//! The primary consumer of the liveness analysis is the SSA coloring pass which goes through
-//! each block and assigns a register to the defined values. This algorithm needs to maintain a
-//! set of the currently live values as it is iterating down the instructions in the block. It
-//! asks the following questions:
-//!
-//! - What is the set of live values at the entry to the block?
-//! - When moving past a use of a value, is that value still alive in the block, or was that the
-//!   last use?
-//! - When moving past a branch, which of the live values are still live below the branch?
-//!
-//! The set of `LiveRange` instances can answer these questions through their `def_local_end`
-//! and `livein_local_end` queries. The coloring algorithm visits blocks in a topological order
-//! of the dominator tree, so it can compute the set of live values at the beginning of a block
-//! by starting from the set of live values at the dominating branch instruction and filtering
-//! it with `livein_local_end`. These sets do not need to be stored in the liveness analysis.
-//!
-//! The secondary consumer of the liveness analysis is the spilling pass which needs to count
-//! the number of live values at every program point and insert spill code until the number of
-//! registers needed is small enough.
-//!
-//!
-//! # Alternative algorithms
-//!
-//! A number of different liveness analysis algorithms exist, so it is worthwhile to look at a
-//! few alternatives.
-//!
-//! ## Data-flow equations
-//!
-//! The classic *live variables analysis* that you will find in all compiler books from the
-//! previous century does not depend on SSA form. It is typically implemented by iteratively
-//! solving data-flow equations on bit-vectors of variables. The result is a live-out bit-vector
-//! of variables for every basic block in the program.
-//!
-//! This algorithm has some disadvantages that make us look elsewhere:
-//!
-//! - Quadratic memory use. We need a bit per variable per basic block in the function.
-//! - Dense representation of sparse data. In practice, the majority of SSA values never leave
-//!   their basic block, and those that do rarely span a large number of basic blocks. This
-//!   makes the data stored in the bit-vectors quite sparse.
-//! - Traditionally, the data-flow equations were solved for real program *variables* which does
-//!   not include temporaries used in evaluating expressions. We have an SSA form program which
-//!   blurs the distinction between temporaries and variables. This makes the quadratic memory
-//!   problem worse because there are many more SSA values than there were variables in the
-//!   original program, and we don't know a priori which SSA values leave their basic block.
-//! - Missing last-use information. For values that are not live-out of a basic block, we would
-//!   need to store information about the last use in the block somewhere. LLVM stores this
-//!   information as a 'kill bit' on the last use in the IR. Maintaining these kill bits has
-//!   been a source of problems for LLVM's register allocator.
-//!
-//! Data-flow equations can detect when a variable is used uninitialized, and they can handle
-//! multiple definitions of the same variable. We don't need this generality since we already
-//! have a program in SSA form.
-//!
-//! ## LLVM's liveness analysis
-//!
-//! LLVM's register allocator computes liveness per *virtual register*, where a virtual register
-//! is a disjoint union of related SSA values that should be assigned to the same physical
-//! register. It uses a compact data structure very similar to our `LiveRange`. The important
-//! difference is that Cranelift's `LiveRange` only describes a single SSA value, while LLVM's
-//! `LiveInterval` describes the live range of a virtual register *and* which one of the related
-//! SSA values is live at any given program point.
-//!
-//! LLVM computes the live range of each virtual register independently by using the use-def
-//! chains that are baked into its IR. The algorithm for a single virtual register is:
-//!
-//! 1. Initialize the live range with a single-instruction snippet of liveness at each def,
-//!    using the def-chain. This does not include any phi-values.
-//! 2. Go through the virtual register's use chain and perform the following steps at each use:
-//! 3. Perform an exhaustive depth-first traversal up the CFG from the use. Look for basic
-//!    blocks that already contain some liveness and extend the last live SSA value in the block
-//!    to be live-out. Also build a list of new basic blocks where the register needs to be
-//!    live-in.
-//! 4. Iteratively propagate live-out SSA values to the new live-in blocks. This may require new
-//!    PHI values to be created when different SSA values can reach the same block.
-//!
-//! The iterative SSA form reconstruction can be skipped if the depth-first search only
-//! encountered one SSA value.
-//!
-//! This algorithm has some advantages compared to the data-flow equations:
-//!
-//! - The live ranges of local virtual registers are computed very quickly without ever
-//!   traversing the CFG. The memory needed to store these live ranges is independent of the
-//!   number of basic blocks in the program.
-//! - The time to compute the live range of a global virtual register is proportional to the
-//!   number of basic blocks covered. Many virtual registers only cover a few blocks, even in
-//!   very large functions.
-//! - A single live range can be recomputed after making modifications to the IR. No global
-//!   algorithm is necessary. This feature depends on having use-def chains for virtual
-//!   registers which Cranelift doesn't.
-//!
-//! Cranelift uses very similar data structures and algorithms to LLVM, with the important
-//! difference that live ranges are computed per SSA value instead of per virtual register, and
-//! the uses in Cranelift IR refer to SSA values instead of virtual registers. This means that
-//! Cranelift can skip the last step of reconstructing SSA form for the virtual register uses.
-//!
-//! ## Fast Liveness Checking for SSA-Form Programs
-//!
-//! A liveness analysis that is often brought up in the context of SSA-based register allocation
-//! was presented at CGO 2008:
-//!
-//! > Boissinot, B., Hack, S., Grund, D., de Dinechin, B. D., & Rastello, F. (2008). *Fast
-//! > Liveness Checking for SSA-Form Programs.* CGO.
-//!
-//! This analysis uses a global pre-computation that only depends on the CFG of the function. It
-//! then allows liveness queries for any (value, program point) pair. Each query traverses the
-//! use chain of the value and performs lookups in the precomputed bit-vectors.
-//!
-//! I did not seriously consider this analysis for Cranelift because:
-//!
-//! - It depends critically on use chains which Cranelift doesn't have.
-//! - Popular variables like the `this` pointer in a C++ method can have very large use chains.
-//!   Traversing such a long use chain on every liveness lookup has the potential for some nasty
-//!   quadratic behavior in unfortunate cases.
-//! - It says "fast" in the title, but the paper only claims to be 16% faster than a data-flow
-//!   based approach, which isn't that impressive.
-//!
-//! Nevertheless, the property of only depending on the CFG structure is very useful. If
-//! Cranelift gains use chains, this approach would be worth a proper evaluation.
-//!
-//!
-//! # Cranelift's liveness analysis
-//!
-//! The algorithm implemented in this module is similar to LLVM's with these differences:
-//!
-//! - The `LiveRange` data structure describes the liveness of a single SSA value, not a virtual
-//!   register.
-//! - Instructions in Cranelift IR contain references to SSA values, not virtual registers.
-//! - All live ranges are computed in one traversal of the program. Cranelift doesn't have use
-//!   chains, so it is not possible to compute the live range for a single SSA value
-//!   independently.
-//!
-//! The liveness computation visits all instructions in the program. The order is not important
-//! for the algorithm to be correct. At each instruction, the used values are examined.
-//!
-//! - The first time a value is encountered, its live range is constructed as a dead live range
-//!   containing only the defining program point.
-//! - The local interval of the value's live range is extended so it reaches the use. This may
-//!   require creating a new live-in local interval for the block.
-//! - If the live range became live-in to the block, add the block to a work-list.
-//! - While the work-list is non-empty, pop a live-in block and repeat the two steps above,
-//!   using each of the live-in block's CFG predecessor instructions as a 'use'.
-//!
-//! The effect of this algorithm is to extend the live range of each value to reach uses as they
-//! are visited. No data about each value beyond the live range is needed between visiting uses,
-//! so nothing is lost by computing the live range of all values simultaneously.
-//!
-//! ## Cache efficiency of Cranelift vs LLVM
-//!
-//! Since LLVM computes the complete live range of a virtual register in one go, it can keep the
-//! whole `LiveInterval` for the register in L1 cache. Since it is visiting the instructions in
-//! use chain order, some cache thrashing can occur as a result of pulling instructions into
-//! cache somewhat chaotically.
-//!
-//! Cranelift uses a transposed algorithm, visiting instructions in order. This means that each
-//! instruction is brought into cache only once, and it is likely that the other instructions on
-//! the same cache line will be visited before the line is evicted.
-//!
-//! Cranelift's problem is that the `LiveRange` structs are visited many times and not always
-//! regularly. We should strive to make the `LiveRange` struct as small as possible such that
-//! multiple related values can live on the same cache line.
-//!
-//! - Local values should fit in a 16-byte `LiveRange` struct or smaller. The current
-//!   implementation contains a 24-byte `Vec` object and a redundant `value` member pushing the
-//!   size to 32 bytes.
-//! - Related values should be stored on the same cache line. The current sparse set
-//!   implementation does a decent job of that.
-//! - For global values, the list of live-in intervals is very likely to fit on a single cache
-//!   line. These lists are very likely to be found in L2 cache at least.
-//!
-//! There is some room for improvement.
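To make the extension loop described above concrete, here is a toy version over plain integers;
this is a sketch of the idea only, with per-block intervals kept in a `HashMap` rather than the
compressed `LiveRange` representation this module actually uses:

    use std::collections::HashMap;

    /// Toy `extend_in_block`: returns true if the value just became live-in to `block`.
    fn extend_in_block(live_end: &mut HashMap<u32, u32>, block: u32, to: u32) -> bool {
        match live_end.get(&block).copied() {
            Some(end) => {
                if end < to {
                    live_end.insert(block, to); // extend the existing interval
                }
                false
            }
            None => {
                live_end.insert(block, to); // new live-in interval
                true
            }
        }
    }

    /// Extend liveness to a use at `user` in `block`, walking up the CFG.
    fn extend_to_use(
        live_end: &mut HashMap<u32, u32>,
        preds: &HashMap<u32, Vec<(u32, u32)>>, // block -> [(pred block, branch inst)]
        block: u32,
        user: u32,
    ) {
        let mut worklist = Vec::new();
        if extend_in_block(live_end, block, user) {
            worklist.push(block);
        }
        while let Some(livein) = worklist.pop() {
            // The value must be live at every predecessor branch of a live-in block.
            for &(pred, branch) in preds.get(&livein).into_iter().flatten() {
                if extend_in_block(live_end, pred, branch) {
                    worklist.push(pred);
                }
            }
        }
    }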
-
-use crate::entity::SparseMap;
-use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
-use crate::ir::dfg::ValueDef;
-use crate::ir::{Block, Function, Inst, Layout, ProgramPoint, Value};
-use crate::isa::{EncInfo, OperandConstraint, TargetIsa};
-use crate::regalloc::affinity::Affinity;
-use crate::regalloc::liverange::LiveRange;
-use crate::timing;
-use alloc::vec::Vec;
-use core::mem;
-use core::ops::Index;
-
-/// A set of live ranges, indexed by value number.
-type LiveRangeSet = SparseMap<Value, LiveRange>;
-
-/// Get a mutable reference to the live range for `value`.
-/// Create it if necessary.
-fn get_or_create<'a>(
-    lrset: &'a mut LiveRangeSet,
-    value: Value,
-    isa: &dyn TargetIsa,
-    func: &Function,
-    encinfo: &EncInfo,
-) -> &'a mut LiveRange {
-    // It would be better to use `get_mut()` here, but that leads to borrow checker fighting
-    // which can probably only be resolved by non-lexical lifetimes.
-    // https://github.com/rust-lang/rfcs/issues/811
-    if lrset.get(value).is_none() {
-        // Create a live range for value. We need the program point that defines it.
-        let def;
-        let affinity;
-        match func.dfg.value_def(value) {
-            ValueDef::Result(inst, rnum) => {
-                def = inst.into();
-                // Initialize the affinity from the defining instruction's result constraints.
-                // Don't do this for call return values which are always tied to a single
-                // register.
-                affinity = encinfo
-                    .operand_constraints(func.encodings[inst])
-                    .and_then(|rc| rc.outs.get(rnum))
-                    .map(Affinity::new)
-                    .or_else(|| {
-                        // If this is a call, get the return value affinity.
-                        func.dfg
-                            .call_signature(inst)
-                            .map(|sig| Affinity::abi(&func.dfg.signatures[sig].returns[rnum], isa))
-                    })
-                    .unwrap_or_default();
-            }
-            ValueDef::Param(block, num) => {
-                def = block.into();
-                if func.layout.entry_block() == Some(block) {
-                    // The affinity for entry block parameters can be inferred from the function
-                    // signature.
-                    affinity = Affinity::abi(&func.signature.params[num], isa);
-                } else {
-                    // Give normal block parameters a register affinity matching their type.
-                    let rc = isa.regclass_for_abi_type(func.dfg.value_type(value));
-                    affinity = Affinity::Reg(rc.into());
-                }
-            }
-        };
-        lrset.insert(LiveRange::new(value, def, affinity));
-    }
-    lrset.get_mut(value).unwrap()
-}
-
-/// Extend the live range for `value` so it reaches `to` which must live in `block`.
-fn extend_to_use(
-    lr: &mut LiveRange,
-    block: Block,
-    to: Inst,
-    worklist: &mut Vec<Block>,
-    func: &Function,
-    cfg: &ControlFlowGraph,
-) {
-    // This is our scratch working space, and we'll leave it empty when we return.
-    debug_assert!(worklist.is_empty());
-
-    // Extend the range locally in `block`.
-    // If there already was a live interval in that block, we're done.
-    if lr.extend_in_block(block, to, &func.layout) {
-        worklist.push(block);
-    }
-
-    // The work list contains those blocks where we have learned that the value needs to be
-    // live-in.
-    //
-    // This algorithm becomes a depth-first traversal up the CFG, enumerating all paths through
-    // the CFG from the existing live range to `block`.
-    //
-    // Extend the live range as we go. The live range itself also serves as a visited set since
-    // `extend_in_block` will never return true twice for the same block.
-    //
-    while let Some(livein) = worklist.pop() {
-        // We've learned that the value needs to be live-in to the `livein` block.
-        // Make sure it is also live at all predecessor branches to `livein`.
-        for BlockPredecessor {
-            block: pred,
-            inst: branch,
-        } in cfg.pred_iter(livein)
-        {
-            if lr.extend_in_block(pred, branch, &func.layout) {
-                // This predecessor block also became live-in. We need to process it later.
-                worklist.push(pred);
-            }
-        }
-    }
-}
-
-/// Liveness analysis for a function.
-///
-/// Compute a live range for every SSA value used in the function.
-pub struct Liveness {
-    /// The live ranges that have been computed so far.
-    ranges: LiveRangeSet,
-
-    /// Working space for the `extend_to_use` algorithm.
-    /// This vector is always empty, except for inside that function.
-    /// It lives here to avoid repeated allocation of scratch memory.
-    worklist: Vec<Block>,
-}
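A rough usage sketch for the API that follows (the helper name is illustrative; it assumes an
ISA handle and a computed control flow graph are available):

    /// Does `v` stay inside its defining block? Runs the analysis from scratch.
    fn value_is_block_local(
        isa: &dyn TargetIsa,
        func: &mut Function,
        cfg: &ControlFlowGraph,
        v: Value,
    ) -> bool {
        let mut liveness = Liveness::new();
        liveness.compute(isa, func, cfg);
        // A value without a live range doesn't escape its block either.
        liveness.get(v).map_or(true, |lr| lr.is_local())
    }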
-
-impl Liveness {
-    /// Create a new empty liveness analysis.
-    ///
-    /// The memory allocated for this analysis can be reused for multiple functions. Use the
-    /// `compute` method to actually run the analysis for a function.
-    pub fn new() -> Self {
-        Self {
-            ranges: LiveRangeSet::new(),
-            worklist: Vec::new(),
-        }
-    }
-
-    /// Current live ranges.
-    pub fn ranges(&self) -> &LiveRangeSet {
-        &self.ranges
-    }
-
-    /// Clear all data structures in this liveness analysis.
-    pub fn clear(&mut self) {
-        self.ranges.clear();
-        self.worklist.clear();
-    }
-
-    /// Get the live range for `value`, if it exists.
-    pub fn get(&self, value: Value) -> Option<&LiveRange> {
-        self.ranges.get(value)
-    }
-
-    /// Create a new live range for `value`.
-    ///
-    /// The new live range will be defined at `def` with no extent, like a dead value.
-    ///
-    /// This asserts that `value` does not have an existing live range.
-    pub fn create_dead<PP>(&mut self, value: Value, def: PP, affinity: Affinity)
-    where
-        PP: Into<ProgramPoint>,
-    {
-        let old = self
-            .ranges
-            .insert(LiveRange::new(value, def.into(), affinity));
-        debug_assert!(old.is_none(), "{} already has a live range", value);
-    }
-
-    /// Move the definition of `value` to `def`.
-    ///
-    /// The old and new def points must be in the same block, and before the end of the live
-    /// range.
-    pub fn move_def_locally<PP>(&mut self, value: Value, def: PP)
-    where
-        PP: Into<ProgramPoint>,
-    {
-        let lr = self.ranges.get_mut(value).expect("Value has no live range");
-        lr.move_def_locally(def.into());
-    }
-
-    /// Locally extend the live range for `value` to reach `user`.
-    ///
-    /// It is assumed that `value` is already live before `user` in `block`.
-    ///
-    /// Returns a mutable reference to the value's affinity in case that also needs to be
-    /// updated.
-    pub fn extend_locally(
-        &mut self,
-        value: Value,
-        block: Block,
-        user: Inst,
-        layout: &Layout,
-    ) -> &mut Affinity {
-        debug_assert_eq!(Some(block), layout.inst_block(user));
-        let lr = self.ranges.get_mut(value).expect("Value has no live range");
-        let livein = lr.extend_in_block(block, user, layout);
-        debug_assert!(!livein, "{} should already be live in {}", value, block);
-        &mut lr.affinity
-    }
-
-    /// Change the affinity of `value` to `Stack` and return the previous affinity.
-    pub fn spill(&mut self, value: Value) -> Affinity {
-        let lr = self.ranges.get_mut(value).expect("Value has no live range");
-        mem::replace(&mut lr.affinity, Affinity::Stack)
-    }
-
-    /// Compute the live ranges of all SSA values used in `func`.
-    /// This clears out any existing analysis stored in this data structure.
-    pub fn compute(&mut self, isa: &dyn TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) {
-        let _tt = timing::ra_liveness();
-        self.ranges.clear();
-
-        // Get ISA data structures used for computing live range affinities.
-        let encinfo = isa.encoding_info();
-        let reginfo = isa.register_info();
-
-        // The liveness computation needs to visit all uses, but the order doesn't matter.
-        // TODO: Perhaps this traversal of the function could be combined with a dead code
-        // elimination pass if we visit a post-order of the dominator tree?
-        for block in func.layout.blocks() {
-            // Make sure we have created live ranges for dead block parameters.
-            // TODO: If these parameters are really dead, we could remove them, except for the
-            // entry block which must match the function signature.
-            for &arg in func.dfg.block_params(block) {
-                get_or_create(&mut self.ranges, arg, isa, func, &encinfo);
-            }
-
-            for inst in func.layout.block_insts(block) {
-                // Eliminate all value aliases, they would confuse the register allocator.
-                func.dfg.resolve_aliases_in_arguments(inst);
-
-                // Make sure we have created live ranges for dead defs.
-                // TODO: When we implement DCE, we can use the absence of a live range to
-                // indicate an unused value.
-                for &def in func.dfg.inst_results(inst) {
-                    get_or_create(&mut self.ranges, def, isa, func, &encinfo);
-                }
-
-                // Iterator of constraints, one per value operand.
-                let encoding = func.encodings[inst];
-                let operand_constraint_slice: &[OperandConstraint] =
-                    encinfo.operand_constraints(encoding).map_or(&[], |c| c.ins);
-                let mut operand_constraints = operand_constraint_slice.iter();
-
-                for &arg in func.dfg.inst_args(inst) {
-                    // Get the live range, create it as a dead range if necessary.
-                    let lr = get_or_create(&mut self.ranges, arg, isa, func, &encinfo);
-
-                    // Extend the live range to reach this use.
-                    extend_to_use(lr, block, inst, &mut self.worklist, func, cfg);
-
-                    // Apply operand constraint, ignoring any variable arguments after the fixed
-                    // operands described by `operand_constraints`. Variable arguments are either
-                    // block arguments or call/return ABI arguments.
-                    if let Some(constraint) = operand_constraints.next() {
-                        lr.affinity.merge(constraint, &reginfo);
-                    }
-                }
-            }
-        }
-    }
-}
-
-impl Index<Value> for Liveness {
-    type Output = LiveRange;
-    fn index(&self, index: Value) -> &LiveRange {
-        self.ranges
-            .get(index)
-            .unwrap_or_else(|| panic!("{} has no live range", index))
-    }
-}
diff --git a/cranelift/codegen/src/regalloc/liverange.rs b/cranelift/codegen/src/regalloc/liverange.rs
deleted file mode 100644
index 91cff53b03..0000000000
--- a/cranelift/codegen/src/regalloc/liverange.rs
+++ /dev/null
@@ -1,720 +0,0 @@
-//! Data structure representing the live range of an SSA value.
-//!
-//! Live ranges are tracked per SSA value, not per variable or virtual register. The live range
-//! of an SSA value begins where it is defined and extends to all program points where the value
-//! is still needed.
-//!
-//! # Local Live Ranges
-//!
-//! Inside a single basic block, the live range of a value is always an interval between two
-//! program points (if the value is live in the block at all). The starting point is either:
-//!
-//! 1. The instruction that defines the value, or
-//! 2. The block header, because the value is an argument to the block, or
-//! 3. The block header, because the value is defined in another block and live-in to this one.
-//!
-//! The ending point of the local live range is the last of the following program points in the
-//! block:
-//!
-//! 1. The last use in the block, where a *use* is an instruction that has the value as an
-//!    argument.
-//! 2. The last branch or jump instruction in the block that can reach a use.
-//! 3. If the value has no uses anywhere (a *dead value*), the program point that defines it.
-//!
-//! Note that 2. includes loop back-edges to the same block. In general, if a value is defined
-//! outside a loop and used inside the loop, it will be live in the entire loop.
-//!
-//! # Global Live Ranges
-//!
-//! Values that appear in more than one block have a *global live range* which can be seen as
-//! the disjoint union of the per-block local intervals for all of the blocks where the value is
-//! live. Together with a `ProgramOrder` which provides a linear ordering of the blocks, the
-//! global live range becomes a linear sequence of disjoint intervals, at most one per block.
-//!
-//! In the special case of a dead value, the global live range is a single interval where the
-//! start and end points are the same. The global live range of a value is never completely
-//! empty.
-//!
-//! # Register interference
-//!
-//! The register allocator uses live ranges to determine if values *interfere*, which means
-//! that they can't be stored in the same register. Two live ranges interfere if and only if
-//! any of their intervals overlap.
-//!
-//! If one live range ends at an instruction that defines another live range, those two live
-//! ranges are not considered to interfere. This is because most ISAs allow instructions to
-//! reuse an input register for an output value. If Cranelift gets support for inline assembly,
-//! we will need to handle *early clobbers* which are output registers that are not allowed to
-//! alias any input registers.
-//!
-//! If `i1 < i2 < i3` are program points, we have:
-//!
-//! - `i1-i2` and `i1-i3` interfere because the intervals overlap.
-//! - `i1-i2` and `i2-i3` don't interfere.
-//! - `i1-i3` and `i2-i2` do interfere because the dead def would clobber the register.
-//! - `i1-i2` and `i2-i2` don't interfere.
-//! - `i2-i3` and `i2-i2` do interfere.
-//!
-//! Because of this behavior around interval end points, live range interference is not
-//! completely equivalent to mathematical intersection of open or half-open intervals.
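The rules above can be stated as a tiny predicate. A sketch with bare integers standing in for
program points (not the crate's types), where each range is given as its `(def, end)` pair:

    /// Interference per the rules above, with ranges given as (def, end) indices.
    fn interferes(a: (u32, u32), b: (u32, u32)) -> bool {
        // Two defs at the same program point always interfere, even if one is dead.
        if a.0 == b.0 {
            return true;
        }
        // Otherwise they interfere iff the later def falls strictly before the
        // earlier range's end point.
        let (first, second) = if a.0 < b.0 { (a, b) } else { (b, a) };
        second.0 < first.1
    }

    // interferes((1, 2), (2, 3)) == false; interferes((1, 3), (2, 2)) == true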
-//!
-//! # Implementation notes
-//!
-//! A few notes about the implementation of the live intervals field `liveins`. This should not
-//! concern someone only looking to use the public interface.
-//!
-//! ## Current representation
-//!
-//! Our current implementation uses a sorted array of compressed intervals, represented by their
-//! boundaries (Block, Inst), sorted by Block. This is a simple data structure, enables
-//! coalescing of intervals easily, and shows some nice performance behavior. See the benchmarks
-//! against using a `bforest::Map` for details.
-//!
-//! ## Block ordering
-//!
-//! The relative order of blocks is used to maintain a sorted list of live-in intervals and to
-//! coalesce adjacent live-in intervals when the prior interval covers the whole block. This
-//! doesn't depend on any property of the program order, so alternative orderings are possible:
-//!
-//! 1. The block layout order. This is what we currently use.
-//! 2. A topological order of the dominator tree. All the live-in intervals would come after
-//!    the def interval.
-//! 3. A numerical order by block number. Performant because it doesn't need to indirect
-//!    through the `ProgramOrder` for comparisons.
-//!
-//! These orderings will cause small differences in coalescing opportunities, but all of them
-//! would do a decent job of compressing a long live range. The numerical order might be
-//! preferable because:
-//!
-//! - It has better performance because block numbers can be compared directly without any
-//!   table lookups.
-//! - If block numbers are not reused, it is safe to allocate new blocks without getting
-//!   spurious live-in intervals from any coalesced representations that happen to cross a new
-//!   block.
-//!
-//! For comparing instructions, the layout order is always what we want.
-//!
-//! ## Alternative representation
-//!
-//! Since a local live-in interval always begins at its block header, it is uniquely described
-//! by its end point instruction alone. We can use the layout to look up the block containing
-//! the end point. This means that a sorted `Vec` would be enough to represent the set of
-//! live-in intervals.
-//!
-//! Coalescing is an important compression technique because some live ranges can span
-//! thousands of blocks. We can represent that by switching to a sorted `Vec` representation
-//! where an `[Block, Inst]` pair represents a coalesced range, while an `Inst` entry without a
-//! preceding `Block` entry represents a single live-in interval.
-//!
-//! This representation is more compact for a live range with many uncoalesced live-in
-//! intervals. It is more complicated to work with, though, so it is probably not worth it. The
-//! performance benefits of switching to a numerical block order only appear if the binary
-//! search is doing block-block comparisons.
-//!
-//! A `BTreeMap` could have been used for the live-in intervals, but it doesn't provide the
-//! necessary API to make coalescing easy, nor does it optimize for our types' sizes.
-//!
-//! Even the specialized `bforest::Map` implementation is slower than a plain sorted array; see
-//! the same benchmarks for details.
-
-use crate::entity::SparseMapValue;
-use crate::ir::{Block, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value};
-use crate::regalloc::affinity::Affinity;
-use core::cmp::Ordering;
-use core::marker::PhantomData;
-use smallvec::SmallVec;
-
-/// Global live range of a single SSA value.
-///
-/// As [explained in the module documentation](index.html#local-live-ranges), the live range of
-/// an SSA value is the disjoint union of a set of intervals, each local to a single block, and
-/// with at most one interval per block. We further distinguish between:
-///
-/// 1. The *def interval* is the local interval in the block where the value is defined, and
-/// 2. The *live-in intervals* are the local intervals in the remaining blocks.
-///
-/// A live-in interval always begins at the block header, while the def interval can begin at
-/// the defining instruction, or at the block header for a block argument value.
-///
-/// All values have a def interval, but a large proportion of values don't have any live-in
-/// intervals. These are called *local live ranges*.
-///
-/// # Program order requirements
-///
-/// The internal representation of a `LiveRange` depends on a consistent `ProgramOrder` both
-/// for ordering instructions inside a block *and* for ordering blocks. The methods that depend
-/// on the ordering take an explicit `ProgramOrder` object, and it is the caller's
-/// responsibility to ensure that the provided ordering is consistent between calls.
-///
-/// In particular, changing the order of blocks or inserting new blocks will invalidate live
-/// ranges.
-///
-/// Inserting new instructions in the layout is safe, but removing instructions is not. Besides
-/// the instructions using or defining their value, `LiveRange` structs can contain references
-/// to branch and jump instructions.
-pub type LiveRange = GenericLiveRange<Layout>;
-
-// See comment of liveins below.
-pub struct Interval {
-    begin: Block,
-    end: Inst,
-}
-
-/// Generic live range implementation.
-///
-/// The intended generic parameter is `PO=Layout`, but tests are simpler with a mock order.
-/// Use `LiveRange` instead of using this generic directly.
-pub struct GenericLiveRange<PO: ProgramOrder> {
-    /// The value described by this live range.
-    /// This member can't be modified in case the live range is stored in a `SparseMap`.
-    value: Value,
-
-    /// The preferred register allocation for this value.
-    pub affinity: Affinity,
-
-    /// The instruction or block header where this value is defined.
-    def_begin: ProgramPoint,
-
-    /// The end point of the def interval. This must always belong to the same block as
-    /// `def_begin`.
-    ///
-    /// We always have `def_begin <= def_end` with equality implying a dead def live range with
-    /// no uses.
-    def_end: ProgramPoint,
-
-    /// Additional live-in intervals sorted in program order.
-    ///
-    /// This vector is empty for most values which are only used in one block.
-    ///
-    /// An entry `block -> inst` means that the live range is live-in to `block`, continuing up
-    /// to `inst` which may belong to a later block in the program order.
-    ///
-    /// The entries are non-overlapping, and none of them overlap the block where the value is
-    /// defined.
-    liveins: SmallVec<[Interval; 2]>,
-
-    po: PhantomData<*const PO>,
-}
-
-/// A simple helper macro to make comparisons more natural to read.
-macro_rules! cmp {
-    ($order:ident, $a:ident > $b:expr) => {
-        $order.cmp($a, $b) == Ordering::Greater
-    };
-    ($order:ident, $a:ident >= $b:expr) => {
-        $order.cmp($a, $b) != Ordering::Less
-    };
-    ($order:ident, $a:ident < $b:expr) => {
-        $order.cmp($a, $b) == Ordering::Less
-    };
-    ($order:ident, $a:ident <= $b:expr) => {
-        $order.cmp($a, $b) != Ordering::Greater
-    };
-}
-
-impl<PO: ProgramOrder> GenericLiveRange<PO> {
-    /// Create a new live range for `value` defined at `def`.
-    ///
-    /// The live range will be created as dead, but it can be extended with `extend_in_block()`.
-    pub fn new(value: Value, def: ProgramPoint, affinity: Affinity) -> Self {
-        Self {
-            value,
-            affinity,
-            def_begin: def,
-            def_end: def,
-            liveins: SmallVec::new(),
-            po: PhantomData,
-        }
-    }
-
-    /// Finds an entry in the compressed set of live-in intervals that contains `block`, or
-    /// returns the position where such a new entry should be inserted.
-    fn lookup_entry_containing_block(&self, block: Block, order: &PO) -> Result<usize, usize> {
-        self.liveins
-            .binary_search_by(|interval| order.cmp(interval.begin, block))
-            .or_else(|n| {
-                // The previous interval's end might cover the searched block.
-                if n > 0 && cmp!(order, block <= self.liveins[n - 1].end) {
-                    Ok(n - 1)
-                } else {
-                    Err(n)
-                }
-            })
-    }
-
-    /// Extend the local interval for `block` so it reaches `to` which must belong to `block`.
-    /// Create a live-in interval if necessary.
-    ///
-    /// If the live range already has a local interval in `block`, extend its end point so it
-    /// includes `to`, and return false.
-    ///
-    /// If the live range did not previously have a local interval in `block`, add one so the
-    /// value is live-in to `block`, extending to `to`. Return true.
-    ///
-    /// The return value can be used to detect if we just learned that the value is live-in to
-    /// `block`. This can trigger recursive extensions in `block`'s CFG predecessor blocks.
-    pub fn extend_in_block(&mut self, block: Block, inst: Inst, order: &PO) -> bool {
-        // First check if we're extending the def interval.
-        //
-        // We're assuming here that `inst` never precedes `def_begin` in the same block, but we
-        // can't check it without a method for getting `inst`'s block.
-        if cmp!(order, block <= self.def_end) && cmp!(order, inst >= self.def_begin) {
-            let inst_pp = inst.into();
-            debug_assert_ne!(
-                inst_pp, self.def_begin,
-                "Can't use value in the defining instruction."
-            );
-            if cmp!(order, inst > self.def_end) {
-                self.def_end = inst_pp;
-            }
-            return false;
-        }
-
-        // Now check if we're extending any of the existing live-in intervals.
-        match self.lookup_entry_containing_block(block, order) {
-            Ok(n) => {
-                // We found one interval and might need to extend it.
-                if cmp!(order, inst <= self.liveins[n].end) {
-                    // Both interval parts are already included in a compressed interval.
-                    return false;
-                }
-
-                // If the instruction at the end is the last instruction before the next block,
-                // coalesce the two intervals:
-                // [ival.begin; ival.end] + [next.begin; next.end] = [ival.begin; next.end]
-                if let Some(next) = &self.liveins.get(n + 1) {
-                    if order.is_block_gap(inst, next.begin) {
-                        // At this point we can choose to remove the current interval or the
-                        // next one; remove the next one to avoid one memory move.
-                        let next_end = next.end;
-                        debug_assert!(cmp!(order, next_end > self.liveins[n].end));
-                        self.liveins[n].end = next_end;
-                        self.liveins.remove(n + 1);
-                        return false;
-                    }
-                }
-
-                // We can't coalesce, just extend the interval.
-                self.liveins[n].end = inst;
-                false
-            }
-
-            Err(n) => {
-                // No interval was found containing the current block: we need to insert a new
-                // one, unless there's a coalescing opportunity with the previous or next one.
-                let coalesce_next = self
-                    .liveins
-                    .get(n)
-                    .filter(|next| order.is_block_gap(inst, next.begin))
-                    .is_some();
-                let coalesce_prev = self
-                    .liveins
-                    .get(n.wrapping_sub(1))
-                    .filter(|prev| order.is_block_gap(prev.end, block))
-                    .is_some();
-
-                match (coalesce_prev, coalesce_next) {
-                    // The new interval is the missing hole between prev and next: we can merge
-                    // them all together.
-                    (true, true) => {
-                        let prev_end = self.liveins[n - 1].end;
-                        debug_assert!(cmp!(order, prev_end <= self.liveins[n].end));
-                        self.liveins[n - 1].end = self.liveins[n].end;
-                        self.liveins.remove(n);
-                    }
-
-                    // Coalesce only with the previous or next one.
-                    (true, false) => {
-                        debug_assert!(cmp!(order, inst >= self.liveins[n - 1].end));
-                        self.liveins[n - 1].end = inst;
-                    }
-                    (false, true) => {
-                        debug_assert!(cmp!(order, block <= self.liveins[n].begin));
-                        self.liveins[n].begin = block;
-                    }
-
-                    (false, false) => {
-                        // No coalescing opportunity, we have to insert.
-                        self.liveins.insert(
-                            n,
-                            Interval {
-                                begin: block,
-                                end: inst,
-                            },
-                        );
-                    }
-                }
-
-                true
-            }
-        }
-    }
-
-    /// Is this the live range of a dead value?
-    ///
-    /// A dead value has no uses, and its live range ends at the same program point where it is
-    /// defined.
-    pub fn is_dead(&self) -> bool {
-        self.def_begin == self.def_end
-    }
-
-    /// Is this a local live range?
-    ///
-    /// A local live range is only used in the same block where it was defined; it has no
-    /// live-in intervals in other blocks.
-    pub fn is_local(&self) -> bool {
-        self.liveins.is_empty()
-    }
-
-    /// Get the program point where this live range is defined.
-    ///
-    /// This will be a block header when the value is a block argument, otherwise it is the
-    /// defining instruction.
-    pub fn def(&self) -> ProgramPoint {
-        self.def_begin
-    }
-
-    /// Move the definition of this value to a new program point.
-    ///
-    /// It is only valid to move the definition within the same block, and it can't be moved
-    /// beyond `def_local_end()`.
-    pub fn move_def_locally(&mut self, def: ProgramPoint) {
-        self.def_begin = def;
-    }
-
-    /// Get the local end-point of this live range in the block where it is defined.
-    ///
-    /// This can be the block header itself in the case of a dead block argument.
-    /// Otherwise, it will be the last local use or branch/jump that can reach a use.
-    pub fn def_local_end(&self) -> ProgramPoint {
-        self.def_end
-    }
-
-    /// Get the local end-point of this live range in a block where it is live-in.
-    ///
-    /// If this live range is not live-in to `block`, return `None`. Otherwise, return the
-    /// end-point of this live range's local interval in `block`.
-    ///
-    /// If the live range is live through all of `block`, the terminator of `block` is a correct
-    /// answer, but it is also possible that an even later program point is returned. So don't
-    /// depend on the returned `Inst` to belong to `block`.
-    pub fn livein_local_end(&self, block: Block, order: &PO) -> Option<Inst> {
-        self.lookup_entry_containing_block(block, order)
-            .and_then(|i| {
-                let inst = self.liveins[i].end;
-                if cmp!(order, block < inst) {
-                    Ok(inst)
-                } else {
-                    // Can be any error type, really, since it's discarded by ok().
-                    Err(i)
-                }
-            })
-            .ok()
-    }
-
-    /// Is this value live-in to `block`?
-    ///
-    /// A block argument is not considered to be live in.
-    pub fn is_livein(&self, block: Block, order: &PO) -> bool {
-        self.livein_local_end(block, order).is_some()
-    }
-
-    /// Get all the live-in intervals.
-    ///
-    /// Note that the intervals are stored in a compressed form so each entry may span multiple
-    /// blocks where the value is live in.
-    pub fn liveins<'a>(&'a self) -> impl Iterator<Item = (Block, Inst)> + 'a {
-        self.liveins
-            .iter()
-            .map(|interval| (interval.begin, interval.end))
-    }
-
-    /// Check if this live range overlaps a definition in `block`.
-    pub fn overlaps_def(&self, def: ExpandedProgramPoint, block: Block, order: &PO) -> bool {
-        // Two defs at the same program point always overlap, even if one is dead.
-        if def == self.def_begin.into() {
-            return true;
-        }
-
-        // Check for an overlap with the local range.
-        if cmp!(order, def >= self.def_begin) && cmp!(order, def < self.def_end) {
-            return true;
-        }
-
-        // Check for an overlap with a live-in range.
-        match self.livein_local_end(block, order) {
-            Some(inst) => cmp!(order, def < inst),
-            None => false,
-        }
-    }
-
-    /// Check if this live range reaches a use at `user` in `block`.
-    pub fn reaches_use(&self, user: Inst, block: Block, order: &PO) -> bool {
-        // Check for an overlap with the local range.
-        if cmp!(order, user > self.def_begin) && cmp!(order, user <= self.def_end) {
-            return true;
-        }
-
-        // Check for an overlap with a live-in range.
-        match self.livein_local_end(block, order) {
-            Some(inst) => cmp!(order, user <= inst),
-            None => false,
-        }
-    }
-
-    /// Check if this live range is killed at `user` in `block`.
-    pub fn killed_at(&self, user: Inst, block: Block, order: &PO) -> bool {
-        self.def_local_end() == user.into() || self.livein_local_end(block, order) == Some(user)
-    }
-}
-
-/// Allow a `LiveRange` to be stored in a `SparseMap` indexed by values.
-impl<PO: ProgramOrder> SparseMapValue<Value> for GenericLiveRange<PO> {
-    fn key(&self) -> Value {
-        self.value
-    }
-}
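For illustration, the query methods combine naturally; a sketch of a "last use in this block?"
check a register allocator might make (the helper name is hypothetical, and `order` is the
function layout):

    fn is_last_use(lr: &LiveRange, user: Inst, block: Block, order: &Layout) -> bool {
        // The use must be reached by the range, and the range must end exactly there.
        lr.reaches_use(user, block, order) && lr.killed_at(user, block, order)
    }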
-
-#[cfg(test)]
-mod tests {
-    use super::{GenericLiveRange, Interval};
-    use crate::entity::EntityRef;
-    use crate::ir::{Block, Inst, Value};
-    use crate::ir::{ExpandedProgramPoint, ProgramOrder};
-    use alloc::vec::Vec;
-    use core::cmp::Ordering;
-
-    // Dummy program order which simply compares indexes.
-    // It is assumed that blocks have indexes that are multiples of 10, and instructions have
-    // indexes in between. `is_block_gap` assumes that terminator instructions have indexes of
-    // the form block * 10 + 1. This is used in the coalesce test.
-    struct ProgOrder {}
-
-    impl ProgramOrder for ProgOrder {
-        fn cmp<A, B>(&self, a: A, b: B) -> Ordering
-        where
-            A: Into<ExpandedProgramPoint>,
-            B: Into<ExpandedProgramPoint>,
-        {
-            fn idx(pp: ExpandedProgramPoint) -> usize {
-                match pp {
-                    ExpandedProgramPoint::Inst(i) => i.index(),
-                    ExpandedProgramPoint::Block(e) => e.index(),
-                }
-            }
-
-            let ia = idx(a.into());
-            let ib = idx(b.into());
-            ia.cmp(&ib)
-        }
-
-        fn is_block_gap(&self, inst: Inst, block: Block) -> bool {
-            inst.index() % 10 == 1 && block.index() / 10 == inst.index() / 10 + 1
-        }
-    }
-
-    impl ProgOrder {
-        // Get the block corresponding to `inst`.
-        fn inst_block(&self, inst: Inst) -> Block {
-            let i = inst.index();
-            Block::new(i - i % 10)
-        }
-
-        // Get the block of a program point.
-        fn pp_block<PP: Into<ExpandedProgramPoint>>(&self, pp: PP) -> Block {
-            match pp.into() {
-                ExpandedProgramPoint::Inst(i) => self.inst_block(i),
-                ExpandedProgramPoint::Block(e) => e,
-            }
-        }
-
-        // Validate the live range invariants.
-        fn validate(&self, lr: &GenericLiveRange<Self>) {
-            // The def interval must cover a single block.
-            let def_block = self.pp_block(lr.def_begin);
-            assert_eq!(def_block, self.pp_block(lr.def_end));
-
-            // Check that the def interval isn't backwards.
-            match self.cmp(lr.def_begin, lr.def_end) {
-                Ordering::Equal => assert!(lr.liveins.is_empty()),
-                Ordering::Greater => {
-                    panic!("Backwards def interval: {}-{}", lr.def_begin, lr.def_end)
-                }
-                Ordering::Less => {}
-            }
-
-            // Check the live-in intervals.
-            let mut prev_end = None;
-            for Interval { begin, end } in lr.liveins.iter() {
-                let begin = *begin;
-                let end = *end;
-
-                assert_eq!(self.cmp(begin, end), Ordering::Less);
-                if let Some(e) = prev_end {
-                    assert_eq!(self.cmp(e, begin), Ordering::Less);
-                }
-
-                assert!(
-                    self.cmp(lr.def_end, begin) == Ordering::Less
-                        || self.cmp(lr.def_begin, end) == Ordering::Greater,
-                    "Interval can't overlap the def block"
-                );
-
-                // Save for next round.
-                prev_end = Some(end);
-            }
-        }
-    }
-
-    // Singleton `ProgramOrder` for tests below.
-    const PO: &'static ProgOrder = &ProgOrder {};
-
-    #[test]
-    fn dead_def_range() {
-        let v0 = Value::new(0);
-        let e0 = Block::new(0);
-        let i1 = Inst::new(1);
-        let i2 = Inst::new(2);
-        let e2 = Block::new(2);
-        let lr = GenericLiveRange::new(v0, i1.into(), Default::default());
-        assert!(lr.is_dead());
-        assert!(lr.is_local());
-        assert_eq!(lr.def(), i1.into());
-        assert_eq!(lr.def_local_end(), i1.into());
-        assert_eq!(lr.livein_local_end(e2, PO), None);
-        PO.validate(&lr);
-
-        // A dead live range overlaps its own def program point.
-        assert!(lr.overlaps_def(i1.into(), e0, PO));
-        assert!(!lr.overlaps_def(i2.into(), e0, PO));
-        assert!(!lr.overlaps_def(e0.into(), e0, PO));
-    }
-
-    #[test]
-    fn dead_arg_range() {
-        let v0 = Value::new(0);
-        let e2 = Block::new(2);
-        let lr = GenericLiveRange::new(v0, e2.into(), Default::default());
-        assert!(lr.is_dead());
-        assert!(lr.is_local());
-        assert_eq!(lr.def(), e2.into());
-        assert_eq!(lr.def_local_end(), e2.into());
-        // The def interval of a block argument does not count as live-in.
-        assert_eq!(lr.livein_local_end(e2, PO), None);
-        PO.validate(&lr);
-    }
-
-    #[test]
-    fn local_def() {
-        let v0 = Value::new(0);
-        let e10 = Block::new(10);
-        let i11 = Inst::new(11);
-        let i12 = Inst::new(12);
-        let i13 = Inst::new(13);
-        let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());
-
-        assert_eq!(lr.extend_in_block(e10, i13, PO), false);
-        PO.validate(&lr);
-        assert!(!lr.is_dead());
-        assert!(lr.is_local());
-        assert_eq!(lr.def(), i11.into());
-        assert_eq!(lr.def_local_end(), i13.into());
-
-        // Extending to an already covered inst should not change anything.
-        assert_eq!(lr.extend_in_block(e10, i12, PO), false);
-        PO.validate(&lr);
-        assert_eq!(lr.def(), i11.into());
-        assert_eq!(lr.def_local_end(), i13.into());
-    }
-
-    #[test]
-    fn local_arg() {
-        let v0 = Value::new(0);
-        let e10 = Block::new(10);
-        let i11 = Inst::new(11);
-        let i12 = Inst::new(12);
-        let i13 = Inst::new(13);
-        let mut lr = GenericLiveRange::new(v0, e10.into(), Default::default());
-
-        // Extending a dead block argument in its own block should not indicate that a live-in
-        // interval was created.
-        assert_eq!(lr.extend_in_block(e10, i12, PO), false);
-        PO.validate(&lr);
-        assert!(!lr.is_dead());
-        assert!(lr.is_local());
-        assert_eq!(lr.def(), e10.into());
-        assert_eq!(lr.def_local_end(), i12.into());
-
-        // Extending to an already covered inst should not change anything.
-        assert_eq!(lr.extend_in_block(e10, i11, PO), false);
-        PO.validate(&lr);
-        assert_eq!(lr.def(), e10.into());
-        assert_eq!(lr.def_local_end(), i12.into());
-
-        // Extending further.
-        assert_eq!(lr.extend_in_block(e10, i13, PO), false);
-        PO.validate(&lr);
-        assert_eq!(lr.def(), e10.into());
-        assert_eq!(lr.def_local_end(), i13.into());
-    }
-
-    #[test]
-    fn global_def() {
-        let v0 = Value::new(0);
-        let e10 = Block::new(10);
-        let i11 = Inst::new(11);
-        let i12 = Inst::new(12);
-        let e20 = Block::new(20);
-        let i21 = Inst::new(21);
-        let i22 = Inst::new(22);
-        let i23 = Inst::new(23);
-        let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());
-
-        assert_eq!(lr.extend_in_block(e10, i12, PO), false);
-
-        // Adding a live-in interval.
-        assert_eq!(lr.extend_in_block(e20, i22, PO), true);
-        PO.validate(&lr);
-        assert_eq!(lr.livein_local_end(e20, PO), Some(i22));
-
-        // Non-extending the live-in.
-        assert_eq!(lr.extend_in_block(e20, i21, PO), false);
-        assert_eq!(lr.livein_local_end(e20, PO), Some(i22));
-
-        // Extending the existing live-in.
-        assert_eq!(lr.extend_in_block(e20, i23, PO), false);
-        PO.validate(&lr);
-        assert_eq!(lr.livein_local_end(e20, PO), Some(i23));
-    }
-
-    #[test]
-    fn coalesce() {
-        let v0 = Value::new(0);
-        let i11 = Inst::new(11);
-        let e20 = Block::new(20);
-        let i21 = Inst::new(21);
-        let e30 = Block::new(30);
-        let i31 = Inst::new(31);
-        let e40 = Block::new(40);
-        let i41 = Inst::new(41);
-        let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());
-
-        assert_eq!(lr.extend_in_block(e30, i31, PO), true);
-        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e30, i31)]);
-
-        // Coalesce to previous
-        assert_eq!(lr.extend_in_block(e40, i41, PO), true);
-        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e30, i41)]);
-
-        // Coalesce to next
-        assert_eq!(lr.extend_in_block(e20, i21, PO), true);
-        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i41)]);
-
-        let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());
-
-        assert_eq!(lr.extend_in_block(e40, i41, PO), true);
-        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e40, i41)]);
-
-        assert_eq!(lr.extend_in_block(e20, i21, PO), true);
-        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i21), (e40, i41)]);
-
-        // Coalesce to previous and next
-        assert_eq!(lr.extend_in_block(e30, i31, PO), true);
-        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i41)]);
-    }
-}
diff --git a/cranelift/codegen/src/regalloc/mod.rs b/cranelift/codegen/src/regalloc/mod.rs
deleted file mode 100644
index 581acc408e..0000000000
--- a/cranelift/codegen/src/regalloc/mod.rs
+++ /dev/null
@@ -1,26 +0,0 @@
-//! Register allocation.
-//!
-//! This module contains data structures and algorithms used for register allocation.
-
-pub mod coloring;
-pub mod live_value_tracker;
-pub mod liveness;
-pub mod liverange;
-pub mod register_set;
-pub mod virtregs;
-
-mod affinity;
-mod branch_splitting;
-mod coalescing;
-mod context;
-mod diversion;
-mod pressure;
-mod reload;
-mod safepoint;
-mod solver;
-mod spilling;
-
-pub use self::context::Context;
-pub use self::diversion::{EntryRegDiversions, RegDiversions};
-pub use self::register_set::RegisterSet;
-pub use self::safepoint::emit_stack_maps;
diff --git a/cranelift/codegen/src/regalloc/pressure.rs b/cranelift/codegen/src/regalloc/pressure.rs
deleted file mode 100644
index aa83037041..0000000000
--- a/cranelift/codegen/src/regalloc/pressure.rs
+++ /dev/null
@@ -1,371 +0,0 @@
-//! Register pressure tracking.
-//!
-//! SSA-based register allocation depends on a spilling phase that "lowers register pressure
-//! sufficiently". This module defines the data structures needed to measure register pressure
-//! accurately enough to guarantee that the coloring phase will not run out of registers.
-//!
-//! Ideally, measuring register pressure amounts to simply counting the number of live registers
-//! at any given program point. This simplistic method has two problems:
-//!
-//! 1. Registers are not interchangeable. Most ISAs have separate integer and floating-point
-//!    register banks, so we need to at least count the number of live registers in each
-//!    register bank separately.
-//!
-//! 2. Some ISAs have complicated register aliasing properties. In particular, the 32-bit ARM
-//!    ISA has a floating-point register bank where two 32-bit registers alias one 64-bit
-//!    register. This makes it difficult to accurately measure register pressure.
-//!
-//! This module deals with the problems via *register banks* and *top-level register classes*.
-//! Register classes in different register banks are completely independent, so we can count
-//! registers in one bank without worrying about the other bank at all.
-//!
-//! All register classes have a unique top-level register class, and we will count registers
-//! for each top-level register class individually. However, a register bank can have multiple
-//! top-level register classes that interfere with each other, so all top-level counts need to
-//! be considered when determining how many more registers can be allocated.
-//!
-//! Currently, the only register bank with multiple top-level register classes is the `arm32`
-//! floating-point register bank which has `S`, `D`, and `Q` top-level classes.
-//!
-//! # Base and transient counts
-//!
-//! We maintain two separate register counts per top-level register class: base counts and
-//! transient counts. The base counts are adjusted with the `take` and `free` functions. The
-//! transient counts are adjusted with `take_transient` and `free_transient`.
-
-// Remove once we're using the pressure tracker.
-#![allow(dead_code)]
-
-use crate::isa::registers::{RegClass, RegClassMask, RegInfo};
-use crate::regalloc::RegisterSet;
-use core::cmp::min;
-use core::fmt;
-use core::iter::ExactSizeIterator;
-use cranelift_codegen_shared::constants::MAX_TRACKED_TOP_RCS;
-
-/// Information per top-level register class.
-///
-/// Everything but the counts is static information computed from the constructor arguments.
-#[derive(Default)]
-struct TopRC {
-    /// Number of registers currently used from this register class.
-    base_count: u32,
-    transient_count: u32,
-
-    /// Max number of registers that can be allocated.
-    limit: u32,
-
-    /// Register units per register.
-    width: u8,
-
-    /// The first aliasing top-level RC.
-    first_toprc: u8,
-
-    /// The number of aliasing top-level RCs.
-    num_toprcs: u8,
-}
-
-impl TopRC {
-    fn total_count(&self) -> u32 {
-        self.base_count + self.transient_count
-    }
-}
-
-pub struct Pressure {
-    /// Bit mask of top-level register classes that are aliased by other top-level register
-    /// classes. Unaliased register classes can use a simpler interference algorithm.
-    aliased: RegClassMask,
-
-    /// Current register counts per top-level register class.
-    toprc: [TopRC; MAX_TRACKED_TOP_RCS],
-}
-
-impl Pressure {
-    /// Create a new register pressure tracker.
-    pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Self {
-        let mut p = Self {
-            aliased: 0,
-            toprc: Default::default(),
-        };
-
-        // Get the layout of aliasing top-level register classes from the register banks.
-        for bank in reginfo.banks {
-            let first = bank.first_toprc;
-            let num = bank.num_toprcs;
-
-            if bank.pressure_tracking {
-                for rc in &mut p.toprc[first..first + num] {
-                    rc.first_toprc = first as u8;
-                    rc.num_toprcs = num as u8;
-                }
-
-                // Flag the top-level register classes with aliases.
-                if num > 1 {
-                    p.aliased |= ((1 << num) - 1) << first;
-                }
-            } else {
-                // This bank has no pressure tracking, so its top-level register classes may
-                // exceed `MAX_TRACKED_TOP_RCS`. Fill in dummy entries.
-                for rc in &mut p.toprc[first..min(first + num, MAX_TRACKED_TOP_RCS)] {
-                    // These aren't used if we don't set the `aliased` bit.
-                    rc.first_toprc = !0;
-                    rc.limit = !0;
-                }
-            }
-        }
-
-        // Compute per-class limits from `usable`.
-        for (toprc, rc) in p
-            .toprc
-            .iter_mut()
-            .take_while(|t| t.num_toprcs > 0)
-            .zip(reginfo.classes)
-        {
-            toprc.limit = usable.iter(rc).len() as u32;
-            toprc.width = rc.width;
-        }
-
-        p
-    }
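As a usage sketch (the helper is illustrative, not from the deleted file): a solver can claim
registers transiently while constraints are still tentative, then commit them to the base count:

    /// Tentatively reserve a register from `rc`. On failure, the returned mask
    /// names the top-level classes that must be eased (spilled from) first.
    fn color_value(pressure: &mut Pressure, rc: RegClass) -> Result<(), RegClassMask> {
        // Transient counts let the caller back out with `reset_transient()`...
        pressure.take_transient(rc)?;
        // ...or commit the claim once the instruction's constraints are satisfiable.
        pressure.preserve_transient();
        Ok(())
    }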
- /// - /// If it is possible to allocate one more register from `rc`'s top-level register class, - /// returns 0. - /// - /// If not, returns a bit-mask of top-level register classes that are interfering. Register - /// pressure should be eased in one of the returned top-level register classes before calling - /// `can_take()` to check again. - fn check_avail(&self, rc: RegClass) -> RegClassMask { - let entry = match self.toprc.get(rc.toprc as usize) { - None => return 0, // Not a pressure tracked bank. - Some(e) => e, - }; - let mask = 1 << rc.toprc; - if (self.aliased & mask) == 0 { - // This is a simple unaliased top-level register class. - if entry.total_count() < entry.limit { - 0 - } else { - mask - } - } else { - // This is the more complicated case. The top-level register class has aliases. - self.check_avail_aliased(entry) - } - } - - /// Check for an available register in a top-level register class that may have aliases. - /// - /// This is the out-of-line slow path for `check_avail()`. - fn check_avail_aliased(&self, entry: &TopRC) -> RegClassMask { - let first = usize::from(entry.first_toprc); - let num = usize::from(entry.num_toprcs); - let width = u32::from(entry.width); - let ulimit = entry.limit * width; - - // Count up the number of available register units. - let mut units = 0; - for (rc, rci) in self.toprc[first..first + num].iter().zip(first..) { - let rcw = u32::from(rc.width); - // If `rc.width` is smaller than `width`, each register in `rc` could potentially block - // one of ours. This is assuming that none of the smaller registers are straddling the - // bigger ones. - // - // If `rc.width` is larger than `width`, we are also assuming that the registers are - // aligned and `rc.width` is a multiple of `width`. - let u = if rcw < width { - // We can't take more than the total number of register units in the class. - // This matters for arm32 S-registers which can only ever lock out 16 D-registers. - min(rc.total_count() * width, rc.limit * rcw) - } else { - rc.total_count() * rcw - }; - - // If this top-level RC on its own is responsible for exceeding our limit, return it - // early to guarantee that registers here are spilled before spilling other registers - // unnecessarily. - if u >= ulimit { - return 1 << rci; - } - - units += u; - } - - // We've counted up the worst-case number of register units claimed by all aliasing - // classes. Compare to the unit limit in this class. - if units < ulimit { - 0 - } else { - // Registers need to be spilled from any one of the aliasing classes. - ((1 << num) - 1) << first - } - } - - /// Take a register from `rc`. - /// - /// This does not check if there are enough registers available. - pub fn take(&mut self, rc: RegClass) { - if let Some(t) = self.toprc.get_mut(rc.toprc as usize) { - t.base_count += 1; - } - } - - /// Free a register in `rc`. - pub fn free(&mut self, rc: RegClass) { - if let Some(t) = self.toprc.get_mut(rc.toprc as usize) { - t.base_count -= 1; - } - } - - /// Reset all counts to 0, both base and transient. - pub fn reset(&mut self) { - for e in &mut self.toprc { - e.base_count = 0; - e.transient_count = 0; - } - } - - /// Try to increment a transient counter. - /// - /// This will fail if there are not enough registers available. 
- pub fn take_transient(&mut self, rc: RegClass) -> Result<(), RegClassMask> {
- let mask = self.check_avail(rc);
- if mask == 0 {
- if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
- t.transient_count += 1;
- }
-
- Ok(())
- } else {
- Err(mask)
- }
- }
-
- /// Reset all transient counts to 0.
- pub fn reset_transient(&mut self) {
- for e in &mut self.toprc {
- e.transient_count = 0;
- }
- }
-
- /// Preserve the transient counts by transferring them to the base counts.
- pub fn preserve_transient(&mut self) {
- for e in &mut self.toprc {
- e.base_count += e.transient_count;
- e.transient_count = 0;
- }
- }
-}
-
-impl fmt::Display for Pressure {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "Pressure[")?;
- for rc in &self.toprc {
- if rc.limit > 0 && rc.limit < !0 {
- write!(f, " {}+{}/{}", rc.base_count, rc.transient_count, rc.limit)?;
- }
- }
- write!(f, " ]")
- }
-}
-
-#[cfg(test)]
-#[cfg(feature = "arm32")]
-mod tests {
- use super::Pressure;
- use crate::isa::registers::{RegBank, RegClassData};
- use crate::isa::{RegClass, RegInfo, RegUnit};
- use crate::regalloc::RegisterSet;
- use core::borrow::Borrow;
-
- // Arm32 `TargetIsa` is now `TargetIsaAdapter`, which does not hold any info
- // about registers, so we directly access `INFO` from registers-arm32.rs.
- include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs"));
-
- // Get a register class by name.
- fn rc_by_name(reginfo: &RegInfo, name: &str) -> RegClass {
- reginfo
- .classes
- .iter()
- .find(|rc| rc.name == name)
- .expect("Can't find named register class.")
- }
-
- #[test]
- fn basic_counting() {
- let reginfo = INFO.borrow();
- let gpr = rc_by_name(&reginfo, "GPR");
- let s = rc_by_name(&reginfo, "S");
-
- let regs = RegisterSet::new();
-
- let mut pressure = Pressure::new(&reginfo, &regs);
- let mut count = 0;
- while pressure.check_avail(gpr) == 0 {
- pressure.take(gpr);
- count += 1;
- }
- assert_eq!(count, 16);
- assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
- assert_eq!(pressure.check_avail(s), 0);
- pressure.free(gpr);
- assert_eq!(pressure.check_avail(gpr), 0);
- pressure.take(gpr);
- assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
- assert_eq!(pressure.check_avail(s), 0);
- pressure.reset();
- assert_eq!(pressure.check_avail(gpr), 0);
- assert_eq!(pressure.check_avail(s), 0);
- }
-
- #[test]
- fn arm_float_bank() {
- let reginfo = INFO.borrow();
- let s = rc_by_name(&reginfo, "S");
- let d = rc_by_name(&reginfo, "D");
- let q = rc_by_name(&reginfo, "Q");
- let regs = RegisterSet::new();
-
- let mut pressure = Pressure::new(&reginfo, &regs);
- assert_eq!(pressure.check_avail(s), 0);
- assert_eq!(pressure.check_avail(d), 0);
- assert_eq!(pressure.check_avail(q), 0);
-
- // Allocating a single S-register should not affect availability.
- pressure.take(s);
- assert_eq!(pressure.check_avail(s), 0);
- assert_eq!(pressure.check_avail(d), 0);
- assert_eq!(pressure.check_avail(q), 0);
-
- pressure.take(d);
- assert_eq!(pressure.check_avail(s), 0);
- assert_eq!(pressure.check_avail(d), 0);
- assert_eq!(pressure.check_avail(q), 0);
-
- pressure.take(q);
- assert_eq!(pressure.check_avail(s), 0);
- assert_eq!(pressure.check_avail(d), 0);
- assert_eq!(pressure.check_avail(q), 0);
-
- // Take a total of 16 S-regs.
- for _ in 1..16 {
- pressure.take(s);
- }
- assert_eq!(pressure.check_avail(s), 0);
- assert_eq!(pressure.check_avail(d), 0);
- assert_eq!(pressure.check_avail(q), 0);
-
- // We've taken 16 S, 1 D, and 1 Q. There should be 6 more Qs.
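The six-Q headroom asserted here can be reproduced with a small stand-alone sketch of the `check_avail_aliased` unit arithmetic. The bank layout it assumes (64 register units, with S/D/Q registers one, two, and four units wide, i.e. limits of 32 S, 32 D, and 16 Q) is inferred from this test's assertions, not stated in the file:

```rust
// Headroom in whole Q-registers, following the counting rule above: a class
// narrower than Q is clamped to its own total unit count.
fn q_headroom(s_taken: u32, d_taken: u32, q_taken: u32) -> u32 {
    let ulimit = 16 * 4; // Q limit in units: 16 Q-registers x 4 units each.
    let s_units = (s_taken * 4).min(32); // 32 S-regs x 1 unit.
    let d_units = (d_taken * 4).min(64); // 32 D-regs x 2 units.
    let q_units = q_taken * 4;
    (ulimit - s_units - d_units - q_units) / 4
}

fn main() {
    // 16 S + 1 D + 1 Q taken: 32 + 4 + 4 = 40 of 64 units are claimed,
    // leaving room for exactly (64 - 40) / 4 = 6 more Q-registers.
    assert_eq!(q_headroom(16, 1, 1), 6);
}
```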
- for _ in 0..6 { - assert_eq!(pressure.check_avail(d), 0); - assert_eq!(pressure.check_avail(q), 0); - pressure.take(q); - } - - // We've taken 16 S, 1 D, and 7 Qs. - assert!(pressure.check_avail(s) != 0); - assert_eq!(pressure.check_avail(d), 0); - assert!(pressure.check_avail(q) != 0); - } -} diff --git a/cranelift/codegen/src/regalloc/register_set.rs b/cranelift/codegen/src/regalloc/register_set.rs deleted file mode 100644 index 52b8a6fa0a..0000000000 --- a/cranelift/codegen/src/regalloc/register_set.rs +++ /dev/null @@ -1,391 +0,0 @@ -//! Set of allocatable registers as a bit vector of register units. -//! -//! While allocating registers, we need to keep track of which registers are available and which -//! registers are in use. Since registers can alias in different ways, we track this via the -//! "register unit" abstraction. Every register contains one or more register units. Registers that -//! share a register unit can't be in use at the same time. - -use crate::isa::registers::{RegClass, RegInfo, RegUnit, RegUnitMask}; -use core::char; -use core::fmt; -use core::iter::ExactSizeIterator; -use core::mem::size_of_val; - -/// Set of registers available for allocation. -#[derive(Clone)] -pub struct RegisterSet { - avail: RegUnitMask, -} - -// Given a register class and a register unit in the class, compute a word index and a bit mask of -// register units representing that register. -// -// Note that a register is not allowed to straddle words. -fn bitmask(rc: RegClass, reg: RegUnit) -> (usize, u32) { - // Bit mask representing the register. It is `rc.width` consecutive units. - let width_bits = (1 << rc.width) - 1; - // Index into avail[] of the word containing `reg`. - let word_index = (reg / 32) as usize; - // The actual bits in the word that cover `reg`. - let reg_bits = width_bits << (reg % 32); - - (word_index, reg_bits) -} - -impl RegisterSet { - /// Create a new register set with all registers available. - /// - /// Note that this includes *all* registers. Query the `TargetIsa` object to get a set of - /// allocatable registers where reserved registers have been filtered out. - pub fn new() -> Self { - Self { avail: [!0; 3] } - } - - /// Create a new register set with no registers available. - pub fn empty() -> Self { - Self { avail: [0; 3] } - } - - /// Returns `true` if the specified register is available. - pub fn is_avail(&self, rc: RegClass, reg: RegUnit) -> bool { - let (idx, bits) = bitmask(rc, reg); - (self.avail[idx] & bits) == bits - } - - /// Allocate `reg` from `rc` so it is no longer available. - /// - /// It is an error to take a register that doesn't have all of its register units available. - pub fn take(&mut self, rc: RegClass, reg: RegUnit) { - let (idx, bits) = bitmask(rc, reg); - debug_assert!( - (self.avail[idx] & bits) == bits, - "{}:{} not available in {}", - rc, - rc.info.display_regunit(reg), - self.display(rc.info) - ); - self.avail[idx] &= !bits; - } - - /// Return `reg` and all of its register units to the set of available registers. - pub fn free(&mut self, rc: RegClass, reg: RegUnit) { - let (idx, bits) = bitmask(rc, reg); - debug_assert!( - (self.avail[idx] & bits) == 0, - "{}:{} is already free in {}", - rc, - rc.info.display_regunit(reg), - self.display(rc.info) - ); - self.avail[idx] |= bits; - } - - /// Return an iterator over all available registers belonging to the register class `rc`. - /// - /// This doesn't allocate anything from the set; use `take()` for that. 
- pub fn iter(&self, rc: RegClass) -> RegSetIter {
- // Start by copying the RC mask. It is a single set bit for each register in the class.
- let mut rsi = RegSetIter { regs: rc.mask };
-
- // Mask out the unavailable units.
- for idx in 0..self.avail.len() {
- // If a single unit in a register is unavailable, the whole register can't be used. If
- // a register straddles a word boundary, it will be marked as unavailable. There's an
- // assertion in `cranelift-codegen/meta/src/cdsl/regs.rs` to check for that.
- for i in 0..rc.width {
- rsi.regs[idx] &= self.avail[idx] >> i;
- }
- }
- rsi
- }
-
- /// Check if any register units allocated out of this set interferes with units allocated out
- /// of `other`.
- ///
- /// This assumes that unused bits are 1.
- pub fn interferes_with(&self, other: &Self) -> bool {
- self.avail
- .iter()
- .zip(&other.avail)
- .any(|(&x, &y)| (x | y) != !0)
- }
-
- /// Intersect this set of registers with `other`. This has the effect of removing any register
- /// units from this set that are not in `other`.
- pub fn intersect(&mut self, other: &Self) {
- for (x, &y) in self.avail.iter_mut().zip(&other.avail) {
- *x &= y;
- }
- }
-
- /// Return an object that can display this register set, using the register info from the
- /// target ISA.
- pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayRegisterSet<'a> {
- DisplayRegisterSet(self.clone(), regs.into())
- }
-}
-
-/// Iterator over available registers in a register class.
-#[derive(Clone)]
-pub struct RegSetIter {
- regs: RegUnitMask,
-}
-
-impl Iterator for RegSetIter {
- type Item = RegUnit;
-
- fn next(&mut self) -> Option<Self::Item> {
- let mut unit_offset = 0;
-
- // Find the first set bit in `self.regs`.
- for word in &mut self.regs {
- if *word != 0 {
- // Compute the register unit number from the lowest set bit in the word.
- let unit = unit_offset + word.trailing_zeros() as RegUnit;
-
- // Clear that lowest bit so we won't find it again.
- *word &= *word - 1;
-
- return Some(unit);
- }
- // How many register units were there in the word? This is a constant 32 for `u32` etc.
- unit_offset += 8 * size_of_val(word) as RegUnit;
- }
-
- // All of `self.regs` is 0.
- None
- }
-
- fn size_hint(&self) -> (usize, Option<usize>) {
- let bits = self.regs.iter().map(|&w| w.count_ones() as usize).sum();
- (bits, Some(bits))
- }
-}
-
-impl RegSetIter {
- pub fn rnext(&mut self) -> Option<RegUnit> {
- let num_words = self.regs.len();
- let bits_per_word = 8 * size_of_val(&self.regs[0]);
-
- // Find the last set bit in `self.regs`.
- for i in 0..num_words {
- let word_ix = num_words - 1 - i;
-
- let word = &mut self.regs[word_ix];
- if *word != 0 {
- let lzeroes = word.leading_zeros() as usize;
-
- // Clear that highest bit so we won't find it again.
- *word &= !(1 << (bits_per_word - 1 - lzeroes));
-
- return Some((word_ix * bits_per_word + bits_per_word - 1 - lzeroes) as RegUnit);
- }
- }
-
- // All of `self.regs` is 0.
- None
- }
-}
-
-impl ExactSizeIterator for RegSetIter {}
-
-/// Displaying a `RegisterSet` correctly requires the associated `RegInfo` from the target ISA.
-pub struct DisplayRegisterSet<'a>(RegisterSet, Option<&'a RegInfo>);
-
-impl<'a> fmt::Display for DisplayRegisterSet<'a> {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "[")?;
- match self.1 {
- None => {
- for w in &self.0.avail {
- write!(f, " #{:08x}", w)?;
- }
- }
- Some(reginfo) => {
- let toprcs = reginfo
- .banks
- .iter()
- .map(|b| b.first_toprc + b.num_toprcs)
- .max()
- .expect("No register banks");
- for rc in &reginfo.classes[0..toprcs] {
- if rc.width == 1 {
- let bank = &reginfo.banks[rc.bank as usize];
- write!(f, " {}: ", rc)?;
- for offset in 0..bank.units {
- let reg = bank.first_unit + offset;
- if !rc.contains(reg) {
- continue;
- }
- if !self.0.is_avail(rc, reg) {
- write!(f, "-")?;
- continue;
- }
- // Display individual registers as either the second letter of their
- // name or the last digit of their number.
- // This works for x86 (rax, rbx, ...) and for numbered regs.
- write!(
- f,
- "{}",
- bank.names
- .get(offset as usize)
- .and_then(|name| name.chars().nth(1))
- .unwrap_or_else(|| char::from_digit(
- u32::from(offset % 10),
- 10
- )
- .unwrap())
- )?;
- }
- }
- }
- }
- }
- write!(f, " ]")
- }
-}
-
-impl fmt::Display for RegisterSet {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- self.display(None).fmt(f)
- }
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
- use crate::isa::registers::{RegClass, RegClassData};
- use alloc::vec::Vec;
-
- // Register classes for testing.
- const GPR: RegClass = &RegClassData {
- name: "GPR",
- index: 0,
- width: 1,
- bank: 0,
- toprc: 0,
- first: 28,
- subclasses: 0,
- mask: [0xf0000000, 0x0000000f, 0],
- info: &INFO,
- pinned_reg: None,
- };
-
- const DPR: RegClass = &RegClassData {
- name: "DPR",
- index: 0,
- width: 2,
- bank: 0,
- toprc: 0,
- first: 28,
- subclasses: 0,
- mask: [0x50000000, 0x0000000a, 0],
- info: &INFO,
- pinned_reg: None,
- };
-
- const INFO: RegInfo = RegInfo {
- banks: &[],
- classes: &[],
- };
-
- const RSI_1: RegSetIter = RegSetIter {
- regs: [0x31415927, 0x27182818, 0x14141356],
- };
-
- const RSI_2: RegSetIter = RegSetIter {
- regs: [0x00000000, 0x00000000, 0x00000000],
- };
-
- const RSI_3: RegSetIter = RegSetIter {
- regs: [0xffffffff, 0xffffffff, 0xffffffff],
- };
-
- fn reverse_regset_iteration_work(rsi: &RegSetIter) {
- // Check the reverse iterator by comparing its output with the forward iterator.
- let rsi_f = (*rsi).clone();
- let results_f = rsi_f.collect::<Vec<RegUnit>>();
-
- let mut rsi_r = (*rsi).clone();
- let mut results_r = Vec::<RegUnit>::new();
- while let Some(r) = rsi_r.rnext() {
- results_r.push(r);
- }
-
- let len_f = results_f.len();
- let len_r = results_r.len();
- assert_eq!(len_f, len_r);
-
- for i in 0..len_f {
- assert_eq!(results_f[i], results_r[len_f - 1 - i]);
- }
- }
-
- #[test]
- fn reverse_regset_iteration() {
- reverse_regset_iteration_work(&RSI_1);
- reverse_regset_iteration_work(&RSI_2);
- reverse_regset_iteration_work(&RSI_3);
- }
-
- #[test]
- fn put_and_take() {
- let mut regs = RegisterSet::new();
-
- // `GPR` has units 28-36.
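As a sanity check on those mask constants, here is a small stand-alone sketch (assuming the same three-word, 96-unit layout as `RegUnitMask`) that decodes them into unit numbers:

```rust
// Decode a three-word class mask into unit numbers; each set bit marks the
// first register unit of one register in the class.
fn units_of(mask: [u32; 3]) -> Vec<u32> {
    (0..96u32)
        .filter(|&u| mask[(u / 32) as usize] >> (u % 32) & 1 != 0)
        .collect()
}

fn main() {
    // GPR (width 1): bits 28..=31 of word 0 plus bits 0..=3 of word 1,
    // i.e. the eight units 28..=35.
    assert_eq!(units_of([0xf000_0000, 0x0000_000f, 0]).len(), 8);
    // DPR (width 2): registers starting at units 28, 30, 33 and 35,
    // matching the `iter(DPR)` assertions below.
    assert_eq!(units_of([0x5000_0000, 0x0000_000a, 0]), [28, 30, 33, 35]);
}
```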
- assert_eq!(regs.iter(GPR).len(), 8);
- assert_eq!(regs.iter(GPR).count(), 8);
- assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [28, 30, 33, 35]);
-
- assert!(regs.is_avail(GPR, 29));
- regs.take(&GPR, 29);
- assert!(!regs.is_avail(GPR, 29));
-
- assert_eq!(regs.iter(GPR).count(), 7);
- assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
-
- assert!(regs.is_avail(GPR, 30));
- regs.take(&GPR, 30);
- assert!(!regs.is_avail(GPR, 30));
-
- assert_eq!(regs.iter(GPR).count(), 6);
- assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
-
- assert!(regs.is_avail(GPR, 32));
- regs.take(&GPR, 32);
- assert!(!regs.is_avail(GPR, 32));
-
- assert_eq!(regs.iter(GPR).count(), 5);
- assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
-
- regs.free(&GPR, 30);
- assert!(regs.is_avail(GPR, 30));
- assert!(!regs.is_avail(GPR, 29));
- assert!(!regs.is_avail(GPR, 32));
-
- assert_eq!(regs.iter(GPR).count(), 6);
- assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
-
- regs.free(&GPR, 32);
- assert!(regs.is_avail(GPR, 31));
- assert!(!regs.is_avail(GPR, 29));
- assert!(regs.is_avail(GPR, 32));
-
- assert_eq!(regs.iter(GPR).count(), 7);
- assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
- }
-
- #[test]
- fn interference() {
- let mut regs1 = RegisterSet::new();
- let mut regs2 = RegisterSet::new();
-
- assert!(!regs1.interferes_with(&regs2));
- regs1.take(&GPR, 32);
- assert!(!regs1.interferes_with(&regs2));
- regs2.take(&GPR, 31);
- assert!(!regs1.interferes_with(&regs2));
- regs1.intersect(&regs2);
- assert!(regs1.interferes_with(&regs2));
- }
-}
diff --git a/cranelift/codegen/src/regalloc/reload.rs b/cranelift/codegen/src/regalloc/reload.rs
deleted file mode 100644
index d853ab5b18..0000000000
--- a/cranelift/codegen/src/regalloc/reload.rs
+++ /dev/null
@@ -1,484 +0,0 @@
-//! Reload pass
-//!
-//! The reload pass runs between the spilling and coloring passes. Its primary responsibility is to
-//! insert `spill` and `fill` instructions such that instruction operands expecting a register will
-//! get a value with register affinity, and operands expecting a stack slot will get a value with
-//! stack affinity.
-//!
-//! The secondary responsibility of the reload pass is to reuse values in registers as much as
-//! possible to minimize the number of `fill` instructions needed. This must not cause the register
-//! pressure limits to be exceeded.
-
-use crate::cursor::{Cursor, EncCursor};
-use crate::dominator_tree::DominatorTree;
-use crate::entity::{SparseMap, SparseMapValue};
-use crate::ir::{AbiParam, ArgumentLoc, InstBuilder};
-use crate::ir::{Block, Function, Inst, InstructionData, Opcode, Value, ValueLoc};
-use crate::isa::RegClass;
-use crate::isa::{ConstraintKind, EncInfo, Encoding, RecipeConstraints, TargetIsa};
-use crate::regalloc::affinity::Affinity;
-use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
-use crate::regalloc::liveness::Liveness;
-use crate::timing;
-use crate::topo_order::TopoOrder;
-use alloc::vec::Vec;
-
-/// Reusable data structures for the reload pass.
-pub struct Reload {
- candidates: Vec<ReloadCandidate>,
- reloads: SparseMap<Value, ReloadedValue>,
-}
-
-/// Context data structure that gets instantiated once per pass.
-struct Context<'a> {
- cur: EncCursor<'a>,
-
- // Cached ISA information.
- // We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object.
- encinfo: EncInfo,
-
- // References to contextual data structures we need.
- domtree: &'a DominatorTree,
- liveness: &'a mut Liveness,
- topo: &'a mut TopoOrder,
-
- candidates: &'a mut Vec<ReloadCandidate>,
- reloads: &'a mut SparseMap<Value, ReloadedValue>,
-}
-
-impl Reload {
- /// Create a new blank reload pass.
- pub fn new() -> Self {
- Self {
- candidates: Vec::new(),
- reloads: SparseMap::new(),
- }
- }
-
- /// Clear all data structures in this reload pass.
- pub fn clear(&mut self) {
- self.candidates.clear();
- self.reloads.clear();
- }
-
- /// Run the reload algorithm over `func`.
- pub fn run(
- &mut self,
- isa: &dyn TargetIsa,
- func: &mut Function,
- domtree: &DominatorTree,
- liveness: &mut Liveness,
- topo: &mut TopoOrder,
- tracker: &mut LiveValueTracker,
- ) {
- let _tt = timing::ra_reload();
- log::trace!("Reload for:\n{}", func.display(isa));
- let mut ctx = Context {
- cur: EncCursor::new(func, isa),
- encinfo: isa.encoding_info(),
- domtree,
- liveness,
- topo,
- candidates: &mut self.candidates,
- reloads: &mut self.reloads,
- };
- ctx.run(tracker)
- }
-}
-
-/// A reload candidate.
-///
-/// This represents a stack value that is used by the current instruction where a register is
-/// needed.
-struct ReloadCandidate {
- argidx: usize,
- value: Value,
- regclass: RegClass,
-}
-
-/// A reloaded value.
-///
-/// This represents a value that has been reloaded into a register value from the stack.
-struct ReloadedValue {
- stack: Value,
- reg: Value,
-}
-
-impl SparseMapValue<Value> for ReloadedValue {
- fn key(&self) -> Value {
- self.stack
- }
-}
-
-impl<'a> Context<'a> {
- fn run(&mut self, tracker: &mut LiveValueTracker) {
- self.topo.reset(self.cur.func.layout.blocks());
- while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) {
- self.visit_block(block, tracker);
- }
- }
-
- fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) {
- log::trace!("Reloading {}:", block);
- self.visit_block_header(block, tracker);
- tracker.drop_dead_params();
-
- // visit_block_header() places us at the first interesting instruction in the block.
- while let Some(inst) = self.cur.current_inst() {
- if !self.cur.func.dfg[inst].opcode().is_ghost() {
- // This instruction either has an encoding or has ABI constraints, so visit it to
- // insert spills and fills as needed.
- let encoding = self.cur.func.encodings[inst];
- self.visit_inst(block, inst, encoding, tracker);
- tracker.drop_dead(inst);
- } else {
- // This is a ghost instruction with no encoding and no extra constraints, so we can
- // just skip over it.
- self.cur.next_inst();
- }
- }
- }
-
- /// Process the block parameters. Move to the next instruction in the block to be processed.
- fn visit_block_header(&mut self, block: Block, tracker: &mut LiveValueTracker) {
- let (liveins, args) = tracker.block_top(
- block,
- &self.cur.func.dfg,
- self.liveness,
- &self.cur.func.layout,
- self.domtree,
- );
-
- if self.cur.func.layout.entry_block() == Some(block) {
- debug_assert_eq!(liveins.len(), 0);
- self.visit_entry_params(block, args);
- } else {
- self.visit_block_params(block, args);
- }
- }
-
- /// Visit the parameters on the entry block.
- /// These values have ABI constraints from the function signature.
- fn visit_entry_params(&mut self, block: Block, args: &[LiveValue]) { - debug_assert_eq!(self.cur.func.signature.params.len(), args.len()); - self.cur.goto_first_inst(block); - - for (arg_idx, arg) in args.iter().enumerate() { - let abi = self.cur.func.signature.params[arg_idx]; - match abi.location { - ArgumentLoc::Reg(_) => { - if arg.affinity.is_stack() { - // An incoming register parameter was spilled. Replace the parameter value - // with a temporary register value that is immediately spilled. - let reg = self - .cur - .func - .dfg - .replace_block_param(arg.value, abi.value_type); - let affinity = Affinity::abi(&abi, self.cur.isa); - self.liveness.create_dead(reg, block, affinity); - self.insert_spill(block, arg.value, reg); - } - } - ArgumentLoc::Stack(_) => { - debug_assert!(arg.affinity.is_stack()); - } - ArgumentLoc::Unassigned => panic!("Unexpected ABI location"), - } - } - } - - fn visit_block_params(&mut self, block: Block, _args: &[LiveValue]) { - self.cur.goto_first_inst(block); - } - - /// Process the instruction pointed to by `pos`, and advance the cursor to the next instruction - /// that needs processing. - fn visit_inst( - &mut self, - block: Block, - inst: Inst, - encoding: Encoding, - tracker: &mut LiveValueTracker, - ) { - self.cur.use_srcloc(inst); - - // Get the operand constraints for `inst` that we are trying to satisfy. - let constraints = self.encinfo.operand_constraints(encoding); - - // Identify reload candidates. - debug_assert!(self.candidates.is_empty()); - self.find_candidates(inst, constraints); - - // If we find a copy from a stack slot to the same stack slot, replace - // it with a `copy_nop` but otherwise ignore it. In particular, don't - // generate a reload immediately followed by a spill. The `copy_nop` - // has a zero-length encoding, so will disappear at emission time. - if let InstructionData::Unary { - opcode: Opcode::Copy, - arg, - } = self.cur.func.dfg[inst] - { - let dst_vals = self.cur.func.dfg.inst_results(inst); - if dst_vals.len() == 1 { - let dst_val = dst_vals[0]; - let can_transform = match ( - self.cur.func.locations[arg], - self.cur.func.locations[dst_val], - ) { - (ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => { - src_slot == dst_slot && { - let src_ty = self.cur.func.dfg.value_type(arg); - let dst_ty = self.cur.func.dfg.value_type(dst_val); - debug_assert!(src_ty == dst_ty); - // This limits the transformation to copies of the - // types: I128 I64 I32 I16 I8 F64 and F32, since that's - // the set of `copy_nop` encodings available. - src_ty.is_int() || src_ty.is_float() - } - } - _ => false, - }; - if can_transform { - // Convert the instruction into a `copy_nop`. - self.cur.func.dfg.replace(inst).copy_nop(arg); - let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok(); - debug_assert!(ok, "copy_nop encoding missing for this type"); - - // And move on to the next insn. - self.reloads.clear(); - let _ = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness); - self.cur.next_inst(); - self.candidates.clear(); - return; - } - } - } - - // Deal with all instructions not special-cased by the immediately - // preceding fragment. - if let InstructionData::Unary { - opcode: Opcode::Copy, - .. - } = self.cur.func.dfg[inst] - { - self.reload_copy_candidates(inst); - } else { - self.reload_inst_candidates(block, inst); - } - - // TODO: Reuse reloads for future instructions. 
- self.reloads.clear(); - - let (_throughs, _kills, defs) = - tracker.process_inst(inst, &self.cur.func.dfg, self.liveness); - - // Advance to the next instruction so we can insert any spills after the instruction. - self.cur.next_inst(); - - // Rewrite register defs that need to be spilled. - // - // Change: - // - // v2 = inst ... - // - // Into: - // - // v7 = inst ... - // v2 = spill v7 - // - // That way, we don't need to rewrite all future uses of v2. - if let Some(constraints) = constraints { - for (lv, op) in defs.iter().zip(constraints.outs) { - if lv.affinity.is_stack() && op.kind != ConstraintKind::Stack { - if let InstructionData::Unary { - opcode: Opcode::Copy, - arg, - } = self.cur.func.dfg[inst] - { - self.cur.func.dfg.replace(inst).spill(arg); - let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok(); - debug_assert!(ok); - } else { - let value_type = self.cur.func.dfg.value_type(lv.value); - let reg = self.cur.func.dfg.replace_result(lv.value, value_type); - self.liveness.create_dead(reg, inst, Affinity::new(op)); - self.insert_spill(block, lv.value, reg); - } - } - } - } - - // Same thing for spilled call return values. - let retvals = &defs[self.cur.func.dfg[inst] - .opcode() - .constraints() - .num_fixed_results()..]; - if !retvals.is_empty() { - let sig = self - .cur - .func - .dfg - .call_signature(inst) - .expect("Extra results on non-call instruction"); - for (i, lv) in retvals.iter().enumerate() { - let abi = self.cur.func.dfg.signatures[sig].returns[i]; - debug_assert!( - abi.location.is_reg(), - "expected reg; got {:?}", - abi.location - ); - if lv.affinity.is_stack() { - let reg = self.cur.func.dfg.replace_result(lv.value, abi.value_type); - self.liveness - .create_dead(reg, inst, Affinity::abi(&abi, self.cur.isa)); - self.insert_spill(block, lv.value, reg); - } - } - } - } - - // Reload the current candidates for the given `inst`. - fn reload_inst_candidates(&mut self, block: Block, inst: Inst) { - // Insert fill instructions before `inst` and replace `cand.value` with the filled value. - for cand in self.candidates.iter_mut() { - if let Some(reload) = self.reloads.get(cand.value) { - cand.value = reload.reg; - continue; - } - - let reg = self.cur.ins().fill(cand.value); - let fill = self.cur.built_inst(); - - self.reloads.insert(ReloadedValue { - stack: cand.value, - reg, - }); - cand.value = reg; - - // Create a live range for the new reload. - let affinity = Affinity::Reg(cand.regclass.into()); - self.liveness.create_dead(reg, fill, affinity); - self.liveness - .extend_locally(reg, block, inst, &self.cur.func.layout); - } - - // Rewrite instruction arguments. - // - // Only rewrite those arguments that were identified as candidates. This leaves block - // arguments on branches as-is without rewriting them. A spilled block argument needs to stay - // spilled because the matching block parameter is going to be in the same virtual register - // and therefore the same stack slot as the block argument value. - if !self.candidates.is_empty() { - let args = self.cur.func.dfg.inst_args_mut(inst); - while let Some(cand) = self.candidates.pop() { - args[cand.argidx] = cand.value; - } - } - } - - // Reload the current candidates for the given copy `inst`. - // - // As an optimization, replace a copy instruction where the argument has been spilled with - // a fill instruction. - fn reload_copy_candidates(&mut self, inst: Inst) { - // Copy instructions can only have one argument. 
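Spelled out with hypothetical value numbers, the rewrite this optimization performs is simply:

```
v1 = copy v0    ; v0 was spilled, but the operand wants a register
v1 = fill v0    ; rewritten in place: the copy itself becomes the reload
```

so no separate `fill` followed by a `copy` is ever emitted.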
- debug_assert!(self.candidates.is_empty() || self.candidates.len() == 1);
-
- if let Some(cand) = self.candidates.pop() {
- self.cur.func.dfg.replace(inst).fill(cand.value);
- let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
- debug_assert!(ok);
- }
- }
-
- // Find reload candidates for `inst` and add them to `self.candidates`.
- //
- // These are uses of spilled values where the operand constraint requires a register.
- fn find_candidates(&mut self, inst: Inst, constraints: Option<&RecipeConstraints>) {
- let args = self.cur.func.dfg.inst_args(inst);
-
- if let Some(constraints) = constraints {
- for (argidx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() {
- if op.kind != ConstraintKind::Stack && self.liveness[arg].affinity.is_stack() {
- self.candidates.push(ReloadCandidate {
- argidx,
- value: arg,
- regclass: op.regclass,
- })
- }
- }
- }
-
- // If we only have the fixed arguments, we're done now.
- let offset = self.cur.func.dfg[inst]
- .opcode()
- .constraints()
- .num_fixed_value_arguments();
- if args.len() == offset {
- return;
- }
- let var_args = &args[offset..];
-
- // Handle ABI arguments.
- if let Some(sig) = self.cur.func.dfg.call_signature(inst) {
- handle_abi_args(
- self.candidates,
- &self.cur.func.dfg.signatures[sig].params,
- var_args,
- offset,
- self.cur.isa,
- self.liveness,
- );
- } else if self.cur.func.dfg[inst].opcode().is_return() {
- handle_abi_args(
- self.candidates,
- &self.cur.func.signature.returns,
- var_args,
- offset,
- self.cur.isa,
- self.liveness,
- );
- }
- }
-
- /// Insert a spill at `pos` and update data structures.
- ///
- /// - Insert `stack = spill reg` at `pos`, and assign an encoding.
- /// - Move the `stack` live range starting point to the new instruction.
- /// - Extend the `reg` live range to reach the new instruction.
- fn insert_spill(&mut self, block: Block, stack: Value, reg: Value) {
- self.cur.ins().with_result(stack).spill(reg);
- let inst = self.cur.built_inst();
-
- // Update live ranges.
- self.liveness.move_def_locally(stack, inst);
- self.liveness
- .extend_locally(reg, block, inst, &self.cur.func.layout);
- }
-}
-
-/// Find reload candidates in the instruction's ABI variable arguments. This handles both
-/// return values and call arguments.
-fn handle_abi_args(
- candidates: &mut Vec<ReloadCandidate>,
- abi_types: &[AbiParam],
- var_args: &[Value],
- offset: usize,
- isa: &dyn TargetIsa,
- liveness: &Liveness,
-) {
- debug_assert_eq!(abi_types.len(), var_args.len());
- for ((abi, &arg), argidx) in abi_types.iter().zip(var_args).zip(offset..) {
- if abi.location.is_reg() {
- let lv = liveness.get(arg).expect("Missing live range for ABI arg");
- if lv.affinity.is_stack() {
- candidates.push(ReloadCandidate {
- argidx,
- value: arg,
- regclass: isa.regclass_for_abi_type(abi.value_type),
- });
- }
- }
- }
-}
diff --git a/cranelift/codegen/src/regalloc/safepoint.rs b/cranelift/codegen/src/regalloc/safepoint.rs
deleted file mode 100644
index 2686c57277..0000000000
--- a/cranelift/codegen/src/regalloc/safepoint.rs
+++ /dev/null
@@ -1,65 +0,0 @@
-use crate::cursor::{Cursor, FuncCursor};
-use crate::dominator_tree::DominatorTree;
-use crate::inst_predicates::is_safepoint;
-use crate::ir::{Function, InstBuilder};
-use crate::isa::TargetIsa;
-use crate::regalloc::live_value_tracker::LiveValueTracker;
-use crate::regalloc::liveness::Liveness;
-use alloc::vec::Vec;
-
-fn insert_and_encode_safepoint<'f>(
- pos: &mut FuncCursor<'f>,
- tracker: &LiveValueTracker,
- isa: &dyn TargetIsa,
-) {
- // Iterate through all live values, collect only the references.
- let live_ref_values = tracker
- .live()
- .iter()
- .filter(|live_value| pos.func.dfg.value_type(live_value.value).is_ref())
- .map(|live_val| live_val.value)
- .collect::<Vec<_>>();
-
- if !live_ref_values.is_empty() {
- pos.ins().safepoint(&live_ref_values);
- // Move cursor to the new safepoint instruction to encode it.
- if let Some(inst) = pos.prev_inst() {
- let ok = pos.func.update_encoding(inst, isa).is_ok();
- debug_assert!(ok);
- }
- // Restore cursor position.
- pos.next_inst();
- }
-}
-
-// The emit_stack_maps() function analyzes each instruction to retrieve the liveness of
-// the defs and operands by traversing a function's blocks in layout order.
-pub fn emit_stack_maps(
- func: &mut Function,
- domtree: &DominatorTree,
- liveness: &Liveness,
- tracker: &mut LiveValueTracker,
- isa: &dyn TargetIsa,
-) {
- let mut curr = func.layout.entry_block();
-
- while let Some(block) = curr {
- tracker.block_top(block, &func.dfg, liveness, &func.layout, domtree);
- tracker.drop_dead_params();
- let mut pos = FuncCursor::new(func);
-
- // From the top of the block, step through the instructions.
- pos.goto_top(block);
-
- while let Some(inst) = pos.next_inst() {
- if is_safepoint(&pos.func, inst) {
- insert_and_encode_safepoint(&mut pos, tracker, isa);
- }
-
- // Process the instruction and get rid of dead values.
- tracker.process_inst(inst, &pos.func.dfg, liveness);
- tracker.drop_dead(inst);
- }
- curr = func.layout.next_block(block);
- }
-}
diff --git a/cranelift/codegen/src/regalloc/solver.rs b/cranelift/codegen/src/regalloc/solver.rs
deleted file mode 100644
index 3971ff4c55..0000000000
--- a/cranelift/codegen/src/regalloc/solver.rs
+++ /dev/null
@@ -1,1382 +0,0 @@
-//! Constraint solver for register coloring.
-//!
-//! The coloring phase of SSA-based register allocation is very simple in theory, but in practice
-//! it is complicated by the various constraints imposed by individual instructions:
-//!
-//! - Call and return instructions have to satisfy ABI requirements for arguments and return
-//! values.
-//! - Values live across a call must be in a callee-saved register.
-//! - Some instructions have operand constraints such as register sub-classes, fixed registers, or
-//! tied operands.
-//!
-//! # The instruction register coloring problem
-//!
-//! The constraint solver addresses the problem of satisfying the constraints of a single
-//! instruction. We have:
-//!
-//! - A set of values that are live in registers before the instruction, with current register
-//! assignments.
Some are used by the instruction, some are not. -//! - A subset of the live register values that are killed by the instruction. -//! - A set of new register values that are defined by the instruction. -//! -//! We are not concerned with stack values at all. The reload pass ensures that all values required -//! to be in a register by the instruction are already in a register. -//! -//! A solution to the register coloring problem consists of: -//! -//! - Register reassignment prescriptions for a subset of the live register values. -//! - Register assignments for the instruction's defined values. -//! -//! The solution ensures that when live registers are reassigned as prescribed before the -//! instruction, all its operand constraints are satisfied, and the definition assignments won't -//! conflict. -//! -//! # Register diversions and global interference -//! -//! We can divert register values temporarily to satisfy constraints, but we need to put the -//! values back into their originally assigned register locations before leaving the block. -//! Otherwise, values won't be in the right register at the entry point of other blocks. -//! -//! Some values are *local*, and we don't need to worry about putting those values back since they -//! are not used in any other blocks. -//! -//! When we assign register locations to defines, we are assigning both the register used locally -//! immediately after the instruction and the register used globally when the defined value is used -//! in a different block. We need to avoid interference both locally at the instruction and globally. -//! -//! We have multiple mappings of values to registers: -//! -//! 1. The initial local mapping before the instruction. This includes any diversions from previous -//! instructions in the block, but not diversions for the current instruction. -//! 2. The local mapping after applying the additional reassignments required to satisfy the -//! constraints of the current instruction. -//! 3. The local mapping after the instruction. This excludes values killed by the instruction and -//! includes values defined by the instruction. -//! 4. The global mapping after the instruction. This mapping only contains values with global live -//! ranges, and it does not include any diversions. -//! -//! All four mappings must be kept free of interference. -//! -//! # Problems handled by previous passes. -//! -//! The constraint solver can only reassign registers, it can't create spill code, so some -//! constraints are handled by earlier passes: -//! -//! - There will be enough free registers available for the defines. Ensuring this is the primary -//! purpose of the spilling phase. -//! - When the same value is used for multiple operands, the intersection of operand constraints is -//! non-empty. The spilling phase will insert copies to handle mutually incompatible constraints, -//! such as when the same value is bound to two different function arguments. -//! - Values bound to tied operands must be killed by the instruction. Also enforced by the -//! spiller. -//! - Values used by register operands are in registers, and values used by stack operands are in -//! stack slots. This is enforced by the reload pass. -//! -//! # Solver algorithm -//! -//! The goal of the solver is to satisfy the instruction constraints with a minimal number of -//! register assignments before the instruction. -//! -//! 1. Compute the set of values used by operands with a fixed register constraint that isn't -//! already satisfied. 
These are mandatory predetermined reassignments. -//! 2. Compute the set of values that don't satisfy their register class constraint. These are -//! mandatory reassignments that we need to solve. -//! 3. Add the set of defines to the set of variables computed in 2. Exclude defines tied to an -//! input operand since their value is pre-determined. -//! -//! The set of values computed in 2. and 3. are the *variables* for the solver. Given a set of -//! variables, we can also compute a set of allocatable registers by removing the variables from -//! the set of assigned registers before the instruction. -//! -//! 1. For each variable, compute its domain as the intersection of the allocatable registers and -//! its register class constraint. -//! 2. Sort the variables in order of increasing domain size. -//! 3. Search for a solution that assigns each variable a register from its domain without -//! interference between variables. -//! -//! If the search fails to find a solution, we may need to reassign more registers. Find an -//! appropriate candidate among the set of live register values, add it as a variable and start -//! over. - -use super::RegisterSet; -use crate::dbg::DisplayList; -use crate::entity::{SparseMap, SparseMapValue}; -use crate::ir::Value; -use crate::isa::{RegClass, RegUnit}; -use crate::regalloc::register_set::RegSetIter; -use alloc::vec::Vec; -use core::cmp; -use core::fmt; -use core::mem; -use core::u16; - -/// A variable in the constraint problem. -/// -/// Variables represent register values that can be assigned to any register unit within the -/// constraint register class. This includes live register values that can be reassigned to a new -/// register and values defined by the instruction which must be assigned to a register. -/// -/// Besides satisfying the register class constraint, variables must also be mutually -/// non-interfering in up to three contexts: -/// -/// 1. Input side live registers, after applying all the reassignments. -/// 2. Output side live registers, considering all the local register diversions. -/// 3. Global live register, not considering any local diversions. -/// -pub struct Variable { - /// The value whose register assignment we're looking for. - pub value: Value, - - /// Original register unit holding this live value before the instruction, or `None` for a - /// value that is defined by the instruction. - from: Option, - - /// Avoid interference on the input side. - is_input: bool, - - /// Avoid interference on the output side. - is_output: bool, - - /// Avoid interference with the global registers. - is_global: bool, - - /// Number of registers available in the domain of this variable. - domain: u16, - - /// The assigned register unit after a full solution was found. - pub solution: RegUnit, - - /// Any solution must belong to the constraint register class. - constraint: RegClass, -} - -impl Variable { - fn new_live(value: Value, constraint: RegClass, from: RegUnit, is_output: bool) -> Self { - Self { - value, - constraint, - from: Some(from), - is_input: true, - is_output, - is_global: false, - domain: 0, - solution: !0, - } - } - - fn new_def(value: Value, constraint: RegClass, is_global: bool) -> Self { - Self { - value, - constraint, - from: None, - is_input: false, - is_output: true, - is_global, - domain: 0, - solution: !0, - } - } - - /// Does this variable represent a value defined by the current instruction? 
- pub fn is_define(&self) -> bool {
- self.from.is_none()
- }
-
- /// Get an iterator over possible register choices, given the available registers on the input
- /// and output sides as well as the available global register set.
- fn iter(&self, iregs: &RegisterSet, oregs: &RegisterSet, gregs: &RegisterSet) -> RegSetIter {
- if !self.is_output {
- debug_assert!(!self.is_global, "Global implies output");
- debug_assert!(self.is_input, "Missing interference set");
- return iregs.iter(self.constraint);
- }
-
- let mut r = oregs.clone();
- if self.is_input {
- r.intersect(iregs);
- }
- if self.is_global {
- r.intersect(gregs);
- }
- r.iter(self.constraint)
- }
-}
-
-impl fmt::Display for Variable {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- write!(f, "{}({}", self.value, self.constraint)?;
- if let Some(reg) = self.from {
- write!(f, ", from {}", self.constraint.info.display_regunit(reg))?;
- }
- if self.is_input {
- write!(f, ", in")?;
- }
- if self.is_output {
- write!(f, ", out")?;
- }
- if self.is_global {
- write!(f, ", global")?;
- }
- if self.is_define() {
- write!(f, ", def")?;
- }
- if self.domain > 0 {
- write!(f, ", {}", self.domain)?;
- }
- write!(f, ")")
- }
-}
-
-#[derive(Clone, Debug)]
-pub struct Assignment {
- pub value: Value,
- pub from: RegUnit,
- pub to: RegUnit,
- pub rc: RegClass,
-}
-
-impl SparseMapValue<Value> for Assignment {
- fn key(&self) -> Value {
- self.value
- }
-}
-
-impl fmt::Display for Assignment {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- let ri = self.rc.info;
- write!(
- f,
- "{}:{}({} -> {})",
- self.value,
- self.rc,
- ri.display_regunit(self.from),
- ri.display_regunit(self.to)
- )
- }
-}
-
-/// A move operation between two registers or between a register and an emergency spill slot.
-#[derive(Clone, PartialEq)]
-pub enum Move {
- Reg {
- value: Value,
- rc: RegClass,
- from: RegUnit,
- to: RegUnit,
- },
- #[allow(dead_code)] // rustc doesn't see it isn't dead.
- Spill {
- value: Value,
- rc: RegClass,
- from: RegUnit,
- to_slot: usize,
- },
- Fill {
- value: Value,
- rc: RegClass,
- from_slot: usize,
- to: RegUnit,
- },
-}
-
-impl Move {
- /// Create a register move from an assignment, but not for identity assignments.
- fn with_assignment(a: &Assignment) -> Option<Self> {
- if a.from != a.to {
- Some(Self::Reg {
- value: a.value,
- from: a.from,
- to: a.to,
- rc: a.rc,
- })
- } else {
- None
- }
- }
-
- /// Get the "from" register and register class, if possible.
- #[cfg_attr(feature = "cargo-clippy", allow(clippy::wrong_self_convention))]
- fn from_reg(&self) -> Option<(RegClass, RegUnit)> {
- match *self {
- Self::Reg { rc, from, .. } | Self::Spill { rc, from, .. } => Some((rc, from)),
- Self::Fill { .. } => None,
- }
- }
-
- /// Get the "to" register and register class, if possible.
- fn to_reg(&self) -> Option<(RegClass, RegUnit)> {
- match *self {
- Self::Reg { rc, to, .. } | Self::Fill { rc, to, .. } => Some((rc, to)),
- Self::Spill { .. } => None,
- }
- }
-
- /// Replace the "to" register with `new` and return the old value.
- fn replace_to_reg(&mut self, new: RegUnit) -> RegUnit {
- mem::replace(
- match *self {
- Self::Reg { ref mut to, .. } | Self::Fill { ref mut to, .. } => to,
- Self::Spill { .. } => panic!("No to register in a spill {}", self),
- },
- new,
- )
- }
-
- /// Convert this `Reg` move to a spill to `slot` and return the old "to" register.
- fn change_to_spill(&mut self, slot: usize) -> RegUnit {
- match self.clone() {
- Self::Reg {
- value,
- rc,
- from,
- to,
- } => {
- *self = Self::Spill {
- value,
- rc,
- from,
- to_slot: slot,
- };
- to
- }
- _ => panic!("Expected reg move: {}", self),
- }
- }
-
- /// Get the value being moved.
- fn value(&self) -> Value {
- match *self {
- Self::Reg { value, .. } | Self::Fill { value, .. } | Self::Spill { value, .. } => value,
- }
- }
-
- /// Get the associated register class.
- fn rc(&self) -> RegClass {
- match *self {
- Self::Reg { rc, .. } | Self::Fill { rc, .. } | Self::Spill { rc, .. } => rc,
- }
- }
-}
-
-impl fmt::Display for Move {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- match *self {
- Self::Reg {
- value,
- from,
- to,
- rc,
- } => write!(
- f,
- "{}:{}({} -> {})",
- value,
- rc,
- rc.info.display_regunit(from),
- rc.info.display_regunit(to)
- ),
- Self::Spill {
- value,
- from,
- to_slot,
- rc,
- } => write!(
- f,
- "{}:{}({} -> slot {})",
- value,
- rc,
- rc.info.display_regunit(from),
- to_slot
- ),
- Self::Fill {
- value,
- from_slot,
- to,
- rc,
- } => write!(
- f,
- "{}:{}(slot {} -> {})",
- value,
- rc,
- from_slot,
- rc.info.display_regunit(to)
- ),
- }
- }
-}
-
-impl fmt::Debug for Move {
- fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
- let as_display: &dyn fmt::Display = self;
- as_display.fmt(f)
- }
-}
-
-/// Constraint solver for register allocation around a single instruction.
-///
-/// Start by programming in the instruction constraints.
-///
-/// 1. Initialize the solver by calling `reset()` with the set of allocatable registers before the
-/// instruction.
-/// 2. Program the input side constraints: Call `reassign_in()` for all fixed register constraints,
-/// and `add_var()` for any input operands whose constraints are not already satisfied.
-/// 3. Check for conflicts between fixed input assignments and existing live values by calling
-/// `has_fixed_input_conflicts()`. Resolve any conflicts by calling `add_var()` with the
-/// conflicting values.
-/// 4. Prepare for adding output side constraints by calling `inputs_done()`.
-/// 5. Add any killed register values that no longer cause interference on the output side by
-/// calling `add_kill()`.
-/// 6. Program the output side constraints: Call `add_fixed_output()` for all fixed register
-/// constraints and `add_def()` for free defines. Resolve fixed output conflicts by calling
-/// `add_through_var()`.
-///
-pub struct Solver {
- /// Register reassignments that are required or decided as part of a full solution.
- /// This includes identity assignments for values that are already in the correct fixed
- /// register.
- assignments: SparseMap<Value, Assignment>,
-
- /// Variables are the values that should be reassigned as part of a solution.
- /// Values with fixed register constraints are not considered variables. They are represented
- /// in the `assignments` vector if necessary.
- vars: Vec<Variable>,
-
- /// Are we finished adding input-side constraints? This changes the meaning of the `regs_in`
- /// and `regs_out` register sets.
- inputs_done: bool,
-
- /// Available registers on the input side of the instruction.
- ///
- /// While we're adding input constraints (`!inputs_done`):
- ///
- /// - Live values on the input side are marked as unavailable.
- /// - The 'from' registers of fixed input reassignments are marked as available as they are
- /// added.
- /// - Input-side variables are marked as available.
- ///
- /// After finishing input constraints (`inputs_done`):
- ///
- /// - Live values on the input side are marked as unavailable.
- /// - The 'to' registers of fixed input reassignments are marked as unavailable.
- /// - Input-side variables are marked as available.
- ///
- regs_in: RegisterSet,
-
- /// Available registers on the output side of the instruction / fixed input scratch space.
- ///
- /// While we're adding input constraints (`!inputs_done`):
- ///
- /// - The 'to' registers of fixed input reassignments are marked as unavailable.
- ///
- /// After finishing input constraints (`inputs_done`):
- ///
- /// - Live-through values are marked as unavailable.
- /// - Fixed output assignments are marked as unavailable.
- /// - Live-through variables are marked as available.
- ///
- regs_out: RegisterSet,
-
- /// List of register moves scheduled to avoid conflicts.
- ///
- /// This is used as working space by the `schedule_moves()` function.
- moves: Vec<Move>,
-
- /// List of pending fill moves. This is only used during `schedule_moves()`.
- fills: Vec<Move>,
-}
-
-/// Interface for programming the constraints into the solver.
-impl Solver {
- /// Create a new empty solver.
- pub fn new() -> Self {
- Self {
- assignments: SparseMap::new(),
- vars: Vec::new(),
- inputs_done: false,
- regs_in: RegisterSet::new(),
- regs_out: RegisterSet::new(),
- moves: Vec::new(),
- fills: Vec::new(),
- }
- }
-
- /// Clear all data structures in this coloring pass.
- pub fn clear(&mut self) {
- self.assignments.clear();
- self.vars.clear();
- self.inputs_done = false;
- self.regs_in = RegisterSet::new();
- self.regs_out = RegisterSet::new();
- self.moves.clear();
- self.fills.clear();
- }
-
- /// Reset the solver state and prepare solving for a new instruction with an initial set of
- /// allocatable registers.
- ///
- /// The `regs` set is the allocatable registers before any reassignments are applied.
- pub fn reset(&mut self, regs: &RegisterSet) {
- self.assignments.clear();
- self.vars.clear();
- self.inputs_done = false;
- self.regs_in = regs.clone();
- // Used for tracking fixed input assignments while `!inputs_done`:
- self.regs_out = RegisterSet::new();
- self.moves.clear();
- self.fills.clear();
- }
-
- /// Add a fixed input reassignment of `value`.
- ///
- /// This means that `value` must be assigned to `to` and can't become a variable. Call with
- /// `from == to` to ensure that `value` is not reassigned from its existing register location.
- ///
- /// In either case, `to` will not be available for variables on the input side of the
- /// instruction.
- pub fn reassign_in(&mut self, value: Value, rc: RegClass, from: RegUnit, to: RegUnit) {
- log::trace!(
- "reassign_in({}:{}, {} -> {})",
- value,
- rc,
- rc.info.display_regunit(from),
- rc.info.display_regunit(to)
- );
- debug_assert!(!self.inputs_done);
- if self.regs_in.is_avail(rc, from) {
- // It looks like `value` was already removed from the register set. It must have been
- // added as a variable previously. A fixed constraint beats a variable, so convert it.
- if let Some(idx) = self.vars.iter().position(|v| v.value == value) {
- let v = self.vars.remove(idx);
- log::trace!("-> converting variable {} to a fixed constraint", v);
- // The spiller is responsible for ensuring that all constraints on the uses of a
- // value are compatible.
- debug_assert!( - v.constraint.contains(to), - "Incompatible constraints for {}", - value - ); - } else { - panic!("Invalid from register for fixed {} constraint", value); - } - } - self.regs_in.free(rc, from); - self.regs_out.take(rc, to); - self.assignments.insert(Assignment { - value, - rc, - from, - to, - }); - } - - /// Add a variable representing an input side value with an existing register assignment. - /// - /// A variable is a value that should be reassigned to something in the `constraint` register - /// class. - /// - /// It is assumed initially that the value is also live on the output side of the instruction. - /// This can be changed by calling to `add_kill()`. - /// - /// This function can only be used before calling `inputs_done()`. Afterwards, more input-side - /// variables can be added by calling `add_killed_var()` and `add_through_var()` - pub fn add_var(&mut self, value: Value, constraint: RegClass, from: RegUnit) { - log::trace!( - "add_var({}:{}, from={})", - value, - constraint, - constraint.info.display_regunit(from) - ); - debug_assert!(!self.inputs_done); - self.add_live_var(value, constraint, from, true); - } - - /// Add an extra input-side variable representing a value that is killed by the current - /// instruction. - /// - /// This function should be called after `inputs_done()` only. Use `add_var()` before. - pub fn add_killed_var(&mut self, value: Value, rc: RegClass, from: RegUnit) { - log::trace!( - "add_killed_var({}:{}, from={})", - value, - rc, - rc.info.display_regunit(from) - ); - debug_assert!(self.inputs_done); - self.add_live_var(value, rc, from, false); - } - - /// Add an extra input-side variable representing a value that is live through the current - /// instruction. - /// - /// This function should be called after `inputs_done()` only. Use `add_var()` before. - pub fn add_through_var(&mut self, value: Value, rc: RegClass, from: RegUnit) { - log::trace!( - "add_through_var({}:{}, from={})", - value, - rc, - rc.info.display_regunit(from) - ); - debug_assert!(self.inputs_done); - self.add_live_var(value, rc, from, true); - } - - /// Shared code for `add_var`, `add_killed_var`, and `add_through_var`. - /// - /// Add a variable that is live before the instruction, and possibly live through. Merge - /// constraints if the value has already been added as a variable or fixed assignment. - fn add_live_var(&mut self, value: Value, rc: RegClass, from: RegUnit, live_through: bool) { - // Check for existing entries for this value. - if !self.can_add_var(rc, from) { - // There could be an existing variable entry. - if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) { - // We have an existing variable entry for `value`. Combine the constraints. - if let Some(rc) = v.constraint.intersect(rc) { - log::trace!("-> combining constraint with {} yields {}", v, rc); - v.constraint = rc; - return; - } else { - // The spiller should have made sure the same value is not used with disjoint - // constraints. - panic!("Incompatible constraints: {} + {}", rc, v) - } - } - - // No variable, then it must be a fixed reassignment. 
- if let Some(a) = self.assignments.get(value) { - log::trace!("-> already fixed assignment {}", a); - debug_assert!(rc.contains(a.to), "Incompatible constraints for {}", value); - return; - } - - log::trace!("{}", self); - panic!("Wrong from register for {}", value); - } - - let new_var = Variable::new_live(value, rc, from, live_through); - log::trace!("-> new var: {}", new_var); - - self.regs_in.free(rc, from); - if self.inputs_done && live_through { - self.regs_out.free(rc, from); - } - self.vars.push(new_var); - } - - /// Check for conflicts between fixed input assignments and existing live values. - /// - /// Returns true if one of the live values conflicts with a fixed input assignment. Such a - /// conflicting value must be turned into a variable. - pub fn has_fixed_input_conflicts(&self) -> bool { - debug_assert!(!self.inputs_done); - // The `from` side of the fixed input diversions are taken from `regs_out`. - self.regs_out.interferes_with(&self.regs_in) - } - - /// Check if `rc, reg` specifically conflicts with the fixed input assignments. - pub fn is_fixed_input_conflict(&self, rc: RegClass, reg: RegUnit) -> bool { - debug_assert!(!self.inputs_done); - !self.regs_out.is_avail(rc, reg) - } - - /// Finish adding input side constraints. - /// - /// Call this method to indicate that there will be no more fixed input reassignments added - /// and prepare for the output side constraints. - pub fn inputs_done(&mut self) { - debug_assert!(!self.has_fixed_input_conflicts()); - - // At this point, `regs_out` contains the `to` side of the input reassignments, and the - // `from` side has already been marked as available in `regs_in`. - // - // Remove the `to` assignments from `regs_in` so it now indicates the registers available - // to variables at the input side. - self.regs_in.intersect(&self.regs_out); - - // The meaning of `regs_out` now changes completely to indicate the registers available to - // variables on the output side. - // The initial mask will be modified by `add_kill()` and `add_fixed_output()`. - self.regs_out = self.regs_in.clone(); - - // Now we can't add more fixed input assignments, but `add_var()` is still allowed. - self.inputs_done = true; - } - - /// Record that an input register value is killed by the instruction. - /// - /// Even if a fixed reassignment has been added for the value, the `reg` argument should be the - /// original location before the reassignments. - /// - /// This means that the register is available on the output side. - pub fn add_kill(&mut self, value: Value, rc: RegClass, reg: RegUnit) { - debug_assert!(self.inputs_done); - - // If a fixed assignment is killed, the `to` register becomes available on the output side. - if let Some(a) = self.assignments.get(value) { - debug_assert_eq!(a.from, reg); - self.regs_out.free(a.rc, a.to); - return; - } - - // It's also possible that a variable is killed. That means it doesn't need to satisfy - // interference constraints on the output side. - // Variables representing tied operands will get their `is_output` flag set again later. - if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) { - debug_assert!(v.is_input); - v.is_output = false; - return; - } - - // Alright, this is just a boring value being killed by the instruction. Just reclaim - // the assigned register. - self.regs_out.free(rc, reg); - } - - /// Record that an input register is tied to an output register. - /// - /// It is assumed that `add_kill` was called previously with the same arguments. 
- /// - /// The output value that must have the same register as the input value is not recorded in the - /// solver. - /// - /// If the value has already been assigned to a fixed register, return that. - pub fn add_tied_input( - &mut self, - value: Value, - rc: RegClass, - reg: RegUnit, - is_global: bool, - ) -> Option<RegUnit> { - debug_assert!(self.inputs_done); - - // If a fixed assignment is tied, the `to` register is not available on the output side. - if let Some(a) = self.assignments.get(value) { - debug_assert_eq!(a.from, reg); - self.regs_out.take(a.rc, a.to); - return Some(a.to); - } - - // Check if a variable was created. - if let Some(v) = self.vars.iter_mut().find(|v| v.value == value) { - debug_assert!(v.is_input); - v.is_output = true; - v.is_global = is_global; - return None; - } - - // No variable exists for `value` because its constraints are already satisfied. - // However, if the tied output value has a global live range, we must create a variable to - // avoid global interference too. - if is_global { - let mut new_var = Variable::new_live(value, rc, reg, true); - new_var.is_global = true; - log::trace!("add_tied_input: new tied-global value: {}", new_var); - self.vars.push(new_var); - self.regs_in.free(rc, reg); - } else { - self.regs_out.take(rc, reg); - } - - None - } - - /// Add a fixed output assignment. - /// - /// This means that `to` will not be available for variables on the output side of the - /// instruction. - /// - /// Returns `false` if a live value conflicts with `to`, so it couldn't be added. Find the - /// conflicting live-through value and turn it into a variable before calling this method - /// again. - #[allow(dead_code)] - pub fn add_fixed_output(&mut self, rc: RegClass, reg: RegUnit) -> bool { - debug_assert!(self.inputs_done); - if self.regs_out.is_avail(rc, reg) { - self.regs_out.take(rc, reg); - true - } else { - false - } - } - - /// Add a defined output value. - /// - /// This is similar to `add_var`, except the value doesn't have a prior register assignment. - pub fn add_def(&mut self, value: Value, constraint: RegClass, is_global: bool) { - debug_assert!(self.inputs_done); - self.vars - .push(Variable::new_def(value, constraint, is_global)); - } - - /// Clear the `is_global` flag on all solver variables. - /// - /// This is used when there are not enough global registers available, and global defines have - /// to be replaced with local defines followed by a copy. - pub fn clear_all_global_flags(&mut self) { - for v in &mut self.vars { - v.is_global = false; - } - } -} - -/// Error reported when the solver fails to find a solution with the current constraints. -/// -/// When no solution can be found, the error indicates how constraints could be loosened to help. -pub enum SolverError { - /// There are no available registers in the given register class. - /// - /// This should be resolved by turning live-through values into variables so they can be moved - /// out of the way. - Divert(RegClass), - - /// There are insufficient available registers in the global set to assign an `is_global` - /// variable with the given value. - /// - /// This should be resolved by converting the variable to a local one. - Global(Value), -} - -/// Interface for searching for a solution. -impl Solver { - /// Try a quick-and-dirty solution. - /// - /// This is expected to succeed for most instructions since the constraint problem is almost - /// always trivial. - /// - /// Returns `Ok(regs)` if a solution was found.
- pub fn quick_solve( - &mut self, - global_regs: &RegisterSet, - is_reload: bool, - ) -> Result<RegisterSet, SolverError> { - self.find_solution(global_regs, is_reload) - } - - /// Try harder to find a solution. - /// - /// Call this method after `quick_solve()` fails. - /// - /// This may return an error with a register class that has run out of registers. If registers - /// can be freed up in the starving class, this method can be called again after adding - /// variables for the freed registers. - pub fn real_solve( - &mut self, - global_regs: &RegisterSet, - is_reload: bool, - ) -> Result<RegisterSet, SolverError> { - // Compute domain sizes for all the variables given the current register sets. - for v in &mut self.vars { - let d = v.iter(&self.regs_in, &self.regs_out, global_regs).len(); - v.domain = cmp::min(d, u16::MAX as usize) as u16; - } - - // Solve for vars with small domains first to increase the chance of finding a solution. - // - // Also consider this case: - // - // v0: out, global - // v1: in - // v2: in+out - // - // If only %r0 and %r1 are available, the global constraint may cause us to assign: - // - // v0 -> %r1 - // v1 -> %r0 - // v2 -> ! - // - // Usually in+out variables will have a smaller domain, but in the above case the domain - // size is the same, so we also prioritize in+out variables. - // - // Include the reversed previous solution for this variable partly as a stable tie breaker, - // partly to shake things up on a second attempt. - // - // Use the `from` register and value number as a tie breaker to get a stable sort. - self.vars.sort_unstable_by_key(|v| { - ( - v.domain, - !(v.is_input && v.is_output), - !v.solution, - v.from.unwrap_or(0), - v.value, - ) - }); - - log::trace!("real_solve for {}", self); - self.find_solution(global_regs, is_reload) - } - - /// Search for a solution with the current list of variables. - /// - /// If a solution was found, returns `Ok(regs)` with the set of available registers on the - /// output side after the solution. If no solution could be found, returns `Err(rc)` with the - /// constraint register class that needs more available registers. - fn find_solution( - &mut self, - global_regs: &RegisterSet, - is_reload: bool, - ) -> Result<RegisterSet, SolverError> { - // Available registers on the input and output sides respectively. - let mut iregs = self.regs_in.clone(); - let mut oregs = self.regs_out.clone(); - let mut gregs = global_regs.clone(); - - for v in &mut self.vars { - let rc = v.constraint; - - // Decide which register to assign. In order to try and keep registers holding - // reloaded values separate from all other registers to the extent possible, we choose - // the first available register in the normal case, but the last available one in the - // case of a reload. See "A side note on register choice heuristics" in - // src/redundant_reload_remover.rs for further details. - let mut reg_set_iter = v.iter(&iregs, &oregs, &gregs); - let maybe_reg = if is_reload { - reg_set_iter.rnext() - } else { - reg_set_iter.next() - }; - - let reg = match maybe_reg { - Some(reg) => reg, - None => { - // If `v` must avoid global interference, there is no point in requesting - // live registers be diverted. We need to make it a non-global value.
- if v.is_global && gregs.iter(rc).next().is_none() { - return Err(SolverError::Global(v.value)); - } - return Err(SolverError::Divert(rc)); - } - }; - - v.solution = reg; - if v.is_input { - iregs.take(rc, reg); - } - if v.is_output { - oregs.take(rc, reg); - } - if v.is_global { - gregs.take(rc, reg); - } - } - - Ok(oregs) - } - - /// Get all the variables. - pub fn vars(&self) -> &[Variable] { - &self.vars - } - - /// Check if `value` can be added as a variable to help find a solution. - pub fn can_add_var(&mut self, constraint: RegClass, from: RegUnit) -> bool { - !self.regs_in.is_avail(constraint, from) - && !self.vars.iter().any(|var| var.from == Some(from)) - } -} - -/// Interface for working with parallel copies once a solution has been found. -impl Solver { - /// Collect all the register moves we need to execute. - fn collect_moves(&mut self) { - self.moves.clear(); - - // Collect moves from the chosen solution for all non-define variables. - for v in &self.vars { - if let Some(from) = v.from { - // Omit variable solutions that don't require the value to be moved. - if from != v.solution { - self.moves.push(Move::Reg { - value: v.value, - from, - to: v.solution, - rc: v.constraint, - }); - } - } - } - - // Convert all of the fixed register assignments into moves, but omit the ones that are - // already in the right register. - self.moves - .extend(self.assignments.values().filter_map(Move::with_assignment)); - - if !self.moves.is_empty() { - log::trace!("collect_moves: {}", DisplayList(&self.moves)); - } - } - - /// Try to schedule a sequence of `regmove` instructions that will shuffle registers into - /// place. - /// - /// This may require the use of additional available registers, and it can fail if no - /// additional registers are available. - /// - /// TODO: Handle failure by generating a sequence of register swaps, or by temporarily spilling - /// a register. - /// - /// Returns the number of spills that had to be emitted. - pub fn schedule_moves(&mut self, regs: &RegisterSet) -> usize { - self.collect_moves(); - debug_assert!(self.fills.is_empty()); - - let mut num_spill_slots = 0; - let mut avail = regs.clone(); - let mut i = 0; - while i < self.moves.len() + self.fills.len() { - // Don't even look at the fills until we've spent all the moves. Deferring these lets - // us potentially reuse the claimed registers to resolve multiple cycles. - if i >= self.moves.len() { - self.moves.append(&mut self.fills); - } - - // Find the first move that can be executed now. - if let Some(j) = self.moves[i..].iter().position(|m| match m.to_reg() { - Some((rc, reg)) => avail.is_avail(rc, reg), - None => true, - }) { - // This move can be executed now. - self.moves.swap(i, i + j); - let m = &self.moves[i]; - if let Some((rc, reg)) = m.to_reg() { - avail.take(rc, reg); - } - if let Some((rc, reg)) = m.from_reg() { - avail.free(rc, reg); - } - log::trace!("move #{}: {}", i, m); - i += 1; - continue; - } - - // When we get here, none of the `moves[i..]` can be executed. This means there are - // only cycles remaining. The cycles can be broken in a few ways: - // - // 1. Grab an available register and use it to break a cycle. - // 2. Move a value temporarily into a stack slot instead of a register. - // 3. Use swap instructions. - // - // TODO: So far we only implement 1 and 2. - - // Pick an assignment with the largest possible width. This is more likely to break up - // a cycle than an assignment with fewer register units. 
For example, it may be - // necessary to move two arm32 S-registers out of the way before a D-register can move - // into place. - // - // We use `min_by_key` and `!` instead of `max_by_key` because it preserves the - // existing order of moves with the same width. - let j = self.moves[i..] - .iter() - .enumerate() - .min_by_key(|&(_, m)| !m.rc().width) - .unwrap() - .0; - self.moves.swap(i, i + j); - - // Check the top-level register class for an available register. It is an axiom of the - // register allocator that we can move between all registers in the top-level RC. - let m = self.moves[i].clone(); - let toprc = m.rc().toprc(); - if let Some(reg) = avail.iter(toprc).next() { - log::trace!( - "breaking cycle at {} with available {} register {}", - m, - toprc, - toprc.info.display_regunit(reg) - ); - - // Alter the move so it is guaranteed to be picked up when we loop. It is important - // that this move is scheduled immediately, otherwise we would have multiple moves - // of the same value, and they would not be commutable. - let old_to_reg = self.moves[i].replace_to_reg(reg); - // Append a fixup move so we end up in the right place. This move will be scheduled - // later. That's ok because it is the single remaining move of `m.value` after the - // next iteration. - self.moves.push(Move::Reg { - value: m.value(), - rc: toprc, - from: reg, - to: old_to_reg, - }); - // TODO: What if allocating an extra register is not enough to break a cycle? This - // can happen when there are registers of different widths in a cycle. For ARM, we - // may have to move two S-registers out of the way before we can resolve a cycle - // involving a D-register. - continue; - } - - // It was impossible to free up a register in toprc, so use an emergency spill slot as - // a last resort. - let slot = num_spill_slots; - num_spill_slots += 1; - log::trace!("breaking cycle at {} with slot {}", m, slot); - let old_to_reg = self.moves[i].change_to_spill(slot); - self.fills.push(Move::Fill { - value: m.value(), - rc: toprc, - from_slot: slot, - to: old_to_reg, - }); - } - - num_spill_slots - } - - /// Borrow the scheduled set of register moves that was computed by `schedule_moves()`. - pub fn moves(&self) -> &[Move] { - &self.moves - } -} - -impl fmt::Display for Solver { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let reginfo = self.vars.first().map(|v| v.constraint.info); - writeln!(f, "Solver {{ inputs_done: {},", self.inputs_done)?; - writeln!(f, " in: {}", self.regs_in.display(reginfo))?; - writeln!(f, " out: {}", self.regs_out.display(reginfo))?; - writeln!( - f, - " assignments: {}", - DisplayList(self.assignments.as_slice()) - )?; - writeln!(f, " vars: {}", DisplayList(&self.vars))?; - writeln!(f, " moves: {}", DisplayList(&self.moves))?; - writeln!(f, "}}") - } -} - -#[cfg(test)] -#[cfg(feature = "arm32")] -mod tests { - use super::{Move, Solver}; - use crate::entity::EntityRef; - use crate::ir::Value; - use crate::isa::registers::{RegBank, RegClassData}; - use crate::isa::{RegClass, RegInfo, RegUnit}; - use crate::regalloc::RegisterSet; - use core::borrow::Borrow; - - // Arm32 `TargetIsa` is now `TargetIsaAdapter`, which does not hold any info - // about registers, so we directly access `INFO` from registers-arm32.rs. - include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs")); - - // Get a register class by name. 
- fn rc_by_name(reginfo: &RegInfo, name: &str) -> RegClass { - reginfo - .classes - .iter() - .find(|rc| rc.name == name) - .expect("Can't find named register class.") - } - - // Construct a register move. - fn mov(value: Value, rc: RegClass, from: RegUnit, to: RegUnit) -> Move { - Move::Reg { - value, - rc, - from, - to, - } - } - - fn spill(value: Value, rc: RegClass, from: RegUnit, to_slot: usize) -> Move { - Move::Spill { - value, - rc, - from, - to_slot, - } - } - - fn fill(value: Value, rc: RegClass, from_slot: usize, to: RegUnit) -> Move { - Move::Fill { - value, - rc, - from_slot, - to, - } - } - - #[test] - fn simple_moves() { - let reginfo = INFO.borrow(); - let gpr = rc_by_name(&reginfo, "GPR"); - let r0 = gpr.unit(0); - let r1 = gpr.unit(1); - let r2 = gpr.unit(2); - let gregs = RegisterSet::new(); - let mut regs = RegisterSet::new(); - let mut solver = Solver::new(); - let v10 = Value::new(10); - let v11 = Value::new(11); - - // As simple as it gets: Value is in r1, we want r0. - regs.take(gpr, r1); - solver.reset(&regs); - solver.reassign_in(v10, gpr, r1, r0); - solver.inputs_done(); - assert!(solver.quick_solve(&gregs, false).is_ok()); - assert_eq!(solver.schedule_moves(&regs), 0); - assert_eq!(solver.moves(), &[mov(v10, gpr, r1, r0)]); - - // A bit harder: r0, r1 need to go in r1, r2. - regs.take(gpr, r0); - solver.reset(&regs); - solver.reassign_in(v10, gpr, r0, r1); - solver.reassign_in(v11, gpr, r1, r2); - solver.inputs_done(); - assert!(solver.quick_solve(&gregs, false).is_ok()); - assert_eq!(solver.schedule_moves(&regs), 0); - assert_eq!( - solver.moves(), - &[mov(v11, gpr, r1, r2), mov(v10, gpr, r0, r1)] - ); - - // Swap r0 and r1 in three moves using r2 as a scratch. - solver.reset(&regs); - solver.reassign_in(v10, gpr, r0, r1); - solver.reassign_in(v11, gpr, r1, r0); - solver.inputs_done(); - assert!(solver.quick_solve(&gregs, false).is_ok()); - assert_eq!(solver.schedule_moves(&regs), 0); - assert_eq!( - solver.moves(), - &[ - mov(v10, gpr, r0, r2), - mov(v11, gpr, r1, r0), - mov(v10, gpr, r2, r1), - ] - ); - } - - #[test] - fn harder_move_cycles() { - let reginfo = INFO.borrow(); - let s = rc_by_name(&reginfo, "S"); - let d = rc_by_name(&reginfo, "D"); - let d0 = d.unit(0); - let d1 = d.unit(1); - let d2 = d.unit(2); - let s0 = s.unit(0); - let s1 = s.unit(1); - let s2 = s.unit(2); - let s3 = s.unit(3); - let gregs = RegisterSet::new(); - let mut regs = RegisterSet::new(); - let mut solver = Solver::new(); - let v10 = Value::new(10); - let v11 = Value::new(11); - let v12 = Value::new(12); - - // Not a simple cycle: Swap d0 <-> (s2, s3) - regs.take(d, d0); - regs.take(d, d1); - solver.reset(&regs); - solver.reassign_in(v10, d, d0, d1); - solver.reassign_in(v11, s, s2, s0); - solver.reassign_in(v12, s, s3, s1); - solver.inputs_done(); - assert!(solver.quick_solve(&gregs, false).is_ok()); - assert_eq!(solver.schedule_moves(&regs), 0); - assert_eq!( - solver.moves(), - &[ - mov(v10, d, d0, d2), - mov(v11, s, s2, s0), - mov(v12, s, s3, s1), - mov(v10, d, d2, d1), - ] - ); - - // Same problem in the other direction: Swap (s0, s1) <-> d1. - // - // If we divert the moves in order, we will need to allocate *two* temporary S registers. A - // trivial algorithm might assume that allocating a single temp is enough.
- solver.reset(&regs); - solver.reassign_in(v11, s, s0, s2); - solver.reassign_in(v12, s, s1, s3); - solver.reassign_in(v10, d, d1, d0); - solver.inputs_done(); - assert!(solver.quick_solve(&gregs, false).is_ok()); - assert_eq!(solver.schedule_moves(&regs), 0); - assert_eq!( - solver.moves(), - &[ - mov(v10, d, d1, d2), - mov(v12, s, s1, s3), - mov(v11, s, s0, s2), - mov(v10, d, d2, d0), - ] - ); - } - - #[test] - fn emergency_spill() { - let reginfo = INFO.borrow(); - let gpr = rc_by_name(&reginfo, "GPR"); - let r0 = gpr.unit(0); - let r1 = gpr.unit(1); - let r2 = gpr.unit(2); - let r3 = gpr.unit(3); - let r4 = gpr.unit(4); - let r5 = gpr.unit(5); - let gregs = RegisterSet::new(); - let mut regs = RegisterSet::new(); - let mut solver = Solver::new(); - let v10 = Value::new(10); - let v11 = Value::new(11); - let v12 = Value::new(12); - let v13 = Value::new(13); - let v14 = Value::new(14); - let v15 = Value::new(15); - - // Claim r0--r2 and r3--r15 for other values. - for i in 0..16 { - regs.take(gpr, gpr.unit(i)); - } - - // Request a permutation cycle. - solver.reset(&regs); - solver.reassign_in(v10, gpr, r0, r1); - solver.reassign_in(v11, gpr, r1, r2); - solver.reassign_in(v12, gpr, r2, r0); - solver.inputs_done(); - assert!(solver.quick_solve(&gregs, false).is_ok()); - assert_eq!(solver.schedule_moves(&regs), 1); - assert_eq!( - solver.moves(), - &[ - spill(v10, gpr, r0, 0), - mov(v12, gpr, r2, r0), - mov(v11, gpr, r1, r2), - fill(v10, gpr, 0, r1), - ] - ); - - // Two cycles should only require a single spill. - solver.reset(&regs); - // Cycle 1. - solver.reassign_in(v10, gpr, r0, r1); - solver.reassign_in(v11, gpr, r1, r2); - solver.reassign_in(v12, gpr, r2, r0); - // Cycle 2. - solver.reassign_in(v13, gpr, r3, r4); - solver.reassign_in(v14, gpr, r4, r5); - solver.reassign_in(v15, gpr, r5, r3); - - solver.inputs_done(); - assert!(solver.quick_solve(&gregs, false).is_ok()); - // We resolve two cycles with one spill. - assert_eq!(solver.schedule_moves(&regs), 1); - assert_eq!( - solver.moves(), - &[ - spill(v10, gpr, r0, 0), - mov(v12, gpr, r2, r0), - mov(v11, gpr, r1, r2), - mov(v13, gpr, r3, r1), // Use available r1 to break cycle 2. - mov(v15, gpr, r5, r3), - mov(v14, gpr, r4, r5), - mov(v13, gpr, r1, r4), - fill(v10, gpr, 0, r1), // Finally complete cycle 1. - ] - ); - } -} diff --git a/cranelift/codegen/src/regalloc/spilling.rs b/cranelift/codegen/src/regalloc/spilling.rs deleted file mode 100644 index e44502f0a6..0000000000 --- a/cranelift/codegen/src/regalloc/spilling.rs +++ /dev/null @@ -1,638 +0,0 @@ -//! Spilling pass. -//! -//! The spilling pass is the first to run after the liveness analysis. Its primary function is to -//! ensure that the register pressure never exceeds the number of available registers by moving -//! some SSA values to spill slots on the stack. This is encoded in the affinity of the value's -//! live range. -//! -//! Some instruction operand constraints may require additional registers to resolve. Since this -//! can cause spilling, the spilling pass is also responsible for resolving those constraints by -//! inserting copies. The extra constraints are: -//! -//! 1. A value used by a tied operand must be killed by the instruction. This is resolved by -//! inserting a copy to a temporary value when necessary (see the sketch below). -//! 2. When the same value is used more than once by an instruction, the operand constraints must -//! be compatible. Otherwise, the value must be copied into a new register for some of the -//! operands.
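To make constraint 1 above concrete, here is a minimal, self-contained sketch of the decision the pass makes for a tied operand. This is a toy model, not code from this diff: values are plain IDs and "inserting a copy" just mints a new ID, whereas the real pass calls `cur.ins().copy(value)` and patches the instruction's argument (see `insert_copy` and `process_reg_uses` later in this file).

// Toy model of resolving a tied-operand constraint.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Val(u32);

struct FuncModel {
    next_val: u32,
}

impl FuncModel {
    // Stand-in for inserting `copy value` before the instruction.
    fn insert_copy(&mut self, _of: Val) -> Val {
        let v = Val(self.next_val);
        self.next_val += 1;
        v
    }
}

// A tied operand must kill the value it reads. If the value is killed by the
// instruction anyway, use it directly; if it lives on past the instruction,
// consume a fresh copy so that only the copy is killed.
fn resolve_tied(func: &mut FuncModel, arg: Val, killed_here: bool) -> Val {
    if killed_here {
        arg
    } else {
        func.insert_copy(arg)
    }
}

fn main() {
    let mut f = FuncModel { next_val: 100 };
    let v1 = Val(1);
    assert_eq!(resolve_tied(&mut f, v1, true), v1); // killed here: reuse the value
    assert_ne!(resolve_tied(&mut f, v1, false), v1); // live-through: copy it
}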
- -use crate::cursor::{Cursor, EncCursor}; -use crate::dominator_tree::DominatorTree; -use crate::ir::{ArgumentLoc, Block, Function, Inst, InstBuilder, SigRef, Value, ValueLoc}; -use crate::isa::registers::{RegClass, RegClassIndex, RegClassMask, RegUnit}; -use crate::isa::{ConstraintKind, EncInfo, RecipeConstraints, RegInfo, TargetIsa}; -use crate::regalloc::affinity::Affinity; -use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker}; -use crate::regalloc::liveness::Liveness; -use crate::regalloc::pressure::Pressure; -use crate::regalloc::virtregs::VirtRegs; -use crate::timing; -use crate::topo_order::TopoOrder; -use alloc::vec::Vec; -use core::fmt; - -/// Return a top-level register class which contains `unit`. -fn toprc_containing_regunit(unit: RegUnit, reginfo: &RegInfo) -> RegClass { - let bank = reginfo.bank_containing_regunit(unit).unwrap(); - reginfo.classes[bank.first_toprc..(bank.first_toprc + bank.num_toprcs)] - .iter() - .find(|&rc| rc.contains(unit)) - .expect("reg unit should be in a toprc") -} - -/// Persistent data structures for the spilling pass. -pub struct Spilling { - spills: Vec<Value>, - reg_uses: Vec<RegUse>, -} - -/// Context data structure that gets instantiated once per pass. -struct Context<'a> { - // Current instruction as well as reference to function and ISA. - cur: EncCursor<'a>, - - // Cached ISA information. - reginfo: RegInfo, - encinfo: EncInfo, - - // References to contextual data structures we need. - domtree: &'a DominatorTree, - liveness: &'a mut Liveness, - virtregs: &'a VirtRegs, - topo: &'a mut TopoOrder, - - // Current register pressure. - pressure: Pressure, - - // Values spilled for the current instruction. These values have already been removed from the - // pressure tracker, but they are still present in the live value tracker and their affinity - // hasn't been changed yet. - spills: &'a mut Vec<Value>, - - // Uses of register values in the current instruction. - reg_uses: &'a mut Vec<RegUse>, -} - -impl Spilling { - /// Create a new spilling data structure. - pub fn new() -> Self { - Self { - spills: Vec::new(), - reg_uses: Vec::new(), - } - } - - /// Clear all data structures in this spilling pass. - pub fn clear(&mut self) { - self.spills.clear(); - self.reg_uses.clear(); - } - - /// Run the spilling algorithm over `func`.
- pub fn run( - &mut self, - isa: &dyn TargetIsa, - func: &mut Function, - domtree: &DominatorTree, - liveness: &mut Liveness, - virtregs: &VirtRegs, - topo: &mut TopoOrder, - tracker: &mut LiveValueTracker, - ) { - let _tt = timing::ra_spilling(); - log::trace!("Spilling for:\n{}", func.display(isa)); - let reginfo = isa.register_info(); - let usable_regs = isa.allocatable_registers(func); - let mut ctx = Context { - cur: EncCursor::new(func, isa), - reginfo: isa.register_info(), - encinfo: isa.encoding_info(), - domtree, - liveness, - virtregs, - topo, - pressure: Pressure::new(&reginfo, &usable_regs), - spills: &mut self.spills, - reg_uses: &mut self.reg_uses, - }; - ctx.run(tracker) - } -} - -impl<'a> Context<'a> { - fn run(&mut self, tracker: &mut LiveValueTracker) { - self.topo.reset(self.cur.func.layout.blocks()); - while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) { - self.visit_block(block, tracker); - } - } - - fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) { - log::trace!("Spilling {}:", block); - self.cur.goto_top(block); - self.visit_block_header(block, tracker); - tracker.drop_dead_params(); - self.process_spills(tracker); - - while let Some(inst) = self.cur.next_inst() { - if !self.cur.func.dfg[inst].opcode().is_ghost() { - self.visit_inst(inst, block, tracker); - } else { - let (_throughs, kills) = tracker.process_ghost(inst); - self.free_regs(kills); - } - tracker.drop_dead(inst); - self.process_spills(tracker); - } - } - - // Take all live registers in `regs` from the pressure set. - // This doesn't cause any spilling, it is assumed there are enough registers. - fn take_live_regs(&mut self, regs: &[LiveValue]) { - for lv in regs { - if !lv.is_dead { - if let Affinity::Reg(rci) = lv.affinity { - let rc = self.reginfo.rc(rci); - self.pressure.take(rc); - } - } - } - } - - // Free all registers in `kills` from the pressure set. - fn free_regs(&mut self, kills: &[LiveValue]) { - for lv in kills { - if let Affinity::Reg(rci) = lv.affinity { - if !self.spills.contains(&lv.value) { - let rc = self.reginfo.rc(rci); - self.pressure.free(rc); - } - } - } - } - - // Free all dead registers in `regs` from the pressure set. - fn free_dead_regs(&mut self, regs: &[LiveValue]) { - for lv in regs { - if lv.is_dead { - if let Affinity::Reg(rci) = lv.affinity { - if !self.spills.contains(&lv.value) { - let rc = self.reginfo.rc(rci); - self.pressure.free(rc); - } - } - } - } - } - - fn visit_block_header(&mut self, block: Block, tracker: &mut LiveValueTracker) { - let (liveins, params) = tracker.block_top( - block, - &self.cur.func.dfg, - self.liveness, - &self.cur.func.layout, - self.domtree, - ); - - // Count the live-in registers. These should already fit in registers; they did at the - // dominator. - self.pressure.reset(); - self.take_live_regs(liveins); - - // A block can have an arbitrary (up to 2^16...) number of parameters, so they are not - // guaranteed to fit in registers. - for lv in params { - if let Affinity::Reg(rci) = lv.affinity { - let rc = self.reginfo.rc(rci); - 'try_take: while let Err(mask) = self.pressure.take_transient(rc) { - log::trace!("Need {} reg for block param {}", rc, lv.value); - match self.spill_candidate(mask, liveins) { - Some(cand) => { - log::trace!( - "Spilling live-in {} to make room for {} block param {}", - cand, - rc, - lv.value - ); - self.spill_reg(cand); - } - None => { - // We can't spill any of the live-in registers, so we have to spill a - // block argument.
Since the current spill metric would consider all the - // block arguments equal, just spill the present register. - log::trace!("Spilling {} block argument {}", rc, lv.value); - - // Since `spill_reg` will free a register, add the current one here. - self.pressure.take(rc); - self.spill_reg(lv.value); - break 'try_take; - } - } - } - } - } - - // The transient pressure counts for the block arguments are accurate. Just preserve them. - self.pressure.preserve_transient(); - self.free_dead_regs(params); - } - - fn visit_inst(&mut self, inst: Inst, block: Block, tracker: &mut LiveValueTracker) { - log::trace!("Inst {}, {}", self.cur.display_inst(inst), self.pressure); - debug_assert_eq!(self.cur.current_inst(), Some(inst)); - debug_assert_eq!(self.cur.current_block(), Some(block)); - - let constraints = self - .encinfo - .operand_constraints(self.cur.func.encodings[inst]); - - // We may need to resolve register constraints if there are any noteworthy uses. - debug_assert!(self.reg_uses.is_empty()); - self.collect_reg_uses(inst, block, constraints); - - // Calls usually have fixed register uses. - let call_sig = self.cur.func.dfg.call_signature(inst); - if let Some(sig) = call_sig { - self.collect_abi_reg_uses(inst, sig); - } - - if !self.reg_uses.is_empty() { - self.process_reg_uses(inst, tracker); - } - - // Update the live value tracker with this instruction. - let (throughs, kills, defs) = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness); - - // Remove kills from the pressure tracker. - self.free_regs(kills); - - // If inst is a call, spill all register values that are live across the call. - // This means that we don't currently take advantage of callee-saved registers. - // TODO: Be more sophisticated. - let opcode = self.cur.func.dfg[inst].opcode(); - if call_sig.is_some() || opcode.clobbers_all_regs() { - for lv in throughs { - if lv.affinity.is_reg() && !self.spills.contains(&lv.value) { - self.spill_reg(lv.value); - } - } - } - - // Make sure we have enough registers for the register defs. - // Dead defs are included here. They need a register too. - // No need to process call return values, they are in fixed registers. - if let Some(constraints) = constraints { - for op in constraints.outs { - if op.kind != ConstraintKind::Stack { - // Add register def to pressure, spill if needed. - while let Err(mask) = self.pressure.take_transient(op.regclass) { - log::trace!("Need {} reg from {} throughs", op.regclass, throughs.len()); - match self.spill_candidate(mask, throughs) { - Some(cand) => self.spill_reg(cand), - None => panic!( - "Ran out of {} registers for {}", - op.regclass, - self.cur.display_inst(inst) - ), - } - } - } - } - self.pressure.reset_transient(); - } - - // Restore pressure state, compute pressure with affinities from `defs`. - // Exclude dead defs. Includes call return values. - // This won't cause spilling. - self.take_live_regs(defs); - } - - // Collect register uses that are noteworthy in one of the following ways: - // - // 1. It's a fixed register constraint. - // 2. It's a use of a spilled value. - // 3. It's a tied register constraint and the value isn't killed. - // - // We are assuming here that if a value is used both by a fixed register operand and a register - // class operand, the two are compatible. We are also assuming that two register class - // operands are always compatible. (A sketch of this classification follows below.)
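The three "noteworthy" cases above can be pictured with a small, self-contained sketch. This is a toy model under stated assumptions: the constraint kinds are reduced to a bare enum, and the boolean flags mirror the `RegUse` struct defined later in this file.

// Toy classification of a register use; only "interesting" uses are kept.
enum Kind {
    Stack,
    Reg,
    FixedReg,
    Tied,
    FixedTied,
}

struct UseFlags {
    fixed: bool,   // fixed register constraint
    tied: bool,    // tied constraint whose value is *not* killed
    spilled: bool, // the used value currently lives on the stack
}

fn classify(kind: Kind, killed_by_inst: bool, value_on_stack: bool) -> Option<UseFlags> {
    let (fixed, tied) = match kind {
        Kind::Stack => return None, // stack operands need no register fix-up
        Kind::Reg => (false, false),
        Kind::FixedReg => (true, false),
        Kind::Tied => (false, !killed_by_inst),
        Kind::FixedTied => (true, !killed_by_inst),
    };
    let flags = UseFlags { fixed, tied, spilled: value_on_stack };
    if flags.fixed || flags.tied || flags.spilled {
        Some(flags) // worth recording, like `self.reg_uses.push(reguse)`
    } else {
        None
    }
}

fn main() {
    // A plain register use of a live-through value: nothing to record.
    assert!(classify(Kind::Reg, false, false).is_none());
    // A tied use of a value that is not killed: must be recorded (and copied).
    assert!(classify(Kind::Tied, false, false).is_some());
}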
- fn collect_reg_uses( - &mut self, - inst: Inst, - block: Block, - constraints: Option<&RecipeConstraints>, - ) { - let args = self.cur.func.dfg.inst_args(inst); - let num_fixed_ins = if let Some(constraints) = constraints { - for (idx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() { - let mut reguse = RegUse::new(arg, idx, op.regclass.into()); - let lr = &self.liveness[arg]; - match op.kind { - ConstraintKind::Stack => continue, - ConstraintKind::FixedReg(_) => reguse.fixed = true, - ConstraintKind::Tied(_) => { - // A tied operand must kill the used value. - reguse.tied = !lr.killed_at(inst, block, &self.cur.func.layout); - } - ConstraintKind::FixedTied(_) => { - reguse.fixed = true; - reguse.tied = !lr.killed_at(inst, block, &self.cur.func.layout); - } - ConstraintKind::Reg => {} - } - if lr.affinity.is_stack() { - reguse.spilled = true; - } - - // Only collect the interesting register uses. - if reguse.fixed || reguse.tied || reguse.spilled { - log::trace!(" reguse: {}", reguse); - self.reg_uses.push(reguse); - } - } - constraints.ins.len() - } else { - // A non-ghost instruction with no constraints can't have any - // fixed operands. - 0 - }; - - // Similarly, for return instructions, collect uses of ABI-defined - // return values. - if self.cur.func.dfg[inst].opcode().is_return() { - debug_assert_eq!( - self.cur.func.dfg.inst_variable_args(inst).len(), - self.cur.func.signature.returns.len(), - "The non-fixed arguments in a return should follow the function's signature." - ); - for (ret_idx, (ret, &arg)) in - self.cur.func.signature.returns.iter().zip(args).enumerate() - { - let idx = num_fixed_ins + ret_idx; - let unit = match ret.location { - ArgumentLoc::Unassigned => { - panic!("function return signature should be legalized") - } - ArgumentLoc::Reg(unit) => unit, - ArgumentLoc::Stack(_) => continue, - }; - let toprc = toprc_containing_regunit(unit, &self.reginfo); - let mut reguse = RegUse::new(arg, idx, toprc.into()); - reguse.fixed = true; - - log::trace!(" reguse: {}", reguse); - self.reg_uses.push(reguse); - } - } - } - - // Collect register uses from the ABI input constraints. - fn collect_abi_reg_uses(&mut self, inst: Inst, sig: SigRef) { - let num_fixed_args = self.cur.func.dfg[inst] - .opcode() - .constraints() - .num_fixed_value_arguments(); - let args = self.cur.func.dfg.inst_variable_args(inst); - for (idx, (abi, &arg)) in self.cur.func.dfg.signatures[sig] - .params - .iter() - .zip(args) - .enumerate() - { - if abi.location.is_reg() { - let (rci, spilled) = match self.liveness[arg].affinity { - Affinity::Reg(rci) => (rci, false), - Affinity::Stack => ( - self.cur.isa.regclass_for_abi_type(abi.value_type).into(), - true, - ), - Affinity::Unassigned => panic!("Missing affinity for {}", arg), - }; - let mut reguse = RegUse::new(arg, num_fixed_args + idx, rci); - reguse.fixed = true; - reguse.spilled = spilled; - self.reg_uses.push(reguse); - } - } - } - - // Process multiple register uses to resolve potential conflicts. - // - // Look for multiple uses of the same value in `self.reg_uses` and insert copies as necessary. - // Trigger spilling if any of the temporaries cause the register pressure to become too high. - // - // Leave `self.reg_uses` empty. - fn process_reg_uses(&mut self, inst: Inst, tracker: &LiveValueTracker) { - // We're looking for multiple uses of the same value, so start by sorting by value. The - // secondary `opidx` key makes it possible to use an unstable (non-allocating) sort. 
- self.reg_uses.sort_unstable_by_key(|u| (u.value, u.opidx)); - - self.cur.use_srcloc(inst); - for i in 0..self.reg_uses.len() { - let ru = self.reg_uses[i]; - - // Do we need to insert a copy for this use? - let need_copy = if ru.tied { - true - } else if ru.fixed { - // This is a fixed register use which doesn't necessarily require a copy. - // Make a copy only if this is not the first use of the value. - self.reg_uses - .get(i.wrapping_sub(1)) - .map_or(false, |ru2| ru2.value == ru.value) - } else { - false - }; - - if need_copy { - let copy = self.insert_copy(ru.value, ru.rci); - self.cur.func.dfg.inst_args_mut(inst)[ru.opidx as usize] = copy; - } - - // Even if we don't insert a copy, we may need to account for register pressure for the - // reload pass. - if need_copy || ru.spilled { - let rc = self.reginfo.rc(ru.rci); - while let Err(mask) = self.pressure.take_transient(rc) { - log::trace!("Copy of {} reg causes spill", rc); - // Spill a live register that is *not* used by the current instruction. - // Spilling a use wouldn't help. - // - // Do allow spilling of block arguments on branches. This is safe since we spill - // the whole virtual register which includes the matching block parameter value - // at the branch destination. It is also necessary since there can be - // arbitrarily many block arguments. - match { - let args = if self.cur.func.dfg[inst].opcode().is_branch() { - self.cur.func.dfg.inst_fixed_args(inst) - } else { - self.cur.func.dfg.inst_args(inst) - }; - self.spill_candidate( - mask, - tracker.live().iter().filter(|lv| !args.contains(&lv.value)), - ) - } { - Some(cand) => self.spill_reg(cand), - None => panic!( - "Ran out of {} registers when inserting copy before {}", - rc, - self.cur.display_inst(inst) - ), - } - } - } - } - self.pressure.reset_transient(); - self.reg_uses.clear() - } - - // Find a spill candidate from `candidates` whose top-level register class is in `mask`. - fn spill_candidate<'ii, II>(&self, mask: RegClassMask, candidates: II) -> Option<Value> - where - II: IntoIterator<Item = &'ii LiveValue>, - { - // Find the best viable spill candidate. - // - // The very simple strategy implemented here is to spill the value with the earliest def in - // the reverse post-order. This strategy depends on a good reload pass to generate good - // code. - // - // We know that all candidate defs dominate the current instruction, so one of them will - // dominate the others. That is the earliest def. - candidates - .into_iter() - .filter_map(|lv| { - // Viable candidates are registers in one of the `mask` classes, and not already in - // the spill set. - if let Affinity::Reg(rci) = lv.affinity { - let rc = self.reginfo.rc(rci); - if (mask & (1 << rc.toprc)) != 0 && !self.spills.contains(&lv.value) { - // Here, `lv` is a viable spill candidate. - return Some(lv.value); - } - } - None - }) - .min_by(|&a, &b| { - // Find the minimum candidate according to the RPO of their defs. - self.domtree.rpo_cmp( - self.cur.func.dfg.value_def(a), - self.cur.func.dfg.value_def(b), - &self.cur.func.layout, - ) - }) - } - - /// Spill `value` immediately by - /// - /// 1. Changing its affinity to `Stack` which marks the spill. - /// 2. Removing the value from the pressure tracker. - /// 3. Adding the value to `self.spills` for later reference by `process_spills`. - /// - /// Note that this does not update the cached affinity in the live value tracker. Call - /// `process_spills` to do that.
- fn spill_reg(&mut self, value: Value) { - if let Affinity::Reg(rci) = self.liveness.spill(value) { - let rc = self.reginfo.rc(rci); - self.pressure.free(rc); - self.spills.push(value); - log::trace!("Spilled {}:{} -> {}", value, rc, self.pressure); - } else { - panic!("Cannot spill {} that was already on the stack", value); - } - - // Assign a spill slot for the whole virtual register. - let ss = self - .cur - .func - .stack_slots - .make_spill_slot(self.cur.func.dfg.value_type(value)); - for &v in self.virtregs.congruence_class(&value) { - self.liveness.spill(v); - self.cur.func.locations[v] = ValueLoc::Stack(ss); - } - } - - /// Process any pending spills in the `self.spills` vector. - /// - /// It is assumed that spills are removed from the pressure tracker immediately, see - /// `spill_reg` above. - /// - /// We also need to update the live range affinity and remove spilled values from the live - /// value tracker. - fn process_spills(&mut self, tracker: &mut LiveValueTracker) { - if !self.spills.is_empty() { - tracker.process_spills(|v| self.spills.contains(&v)); - self.spills.clear() - } - } - - /// Insert a `copy value` before the current instruction and give it a live range extending to - /// the current instruction. - /// - /// Returns the new local value created. - fn insert_copy(&mut self, value: Value, rci: RegClassIndex) -> Value { - let copy = self.cur.ins().copy(value); - let inst = self.cur.built_inst(); - - // Update live ranges. - self.liveness.create_dead(copy, inst, Affinity::Reg(rci)); - self.liveness.extend_locally( - copy, - self.cur.func.layout.pp_block(inst), - self.cur.current_inst().expect("must be at an instruction"), - &self.cur.func.layout, - ); - - copy - } -} - -/// Struct representing a register use of a value. -/// Used to detect multiple uses of the same value with incompatible register constraints. -#[derive(Clone, Copy)] -struct RegUse { - value: Value, - opidx: u16, - - // Register class required by the use. - rci: RegClassIndex, - - // A use with a fixed register constraint. - fixed: bool, - - // A register use of a spilled value. - spilled: bool, - - // A use with a tied register constraint *and* the used value is not killed. - tied: bool, -} - -impl RegUse { - fn new(value: Value, idx: usize, rci: RegClassIndex) -> Self { - Self { - value, - opidx: idx as u16, - rci, - fixed: false, - spilled: false, - tied: false, - } - } -} - -impl fmt::Display for RegUse { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}@op{}", self.value, self.opidx)?; - if self.fixed { - write!(f, "/fixed")?; - } - if self.spilled { - write!(f, "/spilled")?; - } - if self.tied { - write!(f, "/tied")?; - } - Ok(()) - } -} diff --git a/cranelift/codegen/src/stack_layout.rs b/cranelift/codegen/src/stack_layout.rs deleted file mode 100644 index 2430e8a643..0000000000 --- a/cranelift/codegen/src/stack_layout.rs +++ /dev/null @@ -1,241 +0,0 @@ -//! Computing stack layout. - -use crate::ir::stackslot::{StackOffset, StackSize, StackSlotKind}; -use crate::ir::{StackLayoutInfo, StackSlots}; -use crate::result::{CodegenError, CodegenResult}; -use core::cmp::{max, min}; - -/// Compute the stack frame layout. -/// -/// Determine the total size of this stack frame and assign offsets to all `Spill` and `Explicit` -/// stack slots. -/// -/// The total frame size will be a multiple of `alignment` which must be a power of two, unless the -/// function doesn't perform any call. -/// -/// Returns the total stack frame size which is also saved in `frame.frame_size`. 
-/// -/// If the stack frame is too big, returns an `ImplLimitExceeded` error. -pub fn layout_stack( - frame: &mut StackSlots, - is_leaf: bool, - alignment: StackSize, -) -> CodegenResult<StackSize> { - // Each object and the whole stack frame must fit in 2 GB such that any relative offset within - // the frame fits in a `StackOffset`. - let max_size = StackOffset::max_value() as StackSize; - debug_assert!(alignment.is_power_of_two() && alignment <= max_size); - - // We assume a stack that grows toward lower addresses as implemented by modern ISAs. The - // stack layout from high to low addresses will be: - // - // 1. incoming arguments. - // 2. spills + explicits + struct returns. - // 3. outgoing arguments. - // - // The incoming arguments can have both positive and negative offsets. An incoming - // argument with a negative offset is usually the x86 return address pushed by the call - // instruction, but it can also be a fixed stack slot pushed by an externally generated - // prologue. - // - // Both incoming and outgoing argument slots have fixed offsets that are treated as - // reserved zones by the layout algorithm. - // - // If a function only has incoming arguments and does not perform any calls, then it doesn't - // require the stack to be aligned. - - let mut incoming_min = 0; - let mut incoming_max = 0; - let mut outgoing_max = 0; - let mut min_align = alignment; - let mut must_align = !is_leaf; - - for slot in frame.values() { - if slot.size > max_size { - return Err(CodegenError::ImplLimitExceeded); - } - - match slot.kind { - StackSlotKind::IncomingArg => { - incoming_min = min(incoming_min, slot.offset.unwrap()); - incoming_max = max(incoming_max, slot.offset.unwrap() + slot.size as i32); - } - StackSlotKind::OutgoingArg => { - let offset = slot - .offset - .unwrap() - .checked_add(slot.size as StackOffset) - .ok_or(CodegenError::ImplLimitExceeded)?; - outgoing_max = max(outgoing_max, offset); - must_align = true; - } - StackSlotKind::StructReturnSlot - | StackSlotKind::SpillSlot - | StackSlotKind::ExplicitSlot - | StackSlotKind::EmergencySlot => { - // Determine the smallest alignment of any explicit or spill slot. - min_align = slot.alignment(min_align); - must_align = true; - } - } - } - - // Lay out spill slots, struct return slots, and explicit slots below the - // incoming arguments. The offset is negative, growing downwards. Start with - // the smallest alignments for better packing. - let mut offset = incoming_min; - debug_assert!(min_align.is_power_of_two()); - while min_align <= alignment { - for slot in frame.values_mut() { - // Pick out explicit and spill slots with exact alignment `min_align`. - match slot.kind { - StackSlotKind::SpillSlot - | StackSlotKind::StructReturnSlot - | StackSlotKind::ExplicitSlot - | StackSlotKind::EmergencySlot => { - if slot.alignment(alignment) != min_align { - continue; - } - } - StackSlotKind::IncomingArg | StackSlotKind::OutgoingArg => continue, - } - - offset = offset - .checked_sub(slot.size as StackOffset) - .ok_or(CodegenError::ImplLimitExceeded)?; - - // Aligning the negative offset can never cause overflow. We're only clearing bits. - offset &= -(min_align as StackOffset); - slot.offset = Some(offset); - } - - // Move on to the next higher alignment. - min_align *= 2; - } - - // Finally, make room for the outgoing arguments.
- offset = offset - .checked_sub(outgoing_max) - .ok_or(CodegenError::ImplLimitExceeded)?; - - if must_align { - offset &= -(alignment as StackOffset); - } - - // Set the computed layout information for the frame - let frame_size = (offset as StackSize).wrapping_neg(); - let inbound_args_size = incoming_max as u32; - frame.layout_info = Some(StackLayoutInfo { - frame_size, - inbound_args_size, - }); - - Ok(frame_size) -} - -#[cfg(test)] -mod tests { - use super::layout_stack; - use crate::ir::stackslot::StackOffset; - use crate::ir::types; - use crate::ir::{StackSlotData, StackSlotKind, StackSlots}; - use crate::result::CodegenError; - - #[test] - fn layout() { - let sss = &mut StackSlots::new(); - - // For all these test cases, assume it will call. - let is_leaf = false; - - // An empty layout should have 0-sized stack frame. - assert_eq!(layout_stack(sss, is_leaf, 1), Ok(0)); - assert_eq!(layout_stack(sss, is_leaf, 16), Ok(0)); - - // Same for incoming arguments with non-negative offsets. - let in0 = sss.make_incoming_arg(8, 0); - let in1 = sss.make_incoming_arg(8, 8); - - assert_eq!(layout_stack(sss, is_leaf, 1), Ok(0)); - assert_eq!(layout_stack(sss, is_leaf, 16), Ok(0)); - assert_eq!(sss[in0].offset, Some(0)); - assert_eq!(sss[in1].offset, Some(8)); - - // Add some spill slots. - let ss0 = sss.make_spill_slot(types::I64); - let ss1 = sss.make_spill_slot(types::I32); - - assert_eq!(layout_stack(sss, is_leaf, 1), Ok(12)); - assert_eq!(sss[in0].offset, Some(0)); - assert_eq!(sss[in1].offset, Some(8)); - assert_eq!(sss[ss0].offset, Some(-8)); - assert_eq!(sss[ss1].offset, Some(-12)); - - assert_eq!(layout_stack(sss, is_leaf, 16), Ok(16)); - assert_eq!(sss[in0].offset, Some(0)); - assert_eq!(sss[in1].offset, Some(8)); - assert_eq!(sss[ss0].offset, Some(-16)); - assert_eq!(sss[ss1].offset, Some(-4)); - - // An incoming argument with negative offset counts towards the total frame size, but it - // should still pack nicely with the spill slots. - let in2 = sss.make_incoming_arg(4, -4); - - assert_eq!(layout_stack(sss, is_leaf, 1), Ok(16)); - assert_eq!(sss[in0].offset, Some(0)); - assert_eq!(sss[in1].offset, Some(8)); - assert_eq!(sss[in2].offset, Some(-4)); - assert_eq!(sss[ss0].offset, Some(-12)); - assert_eq!(sss[ss1].offset, Some(-16)); - - assert_eq!(layout_stack(sss, is_leaf, 16), Ok(16)); - assert_eq!(sss[in0].offset, Some(0)); - assert_eq!(sss[in1].offset, Some(8)); - assert_eq!(sss[in2].offset, Some(-4)); - assert_eq!(sss[ss0].offset, Some(-16)); - assert_eq!(sss[ss1].offset, Some(-8)); - - // Finally, make sure there is room for the outgoing args. - let out0 = sss.get_outgoing_arg(4, 0); - - assert_eq!(layout_stack(sss, is_leaf, 1), Ok(20)); - assert_eq!(sss[in0].offset, Some(0)); - assert_eq!(sss[in1].offset, Some(8)); - assert_eq!(sss[in2].offset, Some(-4)); - assert_eq!(sss[ss0].offset, Some(-12)); - assert_eq!(sss[ss1].offset, Some(-16)); - assert_eq!(sss[out0].offset, Some(0)); - - assert_eq!(layout_stack(sss, is_leaf, 16), Ok(32)); - assert_eq!(sss[in0].offset, Some(0)); - assert_eq!(sss[in1].offset, Some(8)); - assert_eq!(sss[in2].offset, Some(-4)); - assert_eq!(sss[ss0].offset, Some(-16)); - assert_eq!(sss[ss1].offset, Some(-8)); - assert_eq!(sss[out0].offset, Some(0)); - - // Also test that an unsupported offset is rejected. 
- sss.get_outgoing_arg(1, StackOffset::max_value() - 1); - assert_eq!( - layout_stack(sss, is_leaf, 1), - Err(CodegenError::ImplLimitExceeded) - ); - } - - #[test] - fn slot_kinds() { - let sss = &mut StackSlots::new(); - - // Add some slots of various kinds. - let ss0 = sss.make_spill_slot(types::I32); - let ss1 = sss.push(StackSlotData::new( - StackSlotKind::ExplicitSlot, - types::I32.bytes(), - )); - let ss2 = sss.get_emergency_slot(types::I32, &[]); - - assert_eq!(layout_stack(sss, true, 1), Ok(12)); - assert_eq!(sss[ss0].offset, Some(-4)); - assert_eq!(sss[ss1].offset, Some(-8)); - assert_eq!(sss[ss2].offset, Some(-12)); - } -} diff --git a/cranelift/codegen/src/topo_order.rs b/cranelift/codegen/src/topo_order.rs deleted file mode 100644 index 8d38e4f324..0000000000 --- a/cranelift/codegen/src/topo_order.rs +++ /dev/null @@ -1,138 +0,0 @@ -//! Topological order of blocks, according to the dominator tree. - -use crate::dominator_tree::DominatorTree; -use crate::entity::EntitySet; -use crate::ir::{Block, Layout}; -use alloc::vec::Vec; - -/// Present blocks in a topological order such that all dominating blocks are guaranteed to be visited -/// before the current block. -/// -/// There are many topological orders of the blocks in a function, so it is possible to provide a -/// preferred order, and the `TopoOrder` will present blocks in an order that is as close as possible -/// to the preferred order. -pub struct TopoOrder { - /// Preferred order of blocks to visit. - preferred: Vec<Block>, - - /// Next entry to get from `preferred`. - next: usize, - - /// Set of visited blocks. - visited: EntitySet<Block>, - - /// Stack of blocks to be visited next, already in `visited`. - stack: Vec<Block>, -} - -impl TopoOrder { - /// Create a new empty topological order. - pub fn new() -> Self { - Self { - preferred: Vec::new(), - next: 0, - visited: EntitySet::new(), - stack: Vec::new(), - } - } - - /// Clear all data structures in this topological order. - pub fn clear(&mut self) { - self.preferred.clear(); - self.next = 0; - self.visited.clear(); - self.stack.clear(); - } - - /// Reset and initialize with a preferred sequence of blocks. The resulting topological order is - /// guaranteed to contain all of the blocks in `preferred` as well as any dominators. - pub fn reset<Blocks>(&mut self, preferred: Blocks) - where - Blocks: IntoIterator<Item = Block>, - { - self.preferred.clear(); - self.preferred.extend(preferred); - self.next = 0; - self.visited.clear(); - self.stack.clear(); - } - - /// Get the next block in the topological order. - /// - /// Two things are guaranteed about the blocks returned by this function: - /// - /// - All blocks in the `preferred` iterator given to `reset` will be returned. - /// - All dominators are visited before the block returned. - pub fn next(&mut self, layout: &Layout, domtree: &DominatorTree) -> Option<Block> { - self.visited.resize(layout.block_capacity()); - // Any entries in `stack` should be returned immediately. They have already been added to - // `visited`. - while self.stack.is_empty() { - match self.preferred.get(self.next).cloned() { - None => return None, - Some(mut block) => { - // We have the next block in the preferred order. - self.next += 1; - // Push it along with any non-visited dominators.
- while self.visited.insert(block) { - self.stack.push(block); - match domtree.idom(block) { - Some(idom) => { - block = layout.inst_block(idom).expect("idom not in layout") - } - None => break, - } - } - } - } - } - self.stack.pop() - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::cursor::{Cursor, FuncCursor}; - use crate::dominator_tree::DominatorTree; - use crate::flowgraph::ControlFlowGraph; - use crate::ir::{Function, InstBuilder}; - use core::iter; - - #[test] - fn empty() { - let func = Function::new(); - let cfg = ControlFlowGraph::with_function(&func); - let domtree = DominatorTree::with_function(&func, &cfg); - let mut topo = TopoOrder::new(); - - assert_eq!(topo.next(&func.layout, &domtree), None); - topo.reset(func.layout.blocks()); - assert_eq!(topo.next(&func.layout, &domtree), None); - } - - #[test] - fn simple() { - let mut func = Function::new(); - let block0 = func.dfg.make_block(); - let block1 = func.dfg.make_block(); - - { - let mut cur = FuncCursor::new(&mut func); - - cur.insert_block(block0); - cur.ins().jump(block1, &[]); - cur.insert_block(block1); - cur.ins().jump(block1, &[]); - } - - let cfg = ControlFlowGraph::with_function(&func); - let domtree = DominatorTree::with_function(&func, &cfg); - let mut topo = TopoOrder::new(); - - topo.reset(iter::once(block1)); - assert_eq!(topo.next(&func.layout, &domtree), Some(block0)); - assert_eq!(topo.next(&func.layout, &domtree), Some(block1)); - assert_eq!(topo.next(&func.layout, &domtree), None); - } -} diff --git a/cranelift/codegen/src/value_label.rs b/cranelift/codegen/src/value_label.rs index 82bfd3e30c..459fa62d72 100644 --- a/cranelift/codegen/src/value_label.rs +++ b/cranelift/codegen/src/value_label.rs @@ -1,14 +1,8 @@ -use crate::ir::{Function, SourceLoc, Value, ValueLabel, ValueLabelAssignments, ValueLoc}; -use crate::isa::TargetIsa; -use crate::machinst::MachCompileResult; -use crate::regalloc::{Context, RegDiversions}; +use crate::ir::{SourceLoc, ValueLabel}; use crate::HashMap; -use alloc::collections::BTreeMap; use alloc::vec::Vec; use core::cmp::Ordering; use core::convert::From; -use core::iter::Iterator; -use core::ops::Bound::*; use core::ops::Deref; use regalloc::Reg; @@ -31,241 +25,15 @@ pub struct ValueLocRange { #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))] pub enum LabelValueLoc { - /// Old-backend location: RegUnit, StackSlot, or Unassigned. - ValueLoc(ValueLoc), /// New-backend Reg. Reg(Reg), /// New-backend offset from stack pointer. SPOffset(i64), } -impl From<ValueLoc> for LabelValueLoc { - fn from(v: ValueLoc) -> Self { - LabelValueLoc::ValueLoc(v) - } -} - /// Resulting map of Value labels and their ranges/locations.
pub type ValueLabelsRanges = HashMap<ValueLabel, Vec<ValueLocRange>>; -fn build_value_labels_index<T>(func: &Function) -> BTreeMap<T, (Value, ValueLabel)> -where - T: From<SourceLoc> + Deref<Target = SourceLoc> + Ord + Copy, -{ - if func.dfg.values_labels.is_none() { - return BTreeMap::new(); - } - let values_labels = func.dfg.values_labels.as_ref().unwrap(); - - // Index values_labels by srcloc/from - let mut sorted = BTreeMap::new(); - for (val, assigns) in values_labels { - match assigns { - ValueLabelAssignments::Starts(labels) => { - for label in labels { - if label.from.is_default() { - continue; - } - let srcloc = T::from(label.from); - let label = label.label; - sorted.insert(srcloc, (*val, label)); - } - } - ValueLabelAssignments::Alias { from, value } => { - if from.is_default() { - continue; - } - let mut aliased_value = *value; - while let Some(ValueLabelAssignments::Alias { value, .. }) = - values_labels.get(&aliased_value) - { - // TODO check/limit recursion? - aliased_value = *value; - } - let from = T::from(*from); - if let Some(ValueLabelAssignments::Starts(labels)) = - values_labels.get(&aliased_value) - { - for label in labels { - let srcloc = if label.from.is_default() { - from - } else { - from.max(T::from(label.from)) - }; - let label = label.label; - sorted.insert(srcloc, (*val, label)); - } - } - } - } - } - sorted -} - -/// Builds ranges and locations for the specified value labels. -/// The labels are specified in the DataFlowGraph's `values_labels` collection. -pub fn build_value_labels_ranges<T>( - func: &Function, - regalloc: &Context, - mach_compile_result: Option<&MachCompileResult>, - isa: &dyn TargetIsa, -) -> ValueLabelsRanges -where - T: From<SourceLoc> + Deref<Target = SourceLoc> + Ord + Copy, -{ - if let Some(mach_compile_result) = mach_compile_result { - return mach_compile_result.value_labels_ranges.clone(); - } - - let values_labels = build_value_labels_index::<T>(func); - - let mut blocks = func.layout.blocks().collect::<Vec<_>>(); - blocks.sort_by_key(|block| func.offsets[*block]); // Ensure inst offsets always increase - let encinfo = isa.encoding_info(); - let values_locations = &func.locations; - let liveness_ranges = regalloc.liveness().ranges(); - - let mut ranges = HashMap::new(); - let mut add_range = |label, range: (u32, u32), loc: ValueLoc| { - if range.0 >= range.1 || !loc.is_assigned() { - return; - } - ranges - .entry(label) - .or_insert_with(Vec::new) - .push(ValueLocRange { - loc: loc.into(), - start: range.0, - end: range.1, - }); - }; - - let mut end_offset = 0; - let mut tracked_values: Vec<(Value, ValueLabel, u32, ValueLoc)> = Vec::new(); - let mut divert = RegDiversions::new(); - for block in blocks { - divert.at_block(&func.entry_diversions, block); - let mut last_srcloc: Option<T> = None; - for (offset, inst, size) in func.inst_offsets(block, &encinfo) { - divert.apply(&func.dfg[inst]); - end_offset = offset + size; - // Remove killed values. - tracked_values.retain(|(x, label, start_offset, last_loc)| { - let range = liveness_ranges.get(*x); - if range.expect("value").killed_at(inst, block, &func.layout) { - add_range(*label, (*start_offset, end_offset), *last_loc); - return false; - } - true - }); - - let srcloc = func.srclocs[inst]; - if srcloc.is_default() { - // Don't process instructions without srcloc. - continue; - } - let srcloc = T::from(srcloc); - - // Record and restart ranges if Value location was changed.
- for (val, label, start_offset, last_loc) in &mut tracked_values { - let new_loc = divert.get(*val, values_locations); - if new_loc == *last_loc { - continue; - } - add_range(*label, (*start_offset, end_offset), *last_loc); - *start_offset = end_offset; - *last_loc = new_loc; - } - - // New source locations range started: abandon all tracked values. - if last_srcloc.is_some() && last_srcloc.unwrap() > srcloc { - for (_, label, start_offset, last_loc) in &tracked_values { - add_range(*label, (*start_offset, end_offset), *last_loc); - } - tracked_values.clear(); - last_srcloc = None; - } - - // Get non-processed Values based on srcloc - let range = ( - match last_srcloc { - Some(a) => Excluded(a), - None => Unbounded, - }, - Included(srcloc), - ); - let active_values = values_labels.range(range); - let active_values = active_values.filter(|(_, (v, _))| { - // Ignore dead/inactive Values. - let range = liveness_ranges.get(*v); - match range { - Some(r) => r.reaches_use(inst, block, &func.layout), - None => false, - } - }); - // Append new Values to the tracked_values. - for (_, (val, label)) in active_values { - let loc = divert.get(*val, values_locations); - tracked_values.push((*val, *label, end_offset, loc)); - } - - last_srcloc = Some(srcloc); - } - // Finish all started ranges. - for (_, label, start_offset, last_loc) in &tracked_values { - add_range(*label, (*start_offset, end_offset), *last_loc); - } - } - - // Optimize ranges in-place - for (_, label_ranges) in ranges.iter_mut() { - assert!(!label_ranges.is_empty()); - label_ranges.sort_by(|a, b| a.start.cmp(&b.start).then_with(|| a.end.cmp(&b.end))); - - // Merge ranges - let mut i = 1; - let mut j = 0; - while i < label_ranges.len() { - assert!(label_ranges[j].start <= label_ranges[i].end); - if label_ranges[j].loc != label_ranges[i].loc { - // Different location - if label_ranges[j].end >= label_ranges[i].end { - // Consumed by previous range, skipping - i += 1; - continue; - } - j += 1; - label_ranges[j] = label_ranges[i]; - i += 1; - continue; - } - if label_ranges[j].end < label_ranges[i].start { - // Gap in the range location - j += 1; - label_ranges[j] = label_ranges[i]; - i += 1; - continue; - } - // Merge i-th and j-th ranges - if label_ranges[j].end < label_ranges[i].end { - label_ranges[j].end = label_ranges[i].end; - } - i += 1; - } - label_ranges.truncate(j + 1); - - // Cut/move start position of next range, if two neighbor ranges intersect. - for i in 0..j { - if label_ranges[i].end > label_ranges[i + 1].start { - label_ranges[i + 1].start = label_ranges[i].end; - assert!(label_ranges[i + 1].start < label_ranges[i + 1].end); - } - assert!(label_ranges[i].end <= label_ranges[i + 1].start); - } - } - ranges -} - #[derive(Eq, Clone, Copy)] pub struct ComparableSourceLoc(SourceLoc); diff --git a/cranelift/codegen/src/verifier/cssa.rs b/cranelift/codegen/src/verifier/cssa.rs index d3e95bf51b..71c89cca13 100644 --- a/cranelift/codegen/src/verifier/cssa.rs +++ b/cranelift/codegen/src/verifier/cssa.rs @@ -4,10 +4,8 @@ use crate::dbg::DisplayList; use crate::dominator_tree::{DominatorTree, DominatorTreePreorder}; use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; use crate::ir::{ExpandedProgramPoint, Function}; -use crate::regalloc::liveness::Liveness; -use crate::regalloc::virtregs::VirtRegs; use crate::timing; -use crate::verifier::{VerifierErrors, VerifierStepResult}; +use crate::verifier::{virtregs::VirtRegs, VerifierErrors, VerifierStepResult}; /// Verify conventional SSA form for `func`. 
/// @@ -27,7 +25,6 @@ pub fn verify_cssa( func: &Function, cfg: &ControlFlowGraph, domtree: &DominatorTree, - liveness: &Liveness, virtregs: &VirtRegs, errors: &mut VerifierErrors, ) -> VerifierStepResult<()> { @@ -41,7 +38,6 @@ pub fn verify_cssa( cfg, domtree, virtregs, - liveness, preorder, }; verifier.check_virtregs(errors)?; @@ -54,7 +50,6 @@ struct CssaVerifier<'a> { cfg: &'a ControlFlowGraph, domtree: &'a DominatorTree, virtregs: &'a VirtRegs, - liveness: &'a Liveness, preorder: DominatorTreePreorder, } @@ -70,9 +65,6 @@ impl<'a> CssaVerifier<'a> { if !self.func.dfg.value_is_attached(val) { return errors.fatal((val, format!("Detached value in {}", vreg))); } - if self.liveness.get(val).is_none() { - return errors.fatal((val, format!("Value in {} has no live range", vreg))); - }; // Check topological ordering with the previous values in the virtual register. let def: ExpandedProgramPoint = self.func.dfg.value_def(val).into(); @@ -120,19 +112,7 @@ impl<'a> CssaVerifier<'a> { if self.preorder.dominates(prev_block, def_block) && self.domtree.dominates(prev_def, def, &self.func.layout) { - if self.liveness[prev_val].overlaps_def(def, def_block, &self.func.layout) { - return errors.fatal(( - val, - format!( - "Value def in {} = {} interferes with {}", - vreg, - DisplayList(values), - prev_val - ), - )); - } else { - break; - } + break; } } } diff --git a/cranelift/codegen/src/verifier/flags.rs b/cranelift/codegen/src/verifier/flags.rs index e4cfc80462..5e67e3ae77 100644 --- a/cranelift/codegen/src/verifier/flags.rs +++ b/cranelift/codegen/src/verifier/flags.rs @@ -4,7 +4,6 @@ use crate::entity::{EntitySet, SecondaryMap}; use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; use crate::ir; use crate::ir::instructions::BranchInfo; -use crate::isa; use crate::packed_option::PackedOption; use crate::timing; use crate::verifier::{VerifierErrors, VerifierStepResult}; @@ -24,19 +23,12 @@ use crate::verifier::{VerifierErrors, VerifierStepResult}; pub fn verify_flags( func: &ir::Function, cfg: &ControlFlowGraph, - isa: Option<&dyn isa::TargetIsa>, errors: &mut VerifierErrors, ) -> VerifierStepResult<()> { let _tt = timing::verify_flags(); - let encinfo = if isa.is_none() || isa.unwrap().get_mach_backend().is_some() { - None - } else { - Some(isa.unwrap().encoding_info()) - }; let mut verifier = FlagsVerifier { func, cfg, - encinfo, livein: SecondaryMap::new(), }; verifier.check(errors) @@ -45,7 +37,6 @@ pub fn verify_flags( struct FlagsVerifier<'a> { func: &'a ir::Function, cfg: &'a ControlFlowGraph, - encinfo: Option, /// The single live-in flags value (if any) for each block. livein: SecondaryMap>, @@ -111,21 +102,6 @@ impl<'a> FlagsVerifier<'a> { return Err(()); } } - - // Does the instruction have an encoding that clobbers the CPU flags? - if self - .encinfo - .as_ref() - .and_then(|ei| ei.operand_constraints(self.func.encodings[inst])) - .map_or(false, |c| c.clobbers_flags) - && live_val.is_some() - { - errors.report(( - inst, - format!("encoding clobbers live CPU flags in {}", live), - )); - return Err(()); - } } // Now look for live ranges of CPU flags that end here. diff --git a/cranelift/codegen/src/verifier/liveness.rs b/cranelift/codegen/src/verifier/liveness.rs deleted file mode 100644 index ac5ee62c42..0000000000 --- a/cranelift/codegen/src/verifier/liveness.rs +++ /dev/null @@ -1,235 +0,0 @@ -//! Liveness verifier. 
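The liveness verifier deleted below enforced, among other invariants, that a value live-in to a block is live at every predecessor. As a rough illustration of that invariant only, here is a toy consistency check over simplified stand-in types (`Block`, `Val`, and the maps are hypothetical, not Cranelift's actual data structures):

```rust
use std::collections::{HashMap, HashSet};

type Block = u32;
type Val = u32;

/// Toy CFG: each block lists its predecessors.
struct Cfg {
    preds: HashMap<Block, Vec<Block>>,
}

/// A value live-in to a block must be live-out of every predecessor;
/// otherwise the liveness data is inconsistent.
fn check_livein_consistency(
    cfg: &Cfg,
    live_in: &HashMap<Block, HashSet<Val>>,
    live_out: &HashMap<Block, HashSet<Val>>,
) -> Result<(), String> {
    for (&block, values) in live_in {
        for &val in values {
            for &pred in cfg.preds.get(&block).into_iter().flatten() {
                let ok = live_out.get(&pred).map_or(false, |s| s.contains(&val));
                if !ok {
                    return Err(format!(
                        "v{} is live in to block{} but not live out of block{}",
                        val, block, pred
                    ));
                }
            }
        }
    }
    Ok(())
}
```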
- -use crate::flowgraph::{BlockPredecessor, ControlFlowGraph}; -use crate::ir::entities::AnyEntity; -use crate::ir::{ExpandedProgramPoint, Function, ProgramPoint, Value}; -use crate::isa::TargetIsa; -use crate::regalloc::liveness::Liveness; -use crate::regalloc::liverange::LiveRange; -use crate::timing; -use crate::verifier::{VerifierErrors, VerifierStepResult}; - -/// Verify liveness information for `func`. -/// -/// The provided control flow graph is assumed to be sound. -/// -/// - All values in the program must have a live range. -/// - The live range def point must match where the value is defined. -/// - The live range must reach all uses. -/// - When a live range is live-in to a block, it must be live at all the predecessors. -/// - The live range affinity must be compatible with encoding constraints. -/// -/// We don't verify that live ranges are minimal. This would require recomputing live ranges for -/// all values. -pub fn verify_liveness( - isa: &dyn TargetIsa, - func: &Function, - cfg: &ControlFlowGraph, - liveness: &Liveness, - errors: &mut VerifierErrors, -) -> VerifierStepResult<()> { - let _tt = timing::verify_liveness(); - let verifier = LivenessVerifier { - isa, - func, - cfg, - liveness, - }; - verifier.check_blocks(errors)?; - verifier.check_insts(errors)?; - Ok(()) -} - -struct LivenessVerifier<'a> { - isa: &'a dyn TargetIsa, - func: &'a Function, - cfg: &'a ControlFlowGraph, - liveness: &'a Liveness, -} - -impl<'a> LivenessVerifier<'a> { - /// Check all block arguments. - fn check_blocks(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { - for block in self.func.layout.blocks() { - for &val in self.func.dfg.block_params(block) { - let lr = match self.liveness.get(val) { - Some(lr) => lr, - None => { - return errors - .fatal((block, format!("block arg {} has no live range", val))) - } - }; - self.check_lr(block.into(), val, lr, errors)?; - } - } - Ok(()) - } - - /// Check all instructions. - fn check_insts(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { - for block in self.func.layout.blocks() { - for inst in self.func.layout.block_insts(block) { - let encoding = self.func.encodings[inst]; - - // Check the defs. - for &val in self.func.dfg.inst_results(inst) { - let lr = match self.liveness.get(val) { - Some(lr) => lr, - None => return errors.fatal((inst, format!("{} has no live range", val))), - }; - self.check_lr(inst.into(), val, lr, errors)?; - - if encoding.is_legal() { - // A legal instruction is not allowed to define ghost values. - if lr.affinity.is_unassigned() { - return errors.fatal(( - inst, - format!( - "{} is a ghost value defined by a real [{}] instruction", - val, - self.isa.encoding_info().display(encoding) - ), - )); - } - } else if !lr.affinity.is_unassigned() { - // A non-encoded instruction can only define ghost values. - return errors.fatal(( - inst, - format!( - "{} is a real {} value defined by a ghost instruction", - val, - lr.affinity.display(&self.isa.register_info()) - ), - )); - } - } - - // Check the uses. - for &val in self.func.dfg.inst_args(inst) { - let lr = match self.liveness.get(val) { - Some(lr) => lr, - None => return errors.fatal((inst, format!("{} has no live range", val))), - }; - - debug_assert!(self.func.layout.inst_block(inst).unwrap() == block); - if !lr.reaches_use(inst, block, &self.func.layout) { - return errors.fatal((inst, format!("{} is not live at this use", val))); - } - - // A legal instruction is not allowed to depend on ghost values. 
- if encoding.is_legal() && lr.affinity.is_unassigned() { - return errors.fatal(( - inst, - format!( - "{} is a ghost value used by a real [{}] instruction", - val, - self.isa.encoding_info().display(encoding), - ), - )); - } - } - } - } - Ok(()) - } - - /// Check the integrity of the live range `lr`. - fn check_lr( - &self, - def: ProgramPoint, - val: Value, - lr: &LiveRange, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - let l = &self.func.layout; - - let loc: AnyEntity = match def.into() { - ExpandedProgramPoint::Block(e) => e.into(), - ExpandedProgramPoint::Inst(i) => i.into(), - }; - if lr.def() != def { - return errors.fatal(( - loc, - format!("Wrong live range def ({}) for {}", lr.def(), val), - )); - } - if lr.is_dead() { - if !lr.is_local() { - return errors.fatal((loc, format!("Dead live range {} should be local", val))); - } else { - return Ok(()); - } - } - let def_block = match def.into() { - ExpandedProgramPoint::Block(e) => e, - ExpandedProgramPoint::Inst(i) => l.inst_block(i).unwrap(), - }; - match lr.def_local_end().into() { - ExpandedProgramPoint::Block(e) => { - return errors.fatal(( - loc, - format!("Def local range for {} can't end at {}", val, e), - )); - } - ExpandedProgramPoint::Inst(i) => { - if self.func.layout.inst_block(i) != Some(def_block) { - return errors - .fatal((loc, format!("Def local end for {} in wrong block", val))); - } - } - } - - // Now check the live-in intervals against the CFG. - for (mut block, end) in lr.liveins() { - if !l.is_block_inserted(block) { - return errors.fatal(( - loc, - format!("{} livein at {} which is not in the layout", val, block), - )); - } - let end_block = match l.inst_block(end) { - Some(e) => e, - None => { - return errors.fatal(( - loc, - format!( - "{} livein for {} ends at {} which is not in the layout", - val, block, end - ), - )); - } - }; - - // Check all the blocks in the interval independently. - loop { - // If `val` is live-in at `block`, it must be live at all the predecessors. - for BlockPredecessor { inst: pred, block } in self.cfg.pred_iter(block) { - if !lr.reaches_use(pred, block, &self.func.layout) { - return errors.fatal(( - pred, - format!( - "{} is live in to {} but not live at predecessor", - val, block - ), - )); - } - } - - if block == end_block { - break; - } - block = match l.next_block(block) { - Some(e) => e, - None => { - return errors.fatal(( - loc, - format!("end of {} livein ({}) never reached", val, end_block), - )); - } - }; - } - } - - Ok(()) - } -} diff --git a/cranelift/codegen/src/verifier/locations.rs b/cranelift/codegen/src/verifier/locations.rs deleted file mode 100644 index 7d04e9889b..0000000000 --- a/cranelift/codegen/src/verifier/locations.rs +++ /dev/null @@ -1,399 +0,0 @@ -//! Verify value locations. - -use crate::flowgraph::ControlFlowGraph; -use crate::ir; -use crate::isa; -use crate::regalloc::liveness::Liveness; -use crate::regalloc::RegDiversions; -use crate::timing; -use crate::verifier::{VerifierErrors, VerifierStepResult}; - -/// Verify value locations for `func`. -/// -/// After register allocation, every value must be assigned to a location - either a register or a -/// stack slot. These locations must be compatible with the constraints described by the -/// instruction encoding recipes. -/// -/// Values can be temporarily diverted to a different location by using the `regmove`, `regspill`, -/// and `regfill` instructions, but only inside a block. 
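The doc comment above describes block-local register diversions. A toy model of that bookkeeping, with invented `Loc`/`Diversions` types standing in for the real `RegDiversions` (an illustration of the concept, not the removed API):

```rust
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Debug)]
enum Loc {
    Reg(u8),
    Stack(u32),
}

/// A value keeps its globally assigned location unless a
/// regmove/regspill/regfill has diverted it; diversions are forgotten at
/// block boundaries.
#[derive(Default)]
struct Diversions {
    current: HashMap<u32, Loc>, // value index -> diverted location
}

impl Diversions {
    /// Called for each `regmove`-like instruction inside a block.
    fn divert(&mut self, value: u32, to: Loc) {
        self.current.insert(value, to);
    }

    /// Where does `value` live right now?
    fn get(&self, value: u32, global: &HashMap<u32, Loc>) -> Option<Loc> {
        self.current
            .get(&value)
            .copied()
            .or_else(|| global.get(&value).copied())
    }

    /// Diversions are block-local; clear them at each block entry.
    fn clear(&mut self) {
        self.current.clear();
    }
}
```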
-/// -/// If a liveness analysis is provided, it is used to verify that there are no active register -/// diversions across control flow edges. -pub fn verify_locations( - isa: &dyn isa::TargetIsa, - func: &ir::Function, - cfg: &ControlFlowGraph, - liveness: Option<&Liveness>, - errors: &mut VerifierErrors, -) -> VerifierStepResult<()> { - let _tt = timing::verify_locations(); - let verifier = LocationVerifier { - isa, - func, - reginfo: isa.register_info(), - encinfo: isa.encoding_info(), - cfg, - liveness, - }; - verifier.check_constraints(errors)?; - Ok(()) -} - -struct LocationVerifier<'a> { - isa: &'a dyn isa::TargetIsa, - func: &'a ir::Function, - reginfo: isa::RegInfo, - encinfo: isa::EncInfo, - cfg: &'a ControlFlowGraph, - liveness: Option<&'a Liveness>, -} - -impl<'a> LocationVerifier<'a> { - /// Check that the assigned value locations match the operand constraints of their uses. - fn check_constraints(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> { - let dfg = &self.func.dfg; - let mut divert = RegDiversions::new(); - - for block in self.func.layout.blocks() { - divert.at_block(&self.func.entry_diversions, block); - - let mut is_after_branch = false; - for inst in self.func.layout.block_insts(block) { - let enc = self.func.encodings[inst]; - - if enc.is_legal() { - self.check_enc_constraints(inst, enc, &divert, errors)? - } else { - self.check_ghost_results(inst, errors)?; - } - - if let Some(sig) = dfg.call_signature(inst) { - self.check_call_abi(inst, sig, &divert, errors)?; - } - - let opcode = dfg[inst].opcode(); - if opcode.is_return() { - self.check_return_abi(inst, &divert, errors)?; - } else if opcode.is_branch() && !divert.is_empty() { - self.check_cfg_edges(inst, &mut divert, is_after_branch, errors)?; - } - - self.update_diversions(inst, &mut divert, errors)?; - is_after_branch = opcode.is_branch(); - } - } - - Ok(()) - } - - /// Check encoding constraints against the current value locations. - fn check_enc_constraints( - &self, - inst: ir::Inst, - enc: isa::Encoding, - divert: &RegDiversions, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - let constraints = self - .encinfo - .operand_constraints(enc) - .expect("check_enc_constraints requires a legal encoding"); - - if constraints.satisfied(inst, divert, self.func) { - return Ok(()); - } - - // TODO: We could give a better error message here. - errors.fatal(( - inst, - format!( - "{} constraints not satisfied in: {}\n{}", - self.encinfo.display(enc), - self.func.dfg.display_inst(inst, self.isa), - self.func.display(self.isa), - ), - )) - } - - /// Check that the result values produced by a ghost instruction are not assigned a value - /// location. - fn check_ghost_results( - &self, - inst: ir::Inst, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - let results = self.func.dfg.inst_results(inst); - - for &res in results { - let loc = self.func.locations[res]; - if loc.is_assigned() { - return errors.fatal(( - inst, - format!( - "ghost result {} value must not have a location ({}).", - res, - loc.display(&self.reginfo) - ), - )); - } - } - - Ok(()) - } - - /// Check the ABI argument and result locations for a call. 
- fn check_call_abi( - &self, - inst: ir::Inst, - sig: ir::SigRef, - divert: &RegDiversions, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - let sig = &self.func.dfg.signatures[sig]; - let varargs = self.func.dfg.inst_variable_args(inst); - let results = self.func.dfg.inst_results(inst); - - for (abi, &value) in sig.params.iter().zip(varargs) { - self.check_abi_location( - inst, - value, - abi, - divert.get(value, &self.func.locations), - ir::StackSlotKind::OutgoingArg, - errors, - )?; - } - - for (abi, &value) in sig.returns.iter().zip(results) { - self.check_abi_location( - inst, - value, - abi, - self.func.locations[value], - ir::StackSlotKind::OutgoingArg, - errors, - )?; - } - - Ok(()) - } - - /// Check the ABI argument locations for a return. - fn check_return_abi( - &self, - inst: ir::Inst, - divert: &RegDiversions, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - let sig = &self.func.signature; - let varargs = self.func.dfg.inst_variable_args(inst); - - for (abi, &value) in sig.returns.iter().zip(varargs) { - self.check_abi_location( - inst, - value, - abi, - divert.get(value, &self.func.locations), - ir::StackSlotKind::IncomingArg, - errors, - )?; - } - - Ok(()) - } - - /// Check a single ABI location. - fn check_abi_location( - &self, - inst: ir::Inst, - value: ir::Value, - abi: &ir::AbiParam, - loc: ir::ValueLoc, - want_kind: ir::StackSlotKind, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - match abi.location { - ir::ArgumentLoc::Unassigned => {} - ir::ArgumentLoc::Reg(reg) => { - if loc != ir::ValueLoc::Reg(reg) { - return errors.fatal(( - inst, - format!( - "ABI expects {} in {}, got {}", - value, - abi.location.display(&self.reginfo), - loc.display(&self.reginfo), - ), - )); - } - } - ir::ArgumentLoc::Stack(offset) => { - if let ir::ValueLoc::Stack(ss) = loc { - let slot = &self.func.stack_slots[ss]; - if slot.kind != want_kind { - return errors.fatal(( - inst, - format!( - "call argument {} should be in a {} slot, but {} is {}", - value, want_kind, ss, slot.kind - ), - )); - } - if slot.offset.unwrap() != offset { - return errors.fatal(( - inst, - format!( - "ABI expects {} at stack offset {}, but {} is at {}", - value, - offset, - ss, - slot.offset.unwrap() - ), - )); - } - } else { - return errors.fatal(( - inst, - format!( - "ABI expects {} at stack offset {}, got {}", - value, - offset, - loc.display(&self.reginfo) - ), - )); - } - } - } - - Ok(()) - } - - /// Update diversions to reflect the current instruction and check their consistency. - fn update_diversions( - &self, - inst: ir::Inst, - divert: &mut RegDiversions, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - let (arg, src) = match self.func.dfg[inst] { - ir::InstructionData::RegMove { arg, src, .. } - | ir::InstructionData::RegSpill { arg, src, .. } => (arg, ir::ValueLoc::Reg(src)), - ir::InstructionData::RegFill { arg, src, .. } => (arg, ir::ValueLoc::Stack(src)), - _ => return Ok(()), - }; - - if let Some(d) = divert.diversion(arg) { - if d.to != src { - return errors.fatal(( - inst, - format!( - "inconsistent with current diversion to {}", - d.to.display(&self.reginfo) - ), - )); - } - } else if self.func.locations[arg] != src { - return errors.fatal(( - inst, - format!( - "inconsistent with global location {} ({})", - self.func.locations[arg].display(&self.reginfo), - self.func.dfg.display_inst(inst, None) - ), - )); - } - - divert.apply(&self.func.dfg[inst]); - - Ok(()) - } - - /// We have active diversions before a branch. 
Make sure none of the diverted values are live - /// on the outgoing CFG edges. - fn check_cfg_edges( - &self, - inst: ir::Inst, - divert: &mut RegDiversions, - is_after_branch: bool, - errors: &mut VerifierErrors, - ) -> VerifierStepResult<()> { - use crate::ir::instructions::BranchInfo::*; - let dfg = &self.func.dfg; - let branch_kind = dfg.analyze_branch(inst); - - // We can only check CFG edges if we have a liveness analysis. - let liveness = match self.liveness { - Some(l) => l, - None => return Ok(()), - }; - - match branch_kind { - NotABranch => panic!( - "No branch information for {}", - dfg.display_inst(inst, self.isa) - ), - SingleDest(block, _) => { - let unique_predecessor = self.cfg.pred_iter(block).count() == 1; - let mut val_to_remove = vec![]; - for (&value, d) in divert.iter() { - let lr = &liveness[value]; - if is_after_branch && unique_predecessor { - // Forward diversions based on the targeted branch. - if !lr.is_livein(block, &self.func.layout) { - val_to_remove.push(value) - } - } else if lr.is_livein(block, &self.func.layout) { - return errors.fatal(( - inst, - format!( - "SingleDest: {} is diverted to {} and live in to {}", - value, - d.to.display(&self.reginfo), - block, - ), - )); - } - } - if is_after_branch && unique_predecessor { - for val in val_to_remove.into_iter() { - divert.remove(val); - } - debug_assert!(divert.check_block_entry(&self.func.entry_diversions, block)); - } - } - Table(jt, block) => { - for (&value, d) in divert.iter() { - let lr = &liveness[value]; - if let Some(block) = block { - if lr.is_livein(block, &self.func.layout) { - return errors.fatal(( - inst, - format!( - "Table.default: {} is diverted to {} and live in to {}", - value, - d.to.display(&self.reginfo), - block, - ), - )); - } - } - for block in self.func.jump_tables[jt].iter() { - if lr.is_livein(*block, &self.func.layout) { - return errors.fatal(( - inst, - format!( - "Table.case: {} is diverted to {} and live in to {}", - value, - d.to.display(&self.reginfo), - block, - ), - )); - } - } - } - } - } - - Ok(()) - } -} diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs index c1721a38fd..96f0e7e32e 100644 --- a/cranelift/codegen/src/verifier/mod.rs +++ b/cranelift/codegen/src/verifier/mod.rs @@ -78,16 +78,13 @@ use alloc::collections::BTreeSet; use alloc::string::{String, ToString}; use alloc::vec::Vec; use core::cmp::Ordering; -use core::fmt::{self, Display, Formatter, Write}; +use core::fmt::{self, Display, Formatter}; pub use self::cssa::verify_cssa; -pub use self::liveness::verify_liveness; -pub use self::locations::verify_locations; mod cssa; mod flags; -mod liveness; -mod locations; +mod virtregs; /// A verifier error. #[derive(Debug, PartialEq, Eq, Clone)] @@ -1763,145 +1760,6 @@ impl<'a> Verifier<'a> { errors.as_result() } - /// If the verifier has been set up with an ISA, make sure that the recorded encoding for the - /// instruction (if any) matches how the ISA would encode it. - fn verify_encoding(&self, inst: Inst, errors: &mut VerifierErrors) -> VerifierStepResult<()> { - // When the encodings table is empty, we don't require any instructions to be encoded. - // - // Once some instructions are encoded, we require all side-effecting instructions to have a - // legal encoding. 
- if self.func.encodings.is_empty() { - return Ok(()); - } - - let isa = match self.isa { - Some(isa) => isa, - None => return Ok(()), - }; - - let encoding = self.func.encodings[inst]; - if encoding.is_legal() { - if self.func.dfg[inst].opcode().is_ghost() { - return errors.nonfatal(( - inst, - self.context(inst), - format!( - "Ghost instruction has an encoding: {}", - isa.encoding_info().display(encoding), - ), - )); - } - - let mut encodings = isa - .legal_encodings( - &self.func, - &self.func.dfg[inst], - self.func.dfg.ctrl_typevar(inst), - ) - .peekable(); - - if encodings.peek().is_none() { - return errors.nonfatal(( - inst, - self.context(inst), - format!( - "Instruction failed to re-encode {}", - isa.encoding_info().display(encoding), - ), - )); - } - - let has_valid_encoding = encodings.any(|possible_enc| encoding == possible_enc); - - if !has_valid_encoding { - let mut possible_encodings = String::new(); - let mut multiple_encodings = false; - - for enc in isa.legal_encodings( - &self.func, - &self.func.dfg[inst], - self.func.dfg.ctrl_typevar(inst), - ) { - if !possible_encodings.is_empty() { - possible_encodings.push_str(", "); - multiple_encodings = true; - } - possible_encodings - .write_fmt(format_args!("{}", isa.encoding_info().display(enc))) - .unwrap(); - } - - return errors.nonfatal(( - inst, - self.context(inst), - format!( - "encoding {} should be {}{}", - isa.encoding_info().display(encoding), - if multiple_encodings { "one of: " } else { "" }, - possible_encodings, - ), - )); - } - return Ok(()); - } - - // Instruction is not encoded, so it is a ghost instruction. - // Instructions with side effects are not allowed to be ghost instructions. - let opcode = self.func.dfg[inst].opcode(); - - // The `fallthrough`, `fallthrough_return`, and `safepoint` instructions are not required - // to have an encoding. - if opcode == Opcode::Fallthrough - || opcode == Opcode::FallthroughReturn - || opcode == Opcode::Safepoint - { - return Ok(()); - } - - // Check if this opcode must be encoded. - let mut needs_enc = None; - if opcode.is_branch() { - needs_enc = Some("Branch"); - } else if opcode.is_call() { - needs_enc = Some("Call"); - } else if opcode.is_return() { - needs_enc = Some("Return"); - } else if opcode.can_store() { - needs_enc = Some("Store"); - } else if opcode.can_trap() { - needs_enc = Some("Trapping instruction"); - } else if opcode.other_side_effects() { - needs_enc = Some("Instruction with side effects"); - } - - if let Some(text) = needs_enc { - // This instruction needs an encoding, so generate an error. - // Provide the ISA default encoding as a hint. 
- match self.func.encode(inst, isa) { - Ok(enc) => { - return errors.nonfatal(( - inst, - self.context(inst), - format!( - "{} must have an encoding (e.g., {})))", - text, - isa.encoding_info().display(enc), - ), - )); - } - Err(_) => { - return errors.nonfatal(( - inst, - self.context(inst), - format!("{} must have an encoding", text), - )) - } - } - } - - Ok(()) - } - fn immediate_constraints( &self, inst: Inst, @@ -2034,14 +1892,13 @@ impl<'a> Verifier<'a> { self.instruction_integrity(inst, errors)?; self.verify_safepoint_unused(inst, errors)?; self.typecheck(inst, errors)?; - self.verify_encoding(inst, errors)?; self.immediate_constraints(inst, errors)?; } self.encodable_as_bb(block, errors)?; } - verify_flags(self.func, &self.expected_cfg, self.isa, errors)?; + verify_flags(self.func, &self.expected_cfg, errors)?; if !errors.is_empty() { log::warn!( diff --git a/cranelift/codegen/src/regalloc/virtregs.rs b/cranelift/codegen/src/verifier/virtregs.rs similarity index 100% rename from cranelift/codegen/src/regalloc/virtregs.rs rename to cranelift/codegen/src/verifier/virtregs.rs diff --git a/cranelift/codegen/src/write.rs b/cranelift/codegen/src/write.rs index d7528beef4..567f86c2c7 100644 --- a/cranelift/codegen/src/write.rs +++ b/cranelift/codegen/src/write.rs @@ -6,13 +6,10 @@ use crate::entity::SecondaryMap; use crate::ir::entities::AnyEntity; use crate::ir::{ - Block, DataFlowGraph, DisplayFunctionAnnotations, Function, Inst, SigRef, Type, Value, - ValueDef, ValueLoc, + Block, DataFlowGraph, DisplayFunctionAnnotations, Function, Inst, SigRef, Type, Value, ValueDef, }; use crate::isa::{RegInfo, TargetIsa}; use crate::packed_option::ReservedValue; -use crate::value_label::{LabelValueLoc, ValueLabelsRanges}; -use crate::HashSet; use alloc::string::String; use alloc::vec::Vec; use core::fmt::{self, Write}; @@ -278,49 +275,6 @@ pub fn write_block_header( writeln!(w, "):") } -fn write_valueloc(w: &mut dyn Write, loc: LabelValueLoc, regs: &RegInfo) -> fmt::Result { - match loc { - LabelValueLoc::ValueLoc(ValueLoc::Reg(r)) => write!(w, "{}", regs.display_regunit(r)), - LabelValueLoc::ValueLoc(ValueLoc::Stack(ss)) => write!(w, "{}", ss), - LabelValueLoc::ValueLoc(ValueLoc::Unassigned) => write!(w, "?"), - LabelValueLoc::Reg(r) => write!(w, "{:?}", r), - LabelValueLoc::SPOffset(off) => write!(w, "[sp+{}]", off), - } -} - -fn write_value_range_markers( - w: &mut dyn Write, - val_ranges: &ValueLabelsRanges, - regs: &RegInfo, - offset: u32, - indent: usize, -) -> fmt::Result { - let mut result = String::new(); - let mut shown = HashSet::new(); - for (val, rng) in val_ranges { - for i in (0..rng.len()).rev() { - if rng[i].start == offset { - write!(&mut result, " {}@", val)?; - write_valueloc(&mut result, rng[i].loc, regs)?; - shown.insert(val); - break; - } - } - } - for (val, rng) in val_ranges { - for i in (0..rng.len()).rev() { - if rng[i].end == offset && !shown.contains(val) { - write!(&mut result, " {}\u{2620}", val)?; - break; - } - } - } - if !result.is_empty() { - writeln!(w, ";{1:0$}; {2}", indent + 24, "", result)?; - } - Ok(()) -} - fn decorate_block( func_w: &mut FW, w: &mut dyn Write, @@ -329,12 +283,8 @@ fn decorate_block( annotations: &DisplayFunctionAnnotations, block: Block, ) -> fmt::Result { - // Indent all instructions if any encodings are present. - let indent = if func.encodings.is_empty() && func.srclocs.is_empty() { - 4 - } else { - 36 - }; + // Indent all instructions if any srclocs are present. 
+ let indent = if func.srclocs.is_empty() { 4 } else { 36 }; let isa = annotations.isa; func_w.write_block_header(w, func, isa, block, indent)?; @@ -342,22 +292,6 @@ fn decorate_block( write_value_aliases(w, aliases, a, indent)?; } - if let Some(isa) = isa { - if !func.offsets.is_empty() { - let encinfo = isa.encoding_info(); - let regs = &isa.register_info(); - for (offset, inst, size) in func.inst_offsets(block, &encinfo) { - func_w.write_instruction(w, func, aliases, Some(isa), inst, indent)?; - if size > 0 { - if let Some(val_ranges) = annotations.value_ranges { - write_value_range_markers(w, val_ranges, regs, offset + size, indent)?; - } - } - } - return Ok(()); - } - } - for inst in func.layout.block_insts(block) { func_w.write_instruction(w, func, aliases, isa, inst, indent)?; } @@ -438,23 +372,6 @@ fn write_instruction( write!(s, "{} ", srcloc)?; } - // Write out encoding info. - if let Some(enc) = func.encodings.get(inst).cloned() { - if let Some(isa) = isa { - write!(s, "[{}", isa.encoding_info().display(enc))?; - // Write value locations, if we have them. - if !func.locations.is_empty() { - let regs = isa.register_info(); - for &r in func.dfg.inst_results(inst) { - write!(s, ",{}", func.locations[r].display(®s))? - } - } - write!(s, "] ")?; - } else { - write!(s, "[{}] ", enc)?; - } - } - // Write out prefix and indent the instruction. write!(w, "{1:0$}", indent, s)?; diff --git a/cranelift/filetests/src/function_runner.rs b/cranelift/filetests/src/function_runner.rs index 45170d90ee..4046072a23 100644 --- a/cranelift/filetests/src/function_runner.rs +++ b/cranelift/filetests/src/function_runner.rs @@ -248,7 +248,7 @@ fn compile(function: Function, isa: &dyn TargetIsa) -> Result anyhow::Result> { match parsed.command { - "binemit" => test_binemit::subtest(parsed), "cat" => test_cat::subtest(parsed), "compile" => test_compile::subtest(parsed), "dce" => test_dce::subtest(parsed), @@ -127,14 +121,10 @@ fn new_subtest(parsed: &TestCommand) -> anyhow::Result "legalizer" => test_legalizer::subtest(parsed), "licm" => test_licm::subtest(parsed), "peepmatic" => test_peepmatic::subtest(parsed), - "postopt" => test_postopt::subtest(parsed), "preopt" => test_preopt::subtest(parsed), "print-cfg" => test_print_cfg::subtest(parsed), - "regalloc" => test_regalloc::subtest(parsed), - "rodata" => test_rodata::subtest(parsed), "run" => test_run::subtest(parsed), "safepoint" => test_safepoint::subtest(parsed), - "shrink" => test_shrink::subtest(parsed), "simple-gvn" => test_simple_gvn::subtest(parsed), "simple_preopt" => test_simple_preopt::subtest(parsed), "stack_maps" => test_stack_maps::subtest(parsed), diff --git a/cranelift/filetests/src/test_binemit.rs b/cranelift/filetests/src/test_binemit.rs deleted file mode 100644 index 9f8db9ebb9..0000000000 --- a/cranelift/filetests/src/test_binemit.rs +++ /dev/null @@ -1,335 +0,0 @@ -//! Test command for testing the binary machine code emission. -//! -//! The `binemit` test command generates binary machine code for every instruction in the input -//! functions and compares the results to the expected output. 
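To make the deleted test's contract concrete: an instruction annotated with a `bin:` comment had its emitted bytes rendered as space-separated lowercase hex (the `TextSink::put1` format shown below) and compared against the directive text. A self-contained sketch of that comparison, with a hypothetical helper name:

```rust
/// Render emitted bytes the way TextSink::put1 did ("{:02x} " per byte,
/// then trimmed) and compare against the text of a `bin:` directive.
fn check_bin_directive(emitted: &[u8], want: &str) -> Result<(), String> {
    let have = emitted
        .iter()
        .map(|b| format!("{:02x}", b))
        .collect::<Vec<_>>()
        .join(" ");
    if have == want.trim() {
        Ok(())
    } else {
        Err(format!("bad machine code\nWant: {}\nGot: {}", want.trim(), have))
    }
}

#[test]
fn bin_directive_matches() {
    // e.g. a CLIF comment `; bin: 40 89 c3` would yield want = "40 89 c3".
    assert!(check_bin_directive(&[0x40, 0x89, 0xc3], "40 89 c3").is_ok());
}
```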
- -use crate::match_directive::match_directive; -use crate::subtest::{Context, SubTest}; -use cranelift_codegen::binemit::{self, CodeInfo, CodeSink, RegDiversions}; -use cranelift_codegen::dbg::DisplayList; -use cranelift_codegen::dominator_tree::DominatorTree; -use cranelift_codegen::flowgraph::ControlFlowGraph; -use cranelift_codegen::ir; -use cranelift_codegen::ir::entities::AnyEntity; -use cranelift_codegen::isa; -use cranelift_codegen::settings::OptLevel; -use cranelift_reader::TestCommand; -use std::borrow::Cow; -use std::collections::HashMap; -use std::fmt::Write; - -struct TestBinEmit; - -pub fn subtest(parsed: &TestCommand) -> anyhow::Result> { - assert_eq!(parsed.command, "binemit"); - if !parsed.options.is_empty() { - anyhow::bail!("No options allowed on {}", parsed) - } else { - Ok(Box::new(TestBinEmit)) - } -} - -/// Code sink that generates text. -struct TextSink { - offset: binemit::CodeOffset, - text: String, -} - -impl TextSink { - /// Create a new empty TextSink. - pub fn new() -> Self { - Self { - offset: 0, - text: String::new(), - } - } -} - -impl binemit::CodeSink for TextSink { - fn offset(&self) -> binemit::CodeOffset { - self.offset - } - - fn put1(&mut self, x: u8) { - write!(self.text, "{:02x} ", x).unwrap(); - self.offset += 1; - } - - fn put2(&mut self, x: u16) { - write!(self.text, "{:04x} ", x).unwrap(); - self.offset += 2; - } - - fn put4(&mut self, x: u32) { - write!(self.text, "{:08x} ", x).unwrap(); - self.offset += 4; - } - - fn put8(&mut self, x: u64) { - write!(self.text, "{:016x} ", x).unwrap(); - self.offset += 8; - } - - fn reloc_external( - &mut self, - _srcloc: ir::SourceLoc, - reloc: binemit::Reloc, - name: &ir::ExternalName, - addend: binemit::Addend, - ) { - write!(self.text, "{}({}", reloc, name).unwrap(); - if addend != 0 { - write!(self.text, "{:+}", addend).unwrap(); - } - write!(self.text, ") ").unwrap(); - } - - fn reloc_constant(&mut self, reloc: binemit::Reloc, constant: ir::ConstantOffset) { - write!(self.text, "{}({}) ", reloc, constant).unwrap(); - } - - fn reloc_jt(&mut self, reloc: binemit::Reloc, jt: ir::JumpTable) { - write!(self.text, "{}({}) ", reloc, jt).unwrap(); - } - - fn trap(&mut self, code: ir::TrapCode, _srcloc: ir::SourceLoc) { - write!(self.text, "{} ", code).unwrap(); - } - - fn begin_jumptables(&mut self) {} - fn begin_rodata(&mut self) {} - fn end_codegen(&mut self) {} - fn add_stack_map( - &mut self, - _: &[ir::entities::Value], - _: &ir::Function, - _: &dyn isa::TargetIsa, - ) { - } -} - -impl SubTest for TestBinEmit { - fn name(&self) -> &'static str { - "binemit" - } - - fn is_mutating(&self) -> bool { - true - } - - fn needs_isa(&self) -> bool { - true - } - - fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { - let isa = context.isa.expect("binemit needs an ISA"); - let encinfo = isa.encoding_info(); - // TODO: Run a verifier pass over the code first to detect any bad encodings or missing/bad - // value locations. The current error reporting is just crashing... - let mut func = func.into_owned(); - - // Fix the stack frame layout so we can test spill/fill encodings. - let min_offset = func - .stack_slots - .values() - .map(|slot| slot.offset.unwrap()) - .min(); - func.stack_slots.layout_info = min_offset.map(|off| ir::StackLayoutInfo { - frame_size: (-off) as u32, - inbound_args_size: 0, - }); - - let opt_level = isa.flags().opt_level(); - - // Give an encoding to any instruction that doesn't already have one. 
- let mut divert = RegDiversions::new(); - for block in func.layout.blocks() { - divert.clear(); - for inst in func.layout.block_insts(block) { - if !func.encodings[inst].is_legal() { - // Find an encoding that satisfies both immediate field and register - // constraints. - if let Some(enc) = { - let mut legal_encodings = isa - .legal_encodings(&func, &func.dfg[inst], func.dfg.ctrl_typevar(inst)) - .filter(|e| { - let recipe_constraints = &encinfo.constraints[e.recipe()]; - recipe_constraints.satisfied(inst, &divert, &func) - }); - - if opt_level == OptLevel::SpeedAndSize { - // Get the smallest legal encoding - legal_encodings - .min_by_key(|&e| encinfo.byte_size(e, inst, &divert, &func)) - } else { - // If not optimizing, just use the first encoding. - legal_encodings.next() - } - } { - func.encodings[inst] = enc; - } - } - divert.apply(&func.dfg[inst]); - } - } - - // Relax branches and compute block offsets based on the encodings. - let mut cfg = ControlFlowGraph::with_function(&func); - let mut domtree = DominatorTree::with_function(&func, &cfg); - let CodeInfo { total_size, .. } = - binemit::relax_branches(&mut func, &mut cfg, &mut domtree, isa) - .map_err(|e| crate::pretty_anyhow_error(&func, context.isa, e))?; - - // Collect all of the 'bin:' directives on instructions. - let mut bins = HashMap::new(); - for comment in &context.details.comments { - if let Some(want) = match_directive(comment.text, "bin:") { - match comment.entity { - AnyEntity::Inst(inst) => { - if let Some(prev) = bins.insert(inst, want) { - anyhow::bail!( - "multiple 'bin:' directives on {}: '{}' and '{}'", - func.dfg.display_inst(inst, isa), - prev, - want - ); - } - } - _ => { - anyhow::bail!( - "'bin:' directive on non-inst {}: {}", - comment.entity, - comment.text - ); - } - } - } - } - if bins.is_empty() { - anyhow::bail!("No 'bin:' directives found"); - } - - // Now emit all instructions. - let mut sink = TextSink::new(); - for block in func.layout.blocks() { - divert.clear(); - // Correct header offsets should have been computed by `relax_branches()`. - assert_eq!( - sink.offset, func.offsets[block], - "Inconsistent {} header offset", - block - ); - for (offset, inst, enc_bytes) in func.inst_offsets(block, &encinfo) { - assert_eq!(sink.offset, offset); - sink.text.clear(); - let enc = func.encodings[inst]; - - // Send legal encodings into the emitter. - if enc.is_legal() { - // Generate a better error message if output locations are not specified. - validate_location_annotations(&func, inst, isa, false)?; - - let before = sink.offset; - isa.emit_inst(&func, inst, &mut divert, &mut sink); - let emitted = sink.offset - before; - // Verify the encoding recipe sizes against the ISAs emit_inst implementation. - assert_eq!( - emitted, - enc_bytes, - "Inconsistent size for [{}] {}", - encinfo.display(enc), - func.dfg.display_inst(inst, isa) - ); - } - - // Check against bin: directives. - if let Some(want) = bins.remove(&inst) { - if !enc.is_legal() { - // A possible cause of an unencoded instruction is a missing location for - // one of the input/output operands. - validate_location_annotations(&func, inst, isa, true)?; - validate_location_annotations(&func, inst, isa, false)?; - - // Do any encodings exist? 
-                        let encodings = isa
-                            .legal_encodings(&func, &func.dfg[inst], func.dfg.ctrl_typevar(inst))
-                            .map(|e| encinfo.display(e))
-                            .collect::<Vec<_>>();
-
-                        if encodings.is_empty() {
-                            anyhow::bail!(
-                                "No encodings found for: {}",
-                                func.dfg.display_inst(inst, isa)
-                            );
-                        }
-                        anyhow::bail!(
-                            "No matching encodings for {} in {}",
-                            func.dfg.display_inst(inst, isa),
-                            DisplayList(&encodings),
-                        );
-                    }
-                    let have = sink.text.trim();
-                    if have != want {
-                        anyhow::bail!(
-                            "Bad machine code for {}: {}\nWant: {}\nGot: {}",
-                            inst,
-                            func.dfg.display_inst(inst, isa),
-                            want,
-                            have
-                        );
-                    }
-                }
-            }
-        }
-
-        sink.begin_jumptables();
-
-        for (jt, jt_data) in func.jump_tables.iter() {
-            let jt_offset = func.jt_offsets[jt];
-            for block in jt_data.iter() {
-                let rel_offset: i32 = func.offsets[*block] as i32 - jt_offset as i32;
-                sink.put4(rel_offset as u32)
-            }
-        }
-
-        sink.begin_rodata();
-
-        // output constants
-        for (_, constant_data) in func.dfg.constants.iter() {
-            for byte in constant_data.iter() {
-                sink.put1(*byte)
-            }
-        }
-
-        sink.end_codegen();
-
-        if sink.offset != total_size {
-            anyhow::bail!("Expected code size {}, got {}", total_size, sink.offset);
-        }
-
-        Ok(())
-    }
-}
-
-/// Validate registers/stack slots are correctly annotated.
-fn validate_location_annotations(
-    func: &ir::Function,
-    inst: ir::Inst,
-    isa: &dyn isa::TargetIsa,
-    validate_inputs: bool,
-) -> anyhow::Result<()> {
-    let values = if validate_inputs {
-        func.dfg.inst_args(inst)
-    } else {
-        func.dfg.inst_results(inst)
-    };
-
-    if let Some(&v) = values.iter().find(|&&v| !func.locations[v].is_assigned()) {
-        anyhow::bail!(
-            "Need register/stack slot annotation for {} in {}",
-            v,
-            func.dfg.display_inst(inst, isa)
-        );
-    }
-    Ok(())
-}
diff --git a/cranelift/filetests/src/test_compile.rs b/cranelift/filetests/src/test_compile.rs
index b66f8eca66..920a1bc803 100644
--- a/cranelift/filetests/src/test_compile.rs
+++ b/cranelift/filetests/src/test_compile.rs
@@ -38,10 +38,8 @@ impl SubTest for TestCompile {
        let isa = context.isa.expect("compile needs an ISA");
        let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned());
-        if isa.get_mach_backend().is_some() {
-            // With `MachBackend`s, we need to explicitly request dissassembly results.
-            comp_ctx.set_disasm(true);
-        }
+        // With `MachBackend`s, we need to explicitly request disassembly results.
+        comp_ctx.set_disasm(true);
        let CodeInfo { total_size, .. } = comp_ctx
            .compile(isa)
@@ -53,33 +51,14 @@ impl SubTest for TestCompile {
            comp_ctx.func.display(isa)
        );
-        if !isa.get_mach_backend().is_some() {
-            // Verify that the returned code size matches the emitted bytes.
-            let mut sink = SizeSink { offset: 0 };
-            binemit::emit_function(
-                &comp_ctx.func,
-                |func, inst, div, sink, isa| isa.emit_inst(func, inst, div, sink),
-                &mut sink,
-                isa,
-            );
-
-            if sink.offset != total_size {
-                anyhow::bail!("Expected code size {}, got {}", total_size, sink.offset);
-            }
-
-            // Run final code through filecheck.
- let text = comp_ctx.func.display(Some(isa)).to_string(); - run_filecheck(&text, context) - } else { - let disasm = comp_ctx - .mach_compile_result - .as_ref() - .unwrap() - .disasm - .as_ref() - .unwrap(); - run_filecheck(&disasm, context) - } + let disasm = comp_ctx + .mach_compile_result + .as_ref() + .unwrap() + .disasm + .as_ref() + .unwrap(); + run_filecheck(&disasm, context) } } diff --git a/cranelift/filetests/src/test_postopt.rs b/cranelift/filetests/src/test_postopt.rs deleted file mode 100644 index ff7726d9c8..0000000000 --- a/cranelift/filetests/src/test_postopt.rs +++ /dev/null @@ -1,46 +0,0 @@ -//! Test command for testing the postopt pass. -//! -//! The resulting function is sent to `filecheck`. - -use crate::subtest::{run_filecheck, Context, SubTest}; -use cranelift_codegen; -use cranelift_codegen::ir::Function; -use cranelift_reader::TestCommand; -use std::borrow::Cow; - -struct TestPostopt; - -pub fn subtest(parsed: &TestCommand) -> anyhow::Result> { - assert_eq!(parsed.command, "postopt"); - if !parsed.options.is_empty() { - anyhow::bail!("No options allowed on {}", parsed); - } - Ok(Box::new(TestPostopt)) -} - -impl SubTest for TestPostopt { - fn name(&self) -> &'static str { - "postopt" - } - - fn needs_isa(&self) -> bool { - true - } - - fn is_mutating(&self) -> bool { - true - } - - fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { - let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); - let isa = context.isa.expect("postopt needs an ISA"); - - comp_ctx.flowgraph(); - comp_ctx - .postopt(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, Into::into(e)))?; - - let text = comp_ctx.func.display(isa).to_string(); - run_filecheck(&text, context) - } -} diff --git a/cranelift/filetests/src/test_regalloc.rs b/cranelift/filetests/src/test_regalloc.rs deleted file mode 100644 index f0f4025560..0000000000 --- a/cranelift/filetests/src/test_regalloc.rs +++ /dev/null @@ -1,54 +0,0 @@ -//! Test command for testing the register allocator. -//! -//! The `regalloc` test command runs each function through the register allocator after ensuring -//! that all instructions are legal for the target. -//! -//! The resulting function is sent to `filecheck`. - -use crate::subtest::{run_filecheck, Context, SubTest}; -use cranelift_codegen; -use cranelift_codegen::ir::Function; -use cranelift_reader::TestCommand; -use std::borrow::Cow; - -struct TestRegalloc; - -pub fn subtest(parsed: &TestCommand) -> anyhow::Result> { - assert_eq!(parsed.command, "regalloc"); - if !parsed.options.is_empty() { - anyhow::bail!("No options allowed on {}", parsed); - } - Ok(Box::new(TestRegalloc)) -} - -impl SubTest for TestRegalloc { - fn name(&self) -> &'static str { - "regalloc" - } - - fn is_mutating(&self) -> bool { - true - } - - fn needs_isa(&self) -> bool { - true - } - - fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { - let isa = context.isa.expect("register allocator needs an ISA"); - let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); - - comp_ctx.compute_cfg(); - // TODO: Should we have an option to skip legalization? 
- comp_ctx - .legalize(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, e))?; - comp_ctx.compute_domtree(); - comp_ctx - .regalloc(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, e))?; - - let text = comp_ctx.func.display(Some(isa)).to_string(); - run_filecheck(&text, context) - } -} diff --git a/cranelift/filetests/src/test_rodata.rs b/cranelift/filetests/src/test_rodata.rs deleted file mode 100644 index 83b10b4e08..0000000000 --- a/cranelift/filetests/src/test_rodata.rs +++ /dev/null @@ -1,133 +0,0 @@ -//! Test command for verifying the rodata emitted after each function -//! -//! The `rodata` test command runs each function through the full code generator pipeline - -use crate::subtest::{run_filecheck, Context, SubTest}; -use cranelift_codegen; -use cranelift_codegen::binemit::{self, CodeInfo}; -use cranelift_codegen::ir; -use cranelift_codegen::ir::{Function, Value}; -use cranelift_codegen::isa::TargetIsa; -use cranelift_reader::TestCommand; -use log::info; -use std::borrow::Cow; - -struct TestRodata; - -pub fn subtest(parsed: &TestCommand) -> anyhow::Result> { - assert_eq!(parsed.command, "rodata"); - if !parsed.options.is_empty() { - anyhow::bail!("No options allowed on {}", parsed); - } - Ok(Box::new(TestRodata)) -} - -impl SubTest for TestRodata { - fn name(&self) -> &'static str { - "rodata" - } - - fn is_mutating(&self) -> bool { - true - } - - fn needs_isa(&self) -> bool { - true - } - - fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { - let isa = context.isa.expect("rodata needs an ISA"); - let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); - - let CodeInfo { total_size, .. } = comp_ctx - .compile(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, e))?; - - info!( - "Generated {} bytes of code:\n{}", - total_size, - comp_ctx.func.display(isa) - ); - - // Verify that the returned code size matches the emitted bytes. - let mut sink = RodataSink::default(); - binemit::emit_function( - &comp_ctx.func, - |func, inst, div, sink, isa| isa.emit_inst(func, inst, div, sink), - &mut sink, - isa, - ); - - // Run final code through filecheck. 
- let text = format!("{:X?}", sink.rodata); - info!("Found rodata: {}", text); - run_filecheck(&text, context) - } -} - -/// Code sink that only captures emitted rodata -#[derive(Default)] -struct RodataSink { - offset: usize, - rodata: Vec, - in_rodata: bool, -} - -impl binemit::CodeSink for RodataSink { - fn offset(&self) -> binemit::CodeOffset { - self.offset as u32 - } - - fn put1(&mut self, byte: u8) { - self.offset += 1; - if self.in_rodata { - self.rodata.push(byte); - } - } - - fn put2(&mut self, bytes: u16) { - self.offset += 2; - if self.in_rodata { - self.rodata.extend_from_slice(&bytes.to_be_bytes()); - } - } - - fn put4(&mut self, bytes: u32) { - self.offset += 4; - if self.in_rodata { - self.rodata.extend_from_slice(&bytes.to_be_bytes()); - } - } - - fn put8(&mut self, bytes: u64) { - self.offset += 8; - if self.in_rodata { - self.rodata.extend_from_slice(&bytes.to_be_bytes()); - } - } - - fn reloc_external( - &mut self, - _: ir::SourceLoc, - _: binemit::Reloc, - _: &ir::ExternalName, - _: binemit::Addend, - ) { - } - fn reloc_constant(&mut self, _: binemit::Reloc, _: ir::ConstantOffset) {} - fn reloc_jt(&mut self, _reloc: binemit::Reloc, _jt: ir::JumpTable) {} - fn trap(&mut self, _code: ir::TrapCode, _srcloc: ir::SourceLoc) {} - fn begin_jumptables(&mut self) { - assert!(!self.in_rodata, "Jump tables must be emitted before rodata"); - } - fn begin_rodata(&mut self) { - self.in_rodata = true; - } - fn end_codegen(&mut self) { - assert!( - self.in_rodata, - "Expected rodata to be emitted before the end of codegen" - ); - } - fn add_stack_map(&mut self, _: &[Value], _: &Function, _: &dyn TargetIsa) {} -} diff --git a/cranelift/filetests/src/test_safepoint.rs b/cranelift/filetests/src/test_safepoint.rs index 90d155ad1e..0b0c725e1e 100644 --- a/cranelift/filetests/src/test_safepoint.rs +++ b/cranelift/filetests/src/test_safepoint.rs @@ -27,9 +27,6 @@ impl SubTest for TestSafepoint { .legalize(isa) .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, e))?; comp_ctx.compute_domtree(); - comp_ctx - .regalloc(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, e))?; let text = comp_ctx.func.display(context.isa).to_string(); run_filecheck(&text, context) diff --git a/cranelift/filetests/src/test_shrink.rs b/cranelift/filetests/src/test_shrink.rs deleted file mode 100644 index e3b971b66a..0000000000 --- a/cranelift/filetests/src/test_shrink.rs +++ /dev/null @@ -1,44 +0,0 @@ -//! Test command for testing the Shrink pass. -//! -//! The `shrink` test command runs each function through the Shrink pass after ensuring -//! that all instructions are legal for the target. -//! -//! The resulting function is sent to `filecheck`. 
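With the `postopt`, `regalloc`, `rodata`, and `shrink` subtests gone, output checking funnels through `test compile` and the disassembly produced by the `MachBackend`, as the test_compile.rs hunk above shows. A sketch of that flow under those assumptions (`compile_and_disassemble` is a made-up wrapper, not a filetests API):

```rust
use cranelift_codegen::ir::Function;
use cranelift_codegen::isa::TargetIsa;

/// Compile `func` with disassembly enabled and return the disassembly
/// text, which a subtest would then feed to filecheck.
fn compile_and_disassemble(func: Function, isa: &dyn TargetIsa) -> anyhow::Result<String> {
    let mut ctx = cranelift_codegen::Context::for_function(func);
    // MachBackends only produce disassembly on request.
    ctx.set_disasm(true);
    ctx.compile(isa).map_err(|e| anyhow::anyhow!("{:?}", e))?;
    Ok(ctx
        .mach_compile_result
        .as_ref()
        .expect("MachBackend result")
        .disasm
        .clone()
        .expect("disassembly was requested"))
}
```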
- -use crate::subtest::{run_filecheck, Context, SubTest}; -use cranelift_codegen; -use cranelift_codegen::ir::Function; -use cranelift_reader::TestCommand; -use std::borrow::Cow; - -struct TestShrink; - -pub fn subtest(parsed: &TestCommand) -> anyhow::Result> { - assert_eq!(parsed.command, "shrink"); - if !parsed.options.is_empty() { - anyhow::bail!("No options allowed on {}", parsed); - } - Ok(Box::new(TestShrink)) -} - -impl SubTest for TestShrink { - fn name(&self) -> &'static str { - "shrink" - } - - fn is_mutating(&self) -> bool { - true - } - - fn run(&self, func: Cow, context: &Context) -> anyhow::Result<()> { - let isa = context.isa.expect("shrink needs an ISA"); - let mut comp_ctx = cranelift_codegen::Context::for_function(func.into_owned()); - - comp_ctx - .shrink_instructions(isa) - .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, Into::into(e)))?; - - let text = comp_ctx.func.display(isa).to_string(); - run_filecheck(&text, context) - } -} diff --git a/cranelift/filetests/src/test_stack_maps.rs b/cranelift/filetests/src/test_stack_maps.rs index 0f09966a85..1684520af0 100644 --- a/cranelift/filetests/src/test_stack_maps.rs +++ b/cranelift/filetests/src/test_stack_maps.rs @@ -29,13 +29,13 @@ impl SubTest for TestStackMaps { .map_err(|e| crate::pretty_anyhow_error(&comp_ctx.func, context.isa, e))?; let mut sink = TestStackMapsSink::default(); + // TODO remove entirely? seems a bit meaningless now binemit::emit_function( &comp_ctx.func, - |func, inst, div, sink, isa| { + |func, inst, sink, isa| { if func.dfg[inst].opcode() == Opcode::Safepoint { writeln!(&mut sink.text, "{}", func.dfg.display_inst(inst, isa)).unwrap(); } - isa.emit_inst(func, inst, div, sink) }, &mut sink, context.isa.expect("`test stack_maps` requires an isa"), diff --git a/cranelift/jit/src/backend.rs b/cranelift/jit/src/backend.rs index 07a5cad65d..e118d9a176 100644 --- a/cranelift/jit/src/backend.rs +++ b/cranelift/jit/src/backend.rs @@ -657,7 +657,7 @@ impl Module for JITModule { .expect("TODO: handle OOM etc."); let mut reloc_sink = JITRelocSink::default(); - unsafe { ctx.emit_to_memory(&*self.isa, ptr, &mut reloc_sink, trap_sink, stack_map_sink) }; + unsafe { ctx.emit_to_memory(ptr, &mut reloc_sink, trap_sink, stack_map_sink) }; self.record_function_for_perf(ptr, size, &decl.name); self.compiled_functions[id] = Some(CompiledBlob { diff --git a/cranelift/object/src/backend.rs b/cranelift/object/src/backend.rs index 2e48284df4..22fa807672 100644 --- a/cranelift/object/src/backend.rs +++ b/cranelift/object/src/backend.rs @@ -324,7 +324,6 @@ impl Module for ObjectModule { unsafe { ctx.emit_to_memory( - &*self.isa, code.as_mut_ptr(), &mut reloc_sink, trap_sink, diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index 1daa593501..3829bde59b 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -22,7 +22,7 @@ use cranelift_codegen::ir::{ HeapStyle, JumpTable, JumpTableData, MemFlags, Opcode, SigRef, Signature, StackSlot, StackSlotData, StackSlotKind, Table, TableData, Type, Value, ValueLoc, }; -use cranelift_codegen::isa::{self, CallConv, Encoding, RegUnit, TargetIsa}; +use cranelift_codegen::isa::{self, CallConv, RegUnit, TargetIsa}; use cranelift_codegen::packed_option::ReservedValue; use cranelift_codegen::{settings, settings::Configurable, timing}; use smallvec::SmallVec; @@ -253,20 +253,6 @@ impl<'a> Context<'a> { } } - // Get the index of a recipe name if it exists. 
- fn find_recipe_index(&self, recipe_name: &str) -> Option { - if let Some(unique_isa) = self.unique_isa { - unique_isa - .encoding_info() - .names - .iter() - .position(|&name| name == recipe_name) - .map(|idx| idx as u16) - } else { - None - } - } - // Allocate a new stack slot. fn add_ss(&mut self, ss: StackSlot, data: StackSlotData, loc: Location) -> ParseResult<()> { self.map.def_ss(ss, loc)?; @@ -962,21 +948,6 @@ impl<'a> Parser<'a> { } } - // Match and consume a HexSequence that fits into a u16. - // This is used for instruction encodings. - fn match_hex16(&mut self, err_msg: &str) -> ParseResult { - if let Some(Token::HexSequence(bits_str)) = self.token() { - self.consume(); - // The only error we anticipate from this parse is overflow, the lexer should - // already have ensured that the string doesn't contain invalid characters, and - // isn't empty or negative. - u16::from_str_radix(bits_str, 16) - .map_err(|_| self.error("the hex sequence given overflows the u16 type")) - } else { - err!(self.loc, err_msg) - } - } - // Match and consume a register unit either by number `%15` or by name `%rax`. fn match_regunit(&mut self, isa: Option<&dyn TargetIsa>) -> ParseResult { if let Some(Token::Name(name)) = self.token() { @@ -2010,7 +1981,7 @@ impl<'a> Parser<'a> { _ => false, } { let srcloc = self.optional_srcloc()?; - let (encoding, result_locations) = self.parse_instruction_encoding(ctx)?; + let result_locations = self.parse_instruction_encoding(ctx)?; // We need to parse instruction results here because they are shared // between the parsing of value aliases and the parsing of instructions. @@ -2031,24 +2002,10 @@ impl<'a> Parser<'a> { } Some(Token::Equal) => { self.consume(); - self.parse_instruction( - &results, - srcloc, - encoding, - result_locations, - ctx, - block, - )?; + self.parse_instruction(&results, srcloc, result_locations, ctx, block)?; } _ if !results.is_empty() => return err!(self.loc, "expected -> or ="), - _ => self.parse_instruction( - &results, - srcloc, - encoding, - result_locations, - ctx, - block, - )?, + _ => self.parse_instruction(&results, srcloc, result_locations, ctx, block)?, } } @@ -2146,49 +2103,29 @@ impl<'a> Parser<'a> { } } - fn parse_instruction_encoding( - &mut self, - ctx: &Context, - ) -> ParseResult<(Option, Option>)> { - let (mut encoding, mut result_locations) = (None, None); + fn parse_instruction_encoding(&mut self, ctx: &Context) -> ParseResult>> { + let mut result_locations = None; // encoding ::= "[" encoding_literal result_locations "]" if self.optional(Token::LBracket) { - // encoding_literal ::= "-" | Identifier HexSequence - if !self.optional(Token::Minus) { - let recipe = self.match_any_identifier("expected instruction encoding or '-'")?; - let bits = self.match_hex16("expected a hex sequence")?; - - if let Some(recipe_index) = ctx.find_recipe_index(recipe) { - encoding = Some(Encoding::new(recipe_index, bits)); - } else if ctx.unique_isa.is_some() { - return err!(self.loc, "invalid instruction recipe"); - } else { - // We allow encodings to be specified when there's no unique ISA purely - // for convenience, eg when copy-pasting code for a test. - } - } - // result_locations ::= ("," ( "-" | names ) )? 
// names ::= Name { "," Name } - if self.optional(Token::Comma) { - let mut results = Vec::new(); + let mut results = Vec::new(); + results.push(self.parse_value_location(ctx)?); + while self.optional(Token::Comma) { results.push(self.parse_value_location(ctx)?); - while self.optional(Token::Comma) { - results.push(self.parse_value_location(ctx)?); - } - - result_locations = Some(results); } + result_locations = Some(results); + self.match_token( Token::RBracket, "expected ']' to terminate instruction encoding", )?; } - Ok((encoding, result_locations)) + Ok(result_locations) } // Parse instruction results and return them. @@ -2265,7 +2202,6 @@ impl<'a> Parser<'a> { &mut self, results: &[Value], srcloc: ir::SourceLoc, - encoding: Option, result_locations: Option>, ctx: &mut Context, block: Block, @@ -2321,10 +2257,6 @@ impl<'a> Parser<'a> { ctx.function.srclocs[inst] = srcloc; } - if let Some(encoding) = encoding { - ctx.function.encodings[inst] = encoding; - } - if results.len() != num_results { return err!( self.loc, diff --git a/cranelift/src/wasm.rs b/cranelift/src/wasm.rs index 20310f157c..18593d0c82 100644 --- a/cranelift/src/wasm.rs +++ b/cranelift/src/wasm.rs @@ -309,8 +309,11 @@ fn handle_module(options: &Options, path: &Path, name: &str, fisa: FlagsOrIsa) - let value_ranges = if options.value_ranges { Some( context - .build_value_labels_ranges(isa) - .expect("value location ranges"), + .mach_compile_result + .as_ref() + .unwrap() + .value_labels_ranges + .clone(), ) } else { None diff --git a/crates/cranelift/src/compiler.rs b/crates/cranelift/src/compiler.rs index 170b8e841b..ae0219ed60 100644 --- a/crates/cranelift/src/compiler.rs +++ b/crates/cranelift/src/compiler.rs @@ -73,31 +73,19 @@ impl Compiler { let start_srcloc = FilePos::new(offset as u32); let end_srcloc = FilePos::new((offset + len) as u32); - let instructions = if let Some(ref mcr) = &context.mach_compile_result { - // New-style backend: we have a `MachCompileResult` that will give us `MachSrcLoc` mapping - // tuples. - collect_address_maps( - body_len, - mcr.buffer - .get_srclocs_sorted() - .into_iter() - .map(|&MachSrcLoc { start, end, loc }| (loc, start, (end - start))), - ) - } else { - // Old-style backend: we need to traverse the instruction/encoding info in the function. - let func = &context.func; - let mut blocks = func.layout.blocks().collect::>(); - blocks.sort_by_key(|block| func.offsets[*block]); // Ensure inst offsets always increase - - let encinfo = self.isa.encoding_info(); - collect_address_maps( - body_len, - blocks - .into_iter() - .flat_map(|block| func.inst_offsets(block, &encinfo)) - .map(|(offset, inst, size)| (func.srclocs[inst], offset, size)), - ) - }; + // New-style backend: we have a `MachCompileResult` that will give us `MachSrcLoc` mapping + // tuples. 
+ let instructions = collect_address_maps( + body_len, + context + .mach_compile_result + .as_ref() + .unwrap() + .buffer + .get_srclocs_sorted() + .into_iter() + .map(|&MachSrcLoc { start, end, loc }| (loc, start, (end - start))), + ); FunctionAddressMap { instructions: instructions.into(), @@ -196,10 +184,14 @@ impl wasmtime_environ::Compiler for Compiler { self.get_function_address_map(&context, &input, code_buf.len() as u32); let ranges = if tunables.generate_native_debuginfo { - let ranges = context.build_value_labels_ranges(isa).map_err(|error| { - CompileError::Codegen(pretty_error(&context.func, Some(isa), error)) - })?; - Some(ranges) + Some( + context + .mach_compile_result + .as_ref() + .unwrap() + .value_labels_ranges + .clone(), + ) } else { None }; diff --git a/crates/cranelift/src/debug/transform/expression.rs b/crates/cranelift/src/debug/transform/expression.rs index 0ccfde8b03..a72e5daa42 100644 --- a/crates/cranelift/src/debug/transform/expression.rs +++ b/crates/cranelift/src/debug/transform/expression.rs @@ -1,7 +1,7 @@ use super::address_transform::AddressTransform; use crate::debug::ModuleMemoryOffset; use anyhow::{Context, Error, Result}; -use cranelift_codegen::ir::{LabelValueLoc, StackSlots, ValueLabel, ValueLoc}; +use cranelift_codegen::ir::{LabelValueLoc, StackSlots, ValueLabel}; use cranelift_codegen::isa::TargetIsa; use cranelift_codegen::ValueLabelsRanges; use cranelift_wasm::get_vmctx_value_label; @@ -129,40 +129,12 @@ impl CompiledExpression { } } -const X86_64_STACK_OFFSET: i64 = 16; - fn translate_loc( loc: LabelValueLoc, - frame_info: Option<&FunctionFrameInfo>, isa: &dyn TargetIsa, add_stack_value: bool, ) -> Result>> { Ok(match loc { - LabelValueLoc::ValueLoc(ValueLoc::Reg(reg)) => { - let machine_reg = isa.map_dwarf_register(reg)?; - let mut writer = ExpressionWriter::new(); - if add_stack_value { - writer.write_op_reg(machine_reg)?; - } else { - writer.write_op_breg(machine_reg)?; - writer.write_sleb128(0)?; - } - Some(writer.into_vec()) - } - LabelValueLoc::ValueLoc(ValueLoc::Stack(ss)) => { - if let Some(frame_info) = frame_info { - if let Some(ss_offset) = frame_info.stack_slots[ss].offset { - let mut writer = ExpressionWriter::new(); - writer.write_op_breg(X86_64::RBP.0)?; - writer.write_sleb128(ss_offset as i64 + X86_64_STACK_OFFSET)?; - if !add_stack_value { - writer.write_op(gimli::constants::DW_OP_deref)?; - } - return Ok(Some(writer.into_vec())); - } - } - None - } LabelValueLoc::Reg(r) => { let machine_reg = isa.map_regalloc_reg_to_dwarf(r)?; let mut writer = ExpressionWriter::new(); @@ -183,8 +155,6 @@ fn translate_loc( } return Ok(Some(writer.into_vec())); } - - _ => None, }) } @@ -197,35 +167,6 @@ fn append_memory_deref( let mut writer = ExpressionWriter::new(); // FIXME for imported memory match vmctx_loc { - LabelValueLoc::ValueLoc(ValueLoc::Reg(vmctx_reg)) => { - let reg = isa.map_dwarf_register(vmctx_reg)? 
as u8; - writer.write_u8(gimli::constants::DW_OP_breg0.0 + reg)?; - let memory_offset = match frame_info.vmctx_memory_offset() { - Some(offset) => offset, - None => { - return Ok(false); - } - }; - writer.write_sleb128(memory_offset)?; - } - LabelValueLoc::ValueLoc(ValueLoc::Stack(ss)) => { - if let Some(ss_offset) = frame_info.stack_slots[ss].offset { - writer.write_op_breg(X86_64::RBP.0)?; - writer.write_sleb128(ss_offset as i64 + X86_64_STACK_OFFSET)?; - writer.write_op(gimli::constants::DW_OP_deref)?; - writer.write_op(gimli::constants::DW_OP_consts)?; - let memory_offset = match frame_info.vmctx_memory_offset() { - Some(offset) => offset, - None => { - return Ok(false); - } - }; - writer.write_sleb128(memory_offset)?; - writer.write_op(gimli::constants::DW_OP_plus)?; - } else { - return Ok(false); - } - } LabelValueLoc::Reg(r) => { let reg = isa.map_regalloc_reg_to_dwarf(r)?; writer.write_op_breg(reg)?; @@ -251,9 +192,6 @@ fn append_memory_deref( writer.write_sleb128(memory_offset)?; writer.write_op(gimli::constants::DW_OP_plus)?; } - _ => { - return Ok(false); - } } writer.write_op(gimli::constants::DW_OP_deref)?; writer.write_op(gimli::constants::DW_OP_swap)?; @@ -416,9 +354,7 @@ impl CompiledExpression { CompiledExpressionPart::Local { label, trailing } => { let loc = *label_location.get(&label).context("label_location")?; - if let Some(expr) = - translate_loc(loc, frame_info, isa, *trailing)? - { + if let Some(expr) = translate_loc(loc, isa, *trailing)? { code_buf.extend_from_slice(&expr) } else { return Ok(None); @@ -1221,7 +1157,7 @@ mod tests { } fn create_mock_value_ranges() -> (ValueLabelsRanges, (ValueLabel, ValueLabel, ValueLabel)) { - use cranelift_codegen::ir::{LabelValueLoc, ValueLoc}; + use cranelift_codegen::ir::LabelValueLoc; use cranelift_codegen::ValueLocRange; use cranelift_entity::EntityRef; use std::collections::HashMap; @@ -1232,7 +1168,7 @@ mod tests { value_ranges.insert( value_0, vec![ValueLocRange { - loc: LabelValueLoc::ValueLoc(ValueLoc::Unassigned), + loc: LabelValueLoc::SPOffset(0), start: 0, end: 25, }], @@ -1240,7 +1176,7 @@ mod tests { value_ranges.insert( value_1, vec![ValueLocRange { - loc: LabelValueLoc::ValueLoc(ValueLoc::Unassigned), + loc: LabelValueLoc::SPOffset(0), start: 5, end: 30, }], @@ -1249,12 +1185,12 @@ mod tests { value_2, vec![ ValueLocRange { - loc: LabelValueLoc::ValueLoc(ValueLoc::Unassigned), + loc: LabelValueLoc::SPOffset(0), start: 0, end: 10, }, ValueLocRange { - loc: LabelValueLoc::ValueLoc(ValueLoc::Unassigned), + loc: LabelValueLoc::SPOffset(0), start: 20, end: 30, },
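After this change, `LabelValueLoc` is left with just the regalloc-register and SP-offset variants used in the tests above. A small display helper in the spirit of the deleted `write_valueloc` (a sketch; `describe` is a hypothetical name):

```rust
use cranelift_codegen::ir::LabelValueLoc;

/// Render a label location the way the deleted printer did: registers via
/// their Debug form, stack locations as an offset from SP.
fn describe(loc: &LabelValueLoc) -> String {
    match loc {
        LabelValueLoc::Reg(r) => format!("{:?}", r),
        LabelValueLoc::SPOffset(off) => format!("[sp+{}]", off),
    }
}
```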