Remove ancient register allocation (#3401)

Benjamin Bouvier
2021-09-30 21:27:23 +02:00
committed by GitHub
parent 80336f4535
commit bae4ec6427
66 changed files with 112 additions and 15380 deletions

View File

@@ -1,180 +0,0 @@
//! Common helper code for ABI lowering.
//!
//! This module provides functions and data structures that are useful for implementing the
//! `TargetIsa::legalize_signature()` method.
use crate::ir::{AbiParam, ArgumentExtension, ArgumentLoc, Type};
use core::cmp::Ordering;
/// Legalization action to perform on a single argument or return value when converting a
/// signature.
///
/// An argument may go through a sequence of legalization steps before it reaches the final
/// `Assign` action.
#[derive(Clone, Copy, Debug)]
pub enum ArgAction {
/// Assign the argument to the given location.
Assign(ArgumentLoc),
/// Convert the argument, then call again.
///
/// This action can split an integer type into two smaller integer arguments, or it can split a
/// SIMD vector into halves.
Convert(ValueConversion),
}
impl From<ArgumentLoc> for ArgAction {
fn from(x: ArgumentLoc) -> Self {
Self::Assign(x)
}
}
impl From<ValueConversion> for ArgAction {
fn from(x: ValueConversion) -> Self {
Self::Convert(x)
}
}
/// Legalization action to be applied to a value that is being passed to or from a legalized ABI.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ValueConversion {
/// Split an integer type into low and high parts, using `isplit`.
IntSplit,
/// Split a vector type into halves with identical lane types, using `vsplit`.
VectorSplit,
/// Bit-cast to an integer type of the same size.
IntBits,
/// Sign-extend integer value to the required type.
Sext(Type),
/// Unsigned zero-extend value to the required type.
Uext(Type),
/// Pass value by pointer of given integer type.
Pointer(Type),
}
/// Common trait for assigning arguments to registers or stack locations.
///
/// This will be implemented by individual ISAs.
pub trait ArgAssigner {
/// Pick an assignment action for function argument (or return value) `arg`.
fn assign(&mut self, arg: &AbiParam) -> ArgAction;
}
/// Determine the right action to take when passing a `have` value type to a call signature where
/// the next argument is `arg` which has a different value type.
///
/// The signature legalization process in `legalize_args` above can replace a single argument value
/// with multiple arguments of smaller types. It can also change the type of an integer argument to
/// a larger integer type, requiring the smaller value to be sign- or zero-extended.
///
/// The legalizer needs to repair the values at all ABI boundaries:
///
/// - Incoming function arguments to the entry block.
/// - Function arguments passed to a call.
/// - Return values from a call.
/// - Return values passed to a return instruction.
///
/// The `legalize_abi_value` function helps the legalizer with the process. When the legalizer
/// needs to pass a pre-legalized `have` argument, but the ABI argument `arg` has a different value
/// type, `legalize_abi_value(have, arg)` tells the legalizer how to create the needed value type
/// for the argument.
///
/// It may be necessary to call `legalize_abi_value` more than once for a given argument before the
/// desired argument type appears. This will happen when a vector or integer type needs to be split
/// more than once, for example.
pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion {
let have_bits = have.bits();
let arg_bits = arg.value_type.bits();
if arg.legalized_to_pointer {
return ValueConversion::Pointer(arg.value_type);
}
match have_bits.cmp(&arg_bits) {
// We have fewer bits than the ABI argument.
Ordering::Less => {
debug_assert!(
have.is_int() && arg.value_type.is_int(),
"Can only extend integer values"
);
match arg.extension {
ArgumentExtension::Uext => ValueConversion::Uext(arg.value_type),
ArgumentExtension::Sext => ValueConversion::Sext(arg.value_type),
_ => panic!("No argument extension specified"),
}
}
// We have the same number of bits as the argument.
Ordering::Equal => {
// This must be an integer vector that is split and then extended.
debug_assert!(arg.value_type.is_int());
debug_assert!(have.is_vector(), "expected vector type, got {}", have);
ValueConversion::VectorSplit
}
// We have more bits than the argument.
Ordering::Greater => {
if have.is_vector() {
ValueConversion::VectorSplit
} else if have.is_float() {
// Convert a float to int so it can be split the next time.
// ARM would do this to pass an `f64` in two registers.
ValueConversion::IntBits
} else {
ValueConversion::IntSplit
}
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::ir::types;
use crate::ir::AbiParam;
#[test]
fn legalize() {
let mut arg = AbiParam::new(types::I32);
assert_eq!(
legalize_abi_value(types::I64X2, &arg),
ValueConversion::VectorSplit
);
assert_eq!(
legalize_abi_value(types::I64, &arg),
ValueConversion::IntSplit
);
// Vector of integers is broken down, then sign-extended.
arg.extension = ArgumentExtension::Sext;
assert_eq!(
legalize_abi_value(types::I16X4, &arg),
ValueConversion::VectorSplit
);
assert_eq!(
legalize_abi_value(types::I16.by(2).unwrap(), &arg),
ValueConversion::VectorSplit
);
assert_eq!(
legalize_abi_value(types::I16, &arg),
ValueConversion::Sext(types::I32)
);
// 64-bit float is split as an integer.
assert_eq!(
legalize_abi_value(types::F64, &arg),
ValueConversion::IntBits
);
// Value is passed by reference
arg.legalized_to_pointer = true;
assert_eq!(
legalize_abi_value(types::F64, &arg),
ValueConversion::Pointer(types::I32)
);
}
}
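The comment above notes that `legalize_abi_value` may have to be applied more than once before the ABI argument type is reached. Below is a minimal sketch of such a driver loop (not part of the crate); `conversion_chain` is a made-up name, and the `half_width`/`half_vector`/`Type::int` calls merely model the type arithmetic here, whereas the real legalizer rewrites SSA values with `isplit`, `vsplit`, extensions, and bit-casts:

```rust
// Assumes `legalize_abi_value`, `ValueConversion`, `AbiParam`, and `Type` are in scope.
fn conversion_chain(mut have: Type, arg: &AbiParam) -> Vec<ValueConversion> {
    let mut steps = Vec::new();
    while have != arg.value_type {
        let conv = legalize_abi_value(have, arg);
        steps.push(conv);
        have = match conv {
            ValueConversion::IntSplit => have.half_width().expect("splittable int"),
            ValueConversion::VectorSplit => have.half_vector().expect("splittable vector"),
            ValueConversion::IntBits => Type::int(have.bits() as u16).expect("int of same width"),
            ValueConversion::Sext(ty) | ValueConversion::Uext(ty) => ty,
            // Passing by pointer ends the chain.
            ValueConversion::Pointer(_) => break,
        };
    }
    steps
}
```

For `arg = AbiParam::new(types::I32)` and `have = types::I64X2` this yields `[VectorSplit, IntSplit]`, consistent with the assertions in the test module above.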

View File

@@ -4,23 +4,18 @@
//! binary machine code.
mod memorysink;
mod relaxation;
mod shrink;
mod stack_map;
pub use self::memorysink::{
MemoryCodeSink, NullRelocSink, NullStackMapSink, NullTrapSink, RelocSink, StackMapSink,
TrapSink,
};
pub use self::relaxation::relax_branches;
pub use self::shrink::shrink_instructions;
pub use self::stack_map::StackMap;
use crate::ir::entities::Value;
use crate::ir::{
ConstantOffset, ExternalName, Function, Inst, JumpTable, Opcode, SourceLoc, TrapCode,
};
use crate::isa::TargetIsa;
pub use crate::regalloc::RegDiversions;
use core::fmt;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
@@ -183,16 +178,6 @@ pub trait CodeSink {
}
}
/// Report a bad encoding error.
#[cold]
pub fn bad_encoding(func: &Function, inst: Inst) -> ! {
panic!(
"Bad encoding {} for {}",
func.encodings[inst],
func.dfg.display_inst(inst, None)
);
}
/// Emit a function to `sink`, given an instruction emitter function.
///
/// This function is called from the `TargetIsa::emit_function()` implementations with the
@@ -200,14 +185,12 @@ pub fn bad_encoding(func: &Function, inst: Inst) -> ! {
pub fn emit_function<CS, EI>(func: &Function, emit_inst: EI, sink: &mut CS, isa: &dyn TargetIsa)
where
CS: CodeSink,
EI: Fn(&Function, Inst, &mut RegDiversions, &mut CS, &dyn TargetIsa),
EI: Fn(&Function, Inst, &mut CS, &dyn TargetIsa),
{
let mut divert = RegDiversions::new();
for block in func.layout.blocks() {
divert.at_block(&func.entry_diversions, block);
debug_assert_eq!(func.offsets[block], sink.offset());
for inst in func.layout.block_insts(block) {
emit_inst(func, inst, &mut divert, sink, isa);
emit_inst(func, inst, sink, isa);
}
}
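To make the signature change concrete, here is a hedged sketch of a caller of the new `emit_function`: the per-instruction callback no longer receives a `RegDiversions`. Both `encode_one` and `emit_all` are placeholder names, not crate APIs:

```rust
// Assumes `CodeSink`, `emit_function`, `Function`, `Inst`, and `TargetIsa` are in scope.
fn encode_one<CS: CodeSink>(_func: &Function, _inst: Inst, _sink: &mut CS, _isa: &dyn TargetIsa) {
    // Target-specific encoding of a single instruction would go here.
}

fn emit_all<CS: CodeSink>(func: &Function, sink: &mut CS, isa: &dyn TargetIsa) {
    emit_function(func, encode_one::<CS>, sink, isa);
}
```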

View File

@@ -1,396 +0,0 @@
//! Branch relaxation and offset computation.
//!
//! # Block header offsets
//!
//! Before we can generate binary machine code for branch instructions, we need to know the final
//! offsets of all the block headers in the function. This information is encoded in the
//! `func.offsets` table.
//!
//! # Branch relaxation
//!
//! Branch relaxation is the process of ensuring that all branches in the function have enough
//! range to encode their destination. It is common to have multiple branch encodings in an ISA.
//! For example, x86 branches can have either an 8-bit or a 32-bit displacement.
//!
//! On RISC architectures, it can happen that conditional branches have a shorter range than
//! unconditional branches:
//!
//! ```clif
//! brz v1, block17
//! ```
//!
//! can be transformed into:
//!
//! ```clif
//! brnz v1, block23
//! jump block17
//! block23:
//! ```
use crate::binemit::{CodeInfo, CodeOffset};
use crate::cursor::{Cursor, FuncCursor};
use crate::dominator_tree::DominatorTree;
use crate::flowgraph::ControlFlowGraph;
use crate::ir::{Block, Function, Inst, InstructionData, Opcode, Value, ValueList};
use crate::isa::{EncInfo, TargetIsa};
use crate::iterators::IteratorExtras;
use crate::regalloc::RegDiversions;
use crate::timing;
use crate::CodegenResult;
use core::convert::TryFrom;
/// Relax branches and compute the final layout of block headers in `func`.
///
/// Fill in the `func.offsets` table so the function is ready for binary emission.
pub fn relax_branches(
func: &mut Function,
_cfg: &mut ControlFlowGraph,
_domtree: &mut DominatorTree,
isa: &dyn TargetIsa,
) -> CodegenResult<CodeInfo> {
let _tt = timing::relax_branches();
let encinfo = isa.encoding_info();
// Clear all offsets so we can recognize blocks that haven't been visited yet.
func.offsets.clear();
func.offsets.resize(func.dfg.num_blocks());
// Start by removing redundant jumps.
fold_redundant_jumps(func, _cfg, _domtree);
// Convert jumps to fallthrough instructions where possible.
fallthroughs(func);
let mut offset = 0;
let mut divert = RegDiversions::new();
// First, compute initial offsets for every block.
{
let mut cur = FuncCursor::new(func);
while let Some(block) = cur.next_block() {
divert.at_block(&cur.func.entry_diversions, block);
cur.func.offsets[block] = offset;
while let Some(inst) = cur.next_inst() {
divert.apply(&cur.func.dfg[inst]);
let enc = cur.func.encodings[inst];
offset += encinfo.byte_size(enc, inst, &divert, &cur.func);
}
}
}
// Then, run the relaxation algorithm until it converges.
let mut go_again = true;
while go_again {
go_again = false;
offset = 0;
// Visit all instructions in layout order.
let mut cur = FuncCursor::new(func);
while let Some(block) = cur.next_block() {
divert.at_block(&cur.func.entry_diversions, block);
// Record the offset for `block` and make sure we iterate until offsets are stable.
if cur.func.offsets[block] != offset {
cur.func.offsets[block] = offset;
go_again = true;
}
while let Some(inst) = cur.next_inst() {
divert.apply(&cur.func.dfg[inst]);
let enc = cur.func.encodings[inst];
// See if this is a branch that has a range and a destination, and if the target is in
// range.
if let Some(range) = encinfo.branch_range(enc) {
if let Some(dest) = cur.func.dfg[inst].branch_destination() {
let dest_offset = cur.func.offsets[dest];
if !range.contains(offset, dest_offset) {
offset +=
relax_branch(&mut cur, &divert, offset, dest_offset, &encinfo, isa);
continue;
}
}
}
offset += encinfo.byte_size(enc, inst, &divert, &cur.func);
}
}
}
let code_size = offset;
let jumptables = offset;
for (jt, jt_data) in func.jump_tables.iter() {
func.jt_offsets[jt] = offset;
// TODO: this should be computed based on the min size needed to hold the furthest branch.
offset += jt_data.len() as u32 * 4;
}
let jumptables_size = offset - jumptables;
let rodata = offset;
for constant in func.dfg.constants.entries_mut() {
constant.set_offset(offset);
offset +=
u32::try_from(constant.len()).expect("Constants must have a length that fits in a u32")
}
let rodata_size = offset - rodata;
Ok(CodeInfo {
code_size,
jumptables_size,
rodata_size,
total_size: offset,
})
}
/// Folds an instruction if it is a redundant jump.
/// Returns whether folding was performed (which invalidates the CFG).
fn try_fold_redundant_jump(
func: &mut Function,
cfg: &mut ControlFlowGraph,
block: Block,
first_inst: Inst,
) -> bool {
let first_dest = match func.dfg[first_inst].branch_destination() {
Some(block) => block, // The instruction was a single-target branch.
None => {
return false; // The instruction was either multi-target or not a branch.
}
};
// For the moment, only attempt to fold a branch to a block that is parameterless.
// These blocks are mainly produced by critical edge splitting.
//
// TODO: Allow folding blocks that define SSA values and function as phi nodes.
if func.dfg.num_block_params(first_dest) != 0 {
return false;
}
// Look at the first instruction of the first branch's destination.
// If it is an unconditional branch, maybe the second jump can be bypassed.
let second_inst = func.layout.first_inst(first_dest).expect("Instructions");
if func.dfg[second_inst].opcode() != Opcode::Jump {
return false;
}
// Now we need to fix up first_inst's block parameters to match second_inst's,
// without changing the branch-specific arguments.
//
// The intermediary block is allowed to reference any SSA value that dominates it,
// but that SSA value may not necessarily also dominate the instruction that's
// being patched.
// Get the arguments and parameters passed by the first branch.
let num_fixed = func.dfg[first_inst]
.opcode()
.constraints()
.num_fixed_value_arguments();
let (first_args, first_params) = func.dfg[first_inst]
.arguments(&func.dfg.value_lists)
.split_at(num_fixed);
// Get the parameters passed by the second jump.
let num_fixed = func.dfg[second_inst]
.opcode()
.constraints()
.num_fixed_value_arguments();
let (_, second_params) = func.dfg[second_inst]
.arguments(&func.dfg.value_lists)
.split_at(num_fixed);
let mut second_params = second_params.to_vec(); // Clone for rewriting below.
// For each parameter passed by the second jump, if any of those parameters
// was a block parameter, rewrite it to refer to the value that the first jump
// passed in its parameters. Otherwise, make sure it dominates first_inst.
//
// For example: if `block0: jump block1(v1)` targets `block1(v2): jump block2(v2)`,
// we want to rewrite the original jump to `jump block2(v1)`.
let block_params: &[Value] = func.dfg.block_params(first_dest);
debug_assert!(block_params.len() == first_params.len());
for value in second_params.iter_mut() {
if let Some((n, _)) = block_params.iter().enumerate().find(|(_, &p)| p == *value) {
// This value was the Nth parameter passed to the second_inst's block.
// Rewrite it as the Nth parameter passed by first_inst.
*value = first_params[n];
}
}
// Build a value list of first_args (unchanged) followed by second_params (rewritten).
let arguments_vec: alloc::vec::Vec<_> = first_args
.iter()
.chain(second_params.iter())
.copied()
.collect();
let value_list = ValueList::from_slice(&arguments_vec, &mut func.dfg.value_lists);
func.dfg[first_inst].take_value_list(); // Drop the current list.
func.dfg[first_inst].put_value_list(value_list); // Put the new list.
// Bypass the second jump.
// This can disconnect the Block containing `second_inst`, to be cleaned up later.
let second_dest = func.dfg[second_inst].branch_destination().expect("Dest");
func.change_branch_destination(first_inst, second_dest);
cfg.recompute_block(func, block);
// The previously-intermediary Block may now be unreachable. Update CFG.
if cfg.pred_iter(first_dest).count() == 0 {
// Remove all instructions from that block.
while let Some(inst) = func.layout.first_inst(first_dest) {
func.layout.remove_inst(inst);
}
// Remove the block...
cfg.recompute_block(func, first_dest); // ...from predecessor lists.
func.layout.remove_block(first_dest); // ...from the layout.
}
true
}
/// Redirects `jump` instructions that point to other `jump` instructions to the final destination.
/// This transformation may orphan some blocks.
fn fold_redundant_jumps(
func: &mut Function,
cfg: &mut ControlFlowGraph,
domtree: &mut DominatorTree,
) {
let mut folded = false;
// Postorder iteration guarantees that a chain of jumps is visited from
// the end of the chain to the start of the chain.
for &block in domtree.cfg_postorder() {
// Only proceed if the first terminator instruction is a single-target branch.
let first_inst = func
.layout
.last_inst(block)
.expect("Block has no terminator");
folded |= try_fold_redundant_jump(func, cfg, block, first_inst);
// Also try the previous instruction.
if let Some(prev_inst) = func.layout.prev_inst(first_inst) {
folded |= try_fold_redundant_jump(func, cfg, block, prev_inst);
}
}
// Folding jumps invalidates the dominator tree.
if folded {
domtree.compute(func, cfg);
}
}
/// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any
/// existing `fallthrough` instructions are correct.
fn fallthroughs(func: &mut Function) {
for (block, succ) in func.layout.blocks().adjacent_pairs() {
let term = func
.layout
.last_inst(block)
.expect("block has no terminator.");
if let InstructionData::Jump {
ref mut opcode,
destination,
..
} = func.dfg[term]
{
match *opcode {
Opcode::Fallthrough => {
// Somebody used a fall-through instruction before the branch relaxation pass.
// Make sure it is correct, i.e. the destination is the layout successor.
debug_assert_eq!(
destination, succ,
"Illegal fallthrough from {} to {}, but {}'s successor is {}",
block, destination, block, succ
)
}
Opcode::Jump => {
// If this is a jump to the successor block, change it to a fall-through.
if destination == succ {
*opcode = Opcode::Fallthrough;
func.encodings[term] = Default::default();
}
}
_ => {}
}
}
}
}
/// Relax the branch instruction at `cur` so it can cover the range `offset - dest_offset`.
///
/// Return the size of the replacement instructions up to and including the location where `cur` is
/// left.
fn relax_branch(
cur: &mut FuncCursor,
divert: &RegDiversions,
offset: CodeOffset,
dest_offset: CodeOffset,
encinfo: &EncInfo,
isa: &dyn TargetIsa,
) -> CodeOffset {
let inst = cur.current_inst().unwrap();
log::trace!(
"Relaxing [{}] {} for {:#x}-{:#x} range",
encinfo.display(cur.func.encodings[inst]),
cur.func.dfg.display_inst(inst, isa),
offset,
dest_offset
);
// Pick the smallest encoding that can handle the branch range.
let dfg = &cur.func.dfg;
let ctrl_type = dfg.ctrl_typevar(inst);
if let Some(enc) = isa
.legal_encodings(cur.func, &dfg[inst], ctrl_type)
.filter(|&enc| {
let range = encinfo.branch_range(enc).expect("Branch with no range");
if !range.contains(offset, dest_offset) {
log::trace!(" trying [{}]: out of range", encinfo.display(enc));
false
} else if encinfo.operand_constraints(enc)
!= encinfo.operand_constraints(cur.func.encodings[inst])
{
// Conservatively give up if the encoding has different constraints
// than the original, so that we don't risk picking a new encoding
// which the existing operands don't satisfy. We can't check for
// validity directly because we don't have a RegDiversions active so
// we don't know which registers are actually in use.
log::trace!(" trying [{}]: constraints differ", encinfo.display(enc));
false
} else {
log::trace!(" trying [{}]: OK", encinfo.display(enc));
true
}
})
.min_by_key(|&enc| encinfo.byte_size(enc, inst, &divert, &cur.func))
{
debug_assert!(enc != cur.func.encodings[inst]);
cur.func.encodings[inst] = enc;
return encinfo.byte_size(enc, inst, &divert, &cur.func);
}
// Note: On some RISC ISAs, conditional branches have shorter range than unconditional
// branches, so one way of extending the range of a conditional branch is to invert its
// condition and make it branch over an unconditional jump which has the larger range.
//
// Splitting the block is problematic this late because there may be register diversions in
// effect across the conditional branch, and they can't survive the control flow edge to a new
// block. We have two options for handling that:
//
// 1. Set a flag on the new block that indicates it wants to preserve the register diversions of
// its layout predecessor, or
// 2. Use an encoding macro for the branch-over-jump pattern so we don't need to split the block.
//
// It seems that 1. would allow us to share code among RISC ISAs that need this.
//
// We can't allow register diversions to survive from the layout predecessor because the layout
// predecessor could contain kill points for some values that are live in this block, and
// diversions are not automatically cancelled when the live range of a value ends.
// This assumes solution 2. above:
panic!("No branch in range for {:#x}-{:#x}", offset, dest_offset);
}
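The offset computation above is a fixed-point iteration: branch sizes depend on block offsets, and offsets depend on sizes. A self-contained sketch of that idea, with a hypothetical `SimpleBlock` model and a 2-byte/5-byte short/long branch standing in for `EncInfo::byte_size` and `branch_range`:

```rust
/// Hypothetical block model: some body bytes, optionally ending in a branch.
struct SimpleBlock {
    body_size: u32,
    branch_to: Option<usize>, // index of the destination block, if any
    long_branch: bool,        // has the branch been relaxed to its long form?
}

fn relax(blocks: &mut [SimpleBlock]) -> Vec<u32> {
    let mut offsets = vec![0u32; blocks.len()];
    let mut go_again = true;
    while go_again {
        go_again = false;
        // Recompute offsets with the current branch forms.
        let mut offset = 0;
        for (i, b) in blocks.iter().enumerate() {
            if offsets[i] != offset {
                offsets[i] = offset;
                go_again = true;
            }
            let branch_size = match b.branch_to {
                Some(_) if b.long_branch => 5, // hypothetical long form
                Some(_) => 2,                  // hypothetical short form
                None => 0,
            };
            offset += b.body_size + branch_size;
        }
        // Relax any short branch whose destination is now out of range.
        for i in 0..blocks.len() {
            if let Some(dest) = blocks[i].branch_to {
                if !blocks[i].long_branch && offsets[i].abs_diff(offsets[dest]) > 127 {
                    blocks[i].long_branch = true;
                    go_again = true;
                }
            }
        }
    }
    offsets
}
```

It terminates for the same reason the real pass does: a branch is only ever widened, so sizes grow monotonically and offsets eventually stabilize.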

View File

@@ -1,72 +0,0 @@
//! Instruction shrinking.
//!
//! Sometimes there are multiple valid encodings for a given instruction. Cranelift often initially
//! chooses the largest one, because this typically provides the register allocator the most
//! flexibility. However, once register allocation is done, this is no longer important, and we
//! can switch to smaller encodings when possible.
use crate::ir::instructions::InstructionData;
use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::regalloc::RegDiversions;
use crate::timing;
/// Pick the smallest valid encodings for instructions.
pub fn shrink_instructions(func: &mut Function, isa: &dyn TargetIsa) {
let _tt = timing::shrink_instructions();
let encinfo = isa.encoding_info();
let mut divert = RegDiversions::new();
for block in func.layout.blocks() {
// Load diversions from predecessors.
divert.at_block(&func.entry_diversions, block);
for inst in func.layout.block_insts(block) {
let enc = func.encodings[inst];
if enc.is_legal() {
// regmove/regfill/regspill are special instructions with register immediates
// that are represented as normal operands, so the normal predicates below don't
// handle them correctly.
//
// Also, they need to be presented to the `RegDiversions` to update the
// location tracking.
//
// TODO: Eventually, we want the register allocator to avoid leaving these special
// instructions behind, but for now, just temporarily avoid trying to shrink them.
let inst_data = &func.dfg[inst];
match inst_data {
InstructionData::RegMove { .. }
| InstructionData::RegFill { .. }
| InstructionData::RegSpill { .. } => {
divert.apply(inst_data);
continue;
}
_ => (),
}
let ctrl_type = func.dfg.ctrl_typevar(inst);
// Pick the smallest encoding whose constraints are satisfied.
let best_enc = isa
.legal_encodings(func, &func.dfg[inst], ctrl_type)
.filter(|e| encinfo.constraints[e.recipe()].satisfied(inst, &divert, &func))
.min_by_key(|e| encinfo.byte_size(*e, inst, &divert, &func))
.unwrap();
if best_enc != enc {
func.encodings[inst] = best_enc;
log::trace!(
"Shrunk [{}] to [{}] in {}, reducing the size from {} to {}",
encinfo.display(enc),
encinfo.display(best_enc),
func.dfg.display_inst(inst, isa),
encinfo.byte_size(enc, inst, &divert, &func),
encinfo.byte_size(best_enc, inst, &divert, &func)
);
}
}
}
}
}

View File

@@ -9,24 +9,17 @@
//! contexts concurrently. Typically, you would have one context per compilation thread and only a
//! single ISA instance.
use crate::binemit::{
relax_branches, shrink_instructions, CodeInfo, MemoryCodeSink, RelocSink, StackMapSink,
TrapSink,
};
use crate::binemit::{CodeInfo, MemoryCodeSink, RelocSink, StackMapSink, TrapSink};
use crate::dce::do_dce;
use crate::dominator_tree::DominatorTree;
use crate::flowgraph::ControlFlowGraph;
use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::legalize_function;
use crate::legalizer::simple_legalize;
use crate::licm::do_licm;
use crate::loop_analysis::LoopAnalysis;
use crate::machinst::{MachCompileResult, MachStackMap};
use crate::nan_canonicalization::do_nan_canonicalization;
use crate::postopt::do_postopt;
use crate::redundant_reload_remover::RedundantReloadRemover;
use crate::regalloc;
use crate::remove_constant_phis::do_remove_constant_phis;
use crate::result::CodegenResult;
use crate::settings::{FlagsOrIsa, OptLevel};
@@ -34,8 +27,7 @@ use crate::simple_gvn::do_simple_gvn;
use crate::simple_preopt::do_preopt;
use crate::timing;
use crate::unreachable_code::eliminate_unreachable_code;
use crate::value_label::{build_value_labels_ranges, ComparableSourceLoc, ValueLabelsRanges};
use crate::verifier::{verify_context, verify_locations, VerifierErrors, VerifierResult};
use crate::verifier::{verify_context, VerifierErrors, VerifierResult};
#[cfg(feature = "souper-harvest")]
use alloc::string::String;
use alloc::vec::Vec;
@@ -54,15 +46,9 @@ pub struct Context {
/// Dominator tree for `func`.
pub domtree: DominatorTree,
/// Register allocation context.
pub regalloc: regalloc::Context,
/// Loop analysis of `func`.
pub loop_analysis: LoopAnalysis,
/// Redundant-reload remover context.
pub redundant_reload_remover: RedundantReloadRemover,
/// Result of MachBackend compilation, if computed.
pub mach_compile_result: Option<MachCompileResult>,
@@ -88,9 +74,7 @@ impl Context {
func,
cfg: ControlFlowGraph::new(),
domtree: DominatorTree::new(),
regalloc: regalloc::Context::new(),
loop_analysis: LoopAnalysis::new(),
redundant_reload_remover: RedundantReloadRemover::new(),
mach_compile_result: None,
want_disasm: false,
}
@@ -101,9 +85,7 @@ impl Context {
self.func.clear();
self.cfg.clear();
self.domtree.clear();
self.regalloc.clear();
self.loop_analysis.clear();
self.redundant_reload_remover.clear();
self.mach_compile_result = None;
self.want_disasm = false;
}
@@ -137,13 +119,7 @@ impl Context {
let old_len = mem.len();
mem.resize(old_len + info.total_size as usize, 0);
let new_info = unsafe {
self.emit_to_memory(
isa,
mem.as_mut_ptr().add(old_len),
relocs,
traps,
stack_maps,
)
self.emit_to_memory(mem.as_mut_ptr().add(old_len), relocs, traps, stack_maps)
};
debug_assert!(new_info == info);
Ok(info)
@@ -177,7 +153,6 @@ impl Context {
self.legalize(isa)?;
if opt_level != OptLevel::None {
self.postopt(isa)?;
self.compute_domtree();
self.compute_loop_analysis();
self.licm(isa)?;
@@ -192,25 +167,12 @@ impl Context {
self.remove_constant_phis(isa)?;
if let Some(backend) = isa.get_mach_backend() {
let result = backend.compile_function(&self.func, self.want_disasm)?;
let info = result.code_info();
self.mach_compile_result = Some(result);
Ok(info)
} else {
self.regalloc(isa)?;
self.prologue_epilogue(isa)?;
if opt_level == OptLevel::Speed || opt_level == OptLevel::SpeedAndSize {
self.redundant_reload_remover(isa)?;
}
if opt_level == OptLevel::SpeedAndSize {
self.shrink_instructions(isa)?;
}
let result = self.relax_branches(isa);
log::trace!("Compiled:\n{}", self.func.display(isa));
result
}
// FIXME: make this non optional
let backend = isa.get_mach_backend().expect("only mach backends nowadays");
let result = backend.compile_function(&self.func, self.want_disasm)?;
let info = result.code_info();
self.mach_compile_result = Some(result);
Ok(info)
}
/// Emit machine code directly into raw memory.
@@ -228,7 +190,6 @@ impl Context {
/// Returns information about the emitted code and data.
pub unsafe fn emit_to_memory(
&self,
isa: &dyn TargetIsa,
mem: *mut u8,
relocs: &mut dyn RelocSink,
traps: &mut dyn TrapSink,
@@ -236,25 +197,24 @@ impl Context {
) -> CodeInfo {
let _tt = timing::binemit();
let mut sink = MemoryCodeSink::new(mem, relocs, traps, stack_maps);
if let Some(ref result) = &self.mach_compile_result {
result.buffer.emit(&mut sink);
let info = sink.info;
// New backends do not emit StackMaps through the `CodeSink` because its interface
// requires `Value`s; instead, the `StackMap` objects are directly accessible via
// `result.buffer.stack_maps()`.
for &MachStackMap {
offset_end,
ref stack_map,
..
} in result.buffer.stack_maps()
{
stack_maps.add_stack_map(offset_end, stack_map.clone());
}
info
} else {
isa.emit_function_to_memory(&self.func, &mut sink);
sink.info
let result = self
.mach_compile_result
.as_ref()
.expect("only using mach backend now");
result.buffer.emit(&mut sink);
let info = sink.info;
// New backends do not emit StackMaps through the `CodeSink` because its interface
// requires `Value`s; instead, the `StackMap` objects are directly accessible via
// `result.buffer.stack_maps()`.
for &MachStackMap {
offset_end,
ref stack_map,
..
} in result.buffer.stack_maps()
{
stack_maps.add_stack_map(offset_end, stack_map.clone());
}
info
}
/// If available, return information about the code layout in the
@@ -314,26 +274,6 @@ impl Context {
Ok(())
}
/// Run the locations verifier on the function.
pub fn verify_locations(&self, isa: &dyn TargetIsa) -> VerifierResult<()> {
let mut errors = VerifierErrors::default();
let _ = verify_locations(isa, &self.func, &self.cfg, None, &mut errors);
if errors.is_empty() {
Ok(())
} else {
Err(errors)
}
}
/// Run the locations verifier only if the `enable_verifier` setting is true.
pub fn verify_locations_if(&self, isa: &dyn TargetIsa) -> CodegenResult<()> {
if isa.flags().enable_verifier() {
self.verify_locations(isa)?;
}
Ok(())
}
/// Perform dead-code elimination on the function.
pub fn dce<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CodegenResult<()> {
do_dce(&mut self.func, &mut self.domtree);
@@ -370,22 +310,10 @@ impl Context {
// TODO: Avoid doing this when legalization doesn't actually mutate the CFG.
self.domtree.clear();
self.loop_analysis.clear();
if isa.get_mach_backend().is_some() {
// Run some specific legalizations only.
simple_legalize(&mut self.func, &mut self.cfg, isa);
self.verify_if(isa)
} else {
legalize_function(&mut self.func, &mut self.cfg, isa);
log::trace!("Legalized:\n{}", self.func.display(isa));
self.verify_if(isa)
}
}
/// Perform post-legalization rewrites on the function.
pub fn postopt(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
do_postopt(&mut self.func, isa);
self.verify_if(isa)?;
Ok(())
// Run some specific legalizations only.
simple_legalize(&mut self.func, &mut self.cfg, isa);
self.verify_if(isa)
}
/// Compute the control flow graph.
@@ -437,58 +365,6 @@ impl Context {
self.verify_if(fisa)
}
/// Run the register allocator.
pub fn regalloc(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
self.regalloc
.run(isa, &mut self.func, &mut self.cfg, &mut self.domtree)
}
/// Insert prologue and epilogues after computing the stack frame layout.
pub fn prologue_epilogue(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
isa.prologue_epilogue(&mut self.func)?;
self.verify_if(isa)?;
self.verify_locations_if(isa)?;
Ok(())
}
/// Do redundant-reload removal after allocation of both registers and stack slots.
pub fn redundant_reload_remover(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
self.redundant_reload_remover
.run(isa, &mut self.func, &self.cfg);
self.verify_if(isa)?;
Ok(())
}
/// Run the instruction shrinking pass.
pub fn shrink_instructions(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
shrink_instructions(&mut self.func, isa);
self.verify_if(isa)?;
self.verify_locations_if(isa)?;
Ok(())
}
/// Run the branch relaxation pass and return information about the function's code and
/// read-only data.
pub fn relax_branches(&mut self, isa: &dyn TargetIsa) -> CodegenResult<CodeInfo> {
let info = relax_branches(&mut self.func, &mut self.cfg, &mut self.domtree, isa)?;
self.verify_if(isa)?;
self.verify_locations_if(isa)?;
Ok(info)
}
/// Builds ranges and location for specified value labels.
pub fn build_value_labels_ranges(
&self,
isa: &dyn TargetIsa,
) -> CodegenResult<ValueLabelsRanges> {
Ok(build_value_labels_ranges::<ComparableSourceLoc>(
&self.func,
&self.regalloc,
self.mach_compile_result.as_ref(),
isa,
))
}
/// Harvest candidate left-hand sides for superoptimization with Souper.
#[cfg(feature = "souper-harvest")]
pub fn souper_harvest(

View File

@@ -634,7 +634,7 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut FuncCursor<'f> {
&mut self.func.dfg
}
fn insert_built_inst(self, inst: ir::Inst, _: ir::Type) -> &'c mut ir::DataFlowGraph {
fn insert_built_inst(self, inst: ir::Inst) -> &'c mut ir::DataFlowGraph {
// TODO: Remove this assertion once #796 is fixed.
#[cfg(debug_assertions)]
{
@@ -759,11 +759,7 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> {
&mut self.func.dfg
}
fn insert_built_inst(
self,
inst: ir::Inst,
ctrl_typevar: ir::Type,
) -> &'c mut ir::DataFlowGraph {
fn insert_built_inst(self, inst: ir::Inst) -> &'c mut ir::DataFlowGraph {
// TODO: Remove this assertion once #796 is fixed.
#[cfg(debug_assertions)]
{
@@ -787,6 +783,7 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> {
};
};
}
// Insert the instruction and remember the reference.
self.insert_inst(inst);
self.built_inst = Some(inst);
@@ -795,21 +792,6 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> {
self.func.srclocs[inst] = self.srcloc;
}
// Skip the encoding update if we're using a new (MachInst) backend; encodings come later,
// during lowering.
if self.isa.get_mach_backend().is_none() {
// Assign an encoding.
// XXX Is there a way to describe this error to the user?
#[cfg_attr(feature = "cargo-clippy", allow(clippy::match_wild_err_arm))]
match self
.isa
.encode(&self.func, &self.func.dfg[inst], ctrl_typevar)
{
Ok(e) => self.func.encodings[inst] = e,
Err(_) => panic!("can't encode {}", self.display_inst(inst)),
}
}
&mut self.func.dfg
}
}

View File

@@ -78,10 +78,3 @@ pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option<u64> {
_ => None,
}
}
/// Is the given instruction a safepoint (i.e., potentially causes a GC, depending on the
/// embedding, and so requires reftyped values to be enumerated with a stack map)?
pub fn is_safepoint(func: &Function, inst: Inst) -> bool {
let op = func.dfg[inst].opcode();
op.is_resumable_trap() || op.is_call()
}

View File

@@ -56,7 +56,7 @@ pub trait InstInserterBase<'f>: Sized {
fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph;
/// Insert a new instruction which belongs to the DFG.
fn insert_built_inst(self, inst: Inst, ctrl_typevar: Type) -> &'f mut DataFlowGraph;
fn insert_built_inst(self, inst: Inst) -> &'f mut DataFlowGraph;
}
use core::marker::PhantomData;
@@ -129,7 +129,7 @@ impl<'f, IIB: InstInserterBase<'f>> InstBuilderBase<'f> for InsertBuilder<'f, II
inst = dfg.make_inst(data);
dfg.make_inst_results(inst, ctrl_typevar);
}
(inst, self.inserter.insert_built_inst(inst, ctrl_typevar))
(inst, self.inserter.insert_built_inst(inst))
}
}
@@ -166,7 +166,7 @@ where
let ru = self.reuse.as_ref().iter().cloned();
dfg.make_inst_results_reusing(inst, ctrl_typevar, ru);
}
(inst, self.inserter.insert_built_inst(inst, ctrl_typevar))
(inst, self.inserter.insert_built_inst(inst))
}
}

View File

@@ -3,7 +3,6 @@
//! The `Function` struct defined in this module owns all of its basic blocks and
//! instructions.
use crate::binemit::CodeOffset;
use crate::entity::{PrimaryMap, SecondaryMap};
use crate::ir;
use crate::ir::{
@@ -11,11 +10,10 @@ use crate::ir::{
HeapData, Inst, InstructionData, JumpTable, JumpTableData, Opcode, SigRef, StackSlot,
StackSlotData, Table, TableData,
};
use crate::ir::{BlockOffsets, InstEncodings, SourceLocs, StackSlots, ValueLocations};
use crate::ir::{BlockOffsets, SourceLocs, StackSlots, ValueLocations};
use crate::ir::{DataFlowGraph, ExternalName, Layout, Signature};
use crate::ir::{JumpTableOffsets, JumpTables};
use crate::isa::{CallConv, EncInfo, Encoding, Legalize, TargetIsa};
use crate::regalloc::{EntryRegDiversions, RegDiversions};
use crate::isa::{CallConv, TargetIsa};
use crate::value_label::ValueLabelsRanges;
use crate::write::write_function;
#[cfg(feature = "enable-serde")]
@@ -106,19 +104,9 @@ pub struct Function {
/// Layout of blocks and instructions in the function body.
pub layout: Layout,
/// Encoding recipe and bits for the legal instructions.
/// Illegal instructions have the `Encoding::default()` value.
pub encodings: InstEncodings,
/// Location assigned to every value.
pub locations: ValueLocations,
/// Non-default locations assigned to values at the entry of basic blocks.
///
/// At the entry of each basic block, we might have values which are not in their default
/// ValueLocation. This field records these register-to-register moves as Diversions.
pub entry_diversions: EntryRegDiversions,
/// Code offsets of the block headers.
///
/// This information is only transiently available after the `binemit::relax_branches` function
@@ -168,9 +156,7 @@ impl Function {
jump_tables: PrimaryMap::new(),
dfg: DataFlowGraph::new(),
layout: Layout::new(),
encodings: SecondaryMap::new(),
locations: SecondaryMap::new(),
entry_diversions: EntryRegDiversions::new(),
offsets: SecondaryMap::new(),
jt_offsets: SecondaryMap::new(),
srclocs: SecondaryMap::new(),
@@ -190,9 +176,7 @@ impl Function {
self.jump_tables.clear();
self.dfg.clear();
self.layout.clear();
self.encodings.clear();
self.locations.clear();
self.entry_diversions.clear();
self.offsets.clear();
self.jt_offsets.clear();
self.srclocs.clear();
@@ -268,51 +252,6 @@ impl Function {
.map(|i| self.dfg.block_params(entry)[i])
}
/// Get an iterator over the instructions in `block`, including offsets and encoded instruction
/// sizes.
///
/// The iterator returns `(offset, inst, size)` tuples, where `offset` is the offset in bytes
/// from the beginning of the function to the instruction, and `size` is the size of the
/// instruction in bytes, or 0 for unencoded instructions.
///
/// This function can only be used after the code layout has been computed by the
/// `binemit::relax_branches()` function.
pub fn inst_offsets<'a>(&'a self, block: Block, encinfo: &EncInfo) -> InstOffsetIter<'a> {
assert!(
!self.offsets.is_empty(),
"Code layout must be computed first"
);
let mut divert = RegDiversions::new();
divert.at_block(&self.entry_diversions, block);
InstOffsetIter {
encinfo: encinfo.clone(),
func: self,
divert,
encodings: &self.encodings,
offset: self.offsets[block],
iter: self.layout.block_insts(block),
}
}
/// Wrapper around `encode` which assigns `inst` the resulting encoding.
pub fn update_encoding(&mut self, inst: ir::Inst, isa: &dyn TargetIsa) -> Result<(), Legalize> {
if isa.get_mach_backend().is_some() {
Ok(())
} else {
self.encode(inst, isa).map(|e| self.encodings[inst] = e)
}
}
/// Wrapper around `TargetIsa::encode` for encoding an existing instruction
/// in the `Function`.
pub fn encode(&self, inst: ir::Inst, isa: &dyn TargetIsa) -> Result<Encoding, Legalize> {
if isa.get_mach_backend().is_some() {
Ok(Encoding::new(0, 0))
} else {
isa.encode(&self, &self.dfg[inst], self.dfg.ctrl_typevar(inst))
}
}
/// Starts collection of debug information.
pub fn collect_debug_info(&mut self) {
self.dfg.collect_debug_info();
@@ -469,29 +408,3 @@ impl fmt::Debug for Function {
write_function(fmt, self, &DisplayFunctionAnnotations::default())
}
}
/// Iterator returning instruction offsets and sizes: `(offset, inst, size)`.
pub struct InstOffsetIter<'a> {
encinfo: EncInfo,
divert: RegDiversions,
func: &'a Function,
encodings: &'a InstEncodings,
offset: CodeOffset,
iter: ir::layout::Insts<'a>,
}
impl<'a> Iterator for InstOffsetIter<'a> {
type Item = (CodeOffset, ir::Inst, CodeOffset);
fn next(&mut self) -> Option<Self::Item> {
self.iter.next().map(|inst| {
self.divert.apply(&self.func.dfg[inst]);
let byte_size =
self.encinfo
.byte_size(self.encodings[inst], inst, &self.divert, self.func);
let offset = self.offset;
self.offset += byte_size;
(offset, inst, byte_size)
})
}
}
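For context, a hypothetical caller of the removed `inst_offsets` iterator (`dump_block` is not a real API, and `println!` assumes a std environment); the iterator was only meaningful after `binemit::relax_branches` had filled in `func.offsets`:

```rust
// Assumes `Function` and `Block` from `crate::ir` and `EncInfo` from `crate::isa` are in scope.
fn dump_block(func: &Function, block: Block, encinfo: &EncInfo) {
    for (offset, inst, size) in func.inst_offsets(block, encinfo) {
        println!("{:#06x}: {} ({} bytes)", offset, inst, size);
    }
}
```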

View File

@@ -1,7 +1,7 @@
//! Naming well-known routines in the runtime library.
use crate::ir::{
types, AbiParam, ArgumentPurpose, ExtFuncData, ExternalName, FuncRef, Function, Inst, Opcode,
types, AbiParam, ArgumentPurpose, ExtFuncData, ExternalName, FuncRef, Function, Opcode,
Signature, Type,
};
use crate::isa::{CallConv, RegUnit, TargetIsa};
@@ -166,21 +166,6 @@ impl LibCall {
}
}
/// Get a function reference for `libcall` in `func`, following the signature
/// for `inst`.
///
/// If there is an existing reference, use it, otherwise make a new one.
pub(crate) fn get_libcall_funcref(
libcall: LibCall,
call_conv: CallConv,
func: &mut Function,
inst: Inst,
isa: &dyn TargetIsa,
) -> FuncRef {
find_funcref(libcall, func)
.unwrap_or_else(|| make_funcref_for_inst(libcall, call_conv, func, inst, isa))
}
/// Get a function reference for the probestack function in `func`.
///
/// If there is an existing reference, use it, otherwise make a new one.
@@ -227,33 +212,6 @@ fn make_funcref_for_probestack(
make_funcref(LibCall::Probestack, func, sig, isa)
}
/// Create a funcref for `libcall` with a signature matching `inst`.
fn make_funcref_for_inst(
libcall: LibCall,
call_conv: CallConv,
func: &mut Function,
inst: Inst,
isa: &dyn TargetIsa,
) -> FuncRef {
let mut sig = Signature::new(call_conv);
for &v in func.dfg.inst_args(inst) {
sig.params.push(AbiParam::new(func.dfg.value_type(v)));
}
for &v in func.dfg.inst_results(inst) {
sig.returns.push(AbiParam::new(func.dfg.value_type(v)));
}
if call_conv.extends_baldrdash() {
// Adds the special VMContext parameter to the signature.
sig.params.push(AbiParam::special(
isa.pointer_type(),
ArgumentPurpose::VMContext,
));
}
make_funcref(libcall, func, sig, isa)
}
/// Create a funcref for `libcall`.
fn make_funcref(
libcall: LibCall,

View File

@@ -63,7 +63,6 @@ pub use cranelift_codegen_shared::condcodes;
use crate::binemit;
use crate::entity::{entity_impl, PrimaryMap, SecondaryMap};
use crate::isa;
/// Map of value locations.
pub type ValueLocations = SecondaryMap<Value, ValueLoc>;
@@ -71,9 +70,6 @@ pub type ValueLocations = SecondaryMap<Value, ValueLoc>;
/// Map of jump tables.
pub type JumpTables = PrimaryMap<JumpTable, JumpTableData>;
/// Map of instruction encodings.
pub type InstEncodings = SecondaryMap<Inst, isa::Encoding>;
/// Code offsets for blocks.
pub type BlockOffsets = SecondaryMap<Block, binemit::CodeOffset>;

View File

@@ -8,9 +8,8 @@
//! are satisfied.
use crate::binemit::CodeOffset;
use crate::ir::{Function, Inst, ValueLoc};
use crate::ir::ValueLoc;
use crate::isa::{RegClass, RegUnit};
use crate::regalloc::RegDiversions;
/// Register constraint for a single value operand or instruction result.
#[derive(PartialEq, Debug)]
@@ -87,69 +86,6 @@ pub enum ConstraintKind {
Stack,
}
/// Value operand constraints for an encoding recipe.
#[derive(PartialEq, Clone)]
pub struct RecipeConstraints {
/// Constraints for the instruction's fixed value operands.
///
/// If the instruction takes a variable number of operands, the register constraints for those
/// operands must be computed dynamically.
///
/// - For branches and jumps, block arguments must match the expectations of the destination block.
/// - For calls and returns, the calling convention ABI specifies constraints.
pub ins: &'static [OperandConstraint],
/// Constraints for the instruction's fixed results.
///
/// If the instruction produces a variable number of results, it's probably a call and the
/// constraints must be derived from the calling convention ABI.
pub outs: &'static [OperandConstraint],
/// Are any of the input constraints `FixedReg` or `FixedTied`?
pub fixed_ins: bool,
/// Are any of the output constraints `FixedReg` or `FixedTied`?
pub fixed_outs: bool,
/// Are any of the input/output constraints `Tied` (but not `FixedTied`)?
pub tied_ops: bool,
/// Does this instruction clobber the CPU flags?
///
/// When true, SSA values of type `iflags` or `fflags` can not be live across the instruction.
pub clobbers_flags: bool,
}
impl RecipeConstraints {
/// Check that these constraints are satisfied by the operands on `inst`.
pub fn satisfied(&self, inst: Inst, divert: &RegDiversions, func: &Function) -> bool {
for (&arg, constraint) in func.dfg.inst_args(inst).iter().zip(self.ins) {
let loc = divert.get(arg, &func.locations);
if let ConstraintKind::Tied(out_index) = constraint.kind {
let out_val = func.dfg.inst_results(inst)[out_index as usize];
let out_loc = func.locations[out_val];
if loc != out_loc {
return false;
}
}
if !constraint.satisfied(loc) {
return false;
}
}
for (&arg, constraint) in func.dfg.inst_results(inst).iter().zip(self.outs) {
let loc = divert.get(arg, &func.locations);
if !constraint.satisfied(loc) {
return false;
}
}
true
}
}
/// Constraints on the range of a branch instruction.
///
/// A branch instruction usually encodes its destination as a signed n-bit offset from an origin.

View File

@@ -1,221 +0,0 @@
//! Support types for generated encoding tables.
//!
//! This module contains types and functions for working with the encoding tables generated by
//! `cranelift-codegen/meta/src/gen_encodings.rs`.
use crate::constant_hash::Table;
use crate::ir::{Function, InstructionData, Opcode, Type};
use crate::isa::{Encoding, Legalize};
use crate::settings::PredicateView;
/// A recipe predicate.
///
/// This is a predicate function capable of testing ISA and instruction predicates simultaneously.
///
/// A None predicate is always satisfied.
pub type RecipePredicate = Option<fn(PredicateView, &InstructionData) -> bool>;
/// An instruction predicate.
///
/// This is a predicate function that needs to be tested in addition to the recipe predicate. It
/// can't depend on ISA settings.
pub type InstPredicate = fn(&Function, &InstructionData) -> bool;
/// Legalization action to perform when no encoding can be found for an instruction.
///
/// This is an index into an ISA-specific table of legalization actions.
pub type LegalizeCode = u8;
/// Level 1 hash table entry.
///
/// One level 1 hash table is generated per CPU mode. This table is keyed by the controlling type
/// variable, using `INVALID` for non-polymorphic instructions.
///
/// The hash table values are references to level 2 hash tables, encoded as an offset in `LEVEL2`
/// where the table begins, and the binary logarithm of its length. All the level 2 hash tables
/// have a power-of-two size.
///
/// Entries are generic over the offset type. It will typically be `u32` or `u16`, depending on the
/// size of the `LEVEL2` table.
///
/// Empty entries are encoded with a `!0` value for `log2len` which will always be out of range.
/// Entries that have a `legalize` value but no level 2 table have an `offset` field that is out of
/// bounds.
pub struct Level1Entry<OffT: Into<u32> + Copy> {
pub ty: Type,
pub log2len: u8,
pub legalize: LegalizeCode,
pub offset: OffT,
}
impl<OffT: Into<u32> + Copy> Table<Type> for [Level1Entry<OffT>] {
fn len(&self) -> usize {
self.len()
}
fn key(&self, idx: usize) -> Option<Type> {
if self[idx].log2len != !0 {
Some(self[idx].ty)
} else {
None
}
}
}
/// Level 2 hash table entry.
///
/// The second level hash tables are keyed by `Opcode`, and contain an offset into the `ENCLISTS`
/// table where the encoding recipes for the instruction are stored.
///
/// Entries are generic over the offset type which depends on the size of `ENCLISTS`. A `u16`
/// offset allows the entries to be only 32 bits each. There is no benefit to dropping down to `u8`
/// for tiny ISAs. The entries won't shrink below 32 bits since the opcode is expected to be 16
/// bits.
///
/// Empty entries are encoded with a `NotAnOpcode` `opcode` field.
pub struct Level2Entry<OffT: Into<u32> + Copy> {
pub opcode: Option<Opcode>,
pub offset: OffT,
}
impl<OffT: Into<u32> + Copy> Table<Opcode> for [Level2Entry<OffT>] {
fn len(&self) -> usize {
self.len()
}
fn key(&self, idx: usize) -> Option<Opcode> {
self[idx].opcode
}
}
/// Encoding list entry.
///
/// Encoding lists are represented as sequences of u16 words.
pub type EncListEntry = u16;
/// Number of bits used to represent a predicate. c.f. `meta/src/gen_encodings.rs`.
const PRED_BITS: u8 = 12;
const PRED_MASK: usize = (1 << PRED_BITS) - 1;
/// First code word representing a predicate check. c.f. `meta/src/gen_encodings.rs`.
const PRED_START: usize = 0x1000;
/// An iterator over legal encodings for the instruction.
pub struct Encodings<'a> {
// Current offset into `enclist`, or out of bounds after we've reached the end.
offset: usize,
// Legalization code to use if no encoding is found.
legalize: LegalizeCode,
inst: &'a InstructionData,
func: &'a Function,
enclist: &'static [EncListEntry],
legalize_actions: &'static [Legalize],
recipe_preds: &'static [RecipePredicate],
inst_preds: &'static [InstPredicate],
isa_preds: PredicateView<'a>,
}
impl<'a> Encodings<'a> {
/// Creates a new instance of `Encodings`.
///
/// This iterator searches for encodings that apply to the given instruction. The encoding
/// lists are laid out such that the first call to `next` returns a valid entry in the list,
/// or `None`.
pub fn new(
offset: usize,
legalize: LegalizeCode,
inst: &'a InstructionData,
func: &'a Function,
enclist: &'static [EncListEntry],
legalize_actions: &'static [Legalize],
recipe_preds: &'static [RecipePredicate],
inst_preds: &'static [InstPredicate],
isa_preds: PredicateView<'a>,
) -> Self {
Encodings {
offset,
inst,
func,
legalize,
isa_preds,
recipe_preds,
inst_preds,
enclist,
legalize_actions,
}
}
/// Get the legalization action that caused the enumeration of encodings to stop.
/// This can be the default legalization action for the type or a custom code for the
/// instruction.
///
/// This method must only be called after the iterator returns `None`.
pub fn legalize(&self) -> Legalize {
debug_assert_eq!(self.offset, !0, "Premature Encodings::legalize()");
self.legalize_actions[self.legalize as usize]
}
/// Check if the `rpred` recipe predicate is satisfied.
fn check_recipe(&self, rpred: RecipePredicate) -> bool {
match rpred {
Some(p) => p(self.isa_preds, self.inst),
None => true,
}
}
/// Check an instruction or isa predicate.
fn check_pred(&self, pred: usize) -> bool {
if let Some(&p) = self.inst_preds.get(pred) {
p(self.func, self.inst)
} else {
let pred = pred - self.inst_preds.len();
self.isa_preds.test(pred)
}
}
}
impl<'a> Iterator for Encodings<'a> {
type Item = Encoding;
fn next(&mut self) -> Option<Encoding> {
while let Some(entryref) = self.enclist.get(self.offset) {
let entry = *entryref as usize;
// Check for "recipe+bits".
let recipe = entry >> 1;
if let Some(&rpred) = self.recipe_preds.get(recipe) {
let bits = self.offset + 1;
if entry & 1 == 0 {
self.offset += 2; // Next entry.
} else {
self.offset = !0; // Stop.
}
if self.check_recipe(rpred) {
return Some(Encoding::new(recipe as u16, self.enclist[bits]));
}
continue;
}
// Check for "stop with legalize".
if entry < PRED_START {
self.legalize = (entry - 2 * self.recipe_preds.len()) as LegalizeCode;
self.offset = !0; // Stop.
return None;
}
// Finally, this must be a predicate entry.
let pred_entry = entry - PRED_START;
let skip = pred_entry >> PRED_BITS;
let pred = pred_entry & PRED_MASK;
if self.check_pred(pred) {
self.offset += 1;
} else if skip == 0 {
self.offset = !0; // Stop.
return None;
} else {
self.offset += 1 + skip;
}
}
None
}
}
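As a hedged illustration of the generated list format that `Encodings::next` decodes, here is a made-up `ENCLISTS` fragment, assuming a table with 16 recipe predicates so the recipe and predicate entry ranges stay distinct:

```rust
// Hypothetical encoding-list fragment and how the iterator walks it.
const ENCLIST: &[EncListEntry] = &[
    0x1001,    // predicate entry: test instruction/ISA predicate #1; skip = 0, so stop if it fails
    2 * 5,     // recipe 5, low bit clear: yield Encoding(5, 0x00AB) if its recipe predicate holds, then continue
    0x00AB,    //   encoding bits for recipe 5
    2 * 7 + 1, // recipe 7, low bit set: yield Encoding(7, 0x00CD) if its recipe predicate holds, then stop
    0x00CD,    //   encoding bits for recipe 7
];
```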

View File

@@ -1,167 +0,0 @@
//! The `Encoding` struct.
use crate::binemit::CodeOffset;
use crate::ir::{Function, Inst};
use crate::isa::constraints::{BranchRange, RecipeConstraints};
use crate::regalloc::RegDiversions;
use core::fmt;
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// Bits needed to encode an instruction as binary machine code.
///
/// The encoding consists of two parts, both specific to the target ISA: An encoding *recipe*, and
/// encoding *bits*. The recipe determines the native instruction format and the mapping of
/// operands to encoded bits. The encoding bits provide additional information to the recipe,
/// typically parts of the opcode.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Encoding {
recipe: u16,
bits: u16,
}
impl Encoding {
/// Create a new `Encoding` containing `(recipe, bits)`.
pub fn new(recipe: u16, bits: u16) -> Self {
Self { recipe, bits }
}
/// Get the recipe number in this encoding.
pub fn recipe(self) -> usize {
self.recipe as usize
}
/// Get the recipe-specific encoding bits.
pub fn bits(self) -> u16 {
self.bits
}
/// Is this a legal encoding, or the default placeholder?
pub fn is_legal(self) -> bool {
self != Self::default()
}
}
/// The default encoding is the illegal one.
impl Default for Encoding {
fn default() -> Self {
Self::new(0xffff, 0xffff)
}
}
/// ISA-independent display of an encoding.
impl fmt::Display for Encoding {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.is_legal() {
write!(f, "{}#{:02x}", self.recipe, self.bits)
} else {
write!(f, "-")
}
}
}
/// Temporary object that holds enough context to properly display an encoding.
/// This is meant to be created by `EncInfo::display()`.
pub struct DisplayEncoding {
pub encoding: Encoding,
pub recipe_names: &'static [&'static str],
}
impl fmt::Display for DisplayEncoding {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if self.encoding.is_legal() {
write!(
f,
"{}#{:02x}",
self.recipe_names[self.encoding.recipe()],
self.encoding.bits
)
} else {
write!(f, "-")
}
}
}
type SizeCalculatorFn = fn(&RecipeSizing, Encoding, Inst, &RegDiversions, &Function) -> u8;
/// Returns the base size of the recipe, assuming it's fixed. This is the default for most
/// encodings; others can be variable and longer than this base size, depending on the
/// registers they're using, and they use a different, platform-specific size function.
pub fn base_size(
sizing: &RecipeSizing,
_: Encoding,
_: Inst,
_: &RegDiversions,
_: &Function,
) -> u8 {
sizing.base_size
}
/// Code size information for an encoding recipe.
///
/// Encoding recipes may have runtime-determined instruction size.
pub struct RecipeSizing {
/// Minimum size in bytes of instructions encoded with this recipe.
pub base_size: u8,
/// Method computing the instruction's real size, given inputs and outputs.
pub compute_size: SizeCalculatorFn,
/// Allowed branch range in this recipe, if any.
///
/// All encoding recipes for branches have exact branch range information.
pub branch_range: Option<BranchRange>,
}
/// Information about all the encodings in this ISA.
#[derive(Clone)]
pub struct EncInfo {
/// Constraints on value operands per recipe.
pub constraints: &'static [RecipeConstraints],
/// Code size information per recipe.
pub sizing: &'static [RecipeSizing],
/// Names of encoding recipes.
pub names: &'static [&'static str],
}
impl EncInfo {
/// Get the value operand constraints for `enc` if it is a legal encoding.
pub fn operand_constraints(&self, enc: Encoding) -> Option<&'static RecipeConstraints> {
self.constraints.get(enc.recipe())
}
/// Create an object that can display an ISA-dependent encoding properly.
pub fn display(&self, enc: Encoding) -> DisplayEncoding {
DisplayEncoding {
encoding: enc,
recipe_names: self.names,
}
}
/// Get the size in bytes of `inst`, if it were encoded with `enc`.
///
/// Returns 0 for illegal encodings.
pub fn byte_size(
&self,
enc: Encoding,
inst: Inst,
divert: &RegDiversions,
func: &Function,
) -> CodeOffset {
self.sizing.get(enc.recipe()).map_or(0, |s| {
let compute_size = s.compute_size;
CodeOffset::from(compute_size(&s, enc, inst, divert, func))
})
}
/// Get the branch range that is supported by `enc`, if any.
///
/// This will never return `None` for a legal branch encoding.
pub fn branch_range(&self, enc: Encoding) -> Option<BranchRange> {
self.sizing.get(enc.recipe()).and_then(|s| s.branch_range)
}
}
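A small usage sketch of the `Encoding` conventions removed here (`encoding_demo` and the values are arbitrary): a legal encoding displays as `recipe#bits` in hex, and the all-ones default is reserved as the illegal/unencoded marker.

```rust
fn encoding_demo() {
    let enc = Encoding::new(3, 0x2a);
    assert!(enc.is_legal());
    assert_eq!(enc.recipe(), 3);
    assert_eq!(enc.to_string(), "3#2a"); // recipe number, then bits in hex
    assert_eq!(Encoding::default().to_string(), "-");
    assert!(!Encoding::default().is_legal());
}
```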

View File

@@ -44,26 +44,19 @@
//! concurrent function compilations.
pub use crate::isa::call_conv::CallConv;
pub use crate::isa::constraints::{
BranchRange, ConstraintKind, OperandConstraint, RecipeConstraints,
};
pub use crate::isa::enc_tables::Encodings;
pub use crate::isa::encoding::{base_size, EncInfo, Encoding};
pub use crate::isa::constraints::{BranchRange, ConstraintKind, OperandConstraint};
pub use crate::isa::registers::{regs_overlap, RegClass, RegClassIndex, RegInfo, RegUnit};
pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef};
use crate::binemit;
use crate::flowgraph;
use crate::ir;
#[cfg(feature = "unwind")]
use crate::isa::unwind::systemv::RegisterMappingError;
use crate::machinst::{MachBackend, UnwindInfoKind};
use crate::regalloc;
use crate::result::CodegenResult;
use crate::settings;
use crate::settings::SetResult;
use crate::timing;
use alloc::{borrow::Cow, boxed::Box, vec::Vec};
use alloc::{boxed::Box, vec::Vec};
use core::any::Any;
use core::fmt;
use core::fmt::{Debug, Formatter};
@@ -88,8 +81,6 @@ pub mod unwind;
mod call_conv;
mod constraints;
mod enc_tables;
mod encoding;
pub mod registers;
mod stack;
@@ -329,125 +320,6 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
Err(RegisterMappingError::UnsupportedArchitecture)
}
/// Returns an iterator over legal encodings for the instruction.
fn legal_encodings<'a>(
&'a self,
func: &'a ir::Function,
inst: &'a ir::InstructionData,
ctrl_typevar: ir::Type,
) -> Encodings<'a>;
/// Encode an instruction after determining it is legal.
///
/// If `inst` can legally be encoded in this ISA, produce the corresponding `Encoding` object.
/// Otherwise, return `Legalize` action.
///
/// This is also the main entry point for determining if an instruction is legal.
fn encode(
&self,
func: &ir::Function,
inst: &ir::InstructionData,
ctrl_typevar: ir::Type,
) -> Result<Encoding, Legalize> {
let mut iter = self.legal_encodings(func, inst, ctrl_typevar);
iter.next().ok_or_else(|| iter.legalize())
}
/// Get a data structure describing the instruction encodings in this ISA.
fn encoding_info(&self) -> EncInfo;
/// Legalize a function signature.
///
/// This is used to legalize both the signature of the function being compiled and any called
/// functions. The signature should be modified by adding `ArgumentLoc` annotations to all
/// arguments and return values.
///
/// Arguments with types that are not supported by the ABI can be expanded into multiple
/// arguments:
///
/// - Integer types that are too large to fit in a register can be broken into multiple
/// arguments of a smaller integer type.
/// - Floating point types can be bit-cast to an integer type of the same size, and possibly
/// broken into smaller integer types.
/// - Vector types can be bit-cast and broken down into smaller vectors or scalars.
///
/// The legalizer will adapt argument and return values as necessary at all ABI boundaries.
///
/// When this function is called to legalize the signature of the function currently being
/// compiled, `current` is true. The legalized signature can then also contain special purpose
/// arguments and return values such as:
///
/// - A `link` argument representing the link registers on RISC architectures that don't push
/// the return address on the stack.
/// - A `link` return value which will receive the value that was passed to the `link`
/// argument.
/// - An `sret` argument can be added if one wasn't present already. This is necessary if the
/// signature returns more values than there are registers available for returning values.
/// - An `sret` return value can be added if the ABI requires a function to return its `sret`
/// argument in a register.
///
/// Arguments and return values for the caller's frame pointer and other callee-saved registers
/// should not be added by this function. These arguments are not added until after register
/// allocation.
fn legalize_signature(&self, sig: &mut Cow<ir::Signature>, current: bool);
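As a rough illustration of the splitting behaviour documented above, the sketch below breaks any integer parameter wider than the machine word into word-sized pieces, the way an `i128` argument ends up as two `i64` arguments on a 64-bit target. The `Param` type and `legalize_params` helper are hypothetical models, not the crate's `AbiParam` machinery.

```rust
/// Hypothetical parameter: just a bit width.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Param {
    bits: u32,
}

/// Split every parameter wider than `word_bits` into word-sized halves,
/// repeatedly, mirroring the `isplit`-based signature legalization.
fn legalize_params(params: &[Param], word_bits: u32) -> Vec<Param> {
    let mut out = Vec::new();
    for &p in params {
        let mut pending = vec![p];
        while let Some(p) = pending.pop() {
            if p.bits > word_bits {
                pending.push(Param { bits: p.bits / 2 });
                pending.push(Param { bits: p.bits / 2 });
            } else {
                out.push(p);
            }
        }
    }
    out
}

fn main() {
    // An i128 argument becomes two i64 arguments on a 64-bit target.
    let legalized = legalize_params(&[Param { bits: 128 }, Param { bits: 32 }], 64);
    assert_eq!(
        legalized,
        vec![Param { bits: 64 }, Param { bits: 64 }, Param { bits: 32 }]
    );
}
```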
/// Get the register class that should be used to represent an ABI argument or return value of
/// type `ty`. This should be the top-level register class that contains the argument
/// registers.
///
/// This function can assume that it will only be asked to provide register classes for types
/// that `legalize_signature()` produces in `ArgumentLoc::Reg` entries.
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass;
/// Get the set of allocatable registers that can be used when compiling `func`.
///
/// This set excludes reserved registers like the stack pointer and other special-purpose
/// registers.
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet;
/// Compute the stack layout and insert prologue and epilogue code into `func`.
///
/// Return an error if the stack frame is too large.
fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> {
let _tt = timing::prologue_epilogue();
// This default implementation is unlikely to be good enough.
use crate::ir::stackslot::{StackOffset, StackSize};
use crate::stack_layout::layout_stack;
let word_size = StackSize::from(self.pointer_bytes());
// Account for the SpiderMonkey standard prologue pushes.
if func.signature.call_conv.extends_baldrdash() {
let bytes = StackSize::from(self.flags().baldrdash_prologue_words()) * word_size;
let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
ss.offset = Some(-(bytes as StackOffset));
func.stack_slots.push(ss);
}
let is_leaf = func.is_leaf();
layout_stack(&mut func.stack_slots, is_leaf, word_size)?;
Ok(())
}
/// Emit binary machine code for a single instruction into the `sink` trait object.
///
/// Note that this will call `put*` methods on the `sink` trait object via its vtable, which
/// is not the fastest way of emitting code.
///
/// This function is under the "testing_hooks" feature, and is only suitable for use by
/// test harnesses. It increases code size, and is inefficient.
#[cfg(feature = "testing_hooks")]
fn emit_inst(
&self,
func: &ir::Function,
inst: ir::Inst,
divert: &mut regalloc::RegDiversions,
sink: &mut dyn binemit::CodeSink,
);
/// Emit a whole function into memory.
fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut binemit::MemoryCodeSink);
/// IntCC condition for Unsigned Addition Overflow (Carry).
fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC;

File diff suppressed because it is too large

View File

@@ -1,40 +0,0 @@
//! Expanding instructions as runtime library calls.
use crate::ir;
use crate::ir::{libcall::get_libcall_funcref, InstBuilder};
use crate::isa::{CallConv, TargetIsa};
use crate::legalizer::boundary::legalize_libcall_signature;
use alloc::vec::Vec;
/// Try to expand `inst` as a library call, returning true if successful.
pub fn expand_as_libcall(inst: ir::Inst, func: &mut ir::Function, isa: &dyn TargetIsa) -> bool {
// Does the opcode/ctrl_type combo even have a well-known runtime library name?
let libcall = match ir::LibCall::for_inst(func.dfg[inst].opcode(), func.dfg.ctrl_typevar(inst))
{
Some(lc) => lc,
None => return false,
};
// Now we convert `inst` to a call. First save the arguments.
let mut args = Vec::new();
args.extend_from_slice(func.dfg.inst_args(inst));
let call_conv = CallConv::for_libcall(isa.flags(), isa.default_call_conv());
if call_conv.extends_baldrdash() {
let vmctx = func
.special_param(ir::ArgumentPurpose::VMContext)
.expect("Missing vmctx parameter for baldrdash libcall");
args.push(vmctx);
}
// The replace builder will preserve the instruction result values.
let funcref = get_libcall_funcref(libcall, call_conv, func, inst, isa);
func.dfg.replace(inst).call(funcref, &args);
// Ask the ISA to legalize the signature.
let fn_data = &func.dfg.ext_funcs[funcref];
let sig_data = &mut func.dfg.signatures[fn_data.signature];
legalize_libcall_signature(sig_data, isa);
true
}

View File

@@ -19,179 +19,14 @@ use crate::ir::types::I32;
use crate::ir::{self, InstBuilder, MemFlags};
use crate::isa::TargetIsa;
use crate::timing;
use alloc::collections::BTreeSet;
mod boundary;
mod globalvalue;
mod heap;
mod libcall;
mod split;
mod table;
use self::globalvalue::expand_global_value;
use self::heap::expand_heap_addr;
pub(crate) use self::libcall::expand_as_libcall;
use self::table::expand_table_addr;
enum LegalizeInstResult {
Done,
Legalized,
SplitLegalizePending,
}
/// Legalize `inst` for `isa`.
fn legalize_inst(
inst: ir::Inst,
pos: &mut FuncCursor,
cfg: &mut ControlFlowGraph,
isa: &dyn TargetIsa,
) -> LegalizeInstResult {
let opcode = pos.func.dfg[inst].opcode();
// Check for ABI boundaries that need to be converted to the legalized signature.
if opcode.is_call() {
if boundary::handle_call_abi(isa, inst, pos.func, cfg) {
return LegalizeInstResult::Legalized;
}
} else if opcode.is_return() {
if boundary::handle_return_abi(inst, pos.func, cfg) {
return LegalizeInstResult::Legalized;
}
} else if opcode.is_branch() {
split::simplify_branch_arguments(&mut pos.func.dfg, inst);
} else if opcode == ir::Opcode::Isplit {
pos.use_srcloc(inst);
let arg = match pos.func.dfg[inst] {
ir::InstructionData::Unary { arg, .. } => pos.func.dfg.resolve_aliases(arg),
_ => panic!("Expected isplit: {}", pos.func.dfg.display_inst(inst, None)),
};
match pos.func.dfg.value_def(arg) {
ir::ValueDef::Result(inst, _num) => {
if let ir::InstructionData::Binary {
opcode: ir::Opcode::Iconcat,
..
} = pos.func.dfg[inst]
{
// `arg` was created by an `iconcat` instruction.
} else {
// `arg` was not created by an `iconcat` instruction. Don't try to resolve it,
// as otherwise `split::isplit` will re-insert the original `isplit`, causing
// an endless loop.
return LegalizeInstResult::SplitLegalizePending;
}
}
ir::ValueDef::Param(_block, _num) => {}
}
let res = pos.func.dfg.inst_results(inst).to_vec();
assert_eq!(res.len(), 2);
let (resl, resh) = (res[0], res[1]); // Prevent borrowck error
// Remove old isplit
pos.func.dfg.clear_results(inst);
pos.remove_inst();
let curpos = pos.position();
let srcloc = pos.srcloc();
let (xl, xh) = split::isplit(pos.func, cfg, curpos, srcloc, arg);
pos.func.dfg.change_to_alias(resl, xl);
pos.func.dfg.change_to_alias(resh, xh);
return LegalizeInstResult::Legalized;
}
match pos.func.update_encoding(inst, isa) {
Ok(()) => LegalizeInstResult::Done,
Err(action) => {
// We should transform the instruction into legal equivalents.
// If the current instruction was replaced, we need to double back and revisit
// the expanded sequence. This is both to assign encodings and possibly to
// expand further.
// There's a risk of infinite looping here if the legalization patterns are
// unsound. Should we attempt to detect that?
if action(inst, pos.func, cfg, isa) {
return LegalizeInstResult::Legalized;
}
// We don't have any pattern expansion for this instruction either.
// Try converting it to a library call as a last resort.
if expand_as_libcall(inst, pos.func, isa) {
LegalizeInstResult::Legalized
} else {
LegalizeInstResult::Done
}
}
}
}
/// Legalize `func` for `isa`.
///
/// - Transform any instructions that don't have a legal representation in `isa`.
/// - Fill out `func.encodings`.
///
pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) {
let _tt = timing::legalize();
debug_assert!(cfg.is_valid());
boundary::legalize_signatures(func, isa);
func.encodings.resize(func.dfg.num_insts());
let mut pos = FuncCursor::new(func);
let func_begin = pos.position();
// Split block params before trying to legalize instructions, so that the newly introduced
// isplit instructions get legalized.
while let Some(block) = pos.next_block() {
split::split_block_params(pos.func, cfg, block);
}
pos.set_position(func_begin);
// This must be a set to prevent trying to legalize `isplit` and `vsplit` twice in certain cases.
let mut pending_splits = BTreeSet::new();
// Process blocks in layout order. Some legalization actions may split the current block or append
// new ones to the end. We need to make sure we visit those new blocks too.
while let Some(_block) = pos.next_block() {
// Keep track of the cursor position before the instruction being processed, so we can
// double back when replacing instructions.
let mut prev_pos = pos.position();
while let Some(inst) = pos.next_inst() {
match legalize_inst(inst, &mut pos, cfg, isa) {
// Remember this position in case we need to double back.
LegalizeInstResult::Done => prev_pos = pos.position(),
// Go back and legalize the inserted return value conversion instructions.
LegalizeInstResult::Legalized => pos.set_position(prev_pos),
// The argument of an `isplit` or `vsplit` instruction didn't resolve to an
// `iconcat` or `vconcat` instruction. Try again after legalizing the rest of
// the instructions.
LegalizeInstResult::SplitLegalizePending => {
pending_splits.insert(inst);
}
}
}
}
// Try legalizing `isplit` and `vsplit` instructions, which could not previously be legalized.
for inst in pending_splits {
pos.goto_inst(inst);
legalize_inst(inst, &mut pos, cfg, isa);
}
// Now that we've lowered all br_tables, we don't need the jump tables anymore.
if !isa.flags().enable_jump_tables() {
pos.func.jump_tables.clear();
}
}
/// Perform a simple legalization by expansion of the function, without
/// platform-specific transforms.
pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) {

View File

@@ -1,405 +0,0 @@
//! Value splitting.
//!
//! Some value types are too large to fit in registers, so they need to be split into smaller parts
//! that the ISA can operate on. There are two dimensions of splitting, represented by two
//! complementary instruction pairs:
//!
//! - `isplit` and `iconcat` for splitting integer types into smaller integers.
//! - `vsplit` and `vconcat` for splitting vector types into smaller vector types with the same
//! lane types.
//!
//! There is no floating point splitting. If an ISA doesn't support `f64` values, they probably
//! have to be bit-cast to `i64` and possibly split into two `i32` values that fit in registers.
//! This breakdown is handled by the ABI lowering.
//!
//! When legalizing a single instruction, it is wrapped in splits and concatenations:
//!
//! ```clif
//! v1 = bxor.i64 v2, v3
//! ```
//!
//! becomes:
//!
//! ```clif
//! v20, v21 = isplit v2
//! v30, v31 = isplit v3
//! v10 = bxor.i32 v20, v30
//! v11 = bxor.i32 v21, v31
//! v1 = iconcat v10, v11
//! ```
//!
//! This local expansion approach still leaves the original `i64` values in the code as operands on
//! the `split` and `concat` instructions. It also creates a lot of redundant code to clean up as
//! values are constantly split and concatenated.
//!
//! # Optimized splitting
//!
//! We can eliminate a lot of the splitting code quite easily. Whenever we need to split a value,
//! first check if the value is defined by the corresponding concatenation. If so, then just use
//! the two concatenation inputs directly:
//!
//! ```clif
//! v4 = iadd_imm.i64 v1, 1
//! ```
//!
//! becomes, using the expanded code from above:
//!
//! ```clif
//! v40, v5 = iadd_imm_cout.i32 v10, 1
//! v6 = bint.i32 v5
//! v41 = iadd.i32 v11, v6
//! v4 = iconcat v40, v41
//! ```
//!
//! This means that the `iconcat` instructions defining `v1` and `v4` end up with no uses, so they
//! can be trivially deleted by a dead code elimination pass.
//!
//! # Block arguments
//!
//! If all instructions that produce an `i64` value are legalized as above, we will eventually end
//! up with no `i64` values anywhere, except for block arguments. We can work around this by
//! iteratively splitting block arguments too. That should leave us with no illegal value types
//! anywhere.
//!
//! It is possible to have circular dependencies of block arguments that are never used by any real
//! instructions. These loops will remain in the program.
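A compact, self-contained model of the "reuse the concatenation inputs" rule described above. The `Def` map, `split` helper, and integer value numbers are illustrative only; the real code operates on the Cranelift `DataFlowGraph`.

```rust
use std::collections::HashMap;

type Value = u32;

/// How a value was defined, in this toy model.
#[derive(Clone, Copy)]
enum Def {
    /// v = iconcat(lo, hi)
    Concat(Value, Value),
    /// Anything else.
    Other,
}

/// Split `v` into (lo, hi). If `v` was produced by a concatenation, reuse its
/// operands; otherwise "insert" a fresh split (modelled by allocating new values).
fn split(defs: &HashMap<Value, Def>, next_value: &mut Value, v: Value) -> (Value, Value) {
    match defs.get(&v) {
        Some(Def::Concat(lo, hi)) => (*lo, *hi),
        _ => {
            let lo = *next_value;
            let hi = *next_value + 1;
            *next_value += 2;
            (lo, hi)
        }
    }
}

fn main() {
    let mut defs = HashMap::new();
    defs.insert(3, Def::Concat(1, 2)); // v3 = iconcat v1, v2
    defs.insert(4, Def::Other); // v4 defined some other way
    let mut next = 10;
    assert_eq!(split(&defs, &mut next, 3), (1, 2)); // reuses the concat inputs
    assert_eq!(split(&defs, &mut next, 4), (10, 11)); // needs a real isplit
}
```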
use crate::cursor::{Cursor, CursorPosition, FuncCursor};
use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
use crate::ir::{self, Block, Inst, InstBuilder, InstructionData, Opcode, Type, Value, ValueDef};
use alloc::vec::Vec;
use core::iter;
use smallvec::SmallVec;
/// Split `value` into two values using the `isplit` semantics. Do this by reusing existing values
/// if possible.
pub fn isplit(
func: &mut ir::Function,
cfg: &ControlFlowGraph,
pos: CursorPosition,
srcloc: ir::SourceLoc,
value: Value,
) -> (Value, Value) {
split_any(func, cfg, pos, srcloc, value, Opcode::Iconcat)
}
/// Split `value` into halves using the `vsplit` semantics. Do this by reusing existing values if
/// possible.
pub fn vsplit(
func: &mut ir::Function,
cfg: &ControlFlowGraph,
pos: CursorPosition,
srcloc: ir::SourceLoc,
value: Value,
) -> (Value, Value) {
split_any(func, cfg, pos, srcloc, value, Opcode::Vconcat)
}
/// After splitting a block argument, we need to go back and fix up all of the predecessor
/// instructions. This is potentially a recursive operation, but we don't implement it recursively
/// since that could use up too much stack.
///
/// Instead, the repairs are deferred and placed on a work list in stack form.
struct Repair {
concat: Opcode,
// The argument type after splitting.
split_type: Type,
// The destination block whose arguments have been split.
block: Block,
// Number of the original block argument which has been replaced by the low part.
num: usize,
// Number of the new block argument which represents the high part after the split.
hi_num: usize,
}
/// Generic version of `isplit` and `vsplit` controlled by the `concat` opcode.
fn split_any(
func: &mut ir::Function,
cfg: &ControlFlowGraph,
pos: CursorPosition,
srcloc: ir::SourceLoc,
value: Value,
concat: Opcode,
) -> (Value, Value) {
let mut repairs = Vec::new();
let pos = &mut FuncCursor::new(func).at_position(pos).with_srcloc(srcloc);
let result = split_value(pos, value, concat, &mut repairs);
perform_repairs(pos, cfg, repairs);
result
}
pub fn split_block_params(func: &mut ir::Function, cfg: &ControlFlowGraph, block: Block) {
let pos = &mut FuncCursor::new(func).at_top(block);
let block_params = pos.func.dfg.block_params(block);
// Add further splittable types here.
fn type_requires_splitting(ty: Type) -> bool {
ty == ir::types::I128
}
// A shortcut. If none of the param types require splitting, exit now. This helps because
// the loop below necessarily has to copy the block params into a new vector, so it's better to
// avoid doing so when possible.
if !block_params
.iter()
.any(|block_param| type_requires_splitting(pos.func.dfg.value_type(*block_param)))
{
return;
}
let mut repairs = Vec::new();
for (num, block_param) in block_params.to_vec().into_iter().enumerate() {
if !type_requires_splitting(pos.func.dfg.value_type(block_param)) {
continue;
}
split_block_param(pos, block, num, block_param, Opcode::Iconcat, &mut repairs);
}
perform_repairs(pos, cfg, repairs);
}
fn perform_repairs(pos: &mut FuncCursor, cfg: &ControlFlowGraph, mut repairs: Vec<Repair>) {
// We have split the value requested, and now we may need to fix some block predecessors.
while let Some(repair) = repairs.pop() {
for BlockPredecessor { inst, .. } in cfg.pred_iter(repair.block) {
let branch_opc = pos.func.dfg[inst].opcode();
debug_assert!(
branch_opc.is_branch(),
"Predecessor not a branch: {}",
pos.func.dfg.display_inst(inst, None)
);
let num_fixed_args = branch_opc.constraints().num_fixed_value_arguments();
let mut args = pos.func.dfg[inst]
.take_value_list()
.expect("Branches must have value lists.");
let num_args = args.len(&pos.func.dfg.value_lists);
// Get the old value passed to the block argument we're repairing.
let old_arg = args
.get(num_fixed_args + repair.num, &pos.func.dfg.value_lists)
.expect("Too few branch arguments");
// It's possible that the CFG's predecessor list has duplicates. Detect them here.
if pos.func.dfg.value_type(old_arg) == repair.split_type {
pos.func.dfg[inst].put_value_list(args);
continue;
}
// Split the old argument, possibly causing more repairs to be scheduled.
pos.goto_inst(inst);
let inst_block = pos.func.layout.inst_block(inst).expect("inst in block");
// Insert split values prior to the terminal branch group.
let canonical = pos
.func
.layout
.canonical_branch_inst(&pos.func.dfg, inst_block);
if let Some(first_branch) = canonical {
pos.goto_inst(first_branch);
}
let (lo, hi) = split_value(pos, old_arg, repair.concat, &mut repairs);
// The `lo` part replaces the original argument.
*args
.get_mut(num_fixed_args + repair.num, &mut pos.func.dfg.value_lists)
.unwrap() = lo;
// The `hi` part goes at the end. Since multiple repairs may have been scheduled to the
// same block, there could be multiple arguments missing.
if num_args > num_fixed_args + repair.hi_num {
*args
.get_mut(
num_fixed_args + repair.hi_num,
&mut pos.func.dfg.value_lists,
)
.unwrap() = hi;
} else {
// We need to append one or more arguments. If we're adding more than one argument,
// there must be pending repairs on the stack that will fill in the correct values
// instead of `hi`.
args.extend(
iter::repeat(hi).take(1 + num_fixed_args + repair.hi_num - num_args),
&mut pos.func.dfg.value_lists,
);
}
// Put the value list back after manipulating it.
pos.func.dfg[inst].put_value_list(args);
}
}
}
/// Split a single value using the integer or vector semantics given by the `concat` opcode.
///
/// If the value is defined by a `concat` instruction, just reuse the operand values of that
/// instruction.
///
/// Return the two new values representing the parts of `value`.
fn split_value(
pos: &mut FuncCursor,
value: Value,
concat: Opcode,
repairs: &mut Vec<Repair>,
) -> (Value, Value) {
let value = pos.func.dfg.resolve_aliases(value);
let mut reuse = None;
match pos.func.dfg.value_def(value) {
ValueDef::Result(inst, num) => {
// This is an instruction result. See if the value was created by a `concat`
// instruction.
if let InstructionData::Binary { opcode, args, .. } = pos.func.dfg[inst] {
debug_assert_eq!(num, 0);
if opcode == concat {
reuse = Some((args[0], args[1]));
}
}
}
ValueDef::Param(block, num) => {
// This is a block parameter.
// We can split the parameter value unless this is the entry block.
if pos.func.layout.entry_block() != Some(block) {
reuse = Some(split_block_param(pos, block, num, value, concat, repairs));
}
}
}
// Did the code above succeed in finding values we can reuse?
if let Some(pair) = reuse {
pair
} else {
// No, we'll just have to insert the requested split instruction at `pos`. Note that `pos`
// has not been moved by the block argument code above when `reuse` is `None`.
match concat {
Opcode::Iconcat => pos.ins().isplit(value),
Opcode::Vconcat => pos.ins().vsplit(value),
_ => panic!("Unhandled concat opcode: {}", concat),
}
}
}
fn split_block_param(
pos: &mut FuncCursor,
block: Block,
param_num: usize,
value: Value,
concat: Opcode,
repairs: &mut Vec<Repair>,
) -> (Value, Value) {
// We are going to replace the parameter at `num` with two new arguments.
// Determine the new value types.
let ty = pos.func.dfg.value_type(value);
let split_type = match concat {
Opcode::Iconcat => ty.half_width().expect("Invalid type for isplit"),
Opcode::Vconcat => ty.half_vector().expect("Invalid type for vsplit"),
_ => panic!("Unhandled concat opcode: {}", concat),
};
// Since the `repairs` stack potentially contains other parameter numbers for
// `block`, avoid shifting and renumbering block parameters. It could invalidate other
// `repairs` entries.
//
// Replace the original `value` with the low part, and append the high part at the
// end of the argument list.
let lo = pos.func.dfg.replace_block_param(value, split_type);
let hi_num = pos.func.dfg.num_block_params(block);
let hi = pos.func.dfg.append_block_param(block, split_type);
// Now the original value is dangling. Insert a concatenation instruction that can
// compute it from the two new parameters. This also serves as a record of what we
// did so a future call to this function doesn't have to redo the work.
//
// Note that it is safe to move `pos` here since `reuse` was set above, so we don't
// need to insert a split instruction before returning.
pos.goto_first_inst(block);
pos.ins()
.with_result(value)
.Binary(concat, split_type, lo, hi);
// Finally, splitting the block parameter is not enough. We also have to repair all
// of the predecessor instructions that branch here.
add_repair(concat, split_type, block, param_num, hi_num, repairs);
(lo, hi)
}
// Add a repair entry to the work list.
fn add_repair(
concat: Opcode,
split_type: Type,
block: Block,
num: usize,
hi_num: usize,
repairs: &mut Vec<Repair>,
) {
repairs.push(Repair {
concat,
split_type,
block,
num,
hi_num,
});
}
/// Strip concat-split chains. Return a simpler way of computing the same value.
///
/// Given this input:
///
/// ```clif
/// v10 = iconcat v1, v2
/// v11, v12 = isplit v10
/// ```
///
/// This function resolves `v11` to `v1` and `v12` to `v2`.
fn resolve_splits(dfg: &ir::DataFlowGraph, value: Value) -> Value {
let value = dfg.resolve_aliases(value);
// Deconstruct a split instruction.
let split_res;
let concat_opc;
let split_arg;
if let ValueDef::Result(inst, num) = dfg.value_def(value) {
split_res = num;
concat_opc = match dfg[inst].opcode() {
Opcode::Isplit => Opcode::Iconcat,
Opcode::Vsplit => Opcode::Vconcat,
_ => return value,
};
split_arg = dfg.inst_args(inst)[0];
} else {
return value;
}
// See if split_arg is defined by a concatenation instruction.
if let ValueDef::Result(inst, _) = dfg.value_def(split_arg) {
if dfg[inst].opcode() == concat_opc {
return dfg.inst_args(inst)[split_res];
}
}
value
}
/// Simplify the arguments to a branch *after* the instructions leading up to the branch have been
/// legalized.
///
/// The branch argument repairs performed by `split_any()` above may be performed on branches that
/// have not yet been legalized. The repaired arguments can be defined by actual split
/// instructions in that case.
///
/// After legalizing the instructions computing the value that was split, it is likely that we can
/// avoid depending on the split instruction. Its input probably comes from a concatenation.
pub fn simplify_branch_arguments(dfg: &mut ir::DataFlowGraph, branch: Inst) {
let mut new_args = SmallVec::<[Value; 32]>::new();
for &arg in dfg.inst_args(branch) {
let new_arg = resolve_splits(dfg, arg);
new_args.push(new_arg);
}
dfg.inst_args_mut(branch).copy_from_slice(&new_args);
}

View File

@@ -59,7 +59,6 @@ use hashbrown::{hash_map, HashMap, HashSet};
use std::collections::{hash_map, HashMap, HashSet};
pub use crate::context::Context;
pub use crate::legalizer::legalize_function;
pub use crate::value_label::{ValueLabelsRanges, ValueLocRange};
pub use crate::verifier::verify_function;
pub use crate::write::write_function;
@@ -87,7 +86,6 @@ pub use crate::entity::packed_option;
pub use crate::machinst::buffer::MachSrcLoc;
pub use crate::machinst::TextSectionBuilder;
mod abi;
mod bitset;
mod constant_hash;
mod context;
@@ -101,18 +99,12 @@ mod licm;
mod log;
mod machinst;
mod nan_canonicalization;
mod partition_slice;
mod postopt;
mod predicates;
mod redundant_reload_remover;
mod regalloc;
mod remove_constant_phis;
mod result;
mod scoped_hash_map;
mod simple_gvn;
mod simple_preopt;
mod stack_layout;
mod topo_order;
mod unreachable_code;
mod value_label;

View File

@@ -1,20 +1,14 @@
//! Adapter for a `MachBackend` to implement the `TargetIsa` trait.
use crate::binemit;
use crate::ir;
use crate::isa::{EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa};
use crate::isa::{RegInfo, TargetIsa};
use crate::machinst::*;
use crate::regalloc::RegisterSet;
use crate::settings::{self, Flags};
#[cfg(feature = "testing_hooks")]
use crate::regalloc::RegDiversions;
#[cfg(feature = "unwind")]
use crate::isa::unwind::systemv::RegisterMappingError;
use core::any::Any;
use std::borrow::Cow;
use std::fmt;
use target_lexicon::Triple;
@@ -74,60 +68,6 @@ impl TargetIsa for TargetIsaAdapter {
}
}
fn legal_encodings<'a>(
&'a self,
_func: &'a ir::Function,
_inst: &'a ir::InstructionData,
_ctrl_typevar: ir::Type,
) -> Encodings<'a> {
panic!("Should not be called when new-style backend is available!")
}
fn encode(
&self,
_func: &ir::Function,
_inst: &ir::InstructionData,
_ctrl_typevar: ir::Type,
) -> Result<Encoding, Legalize> {
panic!("Should not be called when new-style backend is available!")
}
fn encoding_info(&self) -> EncInfo {
panic!("Should not be called when new-style backend is available!")
}
fn legalize_signature(&self, _sig: &mut Cow<ir::Signature>, _current: bool) {
panic!("Should not be called when new-style backend is available!")
}
fn regclass_for_abi_type(&self, _ty: ir::Type) -> RegClass {
panic!("Should not be called when new-style backend is available!")
}
fn allocatable_registers(&self, _func: &ir::Function) -> RegisterSet {
panic!("Should not be called when new-style backend is available!")
}
fn prologue_epilogue(&self, _func: &mut ir::Function) -> CodegenResult<()> {
panic!("Should not be called when new-style backend is available!")
}
#[cfg(feature = "testing_hooks")]
fn emit_inst(
&self,
_func: &ir::Function,
_inst: ir::Inst,
_divert: &mut RegDiversions,
_sink: &mut dyn binemit::CodeSink,
) {
panic!("Should not be called when new-style backend is available!")
}
/// Emit a whole function into memory.
fn emit_function_to_memory(&self, _func: &ir::Function, _sink: &mut binemit::MemoryCodeSink) {
panic!("Should not be called when new-style backend is available!")
}
fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
Some(&*self.backend)
}

View File

@@ -1,97 +0,0 @@
//! Rearrange the elements in a slice according to a predicate.
use core::mem;
/// Rearrange the elements of the mutable slice `s` such that elements where `p(t)` is true precede
/// the elements where `p(t)` is false.
///
/// The order of elements is not preserved, unless the slice is already partitioned.
///
/// Returns the number of elements where `p(t)` is true.
pub fn partition_slice<T, F>(s: &mut [T], mut p: F) -> usize
where
F: FnMut(&T) -> bool,
{
// The iterator works like a deque which we can pop from both ends.
let mut i = s.iter_mut();
// Number of elements for which the predicate is known to be true.
let mut pos = 0;
loop {
// Find the first element for which the predicate fails.
let head = loop {
match i.next() {
Some(head) => {
if !p(&head) {
break head;
}
}
None => return pos,
}
pos += 1;
};
// Find the last element for which the predicate succeeds.
let tail = loop {
match i.next_back() {
Some(tail) => {
if p(&tail) {
break tail;
}
}
None => return pos,
}
};
// Swap the two elements into the right order.
mem::swap(head, tail);
pos += 1;
}
}
#[cfg(test)]
mod tests {
use super::partition_slice;
use alloc::vec::Vec;
fn check(x: &[u32], want: &[u32]) {
assert_eq!(x.len(), want.len());
let want_count = want.iter().cloned().filter(|&x| x % 10 == 0).count();
let mut v = Vec::new();
v.extend(x.iter().cloned());
let count = partition_slice(&mut v[..], |&x| x % 10 == 0);
assert_eq!(v, want);
assert_eq!(count, want_count);
}
#[test]
fn empty() {
check(&[], &[]);
}
#[test]
fn singles() {
check(&[0], &[0]);
check(&[1], &[1]);
check(&[10], &[10]);
}
#[test]
fn doubles() {
check(&[0, 0], &[0, 0]);
check(&[0, 5], &[0, 5]);
check(&[5, 0], &[0, 5]);
check(&[5, 4], &[5, 4]);
}
#[test]
fn longer() {
check(&[1, 2, 3], &[1, 2, 3]);
check(&[1, 2, 10], &[10, 2, 1]); // Note: 2, 1 order not required.
check(&[1, 10, 2], &[10, 1, 2]); // Note: 1, 2 order not required.
check(&[1, 20, 10], &[10, 20, 1]); // Note: 10, 20 order not required.
check(&[1, 20, 3, 10], &[10, 20, 3, 1]);
check(&[20, 3, 10, 1], &[20, 10, 3, 1]);
}
}

View File

@@ -1,427 +0,0 @@
//! A post-legalization rewriting pass.
#![allow(non_snake_case)]
use crate::cursor::{Cursor, EncCursor};
use crate::ir::condcodes::{CondCode, FloatCC, IntCC};
use crate::ir::dfg::ValueDef;
use crate::ir::immediates::{Imm64, Offset32};
use crate::ir::instructions::{Opcode, ValueList};
use crate::ir::{Block, Function, Inst, InstBuilder, InstructionData, MemFlags, Type, Value};
use crate::isa::TargetIsa;
use crate::timing;
/// Information collected about a compare+branch sequence.
struct CmpBrInfo {
/// The branch instruction.
br_inst: Inst,
/// The icmp, icmp_imm, or fcmp instruction.
cmp_inst: Inst,
/// The destination of the branch.
destination: Block,
/// The arguments of the branch.
args: ValueList,
/// The first argument to the comparison. The second is in the `kind` field.
cmp_arg: Value,
/// If the branch is `brz` rather than `brnz`, we need to invert the condition
/// before the branch.
invert_branch_cond: bool,
/// The kind of comparison, and the second argument.
kind: CmpBrKind,
}
enum CmpBrKind {
Icmp { cond: IntCC, arg: Value },
IcmpImm { cond: IntCC, imm: Imm64 },
Fcmp { cond: FloatCC, arg: Value },
}
/// Optimize comparisons to use flags values, to avoid materializing conditions
/// in integer registers.
///
/// For example, optimize icmp/fcmp brz/brnz sequences into ifcmp/ffcmp brif/brff
/// sequences.
fn optimize_cpu_flags(
pos: &mut EncCursor,
inst: Inst,
last_flags_clobber: Option<Inst>,
isa: &dyn TargetIsa,
) {
// Look for compare and branch patterns.
// This code could be considerably simplified with non-lexical lifetimes.
let info = match pos.func.dfg[inst] {
InstructionData::Branch {
opcode,
destination,
ref args,
} => {
let first_arg = args.first(&pos.func.dfg.value_lists).unwrap();
let invert_branch_cond = match opcode {
Opcode::Brz => true,
Opcode::Brnz => false,
_ => panic!(),
};
if let ValueDef::Result(cond_inst, _) = pos.func.dfg.value_def(first_arg) {
match pos.func.dfg[cond_inst] {
InstructionData::IntCompare {
cond,
args: cmp_args,
..
} => CmpBrInfo {
br_inst: inst,
cmp_inst: cond_inst,
destination,
args: args.clone(),
cmp_arg: cmp_args[0],
invert_branch_cond,
kind: CmpBrKind::Icmp {
cond,
arg: cmp_args[1],
},
},
InstructionData::IntCompareImm {
cond,
arg: cmp_arg,
imm: cmp_imm,
..
} => CmpBrInfo {
br_inst: inst,
cmp_inst: cond_inst,
destination,
args: args.clone(),
cmp_arg,
invert_branch_cond,
kind: CmpBrKind::IcmpImm { cond, imm: cmp_imm },
},
InstructionData::FloatCompare {
cond,
args: cmp_args,
..
} => CmpBrInfo {
br_inst: inst,
cmp_inst: cond_inst,
destination,
args: args.clone(),
cmp_arg: cmp_args[0],
invert_branch_cond,
kind: CmpBrKind::Fcmp {
cond,
arg: cmp_args[1],
},
},
_ => return,
}
} else {
return;
}
}
// TODO: trapif, trueif, selectif, and their ff counterparts.
_ => return,
};
// If any instructions clobber the flags between the comparison and the branch,
// don't optimize them.
if last_flags_clobber != Some(info.cmp_inst) {
return;
}
// We found a compare+branch pattern. Transform it to use flags.
let args = info.args.as_slice(&pos.func.dfg.value_lists)[1..].to_vec();
pos.goto_inst(info.cmp_inst);
pos.use_srcloc(info.cmp_inst);
match info.kind {
CmpBrKind::Icmp { mut cond, arg } => {
let flags = pos.ins().ifcmp(info.cmp_arg, arg);
pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
if info.invert_branch_cond {
cond = cond.inverse();
}
pos.func
.dfg
.replace(info.br_inst)
.brif(cond, flags, info.destination, &args);
}
CmpBrKind::IcmpImm { mut cond, imm } => {
let flags = pos.ins().ifcmp_imm(info.cmp_arg, imm);
pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
if info.invert_branch_cond {
cond = cond.inverse();
}
pos.func
.dfg
.replace(info.br_inst)
.brif(cond, flags, info.destination, &args);
}
CmpBrKind::Fcmp { mut cond, arg } => {
let flags = pos.ins().ffcmp(info.cmp_arg, arg);
pos.func.dfg.replace(info.cmp_inst).trueff(cond, flags);
if info.invert_branch_cond {
cond = cond.inverse();
}
pos.func
.dfg
.replace(info.br_inst)
.brff(cond, flags, info.destination, &args);
}
}
let ok = pos.func.update_encoding(info.cmp_inst, isa).is_ok();
debug_assert!(ok);
let ok = pos.func.update_encoding(info.br_inst, isa).is_ok();
debug_assert!(ok);
}
struct MemOpInfo {
opcode: Opcode,
itype: Type,
arg: Value,
st_arg: Option<Value>,
flags: MemFlags,
offset: Offset32,
}
fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetIsa) {
// Look for simple loads and stores we can optimize.
let info = match pos.func.dfg[inst] {
InstructionData::Load {
opcode,
arg,
flags,
offset,
} => MemOpInfo {
opcode,
itype: pos.func.dfg.ctrl_typevar(inst),
arg,
st_arg: None,
flags,
offset,
},
InstructionData::Store {
opcode,
args,
flags,
offset,
} => MemOpInfo {
opcode,
itype: pos.func.dfg.ctrl_typevar(inst),
arg: args[1],
st_arg: Some(args[0]),
flags,
offset,
},
_ => return,
};
// Examine the instruction that defines the address operand.
if let ValueDef::Result(result_inst, _) = pos.func.dfg.value_def(info.arg) {
match pos.func.dfg[result_inst] {
InstructionData::Binary {
opcode: Opcode::Iadd,
args,
} => match info.opcode {
// Operand is an iadd. Fold it into a memory address with a complex address mode.
Opcode::Load => {
pos.func.dfg.replace(inst).load_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Uload8 => {
pos.func.dfg.replace(inst).uload8_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Sload8 => {
pos.func.dfg.replace(inst).sload8_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Uload16 => {
pos.func.dfg.replace(inst).uload16_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Sload16 => {
pos.func.dfg.replace(inst).sload16_complex(
info.itype,
info.flags,
&args,
info.offset,
);
}
Opcode::Uload32 => {
pos.func
.dfg
.replace(inst)
.uload32_complex(info.flags, &args, info.offset);
}
Opcode::Sload32 => {
pos.func
.dfg
.replace(inst)
.sload32_complex(info.flags, &args, info.offset);
}
Opcode::Uload8x8 => {
pos.func
.dfg
.replace(inst)
.uload8x8_complex(info.flags, &args, info.offset);
}
Opcode::Sload8x8 => {
pos.func
.dfg
.replace(inst)
.sload8x8_complex(info.flags, &args, info.offset);
}
Opcode::Uload16x4 => {
pos.func
.dfg
.replace(inst)
.uload16x4_complex(info.flags, &args, info.offset);
}
Opcode::Sload16x4 => {
pos.func
.dfg
.replace(inst)
.sload16x4_complex(info.flags, &args, info.offset);
}
Opcode::Uload32x2 => {
pos.func
.dfg
.replace(inst)
.uload32x2_complex(info.flags, &args, info.offset);
}
Opcode::Sload32x2 => {
pos.func
.dfg
.replace(inst)
.sload32x2_complex(info.flags, &args, info.offset);
}
Opcode::Store => {
pos.func.dfg.replace(inst).store_complex(
info.flags,
info.st_arg.unwrap(),
&args,
info.offset,
);
}
Opcode::Istore8 => {
pos.func.dfg.replace(inst).istore8_complex(
info.flags,
info.st_arg.unwrap(),
&args,
info.offset,
);
}
Opcode::Istore16 => {
pos.func.dfg.replace(inst).istore16_complex(
info.flags,
info.st_arg.unwrap(),
&args,
info.offset,
);
}
Opcode::Istore32 => {
pos.func.dfg.replace(inst).istore32_complex(
info.flags,
info.st_arg.unwrap(),
&args,
info.offset,
);
}
_ => panic!("Unsupported load or store opcode"),
},
InstructionData::BinaryImm64 {
opcode: Opcode::IaddImm,
arg,
imm,
} => match pos.func.dfg[inst] {
// Operand is an iadd_imm. Fold the immediate into the offset if possible.
InstructionData::Load {
arg: ref mut load_arg,
ref mut offset,
..
} => {
if let Some(imm) = offset.try_add_i64(imm.into()) {
*load_arg = arg;
*offset = imm;
} else {
// Overflow.
return;
}
}
InstructionData::Store {
args: ref mut store_args,
ref mut offset,
..
} => {
if let Some(imm) = offset.try_add_i64(imm.into()) {
store_args[1] = arg;
*offset = imm;
} else {
// Overflow.
return;
}
}
_ => panic!(),
},
_ => {
// Address value is defined by some other kind of instruction.
return;
}
}
} else {
// Address value is not the result of an instruction.
return;
}
let ok = pos.func.update_encoding(inst, isa).is_ok();
debug_assert!(
ok,
"failed to update encoding for `{}`",
pos.func.dfg.display_inst(inst, isa)
);
}
//----------------------------------------------------------------------
//
// The main post-opt pass.
pub fn do_postopt(func: &mut Function, isa: &dyn TargetIsa) {
let _tt = timing::postopt();
let mut pos = EncCursor::new(func, isa);
let is_mach_backend = isa.get_mach_backend().is_some();
while let Some(_block) = pos.next_block() {
let mut last_flags_clobber = None;
while let Some(inst) = pos.next_inst() {
if !is_mach_backend && isa.uses_cpu_flags() {
// Optimize instructions to make use of flags.
optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa);
// Track the most recently seen instruction that clobbers the flags.
if let Some(constraints) = isa
.encoding_info()
.operand_constraints(pos.func.encodings[inst])
{
if constraints.clobbers_flags {
last_flags_clobber = Some(inst)
}
}
}
if isa.uses_complex_addresses() {
optimize_complex_addresses(&mut pos, inst, isa);
}
}
}
}

View File

@@ -1,904 +0,0 @@
//! This module implements a late-stage redundant-reload remover, which runs after registers have
//! been allocated and stack slots have been given specific offsets.
use crate::cursor::{Cursor, CursorPosition, EncCursor, FuncCursor};
use crate::entity::EntitySet;
use crate::flowgraph::ControlFlowGraph;
use crate::ir::dfg::DataFlowGraph;
use crate::ir::instructions::BranchInfo;
use crate::ir::stackslot::{StackSlotKind, StackSlots};
use crate::ir::{
Block, Function, Inst, InstBuilder, InstructionData, Opcode, StackSlotData, Type, Value,
ValueLoc,
};
use crate::isa::{RegInfo, RegUnit, TargetIsa};
use crate::regalloc::RegDiversions;
use alloc::vec::Vec;
use core::convert::TryInto;
use cranelift_entity::{PrimaryMap, SecondaryMap};
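Before the detailed description below, here is a compact, self-contained model of the core idea: track which register is known to hold each spilled slot's value, turn exact-match fills into nops, redirect other fills into copies, and invalidate bindings on clobbers. The `Inst`, `FillAction`, and `process` names are hypothetical, not the pass's real data structures.

```rust
use std::collections::HashMap;

type Reg = u16;
type Slot = u32;

/// Toy instruction stream for the model.
enum Inst {
    Spill { src: Reg, to: Slot },
    Fill { dst: Reg, from: Slot },
    Clobber { regs: Vec<Reg> },
}

/// What each fill turns into after the pass.
#[derive(Debug, PartialEq)]
enum FillAction {
    Keep,
    Nop,           // register already holds the slot's value
    CopyFrom(Reg), // some other register holds it
}

fn process(block: &[Inst]) -> Vec<FillAction> {
    // reg -> slot whose value it is known to hold (the AvailEnv of the comment below).
    let mut avail: HashMap<Reg, Slot> = HashMap::new();
    let mut actions = Vec::new();
    for inst in block {
        match inst {
            Inst::Spill { src, to } => {
                avail.insert(*src, *to);
            }
            Inst::Fill { dst, from } => {
                let action = if avail.get(dst) == Some(from) {
                    FillAction::Nop
                } else if let Some((&other, _)) = avail.iter().find(|(_, s)| *s == from) {
                    FillAction::CopyFrom(other)
                } else {
                    FillAction::Keep
                };
                actions.push(action);
                avail.insert(*dst, *from);
            }
            Inst::Clobber { regs } => {
                for r in regs {
                    avail.remove(r);
                }
            }
        }
    }
    actions
}

fn main() {
    let block = vec![
        Inst::Spill { src: 0, to: 7 },
        Inst::Fill { dst: 0, from: 7 }, // redundant: r0 already holds ss7
        Inst::Fill { dst: 2, from: 7 }, // r0 still holds ss7, so copy instead
        Inst::Clobber { regs: vec![0, 2] },
        Inst::Fill { dst: 1, from: 7 }, // nothing known any more, keep the fill
    ];
    assert_eq!(
        process(&block),
        vec![FillAction::Nop, FillAction::CopyFrom(0), FillAction::Keep]
    );
}
```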
// =============================================================================================
// A description of the redundant-fill-removal algorithm
//
//
// The algorithm works forwards through each Block. It carries along and updates a table,
// AvailEnv, with which it tracks registers that are known to have the same value as some stack
// slot. The actions on encountering an instruction depend on the instruction, as follows:
//
// ss1 = spill r0: update the AvailEnv so as to note that slot `ss1` and register `r0`
// have the same value.
//
// r1 = fill ss0: look in the AvailEnv. If it tells us that register `r1` and slot `ss0`
// have the same value, then delete the instruction by converting it to a
// `fill_nop`.
//
// If it tells us that some other register `r2` has the same value as
// slot `ss0`, convert the instruction into a copy from `r2` to `r1`.
//
// any other insn: remove from the AvailEnv any bindings associated with registers
// written by this instruction, since they will be invalidated by it.
//
// Tracking the effects of `copy` instructions in AvailEnv for the case when both source and
// destination are registers does not cause any more fills to be removed or converted to copies.
// It's not clear why.
//
// There are various other instruction-handling cases in `visit_inst`, which are documented
// in-line, and do not change the core algorithm, so are not described here.
//
// The registers tracked by AvailEnv are the post-diversion registers that are really used by the
// code; they are not the pre-diversion names associated with each SSA `Value`. The second
// `fill` case above opportunistically copies values from registers that may have been diversion
// targets in some predecessor block, and so are no longer associated with any specific SSA-level
// name at the point the copy is made. Hence those copies (from `r2` to `r1`) cannot be done
// with an ordinary `copy` instruction. Instead they have to be done using a new `copy_to_ssa`
// instruction, which copies from an arbitrary register to a register-resident `Value` (that is,
// "back to" SSA-world).
//
// That completes the description of the core algorithm.
//
// In the case where a block `A` jumps to `B` and `A` is the only predecessor of `B`, the
// AvailEnv at the end of `A` will still be valid at the entry to `B`. In such a case, we can
// profitably transform `B` using the AvailEnv "inherited" from `A`. In order to take full
// advantage of this, this module partitions the function's CFG into tree-shaped groups of
// blocks, and processes each tree as described above. So the AvailEnv is only initialised to
// empty at the start of blocks that form the root of each tree; that is, for blocks which have
// two or more predecessors.
// =============================================================================================
// Top level algorithm structure
//
// The overall algorithm, for a function, starts like this:
//
// * (once per function): finds Blocks that have two or more predecessors, since they will be the
// roots of Block trees. Also, the entry node for the function is considered to be a root.
//
// It then continues with a loop that first finds a tree of Blocks ("discovery") and then removes
// redundant fills as described above ("processing"):
//
// * (discovery; once per tree): for each root, performs a depth first search to find all the Blocks
// in the tree, guided by RedundantReloadRemover::discovery_stack.
//
// * (processing; once per tree): the just-discovered tree is then processed as described above,
// guided by RedundantReloadRemover::processing_stack.
//
// In this way, all Blocks reachable from the function's entry point are eventually processed. Note
// that each tree is processed as soon as it has been discovered, so the algorithm never creates a
// list of trees for the function.
//
// The running state is stored in `RedundantReloadRemover`. This is allocated once and can be
// reused for multiple functions so as to minimise heap turnover. The fields are, roughly:
//
// num_regunits -- constant for the whole function; used by the tree processing phase
// num_preds_per_block -- constant for the whole function; used by the tree discovery process
//
// discovery_stack -- used to guide the tree discovery process
// nodes_in_tree -- the discovered nodes are recorded here
//
// processing_stack -- used to guide the tree processing process
// nodes_already_visited -- used to ensure the tree processing logic terminates in the case
// where a tree has a branch back to its root node.
//
// There is further documentation in line below, as appropriate.
// =============================================================================================
// A side note on register choice heuristics
// The core algorithm opportunistically replaces fill instructions when it knows of a register
// that already holds the required value. How effective this is largely depends on how long
// reloaded values happen to stay alive before the relevant register is overwritten. And that
// depends on the register allocator's register choice heuristics. The worst case is, when the
// register allocator reuses registers as soon as possible after they become free. Unfortunately
// that was indeed the selection scheme, prior to development of this pass.
//
// As part of this work, the register selection scheme has been changed as follows: for registers
// written by any instruction other than a fill, use the lowest numbered available register. But
// for registers written by a fill instruction, use the highest numbered available register. The
// aim is to try and keep reload- and non-reload registers disjoint to the extent possible.
// Several other schemes were tried, but this one is simple and can be worth an extra 2% of
// performance in some cases.
//
// The relevant change is more or less a one-line change in the solver.
// =============================================================================================
// Data structures used for discovery of trees
// `ZeroOneOrMany` is used to record the number of predecessors a Block has. The `Zero` case
// is included so as to cleanly handle the case where the incoming graph has unreachable Blocks.
#[derive(Clone, PartialEq)]
enum ZeroOneOrMany {
Zero,
One,
Many,
}
// =============================================================================================
// Data structures used for processing of trees
// `SlotInfo` describes a spill slot in the obvious way. Note that it doesn't indicate which
// register(s) are currently associated with the slot. That job is done by `AvailEnv` instead.
//
// In the CL framework, stack slots are partitioned into disjoint sets, one for each
// `StackSlotKind`. The offset and size only give a unique identity within any particular
// `StackSlotKind`. So, to uniquely identify a stack slot, all three fields are necessary.
#[derive(Clone, Copy)]
struct SlotInfo {
kind: StackSlotKind,
offset: i32,
size: u32,
}
// `AvailEnv` maps each possible register to a stack slot that holds the same value. The index
// space of `AvailEnv::map` is exactly the set of registers available on the current target. If
// (as is mostly the case) a register is not known to have the same value as a stack slot, then
// its entry is `None` rather than `Some(..)`.
//
// Invariants for AvailEnv:
//
// AvailEnv may have multiple different registers bound to the same stack slot -- that is, `(kind,
// offset, size)` triple. That's OK, and reflects the reality that those two registers contain
// the same value. This could happen, for example, in the case
//
// ss1 = spill r0
// ..
// r2 = fill ss1
//
// Then both `r0` and `r2` will have the same value as `ss1`, provided that ".." doesn't write to
// `r0`.
//
// To say that two different registers may be bound to the same stack slot is the same as saying
// that it is allowed to have two different entries in AvailEnv with the same `(kind, offset,
// size)` triple. What is *not* allowed is to have partial overlaps. That is, if two SlotInfos
// have the same `kind` field and have `offset` and `size` fields that overlap, then their
// `offset` and `size` fields must be identical. This is so as to make the algorithm safe against
// situations where, for example, a 64 bit register is spilled, but then only the bottom 32 bits
// are reloaded from the slot.
//
// In such a case, though, it seems likely that the Cranelift IR would be ill-typed, so this
// case could probably not occur in practice.
#[derive(Clone)]
struct AvailEnv {
map: Vec<Option<SlotInfo>>,
}
// `ProcessingStackElem` combines AvailEnv with contextual information needed to "navigate" within
// a Block.
//
// A ProcessingStackElem conceptually has the lifetime of exactly one Block: once the current Block is
// completed, the ProcessingStackElem will be abandoned. In practice the top level state,
// RedundantReloadRemover, caches them, so as to avoid heap turnover.
//
// Note that ProcessingStackElem must contain a CursorPosition. The CursorPosition, which
// indicates where we are in the current Block, cannot be implicitly maintained by looping over all
// the instructions in a Block in turn, because we may choose to suspend processing the current Block
// at a side exit, continue by processing the subtree reached via the side exit, and only later
// resume the current Block.
struct ProcessingStackElem {
/// Indicates the AvailEnv at the current point in the Block.
avail_env: AvailEnv,
/// Shows where we currently are inside the Block.
cursor: CursorPosition,
/// Indicates the currently active register diversions at the current point.
diversions: RegDiversions,
}
// =============================================================================================
// The top level data structure
// `RedundantReloadRemover` contains data structures for the two passes: discovery of tree shaped
// regions, and processing of them. These are allocated once and stay alive for the entire
// function, even though they are cleared out for each new tree shaped region. It also caches
// `num_regunits` and `num_preds_per_block`, which are computed at the start of each function and
// then remain constant.
/// The redundant reload remover's state.
pub struct RedundantReloadRemover {
/// The total number of RegUnits available on this architecture. This is unknown when the
/// RedundantReloadRemover is created. It becomes known at the beginning of processing of a
/// function.
num_regunits: Option<u16>,
/// This stores, for each Block, a characterisation of the number of predecessors it has.
num_preds_per_block: PrimaryMap<Block, ZeroOneOrMany>,
/// The stack used for the first phase (discovery). There is one element on the discovery
/// stack for each currently unexplored Block in the tree being searched.
discovery_stack: Vec<Block>,
/// The nodes in the discovered tree are inserted here.
nodes_in_tree: EntitySet<Block>,
/// The stack used during the second phase (transformation). There is one element on the
/// processing stack for each currently-open node in the tree being transformed.
processing_stack: Vec<ProcessingStackElem>,
/// Used in the second phase to avoid visiting nodes more than once.
nodes_already_visited: EntitySet<Block>,
}
// =============================================================================================
// Miscellaneous small helper functions
// Is this a kind of stack slot that is safe to track in AvailEnv? This is probably overly
// conservative, but tracking only the SpillSlot and IncomingArgument kinds catches almost all
// available redundancy in practice.
fn is_slot_kind_tracked(kind: StackSlotKind) -> bool {
match kind {
StackSlotKind::SpillSlot | StackSlotKind::IncomingArg => true,
_ => false,
}
}
// Find out if the range `[offset, +size)` overlaps with the range in `si`.
fn overlaps(si: &SlotInfo, offset: i32, size: u32) -> bool {
let a_offset = si.offset as i64;
let a_size = si.size as i64;
let b_offset = offset as i64;
let b_size = size as i64;
let no_overlap = a_offset + a_size <= b_offset || b_offset + b_size <= a_offset;
!no_overlap
}
// Find, in `reginfo`, the register bank that `reg` lives in, and return the lower limit and size
// of the bank. This is so the caller can conveniently iterate over all RegUnits in the bank that
// `reg` lives in.
fn find_bank_limits(reginfo: &RegInfo, reg: RegUnit) -> (RegUnit, u16) {
if let Some(bank) = reginfo.bank_containing_regunit(reg) {
return (bank.first_unit, bank.units);
}
// We should never get here, since `reg` must come from *some* RegBank.
panic!("find_regclass_limits: reg not found");
}
// Returns the register that `v` is allocated to. Assumes that `v` actually resides in a
// register.
fn reg_of_value(locations: &SecondaryMap<Value, ValueLoc>, v: Value) -> RegUnit {
match locations[v] {
ValueLoc::Reg(ru) => ru,
_ => panic!("reg_of_value: value isn't in a reg"),
}
}
// Returns the stack slot that `v` is allocated to. Assumes that `v` actually resides in a stack
// slot.
fn slot_of_value<'s>(
locations: &SecondaryMap<Value, ValueLoc>,
stack_slots: &'s StackSlots,
v: Value,
) -> &'s StackSlotData {
match locations[v] {
ValueLoc::Stack(slot) => &stack_slots[slot],
_ => panic!("slot_of_value: value isn't in a stack slot"),
}
}
// =============================================================================================
// Top level: discovery of tree shaped regions
impl RedundantReloadRemover {
// A helper for `add_nodes_to_tree` below.
fn discovery_stack_push_successors_of(&mut self, cfg: &ControlFlowGraph, node: Block) {
for successor in cfg.succ_iter(node) {
self.discovery_stack.push(successor);
}
}
// Visit the tree of Blocks rooted at `starting_point` and add them to `self.nodes_in_tree`.
// `self.num_preds_per_block` guides the process, ensuring we don't leave the tree-ish region
// and indirectly ensuring that the process will terminate in the presence of cycles in the
// graph. `self.discovery_stack` holds the search state in this function.
fn add_nodes_to_tree(&mut self, cfg: &ControlFlowGraph, starting_point: Block) {
// One might well ask why this doesn't loop forever when it encounters cycles in the
// control flow graph. The reason is that any cycle in the graph that is reachable from
// anywhere outside the cycle -- in particular, that is reachable from the function's
// entry node -- must have at least one node that has two or more predecessors. So the
// logic below won't follow into it, because it regards any such node as the root of some
// other tree.
debug_assert!(self.discovery_stack.is_empty());
debug_assert!(self.nodes_in_tree.is_empty());
self.nodes_in_tree.insert(starting_point);
self.discovery_stack_push_successors_of(cfg, starting_point);
while let Some(node) = self.discovery_stack.pop() {
match self.num_preds_per_block[node] {
// We arrived at a node with multiple predecessors, so it's a new root. Ignore it.
ZeroOneOrMany::Many => {}
// This node has just one predecessor, so we should incorporate it in the tree and
// immediately transition into searching from it instead.
ZeroOneOrMany::One => {
self.nodes_in_tree.insert(node);
self.discovery_stack_push_successors_of(cfg, node);
}
// This is meaningless. We arrived at a node that doesn't point back at where we
// came from.
ZeroOneOrMany::Zero => panic!("add_nodes_to_tree: inconsistent graph"),
}
}
}
}
// =============================================================================================
// Operations relating to `AvailEnv`
impl AvailEnv {
// Create a new one.
fn new(size: usize) -> Self {
let mut env = Self {
map: Vec::<Option<SlotInfo>>::new(),
};
env.map.resize(size, None);
env
}
// Debug only: checks (some of) the required AvailEnv invariants.
#[cfg(debug_assertions)]
fn check_invariants(&self) -> bool {
// Check that any overlapping entries overlap exactly. This is super lame (quadratic),
// but it's only used in debug builds.
for i in 0..self.map.len() {
if let Some(si) = self.map[i] {
for j in i + 1..self.map.len() {
if let Some(sj) = self.map[j] {
// "si and sj overlap, but not exactly"
if si.kind == sj.kind
&& overlaps(&si, sj.offset, sj.size)
&& !(si.offset == sj.offset && si.size == sj.size)
{
return false;
}
}
}
}
}
true
}
// Invalidates the binding associated with `reg`. Note that by construction of AvailEnv,
// `reg` can only be associated with one binding at once.
fn invalidate_by_reg(&mut self, reg: RegUnit) {
self.map[reg as usize] = None;
}
// Invalidates any binding that has any overlap with `(kind, offset, size)`.
fn invalidate_by_offset(&mut self, kind: StackSlotKind, offset: i32, size: u32) {
debug_assert!(is_slot_kind_tracked(kind));
for i in 0..self.map.len() {
if let Some(si) = &self.map[i] {
if si.kind == kind && overlaps(&si, offset, size) {
self.map[i] = None;
}
}
}
}
// Invalidates all bindings.
fn invalidate_all(&mut self) {
for i in 0..self.map.len() {
self.map[i] = None;
}
}
// Updates AvailEnv to track the effect of a `regmove` instruction.
fn copy_reg(&mut self, src: RegUnit, dst: RegUnit) {
self.map[dst as usize] = self.map[src as usize];
}
// Does `env` have the exact binding characterised by `(reg, kind, offset, size)` ?
fn has_exact_binding(&self, reg: RegUnit, kind: StackSlotKind, offset: i32, size: u32) -> bool {
debug_assert!(is_slot_kind_tracked(kind));
if let Some(si) = &self.map[reg as usize] {
return si.kind == kind && si.offset == offset && si.size == size;
}
// No such binding.
false
}
// Does `env` have a binding characterised by `(kind, offset, size)` but to a register, let's
// call it `other_reg`, that isn't `reg`? If so, return `other_reg`. Note that `other_reg`
// will have the same bank as `reg`. It is a checked error to call this function with a
// binding matching all four of `(reg, kind, offset, size)`.
fn has_inexact_binding(
&self,
reginfo: &RegInfo,
reg: RegUnit,
kind: StackSlotKind,
offset: i32,
size: u32,
) -> Option<RegUnit> {
debug_assert!(is_slot_kind_tracked(kind));
// Find the range of RegUnit numbers for the bank that contains `reg`, and use that as our
// search space. This is so as to guarantee that any match is restricted to the same bank
// as `reg`.
let (first_unit, num_units) = find_bank_limits(reginfo, reg);
for other_reg in first_unit..first_unit + num_units {
if let Some(si) = &self.map[other_reg as usize] {
if si.kind == kind && si.offset == offset && si.size == size {
if other_reg == reg {
panic!("has_inexact_binding: binding *is* exact!");
}
return Some(other_reg);
}
}
}
// No such binding.
None
}
// Create the binding `(reg, kind, offset, size)` in `env`, and throw away any previous
// binding associated with either `reg` or the `(kind, offset, size)` triple.
fn bind(&mut self, reg: RegUnit, kind: StackSlotKind, offset: i32, size: u32) {
debug_assert!(is_slot_kind_tracked(kind));
self.invalidate_by_offset(kind, offset, size);
self.map[reg as usize] = Some(SlotInfo { kind, offset, size });
}
}
// Invalidates, in `avail_env`, any binding associated with a regunit that is written by `inst`.
fn invalidate_regs_written_by_inst(
locations: &SecondaryMap<Value, ValueLoc>,
diversions: &RegDiversions,
dfg: &DataFlowGraph,
avail_env: &mut AvailEnv,
inst: Inst,
) {
for v in dfg.inst_results(inst).iter() {
if let ValueLoc::Reg(ru) = locations[*v] {
// This must be true. It would be meaningless for an SSA value to be diverted before
// the point where it is defined.
debug_assert!(diversions.reg(*v, locations) == ru);
avail_env.invalidate_by_reg(ru);
}
}
}
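// ---------------------------------------------------------------------------------------------
// Standalone sketch (not part of the original module): the essence of what `AvailEnv` buys us,
// using plain integers instead of `RegUnit`/`StackSlotKind` and ignoring slot kinds and register
// banks. After a spill or fill we remember which register currently holds the contents of which
// stack-slot region, so a later fill from the same region can be turned into a nop (the
// destination register already holds the value) or into a register-to-register copy (some other
// register holds it). All names below are hypothetical.
use std::collections::HashMap;
#[derive(Clone, Copy, PartialEq)]
struct SlotRegion {
    offset: i32,
    size: u32,
}
enum FillAction {
    Nop,           // the destination register already holds the slot's value
    CopyFrom(u32), // another register holds it; copy from there instead of reloading
    Reload,        // nobody holds it; keep the load and record the new binding afterwards
}
fn classify_fill(bindings: &HashMap<u32, SlotRegion>, dst_reg: u32, slot: SlotRegion) -> FillAction {
    match bindings.get(&dst_reg) {
        // Exact binding: this fill is completely redundant.
        Some(&s) if s == slot => FillAction::Nop,
        // Inexact binding: the value lives in some other register. (The real pass additionally
        // restricts this search to registers in the same bank as `dst_reg`.)
        _ => match bindings.iter().find(|&(_, &s)| s == slot) {
            Some((&other_reg, _)) => FillAction::CopyFrom(other_reg),
            None => FillAction::Reload,
        },
    }
}
// ---------------------------------------------------------------------------------------------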
// =============================================================================================
// Processing of individual instructions
impl RedundantReloadRemover {
// Process `inst`, possibly changing it into a different instruction, and possibly changing
// `self.avail_env` and `func.dfg`.
fn visit_inst(
&mut self,
func: &mut Function,
reginfo: &RegInfo,
isa: &dyn TargetIsa,
inst: Inst,
) {
// Get hold of the top-of-stack work item. This is the state that we will mutate during
// processing of this instruction.
debug_assert!(!self.processing_stack.is_empty());
let ProcessingStackElem {
avail_env,
diversions,
..
} = self.processing_stack.last_mut().unwrap();
#[cfg(debug_assertions)]
debug_assert!(
avail_env.check_invariants(),
"visit_inst: env invariants not ok"
);
let dfg = &mut func.dfg;
let locations = &func.locations;
let stack_slots = &func.stack_slots;
// To avoid difficulties with the borrow checker, do this in two stages. First, examine
// the instruction to see if it can be deleted or modified, and park the relevant
// information in `transform`. Update `self.avail_env` too. Later, use `transform` to
// actually do the transformation if necessary.
enum Transform {
NoChange,
ChangeToNopFill(Value), // convert this insn into a `fill_nop`, effectively deleting it
ChangeToCopyToSSA(Type, RegUnit), // change it into a copy from the specified reg
}
let mut transform = Transform::NoChange;
// In this match { .. } statement, either we must treat the instruction specially, or we
// must call `invalidate_regs_written_by_inst` on it.
match &dfg[inst] {
InstructionData::Unary {
opcode: Opcode::Spill,
arg: src_value,
} => {
// Extract: (src_reg, kind, offset, size)
// Invalidate: (kind, offset, size)
// Add new binding: {src_reg -> (kind, offset, size)}
// Don't forget that src_value might be diverted, so we have to deref it.
let slot = slot_of_value(locations, stack_slots, dfg.inst_results(inst)[0]);
let src_reg = diversions.reg(*src_value, locations);
let kind = slot.kind;
if is_slot_kind_tracked(kind) {
let offset = slot.offset.expect("visit_inst: spill with no offset");
let size = slot.size;
avail_env.bind(src_reg, kind, offset, size);
} else {
// We don't expect this insn to write any regs. But to be consistent with the
// rule above, do this anyway.
invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst);
}
}
InstructionData::Unary {
opcode: Opcode::Fill,
arg: src_value,
} => {
// Extract: (dst_reg, kind, offset, size)
// Invalidate: (kind, offset, size)
// Add new binding: {dst_reg -> (kind, offset, size)}
let slot = slot_of_value(locations, stack_slots, *src_value);
let dst_value = dfg.inst_results(inst)[0];
let dst_reg = reg_of_value(locations, dst_value);
// This must be true. It would be meaningless for an SSA value to be diverted
// before it was defined.
debug_assert!(dst_reg == diversions.reg(dst_value, locations));
let kind = slot.kind;
if is_slot_kind_tracked(kind) {
let offset = slot.offset.expect("visit_inst: fill with no offset");
let size = slot.size;
if avail_env.has_exact_binding(dst_reg, kind, offset, size) {
// This instruction is an exact copy of a fill we saw earlier, and the
// loaded value is still valid. So we'll schedule this instruction for
// deletion (below). No need to make any changes to `avail_env`.
transform = Transform::ChangeToNopFill(*src_value);
} else if let Some(other_reg) =
avail_env.has_inexact_binding(reginfo, dst_reg, kind, offset, size)
{
// This fill is from the required slot, but into a different register
// `other_reg`. So replace it with a copy from `other_reg` to `dst_reg`
// and update `dst_reg`'s binding to make it the same as `other_reg`'s, so
// as to maximise the chances of future matches after this instruction.
debug_assert!(other_reg != dst_reg);
transform =
Transform::ChangeToCopyToSSA(dfg.value_type(dst_value), other_reg);
avail_env.copy_reg(other_reg, dst_reg);
} else {
// This fill creates some new binding we don't know about. Update
// `avail_env` to track it.
avail_env.bind(dst_reg, kind, offset, size);
}
} else {
// Else it's "just another instruction that writes a reg", so we'd better
// treat it as such, just as we do below for instructions that we don't handle
// specially.
invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst);
}
}
InstructionData::RegMove { src, dst, .. } => {
// These happen relatively rarely, but just frequently enough that it's worth
// tracking the copy (at the machine level, it's really a copy) in `avail_env`.
avail_env.copy_reg(*src, *dst);
}
InstructionData::RegSpill { .. }
| InstructionData::RegFill { .. }
| InstructionData::Call { .. }
| InstructionData::CallIndirect { .. }
| InstructionData::StackLoad { .. }
| InstructionData::StackStore { .. }
| InstructionData::Unary {
opcode: Opcode::AdjustSpDown,
..
}
| InstructionData::UnaryImm {
opcode: Opcode::AdjustSpUpImm,
..
}
| InstructionData::UnaryImm {
opcode: Opcode::AdjustSpDownImm,
..
} => {
// All of these change, or might change, the memory-register bindings tracked in
// `avail_env` in ways we either can't track, or could track only with an
// effort-to-benefit ratio that seems too low to bother with. So play safe: forget
// everything we know.
//
// For Call/CallIndirect, we could do better when compiling for calling
// conventions that have callee-saved registers, since bindings for them would
// remain valid across the call.
avail_env.invalidate_all();
}
_ => {
// Invalidate: any `avail_env` entry associated with a reg written by `inst`.
invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst);
}
}
// Actually do the transformation.
match transform {
Transform::NoChange => {}
Transform::ChangeToNopFill(arg) => {
// Load is completely redundant. Convert it to a no-op.
dfg.replace(inst).fill_nop(arg);
let ok = func.update_encoding(inst, isa).is_ok();
debug_assert!(
ok,
"fill_nop encoding missing for this type: `{}`",
func.dfg.display_inst(inst, isa)
);
}
Transform::ChangeToCopyToSSA(ty, reg) => {
// We already have the relevant value in some other register. Convert the
// load into a reg-reg copy.
dfg.replace(inst).copy_to_ssa(ty, reg);
let ok = func.update_encoding(inst, isa).is_ok();
debug_assert!(ok, "copy_to_ssa encoding missing for type {}", ty);
}
}
}
}
// =============================================================================================
// Top level: processing of tree shaped regions
impl RedundantReloadRemover {
// Push a clone of the top-of-stack ProcessingStackElem. This will be used to process exactly
// one Block. The diversions are created new, rather than cloned, to reflect the fact
// that diversions are local to each Block.
fn processing_stack_push(&mut self, cursor: CursorPosition) {
let avail_env = if let Some(stack_top) = self.processing_stack.last() {
stack_top.avail_env.clone()
} else {
AvailEnv::new(
self.num_regunits
.expect("processing_stack_push: num_regunits unknown!")
as usize,
)
};
self.processing_stack.push(ProcessingStackElem {
avail_env,
cursor,
diversions: RegDiversions::new(),
});
}
// This pushes the node `dst` onto the processing stack, and sets up the new
// ProcessingStackElem accordingly. But it does all that only if `dst` is part of the current
// tree *and* we haven't yet visited it.
fn processing_stack_maybe_push(&mut self, dst: Block) {
if self.nodes_in_tree.contains(dst) && !self.nodes_already_visited.contains(dst) {
if !self.processing_stack.is_empty() {
// If this isn't the outermost node in the tree (that is, the root), then it must
// have exactly one predecessor. Nodes with no predecessors are dead and not
// incorporated in any tree. Nodes with two or more predecessors are the root of
// some other tree, and visiting them as if they were part of the current tree
// would be a serious error.
debug_assert!(self.num_preds_per_block[dst] == ZeroOneOrMany::One);
}
self.processing_stack_push(CursorPosition::Before(dst));
self.nodes_already_visited.insert(dst);
}
}
// Perform redundant-reload removal on the tree shaped region of graph defined by `root` and
// `self.nodes_in_tree`. The following state is modified: `self.processing_stack`,
// `self.nodes_already_visited`, and `func.dfg`.
fn process_tree(
&mut self,
func: &mut Function,
reginfo: &RegInfo,
isa: &dyn TargetIsa,
root: Block,
) {
debug_assert!(self.nodes_in_tree.contains(root));
debug_assert!(self.processing_stack.is_empty());
debug_assert!(self.nodes_already_visited.is_empty());
// Create the initial work item
self.processing_stack_maybe_push(root);
while !self.processing_stack.is_empty() {
// It seems somewhat ridiculous to construct a whole new FuncCursor just so we can do
// next_inst() on it once, and then copy the resulting position back out. But use of
// a function-global FuncCursor, or of the EncCursor in struct Context, leads to
// borrow checker problems, as does including FuncCursor directly in
// ProcessingStackElem. In any case this is not as bad as it looks, since profiling
// shows that the build-insert-step-extract work is reduced to just 8 machine
// instructions in an optimised x86_64 build, presumably because rustc can inline and
// then optimise out almost all the work.
let tos = self.processing_stack.len() - 1;
let mut pos = FuncCursor::new(func).at_position(self.processing_stack[tos].cursor);
let maybe_inst = pos.next_inst();
self.processing_stack[tos].cursor = pos.position();
if let Some(inst) = maybe_inst {
// Deal with this insn, possibly changing it, possibly updating the top item of
// `self.processing_stack`.
self.visit_inst(func, reginfo, isa, inst);
// Update diversions after the insn.
self.processing_stack[tos].diversions.apply(&func.dfg[inst]);
// If the insn can branch outside this Block, push work items on the stack for all
// target Blocks that are part of the same tree and that we haven't yet visited.
// The next iteration of this instruction-processing loop will immediately start
// work on the most recently pushed Block, and will eventually continue in this Block
// when those new items have been removed from the stack.
match func.dfg.analyze_branch(inst) {
BranchInfo::NotABranch => (),
BranchInfo::SingleDest(dst, _) => {
self.processing_stack_maybe_push(dst);
}
BranchInfo::Table(jt, default) => {
func.jump_tables[jt]
.iter()
.for_each(|dst| self.processing_stack_maybe_push(*dst));
if let Some(dst) = default {
self.processing_stack_maybe_push(dst);
}
}
}
} else {
// We've come to the end of the current work-item (Block). We'll already have
// processed the fallthrough/continuation/whatever for it using the logic above.
// Pop it off the stack and resume work on its parent.
self.processing_stack.pop();
}
}
}
}
// =============================================================================================
// Top level: perform redundant fill removal for a complete function
impl RedundantReloadRemover {
/// Create a new remover state.
pub fn new() -> Self {
Self {
num_regunits: None,
num_preds_per_block: PrimaryMap::<Block, ZeroOneOrMany>::with_capacity(8),
discovery_stack: Vec::<Block>::with_capacity(16),
nodes_in_tree: EntitySet::<Block>::new(),
processing_stack: Vec::<ProcessingStackElem>::with_capacity(8),
nodes_already_visited: EntitySet::<Block>::new(),
}
}
/// Clear the state of the remover.
pub fn clear(&mut self) {
self.clear_for_new_function();
}
fn clear_for_new_function(&mut self) {
self.num_preds_per_block.clear();
self.clear_for_new_tree();
}
fn clear_for_new_tree(&mut self) {
self.discovery_stack.clear();
self.nodes_in_tree.clear();
self.processing_stack.clear();
self.nodes_already_visited.clear();
}
#[inline(never)]
fn do_redundant_fill_removal_on_function(
&mut self,
func: &mut Function,
reginfo: &RegInfo,
isa: &dyn TargetIsa,
cfg: &ControlFlowGraph,
) {
// Fail in an obvious way if there are more than (2^32)-1 Blocks in this function.
let num_blocks: u32 = func.dfg.num_blocks().try_into().unwrap();
// Clear out per-tree state.
self.clear_for_new_function();
// Create a PrimaryMap that summarises the number of predecessors for each block, as 0, 1
// or "many", and that also claims the entry block as having "many" predecessors.
self.num_preds_per_block.clear();
self.num_preds_per_block.reserve(num_blocks as usize);
for i in 0..num_blocks {
let mut pi = cfg.pred_iter(Block::from_u32(i));
let mut n_pi = ZeroOneOrMany::Zero;
if pi.next().is_some() {
n_pi = ZeroOneOrMany::One;
if pi.next().is_some() {
n_pi = ZeroOneOrMany::Many;
// We don't care if there are more than two preds, so stop counting now.
}
}
self.num_preds_per_block.push(n_pi);
}
debug_assert!(self.num_preds_per_block.len() == num_blocks as usize);
// The entry block must be the root of some tree, so set up the state to reflect that.
let entry_block = func
.layout
.entry_block()
.expect("do_redundant_fill_removal_on_function: entry block unknown");
debug_assert!(self.num_preds_per_block[entry_block] == ZeroOneOrMany::Zero);
self.num_preds_per_block[entry_block] = ZeroOneOrMany::Many;
// Now build and process trees.
for root_ix in 0..self.num_preds_per_block.len() {
let root = Block::from_u32(root_ix as u32);
// Build a tree for each node that has two or more preds, and ignore all other nodes.
if self.num_preds_per_block[root] != ZeroOneOrMany::Many {
continue;
}
// Clear out per-tree state.
self.clear_for_new_tree();
// Discovery phase: build the tree, as `root` and `self.nodes_in_tree`.
self.add_nodes_to_tree(cfg, root);
debug_assert!(self.nodes_in_tree.cardinality() > 0);
debug_assert!(self.num_preds_per_block[root] == ZeroOneOrMany::Many);
// Processing phase: do redundant-reload-removal.
self.process_tree(func, reginfo, isa, root);
debug_assert!(
self.nodes_in_tree.cardinality() == self.nodes_already_visited.cardinality()
);
}
}
}
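// ---------------------------------------------------------------------------------------------
// Standalone sketch (not part of the original module): how the CFG is carved into tree-shaped
// regions, using plain `usize` block ids and adjacency lists instead of Cranelift entities. The
// entry block and every block with two or more predecessors become roots; each root's tree then
// absorbs successor chains made purely of single-predecessor blocks, mirroring
// `add_nodes_to_tree` and `do_redundant_fill_removal_on_function` above. Dead blocks are simply
// skipped here rather than treated as an error.
fn tree_regions(entry: usize, succs: &[Vec<usize>]) -> Vec<(usize, Vec<usize>)> {
    let num_blocks = succs.len();
    // Count predecessors.
    let mut num_preds = vec![0usize; num_blocks];
    for block_succs in succs {
        for &s in block_succs {
            num_preds[s] += 1;
        }
    }
    let mut regions = Vec::new();
    for root in 0..num_blocks {
        // Roots are the entry block plus every block with several predecessors.
        if root != entry && num_preds[root] < 2 {
            continue;
        }
        // Discovery: follow successor edges, absorbing single-predecessor blocks only.
        let mut in_tree = vec![false; num_blocks];
        in_tree[root] = true;
        let mut members = vec![root];
        let mut stack: Vec<usize> = succs[root].clone();
        while let Some(b) = stack.pop() {
            if b != entry && num_preds[b] == 1 && !in_tree[b] {
                in_tree[b] = true;
                members.push(b);
                stack.extend(succs[b].iter().copied());
            }
        }
        regions.push((root, members));
    }
    regions
}
// ---------------------------------------------------------------------------------------------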
// =============================================================================================
// Top level: the external interface
struct Context<'a> {
// Current instruction as well as reference to function and ISA.
cur: EncCursor<'a>,
// Cached ISA information. We save it here to avoid frequent virtual function calls on the
// `TargetIsa` trait object.
reginfo: RegInfo,
// References to contextual data structures we need.
cfg: &'a ControlFlowGraph,
// The running state.
state: &'a mut RedundantReloadRemover,
}
impl RedundantReloadRemover {
/// Run the remover.
pub fn run(&mut self, isa: &dyn TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) {
let ctx = Context {
cur: EncCursor::new(func, isa),
reginfo: isa.register_info(),
cfg,
state: self,
};
let mut total_regunits = 0;
for rb in isa.register_info().banks {
total_regunits += rb.units;
}
ctx.state.num_regunits = Some(total_regunits);
ctx.state.do_redundant_fill_removal_on_function(
ctx.cur.func,
&ctx.reginfo,
ctx.cur.isa,
&ctx.cfg,
);
}
}

View File

@@ -1,126 +0,0 @@
//! Value affinity for register allocation.
//!
//! An SSA value's affinity is a hint used to guide the register allocator. It specifies the class
//! of allocation that is likely to cause the least amount of fixup moves in order to satisfy
//! instruction operand constraints.
//!
//! For values that want to be in registers, the affinity hint includes a register class or
//! subclass. This is just a hint, and the register allocator is allowed to pick a register from a
//! larger register class instead.
use crate::ir::{AbiParam, ArgumentLoc};
use crate::isa::{ConstraintKind, OperandConstraint, RegClassIndex, RegInfo, TargetIsa};
use core::fmt;
/// Preferred register allocation for an SSA value.
#[derive(Clone, Copy, Debug)]
pub enum Affinity {
/// No affinity.
///
/// This indicates a value that is not defined or used by any real instructions. It is a ghost
/// value that won't appear in the final program.
Unassigned,
/// This value should be placed in a spill slot on the stack.
Stack,
/// This value prefers a register from the given register class.
Reg(RegClassIndex),
}
impl Default for Affinity {
fn default() -> Self {
Self::Unassigned
}
}
impl Affinity {
/// Create an affinity that satisfies a single constraint.
///
/// This will never create an `Affinity::Unassigned`.
/// Use the `Default` implementation for that.
pub fn new(constraint: &OperandConstraint) -> Self {
if constraint.kind == ConstraintKind::Stack {
Self::Stack
} else {
Self::Reg(constraint.regclass.into())
}
}
/// Create an affinity that matches an ABI argument for `isa`.
pub fn abi(arg: &AbiParam, isa: &dyn TargetIsa) -> Self {
match arg.location {
ArgumentLoc::Unassigned => Self::Unassigned,
ArgumentLoc::Reg(_) => Self::Reg(isa.regclass_for_abi_type(arg.value_type).into()),
ArgumentLoc::Stack(_) => Self::Stack,
}
}
/// Is this the `Unassigned` affinity?
pub fn is_unassigned(self) -> bool {
match self {
Self::Unassigned => true,
_ => false,
}
}
/// Is this the `Reg` affinity?
pub fn is_reg(self) -> bool {
match self {
Self::Reg(_) => true,
_ => false,
}
}
/// Is this the `Stack` affinity?
pub fn is_stack(self) -> bool {
match self {
Self::Stack => true,
_ => false,
}
}
/// Merge an operand constraint into this affinity.
///
/// Note that this does not guarantee that the register allocator will pick a register that
/// satisfies the constraint.
pub fn merge(&mut self, constraint: &OperandConstraint, reginfo: &RegInfo) {
match *self {
Self::Unassigned => *self = Self::new(constraint),
Self::Reg(rc) => {
// If the preferred register class is a subclass of the constraint, there's no need
// to change anything.
if constraint.kind != ConstraintKind::Stack && !constraint.regclass.has_subclass(rc)
{
// If the register classes overlap, try to shrink our preferred register class.
if let Some(subclass) = constraint.regclass.intersect_index(reginfo.rc(rc)) {
*self = Self::Reg(subclass);
}
}
}
Self::Stack => {}
}
}
/// Return an object that can display this value affinity, using the register info from the
/// target ISA.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayAffinity<'a> {
DisplayAffinity(self, regs.into())
}
}
/// Displaying an `Affinity` correctly requires the associated `RegInfo` from the target ISA.
pub struct DisplayAffinity<'a>(Affinity, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayAffinity<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.0 {
Affinity::Unassigned => write!(f, "unassigned"),
Affinity::Stack => write!(f, "stack"),
Affinity::Reg(rci) => match self.1 {
Some(regs) => write!(f, "{}", regs.rc(rci)),
None => write!(f, "{}", rci),
},
}
}
}

View File

@@ -1,169 +0,0 @@
//! Split the outgoing edges of conditional branches that pass parameters.
//!
//! One of the reasons for splitting edges is to be able to insert `copy` and `regmove` instructions
//! between a conditional branch and the following terminator.
use alloc::vec::Vec;
use crate::cursor::{Cursor, EncCursor};
use crate::dominator_tree::DominatorTree;
use crate::flowgraph::ControlFlowGraph;
use crate::ir::{Block, Function, Inst, InstBuilder, InstructionData, Opcode, ValueList};
use crate::isa::TargetIsa;
use crate::topo_order::TopoOrder;
pub fn run(
isa: &dyn TargetIsa,
func: &mut Function,
cfg: &mut ControlFlowGraph,
domtree: &mut DominatorTree,
topo: &mut TopoOrder,
) {
let mut ctx = Context {
has_new_blocks: false,
cur: EncCursor::new(func, isa),
domtree,
topo,
cfg,
};
ctx.run()
}
struct Context<'a> {
/// True if new blocks were inserted.
has_new_blocks: bool,
/// Current instruction as well as reference to function and ISA.
cur: EncCursor<'a>,
/// References to contextual data structures we need.
domtree: &'a mut DominatorTree,
topo: &'a mut TopoOrder,
cfg: &'a mut ControlFlowGraph,
}
impl<'a> Context<'a> {
fn run(&mut self) {
// Any block order will do.
self.topo.reset(self.cur.func.layout.blocks());
while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) {
// Branches can only be at the last or second to last position in an extended basic
// block.
self.cur.goto_last_inst(block);
let terminator_inst = self.cur.current_inst().expect("terminator");
if let Some(inst) = self.cur.prev_inst() {
let opcode = self.cur.func.dfg[inst].opcode();
if opcode.is_branch() {
self.visit_conditional_branch(inst, opcode);
self.cur.goto_inst(terminator_inst);
self.visit_terminator_branch(terminator_inst);
}
}
}
// If blocks were added, the cfg and domtree are inconsistent and must be recomputed.
if self.has_new_blocks {
self.cfg.compute(&self.cur.func);
self.domtree.compute(&self.cur.func, self.cfg);
}
}
fn visit_conditional_branch(&mut self, branch: Inst, opcode: Opcode) {
// TODO: target = dfg[branch].branch_destination().expect("conditional branch");
let target = match self.cur.func.dfg[branch] {
InstructionData::Branch { destination, .. }
| InstructionData::BranchIcmp { destination, .. }
| InstructionData::BranchInt { destination, .. }
| InstructionData::BranchFloat { destination, .. } => destination,
_ => panic!("Unexpected instruction in visit_conditional_branch"),
};
// If there are any parameters, split the edge.
if self.should_split_edge(target) {
// Create the block the branch will jump to.
let new_block = self.cur.func.dfg.make_block();
// Insert the new block before the destination, such that it can fall through to the
// target block.
assert_ne!(Some(target), self.cur.layout().entry_block());
self.cur.layout_mut().insert_block(new_block, target);
self.has_new_blocks = true;
// Extract the arguments of the branch instruction, and split them into the fixed
// branch arguments and the Block parameters.
let num_fixed = opcode.constraints().num_fixed_value_arguments();
let dfg = &mut self.cur.func.dfg;
let old_args: Vec<_> = {
let args = dfg[branch].take_value_list().expect("block parameters");
args.as_slice(&dfg.value_lists).iter().copied().collect()
};
let (branch_args, block_params) = old_args.split_at(num_fixed);
// Replace the branch destination with the new Block (created with no parameters), and
// restore the branch arguments without the original Block parameters.
{
let branch_args = ValueList::from_slice(branch_args, &mut dfg.value_lists);
let data = &mut dfg[branch];
*data.branch_destination_mut().expect("branch") = new_block;
data.put_value_list(branch_args);
}
let ok = self.cur.func.update_encoding(branch, self.cur.isa).is_ok();
debug_assert!(ok);
// Insert a jump to the original target with its arguments into the new block.
self.cur.goto_first_insertion_point(new_block);
self.cur.ins().jump(target, block_params);
// Reset the cursor to point to the branch.
self.cur.goto_inst(branch);
}
}
fn visit_terminator_branch(&mut self, inst: Inst) {
let inst_data = &self.cur.func.dfg[inst];
let opcode = inst_data.opcode();
if opcode != Opcode::Jump && opcode != Opcode::Fallthrough {
// Other terminators are ignored here, since they cannot pass block parameters. The only
// branch opcode expected in this position is `IndirectJumpTableBr`, whose jump-table
// targets take no parameters.
if opcode != Opcode::IndirectJumpTableBr {
debug_assert!(!opcode.is_branch())
}
return;
}
let target = match inst_data {
InstructionData::Jump { destination, .. } => destination,
_ => panic!(
"Unexpected instruction {} in visit_terminator_branch",
self.cur.display_inst(inst)
),
};
debug_assert!(self.cur.func.dfg[inst].opcode().is_terminator());
// If there are any parameters, split the edge.
if self.should_split_edge(*target) {
// Create the block the branch will jump to.
let new_block = self.cur.func.dfg.make_block();
self.has_new_blocks = true;
// Split the current block before its terminator, and insert a new jump instruction to
// jump to it.
let jump = self.cur.ins().jump(new_block, &[]);
self.cur.insert_block(new_block);
// Reset the cursor to point to the new terminator of the old block.
self.cur.goto_inst(jump);
}
}
/// Returns whether we should split the edge to `target` by introducing a new block.
fn should_split_edge(&self, target: Block) -> bool {
// We should split the edge if the target has any parameters.
if !self.cur.func.dfg.block_params(target).is_empty() {
return true;
};
// Or, if the target has more than one block reaching it.
debug_assert!(self.cfg.pred_iter(target).next() != None);
self.cfg.pred_iter(target).nth(1).is_some()
}
}

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -1,252 +0,0 @@
//! Register allocator context.
//!
//! The `Context` struct contains data structures that should be preserved across invocations of
//! the register allocator algorithm. This doesn't preserve any data between functions, but it
//! avoids allocating data structures independently for each function being compiled.
use crate::dominator_tree::DominatorTree;
use crate::flowgraph::ControlFlowGraph;
use crate::ir::Function;
use crate::isa::TargetIsa;
use crate::regalloc::branch_splitting;
use crate::regalloc::coalescing::Coalescing;
use crate::regalloc::coloring::Coloring;
use crate::regalloc::live_value_tracker::LiveValueTracker;
use crate::regalloc::liveness::Liveness;
use crate::regalloc::reload::Reload;
use crate::regalloc::safepoint::emit_stack_maps;
use crate::regalloc::spilling::Spilling;
use crate::regalloc::virtregs::VirtRegs;
use crate::result::CodegenResult;
use crate::timing;
use crate::topo_order::TopoOrder;
use crate::verifier::{
verify_context, verify_cssa, verify_liveness, verify_locations, VerifierErrors,
};
/// Persistent memory allocations for register allocation.
pub struct Context {
liveness: Liveness,
virtregs: VirtRegs,
coalescing: Coalescing,
topo: TopoOrder,
tracker: LiveValueTracker,
spilling: Spilling,
reload: Reload,
coloring: Coloring,
}
impl Context {
/// Create a new context for register allocation.
///
/// This context should be reused for multiple functions in order to avoid repeated memory
/// allocations.
pub fn new() -> Self {
Self {
liveness: Liveness::new(),
virtregs: VirtRegs::new(),
coalescing: Coalescing::new(),
topo: TopoOrder::new(),
tracker: LiveValueTracker::new(),
spilling: Spilling::new(),
reload: Reload::new(),
coloring: Coloring::new(),
}
}
/// Clear all data structures in this context.
pub fn clear(&mut self) {
self.liveness.clear();
self.virtregs.clear();
self.coalescing.clear();
self.topo.clear();
self.tracker.clear();
self.spilling.clear();
self.reload.clear();
self.coloring.clear();
}
/// Current values liveness state.
pub fn liveness(&self) -> &Liveness {
&self.liveness
}
/// Allocate registers in `func`.
///
/// After register allocation, all values in `func` have been assigned to a register or stack
/// location that is consistent with instruction encoding constraints.
pub fn run(
&mut self,
isa: &dyn TargetIsa,
func: &mut Function,
cfg: &mut ControlFlowGraph,
domtree: &mut DominatorTree,
) -> CodegenResult<()> {
let _tt = timing::regalloc();
debug_assert!(domtree.is_valid());
let mut errors = VerifierErrors::default();
// `Liveness` and `Coloring` are self-clearing.
self.virtregs.clear();
// Tracker state (dominator live sets) is actually reused between the spilling and coloring
// phases.
self.tracker.clear();
// Pass: Split branches to create room for inserting copy & regmove instructions.
branch_splitting::run(isa, func, cfg, domtree, &mut self.topo);
// Pass: Liveness analysis.
self.liveness.compute(isa, func, cfg);
if isa.flags().enable_verifier() {
let ok = verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok();
if !ok {
return Err(errors.into());
}
}
// Pass: Coalesce and create Conventional SSA form.
self.coalescing.conventional_ssa(
isa,
func,
cfg,
domtree,
&mut self.liveness,
&mut self.virtregs,
);
if isa.flags().enable_verifier() {
let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
&& verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
&& verify_cssa(
func,
cfg,
domtree,
&self.liveness,
&self.virtregs,
&mut errors,
)
.is_ok();
if !ok {
return Err(errors.into());
}
}
// Pass: Spilling.
self.spilling.run(
isa,
func,
domtree,
&mut self.liveness,
&self.virtregs,
&mut self.topo,
&mut self.tracker,
);
if isa.flags().enable_verifier() {
let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
&& verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
&& verify_cssa(
func,
cfg,
domtree,
&self.liveness,
&self.virtregs,
&mut errors,
)
.is_ok();
if !ok {
return Err(errors.into());
}
}
// Pass: Reload.
self.reload.run(
isa,
func,
domtree,
&mut self.liveness,
&mut self.topo,
&mut self.tracker,
);
if isa.flags().enable_verifier() {
let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
&& verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
&& verify_cssa(
func,
cfg,
domtree,
&self.liveness,
&self.virtregs,
&mut errors,
)
.is_ok();
if !ok {
return Err(errors.into());
}
}
// Pass: Coloring.
self.coloring.run(
isa,
func,
cfg,
domtree,
&mut self.liveness,
&mut self.tracker,
);
// If there are any reference types used, encode safepoints and emit
// stack maps.
//
// This function runs after register allocation has taken place, meaning
// values have locations assigned already, which is necessary for
// creating the stack maps.
let safepoints_enabled = isa.flags().enable_safepoints();
for val in func.dfg.values() {
let ty = func.dfg.value_type(val);
if ty.lane_type().is_ref() {
assert!(
safepoints_enabled,
"reference types were found but safepoints were not enabled"
);
emit_stack_maps(func, domtree, &self.liveness, &mut self.tracker, isa);
break;
}
}
if isa.flags().enable_verifier() {
let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
&& verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
&& verify_locations(isa, func, cfg, Some(&self.liveness), &mut errors).is_ok()
&& verify_cssa(
func,
cfg,
domtree,
&self.liveness,
&self.virtregs,
&mut errors,
)
.is_ok();
if !ok {
return Err(errors.into());
}
}
// Even if we arrive here, (non-fatal) errors might have been reported, so we
// must make sure absolutely nothing is wrong
if errors.is_empty() {
Ok(())
} else {
Err(errors.into())
}
}
}

View File

@@ -1,322 +0,0 @@
//! Register diversions.
//!
//! Normally, a value is assigned to a single register or stack location by the register allocator.
//! Sometimes, it is necessary to move register values to a different register in order to satisfy
//! instruction constraints.
//!
//! These register diversions are local to a block. No values can be diverted when entering a new
//! block.
use crate::fx::FxHashMap;
use crate::hash_map::{Entry, Iter};
use crate::ir::{Block, StackSlot, Value, ValueLoc, ValueLocations};
use crate::ir::{InstructionData, Opcode};
use crate::isa::{RegInfo, RegUnit};
use core::fmt;
use cranelift_entity::{SparseMap, SparseMapValue};
#[cfg(feature = "enable-serde")]
use serde::{Deserialize, Serialize};
/// A diversion of a value from its original location to a new register or stack location.
///
/// In IR, a diversion is represented by a `regmove` instruction, possibly a chain of them for the
/// same value.
///
/// When tracking diversions, the `from` field is the original assigned value location, and `to` is
/// the current one.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Diversion {
/// The original value location.
pub from: ValueLoc,
/// The current value location.
pub to: ValueLoc,
}
impl Diversion {
/// Make a new diversion.
pub fn new(from: ValueLoc, to: ValueLoc) -> Self {
debug_assert!(from.is_assigned() && to.is_assigned());
Self { from, to }
}
}
/// Keep track of diversions in a block.
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct RegDiversions {
current: FxHashMap<Value, Diversion>,
}
/// Keep track of diversions at the entry of a block.
#[derive(Clone)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
struct EntryRegDiversionsValue {
key: Block,
divert: RegDiversions,
}
/// Map each block to its matching RegDiversions at block entry.
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct EntryRegDiversions {
map: SparseMap<Block, EntryRegDiversionsValue>,
}
impl RegDiversions {
/// Create a new empty diversion tracker.
pub fn new() -> Self {
Self {
current: FxHashMap::default(),
}
}
/// Clear the content of the diversions, to reset the state of the compiler.
pub fn clear(&mut self) {
self.current.clear()
}
/// Are there any diversions?
pub fn is_empty(&self) -> bool {
self.current.is_empty()
}
/// Get the current diversion of `value`, if any.
pub fn diversion(&self, value: Value) -> Option<&Diversion> {
self.current.get(&value)
}
/// Get all current diversions.
pub fn iter(&self) -> Iter<'_, Value, Diversion> {
self.current.iter()
}
/// Get the current location for `value`. Fall back to the assignment map for non-diverted
/// values
pub fn get(&self, value: Value, locations: &ValueLocations) -> ValueLoc {
match self.diversion(value) {
Some(d) => d.to,
None => locations[value],
}
}
/// Get the current register location for `value`, or panic if `value` isn't in a register.
pub fn reg(&self, value: Value, locations: &ValueLocations) -> RegUnit {
self.get(value, locations).unwrap_reg()
}
/// Get the current stack location for `value`, or panic if `value` isn't in a stack slot.
pub fn stack(&self, value: Value, locations: &ValueLocations) -> StackSlot {
self.get(value, locations).unwrap_stack()
}
/// Record any kind of move.
///
/// The `from` location must match an existing `to` location, if any.
fn divert(&mut self, value: Value, from: ValueLoc, to: ValueLoc) {
debug_assert!(from.is_assigned() && to.is_assigned());
match self.current.entry(value) {
Entry::Occupied(mut e) => {
// TODO: non-lexical lifetimes should allow removal of the scope and early return.
{
let d = e.get_mut();
debug_assert_eq!(d.to, from, "Bad regmove chain for {}", value);
if d.from != to {
d.to = to;
return;
}
}
e.remove();
}
Entry::Vacant(e) => {
e.insert(Diversion::new(from, to));
}
}
}
/// Record a register -> register move.
pub fn regmove(&mut self, value: Value, from: RegUnit, to: RegUnit) {
self.divert(value, ValueLoc::Reg(from), ValueLoc::Reg(to));
}
/// Record a register -> stack move.
pub fn regspill(&mut self, value: Value, from: RegUnit, to: StackSlot) {
self.divert(value, ValueLoc::Reg(from), ValueLoc::Stack(to));
}
/// Record a stack -> register move.
pub fn regfill(&mut self, value: Value, from: StackSlot, to: RegUnit) {
self.divert(value, ValueLoc::Stack(from), ValueLoc::Reg(to));
}
/// Apply the effect of `inst`.
///
/// If `inst` is a `regmove`, `regfill`, or `regspill` instruction, update the diversions to
/// match.
pub fn apply(&mut self, inst: &InstructionData) {
match *inst {
InstructionData::RegMove {
opcode: Opcode::Regmove,
arg,
src,
dst,
} => self.regmove(arg, src, dst),
InstructionData::RegSpill {
opcode: Opcode::Regspill,
arg,
src,
dst,
} => self.regspill(arg, src, dst),
InstructionData::RegFill {
opcode: Opcode::Regfill,
arg,
src,
dst,
} => self.regfill(arg, src, dst),
_ => {}
}
}
/// Drop any recorded move for `value`.
///
/// Returns the `to` location of the removed diversion.
pub fn remove(&mut self, value: Value) -> Option<ValueLoc> {
self.current.remove(&value).map(|d| d.to)
}
/// Resets the state of the current diversions to the recorded diversions at the entry of the
/// given `block`. The recorded diversions are available after coloring in the
/// `func.entry_diversions` field.
pub fn at_block(&mut self, entry_diversions: &EntryRegDiversions, block: Block) {
self.clear();
if let Some(entry_divert) = entry_diversions.map.get(block) {
let iter = entry_divert.divert.current.iter();
self.current.extend(iter);
}
}
/// Copy the current state of the diversions, and save it for the entry of the `block` given as
/// argument.
///
/// Note: This function can only be called once on a `Block` with a given `entry_diversions`
/// argument, otherwise it would panic.
pub fn save_for_block(&mut self, entry_diversions: &mut EntryRegDiversions, target: Block) {
// No need to save anything if there is no diversions to be recorded.
if self.is_empty() {
return;
}
debug_assert!(!entry_diversions.map.contains_key(target));
let iter = self.current.iter();
let mut entry_divert = Self::new();
entry_divert.current.extend(iter);
entry_diversions.map.insert(EntryRegDiversionsValue {
key: target,
divert: entry_divert,
});
}
/// Check that the current diversions match the diversions recorded in `entry_diversions`
/// for the entry of the given `block`.
pub fn check_block_entry(&self, entry_diversions: &EntryRegDiversions, target: Block) -> bool {
let entry_divert = match entry_diversions.map.get(target) {
Some(entry_divert) => entry_divert,
None => return self.is_empty(),
};
if entry_divert.divert.current.len() != self.current.len() {
return false;
}
for (val, _) in entry_divert.divert.current.iter() {
if !self.current.contains_key(val) {
return false;
}
}
true
}
/// Return an object that can display the diversions.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayDiversions<'a> {
DisplayDiversions(&self, regs.into())
}
}
impl EntryRegDiversions {
/// Create a new empty entry diversion, to associate diversions to each block entry.
pub fn new() -> Self {
Self {
map: SparseMap::new(),
}
}
pub fn clear(&mut self) {
self.map.clear();
}
}
impl Clone for EntryRegDiversions {
/// The Clone trait is required by `ir::Function`.
fn clone(&self) -> Self {
let mut tmp = Self::new();
for v in self.map.values() {
tmp.map.insert(v.clone());
}
tmp
}
}
/// Implement `SparseMapValue`, as required to make use of a `SparseMap` for mapping the entry
/// diversions for each block.
impl SparseMapValue<Block> for EntryRegDiversionsValue {
fn key(&self) -> Block {
self.key
}
}
/// Object that displays register diversions.
pub struct DisplayDiversions<'a>(&'a RegDiversions, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayDiversions<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{{")?;
for (value, div) in self.0.current.iter() {
write!(
f,
" {}: {} -> {}",
value,
div.from.display(self.1),
div.to.display(self.1)
)?
}
write!(f, " }}")
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::entity::EntityRef;
use crate::ir::Value;
#[test]
fn inserts() {
let mut divs = RegDiversions::new();
let v1 = Value::new(1);
let v2 = Value::new(2);
divs.regmove(v1, 10, 12);
assert_eq!(
divs.diversion(v1),
Some(&Diversion {
from: ValueLoc::Reg(10),
to: ValueLoc::Reg(12),
})
);
assert_eq!(divs.diversion(v2), None);
divs.regmove(v1, 12, 11);
assert_eq!(divs.diversion(v1).unwrap().to, ValueLoc::Reg(11));
divs.regmove(v1, 11, 10);
assert_eq!(divs.diversion(v1), None);
}
}

View File

@@ -1,344 +0,0 @@
//! Track which values are live in a block with instruction granularity.
//!
//! The `LiveValueTracker` keeps track of the set of live SSA values at each instruction in a block.
//! The sets of live values are computed on the fly as the tracker is moved from instruction to
//! instruction, starting at the block header.
use crate::dominator_tree::DominatorTree;
use crate::entity::{EntityList, ListPool};
use crate::fx::FxHashMap;
use crate::ir::{Block, DataFlowGraph, ExpandedProgramPoint, Inst, Layout, Value};
use crate::partition_slice::partition_slice;
use crate::regalloc::affinity::Affinity;
use crate::regalloc::liveness::Liveness;
use crate::regalloc::liverange::LiveRange;
use alloc::vec::Vec;
type ValueList = EntityList<Value>;
/// Compute and track live values throughout a block.
pub struct LiveValueTracker {
/// The set of values that are live at the current program point.
live: LiveValueVec,
/// Saved set of live values for every jump and branch that can potentially be an immediate
/// dominator of a block.
///
/// This is the set of values that are live *before* the branch.
idom_sets: FxHashMap<Inst, ValueList>,
/// Memory pool for the live sets.
idom_pool: ListPool<Value>,
}
/// Information about a value that is live at the current program point.
#[derive(Debug)]
pub struct LiveValue {
/// The live value.
pub value: Value,
/// The local ending point of the live range in the current block, as returned by
/// `LiveRange::def_local_end()` or `LiveRange::livein_local_end()`.
pub endpoint: Inst,
/// The affinity of the value as represented in its `LiveRange`.
///
/// This value is simply a copy of the affinity stored in the live range. We copy it because
/// almost all users of `LiveValue` need to look at it.
pub affinity: Affinity,
/// The live range for this value never leaves its block.
pub is_local: bool,
/// This value is dead - the live range ends immediately.
pub is_dead: bool,
}
struct LiveValueVec {
/// The set of values that are live at the current program point.
values: Vec<LiveValue>,
/// How many values at the front of `values` are known to be live after `inst`?
///
/// This is used to pass a much smaller slice to `partition_slice` when it's called a second
/// time for the same instruction.
live_prefix: Option<(Inst, usize)>,
}
impl LiveValueVec {
fn new() -> Self {
Self {
values: Vec::new(),
live_prefix: None,
}
}
/// Add a new live value to `values`. Copy some properties from `lr`.
fn push(&mut self, value: Value, endpoint: Inst, lr: &LiveRange) {
self.values.push(LiveValue {
value,
endpoint,
affinity: lr.affinity,
is_local: lr.is_local(),
is_dead: lr.is_dead(),
});
}
/// Remove all elements.
fn clear(&mut self) {
self.values.clear();
self.live_prefix = None;
}
/// Make sure that the values killed by `next_inst` are moved to the end of the `values`
/// vector.
///
/// Returns the number of values that will be live after `next_inst`.
fn live_after(&mut self, next_inst: Inst) -> usize {
// How many values at the front of the vector are already known to survive `next_inst`?
// We don't need to pass this prefix to `partition_slice()`
let keep = match self.live_prefix {
Some((i, prefix)) if i == next_inst => prefix,
_ => 0,
};
// Move the remaining surviving values to the front partition of the vector.
let prefix = keep + partition_slice(&mut self.values[keep..], |v| v.endpoint != next_inst);
// Remember the new prefix length in case we get called again for the same `next_inst`.
self.live_prefix = Some((next_inst, prefix));
prefix
}
/// Remove the values killed by `next_inst`.
fn remove_kill_values(&mut self, next_inst: Inst) {
let keep = self.live_after(next_inst);
self.values.truncate(keep);
}
/// Remove any dead values.
fn remove_dead_values(&mut self) {
self.values.retain(|v| !v.is_dead);
self.live_prefix = None;
}
}
impl LiveValueTracker {
/// Create a new blank tracker.
pub fn new() -> Self {
Self {
live: LiveValueVec::new(),
idom_sets: FxHashMap(),
idom_pool: ListPool::new(),
}
}
/// Clear all cached information.
pub fn clear(&mut self) {
self.live.clear();
self.idom_sets.clear();
self.idom_pool.clear();
}
/// Get the set of currently live values.
///
/// Between calls to `process_inst()` and `drop_dead()`, this includes both values killed and
/// defined by the current instruction.
pub fn live(&self) -> &[LiveValue] {
&self.live.values
}
/// Get a mutable set of currently live values.
///
/// Use with care and don't move entries around.
pub fn live_mut(&mut self) -> &mut [LiveValue] {
&mut self.live.values
}
/// Move the current position to the top of `block`.
///
/// This depends on the stored live value set at `block`'s immediate dominator, so that must have
/// been visited first.
///
/// Returns `(liveins, args)` as a pair of slices. The first slice is the set of live-in values
/// from the immediate dominator. The second slice is the set of `block` parameters.
///
/// Dead parameters with no uses are included in `args`. Call `drop_dead_params()` to remove them.
pub fn block_top(
&mut self,
block: Block,
dfg: &DataFlowGraph,
liveness: &Liveness,
layout: &Layout,
domtree: &DominatorTree,
) -> (&[LiveValue], &[LiveValue]) {
// Start over, compute the set of live values at the top of the block from two sources:
//
// 1. Values that were live before `block`'s immediate dominator, filtered for those that are
// actually live-in.
// 2. Arguments to `block` that are not dead.
//
self.live.clear();
// Compute the live-in values. Start by filtering the set of values that were live before
// the immediate dominator. Just use the empty set if there's no immediate dominator (i.e.,
// the entry block or an unreachable block).
if let Some(idom) = domtree.idom(block) {
// If the immediate dominator exists, we must have a stored list for it. This is a
// requirement on the order blocks are visited: all dominators must have been processed
// before the current block.
let idom_live_list = self
.idom_sets
.get(&idom)
.expect("No stored live set for dominator");
// Get just the values that are live-in to `block`.
for &value in idom_live_list.as_slice(&self.idom_pool) {
let lr = liveness
.get(value)
.expect("Immediate dominator value has no live range");
// Check if this value is live-in here.
if let Some(endpoint) = lr.livein_local_end(block, layout) {
self.live.push(value, endpoint, lr);
}
}
}
// Now add all the live parameters to `block`.
let first_arg = self.live.values.len();
for &value in dfg.block_params(block) {
let lr = &liveness[value];
debug_assert_eq!(lr.def(), block.into());
match lr.def_local_end().into() {
ExpandedProgramPoint::Inst(endpoint) => {
self.live.push(value, endpoint, lr);
}
ExpandedProgramPoint::Block(local_block) => {
// This is a dead block parameter which is not even live into the first
// instruction in the block.
debug_assert_eq!(
local_block, block,
"block parameter live range ends at wrong block header"
);
// Give this value a fake endpoint that is the first instruction in the block.
// We expect it to be removed by calling `drop_dead_params()`.
self.live
.push(value, layout.first_inst(block).expect("Empty block"), lr);
}
}
}
self.live.values.split_at(first_arg)
}
/// Prepare to move past `inst`.
///
/// Determine the set of already live values that are killed by `inst`, and add the new defined
/// values to the tracked set.
///
/// Returns `(throughs, kills, defs)` as a tuple of slices:
///
/// 1. The `throughs` slice is the set of live-through values that are neither defined nor
/// killed by the instruction.
/// 2. The `kills` slice is the set of values that were live before the instruction and are
/// killed at the instruction. This does not include dead defs.
/// 3. The `defs` slice is guaranteed to be in the same order as `inst`'s results, and includes
/// dead defines.
///
/// The order of `throughs` and `kills` is arbitrary.
///
/// The `drop_dead()` method must be called next to actually remove the dead values from the
/// tracked set after the two returned slices are no longer needed.
pub fn process_inst(
&mut self,
inst: Inst,
dfg: &DataFlowGraph,
liveness: &Liveness,
) -> (&[LiveValue], &[LiveValue], &[LiveValue]) {
// Save a copy of the live values before any branches or jumps that could be somebody's
// immediate dominator.
if dfg[inst].opcode().is_branch() {
self.save_idom_live_set(inst);
}
// Move killed values to the end of the vector.
// Don't remove them yet, `drop_dead()` will do that.
let first_kill = self.live.live_after(inst);
// Add the values defined by `inst`.
let first_def = self.live.values.len();
for &value in dfg.inst_results(inst) {
let lr = &liveness[value];
debug_assert_eq!(lr.def(), inst.into());
match lr.def_local_end().into() {
ExpandedProgramPoint::Inst(endpoint) => {
self.live.push(value, endpoint, lr);
}
ExpandedProgramPoint::Block(block) => {
panic!("Instruction result live range can't end at {}", block);
}
}
}
(
&self.live.values[0..first_kill],
&self.live.values[first_kill..first_def],
&self.live.values[first_def..],
)
}
/// Prepare to move past a ghost instruction.
///
/// This is like `process_inst`, except any defs are ignored.
///
/// Returns `(throughs, kills)`.
pub fn process_ghost(&mut self, inst: Inst) -> (&[LiveValue], &[LiveValue]) {
let first_kill = self.live.live_after(inst);
self.live.values.as_slice().split_at(first_kill)
}
/// Drop the values that are now dead after moving past `inst`.
///
/// This removes both live values that were killed by `inst` and dead defines on `inst` itself.
///
/// This must be called after `process_inst(inst)` and before proceeding to the next
/// instruction.
pub fn drop_dead(&mut self, inst: Inst) {
// Remove both live values that were killed by `inst` and dead defines from `inst`.
self.live.remove_kill_values(inst);
}
/// Drop any values that are marked as `is_dead`.
///
/// Use this after calling `block_top` to clean out dead block parameters.
pub fn drop_dead_params(&mut self) {
self.live.remove_dead_values();
}
/// Process new spills.
///
/// Any values where `f` returns true are spilled and will be treated as if their affinity was
/// `Stack`.
pub fn process_spills<F>(&mut self, mut f: F)
where
F: FnMut(Value) -> bool,
{
for lv in &mut self.live.values {
if f(lv.value) {
lv.affinity = Affinity::Stack;
}
}
}
/// Save the current set of live values so it is associated with `idom`.
fn save_idom_live_set(&mut self, idom: Inst) {
let values = self.live.values.iter().map(|lv| lv.value);
let pool = &mut self.idom_pool;
// If there already is a set saved for `idom`, just keep it.
self.idom_sets.entry(idom).or_insert_with(|| {
let mut list = ValueList::default();
list.extend(values, pool);
list
});
}
}

View File

@@ -1,443 +0,0 @@
//! Liveness analysis for SSA values.
//!
//! This module computes the live range of all the SSA values in a function and produces a
//! `LiveRange` instance for each.
//!
//!
//! # Liveness consumers
//!
//! The primary consumer of the liveness analysis is the SSA coloring pass which goes through each
//! block and assigns a register to the defined values. This algorithm needs to maintain a set of the
//! currently live values as it is iterating down the instructions in the block. It asks the
//! following questions:
//!
//! - What is the set of live values at the entry to the block?
//! - When moving past a use of a value, is that value still alive in the block, or was that the last
//! use?
//! - When moving past a branch, which of the live values are still live below the branch?
//!
//! The set of `LiveRange` instances can answer these questions through their `def_local_end` and
//! `livein_local_end` queries. The coloring algorithm visits blocks in a topological order of the
//! dominator tree, so it can compute the set of live values at the beginning of a block by starting
//! from the set of live values at the dominating branch instruction and filtering it with
//! `livein_local_end`. These sets do not need to be stored in the liveness analysis.
//!
//! The secondary consumer of the liveness analysis is the spilling pass which needs to count the
//! number of live values at every program point and insert spill code until the number of
//! registers needed is small enough.
//!
//!
//! # Alternative algorithms
//!
//! A number of different liveness analysis algorithms exist, so it is worthwhile to look at a few
//! alternatives.
//!
//! ## Data-flow equations
//!
//! The classic *live variables analysis* that you will find in all compiler books from the
//! previous century does not depend on SSA form. It is typically implemented by iteratively
//! solving data-flow equations on bit-vectors of variables. The result is a live-out bit-vector of
//! variables for every basic block in the program.
//!
//! This algorithm has some disadvantages that makes us look elsewhere:
//!
//! - Quadratic memory use. We need a bit per variable per basic block in the function.
//! - Dense representation of sparse data. In practice, the majority of SSA values never leave
//! their basic block, and those that do rarely span a large number of basic blocks. This
//! makes the data stored in the bitvectors quite sparse.
//! - Traditionally, the data-flow equations were solved for real program *variables* which does
//! not include temporaries used in evaluating expressions. We have an SSA form program which
//! blurs the distinction between temporaries and variables. This makes the quadratic memory
//! problem worse because there are many more SSA values than there was variables in the original
//! program, and we don't know a priori which SSA values leave their basic block.
//! - Missing last-use information. For values that are not live-out of a basic block, we would
//! need to store information about the last use in the block somewhere. LLVM stores this
//! information as a 'kill bit' on the last use in the IR. Maintaining these kill bits has been a
//! source of problems for LLVM's register allocator.
//!
//! Data-flow equations can detect when a variable is used uninitialized, and they can handle
//! multiple definitions of the same variable. We don't need this generality since we already have
//! a program in SSA form.
//!
//! ## LLVM's liveness analysis
//!
//! LLVM's register allocator computes liveness per *virtual register*, where a virtual register is
//! a disjoint union of related SSA values that should be assigned to the same physical register.
//! It uses a compact data structure very similar to our `LiveRange`. The important difference is
//! that Cranelift's `LiveRange` only describes a single SSA value, while LLVM's `LiveInterval`
//! describes the live range of a virtual register *and* which one of the related SSA values is
//! live at any given program point.
//!
//! LLVM computes the live range of each virtual register independently by using the use-def chains
//! that are baked into its IR. The algorithm for a single virtual register is:
//!
//! 1. Initialize the live range with a single-instruction snippet of liveness at each def, using
//! the def-chain. This does not include any phi-values.
//! 2. Go through the virtual register's use chain and perform the following steps at each use:
//! 3. Perform an exhaustive depth-first traversal up the CFG from the use. Look for basic blocks
//! that already contain some liveness and extend the last live SSA value in the block to be
//! live-out. Also build a list of new basic blocks where the register needs to be live-in.
//! 4. Iteratively propagate live-out SSA values to the new live-in blocks. This may require new
//! PHI values to be created when different SSA values can reach the same block.
//!
//! The iterative SSA form reconstruction can be skipped if the depth-first search only encountered
//! one SSA value.
//!
//! This algorithm has some advantages compared to the data-flow equations:
//!
//! - The live ranges of local virtual registers are computed very quickly without ever traversing
//! the CFG. The memory needed to store these live ranges is independent of the number of basic
//! blocks in the program.
//! - The time to compute the live range of a global virtual register is proportional to the number
//! of basic blocks covered. Many virtual registers only cover a few blocks, even in very large
//! functions.
//! - A single live range can be recomputed after making modifications to the IR. No global
//! algorithm is necessary. This feature depends on having use-def chains for virtual registers
//! which Cranelift doesn't have.
//!
//! Cranelift uses very similar data structures and algorithms to LLVM's, with the important
//! difference that live ranges are computed per SSA value instead of per virtual register, and the
//! uses in Cranelift IR refer to SSA values instead of virtual registers. This means that
//! Cranelift can skip the last step of reconstructing SSA form for the virtual register uses.
//!
//! ## Fast Liveness Checking for SSA-Form Programs
//!
//! A liveness analysis that is often brought up in the context of SSA-based register allocation
//! was presented at CGO 2008:
//!
//! > Boissinot, B., Hack, S., Grund, D., de Dinechin, B. D., & Rastello, F. (2008). *Fast Liveness
//! Checking for SSA-Form Programs.* CGO.
//!
//! This analysis uses a global pre-computation that only depends on the CFG of the function. It
//! then allows liveness queries for any (value, program point) pair. Each query traverses the use
//! chain of the value and performs lookups in the precomputed bit-vectors.
//!
//! I did not seriously consider this analysis for Cranelift because:
//!
//! - It depends critically on use chains which Cranelift doesn't have.
//! - Popular variables like the `this` pointer in a C++ method can have very large use chains.
//! Traversing such a long use chain on every liveness lookup has the potential for some nasty
//! quadratic behavior in unfortunate cases.
//! - It says "fast" in the title, but the paper only claims to be 16% faster than a data-flow
//! based approach, which isn't that impressive.
//!
//! Nevertheless, the property of only depending on the CFG structure is very useful. If Cranelift
//! gains use chains, this approach would be worth a proper evaluation.
//!
//!
//! # Cranelift's liveness analysis
//!
//! The algorithm implemented in this module is similar to LLVM's with these differences:
//!
//! - The `LiveRange` data structure describes the liveness of a single SSA value, not a virtual
//! register.
//! - Instructions in Cranelift IR contain references to SSA values, not virtual registers.
//! - All live ranges are computed in one traversal of the program. Cranelift doesn't have use
//! chains, so it is not possible to compute the live range for a single SSA value independently.
//!
//! The liveness computation visits all instructions in the program. The order is not important for
//! the algorithm to be correct. At each instruction, the used values are examined.
//!
//! - The first time a value is encountered, its live range is constructed as a dead live range
//! containing only the defining program point.
//! - The local interval of the value's live range is extended so it reaches the use. This may
//! require creating a new live-in local interval for the block.
//! - If the live range became live-in to the block, add the block to a work-list.
//! - While the work-list is non-empty, pop a live-in block and repeat the two steps above, using each
//! of the live-in block's CFG predecessor instructions as a 'use'.
//!
//! The effect of this algorithm is to extend the live range of each value to reach uses as they are
//! visited. No data about each value beyond the live range is needed between visiting uses, so
//! nothing is lost by computing the live range of all values simultaneously.
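//!
//! As a rough sketch of the worklist step above (illustrative only; the real implementation below
//! works on `LiveRange`, `Block`, and the actual CFG, while this toy version uses plain integers
//! and a made-up `preds` map):
//!
//! ```
//! use std::collections::{HashMap, HashSet};
//!
//! // Toy CFG: block 1 follows block 0, and block 2 has block 1 and itself (a loop) as
//! // predecessors. The value is defined in block 0 and used in block 2.
//! let preds: HashMap<u32, Vec<u32>> = HashMap::from([(1, vec![0]), (2, vec![1, 2])]);
//! let def_block = 0u32;
//!
//! let mut live_in: HashSet<u32> = HashSet::new();
//! let mut worklist = vec![2u32]; // the block containing the use
//! while let Some(block) = worklist.pop() {
//!     if block != def_block && live_in.insert(block) {
//!         // Newly discovered live-in block: the value must also be live at all of its
//!         // predecessors' branches, so process those blocks later.
//!         worklist.extend(preds.get(&block).into_iter().flatten().copied());
//!     }
//! }
//! assert_eq!(live_in, HashSet::from([1, 2]));
//! ```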
//!
//! ## Cache efficiency of Cranelift vs LLVM
//!
//! Since LLVM computes the complete live range of a virtual register in one go, it can keep the
//! whole `LiveInterval` for the register in L1 cache. Since it is visiting the instructions in use
//! chain order, some cache thrashing can occur as a result of pulling instructions into cache
//! somewhat chaotically.
//!
//! Cranelift uses a transposed algorithm, visiting instructions in order. This means that each
//! instruction is brought into cache only once, and it is likely that the other instructions on
//! the same cache line will be visited before the line is evicted.
//!
//! Cranelift's problem is that the `LiveRange` structs are visited many times and not always
//! regularly. We should strive to make the `LiveRange` struct as small as possible such that
//! multiple related values can live on the same cache line.
//!
//! - Local values should fit in a 16-byte `LiveRange` struct or smaller. The current
//! implementation contains a 24-byte `Vec` object and a redundant `value` member pushing the
//! size to 32 bytes.
//! - Related values should be stored on the same cache line. The current sparse set implementation
//! does a decent job of that.
//! - For global values, the list of live-in intervals is very likely to fit on a single cache
//! line. These lists are very likely to be found in L2 cache at least.
//!
//! There is some room for improvement.
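//!
//! As a back-of-the-envelope check of the 16-byte target, a hypothetical layout using 32-bit
//! fields fits the budget (this is not the actual `LiveRange` definition, just an illustration of
//! the size arithmetic):
//!
//! ```
//! #[allow(dead_code)]
//! struct CompactLiveRange {
//!     def_begin: u32, // packed program point
//!     def_end: u32,   // packed program point
//!     liveins: u32,   // index into a side table of live-in intervals
//!     affinity: u32,  // packed register class or stack affinity
//! }
//! assert_eq!(std::mem::size_of::<CompactLiveRange>(), 16);
//! ```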
use crate::entity::SparseMap;
use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
use crate::ir::dfg::ValueDef;
use crate::ir::{Block, Function, Inst, Layout, ProgramPoint, Value};
use crate::isa::{EncInfo, OperandConstraint, TargetIsa};
use crate::regalloc::affinity::Affinity;
use crate::regalloc::liverange::LiveRange;
use crate::timing;
use alloc::vec::Vec;
use core::mem;
use core::ops::Index;
/// A set of live ranges, indexed by value number.
type LiveRangeSet = SparseMap<Value, LiveRange>;
/// Get a mutable reference to the live range for `value`.
/// Create it if necessary.
fn get_or_create<'a>(
lrset: &'a mut LiveRangeSet,
value: Value,
isa: &dyn TargetIsa,
func: &Function,
encinfo: &EncInfo,
) -> &'a mut LiveRange {
// It would be better to use `get_mut()` here, but that leads to borrow checker fighting
// which can probably only be resolved by non-lexical lifetimes.
// https://github.com/rust-lang/rfcs/issues/811
if lrset.get(value).is_none() {
// Create a live range for value. We need the program point that defines it.
let def;
let affinity;
match func.dfg.value_def(value) {
ValueDef::Result(inst, rnum) => {
def = inst.into();
// Initialize the affinity from the defining instruction's result constraints.
// Don't do this for call return values which are always tied to a single register.
affinity = encinfo
.operand_constraints(func.encodings[inst])
.and_then(|rc| rc.outs.get(rnum))
.map(Affinity::new)
.or_else(|| {
// If this is a call, get the return value affinity.
func.dfg
.call_signature(inst)
.map(|sig| Affinity::abi(&func.dfg.signatures[sig].returns[rnum], isa))
})
.unwrap_or_default();
}
ValueDef::Param(block, num) => {
def = block.into();
if func.layout.entry_block() == Some(block) {
// The affinity for entry block parameters can be inferred from the function
// signature.
affinity = Affinity::abi(&func.signature.params[num], isa);
} else {
// Give normal block parameters a register affinity matching their type.
let rc = isa.regclass_for_abi_type(func.dfg.value_type(value));
affinity = Affinity::Reg(rc.into());
}
}
};
lrset.insert(LiveRange::new(value, def, affinity));
}
lrset.get_mut(value).unwrap()
}
/// Extend the live range for `value` so it reaches `to` which must live in `block`.
fn extend_to_use(
lr: &mut LiveRange,
block: Block,
to: Inst,
worklist: &mut Vec<Block>,
func: &Function,
cfg: &ControlFlowGraph,
) {
// This is our scratch working space, and we'll leave it empty when we return.
debug_assert!(worklist.is_empty());
// Extend the range locally in `block`.
// If there already was a live interval in that block, we're done.
if lr.extend_in_block(block, to, &func.layout) {
worklist.push(block);
}
// The work list contains those blocks where we have learned that the value needs to be
// live-in.
//
// This algorithm becomes a depth-first traversal up the CFG, enumerating all paths through the
// CFG from the existing live range to `block`.
//
// Extend the live range as we go. The live range itself also serves as a visited set since
// `extend_in_block` will never return true twice for the same block.
//
while let Some(livein) = worklist.pop() {
// We've learned that the value needs to be live-in to the `livein` block.
// Make sure it is also live at all predecessor branches to `livein`.
for BlockPredecessor {
block: pred,
inst: branch,
} in cfg.pred_iter(livein)
{
if lr.extend_in_block(pred, branch, &func.layout) {
// This predecessor block also became live-in. We need to process it later.
worklist.push(pred);
}
}
}
}
/// Liveness analysis for a function.
///
/// Compute a live range for every SSA value used in the function.
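///
/// A minimal usage sketch (`isa`, `func`, `cfg`, and the queried `value` are assumed to come from
/// the surrounding compilation context, so this is not a compilable doctest):
///
/// ```ignore
/// let mut liveness = Liveness::new();
/// liveness.compute(isa, &mut func, &cfg);
/// if let Some(lr) = liveness.get(value) {
///     // `lr` now describes where `value` is live, e.g. whether it stays in one block.
///     let _local = lr.is_local();
/// }
/// ```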
pub struct Liveness {
/// The live ranges that have been computed so far.
ranges: LiveRangeSet,
/// Working space for the `extend_to_use` algorithm.
/// This vector is always empty except while that function is running.
/// It lives here to avoid repeated allocation of scratch memory.
worklist: Vec<Block>,
}
impl Liveness {
/// Create a new empty liveness analysis.
///
/// The memory allocated for this analysis can be reused for multiple functions. Use the
/// `compute` method to actually run the analysis for a function.
pub fn new() -> Self {
Self {
ranges: LiveRangeSet::new(),
worklist: Vec::new(),
}
}
/// Current live ranges.
pub fn ranges(&self) -> &LiveRangeSet {
&self.ranges
}
/// Clear all data structures in this liveness analysis.
pub fn clear(&mut self) {
self.ranges.clear();
self.worklist.clear();
}
/// Get the live range for `value`, if it exists.
pub fn get(&self, value: Value) -> Option<&LiveRange> {
self.ranges.get(value)
}
/// Create a new live range for `value`.
///
/// The new live range will be defined at `def` with no extent, like a dead value.
///
/// This asserts that `value` does not have an existing live range.
pub fn create_dead<PP>(&mut self, value: Value, def: PP, affinity: Affinity)
where
PP: Into<ProgramPoint>,
{
let old = self
.ranges
.insert(LiveRange::new(value, def.into(), affinity));
debug_assert!(old.is_none(), "{} already has a live range", value);
}
/// Move the definition of `value` to `def`.
///
/// The old and new def points must be in the same block, and before the end of the live range.
pub fn move_def_locally<PP>(&mut self, value: Value, def: PP)
where
PP: Into<ProgramPoint>,
{
let lr = self.ranges.get_mut(value).expect("Value has no live range");
lr.move_def_locally(def.into());
}
/// Locally extend the live range for `value` to reach `user`.
///
/// It is assumed that `value` is already live before `user` in `block`.
///
/// Returns a mutable reference to the value's affinity in case that also needs to be updated.
pub fn extend_locally(
&mut self,
value: Value,
block: Block,
user: Inst,
layout: &Layout,
) -> &mut Affinity {
debug_assert_eq!(Some(block), layout.inst_block(user));
let lr = self.ranges.get_mut(value).expect("Value has no live range");
let livein = lr.extend_in_block(block, user, layout);
debug_assert!(!livein, "{} should already be live in {}", value, block);
&mut lr.affinity
}
/// Change the affinity of `value` to `Stack` and return the previous affinity.
pub fn spill(&mut self, value: Value) -> Affinity {
let lr = self.ranges.get_mut(value).expect("Value has no live range");
mem::replace(&mut lr.affinity, Affinity::Stack)
}
/// Compute the live ranges of all SSA values used in `func`.
/// This clears out any existing analysis stored in this data structure.
pub fn compute(&mut self, isa: &dyn TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) {
let _tt = timing::ra_liveness();
self.ranges.clear();
// Get ISA data structures used for computing live range affinities.
let encinfo = isa.encoding_info();
let reginfo = isa.register_info();
// The liveness computation needs to visit all uses, but the order doesn't matter.
// TODO: Perhaps this traversal of the function could be combined with a dead code
// elimination pass if we visit a post-order of the dominator tree?
for block in func.layout.blocks() {
// Make sure we have created live ranges for dead block parameters.
// TODO: If these parameters are really dead, we could remove them, except for the
// entry block which must match the function signature.
for &arg in func.dfg.block_params(block) {
get_or_create(&mut self.ranges, arg, isa, func, &encinfo);
}
for inst in func.layout.block_insts(block) {
// Eliminate all value aliases, they would confuse the register allocator.
func.dfg.resolve_aliases_in_arguments(inst);
// Make sure we have created live ranges for dead defs.
// TODO: When we implement DCE, we can use the absence of a live range to indicate
// an unused value.
for &def in func.dfg.inst_results(inst) {
get_or_create(&mut self.ranges, def, isa, func, &encinfo);
}
// Iterator of constraints, one per value operand.
let encoding = func.encodings[inst];
let operand_constraint_slice: &[OperandConstraint] =
encinfo.operand_constraints(encoding).map_or(&[], |c| c.ins);
let mut operand_constraints = operand_constraint_slice.iter();
for &arg in func.dfg.inst_args(inst) {
// Get the live range, create it as a dead range if necessary.
let lr = get_or_create(&mut self.ranges, arg, isa, func, &encinfo);
// Extend the live range to reach this use.
extend_to_use(lr, block, inst, &mut self.worklist, func, cfg);
// Apply operand constraint, ignoring any variable arguments after the fixed
// operands described by `operand_constraints`. Variable arguments are either
// block arguments or call/return ABI arguments.
if let Some(constraint) = operand_constraints.next() {
lr.affinity.merge(constraint, &reginfo);
}
}
}
}
}
}
impl Index<Value> for Liveness {
type Output = LiveRange;
fn index(&self, index: Value) -> &LiveRange {
self.ranges
.get(index)
.unwrap_or_else(|| panic!("{} has no live range", index))
}
}

View File

@@ -1,720 +0,0 @@
//! Data structure representing the live range of an SSA value.
//!
//! Live ranges are tracked per SSA value, not per variable or virtual register. The live range of
//! an SSA value begins where it is defined and extends to all program points where the value is
//! still needed.
//!
//! # Local Live Ranges
//!
//! Inside a single basic block, the live range of a value is always an interval between
//! two program points (if the value is live in the block at all). The starting point is either:
//!
//! 1. The instruction that defines the value, or
//! 2. The block header, because the value is an argument to the block, or
//! 3. The block header, because the value is defined in another block and live-in to this one.
//!
//! The ending point of the local live range is the last of the following program points in the
//! block:
//!
//! 1. The last use in the block, where a *use* is an instruction that has the value as an argument.
//! 2. The last branch or jump instruction in the block that can reach a use.
//! 3. If the value has no uses anywhere (a *dead value*), the program point that defines it.
//!
//! Note that 2. includes loop back-edges to the same block. In general, if a value is defined
//! outside a loop and used inside the loop, it will be live in the entire loop.
//!
//! # Global Live Ranges
//!
//! Values that appear in more than one block have a *global live range* which can be seen as the
//! disjoint union of the per-block local intervals for all of the blocks where the value is live.
//! Together with a `ProgramOrder` which provides a linear ordering of the blocks, the global live
//! range becomes a linear sequence of disjoint intervals, at most one per block.
//!
//! In the special case of a dead value, the global live range is a single interval where the start
//! and end points are the same. The global live range of a value is never completely empty.
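//!
//! As a concrete illustration of these local and global intervals (the instructions and value
//! names below are hypothetical, shown only to illustrate the rules above), `v1` has a local
//! interval in `block0` that ends at the `jump`, and its global live range additionally contains
//! a local interval in `block1`, where it is live-in, ending at the last use:
//!
//! ```text
//! block0:
//!     v1 = iconst.i32 1     ; def of v1: its local interval starts here
//!     v2 = iadd v1, v1      ; a use of v1
//!     jump block1           ; this jump can reach the use in block1, so v1's local
//!                           ; interval in block0 ends here
//! block1:
//!     v3 = imul v1, v1      ; v1 is live-in to block1; its interval there ends here
//!     return v3
//! ```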
//!
//! # Register interference
//!
//! The register allocator uses live ranges to determine if values *interfere*, which means that
//! they can't be stored in the same register. Two live ranges interfere if and only if any of
//! their intervals overlap.
//!
//! If one live range ends at an instruction that defines another live range, those two live ranges
//! are not considered to interfere. This is because most ISAs allow instructions to reuse an input
//! register for an output value. If Cranelift gets support for inline assembly, we will need to
//! handle *early clobbers* which are output registers that are not allowed to alias any input
//! registers.
//!
//! If `i1 < i2 < i3` are program points, we have:
//!
//! - `i1-i2` and `i1-i3` interfere because the intervals overlap.
//! - `i1-i2` and `i2-i3` don't interfere.
//! - `i1-i3` and `i2-i2` do interfere because the dead def would clobber the register.
//! - `i1-i2` and `i2-i2` don't interfere.
//! - `i2-i3` and `i2-i2` do interfere.
//!
//! Because of this behavior around interval end points, live range interference is not completely
//! equivalent to mathematical intersection of open or half-open intervals.
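//!
//! These end-point rules can be modeled by a small predicate over `(def, end)` pairs; the
//! following is an illustrative model using plain integers as program points, not the actual
//! `LiveRange` API:
//!
//! ```
//! // Two ranges interfere if they share a def point, or if each def lies strictly before
//! // the other range's end point.
//! fn interferes(a: (u32, u32), b: (u32, u32)) -> bool {
//!     a.0 == b.0 || (a.0 < b.1 && b.0 < a.1)
//! }
//! let (i1, i2, i3) = (1, 2, 3);
//! assert!(interferes((i1, i2), (i1, i3)));  // overlapping intervals
//! assert!(!interferes((i1, i2), (i2, i3))); // end point meets def point
//! assert!(interferes((i1, i3), (i2, i2)));  // dead def inside a live range
//! assert!(!interferes((i1, i2), (i2, i2))); // dead def at the end point
//! assert!(interferes((i2, i3), (i2, i2)));  // two defs at the same point
//! ```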
//!
//! # Implementation notes
//!
//! A few notes about the implementation of the live intervals field `liveins`. This should not
//! concern someone only looking to use the public interface.
//!
//! ## Current representation
//!
//! Our current implementation uses a sorted array of compressed intervals, represented by their
//! boundaries (Block, Inst), sorted by Block. This is a simple data structure that makes it easy
//! to coalesce intervals and shows good performance in practice. See
//! <https://github.com/bytecodealliance/cranelift/issues/1084> for benchmarks against using a
//! bforest::Map<Block, Inst>.
//!
//! ## block ordering
//!
//! The relative order of blocks is used to maintain a sorted list of live-in intervals and to
//! coalesce adjacent live-in intervals when the prior interval covers the whole block. This doesn't
//! depend on any property of the program order, so alternative orderings are possible:
//!
//! 1. The block layout order. This is what we currently use.
//! 2. A topological order of the dominator tree. All the live-in intervals would come after the
//! def interval.
//! 3. A numerical order by block number. Performant because it doesn't need to indirect through the
//! `ProgramOrder` for comparisons.
//!
//! These orderings will cause small differences in coalescing opportunities, but all of them would
//! do a decent job of compressing a long live range. The numerical order might be preferable
//! because:
//!
//! - It has better performance because block numbers can be compared directly without any table
//! lookups.
//! - If block numbers are not reused, it is safe to allocate new blocks without getting spurious
//! live-in intervals from any coalesced representations that happen to cross a new block.
//!
//! For comparing instructions, the layout order is always what we want.
//!
//! ## Alternative representation
//!
//! Since a local live-in interval always begins at its block header, it is uniquely described by its
//! end point instruction alone. We can use the layout to look up the block containing the end point.
//! This means that a sorted `Vec<Inst>` would be enough to represent the set of live-in intervals.
//!
//! Coalescing is an important compression technique because some live ranges can span thousands of
//! blocks. We can represent that by switching to a sorted `Vec<ProgramPoint>` representation where
//! an `[Block, Inst]` pair represents a coalesced range, while an `Inst` entry without a preceding
//! `Block` entry represents a single live-in interval.
//!
//! This representation is more compact for a live range with many uncoalesced live-in intervals.
//! It is more complicated to work with, though, so it is probably not worth it. The performance
//! benefits of switching to a numerical block order only appears if the binary search is doing
//! block-block comparisons.
//!
//! A `BTreeMap<Block, Inst>` could have been used for the live-in intervals, but it doesn't provide
//! the necessary API to make coalescing easy, nor does it optimize for our types' sizes.
//!
//! Even the specialized `bforest::Map<Block, Inst>` implementation is slower than a plain sorted
//! array, see <https://github.com/bytecodealliance/cranelift/issues/1084> for details.
use crate::entity::SparseMapValue;
use crate::ir::{Block, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value};
use crate::regalloc::affinity::Affinity;
use core::cmp::Ordering;
use core::marker::PhantomData;
use smallvec::SmallVec;
/// Global live range of a single SSA value.
///
/// As [explained in the module documentation](index.html#local-live-ranges), the live range of an
/// SSA value is the disjoint union of a set of intervals, each local to a single block, and with at
/// most one interval per block. We further distinguish between:
///
/// 1. The *def interval* is the local interval in the block where the value is defined, and
/// 2. The *live-in intervals* are the local intervals in the remaining blocks.
///
/// A live-in interval always begins at the block header, while the def interval can begin at the
/// defining instruction, or at the block header for a block argument value.
///
/// All values have a def interval, but a large proportion of values don't have any live-in
/// intervals. These are called *local live ranges*.
///
/// # Program order requirements
///
/// The internal representation of a `LiveRange` depends on a consistent `ProgramOrder` both for
/// ordering instructions inside a block *and* for ordering blocks. The methods that depend on the
/// ordering take an explicit `ProgramOrder` object, and it is the caller's responsibility to
/// ensure that the provided ordering is consistent between calls.
///
/// In particular, changing the order of blocks or inserting new blocks will invalidate live ranges.
///
/// Inserting new instructions in the layout is safe, but removing instructions is not. Besides the
/// instructions using or defining their value, `LiveRange` structs can contain references to
/// branch and jump instructions.
pub type LiveRange = GenericLiveRange<Layout>;
// See the comment on the `liveins` field below.
pub struct Interval {
begin: Block,
end: Inst,
}
/// Generic live range implementation.
///
/// The intended generic parameter is `PO=Layout`, but tests are simpler with a mock order.
/// Use `LiveRange` instead of using this generic directly.
pub struct GenericLiveRange<PO: ProgramOrder> {
/// The value described by this live range.
/// This member can't be modified in case the live range is stored in a `SparseMap`.
value: Value,
/// The preferred register allocation for this value.
pub affinity: Affinity,
/// The instruction or block header where this value is defined.
def_begin: ProgramPoint,
/// The end point of the def interval. This must always belong to the same block as `def_begin`.
///
/// We always have `def_begin <= def_end` with equality implying a dead def live range with no
/// uses.
def_end: ProgramPoint,
/// Additional live-in intervals sorted in program order.
///
/// This vector is empty for most values which are only used in one block.
///
/// An entry `block -> inst` means that the live range is live-in to `block`, continuing up to
/// `inst` which may belong to a later block in the program order.
///
/// The entries are non-overlapping, and none of them overlap the block where the value is
/// defined.
liveins: SmallVec<[Interval; 2]>,
po: PhantomData<*const PO>,
}
/// A simple helper macro to make comparisons more natural to read.
macro_rules! cmp {
($order:ident, $a:ident > $b:expr) => {
$order.cmp($a, $b) == Ordering::Greater
};
($order:ident, $a:ident >= $b:expr) => {
$order.cmp($a, $b) != Ordering::Less
};
($order:ident, $a:ident < $b:expr) => {
$order.cmp($a, $b) == Ordering::Less
};
($order:ident, $a:ident <= $b:expr) => {
$order.cmp($a, $b) != Ordering::Greater
};
}
impl<PO: ProgramOrder> GenericLiveRange<PO> {
/// Create a new live range for `value` defined at `def`.
///
/// The live range will be created as dead, but it can be extended with `extend_in_block()`.
pub fn new(value: Value, def: ProgramPoint, affinity: Affinity) -> Self {
Self {
value,
affinity,
def_begin: def,
def_end: def,
liveins: SmallVec::new(),
po: PhantomData,
}
}
/// Finds an entry in the compressed set of live-in intervals that contains `block`, or return
/// the position where to insert such a new entry.
fn lookup_entry_containing_block(&self, block: Block, order: &PO) -> Result<usize, usize> {
self.liveins
.binary_search_by(|interval| order.cmp(interval.begin, block))
.or_else(|n| {
// The previous interval's end might cover the searched block.
if n > 0 && cmp!(order, block <= self.liveins[n - 1].end) {
Ok(n - 1)
} else {
Err(n)
}
})
}
/// Extend the local interval for `block` so it reaches `to` which must belong to `block`.
/// Create a live-in interval if necessary.
///
/// If the live range already has a local interval in `block`, extend its end point so it
/// includes `to`, and return false.
///
/// If the live range did not previously have a local interval in `block`, add one so the value
/// is live-in to `block`, extending to `to`. Return true.
///
/// The return value can be used to detect if we just learned that the value is live-in to
/// `block`. This can trigger recursive extensions in `block`'s CFG predecessor blocks.
pub fn extend_in_block(&mut self, block: Block, inst: Inst, order: &PO) -> bool {
// First check if we're extending the def interval.
//
// We're assuming here that `inst` never precedes `def_begin` in the same block, but we can't
// check it without a method for getting `inst`'s block.
if cmp!(order, block <= self.def_end) && cmp!(order, inst >= self.def_begin) {
let inst_pp = inst.into();
debug_assert_ne!(
inst_pp, self.def_begin,
"Can't use value in the defining instruction."
);
if cmp!(order, inst > self.def_end) {
self.def_end = inst_pp;
}
return false;
}
// Now check if we're extending any of the existing live-in intervals.
match self.lookup_entry_containing_block(block, order) {
Ok(n) => {
// We found one interval and might need to extend it.
if cmp!(order, inst <= self.liveins[n].end) {
// Both interval parts are already included in a compressed interval.
return false;
}
// If the instruction at the end is the last instruction before the next block,
// coalesce the two intervals:
// [ival.begin; ival.end] + [next.begin; next.end] = [ival.begin; next.end]
if let Some(next) = &self.liveins.get(n + 1) {
if order.is_block_gap(inst, next.begin) {
// At this point we can choose to remove the current interval or the next
// one; remove the next one to avoid one memory move.
let next_end = next.end;
debug_assert!(cmp!(order, next_end > self.liveins[n].end));
self.liveins[n].end = next_end;
self.liveins.remove(n + 1);
return false;
}
}
// We can't coalesce, just extend the interval.
self.liveins[n].end = inst;
false
}
Err(n) => {
// No interval was found containing the current block: we need to insert a new one,
// unless there's a coalescing opportunity with the previous or next one.
let coalesce_next = self
.liveins
.get(n)
.filter(|next| order.is_block_gap(inst, next.begin))
.is_some();
let coalesce_prev = self
.liveins
.get(n.wrapping_sub(1))
.filter(|prev| order.is_block_gap(prev.end, block))
.is_some();
match (coalesce_prev, coalesce_next) {
// The new interval is the missing hole between prev and next: we can merge
// them all together.
(true, true) => {
let prev_end = self.liveins[n - 1].end;
debug_assert!(cmp!(order, prev_end <= self.liveins[n].end));
self.liveins[n - 1].end = self.liveins[n].end;
self.liveins.remove(n);
}
// Coalesce only with the previous or next one.
(true, false) => {
debug_assert!(cmp!(order, inst >= self.liveins[n - 1].end));
self.liveins[n - 1].end = inst;
}
(false, true) => {
debug_assert!(cmp!(order, block <= self.liveins[n].begin));
self.liveins[n].begin = block;
}
(false, false) => {
// No coalescing opportunity, we have to insert.
self.liveins.insert(
n,
Interval {
begin: block,
end: inst,
},
);
}
}
true
}
}
}
/// Is this the live range of a dead value?
///
/// A dead value has no uses, and its live range ends at the same program point where it is
/// defined.
pub fn is_dead(&self) -> bool {
self.def_begin == self.def_end
}
/// Is this a local live range?
///
/// A local live range is only used in the same block where it was defined. It is allowed to span
/// multiple basic blocks within that block.
pub fn is_local(&self) -> bool {
self.liveins.is_empty()
}
/// Get the program point where this live range is defined.
///
/// This will be a block header when the value is a block argument, otherwise it is the defining
/// instruction.
pub fn def(&self) -> ProgramPoint {
self.def_begin
}
/// Move the definition of this value to a new program point.
///
/// It is only valid to move the definition within the same block, and it can't be moved beyond
/// `def_local_end()`.
pub fn move_def_locally(&mut self, def: ProgramPoint) {
self.def_begin = def;
}
/// Get the local end-point of this live range in the block where it is defined.
///
/// This can be the block header itself in the case of a dead block argument.
/// Otherwise, it will be the last local use or branch/jump that can reach a use.
pub fn def_local_end(&self) -> ProgramPoint {
self.def_end
}
/// Get the local end-point of this live range in a block where it is live-in.
///
/// If this live range is not live-in to `block`, return `None`. Otherwise, return the end-point
/// of this live range's local interval in `block`.
///
/// If the live range is live through all of `block`, the terminator of `block` is a correct
/// answer, but it is also possible that an even later program point is returned. So don't
/// depend on the returned `Inst` to belong to `block`.
pub fn livein_local_end(&self, block: Block, order: &PO) -> Option<Inst> {
self.lookup_entry_containing_block(block, order)
.and_then(|i| {
let inst = self.liveins[i].end;
if cmp!(order, block < inst) {
Ok(inst)
} else {
// Can be any error type, really, since it's discarded by ok().
Err(i)
}
})
.ok()
}
/// Is this value live-in to `block`?
///
/// A block argument is not considered to be live in.
pub fn is_livein(&self, block: Block, order: &PO) -> bool {
self.livein_local_end(block, order).is_some()
}
/// Get all the live-in intervals.
///
/// Note that the intervals are stored in a compressed form so each entry may span multiple
/// blocks where the value is live in.
pub fn liveins<'a>(&'a self) -> impl Iterator<Item = (Block, Inst)> + 'a {
self.liveins
.iter()
.map(|interval| (interval.begin, interval.end))
}
/// Check if this live range overlaps a definition in `block`.
pub fn overlaps_def(&self, def: ExpandedProgramPoint, block: Block, order: &PO) -> bool {
// Two defs at the same program point always overlap, even if one is dead.
if def == self.def_begin.into() {
return true;
}
// Check for an overlap with the local range.
if cmp!(order, def >= self.def_begin) && cmp!(order, def < self.def_end) {
return true;
}
// Check for an overlap with a live-in range.
match self.livein_local_end(block, order) {
Some(inst) => cmp!(order, def < inst),
None => false,
}
}
/// Check if this live range reaches a use at `user` in `block`.
pub fn reaches_use(&self, user: Inst, block: Block, order: &PO) -> bool {
// Check for an overlap with the local range.
if cmp!(order, user > self.def_begin) && cmp!(order, user <= self.def_end) {
return true;
}
// Check for an overlap with a live-in range.
match self.livein_local_end(block, order) {
Some(inst) => cmp!(order, user <= inst),
None => false,
}
}
/// Check if this live range is killed at `user` in `block`.
pub fn killed_at(&self, user: Inst, block: Block, order: &PO) -> bool {
self.def_local_end() == user.into() || self.livein_local_end(block, order) == Some(user)
}
}
/// Allow a `LiveRange` to be stored in a `SparseMap` indexed by values.
impl<PO: ProgramOrder> SparseMapValue<Value> for GenericLiveRange<PO> {
fn key(&self) -> Value {
self.value
}
}
#[cfg(test)]
mod tests {
use super::{GenericLiveRange, Interval};
use crate::entity::EntityRef;
use crate::ir::{Block, Inst, Value};
use crate::ir::{ExpandedProgramPoint, ProgramOrder};
use alloc::vec::Vec;
use core::cmp::Ordering;
// Dummy program order which simply compares indexes.
// It is assumed that blocks have indexes that are multiples of 10, and instructions have indexes
// in between. `is_block_gap` assumes that terminator instructions have indexes of the form
// block * 10 + 1. This is used in the coalesce test.
struct ProgOrder {}
impl ProgramOrder for ProgOrder {
fn cmp<A, B>(&self, a: A, b: B) -> Ordering
where
A: Into<ExpandedProgramPoint>,
B: Into<ExpandedProgramPoint>,
{
fn idx(pp: ExpandedProgramPoint) -> usize {
match pp {
ExpandedProgramPoint::Inst(i) => i.index(),
ExpandedProgramPoint::Block(e) => e.index(),
}
}
let ia = idx(a.into());
let ib = idx(b.into());
ia.cmp(&ib)
}
fn is_block_gap(&self, inst: Inst, block: Block) -> bool {
inst.index() % 10 == 1 && block.index() / 10 == inst.index() / 10 + 1
}
}
impl ProgOrder {
// Get the block corresponding to `inst`.
fn inst_block(&self, inst: Inst) -> Block {
let i = inst.index();
Block::new(i - i % 10)
}
// Get the block of a program point.
fn pp_block<PP: Into<ExpandedProgramPoint>>(&self, pp: PP) -> Block {
match pp.into() {
ExpandedProgramPoint::Inst(i) => self.inst_block(i),
ExpandedProgramPoint::Block(e) => e,
}
}
// Validate the live range invariants.
fn validate(&self, lr: &GenericLiveRange<Self>) {
// The def interval must cover a single block.
let def_block = self.pp_block(lr.def_begin);
assert_eq!(def_block, self.pp_block(lr.def_end));
// Check that the def interval isn't backwards.
match self.cmp(lr.def_begin, lr.def_end) {
Ordering::Equal => assert!(lr.liveins.is_empty()),
Ordering::Greater => {
panic!("Backwards def interval: {}-{}", lr.def_begin, lr.def_end)
}
Ordering::Less => {}
}
// Check the live-in intervals.
let mut prev_end = None;
for Interval { begin, end } in lr.liveins.iter() {
let begin = *begin;
let end = *end;
assert_eq!(self.cmp(begin, end), Ordering::Less);
if let Some(e) = prev_end {
assert_eq!(self.cmp(e, begin), Ordering::Less);
}
assert!(
self.cmp(lr.def_end, begin) == Ordering::Less
|| self.cmp(lr.def_begin, end) == Ordering::Greater,
"Interval can't overlap the def block"
);
// Save for next round.
prev_end = Some(end);
}
}
}
// Singleton `ProgramOrder` for tests below.
const PO: &'static ProgOrder = &ProgOrder {};
#[test]
fn dead_def_range() {
let v0 = Value::new(0);
let e0 = Block::new(0);
let i1 = Inst::new(1);
let i2 = Inst::new(2);
let e2 = Block::new(2);
let lr = GenericLiveRange::new(v0, i1.into(), Default::default());
assert!(lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), i1.into());
assert_eq!(lr.def_local_end(), i1.into());
assert_eq!(lr.livein_local_end(e2, PO), None);
PO.validate(&lr);
// A dead live range overlaps its own def program point.
assert!(lr.overlaps_def(i1.into(), e0, PO));
assert!(!lr.overlaps_def(i2.into(), e0, PO));
assert!(!lr.overlaps_def(e0.into(), e0, PO));
}
#[test]
fn dead_arg_range() {
let v0 = Value::new(0);
let e2 = Block::new(2);
let lr = GenericLiveRange::new(v0, e2.into(), Default::default());
assert!(lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), e2.into());
assert_eq!(lr.def_local_end(), e2.into());
// The def interval of a block argument does not count as live-in.
assert_eq!(lr.livein_local_end(e2, PO), None);
PO.validate(&lr);
}
#[test]
fn local_def() {
let v0 = Value::new(0);
let e10 = Block::new(10);
let i11 = Inst::new(11);
let i12 = Inst::new(12);
let i13 = Inst::new(13);
let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());
assert_eq!(lr.extend_in_block(e10, i13, PO), false);
PO.validate(&lr);
assert!(!lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), i11.into());
assert_eq!(lr.def_local_end(), i13.into());
// Extending to an already covered inst should not change anything.
assert_eq!(lr.extend_in_block(e10, i12, PO), false);
PO.validate(&lr);
assert_eq!(lr.def(), i11.into());
assert_eq!(lr.def_local_end(), i13.into());
}
#[test]
fn local_arg() {
let v0 = Value::new(0);
let e10 = Block::new(10);
let i11 = Inst::new(11);
let i12 = Inst::new(12);
let i13 = Inst::new(13);
let mut lr = GenericLiveRange::new(v0, e10.into(), Default::default());
// Extending a dead block argument in its own block should not indicate that a live-in
// interval was created.
assert_eq!(lr.extend_in_block(e10, i12, PO), false);
PO.validate(&lr);
assert!(!lr.is_dead());
assert!(lr.is_local());
assert_eq!(lr.def(), e10.into());
assert_eq!(lr.def_local_end(), i12.into());
// Extending to an already covered inst should not change anything.
assert_eq!(lr.extend_in_block(e10, i11, PO), false);
PO.validate(&lr);
assert_eq!(lr.def(), e10.into());
assert_eq!(lr.def_local_end(), i12.into());
// Extending further.
assert_eq!(lr.extend_in_block(e10, i13, PO), false);
PO.validate(&lr);
assert_eq!(lr.def(), e10.into());
assert_eq!(lr.def_local_end(), i13.into());
}
#[test]
fn global_def() {
let v0 = Value::new(0);
let e10 = Block::new(10);
let i11 = Inst::new(11);
let i12 = Inst::new(12);
let e20 = Block::new(20);
let i21 = Inst::new(21);
let i22 = Inst::new(22);
let i23 = Inst::new(23);
let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());
assert_eq!(lr.extend_in_block(e10, i12, PO), false);
// Adding a live-in interval.
assert_eq!(lr.extend_in_block(e20, i22, PO), true);
PO.validate(&lr);
assert_eq!(lr.livein_local_end(e20, PO), Some(i22));
// Non-extending the live-in.
assert_eq!(lr.extend_in_block(e20, i21, PO), false);
assert_eq!(lr.livein_local_end(e20, PO), Some(i22));
// Extending the existing live-in.
assert_eq!(lr.extend_in_block(e20, i23, PO), false);
PO.validate(&lr);
assert_eq!(lr.livein_local_end(e20, PO), Some(i23));
}
#[test]
fn coalesce() {
let v0 = Value::new(0);
let i11 = Inst::new(11);
let e20 = Block::new(20);
let i21 = Inst::new(21);
let e30 = Block::new(30);
let i31 = Inst::new(31);
let e40 = Block::new(40);
let i41 = Inst::new(41);
let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());
assert_eq!(lr.extend_in_block(e30, i31, PO,), true);
assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e30, i31)]);
// Coalesce to previous
assert_eq!(lr.extend_in_block(e40, i41, PO,), true);
assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e30, i41)]);
// Coalesce to next
assert_eq!(lr.extend_in_block(e20, i21, PO,), true);
assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i41)]);
let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());
assert_eq!(lr.extend_in_block(e40, i41, PO,), true);
assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e40, i41)]);
assert_eq!(lr.extend_in_block(e20, i21, PO,), true);
assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i21), (e40, i41)]);
// Coalesce to previous and next
assert_eq!(lr.extend_in_block(e30, i31, PO,), true);
assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i41)]);
}
}

View File

@@ -1,26 +0,0 @@
//! Register allocation.
//!
//! This module contains data structures and algorithms used for register allocation.
pub mod coloring;
pub mod live_value_tracker;
pub mod liveness;
pub mod liverange;
pub mod register_set;
pub mod virtregs;
mod affinity;
mod branch_splitting;
mod coalescing;
mod context;
mod diversion;
mod pressure;
mod reload;
mod safepoint;
mod solver;
mod spilling;
pub use self::context::Context;
pub use self::diversion::{EntryRegDiversions, RegDiversions};
pub use self::register_set::RegisterSet;
pub use self::safepoint::emit_stack_maps;

View File

@@ -1,371 +0,0 @@
//! Register pressure tracking.
//!
//! SSA-based register allocation depends on a spilling phase that "lowers register pressure
//! sufficiently". This module defines the data structures needed to measure register pressure
//! accurately enough to guarantee that the coloring phase will not run out of registers.
//!
//! Ideally, measuring register pressure amounts to simply counting the number of live registers at
//! any given program point. This simplistic method has two problems:
//!
//! 1. Registers are not interchangeable. Most ISAs have separate integer and floating-point
//! register banks, so we need to at least count the number of live registers in each register
//! bank separately.
//!
//! 2. Some ISAs have complicated register aliasing properties. In particular, the 32-bit ARM
//! ISA has a floating-point register bank where two 32-bit registers alias one 64-bit register.
//! This makes it difficult to accurately measure register pressure.
//!
//! This module deals with the problems via *register banks* and *top-level register classes*.
//! Register classes in different register banks are completely independent, so we can count
//! registers in one bank without worrying about the other bank at all.
//!
//! All register classes have a unique top-level register class, and we will count registers for
//! each top-level register class individually. However, a register bank can have multiple
//! top-level register classes that interfere with each other, so all top-level counts need to
//! be considered when determining how many more registers can be allocated.
//!
//! Currently, the only register bank with multiple top-level register classes is the `arm32`
//! floating-point register bank which has `S`, `D`, and `Q` top-level classes.
//!
//! # Base and transient counts
//!
//! We maintain two separate register counts per top-level register class: base counts and
//! transient counts. The base counts are adjusted with the `take` and `free` functions. The
//! transient counts are adjusted with `take_transient` and `free_transient`.
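//!
//! One plausible way to use the transient counts (a sketch only; the `pressure` tracker and `rc`
//! register class stand in for the caller's state, so this is not a compilable example):
//!
//! ```ignore
//! match pressure.take_transient(rc) {
//!     Ok(()) => {
//!         // The tentative allocation fits. Commit it to the base counts ...
//!         pressure.preserve_transient();
//!         // ... or discard all tentative takes with `pressure.reset_transient()`.
//!     }
//!     Err(mask) => {
//!         // No headroom: ease pressure in one of the top-level classes in `mask`,
//!         // for example by spilling a value, and try again.
//!     }
//! }
//! ```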
// Remove once we're using the pressure tracker.
#![allow(dead_code)]
use crate::isa::registers::{RegClass, RegClassMask, RegInfo};
use crate::regalloc::RegisterSet;
use core::cmp::min;
use core::fmt;
use core::iter::ExactSizeIterator;
use cranelift_codegen_shared::constants::MAX_TRACKED_TOP_RCS;
/// Information per top-level register class.
///
/// Everything but the counts is static information computed from the constructor arguments.
#[derive(Default)]
struct TopRC {
/// Number of registers currently used from this register class.
base_count: u32,
transient_count: u32,
/// Max number of registers that can be allocated.
limit: u32,
/// Register units per register.
width: u8,
/// The first aliasing top-level RC.
first_toprc: u8,
/// The number of aliasing top-level RCs.
num_toprcs: u8,
}
impl TopRC {
fn total_count(&self) -> u32 {
self.base_count + self.transient_count
}
}
pub struct Pressure {
/// Bit mask of top-level register classes that are aliased by other top-level register classes.
/// Unaliased register classes can use a simpler interference algorithm.
aliased: RegClassMask,
/// Current register counts per top-level register class.
toprc: [TopRC; MAX_TRACKED_TOP_RCS],
}
impl Pressure {
/// Create a new register pressure tracker.
pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Self {
let mut p = Self {
aliased: 0,
toprc: Default::default(),
};
// Get the layout of aliasing top-level register classes from the register banks.
for bank in reginfo.banks {
let first = bank.first_toprc;
let num = bank.num_toprcs;
if bank.pressure_tracking {
for rc in &mut p.toprc[first..first + num] {
rc.first_toprc = first as u8;
rc.num_toprcs = num as u8;
}
// Flag the top-level register classes with aliases.
if num > 1 {
p.aliased |= ((1 << num) - 1) << first;
}
} else {
// This bank has no pressure tracking, so its top-level register classes may exceed
// `MAX_TRACKED_TOP_RCS`. Fill in dummy entries.
for rc in &mut p.toprc[first..min(first + num, MAX_TRACKED_TOP_RCS)] {
// These aren't used if we don't set the `aliased` bit.
rc.first_toprc = !0;
rc.limit = !0;
}
}
}
// Compute per-class limits from `usable`.
for (toprc, rc) in p
.toprc
.iter_mut()
.take_while(|t| t.num_toprcs > 0)
.zip(reginfo.classes)
{
toprc.limit = usable.iter(rc).len() as u32;
toprc.width = rc.width;
}
p
}
/// Check for an available register in the register class `rc`.
///
/// If it is possible to allocate one more register from `rc`'s top-level register class,
/// returns 0.
///
/// If not, returns a bit-mask of top-level register classes that are interfering. Register
/// pressure should be eased in one of the returned top-level register classes before calling
/// `check_avail()` to check again.
fn check_avail(&self, rc: RegClass) -> RegClassMask {
let entry = match self.toprc.get(rc.toprc as usize) {
None => return 0, // Not a pressure tracked bank.
Some(e) => e,
};
let mask = 1 << rc.toprc;
if (self.aliased & mask) == 0 {
// This is a simple unaliased top-level register class.
if entry.total_count() < entry.limit {
0
} else {
mask
}
} else {
// This is the more complicated case. The top-level register class has aliases.
self.check_avail_aliased(entry)
}
}
/// Check for an available register in a top-level register class that may have aliases.
///
/// This is the out-of-line slow path for `check_avail()`.
fn check_avail_aliased(&self, entry: &TopRC) -> RegClassMask {
let first = usize::from(entry.first_toprc);
let num = usize::from(entry.num_toprcs);
let width = u32::from(entry.width);
let ulimit = entry.limit * width;
// Count up the number of available register units.
let mut units = 0;
for (rc, rci) in self.toprc[first..first + num].iter().zip(first..) {
let rcw = u32::from(rc.width);
// If `rc.width` is smaller than `width`, each register in `rc` could potentially block
// one of ours. This is assuming that none of the smaller registers are straddling the
// bigger ones.
//
// If `rc.width` is larger than `width`, we are also assuming that the registers are
// aligned and `rc.width` is a multiple of `width`.
let u = if rcw < width {
// We can't take more than the total number of register units in the class.
// This matters for arm32 S-registers which can only ever lock out 16 D-registers.
min(rc.total_count() * width, rc.limit * rcw)
} else {
rc.total_count() * rcw
};
// If this top-level RC on its own is responsible for exceeding our limit, return it
// early to guarantee that registers here are spilled before spilling other registers
// unnecessarily.
if u >= ulimit {
return 1 << rci;
}
units += u;
}
// We've counted up the worst-case number of register units claimed by all aliasing
// classes. Compare to the unit limit in this class.
if units < ulimit {
0
} else {
// Registers need to be spilled from any one of the aliasing classes.
((1 << num) - 1) << first
}
}
/// Take a register from `rc`.
///
/// This does not check if there are enough registers available.
pub fn take(&mut self, rc: RegClass) {
if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
t.base_count += 1;
}
}
/// Free a register in `rc`.
pub fn free(&mut self, rc: RegClass) {
if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
t.base_count -= 1;
}
}
/// Reset all counts to 0, both base and transient.
pub fn reset(&mut self) {
for e in &mut self.toprc {
e.base_count = 0;
e.transient_count = 0;
}
}
/// Try to increment a transient counter.
///
/// This will fail if there are not enough registers available.
pub fn take_transient(&mut self, rc: RegClass) -> Result<(), RegClassMask> {
let mask = self.check_avail(rc);
if mask == 0 {
if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
t.transient_count += 1;
}
Ok(())
} else {
Err(mask)
}
}
/// Reset all transient counts to 0.
pub fn reset_transient(&mut self) {
for e in &mut self.toprc {
e.transient_count = 0;
}
}
/// Preserve the transient counts by transferring them to the base counts.
pub fn preserve_transient(&mut self) {
for e in &mut self.toprc {
e.base_count += e.transient_count;
e.transient_count = 0;
}
}
}
impl fmt::Display for Pressure {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "Pressure[")?;
for rc in &self.toprc {
if rc.limit > 0 && rc.limit < !0 {
write!(f, " {}+{}/{}", rc.base_count, rc.transient_count, rc.limit)?;
}
}
write!(f, " ]")
}
}
#[cfg(test)]
#[cfg(feature = "arm32")]
mod tests {
use super::Pressure;
use crate::isa::registers::{RegBank, RegClassData};
use crate::isa::{RegClass, RegInfo, RegUnit};
use crate::regalloc::RegisterSet;
use core::borrow::Borrow;
// Arm32 `TargetIsa` is now `TargetIsaAdapter`, which does not hold any info
// about registers, so we directly access `INFO` from registers-arm32.rs.
include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs"));
// Get a register class by name.
fn rc_by_name(reginfo: &RegInfo, name: &str) -> RegClass {
reginfo
.classes
.iter()
.find(|rc| rc.name == name)
.expect("Can't find named register class.")
}
#[test]
fn basic_counting() {
let reginfo = INFO.borrow();
let gpr = rc_by_name(&reginfo, "GPR");
let s = rc_by_name(&reginfo, "S");
let regs = RegisterSet::new();
let mut pressure = Pressure::new(&reginfo, &regs);
let mut count = 0;
while pressure.check_avail(gpr) == 0 {
pressure.take(gpr);
count += 1;
}
assert_eq!(count, 16);
assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
assert_eq!(pressure.check_avail(s), 0);
pressure.free(gpr);
assert_eq!(pressure.check_avail(gpr), 0);
pressure.take(gpr);
assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
assert_eq!(pressure.check_avail(s), 0);
pressure.reset();
assert_eq!(pressure.check_avail(gpr), 0);
assert_eq!(pressure.check_avail(s), 0);
}
#[test]
fn arm_float_bank() {
let reginfo = INFO.borrow();
let s = rc_by_name(&reginfo, "S");
let d = rc_by_name(&reginfo, "D");
let q = rc_by_name(&reginfo, "Q");
let regs = RegisterSet::new();
let mut pressure = Pressure::new(&reginfo, &regs);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
// Allocating a single S-register should not affect availability.
pressure.take(s);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
pressure.take(d);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
pressure.take(q);
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
// Take a total of 16 S-regs.
for _ in 1..16 {
pressure.take(s);
}
assert_eq!(pressure.check_avail(s), 0);
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
// We've taken 16 S, 1 D, and 1 Q. There should be 6 more Qs.
for _ in 0..6 {
assert_eq!(pressure.check_avail(d), 0);
assert_eq!(pressure.check_avail(q), 0);
pressure.take(q);
}
// We've taken 16 S, 1 D, and 7 Qs.
assert!(pressure.check_avail(s) != 0);
assert_eq!(pressure.check_avail(d), 0);
assert!(pressure.check_avail(q) != 0);
}
}

View File

@@ -1,391 +0,0 @@
//! Set of allocatable registers as a bit vector of register units.
//!
//! While allocating registers, we need to keep track of which registers are available and which
//! registers are in use. Since registers can alias in different ways, we track this via the
//! "register unit" abstraction. Every register contains one or more register units. Registers that
//! share a register unit can't be in use at the same time.
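//!
//! As a worked example of the unit-to-bit-mask mapping used below (the numbers are made up for
//! illustration), a register of width 2 starting at register unit 33 occupies bits 1 and 2 of
//! word 1 in the mask:
//!
//! ```
//! let (width, reg) = (2u32, 33u32);
//! let width_bits = (1u32 << width) - 1;    // 0b11: two consecutive units
//! let word_index = (reg / 32) as usize;    // unit 33 lives in word 1
//! let reg_bits = width_bits << (reg % 32); // 0b110: bits 1 and 2 of that word
//! assert_eq!((word_index, reg_bits), (1, 0b110));
//! ```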
use crate::isa::registers::{RegClass, RegInfo, RegUnit, RegUnitMask};
use core::char;
use core::fmt;
use core::iter::ExactSizeIterator;
use core::mem::size_of_val;
/// Set of registers available for allocation.
#[derive(Clone)]
pub struct RegisterSet {
avail: RegUnitMask,
}
// Given a register class and a register unit in the class, compute a word index and a bit mask of
// register units representing that register.
//
// Note that a register is not allowed to straddle words.
fn bitmask(rc: RegClass, reg: RegUnit) -> (usize, u32) {
// Bit mask representing the register. It is `rc.width` consecutive units.
let width_bits = (1 << rc.width) - 1;
// Index into avail[] of the word containing `reg`.
let word_index = (reg / 32) as usize;
// The actual bits in the word that cover `reg`.
let reg_bits = width_bits << (reg % 32);
(word_index, reg_bits)
}
impl RegisterSet {
/// Create a new register set with all registers available.
///
/// Note that this includes *all* registers. Query the `TargetIsa` object to get a set of
/// allocatable registers where reserved registers have been filtered out.
pub fn new() -> Self {
Self { avail: [!0; 3] }
}
/// Create a new register set with no registers available.
pub fn empty() -> Self {
Self { avail: [0; 3] }
}
/// Returns `true` if the specified register is available.
pub fn is_avail(&self, rc: RegClass, reg: RegUnit) -> bool {
let (idx, bits) = bitmask(rc, reg);
(self.avail[idx] & bits) == bits
}
/// Allocate `reg` from `rc` so it is no longer available.
///
/// It is an error to take a register that doesn't have all of its register units available.
pub fn take(&mut self, rc: RegClass, reg: RegUnit) {
let (idx, bits) = bitmask(rc, reg);
debug_assert!(
(self.avail[idx] & bits) == bits,
"{}:{} not available in {}",
rc,
rc.info.display_regunit(reg),
self.display(rc.info)
);
self.avail[idx] &= !bits;
}
/// Return `reg` and all of its register units to the set of available registers.
pub fn free(&mut self, rc: RegClass, reg: RegUnit) {
let (idx, bits) = bitmask(rc, reg);
debug_assert!(
(self.avail[idx] & bits) == 0,
"{}:{} is already free in {}",
rc,
rc.info.display_regunit(reg),
self.display(rc.info)
);
self.avail[idx] |= bits;
}
/// Return an iterator over all available registers belonging to the register class `rc`.
///
/// This doesn't allocate anything from the set; use `take()` for that.
pub fn iter(&self, rc: RegClass) -> RegSetIter {
// Start by copying the RC mask. It is a single set bit for each register in the class.
let mut rsi = RegSetIter { regs: rc.mask };
// Mask out the unavailable units.
for idx in 0..self.avail.len() {
// If a single unit in a register is unavailable, the whole register can't be used. If
// a register straddles a word boundary, it will be marked as unavailable. There's an
// assertion in `cranelift-codegen/meta/src/cdsl/regs.rs` to check for that.
for i in 0..rc.width {
rsi.regs[idx] &= self.avail[idx] >> i;
}
}
rsi
}
/// Check if any register units allocated out of this set interferes with units allocated out
/// of `other`.
///
/// This assumes that unused bits are 1.
pub fn interferes_with(&self, other: &Self) -> bool {
self.avail
.iter()
.zip(&other.avail)
.any(|(&x, &y)| (x | y) != !0)
}
/// Intersect this set of registers with `other`. This has the effect of removing any register
/// units from this set that are not in `other`.
pub fn intersect(&mut self, other: &Self) {
for (x, &y) in self.avail.iter_mut().zip(&other.avail) {
*x &= y;
}
}
/// Return an object that can display this register set, using the register info from the
/// target ISA.
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayRegisterSet<'a> {
DisplayRegisterSet(self.clone(), regs.into())
}
}
/// Iterator over available registers in a register class.
#[derive(Clone)]
pub struct RegSetIter {
regs: RegUnitMask,
}
impl Iterator for RegSetIter {
type Item = RegUnit;
fn next(&mut self) -> Option<RegUnit> {
let mut unit_offset = 0;
// Find the first set bit in `self.regs`.
for word in &mut self.regs {
if *word != 0 {
// Compute the register unit number from the lowest set bit in the word.
let unit = unit_offset + word.trailing_zeros() as RegUnit;
// Clear that lowest bit so we won't find it again.
*word &= *word - 1;
return Some(unit);
}
// How many register units were there in the word? This is a constant 32 for `u32` etc.
unit_offset += 8 * size_of_val(word) as RegUnit;
}
// All of `self.regs` is 0.
None
}
fn size_hint(&self) -> (usize, Option<usize>) {
let bits = self.regs.iter().map(|&w| w.count_ones() as usize).sum();
(bits, Some(bits))
}
}
impl RegSetIter {
pub fn rnext(&mut self) -> Option<RegUnit> {
let num_words = self.regs.len();
let bits_per_word = 8 * size_of_val(&self.regs[0]);
// Find the last set bit in `self.regs`.
for i in 0..num_words {
let word_ix = num_words - 1 - i;
let word = &mut self.regs[word_ix];
if *word != 0 {
let lzeroes = word.leading_zeros() as usize;
// Clear that highest bit so we won't find it again.
*word &= !(1 << (bits_per_word - 1 - lzeroes));
return Some((word_ix * bits_per_word + bits_per_word - 1 - lzeroes) as RegUnit);
}
}
// All of `self.regs` is 0.
None
}
}
impl ExactSizeIterator for RegSetIter {}
/// Displaying an `RegisterSet` correctly requires the associated `RegInfo` from the target ISA.
pub struct DisplayRegisterSet<'a>(RegisterSet, Option<&'a RegInfo>);
impl<'a> fmt::Display for DisplayRegisterSet<'a> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "[")?;
match self.1 {
None => {
for w in &self.0.avail {
write!(f, " #{:08x}", w)?;
}
}
Some(reginfo) => {
let toprcs = reginfo
.banks
.iter()
.map(|b| b.first_toprc + b.num_toprcs)
.max()
.expect("No register banks");
for rc in &reginfo.classes[0..toprcs] {
if rc.width == 1 {
let bank = &reginfo.banks[rc.bank as usize];
write!(f, " {}: ", rc)?;
for offset in 0..bank.units {
let reg = bank.first_unit + offset;
if !rc.contains(reg) {
continue;
}
if !self.0.is_avail(rc, reg) {
write!(f, "-")?;
continue;
}
// Display individual registers as either the second letter of their
// name or the last digit of their number.
// This works for x86 (rax, rbx, ...) and for numbered regs.
write!(
f,
"{}",
bank.names
.get(offset as usize)
.and_then(|name| name.chars().nth(1))
.unwrap_or_else(|| char::from_digit(
u32::from(offset % 10),
10
)
.unwrap())
)?;
}
}
}
}
}
write!(f, " ]")
}
}
impl fmt::Display for RegisterSet {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
self.display(None).fmt(f)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::isa::registers::{RegClass, RegClassData};
use alloc::vec::Vec;
// Register classes for testing.
const GPR: RegClass = &RegClassData {
name: "GPR",
index: 0,
width: 1,
bank: 0,
toprc: 0,
first: 28,
subclasses: 0,
mask: [0xf0000000, 0x0000000f, 0],
info: &INFO,
pinned_reg: None,
};
const DPR: RegClass = &RegClassData {
name: "DPR",
index: 0,
width: 2,
bank: 0,
toprc: 0,
first: 28,
subclasses: 0,
mask: [0x50000000, 0x0000000a, 0],
info: &INFO,
pinned_reg: None,
};
const INFO: RegInfo = RegInfo {
banks: &[],
classes: &[],
};
const RSI_1: RegSetIter = RegSetIter {
regs: [0x31415927, 0x27182818, 0x14141356],
};
const RSI_2: RegSetIter = RegSetIter {
regs: [0x00000000, 0x00000000, 0x00000000],
};
const RSI_3: RegSetIter = RegSetIter {
regs: [0xffffffff, 0xffffffff, 0xffffffff],
};
fn reverse_regset_iteration_work(rsi: &RegSetIter) {
// Check the reverse iterator by comparing its output with the forward iterator.
let rsi_f = (*rsi).clone();
let results_f = rsi_f.collect::<Vec<_>>();
let mut rsi_r = (*rsi).clone();
let mut results_r = Vec::<RegUnit>::new();
while let Some(r) = rsi_r.rnext() {
results_r.push(r);
}
let len_f = results_f.len();
let len_r = results_r.len();
assert_eq!(len_f, len_r);
for i in 0..len_f {
assert_eq!(results_f[i], results_r[len_f - 1 - i]);
}
}
#[test]
fn reverse_regset_iteration() {
reverse_regset_iteration_work(&RSI_1);
reverse_regset_iteration_work(&RSI_2);
reverse_regset_iteration_work(&RSI_3);
}
#[test]
fn put_and_take() {
let mut regs = RegisterSet::new();
// `GPR` has 8 register units: 28 through 35.
assert_eq!(regs.iter(GPR).len(), 8);
assert_eq!(regs.iter(GPR).count(), 8);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [28, 30, 33, 35]);
assert!(regs.is_avail(GPR, 29));
regs.take(&GPR, 29);
assert!(!regs.is_avail(GPR, 29));
assert_eq!(regs.iter(GPR).count(), 7);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
assert!(regs.is_avail(GPR, 30));
regs.take(&GPR, 30);
assert!(!regs.is_avail(GPR, 30));
assert_eq!(regs.iter(GPR).count(), 6);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
assert!(regs.is_avail(GPR, 32));
regs.take(&GPR, 32);
assert!(!regs.is_avail(GPR, 32));
assert_eq!(regs.iter(GPR).count(), 5);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);
regs.free(&GPR, 30);
assert!(regs.is_avail(GPR, 30));
assert!(!regs.is_avail(GPR, 29));
assert!(!regs.is_avail(GPR, 32));
assert_eq!(regs.iter(GPR).count(), 6);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
regs.free(&GPR, 32);
assert!(regs.is_avail(GPR, 31));
assert!(!regs.is_avail(GPR, 29));
assert!(regs.is_avail(GPR, 32));
assert_eq!(regs.iter(GPR).count(), 7);
assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
}
#[test]
fn interference() {
let mut regs1 = RegisterSet::new();
let mut regs2 = RegisterSet::new();
assert!(!regs1.interferes_with(&regs2));
regs1.take(&GPR, 32);
assert!(!regs1.interferes_with(&regs2));
regs2.take(&GPR, 31);
assert!(!regs1.interferes_with(&regs2));
regs1.intersect(&regs2);
assert!(regs1.interferes_with(&regs2));
}
}


@@ -1,484 +0,0 @@
//! Reload pass
//!
//! The reload pass runs between the spilling and coloring passes. Its primary responsibility is to
//! insert `spill` and `fill` instructions such that instruction operands expecting a register will
//! get a value with register affinity, and operands expecting a stack slot will get a value with
//! stack affinity.
//!
//! The secondary responsibility of the reload pass is to reuse values in registers as much as
//! possible to minimize the number of `fill` instructions needed. This must not cause the register
//! pressure limits to be exceeded.
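//!
//! For example (an illustrative sketch, not literal pass output), a register-constrained
//! use of a spilled value `v1`, say `v2 = iadd v1, v3`, is rewritten roughly as:
//!
//!   v9 = fill v1
//!   v2 = iadd v9, v3
//!
//! where the new `v9` gets a register affinity and a live range reaching the use, while
//! `v1` keeps its stack affinity.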
use crate::cursor::{Cursor, EncCursor};
use crate::dominator_tree::DominatorTree;
use crate::entity::{SparseMap, SparseMapValue};
use crate::ir::{AbiParam, ArgumentLoc, InstBuilder};
use crate::ir::{Block, Function, Inst, InstructionData, Opcode, Value, ValueLoc};
use crate::isa::RegClass;
use crate::isa::{ConstraintKind, EncInfo, Encoding, RecipeConstraints, TargetIsa};
use crate::regalloc::affinity::Affinity;
use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
use crate::regalloc::liveness::Liveness;
use crate::timing;
use crate::topo_order::TopoOrder;
use alloc::vec::Vec;
/// Reusable data structures for the reload pass.
pub struct Reload {
candidates: Vec<ReloadCandidate>,
reloads: SparseMap<Value, ReloadedValue>,
}
/// Context data structure that gets instantiated once per pass.
struct Context<'a> {
cur: EncCursor<'a>,
// Cached ISA information.
// We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object.
encinfo: EncInfo,
// References to contextual data structures we need.
domtree: &'a DominatorTree,
liveness: &'a mut Liveness,
topo: &'a mut TopoOrder,
candidates: &'a mut Vec<ReloadCandidate>,
reloads: &'a mut SparseMap<Value, ReloadedValue>,
}
impl Reload {
/// Create a new blank reload pass.
pub fn new() -> Self {
Self {
candidates: Vec::new(),
reloads: SparseMap::new(),
}
}
/// Clear all data structures in this reload pass.
pub fn clear(&mut self) {
self.candidates.clear();
self.reloads.clear();
}
/// Run the reload algorithm over `func`.
pub fn run(
&mut self,
isa: &dyn TargetIsa,
func: &mut Function,
domtree: &DominatorTree,
liveness: &mut Liveness,
topo: &mut TopoOrder,
tracker: &mut LiveValueTracker,
) {
let _tt = timing::ra_reload();
log::trace!("Reload for:\n{}", func.display(isa));
let mut ctx = Context {
cur: EncCursor::new(func, isa),
encinfo: isa.encoding_info(),
domtree,
liveness,
topo,
candidates: &mut self.candidates,
reloads: &mut self.reloads,
};
ctx.run(tracker)
}
}
/// A reload candidate.
///
/// This represents a stack value that is used by the current instruction where a register is
/// needed.
struct ReloadCandidate {
argidx: usize,
value: Value,
regclass: RegClass,
}
/// A reloaded value.
///
/// This represents a value that has been reloaded into a register value from the stack.
struct ReloadedValue {
stack: Value,
reg: Value,
}
impl SparseMapValue<Value> for ReloadedValue {
fn key(&self) -> Value {
self.stack
}
}
impl<'a> Context<'a> {
fn run(&mut self, tracker: &mut LiveValueTracker) {
self.topo.reset(self.cur.func.layout.blocks());
while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) {
self.visit_block(block, tracker);
}
}
fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) {
log::trace!("Reloading {}:", block);
self.visit_block_header(block, tracker);
tracker.drop_dead_params();
// visit_block_header() places us at the first interesting instruction in the block.
while let Some(inst) = self.cur.current_inst() {
if !self.cur.func.dfg[inst].opcode().is_ghost() {
// This instruction either has an encoding or has ABI constraints, so visit it to
// insert spills and fills as needed.
let encoding = self.cur.func.encodings[inst];
self.visit_inst(block, inst, encoding, tracker);
tracker.drop_dead(inst);
} else {
// This is a ghost instruction with no encoding and no extra constraints, so we can
// just skip over it.
self.cur.next_inst();
}
}
}
/// Process the block parameters, and move to the next instruction in the block to be processed.
fn visit_block_header(&mut self, block: Block, tracker: &mut LiveValueTracker) {
let (liveins, args) = tracker.block_top(
block,
&self.cur.func.dfg,
self.liveness,
&self.cur.func.layout,
self.domtree,
);
if self.cur.func.layout.entry_block() == Some(block) {
debug_assert_eq!(liveins.len(), 0);
self.visit_entry_params(block, args);
} else {
self.visit_block_params(block, args);
}
}
/// Visit the parameters on the entry block.
/// These values have ABI constraints from the function signature.
fn visit_entry_params(&mut self, block: Block, args: &[LiveValue]) {
debug_assert_eq!(self.cur.func.signature.params.len(), args.len());
self.cur.goto_first_inst(block);
for (arg_idx, arg) in args.iter().enumerate() {
let abi = self.cur.func.signature.params[arg_idx];
match abi.location {
ArgumentLoc::Reg(_) => {
if arg.affinity.is_stack() {
// An incoming register parameter was spilled. Replace the parameter value
// with a temporary register value that is immediately spilled.
let reg = self
.cur
.func
.dfg
.replace_block_param(arg.value, abi.value_type);
let affinity = Affinity::abi(&abi, self.cur.isa);
self.liveness.create_dead(reg, block, affinity);
self.insert_spill(block, arg.value, reg);
}
}
ArgumentLoc::Stack(_) => {
debug_assert!(arg.affinity.is_stack());
}
ArgumentLoc::Unassigned => panic!("Unexpected ABI location"),
}
}
}
fn visit_block_params(&mut self, block: Block, _args: &[LiveValue]) {
self.cur.goto_first_inst(block);
}
/// Process the instruction pointed to by `pos`, and advance the cursor to the next instruction
/// that needs processing.
fn visit_inst(
&mut self,
block: Block,
inst: Inst,
encoding: Encoding,
tracker: &mut LiveValueTracker,
) {
self.cur.use_srcloc(inst);
// Get the operand constraints for `inst` that we are trying to satisfy.
let constraints = self.encinfo.operand_constraints(encoding);
// Identify reload candidates.
debug_assert!(self.candidates.is_empty());
self.find_candidates(inst, constraints);
// If we find a copy from a stack slot to the same stack slot, replace
// it with a `copy_nop` but otherwise ignore it. In particular, don't
// generate a reload immediately followed by a spill. The `copy_nop`
// has a zero-length encoding, so will disappear at emission time.
if let InstructionData::Unary {
opcode: Opcode::Copy,
arg,
} = self.cur.func.dfg[inst]
{
let dst_vals = self.cur.func.dfg.inst_results(inst);
if dst_vals.len() == 1 {
let dst_val = dst_vals[0];
let can_transform = match (
self.cur.func.locations[arg],
self.cur.func.locations[dst_val],
) {
(ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => {
src_slot == dst_slot && {
let src_ty = self.cur.func.dfg.value_type(arg);
let dst_ty = self.cur.func.dfg.value_type(dst_val);
debug_assert!(src_ty == dst_ty);
// This limits the transformation to copies of the
// types: I128 I64 I32 I16 I8 F64 and F32, since that's
// the set of `copy_nop` encodings available.
src_ty.is_int() || src_ty.is_float()
}
}
_ => false,
};
if can_transform {
// Convert the instruction into a `copy_nop`.
self.cur.func.dfg.replace(inst).copy_nop(arg);
let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
debug_assert!(ok, "copy_nop encoding missing for this type");
// And move on to the next insn.
self.reloads.clear();
let _ = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
self.cur.next_inst();
self.candidates.clear();
return;
}
}
}
// Deal with all instructions not special-cased by the immediately
// preceding fragment.
if let InstructionData::Unary {
opcode: Opcode::Copy,
..
} = self.cur.func.dfg[inst]
{
self.reload_copy_candidates(inst);
} else {
self.reload_inst_candidates(block, inst);
}
// TODO: Reuse reloads for future instructions.
self.reloads.clear();
let (_throughs, _kills, defs) =
tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
// Advance to the next instruction so we can insert any spills after the instruction.
self.cur.next_inst();
// Rewrite register defs that need to be spilled.
//
// Change:
//
// v2 = inst ...
//
// Into:
//
// v7 = inst ...
// v2 = spill v7
//
// That way, we don't need to rewrite all future uses of v2.
if let Some(constraints) = constraints {
for (lv, op) in defs.iter().zip(constraints.outs) {
if lv.affinity.is_stack() && op.kind != ConstraintKind::Stack {
if let InstructionData::Unary {
opcode: Opcode::Copy,
arg,
} = self.cur.func.dfg[inst]
{
self.cur.func.dfg.replace(inst).spill(arg);
let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
debug_assert!(ok);
} else {
let value_type = self.cur.func.dfg.value_type(lv.value);
let reg = self.cur.func.dfg.replace_result(lv.value, value_type);
self.liveness.create_dead(reg, inst, Affinity::new(op));
self.insert_spill(block, lv.value, reg);
}
}
}
}
// Same thing for spilled call return values.
let retvals = &defs[self.cur.func.dfg[inst]
.opcode()
.constraints()
.num_fixed_results()..];
if !retvals.is_empty() {
let sig = self
.cur
.func
.dfg
.call_signature(inst)
.expect("Extra results on non-call instruction");
for (i, lv) in retvals.iter().enumerate() {
let abi = self.cur.func.dfg.signatures[sig].returns[i];
debug_assert!(
abi.location.is_reg(),
"expected reg; got {:?}",
abi.location
);
if lv.affinity.is_stack() {
let reg = self.cur.func.dfg.replace_result(lv.value, abi.value_type);
self.liveness
.create_dead(reg, inst, Affinity::abi(&abi, self.cur.isa));
self.insert_spill(block, lv.value, reg);
}
}
}
}
// Reload the current candidates for the given `inst`.
fn reload_inst_candidates(&mut self, block: Block, inst: Inst) {
// Insert fill instructions before `inst` and replace `cand.value` with the filled value.
for cand in self.candidates.iter_mut() {
if let Some(reload) = self.reloads.get(cand.value) {
cand.value = reload.reg;
continue;
}
let reg = self.cur.ins().fill(cand.value);
let fill = self.cur.built_inst();
self.reloads.insert(ReloadedValue {
stack: cand.value,
reg,
});
cand.value = reg;
// Create a live range for the new reload.
let affinity = Affinity::Reg(cand.regclass.into());
self.liveness.create_dead(reg, fill, affinity);
self.liveness
.extend_locally(reg, block, inst, &self.cur.func.layout);
}
// Rewrite instruction arguments.
//
// Only rewrite those arguments that were identified as candidates. This leaves block
// arguments on branches as-is without rewriting them. A spilled block argument needs to stay
// spilled because the matching block parameter is going to be in the same virtual register
// and therefore the same stack slot as the block argument value.
if !self.candidates.is_empty() {
let args = self.cur.func.dfg.inst_args_mut(inst);
while let Some(cand) = self.candidates.pop() {
args[cand.argidx] = cand.value;
}
}
}
// Reload the current candidates for the given copy `inst`.
//
// As an optimization, replace a copy instruction where the argument has been spilled with
// a fill instruction.
fn reload_copy_candidates(&mut self, inst: Inst) {
// Copy instructions can only have one argument.
debug_assert!(self.candidates.is_empty() || self.candidates.len() == 1);
if let Some(cand) = self.candidates.pop() {
self.cur.func.dfg.replace(inst).fill(cand.value);
let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
debug_assert!(ok);
}
}
// Find reload candidates for `inst` and add them to `self.candidates`.
//
// These are uses of spilled values where the operand constraint requires a register.
fn find_candidates(&mut self, inst: Inst, constraints: Option<&RecipeConstraints>) {
let args = self.cur.func.dfg.inst_args(inst);
if let Some(constraints) = constraints {
for (argidx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() {
if op.kind != ConstraintKind::Stack && self.liveness[arg].affinity.is_stack() {
self.candidates.push(ReloadCandidate {
argidx,
value: arg,
regclass: op.regclass,
})
}
}
}
// If we only have the fixed arguments, we're done now.
let offset = self.cur.func.dfg[inst]
.opcode()
.constraints()
.num_fixed_value_arguments();
if args.len() == offset {
return;
}
let var_args = &args[offset..];
// Handle ABI arguments.
if let Some(sig) = self.cur.func.dfg.call_signature(inst) {
handle_abi_args(
self.candidates,
&self.cur.func.dfg.signatures[sig].params,
var_args,
offset,
self.cur.isa,
self.liveness,
);
} else if self.cur.func.dfg[inst].opcode().is_return() {
handle_abi_args(
self.candidates,
&self.cur.func.signature.returns,
var_args,
offset,
self.cur.isa,
self.liveness,
);
}
}
/// Insert a spill at `pos` and update data structures.
///
/// - Insert `stack = spill reg` at `pos`, and assign an encoding.
/// - Move the `stack` live range starting point to the new instruction.
/// - Extend the `reg` live range to reach the new instruction.
fn insert_spill(&mut self, block: Block, stack: Value, reg: Value) {
self.cur.ins().with_result(stack).spill(reg);
let inst = self.cur.built_inst();
// Update live ranges.
self.liveness.move_def_locally(stack, inst);
self.liveness
.extend_locally(reg, block, inst, &self.cur.func.layout);
}
}
/// Find reload candidates in the instruction's ABI variable arguments. This handles both
/// return values and call arguments.
fn handle_abi_args(
candidates: &mut Vec<ReloadCandidate>,
abi_types: &[AbiParam],
var_args: &[Value],
offset: usize,
isa: &dyn TargetIsa,
liveness: &Liveness,
) {
debug_assert_eq!(abi_types.len(), var_args.len());
for ((abi, &arg), argidx) in abi_types.iter().zip(var_args).zip(offset..) {
if abi.location.is_reg() {
let lv = liveness.get(arg).expect("Missing live range for ABI arg");
if lv.affinity.is_stack() {
candidates.push(ReloadCandidate {
argidx,
value: arg,
regclass: isa.regclass_for_abi_type(abi.value_type),
});
}
}
}
}


@@ -1,65 +0,0 @@
use crate::cursor::{Cursor, FuncCursor};
use crate::dominator_tree::DominatorTree;
use crate::inst_predicates::is_safepoint;
use crate::ir::{Function, InstBuilder};
use crate::isa::TargetIsa;
use crate::regalloc::live_value_tracker::LiveValueTracker;
use crate::regalloc::liveness::Liveness;
use alloc::vec::Vec;
fn insert_and_encode_safepoint<'f>(
pos: &mut FuncCursor<'f>,
tracker: &LiveValueTracker,
isa: &dyn TargetIsa,
) {
// Iterate through all live values, collect only the references.
let live_ref_values = tracker
.live()
.iter()
.filter(|live_value| pos.func.dfg.value_type(live_value.value).is_ref())
.map(|live_val| live_val.value)
.collect::<Vec<_>>();
if !live_ref_values.is_empty() {
pos.ins().safepoint(&live_ref_values);
// Move cursor to the new safepoint instruction to encode it.
if let Some(inst) = pos.prev_inst() {
let ok = pos.func.update_encoding(inst, isa).is_ok();
debug_assert!(ok);
}
// Restore cursor position.
pos.next_inst();
}
}
// The emit_stack_maps() function analyzes each instruction to retrieve the liveness of
// the defs and operands by traversing a function's blocks in layout order.
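// As a sketch: if reference-typed values `v3` and `v5` are live at an instruction that
// `is_safepoint` identifies, `insert_and_encode_safepoint` above inserts `safepoint v3, v5`
// immediately before it, so the emitted stack map can record their locations.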
pub fn emit_stack_maps(
func: &mut Function,
domtree: &DominatorTree,
liveness: &Liveness,
tracker: &mut LiveValueTracker,
isa: &dyn TargetIsa,
) {
let mut curr = func.layout.entry_block();
while let Some(block) = curr {
tracker.block_top(block, &func.dfg, liveness, &func.layout, domtree);
tracker.drop_dead_params();
let mut pos = FuncCursor::new(func);
// From the top of the block, step through the instructions.
pos.goto_top(block);
while let Some(inst) = pos.next_inst() {
if is_safepoint(&pos.func, inst) {
insert_and_encode_safepoint(&mut pos, tracker, isa);
}
// Process the instruction and get rid of dead values.
tracker.process_inst(inst, &pos.func.dfg, liveness);
tracker.drop_dead(inst);
}
curr = func.layout.next_block(block);
}
}

File diff suppressed because it is too large


@@ -1,638 +0,0 @@
//! Spilling pass.
//!
//! The spilling pass is the first to run after the liveness analysis. Its primary function is to
//! ensure that the register pressure never exceeds the number of available registers by moving
//! some SSA values to spill slots on the stack. This is encoded in the affinity of the value's
//! live range.
//!
//! Some instruction operand constraints may require additional registers to resolve. Since this
//! can cause spilling, the spilling pass is also responsible for resolving those constraints by
//! inserting copies. The extra constraints are:
//!
//! 1. A value used by a tied operand must be killed by the instruction. This is resolved by
//! inserting a copy to a temporary value when necessary.
//! 2. When the same value is used more than once by an instruction, the operand constraints must
//! be compatible. Otherwise, the value must be copied into a new register for some of the
//! operands.
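//!
//! For constraint 1 above, an illustrative sketch (not literal pass output): if `v1` is
//! used by a tied operand of `v4 = iadd v1, v2` but is not killed there, the pass
//! rewrites it as:
//!
//!   v9 = copy v1
//!   v4 = iadd v9, v2
//!
//! so that the tied operand kills the freshly copied `v9` instead of `v1`.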
use crate::cursor::{Cursor, EncCursor};
use crate::dominator_tree::DominatorTree;
use crate::ir::{ArgumentLoc, Block, Function, Inst, InstBuilder, SigRef, Value, ValueLoc};
use crate::isa::registers::{RegClass, RegClassIndex, RegClassMask, RegUnit};
use crate::isa::{ConstraintKind, EncInfo, RecipeConstraints, RegInfo, TargetIsa};
use crate::regalloc::affinity::Affinity;
use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
use crate::regalloc::liveness::Liveness;
use crate::regalloc::pressure::Pressure;
use crate::regalloc::virtregs::VirtRegs;
use crate::timing;
use crate::topo_order::TopoOrder;
use alloc::vec::Vec;
use core::fmt;
/// Return a top-level register class which contains `unit`.
fn toprc_containing_regunit(unit: RegUnit, reginfo: &RegInfo) -> RegClass {
let bank = reginfo.bank_containing_regunit(unit).unwrap();
reginfo.classes[bank.first_toprc..(bank.first_toprc + bank.num_toprcs)]
.iter()
.find(|&rc| rc.contains(unit))
.expect("reg unit should be in a toprc")
}
/// Persistent data structures for the spilling pass.
pub struct Spilling {
spills: Vec<Value>,
reg_uses: Vec<RegUse>,
}
/// Context data structure that gets instantiated once per pass.
struct Context<'a> {
// Current instruction as well as reference to function and ISA.
cur: EncCursor<'a>,
// Cached ISA information.
reginfo: RegInfo,
encinfo: EncInfo,
// References to contextual data structures we need.
domtree: &'a DominatorTree,
liveness: &'a mut Liveness,
virtregs: &'a VirtRegs,
topo: &'a mut TopoOrder,
// Current register pressure.
pressure: Pressure,
// Values spilled for the current instruction. These values have already been removed from the
// pressure tracker, but they are still present in the live value tracker and their affinity
// hasn't been changed yet.
spills: &'a mut Vec<Value>,
// Uses of register values in the current instruction.
reg_uses: &'a mut Vec<RegUse>,
}
impl Spilling {
/// Create a new spilling data structure.
pub fn new() -> Self {
Self {
spills: Vec::new(),
reg_uses: Vec::new(),
}
}
/// Clear all data structures in this spilling pass.
pub fn clear(&mut self) {
self.spills.clear();
self.reg_uses.clear();
}
/// Run the spilling algorithm over `func`.
pub fn run(
&mut self,
isa: &dyn TargetIsa,
func: &mut Function,
domtree: &DominatorTree,
liveness: &mut Liveness,
virtregs: &VirtRegs,
topo: &mut TopoOrder,
tracker: &mut LiveValueTracker,
) {
let _tt = timing::ra_spilling();
log::trace!("Spilling for:\n{}", func.display(isa));
let reginfo = isa.register_info();
let usable_regs = isa.allocatable_registers(func);
let mut ctx = Context {
cur: EncCursor::new(func, isa),
reginfo: isa.register_info(),
encinfo: isa.encoding_info(),
domtree,
liveness,
virtregs,
topo,
pressure: Pressure::new(&reginfo, &usable_regs),
spills: &mut self.spills,
reg_uses: &mut self.reg_uses,
};
ctx.run(tracker)
}
}
impl<'a> Context<'a> {
fn run(&mut self, tracker: &mut LiveValueTracker) {
self.topo.reset(self.cur.func.layout.blocks());
while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) {
self.visit_block(block, tracker);
}
}
fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) {
log::trace!("Spilling {}:", block);
self.cur.goto_top(block);
self.visit_block_header(block, tracker);
tracker.drop_dead_params();
self.process_spills(tracker);
while let Some(inst) = self.cur.next_inst() {
if !self.cur.func.dfg[inst].opcode().is_ghost() {
self.visit_inst(inst, block, tracker);
} else {
let (_throughs, kills) = tracker.process_ghost(inst);
self.free_regs(kills);
}
tracker.drop_dead(inst);
self.process_spills(tracker);
}
}
// Take all live registers in `regs` from the pressure set.
// This doesn't cause any spilling; it is assumed there are enough registers.
fn take_live_regs(&mut self, regs: &[LiveValue]) {
for lv in regs {
if !lv.is_dead {
if let Affinity::Reg(rci) = lv.affinity {
let rc = self.reginfo.rc(rci);
self.pressure.take(rc);
}
}
}
}
// Free all registers in `kills` from the pressure set.
fn free_regs(&mut self, kills: &[LiveValue]) {
for lv in kills {
if let Affinity::Reg(rci) = lv.affinity {
if !self.spills.contains(&lv.value) {
let rc = self.reginfo.rc(rci);
self.pressure.free(rc);
}
}
}
}
// Free all dead registers in `regs` from the pressure set.
fn free_dead_regs(&mut self, regs: &[LiveValue]) {
for lv in regs {
if lv.is_dead {
if let Affinity::Reg(rci) = lv.affinity {
if !self.spills.contains(&lv.value) {
let rc = self.reginfo.rc(rci);
self.pressure.free(rc);
}
}
}
}
}
fn visit_block_header(&mut self, block: Block, tracker: &mut LiveValueTracker) {
let (liveins, params) = tracker.block_top(
block,
&self.cur.func.dfg,
self.liveness,
&self.cur.func.layout,
self.domtree,
);
// Count the live-in registers. These should already fit in registers; they did at the
// dominator.
self.pressure.reset();
self.take_live_regs(liveins);
// A block can have an arbitrary (up to 2^16...) number of parameters, so they are not
// guaranteed to fit in registers.
for lv in params {
if let Affinity::Reg(rci) = lv.affinity {
let rc = self.reginfo.rc(rci);
'try_take: while let Err(mask) = self.pressure.take_transient(rc) {
log::trace!("Need {} reg for block param {}", rc, lv.value);
match self.spill_candidate(mask, liveins) {
Some(cand) => {
log::trace!(
"Spilling live-in {} to make room for {} block param {}",
cand,
rc,
lv.value
);
self.spill_reg(cand);
}
None => {
// We can't spill any of the live-in registers, so we have to spill a
// block argument. Since the current spill metric would consider all the
// block arguments equal, just spill the present register.
log::trace!("Spilling {} block argument {}", rc, lv.value);
// Since `spill_reg` will free a register, add the current one here.
self.pressure.take(rc);
self.spill_reg(lv.value);
break 'try_take;
}
}
}
}
}
// The transient pressure counts for the block arguments are accurate. Just preserve them.
self.pressure.preserve_transient();
self.free_dead_regs(params);
}
fn visit_inst(&mut self, inst: Inst, block: Block, tracker: &mut LiveValueTracker) {
log::trace!("Inst {}, {}", self.cur.display_inst(inst), self.pressure);
debug_assert_eq!(self.cur.current_inst(), Some(inst));
debug_assert_eq!(self.cur.current_block(), Some(block));
let constraints = self
.encinfo
.operand_constraints(self.cur.func.encodings[inst]);
// We may need to resolve register constraints if there are any noteworthy uses.
debug_assert!(self.reg_uses.is_empty());
self.collect_reg_uses(inst, block, constraints);
// Calls usually have fixed register uses.
let call_sig = self.cur.func.dfg.call_signature(inst);
if let Some(sig) = call_sig {
self.collect_abi_reg_uses(inst, sig);
}
if !self.reg_uses.is_empty() {
self.process_reg_uses(inst, tracker);
}
// Update the live value tracker with this instruction.
let (throughs, kills, defs) = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
// Remove kills from the pressure tracker.
self.free_regs(kills);
// If inst is a call, spill all register values that are live across the call.
// This means that we don't currently take advantage of callee-saved registers.
// TODO: Be more sophisticated.
let opcode = self.cur.func.dfg[inst].opcode();
if call_sig.is_some() || opcode.clobbers_all_regs() {
for lv in throughs {
if lv.affinity.is_reg() && !self.spills.contains(&lv.value) {
self.spill_reg(lv.value);
}
}
}
// Make sure we have enough registers for the register defs.
// Dead defs are included here. They need a register too.
// No need to process call return values, they are in fixed registers.
if let Some(constraints) = constraints {
for op in constraints.outs {
if op.kind != ConstraintKind::Stack {
// Add register def to pressure, spill if needed.
while let Err(mask) = self.pressure.take_transient(op.regclass) {
log::trace!("Need {} reg from {} throughs", op.regclass, throughs.len());
match self.spill_candidate(mask, throughs) {
Some(cand) => self.spill_reg(cand),
None => panic!(
"Ran out of {} registers for {}",
op.regclass,
self.cur.display_inst(inst)
),
}
}
}
}
self.pressure.reset_transient();
}
// Restore pressure state, compute pressure with affinities from `defs`.
// Exclude dead defs. Includes call return values.
// This won't cause spilling.
self.take_live_regs(defs);
}
// Collect register uses that are noteworthy in one of the following ways:
//
// 1. It's a fixed register constraint.
// 2. It's a use of a spilled value.
// 3. It's a tied register constraint and the value isn't killed.
//
// We are assuming here that if a value is used both by a fixed register operand and a register
// class operand, the two are compatible. We are also assuming that two register class
// operands are always compatible.
fn collect_reg_uses(
&mut self,
inst: Inst,
block: Block,
constraints: Option<&RecipeConstraints>,
) {
let args = self.cur.func.dfg.inst_args(inst);
let num_fixed_ins = if let Some(constraints) = constraints {
for (idx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() {
let mut reguse = RegUse::new(arg, idx, op.regclass.into());
let lr = &self.liveness[arg];
match op.kind {
ConstraintKind::Stack => continue,
ConstraintKind::FixedReg(_) => reguse.fixed = true,
ConstraintKind::Tied(_) => {
// A tied operand must kill the used value.
reguse.tied = !lr.killed_at(inst, block, &self.cur.func.layout);
}
ConstraintKind::FixedTied(_) => {
reguse.fixed = true;
reguse.tied = !lr.killed_at(inst, block, &self.cur.func.layout);
}
ConstraintKind::Reg => {}
}
if lr.affinity.is_stack() {
reguse.spilled = true;
}
// Only collect the interesting register uses.
if reguse.fixed || reguse.tied || reguse.spilled {
log::trace!(" reguse: {}", reguse);
self.reg_uses.push(reguse);
}
}
constraints.ins.len()
} else {
// A non-ghost instruction with no constraints can't have any
// fixed operands.
0
};
// Similarly, for return instructions, collect uses of ABI-defined
// return values.
if self.cur.func.dfg[inst].opcode().is_return() {
debug_assert_eq!(
self.cur.func.dfg.inst_variable_args(inst).len(),
self.cur.func.signature.returns.len(),
"The non-fixed arguments in a return should follow the function's signature."
);
for (ret_idx, (ret, &arg)) in
self.cur.func.signature.returns.iter().zip(args).enumerate()
{
let idx = num_fixed_ins + ret_idx;
let unit = match ret.location {
ArgumentLoc::Unassigned => {
panic!("function return signature should be legalized")
}
ArgumentLoc::Reg(unit) => unit,
ArgumentLoc::Stack(_) => continue,
};
let toprc = toprc_containing_regunit(unit, &self.reginfo);
let mut reguse = RegUse::new(arg, idx, toprc.into());
reguse.fixed = true;
log::trace!(" reguse: {}", reguse);
self.reg_uses.push(reguse);
}
}
}
// Collect register uses from the ABI input constraints.
fn collect_abi_reg_uses(&mut self, inst: Inst, sig: SigRef) {
let num_fixed_args = self.cur.func.dfg[inst]
.opcode()
.constraints()
.num_fixed_value_arguments();
let args = self.cur.func.dfg.inst_variable_args(inst);
for (idx, (abi, &arg)) in self.cur.func.dfg.signatures[sig]
.params
.iter()
.zip(args)
.enumerate()
{
if abi.location.is_reg() {
let (rci, spilled) = match self.liveness[arg].affinity {
Affinity::Reg(rci) => (rci, false),
Affinity::Stack => (
self.cur.isa.regclass_for_abi_type(abi.value_type).into(),
true,
),
Affinity::Unassigned => panic!("Missing affinity for {}", arg),
};
let mut reguse = RegUse::new(arg, num_fixed_args + idx, rci);
reguse.fixed = true;
reguse.spilled = spilled;
self.reg_uses.push(reguse);
}
}
}
// Process multiple register uses to resolve potential conflicts.
//
// Look for multiple uses of the same value in `self.reg_uses` and insert copies as necessary.
// Trigger spilling if any of the temporaries cause the register pressure to become too high.
//
// Leave `self.reg_uses` empty.
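// For example (illustrative): if the same value appears twice in `self.reg_uses` and the
// second use carries a fixed-register constraint, that second use is rewritten below to
// read a fresh `copy` of the value so that both register constraints can be satisfied.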
fn process_reg_uses(&mut self, inst: Inst, tracker: &LiveValueTracker) {
// We're looking for multiple uses of the same value, so start by sorting by value. The
// secondary `opidx` key makes it possible to use an unstable (non-allocating) sort.
self.reg_uses.sort_unstable_by_key(|u| (u.value, u.opidx));
self.cur.use_srcloc(inst);
for i in 0..self.reg_uses.len() {
let ru = self.reg_uses[i];
// Do we need to insert a copy for this use?
let need_copy = if ru.tied {
true
} else if ru.fixed {
// This is a fixed register use which doesn't necessarily require a copy.
// Make a copy only if this is not the first use of the value.
self.reg_uses
.get(i.wrapping_sub(1))
.map_or(false, |ru2| ru2.value == ru.value)
} else {
false
};
if need_copy {
let copy = self.insert_copy(ru.value, ru.rci);
self.cur.func.dfg.inst_args_mut(inst)[ru.opidx as usize] = copy;
}
// Even if we don't insert a copy, we may need to account for register pressure for the
// reload pass.
if need_copy || ru.spilled {
let rc = self.reginfo.rc(ru.rci);
while let Err(mask) = self.pressure.take_transient(rc) {
log::trace!("Copy of {} reg causes spill", rc);
// Spill a live register that is *not* used by the current instruction.
// Spilling a use wouldn't help.
//
// Do allow spilling of block arguments on branches. This is safe since we spill
// the whole virtual register which includes the matching block parameter value
// at the branch destination. It is also necessary since there can be
// arbitrarily many block arguments.
match {
let args = if self.cur.func.dfg[inst].opcode().is_branch() {
self.cur.func.dfg.inst_fixed_args(inst)
} else {
self.cur.func.dfg.inst_args(inst)
};
self.spill_candidate(
mask,
tracker.live().iter().filter(|lv| !args.contains(&lv.value)),
)
} {
Some(cand) => self.spill_reg(cand),
None => panic!(
"Ran out of {} registers when inserting copy before {}",
rc,
self.cur.display_inst(inst)
),
}
}
}
}
self.pressure.reset_transient();
self.reg_uses.clear()
}
// Find a spill candidate from `candidates` whose top-level register class is in `mask`.
fn spill_candidate<'ii, II>(&self, mask: RegClassMask, candidates: II) -> Option<Value>
where
II: IntoIterator<Item = &'ii LiveValue>,
{
// Find the best viable spill candidate.
//
// The very simple strategy implemented here is to spill the value with the earliest def in
// the reverse post-order. This strategy depends on a good reload pass to generate good
// code.
//
// We know that all candidate defs dominate the current instruction, so one of them will
// dominate the others. That is the earliest def.
candidates
.into_iter()
.filter_map(|lv| {
// Viable candidates are registers in one of the `mask` classes, and not already in
// the spill set.
if let Affinity::Reg(rci) = lv.affinity {
let rc = self.reginfo.rc(rci);
if (mask & (1 << rc.toprc)) != 0 && !self.spills.contains(&lv.value) {
// Here, `lv` is a viable spill candidate.
return Some(lv.value);
}
}
None
})
.min_by(|&a, &b| {
// Find the minimum candidate according to the RPO of their defs.
self.domtree.rpo_cmp(
self.cur.func.dfg.value_def(a),
self.cur.func.dfg.value_def(b),
&self.cur.func.layout,
)
})
}
/// Spill `value` immediately by
///
/// 1. Changing its affinity to `Stack` which marks the spill.
/// 2. Removing the value from the pressure tracker.
/// 3. Adding the value to `self.spills` for later reference by `process_spills`.
///
/// Note that this does not update the cached affinity in the live value tracker. Call
/// `process_spills` to do that.
fn spill_reg(&mut self, value: Value) {
if let Affinity::Reg(rci) = self.liveness.spill(value) {
let rc = self.reginfo.rc(rci);
self.pressure.free(rc);
self.spills.push(value);
log::trace!("Spilled {}:{} -> {}", value, rc, self.pressure);
} else {
panic!("Cannot spill {} that was already on the stack", value);
}
// Assign a spill slot for the whole virtual register.
let ss = self
.cur
.func
.stack_slots
.make_spill_slot(self.cur.func.dfg.value_type(value));
for &v in self.virtregs.congruence_class(&value) {
self.liveness.spill(v);
self.cur.func.locations[v] = ValueLoc::Stack(ss);
}
}
/// Process any pending spills in the `self.spills` vector.
///
/// It is assumed that spills are removed from the pressure tracker immediately; see
/// `spill_reg` above.
///
/// We also need to update the live range affinity and remove spilled values from the live
/// value tracker.
fn process_spills(&mut self, tracker: &mut LiveValueTracker) {
if !self.spills.is_empty() {
tracker.process_spills(|v| self.spills.contains(&v));
self.spills.clear()
}
}
/// Insert a `copy value` before the current instruction and give it a live range extending to
/// the current instruction.
///
/// Returns the new local value created.
fn insert_copy(&mut self, value: Value, rci: RegClassIndex) -> Value {
let copy = self.cur.ins().copy(value);
let inst = self.cur.built_inst();
// Update live ranges.
self.liveness.create_dead(copy, inst, Affinity::Reg(rci));
self.liveness.extend_locally(
copy,
self.cur.func.layout.pp_block(inst),
self.cur.current_inst().expect("must be at an instruction"),
&self.cur.func.layout,
);
copy
}
}
/// Struct representing a register use of a value.
/// Used to detect multiple uses of the same value with incompatible register constraints.
#[derive(Clone, Copy)]
struct RegUse {
value: Value,
opidx: u16,
// Register class required by the use.
rci: RegClassIndex,
// A use with a fixed register constraint.
fixed: bool,
// A register use of a spilled value.
spilled: bool,
// A use with a tied register constraint *and* the used value is not killed.
tied: bool,
}
impl RegUse {
fn new(value: Value, idx: usize, rci: RegClassIndex) -> Self {
Self {
value,
opidx: idx as u16,
rci,
fixed: false,
spilled: false,
tied: false,
}
}
}
impl fmt::Display for RegUse {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}@op{}", self.value, self.opidx)?;
if self.fixed {
write!(f, "/fixed")?;
}
if self.spilled {
write!(f, "/spilled")?;
}
if self.tied {
write!(f, "/tied")?;
}
Ok(())
}
}


@@ -1,241 +0,0 @@
//! Computing stack layout.
use crate::ir::stackslot::{StackOffset, StackSize, StackSlotKind};
use crate::ir::{StackLayoutInfo, StackSlots};
use crate::result::{CodegenError, CodegenResult};
use core::cmp::{max, min};
/// Compute the stack frame layout.
///
/// Determine the total size of this stack frame and assign offsets to all `Spill` and `Explicit`
/// stack slots.
///
/// The total frame size will be a multiple of `alignment`, which must be a power of two, unless the
/// function doesn't perform any calls.
///
/// Returns the total stack frame size which is also saved in `frame.frame_size`.
///
/// If the stack frame is too big, returns an `ImplLimitExceeded` error.
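///
/// A minimal usage sketch (the surrounding names are assumed), for a non-leaf function
/// with 16-byte stack alignment:
///
///   let frame_size = layout_stack(&mut func.stack_slots, false, 16)?;
///
/// After this call, every spill and explicit slot has `offset = Some(..)` and
/// `func.stack_slots.layout_info` describes the finished frame.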
pub fn layout_stack(
frame: &mut StackSlots,
is_leaf: bool,
alignment: StackSize,
) -> CodegenResult<StackSize> {
// Each object and the whole stack frame must fit in 2 GB such that any relative offset within
// the frame fits in a `StackOffset`.
let max_size = StackOffset::max_value() as StackSize;
debug_assert!(alignment.is_power_of_two() && alignment <= max_size);
// We assume a stack that grows toward lower addresses as implemented by modern ISAs. The
// stack layout from high to low addresses will be:
//
// 1. incoming arguments.
// 2. spills + explicits + struct returns.
// 3. outgoing arguments.
//
// The incoming arguments can have both positive and negative offsets. A negative-offset
// incoming argument is usually the x86 return address pushed by the call instruction, but
// it can also be a fixed stack slot pushed by an externally generated prologue.
//
// Both incoming and outgoing argument slots have fixed offsets that are treated as
// reserved zones by the layout algorithm.
//
// If a function only has incoming arguments and does not perform any calls, then it doesn't
// require the stack to be aligned.
let mut incoming_min = 0;
let mut incoming_max = 0;
let mut outgoing_max = 0;
let mut min_align = alignment;
let mut must_align = !is_leaf;
for slot in frame.values() {
if slot.size > max_size {
return Err(CodegenError::ImplLimitExceeded);
}
match slot.kind {
StackSlotKind::IncomingArg => {
incoming_min = min(incoming_min, slot.offset.unwrap());
incoming_max = max(incoming_max, slot.offset.unwrap() + slot.size as i32);
}
StackSlotKind::OutgoingArg => {
let offset = slot
.offset
.unwrap()
.checked_add(slot.size as StackOffset)
.ok_or(CodegenError::ImplLimitExceeded)?;
outgoing_max = max(outgoing_max, offset);
must_align = true;
}
StackSlotKind::StructReturnSlot
| StackSlotKind::SpillSlot
| StackSlotKind::ExplicitSlot
| StackSlotKind::EmergencySlot => {
// Determine the smallest alignment of any explicit or spill slot.
min_align = slot.alignment(min_align);
must_align = true;
}
}
}
// Lay out spill slots, struct return slots, and explicit slots below the
// incoming arguments. The offset is negative, growing downwards. Start with
// the smallest alignments for better packing.
let mut offset = incoming_min;
debug_assert!(min_align.is_power_of_two());
while min_align <= alignment {
for slot in frame.values_mut() {
// Pick out explicit and spill slots with exact alignment `min_align`.
match slot.kind {
StackSlotKind::SpillSlot
| StackSlotKind::StructReturnSlot
| StackSlotKind::ExplicitSlot
| StackSlotKind::EmergencySlot => {
if slot.alignment(alignment) != min_align {
continue;
}
}
StackSlotKind::IncomingArg | StackSlotKind::OutgoingArg => continue,
}
offset = offset
.checked_sub(slot.size as StackOffset)
.ok_or(CodegenError::ImplLimitExceeded)?;
// Aligning the negative offset can never cause overflow. We're only clearing bits.
offset &= -(min_align as StackOffset);
slot.offset = Some(offset);
}
// Move on to the next higher alignment.
min_align *= 2;
}
// Finally, make room for the outgoing arguments.
offset = offset
.checked_sub(outgoing_max)
.ok_or(CodegenError::ImplLimitExceeded)?;
if must_align {
offset &= -(alignment as StackOffset);
}
// Set the computed layout information for the frame
let frame_size = (offset as StackSize).wrapping_neg();
let inbound_args_size = incoming_max as u32;
frame.layout_info = Some(StackLayoutInfo {
frame_size,
inbound_args_size,
});
Ok(frame_size)
}
#[cfg(test)]
mod tests {
use super::layout_stack;
use crate::ir::stackslot::StackOffset;
use crate::ir::types;
use crate::ir::{StackSlotData, StackSlotKind, StackSlots};
use crate::result::CodegenError;
#[test]
fn layout() {
let sss = &mut StackSlots::new();
// For all these test cases, assume it will call.
let is_leaf = false;
// An empty layout should have 0-sized stack frame.
assert_eq!(layout_stack(sss, is_leaf, 1), Ok(0));
assert_eq!(layout_stack(sss, is_leaf, 16), Ok(0));
// Same for incoming arguments with non-negative offsets.
let in0 = sss.make_incoming_arg(8, 0);
let in1 = sss.make_incoming_arg(8, 8);
assert_eq!(layout_stack(sss, is_leaf, 1), Ok(0));
assert_eq!(layout_stack(sss, is_leaf, 16), Ok(0));
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
// Add some spill slots.
let ss0 = sss.make_spill_slot(types::I64);
let ss1 = sss.make_spill_slot(types::I32);
assert_eq!(layout_stack(sss, is_leaf, 1), Ok(12));
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[ss0].offset, Some(-8));
assert_eq!(sss[ss1].offset, Some(-12));
assert_eq!(layout_stack(sss, is_leaf, 16), Ok(16));
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[ss0].offset, Some(-16));
assert_eq!(sss[ss1].offset, Some(-4));
// An incoming argument with negative offset counts towards the total frame size, but it
// should still pack nicely with the spill slots.
let in2 = sss.make_incoming_arg(4, -4);
assert_eq!(layout_stack(sss, is_leaf, 1), Ok(16));
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[in2].offset, Some(-4));
assert_eq!(sss[ss0].offset, Some(-12));
assert_eq!(sss[ss1].offset, Some(-16));
assert_eq!(layout_stack(sss, is_leaf, 16), Ok(16));
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[in2].offset, Some(-4));
assert_eq!(sss[ss0].offset, Some(-16));
assert_eq!(sss[ss1].offset, Some(-8));
// Finally, make sure there is room for the outgoing args.
let out0 = sss.get_outgoing_arg(4, 0);
assert_eq!(layout_stack(sss, is_leaf, 1), Ok(20));
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[in2].offset, Some(-4));
assert_eq!(sss[ss0].offset, Some(-12));
assert_eq!(sss[ss1].offset, Some(-16));
assert_eq!(sss[out0].offset, Some(0));
assert_eq!(layout_stack(sss, is_leaf, 16), Ok(32));
assert_eq!(sss[in0].offset, Some(0));
assert_eq!(sss[in1].offset, Some(8));
assert_eq!(sss[in2].offset, Some(-4));
assert_eq!(sss[ss0].offset, Some(-16));
assert_eq!(sss[ss1].offset, Some(-8));
assert_eq!(sss[out0].offset, Some(0));
// Also test that an unsupported offset is rejected.
sss.get_outgoing_arg(1, StackOffset::max_value() - 1);
assert_eq!(
layout_stack(sss, is_leaf, 1),
Err(CodegenError::ImplLimitExceeded)
);
}
#[test]
fn slot_kinds() {
let sss = &mut StackSlots::new();
// Add some slots of various kinds.
let ss0 = sss.make_spill_slot(types::I32);
let ss1 = sss.push(StackSlotData::new(
StackSlotKind::ExplicitSlot,
types::I32.bytes(),
));
let ss2 = sss.get_emergency_slot(types::I32, &[]);
assert_eq!(layout_stack(sss, true, 1), Ok(12));
assert_eq!(sss[ss0].offset, Some(-4));
assert_eq!(sss[ss1].offset, Some(-8));
assert_eq!(sss[ss2].offset, Some(-12));
}
}


@@ -1,138 +0,0 @@
//! Topological order of blocks, according to the dominator tree.
use crate::dominator_tree::DominatorTree;
use crate::entity::EntitySet;
use crate::ir::{Block, Layout};
use alloc::vec::Vec;
/// Present blocks in a topological order such that all dominating blocks are guaranteed to be visited
/// before the current block.
///
/// There are many topological orders of the blocks in a function, so it is possible to provide a
/// preferred order, and the `TopoOrder` will present blocks in an order that is as close as possible
/// to the preferred order.
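///
/// A typical usage sketch, mirroring the register allocator passes (names assumed):
///
///   topo.reset(func.layout.blocks());
///   while let Some(block) = topo.next(&func.layout, &domtree) {
///       // Visit `block`; all of its dominators have already been returned.
///   }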
pub struct TopoOrder {
/// Preferred order of blocks to visit.
preferred: Vec<Block>,
/// Next entry to get from `preferred`.
next: usize,
/// Set of visited blocks.
visited: EntitySet<Block>,
/// Stack of blocks to be visited next, already in `visited`.
stack: Vec<Block>,
}
impl TopoOrder {
/// Create a new empty topological order.
pub fn new() -> Self {
Self {
preferred: Vec::new(),
next: 0,
visited: EntitySet::new(),
stack: Vec::new(),
}
}
/// Clear all data structures in this topological order.
pub fn clear(&mut self) {
self.preferred.clear();
self.next = 0;
self.visited.clear();
self.stack.clear();
}
/// Reset and initialize with a preferred sequence of blocks. The resulting topological order is
/// guaranteed to contain all of the blocks in `preferred` as well as any dominators.
pub fn reset<Blocks>(&mut self, preferred: Blocks)
where
Blocks: IntoIterator<Item = Block>,
{
self.preferred.clear();
self.preferred.extend(preferred);
self.next = 0;
self.visited.clear();
self.stack.clear();
}
/// Get the next block in the topological order.
///
/// Two things are guaranteed about the blocks returned by this function:
///
/// - All blocks in the `preferred` iterator given to `reset` will be returned.
/// - All dominators are visited before the block returned.
pub fn next(&mut self, layout: &Layout, domtree: &DominatorTree) -> Option<Block> {
self.visited.resize(layout.block_capacity());
// Any entries in `stack` should be returned immediately. They have already been added to
// `visited`.
while self.stack.is_empty() {
match self.preferred.get(self.next).cloned() {
None => return None,
Some(mut block) => {
// We have the next block in the preferred order.
self.next += 1;
// Push it along with any non-visited dominators.
while self.visited.insert(block) {
self.stack.push(block);
match domtree.idom(block) {
Some(idom) => {
block = layout.inst_block(idom).expect("idom not in layout")
}
None => break,
}
}
}
}
}
self.stack.pop()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::cursor::{Cursor, FuncCursor};
use crate::dominator_tree::DominatorTree;
use crate::flowgraph::ControlFlowGraph;
use crate::ir::{Function, InstBuilder};
use core::iter;
#[test]
fn empty() {
let func = Function::new();
let cfg = ControlFlowGraph::with_function(&func);
let domtree = DominatorTree::with_function(&func, &cfg);
let mut topo = TopoOrder::new();
assert_eq!(topo.next(&func.layout, &domtree), None);
topo.reset(func.layout.blocks());
assert_eq!(topo.next(&func.layout, &domtree), None);
}
#[test]
fn simple() {
let mut func = Function::new();
let block0 = func.dfg.make_block();
let block1 = func.dfg.make_block();
{
let mut cur = FuncCursor::new(&mut func);
cur.insert_block(block0);
cur.ins().jump(block1, &[]);
cur.insert_block(block1);
cur.ins().jump(block1, &[]);
}
let cfg = ControlFlowGraph::with_function(&func);
let domtree = DominatorTree::with_function(&func, &cfg);
let mut topo = TopoOrder::new();
topo.reset(iter::once(block1));
assert_eq!(topo.next(&func.layout, &domtree), Some(block0));
assert_eq!(topo.next(&func.layout, &domtree), Some(block1));
assert_eq!(topo.next(&func.layout, &domtree), None);
}
}


@@ -1,14 +1,8 @@
use crate::ir::{Function, SourceLoc, Value, ValueLabel, ValueLabelAssignments, ValueLoc};
use crate::isa::TargetIsa;
use crate::machinst::MachCompileResult;
use crate::regalloc::{Context, RegDiversions};
use crate::ir::{SourceLoc, ValueLabel};
use crate::HashMap;
use alloc::collections::BTreeMap;
use alloc::vec::Vec;
use core::cmp::Ordering;
use core::convert::From;
use core::iter::Iterator;
use core::ops::Bound::*;
use core::ops::Deref;
use regalloc::Reg;
@@ -31,241 +25,15 @@ pub struct ValueLocRange {
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub enum LabelValueLoc {
/// Old-backend location: RegUnit, StackSlot, or Unassigned.
ValueLoc(ValueLoc),
/// New-backend Reg.
Reg(Reg),
/// New-backend offset from stack pointer.
SPOffset(i64),
}
impl From<ValueLoc> for LabelValueLoc {
fn from(v: ValueLoc) -> Self {
LabelValueLoc::ValueLoc(v)
}
}
/// Resulting map of Value labels and their ranges/locations.
pub type ValueLabelsRanges = HashMap<ValueLabel, Vec<ValueLocRange>>;
fn build_value_labels_index<T>(func: &Function) -> BTreeMap<T, (Value, ValueLabel)>
where
T: From<SourceLoc> + Deref<Target = SourceLoc> + Ord + Copy,
{
if func.dfg.values_labels.is_none() {
return BTreeMap::new();
}
let values_labels = func.dfg.values_labels.as_ref().unwrap();
// Index values_labels by srcloc/from
let mut sorted = BTreeMap::new();
for (val, assigns) in values_labels {
match assigns {
ValueLabelAssignments::Starts(labels) => {
for label in labels {
if label.from.is_default() {
continue;
}
let srcloc = T::from(label.from);
let label = label.label;
sorted.insert(srcloc, (*val, label));
}
}
ValueLabelAssignments::Alias { from, value } => {
if from.is_default() {
continue;
}
let mut aliased_value = *value;
while let Some(ValueLabelAssignments::Alias { value, .. }) =
values_labels.get(&aliased_value)
{
// TODO check/limit recursion?
aliased_value = *value;
}
let from = T::from(*from);
if let Some(ValueLabelAssignments::Starts(labels)) =
values_labels.get(&aliased_value)
{
for label in labels {
let srcloc = if label.from.is_default() {
from
} else {
from.max(T::from(label.from))
};
let label = label.label;
sorted.insert(srcloc, (*val, label));
}
}
}
}
}
sorted
}
/// Builds ranges and locations for the specified value labels.
/// The labels are specified in the DataFlowGraph's `values_labels` collection.
pub fn build_value_labels_ranges<T>(
func: &Function,
regalloc: &Context,
mach_compile_result: Option<&MachCompileResult>,
isa: &dyn TargetIsa,
) -> ValueLabelsRanges
where
T: From<SourceLoc> + Deref<Target = SourceLoc> + Ord + Copy,
{
if let Some(mach_compile_result) = mach_compile_result {
return mach_compile_result.value_labels_ranges.clone();
}
let values_labels = build_value_labels_index::<T>(func);
let mut blocks = func.layout.blocks().collect::<Vec<_>>();
blocks.sort_by_key(|block| func.offsets[*block]); // Ensure inst offsets always increase
let encinfo = isa.encoding_info();
let values_locations = &func.locations;
let liveness_ranges = regalloc.liveness().ranges();
let mut ranges = HashMap::new();
let mut add_range = |label, range: (u32, u32), loc: ValueLoc| {
if range.0 >= range.1 || !loc.is_assigned() {
return;
}
ranges
.entry(label)
.or_insert_with(Vec::new)
.push(ValueLocRange {
loc: loc.into(),
start: range.0,
end: range.1,
});
};
let mut end_offset = 0;
let mut tracked_values: Vec<(Value, ValueLabel, u32, ValueLoc)> = Vec::new();
let mut divert = RegDiversions::new();
for block in blocks {
divert.at_block(&func.entry_diversions, block);
let mut last_srcloc: Option<T> = None;
for (offset, inst, size) in func.inst_offsets(block, &encinfo) {
divert.apply(&func.dfg[inst]);
end_offset = offset + size;
// Remove killed values.
tracked_values.retain(|(x, label, start_offset, last_loc)| {
let range = liveness_ranges.get(*x);
if range.expect("value").killed_at(inst, block, &func.layout) {
add_range(*label, (*start_offset, end_offset), *last_loc);
return false;
}
true
});
let srcloc = func.srclocs[inst];
if srcloc.is_default() {
// Don't process instructions without srcloc.
continue;
}
let srcloc = T::from(srcloc);
// Record and restart ranges if Value location was changed.
for (val, label, start_offset, last_loc) in &mut tracked_values {
let new_loc = divert.get(*val, values_locations);
if new_loc == *last_loc {
continue;
}
add_range(*label, (*start_offset, end_offset), *last_loc);
*start_offset = end_offset;
*last_loc = new_loc;
}
// A new source location range has started: abandon all tracked values.
if last_srcloc.is_some() && last_srcloc.unwrap() > srcloc {
for (_, label, start_offset, last_loc) in &tracked_values {
add_range(*label, (*start_offset, end_offset), *last_loc);
}
tracked_values.clear();
last_srcloc = None;
}
// Get non-processed Values based on srcloc
let range = (
match last_srcloc {
Some(a) => Excluded(a),
None => Unbounded,
},
Included(srcloc),
);
let active_values = values_labels.range(range);
let active_values = active_values.filter(|(_, (v, _))| {
// Ignore dead/inactive Values.
let range = liveness_ranges.get(*v);
match range {
Some(r) => r.reaches_use(inst, block, &func.layout),
None => false,
}
});
// Append new Values to the tracked_values.
for (_, (val, label)) in active_values {
let loc = divert.get(*val, values_locations);
tracked_values.push((*val, *label, end_offset, loc));
}
last_srcloc = Some(srcloc);
}
// Finish all started ranges.
for (_, label, start_offset, last_loc) in &tracked_values {
add_range(*label, (*start_offset, end_offset), *last_loc);
}
}
// Optimize ranges in-place
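// As an illustrative example of the merging below: three ranges for one label,
// [0, 8) and [4, 12) at one location plus [12, 20) at another, collapse into
// [0, 12) followed by [12, 20).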
for (_, label_ranges) in ranges.iter_mut() {
assert!(!label_ranges.is_empty());
label_ranges.sort_by(|a, b| a.start.cmp(&b.start).then_with(|| a.end.cmp(&b.end)));
// Merge ranges
let mut i = 1;
let mut j = 0;
while i < label_ranges.len() {
assert!(label_ranges[j].start <= label_ranges[i].end);
if label_ranges[j].loc != label_ranges[i].loc {
// Different location
if label_ranges[j].end >= label_ranges[i].end {
// Consumed by previous range, skipping
i += 1;
continue;
}
j += 1;
label_ranges[j] = label_ranges[i];
i += 1;
continue;
}
if label_ranges[j].end < label_ranges[i].start {
// Gap in the range location
j += 1;
label_ranges[j] = label_ranges[i];
i += 1;
continue;
}
// Merge i-th and j-th ranges
if label_ranges[j].end < label_ranges[i].end {
label_ranges[j].end = label_ranges[i].end;
}
i += 1;
}
label_ranges.truncate(j + 1);
// Cut/move the start position of the next range if two neighboring ranges intersect.
for i in 0..j {
if label_ranges[i].end > label_ranges[i + 1].start {
label_ranges[i + 1].start = label_ranges[i].end;
assert!(label_ranges[i + 1].start < label_ranges[i + 1].end);
}
assert!(label_ranges[i].end <= label_ranges[i + 1].start);
}
}
ranges
}
#[derive(Eq, Clone, Copy)]
pub struct ComparableSourceLoc(SourceLoc);


@@ -4,10 +4,8 @@ use crate::dbg::DisplayList;
use crate::dominator_tree::{DominatorTree, DominatorTreePreorder};
use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
use crate::ir::{ExpandedProgramPoint, Function};
use crate::regalloc::liveness::Liveness;
use crate::regalloc::virtregs::VirtRegs;
use crate::timing;
use crate::verifier::{VerifierErrors, VerifierStepResult};
use crate::verifier::{virtregs::VirtRegs, VerifierErrors, VerifierStepResult};
/// Verify conventional SSA form for `func`.
///
@@ -27,7 +25,6 @@ pub fn verify_cssa(
func: &Function,
cfg: &ControlFlowGraph,
domtree: &DominatorTree,
liveness: &Liveness,
virtregs: &VirtRegs,
errors: &mut VerifierErrors,
) -> VerifierStepResult<()> {
@@ -41,7 +38,6 @@ pub fn verify_cssa(
cfg,
domtree,
virtregs,
liveness,
preorder,
};
verifier.check_virtregs(errors)?;
@@ -54,7 +50,6 @@ struct CssaVerifier<'a> {
cfg: &'a ControlFlowGraph,
domtree: &'a DominatorTree,
virtregs: &'a VirtRegs,
liveness: &'a Liveness,
preorder: DominatorTreePreorder,
}
@@ -70,9 +65,6 @@ impl<'a> CssaVerifier<'a> {
if !self.func.dfg.value_is_attached(val) {
return errors.fatal((val, format!("Detached value in {}", vreg)));
}
if self.liveness.get(val).is_none() {
return errors.fatal((val, format!("Value in {} has no live range", vreg)));
};
// Check topological ordering with the previous values in the virtual register.
let def: ExpandedProgramPoint = self.func.dfg.value_def(val).into();
@@ -120,19 +112,7 @@ impl<'a> CssaVerifier<'a> {
if self.preorder.dominates(prev_block, def_block)
&& self.domtree.dominates(prev_def, def, &self.func.layout)
{
if self.liveness[prev_val].overlaps_def(def, def_block, &self.func.layout) {
return errors.fatal((
val,
format!(
"Value def in {} = {} interferes with {}",
vreg,
DisplayList(values),
prev_val
),
));
} else {
break;
}
break;
}
}
}


@@ -4,7 +4,6 @@ use crate::entity::{EntitySet, SecondaryMap};
use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
use crate::ir;
use crate::ir::instructions::BranchInfo;
use crate::isa;
use crate::packed_option::PackedOption;
use crate::timing;
use crate::verifier::{VerifierErrors, VerifierStepResult};
@@ -24,19 +23,12 @@ use crate::verifier::{VerifierErrors, VerifierStepResult};
pub fn verify_flags(
func: &ir::Function,
cfg: &ControlFlowGraph,
isa: Option<&dyn isa::TargetIsa>,
errors: &mut VerifierErrors,
) -> VerifierStepResult<()> {
let _tt = timing::verify_flags();
let encinfo = if isa.is_none() || isa.unwrap().get_mach_backend().is_some() {
None
} else {
Some(isa.unwrap().encoding_info())
};
let mut verifier = FlagsVerifier {
func,
cfg,
encinfo,
livein: SecondaryMap::new(),
};
verifier.check(errors)
@@ -45,7 +37,6 @@ pub fn verify_flags(
struct FlagsVerifier<'a> {
func: &'a ir::Function,
cfg: &'a ControlFlowGraph,
encinfo: Option<isa::EncInfo>,
/// The single live-in flags value (if any) for each block.
livein: SecondaryMap<ir::Block, PackedOption<ir::Value>>,
@@ -111,21 +102,6 @@ impl<'a> FlagsVerifier<'a> {
return Err(());
}
}
// Does the instruction have an encoding that clobbers the CPU flags?
if self
.encinfo
.as_ref()
.and_then(|ei| ei.operand_constraints(self.func.encodings[inst]))
.map_or(false, |c| c.clobbers_flags)
&& live_val.is_some()
{
errors.report((
inst,
format!("encoding clobbers live CPU flags in {}", live),
));
return Err(());
}
}
// Now look for live ranges of CPU flags that end here.
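// A toy sketch of the clobber rule removed above, with made-up types: while a
// flags value is live, an instruction whose encoding clobbers the CPU flags is
// an error. This simplifies the real verifier by treating every use as the
// last use of the flags value.
#[derive(Clone, Copy)]
struct ToyInst {
    defines_flags: bool,
    uses_flags: bool,
    clobbers_flags: bool,
}

fn check_flags(block: &[ToyInst]) -> Result<(), usize> {
    let mut live = false;
    for (idx, inst) in block.iter().enumerate() {
        // An instruction whose encoding clobbers the flags must not execute
        // while a flags value is live.
        if inst.clobbers_flags && live {
            return Err(idx);
        }
        if inst.uses_flags {
            // Simplification: treat every use as the last use.
            live = false;
        }
        if inst.defines_flags {
            live = true;
        }
    }
    Ok(())
}

fn main() {
    let ok = [
        ToyInst { defines_flags: true, uses_flags: false, clobbers_flags: false },
        ToyInst { defines_flags: false, uses_flags: true, clobbers_flags: false },
    ];
    assert_eq!(check_flags(&ok), Ok(()));

    let bad = [
        ToyInst { defines_flags: true, uses_flags: false, clobbers_flags: false },
        ToyInst { defines_flags: false, uses_flags: false, clobbers_flags: true },
        ToyInst { defines_flags: false, uses_flags: true, clobbers_flags: false },
    ];
    assert_eq!(check_flags(&bad), Err(1));
}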


@@ -1,235 +0,0 @@
//! Liveness verifier.
use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
use crate::ir::entities::AnyEntity;
use crate::ir::{ExpandedProgramPoint, Function, ProgramPoint, Value};
use crate::isa::TargetIsa;
use crate::regalloc::liveness::Liveness;
use crate::regalloc::liverange::LiveRange;
use crate::timing;
use crate::verifier::{VerifierErrors, VerifierStepResult};
/// Verify liveness information for `func`.
///
/// The provided control flow graph is assumed to be sound.
///
/// - All values in the program must have a live range.
/// - The live range def point must match where the value is defined.
/// - The live range must reach all uses.
/// - When a live range is live-in to a block, it must be live at all the predecessors.
/// - The live range affinity must be compatible with encoding constraints.
///
/// We don't verify that live ranges are minimal. This would require recomputing live ranges for
/// all values.
pub fn verify_liveness(
isa: &dyn TargetIsa,
func: &Function,
cfg: &ControlFlowGraph,
liveness: &Liveness,
errors: &mut VerifierErrors,
) -> VerifierStepResult<()> {
let _tt = timing::verify_liveness();
let verifier = LivenessVerifier {
isa,
func,
cfg,
liveness,
};
verifier.check_blocks(errors)?;
verifier.check_insts(errors)?;
Ok(())
}
struct LivenessVerifier<'a> {
isa: &'a dyn TargetIsa,
func: &'a Function,
cfg: &'a ControlFlowGraph,
liveness: &'a Liveness,
}
impl<'a> LivenessVerifier<'a> {
/// Check all block arguments.
fn check_blocks(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> {
for block in self.func.layout.blocks() {
for &val in self.func.dfg.block_params(block) {
let lr = match self.liveness.get(val) {
Some(lr) => lr,
None => {
return errors
.fatal((block, format!("block arg {} has no live range", val)))
}
};
self.check_lr(block.into(), val, lr, errors)?;
}
}
Ok(())
}
/// Check all instructions.
fn check_insts(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> {
for block in self.func.layout.blocks() {
for inst in self.func.layout.block_insts(block) {
let encoding = self.func.encodings[inst];
// Check the defs.
for &val in self.func.dfg.inst_results(inst) {
let lr = match self.liveness.get(val) {
Some(lr) => lr,
None => return errors.fatal((inst, format!("{} has no live range", val))),
};
self.check_lr(inst.into(), val, lr, errors)?;
if encoding.is_legal() {
// A legal instruction is not allowed to define ghost values.
if lr.affinity.is_unassigned() {
return errors.fatal((
inst,
format!(
"{} is a ghost value defined by a real [{}] instruction",
val,
self.isa.encoding_info().display(encoding)
),
));
}
} else if !lr.affinity.is_unassigned() {
// A non-encoded instruction can only define ghost values.
return errors.fatal((
inst,
format!(
"{} is a real {} value defined by a ghost instruction",
val,
lr.affinity.display(&self.isa.register_info())
),
));
}
}
// Check the uses.
for &val in self.func.dfg.inst_args(inst) {
let lr = match self.liveness.get(val) {
Some(lr) => lr,
None => return errors.fatal((inst, format!("{} has no live range", val))),
};
debug_assert!(self.func.layout.inst_block(inst).unwrap() == block);
if !lr.reaches_use(inst, block, &self.func.layout) {
return errors.fatal((inst, format!("{} is not live at this use", val)));
}
// A legal instruction is not allowed to depend on ghost values.
if encoding.is_legal() && lr.affinity.is_unassigned() {
return errors.fatal((
inst,
format!(
"{} is a ghost value used by a real [{}] instruction",
val,
self.isa.encoding_info().display(encoding),
),
));
}
}
}
}
Ok(())
}
/// Check the integrity of the live range `lr`.
fn check_lr(
&self,
def: ProgramPoint,
val: Value,
lr: &LiveRange,
errors: &mut VerifierErrors,
) -> VerifierStepResult<()> {
let l = &self.func.layout;
let loc: AnyEntity = match def.into() {
ExpandedProgramPoint::Block(e) => e.into(),
ExpandedProgramPoint::Inst(i) => i.into(),
};
if lr.def() != def {
return errors.fatal((
loc,
format!("Wrong live range def ({}) for {}", lr.def(), val),
));
}
if lr.is_dead() {
if !lr.is_local() {
return errors.fatal((loc, format!("Dead live range {} should be local", val)));
} else {
return Ok(());
}
}
let def_block = match def.into() {
ExpandedProgramPoint::Block(e) => e,
ExpandedProgramPoint::Inst(i) => l.inst_block(i).unwrap(),
};
match lr.def_local_end().into() {
ExpandedProgramPoint::Block(e) => {
return errors.fatal((
loc,
format!("Def local range for {} can't end at {}", val, e),
));
}
ExpandedProgramPoint::Inst(i) => {
if self.func.layout.inst_block(i) != Some(def_block) {
return errors
.fatal((loc, format!("Def local end for {} in wrong block", val)));
}
}
}
// Now check the live-in intervals against the CFG.
for (mut block, end) in lr.liveins() {
if !l.is_block_inserted(block) {
return errors.fatal((
loc,
format!("{} livein at {} which is not in the layout", val, block),
));
}
let end_block = match l.inst_block(end) {
Some(e) => e,
None => {
return errors.fatal((
loc,
format!(
"{} livein for {} ends at {} which is not in the layout",
val, block, end
),
));
}
};
// Check all the blocks in the interval independently.
loop {
// If `val` is live-in at `block`, it must be live at all the predecessors.
for BlockPredecessor { inst: pred, block } in self.cfg.pred_iter(block) {
if !lr.reaches_use(pred, block, &self.func.layout) {
return errors.fatal((
pred,
format!(
"{} is live in to {} but not live at predecessor",
val, block
),
));
}
}
if block == end_block {
break;
}
block = match l.next_block(block) {
Some(e) => e,
None => {
return errors.fatal((
loc,
format!("end of {} livein ({}) never reached", val, end_block),
));
}
};
}
}
Ok(())
}
}
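// A toy model of one invariant the deleted liveness verifier enforces: if a
// value is live-in to a block, it must also be live out of every predecessor
// of that block. The types and function here are illustrative stand-ins, not
// the verifier's real API.
use std::collections::{HashMap, HashSet};

type Block = u32;

struct ToyLiveness {
    live_in: HashSet<Block>,
    live_out: HashSet<Block>,
}

fn check_live_ins(
    preds: &HashMap<Block, Vec<Block>>,
    lr: &ToyLiveness,
) -> Result<(), (Block, Block)> {
    for &block in &lr.live_in {
        for &pred in preds.get(&block).map(|v| v.as_slice()).unwrap_or(&[]) {
            if !lr.live_out.contains(&pred) {
                // Value is live-in to `block` but not live at predecessor `pred`.
                return Err((block, pred));
            }
        }
    }
    Ok(())
}

fn main() {
    // CFG: 0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3
    let mut preds = HashMap::new();
    preds.insert(1, vec![0]);
    preds.insert(2, vec![0]);
    preds.insert(3, vec![1, 2]);

    // Live-in to block 3 but only live out of blocks 0 and 1: the verifier
    // would reject this, because block 2 is also a predecessor of block 3.
    let lr = ToyLiveness {
        live_in: [3].into_iter().collect(),
        live_out: [0, 1].into_iter().collect(),
    };
    assert_eq!(check_live_ins(&preds, &lr), Err((3, 2)));
}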


@@ -1,399 +0,0 @@
//! Verify value locations.
use crate::flowgraph::ControlFlowGraph;
use crate::ir;
use crate::isa;
use crate::regalloc::liveness::Liveness;
use crate::regalloc::RegDiversions;
use crate::timing;
use crate::verifier::{VerifierErrors, VerifierStepResult};
/// Verify value locations for `func`.
///
/// After register allocation, every value must be assigned to a location - either a register or a
/// stack slot. These locations must be compatible with the constraints described by the
/// instruction encoding recipes.
///
/// Values can be temporarily diverted to a different location by using the `regmove`, `regspill`,
/// and `regfill` instructions, but only inside a block.
///
/// If a liveness analysis is provided, it is used to verify that there are no active register
/// diversions across control flow edges.
pub fn verify_locations(
isa: &dyn isa::TargetIsa,
func: &ir::Function,
cfg: &ControlFlowGraph,
liveness: Option<&Liveness>,
errors: &mut VerifierErrors,
) -> VerifierStepResult<()> {
let _tt = timing::verify_locations();
let verifier = LocationVerifier {
isa,
func,
reginfo: isa.register_info(),
encinfo: isa.encoding_info(),
cfg,
liveness,
};
verifier.check_constraints(errors)?;
Ok(())
}
struct LocationVerifier<'a> {
isa: &'a dyn isa::TargetIsa,
func: &'a ir::Function,
reginfo: isa::RegInfo,
encinfo: isa::EncInfo,
cfg: &'a ControlFlowGraph,
liveness: Option<&'a Liveness>,
}
impl<'a> LocationVerifier<'a> {
/// Check that the assigned value locations match the operand constraints of their uses.
fn check_constraints(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> {
let dfg = &self.func.dfg;
let mut divert = RegDiversions::new();
for block in self.func.layout.blocks() {
divert.at_block(&self.func.entry_diversions, block);
let mut is_after_branch = false;
for inst in self.func.layout.block_insts(block) {
let enc = self.func.encodings[inst];
if enc.is_legal() {
self.check_enc_constraints(inst, enc, &divert, errors)?
} else {
self.check_ghost_results(inst, errors)?;
}
if let Some(sig) = dfg.call_signature(inst) {
self.check_call_abi(inst, sig, &divert, errors)?;
}
let opcode = dfg[inst].opcode();
if opcode.is_return() {
self.check_return_abi(inst, &divert, errors)?;
} else if opcode.is_branch() && !divert.is_empty() {
self.check_cfg_edges(inst, &mut divert, is_after_branch, errors)?;
}
self.update_diversions(inst, &mut divert, errors)?;
is_after_branch = opcode.is_branch();
}
}
Ok(())
}
/// Check encoding constraints against the current value locations.
fn check_enc_constraints(
&self,
inst: ir::Inst,
enc: isa::Encoding,
divert: &RegDiversions,
errors: &mut VerifierErrors,
) -> VerifierStepResult<()> {
let constraints = self
.encinfo
.operand_constraints(enc)
.expect("check_enc_constraints requires a legal encoding");
if constraints.satisfied(inst, divert, self.func) {
return Ok(());
}
// TODO: We could give a better error message here.
errors.fatal((
inst,
format!(
"{} constraints not satisfied in: {}\n{}",
self.encinfo.display(enc),
self.func.dfg.display_inst(inst, self.isa),
self.func.display(self.isa),
),
))
}
/// Check that the result values produced by a ghost instruction are not assigned a value
/// location.
fn check_ghost_results(
&self,
inst: ir::Inst,
errors: &mut VerifierErrors,
) -> VerifierStepResult<()> {
let results = self.func.dfg.inst_results(inst);
for &res in results {
let loc = self.func.locations[res];
if loc.is_assigned() {
return errors.fatal((
inst,
format!(
"ghost result {} value must not have a location ({}).",
res,
loc.display(&self.reginfo)
),
));
}
}
Ok(())
}
/// Check the ABI argument and result locations for a call.
fn check_call_abi(
&self,
inst: ir::Inst,
sig: ir::SigRef,
divert: &RegDiversions,
errors: &mut VerifierErrors,
) -> VerifierStepResult<()> {
let sig = &self.func.dfg.signatures[sig];
let varargs = self.func.dfg.inst_variable_args(inst);
let results = self.func.dfg.inst_results(inst);
for (abi, &value) in sig.params.iter().zip(varargs) {
self.check_abi_location(
inst,
value,
abi,
divert.get(value, &self.func.locations),
ir::StackSlotKind::OutgoingArg,
errors,
)?;
}
for (abi, &value) in sig.returns.iter().zip(results) {
self.check_abi_location(
inst,
value,
abi,
self.func.locations[value],
ir::StackSlotKind::OutgoingArg,
errors,
)?;
}
Ok(())
}
/// Check the ABI argument locations for a return.
fn check_return_abi(
&self,
inst: ir::Inst,
divert: &RegDiversions,
errors: &mut VerifierErrors,
) -> VerifierStepResult<()> {
let sig = &self.func.signature;
let varargs = self.func.dfg.inst_variable_args(inst);
for (abi, &value) in sig.returns.iter().zip(varargs) {
self.check_abi_location(
inst,
value,
abi,
divert.get(value, &self.func.locations),
ir::StackSlotKind::IncomingArg,
errors,
)?;
}
Ok(())
}
/// Check a single ABI location.
fn check_abi_location(
&self,
inst: ir::Inst,
value: ir::Value,
abi: &ir::AbiParam,
loc: ir::ValueLoc,
want_kind: ir::StackSlotKind,
errors: &mut VerifierErrors,
) -> VerifierStepResult<()> {
match abi.location {
ir::ArgumentLoc::Unassigned => {}
ir::ArgumentLoc::Reg(reg) => {
if loc != ir::ValueLoc::Reg(reg) {
return errors.fatal((
inst,
format!(
"ABI expects {} in {}, got {}",
value,
abi.location.display(&self.reginfo),
loc.display(&self.reginfo),
),
));
}
}
ir::ArgumentLoc::Stack(offset) => {
if let ir::ValueLoc::Stack(ss) = loc {
let slot = &self.func.stack_slots[ss];
if slot.kind != want_kind {
return errors.fatal((
inst,
format!(
"call argument {} should be in a {} slot, but {} is {}",
value, want_kind, ss, slot.kind
),
));
}
if slot.offset.unwrap() != offset {
return errors.fatal((
inst,
format!(
"ABI expects {} at stack offset {}, but {} is at {}",
value,
offset,
ss,
slot.offset.unwrap()
),
));
}
} else {
return errors.fatal((
inst,
format!(
"ABI expects {} at stack offset {}, got {}",
value,
offset,
loc.display(&self.reginfo)
),
));
}
}
}
Ok(())
}
/// Update diversions to reflect the current instruction and check their consistency.
fn update_diversions(
&self,
inst: ir::Inst,
divert: &mut RegDiversions,
errors: &mut VerifierErrors,
) -> VerifierStepResult<()> {
let (arg, src) = match self.func.dfg[inst] {
ir::InstructionData::RegMove { arg, src, .. }
| ir::InstructionData::RegSpill { arg, src, .. } => (arg, ir::ValueLoc::Reg(src)),
ir::InstructionData::RegFill { arg, src, .. } => (arg, ir::ValueLoc::Stack(src)),
_ => return Ok(()),
};
if let Some(d) = divert.diversion(arg) {
if d.to != src {
return errors.fatal((
inst,
format!(
"inconsistent with current diversion to {}",
d.to.display(&self.reginfo)
),
));
}
} else if self.func.locations[arg] != src {
return errors.fatal((
inst,
format!(
"inconsistent with global location {} ({})",
self.func.locations[arg].display(&self.reginfo),
self.func.dfg.display_inst(inst, None)
),
));
}
divert.apply(&self.func.dfg[inst]);
Ok(())
}
/// We have active diversions before a branch. Make sure none of the diverted values are live
/// on the outgoing CFG edges.
fn check_cfg_edges(
&self,
inst: ir::Inst,
divert: &mut RegDiversions,
is_after_branch: bool,
errors: &mut VerifierErrors,
) -> VerifierStepResult<()> {
use crate::ir::instructions::BranchInfo::*;
let dfg = &self.func.dfg;
let branch_kind = dfg.analyze_branch(inst);
// We can only check CFG edges if we have a liveness analysis.
let liveness = match self.liveness {
Some(l) => l,
None => return Ok(()),
};
match branch_kind {
NotABranch => panic!(
"No branch information for {}",
dfg.display_inst(inst, self.isa)
),
SingleDest(block, _) => {
let unique_predecessor = self.cfg.pred_iter(block).count() == 1;
let mut val_to_remove = vec![];
for (&value, d) in divert.iter() {
let lr = &liveness[value];
if is_after_branch && unique_predecessor {
// Forward diversions based on the targeted branch.
if !lr.is_livein(block, &self.func.layout) {
val_to_remove.push(value)
}
} else if lr.is_livein(block, &self.func.layout) {
return errors.fatal((
inst,
format!(
"SingleDest: {} is diverted to {} and live in to {}",
value,
d.to.display(&self.reginfo),
block,
),
));
}
}
if is_after_branch && unique_predecessor {
for val in val_to_remove.into_iter() {
divert.remove(val);
}
debug_assert!(divert.check_block_entry(&self.func.entry_diversions, block));
}
}
Table(jt, block) => {
for (&value, d) in divert.iter() {
let lr = &liveness[value];
if let Some(block) = block {
if lr.is_livein(block, &self.func.layout) {
return errors.fatal((
inst,
format!(
"Table.default: {} is diverted to {} and live in to {}",
value,
d.to.display(&self.reginfo),
block,
),
));
}
}
for block in self.func.jump_tables[jt].iter() {
if lr.is_livein(*block, &self.func.layout) {
return errors.fatal((
inst,
format!(
"Table.case: {} is diverted to {} and live in to {}",
value,
d.to.display(&self.reginfo),
block,
),
));
}
}
}
}
}
Ok(())
}
}
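// A toy sketch of the diversion bookkeeping in `update_diversions` above, with
// simplified location and value types (names are illustrative only): a
// regmove/regspill/regfill must name the value's *current* location as its
// source, whether that is the global location or an active diversion.
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum Loc {
    Reg(u8),
    Stack(u8),
}

type Value = u32;

struct Diversions {
    current: HashMap<Value, Loc>,
}

impl Diversions {
    fn check_and_apply(
        &mut self,
        globals: &HashMap<Value, Loc>,
        value: Value,
        from: Loc,
        to: Loc,
    ) -> Result<(), String> {
        let expected = self
            .current
            .get(&value)
            .copied()
            .or_else(|| globals.get(&value).copied());
        if expected != Some(from) {
            return Err(format!(
                "move of {} from {:?} is inconsistent with its current location {:?}",
                value, from, expected
            ));
        }
        self.current.insert(value, to);
        Ok(())
    }
}

fn main() {
    let globals: HashMap<Value, Loc> = [(7, Loc::Reg(0))].into_iter().collect();
    let mut divert = Diversions { current: HashMap::new() };

    // Spill v7 from %r0 to a stack slot, then fill it back into %r1.
    divert.check_and_apply(&globals, 7, Loc::Reg(0), Loc::Stack(3)).unwrap();
    divert.check_and_apply(&globals, 7, Loc::Stack(3), Loc::Reg(1)).unwrap();

    // A move that names the stale global location is rejected.
    assert!(divert.check_and_apply(&globals, 7, Loc::Reg(0), Loc::Reg(2)).is_err());
}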


@@ -78,16 +78,13 @@ use alloc::collections::BTreeSet;
use alloc::string::{String, ToString};
use alloc::vec::Vec;
use core::cmp::Ordering;
use core::fmt::{self, Display, Formatter, Write};
use core::fmt::{self, Display, Formatter};
pub use self::cssa::verify_cssa;
pub use self::liveness::verify_liveness;
pub use self::locations::verify_locations;
mod cssa;
mod flags;
mod liveness;
mod locations;
mod virtregs;
/// A verifier error.
#[derive(Debug, PartialEq, Eq, Clone)]
@@ -1763,145 +1760,6 @@ impl<'a> Verifier<'a> {
errors.as_result()
}
/// If the verifier has been set up with an ISA, make sure that the recorded encoding for the
/// instruction (if any) matches how the ISA would encode it.
fn verify_encoding(&self, inst: Inst, errors: &mut VerifierErrors) -> VerifierStepResult<()> {
// When the encodings table is empty, we don't require any instructions to be encoded.
//
// Once some instructions are encoded, we require all side-effecting instructions to have a
// legal encoding.
if self.func.encodings.is_empty() {
return Ok(());
}
let isa = match self.isa {
Some(isa) => isa,
None => return Ok(()),
};
let encoding = self.func.encodings[inst];
if encoding.is_legal() {
if self.func.dfg[inst].opcode().is_ghost() {
return errors.nonfatal((
inst,
self.context(inst),
format!(
"Ghost instruction has an encoding: {}",
isa.encoding_info().display(encoding),
),
));
}
let mut encodings = isa
.legal_encodings(
&self.func,
&self.func.dfg[inst],
self.func.dfg.ctrl_typevar(inst),
)
.peekable();
if encodings.peek().is_none() {
return errors.nonfatal((
inst,
self.context(inst),
format!(
"Instruction failed to re-encode {}",
isa.encoding_info().display(encoding),
),
));
}
let has_valid_encoding = encodings.any(|possible_enc| encoding == possible_enc);
if !has_valid_encoding {
let mut possible_encodings = String::new();
let mut multiple_encodings = false;
for enc in isa.legal_encodings(
&self.func,
&self.func.dfg[inst],
self.func.dfg.ctrl_typevar(inst),
) {
if !possible_encodings.is_empty() {
possible_encodings.push_str(", ");
multiple_encodings = true;
}
possible_encodings
.write_fmt(format_args!("{}", isa.encoding_info().display(enc)))
.unwrap();
}
return errors.nonfatal((
inst,
self.context(inst),
format!(
"encoding {} should be {}{}",
isa.encoding_info().display(encoding),
if multiple_encodings { "one of: " } else { "" },
possible_encodings,
),
));
}
return Ok(());
}
// Instruction is not encoded, so it is a ghost instruction.
// Instructions with side effects are not allowed to be ghost instructions.
let opcode = self.func.dfg[inst].opcode();
// The `fallthrough`, `fallthrough_return`, and `safepoint` instructions are not required
// to have an encoding.
if opcode == Opcode::Fallthrough
|| opcode == Opcode::FallthroughReturn
|| opcode == Opcode::Safepoint
{
return Ok(());
}
// Check if this opcode must be encoded.
let mut needs_enc = None;
if opcode.is_branch() {
needs_enc = Some("Branch");
} else if opcode.is_call() {
needs_enc = Some("Call");
} else if opcode.is_return() {
needs_enc = Some("Return");
} else if opcode.can_store() {
needs_enc = Some("Store");
} else if opcode.can_trap() {
needs_enc = Some("Trapping instruction");
} else if opcode.other_side_effects() {
needs_enc = Some("Instruction with side effects");
}
if let Some(text) = needs_enc {
// This instruction needs an encoding, so generate an error.
// Provide the ISA default encoding as a hint.
match self.func.encode(inst, isa) {
Ok(enc) => {
return errors.nonfatal((
inst,
self.context(inst),
format!(
"{} must have an encoding (e.g., {})))",
text,
isa.encoding_info().display(enc),
),
));
}
Err(_) => {
return errors.nonfatal((
inst,
self.context(inst),
format!("{} must have an encoding", text),
))
}
}
}
Ok(())
}
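// A toy restatement of the classification in the removed `verify_encoding`
// above (the flag struct is a stand-in, not the real `Opcode` API): once any
// instruction in a function is encoded, side-effecting instructions must be
// encoded too, while pure instructions may remain unencoded ghosts.
#[derive(Clone, Copy)]
struct ToyOpcodeFlags {
    is_branch: bool,
    is_call: bool,
    is_return: bool,
    can_store: bool,
    can_trap: bool,
    other_side_effects: bool,
}

fn needs_encoding(op: ToyOpcodeFlags) -> Option<&'static str> {
    if op.is_branch {
        Some("Branch")
    } else if op.is_call {
        Some("Call")
    } else if op.is_return {
        Some("Return")
    } else if op.can_store {
        Some("Store")
    } else if op.can_trap {
        Some("Trapping instruction")
    } else if op.other_side_effects {
        Some("Instruction with side effects")
    } else {
        // Pure instructions may stay as unencoded ghost instructions.
        None
    }
}

fn main() {
    let pure = ToyOpcodeFlags {
        is_branch: false,
        is_call: false,
        is_return: false,
        can_store: false,
        can_trap: false,
        other_side_effects: false,
    };
    assert_eq!(needs_encoding(pure), None);
    assert_eq!(needs_encoding(ToyOpcodeFlags { is_call: true, ..pure }), Some("Call"));
}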
fn immediate_constraints(
&self,
inst: Inst,
@@ -2034,14 +1892,13 @@ impl<'a> Verifier<'a> {
self.instruction_integrity(inst, errors)?;
self.verify_safepoint_unused(inst, errors)?;
self.typecheck(inst, errors)?;
self.verify_encoding(inst, errors)?;
self.immediate_constraints(inst, errors)?;
}
self.encodable_as_bb(block, errors)?;
}
verify_flags(self.func, &self.expected_cfg, self.isa, errors)?;
verify_flags(self.func, &self.expected_cfg, errors)?;
if !errors.is_empty() {
log::warn!(


@@ -6,13 +6,10 @@
use crate::entity::SecondaryMap;
use crate::ir::entities::AnyEntity;
use crate::ir::{
Block, DataFlowGraph, DisplayFunctionAnnotations, Function, Inst, SigRef, Type, Value,
ValueDef, ValueLoc,
Block, DataFlowGraph, DisplayFunctionAnnotations, Function, Inst, SigRef, Type, Value, ValueDef,
};
use crate::isa::{RegInfo, TargetIsa};
use crate::packed_option::ReservedValue;
use crate::value_label::{LabelValueLoc, ValueLabelsRanges};
use crate::HashSet;
use alloc::string::String;
use alloc::vec::Vec;
use core::fmt::{self, Write};
@@ -278,49 +275,6 @@ pub fn write_block_header(
writeln!(w, "):")
}
fn write_valueloc(w: &mut dyn Write, loc: LabelValueLoc, regs: &RegInfo) -> fmt::Result {
match loc {
LabelValueLoc::ValueLoc(ValueLoc::Reg(r)) => write!(w, "{}", regs.display_regunit(r)),
LabelValueLoc::ValueLoc(ValueLoc::Stack(ss)) => write!(w, "{}", ss),
LabelValueLoc::ValueLoc(ValueLoc::Unassigned) => write!(w, "?"),
LabelValueLoc::Reg(r) => write!(w, "{:?}", r),
LabelValueLoc::SPOffset(off) => write!(w, "[sp+{}]", off),
}
}
fn write_value_range_markers(
w: &mut dyn Write,
val_ranges: &ValueLabelsRanges,
regs: &RegInfo,
offset: u32,
indent: usize,
) -> fmt::Result {
let mut result = String::new();
let mut shown = HashSet::new();
for (val, rng) in val_ranges {
for i in (0..rng.len()).rev() {
if rng[i].start == offset {
write!(&mut result, " {}@", val)?;
write_valueloc(&mut result, rng[i].loc, regs)?;
shown.insert(val);
break;
}
}
}
for (val, rng) in val_ranges {
for i in (0..rng.len()).rev() {
if rng[i].end == offset && !shown.contains(val) {
write!(&mut result, " {}\u{2620}", val)?;
break;
}
}
}
if !result.is_empty() {
writeln!(w, ";{1:0$}; {2}", indent + 24, "", result)?;
}
Ok(())
}
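// A small sketch of the annotation format produced by the removed
// `write_value_range_markers`: at a given byte offset, ranges that start there
// print as `value@location` and ranges that end there print as `value☠`.
// The types below are simplified stand-ins, and the duplicate-suppression of
// the real function is omitted.
use std::fmt::Write;

struct ToyRange {
    start: u32,
    end: u32,
    loc: &'static str,
}

fn markers_at(offset: u32, val_ranges: &[(&'static str, Vec<ToyRange>)]) -> String {
    let mut result = String::new();
    // Ranges that start at this offset: show the value and its new location.
    for (val, ranges) in val_ranges {
        if let Some(r) = ranges.iter().find(|r| r.start == offset) {
            write!(result, " {}@{}", val, r.loc).unwrap();
        }
    }
    // Ranges that end at this offset: mark the value as dead.
    for (val, ranges) in val_ranges {
        if ranges.iter().any(|r| r.end == offset) {
            write!(result, " {}\u{2620}", val).unwrap();
        }
    }
    result
}

fn main() {
    let ranges = vec![
        ("v1", vec![ToyRange { start: 0, end: 16, loc: "%rax" }]),
        ("v2", vec![ToyRange { start: 16, end: 32, loc: "ss0" }]),
    ];
    // At offset 16, v2's range starts in ss0 and v1's range ends.
    assert_eq!(markers_at(16, &ranges), " v2@ss0 v1\u{2620}");
}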
fn decorate_block<FW: FuncWriter>(
func_w: &mut FW,
w: &mut dyn Write,
@@ -329,12 +283,8 @@ fn decorate_block<FW: FuncWriter>(
annotations: &DisplayFunctionAnnotations,
block: Block,
) -> fmt::Result {
// Indent all instructions if any encodings are present.
let indent = if func.encodings.is_empty() && func.srclocs.is_empty() {
4
} else {
36
};
// Indent all instructions if any srclocs are present.
let indent = if func.srclocs.is_empty() { 4 } else { 36 };
let isa = annotations.isa;
func_w.write_block_header(w, func, isa, block, indent)?;
@@ -342,22 +292,6 @@ fn decorate_block<FW: FuncWriter>(
write_value_aliases(w, aliases, a, indent)?;
}
if let Some(isa) = isa {
if !func.offsets.is_empty() {
let encinfo = isa.encoding_info();
let regs = &isa.register_info();
for (offset, inst, size) in func.inst_offsets(block, &encinfo) {
func_w.write_instruction(w, func, aliases, Some(isa), inst, indent)?;
if size > 0 {
if let Some(val_ranges) = annotations.value_ranges {
write_value_range_markers(w, val_ranges, regs, offset + size, indent)?;
}
}
}
return Ok(());
}
}
for inst in func.layout.block_insts(block) {
func_w.write_instruction(w, func, aliases, isa, inst, indent)?;
}
@@ -438,23 +372,6 @@ fn write_instruction(
write!(s, "{} ", srcloc)?;
}
// Write out encoding info.
if let Some(enc) = func.encodings.get(inst).cloned() {
if let Some(isa) = isa {
write!(s, "[{}", isa.encoding_info().display(enc))?;
// Write value locations, if we have them.
if !func.locations.is_empty() {
let regs = isa.register_info();
for &r in func.dfg.inst_results(inst) {
write!(s, ",{}", func.locations[r].display(&regs))?
}
}
write!(s, "] ")?;
} else {
write!(s, "[{}] ", enc)?;
}
}
// Write out prefix and indent the instruction.
write!(w, "{1:0$}", indent, s)?;