Remove ancient register allocation (#3401)
This commit is contained in:
@@ -1,180 +0,0 @@
|
||||
//! Common helper code for ABI lowering.
|
||||
//!
|
||||
//! This module provides functions and data structures that are useful for implementing the
|
||||
//! `TargetIsa::legalize_signature()` method.
|
||||
|
||||
use crate::ir::{AbiParam, ArgumentExtension, ArgumentLoc, Type};
|
||||
use core::cmp::Ordering;
|
||||
|
||||
/// Legalization action to perform on a single argument or return value when converting a
|
||||
/// signature.
|
||||
///
|
||||
/// An argument may go through a sequence of legalization steps before it reaches the final
|
||||
/// `Assign` action.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum ArgAction {
|
||||
/// Assign the argument to the given location.
|
||||
Assign(ArgumentLoc),
|
||||
|
||||
/// Convert the argument, then call again.
|
||||
///
|
||||
/// This action can split an integer type into two smaller integer arguments, or it can split a
|
||||
/// SIMD vector into halves.
|
||||
Convert(ValueConversion),
|
||||
}
|
||||
|
||||
impl From<ArgumentLoc> for ArgAction {
|
||||
fn from(x: ArgumentLoc) -> Self {
|
||||
Self::Assign(x)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ValueConversion> for ArgAction {
|
||||
fn from(x: ValueConversion) -> Self {
|
||||
Self::Convert(x)
|
||||
}
|
||||
}
|
||||
|
||||
/// Legalization action to be applied to a value that is being passed to or from a legalized ABI.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum ValueConversion {
|
||||
/// Split an integer types into low and high parts, using `isplit`.
|
||||
IntSplit,
|
||||
|
||||
/// Split a vector type into halves with identical lane types, using `vsplit`.
|
||||
VectorSplit,
|
||||
|
||||
/// Bit-cast to an integer type of the same size.
|
||||
IntBits,
|
||||
|
||||
/// Sign-extend integer value to the required type.
|
||||
Sext(Type),
|
||||
|
||||
/// Unsigned zero-extend value to the required type.
|
||||
Uext(Type),
|
||||
|
||||
/// Pass value by pointer of given integer type.
|
||||
Pointer(Type),
|
||||
}
|
||||
|
||||
/// Common trait for assigning arguments to registers or stack locations.
|
||||
///
|
||||
/// This will be implemented by individual ISAs.
|
||||
pub trait ArgAssigner {
|
||||
/// Pick an assignment action for function argument (or return value) `arg`.
|
||||
fn assign(&mut self, arg: &AbiParam) -> ArgAction;
|
||||
}
|
||||
|
||||
/// Determine the right action to take when passing a `have` value type to a call signature where
|
||||
/// the next argument is `arg` which has a different value type.
|
||||
///
|
||||
/// The signature legalization process in `legalize_args` above can replace a single argument value
|
||||
/// with multiple arguments of smaller types. It can also change the type of an integer argument to
|
||||
/// a larger integer type, requiring the smaller value to be sign- or zero-extended.
|
||||
///
|
||||
/// The legalizer needs to repair the values at all ABI boundaries:
|
||||
///
|
||||
/// - Incoming function arguments to the entry block.
|
||||
/// - Function arguments passed to a call.
|
||||
/// - Return values from a call.
|
||||
/// - Return values passed to a return instruction.
|
||||
///
|
||||
/// The `legalize_abi_value` function helps the legalizer with the process. When the legalizer
|
||||
/// needs to pass a pre-legalized `have` argument, but the ABI argument `arg` has a different value
|
||||
/// type, `legalize_abi_value(have, arg)` tells the legalizer how to create the needed value type
|
||||
/// for the argument.
|
||||
///
|
||||
/// It may be necessary to call `legalize_abi_value` more than once for a given argument before the
|
||||
/// desired argument type appears. This will happen when a vector or integer type needs to be split
|
||||
/// more than once, for example.
|
||||
pub fn legalize_abi_value(have: Type, arg: &AbiParam) -> ValueConversion {
|
||||
let have_bits = have.bits();
|
||||
let arg_bits = arg.value_type.bits();
|
||||
|
||||
if arg.legalized_to_pointer {
|
||||
return ValueConversion::Pointer(arg.value_type);
|
||||
}
|
||||
|
||||
match have_bits.cmp(&arg_bits) {
|
||||
// We have fewer bits than the ABI argument.
|
||||
Ordering::Less => {
|
||||
debug_assert!(
|
||||
have.is_int() && arg.value_type.is_int(),
|
||||
"Can only extend integer values"
|
||||
);
|
||||
match arg.extension {
|
||||
ArgumentExtension::Uext => ValueConversion::Uext(arg.value_type),
|
||||
ArgumentExtension::Sext => ValueConversion::Sext(arg.value_type),
|
||||
_ => panic!("No argument extension specified"),
|
||||
}
|
||||
}
|
||||
// We have the same number of bits as the argument.
|
||||
Ordering::Equal => {
|
||||
// This must be an integer vector that is split and then extended.
|
||||
debug_assert!(arg.value_type.is_int());
|
||||
debug_assert!(have.is_vector(), "expected vector type, got {}", have);
|
||||
ValueConversion::VectorSplit
|
||||
}
|
||||
// We have more bits than the argument.
|
||||
Ordering::Greater => {
|
||||
if have.is_vector() {
|
||||
ValueConversion::VectorSplit
|
||||
} else if have.is_float() {
|
||||
// Convert a float to int so it can be split the next time.
|
||||
// ARM would do this to pass an `f64` in two registers.
|
||||
ValueConversion::IntBits
|
||||
} else {
|
||||
ValueConversion::IntSplit
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ir::types;
    use crate::ir::AbiParam;

    /// Checks the conversion chosen for a range of value types against an `i32` ABI argument.
    #[test]
    fn legalize() {
        let mut arg = AbiParam::new(types::I32);

        // Values wider than the argument are split, whether vector or plain integer.
        let wide_cases = [
            (types::I64X2, ValueConversion::VectorSplit),
            (types::I64, ValueConversion::IntSplit),
        ];
        for &(have, expected) in wide_cases.iter() {
            assert_eq!(legalize_abi_value(have, &arg), expected);
        }

        // Vector of integers is broken down, then sign-extended.
        arg.extension = ArgumentExtension::Sext;
        let sext_cases = [
            (types::I16X4, ValueConversion::VectorSplit),
            (types::I16.by(2).unwrap(), ValueConversion::VectorSplit),
            (types::I16, ValueConversion::Sext(types::I32)),
        ];
        for &(have, expected) in sext_cases.iter() {
            assert_eq!(legalize_abi_value(have, &arg), expected);
        }

        // 64-bit float is split as an integer.
        let float_conversion = legalize_abi_value(types::F64, &arg);
        assert_eq!(float_conversion, ValueConversion::IntBits);

        // Value is passed by reference
        arg.legalized_to_pointer = true;
        let by_ref_conversion = legalize_abi_value(types::F64, &arg);
        assert_eq!(by_ref_conversion, ValueConversion::Pointer(types::I32));
    }
}
|
||||
@@ -4,23 +4,18 @@
|
||||
//! binary machine code.
|
||||
|
||||
mod memorysink;
|
||||
mod relaxation;
|
||||
mod shrink;
|
||||
mod stack_map;
|
||||
|
||||
pub use self::memorysink::{
|
||||
MemoryCodeSink, NullRelocSink, NullStackMapSink, NullTrapSink, RelocSink, StackMapSink,
|
||||
TrapSink,
|
||||
};
|
||||
pub use self::relaxation::relax_branches;
|
||||
pub use self::shrink::shrink_instructions;
|
||||
pub use self::stack_map::StackMap;
|
||||
use crate::ir::entities::Value;
|
||||
use crate::ir::{
|
||||
ConstantOffset, ExternalName, Function, Inst, JumpTable, Opcode, SourceLoc, TrapCode,
|
||||
};
|
||||
use crate::isa::TargetIsa;
|
||||
pub use crate::regalloc::RegDiversions;
|
||||
use core::fmt;
|
||||
#[cfg(feature = "enable-serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
@@ -183,16 +178,6 @@ pub trait CodeSink {
|
||||
}
|
||||
}
|
||||
|
||||
/// Report a bad encoding error.
|
||||
#[cold]
|
||||
pub fn bad_encoding(func: &Function, inst: Inst) -> ! {
|
||||
panic!(
|
||||
"Bad encoding {} for {}",
|
||||
func.encodings[inst],
|
||||
func.dfg.display_inst(inst, None)
|
||||
);
|
||||
}
|
||||
|
||||
/// Emit a function to `sink`, given an instruction emitter function.
|
||||
///
|
||||
/// This function is called from the `TargetIsa::emit_function()` implementations with the
|
||||
@@ -200,14 +185,12 @@ pub fn bad_encoding(func: &Function, inst: Inst) -> ! {
|
||||
pub fn emit_function<CS, EI>(func: &Function, emit_inst: EI, sink: &mut CS, isa: &dyn TargetIsa)
|
||||
where
|
||||
CS: CodeSink,
|
||||
EI: Fn(&Function, Inst, &mut RegDiversions, &mut CS, &dyn TargetIsa),
|
||||
EI: Fn(&Function, Inst, &mut CS, &dyn TargetIsa),
|
||||
{
|
||||
let mut divert = RegDiversions::new();
|
||||
for block in func.layout.blocks() {
|
||||
divert.at_block(&func.entry_diversions, block);
|
||||
debug_assert_eq!(func.offsets[block], sink.offset());
|
||||
for inst in func.layout.block_insts(block) {
|
||||
emit_inst(func, inst, &mut divert, sink, isa);
|
||||
emit_inst(func, inst, sink, isa);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,396 +0,0 @@
|
||||
//! Branch relaxation and offset computation.
|
||||
//!
|
||||
//! # block header offsets
|
||||
//!
|
||||
//! Before we can generate binary machine code for branch instructions, we need to know the final
|
||||
//! offsets of all the block headers in the function. This information is encoded in the
|
||||
//! `func.offsets` table.
|
||||
//!
|
||||
//! # Branch relaxation
|
||||
//!
|
||||
//! Branch relaxation is the process of ensuring that all branches in the function have enough
|
||||
//! range to encode their destination. It is common to have multiple branch encodings in an ISA.
|
||||
//! For example, x86 branches can have either an 8-bit or a 32-bit displacement.
|
||||
//!
|
||||
//! On RISC architectures, it can happen that conditional branches have a shorter range than
|
||||
//! unconditional branches:
|
||||
//!
|
||||
//! ```clif
|
||||
//! brz v1, block17
|
||||
//! ```
|
||||
//!
|
||||
//! can be transformed into:
|
||||
//!
|
||||
//! ```clif
|
||||
//! brnz v1, block23
|
||||
//! jump block17
|
||||
//! block23:
|
||||
//! ```
|
||||
|
||||
use crate::binemit::{CodeInfo, CodeOffset};
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::{Block, Function, Inst, InstructionData, Opcode, Value, ValueList};
|
||||
use crate::isa::{EncInfo, TargetIsa};
|
||||
use crate::iterators::IteratorExtras;
|
||||
use crate::regalloc::RegDiversions;
|
||||
use crate::timing;
|
||||
use crate::CodegenResult;
|
||||
use core::convert::TryFrom;
|
||||
|
||||
/// Relax branches and compute the final layout of block headers in `func`.
|
||||
///
|
||||
/// Fill in the `func.offsets` table so the function is ready for binary emission.
|
||||
pub fn relax_branches(
|
||||
func: &mut Function,
|
||||
_cfg: &mut ControlFlowGraph,
|
||||
_domtree: &mut DominatorTree,
|
||||
isa: &dyn TargetIsa,
|
||||
) -> CodegenResult<CodeInfo> {
|
||||
let _tt = timing::relax_branches();
|
||||
|
||||
let encinfo = isa.encoding_info();
|
||||
|
||||
// Clear all offsets so we can recognize blocks that haven't been visited yet.
|
||||
func.offsets.clear();
|
||||
func.offsets.resize(func.dfg.num_blocks());
|
||||
|
||||
// Start by removing redundant jumps.
|
||||
fold_redundant_jumps(func, _cfg, _domtree);
|
||||
|
||||
// Convert jumps to fallthrough instructions where possible.
|
||||
fallthroughs(func);
|
||||
|
||||
let mut offset = 0;
|
||||
let mut divert = RegDiversions::new();
|
||||
|
||||
// First, compute initial offsets for every block.
|
||||
{
|
||||
let mut cur = FuncCursor::new(func);
|
||||
while let Some(block) = cur.next_block() {
|
||||
divert.at_block(&cur.func.entry_diversions, block);
|
||||
cur.func.offsets[block] = offset;
|
||||
while let Some(inst) = cur.next_inst() {
|
||||
divert.apply(&cur.func.dfg[inst]);
|
||||
let enc = cur.func.encodings[inst];
|
||||
offset += encinfo.byte_size(enc, inst, &divert, &cur.func);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Then, run the relaxation algorithm until it converges.
|
||||
let mut go_again = true;
|
||||
while go_again {
|
||||
go_again = false;
|
||||
offset = 0;
|
||||
|
||||
// Visit all instructions in layout order.
|
||||
let mut cur = FuncCursor::new(func);
|
||||
while let Some(block) = cur.next_block() {
|
||||
divert.at_block(&cur.func.entry_diversions, block);
|
||||
|
||||
// Record the offset for `block` and make sure we iterate until offsets are stable.
|
||||
if cur.func.offsets[block] != offset {
|
||||
cur.func.offsets[block] = offset;
|
||||
go_again = true;
|
||||
}
|
||||
|
||||
while let Some(inst) = cur.next_inst() {
|
||||
divert.apply(&cur.func.dfg[inst]);
|
||||
|
||||
let enc = cur.func.encodings[inst];
|
||||
|
||||
// See if this is a branch has a range and a destination, and if the target is in
|
||||
// range.
|
||||
if let Some(range) = encinfo.branch_range(enc) {
|
||||
if let Some(dest) = cur.func.dfg[inst].branch_destination() {
|
||||
let dest_offset = cur.func.offsets[dest];
|
||||
if !range.contains(offset, dest_offset) {
|
||||
offset +=
|
||||
relax_branch(&mut cur, &divert, offset, dest_offset, &encinfo, isa);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
offset += encinfo.byte_size(enc, inst, &divert, &cur.func);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let code_size = offset;
|
||||
let jumptables = offset;
|
||||
|
||||
for (jt, jt_data) in func.jump_tables.iter() {
|
||||
func.jt_offsets[jt] = offset;
|
||||
// TODO: this should be computed based on the min size needed to hold the furthest branch.
|
||||
offset += jt_data.len() as u32 * 4;
|
||||
}
|
||||
|
||||
let jumptables_size = offset - jumptables;
|
||||
let rodata = offset;
|
||||
|
||||
for constant in func.dfg.constants.entries_mut() {
|
||||
constant.set_offset(offset);
|
||||
offset +=
|
||||
u32::try_from(constant.len()).expect("Constants must have a length that fits in a u32")
|
||||
}
|
||||
|
||||
let rodata_size = offset - rodata;
|
||||
|
||||
Ok(CodeInfo {
|
||||
code_size,
|
||||
jumptables_size,
|
||||
rodata_size,
|
||||
total_size: offset,
|
||||
})
|
||||
}
|
||||
|
||||
/// Folds an instruction if it is a redundant jump.
|
||||
/// Returns whether folding was performed (which invalidates the CFG).
|
||||
fn try_fold_redundant_jump(
|
||||
func: &mut Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
block: Block,
|
||||
first_inst: Inst,
|
||||
) -> bool {
|
||||
let first_dest = match func.dfg[first_inst].branch_destination() {
|
||||
Some(block) => block, // The instruction was a single-target branch.
|
||||
None => {
|
||||
return false; // The instruction was either multi-target or not a branch.
|
||||
}
|
||||
};
|
||||
|
||||
// For the moment, only attempt to fold a branch to a block that is parameterless.
|
||||
// These blocks are mainly produced by critical edge splitting.
|
||||
//
|
||||
// TODO: Allow folding blocks that define SSA values and function as phi nodes.
|
||||
if func.dfg.num_block_params(first_dest) != 0 {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Look at the first instruction of the first branch's destination.
|
||||
// If it is an unconditional branch, maybe the second jump can be bypassed.
|
||||
let second_inst = func.layout.first_inst(first_dest).expect("Instructions");
|
||||
if func.dfg[second_inst].opcode() != Opcode::Jump {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Now we need to fix up first_inst's block parameters to match second_inst's,
|
||||
// without changing the branch-specific arguments.
|
||||
//
|
||||
// The intermediary block is allowed to reference any SSA value that dominates it,
|
||||
// but that SSA value may not necessarily also dominate the instruction that's
|
||||
// being patched.
|
||||
|
||||
// Get the arguments and parameters passed by the first branch.
|
||||
let num_fixed = func.dfg[first_inst]
|
||||
.opcode()
|
||||
.constraints()
|
||||
.num_fixed_value_arguments();
|
||||
let (first_args, first_params) = func.dfg[first_inst]
|
||||
.arguments(&func.dfg.value_lists)
|
||||
.split_at(num_fixed);
|
||||
|
||||
// Get the parameters passed by the second jump.
|
||||
let num_fixed = func.dfg[second_inst]
|
||||
.opcode()
|
||||
.constraints()
|
||||
.num_fixed_value_arguments();
|
||||
let (_, second_params) = func.dfg[second_inst]
|
||||
.arguments(&func.dfg.value_lists)
|
||||
.split_at(num_fixed);
|
||||
let mut second_params = second_params.to_vec(); // Clone for rewriting below.
|
||||
|
||||
// For each parameter passed by the second jump, if any of those parameters
|
||||
// was a block parameter, rewrite it to refer to the value that the first jump
|
||||
// passed in its parameters. Otherwise, make sure it dominates first_inst.
|
||||
//
|
||||
// For example: if we `block0: jump block1(v1)` to `block1(v2): jump block2(v2)`,
|
||||
// we want to rewrite the original jump to `jump block2(v1)`.
|
||||
let block_params: &[Value] = func.dfg.block_params(first_dest);
|
||||
debug_assert!(block_params.len() == first_params.len());
|
||||
|
||||
for value in second_params.iter_mut() {
|
||||
if let Some((n, _)) = block_params.iter().enumerate().find(|(_, &p)| p == *value) {
|
||||
// This value was the Nth parameter passed to the second_inst's block.
|
||||
// Rewrite it as the Nth parameter passed by first_inst.
|
||||
*value = first_params[n];
|
||||
}
|
||||
}
|
||||
|
||||
// Build a value list of first_args (unchanged) followed by second_params (rewritten).
|
||||
let arguments_vec: alloc::vec::Vec<_> = first_args
|
||||
.iter()
|
||||
.chain(second_params.iter())
|
||||
.copied()
|
||||
.collect();
|
||||
let value_list = ValueList::from_slice(&arguments_vec, &mut func.dfg.value_lists);
|
||||
|
||||
func.dfg[first_inst].take_value_list(); // Drop the current list.
|
||||
func.dfg[first_inst].put_value_list(value_list); // Put the new list.
|
||||
|
||||
// Bypass the second jump.
|
||||
// This can disconnect the Block containing `second_inst`, to be cleaned up later.
|
||||
let second_dest = func.dfg[second_inst].branch_destination().expect("Dest");
|
||||
func.change_branch_destination(first_inst, second_dest);
|
||||
cfg.recompute_block(func, block);
|
||||
|
||||
// The previously-intermediary Block may now be unreachable. Update CFG.
|
||||
if cfg.pred_iter(first_dest).count() == 0 {
|
||||
// Remove all instructions from that block.
|
||||
while let Some(inst) = func.layout.first_inst(first_dest) {
|
||||
func.layout.remove_inst(inst);
|
||||
}
|
||||
|
||||
// Remove the block...
|
||||
cfg.recompute_block(func, first_dest); // ...from predecessor lists.
|
||||
func.layout.remove_block(first_dest); // ...from the layout.
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
/// Redirects `jump` instructions that point to other `jump` instructions to the final destination.
|
||||
/// This transformation may orphan some blocks.
|
||||
fn fold_redundant_jumps(
|
||||
func: &mut Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
domtree: &mut DominatorTree,
|
||||
) {
|
||||
let mut folded = false;
|
||||
|
||||
// Postorder iteration guarantees that a chain of jumps is visited from
|
||||
// the end of the chain to the start of the chain.
|
||||
for &block in domtree.cfg_postorder() {
|
||||
// Only proceed if the first terminator instruction is a single-target branch.
|
||||
let first_inst = func
|
||||
.layout
|
||||
.last_inst(block)
|
||||
.expect("Block has no terminator");
|
||||
folded |= try_fold_redundant_jump(func, cfg, block, first_inst);
|
||||
|
||||
// Also try the previous instruction.
|
||||
if let Some(prev_inst) = func.layout.prev_inst(first_inst) {
|
||||
folded |= try_fold_redundant_jump(func, cfg, block, prev_inst);
|
||||
}
|
||||
}
|
||||
|
||||
// Folding jumps invalidates the dominator tree.
|
||||
if folded {
|
||||
domtree.compute(func, cfg);
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any
|
||||
/// existing `fallthrough` instructions are correct.
|
||||
fn fallthroughs(func: &mut Function) {
|
||||
for (block, succ) in func.layout.blocks().adjacent_pairs() {
|
||||
let term = func
|
||||
.layout
|
||||
.last_inst(block)
|
||||
.expect("block has no terminator.");
|
||||
if let InstructionData::Jump {
|
||||
ref mut opcode,
|
||||
destination,
|
||||
..
|
||||
} = func.dfg[term]
|
||||
{
|
||||
match *opcode {
|
||||
Opcode::Fallthrough => {
|
||||
// Somebody used a fall-through instruction before the branch relaxation pass.
|
||||
// Make sure it is correct, i.e. the destination is the layout successor.
|
||||
debug_assert_eq!(
|
||||
destination, succ,
|
||||
"Illegal fallthrough from {} to {}, but {}'s successor is {}",
|
||||
block, destination, block, succ
|
||||
)
|
||||
}
|
||||
Opcode::Jump => {
|
||||
// If this is a jump to the successor block, change it to a fall-through.
|
||||
if destination == succ {
|
||||
*opcode = Opcode::Fallthrough;
|
||||
func.encodings[term] = Default::default();
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Relax the branch instruction at `cur` so it can cover the range `offset - dest_offset`.
|
||||
///
|
||||
/// Return the size of the replacement instructions up to and including the location where `cur` is
|
||||
/// left.
|
||||
fn relax_branch(
|
||||
cur: &mut FuncCursor,
|
||||
divert: &RegDiversions,
|
||||
offset: CodeOffset,
|
||||
dest_offset: CodeOffset,
|
||||
encinfo: &EncInfo,
|
||||
isa: &dyn TargetIsa,
|
||||
) -> CodeOffset {
|
||||
let inst = cur.current_inst().unwrap();
|
||||
log::trace!(
|
||||
"Relaxing [{}] {} for {:#x}-{:#x} range",
|
||||
encinfo.display(cur.func.encodings[inst]),
|
||||
cur.func.dfg.display_inst(inst, isa),
|
||||
offset,
|
||||
dest_offset
|
||||
);
|
||||
|
||||
// Pick the smallest encoding that can handle the branch range.
|
||||
let dfg = &cur.func.dfg;
|
||||
let ctrl_type = dfg.ctrl_typevar(inst);
|
||||
if let Some(enc) = isa
|
||||
.legal_encodings(cur.func, &dfg[inst], ctrl_type)
|
||||
.filter(|&enc| {
|
||||
let range = encinfo.branch_range(enc).expect("Branch with no range");
|
||||
if !range.contains(offset, dest_offset) {
|
||||
log::trace!(" trying [{}]: out of range", encinfo.display(enc));
|
||||
false
|
||||
} else if encinfo.operand_constraints(enc)
|
||||
!= encinfo.operand_constraints(cur.func.encodings[inst])
|
||||
{
|
||||
// Conservatively give up if the encoding has different constraints
|
||||
// than the original, so that we don't risk picking a new encoding
|
||||
// which the existing operands don't satisfy. We can't check for
|
||||
// validity directly because we don't have a RegDiversions active so
|
||||
// we don't know which registers are actually in use.
|
||||
log::trace!(" trying [{}]: constraints differ", encinfo.display(enc));
|
||||
false
|
||||
} else {
|
||||
log::trace!(" trying [{}]: OK", encinfo.display(enc));
|
||||
true
|
||||
}
|
||||
})
|
||||
.min_by_key(|&enc| encinfo.byte_size(enc, inst, &divert, &cur.func))
|
||||
{
|
||||
debug_assert!(enc != cur.func.encodings[inst]);
|
||||
cur.func.encodings[inst] = enc;
|
||||
return encinfo.byte_size(enc, inst, &divert, &cur.func);
|
||||
}
|
||||
|
||||
// Note: On some RISC ISAs, conditional branches have shorter range than unconditional
|
||||
// branches, so one way of extending the range of a conditional branch is to invert its
|
||||
// condition and make it branch over an unconditional jump which has the larger range.
|
||||
//
|
||||
// Splitting the block is problematic this late because there may be register diversions in
|
||||
// effect across the conditional branch, and they can't survive the control flow edge to a new
|
||||
// block. We have two options for handling that:
|
||||
//
|
||||
// 1. Set a flag on the new block that indicates it wants the preserve the register diversions of
|
||||
// its layout predecessor, or
|
||||
// 2. Use an encoding macro for the branch-over-jump pattern so we don't need to split the block.
|
||||
//
|
||||
// It seems that 1. would allow us to share code among RISC ISAs that need this.
|
||||
//
|
||||
// We can't allow register diversions to survive from the layout predecessor because the layout
|
||||
// predecessor could contain kill points for some values that are live in this block, and
|
||||
// diversions are not automatically cancelled when the live range of a value ends.
|
||||
|
||||
// This assumes solution 2. above:
|
||||
panic!("No branch in range for {:#x}-{:#x}", offset, dest_offset);
|
||||
}
|
||||
@@ -1,72 +0,0 @@
|
||||
//! Instruction shrinking.
|
||||
//!
|
||||
//! Sometimes there are multiple valid encodings for a given instruction. Cranelift often initially
|
||||
//! chooses the largest one, because this typically provides the register allocator the most
|
||||
//! flexibility. However, once register allocation is done, this is no longer important, and we
|
||||
//! can switch to smaller encodings when possible.
|
||||
|
||||
use crate::ir::instructions::InstructionData;
|
||||
use crate::ir::Function;
|
||||
use crate::isa::TargetIsa;
|
||||
use crate::regalloc::RegDiversions;
|
||||
use crate::timing;
|
||||
|
||||
/// Pick the smallest valid encodings for instructions.
|
||||
pub fn shrink_instructions(func: &mut Function, isa: &dyn TargetIsa) {
|
||||
let _tt = timing::shrink_instructions();
|
||||
|
||||
let encinfo = isa.encoding_info();
|
||||
let mut divert = RegDiversions::new();
|
||||
|
||||
for block in func.layout.blocks() {
|
||||
// Load diversions from predecessors.
|
||||
divert.at_block(&func.entry_diversions, block);
|
||||
|
||||
for inst in func.layout.block_insts(block) {
|
||||
let enc = func.encodings[inst];
|
||||
if enc.is_legal() {
|
||||
// regmove/regfill/regspill are special instructions with register immediates
|
||||
// that represented as normal operands, so the normal predicates below don't
|
||||
// handle them correctly.
|
||||
//
|
||||
// Also, they need to be presented to the `RegDiversions` to update the
|
||||
// location tracking.
|
||||
//
|
||||
// TODO: Eventually, we want the register allocator to avoid leaving these special
|
||||
// instructions behind, but for now, just temporarily avoid trying to shrink them.
|
||||
let inst_data = &func.dfg[inst];
|
||||
match inst_data {
|
||||
InstructionData::RegMove { .. }
|
||||
| InstructionData::RegFill { .. }
|
||||
| InstructionData::RegSpill { .. } => {
|
||||
divert.apply(inst_data);
|
||||
continue;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
let ctrl_type = func.dfg.ctrl_typevar(inst);
|
||||
|
||||
// Pick the last encoding with constraints that are satisfied.
|
||||
let best_enc = isa
|
||||
.legal_encodings(func, &func.dfg[inst], ctrl_type)
|
||||
.filter(|e| encinfo.constraints[e.recipe()].satisfied(inst, &divert, &func))
|
||||
.min_by_key(|e| encinfo.byte_size(*e, inst, &divert, &func))
|
||||
.unwrap();
|
||||
|
||||
if best_enc != enc {
|
||||
func.encodings[inst] = best_enc;
|
||||
|
||||
log::trace!(
|
||||
"Shrunk [{}] to [{}] in {}, reducing the size from {} to {}",
|
||||
encinfo.display(enc),
|
||||
encinfo.display(best_enc),
|
||||
func.dfg.display_inst(inst, isa),
|
||||
encinfo.byte_size(enc, inst, &divert, &func),
|
||||
encinfo.byte_size(best_enc, inst, &divert, &func)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -9,24 +9,17 @@
|
||||
//! contexts concurrently. Typically, you would have one context per compilation thread and only a
|
||||
//! single ISA instance.
|
||||
|
||||
use crate::binemit::{
|
||||
relax_branches, shrink_instructions, CodeInfo, MemoryCodeSink, RelocSink, StackMapSink,
|
||||
TrapSink,
|
||||
};
|
||||
use crate::binemit::{CodeInfo, MemoryCodeSink, RelocSink, StackMapSink, TrapSink};
|
||||
use crate::dce::do_dce;
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::Function;
|
||||
use crate::isa::TargetIsa;
|
||||
use crate::legalize_function;
|
||||
use crate::legalizer::simple_legalize;
|
||||
use crate::licm::do_licm;
|
||||
use crate::loop_analysis::LoopAnalysis;
|
||||
use crate::machinst::{MachCompileResult, MachStackMap};
|
||||
use crate::nan_canonicalization::do_nan_canonicalization;
|
||||
use crate::postopt::do_postopt;
|
||||
use crate::redundant_reload_remover::RedundantReloadRemover;
|
||||
use crate::regalloc;
|
||||
use crate::remove_constant_phis::do_remove_constant_phis;
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings::{FlagsOrIsa, OptLevel};
|
||||
@@ -34,8 +27,7 @@ use crate::simple_gvn::do_simple_gvn;
|
||||
use crate::simple_preopt::do_preopt;
|
||||
use crate::timing;
|
||||
use crate::unreachable_code::eliminate_unreachable_code;
|
||||
use crate::value_label::{build_value_labels_ranges, ComparableSourceLoc, ValueLabelsRanges};
|
||||
use crate::verifier::{verify_context, verify_locations, VerifierErrors, VerifierResult};
|
||||
use crate::verifier::{verify_context, VerifierErrors, VerifierResult};
|
||||
#[cfg(feature = "souper-harvest")]
|
||||
use alloc::string::String;
|
||||
use alloc::vec::Vec;
|
||||
@@ -54,15 +46,9 @@ pub struct Context {
|
||||
/// Dominator tree for `func`.
|
||||
pub domtree: DominatorTree,
|
||||
|
||||
/// Register allocation context.
|
||||
pub regalloc: regalloc::Context,
|
||||
|
||||
/// Loop analysis of `func`.
|
||||
pub loop_analysis: LoopAnalysis,
|
||||
|
||||
/// Redundant-reload remover context.
|
||||
pub redundant_reload_remover: RedundantReloadRemover,
|
||||
|
||||
/// Result of MachBackend compilation, if computed.
|
||||
pub mach_compile_result: Option<MachCompileResult>,
|
||||
|
||||
@@ -88,9 +74,7 @@ impl Context {
|
||||
func,
|
||||
cfg: ControlFlowGraph::new(),
|
||||
domtree: DominatorTree::new(),
|
||||
regalloc: regalloc::Context::new(),
|
||||
loop_analysis: LoopAnalysis::new(),
|
||||
redundant_reload_remover: RedundantReloadRemover::new(),
|
||||
mach_compile_result: None,
|
||||
want_disasm: false,
|
||||
}
|
||||
@@ -101,9 +85,7 @@ impl Context {
|
||||
self.func.clear();
|
||||
self.cfg.clear();
|
||||
self.domtree.clear();
|
||||
self.regalloc.clear();
|
||||
self.loop_analysis.clear();
|
||||
self.redundant_reload_remover.clear();
|
||||
self.mach_compile_result = None;
|
||||
self.want_disasm = false;
|
||||
}
|
||||
@@ -137,13 +119,7 @@ impl Context {
|
||||
let old_len = mem.len();
|
||||
mem.resize(old_len + info.total_size as usize, 0);
|
||||
let new_info = unsafe {
|
||||
self.emit_to_memory(
|
||||
isa,
|
||||
mem.as_mut_ptr().add(old_len),
|
||||
relocs,
|
||||
traps,
|
||||
stack_maps,
|
||||
)
|
||||
self.emit_to_memory(mem.as_mut_ptr().add(old_len), relocs, traps, stack_maps)
|
||||
};
|
||||
debug_assert!(new_info == info);
|
||||
Ok(info)
|
||||
@@ -177,7 +153,6 @@ impl Context {
|
||||
|
||||
self.legalize(isa)?;
|
||||
if opt_level != OptLevel::None {
|
||||
self.postopt(isa)?;
|
||||
self.compute_domtree();
|
||||
self.compute_loop_analysis();
|
||||
self.licm(isa)?;
|
||||
@@ -192,25 +167,12 @@ impl Context {
|
||||
|
||||
self.remove_constant_phis(isa)?;
|
||||
|
||||
if let Some(backend) = isa.get_mach_backend() {
|
||||
let result = backend.compile_function(&self.func, self.want_disasm)?;
|
||||
let info = result.code_info();
|
||||
self.mach_compile_result = Some(result);
|
||||
Ok(info)
|
||||
} else {
|
||||
self.regalloc(isa)?;
|
||||
self.prologue_epilogue(isa)?;
|
||||
if opt_level == OptLevel::Speed || opt_level == OptLevel::SpeedAndSize {
|
||||
self.redundant_reload_remover(isa)?;
|
||||
}
|
||||
if opt_level == OptLevel::SpeedAndSize {
|
||||
self.shrink_instructions(isa)?;
|
||||
}
|
||||
let result = self.relax_branches(isa);
|
||||
|
||||
log::trace!("Compiled:\n{}", self.func.display(isa));
|
||||
result
|
||||
}
|
||||
// FIXME: make this non optional
|
||||
let backend = isa.get_mach_backend().expect("only mach backends nowadays");
|
||||
let result = backend.compile_function(&self.func, self.want_disasm)?;
|
||||
let info = result.code_info();
|
||||
self.mach_compile_result = Some(result);
|
||||
Ok(info)
|
||||
}
|
||||
|
||||
/// Emit machine code directly into raw memory.
|
||||
@@ -228,7 +190,6 @@ impl Context {
|
||||
/// Returns information about the emitted code and data.
|
||||
pub unsafe fn emit_to_memory(
|
||||
&self,
|
||||
isa: &dyn TargetIsa,
|
||||
mem: *mut u8,
|
||||
relocs: &mut dyn RelocSink,
|
||||
traps: &mut dyn TrapSink,
|
||||
@@ -236,25 +197,24 @@ impl Context {
|
||||
) -> CodeInfo {
|
||||
let _tt = timing::binemit();
|
||||
let mut sink = MemoryCodeSink::new(mem, relocs, traps, stack_maps);
|
||||
if let Some(ref result) = &self.mach_compile_result {
|
||||
result.buffer.emit(&mut sink);
|
||||
let info = sink.info;
|
||||
// New backends do not emit StackMaps through the `CodeSink` because its interface
|
||||
// requires `Value`s; instead, the `StackMap` objects are directly accessible via
|
||||
// `result.buffer.stack_maps()`.
|
||||
for &MachStackMap {
|
||||
offset_end,
|
||||
ref stack_map,
|
||||
..
|
||||
} in result.buffer.stack_maps()
|
||||
{
|
||||
stack_maps.add_stack_map(offset_end, stack_map.clone());
|
||||
}
|
||||
info
|
||||
} else {
|
||||
isa.emit_function_to_memory(&self.func, &mut sink);
|
||||
sink.info
|
||||
let result = self
|
||||
.mach_compile_result
|
||||
.as_ref()
|
||||
.expect("only using mach backend now");
|
||||
result.buffer.emit(&mut sink);
|
||||
let info = sink.info;
|
||||
// New backends do not emit StackMaps through the `CodeSink` because its interface
|
||||
// requires `Value`s; instead, the `StackMap` objects are directly accessible via
|
||||
// `result.buffer.stack_maps()`.
|
||||
for &MachStackMap {
|
||||
offset_end,
|
||||
ref stack_map,
|
||||
..
|
||||
} in result.buffer.stack_maps()
|
||||
{
|
||||
stack_maps.add_stack_map(offset_end, stack_map.clone());
|
||||
}
|
||||
info
|
||||
}
|
||||
|
||||
/// If available, return information about the code layout in the
|
||||
@@ -314,26 +274,6 @@ impl Context {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run the locations verifier on the function.
|
||||
pub fn verify_locations(&self, isa: &dyn TargetIsa) -> VerifierResult<()> {
|
||||
let mut errors = VerifierErrors::default();
|
||||
let _ = verify_locations(isa, &self.func, &self.cfg, None, &mut errors);
|
||||
|
||||
if errors.is_empty() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(errors)
|
||||
}
|
||||
}
|
||||
|
||||
/// Run the locations verifier only if the `enable_verifier` setting is true.
|
||||
pub fn verify_locations_if(&self, isa: &dyn TargetIsa) -> CodegenResult<()> {
|
||||
if isa.flags().enable_verifier() {
|
||||
self.verify_locations(isa)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Perform dead-code elimination on the function.
|
||||
pub fn dce<'a, FOI: Into<FlagsOrIsa<'a>>>(&mut self, fisa: FOI) -> CodegenResult<()> {
|
||||
do_dce(&mut self.func, &mut self.domtree);
|
||||
@@ -370,22 +310,10 @@ impl Context {
|
||||
// TODO: Avoid doing this when legalization doesn't actually mutate the CFG.
|
||||
self.domtree.clear();
|
||||
self.loop_analysis.clear();
|
||||
if isa.get_mach_backend().is_some() {
|
||||
// Run some specific legalizations only.
|
||||
simple_legalize(&mut self.func, &mut self.cfg, isa);
|
||||
self.verify_if(isa)
|
||||
} else {
|
||||
legalize_function(&mut self.func, &mut self.cfg, isa);
|
||||
log::trace!("Legalized:\n{}", self.func.display(isa));
|
||||
self.verify_if(isa)
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform post-legalization rewrites on the function.
|
||||
pub fn postopt(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
|
||||
do_postopt(&mut self.func, isa);
|
||||
self.verify_if(isa)?;
|
||||
Ok(())
|
||||
// Run some specific legalizations only.
|
||||
simple_legalize(&mut self.func, &mut self.cfg, isa);
|
||||
self.verify_if(isa)
|
||||
}
|
||||
|
||||
/// Compute the control flow graph.
|
||||
@@ -437,58 +365,6 @@ impl Context {
|
||||
self.verify_if(fisa)
|
||||
}
|
||||
|
||||
/// Run the register allocator.
|
||||
pub fn regalloc(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
|
||||
self.regalloc
|
||||
.run(isa, &mut self.func, &mut self.cfg, &mut self.domtree)
|
||||
}
|
||||
|
||||
/// Insert prologue and epilogues after computing the stack frame layout.
|
||||
pub fn prologue_epilogue(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
|
||||
isa.prologue_epilogue(&mut self.func)?;
|
||||
self.verify_if(isa)?;
|
||||
self.verify_locations_if(isa)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Do redundant-reload removal after allocation of both registers and stack slots.
|
||||
pub fn redundant_reload_remover(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
|
||||
self.redundant_reload_remover
|
||||
.run(isa, &mut self.func, &self.cfg);
|
||||
self.verify_if(isa)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run the instruction shrinking pass.
|
||||
pub fn shrink_instructions(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
|
||||
shrink_instructions(&mut self.func, isa);
|
||||
self.verify_if(isa)?;
|
||||
self.verify_locations_if(isa)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run the branch relaxation pass and return information about the function's code and
|
||||
/// read-only data.
|
||||
pub fn relax_branches(&mut self, isa: &dyn TargetIsa) -> CodegenResult<CodeInfo> {
|
||||
let info = relax_branches(&mut self.func, &mut self.cfg, &mut self.domtree, isa)?;
|
||||
self.verify_if(isa)?;
|
||||
self.verify_locations_if(isa)?;
|
||||
Ok(info)
|
||||
}
|
||||
|
||||
/// Builds ranges and location for specified value labels.
|
||||
pub fn build_value_labels_ranges(
|
||||
&self,
|
||||
isa: &dyn TargetIsa,
|
||||
) -> CodegenResult<ValueLabelsRanges> {
|
||||
Ok(build_value_labels_ranges::<ComparableSourceLoc>(
|
||||
&self.func,
|
||||
&self.regalloc,
|
||||
self.mach_compile_result.as_ref(),
|
||||
isa,
|
||||
))
|
||||
}
|
||||
|
||||
/// Harvest candidate left-hand sides for superoptimization with Souper.
|
||||
#[cfg(feature = "souper-harvest")]
|
||||
pub fn souper_harvest(
|
||||
|
||||
@@ -634,7 +634,7 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut FuncCursor<'f> {
|
||||
&mut self.func.dfg
|
||||
}
|
||||
|
||||
fn insert_built_inst(self, inst: ir::Inst, _: ir::Type) -> &'c mut ir::DataFlowGraph {
|
||||
fn insert_built_inst(self, inst: ir::Inst) -> &'c mut ir::DataFlowGraph {
|
||||
// TODO: Remove this assertion once #796 is fixed.
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
@@ -759,11 +759,7 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> {
|
||||
&mut self.func.dfg
|
||||
}
|
||||
|
||||
fn insert_built_inst(
|
||||
self,
|
||||
inst: ir::Inst,
|
||||
ctrl_typevar: ir::Type,
|
||||
) -> &'c mut ir::DataFlowGraph {
|
||||
fn insert_built_inst(self, inst: ir::Inst) -> &'c mut ir::DataFlowGraph {
|
||||
// TODO: Remove this assertion once #796 is fixed.
|
||||
#[cfg(debug_assertions)]
|
||||
{
|
||||
@@ -787,6 +783,7 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> {
|
||||
};
|
||||
};
|
||||
}
|
||||
|
||||
// Insert the instruction and remember the reference.
|
||||
self.insert_inst(inst);
|
||||
self.built_inst = Some(inst);
|
||||
@@ -795,21 +792,6 @@ impl<'c, 'f> ir::InstInserterBase<'c> for &'c mut EncCursor<'f> {
|
||||
self.func.srclocs[inst] = self.srcloc;
|
||||
}
|
||||
|
||||
// Skip the encoding update if we're using a new (MachInst) backend; encodings come later,
|
||||
// during lowering.
|
||||
if self.isa.get_mach_backend().is_none() {
|
||||
// Assign an encoding.
|
||||
// XXX Is there a way to describe this error to the user?
|
||||
#[cfg_attr(feature = "cargo-clippy", allow(clippy::match_wild_err_arm))]
|
||||
match self
|
||||
.isa
|
||||
.encode(&self.func, &self.func.dfg[inst], ctrl_typevar)
|
||||
{
|
||||
Ok(e) => self.func.encodings[inst] = e,
|
||||
Err(_) => panic!("can't encode {}", self.display_inst(inst)),
|
||||
}
|
||||
}
|
||||
|
||||
&mut self.func.dfg
|
||||
}
|
||||
}
|
||||
|
||||
@@ -78,10 +78,3 @@ pub fn is_constant_64bit(func: &Function, inst: Inst) -> Option<u64> {
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is the given instruction a safepoint (i.e., potentially causes a GC, depending on the
|
||||
/// embedding, and so requires reftyped values to be enumerated with a stack map)?
|
||||
pub fn is_safepoint(func: &Function, inst: Inst) -> bool {
|
||||
let op = func.dfg[inst].opcode();
|
||||
op.is_resumable_trap() || op.is_call()
|
||||
}
|
||||
|
||||
@@ -56,7 +56,7 @@ pub trait InstInserterBase<'f>: Sized {
|
||||
fn data_flow_graph_mut(&mut self) -> &mut DataFlowGraph;
|
||||
|
||||
/// Insert a new instruction which belongs to the DFG.
|
||||
fn insert_built_inst(self, inst: Inst, ctrl_typevar: Type) -> &'f mut DataFlowGraph;
|
||||
fn insert_built_inst(self, inst: Inst) -> &'f mut DataFlowGraph;
|
||||
}
|
||||
|
||||
use core::marker::PhantomData;
|
||||
@@ -129,7 +129,7 @@ impl<'f, IIB: InstInserterBase<'f>> InstBuilderBase<'f> for InsertBuilder<'f, II
|
||||
inst = dfg.make_inst(data);
|
||||
dfg.make_inst_results(inst, ctrl_typevar);
|
||||
}
|
||||
(inst, self.inserter.insert_built_inst(inst, ctrl_typevar))
|
||||
(inst, self.inserter.insert_built_inst(inst))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -166,7 +166,7 @@ where
|
||||
let ru = self.reuse.as_ref().iter().cloned();
|
||||
dfg.make_inst_results_reusing(inst, ctrl_typevar, ru);
|
||||
}
|
||||
(inst, self.inserter.insert_built_inst(inst, ctrl_typevar))
|
||||
(inst, self.inserter.insert_built_inst(inst))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
//! The `Function` struct defined in this module owns all of its basic blocks and
|
||||
//! instructions.
|
||||
|
||||
use crate::binemit::CodeOffset;
|
||||
use crate::entity::{PrimaryMap, SecondaryMap};
|
||||
use crate::ir;
|
||||
use crate::ir::{
|
||||
@@ -11,11 +10,10 @@ use crate::ir::{
|
||||
HeapData, Inst, InstructionData, JumpTable, JumpTableData, Opcode, SigRef, StackSlot,
|
||||
StackSlotData, Table, TableData,
|
||||
};
|
||||
use crate::ir::{BlockOffsets, InstEncodings, SourceLocs, StackSlots, ValueLocations};
|
||||
use crate::ir::{BlockOffsets, SourceLocs, StackSlots, ValueLocations};
|
||||
use crate::ir::{DataFlowGraph, ExternalName, Layout, Signature};
|
||||
use crate::ir::{JumpTableOffsets, JumpTables};
|
||||
use crate::isa::{CallConv, EncInfo, Encoding, Legalize, TargetIsa};
|
||||
use crate::regalloc::{EntryRegDiversions, RegDiversions};
|
||||
use crate::isa::{CallConv, TargetIsa};
|
||||
use crate::value_label::ValueLabelsRanges;
|
||||
use crate::write::write_function;
|
||||
#[cfg(feature = "enable-serde")]
|
||||
@@ -106,19 +104,9 @@ pub struct Function {
|
||||
/// Layout of blocks and instructions in the function body.
|
||||
pub layout: Layout,
|
||||
|
||||
/// Encoding recipe and bits for the legal instructions.
|
||||
/// Illegal instructions have the `Encoding::default()` value.
|
||||
pub encodings: InstEncodings,
|
||||
|
||||
/// Location assigned to every value.
|
||||
pub locations: ValueLocations,
|
||||
|
||||
/// Non-default locations assigned to value at the entry of basic blocks.
|
||||
///
|
||||
/// At the entry of each basic block, we might have values which are not in their default
|
||||
/// ValueLocation. This field records these register-to-register moves as Diversions.
|
||||
pub entry_diversions: EntryRegDiversions,
|
||||
|
||||
/// Code offsets of the block headers.
|
||||
///
|
||||
/// This information is only transiently available after the `binemit::relax_branches` function
|
||||
@@ -168,9 +156,7 @@ impl Function {
|
||||
jump_tables: PrimaryMap::new(),
|
||||
dfg: DataFlowGraph::new(),
|
||||
layout: Layout::new(),
|
||||
encodings: SecondaryMap::new(),
|
||||
locations: SecondaryMap::new(),
|
||||
entry_diversions: EntryRegDiversions::new(),
|
||||
offsets: SecondaryMap::new(),
|
||||
jt_offsets: SecondaryMap::new(),
|
||||
srclocs: SecondaryMap::new(),
|
||||
@@ -190,9 +176,7 @@ impl Function {
|
||||
self.jump_tables.clear();
|
||||
self.dfg.clear();
|
||||
self.layout.clear();
|
||||
self.encodings.clear();
|
||||
self.locations.clear();
|
||||
self.entry_diversions.clear();
|
||||
self.offsets.clear();
|
||||
self.jt_offsets.clear();
|
||||
self.srclocs.clear();
|
||||
@@ -268,51 +252,6 @@ impl Function {
|
||||
.map(|i| self.dfg.block_params(entry)[i])
|
||||
}
|
||||
|
||||
/// Get an iterator over the instructions in `block`, including offsets and encoded instruction
|
||||
/// sizes.
|
||||
///
|
||||
/// The iterator returns `(offset, inst, size)` tuples, where `offset` if the offset in bytes
|
||||
/// from the beginning of the function to the instruction, and `size` is the size of the
|
||||
/// instruction in bytes, or 0 for unencoded instructions.
|
||||
///
|
||||
/// This function can only be used after the code layout has been computed by the
|
||||
/// `binemit::relax_branches()` function.
|
||||
pub fn inst_offsets<'a>(&'a self, block: Block, encinfo: &EncInfo) -> InstOffsetIter<'a> {
|
||||
assert!(
|
||||
!self.offsets.is_empty(),
|
||||
"Code layout must be computed first"
|
||||
);
|
||||
let mut divert = RegDiversions::new();
|
||||
divert.at_block(&self.entry_diversions, block);
|
||||
InstOffsetIter {
|
||||
encinfo: encinfo.clone(),
|
||||
func: self,
|
||||
divert,
|
||||
encodings: &self.encodings,
|
||||
offset: self.offsets[block],
|
||||
iter: self.layout.block_insts(block),
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper around `encode` which assigns `inst` the resulting encoding.
|
||||
pub fn update_encoding(&mut self, inst: ir::Inst, isa: &dyn TargetIsa) -> Result<(), Legalize> {
|
||||
if isa.get_mach_backend().is_some() {
|
||||
Ok(())
|
||||
} else {
|
||||
self.encode(inst, isa).map(|e| self.encodings[inst] = e)
|
||||
}
|
||||
}
|
||||
|
||||
/// Wrapper around `TargetIsa::encode` for encoding an existing instruction
|
||||
/// in the `Function`.
|
||||
pub fn encode(&self, inst: ir::Inst, isa: &dyn TargetIsa) -> Result<Encoding, Legalize> {
|
||||
if isa.get_mach_backend().is_some() {
|
||||
Ok(Encoding::new(0, 0))
|
||||
} else {
|
||||
isa.encode(&self, &self.dfg[inst], self.dfg.ctrl_typevar(inst))
|
||||
}
|
||||
}
|
||||
|
||||
/// Starts collection of debug information.
|
||||
pub fn collect_debug_info(&mut self) {
|
||||
self.dfg.collect_debug_info();
|
||||
@@ -469,29 +408,3 @@ impl fmt::Debug for Function {
|
||||
write_function(fmt, self, &DisplayFunctionAnnotations::default())
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator returning instruction offsets and sizes: `(offset, inst, size)`.
|
||||
pub struct InstOffsetIter<'a> {
|
||||
encinfo: EncInfo,
|
||||
divert: RegDiversions,
|
||||
func: &'a Function,
|
||||
encodings: &'a InstEncodings,
|
||||
offset: CodeOffset,
|
||||
iter: ir::layout::Insts<'a>,
|
||||
}
|
||||
|
||||
impl<'a> Iterator for InstOffsetIter<'a> {
|
||||
type Item = (CodeOffset, ir::Inst, CodeOffset);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.iter.next().map(|inst| {
|
||||
self.divert.apply(&self.func.dfg[inst]);
|
||||
let byte_size =
|
||||
self.encinfo
|
||||
.byte_size(self.encodings[inst], inst, &self.divert, self.func);
|
||||
let offset = self.offset;
|
||||
self.offset += byte_size;
|
||||
(offset, inst, byte_size)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
//! Naming well-known routines in the runtime library.
|
||||
|
||||
use crate::ir::{
|
||||
types, AbiParam, ArgumentPurpose, ExtFuncData, ExternalName, FuncRef, Function, Inst, Opcode,
|
||||
types, AbiParam, ArgumentPurpose, ExtFuncData, ExternalName, FuncRef, Function, Opcode,
|
||||
Signature, Type,
|
||||
};
|
||||
use crate::isa::{CallConv, RegUnit, TargetIsa};
|
||||
@@ -166,21 +166,6 @@ impl LibCall {
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a function reference for `libcall` in `func`, following the signature
|
||||
/// for `inst`.
|
||||
///
|
||||
/// If there is an existing reference, use it, otherwise make a new one.
|
||||
pub(crate) fn get_libcall_funcref(
|
||||
libcall: LibCall,
|
||||
call_conv: CallConv,
|
||||
func: &mut Function,
|
||||
inst: Inst,
|
||||
isa: &dyn TargetIsa,
|
||||
) -> FuncRef {
|
||||
find_funcref(libcall, func)
|
||||
.unwrap_or_else(|| make_funcref_for_inst(libcall, call_conv, func, inst, isa))
|
||||
}
|
||||
|
||||
/// Get a function reference for the probestack function in `func`.
|
||||
///
|
||||
/// If there is an existing reference, use it, otherwise make a new one.
|
||||
@@ -227,33 +212,6 @@ fn make_funcref_for_probestack(
|
||||
make_funcref(LibCall::Probestack, func, sig, isa)
|
||||
}
|
||||
|
||||
/// Create a funcref for `libcall` with a signature matching `inst`.
|
||||
fn make_funcref_for_inst(
|
||||
libcall: LibCall,
|
||||
call_conv: CallConv,
|
||||
func: &mut Function,
|
||||
inst: Inst,
|
||||
isa: &dyn TargetIsa,
|
||||
) -> FuncRef {
|
||||
let mut sig = Signature::new(call_conv);
|
||||
for &v in func.dfg.inst_args(inst) {
|
||||
sig.params.push(AbiParam::new(func.dfg.value_type(v)));
|
||||
}
|
||||
for &v in func.dfg.inst_results(inst) {
|
||||
sig.returns.push(AbiParam::new(func.dfg.value_type(v)));
|
||||
}
|
||||
|
||||
if call_conv.extends_baldrdash() {
|
||||
// Adds the special VMContext parameter to the signature.
|
||||
sig.params.push(AbiParam::special(
|
||||
isa.pointer_type(),
|
||||
ArgumentPurpose::VMContext,
|
||||
));
|
||||
}
|
||||
|
||||
make_funcref(libcall, func, sig, isa)
|
||||
}
|
||||
|
||||
/// Create a funcref for `libcall`.
|
||||
fn make_funcref(
|
||||
libcall: LibCall,
|
||||
|
||||
@@ -63,7 +63,6 @@ pub use cranelift_codegen_shared::condcodes;
|
||||
|
||||
use crate::binemit;
|
||||
use crate::entity::{entity_impl, PrimaryMap, SecondaryMap};
|
||||
use crate::isa;
|
||||
|
||||
/// Map of value locations.
|
||||
pub type ValueLocations = SecondaryMap<Value, ValueLoc>;
|
||||
@@ -71,9 +70,6 @@ pub type ValueLocations = SecondaryMap<Value, ValueLoc>;
|
||||
/// Map of jump tables.
|
||||
pub type JumpTables = PrimaryMap<JumpTable, JumpTableData>;
|
||||
|
||||
/// Map of instruction encodings.
|
||||
pub type InstEncodings = SecondaryMap<Inst, isa::Encoding>;
|
||||
|
||||
/// Code offsets for blocks.
|
||||
pub type BlockOffsets = SecondaryMap<Block, binemit::CodeOffset>;
|
||||
|
||||
|
||||
@@ -8,9 +8,8 @@
|
||||
//! are satisfied.
|
||||
|
||||
use crate::binemit::CodeOffset;
|
||||
use crate::ir::{Function, Inst, ValueLoc};
|
||||
use crate::ir::ValueLoc;
|
||||
use crate::isa::{RegClass, RegUnit};
|
||||
use crate::regalloc::RegDiversions;
|
||||
|
||||
/// Register constraint for a single value operand or instruction result.
|
||||
#[derive(PartialEq, Debug)]
|
||||
@@ -87,69 +86,6 @@ pub enum ConstraintKind {
|
||||
Stack,
|
||||
}
|
||||
|
||||
/// Value operand constraints for an encoding recipe.
|
||||
#[derive(PartialEq, Clone)]
|
||||
pub struct RecipeConstraints {
|
||||
/// Constraints for the instruction's fixed value operands.
|
||||
///
|
||||
/// If the instruction takes a variable number of operands, the register constraints for those
|
||||
/// operands must be computed dynamically.
|
||||
///
|
||||
/// - For branches and jumps, block arguments must match the expectations of the destination block.
|
||||
/// - For calls and returns, the calling convention ABI specifies constraints.
|
||||
pub ins: &'static [OperandConstraint],
|
||||
|
||||
/// Constraints for the instruction's fixed results.
|
||||
///
|
||||
/// If the instruction produces a variable number of results, it's probably a call and the
|
||||
/// constraints must be derived from the calling convention ABI.
|
||||
pub outs: &'static [OperandConstraint],
|
||||
|
||||
/// Are any of the input constraints `FixedReg` or `FixedTied`?
|
||||
pub fixed_ins: bool,
|
||||
|
||||
/// Are any of the output constraints `FixedReg` or `FixedTied`?
|
||||
pub fixed_outs: bool,
|
||||
|
||||
/// Are any of the input/output constraints `Tied` (but not `FixedTied`)?
|
||||
pub tied_ops: bool,
|
||||
|
||||
/// Does this instruction clobber the CPU flags?
|
||||
///
|
||||
/// When true, SSA values of type `iflags` or `fflags` can not be live across the instruction.
|
||||
pub clobbers_flags: bool,
|
||||
}
|
||||
|
||||
impl RecipeConstraints {
|
||||
/// Check that these constraints are satisfied by the operands on `inst`.
|
||||
pub fn satisfied(&self, inst: Inst, divert: &RegDiversions, func: &Function) -> bool {
|
||||
for (&arg, constraint) in func.dfg.inst_args(inst).iter().zip(self.ins) {
|
||||
let loc = divert.get(arg, &func.locations);
|
||||
|
||||
if let ConstraintKind::Tied(out_index) = constraint.kind {
|
||||
let out_val = func.dfg.inst_results(inst)[out_index as usize];
|
||||
let out_loc = func.locations[out_val];
|
||||
if loc != out_loc {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if !constraint.satisfied(loc) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
for (&arg, constraint) in func.dfg.inst_results(inst).iter().zip(self.outs) {
|
||||
let loc = divert.get(arg, &func.locations);
|
||||
if !constraint.satisfied(loc) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// Constraints on the range of a branch instruction.
|
||||
///
|
||||
/// A branch instruction usually encodes its destination as a signed n-bit offset from an origin.
|
||||
|
||||
@@ -1,221 +0,0 @@
|
||||
//! Support types for generated encoding tables.
|
||||
//!
|
||||
//! This module contains types and functions for working with the encoding tables generated by
|
||||
//! `cranelift-codegen/meta/src/gen_encodings.rs`.
|
||||
|
||||
use crate::constant_hash::Table;
|
||||
use crate::ir::{Function, InstructionData, Opcode, Type};
|
||||
use crate::isa::{Encoding, Legalize};
|
||||
use crate::settings::PredicateView;
|
||||
|
||||
/// A recipe predicate.
|
||||
///
|
||||
/// This is a predicate function capable of testing ISA and instruction predicates simultaneously.
|
||||
///
|
||||
/// A None predicate is always satisfied.
|
||||
pub type RecipePredicate = Option<fn(PredicateView, &InstructionData) -> bool>;
|
||||
|
||||
/// An instruction predicate.
|
||||
///
|
||||
/// This is a predicate function that needs to be tested in addition to the recipe predicate. It
|
||||
/// can't depend on ISA settings.
|
||||
pub type InstPredicate = fn(&Function, &InstructionData) -> bool;
|
||||
|
||||
/// Legalization action to perform when no encoding can be found for an instruction.
|
||||
///
|
||||
/// This is an index into an ISA-specific table of legalization actions.
|
||||
pub type LegalizeCode = u8;
|
||||
|
||||
/// Level 1 hash table entry.
|
||||
///
|
||||
/// One level 1 hash table is generated per CPU mode. This table is keyed by the controlling type
|
||||
/// variable, using `INVALID` for non-polymorphic instructions.
|
||||
///
|
||||
/// The hash table values are references to level 2 hash tables, encoded as an offset in `LEVEL2`
|
||||
/// where the table begins, and the binary logarithm of its length. All the level 2 hash tables
|
||||
/// have a power-of-two size.
|
||||
///
|
||||
/// Entries are generic over the offset type. It will typically be `u32` or `u16`, depending on the
|
||||
/// size of the `LEVEL2` table.
|
||||
///
|
||||
/// Empty entries are encoded with a `!0` value for `log2len` which will always be out of range.
|
||||
/// Entries that have a `legalize` value but no level 2 table have an `offset` field that is out of
|
||||
/// bounds.
|
||||
pub struct Level1Entry<OffT: Into<u32> + Copy> {
|
||||
pub ty: Type,
|
||||
pub log2len: u8,
|
||||
pub legalize: LegalizeCode,
|
||||
pub offset: OffT,
|
||||
}
|
||||
|
||||
impl<OffT: Into<u32> + Copy> Table<Type> for [Level1Entry<OffT>] {
|
||||
fn len(&self) -> usize {
|
||||
self.len()
|
||||
}
|
||||
|
||||
fn key(&self, idx: usize) -> Option<Type> {
|
||||
if self[idx].log2len != !0 {
|
||||
Some(self[idx].ty)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Level 2 hash table entry.
|
||||
///
|
||||
/// The second level hash tables are keyed by `Opcode`, and contain an offset into the `ENCLISTS`
|
||||
/// table where the encoding recipes for the instruction are stored.
|
||||
///
|
||||
/// Entries are generic over the offset type which depends on the size of `ENCLISTS`. A `u16`
|
||||
/// offset allows the entries to be only 32 bits each. There is no benefit to dropping down to `u8`
|
||||
/// for tiny ISAs. The entries won't shrink below 32 bits since the opcode is expected to be 16
|
||||
/// bits.
|
||||
///
|
||||
/// Empty entries are encoded with a `NotAnOpcode` `opcode` field.
|
||||
pub struct Level2Entry<OffT: Into<u32> + Copy> {
|
||||
pub opcode: Option<Opcode>,
|
||||
pub offset: OffT,
|
||||
}
|
||||
|
||||
impl<OffT: Into<u32> + Copy> Table<Opcode> for [Level2Entry<OffT>] {
|
||||
fn len(&self) -> usize {
|
||||
self.len()
|
||||
}
|
||||
|
||||
fn key(&self, idx: usize) -> Option<Opcode> {
|
||||
self[idx].opcode
|
||||
}
|
||||
}
|
||||
|
||||
/// Encoding list entry.
///
/// An encoding list is a sequence of `u16` words.
pub type EncListEntry = u16;

/// Number of bits used to represent a predicate. c.f. `meta/src/gen_encodings.rs`.
const PRED_BITS: u8 = 12;
/// Mask selecting the low `PRED_BITS` bits of a predicate entry.
const PRED_MASK: usize = (1usize << PRED_BITS) - 1;
/// First code word representing a predicate check. c.f. `meta/src/gen_encodings.rs`.
const PRED_START: usize = 0x1000;
|
||||
|
||||
/// An iterator over legal encodings for the instruction.
|
||||
pub struct Encodings<'a> {
|
||||
// Current offset into `enclist`, or out of bounds after we've reached the end.
|
||||
offset: usize,
|
||||
// Legalization code to use of no encoding is found.
|
||||
legalize: LegalizeCode,
|
||||
inst: &'a InstructionData,
|
||||
func: &'a Function,
|
||||
enclist: &'static [EncListEntry],
|
||||
legalize_actions: &'static [Legalize],
|
||||
recipe_preds: &'static [RecipePredicate],
|
||||
inst_preds: &'static [InstPredicate],
|
||||
isa_preds: PredicateView<'a>,
|
||||
}
|
||||
|
||||
impl<'a> Encodings<'a> {
|
||||
/// Creates a new instance of `Encodings`.
|
||||
///
|
||||
/// This iterator provides search for encodings that applies to the given instruction. The
|
||||
/// encoding lists are laid out such that first call to `next` returns valid entry in the list
|
||||
/// or `None`.
|
||||
pub fn new(
|
||||
offset: usize,
|
||||
legalize: LegalizeCode,
|
||||
inst: &'a InstructionData,
|
||||
func: &'a Function,
|
||||
enclist: &'static [EncListEntry],
|
||||
legalize_actions: &'static [Legalize],
|
||||
recipe_preds: &'static [RecipePredicate],
|
||||
inst_preds: &'static [InstPredicate],
|
||||
isa_preds: PredicateView<'a>,
|
||||
) -> Self {
|
||||
Encodings {
|
||||
offset,
|
||||
inst,
|
||||
func,
|
||||
legalize,
|
||||
isa_preds,
|
||||
recipe_preds,
|
||||
inst_preds,
|
||||
enclist,
|
||||
legalize_actions,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the legalization action that caused the enumeration of encodings to stop.
|
||||
/// This can be the default legalization action for the type or a custom code for the
|
||||
/// instruction.
|
||||
///
|
||||
/// This method must only be called after the iterator returns `None`.
|
||||
pub fn legalize(&self) -> Legalize {
|
||||
debug_assert_eq!(self.offset, !0, "Premature Encodings::legalize()");
|
||||
self.legalize_actions[self.legalize as usize]
|
||||
}
|
||||
|
||||
/// Check if the `rpred` recipe predicate is satisfied.
|
||||
fn check_recipe(&self, rpred: RecipePredicate) -> bool {
|
||||
match rpred {
|
||||
Some(p) => p(self.isa_preds, self.inst),
|
||||
None => true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check an instruction or isa predicate.
|
||||
fn check_pred(&self, pred: usize) -> bool {
|
||||
if let Some(&p) = self.inst_preds.get(pred) {
|
||||
p(self.func, self.inst)
|
||||
} else {
|
||||
let pred = pred - self.inst_preds.len();
|
||||
self.isa_preds.test(pred)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Encodings<'a> {
|
||||
type Item = Encoding;
|
||||
|
||||
fn next(&mut self) -> Option<Encoding> {
|
||||
while let Some(entryref) = self.enclist.get(self.offset) {
|
||||
let entry = *entryref as usize;
|
||||
|
||||
// Check for "recipe+bits".
|
||||
let recipe = entry >> 1;
|
||||
if let Some(&rpred) = self.recipe_preds.get(recipe) {
|
||||
let bits = self.offset + 1;
|
||||
if entry & 1 == 0 {
|
||||
self.offset += 2; // Next entry.
|
||||
} else {
|
||||
self.offset = !0; // Stop.
|
||||
}
|
||||
if self.check_recipe(rpred) {
|
||||
return Some(Encoding::new(recipe as u16, self.enclist[bits]));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check for "stop with legalize".
|
||||
if entry < PRED_START {
|
||||
self.legalize = (entry - 2 * self.recipe_preds.len()) as LegalizeCode;
|
||||
self.offset = !0; // Stop.
|
||||
return None;
|
||||
}
|
||||
|
||||
// Finally, this must be a predicate entry.
|
||||
let pred_entry = entry - PRED_START;
|
||||
let skip = pred_entry >> PRED_BITS;
|
||||
let pred = pred_entry & PRED_MASK;
|
||||
|
||||
if self.check_pred(pred) {
|
||||
self.offset += 1;
|
||||
} else if skip == 0 {
|
||||
self.offset = !0; // Stop.
|
||||
return None;
|
||||
} else {
|
||||
self.offset += 1 + skip;
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
@@ -1,167 +0,0 @@
|
||||
//! The `Encoding` struct.
|
||||
|
||||
use crate::binemit::CodeOffset;
|
||||
use crate::ir::{Function, Inst};
|
||||
use crate::isa::constraints::{BranchRange, RecipeConstraints};
|
||||
use crate::regalloc::RegDiversions;
|
||||
use core::fmt;
|
||||
|
||||
#[cfg(feature = "enable-serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// Bits needed to encode an instruction as binary machine code.
///
/// An encoding has two parts, both specific to the target ISA: an encoding *recipe* and
/// encoding *bits*. The recipe determines the native instruction format and the mapping of
/// operands to encoded bits. The encoding bits provide additional information to the recipe,
/// typically parts of the opcode.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
pub struct Encoding {
    recipe: u16,
    bits: u16,
}

impl Encoding {
    /// Create a new `Encoding` containing `(recipe, bits)`.
    pub fn new(recipe: u16, bits: u16) -> Self {
        Encoding { recipe, bits }
    }

    /// Get the recipe number in this encoding.
    pub fn recipe(self) -> usize {
        usize::from(self.recipe)
    }

    /// Get the recipe-specific encoding bits.
    pub fn bits(self) -> u16 {
        self.bits
    }

    /// Is this a legal encoding, or the default placeholder?
    pub fn is_legal(self) -> bool {
        let placeholder = Self::default();
        self != placeholder
    }
}

/// The default encoding is the illegal one.
impl Default for Encoding {
    fn default() -> Self {
        Encoding::new(0xffff, 0xffff)
    }
}

/// ISA-independent display of an encoding.
///
/// Legal encodings print as `recipe#bits` with the bits in hexadecimal (at least two digits);
/// the illegal placeholder prints as `-`.
impl fmt::Display for Encoding {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        if !self.is_legal() {
            return f.write_str("-");
        }
        write!(f, "{}#{:02x}", self.recipe, self.bits)
    }
}
|
||||
|
||||
/// Temporary object that holds enough context to properly display an encoding.
|
||||
/// This is meant to be created by `EncInfo::display()`.
|
||||
pub struct DisplayEncoding {
|
||||
pub encoding: Encoding,
|
||||
pub recipe_names: &'static [&'static str],
|
||||
}
|
||||
|
||||
impl fmt::Display for DisplayEncoding {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
if self.encoding.is_legal() {
|
||||
write!(
|
||||
f,
|
||||
"{}#{:02x}",
|
||||
self.recipe_names[self.encoding.recipe()],
|
||||
self.encoding.bits
|
||||
)
|
||||
} else {
|
||||
write!(f, "-")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type SizeCalculatorFn = fn(&RecipeSizing, Encoding, Inst, &RegDiversions, &Function) -> u8;
|
||||
|
||||
/// Returns the base size of the Recipe, assuming it's fixed. This is the default for most
|
||||
/// encodings; others can be variable and longer than this base size, depending on the registers
|
||||
/// they're using and use a different function, specific per platform.
|
||||
pub fn base_size(
|
||||
sizing: &RecipeSizing,
|
||||
_: Encoding,
|
||||
_: Inst,
|
||||
_: &RegDiversions,
|
||||
_: &Function,
|
||||
) -> u8 {
|
||||
sizing.base_size
|
||||
}
|
||||
|
||||
/// Code size information for an encoding recipe.
|
||||
///
|
||||
/// Encoding recipes may have runtime-determined instruction size.
|
||||
pub struct RecipeSizing {
|
||||
/// Minimum size in bytes of instructions encoded with this recipe.
|
||||
pub base_size: u8,
|
||||
|
||||
/// Method computing the instruction's real size, given inputs and outputs.
|
||||
pub compute_size: SizeCalculatorFn,
|
||||
|
||||
/// Allowed branch range in this recipe, if any.
|
||||
///
|
||||
/// All encoding recipes for branches have exact branch range information.
|
||||
pub branch_range: Option<BranchRange>,
|
||||
}
|
||||
|
||||
/// Information about all the encodings in this ISA.
|
||||
#[derive(Clone)]
|
||||
pub struct EncInfo {
|
||||
/// Constraints on value operands per recipe.
|
||||
pub constraints: &'static [RecipeConstraints],
|
||||
|
||||
/// Code size information per recipe.
|
||||
pub sizing: &'static [RecipeSizing],
|
||||
|
||||
/// Names of encoding recipes.
|
||||
pub names: &'static [&'static str],
|
||||
}
|
||||
|
||||
impl EncInfo {
|
||||
/// Get the value operand constraints for `enc` if it is a legal encoding.
|
||||
pub fn operand_constraints(&self, enc: Encoding) -> Option<&'static RecipeConstraints> {
|
||||
self.constraints.get(enc.recipe())
|
||||
}
|
||||
|
||||
/// Create an object that can display an ISA-dependent encoding properly.
|
||||
pub fn display(&self, enc: Encoding) -> DisplayEncoding {
|
||||
DisplayEncoding {
|
||||
encoding: enc,
|
||||
recipe_names: self.names,
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the size in bytes of `inst`, if it were encoded with `enc`.
|
||||
///
|
||||
/// Returns 0 for illegal encodings.
|
||||
pub fn byte_size(
|
||||
&self,
|
||||
enc: Encoding,
|
||||
inst: Inst,
|
||||
divert: &RegDiversions,
|
||||
func: &Function,
|
||||
) -> CodeOffset {
|
||||
self.sizing.get(enc.recipe()).map_or(0, |s| {
|
||||
let compute_size = s.compute_size;
|
||||
CodeOffset::from(compute_size(&s, enc, inst, divert, func))
|
||||
})
|
||||
}
|
||||
|
||||
/// Get the branch range that is supported by `enc`, if any.
|
||||
///
|
||||
/// This will never return `None` for a legal branch encoding.
|
||||
pub fn branch_range(&self, enc: Encoding) -> Option<BranchRange> {
|
||||
self.sizing.get(enc.recipe()).and_then(|s| s.branch_range)
|
||||
}
|
||||
}
|
||||
@@ -44,26 +44,19 @@
|
||||
//! concurrent function compilations.
|
||||
|
||||
pub use crate::isa::call_conv::CallConv;
|
||||
pub use crate::isa::constraints::{
|
||||
BranchRange, ConstraintKind, OperandConstraint, RecipeConstraints,
|
||||
};
|
||||
pub use crate::isa::enc_tables::Encodings;
|
||||
pub use crate::isa::encoding::{base_size, EncInfo, Encoding};
|
||||
pub use crate::isa::constraints::{BranchRange, ConstraintKind, OperandConstraint};
|
||||
pub use crate::isa::registers::{regs_overlap, RegClass, RegClassIndex, RegInfo, RegUnit};
|
||||
pub use crate::isa::stack::{StackBase, StackBaseMask, StackRef};
|
||||
|
||||
use crate::binemit;
|
||||
use crate::flowgraph;
|
||||
use crate::ir;
|
||||
#[cfg(feature = "unwind")]
|
||||
use crate::isa::unwind::systemv::RegisterMappingError;
|
||||
use crate::machinst::{MachBackend, UnwindInfoKind};
|
||||
use crate::regalloc;
|
||||
use crate::result::CodegenResult;
|
||||
use crate::settings;
|
||||
use crate::settings::SetResult;
|
||||
use crate::timing;
|
||||
use alloc::{borrow::Cow, boxed::Box, vec::Vec};
|
||||
use alloc::{boxed::Box, vec::Vec};
|
||||
use core::any::Any;
|
||||
use core::fmt;
|
||||
use core::fmt::{Debug, Formatter};
|
||||
@@ -88,8 +81,6 @@ pub mod unwind;
|
||||
|
||||
mod call_conv;
|
||||
mod constraints;
|
||||
mod enc_tables;
|
||||
mod encoding;
|
||||
pub mod registers;
|
||||
mod stack;
|
||||
|
||||
@@ -329,125 +320,6 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
|
||||
Err(RegisterMappingError::UnsupportedArchitecture)
|
||||
}
|
||||
|
||||
/// Returns an iterator over legal encodings for the instruction.
|
||||
fn legal_encodings<'a>(
|
||||
&'a self,
|
||||
func: &'a ir::Function,
|
||||
inst: &'a ir::InstructionData,
|
||||
ctrl_typevar: ir::Type,
|
||||
) -> Encodings<'a>;
|
||||
|
||||
/// Encode an instruction after determining it is legal.
|
||||
///
|
||||
/// If `inst` can legally be encoded in this ISA, produce the corresponding `Encoding` object.
|
||||
/// Otherwise, return `Legalize` action.
|
||||
///
|
||||
/// This is also the main entry point for determining if an instruction is legal.
|
||||
fn encode(
|
||||
&self,
|
||||
func: &ir::Function,
|
||||
inst: &ir::InstructionData,
|
||||
ctrl_typevar: ir::Type,
|
||||
) -> Result<Encoding, Legalize> {
|
||||
let mut iter = self.legal_encodings(func, inst, ctrl_typevar);
|
||||
iter.next().ok_or_else(|| iter.legalize())
|
||||
}
|
||||
|
||||
/// Get a data structure describing the instruction encodings in this ISA.
|
||||
fn encoding_info(&self) -> EncInfo;
|
||||
|
||||
/// Legalize a function signature.
|
||||
///
|
||||
/// This is used to legalize both the signature of the function being compiled and any called
|
||||
/// functions. The signature should be modified by adding `ArgumentLoc` annotations to all
|
||||
/// arguments and return values.
|
||||
///
|
||||
/// Arguments with types that are not supported by the ABI can be expanded into multiple
|
||||
/// arguments:
|
||||
///
|
||||
/// - Integer types that are too large to fit in a register can be broken into multiple
|
||||
/// arguments of a smaller integer type.
|
||||
/// - Floating point types can be bit-cast to an integer type of the same size, and possible
|
||||
/// broken into smaller integer types.
|
||||
/// - Vector types can be bit-cast and broken down into smaller vectors or scalars.
|
||||
///
|
||||
/// The legalizer will adapt argument and return values as necessary at all ABI boundaries.
|
||||
///
|
||||
/// When this function is called to legalize the signature of the function currently being
|
||||
/// compiled, `current` is true. The legalized signature can then also contain special purpose
|
||||
/// arguments and return values such as:
|
||||
///
|
||||
/// - A `link` argument representing the link registers on RISC architectures that don't push
|
||||
/// the return address on the stack.
|
||||
/// - A `link` return value which will receive the value that was passed to the `link`
|
||||
/// argument.
|
||||
/// - An `sret` argument can be added if one wasn't present already. This is necessary if the
|
||||
/// signature returns more values than registers are available for returning values.
|
||||
/// - An `sret` return value can be added if the ABI requires a function to return its `sret`
|
||||
/// argument in a register.
|
||||
///
|
||||
/// Arguments and return values for the caller's frame pointer and other callee-saved registers
|
||||
/// should not be added by this function. These arguments are not added until after register
|
||||
/// allocation.
|
||||
fn legalize_signature(&self, sig: &mut Cow<ir::Signature>, current: bool);
|
||||
|
||||
/// Get the register class that should be used to represent an ABI argument or return value of
|
||||
/// type `ty`. This should be the top-level register class that contains the argument
|
||||
/// registers.
|
||||
///
|
||||
/// This function can assume that it will only be asked to provide register classes for types
|
||||
/// that `legalize_signature()` produces in `ArgumentLoc::Reg` entries.
|
||||
fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass;
|
||||
|
||||
/// Get the set of allocatable registers that can be used when compiling `func`.
|
||||
///
|
||||
/// This set excludes reserved registers like the stack pointer and other special-purpose
|
||||
/// registers.
|
||||
fn allocatable_registers(&self, func: &ir::Function) -> regalloc::RegisterSet;
|
||||
|
||||
/// Compute the stack layout and insert prologue and epilogue code into `func`.
|
||||
///
|
||||
/// Return an error if the stack frame is too large.
|
||||
fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> {
|
||||
let _tt = timing::prologue_epilogue();
|
||||
// This default implementation is unlikely to be good enough.
|
||||
use crate::ir::stackslot::{StackOffset, StackSize};
|
||||
use crate::stack_layout::layout_stack;
|
||||
|
||||
let word_size = StackSize::from(self.pointer_bytes());
|
||||
|
||||
// Account for the SpiderMonkey standard prologue pushes.
|
||||
if func.signature.call_conv.extends_baldrdash() {
|
||||
let bytes = StackSize::from(self.flags().baldrdash_prologue_words()) * word_size;
|
||||
let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
|
||||
ss.offset = Some(-(bytes as StackOffset));
|
||||
func.stack_slots.push(ss);
|
||||
}
|
||||
|
||||
let is_leaf = func.is_leaf();
|
||||
layout_stack(&mut func.stack_slots, is_leaf, word_size)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Emit binary machine code for a single instruction into the `sink` trait object.
///
/// The `put*` methods on `sink` are called through its vtable, which is not the fastest way of
/// emitting code.
///
/// This function is under the "testing_hooks" feature, and is only suitable for use by test
/// harnesses. It increases code size, and is inefficient.
#[cfg(feature = "testing_hooks")]
fn emit_inst(
    &self,
    func: &ir::Function,
    inst: ir::Inst,
    divert: &mut regalloc::RegDiversions,
    sink: &mut dyn binemit::CodeSink,
);
|
||||
|
||||
/// Emit a whole function into memory.
|
||||
fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut binemit::MemoryCodeSink);
|
||||
|
||||
/// IntCC condition for Unsigned Addition Overflow (Carry).
|
||||
fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC;
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,40 +0,0 @@
|
||||
//! Expanding instructions as runtime library calls.
|
||||
|
||||
use crate::ir;
|
||||
use crate::ir::{libcall::get_libcall_funcref, InstBuilder};
|
||||
use crate::isa::{CallConv, TargetIsa};
|
||||
use crate::legalizer::boundary::legalize_libcall_signature;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
/// Try to expand `inst` as a library call, returning true is successful.
|
||||
pub fn expand_as_libcall(inst: ir::Inst, func: &mut ir::Function, isa: &dyn TargetIsa) -> bool {
|
||||
// Does the opcode/ctrl_type combo even have a well-known runtime library name.
|
||||
let libcall = match ir::LibCall::for_inst(func.dfg[inst].opcode(), func.dfg.ctrl_typevar(inst))
|
||||
{
|
||||
Some(lc) => lc,
|
||||
None => return false,
|
||||
};
|
||||
|
||||
// Now we convert `inst` to a call. First save the arguments.
|
||||
let mut args = Vec::new();
|
||||
args.extend_from_slice(func.dfg.inst_args(inst));
|
||||
|
||||
let call_conv = CallConv::for_libcall(isa.flags(), isa.default_call_conv());
|
||||
if call_conv.extends_baldrdash() {
|
||||
let vmctx = func
|
||||
.special_param(ir::ArgumentPurpose::VMContext)
|
||||
.expect("Missing vmctx parameter for baldrdash libcall");
|
||||
args.push(vmctx);
|
||||
}
|
||||
|
||||
// The replace builder will preserve the instruction result values.
|
||||
let funcref = get_libcall_funcref(libcall, call_conv, func, inst, isa);
|
||||
func.dfg.replace(inst).call(funcref, &args);
|
||||
|
||||
// Ask the ISA to legalize the signature.
|
||||
let fn_data = &func.dfg.ext_funcs[funcref];
|
||||
let sig_data = &mut func.dfg.signatures[fn_data.signature];
|
||||
legalize_libcall_signature(sig_data, isa);
|
||||
|
||||
true
|
||||
}
|
||||
@@ -19,179 +19,14 @@ use crate::ir::types::I32;
|
||||
use crate::ir::{self, InstBuilder, MemFlags};
|
||||
use crate::isa::TargetIsa;
|
||||
|
||||
use crate::timing;
|
||||
use alloc::collections::BTreeSet;
|
||||
|
||||
mod boundary;
|
||||
mod globalvalue;
|
||||
mod heap;
|
||||
mod libcall;
|
||||
mod split;
|
||||
mod table;
|
||||
|
||||
use self::globalvalue::expand_global_value;
|
||||
use self::heap::expand_heap_addr;
|
||||
pub(crate) use self::libcall::expand_as_libcall;
|
||||
use self::table::expand_table_addr;
|
||||
|
||||
/// Outcome of a single `legalize_inst` call, telling the driver loop what to do next.
enum LegalizeInstResult {
    /// Nothing more to do for this instruction; continue with the next one.
    Done,
    /// The instruction was rewritten; the driver doubles back to revisit the new code.
    Legalized,
    /// An `isplit` whose argument is not (yet) defined by an `iconcat`; the driver retries it
    /// after the rest of the function has been legalized.
    SplitLegalizePending,
}
|
||||
|
||||
/// Legalize `inst` for `isa`.
|
||||
fn legalize_inst(
|
||||
inst: ir::Inst,
|
||||
pos: &mut FuncCursor,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
isa: &dyn TargetIsa,
|
||||
) -> LegalizeInstResult {
|
||||
let opcode = pos.func.dfg[inst].opcode();
|
||||
|
||||
// Check for ABI boundaries that need to be converted to the legalized signature.
|
||||
if opcode.is_call() {
|
||||
if boundary::handle_call_abi(isa, inst, pos.func, cfg) {
|
||||
return LegalizeInstResult::Legalized;
|
||||
}
|
||||
} else if opcode.is_return() {
|
||||
if boundary::handle_return_abi(inst, pos.func, cfg) {
|
||||
return LegalizeInstResult::Legalized;
|
||||
}
|
||||
} else if opcode.is_branch() {
|
||||
split::simplify_branch_arguments(&mut pos.func.dfg, inst);
|
||||
} else if opcode == ir::Opcode::Isplit {
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
let arg = match pos.func.dfg[inst] {
|
||||
ir::InstructionData::Unary { arg, .. } => pos.func.dfg.resolve_aliases(arg),
|
||||
_ => panic!("Expected isplit: {}", pos.func.dfg.display_inst(inst, None)),
|
||||
};
|
||||
|
||||
match pos.func.dfg.value_def(arg) {
|
||||
ir::ValueDef::Result(inst, _num) => {
|
||||
if let ir::InstructionData::Binary {
|
||||
opcode: ir::Opcode::Iconcat,
|
||||
..
|
||||
} = pos.func.dfg[inst]
|
||||
{
|
||||
// `arg` was created by an `iconcat` instruction.
|
||||
} else {
|
||||
// `arg` was not created by an `iconcat` instruction. Don't try to resolve it,
|
||||
// as otherwise `split::isplit` will re-insert the original `isplit`, causing
|
||||
// an endless loop.
|
||||
return LegalizeInstResult::SplitLegalizePending;
|
||||
}
|
||||
}
|
||||
ir::ValueDef::Param(_block, _num) => {}
|
||||
}
|
||||
|
||||
let res = pos.func.dfg.inst_results(inst).to_vec();
|
||||
assert_eq!(res.len(), 2);
|
||||
let (resl, resh) = (res[0], res[1]); // Prevent borrowck error
|
||||
|
||||
// Remove old isplit
|
||||
pos.func.dfg.clear_results(inst);
|
||||
pos.remove_inst();
|
||||
|
||||
let curpos = pos.position();
|
||||
let srcloc = pos.srcloc();
|
||||
let (xl, xh) = split::isplit(pos.func, cfg, curpos, srcloc, arg);
|
||||
|
||||
pos.func.dfg.change_to_alias(resl, xl);
|
||||
pos.func.dfg.change_to_alias(resh, xh);
|
||||
|
||||
return LegalizeInstResult::Legalized;
|
||||
}
|
||||
|
||||
match pos.func.update_encoding(inst, isa) {
|
||||
Ok(()) => LegalizeInstResult::Done,
|
||||
Err(action) => {
|
||||
// We should transform the instruction into legal equivalents.
|
||||
// If the current instruction was replaced, we need to double back and revisit
|
||||
// the expanded sequence. This is both to assign encodings and possible to
|
||||
// expand further.
|
||||
// There's a risk of infinite looping here if the legalization patterns are
|
||||
// unsound. Should we attempt to detect that?
|
||||
if action(inst, pos.func, cfg, isa) {
|
||||
return LegalizeInstResult::Legalized;
|
||||
}
|
||||
|
||||
// We don't have any pattern expansion for this instruction either.
|
||||
// Try converting it to a library call as a last resort.
|
||||
if expand_as_libcall(inst, pos.func, isa) {
|
||||
LegalizeInstResult::Legalized
|
||||
} else {
|
||||
LegalizeInstResult::Done
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Legalize `func` for `isa`.
|
||||
///
|
||||
/// - Transform any instructions that don't have a legal representation in `isa`.
|
||||
/// - Fill out `func.encodings`.
|
||||
///
|
||||
pub fn legalize_function(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) {
|
||||
let _tt = timing::legalize();
|
||||
debug_assert!(cfg.is_valid());
|
||||
|
||||
boundary::legalize_signatures(func, isa);
|
||||
|
||||
func.encodings.resize(func.dfg.num_insts());
|
||||
|
||||
let mut pos = FuncCursor::new(func);
|
||||
let func_begin = pos.position();
|
||||
|
||||
// Split block params before trying to legalize instructions, so that the newly introduced
|
||||
// isplit instructions get legalized.
|
||||
while let Some(block) = pos.next_block() {
|
||||
split::split_block_params(pos.func, cfg, block);
|
||||
}
|
||||
|
||||
pos.set_position(func_begin);
|
||||
|
||||
// This must be a set to prevent trying to legalize `isplit` and `vsplit` twice in certain cases.
|
||||
let mut pending_splits = BTreeSet::new();
|
||||
|
||||
// Process blocks in layout order. Some legalization actions may split the current block or append
|
||||
// new ones to the end. We need to make sure we visit those new blocks too.
|
||||
while let Some(_block) = pos.next_block() {
|
||||
// Keep track of the cursor position before the instruction being processed, so we can
|
||||
// double back when replacing instructions.
|
||||
let mut prev_pos = pos.position();
|
||||
|
||||
while let Some(inst) = pos.next_inst() {
|
||||
match legalize_inst(inst, &mut pos, cfg, isa) {
|
||||
// Remember this position in case we need to double back.
|
||||
LegalizeInstResult::Done => prev_pos = pos.position(),
|
||||
|
||||
// Go back and legalize the inserted return value conversion instructions.
|
||||
LegalizeInstResult::Legalized => pos.set_position(prev_pos),
|
||||
|
||||
// The argument of a `isplit` or `vsplit` instruction didn't resolve to a
|
||||
// `iconcat` or `vconcat` instruction. Try again after legalizing the rest of
|
||||
// the instructions.
|
||||
LegalizeInstResult::SplitLegalizePending => {
|
||||
pending_splits.insert(inst);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try legalizing `isplit` and `vsplit` instructions, which could not previously be legalized.
|
||||
for inst in pending_splits {
|
||||
pos.goto_inst(inst);
|
||||
legalize_inst(inst, &mut pos, cfg, isa);
|
||||
}
|
||||
|
||||
// Now that we've lowered all br_tables, we don't need the jump tables anymore.
|
||||
if !isa.flags().enable_jump_tables() {
|
||||
pos.func.jump_tables.clear();
|
||||
}
|
||||
}
|
||||
|
||||
/// Perform a simple legalization by expansion of the function, without
|
||||
/// platform-specific transforms.
|
||||
pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa: &dyn TargetIsa) {
|
||||
|
||||
@@ -1,405 +0,0 @@
|
||||
//! Value splitting.
|
||||
//!
|
||||
//! Some value types are too large to fit in registers, so they need to be split into smaller parts
|
||||
//! that the ISA can operate on. There are two dimensions of splitting, represented by two
|
||||
//! complementary instruction pairs:
|
||||
//!
|
||||
//! - `isplit` and `iconcat` for splitting integer types into smaller integers.
|
||||
//! - `vsplit` and `vconcat` for splitting vector types into smaller vector types with the same
|
||||
//! lane types.
|
||||
//!
|
||||
//! There is no floating point splitting. If an ISA doesn't support `f64` values, they probably
|
||||
//! have to be bit-cast to `i64` and possibly split into two `i32` values that fit in registers.
|
||||
//! This breakdown is handled by the ABI lowering.
|
||||
//!
|
||||
//! When legalizing a single instruction, it is wrapped in splits and concatenations:
|
||||
//!
|
||||
//! ```clif
|
||||
//! v1 = bxor.i64 v2, v3
|
||||
//! ```
|
||||
//!
|
||||
//! becomes:
|
||||
//!
|
||||
//! ```clif
|
||||
//! v20, v21 = isplit v2
|
||||
//! v30, v31 = isplit v3
|
||||
//! v10 = bxor.i32 v20, v30
|
||||
//! v11 = bxor.i32 v21, v31
|
||||
//! v1 = iconcat v10, v11
|
||||
//! ```
|
||||
//!
|
||||
//! This local expansion approach still leaves the original `i64` values in the code as operands on
|
||||
//! the `split` and `concat` instructions. It also creates a lot of redundant code to clean up as
|
||||
//! values are constantly split and concatenated.
|
||||
//!
|
||||
//! # Optimized splitting
|
||||
//!
|
||||
//! We can eliminate a lot of the splitting code quite easily. Whenever we need to split a value,
|
||||
//! first check if the value is defined by the corresponding concatenation. If so, then just use
|
||||
//! the two concatenation inputs directly:
|
||||
//!
|
||||
//! ```clif
|
||||
//! v4 = iadd_imm.i64 v1, 1
|
||||
//! ```
|
||||
//!
|
||||
//! becomes, using the expanded code from above:
|
||||
//!
|
||||
//! ```clif
|
||||
//! v40, v5 = iadd_imm_cout.i32 v10, 1
|
||||
//! v6 = bint.i32 v5
|
||||
//! v41 = iadd.i32 v11, v6
|
||||
//! v4 = iconcat v40, v41
|
||||
//! ```
|
||||
//!
|
||||
//! This means that the `iconcat` instructions defining `v1` and `v4` end up with no uses, so they
|
||||
//! can be trivially deleted by a dead code elimination pass.
|
||||
//!
|
||||
//! # block arguments
|
||||
//!
|
||||
//! If all instructions that produce an `i64` value are legalized as above, we will eventually end
|
||||
//! up with no `i64` values anywhere, except for block arguments. We can work around this by
|
||||
//! iteratively splitting block arguments too. That should leave us with no illegal value types
|
||||
//! anywhere.
|
||||
//!
|
||||
//! It is possible to have circular dependencies of block arguments that are never used by any real
|
||||
//! instructions. These loops will remain in the program.
|
||||
|
||||
use crate::cursor::{Cursor, CursorPosition, FuncCursor};
|
||||
use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
|
||||
use crate::ir::{self, Block, Inst, InstBuilder, InstructionData, Opcode, Type, Value, ValueDef};
|
||||
use alloc::vec::Vec;
|
||||
use core::iter;
|
||||
use smallvec::SmallVec;
|
||||
|
||||
/// Split `value` into two values using the `isplit` semantics. Do this by reusing existing values
|
||||
/// if possible.
|
||||
pub fn isplit(
|
||||
func: &mut ir::Function,
|
||||
cfg: &ControlFlowGraph,
|
||||
pos: CursorPosition,
|
||||
srcloc: ir::SourceLoc,
|
||||
value: Value,
|
||||
) -> (Value, Value) {
|
||||
split_any(func, cfg, pos, srcloc, value, Opcode::Iconcat)
|
||||
}
|
||||
|
||||
/// Split `value` into halves using the `vsplit` semantics. Do this by reusing existing values if
|
||||
/// possible.
|
||||
pub fn vsplit(
|
||||
func: &mut ir::Function,
|
||||
cfg: &ControlFlowGraph,
|
||||
pos: CursorPosition,
|
||||
srcloc: ir::SourceLoc,
|
||||
value: Value,
|
||||
) -> (Value, Value) {
|
||||
split_any(func, cfg, pos, srcloc, value, Opcode::Vconcat)
|
||||
}
|
||||
|
||||
/// After splitting a block argument, we need to go back and fix up all of the predecessor
|
||||
/// instructions. This is potentially a recursive operation, but we don't implement it recursively
|
||||
/// since that could use up too muck stack.
|
||||
///
|
||||
/// Instead, the repairs are deferred and placed on a work list in stack form.
|
||||
struct Repair {
|
||||
concat: Opcode,
|
||||
// The argument type after splitting.
|
||||
split_type: Type,
|
||||
// The destination block whose arguments have been split.
|
||||
block: Block,
|
||||
// Number of the original block argument which has been replaced by the low part.
|
||||
num: usize,
|
||||
// Number of the new block argument which represents the high part after the split.
|
||||
hi_num: usize,
|
||||
}
|
||||
|
||||
/// Generic version of `isplit` and `vsplit` controlled by the `concat` opcode.
|
||||
fn split_any(
|
||||
func: &mut ir::Function,
|
||||
cfg: &ControlFlowGraph,
|
||||
pos: CursorPosition,
|
||||
srcloc: ir::SourceLoc,
|
||||
value: Value,
|
||||
concat: Opcode,
|
||||
) -> (Value, Value) {
|
||||
let mut repairs = Vec::new();
|
||||
let pos = &mut FuncCursor::new(func).at_position(pos).with_srcloc(srcloc);
|
||||
let result = split_value(pos, value, concat, &mut repairs);
|
||||
|
||||
perform_repairs(pos, cfg, repairs);
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
pub fn split_block_params(func: &mut ir::Function, cfg: &ControlFlowGraph, block: Block) {
|
||||
let pos = &mut FuncCursor::new(func).at_top(block);
|
||||
let block_params = pos.func.dfg.block_params(block);
|
||||
|
||||
// Add further splittable types here.
|
||||
fn type_requires_splitting(ty: Type) -> bool {
|
||||
ty == ir::types::I128
|
||||
}
|
||||
|
||||
// A shortcut. If none of the param types require splitting, exit now. This helps because
|
||||
// the loop below necessarily has to copy the block params into a new vector, so it's better to
|
||||
// avoid doing so when possible.
|
||||
if !block_params
|
||||
.iter()
|
||||
.any(|block_param| type_requires_splitting(pos.func.dfg.value_type(*block_param)))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
let mut repairs = Vec::new();
|
||||
for (num, block_param) in block_params.to_vec().into_iter().enumerate() {
|
||||
if !type_requires_splitting(pos.func.dfg.value_type(block_param)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
split_block_param(pos, block, num, block_param, Opcode::Iconcat, &mut repairs);
|
||||
}
|
||||
|
||||
perform_repairs(pos, cfg, repairs);
|
||||
}
|
||||
|
||||
fn perform_repairs(pos: &mut FuncCursor, cfg: &ControlFlowGraph, mut repairs: Vec<Repair>) {
|
||||
// We have split the value requested, and now we may need to fix some block predecessors.
|
||||
while let Some(repair) = repairs.pop() {
|
||||
for BlockPredecessor { inst, .. } in cfg.pred_iter(repair.block) {
|
||||
let branch_opc = pos.func.dfg[inst].opcode();
|
||||
debug_assert!(
|
||||
branch_opc.is_branch(),
|
||||
"Predecessor not a branch: {}",
|
||||
pos.func.dfg.display_inst(inst, None)
|
||||
);
|
||||
let num_fixed_args = branch_opc.constraints().num_fixed_value_arguments();
|
||||
let mut args = pos.func.dfg[inst]
|
||||
.take_value_list()
|
||||
.expect("Branches must have value lists.");
|
||||
let num_args = args.len(&pos.func.dfg.value_lists);
|
||||
// Get the old value passed to the block argument we're repairing.
|
||||
let old_arg = args
|
||||
.get(num_fixed_args + repair.num, &pos.func.dfg.value_lists)
|
||||
.expect("Too few branch arguments");
|
||||
|
||||
// It's possible that the CFG's predecessor list has duplicates. Detect them here.
|
||||
if pos.func.dfg.value_type(old_arg) == repair.split_type {
|
||||
pos.func.dfg[inst].put_value_list(args);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Split the old argument, possibly causing more repairs to be scheduled.
|
||||
pos.goto_inst(inst);
|
||||
|
||||
let inst_block = pos.func.layout.inst_block(inst).expect("inst in block");
|
||||
|
||||
// Insert split values prior to the terminal branch group.
|
||||
let canonical = pos
|
||||
.func
|
||||
.layout
|
||||
.canonical_branch_inst(&pos.func.dfg, inst_block);
|
||||
if let Some(first_branch) = canonical {
|
||||
pos.goto_inst(first_branch);
|
||||
}
|
||||
|
||||
let (lo, hi) = split_value(pos, old_arg, repair.concat, &mut repairs);
|
||||
|
||||
// The `lo` part replaces the original argument.
|
||||
*args
|
||||
.get_mut(num_fixed_args + repair.num, &mut pos.func.dfg.value_lists)
|
||||
.unwrap() = lo;
|
||||
|
||||
// The `hi` part goes at the end. Since multiple repairs may have been scheduled to the
|
||||
// same block, there could be multiple arguments missing.
|
||||
if num_args > num_fixed_args + repair.hi_num {
|
||||
*args
|
||||
.get_mut(
|
||||
num_fixed_args + repair.hi_num,
|
||||
&mut pos.func.dfg.value_lists,
|
||||
)
|
||||
.unwrap() = hi;
|
||||
} else {
|
||||
// We need to append one or more arguments. If we're adding more than one argument,
|
||||
// there must be pending repairs on the stack that will fill in the correct values
|
||||
// instead of `hi`.
|
||||
args.extend(
|
||||
iter::repeat(hi).take(1 + num_fixed_args + repair.hi_num - num_args),
|
||||
&mut pos.func.dfg.value_lists,
|
||||
);
|
||||
}
|
||||
|
||||
// Put the value list back after manipulating it.
|
||||
pos.func.dfg[inst].put_value_list(args);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Split a single value using the integer or vector semantics given by the `concat` opcode.
|
||||
///
|
||||
/// If the value is defined by a `concat` instruction, just reuse the operand values of that
|
||||
/// instruction.
|
||||
///
|
||||
/// Return the two new values representing the parts of `value`.
|
||||
fn split_value(
|
||||
pos: &mut FuncCursor,
|
||||
value: Value,
|
||||
concat: Opcode,
|
||||
repairs: &mut Vec<Repair>,
|
||||
) -> (Value, Value) {
|
||||
let value = pos.func.dfg.resolve_aliases(value);
|
||||
let mut reuse = None;
|
||||
|
||||
match pos.func.dfg.value_def(value) {
|
||||
ValueDef::Result(inst, num) => {
|
||||
// This is an instruction result. See if the value was created by a `concat`
|
||||
// instruction.
|
||||
if let InstructionData::Binary { opcode, args, .. } = pos.func.dfg[inst] {
|
||||
debug_assert_eq!(num, 0);
|
||||
if opcode == concat {
|
||||
reuse = Some((args[0], args[1]));
|
||||
}
|
||||
}
|
||||
}
|
||||
ValueDef::Param(block, num) => {
|
||||
// This is a block parameter.
|
||||
// We can split the parameter value unless this is the entry block.
|
||||
if pos.func.layout.entry_block() != Some(block) {
|
||||
reuse = Some(split_block_param(pos, block, num, value, concat, repairs));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Did the code above succeed in finding values we can reuse?
|
||||
if let Some(pair) = reuse {
|
||||
pair
|
||||
} else {
|
||||
// No, we'll just have to insert the requested split instruction at `pos`. Note that `pos`
|
||||
// has not been moved by the block argument code above when `reuse` is `None`.
|
||||
match concat {
|
||||
Opcode::Iconcat => pos.ins().isplit(value),
|
||||
Opcode::Vconcat => pos.ins().vsplit(value),
|
||||
_ => panic!("Unhandled concat opcode: {}", concat),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn split_block_param(
|
||||
pos: &mut FuncCursor,
|
||||
block: Block,
|
||||
param_num: usize,
|
||||
value: Value,
|
||||
concat: Opcode,
|
||||
repairs: &mut Vec<Repair>,
|
||||
) -> (Value, Value) {
|
||||
// We are going to replace the parameter at `num` with two new arguments.
|
||||
// Determine the new value types.
|
||||
let ty = pos.func.dfg.value_type(value);
|
||||
let split_type = match concat {
|
||||
Opcode::Iconcat => ty.half_width().expect("Invalid type for isplit"),
|
||||
Opcode::Vconcat => ty.half_vector().expect("Invalid type for vsplit"),
|
||||
_ => panic!("Unhandled concat opcode: {}", concat),
|
||||
};
|
||||
|
||||
// Since the `repairs` stack potentially contains other parameter numbers for
|
||||
// `block`, avoid shifting and renumbering block parameters. It could invalidate other
|
||||
// `repairs` entries.
|
||||
//
|
||||
// Replace the original `value` with the low part, and append the high part at the
|
||||
// end of the argument list.
|
||||
let lo = pos.func.dfg.replace_block_param(value, split_type);
|
||||
let hi_num = pos.func.dfg.num_block_params(block);
|
||||
let hi = pos.func.dfg.append_block_param(block, split_type);
|
||||
|
||||
// Now the original value is dangling. Insert a concatenation instruction that can
|
||||
// compute it from the two new parameters. This also serves as a record of what we
|
||||
// did so a future call to this function doesn't have to redo the work.
|
||||
//
|
||||
// Note that it is safe to move `pos` here since `reuse` was set above, so we don't
|
||||
// need to insert a split instruction before returning.
|
||||
pos.goto_first_inst(block);
|
||||
pos.ins()
|
||||
.with_result(value)
|
||||
.Binary(concat, split_type, lo, hi);
|
||||
|
||||
// Finally, splitting the block parameter is not enough. We also have to repair all
|
||||
// of the predecessor instructions that branch here.
|
||||
add_repair(concat, split_type, block, param_num, hi_num, repairs);
|
||||
|
||||
(lo, hi)
|
||||
}
|
||||
|
||||
// Add a repair entry to the work list.
|
||||
fn add_repair(
|
||||
concat: Opcode,
|
||||
split_type: Type,
|
||||
block: Block,
|
||||
num: usize,
|
||||
hi_num: usize,
|
||||
repairs: &mut Vec<Repair>,
|
||||
) {
|
||||
repairs.push(Repair {
|
||||
concat,
|
||||
split_type,
|
||||
block,
|
||||
num,
|
||||
hi_num,
|
||||
});
|
||||
}
|
||||
|
||||
/// Strip concat-split chains. Return a simpler way of computing the same value.
|
||||
///
|
||||
/// Given this input:
|
||||
///
|
||||
/// ```clif
|
||||
/// v10 = iconcat v1, v2
|
||||
/// v11, v12 = isplit v10
|
||||
/// ```
|
||||
///
|
||||
/// This function resolves `v11` to `v1` and `v12` to `v2`.
|
||||
fn resolve_splits(dfg: &ir::DataFlowGraph, value: Value) -> Value {
|
||||
let value = dfg.resolve_aliases(value);
|
||||
|
||||
// Deconstruct a split instruction.
|
||||
let split_res;
|
||||
let concat_opc;
|
||||
let split_arg;
|
||||
if let ValueDef::Result(inst, num) = dfg.value_def(value) {
|
||||
split_res = num;
|
||||
concat_opc = match dfg[inst].opcode() {
|
||||
Opcode::Isplit => Opcode::Iconcat,
|
||||
Opcode::Vsplit => Opcode::Vconcat,
|
||||
_ => return value,
|
||||
};
|
||||
split_arg = dfg.inst_args(inst)[0];
|
||||
} else {
|
||||
return value;
|
||||
}
|
||||
|
||||
// See if split_arg is defined by a concatenation instruction.
|
||||
if let ValueDef::Result(inst, _) = dfg.value_def(split_arg) {
|
||||
if dfg[inst].opcode() == concat_opc {
|
||||
return dfg.inst_args(inst)[split_res];
|
||||
}
|
||||
}
|
||||
|
||||
value
|
||||
}
|
||||
|
||||
/// Simplify the arguments to a branch *after* the instructions leading up to the branch have been
|
||||
/// legalized.
|
||||
///
|
||||
/// The branch argument repairs performed by `split_any()` above may be performed on branches that
|
||||
/// have not yet been legalized. The repaired arguments can be defined by actual split
|
||||
/// instructions in that case.
|
||||
///
|
||||
/// After legalizing the instructions computing the value that was split, it is likely that we can
|
||||
/// avoid depending on the split instruction. Its input probably comes from a concatenation.
|
||||
pub fn simplify_branch_arguments(dfg: &mut ir::DataFlowGraph, branch: Inst) {
|
||||
let mut new_args = SmallVec::<[Value; 32]>::new();
|
||||
|
||||
for &arg in dfg.inst_args(branch) {
|
||||
let new_arg = resolve_splits(dfg, arg);
|
||||
new_args.push(new_arg);
|
||||
}
|
||||
|
||||
dfg.inst_args_mut(branch).copy_from_slice(&new_args);
|
||||
}
|
||||
@@ -59,7 +59,6 @@ use hashbrown::{hash_map, HashMap, HashSet};
|
||||
use std::collections::{hash_map, HashMap, HashSet};
|
||||
|
||||
pub use crate::context::Context;
|
||||
pub use crate::legalizer::legalize_function;
|
||||
pub use crate::value_label::{ValueLabelsRanges, ValueLocRange};
|
||||
pub use crate::verifier::verify_function;
|
||||
pub use crate::write::write_function;
|
||||
@@ -87,7 +86,6 @@ pub use crate::entity::packed_option;
|
||||
pub use crate::machinst::buffer::MachSrcLoc;
|
||||
pub use crate::machinst::TextSectionBuilder;
|
||||
|
||||
mod abi;
|
||||
mod bitset;
|
||||
mod constant_hash;
|
||||
mod context;
|
||||
@@ -101,18 +99,12 @@ mod licm;
|
||||
mod log;
|
||||
mod machinst;
|
||||
mod nan_canonicalization;
|
||||
mod partition_slice;
|
||||
mod postopt;
|
||||
mod predicates;
|
||||
mod redundant_reload_remover;
|
||||
mod regalloc;
|
||||
mod remove_constant_phis;
|
||||
mod result;
|
||||
mod scoped_hash_map;
|
||||
mod simple_gvn;
|
||||
mod simple_preopt;
|
||||
mod stack_layout;
|
||||
mod topo_order;
|
||||
mod unreachable_code;
|
||||
mod value_label;
|
||||
|
||||
|
||||
@@ -1,20 +1,14 @@
|
||||
//! Adapter for a `MachBackend` to implement the `TargetIsa` trait.
|
||||
|
||||
use crate::binemit;
|
||||
use crate::ir;
|
||||
use crate::isa::{EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa};
|
||||
use crate::isa::{RegInfo, TargetIsa};
|
||||
use crate::machinst::*;
|
||||
use crate::regalloc::RegisterSet;
|
||||
use crate::settings::{self, Flags};
|
||||
|
||||
#[cfg(feature = "testing_hooks")]
|
||||
use crate::regalloc::RegDiversions;
|
||||
|
||||
#[cfg(feature = "unwind")]
|
||||
use crate::isa::unwind::systemv::RegisterMappingError;
|
||||
|
||||
use core::any::Any;
|
||||
use std::borrow::Cow;
|
||||
use std::fmt;
|
||||
use target_lexicon::Triple;
|
||||
|
||||
@@ -74,60 +68,6 @@ impl TargetIsa for TargetIsaAdapter {
|
||||
}
|
||||
}
|
||||
|
||||
fn legal_encodings<'a>(
|
||||
&'a self,
|
||||
_func: &'a ir::Function,
|
||||
_inst: &'a ir::InstructionData,
|
||||
_ctrl_typevar: ir::Type,
|
||||
) -> Encodings<'a> {
|
||||
panic!("Should not be called when new-style backend is available!")
|
||||
}
|
||||
|
||||
fn encode(
|
||||
&self,
|
||||
_func: &ir::Function,
|
||||
_inst: &ir::InstructionData,
|
||||
_ctrl_typevar: ir::Type,
|
||||
) -> Result<Encoding, Legalize> {
|
||||
panic!("Should not be called when new-style backend is available!")
|
||||
}
|
||||
|
||||
/// Old-style backend hook; this adapter never supports it and always panics.
fn encoding_info(&self) -> EncInfo {
    panic!("Should not be called when new-style backend is available!")
}
|
||||
|
||||
/// Old-style backend hook; this adapter never supports it and always panics.
fn legalize_signature(&self, _sig: &mut Cow<ir::Signature>, _current: bool) {
    panic!("Should not be called when new-style backend is available!")
}
|
||||
|
||||
/// Old-style backend hook; this adapter never supports it and always panics.
fn regclass_for_abi_type(&self, _ty: ir::Type) -> RegClass {
    panic!("Should not be called when new-style backend is available!")
}
|
||||
|
||||
/// Old-style backend hook; this adapter never supports it and always panics.
fn allocatable_registers(&self, _func: &ir::Function) -> RegisterSet {
    panic!("Should not be called when new-style backend is available!")
}
|
||||
|
||||
/// Old-style backend hook; this adapter never supports it and always panics.
fn prologue_epilogue(&self, _func: &mut ir::Function) -> CodegenResult<()> {
    panic!("Should not be called when new-style backend is available!")
}
|
||||
|
||||
/// Old-style single-instruction emission hook, only compiled with the `testing_hooks`
/// feature. This adapter never supports it and always panics.
#[cfg(feature = "testing_hooks")]
fn emit_inst(
    &self,
    _func: &ir::Function,
    _inst: ir::Inst,
    _divert: &mut RegDiversions,
    _sink: &mut dyn binemit::CodeSink,
) {
    panic!("Should not be called when new-style backend is available!")
}
|
||||
|
||||
/// Emit a whole function into memory.
|
||||
fn emit_function_to_memory(&self, _func: &ir::Function, _sink: &mut binemit::MemoryCodeSink) {
|
||||
panic!("Should not be called when new-style backend is available!")
|
||||
}
|
||||
|
||||
/// Return the wrapped backend; this adapter always has one, so the result is `Some`.
fn get_mach_backend(&self) -> Option<&dyn MachBackend> {
    let backend: &dyn MachBackend = &*self.backend;
    Some(backend)
}
|
||||
|
||||
@@ -1,97 +0,0 @@
|
||||
//! Rearrange the elements in a slice according to a predicate.
|
||||
|
||||
use core::mem;
|
||||
|
||||
/// Partition the mutable slice `s` in place so that every element for which `p` returns
/// true comes before every element for which it returns false.
///
/// Element order is not preserved in general; a slice that is already partitioned is left
/// untouched.
///
/// Returns how many elements satisfy `p`.
pub fn partition_slice<T, F>(s: &mut [T], mut p: F) -> usize
where
    F: FnMut(&T) -> bool,
{
    // Elements not yet classified. They are consumed from both ends, like a deque.
    let mut rest = s.iter_mut();

    // Count of elements known to satisfy the predicate (all at the front of the slice).
    let mut pos = 0;

    loop {
        // Scan forward to the first element that fails the predicate.
        let head = loop {
            let item = match rest.next() {
                Some(item) => item,
                None => return pos,
            };
            if !p(&*item) {
                break item;
            }
            pos += 1;
        };

        // Scan backward to the last element that satisfies the predicate.
        let tail = loop {
            let item = match rest.next_back() {
                Some(item) => item,
                None => return pos,
            };
            if p(&*item) {
                break item;
            }
        };

        // The pair is out of order: exchange them. The front slot now holds a match.
        mem::swap(head, tail);
        pos += 1;
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::partition_slice;
    use alloc::vec::Vec;

    /// Partition a copy of `x` with the predicate "is a multiple of 10" and require that
    /// the result equals `want` and that the returned count matches the number of
    /// multiples of 10 in `want`.
    fn check(x: &[u32], want: &[u32]) {
        assert_eq!(x.len(), want.len());
        let want_count = want.iter().filter(|&&n| n % 10 == 0).count();
        let mut v: Vec<u32> = x.iter().cloned().collect();
        let count = partition_slice(&mut v[..], |&n| n % 10 == 0);
        assert_eq!(v, want);
        assert_eq!(count, want_count);
    }

    #[test]
    fn empty() {
        check(&[], &[]);
    }

    #[test]
    fn singles() {
        // A one-element slice is always already partitioned.
        for &n in &[0u32, 1, 10] {
            check(&[n], &[n]);
        }
    }

    #[test]
    fn doubles() {
        let cases: [([u32; 2], [u32; 2]); 4] = [
            ([0, 0], [0, 0]),
            ([0, 5], [0, 5]),
            ([5, 0], [0, 5]),
            ([5, 4], [5, 4]),
        ];
        for (input, want) in cases.iter() {
            check(input, want);
        }
    }

    #[test]
    fn longer() {
        check(&[1, 2, 3], &[1, 2, 3]);
        // The relative order inside each half is an implementation detail in the next
        // three cases; the expected values pin the current behaviour.
        check(&[1, 2, 10], &[10, 2, 1]);
        check(&[1, 10, 2], &[10, 1, 2]);
        check(&[1, 20, 10], &[10, 20, 1]);
        check(&[1, 20, 3, 10], &[10, 20, 3, 1]);
        check(&[20, 3, 10, 1], &[20, 10, 3, 1]);
    }
}
|
||||
@@ -1,427 +0,0 @@
|
||||
//! A post-legalization rewriting pass.
|
||||
|
||||
#![allow(non_snake_case)]
|
||||
|
||||
use crate::cursor::{Cursor, EncCursor};
|
||||
use crate::ir::condcodes::{CondCode, FloatCC, IntCC};
|
||||
use crate::ir::dfg::ValueDef;
|
||||
use crate::ir::immediates::{Imm64, Offset32};
|
||||
use crate::ir::instructions::{Opcode, ValueList};
|
||||
use crate::ir::{Block, Function, Inst, InstBuilder, InstructionData, MemFlags, Type, Value};
|
||||
use crate::isa::TargetIsa;
|
||||
use crate::timing;
|
||||
|
||||
/// Information collected about a compare+branch sequence.
|
||||
struct CmpBrInfo {
|
||||
/// The branch instruction.
|
||||
br_inst: Inst,
|
||||
/// The icmp, icmp_imm, or fcmp instruction.
|
||||
cmp_inst: Inst,
|
||||
/// The destination of the branch.
|
||||
destination: Block,
|
||||
/// The arguments of the branch.
|
||||
args: ValueList,
|
||||
/// The first argument to the comparison. The second is in the `kind` field.
|
||||
cmp_arg: Value,
|
||||
/// If the branch is `brz` rather than `brnz`, we need to invert the condition
|
||||
/// before the branch.
|
||||
invert_branch_cond: bool,
|
||||
/// The kind of comparison, and the second argument.
|
||||
kind: CmpBrKind,
|
||||
}
|
||||
|
||||
enum CmpBrKind {
|
||||
Icmp { cond: IntCC, arg: Value },
|
||||
IcmpImm { cond: IntCC, imm: Imm64 },
|
||||
Fcmp { cond: FloatCC, arg: Value },
|
||||
}
|
||||
|
||||
/// Optimize comparisons to use flags values, to avoid materializing conditions
|
||||
/// in integer registers.
|
||||
///
|
||||
/// For example, optimize icmp/fcmp brz/brnz sequences into ifcmp/ffcmp brif/brff
|
||||
/// sequences.
|
||||
fn optimize_cpu_flags(
|
||||
pos: &mut EncCursor,
|
||||
inst: Inst,
|
||||
last_flags_clobber: Option<Inst>,
|
||||
isa: &dyn TargetIsa,
|
||||
) {
|
||||
// Look for compare and branch patterns.
|
||||
// This code could be considerably simplified with non-lexical lifetimes.
|
||||
let info = match pos.func.dfg[inst] {
|
||||
InstructionData::Branch {
|
||||
opcode,
|
||||
destination,
|
||||
ref args,
|
||||
} => {
|
||||
let first_arg = args.first(&pos.func.dfg.value_lists).unwrap();
|
||||
let invert_branch_cond = match opcode {
|
||||
Opcode::Brz => true,
|
||||
Opcode::Brnz => false,
|
||||
_ => panic!(),
|
||||
};
|
||||
if let ValueDef::Result(cond_inst, _) = pos.func.dfg.value_def(first_arg) {
|
||||
match pos.func.dfg[cond_inst] {
|
||||
InstructionData::IntCompare {
|
||||
cond,
|
||||
args: cmp_args,
|
||||
..
|
||||
} => CmpBrInfo {
|
||||
br_inst: inst,
|
||||
cmp_inst: cond_inst,
|
||||
destination,
|
||||
args: args.clone(),
|
||||
cmp_arg: cmp_args[0],
|
||||
invert_branch_cond,
|
||||
kind: CmpBrKind::Icmp {
|
||||
cond,
|
||||
arg: cmp_args[1],
|
||||
},
|
||||
},
|
||||
InstructionData::IntCompareImm {
|
||||
cond,
|
||||
arg: cmp_arg,
|
||||
imm: cmp_imm,
|
||||
..
|
||||
} => CmpBrInfo {
|
||||
br_inst: inst,
|
||||
cmp_inst: cond_inst,
|
||||
destination,
|
||||
args: args.clone(),
|
||||
cmp_arg,
|
||||
invert_branch_cond,
|
||||
kind: CmpBrKind::IcmpImm { cond, imm: cmp_imm },
|
||||
},
|
||||
InstructionData::FloatCompare {
|
||||
cond,
|
||||
args: cmp_args,
|
||||
..
|
||||
} => CmpBrInfo {
|
||||
br_inst: inst,
|
||||
cmp_inst: cond_inst,
|
||||
destination,
|
||||
args: args.clone(),
|
||||
cmp_arg: cmp_args[0],
|
||||
invert_branch_cond,
|
||||
kind: CmpBrKind::Fcmp {
|
||||
cond,
|
||||
arg: cmp_args[1],
|
||||
},
|
||||
},
|
||||
_ => return,
|
||||
}
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
// TODO: trapif, trueif, selectif, and their ff counterparts.
|
||||
_ => return,
|
||||
};
|
||||
|
||||
// If any instructions clobber the flags between the comparison and the branch,
|
||||
// don't optimize them.
|
||||
if last_flags_clobber != Some(info.cmp_inst) {
|
||||
return;
|
||||
}
|
||||
|
||||
// We found a compare+branch pattern. Transform it to use flags.
|
||||
let args = info.args.as_slice(&pos.func.dfg.value_lists)[1..].to_vec();
|
||||
pos.goto_inst(info.cmp_inst);
|
||||
pos.use_srcloc(info.cmp_inst);
|
||||
match info.kind {
|
||||
CmpBrKind::Icmp { mut cond, arg } => {
|
||||
let flags = pos.ins().ifcmp(info.cmp_arg, arg);
|
||||
pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
|
||||
if info.invert_branch_cond {
|
||||
cond = cond.inverse();
|
||||
}
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(info.br_inst)
|
||||
.brif(cond, flags, info.destination, &args);
|
||||
}
|
||||
CmpBrKind::IcmpImm { mut cond, imm } => {
|
||||
let flags = pos.ins().ifcmp_imm(info.cmp_arg, imm);
|
||||
pos.func.dfg.replace(info.cmp_inst).trueif(cond, flags);
|
||||
if info.invert_branch_cond {
|
||||
cond = cond.inverse();
|
||||
}
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(info.br_inst)
|
||||
.brif(cond, flags, info.destination, &args);
|
||||
}
|
||||
CmpBrKind::Fcmp { mut cond, arg } => {
|
||||
let flags = pos.ins().ffcmp(info.cmp_arg, arg);
|
||||
pos.func.dfg.replace(info.cmp_inst).trueff(cond, flags);
|
||||
if info.invert_branch_cond {
|
||||
cond = cond.inverse();
|
||||
}
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(info.br_inst)
|
||||
.brff(cond, flags, info.destination, &args);
|
||||
}
|
||||
}
|
||||
let ok = pos.func.update_encoding(info.cmp_inst, isa).is_ok();
|
||||
debug_assert!(ok);
|
||||
let ok = pos.func.update_encoding(info.br_inst, isa).is_ok();
|
||||
debug_assert!(ok);
|
||||
}
|
||||
|
||||
struct MemOpInfo {
|
||||
opcode: Opcode,
|
||||
itype: Type,
|
||||
arg: Value,
|
||||
st_arg: Option<Value>,
|
||||
flags: MemFlags,
|
||||
offset: Offset32,
|
||||
}
|
||||
|
||||
fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetIsa) {
|
||||
// Look for simple loads and stores we can optimize.
|
||||
let info = match pos.func.dfg[inst] {
|
||||
InstructionData::Load {
|
||||
opcode,
|
||||
arg,
|
||||
flags,
|
||||
offset,
|
||||
} => MemOpInfo {
|
||||
opcode,
|
||||
itype: pos.func.dfg.ctrl_typevar(inst),
|
||||
arg,
|
||||
st_arg: None,
|
||||
flags,
|
||||
offset,
|
||||
},
|
||||
InstructionData::Store {
|
||||
opcode,
|
||||
args,
|
||||
flags,
|
||||
offset,
|
||||
} => MemOpInfo {
|
||||
opcode,
|
||||
itype: pos.func.dfg.ctrl_typevar(inst),
|
||||
arg: args[1],
|
||||
st_arg: Some(args[0]),
|
||||
flags,
|
||||
offset,
|
||||
},
|
||||
_ => return,
|
||||
};
|
||||
|
||||
// Examine the instruction that defines the address operand.
|
||||
if let ValueDef::Result(result_inst, _) = pos.func.dfg.value_def(info.arg) {
|
||||
match pos.func.dfg[result_inst] {
|
||||
InstructionData::Binary {
|
||||
opcode: Opcode::Iadd,
|
||||
args,
|
||||
} => match info.opcode {
|
||||
// Operand is an iadd. Fold it into a memory address with a complex address mode.
|
||||
Opcode::Load => {
|
||||
pos.func.dfg.replace(inst).load_complex(
|
||||
info.itype,
|
||||
info.flags,
|
||||
&args,
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Uload8 => {
|
||||
pos.func.dfg.replace(inst).uload8_complex(
|
||||
info.itype,
|
||||
info.flags,
|
||||
&args,
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Sload8 => {
|
||||
pos.func.dfg.replace(inst).sload8_complex(
|
||||
info.itype,
|
||||
info.flags,
|
||||
&args,
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Uload16 => {
|
||||
pos.func.dfg.replace(inst).uload16_complex(
|
||||
info.itype,
|
||||
info.flags,
|
||||
&args,
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Sload16 => {
|
||||
pos.func.dfg.replace(inst).sload16_complex(
|
||||
info.itype,
|
||||
info.flags,
|
||||
&args,
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Uload32 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.uload32_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Sload32 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.sload32_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Uload8x8 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.uload8x8_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Sload8x8 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.sload8x8_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Uload16x4 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.uload16x4_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Sload16x4 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.sload16x4_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Uload32x2 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.uload32x2_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Sload32x2 => {
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.sload32x2_complex(info.flags, &args, info.offset);
|
||||
}
|
||||
Opcode::Store => {
|
||||
pos.func.dfg.replace(inst).store_complex(
|
||||
info.flags,
|
||||
info.st_arg.unwrap(),
|
||||
&args,
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Istore8 => {
|
||||
pos.func.dfg.replace(inst).istore8_complex(
|
||||
info.flags,
|
||||
info.st_arg.unwrap(),
|
||||
&args,
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Istore16 => {
|
||||
pos.func.dfg.replace(inst).istore16_complex(
|
||||
info.flags,
|
||||
info.st_arg.unwrap(),
|
||||
&args,
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
Opcode::Istore32 => {
|
||||
pos.func.dfg.replace(inst).istore32_complex(
|
||||
info.flags,
|
||||
info.st_arg.unwrap(),
|
||||
&args,
|
||||
info.offset,
|
||||
);
|
||||
}
|
||||
_ => panic!("Unsupported load or store opcode"),
|
||||
},
|
||||
InstructionData::BinaryImm64 {
|
||||
opcode: Opcode::IaddImm,
|
||||
arg,
|
||||
imm,
|
||||
} => match pos.func.dfg[inst] {
|
||||
// Operand is an iadd_imm. Fold the immediate into the offset if possible.
|
||||
InstructionData::Load {
|
||||
arg: ref mut load_arg,
|
||||
ref mut offset,
|
||||
..
|
||||
} => {
|
||||
if let Some(imm) = offset.try_add_i64(imm.into()) {
|
||||
*load_arg = arg;
|
||||
*offset = imm;
|
||||
} else {
|
||||
// Overflow.
|
||||
return;
|
||||
}
|
||||
}
|
||||
InstructionData::Store {
|
||||
args: ref mut store_args,
|
||||
ref mut offset,
|
||||
..
|
||||
} => {
|
||||
if let Some(imm) = offset.try_add_i64(imm.into()) {
|
||||
store_args[1] = arg;
|
||||
*offset = imm;
|
||||
} else {
|
||||
// Overflow.
|
||||
return;
|
||||
}
|
||||
}
|
||||
_ => panic!(),
|
||||
},
|
||||
_ => {
|
||||
// Address value is defined by some other kind of instruction.
|
||||
return;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Address value is not the result of an instruction.
|
||||
return;
|
||||
}
|
||||
|
||||
let ok = pos.func.update_encoding(inst, isa).is_ok();
|
||||
debug_assert!(
|
||||
ok,
|
||||
"failed to update encoding for `{}`",
|
||||
pos.func.dfg.display_inst(inst, isa)
|
||||
);
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------
|
||||
//
|
||||
// The main post-opt pass.
|
||||
|
||||
pub fn do_postopt(func: &mut Function, isa: &dyn TargetIsa) {
|
||||
let _tt = timing::postopt();
|
||||
let mut pos = EncCursor::new(func, isa);
|
||||
let is_mach_backend = isa.get_mach_backend().is_some();
|
||||
while let Some(_block) = pos.next_block() {
|
||||
let mut last_flags_clobber = None;
|
||||
while let Some(inst) = pos.next_inst() {
|
||||
if !is_mach_backend && isa.uses_cpu_flags() {
|
||||
// Optimize instructions to make use of flags.
|
||||
optimize_cpu_flags(&mut pos, inst, last_flags_clobber, isa);
|
||||
|
||||
// Track the most recent seen instruction that clobbers the flags.
|
||||
if let Some(constraints) = isa
|
||||
.encoding_info()
|
||||
.operand_constraints(pos.func.encodings[inst])
|
||||
{
|
||||
if constraints.clobbers_flags {
|
||||
last_flags_clobber = Some(inst)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if isa.uses_complex_addresses() {
|
||||
optimize_complex_addresses(&mut pos, inst, isa);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,904 +0,0 @@
|
||||
//! This module implements a late-stage redundant-reload remover, which runs after registers have
|
||||
//! been allocated and stack slots have been given specific offsets.
|
||||
|
||||
use crate::cursor::{Cursor, CursorPosition, EncCursor, FuncCursor};
|
||||
use crate::entity::EntitySet;
|
||||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::dfg::DataFlowGraph;
|
||||
use crate::ir::instructions::BranchInfo;
|
||||
use crate::ir::stackslot::{StackSlotKind, StackSlots};
|
||||
use crate::ir::{
|
||||
Block, Function, Inst, InstBuilder, InstructionData, Opcode, StackSlotData, Type, Value,
|
||||
ValueLoc,
|
||||
};
|
||||
use crate::isa::{RegInfo, RegUnit, TargetIsa};
|
||||
use crate::regalloc::RegDiversions;
|
||||
use alloc::vec::Vec;
|
||||
use core::convert::TryInto;
|
||||
use cranelift_entity::{PrimaryMap, SecondaryMap};
|
||||
|
||||
// =============================================================================================
|
||||
// A description of the redundant-fill-removal algorithm
|
||||
//
|
||||
//
|
||||
// The algorithm works forwards through each Block. It carries along and updates a table,
|
||||
// AvailEnv, with which it tracks registers that are known to have the same value as some stack
|
||||
// slot. The actions on encountering an instruction depend on the instruction, as follows:
|
||||
//
|
||||
// ss1 = spill r0: update the AvailEnv so as to note that slot `ss1` and register `r0`
|
||||
// have the same value.
|
||||
//
|
||||
// r1 = fill ss0: look in the AvailEnv. If it tells us that register `r1` and slot `ss0`
|
||||
// have the same value, then delete the instruction by converting it to a
|
||||
// `fill_nop`.
|
||||
//
|
||||
// If it tells us that some other register `r2` has the same value as
|
||||
// slot `ss0`, convert the instruction into a copy from `r2` to `r1`.
|
||||
//
|
||||
// any other insn: remove from the AvailEnv, any bindings associated with registers
|
||||
// written by this instruction, since they will be invalidated by it.
|
||||
//
|
||||
// Tracking the effects of `copy` instructions in AvailEnv for the case when both source and
|
||||
// destination are registers does not cause any more fills to be removed or converted to copies.
|
||||
// It's not clear why.
|
||||
//
|
||||
// There are various other instruction-handling cases in `visit_inst`, which are documented
|
||||
// in-line, and do not change the core algorithm, so are not described here.
|
||||
//
|
||||
// The registers tracked by AvailEnv are the post-diversion registers that are really used by the
|
||||
// code; they are not the pre-diversion names associated with each SSA `Value`. The second
|
||||
// `fill` case above opportunistically copies values from registers that may have been diversion
|
||||
// targets in some predecessor block, and so are no longer associated with any specific SSA-level
|
||||
// name at the point the copy is made. Hence those copies (from `r2` to `r1`) cannot be done
|
||||
// with an ordinary `copy` instruction. Instead they have to be done using a new `copy_to_ssa`
|
||||
// instruction, which copies from an arbitrary register to a register-resident `Value` (that is,
|
||||
// "back to" SSA-world).
|
||||
//
|
||||
// That completes the description of the core algorithm.
|
||||
//
|
||||
// In the case where a block `A` jumps to `B` and `A` is the only predecessor of `B`, the
|
||||
// AvailEnv at the end of `A` will still be valid at the entry to `B`. In such a case, we can
|
||||
// profitably transform `B` using the AvailEnv "inherited" from `A`. In order to take full
|
||||
// advantage of this, this module partitions the function's CFG into tree-shaped groups of
|
||||
// blocks, and processes each tree as described above. So the AvailEnv is only initialised to
|
||||
// empty at the start of blocks that form the root of each tree; that is, for blocks which have
|
||||
// two or more predecessors.
|
||||
|
||||
// =============================================================================================
|
||||
// Top level algorithm structure
|
||||
//
|
||||
// The overall algorithm, for a function, starts like this:
|
||||
//
|
||||
// * (once per function): finds Blocks that have two or more predecessors, since they will be the
|
||||
// roots of Block trees. Also, the entry node for the function is considered to be a root.
|
||||
//
|
||||
// It then continues with a loop that first finds a tree of Blocks ("discovery") and then removes
|
||||
// redundant fills as described above ("processing"):
|
||||
//
|
||||
// * (discovery; once per tree): for each root, performs a depth first search to find all the Blocks
|
||||
// in the tree, guided by RedundantReloadRemover::discovery_stack.
|
||||
//
|
||||
// * (processing; once per tree): the just-discovered tree is then processed as described above,
|
||||
// guided by RedundantReloadRemover::processing_stack.
|
||||
//
|
||||
// In this way, all Blocks reachable from the function's entry point are eventually processed. Note
|
||||
// that each tree is processed as soon as it has been discovered, so the algorithm never creates a
|
||||
// list of trees for the function.
|
||||
//
|
||||
// The running state is stored in `RedundantReloadRemover`. This is allocated once and can be
|
||||
// reused for multiple functions so as to minimise heap turnover. The fields are, roughly:
|
||||
//
|
||||
// num_regunits -- constant for the whole function; used by the tree processing phase
|
||||
// num_preds_per_block -- constant for the whole function; used by the tree discovery process
|
||||
//
|
||||
// discovery_stack -- used to guide the tree discovery process
|
||||
// nodes_in_tree -- the discovered nodes are recorded here
|
||||
//
|
||||
// processing_stack -- used to guide the tree processing process
|
||||
// nodes_already_visited -- used to ensure the tree processing logic terminates in the case
|
||||
// where a tree has a branch back to its root node.
|
||||
//
|
||||
// There is further documentation in line below, as appropriate.
|
||||
|
||||
// =============================================================================================
|
||||
// A side note on register choice heuristics
|
||||
|
||||
// The core algorithm opportunistically replaces fill instructions when it knows of a register
|
||||
// that already holds the required value. How effective this is largely depends on how long
|
||||
// reloaded values happen to stay alive before the relevant register is overwritten. And that
|
||||
// depends on the register allocator's register choice heuristics. The worst case is, when the
|
||||
// register allocator reuses registers as soon as possible after they become free. Unfortunately
|
||||
// that was indeed the selection scheme, prior to development of this pass.
|
||||
//
|
||||
// As part of this work, the register selection scheme has been changed as follows: for registers
|
||||
// written by any instruction other than a fill, use the lowest numbered available register. But
|
||||
// for registers written by a fill instruction, use the highest numbered available register. The
|
||||
// aim is to try and keep reload- and non-reload registers disjoint to the extent possible.
|
||||
// Several other schemes were tried, but this one is simple and can be worth an extra 2% of
|
||||
// performance in some cases.
|
||||
//
|
||||
// The relevant change is more or less a one-line change in the solver.
|
||||
|
||||
// =============================================================================================
|
||||
// Data structures used for discovery of trees
|
||||
|
||||
// `ZeroOneOrMany` is used to record the number of predecessors a Block has.  The `Zero` case
|
||||
// is included so as to cleanly handle the case where the incoming graph has unreachable Blocks.
|
||||
|
||||
/// Coarse count of a Block's predecessors, as needed for tree discovery.
#[derive(Clone, PartialEq)]
enum ZeroOneOrMany {
    /// No predecessors.
    Zero,
    /// Exactly one predecessor.
    One,
    /// Two or more predecessors.
    Many,
}
|
||||
|
||||
// =============================================================================================
|
||||
// Data structures used for processing of trees
|
||||
|
||||
// `SlotInfo` describes a spill slot in the obvious way. Note that it doesn't indicate which
|
||||
// register(s) are currently associated with the slot. That job is done by `AvailEnv` instead.
|
||||
//
|
||||
// In the CL framework, stack slots are partitioned into disjoint sets, one for each
|
||||
// `StackSlotKind`. The offset and size only give a unique identity within any particular
|
||||
// `StackSlotKind`. So, to uniquely identify a stack slot, all three fields are necessary.
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
struct SlotInfo {
|
||||
kind: StackSlotKind,
|
||||
offset: i32,
|
||||
size: u32,
|
||||
}
|
||||
|
||||
// `AvailEnv` maps each possible register to a stack slot that holds the same value. The index
|
||||
// space of `AvailEnv::map` is exactly the set of registers available on the current target. If
|
||||
// (as is mostly the case) a register is not known to have the same value as a stack slot, then
|
||||
// its entry is `None` rather than `Some(..)`.
|
||||
//
|
||||
// Invariants for AvailEnv:
|
||||
//
|
||||
// AvailEnv may have multiple different registers bound to the same stack slot -- that is, `(kind,
|
||||
// offset, size)` triple. That's OK, and reflects the reality that those two registers contain
|
||||
// the same value. This could happen, for example, in the case
|
||||
//
|
||||
// ss1 = spill r0
|
||||
// ..
|
||||
// r2 = fill ss1
|
||||
//
|
||||
// Then both `r0` and `r2` will have the same value as `ss1`, provided that ".." doesn't write to
|
||||
// `r0`.
|
||||
//
|
||||
// To say that two different registers may be bound to the same stack slot is the same as saying
|
||||
// that it is allowed to have two different entries in AvailEnv with the same `(kind, offset,
|
||||
// size)` triple. What is *not* allowed is to have partial overlaps. That is, if two SlotInfos
|
||||
// have the same `kind` field and have `offset` and `size` fields that overlap, then their
|
||||
// `offset` and `size` fields must be identical. This is so as to make the algorithm safe against
|
||||
// situations where, for example, a 64 bit register is spilled, but then only the bottom 32 bits
|
||||
// are reloaded from the slot.
|
||||
//
|
||||
// Although in such a case it seems likely that the Cranelift IR would be ill-typed, and so this
|
||||
// case could probably not occur in practice.
|
||||
|
||||
#[derive(Clone)]
|
||||
struct AvailEnv {
|
||||
map: Vec<Option<SlotInfo>>,
|
||||
}
|
||||
|
||||
// `ProcessingStackElem` combines AvailEnv with contextual information needed to "navigate" within
|
||||
// a Block.
|
||||
//
|
||||
// A ProcessingStackElem conceptually has the lifetime of exactly one Block: once the current Block is
|
||||
// completed, the ProcessingStackElem will be abandoned. In practice the top level state,
|
||||
// RedundantReloadRemover, caches them, so as to avoid heap turnover.
|
||||
//
|
||||
// Note that ProcessingStackElem must contain a CursorPosition. The CursorPosition, which
|
||||
// indicates where we are in the current Block, cannot be implicitly maintained by looping over all
|
||||
// the instructions in a Block in turn, because we may choose to suspend processing the current Block
|
||||
// at a side exit, continue by processing the subtree reached via the side exit, and only later
|
||||
// resume the current Block.
|
||||
|
||||
struct ProcessingStackElem {
|
||||
/// Indicates the AvailEnv at the current point in the Block.
|
||||
avail_env: AvailEnv,
|
||||
|
||||
/// Shows where we currently are inside the Block.
|
||||
cursor: CursorPosition,
|
||||
|
||||
/// Indicates the currently active register diversions at the current point.
|
||||
diversions: RegDiversions,
|
||||
}
|
||||
|
||||
// =============================================================================================
|
||||
// The top level data structure
|
||||
|
||||
// `RedundantReloadRemover` contains data structures for the two passes: discovery of tree shaped
|
||||
// regions, and processing of them. These are allocated once and stay alive for the entire
|
||||
// function, even though they are cleared out for each new tree shaped region. It also caches
|
||||
// `num_regunits` and `num_preds_per_block`, which are computed at the start of each function and
|
||||
// then remain constant.
|
||||
|
||||
/// The redundant reload remover's state.
|
||||
pub struct RedundantReloadRemover {
|
||||
/// The total number of RegUnits available on this architecture. This is unknown when the
|
||||
/// RedundantReloadRemover is created. It becomes known at the beginning of processing of a
|
||||
/// function.
|
||||
num_regunits: Option<u16>,
|
||||
|
||||
/// This stores, for each Block, a characterisation of the number of predecessors it has.
|
||||
num_preds_per_block: PrimaryMap<Block, ZeroOneOrMany>,
|
||||
|
||||
/// The stack used for the first phase (discovery). There is one element on the discovery
|
||||
/// stack for each currently unexplored Block in the tree being searched.
|
||||
discovery_stack: Vec<Block>,
|
||||
|
||||
/// The nodes in the discovered tree are inserted here.
|
||||
nodes_in_tree: EntitySet<Block>,
|
||||
|
||||
/// The stack used during the second phase (transformation). There is one element on the
|
||||
/// processing stack for each currently-open node in the tree being transformed.
|
||||
processing_stack: Vec<ProcessingStackElem>,
|
||||
|
||||
/// Used in the second phase to avoid visiting nodes more than once.
|
||||
nodes_already_visited: EntitySet<Block>,
|
||||
}
|
||||
|
||||
// =============================================================================================
|
||||
// Miscellaneous small helper functions
|
||||
|
||||
// Is this a kind of stack slot that is safe to track in AvailEnv? This is probably overly
|
||||
// conservative, but tracking only the SpillSlot and IncomingArgument kinds catches almost all
|
||||
// available redundancy in practice.
|
||||
fn is_slot_kind_tracked(kind: StackSlotKind) -> bool {
|
||||
match kind {
|
||||
StackSlotKind::SpillSlot | StackSlotKind::IncomingArg => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
// Find out if the range `[offset, +size)` overlaps with the range in `si`.
|
||||
fn overlaps(si: &SlotInfo, offset: i32, size: u32) -> bool {
|
||||
let a_offset = si.offset as i64;
|
||||
let a_size = si.size as i64;
|
||||
let b_offset = offset as i64;
|
||||
let b_size = size as i64;
|
||||
let no_overlap = a_offset + a_size <= b_offset || b_offset + b_size <= a_offset;
|
||||
!no_overlap
|
||||
}
|
||||
|
||||
// Find, in `reginfo`, the register bank that `reg` lives in, and return the lower limit and size
|
||||
// of the bank. This is so the caller can conveniently iterate over all RegUnits in the bank that
|
||||
// `reg` lives in.
|
||||
fn find_bank_limits(reginfo: &RegInfo, reg: RegUnit) -> (RegUnit, u16) {
|
||||
if let Some(bank) = reginfo.bank_containing_regunit(reg) {
|
||||
return (bank.first_unit, bank.units);
|
||||
}
|
||||
// We should never get here, since `reg` must come from *some* RegBank.
|
||||
panic!("find_regclass_limits: reg not found");
|
||||
}
|
||||
|
||||
// Returns the register that `v` is allocated to. Assumes that `v` actually resides in a
|
||||
// register.
|
||||
fn reg_of_value(locations: &SecondaryMap<Value, ValueLoc>, v: Value) -> RegUnit {
|
||||
match locations[v] {
|
||||
ValueLoc::Reg(ru) => ru,
|
||||
_ => panic!("reg_of_value: value isn't in a reg"),
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the stack slot that `v` is allocated to. Assumes that `v` actually resides in a stack
|
||||
// slot.
|
||||
fn slot_of_value<'s>(
|
||||
locations: &SecondaryMap<Value, ValueLoc>,
|
||||
stack_slots: &'s StackSlots,
|
||||
v: Value,
|
||||
) -> &'s StackSlotData {
|
||||
match locations[v] {
|
||||
ValueLoc::Stack(slot) => &stack_slots[slot],
|
||||
_ => panic!("slot_of_value: value isn't in a stack slot"),
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================================
|
||||
// Top level: discovery of tree shaped regions
|
||||
|
||||
impl RedundantReloadRemover {
|
||||
// A helper for `add_nodes_to_tree` below.
|
||||
fn discovery_stack_push_successors_of(&mut self, cfg: &ControlFlowGraph, node: Block) {
|
||||
for successor in cfg.succ_iter(node) {
|
||||
self.discovery_stack.push(successor);
|
||||
}
|
||||
}
|
||||
|
||||
// Visit the tree of Blocks rooted at `starting_point` and add them to `self.nodes_in_tree`.
|
||||
// `self.num_preds_per_block` guides the process, ensuring we don't leave the tree-ish region
|
||||
// and indirectly ensuring that the process will terminate in the presence of cycles in the
|
||||
// graph. `self.discovery_stack` holds the search state in this function.
|
||||
fn add_nodes_to_tree(&mut self, cfg: &ControlFlowGraph, starting_point: Block) {
|
||||
// One might well ask why this doesn't loop forever when it encounters cycles in the
|
||||
// control flow graph. The reason is that any cycle in the graph that is reachable from
|
||||
// anywhere outside the cycle -- in particular, that is reachable from the function's
|
||||
// entry node -- must have at least one node that has two or more predecessors. So the
|
||||
// logic below won't follow into it, because it regards any such node as the root of some
|
||||
// other tree.
|
||||
debug_assert!(self.discovery_stack.is_empty());
|
||||
debug_assert!(self.nodes_in_tree.is_empty());
|
||||
|
||||
self.nodes_in_tree.insert(starting_point);
|
||||
self.discovery_stack_push_successors_of(cfg, starting_point);
|
||||
|
||||
while let Some(node) = self.discovery_stack.pop() {
|
||||
match self.num_preds_per_block[node] {
|
||||
// We arrived at a node with multiple predecessors, so it's a new root. Ignore it.
|
||||
ZeroOneOrMany::Many => {}
|
||||
// This node has just one predecessor, so we should incorporate it in the tree and
|
||||
// immediately transition into searching from it instead.
|
||||
ZeroOneOrMany::One => {
|
||||
self.nodes_in_tree.insert(node);
|
||||
self.discovery_stack_push_successors_of(cfg, node);
|
||||
}
|
||||
// This is meaningless. We arrived at a node that doesn't point back at where we
|
||||
// came from.
|
||||
ZeroOneOrMany::Zero => panic!("add_nodes_to_tree: inconsistent graph"),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================================
|
||||
// Operations relating to `AvailEnv`
|
||||
|
||||
impl AvailEnv {
|
||||
// Create a new one.
|
||||
fn new(size: usize) -> Self {
|
||||
let mut env = Self {
|
||||
map: Vec::<Option<SlotInfo>>::new(),
|
||||
};
|
||||
env.map.resize(size, None);
|
||||
env
|
||||
}
|
||||
|
||||
// Debug only: checks (some of) the required AvailEnv invariants.
|
||||
#[cfg(debug_assertions)]
|
||||
fn check_invariants(&self) -> bool {
|
||||
// Check that any overlapping entries overlap exactly. This is super lame (quadratic),
|
||||
// but it's only used in debug builds.
|
||||
for i in 0..self.map.len() {
|
||||
if let Some(si) = self.map[i] {
|
||||
for j in i + 1..self.map.len() {
|
||||
if let Some(sj) = self.map[j] {
|
||||
// "si and sj overlap, but not exactly"
|
||||
if si.kind == sj.kind
|
||||
&& overlaps(&si, sj.offset, sj.size)
|
||||
&& !(si.offset == sj.offset && si.size == sj.size)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
// Invalidates the binding associated with `reg`. Note that by construction of AvailEnv,
|
||||
// `reg` can only be associated with one binding at once.
|
||||
fn invalidate_by_reg(&mut self, reg: RegUnit) {
|
||||
self.map[reg as usize] = None;
|
||||
}
|
||||
|
||||
// Invalidates any binding that has any overlap with `(kind, offset, size)`.
|
||||
fn invalidate_by_offset(&mut self, kind: StackSlotKind, offset: i32, size: u32) {
|
||||
debug_assert!(is_slot_kind_tracked(kind));
|
||||
for i in 0..self.map.len() {
|
||||
if let Some(si) = &self.map[i] {
|
||||
if si.kind == kind && overlaps(&si, offset, size) {
|
||||
self.map[i] = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Invalidates all bindings.
|
||||
fn invalidate_all(&mut self) {
|
||||
for i in 0..self.map.len() {
|
||||
self.map[i] = None;
|
||||
}
|
||||
}
|
||||
|
||||
// Updates AvailEnv to track the effect of a `regmove` instruction.
|
||||
fn copy_reg(&mut self, src: RegUnit, dst: RegUnit) {
|
||||
self.map[dst as usize] = self.map[src as usize];
|
||||
}
|
||||
|
||||
// Does `env` have the exact binding characterised by `(reg, kind, offset, size)` ?
|
||||
fn has_exact_binding(&self, reg: RegUnit, kind: StackSlotKind, offset: i32, size: u32) -> bool {
|
||||
debug_assert!(is_slot_kind_tracked(kind));
|
||||
if let Some(si) = &self.map[reg as usize] {
|
||||
return si.kind == kind && si.offset == offset && si.size == size;
|
||||
}
|
||||
// No such binding.
|
||||
false
|
||||
}
|
||||
|
||||
// Does `env` have a binding characterised by `(kind, offset, size)` but to a register, let's
|
||||
// call it `other_reg`, that isn't `reg`? If so, return `other_reg`. Note that `other_reg`
|
||||
// will have the same bank as `reg`. It is a checked error to call this function with a
|
||||
// binding matching all four of `(reg, kind, offset, size)`.
|
||||
fn has_inexact_binding(
|
||||
&self,
|
||||
reginfo: &RegInfo,
|
||||
reg: RegUnit,
|
||||
kind: StackSlotKind,
|
||||
offset: i32,
|
||||
size: u32,
|
||||
) -> Option<RegUnit> {
|
||||
debug_assert!(is_slot_kind_tracked(kind));
|
||||
// Find the range of RegUnit numbers for the bank that contains `reg`, and use that as our
|
||||
// search space. This is so as to guarantee that any match is restricted to the same bank
|
||||
// as `reg`.
|
||||
let (first_unit, num_units) = find_bank_limits(reginfo, reg);
|
||||
for other_reg in first_unit..first_unit + num_units {
|
||||
if let Some(si) = &self.map[other_reg as usize] {
|
||||
if si.kind == kind && si.offset == offset && si.size == size {
|
||||
if other_reg == reg {
|
||||
panic!("has_inexact_binding: binding *is* exact!");
|
||||
}
|
||||
return Some(other_reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
// No such binding.
|
||||
None
|
||||
}
|
||||
|
||||
// Create the binding `(reg, kind, offset, size)` in `env`, and throw away any previous
|
||||
// binding associated with either `reg` or the `(kind, offset, size)` triple.
|
||||
fn bind(&mut self, reg: RegUnit, kind: StackSlotKind, offset: i32, size: u32) {
|
||||
debug_assert!(is_slot_kind_tracked(kind));
|
||||
self.invalidate_by_offset(kind, offset, size);
|
||||
self.map[reg as usize] = Some(SlotInfo { kind, offset, size });
|
||||
}
|
||||
}
|
||||
|
||||
// Invalidates in `avail_env`, any binding associated with a regunit that is written by `inst`.
|
||||
fn invalidate_regs_written_by_inst(
|
||||
locations: &SecondaryMap<Value, ValueLoc>,
|
||||
diversions: &RegDiversions,
|
||||
dfg: &DataFlowGraph,
|
||||
avail_env: &mut AvailEnv,
|
||||
inst: Inst,
|
||||
) {
|
||||
for v in dfg.inst_results(inst).iter() {
|
||||
if let ValueLoc::Reg(ru) = locations[*v] {
|
||||
// This must be true. It would be meaningless for an SSA value to be diverted before
|
||||
// the point where it is defined.
|
||||
debug_assert!(diversions.reg(*v, locations) == ru);
|
||||
avail_env.invalidate_by_reg(ru);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================================
|
||||
// Processing of individual instructions
|
||||
|
||||
impl RedundantReloadRemover {
|
||||
// Process `inst`, possibly changing it into a different instruction, and possibly changing
|
||||
// `self.avail_env` and `func.dfg`.
|
||||
fn visit_inst(
|
||||
&mut self,
|
||||
func: &mut Function,
|
||||
reginfo: &RegInfo,
|
||||
isa: &dyn TargetIsa,
|
||||
inst: Inst,
|
||||
) {
|
||||
// Get hold of the top-of-stack work item. This is the state that we will mutate during
|
||||
// processing of this instruction.
|
||||
debug_assert!(!self.processing_stack.is_empty());
|
||||
let ProcessingStackElem {
|
||||
avail_env,
|
||||
diversions,
|
||||
..
|
||||
} = self.processing_stack.last_mut().unwrap();
|
||||
|
||||
#[cfg(debug_assertions)]
|
||||
debug_assert!(
|
||||
avail_env.check_invariants(),
|
||||
"visit_inst: env invariants not ok"
|
||||
);
|
||||
|
||||
let dfg = &mut func.dfg;
|
||||
let locations = &func.locations;
|
||||
let stack_slots = &func.stack_slots;
|
||||
|
||||
// To avoid difficulties with the borrow checker, do this in two stages. First, examine
|
||||
// the instruction to see if it can be deleted or modified, and park the relevant
|
||||
// information in `transform`. Update `self.avail_env` too. Later, use `transform` to
|
||||
// actually do the transformation if necessary.
|
||||
enum Transform {
|
||||
NoChange,
|
||||
ChangeToNopFill(Value), // delete this insn entirely
|
||||
ChangeToCopyToSSA(Type, RegUnit), // change it into a copy from the specified reg
|
||||
}
|
||||
let mut transform = Transform::NoChange;
|
||||
|
||||
// In this match { .. } statement, either we must treat the instruction specially, or we
|
||||
// must call `invalidate_regs_written_by_inst` on it.
|
||||
match &dfg[inst] {
|
||||
InstructionData::Unary {
|
||||
opcode: Opcode::Spill,
|
||||
arg: src_value,
|
||||
} => {
|
||||
// Extract: (src_reg, kind, offset, size)
|
||||
// Invalidate: (kind, offset, size)
|
||||
// Add new binding: {src_reg -> (kind, offset, size)}
|
||||
// Don't forget that src_value might be diverted, so we have to deref it.
|
||||
let slot = slot_of_value(locations, stack_slots, dfg.inst_results(inst)[0]);
|
||||
let src_reg = diversions.reg(*src_value, locations);
|
||||
let kind = slot.kind;
|
||||
if is_slot_kind_tracked(kind) {
|
||||
let offset = slot.offset.expect("visit_inst: spill with no offset");
|
||||
let size = slot.size;
|
||||
avail_env.bind(src_reg, kind, offset, size);
|
||||
} else {
|
||||
// We don't expect this insn to write any regs. But to be consistent with the
|
||||
// rule above, do this anyway.
|
||||
invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst);
|
||||
}
|
||||
}
|
||||
InstructionData::Unary {
|
||||
opcode: Opcode::Fill,
|
||||
arg: src_value,
|
||||
} => {
|
||||
// Extract: (dst_reg, kind, offset, size)
|
||||
// Invalidate: (kind, offset, size)
|
||||
// Add new: {dst_reg -> (dst_value, kind, offset, size)}
|
||||
let slot = slot_of_value(locations, stack_slots, *src_value);
|
||||
let dst_value = dfg.inst_results(inst)[0];
|
||||
let dst_reg = reg_of_value(locations, dst_value);
|
||||
// This must be true. It would be meaningless for an SSA value to be diverted
|
||||
// before it was defined.
|
||||
debug_assert!(dst_reg == diversions.reg(dst_value, locations));
|
||||
let kind = slot.kind;
|
||||
if is_slot_kind_tracked(kind) {
|
||||
let offset = slot.offset.expect("visit_inst: fill with no offset");
|
||||
let size = slot.size;
|
||||
if avail_env.has_exact_binding(dst_reg, kind, offset, size) {
|
||||
// This instruction is an exact copy of a fill we saw earlier, and the
|
||||
// loaded value is still valid. So we'll schedule this instruction for
|
||||
// deletion (below). No need to make any changes to `avail_env`.
|
||||
transform = Transform::ChangeToNopFill(*src_value);
|
||||
} else if let Some(other_reg) =
|
||||
avail_env.has_inexact_binding(reginfo, dst_reg, kind, offset, size)
|
||||
{
|
||||
// This fill is from the required slot, but into a different register
|
||||
// `other_reg`. So replace it with a copy from `other_reg` to `dst_reg`
|
||||
// and update `dst_reg`s binding to make it the same as `other_reg`'s, so
|
||||
// as to maximise the chances of future matches after this instruction.
|
||||
debug_assert!(other_reg != dst_reg);
|
||||
transform =
|
||||
Transform::ChangeToCopyToSSA(dfg.value_type(dst_value), other_reg);
|
||||
avail_env.copy_reg(other_reg, dst_reg);
|
||||
} else {
|
||||
// This fill creates some new binding we don't know about. Update
|
||||
// `avail_env` to track it.
|
||||
avail_env.bind(dst_reg, kind, offset, size);
|
||||
}
|
||||
} else {
|
||||
// Else it's "just another instruction that writes a reg", so we'd better
|
||||
// treat it as such, just as we do below for instructions that we don't handle
|
||||
// specially.
|
||||
invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst);
|
||||
}
|
||||
}
|
||||
InstructionData::RegMove { src, dst, .. } => {
|
||||
// These happen relatively rarely, but just frequently enough that it's worth
|
||||
// tracking the copy (at the machine level, it's really a copy) in `avail_env`.
|
||||
avail_env.copy_reg(*src, *dst);
|
||||
}
|
||||
InstructionData::RegSpill { .. }
|
||||
| InstructionData::RegFill { .. }
|
||||
| InstructionData::Call { .. }
|
||||
| InstructionData::CallIndirect { .. }
|
||||
| InstructionData::StackLoad { .. }
|
||||
| InstructionData::StackStore { .. }
|
||||
| InstructionData::Unary {
|
||||
opcode: Opcode::AdjustSpDown,
|
||||
..
|
||||
}
|
||||
| InstructionData::UnaryImm {
|
||||
opcode: Opcode::AdjustSpUpImm,
|
||||
..
|
||||
}
|
||||
| InstructionData::UnaryImm {
|
||||
opcode: Opcode::AdjustSpDownImm,
|
||||
..
|
||||
} => {
|
||||
// All of these change, or might change, the memory-register bindings tracked in
|
||||
// `avail_env` in some way we don't know about, or at least, we might be able to
|
||||
// track, but for which the effort-to-benefit ratio seems too low to bother. So
|
||||
// play safe: forget everything we know.
|
||||
//
|
||||
// For Call/CallIndirect, we could do better when compiling for calling
|
||||
// conventions that have callee-saved registers, since bindings for them would
|
||||
// remain valid across the call.
|
||||
avail_env.invalidate_all();
|
||||
}
|
||||
_ => {
|
||||
// Invalidate: any `avail_env` entry associated with a reg written by `inst`.
|
||||
invalidate_regs_written_by_inst(locations, diversions, dfg, avail_env, inst);
|
||||
}
|
||||
}
|
||||
|
||||
// Actually do the transformation.
|
||||
match transform {
|
||||
Transform::NoChange => {}
|
||||
Transform::ChangeToNopFill(arg) => {
|
||||
// Load is completely redundant. Convert it to a no-op.
|
||||
dfg.replace(inst).fill_nop(arg);
|
||||
let ok = func.update_encoding(inst, isa).is_ok();
|
||||
debug_assert!(
|
||||
ok,
|
||||
"fill_nop encoding missing for this type: `{}`",
|
||||
func.dfg.display_inst(inst, isa)
|
||||
);
|
||||
}
|
||||
Transform::ChangeToCopyToSSA(ty, reg) => {
|
||||
// We already have the relevant value in some other register. Convert the
|
||||
// load into a reg-reg copy.
|
||||
dfg.replace(inst).copy_to_ssa(ty, reg);
|
||||
let ok = func.update_encoding(inst, isa).is_ok();
|
||||
debug_assert!(ok, "copy_to_ssa encoding missing for type {}", ty);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================================
|
||||
// Top level: processing of tree shaped regions
|
||||
|
||||
impl RedundantReloadRemover {
|
||||
// Push a clone of the top-of-stack ProcessingStackElem. This will be used to process exactly
|
||||
// one Block. The diversions are created new, rather than cloned, to reflect the fact
|
||||
// that diversions are local to each Block.
|
||||
fn processing_stack_push(&mut self, cursor: CursorPosition) {
|
||||
let avail_env = if let Some(stack_top) = self.processing_stack.last() {
|
||||
stack_top.avail_env.clone()
|
||||
} else {
|
||||
AvailEnv::new(
|
||||
self.num_regunits
|
||||
.expect("processing_stack_push: num_regunits unknown!")
|
||||
as usize,
|
||||
)
|
||||
};
|
||||
self.processing_stack.push(ProcessingStackElem {
|
||||
avail_env,
|
||||
cursor,
|
||||
diversions: RegDiversions::new(),
|
||||
});
|
||||
}
|
||||
|
||||
// This pushes the node `dst` onto the processing stack, and sets up the new
|
||||
// ProcessingStackElem accordingly. But it does all that only if `dst` is part of the current
|
||||
// tree *and* we haven't yet visited it.
|
||||
fn processing_stack_maybe_push(&mut self, dst: Block) {
|
||||
if self.nodes_in_tree.contains(dst) && !self.nodes_already_visited.contains(dst) {
|
||||
if !self.processing_stack.is_empty() {
|
||||
// If this isn't the outermost node in the tree (that is, the root), then it must
|
||||
// have exactly one predecessor. Nodes with no predecessors are dead and not
|
||||
// incorporated in any tree. Nodes with two or more predecessors are the root of
|
||||
// some other tree, and visiting them as if they were part of the current tree
|
||||
// would be a serious error.
|
||||
debug_assert!(self.num_preds_per_block[dst] == ZeroOneOrMany::One);
|
||||
}
|
||||
self.processing_stack_push(CursorPosition::Before(dst));
|
||||
self.nodes_already_visited.insert(dst);
|
||||
}
|
||||
}
|
||||
|
||||
// Perform redundant-reload removal on the tree shaped region of graph defined by `root` and
|
||||
// `self.nodes_in_tree`. The following state is modified: `self.processing_stack`,
|
||||
// `self.nodes_already_visited`, and `func.dfg`.
|
||||
fn process_tree(
|
||||
&mut self,
|
||||
func: &mut Function,
|
||||
reginfo: &RegInfo,
|
||||
isa: &dyn TargetIsa,
|
||||
root: Block,
|
||||
) {
|
||||
debug_assert!(self.nodes_in_tree.contains(root));
|
||||
debug_assert!(self.processing_stack.is_empty());
|
||||
debug_assert!(self.nodes_already_visited.is_empty());
|
||||
|
||||
// Create the initial work item
|
||||
self.processing_stack_maybe_push(root);
|
||||
|
||||
while !self.processing_stack.is_empty() {
|
||||
// It seems somewhat ridiculous to construct a whole new FuncCursor just so we can do
|
||||
// next_inst() on it once, and then copy the resulting position back out. But use of
|
||||
// a function-global FuncCursor, or of the EncCursor in struct Context, leads to
|
||||
// borrow checker problems, as does including FuncCursor directly in
|
||||
// ProcessingStackElem. In any case this is not as bad as it looks, since profiling
|
||||
// shows that the build-insert-step-extract work is reduced to just 8 machine
|
||||
// instructions in an optimised x86_64 build, presumably because rustc can inline and
|
||||
// then optimise out almost all the work.
|
||||
let tos = self.processing_stack.len() - 1;
|
||||
let mut pos = FuncCursor::new(func).at_position(self.processing_stack[tos].cursor);
|
||||
let maybe_inst = pos.next_inst();
|
||||
self.processing_stack[tos].cursor = pos.position();
|
||||
|
||||
if let Some(inst) = maybe_inst {
|
||||
// Deal with this insn, possibly changing it, possibly updating the top item of
|
||||
// `self.processing_stack`.
|
||||
self.visit_inst(func, reginfo, isa, inst);
|
||||
|
||||
// Update diversions after the insn.
|
||||
self.processing_stack[tos].diversions.apply(&func.dfg[inst]);
|
||||
|
||||
// If the insn can branch outside this Block, push work items on the stack for all
|
||||
// target Blocks that are part of the same tree and that we haven't yet visited.
|
||||
// The next iteration of this instruction-processing loop will immediately start
|
||||
// work on the most recently pushed Block, and will eventually continue in this Block
|
||||
// when those new items have been removed from the stack.
|
||||
match func.dfg.analyze_branch(inst) {
|
||||
BranchInfo::NotABranch => (),
|
||||
BranchInfo::SingleDest(dst, _) => {
|
||||
self.processing_stack_maybe_push(dst);
|
||||
}
|
||||
BranchInfo::Table(jt, default) => {
|
||||
func.jump_tables[jt]
|
||||
.iter()
|
||||
.for_each(|dst| self.processing_stack_maybe_push(*dst));
|
||||
if let Some(dst) = default {
|
||||
self.processing_stack_maybe_push(dst);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// We've come to the end of the current work-item (Block). We'll already have
|
||||
// processed the fallthrough/continuation/whatever for it using the logic above.
|
||||
// Pop it off the stack and resume work on its parent.
|
||||
self.processing_stack.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================================
|
||||
// Top level: perform redundant fill removal for a complete function
|
||||
|
||||
impl RedundantReloadRemover {
|
||||
/// Create a new remover state.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
num_regunits: None,
|
||||
num_preds_per_block: PrimaryMap::<Block, ZeroOneOrMany>::with_capacity(8),
|
||||
discovery_stack: Vec::<Block>::with_capacity(16),
|
||||
nodes_in_tree: EntitySet::<Block>::new(),
|
||||
processing_stack: Vec::<ProcessingStackElem>::with_capacity(8),
|
||||
nodes_already_visited: EntitySet::<Block>::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Clear the state of the remover.
|
||||
pub fn clear(&mut self) {
|
||||
self.clear_for_new_function();
|
||||
}
|
||||
|
||||
fn clear_for_new_function(&mut self) {
|
||||
self.num_preds_per_block.clear();
|
||||
self.clear_for_new_tree();
|
||||
}
|
||||
|
||||
fn clear_for_new_tree(&mut self) {
|
||||
self.discovery_stack.clear();
|
||||
self.nodes_in_tree.clear();
|
||||
self.processing_stack.clear();
|
||||
self.nodes_already_visited.clear();
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn do_redundant_fill_removal_on_function(
|
||||
&mut self,
|
||||
func: &mut Function,
|
||||
reginfo: &RegInfo,
|
||||
isa: &dyn TargetIsa,
|
||||
cfg: &ControlFlowGraph,
|
||||
) {
|
||||
// Fail in an obvious way if there are more than (2^32)-1 Blocks in this function.
|
||||
let num_blocks: u32 = func.dfg.num_blocks().try_into().unwrap();
|
||||
|
||||
// Clear out per-tree state.
|
||||
self.clear_for_new_function();
|
||||
|
||||
// Create a PrimaryMap that summarises the number of predecessors for each block, as 0, 1
|
||||
// or "many", and that also claims the entry block as having "many" predecessors.
|
||||
self.num_preds_per_block.clear();
|
||||
self.num_preds_per_block.reserve(num_blocks as usize);
|
||||
|
||||
for i in 0..num_blocks {
|
||||
let mut pi = cfg.pred_iter(Block::from_u32(i));
|
||||
let mut n_pi = ZeroOneOrMany::Zero;
|
||||
if pi.next().is_some() {
|
||||
n_pi = ZeroOneOrMany::One;
|
||||
if pi.next().is_some() {
|
||||
n_pi = ZeroOneOrMany::Many;
|
||||
// We don't care if there are more than two preds, so stop counting now.
|
||||
}
|
||||
}
|
||||
self.num_preds_per_block.push(n_pi);
|
||||
}
|
||||
debug_assert!(self.num_preds_per_block.len() == num_blocks as usize);
|
||||
|
||||
// The entry block must be the root of some tree, so set up the state to reflect that.
|
||||
let entry_block = func
|
||||
.layout
|
||||
.entry_block()
|
||||
.expect("do_redundant_fill_removal_on_function: entry block unknown");
|
||||
debug_assert!(self.num_preds_per_block[entry_block] == ZeroOneOrMany::Zero);
|
||||
self.num_preds_per_block[entry_block] = ZeroOneOrMany::Many;
|
||||
|
||||
// Now build and process trees.
|
||||
for root_ix in 0..self.num_preds_per_block.len() {
|
||||
let root = Block::from_u32(root_ix as u32);
|
||||
|
||||
// Build a tree for each node that has two or more preds, and ignore all other nodes.
|
||||
if self.num_preds_per_block[root] != ZeroOneOrMany::Many {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Clear out per-tree state.
|
||||
self.clear_for_new_tree();
|
||||
|
||||
// Discovery phase: build the tree, as `root` and `self.nodes_in_tree`.
|
||||
self.add_nodes_to_tree(cfg, root);
|
||||
debug_assert!(self.nodes_in_tree.cardinality() > 0);
|
||||
debug_assert!(self.num_preds_per_block[root] == ZeroOneOrMany::Many);
|
||||
|
||||
// Processing phase: do redundant-reload-removal.
|
||||
self.process_tree(func, reginfo, isa, root);
|
||||
debug_assert!(
|
||||
self.nodes_in_tree.cardinality() == self.nodes_already_visited.cardinality()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// =============================================================================================
|
||||
// Top level: the external interface
|
||||
|
||||
struct Context<'a> {
|
||||
// Current instruction as well as reference to function and ISA.
|
||||
cur: EncCursor<'a>,
|
||||
|
||||
// Cached ISA information. We save it here to avoid frequent virtual function calls on the
|
||||
// `TargetIsa` trait object.
|
||||
reginfo: RegInfo,
|
||||
|
||||
// References to contextual data structures we need.
|
||||
cfg: &'a ControlFlowGraph,
|
||||
|
||||
// The running state.
|
||||
state: &'a mut RedundantReloadRemover,
|
||||
}
|
||||
|
||||
impl RedundantReloadRemover {
|
||||
/// Run the remover.
|
||||
pub fn run(&mut self, isa: &dyn TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) {
|
||||
let ctx = Context {
|
||||
cur: EncCursor::new(func, isa),
|
||||
reginfo: isa.register_info(),
|
||||
cfg,
|
||||
state: self,
|
||||
};
|
||||
let mut total_regunits = 0;
|
||||
for rb in isa.register_info().banks {
|
||||
total_regunits += rb.units;
|
||||
}
|
||||
ctx.state.num_regunits = Some(total_regunits);
|
||||
ctx.state.do_redundant_fill_removal_on_function(
|
||||
ctx.cur.func,
|
||||
&ctx.reginfo,
|
||||
ctx.cur.isa,
|
||||
&ctx.cfg,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,126 +0,0 @@
|
||||
//! Value affinity for register allocation.
|
||||
//!
|
||||
//! An SSA value's affinity is a hint used to guide the register allocator. It specifies the class
|
||||
//! of allocation that is likely to cause the least amount of fixup moves in order to satisfy
|
||||
//! instruction operand constraints.
|
||||
//!
|
||||
//! For values that want to be in registers, the affinity hint includes a register class or
|
||||
//! subclass. This is just a hint, and the register allocator is allowed to pick a register from a
|
||||
//! larger register class instead.
|
||||
|
||||
use crate::ir::{AbiParam, ArgumentLoc};
|
||||
use crate::isa::{ConstraintKind, OperandConstraint, RegClassIndex, RegInfo, TargetIsa};
|
||||
use core::fmt;
|
||||
|
||||
/// Preferred register allocation for an SSA value.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub enum Affinity {
|
||||
/// No affinity.
|
||||
///
|
||||
/// This indicates a value that is not defined or used by any real instructions. It is a ghost
|
||||
/// value that won't appear in the final program.
|
||||
Unassigned,
|
||||
|
||||
/// This value should be placed in a spill slot on the stack.
|
||||
Stack,
|
||||
|
||||
/// This value prefers a register from the given register class.
|
||||
Reg(RegClassIndex),
|
||||
}
|
||||
|
||||
impl Default for Affinity {
|
||||
fn default() -> Self {
|
||||
Self::Unassigned
|
||||
}
|
||||
}
|
||||
|
||||
impl Affinity {
|
||||
/// Create an affinity that satisfies a single constraint.
|
||||
///
|
||||
/// This will never create an `Affinity::Unassigned`.
|
||||
/// Use the `Default` implementation for that.
|
||||
pub fn new(constraint: &OperandConstraint) -> Self {
|
||||
if constraint.kind == ConstraintKind::Stack {
|
||||
Self::Stack
|
||||
} else {
|
||||
Self::Reg(constraint.regclass.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Create an affinity that matches an ABI argument for `isa`.
|
||||
pub fn abi(arg: &AbiParam, isa: &dyn TargetIsa) -> Self {
|
||||
match arg.location {
|
||||
ArgumentLoc::Unassigned => Self::Unassigned,
|
||||
ArgumentLoc::Reg(_) => Self::Reg(isa.regclass_for_abi_type(arg.value_type).into()),
|
||||
ArgumentLoc::Stack(_) => Self::Stack,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this the `Unassigned` affinity?
|
||||
pub fn is_unassigned(self) -> bool {
|
||||
match self {
|
||||
Self::Unassigned => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this the `Reg` affinity?
|
||||
pub fn is_reg(self) -> bool {
|
||||
match self {
|
||||
Self::Reg(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this the `Stack` affinity?
|
||||
pub fn is_stack(self) -> bool {
|
||||
match self {
|
||||
Self::Stack => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Merge an operand constraint into this affinity.
|
||||
///
|
||||
/// Note that this does not guarantee that the register allocator will pick a register that
|
||||
/// satisfies the constraint.
|
||||
pub fn merge(&mut self, constraint: &OperandConstraint, reginfo: &RegInfo) {
|
||||
match *self {
|
||||
Self::Unassigned => *self = Self::new(constraint),
|
||||
Self::Reg(rc) => {
|
||||
// If the preferred register class is a subclass of the constraint, there's no need
|
||||
// to change anything.
|
||||
if constraint.kind != ConstraintKind::Stack && !constraint.regclass.has_subclass(rc)
|
||||
{
|
||||
// If the register classes overlap, try to shrink our preferred register class.
|
||||
if let Some(subclass) = constraint.regclass.intersect_index(reginfo.rc(rc)) {
|
||||
*self = Self::Reg(subclass);
|
||||
}
|
||||
}
|
||||
}
|
||||
Self::Stack => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an object that can display this value affinity, using the register info from the
|
||||
/// target ISA.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(self, regs: R) -> DisplayAffinity<'a> {
|
||||
DisplayAffinity(self, regs.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Displaying an `Affinity` correctly requires the associated `RegInfo` from the target ISA.
|
||||
pub struct DisplayAffinity<'a>(Affinity, Option<&'a RegInfo>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayAffinity<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.0 {
|
||||
Affinity::Unassigned => write!(f, "unassigned"),
|
||||
Affinity::Stack => write!(f, "stack"),
|
||||
Affinity::Reg(rci) => match self.1 {
|
||||
Some(regs) => write!(f, "{}", regs.rc(rci)),
|
||||
None => write!(f, "{}", rci),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,169 +0,0 @@
|
||||
//! Split the outgoing edges of conditional branches that pass parameters.
|
||||
//!
|
||||
//! One of the reasons for splitting edges is to be able to insert `copy` and `regmove` instructions
|
||||
//! between a conditional branch and the following terminator.
|
||||
use alloc::vec::Vec;
|
||||
|
||||
use crate::cursor::{Cursor, EncCursor};
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::{Block, Function, Inst, InstBuilder, InstructionData, Opcode, ValueList};
|
||||
use crate::isa::TargetIsa;
|
||||
use crate::topo_order::TopoOrder;
|
||||
|
||||
pub fn run(
|
||||
isa: &dyn TargetIsa,
|
||||
func: &mut Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
domtree: &mut DominatorTree,
|
||||
topo: &mut TopoOrder,
|
||||
) {
|
||||
let mut ctx = Context {
|
||||
has_new_blocks: false,
|
||||
cur: EncCursor::new(func, isa),
|
||||
domtree,
|
||||
topo,
|
||||
cfg,
|
||||
};
|
||||
ctx.run()
|
||||
}
|
||||
|
||||
struct Context<'a> {
|
||||
/// True if new blocks were inserted.
|
||||
has_new_blocks: bool,
|
||||
|
||||
/// Current instruction as well as reference to function and ISA.
|
||||
cur: EncCursor<'a>,
|
||||
|
||||
/// References to contextual data structures we need.
|
||||
domtree: &'a mut DominatorTree,
|
||||
topo: &'a mut TopoOrder,
|
||||
cfg: &'a mut ControlFlowGraph,
|
||||
}
|
||||
|
||||
impl<'a> Context<'a> {
|
||||
fn run(&mut self) {
|
||||
// Any block order will do.
|
||||
self.topo.reset(self.cur.func.layout.blocks());
|
||||
while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) {
|
||||
// Branches can only be at the last or second to last position in an extended basic
|
||||
// block.
|
||||
self.cur.goto_last_inst(block);
|
||||
let terminator_inst = self.cur.current_inst().expect("terminator");
|
||||
if let Some(inst) = self.cur.prev_inst() {
|
||||
let opcode = self.cur.func.dfg[inst].opcode();
|
||||
if opcode.is_branch() {
|
||||
self.visit_conditional_branch(inst, opcode);
|
||||
self.cur.goto_inst(terminator_inst);
|
||||
self.visit_terminator_branch(terminator_inst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If blocks were added the cfg and domtree are inconsistent and must be recomputed.
|
||||
if self.has_new_blocks {
|
||||
self.cfg.compute(&self.cur.func);
|
||||
self.domtree.compute(&self.cur.func, self.cfg);
|
||||
}
|
||||
}
|
||||
|
||||
fn visit_conditional_branch(&mut self, branch: Inst, opcode: Opcode) {
|
||||
// TODO: target = dfg[branch].branch_destination().expect("conditional branch");
|
||||
let target = match self.cur.func.dfg[branch] {
|
||||
InstructionData::Branch { destination, .. }
|
||||
| InstructionData::BranchIcmp { destination, .. }
|
||||
| InstructionData::BranchInt { destination, .. }
|
||||
| InstructionData::BranchFloat { destination, .. } => destination,
|
||||
_ => panic!("Unexpected instruction in visit_conditional_branch"),
|
||||
};
|
||||
|
||||
// If there are any parameters, split the edge.
|
||||
if self.should_split_edge(target) {
|
||||
// Create the block the branch will jump to.
|
||||
let new_block = self.cur.func.dfg.make_block();
|
||||
|
||||
// Insert the new block before the destination, such that it can fallthrough in the
|
||||
// target block.
|
||||
assert_ne!(Some(target), self.cur.layout().entry_block());
|
||||
self.cur.layout_mut().insert_block(new_block, target);
|
||||
self.has_new_blocks = true;
|
||||
|
||||
// Extract the arguments of the branch instruction, split the Block parameters and the
|
||||
// branch arguments
|
||||
let num_fixed = opcode.constraints().num_fixed_value_arguments();
|
||||
let dfg = &mut self.cur.func.dfg;
|
||||
let old_args: Vec<_> = {
|
||||
let args = dfg[branch].take_value_list().expect("block parameters");
|
||||
args.as_slice(&dfg.value_lists).iter().copied().collect()
|
||||
};
|
||||
let (branch_args, block_params) = old_args.split_at(num_fixed);
|
||||
|
||||
// Replace the branch destination by the new Block created with no parameters, and restore
|
||||
// the branch arguments, without the original Block parameters.
|
||||
{
|
||||
let branch_args = ValueList::from_slice(branch_args, &mut dfg.value_lists);
|
||||
let data = &mut dfg[branch];
|
||||
*data.branch_destination_mut().expect("branch") = new_block;
|
||||
data.put_value_list(branch_args);
|
||||
}
|
||||
let ok = self.cur.func.update_encoding(branch, self.cur.isa).is_ok();
|
||||
debug_assert!(ok);
|
||||
|
||||
// Insert a jump to the original target with its arguments into the new block.
|
||||
self.cur.goto_first_insertion_point(new_block);
|
||||
self.cur.ins().jump(target, block_params);
|
||||
|
||||
// Reset the cursor to point to the branch.
|
||||
self.cur.goto_inst(branch);
|
||||
}
|
||||
}
|
||||
|
||||
fn visit_terminator_branch(&mut self, inst: Inst) {
|
||||
let inst_data = &self.cur.func.dfg[inst];
|
||||
let opcode = inst_data.opcode();
|
||||
if opcode != Opcode::Jump && opcode != Opcode::Fallthrough {
|
||||
// This opcode is ignored as it does not have any block parameters.
|
||||
if opcode != Opcode::IndirectJumpTableBr {
|
||||
debug_assert!(!opcode.is_branch())
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
let target = match inst_data {
|
||||
InstructionData::Jump { destination, .. } => destination,
|
||||
_ => panic!(
|
||||
"Unexpected instruction {} in visit_terminator_branch",
|
||||
self.cur.display_inst(inst)
|
||||
),
|
||||
};
|
||||
debug_assert!(self.cur.func.dfg[inst].opcode().is_terminator());
|
||||
|
||||
// If there are any parameters, split the edge.
|
||||
if self.should_split_edge(*target) {
|
||||
// Create the block the branch will jump to.
|
||||
let new_block = self.cur.func.dfg.make_block();
|
||||
self.has_new_blocks = true;
|
||||
|
||||
// Split the current block before its terminator, and insert a new jump instruction to
|
||||
// jump to it.
|
||||
let jump = self.cur.ins().jump(new_block, &[]);
|
||||
self.cur.insert_block(new_block);
|
||||
|
||||
// Reset the cursor to point to new terminator of the old block.
|
||||
self.cur.goto_inst(jump);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns whether we should introduce a new branch.
|
||||
fn should_split_edge(&self, target: Block) -> bool {
|
||||
// We should split the edge if the target has any parameters.
|
||||
if !self.cur.func.dfg.block_params(target).is_empty() {
|
||||
return true;
|
||||
};
|
||||
|
||||
// Or, if the target has more than one block reaching it.
|
||||
debug_assert!(self.cfg.pred_iter(target).next() != None);
|
||||
|
||||
self.cfg.pred_iter(target).nth(1).is_some()
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,252 +0,0 @@
|
||||
//! Register allocator context.
|
||||
//!
|
||||
//! The `Context` struct contains data structures that should be preserved across invocations of
|
||||
//! the register allocator algorithm. This doesn't preserve any data between functions, but it
|
||||
//! avoids allocating data structures independently for each function being compiled.
|
||||
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::Function;
|
||||
use crate::isa::TargetIsa;
|
||||
use crate::regalloc::branch_splitting;
|
||||
use crate::regalloc::coalescing::Coalescing;
|
||||
use crate::regalloc::coloring::Coloring;
|
||||
use crate::regalloc::live_value_tracker::LiveValueTracker;
|
||||
use crate::regalloc::liveness::Liveness;
|
||||
use crate::regalloc::reload::Reload;
|
||||
use crate::regalloc::safepoint::emit_stack_maps;
|
||||
use crate::regalloc::spilling::Spilling;
|
||||
use crate::regalloc::virtregs::VirtRegs;
|
||||
use crate::result::CodegenResult;
|
||||
use crate::timing;
|
||||
use crate::topo_order::TopoOrder;
|
||||
use crate::verifier::{
|
||||
verify_context, verify_cssa, verify_liveness, verify_locations, VerifierErrors,
|
||||
};
|
||||
|
||||
/// Persistent memory allocations for register allocation.
|
||||
pub struct Context {
|
||||
liveness: Liveness,
|
||||
virtregs: VirtRegs,
|
||||
coalescing: Coalescing,
|
||||
topo: TopoOrder,
|
||||
tracker: LiveValueTracker,
|
||||
spilling: Spilling,
|
||||
reload: Reload,
|
||||
coloring: Coloring,
|
||||
}
|
||||
|
||||
impl Context {
|
||||
/// Create a new context for register allocation.
|
||||
///
|
||||
/// This context should be reused for multiple functions in order to avoid repeated memory
|
||||
/// allocations.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
liveness: Liveness::new(),
|
||||
virtregs: VirtRegs::new(),
|
||||
coalescing: Coalescing::new(),
|
||||
topo: TopoOrder::new(),
|
||||
tracker: LiveValueTracker::new(),
|
||||
spilling: Spilling::new(),
|
||||
reload: Reload::new(),
|
||||
coloring: Coloring::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Clear all data structures in this context.
|
||||
pub fn clear(&mut self) {
|
||||
self.liveness.clear();
|
||||
self.virtregs.clear();
|
||||
self.coalescing.clear();
|
||||
self.topo.clear();
|
||||
self.tracker.clear();
|
||||
self.spilling.clear();
|
||||
self.reload.clear();
|
||||
self.coloring.clear();
|
||||
}
|
||||
|
||||
/// Current values liveness state.
|
||||
pub fn liveness(&self) -> &Liveness {
|
||||
&self.liveness
|
||||
}
|
||||
|
||||
/// Allocate registers in `func`.
|
||||
///
|
||||
/// After register allocation, all values in `func` have been assigned to a register or stack
|
||||
/// location that is consistent with instruction encoding constraints.
|
||||
pub fn run(
|
||||
&mut self,
|
||||
isa: &dyn TargetIsa,
|
||||
func: &mut Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
domtree: &mut DominatorTree,
|
||||
) -> CodegenResult<()> {
|
||||
let _tt = timing::regalloc();
|
||||
debug_assert!(domtree.is_valid());
|
||||
|
||||
let mut errors = VerifierErrors::default();
|
||||
|
||||
// `Liveness` and `Coloring` are self-clearing.
|
||||
self.virtregs.clear();
|
||||
|
||||
// Tracker state (dominator live sets) is actually reused between the spilling and coloring
|
||||
// phases.
|
||||
self.tracker.clear();
|
||||
|
||||
// Pass: Split branches, add space where to add copy & regmove instructions.
|
||||
branch_splitting::run(isa, func, cfg, domtree, &mut self.topo);
|
||||
|
||||
// Pass: Liveness analysis.
|
||||
self.liveness.compute(isa, func, cfg);
|
||||
|
||||
if isa.flags().enable_verifier() {
|
||||
let ok = verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok();
|
||||
|
||||
if !ok {
|
||||
return Err(errors.into());
|
||||
}
|
||||
}
|
||||
|
||||
// Pass: Coalesce and create Conventional SSA form.
|
||||
self.coalescing.conventional_ssa(
|
||||
isa,
|
||||
func,
|
||||
cfg,
|
||||
domtree,
|
||||
&mut self.liveness,
|
||||
&mut self.virtregs,
|
||||
);
|
||||
|
||||
if isa.flags().enable_verifier() {
|
||||
let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
|
||||
&& verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
|
||||
&& verify_cssa(
|
||||
func,
|
||||
cfg,
|
||||
domtree,
|
||||
&self.liveness,
|
||||
&self.virtregs,
|
||||
&mut errors,
|
||||
)
|
||||
.is_ok();
|
||||
|
||||
if !ok {
|
||||
return Err(errors.into());
|
||||
}
|
||||
}
|
||||
|
||||
// Pass: Spilling.
|
||||
self.spilling.run(
|
||||
isa,
|
||||
func,
|
||||
domtree,
|
||||
&mut self.liveness,
|
||||
&self.virtregs,
|
||||
&mut self.topo,
|
||||
&mut self.tracker,
|
||||
);
|
||||
|
||||
if isa.flags().enable_verifier() {
|
||||
let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
|
||||
&& verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
|
||||
&& verify_cssa(
|
||||
func,
|
||||
cfg,
|
||||
domtree,
|
||||
&self.liveness,
|
||||
&self.virtregs,
|
||||
&mut errors,
|
||||
)
|
||||
.is_ok();
|
||||
|
||||
if !ok {
|
||||
return Err(errors.into());
|
||||
}
|
||||
}
|
||||
|
||||
// Pass: Reload.
|
||||
self.reload.run(
|
||||
isa,
|
||||
func,
|
||||
domtree,
|
||||
&mut self.liveness,
|
||||
&mut self.topo,
|
||||
&mut self.tracker,
|
||||
);
|
||||
|
||||
if isa.flags().enable_verifier() {
|
||||
let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
|
||||
&& verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
|
||||
&& verify_cssa(
|
||||
func,
|
||||
cfg,
|
||||
domtree,
|
||||
&self.liveness,
|
||||
&self.virtregs,
|
||||
&mut errors,
|
||||
)
|
||||
.is_ok();
|
||||
|
||||
if !ok {
|
||||
return Err(errors.into());
|
||||
}
|
||||
}
|
||||
|
||||
// Pass: Coloring.
|
||||
self.coloring.run(
|
||||
isa,
|
||||
func,
|
||||
cfg,
|
||||
domtree,
|
||||
&mut self.liveness,
|
||||
&mut self.tracker,
|
||||
);
|
||||
|
||||
// If there are any reference types used, encode safepoints and emit
|
||||
// stack maps.
|
||||
//
|
||||
// This function runs after register allocation has taken place, meaning
|
||||
// values have locations assigned already, which is necessary for
|
||||
// creating the stack maps.
|
||||
let safepoints_enabled = isa.flags().enable_safepoints();
|
||||
for val in func.dfg.values() {
|
||||
let ty = func.dfg.value_type(val);
|
||||
if ty.lane_type().is_ref() {
|
||||
assert!(
|
||||
safepoints_enabled,
|
||||
"reference types were found but safepoints were not enabled"
|
||||
);
|
||||
emit_stack_maps(func, domtree, &self.liveness, &mut self.tracker, isa);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if isa.flags().enable_verifier() {
|
||||
let ok = verify_context(func, cfg, domtree, isa, &mut errors).is_ok()
|
||||
&& verify_liveness(isa, func, cfg, &self.liveness, &mut errors).is_ok()
|
||||
&& verify_locations(isa, func, cfg, Some(&self.liveness), &mut errors).is_ok()
|
||||
&& verify_cssa(
|
||||
func,
|
||||
cfg,
|
||||
domtree,
|
||||
&self.liveness,
|
||||
&self.virtregs,
|
||||
&mut errors,
|
||||
)
|
||||
.is_ok();
|
||||
|
||||
if !ok {
|
||||
return Err(errors.into());
|
||||
}
|
||||
}
|
||||
|
||||
// Even if we arrive here, (non-fatal) errors might have been reported, so we
|
||||
// must make sure absolutely nothing is wrong
|
||||
if errors.is_empty() {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(errors.into())
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,322 +0,0 @@
|
||||
//! Register diversions.
|
||||
//!
|
||||
//! Normally, a value is assigned to a single register or stack location by the register allocator.
|
||||
//! Sometimes, it is necessary to move register values to a different register in order to satisfy
|
||||
//! instruction constraints.
|
||||
//!
|
||||
//! These register diversions are local to a block. No values can be diverted when entering a new
|
||||
//! block.
|
||||
|
||||
use crate::fx::FxHashMap;
|
||||
use crate::hash_map::{Entry, Iter};
|
||||
use crate::ir::{Block, StackSlot, Value, ValueLoc, ValueLocations};
|
||||
use crate::ir::{InstructionData, Opcode};
|
||||
use crate::isa::{RegInfo, RegUnit};
|
||||
use core::fmt;
|
||||
use cranelift_entity::{SparseMap, SparseMapValue};
|
||||
|
||||
#[cfg(feature = "enable-serde")]
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// A diversion of a value from its original location to a new register or stack location.
|
||||
///
|
||||
/// In IR, a diversion is represented by a `regmove` instruction, possibly a chain of them for the
|
||||
/// same value.
|
||||
///
|
||||
/// When tracking diversions, the `from` field is the original assigned value location, and `to` is
|
||||
/// the current one.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
|
||||
pub struct Diversion {
|
||||
/// The original value location.
|
||||
pub from: ValueLoc,
|
||||
/// The current value location.
|
||||
pub to: ValueLoc,
|
||||
}
|
||||
|
||||
impl Diversion {
|
||||
/// Make a new diversion.
|
||||
pub fn new(from: ValueLoc, to: ValueLoc) -> Self {
|
||||
debug_assert!(from.is_assigned() && to.is_assigned());
|
||||
Self { from, to }
|
||||
}
|
||||
}
|
||||
|
||||
/// Keep track of diversions in a block.
|
||||
#[derive(Clone)]
|
||||
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
|
||||
pub struct RegDiversions {
|
||||
current: FxHashMap<Value, Diversion>,
|
||||
}
|
||||
|
||||
/// Keep track of diversions at the entry of block.
|
||||
#[derive(Clone)]
|
||||
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
|
||||
struct EntryRegDiversionsValue {
|
||||
key: Block,
|
||||
divert: RegDiversions,
|
||||
}
|
||||
|
||||
/// Map block to their matching RegDiversions at basic blocks entry.
|
||||
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
|
||||
pub struct EntryRegDiversions {
|
||||
map: SparseMap<Block, EntryRegDiversionsValue>,
|
||||
}
|
||||
|
||||
impl RegDiversions {
|
||||
/// Create a new empty diversion tracker.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
current: FxHashMap::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Clear the content of the diversions, to reset the state of the compiler.
|
||||
pub fn clear(&mut self) {
|
||||
self.current.clear()
|
||||
}
|
||||
|
||||
/// Are there any diversions?
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.current.is_empty()
|
||||
}
|
||||
|
||||
/// Get the current diversion of `value`, if any.
|
||||
pub fn diversion(&self, value: Value) -> Option<&Diversion> {
|
||||
self.current.get(&value)
|
||||
}
|
||||
|
||||
/// Get all current diversions.
|
||||
pub fn iter(&self) -> Iter<'_, Value, Diversion> {
|
||||
self.current.iter()
|
||||
}
|
||||
|
||||
/// Get the current location for `value`. Fall back to the assignment map for non-diverted
|
||||
/// values
|
||||
pub fn get(&self, value: Value, locations: &ValueLocations) -> ValueLoc {
|
||||
match self.diversion(value) {
|
||||
Some(d) => d.to,
|
||||
None => locations[value],
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the current register location for `value`, or panic if `value` isn't in a register.
|
||||
pub fn reg(&self, value: Value, locations: &ValueLocations) -> RegUnit {
|
||||
self.get(value, locations).unwrap_reg()
|
||||
}
|
||||
|
||||
/// Get the current stack location for `value`, or panic if `value` isn't in a stack slot.
|
||||
pub fn stack(&self, value: Value, locations: &ValueLocations) -> StackSlot {
|
||||
self.get(value, locations).unwrap_stack()
|
||||
}
|
||||
|
||||
/// Record any kind of move.
|
||||
///
|
||||
/// The `from` location must match an existing `to` location, if any.
|
||||
fn divert(&mut self, value: Value, from: ValueLoc, to: ValueLoc) {
|
||||
debug_assert!(from.is_assigned() && to.is_assigned());
|
||||
match self.current.entry(value) {
|
||||
Entry::Occupied(mut e) => {
|
||||
// TODO: non-lexical lifetimes should allow removal of the scope and early return.
|
||||
{
|
||||
let d = e.get_mut();
|
||||
debug_assert_eq!(d.to, from, "Bad regmove chain for {}", value);
|
||||
if d.from != to {
|
||||
d.to = to;
|
||||
return;
|
||||
}
|
||||
}
|
||||
e.remove();
|
||||
}
|
||||
Entry::Vacant(e) => {
|
||||
e.insert(Diversion::new(from, to));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Record a register -> register move.
|
||||
pub fn regmove(&mut self, value: Value, from: RegUnit, to: RegUnit) {
|
||||
self.divert(value, ValueLoc::Reg(from), ValueLoc::Reg(to));
|
||||
}
|
||||
|
||||
/// Record a register -> stack move.
|
||||
pub fn regspill(&mut self, value: Value, from: RegUnit, to: StackSlot) {
|
||||
self.divert(value, ValueLoc::Reg(from), ValueLoc::Stack(to));
|
||||
}
|
||||
|
||||
/// Record a stack -> register move.
|
||||
pub fn regfill(&mut self, value: Value, from: StackSlot, to: RegUnit) {
|
||||
self.divert(value, ValueLoc::Stack(from), ValueLoc::Reg(to));
|
||||
}
|
||||
|
||||
/// Apply the effect of `inst`.
|
||||
///
|
||||
/// If `inst` is a `regmove`, `regfill`, or `regspill` instruction, update the diversions to
|
||||
/// match.
|
||||
pub fn apply(&mut self, inst: &InstructionData) {
|
||||
match *inst {
|
||||
InstructionData::RegMove {
|
||||
opcode: Opcode::Regmove,
|
||||
arg,
|
||||
src,
|
||||
dst,
|
||||
} => self.regmove(arg, src, dst),
|
||||
InstructionData::RegSpill {
|
||||
opcode: Opcode::Regspill,
|
||||
arg,
|
||||
src,
|
||||
dst,
|
||||
} => self.regspill(arg, src, dst),
|
||||
InstructionData::RegFill {
|
||||
opcode: Opcode::Regfill,
|
||||
arg,
|
||||
src,
|
||||
dst,
|
||||
} => self.regfill(arg, src, dst),
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// Drop any recorded move for `value`.
|
||||
///
|
||||
/// Returns the `to` location of the removed diversion.
|
||||
pub fn remove(&mut self, value: Value) -> Option<ValueLoc> {
|
||||
self.current.remove(&value).map(|d| d.to)
|
||||
}
|
||||
|
||||
/// Resets the state of the current diversions to the recorded diversions at the entry of the
|
||||
/// given `block`. The recoded diversions is available after coloring on `func.entry_diversions`
|
||||
/// field.
|
||||
pub fn at_block(&mut self, entry_diversions: &EntryRegDiversions, block: Block) {
|
||||
self.clear();
|
||||
if let Some(entry_divert) = entry_diversions.map.get(block) {
|
||||
let iter = entry_divert.divert.current.iter();
|
||||
self.current.extend(iter);
|
||||
}
|
||||
}
|
||||
|
||||
/// Copy the current state of the diversions, and save it for the entry of the `block` given as
|
||||
/// argument.
|
||||
///
|
||||
/// Note: This function can only be called once on a `Block` with a given `entry_diversions`
|
||||
/// argument, otherwise it would panic.
|
||||
pub fn save_for_block(&mut self, entry_diversions: &mut EntryRegDiversions, target: Block) {
|
||||
// No need to save anything if there is no diversions to be recorded.
|
||||
if self.is_empty() {
|
||||
return;
|
||||
}
|
||||
debug_assert!(!entry_diversions.map.contains_key(target));
|
||||
let iter = self.current.iter();
|
||||
let mut entry_divert = Self::new();
|
||||
entry_divert.current.extend(iter);
|
||||
entry_diversions.map.insert(EntryRegDiversionsValue {
|
||||
key: target,
|
||||
divert: entry_divert,
|
||||
});
|
||||
}
|
||||
|
||||
/// Check that the recorded entry for a given `block` matches what is recorded in the
|
||||
/// `entry_diversions`.
|
||||
pub fn check_block_entry(&self, entry_diversions: &EntryRegDiversions, target: Block) -> bool {
|
||||
let entry_divert = match entry_diversions.map.get(target) {
|
||||
Some(entry_divert) => entry_divert,
|
||||
None => return self.is_empty(),
|
||||
};
|
||||
|
||||
if entry_divert.divert.current.len() != self.current.len() {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (val, _) in entry_divert.divert.current.iter() {
|
||||
if !self.current.contains_key(val) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Return an object that can display the diversions.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&'a self, regs: R) -> DisplayDiversions<'a> {
|
||||
DisplayDiversions(&self, regs.into())
|
||||
}
|
||||
}
|
||||
|
||||
impl EntryRegDiversions {
|
||||
/// Create a new empty entry diversion, to associate diversions to each block entry.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
map: SparseMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn clear(&mut self) {
|
||||
self.map.clear();
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for EntryRegDiversions {
|
||||
/// The Clone trait is required by `ir::Function`.
|
||||
fn clone(&self) -> Self {
|
||||
let mut tmp = Self::new();
|
||||
for v in self.map.values() {
|
||||
tmp.map.insert(v.clone());
|
||||
}
|
||||
tmp
|
||||
}
|
||||
}
|
||||
|
||||
/// Implement `SparseMapValue`, as required to make use of a `SparseMap` for mapping the entry
|
||||
/// diversions for each block.
|
||||
impl SparseMapValue<Block> for EntryRegDiversionsValue {
|
||||
fn key(&self) -> Block {
|
||||
self.key
|
||||
}
|
||||
}
|
||||
|
||||
/// Object that displays register diversions.
|
||||
pub struct DisplayDiversions<'a>(&'a RegDiversions, Option<&'a RegInfo>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayDiversions<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{{")?;
|
||||
for (value, div) in self.0.current.iter() {
|
||||
write!(
|
||||
f,
|
||||
" {}: {} -> {}",
|
||||
value,
|
||||
div.from.display(self.1),
|
||||
div.to.display(self.1)
|
||||
)?
|
||||
}
|
||||
write!(f, " }}")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::entity::EntityRef;
    use crate::ir::Value;

    /// A chain of register moves extends the diversion, and moving the value back to its
    /// original register removes it.
    #[test]
    fn inserts() {
        let mut divs = RegDiversions::new();
        let v1 = Value::new(1);
        let v2 = Value::new(2);

        // First move: v1 becomes diverted, v2 is untouched.
        divs.regmove(v1, 10, 12);
        let expected = Diversion {
            from: ValueLoc::Reg(10),
            to: ValueLoc::Reg(12),
        };
        assert_eq!(divs.diversion(v1), Some(&expected));
        assert_eq!(divs.diversion(v2), None);

        // Second move: only the current location changes.
        divs.regmove(v1, 12, 11);
        assert_eq!(divs.diversion(v1).unwrap().to, ValueLoc::Reg(11));

        // Moving back to the original register cancels the diversion.
        divs.regmove(v1, 11, 10);
        assert_eq!(divs.diversion(v1), None);
    }
}
|
||||
@@ -1,344 +0,0 @@
|
||||
//! Track which values are live in a block with instruction granularity.
|
||||
//!
|
||||
//! The `LiveValueTracker` keeps track of the set of live SSA values at each instruction in a block.
|
||||
//! The sets of live values are computed on the fly as the tracker is moved from instruction to
|
||||
//! instruction, starting at the block header.
|
||||
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::entity::{EntityList, ListPool};
|
||||
use crate::fx::FxHashMap;
|
||||
use crate::ir::{Block, DataFlowGraph, ExpandedProgramPoint, Inst, Layout, Value};
|
||||
use crate::partition_slice::partition_slice;
|
||||
use crate::regalloc::affinity::Affinity;
|
||||
use crate::regalloc::liveness::Liveness;
|
||||
use crate::regalloc::liverange::LiveRange;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
/// Shorthand for an entity list of values.
type ValueList = EntityList<Value>;
|
||||
|
||||
/// Compute and track live values throughout a block.
|
||||
pub struct LiveValueTracker {
|
||||
/// The set of values that are live at the current program point.
|
||||
live: LiveValueVec,
|
||||
|
||||
/// Saved set of live values for every jump and branch that can potentially be an immediate
|
||||
/// dominator of a block.
|
||||
///
|
||||
/// This is the set of values that are live *before* the branch.
|
||||
idom_sets: FxHashMap<Inst, ValueList>,
|
||||
|
||||
/// Memory pool for the live sets.
|
||||
idom_pool: ListPool<Value>,
|
||||
}
|
||||
|
||||
/// Information about a value that is live at the current program point.
|
||||
#[derive(Debug)]
|
||||
pub struct LiveValue {
|
||||
/// The live value.
|
||||
pub value: Value,
|
||||
|
||||
/// The local ending point of the live range in the current block, as returned by
|
||||
/// `LiveRange::def_local_end()` or `LiveRange::livein_local_end()`.
|
||||
pub endpoint: Inst,
|
||||
|
||||
/// The affinity of the value as represented in its `LiveRange`.
|
||||
///
|
||||
/// This value is simply a copy of the affinity stored in the live range. We copy it because
|
||||
/// almost all users of `LiveValue` need to look at it.
|
||||
pub affinity: Affinity,
|
||||
|
||||
/// The live range for this value never leaves its block.
|
||||
pub is_local: bool,
|
||||
|
||||
/// This value is dead - the live range ends immediately.
|
||||
pub is_dead: bool,
|
||||
}
|
||||
|
||||
struct LiveValueVec {
|
||||
/// The set of values that are live at the current program point.
|
||||
values: Vec<LiveValue>,
|
||||
|
||||
/// How many values at the front of `values` are known to be live after `inst`?
|
||||
///
|
||||
/// This is used to pass a much smaller slice to `partition_slice` when its called a second
|
||||
/// time for the same instruction.
|
||||
live_prefix: Option<(Inst, usize)>,
|
||||
}
|
||||
|
||||
impl LiveValueVec {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
values: Vec::new(),
|
||||
live_prefix: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a new live value to `values`. Copy some properties from `lr`.
|
||||
fn push(&mut self, value: Value, endpoint: Inst, lr: &LiveRange) {
|
||||
self.values.push(LiveValue {
|
||||
value,
|
||||
endpoint,
|
||||
affinity: lr.affinity,
|
||||
is_local: lr.is_local(),
|
||||
is_dead: lr.is_dead(),
|
||||
});
|
||||
}
|
||||
|
||||
/// Remove all elements.
|
||||
fn clear(&mut self) {
|
||||
self.values.clear();
|
||||
self.live_prefix = None;
|
||||
}
|
||||
|
||||
/// Make sure that the values killed by `next_inst` are moved to the end of the `values`
|
||||
/// vector.
|
||||
///
|
||||
/// Returns the number of values that will be live after `next_inst`.
|
||||
fn live_after(&mut self, next_inst: Inst) -> usize {
|
||||
// How many values at the front of the vector are already known to survive `next_inst`?
|
||||
// We don't need to pass this prefix to `partition_slice()`
|
||||
let keep = match self.live_prefix {
|
||||
Some((i, prefix)) if i == next_inst => prefix,
|
||||
_ => 0,
|
||||
};
|
||||
|
||||
// Move the remaining surviving values to the front partition of the vector.
|
||||
let prefix = keep + partition_slice(&mut self.values[keep..], |v| v.endpoint != next_inst);
|
||||
|
||||
// Remember the new prefix length in case we get called again for the same `next_inst`.
|
||||
self.live_prefix = Some((next_inst, prefix));
|
||||
prefix
|
||||
}
|
||||
|
||||
/// Remove the values killed by `next_inst`.
|
||||
fn remove_kill_values(&mut self, next_inst: Inst) {
|
||||
let keep = self.live_after(next_inst);
|
||||
self.values.truncate(keep);
|
||||
}
|
||||
|
||||
/// Remove any dead values.
|
||||
fn remove_dead_values(&mut self) {
|
||||
self.values.retain(|v| !v.is_dead);
|
||||
self.live_prefix = None;
|
||||
}
|
||||
}
|
||||
|
||||
impl LiveValueTracker {
|
||||
/// Create a new blank tracker.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
live: LiveValueVec::new(),
|
||||
idom_sets: FxHashMap(),
|
||||
idom_pool: ListPool::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Clear all cached information.
|
||||
pub fn clear(&mut self) {
|
||||
self.live.clear();
|
||||
self.idom_sets.clear();
|
||||
self.idom_pool.clear();
|
||||
}
|
||||
|
||||
/// Get the set of currently live values.
|
||||
///
|
||||
/// Between calls to `process_inst()` and `drop_dead()`, this includes both values killed and
|
||||
/// defined by the current instruction.
|
||||
pub fn live(&self) -> &[LiveValue] {
|
||||
&self.live.values
|
||||
}
|
||||
|
||||
/// Get a mutable set of currently live values.
|
||||
///
|
||||
/// Use with care and don't move entries around.
|
||||
pub fn live_mut(&mut self) -> &mut [LiveValue] {
|
||||
&mut self.live.values
|
||||
}
|
||||
|
||||
/// Move the current position to the top of `block`.
|
||||
///
|
||||
/// This depends on the stored live value set at `block`'s immediate dominator, so that must have
|
||||
/// been visited first.
|
||||
///
|
||||
/// Returns `(liveins, args)` as a pair of slices. The first slice is the set of live-in values
|
||||
/// from the immediate dominator. The second slice is the set of `block` parameters.
|
||||
///
|
||||
/// Dead parameters with no uses are included in `args`. Call `drop_dead_args()` to remove them.
|
||||
pub fn block_top(
|
||||
&mut self,
|
||||
block: Block,
|
||||
dfg: &DataFlowGraph,
|
||||
liveness: &Liveness,
|
||||
layout: &Layout,
|
||||
domtree: &DominatorTree,
|
||||
) -> (&[LiveValue], &[LiveValue]) {
|
||||
// Start over, compute the set of live values at the top of the block from two sources:
|
||||
//
|
||||
// 1. Values that were live before `block`'s immediate dominator, filtered for those that are
|
||||
// actually live-in.
|
||||
// 2. Arguments to `block` that are not dead.
|
||||
//
|
||||
self.live.clear();
|
||||
|
||||
// Compute the live-in values. Start by filtering the set of values that were live before
|
||||
// the immediate dominator. Just use the empty set if there's no immediate dominator (i.e.,
|
||||
// the entry block or an unreachable block).
|
||||
if let Some(idom) = domtree.idom(block) {
|
||||
// If the immediate dominator exits, we must have a stored list for it. This is a
|
||||
// requirement to the order blocks are visited: All dominators must have been processed
|
||||
// before the current block.
|
||||
let idom_live_list = self
|
||||
.idom_sets
|
||||
.get(&idom)
|
||||
.expect("No stored live set for dominator");
|
||||
// Get just the values that are live-in to `block`.
|
||||
for &value in idom_live_list.as_slice(&self.idom_pool) {
|
||||
let lr = liveness
|
||||
.get(value)
|
||||
.expect("Immediate dominator value has no live range");
|
||||
|
||||
// Check if this value is live-in here.
|
||||
if let Some(endpoint) = lr.livein_local_end(block, layout) {
|
||||
self.live.push(value, endpoint, lr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now add all the live parameters to `block`.
|
||||
let first_arg = self.live.values.len();
|
||||
for &value in dfg.block_params(block) {
|
||||
let lr = &liveness[value];
|
||||
debug_assert_eq!(lr.def(), block.into());
|
||||
match lr.def_local_end().into() {
|
||||
ExpandedProgramPoint::Inst(endpoint) => {
|
||||
self.live.push(value, endpoint, lr);
|
||||
}
|
||||
ExpandedProgramPoint::Block(local_block) => {
|
||||
// This is a dead block parameter which is not even live into the first
|
||||
// instruction in the block.
|
||||
debug_assert_eq!(
|
||||
local_block, block,
|
||||
"block parameter live range ends at wrong block header"
|
||||
);
|
||||
// Give this value a fake endpoint that is the first instruction in the block.
|
||||
// We expect it to be removed by calling `drop_dead_args()`.
|
||||
self.live
|
||||
.push(value, layout.first_inst(block).expect("Empty block"), lr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
self.live.values.split_at(first_arg)
|
||||
}
|
||||
|
||||
/// Prepare to move past `inst`.
|
||||
///
|
||||
/// Determine the set of already live values that are killed by `inst`, and add the new defined
|
||||
/// values to the tracked set.
|
||||
///
|
||||
/// Returns `(throughs, kills, defs)` as a tuple of slices:
|
||||
///
|
||||
/// 1. The `throughs` slice is the set of live-through values that are neither defined nor
|
||||
/// killed by the instruction.
|
||||
/// 2. The `kills` slice is the set of values that were live before the instruction and are
|
||||
/// killed at the instruction. This does not include dead defs.
|
||||
/// 3. The `defs` slice is guaranteed to be in the same order as `inst`'s results, and includes
|
||||
/// dead defines.
|
||||
///
|
||||
/// The order of `throughs` and `kills` is arbitrary.
|
||||
///
|
||||
/// The `drop_dead()` method must be called next to actually remove the dead values from the
|
||||
/// tracked set after the two returned slices are no longer needed.
|
||||
pub fn process_inst(
|
||||
&mut self,
|
||||
inst: Inst,
|
||||
dfg: &DataFlowGraph,
|
||||
liveness: &Liveness,
|
||||
) -> (&[LiveValue], &[LiveValue], &[LiveValue]) {
|
||||
// Save a copy of the live values before any branches or jumps that could be somebody's
|
||||
// immediate dominator.
|
||||
if dfg[inst].opcode().is_branch() {
|
||||
self.save_idom_live_set(inst);
|
||||
}
|
||||
|
||||
// Move killed values to the end of the vector.
|
||||
// Don't remove them yet, `drop_dead()` will do that.
|
||||
let first_kill = self.live.live_after(inst);
|
||||
|
||||
// Add the values defined by `inst`.
|
||||
let first_def = self.live.values.len();
|
||||
for &value in dfg.inst_results(inst) {
|
||||
let lr = &liveness[value];
|
||||
debug_assert_eq!(lr.def(), inst.into());
|
||||
match lr.def_local_end().into() {
|
||||
ExpandedProgramPoint::Inst(endpoint) => {
|
||||
self.live.push(value, endpoint, lr);
|
||||
}
|
||||
ExpandedProgramPoint::Block(block) => {
|
||||
panic!("Instruction result live range can't end at {}", block);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
(
|
||||
&self.live.values[0..first_kill],
|
||||
&self.live.values[first_kill..first_def],
|
||||
&self.live.values[first_def..],
|
||||
)
|
||||
}
|
||||
|
||||
/// Prepare to move past a ghost instruction.
|
||||
///
|
||||
/// This is like `process_inst`, except any defs are ignored.
|
||||
///
|
||||
/// Returns `(throughs, kills)`.
|
||||
pub fn process_ghost(&mut self, inst: Inst) -> (&[LiveValue], &[LiveValue]) {
|
||||
let first_kill = self.live.live_after(inst);
|
||||
self.live.values.as_slice().split_at(first_kill)
|
||||
}
|
||||
|
||||
/// Drop the values that are now dead after moving past `inst`.
|
||||
///
|
||||
/// This removes both live values that were killed by `inst` and dead defines on `inst` itself.
|
||||
///
|
||||
/// This must be called after `process_inst(inst)` and before proceeding to the next
|
||||
/// instruction.
|
||||
pub fn drop_dead(&mut self, inst: Inst) {
|
||||
// Remove both live values that were killed by `inst` and dead defines from `inst`.
|
||||
self.live.remove_kill_values(inst);
|
||||
}
|
||||
|
||||
/// Drop any values that are marked as `is_dead`.
|
||||
///
|
||||
/// Use this after calling `block_top` to clean out dead block parameters.
|
||||
pub fn drop_dead_params(&mut self) {
|
||||
self.live.remove_dead_values();
|
||||
}
|
||||
|
||||
/// Process new spills.
|
||||
///
|
||||
/// Any values where `f` returns true are spilled and will be treated as if their affinity was
|
||||
/// `Stack`.
|
||||
pub fn process_spills<F>(&mut self, mut f: F)
|
||||
where
|
||||
F: FnMut(Value) -> bool,
|
||||
{
|
||||
for lv in &mut self.live.values {
|
||||
if f(lv.value) {
|
||||
lv.affinity = Affinity::Stack;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Save the current set of live values so it is associated with `idom`.
|
||||
fn save_idom_live_set(&mut self, idom: Inst) {
|
||||
let values = self.live.values.iter().map(|lv| lv.value);
|
||||
let pool = &mut self.idom_pool;
|
||||
// If there already is a set saved for `idom`, just keep it.
|
||||
self.idom_sets.entry(idom).or_insert_with(|| {
|
||||
let mut list = ValueList::default();
|
||||
list.extend(values, pool);
|
||||
list
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -1,443 +0,0 @@
|
||||
//! Liveness analysis for SSA values.
|
||||
//!
|
||||
//! This module computes the live range of all the SSA values in a function and produces a
|
||||
//! `LiveRange` instance for each.
|
||||
//!
|
||||
//!
|
||||
//! # Liveness consumers
|
||||
//!
|
||||
//! The primary consumer of the liveness analysis is the SSA coloring pass which goes through each
|
||||
//! block and assigns a register to the defined values. This algorithm needs to maintain a set of the
|
||||
//! currently live values as it is iterating down the instructions in the block. It asks the
|
||||
//! following questions:
|
||||
//!
|
||||
//! - What is the set of live values at the entry to the block?
|
||||
//! - When moving past a use of a value, is that value still alive in the block, or was that the last
|
||||
//! use?
|
||||
//! - When moving past a branch, which of the live values are still live below the branch?
|
||||
//!
|
||||
//! The set of `LiveRange` instances can answer these questions through their `def_local_end` and
|
||||
//! `livein_local_end` queries. The coloring algorithm visits blocks in a topological order of the
|
||||
//! dominator tree, so it can compute the set of live values at the beginning of a block by starting
|
||||
//! from the set of live values at the dominating branch instruction and filtering it with
|
||||
//! `livein_local_end`. These sets do not need to be stored in the liveness analysis.
|
||||
//!
|
||||
//! The secondary consumer of the liveness analysis is the spilling pass which needs to count the
|
||||
//! number of live values at every program point and insert spill code until the number of
|
||||
//! registers needed is small enough.
|
||||
//!
|
||||
//!
|
||||
//! # Alternative algorithms
|
||||
//!
|
||||
//! A number of different liveness analysis algorithms exist, so it is worthwhile to look at a few
|
||||
//! alternatives.
|
||||
//!
|
||||
//! ## Data-flow equations
|
||||
//!
|
||||
//! The classic *live variables analysis* that you will find in all compiler books from the
|
||||
//! previous century does not depend on SSA form. It is typically implemented by iteratively
|
||||
//! solving data-flow equations on bit-vectors of variables. The result is a live-out bit-vector of
|
||||
//! variables for every basic block in the program.
|
||||
//!
|
||||
//! This algorithm has some disadvantages that make us look elsewhere:
|
||||
//!
|
||||
//! - Quadratic memory use. We need a bit per variable per basic block in the function.
|
||||
//! - Dense representation of sparse data. In practice, the majority of SSA values never leave
|
||||
//! their basic block, and those that do span basic blocks rarely span a large number of basic
|
||||
//! blocks. This makes the data stored in the bitvectors quite sparse.
|
||||
//! - Traditionally, the data-flow equations were solved for real program *variables* which does
|
||||
//! not include temporaries used in evaluating expressions. We have an SSA form program which
|
||||
//! blurs the distinction between temporaries and variables. This makes the quadratic memory
|
||||
//! problem worse because there are many more SSA values than there were variables in the original
|
||||
//! program, and we don't know a priori which SSA values leave their basic block.
|
||||
//! - Missing last-use information. For values that are not live-out of a basic block, we would
|
||||
//! need to store information about the last use in the block somewhere. LLVM stores this
|
||||
//! information as a 'kill bit' on the last use in the IR. Maintaining these kill bits has been a
|
||||
//! source of problems for LLVM's register allocator.
|
||||
//!
|
||||
//! Data-flow equations can detect when a variable is used uninitialized, and they can handle
|
||||
//! multiple definitions of the same variable. We don't need this generality since we already have
|
||||
//! a program in SSA form.
|
||||
//!
|
||||
//! ## LLVM's liveness analysis
|
||||
//!
|
||||
//! LLVM's register allocator computes liveness per *virtual register*, where a virtual register is
|
||||
//! a disjoint union of related SSA values that should be assigned to the same physical register.
|
||||
//! It uses a compact data structure very similar to our `LiveRange`. The important difference is
|
||||
//! that Cranelift's `LiveRange` only describes a single SSA value, while LLVM's `LiveInterval`
|
||||
//! describes the live range of a virtual register *and* which one of the related SSA values is
|
||||
//! live at any given program point.
|
||||
//!
|
||||
//! LLVM computes the live range of each virtual register independently by using the use-def chains
|
||||
//! that are baked into its IR. The algorithm for a single virtual register is:
|
||||
//!
|
||||
//! 1. Initialize the live range with a single-instruction snippet of liveness at each def, using
|
||||
//! the def-chain. This does not include any phi-values.
|
||||
//! 2. Go through the virtual register's use chain and perform the following steps at each use:
|
||||
//! 3. Perform an exhaustive depth-first traversal up the CFG from the use. Look for basic blocks
|
||||
//! that already contain some liveness and extend the last live SSA value in the block to be
|
||||
//! live-out. Also build a list of new basic blocks where the register needs to be live-in.
|
||||
//! 4. Iteratively propagate live-out SSA values to the new live-in blocks. This may require new
|
||||
//! PHI values to be created when different SSA values can reach the same block.
|
||||
//!
|
||||
//! The iterative SSA form reconstruction can be skipped if the depth-first search only encountered
|
||||
//! one SSA value.
|
||||
//!
|
||||
//! This algorithm has some advantages compared to the data-flow equations:
|
||||
//!
|
||||
//! - The live ranges of local virtual registers are computed very quickly without ever traversing
|
||||
//! the CFG. The memory needed to store these live ranges is independent of the number of basic
|
||||
//! blocks in the program.
|
||||
//! - The time to compute the live range of a global virtual register is proportional to the number
|
||||
//! of basic blocks covered. Many virtual registers only cover a few blocks, even in very large
|
||||
//! functions.
|
||||
//! - A single live range can be recomputed after making modifications to the IR. No global
|
||||
//! algorithm is necessary. This feature depends on having use-def chains for virtual registers
|
||||
//! which Cranelift doesn't have.
|
||||
//!
|
||||
//! Cranelift uses very similar data structures and algorithms to LLVM, with the important
|
||||
//! difference that live ranges are computed per SSA value instead of per virtual register, and the
|
||||
//! uses in Cranelift IR refer to SSA values instead of virtual registers. This means that
|
||||
//! Cranelift can skip the last step of reconstructing SSA form for the virtual register uses.
|
||||
//!
|
||||
//! ## Fast Liveness Checking for SSA-Form Programs
|
||||
//!
|
||||
//! A liveness analysis that is often brought up in the context of SSA-based register allocation
|
||||
//! was presented at CGO 2008:
|
||||
//!
|
||||
//! > Boissinot, B., Hack, S., Grund, D., de Dinechin, B. D., & Rastello, F. (2008). *Fast Liveness
|
||||
//! Checking for SSA-Form Programs.* CGO.
|
||||
//!
|
||||
//! This analysis uses a global pre-computation that only depends on the CFG of the function. It
|
||||
//! then allows liveness queries for any (value, program point) pair. Each query traverses the use
|
||||
//! chain of the value and performs lookups in the precomputed bit-vectors.
|
||||
//!
|
||||
//! I did not seriously consider this analysis for Cranelift because:
|
||||
//!
|
||||
//! - It depends critically on use chains which Cranelift doesn't have.
|
||||
//! - Popular variables like the `this` pointer in a C++ method can have very large use chains.
|
||||
//! Traversing such a long use chain on every liveness lookup has the potential for some nasty
|
||||
//! quadratic behavior in unfortunate cases.
|
||||
//! - It says "fast" in the title, but the paper only claims to be 16% faster than a data-flow
|
||||
//! based approach, which isn't that impressive.
|
||||
//!
|
||||
//! Nevertheless, the property of only depending on the CFG structure is very useful. If Cranelift
|
||||
//! gains use chains, this approach would be worth a proper evaluation.
|
||||
//!
|
||||
//!
|
||||
//! # Cranelift's liveness analysis
|
||||
//!
|
||||
//! The algorithm implemented in this module is similar to LLVM's with these differences:
|
||||
//!
|
||||
//! - The `LiveRange` data structure describes the liveness of a single SSA value, not a virtual
|
||||
//! register.
|
||||
//! - Instructions in Cranelift IR contain references to SSA values, not virtual registers.
|
||||
//! - All live ranges are computed in one traversal of the program. Cranelift doesn't have use
|
||||
//! chains, so it is not possible to compute the live range for a single SSA value independently.
|
||||
//!
|
||||
//! The liveness computation visits all instructions in the program. The order is not important for
|
||||
//! the algorithm to be correct. At each instruction, the used values are examined.
|
||||
//!
|
||||
//! - The first time a value is encountered, its live range is constructed as a dead live range
|
||||
//! containing only the defining program point.
|
||||
//! - The local interval of the value's live range is extended so it reaches the use. This may
|
||||
//! require creating a new live-in local interval for the block.
|
||||
//! - If the live range became live-in to the block, add the block to a work-list.
|
||||
//! - While the work-list is non-empty pop a live-in block and repeat the two steps above, using each
|
||||
//! of the live-in block's CFG predecessor instructions as a 'use'.
|
||||
//!
|
||||
//! The effect of this algorithm is to extend the live range of each value to reach uses as they are
|
||||
//! visited. No data about each value beyond the live range is needed between visiting uses, so
|
||||
//! nothing is lost by computing the live range of all values simultaneously.
|
||||
//!
|
||||
//! ## Cache efficiency of Cranelift vs LLVM
|
||||
//!
|
||||
//! Since LLVM computes the complete live range of a virtual register in one go, it can keep the
|
||||
//! whole `LiveInterval` for the register in L1 cache. Since it is visiting the instructions in use
|
||||
//! chain order, some cache thrashing can occur as a result of pulling instructions into cache
|
||||
//! somewhat chaotically.
|
||||
//!
|
||||
//! Cranelift uses a transposed algorithm, visiting instructions in order. This means that each
|
||||
//! instruction is brought into cache only once, and it is likely that the other instructions on
|
||||
//! the same cache line will be visited before the line is evicted.
|
||||
//!
|
||||
//! Cranelift's problem is that the `LiveRange` structs are visited many times and not always
|
||||
//! regularly. We should strive to make the `LiveRange` struct as small as possible such that
|
||||
//! multiple related values can live on the same cache line.
|
||||
//!
|
||||
//! - Local values should fit in a 16-byte `LiveRange` struct or smaller. The current
|
||||
//! implementation contains a 24-byte `Vec` object and a redundant `value` member pushing the
|
||||
//! size to 32 bytes.
|
||||
//! - Related values should be stored on the same cache line. The current sparse set implementation
|
||||
//! does a decent job of that.
|
||||
//! - For global values, the list of live-in intervals is very likely to fit on a single cache
|
||||
//! line. These lists are very likely to be found in L2 cache at least.
|
||||
//!
|
||||
//! There is some room for improvement.
|
||||
|
||||
use crate::entity::SparseMap;
|
||||
use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
|
||||
use crate::ir::dfg::ValueDef;
|
||||
use crate::ir::{Block, Function, Inst, Layout, ProgramPoint, Value};
|
||||
use crate::isa::{EncInfo, OperandConstraint, TargetIsa};
|
||||
use crate::regalloc::affinity::Affinity;
|
||||
use crate::regalloc::liverange::LiveRange;
|
||||
use crate::timing;
|
||||
use alloc::vec::Vec;
|
||||
use core::mem;
|
||||
use core::ops::Index;
|
||||
|
||||
/// A set of live ranges, indexed by value number.
|
||||
// Sparse map holding at most one `LiveRange` per `Value`.
type LiveRangeSet = SparseMap<Value, LiveRange>;
|
||||
|
||||
/// Get a mutable reference to the live range for `value`.
|
||||
/// Create it if necessary.
|
||||
fn get_or_create<'a>(
|
||||
lrset: &'a mut LiveRangeSet,
|
||||
value: Value,
|
||||
isa: &dyn TargetIsa,
|
||||
func: &Function,
|
||||
encinfo: &EncInfo,
|
||||
) -> &'a mut LiveRange {
|
||||
// It would be better to use `get_mut()` here, but that leads to borrow checker fighting
|
||||
// which can probably only be resolved by non-lexical lifetimes.
|
||||
// https://github.com/rust-lang/rfcs/issues/811
|
||||
if lrset.get(value).is_none() {
|
||||
// Create a live range for value. We need the program point that defines it.
|
||||
let def;
|
||||
let affinity;
|
||||
match func.dfg.value_def(value) {
|
||||
ValueDef::Result(inst, rnum) => {
|
||||
def = inst.into();
|
||||
// Initialize the affinity from the defining instruction's result constraints.
|
||||
// Don't do this for call return values which are always tied to a single register.
|
||||
affinity = encinfo
|
||||
.operand_constraints(func.encodings[inst])
|
||||
.and_then(|rc| rc.outs.get(rnum))
|
||||
.map(Affinity::new)
|
||||
.or_else(|| {
|
||||
// If this is a call, get the return value affinity.
|
||||
func.dfg
|
||||
.call_signature(inst)
|
||||
.map(|sig| Affinity::abi(&func.dfg.signatures[sig].returns[rnum], isa))
|
||||
})
|
||||
.unwrap_or_default();
|
||||
}
|
||||
ValueDef::Param(block, num) => {
|
||||
def = block.into();
|
||||
if func.layout.entry_block() == Some(block) {
|
||||
// The affinity for entry block parameters can be inferred from the function
|
||||
// signature.
|
||||
affinity = Affinity::abi(&func.signature.params[num], isa);
|
||||
} else {
|
||||
// Give normal block parameters a register affinity matching their type.
|
||||
let rc = isa.regclass_for_abi_type(func.dfg.value_type(value));
|
||||
affinity = Affinity::Reg(rc.into());
|
||||
}
|
||||
}
|
||||
};
|
||||
lrset.insert(LiveRange::new(value, def, affinity));
|
||||
}
|
||||
lrset.get_mut(value).unwrap()
|
||||
}
|
||||
|
||||
/// Extend the live range for `value` so it reaches `to` which must live in `block`.
|
||||
fn extend_to_use(
|
||||
lr: &mut LiveRange,
|
||||
block: Block,
|
||||
to: Inst,
|
||||
worklist: &mut Vec<Block>,
|
||||
func: &Function,
|
||||
cfg: &ControlFlowGraph,
|
||||
) {
|
||||
// This is our scratch working space, and we'll leave it empty when we return.
|
||||
debug_assert!(worklist.is_empty());
|
||||
|
||||
// Extend the range locally in `block`.
|
||||
// If there already was a live interval in that block, we're done.
|
||||
if lr.extend_in_block(block, to, &func.layout) {
|
||||
worklist.push(block);
|
||||
}
|
||||
|
||||
// The work list contains those blocks where we have learned that the value needs to be
|
||||
// live-in.
|
||||
//
|
||||
// This algorithm becomes a depth-first traversal up the CFG, enumerating all paths through the
|
||||
// CFG from the existing live range to `block`.
|
||||
//
|
||||
// Extend the live range as we go. The live range itself also serves as a visited set since
|
||||
// `extend_in_block` will never return true twice for the same block.
|
||||
//
|
||||
while let Some(livein) = worklist.pop() {
|
||||
// We've learned that the value needs to be live-in to the `livein` block.
|
||||
// Make sure it is also live at all predecessor branches to `livein`.
|
||||
for BlockPredecessor {
|
||||
block: pred,
|
||||
inst: branch,
|
||||
} in cfg.pred_iter(livein)
|
||||
{
|
||||
if lr.extend_in_block(pred, branch, &func.layout) {
|
||||
// This predecessor block also became live-in. We need to process it later.
|
||||
worklist.push(pred);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Liveness analysis for a function.
|
||||
///
|
||||
/// Compute a live range for every SSA value used in the function.
|
||||
pub struct Liveness {
|
||||
/// The live ranges that have been computed so far.
|
||||
ranges: LiveRangeSet,
|
||||
|
||||
/// Working space for the `extend_to_use` algorithm.
|
||||
/// This vector is always empty, except for inside that function.
|
||||
/// It lives here to avoid repeated allocation of scratch memory.
|
||||
worklist: Vec<Block>,
|
||||
}
|
||||
|
||||
impl Liveness {
|
||||
/// Create a new empty liveness analysis.
|
||||
///
|
||||
/// The memory allocated for this analysis can be reused for multiple functions. Use the
|
||||
/// `compute` method to actually runs the analysis for a function.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
ranges: LiveRangeSet::new(),
|
||||
worklist: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Current live ranges.
|
||||
pub fn ranges(&self) -> &LiveRangeSet {
|
||||
&self.ranges
|
||||
}
|
||||
|
||||
/// Clear all data structures in this liveness analysis.
|
||||
pub fn clear(&mut self) {
|
||||
self.ranges.clear();
|
||||
self.worklist.clear();
|
||||
}
|
||||
|
||||
/// Get the live range for `value`, if it exists.
|
||||
pub fn get(&self, value: Value) -> Option<&LiveRange> {
|
||||
self.ranges.get(value)
|
||||
}
|
||||
|
||||
/// Create a new live range for `value`.
|
||||
///
|
||||
/// The new live range will be defined at `def` with no extent, like a dead value.
|
||||
///
|
||||
/// This asserts that `value` does not have an existing live range.
|
||||
pub fn create_dead<PP>(&mut self, value: Value, def: PP, affinity: Affinity)
|
||||
where
|
||||
PP: Into<ProgramPoint>,
|
||||
{
|
||||
let old = self
|
||||
.ranges
|
||||
.insert(LiveRange::new(value, def.into(), affinity));
|
||||
debug_assert!(old.is_none(), "{} already has a live range", value);
|
||||
}
|
||||
|
||||
/// Move the definition of `value` to `def`.
|
||||
///
|
||||
/// The old and new def points must be in the same block, and before the end of the live range.
|
||||
pub fn move_def_locally<PP>(&mut self, value: Value, def: PP)
|
||||
where
|
||||
PP: Into<ProgramPoint>,
|
||||
{
|
||||
let lr = self.ranges.get_mut(value).expect("Value has no live range");
|
||||
lr.move_def_locally(def.into());
|
||||
}
|
||||
|
||||
/// Locally extend the live range for `value` to reach `user`.
|
||||
///
|
||||
/// It is assumed the `value` is already live before `user` in `block`.
|
||||
///
|
||||
/// Returns a mutable reference to the value's affinity in case that also needs to be updated.
|
||||
pub fn extend_locally(
|
||||
&mut self,
|
||||
value: Value,
|
||||
block: Block,
|
||||
user: Inst,
|
||||
layout: &Layout,
|
||||
) -> &mut Affinity {
|
||||
debug_assert_eq!(Some(block), layout.inst_block(user));
|
||||
let lr = self.ranges.get_mut(value).expect("Value has no live range");
|
||||
let livein = lr.extend_in_block(block, user, layout);
|
||||
debug_assert!(!livein, "{} should already be live in {}", value, block);
|
||||
&mut lr.affinity
|
||||
}
|
||||
|
||||
/// Change the affinity of `value` to `Stack` and return the previous affinity.
|
||||
pub fn spill(&mut self, value: Value) -> Affinity {
|
||||
let lr = self.ranges.get_mut(value).expect("Value has no live range");
|
||||
mem::replace(&mut lr.affinity, Affinity::Stack)
|
||||
}
|
||||
|
||||
/// Compute the live ranges of all SSA values used in `func`.
|
||||
/// This clears out any existing analysis stored in this data structure.
|
||||
pub fn compute(&mut self, isa: &dyn TargetIsa, func: &mut Function, cfg: &ControlFlowGraph) {
|
||||
let _tt = timing::ra_liveness();
|
||||
self.ranges.clear();
|
||||
|
||||
// Get ISA data structures used for computing live range affinities.
|
||||
let encinfo = isa.encoding_info();
|
||||
let reginfo = isa.register_info();
|
||||
|
||||
// The liveness computation needs to visit all uses, but the order doesn't matter.
|
||||
// TODO: Perhaps this traversal of the function could be combined with a dead code
|
||||
// elimination pass if we visit a post-order of the dominator tree?
|
||||
for block in func.layout.blocks() {
|
||||
// Make sure we have created live ranges for dead block parameters.
|
||||
// TODO: If these parameters are really dead, we could remove them, except for the
|
||||
// entry block which must match the function signature.
|
||||
for &arg in func.dfg.block_params(block) {
|
||||
get_or_create(&mut self.ranges, arg, isa, func, &encinfo);
|
||||
}
|
||||
|
||||
for inst in func.layout.block_insts(block) {
|
||||
// Eliminate all value aliases, they would confuse the register allocator.
|
||||
func.dfg.resolve_aliases_in_arguments(inst);
|
||||
|
||||
// Make sure we have created live ranges for dead defs.
|
||||
// TODO: When we implement DCE, we can use the absence of a live range to indicate
|
||||
// an unused value.
|
||||
for &def in func.dfg.inst_results(inst) {
|
||||
get_or_create(&mut self.ranges, def, isa, func, &encinfo);
|
||||
}
|
||||
|
||||
// Iterator of constraints, one per value operand.
|
||||
let encoding = func.encodings[inst];
|
||||
let operand_constraint_slice: &[OperandConstraint] =
|
||||
encinfo.operand_constraints(encoding).map_or(&[], |c| c.ins);
|
||||
let mut operand_constraints = operand_constraint_slice.iter();
|
||||
|
||||
for &arg in func.dfg.inst_args(inst) {
|
||||
// Get the live range, create it as a dead range if necessary.
|
||||
let lr = get_or_create(&mut self.ranges, arg, isa, func, &encinfo);
|
||||
|
||||
// Extend the live range to reach this use.
|
||||
extend_to_use(lr, block, inst, &mut self.worklist, func, cfg);
|
||||
|
||||
// Apply operand constraint, ignoring any variable arguments after the fixed
|
||||
// operands described by `operand_constraints`. Variable arguments are either
|
||||
// block arguments or call/return ABI arguments.
|
||||
if let Some(constraint) = operand_constraints.next() {
|
||||
lr.affinity.merge(constraint, ®info);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<Value> for Liveness {
|
||||
type Output = LiveRange;
|
||||
fn index(&self, index: Value) -> &LiveRange {
|
||||
self.ranges
|
||||
.get(index)
|
||||
.unwrap_or_else(|| panic!("{} has no live range", index))
|
||||
}
|
||||
}
|
||||
@@ -1,720 +0,0 @@
|
||||
//! Data structure representing the live range of an SSA value.
|
||||
//!
|
||||
//! Live ranges are tracked per SSA value, not per variable or virtual register. The live range of
|
||||
//! an SSA value begins where it is defined and extends to all program points where the value is
|
||||
//! still needed.
|
||||
//!
|
||||
//! # Local Live Ranges
|
||||
//!
|
||||
//! Inside a single basic block, the live range of a value is always an interval between
|
||||
//! two program points (if the value is live in the block at all). The starting point is either:
|
||||
//!
|
||||
//! 1. The instruction that defines the value, or
|
||||
//! 2. The block header, because the value is an argument to the block, or
|
||||
//! 3. The block header, because the value is defined in another block and live-in to this one.
|
||||
//!
|
||||
//! The ending point of the local live range is the last of the following program points in the
|
||||
//! block:
|
||||
//!
|
||||
//! 1. The last use in the block, where a *use* is an instruction that has the value as an argument.
|
||||
//! 2. The last branch or jump instruction in the block that can reach a use.
|
||||
//! 3. If the value has no uses anywhere (a *dead value*), the program point that defines it.
|
||||
//!
|
||||
//! Note that 2. includes loop back-edges to the same block. In general, if a value is defined
|
||||
//! outside a loop and used inside the loop, it will be live in the entire loop.
|
||||
//!
|
||||
//! # Global Live Ranges
|
||||
//!
|
||||
//! Values that appear in more than one block have a *global live range* which can be seen as the
|
||||
//! disjoint union of the per-block local intervals for all of the blocks where the value is live.
|
||||
//! Together with a `ProgramOrder` which provides a linear ordering of the blocks, the global live
|
||||
//! range becomes a linear sequence of disjoint intervals, at most one per block.
|
||||
//!
|
||||
//! In the special case of a dead value, the global live range is a single interval where the start
|
||||
//! and end points are the same. The global live range of a value is never completely empty.
|
||||
//!
|
||||
//! # Register interference
|
||||
//!
|
||||
//! The register allocator uses live ranges to determine if values *interfere*, which means that
|
||||
//! they can't be stored in the same register. Two live ranges interfere if and only if any of
|
||||
//! their intervals overlap.
|
||||
//!
|
||||
//! If one live range ends at an instruction that defines another live range, those two live ranges
|
||||
//! are not considered to interfere. This is because most ISAs allow instructions to reuse an input
|
||||
//! register for an output value. If Cranelift gets support for inline assembly, we will need to
|
||||
//! handle *early clobbers* which are output registers that are not allowed to alias any input
|
||||
//! registers.
|
||||
//!
|
||||
//! If `i1 < i2 < i3` are program points, we have:
|
||||
//!
|
||||
//! - `i1-i2` and `i1-i3` interfere because the intervals overlap.
|
||||
//! - `i1-i2` and `i2-i3` don't interfere.
|
||||
//! - `i1-i3` and `i2-i2` do interfere because the dead def would clobber the register.
|
||||
//! - `i1-i2` and `i2-i2` don't interfere.
|
||||
//! - `i2-i3` and `i2-i2` do interfere.
|
||||
//!
|
||||
//! Because of this behavior around interval end points, live range interference is not completely
|
||||
//! equivalent to mathematical intersection of open or half-open intervals.
|
||||
//!
|
||||
//! # Implementation notes
|
||||
//!
|
||||
//! A few notes about the implementation of the live intervals field `liveins`. This should not
|
||||
//! concern someone only looking to use the public interface.
|
||||
//!
|
||||
//! ## Current representation
|
||||
//!
|
||||
//! Our current implementation uses a sorted array of compressed intervals, represented by their
|
||||
//! boundaries (Block, Inst), sorted by Block. This is a simple data structure, enables coalescing of
|
||||
//! intervals easily, and shows some nice performance behavior. See
|
||||
//! <https://github.com/bytecodealliance/cranelift/issues/1084> for benchmarks against using a
|
||||
//! bforest::Map<Block, Inst>.
|
||||
//!
|
||||
//! ## Block ordering
|
||||
//!
|
||||
//! The relative order of blocks is used to maintain a sorted list of live-in intervals and to
|
||||
//! coalesce adjacent live-in intervals when the prior interval covers the whole block. This doesn't
|
||||
//! depend on any property of the program order, so alternative orderings are possible:
|
||||
//!
|
||||
//! 1. The block layout order. This is what we currently use.
|
||||
//! 2. A topological order of the dominator tree. All the live-in intervals would come after the
|
||||
//! def interval.
|
||||
//! 3. A numerical order by block number. Performant because it doesn't need to indirect through the
|
||||
//! `ProgramOrder` for comparisons.
|
||||
//!
|
||||
//! These orderings will cause small differences in coalescing opportunities, but all of them would
|
||||
//! do a decent job of compressing a long live range. The numerical order might be preferable
|
||||
//! because:
|
||||
//!
|
||||
//! - It has better performance because block numbers can be compared directly without any table
|
||||
//! lookups.
|
||||
//! - If block numbers are not reused, it is safe to allocate new blocks without getting spurious
|
||||
//! live-in intervals from any coalesced representations that happen to cross a new block.
|
||||
//!
|
||||
//! For comparing instructions, the layout order is always what we want.
|
||||
//!
|
||||
//! ## Alternative representation
|
||||
//!
|
||||
//! Since a local live-in interval always begins at its block header, it is uniquely described by its
|
||||
//! end point instruction alone. We can use the layout to look up the block containing the end point.
|
||||
//! This means that a sorted `Vec<Inst>` would be enough to represent the set of live-in intervals.
|
||||
//!
|
||||
//! Coalescing is an important compression technique because some live ranges can span thousands of
|
||||
//! blocks. We can represent that by switching to a sorted `Vec<ProgramPoint>` representation where
|
||||
//! a `[Block, Inst]` pair represents a coalesced range, while an `Inst` entry without a preceding
|
||||
//! `Block` entry represents a single live-in interval.
|
||||
//!
|
||||
//! This representation is more compact for a live range with many uncoalesced live-in intervals.
|
||||
//! It is more complicated to work with, though, so it is probably not worth it. The performance
|
||||
//! benefits of switching to a numerical block order only appear if the binary search is doing
|
||||
//! block-block comparisons.
|
||||
//!
|
||||
//! A `BTreeMap<Block, Inst>` could have been used for the live-in intervals, but it doesn't provide
|
||||
//! the necessary API to make coalescing easy, nor does it optimize for our types' sizes.
|
||||
//!
|
||||
//! Even the specialized `bforest::Map<Block, Inst>` implementation is slower than a plain sorted
|
||||
//! array, see <https://github.com/bytecodealliance/cranelift/issues/1084> for details.
|
||||
|
||||
use crate::entity::SparseMapValue;
|
||||
use crate::ir::{Block, ExpandedProgramPoint, Inst, Layout, ProgramOrder, ProgramPoint, Value};
|
||||
use crate::regalloc::affinity::Affinity;
|
||||
use core::cmp::Ordering;
|
||||
use core::marker::PhantomData;
|
||||
use smallvec::SmallVec;
|
||||
|
||||
/// Global live range of a single SSA value.
|
||||
///
|
||||
/// As [explained in the module documentation](index.html#local-live-ranges), the live range of an
|
||||
/// SSA value is the disjoint union of a set of intervals, each local to a single block, and with at
|
||||
/// most one interval per block. We further distinguish between:
|
||||
///
|
||||
/// 1. The *def interval* is the local interval in the block where the value is defined, and
|
||||
/// 2. The *live-in intervals* are the local intervals in the remaining blocks.
|
||||
///
|
||||
/// A live-in interval always begins at the block header, while the def interval can begin at the
|
||||
/// defining instruction, or at the block header for a block argument value.
|
||||
///
|
||||
/// All values have a def interval, but a large proportion of values don't have any live-in
|
||||
/// intervals. These are called *local live ranges*.
|
||||
///
|
||||
/// # Program order requirements
|
||||
///
|
||||
/// The internal representation of a `LiveRange` depends on a consistent `ProgramOrder` both for
|
||||
/// ordering instructions inside a block *and* for ordering blocks. The methods that depend on the
|
||||
/// ordering take an explicit `ProgramOrder` object, and it is the caller's responsibility to
|
||||
/// ensure that the provided ordering is consistent between calls.
|
||||
///
|
||||
/// In particular, changing the order of blocks or inserting new blocks will invalidate live ranges.
|
||||
///
|
||||
/// Inserting new instructions in the layout is safe, but removing instructions is not. Besides the
|
||||
/// instructions using or defining their value, `LiveRange` structs can contain references to
|
||||
/// branch and jump instructions.
|
||||
// `GenericLiveRange` with its program-order parameter fixed to `Layout`.
pub type LiveRange = GenericLiveRange<Layout>;
|
||||
|
||||
/// One compressed live-in interval of a `GenericLiveRange`.
///
/// See the documentation of the `liveins` field of `GenericLiveRange` below for how a sequence
/// of these entries is interpreted.
|
||||
pub struct Interval {
|
||||
/// Block whose header starts the interval; the value is live-in to this block.
begin: Block,
|
||||
/// Last instruction covered by the interval. It may belong to a block later than `begin`
/// in the program order.
end: Inst,
|
||||
}
|
||||
|
||||
/// Generic live range implementation.
|
||||
///
|
||||
/// The intended generic parameter is `PO=Layout`, but tests are simpler with a mock order.
|
||||
/// Use `LiveRange` instead of using this generic directly.
|
||||
pub struct GenericLiveRange<PO: ProgramOrder> {
|
||||
/// The value described by this live range.
|
||||
/// This member can't be modified in case the live range is stored in a `SparseMap`.
/// It is what the `SparseMapValue::key()` implementation for this type returns.
value: Value,
|
||||
|
||||
/// The preferred register allocation for this value.
|
||||
pub affinity: Affinity,
|
||||
|
||||
/// The instruction or block header where this value is defined.
|
||||
def_begin: ProgramPoint,
|
||||
|
||||
/// The end point of the def interval. This must always belong to the same block as `def_begin`.
|
||||
///
|
||||
/// We always have `def_begin <= def_end` with equality implying a dead def live range with no
|
||||
/// uses.
|
||||
def_end: ProgramPoint,
|
||||
|
||||
/// Additional live-in intervals sorted in program order.
|
||||
///
|
||||
/// This vector is empty for most values which are only used in one block.
|
||||
///
|
||||
/// An entry `block -> inst` means that the live range is live-in to `block`, continuing up to
|
||||
/// `inst` which may belong to a later block in the program order.
|
||||
///
|
||||
/// The entries are non-overlapping, and none of them overlap the block where the value is
|
||||
/// defined.
|
||||
liveins: SmallVec<[Interval; 2]>,
|
||||
|
||||
/// Zero-sized marker that ties the live range to its program order type `PO`; no value of
/// type `PO` is stored, and every method that needs the ordering takes it as an argument.
po: PhantomData<*const PO>,
|
||||
}
|
||||
|
||||
/// Infix-style comparison of two program points through an ordering object.
///
/// `cmp!(order, a OP b)` evaluates `order.cmp(a, b)` and checks the resulting `Ordering` against
/// the relation spelled by `OP`, which must be one of `>`, `>=`, `<` or `<=`. The left operand
/// has to be a plain identifier; the right operand can be any expression.
macro_rules! cmp {
    ($order:ident, $a:ident > $b:expr) => {
        matches!($order.cmp($a, $b), Ordering::Greater)
    };
    ($order:ident, $a:ident >= $b:expr) => {
        !matches!($order.cmp($a, $b), Ordering::Less)
    };
    ($order:ident, $a:ident < $b:expr) => {
        matches!($order.cmp($a, $b), Ordering::Less)
    };
    ($order:ident, $a:ident <= $b:expr) => {
        !matches!($order.cmp($a, $b), Ordering::Greater)
    };
}
|
||||
|
||||
impl<PO: ProgramOrder> GenericLiveRange<PO> {
|
||||
/// Create a new live range for `value` defined at `def`.
|
||||
///
|
||||
/// The live range will be created as dead, but it can be extended with `extend_in_block()`.
|
||||
pub fn new(value: Value, def: ProgramPoint, affinity: Affinity) -> Self {
|
||||
Self {
|
||||
value,
|
||||
affinity,
|
||||
def_begin: def,
|
||||
def_end: def,
|
||||
liveins: SmallVec::new(),
|
||||
po: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
/// Finds an entry in the compressed set of live-in intervals that contains `block`, or return
|
||||
/// the position where to insert such a new entry.
|
||||
fn lookup_entry_containing_block(&self, block: Block, order: &PO) -> Result<usize, usize> {
|
||||
self.liveins
|
||||
.binary_search_by(|interval| order.cmp(interval.begin, block))
|
||||
.or_else(|n| {
|
||||
// The previous interval's end might cover the searched block.
|
||||
if n > 0 && cmp!(order, block <= self.liveins[n - 1].end) {
|
||||
Ok(n - 1)
|
||||
} else {
|
||||
Err(n)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Extend the local interval for `block` so it reaches `to` which must belong to `block`.
|
||||
/// Create a live-in interval if necessary.
|
||||
///
|
||||
/// If the live range already has a local interval in `block`, extend its end point so it
|
||||
/// includes `to`, and return false.
|
||||
///
|
||||
/// If the live range did not previously have a local interval in `block`, add one so the value
|
||||
/// is live-in to `block`, extending to `to`. Return true.
|
||||
///
|
||||
/// The return value can be used to detect if we just learned that the value is live-in to
|
||||
/// `block`. This can trigger recursive extensions in `block`'s CFG predecessor blocks.
|
||||
pub fn extend_in_block(&mut self, block: Block, inst: Inst, order: &PO) -> bool {
|
||||
// First check if we're extending the def interval.
|
||||
//
|
||||
// We're assuming here that `inst` never precedes `def_begin` in the same block, but we can't
|
||||
// check it without a method for getting `inst`'s block.
|
||||
if cmp!(order, block <= self.def_end) && cmp!(order, inst >= self.def_begin) {
|
||||
let inst_pp = inst.into();
|
||||
debug_assert_ne!(
|
||||
inst_pp, self.def_begin,
|
||||
"Can't use value in the defining instruction."
|
||||
);
|
||||
if cmp!(order, inst > self.def_end) {
|
||||
self.def_end = inst_pp;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Now check if we're extending any of the existing live-in intervals.
|
||||
match self.lookup_entry_containing_block(block, order) {
|
||||
Ok(n) => {
|
||||
// We found one interval and might need to extend it.
|
||||
if cmp!(order, inst <= self.liveins[n].end) {
|
||||
// Both interval parts are already included in a compressed interval.
|
||||
return false;
|
||||
}
|
||||
|
||||
// If the instruction at the end is the last instruction before the next block,
|
||||
// coalesce the two intervals:
|
||||
// [ival.begin; ival.end] + [next.begin; next.end] = [ival.begin; next.end]
|
||||
if let Some(next) = &self.liveins.get(n + 1) {
|
||||
if order.is_block_gap(inst, next.begin) {
|
||||
// At this point we can choose to remove the current interval or the next
|
||||
// one; remove the next one to avoid one memory move.
|
||||
let next_end = next.end;
|
||||
debug_assert!(cmp!(order, next_end > self.liveins[n].end));
|
||||
self.liveins[n].end = next_end;
|
||||
self.liveins.remove(n + 1);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// We can't coalesce, just extend the interval.
|
||||
self.liveins[n].end = inst;
|
||||
false
|
||||
}
|
||||
|
||||
Err(n) => {
|
||||
// No interval was found containing the current block: we need to insert a new one,
|
||||
// unless there's a coalescing opportunity with the previous or next one.
|
||||
let coalesce_next = self
|
||||
.liveins
|
||||
.get(n)
|
||||
.filter(|next| order.is_block_gap(inst, next.begin))
|
||||
.is_some();
|
||||
let coalesce_prev = self
|
||||
.liveins
|
||||
.get(n.wrapping_sub(1))
|
||||
.filter(|prev| order.is_block_gap(prev.end, block))
|
||||
.is_some();
|
||||
|
||||
match (coalesce_prev, coalesce_next) {
|
||||
// The new interval is the missing hole between prev and next: we can merge
|
||||
// them all together.
|
||||
(true, true) => {
|
||||
let prev_end = self.liveins[n - 1].end;
|
||||
debug_assert!(cmp!(order, prev_end <= self.liveins[n].end));
|
||||
self.liveins[n - 1].end = self.liveins[n].end;
|
||||
self.liveins.remove(n);
|
||||
}
|
||||
|
||||
// Coalesce only with the previous or next one.
|
||||
(true, false) => {
|
||||
debug_assert!(cmp!(order, inst >= self.liveins[n - 1].end));
|
||||
self.liveins[n - 1].end = inst;
|
||||
}
|
||||
(false, true) => {
|
||||
debug_assert!(cmp!(order, block <= self.liveins[n].begin));
|
||||
self.liveins[n].begin = block;
|
||||
}
|
||||
|
||||
(false, false) => {
|
||||
// No coalescing opportunity, we have to insert.
|
||||
self.liveins.insert(
|
||||
n,
|
||||
Interval {
|
||||
begin: block,
|
||||
end: inst,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Is this the live range of a dead value?
|
||||
///
|
||||
/// A dead value has no uses, and its live range ends at the same program point where it is
|
||||
/// defined.
|
||||
pub fn is_dead(&self) -> bool {
|
||||
self.def_begin == self.def_end
|
||||
}
|
||||
|
||||
/// Is this a local live range?
|
||||
///
|
||||
/// A local live range is only used in the same block where it was defined. It is allowed to span
|
||||
/// multiple basic blocks within that block.
|
||||
pub fn is_local(&self) -> bool {
|
||||
self.liveins.is_empty()
|
||||
}
|
||||
|
||||
/// Get the program point where this live range is defined.
|
||||
///
|
||||
/// This will be a block header when the value is a block argument, otherwise it is the defining
|
||||
/// instruction.
|
||||
pub fn def(&self) -> ProgramPoint {
|
||||
self.def_begin
|
||||
}
|
||||
|
||||
/// Move the definition of this value to a new program point.
|
||||
///
|
||||
/// It is only valid to move the definition within the same block, and it can't be moved beyond
|
||||
/// `def_local_end()`.
|
||||
pub fn move_def_locally(&mut self, def: ProgramPoint) {
|
||||
self.def_begin = def;
|
||||
}
|
||||
|
||||
/// Get the local end-point of this live range in the block where it is defined.
|
||||
///
|
||||
/// This can be the block header itself in the case of a dead block argument.
|
||||
/// Otherwise, it will be the last local use or branch/jump that can reach a use.
|
||||
pub fn def_local_end(&self) -> ProgramPoint {
|
||||
self.def_end
|
||||
}
|
||||
|
||||
/// Get the local end-point of this live range in a block where it is live-in.
|
||||
///
|
||||
/// If this live range is not live-in to `block`, return `None`. Otherwise, return the end-point
|
||||
/// of this live range's local interval in `block`.
|
||||
///
|
||||
/// If the live range is live through all of `block`, the terminator of `block` is a correct
|
||||
/// answer, but it is also possible that an even later program point is returned. So don't
|
||||
/// depend on the returned `Inst` to belong to `block`.
|
||||
pub fn livein_local_end(&self, block: Block, order: &PO) -> Option<Inst> {
|
||||
self.lookup_entry_containing_block(block, order)
|
||||
.and_then(|i| {
|
||||
let inst = self.liveins[i].end;
|
||||
if cmp!(order, block < inst) {
|
||||
Ok(inst)
|
||||
} else {
|
||||
// Can be any error type, really, since it's discarded by ok().
|
||||
Err(i)
|
||||
}
|
||||
})
|
||||
.ok()
|
||||
}
|
||||
|
||||
/// Is this value live-in to `block`?
|
||||
///
|
||||
/// A block argument is not considered to be live in.
|
||||
pub fn is_livein(&self, block: Block, order: &PO) -> bool {
|
||||
self.livein_local_end(block, order).is_some()
|
||||
}
|
||||
|
||||
/// Get all the live-in intervals.
|
||||
///
|
||||
/// Note that the intervals are stored in a compressed form so each entry may span multiple
|
||||
/// blocks where the value is live in.
|
||||
pub fn liveins<'a>(&'a self) -> impl Iterator<Item = (Block, Inst)> + 'a {
|
||||
self.liveins
|
||||
.iter()
|
||||
.map(|interval| (interval.begin, interval.end))
|
||||
}
|
||||
|
||||
/// Check if this live range overlaps a definition in `block`.
|
||||
pub fn overlaps_def(&self, def: ExpandedProgramPoint, block: Block, order: &PO) -> bool {
|
||||
// Two defs at the same program point always overlap, even if one is dead.
|
||||
if def == self.def_begin.into() {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check for an overlap with the local range.
|
||||
if cmp!(order, def >= self.def_begin) && cmp!(order, def < self.def_end) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check for an overlap with a live-in range.
|
||||
match self.livein_local_end(block, order) {
|
||||
Some(inst) => cmp!(order, def < inst),
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if this live range reaches a use at `user` in `block`.
|
||||
pub fn reaches_use(&self, user: Inst, block: Block, order: &PO) -> bool {
|
||||
// Check for an overlap with the local range.
|
||||
if cmp!(order, user > self.def_begin) && cmp!(order, user <= self.def_end) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check for an overlap with a live-in range.
|
||||
match self.livein_local_end(block, order) {
|
||||
Some(inst) => cmp!(order, user <= inst),
|
||||
None => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if this live range is killed at `user` in `block`.
|
||||
pub fn killed_at(&self, user: Inst, block: Block, order: &PO) -> bool {
|
||||
self.def_local_end() == user.into() || self.livein_local_end(block, order) == Some(user)
|
||||
}
|
||||
}
|
||||
|
||||
/// Allow a `LiveRange` to be stored in a `SparseMap` indexed by values.
|
||||
impl<PO: ProgramOrder> SparseMapValue<Value> for GenericLiveRange<PO> {
|
||||
/// The map key of a live range is the value it describes.
fn key(&self) -> Value {
|
||||
self.value
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{GenericLiveRange, Interval};
    use crate::entity::EntityRef;
    use crate::ir::{Block, Inst, Value};
    use crate::ir::{ExpandedProgramPoint, ProgramOrder};
    use alloc::vec::Vec;
    use core::cmp::Ordering;

    // Dummy program order which simply compares entity indexes.
    //
    // Blocks are assumed to have indexes that are multiples of 10, with the instructions of a
    // block using the indexes in between. `is_block_gap` additionally treats an instruction whose
    // index is one past a multiple of 10 (11, 21, 31, ...) as the terminator of its block. The
    // coalesce test relies on this.
    struct ProgOrder {}

    impl ProgramOrder for ProgOrder {
        // Order program points by their raw entity index, whether block or instruction.
        fn cmp<A, B>(&self, a: A, b: B) -> Ordering
        where
            A: Into<ExpandedProgramPoint>,
            B: Into<ExpandedProgramPoint>,
        {
            fn idx(pp: ExpandedProgramPoint) -> usize {
                match pp {
                    ExpandedProgramPoint::Inst(i) => i.index(),
                    ExpandedProgramPoint::Block(e) => e.index(),
                }
            }

            let ia = idx(a.into());
            let ib = idx(b.into());
            ia.cmp(&ib)
        }

        // True when `inst` is a mock terminator and `block` is the next block by index decade.
        fn is_block_gap(&self, inst: Inst, block: Block) -> bool {
            inst.index() % 10 == 1 && block.index() / 10 == inst.index() / 10 + 1
        }
    }

    impl ProgOrder {
        // Get the block corresponding to `inst`.
        fn inst_block(&self, inst: Inst) -> Block {
            let i = inst.index();
            Block::new(i - i % 10)
        }

        // Get the block of a program point.
        fn pp_block<PP: Into<ExpandedProgramPoint>>(&self, pp: PP) -> Block {
            match pp.into() {
                ExpandedProgramPoint::Inst(i) => self.inst_block(i),
                ExpandedProgramPoint::Block(e) => e,
            }
        }

        // Validate the live range invariants.
        fn validate(&self, lr: &GenericLiveRange<Self>) {
            // The def interval must cover a single block.
            let def_block = self.pp_block(lr.def_begin);
            assert_eq!(def_block, self.pp_block(lr.def_end));

            // Check that the def interval isn't backwards.
            match self.cmp(lr.def_begin, lr.def_end) {
                Ordering::Equal => assert!(lr.liveins.is_empty()),
                Ordering::Greater => {
                    panic!("Backwards def interval: {}-{}", lr.def_begin, lr.def_end)
                }
                Ordering::Less => {}
            }

            // Check the live-in intervals.
            let mut prev_end: Option<Inst> = None;
            for &Interval { begin, end } in lr.liveins.iter() {
                // Each interval must be non-empty and start after the previous one ended.
                assert_eq!(self.cmp(begin, end), Ordering::Less);
                if let Some(e) = prev_end {
                    assert_eq!(self.cmp(e, begin), Ordering::Less);
                }

                assert!(
                    self.cmp(lr.def_end, begin) == Ordering::Less
                        || self.cmp(lr.def_begin, end) == Ordering::Greater,
                    "Interval can't overlap the def block"
                );

                // Save for next round.
                prev_end = Some(end);
            }
        }
    }

    // Singleton `ProgramOrder` for tests below.
    const PO: &ProgOrder = &ProgOrder {};

    #[test]
    fn dead_def_range() {
        let v0 = Value::new(0);
        let e0 = Block::new(0);
        let i1 = Inst::new(1);
        let i2 = Inst::new(2);
        let e2 = Block::new(2);
        let lr = GenericLiveRange::new(v0, i1.into(), Default::default());
        assert!(lr.is_dead());
        assert!(lr.is_local());
        assert_eq!(lr.def(), i1.into());
        assert_eq!(lr.def_local_end(), i1.into());
        assert_eq!(lr.livein_local_end(e2, PO), None);
        PO.validate(&lr);

        // A dead live range overlaps its own def program point.
        assert!(lr.overlaps_def(i1.into(), e0, PO));
        assert!(!lr.overlaps_def(i2.into(), e0, PO));
        assert!(!lr.overlaps_def(e0.into(), e0, PO));
    }

    #[test]
    fn dead_arg_range() {
        let v0 = Value::new(0);
        let e2 = Block::new(2);
        let lr = GenericLiveRange::new(v0, e2.into(), Default::default());
        assert!(lr.is_dead());
        assert!(lr.is_local());
        assert_eq!(lr.def(), e2.into());
        assert_eq!(lr.def_local_end(), e2.into());
        // The def interval of a block argument does not count as live-in.
        assert_eq!(lr.livein_local_end(e2, PO), None);
        PO.validate(&lr);
    }

    #[test]
    fn local_def() {
        let v0 = Value::new(0);
        let e10 = Block::new(10);
        let i11 = Inst::new(11);
        let i12 = Inst::new(12);
        let i13 = Inst::new(13);
        let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());

        assert!(!lr.extend_in_block(e10, i13, PO));
        PO.validate(&lr);
        assert!(!lr.is_dead());
        assert!(lr.is_local());
        assert_eq!(lr.def(), i11.into());
        assert_eq!(lr.def_local_end(), i13.into());

        // Extending to an already covered inst should not change anything.
        assert!(!lr.extend_in_block(e10, i12, PO));
        PO.validate(&lr);
        assert_eq!(lr.def(), i11.into());
        assert_eq!(lr.def_local_end(), i13.into());
    }

    #[test]
    fn local_arg() {
        let v0 = Value::new(0);
        let e10 = Block::new(10);
        let i11 = Inst::new(11);
        let i12 = Inst::new(12);
        let i13 = Inst::new(13);
        let mut lr = GenericLiveRange::new(v0, e10.into(), Default::default());

        // Extending a dead block argument in its own block should not indicate that a live-in
        // interval was created.
        assert!(!lr.extend_in_block(e10, i12, PO));
        PO.validate(&lr);
        assert!(!lr.is_dead());
        assert!(lr.is_local());
        assert_eq!(lr.def(), e10.into());
        assert_eq!(lr.def_local_end(), i12.into());

        // Extending to an already covered inst should not change anything.
        assert!(!lr.extend_in_block(e10, i11, PO));
        PO.validate(&lr);
        assert_eq!(lr.def(), e10.into());
        assert_eq!(lr.def_local_end(), i12.into());

        // Extending further.
        assert!(!lr.extend_in_block(e10, i13, PO));
        PO.validate(&lr);
        assert_eq!(lr.def(), e10.into());
        assert_eq!(lr.def_local_end(), i13.into());
    }

    #[test]
    fn global_def() {
        let v0 = Value::new(0);
        let e10 = Block::new(10);
        let i11 = Inst::new(11);
        let i12 = Inst::new(12);
        let e20 = Block::new(20);
        let i21 = Inst::new(21);
        let i22 = Inst::new(22);
        let i23 = Inst::new(23);
        let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());

        assert!(!lr.extend_in_block(e10, i12, PO));

        // Adding a live-in interval.
        assert!(lr.extend_in_block(e20, i22, PO));
        PO.validate(&lr);
        assert_eq!(lr.livein_local_end(e20, PO), Some(i22));

        // Non-extending the live-in.
        assert!(!lr.extend_in_block(e20, i21, PO));
        assert_eq!(lr.livein_local_end(e20, PO), Some(i22));

        // Extending the existing live-in.
        assert!(!lr.extend_in_block(e20, i23, PO));
        PO.validate(&lr);
        assert_eq!(lr.livein_local_end(e20, PO), Some(i23));
    }

    #[test]
    fn coalesce() {
        let v0 = Value::new(0);
        let i11 = Inst::new(11);
        let e20 = Block::new(20);
        let i21 = Inst::new(21);
        let e30 = Block::new(30);
        let i31 = Inst::new(31);
        let e40 = Block::new(40);
        let i41 = Inst::new(41);
        let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());

        assert!(lr.extend_in_block(e30, i31, PO));
        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e30, i31)]);

        // Coalesce to previous
        assert!(lr.extend_in_block(e40, i41, PO));
        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e30, i41)]);

        // Coalesce to next
        assert!(lr.extend_in_block(e20, i21, PO));
        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i41)]);

        let mut lr = GenericLiveRange::new(v0, i11.into(), Default::default());

        assert!(lr.extend_in_block(e40, i41, PO));
        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e40, i41)]);

        assert!(lr.extend_in_block(e20, i21, PO));
        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i21), (e40, i41)]);

        // Coalesce to previous and next
        assert!(lr.extend_in_block(e30, i31, PO));
        assert_eq!(lr.liveins().collect::<Vec<_>>(), [(e20, i41)]);
    }
}
|
||||
@@ -1,26 +0,0 @@
|
||||
//! Register allocation.
|
||||
//!
|
||||
//! This module contains data structures and algorithms used for register allocation.
|
||||
|
||||
pub mod coloring;
|
||||
pub mod live_value_tracker;
|
||||
pub mod liveness;
|
||||
pub mod liverange;
|
||||
pub mod register_set;
|
||||
pub mod virtregs;
|
||||
|
||||
mod affinity;
|
||||
mod branch_splitting;
|
||||
mod coalescing;
|
||||
mod context;
|
||||
mod diversion;
|
||||
mod pressure;
|
||||
mod reload;
|
||||
mod safepoint;
|
||||
mod solver;
|
||||
mod spilling;
|
||||
|
||||
pub use self::context::Context;
|
||||
pub use self::diversion::{EntryRegDiversions, RegDiversions};
|
||||
pub use self::register_set::RegisterSet;
|
||||
pub use self::safepoint::emit_stack_maps;
|
||||
@@ -1,371 +0,0 @@
|
||||
//! Register pressure tracking.
|
||||
//!
|
||||
//! SSA-based register allocation depends on a spilling phase that "lowers register pressure
|
||||
//! sufficiently". This module defines the data structures needed to measure register pressure
|
||||
//! accurately enough to guarantee that the coloring phase will not run out of registers.
|
||||
//!
|
||||
//! Ideally, measuring register pressure amounts to simply counting the number of live registers at
|
||||
//! any given program point. This simplistic method has two problems:
|
||||
//!
|
||||
//! 1. Registers are not interchangeable. Most ISAs have separate integer and floating-point
|
||||
//! register banks, so we need to at least count the number of live registers in each register
|
||||
//! bank separately.
|
||||
//!
|
||||
//! 2. Some ISAs have complicated register aliasing properties. In particular, the 32-bit ARM
|
||||
//! ISA has a floating-point register bank where two 32-bit registers alias one 64-bit register.
|
||||
//! This makes it difficult to accurately measure register pressure.
|
||||
//!
|
||||
//! This module deals with the problems via *register banks* and *top-level register classes*.
|
||||
//! Register classes in different register banks are completely independent, so we can count
|
||||
//! registers in one bank without worrying about the other bank at all.
|
||||
//!
|
||||
//! All register classes have a unique top-level register class, and we will count registers for
|
||||
//! each top-level register class individually. However, a register bank can have multiple
|
||||
//! top-level register classes that interfere with each other, so all top-level counts need to
|
||||
//! be considered when determining how many more registers can be allocated.
|
||||
//!
|
||||
//! Currently, the only register bank with multiple top-level register classes is the `arm32`
|
||||
//! floating-point register bank which has `S`, `D`, and `Q` top-level classes.
|
||||
//!
|
||||
//! # Base and transient counts
|
||||
//!
|
||||
//! We maintain two separate register counts per top-level register class: base counts and
|
||||
//! transient counts. The base counts are adjusted with the `take` and `free` functions. The
|
||||
//! transient counts are incremented with `take_transient`, cleared with `reset_transient`, and
//! folded into the base counts with `preserve_transient`.
|
||||
|
||||
// Remove once we're using the pressure tracker.
|
||||
#![allow(dead_code)]
|
||||
|
||||
use crate::isa::registers::{RegClass, RegClassMask, RegInfo};
|
||||
use crate::regalloc::RegisterSet;
|
||||
use core::cmp::min;
|
||||
use core::fmt;
|
||||
use core::iter::ExactSizeIterator;
|
||||
use cranelift_codegen_shared::constants::MAX_TRACKED_TOP_RCS;
|
||||
|
||||
/// Information per top-level register class.
///
/// Everything but the counts is static information computed from the constructor arguments.
#[derive(Default)]
struct TopRC {
    /// Number of registers currently used from this register class (the base count).
    base_count: u32,

    /// Number of registers currently claimed through the transient counter.
    transient_count: u32,

    /// Max number of registers that can be allocated.
    limit: u32,

    /// Register units per register.
    width: u8,

    /// The first aliasing top-level RC.
    first_toprc: u8,

    /// The number of aliasing top-level RCs.
    num_toprcs: u8,
}

impl TopRC {
    /// Total number of registers accounted for: base count plus transient count.
    fn total_count(&self) -> u32 {
        self.base_count + self.transient_count
    }
}
|
||||
|
||||
pub struct Pressure {
|
||||
/// Bit mask of top-level register classes that are aliased by other top-level register classes.
|
||||
/// Unaliased register classes can use a simpler interference algorithm.
|
||||
aliased: RegClassMask,
|
||||
|
||||
/// Current register counts per top-level register class.
|
||||
toprc: [TopRC; MAX_TRACKED_TOP_RCS],
|
||||
}
|
||||
|
||||
impl Pressure {
|
||||
/// Create a new register pressure tracker.
|
||||
pub fn new(reginfo: &RegInfo, usable: &RegisterSet) -> Self {
|
||||
let mut p = Self {
|
||||
aliased: 0,
|
||||
toprc: Default::default(),
|
||||
};
|
||||
|
||||
// Get the layout of aliasing top-level register classes from the register banks.
|
||||
for bank in reginfo.banks {
|
||||
let first = bank.first_toprc;
|
||||
let num = bank.num_toprcs;
|
||||
|
||||
if bank.pressure_tracking {
|
||||
for rc in &mut p.toprc[first..first + num] {
|
||||
rc.first_toprc = first as u8;
|
||||
rc.num_toprcs = num as u8;
|
||||
}
|
||||
|
||||
// Flag the top-level register classes with aliases.
|
||||
if num > 1 {
|
||||
p.aliased |= ((1 << num) - 1) << first;
|
||||
}
|
||||
} else {
|
||||
// This bank has no pressure tracking, so its top-level register classes may exceed
|
||||
// `MAX_TRACKED_TOPRCS`. Fill in dummy entries.
|
||||
for rc in &mut p.toprc[first..min(first + num, MAX_TRACKED_TOP_RCS)] {
|
||||
// These aren't used if we don't set the `aliased` bit.
|
||||
rc.first_toprc = !0;
|
||||
rc.limit = !0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute per-class limits from `usable`.
|
||||
for (toprc, rc) in p
|
||||
.toprc
|
||||
.iter_mut()
|
||||
.take_while(|t| t.num_toprcs > 0)
|
||||
.zip(reginfo.classes)
|
||||
{
|
||||
toprc.limit = usable.iter(rc).len() as u32;
|
||||
toprc.width = rc.width;
|
||||
}
|
||||
|
||||
p
|
||||
}
|
||||
|
||||
/// Check for an available register in the register class `rc`.
|
||||
///
|
||||
/// If it is possible to allocate one more register from `rc`'s top-level register class,
|
||||
/// returns 0.
|
||||
///
|
||||
/// If not, returns a bit-mask of top-level register classes that are interfering. Register
|
||||
/// pressure should be eased in one of the returned top-level register classes before calling
|
||||
/// `can_take()` to check again.
|
||||
fn check_avail(&self, rc: RegClass) -> RegClassMask {
|
||||
let entry = match self.toprc.get(rc.toprc as usize) {
|
||||
None => return 0, // Not a pressure tracked bank.
|
||||
Some(e) => e,
|
||||
};
|
||||
let mask = 1 << rc.toprc;
|
||||
if (self.aliased & mask) == 0 {
|
||||
// This is a simple unaliased top-level register class.
|
||||
if entry.total_count() < entry.limit {
|
||||
0
|
||||
} else {
|
||||
mask
|
||||
}
|
||||
} else {
|
||||
// This is the more complicated case. The top-level register class has aliases.
|
||||
self.check_avail_aliased(entry)
|
||||
}
|
||||
}
|
||||
|
||||
/// Check for an available register in a top-level register class that may have aliases.
|
||||
///
|
||||
/// This is the out-of-line slow path for `check_avail()`.
|
||||
fn check_avail_aliased(&self, entry: &TopRC) -> RegClassMask {
|
||||
let first = usize::from(entry.first_toprc);
|
||||
let num = usize::from(entry.num_toprcs);
|
||||
let width = u32::from(entry.width);
|
||||
let ulimit = entry.limit * width;
|
||||
|
||||
// Count up the number of available register units.
|
||||
let mut units = 0;
|
||||
for (rc, rci) in self.toprc[first..first + num].iter().zip(first..) {
|
||||
let rcw = u32::from(rc.width);
|
||||
// If `rc.width` is smaller than `width`, each register in `rc` could potentially block
|
||||
// one of ours. This is assuming that none of the smaller registers are straddling the
|
||||
// bigger ones.
|
||||
//
|
||||
// If `rc.width` is larger than `width`, we are also assuming that the registers are
|
||||
// aligned and `rc.width` is a multiple of `width`.
|
||||
let u = if rcw < width {
|
||||
// We can't take more than the total number of register units in the class.
|
||||
// This matters for arm32 S-registers which can only ever lock out 16 D-registers.
|
||||
min(rc.total_count() * width, rc.limit * rcw)
|
||||
} else {
|
||||
rc.total_count() * rcw
|
||||
};
|
||||
|
||||
// If this top-level RC on its own is responsible for exceeding our limit, return it
|
||||
// early to guarantee that registers here are spilled before spilling other registers
|
||||
// unnecessarily.
|
||||
if u >= ulimit {
|
||||
return 1 << rci;
|
||||
}
|
||||
|
||||
units += u;
|
||||
}
|
||||
|
||||
// We've counted up the worst-case number of register units claimed by all aliasing
|
||||
// classes. Compare to the unit limit in this class.
|
||||
if units < ulimit {
|
||||
0
|
||||
} else {
|
||||
// Registers need to be spilled from any one of the aliasing classes.
|
||||
((1 << num) - 1) << first
|
||||
}
|
||||
}
|
||||
|
||||
/// Take a register from `rc`.
|
||||
///
|
||||
/// This does not check if there are enough registers available.
|
||||
pub fn take(&mut self, rc: RegClass) {
|
||||
if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
|
||||
t.base_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Free a register in `rc`.
|
||||
pub fn free(&mut self, rc: RegClass) {
|
||||
if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
|
||||
t.base_count -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
/// Reset all counts to 0, both base and transient.
|
||||
pub fn reset(&mut self) {
|
||||
for e in &mut self.toprc {
|
||||
e.base_count = 0;
|
||||
e.transient_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// Try to increment a transient counter.
|
||||
///
|
||||
/// This will fail if there are not enough registers available.
|
||||
pub fn take_transient(&mut self, rc: RegClass) -> Result<(), RegClassMask> {
|
||||
let mask = self.check_avail(rc);
|
||||
if mask == 0 {
|
||||
if let Some(t) = self.toprc.get_mut(rc.toprc as usize) {
|
||||
t.transient_count += 1;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
} else {
|
||||
Err(mask)
|
||||
}
|
||||
}
|
||||
|
||||
/// Reset all transient counts to 0.
|
||||
pub fn reset_transient(&mut self) {
|
||||
for e in &mut self.toprc {
|
||||
e.transient_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// Preserve the transient counts by transferring them to the base counts.
|
||||
pub fn preserve_transient(&mut self) {
|
||||
for e in &mut self.toprc {
|
||||
e.base_count += e.transient_count;
|
||||
e.transient_count = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Pressure {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "Pressure[")?;
|
||||
for rc in &self.toprc {
|
||||
if rc.limit > 0 && rc.limit < !0 {
|
||||
write!(f, " {}+{}/{}", rc.base_count, rc.transient_count, rc.limit)?;
|
||||
}
|
||||
}
|
||||
write!(f, " ]")
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
#[cfg(feature = "arm32")]
mod tests {
    use super::Pressure;
    use crate::isa::registers::{RegBank, RegClassData};
    use crate::isa::{RegClass, RegInfo, RegUnit};
    use crate::regalloc::RegisterSet;
    use core::borrow::Borrow;

    // Arm32 `TargetIsa` is now `TargetIsaAdapter`, which does not hold any info
    // about registers, so we directly access `INFO` from registers-arm32.rs.
    include!(concat!(env!("OUT_DIR"), "/registers-arm32.rs"));

    // Get a register class by name.
    fn rc_by_name(reginfo: &RegInfo, name: &str) -> RegClass {
        reginfo
            .classes
            .iter()
            .find(|rc| rc.name == name)
            .expect("Can't find named register class.")
    }

    #[test]
    fn basic_counting() {
        let reginfo = INFO.borrow();
        let gpr = rc_by_name(&reginfo, "GPR");
        let s = rc_by_name(&reginfo, "S");

        let regs = RegisterSet::new();

        let mut pressure = Pressure::new(&reginfo, &regs);
        let mut count = 0;
        while pressure.check_avail(gpr) == 0 {
            pressure.take(gpr);
            count += 1;
        }
        assert_eq!(count, 16);
        assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
        assert_eq!(pressure.check_avail(s), 0);
        pressure.free(gpr);
        assert_eq!(pressure.check_avail(gpr), 0);
        pressure.take(gpr);
        assert_eq!(pressure.check_avail(gpr), 1 << gpr.toprc);
        assert_eq!(pressure.check_avail(s), 0);
        pressure.reset();
        assert_eq!(pressure.check_avail(gpr), 0);
        assert_eq!(pressure.check_avail(s), 0);
    }

    #[test]
    fn arm_float_bank() {
        let reginfo = INFO.borrow();
        let s = rc_by_name(&reginfo, "S");
        let d = rc_by_name(&reginfo, "D");
        let q = rc_by_name(&reginfo, "Q");
        let regs = RegisterSet::new();

        let mut pressure = Pressure::new(&reginfo, &regs);
        assert_eq!(pressure.check_avail(s), 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert_eq!(pressure.check_avail(q), 0);

        // Allocating a single S-register should not affect availability.
        pressure.take(s);
        assert_eq!(pressure.check_avail(s), 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert_eq!(pressure.check_avail(q), 0);

        pressure.take(d);
        assert_eq!(pressure.check_avail(s), 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert_eq!(pressure.check_avail(q), 0);

        pressure.take(q);
        assert_eq!(pressure.check_avail(s), 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert_eq!(pressure.check_avail(q), 0);

        // Take a total of 16 S-regs.
        for _ in 1..16 {
            pressure.take(s);
        }
        assert_eq!(pressure.check_avail(s), 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert_eq!(pressure.check_avail(q), 0);

        // We've taken 16 S, 1 D, and 1 Q. There should be 6 more Qs.
        for _ in 0..6 {
            assert_eq!(pressure.check_avail(d), 0);
            assert_eq!(pressure.check_avail(q), 0);
            pressure.take(q);
        }

        // We've taken 16 S, 1 D, and 7 Qs.
        assert!(pressure.check_avail(s) != 0);
        assert_eq!(pressure.check_avail(d), 0);
        assert!(pressure.check_avail(q) != 0);
    }
}
|
||||
@@ -1,391 +0,0 @@
|
||||
//! Set of allocatable registers as a bit vector of register units.
|
||||
//!
|
||||
//! While allocating registers, we need to keep track of which registers are available and which
|
||||
//! registers are in use. Since registers can alias in different ways, we track this via the
|
||||
//! "register unit" abstraction. Every register contains one or more register units. Registers that
|
||||
//! share a register unit can't be in use at the same time.
|
||||
|
||||
use crate::isa::registers::{RegClass, RegInfo, RegUnit, RegUnitMask};
|
||||
use core::char;
|
||||
use core::fmt;
|
||||
use core::iter::ExactSizeIterator;
|
||||
use core::mem::size_of_val;
|
||||
|
||||
/// Set of registers available for allocation.
|
||||
#[derive(Clone)]
|
||||
pub struct RegisterSet {
|
||||
avail: RegUnitMask,
|
||||
}
|
||||
|
||||
// Given a register class and a register unit in the class, compute a word index and a bit mask of
|
||||
// register units representing that register.
|
||||
//
|
||||
// Note that a register is not allowed to straddle words.
|
||||
fn bitmask(rc: RegClass, reg: RegUnit) -> (usize, u32) {
|
||||
// Bit mask representing the register. It is `rc.width` consecutive units.
|
||||
let width_bits = (1 << rc.width) - 1;
|
||||
// Index into avail[] of the word containing `reg`.
|
||||
let word_index = (reg / 32) as usize;
|
||||
// The actual bits in the word that cover `reg`.
|
||||
let reg_bits = width_bits << (reg % 32);
|
||||
|
||||
(word_index, reg_bits)
|
||||
}
|
||||
|
||||
impl RegisterSet {
|
||||
/// Create a new register set with all registers available.
|
||||
///
|
||||
/// Note that this includes *all* registers. Query the `TargetIsa` object to get a set of
|
||||
/// allocatable registers where reserved registers have been filtered out.
|
||||
pub fn new() -> Self {
|
||||
Self { avail: [!0; 3] }
|
||||
}
|
||||
|
||||
/// Create a new register set with no registers available.
|
||||
pub fn empty() -> Self {
|
||||
Self { avail: [0; 3] }
|
||||
}
|
||||
|
||||
/// Returns `true` if the specified register is available.
|
||||
pub fn is_avail(&self, rc: RegClass, reg: RegUnit) -> bool {
|
||||
let (idx, bits) = bitmask(rc, reg);
|
||||
(self.avail[idx] & bits) == bits
|
||||
}
|
||||
|
||||
/// Allocate `reg` from `rc` so it is no longer available.
|
||||
///
|
||||
/// It is an error to take a register that doesn't have all of its register units available.
|
||||
pub fn take(&mut self, rc: RegClass, reg: RegUnit) {
|
||||
let (idx, bits) = bitmask(rc, reg);
|
||||
debug_assert!(
|
||||
(self.avail[idx] & bits) == bits,
|
||||
"{}:{} not available in {}",
|
||||
rc,
|
||||
rc.info.display_regunit(reg),
|
||||
self.display(rc.info)
|
||||
);
|
||||
self.avail[idx] &= !bits;
|
||||
}
|
||||
|
||||
/// Return `reg` and all of its register units to the set of available registers.
|
||||
pub fn free(&mut self, rc: RegClass, reg: RegUnit) {
|
||||
let (idx, bits) = bitmask(rc, reg);
|
||||
debug_assert!(
|
||||
(self.avail[idx] & bits) == 0,
|
||||
"{}:{} is already free in {}",
|
||||
rc,
|
||||
rc.info.display_regunit(reg),
|
||||
self.display(rc.info)
|
||||
);
|
||||
self.avail[idx] |= bits;
|
||||
}
|
||||
|
||||
/// Return an iterator over all available registers belonging to the register class `rc`.
|
||||
///
|
||||
/// This doesn't allocate anything from the set; use `take()` for that.
|
||||
pub fn iter(&self, rc: RegClass) -> RegSetIter {
|
||||
// Start by copying the RC mask. It is a single set bit for each register in the class.
|
||||
let mut rsi = RegSetIter { regs: rc.mask };
|
||||
|
||||
// Mask out the unavailable units.
|
||||
for idx in 0..self.avail.len() {
|
||||
// If a single unit in a register is unavailable, the whole register can't be used. If
|
||||
// a register straddles a word boundary, it will be marked as unavailable. There's an
|
||||
// assertion in `cranelift-codegen/meta/src/cdsl/regs.rs` to check for that.
|
||||
for i in 0..rc.width {
|
||||
rsi.regs[idx] &= self.avail[idx] >> i;
|
||||
}
|
||||
}
|
||||
rsi
|
||||
}
|
||||
|
||||
/// Check if any register units allocated out of this set interferes with units allocated out
|
||||
/// of `other`.
|
||||
///
|
||||
/// This assumes that unused bits are 1.
|
||||
pub fn interferes_with(&self, other: &Self) -> bool {
|
||||
self.avail
|
||||
.iter()
|
||||
.zip(&other.avail)
|
||||
.any(|(&x, &y)| (x | y) != !0)
|
||||
}
|
||||
|
||||
/// Intersect this set of registers with `other`. This has the effect of removing any register
|
||||
/// units from this set that are not in `other`.
|
||||
pub fn intersect(&mut self, other: &Self) {
|
||||
for (x, &y) in self.avail.iter_mut().zip(&other.avail) {
|
||||
*x &= y;
|
||||
}
|
||||
}
|
||||
|
||||
/// Return an object that can display this register set, using the register info from the
|
||||
/// target ISA.
|
||||
pub fn display<'a, R: Into<Option<&'a RegInfo>>>(&self, regs: R) -> DisplayRegisterSet<'a> {
|
||||
DisplayRegisterSet(self.clone(), regs.into())
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterator over available registers in a register class.
|
||||
#[derive(Clone)]
|
||||
pub struct RegSetIter {
|
||||
regs: RegUnitMask,
|
||||
}
|
||||
|
||||
impl Iterator for RegSetIter {
|
||||
type Item = RegUnit;
|
||||
|
||||
fn next(&mut self) -> Option<RegUnit> {
|
||||
let mut unit_offset = 0;
|
||||
|
||||
// Find the first set bit in `self.regs`.
|
||||
for word in &mut self.regs {
|
||||
if *word != 0 {
|
||||
// Compute the register unit number from the lowest set bit in the word.
|
||||
let unit = unit_offset + word.trailing_zeros() as RegUnit;
|
||||
|
||||
// Clear that lowest bit so we won't find it again.
|
||||
*word &= *word - 1;
|
||||
|
||||
return Some(unit);
|
||||
}
|
||||
// How many register units was there in the word? This is a constant 32 for `u32` etc.
|
||||
unit_offset += 8 * size_of_val(word) as RegUnit;
|
||||
}
|
||||
|
||||
// All of `self.regs` is 0.
|
||||
None
|
||||
}
|
||||
|
||||
fn size_hint(&self) -> (usize, Option<usize>) {
|
||||
let bits = self.regs.iter().map(|&w| w.count_ones() as usize).sum();
|
||||
(bits, Some(bits))
|
||||
}
|
||||
}
|
||||
|
||||
impl RegSetIter {
|
||||
pub fn rnext(&mut self) -> Option<RegUnit> {
|
||||
let num_words = self.regs.len();
|
||||
let bits_per_word = 8 * size_of_val(&self.regs[0]);
|
||||
|
||||
// Find the last set bit in `self.regs`.
|
||||
for i in 0..num_words {
|
||||
let word_ix = num_words - 1 - i;
|
||||
|
||||
let word = &mut self.regs[word_ix];
|
||||
if *word != 0 {
|
||||
let lzeroes = word.leading_zeros() as usize;
|
||||
|
||||
// Clear that highest bit so we won't find it again.
|
||||
*word &= !(1 << (bits_per_word - 1 - lzeroes));
|
||||
|
||||
return Some((word_ix * bits_per_word + bits_per_word - 1 - lzeroes) as RegUnit);
|
||||
}
|
||||
}
|
||||
|
||||
// All of `self.regs` is 0.
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
// `size_hint()` reports identical lower and upper bounds (the number of set bits in the mask),
// so the default `len()` is exact.
impl ExactSizeIterator for RegSetIter {}
|
||||
|
||||
/// Displaying an `RegisterSet` correctly requires the associated `RegInfo` from the target ISA.
|
||||
pub struct DisplayRegisterSet<'a>(RegisterSet, Option<&'a RegInfo>);
|
||||
|
||||
impl<'a> fmt::Display for DisplayRegisterSet<'a> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "[")?;
|
||||
match self.1 {
|
||||
None => {
|
||||
for w in &self.0.avail {
|
||||
write!(f, " #{:08x}", w)?;
|
||||
}
|
||||
}
|
||||
Some(reginfo) => {
|
||||
let toprcs = reginfo
|
||||
.banks
|
||||
.iter()
|
||||
.map(|b| b.first_toprc + b.num_toprcs)
|
||||
.max()
|
||||
.expect("No register banks");
|
||||
for rc in ®info.classes[0..toprcs] {
|
||||
if rc.width == 1 {
|
||||
let bank = ®info.banks[rc.bank as usize];
|
||||
write!(f, " {}: ", rc)?;
|
||||
for offset in 0..bank.units {
|
||||
let reg = bank.first_unit + offset;
|
||||
if !rc.contains(reg) {
|
||||
continue;
|
||||
}
|
||||
if !self.0.is_avail(rc, reg) {
|
||||
write!(f, "-")?;
|
||||
continue;
|
||||
}
|
||||
// Display individual registers as either the second letter of their
|
||||
// name or the last digit of their number.
|
||||
// This works for x86 (rax, rbx, ...) and for numbered regs.
|
||||
write!(
|
||||
f,
|
||||
"{}",
|
||||
bank.names
|
||||
.get(offset as usize)
|
||||
.and_then(|name| name.chars().nth(1))
|
||||
.unwrap_or_else(|| char::from_digit(
|
||||
u32::from(offset % 10),
|
||||
10
|
||||
)
|
||||
.unwrap())
|
||||
)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
write!(f, " ]")
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for RegisterSet {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
self.display(None).fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::isa::registers::{RegClass, RegClassData};
    use alloc::vec::Vec;

    // Register classes for testing.
    const GPR: RegClass = &RegClassData {
        name: "GPR",
        index: 0,
        width: 1,
        bank: 0,
        toprc: 0,
        first: 28,
        subclasses: 0,
        mask: [0xf0000000, 0x0000000f, 0],
        info: &INFO,
        pinned_reg: None,
    };

    const DPR: RegClass = &RegClassData {
        name: "DPR",
        index: 0,
        width: 2,
        bank: 0,
        toprc: 0,
        first: 28,
        subclasses: 0,
        mask: [0x50000000, 0x0000000a, 0],
        info: &INFO,
        pinned_reg: None,
    };

    const INFO: RegInfo = RegInfo {
        banks: &[],
        classes: &[],
    };

    const RSI_1: RegSetIter = RegSetIter {
        regs: [0x31415927, 0x27182818, 0x14141356],
    };

    const RSI_2: RegSetIter = RegSetIter {
        regs: [0x00000000, 0x00000000, 0x00000000],
    };

    const RSI_3: RegSetIter = RegSetIter {
        regs: [0xffffffff, 0xffffffff, 0xffffffff],
    };

    fn reverse_regset_iteration_work(rsi: &RegSetIter) {
        // Check the reverse iterator by comparing its output with the forward iterator.
        let rsi_f = (*rsi).clone();
        let results_f = rsi_f.collect::<Vec<_>>();

        let mut rsi_r = (*rsi).clone();
        let mut results_r = Vec::<RegUnit>::new();
        while let Some(r) = rsi_r.rnext() {
            results_r.push(r);
        }

        let len_f = results_f.len();
        let len_r = results_r.len();
        assert_eq!(len_f, len_r);

        for i in 0..len_f {
            assert_eq!(results_f[i], results_r[len_f - 1 - i]);
        }
    }

    #[test]
    fn reverse_regset_iteration() {
        reverse_regset_iteration_work(&RSI_1);
        reverse_regset_iteration_work(&RSI_2);
        reverse_regset_iteration_work(&RSI_3);
    }

    #[test]
    fn put_and_take() {
        let mut regs = RegisterSet::new();

        // `GPR` has units 28-35 (inclusive).
        assert_eq!(regs.iter(GPR).len(), 8);
        assert_eq!(regs.iter(GPR).count(), 8);
        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [28, 30, 33, 35]);

        assert!(regs.is_avail(GPR, 29));
        regs.take(&GPR, 29);
        assert!(!regs.is_avail(GPR, 29));

        assert_eq!(regs.iter(GPR).count(), 7);
        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);

        assert!(regs.is_avail(GPR, 30));
        regs.take(&GPR, 30);
        assert!(!regs.is_avail(GPR, 30));

        assert_eq!(regs.iter(GPR).count(), 6);
        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);

        assert!(regs.is_avail(GPR, 32));
        regs.take(&GPR, 32);
        assert!(!regs.is_avail(GPR, 32));

        assert_eq!(regs.iter(GPR).count(), 5);
        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [33, 35]);

        regs.free(&GPR, 30);
        assert!(regs.is_avail(GPR, 30));
        assert!(!regs.is_avail(GPR, 29));
        assert!(!regs.is_avail(GPR, 32));

        assert_eq!(regs.iter(GPR).count(), 6);
        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);

        regs.free(&GPR, 32);
        assert!(regs.is_avail(GPR, 31));
        assert!(!regs.is_avail(GPR, 29));
        assert!(regs.is_avail(GPR, 32));

        assert_eq!(regs.iter(GPR).count(), 7);
        assert_eq!(regs.iter(DPR).collect::<Vec<_>>(), [30, 33, 35]);
    }

    #[test]
    fn interference() {
        let mut regs1 = RegisterSet::new();
        let mut regs2 = RegisterSet::new();

        assert!(!regs1.interferes_with(&regs2));
        regs1.take(&GPR, 32);
        assert!(!regs1.interferes_with(&regs2));
        regs2.take(&GPR, 31);
        assert!(!regs1.interferes_with(&regs2));
        regs1.intersect(&regs2);
        assert!(regs1.interferes_with(&regs2));
    }
}
|
||||
@@ -1,484 +0,0 @@
|
||||
//! Reload pass
|
||||
//!
|
||||
//! The reload pass runs between the spilling and coloring passes. Its primary responsibility is to
|
||||
//! insert `spill` and `fill` instructions such that instruction operands expecting a register will
|
||||
//! get a value with register affinity, and operands expecting a stack slot will get a value with
|
||||
//! stack affinity.
|
||||
//!
|
||||
//! The secondary responsibility of the reload pass is to reuse values in registers as much as
|
||||
//! possible to minimize the number of `fill` instructions needed. This must not cause the register
|
||||
//! pressure limits to be exceeded.
|
||||
|
||||
use crate::cursor::{Cursor, EncCursor};
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::entity::{SparseMap, SparseMapValue};
|
||||
use crate::ir::{AbiParam, ArgumentLoc, InstBuilder};
|
||||
use crate::ir::{Block, Function, Inst, InstructionData, Opcode, Value, ValueLoc};
|
||||
use crate::isa::RegClass;
|
||||
use crate::isa::{ConstraintKind, EncInfo, Encoding, RecipeConstraints, TargetIsa};
|
||||
use crate::regalloc::affinity::Affinity;
|
||||
use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
|
||||
use crate::regalloc::liveness::Liveness;
|
||||
use crate::timing;
|
||||
use crate::topo_order::TopoOrder;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
/// Reusable data structures for the reload pass.
|
||||
pub struct Reload {
|
||||
candidates: Vec<ReloadCandidate>,
|
||||
reloads: SparseMap<Value, ReloadedValue>,
|
||||
}
|
||||
|
||||
/// Context data structure that gets instantiated once per pass.
|
||||
struct Context<'a> {
|
||||
cur: EncCursor<'a>,
|
||||
|
||||
// Cached ISA information.
|
||||
// We save it here to avoid frequent virtual function calls on the `TargetIsa` trait object.
|
||||
encinfo: EncInfo,
|
||||
|
||||
// References to contextual data structures we need.
|
||||
domtree: &'a DominatorTree,
|
||||
liveness: &'a mut Liveness,
|
||||
topo: &'a mut TopoOrder,
|
||||
|
||||
candidates: &'a mut Vec<ReloadCandidate>,
|
||||
reloads: &'a mut SparseMap<Value, ReloadedValue>,
|
||||
}
|
||||
|
||||
impl Reload {
|
||||
/// Create a new blank reload pass.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
candidates: Vec::new(),
|
||||
reloads: SparseMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Clear all data structures in this reload pass.
|
||||
pub fn clear(&mut self) {
|
||||
self.candidates.clear();
|
||||
self.reloads.clear();
|
||||
}
|
||||
|
||||
/// Run the reload algorithm over `func`.
|
||||
pub fn run(
|
||||
&mut self,
|
||||
isa: &dyn TargetIsa,
|
||||
func: &mut Function,
|
||||
domtree: &DominatorTree,
|
||||
liveness: &mut Liveness,
|
||||
topo: &mut TopoOrder,
|
||||
tracker: &mut LiveValueTracker,
|
||||
) {
|
||||
let _tt = timing::ra_reload();
|
||||
log::trace!("Reload for:\n{}", func.display(isa));
|
||||
let mut ctx = Context {
|
||||
cur: EncCursor::new(func, isa),
|
||||
encinfo: isa.encoding_info(),
|
||||
domtree,
|
||||
liveness,
|
||||
topo,
|
||||
candidates: &mut self.candidates,
|
||||
reloads: &mut self.reloads,
|
||||
};
|
||||
ctx.run(tracker)
|
||||
}
|
||||
}
|
||||
|
||||
/// A reload candidate.
|
||||
///
|
||||
/// This represents a stack value that is used by the current instruction where a register is
|
||||
/// needed.
|
||||
struct ReloadCandidate {
|
||||
argidx: usize,
|
||||
value: Value,
|
||||
regclass: RegClass,
|
||||
}
|
||||
|
||||
/// A Reloaded value.
|
||||
///
|
||||
/// This represents a value that has been reloaded into a register value from the stack.
|
||||
struct ReloadedValue {
|
||||
stack: Value,
|
||||
reg: Value,
|
||||
}
|
||||
|
||||
impl SparseMapValue<Value> for ReloadedValue {
    /// Reloaded values are keyed by their stack value.
    fn key(&self) -> Value {
        self.stack
    }
}
|
||||
|
||||
impl<'a> Context<'a> {
|
||||
/// Visit every block handed out by the topological order, starting from the function layout.
fn run(&mut self, tracker: &mut LiveValueTracker) {
    self.topo.reset(self.cur.func.layout.blocks());
    while let Some(block) = self.topo.next(&self.cur.func.layout, self.domtree) {
        self.visit_block(block, tracker);
    }
}
|
||||
|
||||
/// Process the header of `block`, then every instruction in it.
///
/// Non-ghost instructions are passed to `visit_inst()`; ghost instructions are stepped over.
fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) {
    log::trace!("Reloading {}:", block);
    self.visit_block_header(block, tracker);
    tracker.drop_dead_params();

    // visit_block_header() places us at the first interesting instruction in the block.
    while let Some(inst) = self.cur.current_inst() {
        if !self.cur.func.dfg[inst].opcode().is_ghost() {
            // This instruction either has an encoding or has ABI constraints, so visit it to
            // insert spills and fills as needed.
            let encoding = self.cur.func.encodings[inst];
            self.visit_inst(block, inst, encoding, tracker);
            tracker.drop_dead(inst);
        } else {
            // This is a ghost instruction with no encoding and no extra constraints, so we can
            // just skip over it.
            self.cur.next_inst();
        }
    }
}
|
||||
|
||||
/// Process the block parameters. Move to the next instruction in the block to be processed
|
||||
fn visit_block_header(&mut self, block: Block, tracker: &mut LiveValueTracker) {
|
||||
let (liveins, args) = tracker.block_top(
|
||||
block,
|
||||
&self.cur.func.dfg,
|
||||
self.liveness,
|
||||
&self.cur.func.layout,
|
||||
self.domtree,
|
||||
);
|
||||
|
||||
if self.cur.func.layout.entry_block() == Some(block) {
|
||||
debug_assert_eq!(liveins.len(), 0);
|
||||
self.visit_entry_params(block, args);
|
||||
} else {
|
||||
self.visit_block_params(block, args);
|
||||
}
|
||||
}
|
||||
|
||||
/// Visit the parameters on the entry block.
|
||||
/// These values have ABI constraints from the function signature.
|
||||
fn visit_entry_params(&mut self, block: Block, args: &[LiveValue]) {
|
||||
debug_assert_eq!(self.cur.func.signature.params.len(), args.len());
|
||||
self.cur.goto_first_inst(block);
|
||||
|
||||
for (arg_idx, arg) in args.iter().enumerate() {
|
||||
let abi = self.cur.func.signature.params[arg_idx];
|
||||
match abi.location {
|
||||
ArgumentLoc::Reg(_) => {
|
||||
if arg.affinity.is_stack() {
|
||||
// An incoming register parameter was spilled. Replace the parameter value
|
||||
// with a temporary register value that is immediately spilled.
|
||||
let reg = self
|
||||
.cur
|
||||
.func
|
||||
.dfg
|
||||
.replace_block_param(arg.value, abi.value_type);
|
||||
let affinity = Affinity::abi(&abi, self.cur.isa);
|
||||
self.liveness.create_dead(reg, block, affinity);
|
||||
self.insert_spill(block, arg.value, reg);
|
||||
}
|
||||
}
|
||||
ArgumentLoc::Stack(_) => {
|
||||
debug_assert!(arg.affinity.is_stack());
|
||||
}
|
||||
ArgumentLoc::Unassigned => panic!("Unexpected ABI location"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Visit the parameters of a non-entry block.
///
/// The parameters themselves are not touched; this only positions the cursor at the first
/// instruction of `block`.
fn visit_block_params(&mut self, block: Block, _args: &[LiveValue]) {
    self.cur.goto_first_inst(block);
}
|
||||
|
||||
/// Process the instruction pointed to by `pos`, and advance the cursor to the next instruction
|
||||
/// that needs processing.
|
||||
fn visit_inst(
|
||||
&mut self,
|
||||
block: Block,
|
||||
inst: Inst,
|
||||
encoding: Encoding,
|
||||
tracker: &mut LiveValueTracker,
|
||||
) {
|
||||
self.cur.use_srcloc(inst);
|
||||
|
||||
// Get the operand constraints for `inst` that we are trying to satisfy.
|
||||
let constraints = self.encinfo.operand_constraints(encoding);
|
||||
|
||||
// Identify reload candidates.
|
||||
debug_assert!(self.candidates.is_empty());
|
||||
self.find_candidates(inst, constraints);
|
||||
|
||||
// If we find a copy from a stack slot to the same stack slot, replace
|
||||
// it with a `copy_nop` but otherwise ignore it. In particular, don't
|
||||
// generate a reload immediately followed by a spill. The `copy_nop`
|
||||
// has a zero-length encoding, so will disappear at emission time.
|
||||
if let InstructionData::Unary {
|
||||
opcode: Opcode::Copy,
|
||||
arg,
|
||||
} = self.cur.func.dfg[inst]
|
||||
{
|
||||
let dst_vals = self.cur.func.dfg.inst_results(inst);
|
||||
if dst_vals.len() == 1 {
|
||||
let dst_val = dst_vals[0];
|
||||
let can_transform = match (
|
||||
self.cur.func.locations[arg],
|
||||
self.cur.func.locations[dst_val],
|
||||
) {
|
||||
(ValueLoc::Stack(src_slot), ValueLoc::Stack(dst_slot)) => {
|
||||
src_slot == dst_slot && {
|
||||
let src_ty = self.cur.func.dfg.value_type(arg);
|
||||
let dst_ty = self.cur.func.dfg.value_type(dst_val);
|
||||
debug_assert!(src_ty == dst_ty);
|
||||
// This limits the transformation to copies of the
|
||||
// types: I128 I64 I32 I16 I8 F64 and F32, since that's
|
||||
// the set of `copy_nop` encodings available.
|
||||
src_ty.is_int() || src_ty.is_float()
|
||||
}
|
||||
}
|
||||
_ => false,
|
||||
};
|
||||
if can_transform {
|
||||
// Convert the instruction into a `copy_nop`.
|
||||
self.cur.func.dfg.replace(inst).copy_nop(arg);
|
||||
let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
|
||||
debug_assert!(ok, "copy_nop encoding missing for this type");
|
||||
|
||||
// And move on to the next insn.
|
||||
self.reloads.clear();
|
||||
let _ = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
|
||||
self.cur.next_inst();
|
||||
self.candidates.clear();
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Deal with all instructions not special-cased by the immediately
|
||||
// preceding fragment.
|
||||
if let InstructionData::Unary {
|
||||
opcode: Opcode::Copy,
|
||||
..
|
||||
} = self.cur.func.dfg[inst]
|
||||
{
|
||||
self.reload_copy_candidates(inst);
|
||||
} else {
|
||||
self.reload_inst_candidates(block, inst);
|
||||
}
|
||||
|
||||
// TODO: Reuse reloads for future instructions.
|
||||
self.reloads.clear();
|
||||
|
||||
let (_throughs, _kills, defs) =
|
||||
tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
|
||||
|
||||
// Advance to the next instruction so we can insert any spills after the instruction.
|
||||
self.cur.next_inst();
|
||||
|
||||
// Rewrite register defs that need to be spilled.
|
||||
//
|
||||
// Change:
|
||||
//
|
||||
// v2 = inst ...
|
||||
//
|
||||
// Into:
|
||||
//
|
||||
// v7 = inst ...
|
||||
// v2 = spill v7
|
||||
//
|
||||
// That way, we don't need to rewrite all future uses of v2.
|
||||
if let Some(constraints) = constraints {
|
||||
for (lv, op) in defs.iter().zip(constraints.outs) {
|
||||
if lv.affinity.is_stack() && op.kind != ConstraintKind::Stack {
|
||||
if let InstructionData::Unary {
|
||||
opcode: Opcode::Copy,
|
||||
arg,
|
||||
} = self.cur.func.dfg[inst]
|
||||
{
|
||||
self.cur.func.dfg.replace(inst).spill(arg);
|
||||
let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
|
||||
debug_assert!(ok);
|
||||
} else {
|
||||
let value_type = self.cur.func.dfg.value_type(lv.value);
|
||||
let reg = self.cur.func.dfg.replace_result(lv.value, value_type);
|
||||
self.liveness.create_dead(reg, inst, Affinity::new(op));
|
||||
self.insert_spill(block, lv.value, reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Same thing for spilled call return values.
|
||||
let retvals = &defs[self.cur.func.dfg[inst]
|
||||
.opcode()
|
||||
.constraints()
|
||||
.num_fixed_results()..];
|
||||
if !retvals.is_empty() {
|
||||
let sig = self
|
||||
.cur
|
||||
.func
|
||||
.dfg
|
||||
.call_signature(inst)
|
||||
.expect("Extra results on non-call instruction");
|
||||
for (i, lv) in retvals.iter().enumerate() {
|
||||
let abi = self.cur.func.dfg.signatures[sig].returns[i];
|
||||
debug_assert!(
|
||||
abi.location.is_reg(),
|
||||
"expected reg; got {:?}",
|
||||
abi.location
|
||||
);
|
||||
if lv.affinity.is_stack() {
|
||||
let reg = self.cur.func.dfg.replace_result(lv.value, abi.value_type);
|
||||
self.liveness
|
||||
.create_dead(reg, inst, Affinity::abi(&abi, self.cur.isa));
|
||||
self.insert_spill(block, lv.value, reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reload the current candidates for the given `inst`.
|
||||
fn reload_inst_candidates(&mut self, block: Block, inst: Inst) {
|
||||
// Insert fill instructions before `inst` and replace `cand.value` with the filled value.
|
||||
for cand in self.candidates.iter_mut() {
|
||||
if let Some(reload) = self.reloads.get(cand.value) {
|
||||
cand.value = reload.reg;
|
||||
continue;
|
||||
}
|
||||
|
||||
let reg = self.cur.ins().fill(cand.value);
|
||||
let fill = self.cur.built_inst();
|
||||
|
||||
self.reloads.insert(ReloadedValue {
|
||||
stack: cand.value,
|
||||
reg,
|
||||
});
|
||||
cand.value = reg;
|
||||
|
||||
// Create a live range for the new reload.
|
||||
let affinity = Affinity::Reg(cand.regclass.into());
|
||||
self.liveness.create_dead(reg, fill, affinity);
|
||||
self.liveness
|
||||
.extend_locally(reg, block, inst, &self.cur.func.layout);
|
||||
}
|
||||
|
||||
// Rewrite instruction arguments.
|
||||
//
|
||||
// Only rewrite those arguments that were identified as candidates. This leaves block
|
||||
// arguments on branches as-is without rewriting them. A spilled block argument needs to stay
|
||||
// spilled because the matching block parameter is going to be in the same virtual register
|
||||
// and therefore the same stack slot as the block argument value.
|
||||
if !self.candidates.is_empty() {
|
||||
let args = self.cur.func.dfg.inst_args_mut(inst);
|
||||
while let Some(cand) = self.candidates.pop() {
|
||||
args[cand.argidx] = cand.value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reload the current candidates for the given copy `inst`.
|
||||
//
|
||||
// As an optimization, replace a copy instruction where the argument has been spilled with
|
||||
// a fill instruction.
|
||||
fn reload_copy_candidates(&mut self, inst: Inst) {
|
||||
// Copy instructions can only have one argument.
|
||||
debug_assert!(self.candidates.is_empty() || self.candidates.len() == 1);
|
||||
|
||||
if let Some(cand) = self.candidates.pop() {
|
||||
self.cur.func.dfg.replace(inst).fill(cand.value);
|
||||
let ok = self.cur.func.update_encoding(inst, self.cur.isa).is_ok();
|
||||
debug_assert!(ok);
|
||||
}
|
||||
}
|
||||
|
||||
// Find reload candidates for `inst` and add them to `self.candidates`.
|
||||
//
|
||||
// These are uses of spilled values where the operand constraint requires a register.
|
||||
fn find_candidates(&mut self, inst: Inst, constraints: Option<&RecipeConstraints>) {
|
||||
let args = self.cur.func.dfg.inst_args(inst);
|
||||
|
||||
if let Some(constraints) = constraints {
|
||||
for (argidx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() {
|
||||
if op.kind != ConstraintKind::Stack && self.liveness[arg].affinity.is_stack() {
|
||||
self.candidates.push(ReloadCandidate {
|
||||
argidx,
|
||||
value: arg,
|
||||
regclass: op.regclass,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we only have the fixed arguments, we're done now.
|
||||
let offset = self.cur.func.dfg[inst]
|
||||
.opcode()
|
||||
.constraints()
|
||||
.num_fixed_value_arguments();
|
||||
if args.len() == offset {
|
||||
return;
|
||||
}
|
||||
let var_args = &args[offset..];
|
||||
|
||||
// Handle ABI arguments.
|
||||
if let Some(sig) = self.cur.func.dfg.call_signature(inst) {
|
||||
handle_abi_args(
|
||||
self.candidates,
|
||||
&self.cur.func.dfg.signatures[sig].params,
|
||||
var_args,
|
||||
offset,
|
||||
self.cur.isa,
|
||||
self.liveness,
|
||||
);
|
||||
} else if self.cur.func.dfg[inst].opcode().is_return() {
|
||||
handle_abi_args(
|
||||
self.candidates,
|
||||
&self.cur.func.signature.returns,
|
||||
var_args,
|
||||
offset,
|
||||
self.cur.isa,
|
||||
self.liveness,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert a spill at `pos` and update data structures.
|
||||
///
|
||||
/// - Insert `stack = spill reg` at `pos`, and assign an encoding.
|
||||
/// - Move the `stack` live range starting point to the new instruction.
|
||||
/// - Extend the `reg` live range to reach the new instruction.
|
||||
fn insert_spill(&mut self, block: Block, stack: Value, reg: Value) {
|
||||
self.cur.ins().with_result(stack).spill(reg);
|
||||
let inst = self.cur.built_inst();
|
||||
|
||||
// Update live ranges.
|
||||
self.liveness.move_def_locally(stack, inst);
|
||||
self.liveness
|
||||
.extend_locally(reg, block, inst, &self.cur.func.layout);
|
||||
}
|
||||
}
|
||||
|
||||
/// Find reload candidates in the instruction's ABI variable arguments. This handles both
|
||||
/// return values and call arguments.
|
||||
fn handle_abi_args(
|
||||
candidates: &mut Vec<ReloadCandidate>,
|
||||
abi_types: &[AbiParam],
|
||||
var_args: &[Value],
|
||||
offset: usize,
|
||||
isa: &dyn TargetIsa,
|
||||
liveness: &Liveness,
|
||||
) {
|
||||
debug_assert_eq!(abi_types.len(), var_args.len());
|
||||
for ((abi, &arg), argidx) in abi_types.iter().zip(var_args).zip(offset..) {
|
||||
if abi.location.is_reg() {
|
||||
let lv = liveness.get(arg).expect("Missing live range for ABI arg");
|
||||
if lv.affinity.is_stack() {
|
||||
candidates.push(ReloadCandidate {
|
||||
argidx,
|
||||
value: arg,
|
||||
regclass: isa.regclass_for_abi_type(abi.value_type),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,65 +0,0 @@
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::inst_predicates::is_safepoint;
|
||||
use crate::ir::{Function, InstBuilder};
|
||||
use crate::isa::TargetIsa;
|
||||
use crate::regalloc::live_value_tracker::LiveValueTracker;
|
||||
use crate::regalloc::liveness::Liveness;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
fn insert_and_encode_safepoint<'f>(
|
||||
pos: &mut FuncCursor<'f>,
|
||||
tracker: &LiveValueTracker,
|
||||
isa: &dyn TargetIsa,
|
||||
) {
|
||||
// Iterate through all live values, collect only the references.
|
||||
let live_ref_values = tracker
|
||||
.live()
|
||||
.iter()
|
||||
.filter(|live_value| pos.func.dfg.value_type(live_value.value).is_ref())
|
||||
.map(|live_val| live_val.value)
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if !live_ref_values.is_empty() {
|
||||
pos.ins().safepoint(&live_ref_values);
|
||||
// Move cursor to the new safepoint instruction to encode it.
|
||||
if let Some(inst) = pos.prev_inst() {
|
||||
let ok = pos.func.update_encoding(inst, isa).is_ok();
|
||||
debug_assert!(ok);
|
||||
}
|
||||
// Restore cursor position.
|
||||
pos.next_inst();
|
||||
}
|
||||
}
|
||||
|
||||
// The emit_stack_maps() function analyzes each instruction to retrieve the liveness of
|
||||
// the defs and operands by traversing a function's blocks in layout order.
|
||||
pub fn emit_stack_maps(
|
||||
func: &mut Function,
|
||||
domtree: &DominatorTree,
|
||||
liveness: &Liveness,
|
||||
tracker: &mut LiveValueTracker,
|
||||
isa: &dyn TargetIsa,
|
||||
) {
|
||||
let mut curr = func.layout.entry_block();
|
||||
|
||||
while let Some(block) = curr {
|
||||
tracker.block_top(block, &func.dfg, liveness, &func.layout, domtree);
|
||||
tracker.drop_dead_params();
|
||||
let mut pos = FuncCursor::new(func);
|
||||
|
||||
// From the top of the block, step through the instructions.
|
||||
pos.goto_top(block);
|
||||
|
||||
while let Some(inst) = pos.next_inst() {
|
||||
if is_safepoint(&pos.func, inst) {
|
||||
insert_and_encode_safepoint(&mut pos, tracker, isa);
|
||||
}
|
||||
|
||||
// Process the instruction and get rid of dead values.
|
||||
tracker.process_inst(inst, &pos.func.dfg, liveness);
|
||||
tracker.drop_dead(inst);
|
||||
}
|
||||
curr = func.layout.next_block(block);
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,638 +0,0 @@
|
||||
//! Spilling pass.
|
||||
//!
|
||||
//! The spilling pass is the first to run after the liveness analysis. Its primary function is to
|
||||
//! ensure that the register pressure never exceeds the number of available registers by moving
|
||||
//! some SSA values to spill slots on the stack. This is encoded in the affinity of the value's
|
||||
//! live range.
|
||||
//!
|
||||
//! Some instruction operand constraints may require additional registers to resolve. Since this
|
||||
//! can cause spilling, the spilling pass is also responsible for resolving those constraints by
|
||||
//! inserting copies. The extra constraints are:
|
||||
//!
|
||||
//! 1. A value used by a tied operand must be killed by the instruction. This is resolved by
|
||||
//! inserting a copy to a temporary value when necessary.
|
||||
//! 2. When the same value is used more than once by an instruction, the operand constraints must
|
||||
//! be compatible. Otherwise, the value must be copied into a new register for some of the
|
||||
//! operands.
|
||||
|
||||
use crate::cursor::{Cursor, EncCursor};
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::ir::{ArgumentLoc, Block, Function, Inst, InstBuilder, SigRef, Value, ValueLoc};
|
||||
use crate::isa::registers::{RegClass, RegClassIndex, RegClassMask, RegUnit};
|
||||
use crate::isa::{ConstraintKind, EncInfo, RecipeConstraints, RegInfo, TargetIsa};
|
||||
use crate::regalloc::affinity::Affinity;
|
||||
use crate::regalloc::live_value_tracker::{LiveValue, LiveValueTracker};
|
||||
use crate::regalloc::liveness::Liveness;
|
||||
use crate::regalloc::pressure::Pressure;
|
||||
use crate::regalloc::virtregs::VirtRegs;
|
||||
use crate::timing;
|
||||
use crate::topo_order::TopoOrder;
|
||||
use alloc::vec::Vec;
|
||||
use core::fmt;
|
||||
|
||||
/// Return a top-level register class which contains `unit`.
|
||||
fn toprc_containing_regunit(unit: RegUnit, reginfo: &RegInfo) -> RegClass {
|
||||
let bank = reginfo.bank_containing_regunit(unit).unwrap();
|
||||
reginfo.classes[bank.first_toprc..(bank.first_toprc + bank.num_toprcs)]
|
||||
.iter()
|
||||
.find(|&rc| rc.contains(unit))
|
||||
.expect("reg unit should be in a toprc")
|
||||
}
|
||||
|
||||
/// Persistent data structures for the spilling pass.
///
/// Both vectors are lent to the per-run `Context` by `Spilling::run` and are emptied by
/// `Spilling::clear`.
|
||||
pub struct Spilling {
|
||||
// Values spilled while handling the current instruction; borrowed as `Context::spills`.
spills: Vec<Value>,
|
||||
// Register uses of the current instruction; borrowed as `Context::reg_uses`.
reg_uses: Vec<RegUse>,
|
||||
}
|
||||
|
||||
/// Context data structure that gets instantiated once per pass.
///
/// Built by `Spilling::run`, which fills in the cursor, the cached ISA information, the
/// borrowed analyses and the two scratch vectors owned by `Spilling`.
|
||||
struct Context<'a> {
|
||||
// Current instruction as well as reference to function and ISA.
|
||||
cur: EncCursor<'a>,
|
||||
|
||||
// Cached ISA information.
|
||||
reginfo: RegInfo,
|
||||
encinfo: EncInfo,
|
||||
|
||||
// References to contextual data structures we need.
|
||||
domtree: &'a DominatorTree,
|
||||
liveness: &'a mut Liveness,
|
||||
// Queried for the congruence class of a value when it is spilled (see `spill_reg`).
virtregs: &'a VirtRegs,
|
||||
// Supplies the order in which blocks are visited (see `run`).
topo: &'a mut TopoOrder,
|
||||
|
||||
// Current register pressure.
|
||||
pressure: Pressure,
|
||||
|
||||
// Values spilled for the current instruction. These values have already been removed from the
|
||||
// pressure tracker, but they are still present in the live value tracker and their affinity
|
||||
// hasn't been changed yet.
|
||||
spills: &'a mut Vec<Value>,
|
||||
|
||||
// Uses of register values in the current instruction.
|
||||
reg_uses: &'a mut Vec<RegUse>,
|
||||
}
|
||||
|
||||
impl Spilling {
|
||||
/// Create a new spilling data structure.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
spills: Vec::new(),
|
||||
reg_uses: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Clear all data structures in this spilling pass.
|
||||
pub fn clear(&mut self) {
|
||||
self.spills.clear();
|
||||
self.reg_uses.clear();
|
||||
}
|
||||
|
||||
/// Run the spilling algorithm over `func`.
|
||||
pub fn run(
|
||||
&mut self,
|
||||
isa: &dyn TargetIsa,
|
||||
func: &mut Function,
|
||||
domtree: &DominatorTree,
|
||||
liveness: &mut Liveness,
|
||||
virtregs: &VirtRegs,
|
||||
topo: &mut TopoOrder,
|
||||
tracker: &mut LiveValueTracker,
|
||||
) {
|
||||
let _tt = timing::ra_spilling();
|
||||
log::trace!("Spilling for:\n{}", func.display(isa));
|
||||
let reginfo = isa.register_info();
|
||||
let usable_regs = isa.allocatable_registers(func);
|
||||
let mut ctx = Context {
|
||||
cur: EncCursor::new(func, isa),
|
||||
reginfo: isa.register_info(),
|
||||
encinfo: isa.encoding_info(),
|
||||
domtree,
|
||||
liveness,
|
||||
virtregs,
|
||||
topo,
|
||||
pressure: Pressure::new(®info, &usable_regs),
|
||||
spills: &mut self.spills,
|
||||
reg_uses: &mut self.reg_uses,
|
||||
};
|
||||
ctx.run(tracker)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Context<'a> {
|
||||
/// Visit every block of the function in the order produced by the topological ordering.
fn run(&mut self, tracker: &mut LiveValueTracker) {
    self.topo.reset(self.cur.func.layout.blocks());
    loop {
        let block = match self.topo.next(&self.cur.func.layout, self.domtree) {
            Some(block) => block,
            None => break,
        };
        self.visit_block(block, tracker);
    }
}
|
||||
|
||||
fn visit_block(&mut self, block: Block, tracker: &mut LiveValueTracker) {
|
||||
log::trace!("Spilling {}:", block);
|
||||
self.cur.goto_top(block);
|
||||
self.visit_block_header(block, tracker);
|
||||
tracker.drop_dead_params();
|
||||
self.process_spills(tracker);
|
||||
|
||||
while let Some(inst) = self.cur.next_inst() {
|
||||
if !self.cur.func.dfg[inst].opcode().is_ghost() {
|
||||
self.visit_inst(inst, block, tracker);
|
||||
} else {
|
||||
let (_throughs, kills) = tracker.process_ghost(inst);
|
||||
self.free_regs(kills);
|
||||
}
|
||||
tracker.drop_dead(inst);
|
||||
self.process_spills(tracker);
|
||||
}
|
||||
}
|
||||
|
||||
// Take all live registers in `regs` from the pressure set.
|
||||
// This doesn't cause any spilling, it is assumed there are enough registers.
|
||||
fn take_live_regs(&mut self, regs: &[LiveValue]) {
|
||||
for lv in regs {
|
||||
if !lv.is_dead {
|
||||
if let Affinity::Reg(rci) = lv.affinity {
|
||||
let rc = self.reginfo.rc(rci);
|
||||
self.pressure.take(rc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Free all registers in `kills` from the pressure set.
|
||||
fn free_regs(&mut self, kills: &[LiveValue]) {
|
||||
for lv in kills {
|
||||
if let Affinity::Reg(rci) = lv.affinity {
|
||||
if !self.spills.contains(&lv.value) {
|
||||
let rc = self.reginfo.rc(rci);
|
||||
self.pressure.free(rc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Free all dead registers in `regs` from the pressure set.
|
||||
fn free_dead_regs(&mut self, regs: &[LiveValue]) {
|
||||
for lv in regs {
|
||||
if lv.is_dead {
|
||||
if let Affinity::Reg(rci) = lv.affinity {
|
||||
if !self.spills.contains(&lv.value) {
|
||||
let rc = self.reginfo.rc(rci);
|
||||
self.pressure.free(rc);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn visit_block_header(&mut self, block: Block, tracker: &mut LiveValueTracker) {
|
||||
let (liveins, params) = tracker.block_top(
|
||||
block,
|
||||
&self.cur.func.dfg,
|
||||
self.liveness,
|
||||
&self.cur.func.layout,
|
||||
self.domtree,
|
||||
);
|
||||
|
||||
// Count the live-in registers. These should already fit in registers; they did at the
|
||||
// dominator.
|
||||
self.pressure.reset();
|
||||
self.take_live_regs(liveins);
|
||||
|
||||
// A block can have an arbitrary (up to 2^16...) number of parameters, so they are not
|
||||
// guaranteed to fit in registers.
|
||||
for lv in params {
|
||||
if let Affinity::Reg(rci) = lv.affinity {
|
||||
let rc = self.reginfo.rc(rci);
|
||||
'try_take: while let Err(mask) = self.pressure.take_transient(rc) {
|
||||
log::trace!("Need {} reg for block param {}", rc, lv.value);
|
||||
match self.spill_candidate(mask, liveins) {
|
||||
Some(cand) => {
|
||||
log::trace!(
|
||||
"Spilling live-in {} to make room for {} block param {}",
|
||||
cand,
|
||||
rc,
|
||||
lv.value
|
||||
);
|
||||
self.spill_reg(cand);
|
||||
}
|
||||
None => {
|
||||
// We can't spill any of the live-in registers, so we have to spill an
|
||||
// block argument. Since the current spill metric would consider all the
|
||||
// block arguments equal, just spill the present register.
|
||||
log::trace!("Spilling {} block argument {}", rc, lv.value);
|
||||
|
||||
// Since `spill_reg` will free a register, add the current one here.
|
||||
self.pressure.take(rc);
|
||||
self.spill_reg(lv.value);
|
||||
break 'try_take;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The transient pressure counts for the block arguments are accurate. Just preserve them.
|
||||
self.pressure.preserve_transient();
|
||||
self.free_dead_regs(params);
|
||||
}
|
||||
|
||||
fn visit_inst(&mut self, inst: Inst, block: Block, tracker: &mut LiveValueTracker) {
|
||||
log::trace!("Inst {}, {}", self.cur.display_inst(inst), self.pressure);
|
||||
debug_assert_eq!(self.cur.current_inst(), Some(inst));
|
||||
debug_assert_eq!(self.cur.current_block(), Some(block));
|
||||
|
||||
let constraints = self
|
||||
.encinfo
|
||||
.operand_constraints(self.cur.func.encodings[inst]);
|
||||
|
||||
// We may need to resolve register constraints if there are any noteworthy uses.
|
||||
debug_assert!(self.reg_uses.is_empty());
|
||||
self.collect_reg_uses(inst, block, constraints);
|
||||
|
||||
// Calls usually have fixed register uses.
|
||||
let call_sig = self.cur.func.dfg.call_signature(inst);
|
||||
if let Some(sig) = call_sig {
|
||||
self.collect_abi_reg_uses(inst, sig);
|
||||
}
|
||||
|
||||
if !self.reg_uses.is_empty() {
|
||||
self.process_reg_uses(inst, tracker);
|
||||
}
|
||||
|
||||
// Update the live value tracker with this instruction.
|
||||
let (throughs, kills, defs) = tracker.process_inst(inst, &self.cur.func.dfg, self.liveness);
|
||||
|
||||
// Remove kills from the pressure tracker.
|
||||
self.free_regs(kills);
|
||||
|
||||
// If inst is a call, spill all register values that are live across the call.
|
||||
// This means that we don't currently take advantage of callee-saved registers.
|
||||
// TODO: Be more sophisticated.
|
||||
let opcode = self.cur.func.dfg[inst].opcode();
|
||||
if call_sig.is_some() || opcode.clobbers_all_regs() {
|
||||
for lv in throughs {
|
||||
if lv.affinity.is_reg() && !self.spills.contains(&lv.value) {
|
||||
self.spill_reg(lv.value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure we have enough registers for the register defs.
|
||||
// Dead defs are included here. They need a register too.
|
||||
// No need to process call return values, they are in fixed registers.
|
||||
if let Some(constraints) = constraints {
|
||||
for op in constraints.outs {
|
||||
if op.kind != ConstraintKind::Stack {
|
||||
// Add register def to pressure, spill if needed.
|
||||
while let Err(mask) = self.pressure.take_transient(op.regclass) {
|
||||
log::trace!("Need {} reg from {} throughs", op.regclass, throughs.len());
|
||||
match self.spill_candidate(mask, throughs) {
|
||||
Some(cand) => self.spill_reg(cand),
|
||||
None => panic!(
|
||||
"Ran out of {} registers for {}",
|
||||
op.regclass,
|
||||
self.cur.display_inst(inst)
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
self.pressure.reset_transient();
|
||||
}
|
||||
|
||||
// Restore pressure state, compute pressure with affinities from `defs`.
|
||||
// Exclude dead defs. Includes call return values.
|
||||
// This won't cause spilling.
|
||||
self.take_live_regs(defs);
|
||||
}
|
||||
|
||||
// Collect register uses that are noteworthy in one of the following ways:
|
||||
//
|
||||
// 1. It's a fixed register constraint.
|
||||
// 2. It's a use of a spilled value.
|
||||
// 3. It's a tied register constraint and the value isn't killed.
|
||||
//
|
||||
// We are assuming here that if a value is used both by a fixed register operand and a register
|
||||
// class operand, they two are compatible. We are also assuming that two register class
|
||||
// operands are always compatible.
|
||||
fn collect_reg_uses(
|
||||
&mut self,
|
||||
inst: Inst,
|
||||
block: Block,
|
||||
constraints: Option<&RecipeConstraints>,
|
||||
) {
|
||||
let args = self.cur.func.dfg.inst_args(inst);
|
||||
let num_fixed_ins = if let Some(constraints) = constraints {
|
||||
for (idx, (op, &arg)) in constraints.ins.iter().zip(args).enumerate() {
|
||||
let mut reguse = RegUse::new(arg, idx, op.regclass.into());
|
||||
let lr = &self.liveness[arg];
|
||||
match op.kind {
|
||||
ConstraintKind::Stack => continue,
|
||||
ConstraintKind::FixedReg(_) => reguse.fixed = true,
|
||||
ConstraintKind::Tied(_) => {
|
||||
// A tied operand must kill the used value.
|
||||
reguse.tied = !lr.killed_at(inst, block, &self.cur.func.layout);
|
||||
}
|
||||
ConstraintKind::FixedTied(_) => {
|
||||
reguse.fixed = true;
|
||||
reguse.tied = !lr.killed_at(inst, block, &self.cur.func.layout);
|
||||
}
|
||||
ConstraintKind::Reg => {}
|
||||
}
|
||||
if lr.affinity.is_stack() {
|
||||
reguse.spilled = true;
|
||||
}
|
||||
|
||||
// Only collect the interesting register uses.
|
||||
if reguse.fixed || reguse.tied || reguse.spilled {
|
||||
log::trace!(" reguse: {}", reguse);
|
||||
self.reg_uses.push(reguse);
|
||||
}
|
||||
}
|
||||
constraints.ins.len()
|
||||
} else {
|
||||
// A non-ghost instruction with no constraints can't have any
|
||||
// fixed operands.
|
||||
0
|
||||
};
|
||||
|
||||
// Similarly, for return instructions, collect uses of ABI-defined
|
||||
// return values.
|
||||
if self.cur.func.dfg[inst].opcode().is_return() {
|
||||
debug_assert_eq!(
|
||||
self.cur.func.dfg.inst_variable_args(inst).len(),
|
||||
self.cur.func.signature.returns.len(),
|
||||
"The non-fixed arguments in a return should follow the function's signature."
|
||||
);
|
||||
for (ret_idx, (ret, &arg)) in
|
||||
self.cur.func.signature.returns.iter().zip(args).enumerate()
|
||||
{
|
||||
let idx = num_fixed_ins + ret_idx;
|
||||
let unit = match ret.location {
|
||||
ArgumentLoc::Unassigned => {
|
||||
panic!("function return signature should be legalized")
|
||||
}
|
||||
ArgumentLoc::Reg(unit) => unit,
|
||||
ArgumentLoc::Stack(_) => continue,
|
||||
};
|
||||
let toprc = toprc_containing_regunit(unit, &self.reginfo);
|
||||
let mut reguse = RegUse::new(arg, idx, toprc.into());
|
||||
reguse.fixed = true;
|
||||
|
||||
log::trace!(" reguse: {}", reguse);
|
||||
self.reg_uses.push(reguse);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Collect register uses from the ABI input constraints.
|
||||
fn collect_abi_reg_uses(&mut self, inst: Inst, sig: SigRef) {
|
||||
let num_fixed_args = self.cur.func.dfg[inst]
|
||||
.opcode()
|
||||
.constraints()
|
||||
.num_fixed_value_arguments();
|
||||
let args = self.cur.func.dfg.inst_variable_args(inst);
|
||||
for (idx, (abi, &arg)) in self.cur.func.dfg.signatures[sig]
|
||||
.params
|
||||
.iter()
|
||||
.zip(args)
|
||||
.enumerate()
|
||||
{
|
||||
if abi.location.is_reg() {
|
||||
let (rci, spilled) = match self.liveness[arg].affinity {
|
||||
Affinity::Reg(rci) => (rci, false),
|
||||
Affinity::Stack => (
|
||||
self.cur.isa.regclass_for_abi_type(abi.value_type).into(),
|
||||
true,
|
||||
),
|
||||
Affinity::Unassigned => panic!("Missing affinity for {}", arg),
|
||||
};
|
||||
let mut reguse = RegUse::new(arg, num_fixed_args + idx, rci);
|
||||
reguse.fixed = true;
|
||||
reguse.spilled = spilled;
|
||||
self.reg_uses.push(reguse);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Process multiple register uses to resolve potential conflicts.
|
||||
//
|
||||
// Look for multiple uses of the same value in `self.reg_uses` and insert copies as necessary.
|
||||
// Trigger spilling if any of the temporaries cause the register pressure to become too high.
|
||||
//
|
||||
// Leave `self.reg_uses` empty.
|
||||
fn process_reg_uses(&mut self, inst: Inst, tracker: &LiveValueTracker) {
|
||||
// We're looking for multiple uses of the same value, so start by sorting by value. The
|
||||
// secondary `opidx` key makes it possible to use an unstable (non-allocating) sort.
|
||||
self.reg_uses.sort_unstable_by_key(|u| (u.value, u.opidx));
|
||||
|
||||
self.cur.use_srcloc(inst);
|
||||
for i in 0..self.reg_uses.len() {
|
||||
let ru = self.reg_uses[i];
|
||||
|
||||
// Do we need to insert a copy for this use?
|
||||
let need_copy = if ru.tied {
|
||||
true
|
||||
} else if ru.fixed {
|
||||
// This is a fixed register use which doesn't necessarily require a copy.
|
||||
// Make a copy only if this is not the first use of the value.
|
||||
self.reg_uses
|
||||
.get(i.wrapping_sub(1))
|
||||
.map_or(false, |ru2| ru2.value == ru.value)
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
if need_copy {
|
||||
let copy = self.insert_copy(ru.value, ru.rci);
|
||||
self.cur.func.dfg.inst_args_mut(inst)[ru.opidx as usize] = copy;
|
||||
}
|
||||
|
||||
// Even if we don't insert a copy, we may need to account for register pressure for the
|
||||
// reload pass.
|
||||
if need_copy || ru.spilled {
|
||||
let rc = self.reginfo.rc(ru.rci);
|
||||
while let Err(mask) = self.pressure.take_transient(rc) {
|
||||
log::trace!("Copy of {} reg causes spill", rc);
|
||||
// Spill a live register that is *not* used by the current instruction.
|
||||
// Spilling a use wouldn't help.
|
||||
//
|
||||
// Do allow spilling of block arguments on branches. This is safe since we spill
|
||||
// the whole virtual register which includes the matching block parameter value
|
||||
// at the branch destination. It is also necessary since there can be
|
||||
// arbitrarily many block arguments.
|
||||
match {
|
||||
let args = if self.cur.func.dfg[inst].opcode().is_branch() {
|
||||
self.cur.func.dfg.inst_fixed_args(inst)
|
||||
} else {
|
||||
self.cur.func.dfg.inst_args(inst)
|
||||
};
|
||||
self.spill_candidate(
|
||||
mask,
|
||||
tracker.live().iter().filter(|lv| !args.contains(&lv.value)),
|
||||
)
|
||||
} {
|
||||
Some(cand) => self.spill_reg(cand),
|
||||
None => panic!(
|
||||
"Ran out of {} registers when inserting copy before {}",
|
||||
rc,
|
||||
self.cur.display_inst(inst)
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
self.pressure.reset_transient();
|
||||
self.reg_uses.clear()
|
||||
}
|
||||
|
||||
// Find a spill candidate from `candidates` whose top-level register class is in `mask`.
|
||||
fn spill_candidate<'ii, II>(&self, mask: RegClassMask, candidates: II) -> Option<Value>
|
||||
where
|
||||
II: IntoIterator<Item = &'ii LiveValue>,
|
||||
{
|
||||
// Find the best viable spill candidate.
|
||||
//
|
||||
// The very simple strategy implemented here is to spill the value with the earliest def in
|
||||
// the reverse post-order. This strategy depends on a good reload pass to generate good
|
||||
// code.
|
||||
//
|
||||
// We know that all candidate defs dominate the current instruction, so one of them will
|
||||
// dominate the others. That is the earliest def.
|
||||
candidates
|
||||
.into_iter()
|
||||
.filter_map(|lv| {
|
||||
// Viable candidates are registers in one of the `mask` classes, and not already in
|
||||
// the spill set.
|
||||
if let Affinity::Reg(rci) = lv.affinity {
|
||||
let rc = self.reginfo.rc(rci);
|
||||
if (mask & (1 << rc.toprc)) != 0 && !self.spills.contains(&lv.value) {
|
||||
// Here, `lv` is a viable spill candidate.
|
||||
return Some(lv.value);
|
||||
}
|
||||
}
|
||||
None
|
||||
})
|
||||
.min_by(|&a, &b| {
|
||||
// Find the minimum candidate according to the RPO of their defs.
|
||||
self.domtree.rpo_cmp(
|
||||
self.cur.func.dfg.value_def(a),
|
||||
self.cur.func.dfg.value_def(b),
|
||||
&self.cur.func.layout,
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Spill `value` immediately by
|
||||
///
|
||||
/// 1. Changing its affinity to `Stack` which marks the spill.
|
||||
/// 2. Removing the value from the pressure tracker.
|
||||
/// 3. Adding the value to `self.spills` for later reference by `process_spills`.
|
||||
///
|
||||
/// Note that this does not update the cached affinity in the live value tracker. Call
|
||||
/// `process_spills` to do that.
|
||||
fn spill_reg(&mut self, value: Value) {
|
||||
if let Affinity::Reg(rci) = self.liveness.spill(value) {
|
||||
let rc = self.reginfo.rc(rci);
|
||||
self.pressure.free(rc);
|
||||
self.spills.push(value);
|
||||
log::trace!("Spilled {}:{} -> {}", value, rc, self.pressure);
|
||||
} else {
|
||||
panic!("Cannot spill {} that was already on the stack", value);
|
||||
}
|
||||
|
||||
// Assign a spill slot for the whole virtual register.
|
||||
let ss = self
|
||||
.cur
|
||||
.func
|
||||
.stack_slots
|
||||
.make_spill_slot(self.cur.func.dfg.value_type(value));
|
||||
for &v in self.virtregs.congruence_class(&value) {
|
||||
self.liveness.spill(v);
|
||||
self.cur.func.locations[v] = ValueLoc::Stack(ss);
|
||||
}
|
||||
}
|
||||
|
||||
/// Process any pending spills in the `self.spills` vector.
|
||||
///
|
||||
/// It is assumed that spills are removed from the pressure tracker immediately, see
|
||||
/// `spill_reg` above.
|
||||
///
|
||||
/// We also need to update the live range affinity and remove spilled values from the live
|
||||
/// value tracker.
|
||||
fn process_spills(&mut self, tracker: &mut LiveValueTracker) {
|
||||
if !self.spills.is_empty() {
|
||||
tracker.process_spills(|v| self.spills.contains(&v));
|
||||
self.spills.clear()
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert a `copy value` before the current instruction and give it a live range extending to
|
||||
/// the current instruction.
|
||||
///
|
||||
/// Returns the new local value created.
|
||||
fn insert_copy(&mut self, value: Value, rci: RegClassIndex) -> Value {
|
||||
let copy = self.cur.ins().copy(value);
|
||||
let inst = self.cur.built_inst();
|
||||
|
||||
// Update live ranges.
|
||||
self.liveness.create_dead(copy, inst, Affinity::Reg(rci));
|
||||
self.liveness.extend_locally(
|
||||
copy,
|
||||
self.cur.func.layout.pp_block(inst),
|
||||
self.cur.current_inst().expect("must be at an instruction"),
|
||||
&self.cur.func.layout,
|
||||
);
|
||||
|
||||
copy
|
||||
}
|
||||
}
|
||||
|
||||
/// Struct representing a register use of a value.
|
||||
/// Used to detect multiple uses of the same value with incompatible register constraints.
|
||||
#[derive(Clone, Copy)]
|
||||
struct RegUse {
|
||||
value: Value,
|
||||
opidx: u16,
|
||||
|
||||
// Register class required by the use.
|
||||
rci: RegClassIndex,
|
||||
|
||||
// A use with a fixed register constraint.
|
||||
fixed: bool,
|
||||
|
||||
// A register use of a spilled value.
|
||||
spilled: bool,
|
||||
|
||||
// A use with a tied register constraint *and* the used value is not killed.
|
||||
tied: bool,
|
||||
}
|
||||
|
||||
impl RegUse {
|
||||
fn new(value: Value, idx: usize, rci: RegClassIndex) -> Self {
|
||||
Self {
|
||||
value,
|
||||
opidx: idx as u16,
|
||||
rci,
|
||||
fixed: false,
|
||||
spilled: false,
|
||||
tied: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for RegUse {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{}@op{}", self.value, self.opidx)?;
|
||||
if self.fixed {
|
||||
write!(f, "/fixed")?;
|
||||
}
|
||||
if self.spilled {
|
||||
write!(f, "/spilled")?;
|
||||
}
|
||||
if self.tied {
|
||||
write!(f, "/tied")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1,241 +0,0 @@
|
||||
//! Computing stack layout.
|
||||
|
||||
use crate::ir::stackslot::{StackOffset, StackSize, StackSlotKind};
|
||||
use crate::ir::{StackLayoutInfo, StackSlots};
|
||||
use crate::result::{CodegenError, CodegenResult};
|
||||
use core::cmp::{max, min};
|
||||
|
||||
/// Compute the stack frame layout.
|
||||
///
|
||||
/// Determine the total size of this stack frame and assign offsets to all `Spill` and `Explicit`
|
||||
/// stack slots.
|
||||
///
|
||||
/// The total frame size will be a multiple of `alignment` which must be a power of two, unless the
|
||||
/// function doesn't perform any call.
|
||||
///
|
||||
/// Returns the total stack frame size which is also saved in `frame.frame_size`.
|
||||
///
|
||||
/// If the stack frame is too big, returns an `ImplLimitExceeded` error.
|
||||
pub fn layout_stack(
|
||||
frame: &mut StackSlots,
|
||||
is_leaf: bool,
|
||||
alignment: StackSize,
|
||||
) -> CodegenResult<StackSize> {
|
||||
// Each object and the whole stack frame must fit in 2 GB such that any relative offset within
|
||||
// the frame fits in a `StackOffset`.
|
||||
let max_size = StackOffset::max_value() as StackSize;
|
||||
debug_assert!(alignment.is_power_of_two() && alignment <= max_size);
|
||||
|
||||
// We assume a stack that grows toward lower addresses as implemented by modern ISAs. The
|
||||
// stack layout from high to low addresses will be:
|
||||
//
|
||||
// 1. incoming arguments.
|
||||
// 2. spills + explicits + struct returns.
|
||||
// 3. outgoing arguments.
|
||||
//
|
||||
// The incoming arguments can have both positive and negative offsets. A negative offset
|
||||
// incoming arguments is usually the x86 return address pushed by the call instruction, but
|
||||
// it can also be fixed stack slots pushed by an externally generated prologue.
|
||||
//
|
||||
// Both incoming and outgoing argument slots have fixed offsets that are treated as
|
||||
// reserved zones by the layout algorithm.
|
||||
//
|
||||
// If a function only has incoming arguments and does not perform any calls, then it doesn't
|
||||
// require the stack to be aligned.
|
||||
|
||||
let mut incoming_min = 0;
|
||||
let mut incoming_max = 0;
|
||||
let mut outgoing_max = 0;
|
||||
let mut min_align = alignment;
|
||||
let mut must_align = !is_leaf;
|
||||
|
||||
for slot in frame.values() {
|
||||
if slot.size > max_size {
|
||||
return Err(CodegenError::ImplLimitExceeded);
|
||||
}
|
||||
|
||||
match slot.kind {
|
||||
StackSlotKind::IncomingArg => {
|
||||
incoming_min = min(incoming_min, slot.offset.unwrap());
|
||||
incoming_max = max(incoming_max, slot.offset.unwrap() + slot.size as i32);
|
||||
}
|
||||
StackSlotKind::OutgoingArg => {
|
||||
let offset = slot
|
||||
.offset
|
||||
.unwrap()
|
||||
.checked_add(slot.size as StackOffset)
|
||||
.ok_or(CodegenError::ImplLimitExceeded)?;
|
||||
outgoing_max = max(outgoing_max, offset);
|
||||
must_align = true;
|
||||
}
|
||||
StackSlotKind::StructReturnSlot
|
||||
| StackSlotKind::SpillSlot
|
||||
| StackSlotKind::ExplicitSlot
|
||||
| StackSlotKind::EmergencySlot => {
|
||||
// Determine the smallest alignment of any explicit or spill slot.
|
||||
min_align = slot.alignment(min_align);
|
||||
must_align = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Lay out spill slots, struct return slots, and explicit slots below the
|
||||
// incoming arguments. The offset is negative, growing downwards. Start with
|
||||
// the smallest alignments for better packing.
|
||||
let mut offset = incoming_min;
|
||||
debug_assert!(min_align.is_power_of_two());
|
||||
while min_align <= alignment {
|
||||
for slot in frame.values_mut() {
|
||||
// Pick out explicit and spill slots with exact alignment `min_align`.
|
||||
match slot.kind {
|
||||
StackSlotKind::SpillSlot
|
||||
| StackSlotKind::StructReturnSlot
|
||||
| StackSlotKind::ExplicitSlot
|
||||
| StackSlotKind::EmergencySlot => {
|
||||
if slot.alignment(alignment) != min_align {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
StackSlotKind::IncomingArg | StackSlotKind::OutgoingArg => continue,
|
||||
}
|
||||
|
||||
offset = offset
|
||||
.checked_sub(slot.size as StackOffset)
|
||||
.ok_or(CodegenError::ImplLimitExceeded)?;
|
||||
|
||||
// Aligning the negative offset can never cause overflow. We're only clearing bits.
|
||||
offset &= -(min_align as StackOffset);
|
||||
slot.offset = Some(offset);
|
||||
}
|
||||
|
||||
// Move on to the next higher alignment.
|
||||
min_align *= 2;
|
||||
}
|
||||
|
||||
// Finally, make room for the outgoing arguments.
|
||||
offset = offset
|
||||
.checked_sub(outgoing_max)
|
||||
.ok_or(CodegenError::ImplLimitExceeded)?;
|
||||
|
||||
if must_align {
|
||||
offset &= -(alignment as StackOffset);
|
||||
}
|
||||
|
||||
// Set the computed layout information for the frame
|
||||
let frame_size = (offset as StackSize).wrapping_neg();
|
||||
let inbound_args_size = incoming_max as u32;
|
||||
frame.layout_info = Some(StackLayoutInfo {
|
||||
frame_size,
|
||||
inbound_args_size,
|
||||
});
|
||||
|
||||
Ok(frame_size)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::layout_stack;
    use crate::ir::stackslot::StackOffset;
    use crate::ir::types;
    use crate::ir::{StackSlotData, StackSlotKind, StackSlots};
    use crate::result::CodegenError;

    /// Walk through a frame that grows one slot kind at a time, checking the frame size and
    /// every slot offset for both an alignment of 1 and an alignment of 16.
    #[test]
    fn layout() {
        let sss = &mut StackSlots::new();

        // Check the offsets of several slots of `sss` in one go.
        macro_rules! assert_offsets {
            ($($slot:ident => $offset:expr),+ $(,)?) => {
                $( assert_eq!(sss[$slot].offset, Some($offset)); )+
            };
        }

        // For all these test cases, assume it will call.
        let is_leaf = false;

        // An empty layout should have 0-sized stack frame.
        assert_eq!(layout_stack(sss, is_leaf, 1), Ok(0));
        assert_eq!(layout_stack(sss, is_leaf, 16), Ok(0));

        // Same for incoming arguments with non-negative offsets.
        let in0 = sss.make_incoming_arg(8, 0);
        let in1 = sss.make_incoming_arg(8, 8);

        assert_eq!(layout_stack(sss, is_leaf, 1), Ok(0));
        assert_eq!(layout_stack(sss, is_leaf, 16), Ok(0));
        assert_offsets!(in0 => 0, in1 => 8);

        // Add some spill slots.
        let ss0 = sss.make_spill_slot(types::I64);
        let ss1 = sss.make_spill_slot(types::I32);

        assert_eq!(layout_stack(sss, is_leaf, 1), Ok(12));
        assert_offsets!(in0 => 0, in1 => 8, ss0 => -8, ss1 => -12);

        assert_eq!(layout_stack(sss, is_leaf, 16), Ok(16));
        assert_offsets!(in0 => 0, in1 => 8, ss0 => -16, ss1 => -4);

        // An incoming argument with negative offset counts towards the total frame size, but it
        // should still pack nicely with the spill slots.
        let in2 = sss.make_incoming_arg(4, -4);

        assert_eq!(layout_stack(sss, is_leaf, 1), Ok(16));
        assert_offsets!(in0 => 0, in1 => 8, in2 => -4, ss0 => -12, ss1 => -16);

        assert_eq!(layout_stack(sss, is_leaf, 16), Ok(16));
        assert_offsets!(in0 => 0, in1 => 8, in2 => -4, ss0 => -16, ss1 => -8);

        // Finally, make sure there is room for the outgoing args.
        let out0 = sss.get_outgoing_arg(4, 0);

        assert_eq!(layout_stack(sss, is_leaf, 1), Ok(20));
        assert_offsets!(in0 => 0, in1 => 8, in2 => -4, ss0 => -12, ss1 => -16, out0 => 0);

        assert_eq!(layout_stack(sss, is_leaf, 16), Ok(32));
        assert_offsets!(in0 => 0, in1 => 8, in2 => -4, ss0 => -16, ss1 => -8, out0 => 0);

        // Also test that an unsupported offset is rejected.
        sss.get_outgoing_arg(1, StackOffset::max_value() - 1);
        let overflowed = layout_stack(sss, is_leaf, 1);
        assert_eq!(overflowed, Err(CodegenError::ImplLimitExceeded));
    }

    /// Spill, explicit and emergency slots are all laid out, even in a leaf function.
    #[test]
    fn slot_kinds() {
        let sss = &mut StackSlots::new();

        // Add some slots of various kinds.
        let ss0 = sss.make_spill_slot(types::I32);
        let explicit = StackSlotData::new(StackSlotKind::ExplicitSlot, types::I32.bytes());
        let ss1 = sss.push(explicit);
        let ss2 = sss.get_emergency_slot(types::I32, &[]);

        assert_eq!(layout_stack(sss, true, 1), Ok(12));
        assert_eq!(sss[ss0].offset, Some(-4));
        assert_eq!(sss[ss1].offset, Some(-8));
        assert_eq!(sss[ss2].offset, Some(-12));
    }
}
|
||||
@@ -1,138 +0,0 @@
|
||||
//! Topological order of blocks, according to the dominator tree.
|
||||
|
||||
use crate::dominator_tree::DominatorTree;
|
||||
use crate::entity::EntitySet;
|
||||
use crate::ir::{Block, Layout};
|
||||
use alloc::vec::Vec;
|
||||
|
||||
/// Present blocks in a topological order such that all dominating blocks are guaranteed to be visited
|
||||
/// before the current block.
|
||||
///
|
||||
/// There are many topological orders of the blocks in a function, so it is possible to provide a
|
||||
/// preferred order, and the `TopoOrder` will present blocks in an order that is as close as possible
|
||||
/// to the preferred order.
|
||||
pub struct TopoOrder {
|
||||
/// Preferred order of blocks to visit.
|
||||
preferred: Vec<Block>,
|
||||
|
||||
/// Next entry to get from `preferred`.
|
||||
next: usize,
|
||||
|
||||
/// Set of visited blocks.
|
||||
visited: EntitySet<Block>,
|
||||
|
||||
/// Stack of blocks to be visited next, already in `visited`.
|
||||
stack: Vec<Block>,
|
||||
}
|
||||
|
||||
impl TopoOrder {
|
||||
/// Create a new empty topological order.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
preferred: Vec::new(),
|
||||
next: 0,
|
||||
visited: EntitySet::new(),
|
||||
stack: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Clear all data structures in this topological order.
|
||||
pub fn clear(&mut self) {
|
||||
self.preferred.clear();
|
||||
self.next = 0;
|
||||
self.visited.clear();
|
||||
self.stack.clear();
|
||||
}
|
||||
|
||||
/// Reset and initialize with a preferred sequence of blocks. The resulting topological order is
|
||||
/// guaranteed to contain all of the blocks in `preferred` as well as any dominators.
|
||||
pub fn reset<Blocks>(&mut self, preferred: Blocks)
|
||||
where
|
||||
Blocks: IntoIterator<Item = Block>,
|
||||
{
|
||||
self.preferred.clear();
|
||||
self.preferred.extend(preferred);
|
||||
self.next = 0;
|
||||
self.visited.clear();
|
||||
self.stack.clear();
|
||||
}
|
||||
|
||||
/// Get the next block in the topological order.
|
||||
///
|
||||
/// Two things are guaranteed about the blocks returned by this function:
|
||||
///
|
||||
/// - All blocks in the `preferred` iterator given to `reset` will be returned.
|
||||
/// - All dominators are visited before the block returned.
|
||||
pub fn next(&mut self, layout: &Layout, domtree: &DominatorTree) -> Option<Block> {
|
||||
self.visited.resize(layout.block_capacity());
|
||||
// Any entries in `stack` should be returned immediately. They have already been added to
|
||||
// `visited`.
|
||||
while self.stack.is_empty() {
|
||||
match self.preferred.get(self.next).cloned() {
|
||||
None => return None,
|
||||
Some(mut block) => {
|
||||
// We have the next block in the preferred order.
|
||||
self.next += 1;
|
||||
// Push it along with any non-visited dominators.
|
||||
while self.visited.insert(block) {
|
||||
self.stack.push(block);
|
||||
match domtree.idom(block) {
|
||||
Some(idom) => {
|
||||
block = layout.inst_block(idom).expect("idom not in layout")
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
self.stack.pop()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::cursor::{Cursor, FuncCursor};
    use crate::dominator_tree::DominatorTree;
    use crate::flowgraph::ControlFlowGraph;
    use crate::ir::{Function, InstBuilder};
    use core::iter;

    /// Compute the dominator tree of `func` from a freshly built control flow graph.
    fn dominators(func: &Function) -> DominatorTree {
        let cfg = ControlFlowGraph::with_function(func);
        DominatorTree::with_function(func, &cfg)
    }

    /// An empty function yields no blocks, before and after a `reset`.
    #[test]
    fn empty() {
        let func = Function::new();
        let domtree = dominators(&func);
        let mut topo = TopoOrder::new();

        assert_eq!(topo.next(&func.layout, &domtree), None);
        topo.reset(func.layout.blocks());
        assert_eq!(topo.next(&func.layout, &domtree), None);
    }

    /// Asking only for `block1` must still present its dominator `block0` first.
    #[test]
    fn simple() {
        let mut func = Function::new();
        let block0 = func.dfg.make_block();
        let block1 = func.dfg.make_block();

        {
            // block0 jumps to block1, which loops back to itself.
            let mut cur = FuncCursor::new(&mut func);

            cur.insert_block(block0);
            cur.ins().jump(block1, &[]);
            cur.insert_block(block1);
            cur.ins().jump(block1, &[]);
        }

        let domtree = dominators(&func);
        let mut topo = TopoOrder::new();

        topo.reset(iter::once(block1));
        for &expected in [Some(block0), Some(block1), None].iter() {
            assert_eq!(topo.next(&func.layout, &domtree), expected);
        }
    }
}
|
||||
@@ -1,14 +1,8 @@
|
||||
use crate::ir::{Function, SourceLoc, Value, ValueLabel, ValueLabelAssignments, ValueLoc};
|
||||
use crate::isa::TargetIsa;
|
||||
use crate::machinst::MachCompileResult;
|
||||
use crate::regalloc::{Context, RegDiversions};
|
||||
use crate::ir::{SourceLoc, ValueLabel};
|
||||
use crate::HashMap;
|
||||
use alloc::collections::BTreeMap;
|
||||
use alloc::vec::Vec;
|
||||
use core::cmp::Ordering;
|
||||
use core::convert::From;
|
||||
use core::iter::Iterator;
|
||||
use core::ops::Bound::*;
|
||||
use core::ops::Deref;
|
||||
use regalloc::Reg;
|
||||
|
||||
@@ -31,241 +25,15 @@ pub struct ValueLocRange {
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[cfg_attr(feature = "enable-serde", derive(Serialize, Deserialize))]
|
||||
pub enum LabelValueLoc {
|
||||
/// Old-backend location: RegUnit, StackSlot, or Unassigned.
|
||||
ValueLoc(ValueLoc),
|
||||
/// New-backend Reg.
|
||||
Reg(Reg),
|
||||
/// New-backend offset from stack pointer.
|
||||
SPOffset(i64),
|
||||
}
|
||||
|
||||
impl From<ValueLoc> for LabelValueLoc {
|
||||
fn from(v: ValueLoc) -> Self {
|
||||
LabelValueLoc::ValueLoc(v)
|
||||
}
|
||||
}
|
||||
|
||||
/// Resulting map of Value labels and their ranges/locations.
|
||||
pub type ValueLabelsRanges = HashMap<ValueLabel, Vec<ValueLocRange>>;
|
||||
|
||||
fn build_value_labels_index<T>(func: &Function) -> BTreeMap<T, (Value, ValueLabel)>
|
||||
where
|
||||
T: From<SourceLoc> + Deref<Target = SourceLoc> + Ord + Copy,
|
||||
{
|
||||
if func.dfg.values_labels.is_none() {
|
||||
return BTreeMap::new();
|
||||
}
|
||||
let values_labels = func.dfg.values_labels.as_ref().unwrap();
|
||||
|
||||
// Index values_labels by srcloc/from
|
||||
let mut sorted = BTreeMap::new();
|
||||
for (val, assigns) in values_labels {
|
||||
match assigns {
|
||||
ValueLabelAssignments::Starts(labels) => {
|
||||
for label in labels {
|
||||
if label.from.is_default() {
|
||||
continue;
|
||||
}
|
||||
let srcloc = T::from(label.from);
|
||||
let label = label.label;
|
||||
sorted.insert(srcloc, (*val, label));
|
||||
}
|
||||
}
|
||||
ValueLabelAssignments::Alias { from, value } => {
|
||||
if from.is_default() {
|
||||
continue;
|
||||
}
|
||||
let mut aliased_value = *value;
|
||||
while let Some(ValueLabelAssignments::Alias { value, .. }) =
|
||||
values_labels.get(&aliased_value)
|
||||
{
|
||||
// TODO check/limit recursion?
|
||||
aliased_value = *value;
|
||||
}
|
||||
let from = T::from(*from);
|
||||
if let Some(ValueLabelAssignments::Starts(labels)) =
|
||||
values_labels.get(&aliased_value)
|
||||
{
|
||||
for label in labels {
|
||||
let srcloc = if label.from.is_default() {
|
||||
from
|
||||
} else {
|
||||
from.max(T::from(label.from))
|
||||
};
|
||||
let label = label.label;
|
||||
sorted.insert(srcloc, (*val, label));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
sorted
|
||||
}
|
||||
|
||||
/// Builds ranges and location for specified value labels.
|
||||
/// The labels specified at DataFlowGraph's values_labels collection.
|
||||
pub fn build_value_labels_ranges<T>(
|
||||
func: &Function,
|
||||
regalloc: &Context,
|
||||
mach_compile_result: Option<&MachCompileResult>,
|
||||
isa: &dyn TargetIsa,
|
||||
) -> ValueLabelsRanges
|
||||
where
|
||||
T: From<SourceLoc> + Deref<Target = SourceLoc> + Ord + Copy,
|
||||
{
|
||||
if let Some(mach_compile_result) = mach_compile_result {
|
||||
return mach_compile_result.value_labels_ranges.clone();
|
||||
}
|
||||
|
||||
let values_labels = build_value_labels_index::<T>(func);
|
||||
|
||||
let mut blocks = func.layout.blocks().collect::<Vec<_>>();
|
||||
blocks.sort_by_key(|block| func.offsets[*block]); // Ensure inst offsets always increase
|
||||
let encinfo = isa.encoding_info();
|
||||
let values_locations = &func.locations;
|
||||
let liveness_ranges = regalloc.liveness().ranges();
|
||||
|
||||
let mut ranges = HashMap::new();
|
||||
let mut add_range = |label, range: (u32, u32), loc: ValueLoc| {
|
||||
if range.0 >= range.1 || !loc.is_assigned() {
|
||||
return;
|
||||
}
|
||||
ranges
|
||||
.entry(label)
|
||||
.or_insert_with(Vec::new)
|
||||
.push(ValueLocRange {
|
||||
loc: loc.into(),
|
||||
start: range.0,
|
||||
end: range.1,
|
||||
});
|
||||
};
|
||||
|
||||
let mut end_offset = 0;
|
||||
let mut tracked_values: Vec<(Value, ValueLabel, u32, ValueLoc)> = Vec::new();
|
||||
let mut divert = RegDiversions::new();
|
||||
for block in blocks {
|
||||
divert.at_block(&func.entry_diversions, block);
|
||||
let mut last_srcloc: Option<T> = None;
|
||||
for (offset, inst, size) in func.inst_offsets(block, &encinfo) {
|
||||
divert.apply(&func.dfg[inst]);
|
||||
end_offset = offset + size;
|
||||
// Remove killed values.
|
||||
tracked_values.retain(|(x, label, start_offset, last_loc)| {
|
||||
let range = liveness_ranges.get(*x);
|
||||
if range.expect("value").killed_at(inst, block, &func.layout) {
|
||||
add_range(*label, (*start_offset, end_offset), *last_loc);
|
||||
return false;
|
||||
}
|
||||
true
|
||||
});
|
||||
|
||||
let srcloc = func.srclocs[inst];
|
||||
if srcloc.is_default() {
|
||||
// Don't process instructions without srcloc.
|
||||
continue;
|
||||
}
|
||||
let srcloc = T::from(srcloc);
|
||||
|
||||
// Record and restart ranges if Value location was changed.
|
||||
for (val, label, start_offset, last_loc) in &mut tracked_values {
|
||||
let new_loc = divert.get(*val, values_locations);
|
||||
if new_loc == *last_loc {
|
||||
continue;
|
||||
}
|
||||
add_range(*label, (*start_offset, end_offset), *last_loc);
|
||||
*start_offset = end_offset;
|
||||
*last_loc = new_loc;
|
||||
}
|
||||
|
||||
// New source locations range started: abandon all tracked values.
|
||||
if last_srcloc.is_some() && last_srcloc.unwrap() > srcloc {
|
||||
for (_, label, start_offset, last_loc) in &tracked_values {
|
||||
add_range(*label, (*start_offset, end_offset), *last_loc);
|
||||
}
|
||||
tracked_values.clear();
|
||||
last_srcloc = None;
|
||||
}
|
||||
|
||||
// Get non-processed Values based on srcloc
|
||||
let range = (
|
||||
match last_srcloc {
|
||||
Some(a) => Excluded(a),
|
||||
None => Unbounded,
|
||||
},
|
||||
Included(srcloc),
|
||||
);
|
||||
let active_values = values_labels.range(range);
|
||||
let active_values = active_values.filter(|(_, (v, _))| {
|
||||
// Ignore dead/inactive Values.
|
||||
let range = liveness_ranges.get(*v);
|
||||
match range {
|
||||
Some(r) => r.reaches_use(inst, block, &func.layout),
|
||||
None => false,
|
||||
}
|
||||
});
|
||||
// Append new Values to the tracked_values.
|
||||
for (_, (val, label)) in active_values {
|
||||
let loc = divert.get(*val, values_locations);
|
||||
tracked_values.push((*val, *label, end_offset, loc));
|
||||
}
|
||||
|
||||
last_srcloc = Some(srcloc);
|
||||
}
|
||||
// Finish all started ranges.
|
||||
for (_, label, start_offset, last_loc) in &tracked_values {
|
||||
add_range(*label, (*start_offset, end_offset), *last_loc);
|
||||
}
|
||||
}
|
||||
|
||||
// Optimize ranges in-place
|
||||
for (_, label_ranges) in ranges.iter_mut() {
|
||||
assert!(!label_ranges.is_empty());
|
||||
label_ranges.sort_by(|a, b| a.start.cmp(&b.start).then_with(|| a.end.cmp(&b.end)));
|
||||
|
||||
// Merge ranges
|
||||
let mut i = 1;
|
||||
let mut j = 0;
|
||||
while i < label_ranges.len() {
|
||||
assert!(label_ranges[j].start <= label_ranges[i].end);
|
||||
if label_ranges[j].loc != label_ranges[i].loc {
|
||||
// Different location
|
||||
if label_ranges[j].end >= label_ranges[i].end {
|
||||
// Consumed by previous range, skipping
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
j += 1;
|
||||
label_ranges[j] = label_ranges[i];
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
if label_ranges[j].end < label_ranges[i].start {
|
||||
// Gap in the range location
|
||||
j += 1;
|
||||
label_ranges[j] = label_ranges[i];
|
||||
i += 1;
|
||||
continue;
|
||||
}
|
||||
// Merge i-th and j-th ranges
|
||||
if label_ranges[j].end < label_ranges[i].end {
|
||||
label_ranges[j].end = label_ranges[i].end;
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
label_ranges.truncate(j + 1);
|
||||
|
||||
// Cut/move start position of next range, if two neighbor ranges intersect.
|
||||
for i in 0..j {
|
||||
if label_ranges[i].end > label_ranges[i + 1].start {
|
||||
label_ranges[i + 1].start = label_ranges[i].end;
|
||||
assert!(label_ranges[i + 1].start < label_ranges[i + 1].end);
|
||||
}
|
||||
assert!(label_ranges[i].end <= label_ranges[i + 1].start);
|
||||
}
|
||||
}
|
||||
ranges
|
||||
}
|
||||
|
||||
#[derive(Eq, Clone, Copy)]
|
||||
pub struct ComparableSourceLoc(SourceLoc);
|
||||
|
||||
|
||||
@@ -4,10 +4,8 @@ use crate::dbg::DisplayList;
|
||||
use crate::dominator_tree::{DominatorTree, DominatorTreePreorder};
|
||||
use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
|
||||
use crate::ir::{ExpandedProgramPoint, Function};
|
||||
use crate::regalloc::liveness::Liveness;
|
||||
use crate::regalloc::virtregs::VirtRegs;
|
||||
use crate::timing;
|
||||
use crate::verifier::{VerifierErrors, VerifierStepResult};
|
||||
use crate::verifier::{virtregs::VirtRegs, VerifierErrors, VerifierStepResult};
|
||||
|
||||
/// Verify conventional SSA form for `func`.
|
||||
///
|
||||
@@ -27,7 +25,6 @@ pub fn verify_cssa(
|
||||
func: &Function,
|
||||
cfg: &ControlFlowGraph,
|
||||
domtree: &DominatorTree,
|
||||
liveness: &Liveness,
|
||||
virtregs: &VirtRegs,
|
||||
errors: &mut VerifierErrors,
|
||||
) -> VerifierStepResult<()> {
|
||||
@@ -41,7 +38,6 @@ pub fn verify_cssa(
|
||||
cfg,
|
||||
domtree,
|
||||
virtregs,
|
||||
liveness,
|
||||
preorder,
|
||||
};
|
||||
verifier.check_virtregs(errors)?;
|
||||
@@ -54,7 +50,6 @@ struct CssaVerifier<'a> {
|
||||
cfg: &'a ControlFlowGraph,
|
||||
domtree: &'a DominatorTree,
|
||||
virtregs: &'a VirtRegs,
|
||||
liveness: &'a Liveness,
|
||||
preorder: DominatorTreePreorder,
|
||||
}
|
||||
|
||||
@@ -70,9 +65,6 @@ impl<'a> CssaVerifier<'a> {
|
||||
if !self.func.dfg.value_is_attached(val) {
|
||||
return errors.fatal((val, format!("Detached value in {}", vreg)));
|
||||
}
|
||||
if self.liveness.get(val).is_none() {
|
||||
return errors.fatal((val, format!("Value in {} has no live range", vreg)));
|
||||
};
|
||||
|
||||
// Check topological ordering with the previous values in the virtual register.
|
||||
let def: ExpandedProgramPoint = self.func.dfg.value_def(val).into();
|
||||
@@ -120,19 +112,7 @@ impl<'a> CssaVerifier<'a> {
|
||||
if self.preorder.dominates(prev_block, def_block)
|
||||
&& self.domtree.dominates(prev_def, def, &self.func.layout)
|
||||
{
|
||||
if self.liveness[prev_val].overlaps_def(def, def_block, &self.func.layout) {
|
||||
return errors.fatal((
|
||||
val,
|
||||
format!(
|
||||
"Value def in {} = {} interferes with {}",
|
||||
vreg,
|
||||
DisplayList(values),
|
||||
prev_val
|
||||
),
|
||||
));
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,7 +4,6 @@ use crate::entity::{EntitySet, SecondaryMap};
|
||||
use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
|
||||
use crate::ir;
|
||||
use crate::ir::instructions::BranchInfo;
|
||||
use crate::isa;
|
||||
use crate::packed_option::PackedOption;
|
||||
use crate::timing;
|
||||
use crate::verifier::{VerifierErrors, VerifierStepResult};
|
||||
@@ -24,19 +23,12 @@ use crate::verifier::{VerifierErrors, VerifierStepResult};
|
||||
pub fn verify_flags(
|
||||
func: &ir::Function,
|
||||
cfg: &ControlFlowGraph,
|
||||
isa: Option<&dyn isa::TargetIsa>,
|
||||
errors: &mut VerifierErrors,
|
||||
) -> VerifierStepResult<()> {
|
||||
let _tt = timing::verify_flags();
|
||||
let encinfo = if isa.is_none() || isa.unwrap().get_mach_backend().is_some() {
|
||||
None
|
||||
} else {
|
||||
Some(isa.unwrap().encoding_info())
|
||||
};
|
||||
let mut verifier = FlagsVerifier {
|
||||
func,
|
||||
cfg,
|
||||
encinfo,
|
||||
livein: SecondaryMap::new(),
|
||||
};
|
||||
verifier.check(errors)
|
||||
@@ -45,7 +37,6 @@ pub fn verify_flags(
|
||||
struct FlagsVerifier<'a> {
|
||||
func: &'a ir::Function,
|
||||
cfg: &'a ControlFlowGraph,
|
||||
encinfo: Option<isa::EncInfo>,
|
||||
|
||||
/// The single live-in flags value (if any) for each block.
|
||||
livein: SecondaryMap<ir::Block, PackedOption<ir::Value>>,
|
||||
@@ -111,21 +102,6 @@ impl<'a> FlagsVerifier<'a> {
|
||||
return Err(());
|
||||
}
|
||||
}
|
||||
|
||||
// Does the instruction have an encoding that clobbers the CPU flags?
|
||||
if self
|
||||
.encinfo
|
||||
.as_ref()
|
||||
.and_then(|ei| ei.operand_constraints(self.func.encodings[inst]))
|
||||
.map_or(false, |c| c.clobbers_flags)
|
||||
&& live_val.is_some()
|
||||
{
|
||||
errors.report((
|
||||
inst,
|
||||
format!("encoding clobbers live CPU flags in {}", live),
|
||||
));
|
||||
return Err(());
|
||||
}
|
||||
}
|
||||
|
||||
// Now look for live ranges of CPU flags that end here.
|
||||
|
||||
@@ -1,235 +0,0 @@
|
||||
//! Liveness verifier.
|
||||
|
||||
use crate::flowgraph::{BlockPredecessor, ControlFlowGraph};
|
||||
use crate::ir::entities::AnyEntity;
|
||||
use crate::ir::{ExpandedProgramPoint, Function, ProgramPoint, Value};
|
||||
use crate::isa::TargetIsa;
|
||||
use crate::regalloc::liveness::Liveness;
|
||||
use crate::regalloc::liverange::LiveRange;
|
||||
use crate::timing;
|
||||
use crate::verifier::{VerifierErrors, VerifierStepResult};
|
||||
|
||||
/// Verify liveness information for `func`.
|
||||
///
|
||||
/// The provided control flow graph is assumed to be sound.
|
||||
///
|
||||
/// - All values in the program must have a live range.
|
||||
/// - The live range def point must match where the value is defined.
|
||||
/// - The live range must reach all uses.
|
||||
/// - When a live range is live-in to a block, it must be live at all the predecessors.
|
||||
/// - The live range affinity must be compatible with encoding constraints.
|
||||
///
|
||||
/// We don't verify that live ranges are minimal. This would require recomputing live ranges for
|
||||
/// all values.
|
||||
pub fn verify_liveness(
|
||||
isa: &dyn TargetIsa,
|
||||
func: &Function,
|
||||
cfg: &ControlFlowGraph,
|
||||
liveness: &Liveness,
|
||||
errors: &mut VerifierErrors,
|
||||
) -> VerifierStepResult<()> {
|
||||
let _tt = timing::verify_liveness();
|
||||
let verifier = LivenessVerifier {
|
||||
isa,
|
||||
func,
|
||||
cfg,
|
||||
liveness,
|
||||
};
|
||||
verifier.check_blocks(errors)?;
|
||||
verifier.check_insts(errors)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct LivenessVerifier<'a> {
|
||||
isa: &'a dyn TargetIsa,
|
||||
func: &'a Function,
|
||||
cfg: &'a ControlFlowGraph,
|
||||
liveness: &'a Liveness,
|
||||
}
|
||||
|
||||
impl<'a> LivenessVerifier<'a> {
|
||||
/// Check all block arguments.
|
||||
fn check_blocks(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> {
|
||||
for block in self.func.layout.blocks() {
|
||||
for &val in self.func.dfg.block_params(block) {
|
||||
let lr = match self.liveness.get(val) {
|
||||
Some(lr) => lr,
|
||||
None => {
|
||||
return errors
|
||||
.fatal((block, format!("block arg {} has no live range", val)))
|
||||
}
|
||||
};
|
||||
self.check_lr(block.into(), val, lr, errors)?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check all instructions.
|
||||
fn check_insts(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> {
|
||||
for block in self.func.layout.blocks() {
|
||||
for inst in self.func.layout.block_insts(block) {
|
||||
let encoding = self.func.encodings[inst];
|
||||
|
||||
// Check the defs.
|
||||
for &val in self.func.dfg.inst_results(inst) {
|
||||
let lr = match self.liveness.get(val) {
|
||||
Some(lr) => lr,
|
||||
None => return errors.fatal((inst, format!("{} has no live range", val))),
|
||||
};
|
||||
self.check_lr(inst.into(), val, lr, errors)?;
|
||||
|
||||
if encoding.is_legal() {
|
||||
// A legal instruction is not allowed to define ghost values.
|
||||
if lr.affinity.is_unassigned() {
|
||||
return errors.fatal((
|
||||
inst,
|
||||
format!(
|
||||
"{} is a ghost value defined by a real [{}] instruction",
|
||||
val,
|
||||
self.isa.encoding_info().display(encoding)
|
||||
),
|
||||
));
|
||||
}
|
||||
} else if !lr.affinity.is_unassigned() {
|
||||
// A non-encoded instruction can only define ghost values.
|
||||
return errors.fatal((
|
||||
inst,
|
||||
format!(
|
||||
"{} is a real {} value defined by a ghost instruction",
|
||||
val,
|
||||
lr.affinity.display(&self.isa.register_info())
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// Check the uses.
|
||||
for &val in self.func.dfg.inst_args(inst) {
|
||||
let lr = match self.liveness.get(val) {
|
||||
Some(lr) => lr,
|
||||
None => return errors.fatal((inst, format!("{} has no live range", val))),
|
||||
};
|
||||
|
||||
debug_assert!(self.func.layout.inst_block(inst).unwrap() == block);
|
||||
if !lr.reaches_use(inst, block, &self.func.layout) {
|
||||
return errors.fatal((inst, format!("{} is not live at this use", val)));
|
||||
}
|
||||
|
||||
// A legal instruction is not allowed to depend on ghost values.
|
||||
if encoding.is_legal() && lr.affinity.is_unassigned() {
|
||||
return errors.fatal((
|
||||
inst,
|
||||
format!(
|
||||
"{} is a ghost value used by a real [{}] instruction",
|
||||
val,
|
||||
self.isa.encoding_info().display(encoding),
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check the integrity of the live range `lr`.
|
||||
fn check_lr(
|
||||
&self,
|
||||
def: ProgramPoint,
|
||||
val: Value,
|
||||
lr: &LiveRange,
|
||||
errors: &mut VerifierErrors,
|
||||
) -> VerifierStepResult<()> {
|
||||
let l = &self.func.layout;
|
||||
|
||||
let loc: AnyEntity = match def.into() {
|
||||
ExpandedProgramPoint::Block(e) => e.into(),
|
||||
ExpandedProgramPoint::Inst(i) => i.into(),
|
||||
};
|
||||
if lr.def() != def {
|
||||
return errors.fatal((
|
||||
loc,
|
||||
format!("Wrong live range def ({}) for {}", lr.def(), val),
|
||||
));
|
||||
}
|
||||
if lr.is_dead() {
|
||||
if !lr.is_local() {
|
||||
return errors.fatal((loc, format!("Dead live range {} should be local", val)));
|
||||
} else {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
let def_block = match def.into() {
|
||||
ExpandedProgramPoint::Block(e) => e,
|
||||
ExpandedProgramPoint::Inst(i) => l.inst_block(i).unwrap(),
|
||||
};
|
||||
match lr.def_local_end().into() {
|
||||
ExpandedProgramPoint::Block(e) => {
|
||||
return errors.fatal((
|
||||
loc,
|
||||
format!("Def local range for {} can't end at {}", val, e),
|
||||
));
|
||||
}
|
||||
ExpandedProgramPoint::Inst(i) => {
|
||||
if self.func.layout.inst_block(i) != Some(def_block) {
|
||||
return errors
|
||||
.fatal((loc, format!("Def local end for {} in wrong block", val)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Now check the live-in intervals against the CFG.
|
||||
for (mut block, end) in lr.liveins() {
|
||||
if !l.is_block_inserted(block) {
|
||||
return errors.fatal((
|
||||
loc,
|
||||
format!("{} livein at {} which is not in the layout", val, block),
|
||||
));
|
||||
}
|
||||
let end_block = match l.inst_block(end) {
|
||||
Some(e) => e,
|
||||
None => {
|
||||
return errors.fatal((
|
||||
loc,
|
||||
format!(
|
||||
"{} livein for {} ends at {} which is not in the layout",
|
||||
val, block, end
|
||||
),
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
// Check all the blocks in the interval independently.
|
||||
loop {
|
||||
// If `val` is live-in at `block`, it must be live at all the predecessors.
|
||||
for BlockPredecessor { inst: pred, block } in self.cfg.pred_iter(block) {
|
||||
if !lr.reaches_use(pred, block, &self.func.layout) {
|
||||
return errors.fatal((
|
||||
pred,
|
||||
format!(
|
||||
"{} is live in to {} but not live at predecessor",
|
||||
val, block
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
if block == end_block {
|
||||
break;
|
||||
}
|
||||
block = match l.next_block(block) {
|
||||
Some(e) => e,
|
||||
None => {
|
||||
return errors.fatal((
|
||||
loc,
|
||||
format!("end of {} livein ({}) never reached", val, end_block),
|
||||
));
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -1,399 +0,0 @@
|
||||
//! Verify value locations.
|
||||
|
||||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir;
|
||||
use crate::isa;
|
||||
use crate::regalloc::liveness::Liveness;
|
||||
use crate::regalloc::RegDiversions;
|
||||
use crate::timing;
|
||||
use crate::verifier::{VerifierErrors, VerifierStepResult};
|
||||
|
||||
/// Verify value locations for `func`.
|
||||
///
|
||||
/// After register allocation, every value must be assigned to a location - either a register or a
|
||||
/// stack slot. These locations must be compatible with the constraints described by the
|
||||
/// instruction encoding recipes.
|
||||
///
|
||||
/// Values can be temporarily diverted to a different location by using the `regmove`, `regspill`,
|
||||
/// and `regfill` instructions, but only inside a block.
|
||||
///
|
||||
/// If a liveness analysis is provided, it is used to verify that there are no active register
|
||||
/// diversions across control flow edges.
|
||||
pub fn verify_locations(
|
||||
isa: &dyn isa::TargetIsa,
|
||||
func: &ir::Function,
|
||||
cfg: &ControlFlowGraph,
|
||||
liveness: Option<&Liveness>,
|
||||
errors: &mut VerifierErrors,
|
||||
) -> VerifierStepResult<()> {
|
||||
let _tt = timing::verify_locations();
|
||||
let verifier = LocationVerifier {
|
||||
isa,
|
||||
func,
|
||||
reginfo: isa.register_info(),
|
||||
encinfo: isa.encoding_info(),
|
||||
cfg,
|
||||
liveness,
|
||||
};
|
||||
verifier.check_constraints(errors)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
struct LocationVerifier<'a> {
|
||||
isa: &'a dyn isa::TargetIsa,
|
||||
func: &'a ir::Function,
|
||||
reginfo: isa::RegInfo,
|
||||
encinfo: isa::EncInfo,
|
||||
cfg: &'a ControlFlowGraph,
|
||||
liveness: Option<&'a Liveness>,
|
||||
}
|
||||
|
||||
impl<'a> LocationVerifier<'a> {
|
||||
/// Check that the assigned value locations match the operand constraints of their uses.
|
||||
fn check_constraints(&self, errors: &mut VerifierErrors) -> VerifierStepResult<()> {
|
||||
let dfg = &self.func.dfg;
|
||||
let mut divert = RegDiversions::new();
|
||||
|
||||
for block in self.func.layout.blocks() {
|
||||
divert.at_block(&self.func.entry_diversions, block);
|
||||
|
||||
let mut is_after_branch = false;
|
||||
for inst in self.func.layout.block_insts(block) {
|
||||
let enc = self.func.encodings[inst];
|
||||
|
||||
if enc.is_legal() {
|
||||
self.check_enc_constraints(inst, enc, &divert, errors)?
|
||||
} else {
|
||||
self.check_ghost_results(inst, errors)?;
|
||||
}
|
||||
|
||||
if let Some(sig) = dfg.call_signature(inst) {
|
||||
self.check_call_abi(inst, sig, &divert, errors)?;
|
||||
}
|
||||
|
||||
let opcode = dfg[inst].opcode();
|
||||
if opcode.is_return() {
|
||||
self.check_return_abi(inst, &divert, errors)?;
|
||||
} else if opcode.is_branch() && !divert.is_empty() {
|
||||
self.check_cfg_edges(inst, &mut divert, is_after_branch, errors)?;
|
||||
}
|
||||
|
||||
self.update_diversions(inst, &mut divert, errors)?;
|
||||
is_after_branch = opcode.is_branch();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check encoding constraints against the current value locations.
|
||||
fn check_enc_constraints(
|
||||
&self,
|
||||
inst: ir::Inst,
|
||||
enc: isa::Encoding,
|
||||
divert: &RegDiversions,
|
||||
errors: &mut VerifierErrors,
|
||||
) -> VerifierStepResult<()> {
|
||||
let constraints = self
|
||||
.encinfo
|
||||
.operand_constraints(enc)
|
||||
.expect("check_enc_constraints requires a legal encoding");
|
||||
|
||||
if constraints.satisfied(inst, divert, self.func) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// TODO: We could give a better error message here.
|
||||
errors.fatal((
|
||||
inst,
|
||||
format!(
|
||||
"{} constraints not satisfied in: {}\n{}",
|
||||
self.encinfo.display(enc),
|
||||
self.func.dfg.display_inst(inst, self.isa),
|
||||
self.func.display(self.isa),
|
||||
),
|
||||
))
|
||||
}
|
||||
|
||||
/// Check that the result values produced by a ghost instruction are not assigned a value
|
||||
/// location.
|
||||
fn check_ghost_results(
|
||||
&self,
|
||||
inst: ir::Inst,
|
||||
errors: &mut VerifierErrors,
|
||||
) -> VerifierStepResult<()> {
|
||||
let results = self.func.dfg.inst_results(inst);
|
||||
|
||||
for &res in results {
|
||||
let loc = self.func.locations[res];
|
||||
if loc.is_assigned() {
|
||||
return errors.fatal((
|
||||
inst,
|
||||
format!(
|
||||
"ghost result {} value must not have a location ({}).",
|
||||
res,
|
||||
loc.display(&self.reginfo)
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check the ABI argument and result locations for a call.
|
||||
fn check_call_abi(
|
||||
&self,
|
||||
inst: ir::Inst,
|
||||
sig: ir::SigRef,
|
||||
divert: &RegDiversions,
|
||||
errors: &mut VerifierErrors,
|
||||
) -> VerifierStepResult<()> {
|
||||
let sig = &self.func.dfg.signatures[sig];
|
||||
let varargs = self.func.dfg.inst_variable_args(inst);
|
||||
let results = self.func.dfg.inst_results(inst);
|
||||
|
||||
for (abi, &value) in sig.params.iter().zip(varargs) {
|
||||
self.check_abi_location(
|
||||
inst,
|
||||
value,
|
||||
abi,
|
||||
divert.get(value, &self.func.locations),
|
||||
ir::StackSlotKind::OutgoingArg,
|
||||
errors,
|
||||
)?;
|
||||
}
|
||||
|
||||
for (abi, &value) in sig.returns.iter().zip(results) {
|
||||
self.check_abi_location(
|
||||
inst,
|
||||
value,
|
||||
abi,
|
||||
self.func.locations[value],
|
||||
ir::StackSlotKind::OutgoingArg,
|
||||
errors,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check the ABI argument locations for a return.
|
||||
fn check_return_abi(
|
||||
&self,
|
||||
inst: ir::Inst,
|
||||
divert: &RegDiversions,
|
||||
errors: &mut VerifierErrors,
|
||||
) -> VerifierStepResult<()> {
|
||||
let sig = &self.func.signature;
|
||||
let varargs = self.func.dfg.inst_variable_args(inst);
|
||||
|
||||
for (abi, &value) in sig.returns.iter().zip(varargs) {
|
||||
self.check_abi_location(
|
||||
inst,
|
||||
value,
|
||||
abi,
|
||||
divert.get(value, &self.func.locations),
|
||||
ir::StackSlotKind::IncomingArg,
|
||||
errors,
|
||||
)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Check a single ABI location.
|
||||
fn check_abi_location(
|
||||
&self,
|
||||
inst: ir::Inst,
|
||||
value: ir::Value,
|
||||
abi: &ir::AbiParam,
|
||||
loc: ir::ValueLoc,
|
||||
want_kind: ir::StackSlotKind,
|
||||
errors: &mut VerifierErrors,
|
||||
) -> VerifierStepResult<()> {
|
||||
match abi.location {
|
||||
ir::ArgumentLoc::Unassigned => {}
|
||||
ir::ArgumentLoc::Reg(reg) => {
|
||||
if loc != ir::ValueLoc::Reg(reg) {
|
||||
return errors.fatal((
|
||||
inst,
|
||||
format!(
|
||||
"ABI expects {} in {}, got {}",
|
||||
value,
|
||||
abi.location.display(&self.reginfo),
|
||||
loc.display(&self.reginfo),
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
ir::ArgumentLoc::Stack(offset) => {
|
||||
if let ir::ValueLoc::Stack(ss) = loc {
|
||||
let slot = &self.func.stack_slots[ss];
|
||||
if slot.kind != want_kind {
|
||||
return errors.fatal((
|
||||
inst,
|
||||
format!(
|
||||
"call argument {} should be in a {} slot, but {} is {}",
|
||||
value, want_kind, ss, slot.kind
|
||||
),
|
||||
));
|
||||
}
|
||||
if slot.offset.unwrap() != offset {
|
||||
return errors.fatal((
|
||||
inst,
|
||||
format!(
|
||||
"ABI expects {} at stack offset {}, but {} is at {}",
|
||||
value,
|
||||
offset,
|
||||
ss,
|
||||
slot.offset.unwrap()
|
||||
),
|
||||
));
|
||||
}
|
||||
} else {
|
||||
return errors.fatal((
|
||||
inst,
|
||||
format!(
|
||||
"ABI expects {} at stack offset {}, got {}",
|
||||
value,
|
||||
offset,
|
||||
loc.display(&self.reginfo)
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Update diversions to reflect the current instruction and check their consistency.
|
||||
fn update_diversions(
|
||||
&self,
|
||||
inst: ir::Inst,
|
||||
divert: &mut RegDiversions,
|
||||
errors: &mut VerifierErrors,
|
||||
) -> VerifierStepResult<()> {
|
||||
let (arg, src) = match self.func.dfg[inst] {
|
||||
ir::InstructionData::RegMove { arg, src, .. }
|
||||
| ir::InstructionData::RegSpill { arg, src, .. } => (arg, ir::ValueLoc::Reg(src)),
|
||||
ir::InstructionData::RegFill { arg, src, .. } => (arg, ir::ValueLoc::Stack(src)),
|
||||
_ => return Ok(()),
|
||||
};
|
||||
|
||||
if let Some(d) = divert.diversion(arg) {
|
||||
if d.to != src {
|
||||
return errors.fatal((
|
||||
inst,
|
||||
format!(
|
||||
"inconsistent with current diversion to {}",
|
||||
d.to.display(&self.reginfo)
|
||||
),
|
||||
));
|
||||
}
|
||||
} else if self.func.locations[arg] != src {
|
||||
return errors.fatal((
|
||||
inst,
|
||||
format!(
|
||||
"inconsistent with global location {} ({})",
|
||||
self.func.locations[arg].display(&self.reginfo),
|
||||
self.func.dfg.display_inst(inst, None)
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
divert.apply(&self.func.dfg[inst]);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// We have active diversions before a branch. Make sure none of the diverted values are live
|
||||
/// on the outgoing CFG edges.
|
||||
fn check_cfg_edges(
|
||||
&self,
|
||||
inst: ir::Inst,
|
||||
divert: &mut RegDiversions,
|
||||
is_after_branch: bool,
|
||||
errors: &mut VerifierErrors,
|
||||
) -> VerifierStepResult<()> {
|
||||
use crate::ir::instructions::BranchInfo::*;
|
||||
let dfg = &self.func.dfg;
|
||||
let branch_kind = dfg.analyze_branch(inst);
|
||||
|
||||
// We can only check CFG edges if we have a liveness analysis.
|
||||
let liveness = match self.liveness {
|
||||
Some(l) => l,
|
||||
None => return Ok(()),
|
||||
};
|
||||
|
||||
match branch_kind {
|
||||
NotABranch => panic!(
|
||||
"No branch information for {}",
|
||||
dfg.display_inst(inst, self.isa)
|
||||
),
|
||||
SingleDest(block, _) => {
|
||||
let unique_predecessor = self.cfg.pred_iter(block).count() == 1;
|
||||
let mut val_to_remove = vec![];
|
||||
for (&value, d) in divert.iter() {
|
||||
let lr = &liveness[value];
|
||||
if is_after_branch && unique_predecessor {
|
||||
// Forward diversions based on the targeted branch.
|
||||
if !lr.is_livein(block, &self.func.layout) {
|
||||
val_to_remove.push(value)
|
||||
}
|
||||
} else if lr.is_livein(block, &self.func.layout) {
|
||||
return errors.fatal((
|
||||
inst,
|
||||
format!(
|
||||
"SingleDest: {} is diverted to {} and live in to {}",
|
||||
value,
|
||||
d.to.display(&self.reginfo),
|
||||
block,
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
if is_after_branch && unique_predecessor {
|
||||
for val in val_to_remove.into_iter() {
|
||||
divert.remove(val);
|
||||
}
|
||||
debug_assert!(divert.check_block_entry(&self.func.entry_diversions, block));
|
||||
}
|
||||
}
|
||||
Table(jt, block) => {
|
||||
for (&value, d) in divert.iter() {
|
||||
let lr = &liveness[value];
|
||||
if let Some(block) = block {
|
||||
if lr.is_livein(block, &self.func.layout) {
|
||||
return errors.fatal((
|
||||
inst,
|
||||
format!(
|
||||
"Table.default: {} is diverted to {} and live in to {}",
|
||||
value,
|
||||
d.to.display(&self.reginfo),
|
||||
block,
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
for block in self.func.jump_tables[jt].iter() {
|
||||
if lr.is_livein(*block, &self.func.layout) {
|
||||
return errors.fatal((
|
||||
inst,
|
||||
format!(
|
||||
"Table.case: {} is diverted to {} and live in to {}",
|
||||
value,
|
||||
d.to.display(&self.reginfo),
|
||||
block,
|
||||
),
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -78,16 +78,13 @@ use alloc::collections::BTreeSet;
|
||||
use alloc::string::{String, ToString};
|
||||
use alloc::vec::Vec;
|
||||
use core::cmp::Ordering;
|
||||
use core::fmt::{self, Display, Formatter, Write};
|
||||
use core::fmt::{self, Display, Formatter};
|
||||
|
||||
pub use self::cssa::verify_cssa;
|
||||
pub use self::liveness::verify_liveness;
|
||||
pub use self::locations::verify_locations;
|
||||
|
||||
mod cssa;
|
||||
mod flags;
|
||||
mod liveness;
|
||||
mod locations;
|
||||
mod virtregs;
|
||||
|
||||
/// A verifier error.
|
||||
#[derive(Debug, PartialEq, Eq, Clone)]
|
||||
@@ -1763,145 +1760,6 @@ impl<'a> Verifier<'a> {
|
||||
errors.as_result()
|
||||
}
|
||||
|
||||
/// If the verifier has been set up with an ISA, make sure that the recorded encoding for the
|
||||
/// instruction (if any) matches how the ISA would encode it.
|
||||
fn verify_encoding(&self, inst: Inst, errors: &mut VerifierErrors) -> VerifierStepResult<()> {
|
||||
// When the encodings table is empty, we don't require any instructions to be encoded.
|
||||
//
|
||||
// Once some instructions are encoded, we require all side-effecting instructions to have a
|
||||
// legal encoding.
|
||||
if self.func.encodings.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let isa = match self.isa {
|
||||
Some(isa) => isa,
|
||||
None => return Ok(()),
|
||||
};
|
||||
|
||||
let encoding = self.func.encodings[inst];
|
||||
if encoding.is_legal() {
|
||||
if self.func.dfg[inst].opcode().is_ghost() {
|
||||
return errors.nonfatal((
|
||||
inst,
|
||||
self.context(inst),
|
||||
format!(
|
||||
"Ghost instruction has an encoding: {}",
|
||||
isa.encoding_info().display(encoding),
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
let mut encodings = isa
|
||||
.legal_encodings(
|
||||
&self.func,
|
||||
&self.func.dfg[inst],
|
||||
self.func.dfg.ctrl_typevar(inst),
|
||||
)
|
||||
.peekable();
|
||||
|
||||
if encodings.peek().is_none() {
|
||||
return errors.nonfatal((
|
||||
inst,
|
||||
self.context(inst),
|
||||
format!(
|
||||
"Instruction failed to re-encode {}",
|
||||
isa.encoding_info().display(encoding),
|
||||
),
|
||||
));
|
||||
}
|
||||
|
||||
let has_valid_encoding = encodings.any(|possible_enc| encoding == possible_enc);
|
||||
|
||||
if !has_valid_encoding {
|
||||
let mut possible_encodings = String::new();
|
||||
let mut multiple_encodings = false;
|
||||
|
||||
for enc in isa.legal_encodings(
|
||||
&self.func,
|
||||
&self.func.dfg[inst],
|
||||
self.func.dfg.ctrl_typevar(inst),
|
||||
) {
|
||||
if !possible_encodings.is_empty() {
|
||||
possible_encodings.push_str(", ");
|
||||
multiple_encodings = true;
|
||||
}
|
||||
possible_encodings
|
||||
.write_fmt(format_args!("{}", isa.encoding_info().display(enc)))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
return errors.nonfatal((
|
||||
inst,
|
||||
self.context(inst),
|
||||
format!(
|
||||
"encoding {} should be {}{}",
|
||||
isa.encoding_info().display(encoding),
|
||||
if multiple_encodings { "one of: " } else { "" },
|
||||
possible_encodings,
|
||||
),
|
||||
));
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Instruction is not encoded, so it is a ghost instruction.
|
||||
// Instructions with side effects are not allowed to be ghost instructions.
|
||||
let opcode = self.func.dfg[inst].opcode();
|
||||
|
||||
// The `fallthrough`, `fallthrough_return`, and `safepoint` instructions are not required
|
||||
// to have an encoding.
|
||||
if opcode == Opcode::Fallthrough
|
||||
|| opcode == Opcode::FallthroughReturn
|
||||
|| opcode == Opcode::Safepoint
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Check if this opcode must be encoded.
|
||||
let mut needs_enc = None;
|
||||
if opcode.is_branch() {
|
||||
needs_enc = Some("Branch");
|
||||
} else if opcode.is_call() {
|
||||
needs_enc = Some("Call");
|
||||
} else if opcode.is_return() {
|
||||
needs_enc = Some("Return");
|
||||
} else if opcode.can_store() {
|
||||
needs_enc = Some("Store");
|
||||
} else if opcode.can_trap() {
|
||||
needs_enc = Some("Trapping instruction");
|
||||
} else if opcode.other_side_effects() {
|
||||
needs_enc = Some("Instruction with side effects");
|
||||
}
|
||||
|
||||
if let Some(text) = needs_enc {
|
||||
// This instruction needs an encoding, so generate an error.
|
||||
// Provide the ISA default encoding as a hint.
|
||||
match self.func.encode(inst, isa) {
|
||||
Ok(enc) => {
|
||||
return errors.nonfatal((
|
||||
inst,
|
||||
self.context(inst),
|
||||
format!(
|
||||
"{} must have an encoding (e.g., {})))",
|
||||
text,
|
||||
isa.encoding_info().display(enc),
|
||||
),
|
||||
));
|
||||
}
|
||||
Err(_) => {
|
||||
return errors.nonfatal((
|
||||
inst,
|
||||
self.context(inst),
|
||||
format!("{} must have an encoding", text),
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn immediate_constraints(
|
||||
&self,
|
||||
inst: Inst,
|
||||
@@ -2034,14 +1892,13 @@ impl<'a> Verifier<'a> {
|
||||
self.instruction_integrity(inst, errors)?;
|
||||
self.verify_safepoint_unused(inst, errors)?;
|
||||
self.typecheck(inst, errors)?;
|
||||
self.verify_encoding(inst, errors)?;
|
||||
self.immediate_constraints(inst, errors)?;
|
||||
}
|
||||
|
||||
self.encodable_as_bb(block, errors)?;
|
||||
}
|
||||
|
||||
verify_flags(self.func, &self.expected_cfg, self.isa, errors)?;
|
||||
verify_flags(self.func, &self.expected_cfg, errors)?;
|
||||
|
||||
if !errors.is_empty() {
|
||||
log::warn!(
|
||||
|
||||
@@ -6,13 +6,10 @@
|
||||
use crate::entity::SecondaryMap;
|
||||
use crate::ir::entities::AnyEntity;
|
||||
use crate::ir::{
|
||||
Block, DataFlowGraph, DisplayFunctionAnnotations, Function, Inst, SigRef, Type, Value,
|
||||
ValueDef, ValueLoc,
|
||||
Block, DataFlowGraph, DisplayFunctionAnnotations, Function, Inst, SigRef, Type, Value, ValueDef,
|
||||
};
|
||||
use crate::isa::{RegInfo, TargetIsa};
|
||||
use crate::packed_option::ReservedValue;
|
||||
use crate::value_label::{LabelValueLoc, ValueLabelsRanges};
|
||||
use crate::HashSet;
|
||||
use alloc::string::String;
|
||||
use alloc::vec::Vec;
|
||||
use core::fmt::{self, Write};
|
||||
@@ -278,49 +275,6 @@ pub fn write_block_header(
|
||||
writeln!(w, "):")
|
||||
}
|
||||
|
||||
fn write_valueloc(w: &mut dyn Write, loc: LabelValueLoc, regs: &RegInfo) -> fmt::Result {
|
||||
match loc {
|
||||
LabelValueLoc::ValueLoc(ValueLoc::Reg(r)) => write!(w, "{}", regs.display_regunit(r)),
|
||||
LabelValueLoc::ValueLoc(ValueLoc::Stack(ss)) => write!(w, "{}", ss),
|
||||
LabelValueLoc::ValueLoc(ValueLoc::Unassigned) => write!(w, "?"),
|
||||
LabelValueLoc::Reg(r) => write!(w, "{:?}", r),
|
||||
LabelValueLoc::SPOffset(off) => write!(w, "[sp+{}]", off),
|
||||
}
|
||||
}
|
||||
|
||||
fn write_value_range_markers(
|
||||
w: &mut dyn Write,
|
||||
val_ranges: &ValueLabelsRanges,
|
||||
regs: &RegInfo,
|
||||
offset: u32,
|
||||
indent: usize,
|
||||
) -> fmt::Result {
|
||||
let mut result = String::new();
|
||||
let mut shown = HashSet::new();
|
||||
for (val, rng) in val_ranges {
|
||||
for i in (0..rng.len()).rev() {
|
||||
if rng[i].start == offset {
|
||||
write!(&mut result, " {}@", val)?;
|
||||
write_valueloc(&mut result, rng[i].loc, regs)?;
|
||||
shown.insert(val);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (val, rng) in val_ranges {
|
||||
for i in (0..rng.len()).rev() {
|
||||
if rng[i].end == offset && !shown.contains(val) {
|
||||
write!(&mut result, " {}\u{2620}", val)?;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if !result.is_empty() {
|
||||
writeln!(w, ";{1:0$}; {2}", indent + 24, "", result)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn decorate_block<FW: FuncWriter>(
|
||||
func_w: &mut FW,
|
||||
w: &mut dyn Write,
|
||||
@@ -329,12 +283,8 @@ fn decorate_block<FW: FuncWriter>(
|
||||
annotations: &DisplayFunctionAnnotations,
|
||||
block: Block,
|
||||
) -> fmt::Result {
|
||||
// Indent all instructions if any encodings are present.
|
||||
let indent = if func.encodings.is_empty() && func.srclocs.is_empty() {
|
||||
4
|
||||
} else {
|
||||
36
|
||||
};
|
||||
// Indent all instructions if any srclocs are present.
|
||||
let indent = if func.srclocs.is_empty() { 4 } else { 36 };
|
||||
let isa = annotations.isa;
|
||||
|
||||
func_w.write_block_header(w, func, isa, block, indent)?;
|
||||
@@ -342,22 +292,6 @@ fn decorate_block<FW: FuncWriter>(
|
||||
write_value_aliases(w, aliases, a, indent)?;
|
||||
}
|
||||
|
||||
if let Some(isa) = isa {
|
||||
if !func.offsets.is_empty() {
|
||||
let encinfo = isa.encoding_info();
|
||||
let regs = &isa.register_info();
|
||||
for (offset, inst, size) in func.inst_offsets(block, &encinfo) {
|
||||
func_w.write_instruction(w, func, aliases, Some(isa), inst, indent)?;
|
||||
if size > 0 {
|
||||
if let Some(val_ranges) = annotations.value_ranges {
|
||||
write_value_range_markers(w, val_ranges, regs, offset + size, indent)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
for inst in func.layout.block_insts(block) {
|
||||
func_w.write_instruction(w, func, aliases, isa, inst, indent)?;
|
||||
}
|
||||
@@ -438,23 +372,6 @@ fn write_instruction(
|
||||
write!(s, "{} ", srcloc)?;
|
||||
}
|
||||
|
||||
// Write out encoding info.
|
||||
if let Some(enc) = func.encodings.get(inst).cloned() {
|
||||
if let Some(isa) = isa {
|
||||
write!(s, "[{}", isa.encoding_info().display(enc))?;
|
||||
// Write value locations, if we have them.
|
||||
if !func.locations.is_empty() {
|
||||
let regs = isa.register_info();
|
||||
for &r in func.dfg.inst_results(inst) {
|
||||
write!(s, ",{}", func.locations[r].display(&regs))?
|
||||
}
|
||||
}
|
||||
write!(s, "] ")?;
|
||||
} else {
|
||||
write!(s, "[{}] ", enc)?;
|
||||
}
|
||||
}
|
||||
|
||||
// Write out prefix and indent the instruction.
|
||||
write!(w, "{1:0$}", indent, s)?;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user