394 lines
15 KiB
Rust
394 lines
15 KiB
Rust
//! Branch relaxation and offset computation.
|
|
//!
|
|
//! # Block header offsets
|
|
//!
|
|
//! Before we can generate binary machine code for branch instructions, we need to know the final
|
|
//! offsets of all the block headers in the function. This information is encoded in the
|
|
//! `func.offsets` table.
|
|
//!
|
|
//! # Branch relaxation
|
|
//!
|
|
//! Branch relaxation is the process of ensuring that all branches in the function have enough
|
|
//! range to encode their destination. It is common to have multiple branch encodings in an ISA.
|
|
//! For example, x86 branches can have either an 8-bit or a 32-bit displacement.
|
|
//!
|
|
//! On RISC architectures, it can happen that conditional branches have a shorter range than
|
|
//! unconditional branches:
|
|
//!
|
|
//! ```clif
|
|
//! brz v1, block17
|
|
//! ```
|
|
//!
|
|
//! can be transformed into:
|
|
//!
|
|
//! ```clif
|
|
//! brnz v1, block23
|
|
//! jump block17
|
|
//! block23:
|
|
//! ```
|
|
|
|
use crate::binemit::{CodeInfo, CodeOffset};
|
|
use crate::cursor::{Cursor, FuncCursor};
|
|
use crate::dominator_tree::DominatorTree;
|
|
use crate::flowgraph::ControlFlowGraph;
|
|
use crate::ir::{Block, Function, Inst, InstructionData, Opcode, Value, ValueList};
|
|
use crate::isa::{EncInfo, TargetIsa};
|
|
use crate::iterators::IteratorExtras;
|
|
use crate::regalloc::RegDiversions;
|
|
use crate::timing;
|
|
use crate::CodegenResult;
|
|
use core::convert::TryFrom;
|
|
use log::debug;
|
|
|
|
/// Relax branches and compute the final layout of block headers in `func`.
|
|
///
|
|
/// Fill in the `func.offsets` table so the function is ready for binary emission.
|
|
pub fn relax_branches(
|
|
func: &mut Function,
|
|
_cfg: &mut ControlFlowGraph,
|
|
_domtree: &mut DominatorTree,
|
|
isa: &dyn TargetIsa,
|
|
) -> CodegenResult<CodeInfo> {
|
|
let _tt = timing::relax_branches();
|
|
|
|
let encinfo = isa.encoding_info();
|
|
|
|
// Clear all offsets so we can recognize blocks that haven't been visited yet.
|
|
func.offsets.clear();
|
|
func.offsets.resize(func.dfg.num_blocks());
|
|
|
|
// Start by removing redundant jumps.
|
|
fold_redundant_jumps(func, _cfg, _domtree);
|
|
|
|
// Convert jumps to fallthrough instructions where possible.
|
|
fallthroughs(func);
|
|
|
|
let mut offset = 0;
|
|
let mut divert = RegDiversions::new();
|
|
|
|
// First, compute initial offsets for every block.
|
|
{
|
|
let mut cur = FuncCursor::new(func);
|
|
while let Some(block) = cur.next_block() {
|
|
divert.at_block(&cur.func.entry_diversions, block);
|
|
cur.func.offsets[block] = offset;
|
|
while let Some(inst) = cur.next_inst() {
|
|
divert.apply(&cur.func.dfg[inst]);
|
|
let enc = cur.func.encodings[inst];
|
|
offset += encinfo.byte_size(enc, inst, &divert, &cur.func);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Then, run the relaxation algorithm until it converges.
|
|
let mut go_again = true;
|
|
while go_again {
|
|
go_again = false;
|
|
offset = 0;
|
|
|
|
// Visit all instructions in layout order.
|
|
let mut cur = FuncCursor::new(func);
|
|
while let Some(block) = cur.next_block() {
|
|
divert.at_block(&cur.func.entry_diversions, block);
|
|
|
|
// Record the offset for `block` and make sure we iterate until offsets are stable.
|
|
if cur.func.offsets[block] != offset {
|
|
cur.func.offsets[block] = offset;
|
|
go_again = true;
|
|
}
|
|
|
|
while let Some(inst) = cur.next_inst() {
|
|
divert.apply(&cur.func.dfg[inst]);
|
|
|
|
let enc = cur.func.encodings[inst];
|
|
|
|
// See if this is a branch has a range and a destination, and if the target is in
|
|
// range.
|
|
if let Some(range) = encinfo.branch_range(enc) {
|
|
if let Some(dest) = cur.func.dfg[inst].branch_destination() {
|
|
let dest_offset = cur.func.offsets[dest];
|
|
if !range.contains(offset, dest_offset) {
|
|
offset +=
|
|
relax_branch(&mut cur, &divert, offset, dest_offset, &encinfo, isa);
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
|
|
offset += encinfo.byte_size(enc, inst, &divert, &cur.func);
|
|
}
|
|
}
|
|
}
|
|
|
|
let code_size = offset;
|
|
let jumptables = offset;
|
|
|
|
for (jt, jt_data) in func.jump_tables.iter() {
|
|
func.jt_offsets[jt] = offset;
|
|
// TODO: this should be computed based on the min size needed to hold the furthest branch.
|
|
offset += jt_data.len() as u32 * 4;
|
|
}
|
|
|
|
let jumptables_size = offset - jumptables;
|
|
let rodata = offset;
|
|
|
|
for constant in func.dfg.constants.entries_mut() {
|
|
constant.set_offset(offset);
|
|
offset +=
|
|
u32::try_from(constant.len()).expect("Constants must have a length that fits in a u32")
|
|
}
|
|
|
|
let rodata_size = offset - rodata;
|
|
|
|
Ok(CodeInfo {
|
|
code_size,
|
|
jumptables_size,
|
|
rodata_size,
|
|
total_size: offset,
|
|
})
|
|
}
|
|
|
|
/// Folds an instruction if it is a redundant jump.
|
|
/// Returns whether folding was performed (which invalidates the CFG).
|
|
fn try_fold_redundant_jump(
|
|
func: &mut Function,
|
|
cfg: &mut ControlFlowGraph,
|
|
block: Block,
|
|
first_inst: Inst,
|
|
) -> bool {
|
|
let first_dest = match func.dfg[first_inst].branch_destination() {
|
|
Some(block) => block, // The instruction was a single-target branch.
|
|
None => {
|
|
return false; // The instruction was either multi-target or not a branch.
|
|
}
|
|
};
|
|
|
|
// For the moment, only attempt to fold a branch to a block that is parameterless.
|
|
// These blocks are mainly produced by critical edge splitting.
|
|
//
|
|
// TODO: Allow folding blocks that define SSA values and function as phi nodes.
|
|
if func.dfg.num_block_params(first_dest) != 0 {
|
|
return false;
|
|
}
|
|
|
|
// Look at the first instruction of the first branch's destination.
|
|
// If it is an unconditional branch, maybe the second jump can be bypassed.
|
|
let second_inst = func.layout.first_inst(first_dest).expect("Instructions");
|
|
if func.dfg[second_inst].opcode() != Opcode::Jump {
|
|
return false;
|
|
}
|
|
|
|
// Now we need to fix up first_inst's block parameters to match second_inst's,
|
|
// without changing the branch-specific arguments.
|
|
//
|
|
// The intermediary block is allowed to reference any SSA value that dominates it,
|
|
// but that SSA value may not necessarily also dominate the instruction that's
|
|
// being patched.
|
|
|
|
// Get the arguments and parameters passed by the first branch.
|
|
let num_fixed = func.dfg[first_inst]
|
|
.opcode()
|
|
.constraints()
|
|
.num_fixed_value_arguments();
|
|
let (first_args, first_params) = func.dfg[first_inst]
|
|
.arguments(&func.dfg.value_lists)
|
|
.split_at(num_fixed);
|
|
|
|
// Get the parameters passed by the second jump.
|
|
let num_fixed = func.dfg[second_inst]
|
|
.opcode()
|
|
.constraints()
|
|
.num_fixed_value_arguments();
|
|
let (_, second_params) = func.dfg[second_inst]
|
|
.arguments(&func.dfg.value_lists)
|
|
.split_at(num_fixed);
|
|
let mut second_params = second_params.to_vec(); // Clone for rewriting below.
|
|
|
|
// For each parameter passed by the second jump, if any of those parameters
|
|
// was a block parameter, rewrite it to refer to the value that the first jump
|
|
// passed in its parameters. Otherwise, make sure it dominates first_inst.
|
|
//
|
|
// For example: if we `block0: jump block1(v1)` to `block1(v2): jump block2(v2)`,
|
|
// we want to rewrite the original jump to `jump block2(v1)`.
|
|
let block_params: &[Value] = func.dfg.block_params(first_dest);
|
|
debug_assert!(block_params.len() == first_params.len());
|
|
|
|
for value in second_params.iter_mut() {
|
|
if let Some((n, _)) = block_params.iter().enumerate().find(|(_, &p)| p == *value) {
|
|
// This value was the Nth parameter passed to the second_inst's block.
|
|
// Rewrite it as the Nth parameter passed by first_inst.
|
|
*value = first_params[n];
|
|
}
|
|
}
|
|
|
|
// Build a value list of first_args (unchanged) followed by second_params (rewritten).
|
|
let arguments_vec: alloc::vec::Vec<_> = first_args
|
|
.iter()
|
|
.chain(second_params.iter())
|
|
.copied()
|
|
.collect();
|
|
let value_list = ValueList::from_slice(&arguments_vec, &mut func.dfg.value_lists);
|
|
|
|
func.dfg[first_inst].take_value_list(); // Drop the current list.
|
|
func.dfg[first_inst].put_value_list(value_list); // Put the new list.
|
|
|
|
// Bypass the second jump.
|
|
// This can disconnect the Block containing `second_inst`, to be cleaned up later.
|
|
let second_dest = func.dfg[second_inst].branch_destination().expect("Dest");
|
|
func.change_branch_destination(first_inst, second_dest);
|
|
cfg.recompute_block(func, block);
|
|
|
|
// The previously-intermediary Block may now be unreachable. Update CFG.
|
|
if cfg.pred_iter(first_dest).count() == 0 {
|
|
// Remove all instructions from that block.
|
|
while let Some(inst) = func.layout.first_inst(first_dest) {
|
|
func.layout.remove_inst(inst);
|
|
}
|
|
|
|
// Remove the block...
|
|
cfg.recompute_block(func, first_dest); // ...from predecessor lists.
|
|
func.layout.remove_block(first_dest); // ...from the layout.
|
|
}
|
|
|
|
true
|
|
}
|
|
|
|
/// Redirects `jump` instructions that point to other `jump` instructions to the final destination.
|
|
/// This transformation may orphan some blocks.
|
|
fn fold_redundant_jumps(
|
|
func: &mut Function,
|
|
cfg: &mut ControlFlowGraph,
|
|
domtree: &mut DominatorTree,
|
|
) {
|
|
let mut folded = false;
|
|
|
|
// Postorder iteration guarantees that a chain of jumps is visited from
|
|
// the end of the chain to the start of the chain.
|
|
for &block in domtree.cfg_postorder() {
|
|
// Only proceed if the first terminator instruction is a single-target branch.
|
|
let first_inst = func
|
|
.layout
|
|
.last_inst(block)
|
|
.expect("Block has no terminator");
|
|
folded |= try_fold_redundant_jump(func, cfg, block, first_inst);
|
|
|
|
// Also try the previous instruction.
|
|
if let Some(prev_inst) = func.layout.prev_inst(first_inst) {
|
|
folded |= try_fold_redundant_jump(func, cfg, block, prev_inst);
|
|
}
|
|
}
|
|
|
|
// Folding jumps invalidates the dominator tree.
|
|
if folded {
|
|
domtree.compute(func, cfg);
|
|
}
|
|
}
|
|
|
|
/// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any
|
|
/// existing `fallthrough` instructions are correct.
|
|
fn fallthroughs(func: &mut Function) {
|
|
for (block, succ) in func.layout.blocks().adjacent_pairs() {
|
|
let term = func
|
|
.layout
|
|
.last_inst(block)
|
|
.expect("block has no terminator.");
|
|
if let InstructionData::Jump {
|
|
ref mut opcode,
|
|
destination,
|
|
..
|
|
} = func.dfg[term]
|
|
{
|
|
match *opcode {
|
|
Opcode::Fallthrough => {
|
|
// Somebody used a fall-through instruction before the branch relaxation pass.
|
|
// Make sure it is correct, i.e. the destination is the layout successor.
|
|
debug_assert_eq!(destination, succ, "Illegal fall-through in {}", block)
|
|
}
|
|
Opcode::Jump => {
|
|
// If this is a jump to the successor block, change it to a fall-through.
|
|
if destination == succ {
|
|
*opcode = Opcode::Fallthrough;
|
|
func.encodings[term] = Default::default();
|
|
}
|
|
}
|
|
_ => {}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Relax the branch instruction at `cur` so it can cover the range `offset - dest_offset`.
|
|
///
|
|
/// Return the size of the replacement instructions up to and including the location where `cur` is
|
|
/// left.
|
|
fn relax_branch(
|
|
cur: &mut FuncCursor,
|
|
divert: &RegDiversions,
|
|
offset: CodeOffset,
|
|
dest_offset: CodeOffset,
|
|
encinfo: &EncInfo,
|
|
isa: &dyn TargetIsa,
|
|
) -> CodeOffset {
|
|
let inst = cur.current_inst().unwrap();
|
|
debug!(
|
|
"Relaxing [{}] {} for {:#x}-{:#x} range",
|
|
encinfo.display(cur.func.encodings[inst]),
|
|
cur.func.dfg.display_inst(inst, isa),
|
|
offset,
|
|
dest_offset
|
|
);
|
|
|
|
// Pick the smallest encoding that can handle the branch range.
|
|
let dfg = &cur.func.dfg;
|
|
let ctrl_type = dfg.ctrl_typevar(inst);
|
|
if let Some(enc) = isa
|
|
.legal_encodings(cur.func, &dfg[inst], ctrl_type)
|
|
.filter(|&enc| {
|
|
let range = encinfo.branch_range(enc).expect("Branch with no range");
|
|
if !range.contains(offset, dest_offset) {
|
|
debug!(" trying [{}]: out of range", encinfo.display(enc));
|
|
false
|
|
} else if encinfo.operand_constraints(enc)
|
|
!= encinfo.operand_constraints(cur.func.encodings[inst])
|
|
{
|
|
// Conservatively give up if the encoding has different constraints
|
|
// than the original, so that we don't risk picking a new encoding
|
|
// which the existing operands don't satisfy. We can't check for
|
|
// validity directly because we don't have a RegDiversions active so
|
|
// we don't know which registers are actually in use.
|
|
debug!(" trying [{}]: constraints differ", encinfo.display(enc));
|
|
false
|
|
} else {
|
|
debug!(" trying [{}]: OK", encinfo.display(enc));
|
|
true
|
|
}
|
|
})
|
|
.min_by_key(|&enc| encinfo.byte_size(enc, inst, &divert, &cur.func))
|
|
{
|
|
debug_assert!(enc != cur.func.encodings[inst]);
|
|
cur.func.encodings[inst] = enc;
|
|
return encinfo.byte_size(enc, inst, &divert, &cur.func);
|
|
}
|
|
|
|
// Note: On some RISC ISAs, conditional branches have shorter range than unconditional
|
|
// branches, so one way of extending the range of a conditional branch is to invert its
|
|
// condition and make it branch over an unconditional jump which has the larger range.
|
|
//
|
|
// Splitting the block is problematic this late because there may be register diversions in
|
|
// effect across the conditional branch, and they can't survive the control flow edge to a new
|
|
// block. We have two options for handling that:
|
|
//
|
|
// 1. Set a flag on the new block that indicates it wants the preserve the register diversions of
|
|
// its layout predecessor, or
|
|
// 2. Use an encoding macro for the branch-over-jump pattern so we don't need to split the block.
|
|
//
|
|
// It seems that 1. would allow us to share code among RISC ISAs that need this.
|
|
//
|
|
// We can't allow register diversions to survive from the layout predecessor because the layout
|
|
// predecessor could contain kill points for some values that are live in this block, and
|
|
// diversions are not automatically cancelled when the live range of a value ends.
|
|
|
|
// This assumes solution 2. above:
|
|
panic!("No branch in range for {:#x}-{:#x}", offset, dest_offset);
|
|
}
|