diff --git a/cranelift/codegen/src/binemit/relaxation.rs b/cranelift/codegen/src/binemit/relaxation.rs index 1e84f2f4b9..62f692e557 100644 --- a/cranelift/codegen/src/binemit/relaxation.rs +++ b/cranelift/codegen/src/binemit/relaxation.rs @@ -29,7 +29,9 @@ use crate::binemit::{CodeInfo, CodeOffset}; use crate::cursor::{Cursor, FuncCursor}; -use crate::ir::{Function, InstructionData, Opcode}; +use crate::dominator_tree::DominatorTree; +use crate::flowgraph::ControlFlowGraph; +use crate::ir::{Ebb, Function, Inst, InstructionData, Opcode, Value, ValueList}; use crate::isa::{EncInfo, TargetIsa}; use crate::iterators::IteratorExtras; use crate::regalloc::RegDiversions; @@ -40,7 +42,12 @@ use log::debug; /// Relax branches and compute the final layout of EBB headers in `func`. /// /// Fill in the `func.offsets` table so the function is ready for binary emission. -pub fn relax_branches(func: &mut Function, isa: &dyn TargetIsa) -> CodegenResult { +pub fn relax_branches( + func: &mut Function, + cfg: &mut ControlFlowGraph, + domtree: &mut DominatorTree, + isa: &dyn TargetIsa, +) -> CodegenResult { let _tt = timing::relax_branches(); let encinfo = isa.encoding_info(); @@ -49,7 +56,10 @@ pub fn relax_branches(func: &mut Function, isa: &dyn TargetIsa) -> CodegenResult func.offsets.clear(); func.offsets.resize(func.dfg.num_ebbs()); - // Start by inserting fall through instructions. + // Start by removing redundant jumps. + fold_redundant_jumps(func, cfg, domtree); + + // Convert jumps to fallthrough instructions where possible. fallthroughs(func); let mut offset = 0; @@ -79,7 +89,6 @@ pub fn relax_branches(func: &mut Function, isa: &dyn TargetIsa) -> CodegenResult let mut cur = FuncCursor::new(func); while let Some(ebb) = cur.next_ebb() { divert.clear(); - // Record the offset for `ebb` and make sure we iterate until offsets are stable. if cur.func.offsets[ebb] != offset { cur.func.offsets[ebb] = offset; @@ -134,6 +143,131 @@ pub fn relax_branches(func: &mut Function, isa: &dyn TargetIsa) -> CodegenResult }) } +/// Folds an instruction if it is a redundant jump. +/// Returns whether folding was performed (which invalidates the CFG). +fn try_fold_redundant_jump( + func: &mut Function, + cfg: &mut ControlFlowGraph, + ebb: Ebb, + first_inst: Inst, +) -> bool { + let first_dest = match func.dfg[first_inst].branch_destination() { + Some(ebb) => ebb, // The instruction was a single-target branch. + None => { + return false; // The instruction was either multi-target or not a branch. + } + }; + + // Look at the first instruction of the first branch's destination. + // If it is an unconditional branch, maybe the second jump can be bypassed. + let second_inst = func.layout.first_inst(first_dest).expect("Instructions"); + if func.dfg[second_inst].opcode() != Opcode::Jump { + return false; + } + + // Now we need to fix up first_inst's ebb parameters to match second_inst's, + // without changing the branch-specific arguments. + // + // The intermediary block is allowed to reference any SSA value that dominates it, + // but that SSA value may not necessarily also dominate the instruction that's + // being patched. + + // Get the arguments and parameters passed by the first branch. + let num_fixed = func.dfg[first_inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + let (first_args, first_params) = func.dfg[first_inst] + .arguments(&func.dfg.value_lists) + .split_at(num_fixed); + + // Get the parameters passed by the second jump. + let num_fixed = func.dfg[second_inst] + .opcode() + .constraints() + .num_fixed_value_arguments(); + let (_, second_params) = func.dfg[second_inst] + .arguments(&func.dfg.value_lists) + .split_at(num_fixed); + let mut second_params = second_params.to_vec(); // Clone for rewriting below. + + // For each parameter passed by the second jump, if any of those parameters + // was a block parameter, rewrite it to refer to the value that the first jump + // passed in its parameters. Otherwise, make sure it dominates first_inst. + // + // For example: if we `ebb0: jump ebb1(v1)` to `ebb1(v2): jump ebb2(v2)`, + // we want to rewrite the original jump to `jump ebb2(v1)`. + let ebb_params: &[Value] = func.dfg.ebb_params(first_dest); + debug_assert!(ebb_params.len() == first_params.len()); + + for value in second_params.iter_mut() { + if let Some((n, _)) = ebb_params.iter().enumerate().find(|(_, &p)| p == *value) { + // This value was the Nth parameter passed to the second_inst's ebb. + // Rewrite it as the Nth parameter passed by first_inst. + *value = first_params[n]; + } + } + + // Build a value list of first_args (unchanged) followed by second_params (rewritten). + let arguments_vec: std::vec::Vec<_> = first_args + .iter() + .chain(second_params.iter()) + .map(|x| *x) + .collect(); + let value_list = ValueList::from_slice(&arguments_vec, &mut func.dfg.value_lists); + + func.dfg[first_inst].take_value_list(); // Drop the current list. + func.dfg[first_inst].put_value_list(value_list); // Put the new list. + + // Bypass the second jump. + // This can disconnect the Ebb containing `second_inst`, to be cleaned up later. + let second_dest = func.dfg[second_inst].branch_destination().expect("Dest"); + func.change_branch_destination(first_inst, second_dest); + cfg.recompute_ebb(func, ebb); + + // The previously-intermediary Ebb may now be unreachable. Update CFG. + if cfg.pred_iter(first_dest).count() == 0 { + // Remove all instructions from that ebb. + while let Some(inst) = func.layout.first_inst(first_dest) { + func.layout.remove_inst(inst); + } + + // Remove the block... + cfg.recompute_ebb(func, first_dest); // ...from predecessor lists. + func.layout.remove_ebb(first_dest); // ...from the layout. + } + + return true; +} + +/// Redirects `jump` instructions that point to other `jump` instructions to the final destination. +/// This transformation may orphan some blocks. +fn fold_redundant_jumps( + func: &mut Function, + cfg: &mut ControlFlowGraph, + domtree: &mut DominatorTree, +) { + let mut folded = false; + + // Postorder iteration guarantees that a chain of jumps is visited from + // the end of the chain to the start of the chain. + for &ebb in domtree.cfg_postorder() { + // Only proceed if the first terminator instruction is a single-target branch. + let first_inst = func.layout.last_inst(ebb).expect("Ebb has no terminator"); + folded |= try_fold_redundant_jump(func, cfg, ebb, first_inst); + + // Also try the previous instruction. + if let Some(prev_inst) = func.layout.prev_inst(first_inst) { + folded |= try_fold_redundant_jump(func, cfg, ebb, prev_inst); + } + } + + // Folding jumps invalidates the dominator tree. + if folded { + domtree.compute(func, cfg); + } +} + /// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any /// existing `fallthrough` instructions are correct. fn fallthroughs(func: &mut Function) { diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs index 64c382cb23..a80d3c41e4 100644 --- a/cranelift/codegen/src/context.rs +++ b/cranelift/codegen/src/context.rs @@ -329,7 +329,7 @@ impl Context { /// Run the branch relaxation pass and return information about the function's code and /// read-only data. pub fn relax_branches(&mut self, isa: &dyn TargetIsa) -> CodegenResult { - let info = relax_branches(&mut self.func, isa)?; + let info = relax_branches(&mut self.func, &mut self.cfg, &mut self.domtree, isa)?; self.verify_if(isa)?; self.verify_locations_if(isa)?; Ok(info) diff --git a/cranelift/codegen/src/flowgraph.rs b/cranelift/codegen/src/flowgraph.rs index 335ca15ce4..645b7d4fff 100644 --- a/cranelift/codegen/src/flowgraph.rs +++ b/cranelift/codegen/src/flowgraph.rs @@ -31,7 +31,7 @@ use crate::timing; use core::mem; /// A basic block denoted by its enclosing Ebb and last instruction. -#[derive(PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq)] pub struct BasicBlock { /// Enclosing Ebb key. pub ebb: Ebb, diff --git a/cranelift/codegen/src/ir/function.rs b/cranelift/codegen/src/ir/function.rs index adbb244278..8eea542564 100644 --- a/cranelift/codegen/src/ir/function.rs +++ b/cranelift/codegen/src/ir/function.rs @@ -8,7 +8,7 @@ use crate::entity::{PrimaryMap, SecondaryMap}; use crate::ir; use crate::ir::{DataFlowGraph, ExternalName, Layout, Signature}; use crate::ir::{ - Ebb, ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Heap, HeapData, JumpTable, + Ebb, ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Heap, HeapData, Inst, JumpTable, JumpTableData, SigRef, StackSlot, StackSlotData, Table, TableData, }; use crate::ir::{EbbOffsets, InstEncodings, SourceLocs, StackSlots, ValueLocations}; @@ -20,7 +20,7 @@ use crate::write::write_function; use core::fmt; #[cfg(feature = "basic-blocks")] -use crate::ir::{Inst, Opcode}; +use crate::ir::Opcode; /// A function. /// @@ -223,6 +223,15 @@ impl Function { self.dfg.collect_debug_info(); } + /// Changes the destination of a jump or branch instruction. + /// Does nothing if called with a non-jump or non-branch instruction. + pub fn change_branch_destination(&mut self, inst: Inst, new_dest: Ebb) { + match self.dfg[inst].branch_destination_mut() { + None => (), + Some(inst_dest) => *inst_dest = new_dest, + } + } + /// Checks that the specified EBB can be encoded as a basic block. /// /// On error, returns the first invalid instruction and an error message. diff --git a/cranelift/codegen/src/licm.rs b/cranelift/codegen/src/licm.rs index 2af4ef3244..2866da87d7 100644 --- a/cranelift/codegen/src/licm.rs +++ b/cranelift/codegen/src/licm.rs @@ -88,7 +88,7 @@ fn create_pre_header( { // We only follow normal edges (not the back edges) if !domtree.dominates(header, last_inst, &func.layout) { - change_branch_jump_destination(last_inst, pre_header, func); + func.change_branch_destination(last_inst, pre_header); } } { @@ -136,15 +136,6 @@ fn has_pre_header( result } -// Change the destination of a jump or branch instruction. Does nothing if called with a non-jump -// or non-branch instruction. -fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function) { - match func.dfg[inst].branch_destination_mut() { - None => (), - Some(instruction_dest) => *instruction_dest = new_ebb, - } -} - /// Test whether the given opcode is unsafe to even consider for LICM. fn trivially_unsafe_for_licm(opcode: Opcode) -> bool { opcode.can_store() diff --git a/cranelift/filetests/filetests/isa/x86/binary64.clif b/cranelift/filetests/filetests/isa/x86/binary64.clif index a65b3d3d1d..aa732e67f5 100644 --- a/cranelift/filetests/filetests/isa/x86/binary64.clif +++ b/cranelift/filetests/filetests/isa/x86/binary64.clif @@ -689,48 +689,48 @@ ebb0: ; asm: testq %rcx, %rcx ; asm: je ebb1 - brz v1, ebb1 ; bin: 48 85 c9 74 1b + brz v1, ebb1 ; bin: 48 85 c9 74 19 fallthrough ebb3 ebb3: ; asm: testq %rsi, %rsi ; asm: je ebb1 - brz v2, ebb1 ; bin: 48 85 f6 74 16 + brz v2, ebb1 ; bin: 48 85 f6 74 14 fallthrough ebb4 ebb4: ; asm: testq %r10, %r10 ; asm: je ebb1 - brz v3, ebb1 ; bin: 4d 85 d2 74 11 + brz v3, ebb1 ; bin: 4d 85 d2 74 0f fallthrough ebb5 ebb5: ; asm: testq %rcx, %rcx ; asm: jne ebb1 - brnz v1, ebb1 ; bin: 48 85 c9 75 0c + brnz v1, ebb1 ; bin: 48 85 c9 75 0a fallthrough ebb6 ebb6: ; asm: testq %rsi, %rsi ; asm: jne ebb1 - brnz v2, ebb1 ; bin: 48 85 f6 75 07 + brnz v2, ebb1 ; bin: 48 85 f6 75 05 fallthrough ebb7 ebb7: ; asm: testq %r10, %r10 ; asm: jne ebb1 - brnz v3, ebb1 ; bin: 4d 85 d2 75 02 + brnz v3, ebb1 ; bin: 4d 85 d2 75 00 ; asm: jmp ebb2 - jump ebb2 ; bin: eb 01 + jump ebb2 ; asm: ebb1: ebb1: - return ; bin: c3 + return ; asm: ebb2: ebb2: - jump ebb1 ; bin: eb fd + jump ebb1 } ; CPU flag instructions. @@ -1292,40 +1292,41 @@ ebb0: ; asm: testl %ecx, %ecx ; asm: je ebb1x - brz v1, ebb1 ; bin: 85 c9 74 18 + brz v1, ebb1 ; bin: 85 c9 74 16 fallthrough ebb3 ebb3: ; asm: testl %esi, %esi ; asm: je ebb1x - brz v2, ebb1 ; bin: 85 f6 74 14 + brz v2, ebb1 ; bin: 85 f6 74 12 fallthrough ebb4 ebb4: ; asm: testl %r10d, %r10d ; asm: je ebb1x - brz v3, ebb1 ; bin: 45 85 d2 74 0f + brz v3, ebb1 ; bin: 45 85 d2 74 0d fallthrough ebb5 ebb5: ; asm: testl %ecx, %ecx ; asm: jne ebb1x - brnz v1, ebb1 ; bin: 85 c9 75 0b + brnz v1, ebb1 ; bin: 85 c9 75 09 fallthrough ebb6 ebb6: ; asm: testl %esi, %esi ; asm: jne ebb1x - brnz v2, ebb1 ; bin: 85 f6 75 07 + brnz v2, ebb1 ; bin: 85 f6 75 05 fallthrough ebb7 ebb7: ; asm: testl %r10d, %r10d ; asm: jne ebb1x - brnz v3, ebb1 ; bin: 45 85 d2 75 02 + brnz v3, ebb1 ; bin: 45 85 d2 75 00 ; asm: jmp ebb2x - jump ebb2 ; bin: eb 01 + ; branch relaxation translates this into `fallthrough ebb1` + jump ebb2 ; asm: ebb1x: ebb1: diff --git a/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif b/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif index 6d49a6e6d0..5656549cf7 100644 --- a/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif +++ b/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif @@ -11,9 +11,9 @@ function u0:0(i64) system_v { ebb0(v0: i64): v1 = stack_addr.i64 ss0 v2 = load.i8 v1 - br_table v2, ebb2, jt0 + br_table v2, ebb1, jt0 ; check: $(oob=$V) = ifcmp_imm $(idx=$V), 1 -; nextln: brif uge $oob, ebb2 +; nextln: brif uge $oob, ebb1 ; nextln: fallthrough $(inb=$EBB) ; check: $inb: ; nextln: $(final_idx=$V) = uextend.i64 $idx @@ -22,9 +22,6 @@ ebb0(v0: i64): ; nextln: $(addr=$V) = iadd $base, $rel_addr ; nextln: indirect_jump_table_br $addr, jt0 -ebb2: - jump ebb1 - ebb1: return } diff --git a/cranelift/filetests/src/test_binemit.rs b/cranelift/filetests/src/test_binemit.rs index 0b1e838795..3925acb8eb 100644 --- a/cranelift/filetests/src/test_binemit.rs +++ b/cranelift/filetests/src/test_binemit.rs @@ -7,6 +7,8 @@ use crate::match_directive::match_directive; use crate::subtest::{Context, SubTest, SubtestResult}; use cranelift_codegen::binemit::{self, CodeInfo, CodeSink, RegDiversions}; use cranelift_codegen::dbg::DisplayList; +use cranelift_codegen::dominator_tree::DominatorTree; +use cranelift_codegen::flowgraph::ControlFlowGraph; use cranelift_codegen::ir; use cranelift_codegen::ir::entities::AnyEntity; use cranelift_codegen::print_errors::pretty_error; @@ -166,8 +168,11 @@ impl SubTest for TestBinEmit { } // Relax branches and compute EBB offsets based on the encodings. - let CodeInfo { total_size, .. } = binemit::relax_branches(&mut func, isa) - .map_err(|e| pretty_error(&func, context.isa, e))?; + let mut cfg = ControlFlowGraph::with_function(&func); + let mut domtree = DominatorTree::with_function(&func, &cfg); + let CodeInfo { total_size, .. } = + binemit::relax_branches(&mut func, &mut cfg, &mut domtree, isa) + .map_err(|e| pretty_error(&func, context.isa, e))?; // Collect all of the 'bin:' directives on instructions. let mut bins = HashMap::new();