Add a fold_redundant_jumps() pass to the branch relaxation phase. (#887)

2019-08-09 15:30:11 -06:00
parent e6e274a3aa
commit c7b4b98cac
8 changed files with 178 additions and 41 deletions
--- a/cranelift/codegen/src/binemit/relaxation.rs
+++ b/cranelift/codegen/src/binemit/relaxation.rs
@@ -29,7 +29,9 @@

 use crate::binemit::{CodeInfo, CodeOffset};
 use crate::cursor::{Cursor, FuncCursor};
-use crate::ir::{Function, InstructionData, Opcode};
+use crate::dominator_tree::DominatorTree;
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::{Ebb, Function, Inst, InstructionData, Opcode, Value, ValueList};
 use crate::isa::{EncInfo, TargetIsa};
 use crate::iterators::IteratorExtras;
 use crate::regalloc::RegDiversions;
@@ -40,7 +42,12 @@ use log::debug;
 /// Relax branches and compute the final layout of EBB headers in `func`.
 ///
 /// Fill in the `func.offsets` table so the function is ready for binary emission.
-pub fn relax_branches(func: &mut Function, isa: &dyn TargetIsa) -> CodegenResult<CodeInfo> {
+pub fn relax_branches(
+    func: &mut Function,
+    cfg: &mut ControlFlowGraph,
+    domtree: &mut DominatorTree,
+    isa: &dyn TargetIsa,
+) -> CodegenResult<CodeInfo> {
    let _tt = timing::relax_branches();

    let encinfo = isa.encoding_info();
@@ -49,7 +56,10 @@ pub fn relax_branches(func: &mut Function, isa: &dyn TargetIsa) -> CodegenResult
    func.offsets.clear();
    func.offsets.resize(func.dfg.num_ebbs());

-    // Start by inserting fall through instructions.
+    // Start by removing redundant jumps.
+    fold_redundant_jumps(func, cfg, domtree);
+
+    // Convert jumps to fallthrough instructions where possible.
    fallthroughs(func);

    let mut offset = 0;
@@ -79,7 +89,6 @@ pub fn relax_branches(func: &mut Function, isa: &dyn TargetIsa) -> CodegenResult
        let mut cur = FuncCursor::new(func);
        while let Some(ebb) = cur.next_ebb() {
            divert.clear();
-
            // Record the offset for `ebb` and make sure we iterate until offsets are stable.
            if cur.func.offsets[ebb] != offset {
                cur.func.offsets[ebb] = offset;
@@ -134,6 +143,131 @@ pub fn relax_branches(func: &mut Function, isa: &dyn TargetIsa) -> CodegenResult
    })
 }

+/// Folds an instruction if it is a redundant jump.
+/// Returns whether folding was performed (which invalidates the CFG).
+fn try_fold_redundant_jump(
+    func: &mut Function,
+    cfg: &mut ControlFlowGraph,
+    ebb: Ebb,
+    first_inst: Inst,
+) -> bool {
+    let first_dest = match func.dfg[first_inst].branch_destination() {
+        Some(ebb) => ebb, // The instruction was a single-target branch.
+        None => {
+            return false; // The instruction was either multi-target or not a branch.
+        }
+    };
+
+    // Look at the first instruction of the first branch's destination.
+    // If it is an unconditional branch, maybe the second jump can be bypassed.
+    let second_inst = func.layout.first_inst(first_dest).expect("Instructions");
+    if func.dfg[second_inst].opcode() != Opcode::Jump {
+        return false;
+    }
+
+    // Now we need to fix up first_inst's ebb parameters to match second_inst's,
+    // without changing the branch-specific arguments.
+    //
+    // The intermediary block is allowed to reference any SSA value that dominates it,
+    // but that SSA value may not necessarily also dominate the instruction that's
+    // being patched.
+
+    // Get the arguments and parameters passed by the first branch.
+    let num_fixed = func.dfg[first_inst]
+        .opcode()
+        .constraints()
+        .num_fixed_value_arguments();
+    let (first_args, first_params) = func.dfg[first_inst]
+        .arguments(&func.dfg.value_lists)
+        .split_at(num_fixed);
+
+    // Get the parameters passed by the second jump.
+    let num_fixed = func.dfg[second_inst]
+        .opcode()
+        .constraints()
+        .num_fixed_value_arguments();
+    let (_, second_params) = func.dfg[second_inst]
+        .arguments(&func.dfg.value_lists)
+        .split_at(num_fixed);
+    let mut second_params = second_params.to_vec(); // Clone for rewriting below.
+
+    // For each parameter passed by the second jump, if any of those parameters
+    // was a block parameter, rewrite it to refer to the value that the first jump
+    // passed in its parameters. Otherwise, make sure it dominates first_inst.
+    //
+    // For example: if we `ebb0: jump ebb1(v1)` to `ebb1(v2): jump ebb2(v2)`,
+    // we want to rewrite the original jump to `jump ebb2(v1)`.
+    let ebb_params: &[Value] = func.dfg.ebb_params(first_dest);
+    debug_assert!(ebb_params.len() == first_params.len());
+
+    for value in second_params.iter_mut() {
+        if let Some((n, _)) = ebb_params.iter().enumerate().find(|(_, &p)| p == *value) {
+            // This value was the Nth parameter passed to the second_inst's ebb.
+            // Rewrite it as the Nth parameter passed by first_inst.
+            *value = first_params[n];
+        }
+    }
+
+    // Build a value list of first_args (unchanged) followed by second_params (rewritten).
+    let arguments_vec: std::vec::Vec<_> = first_args
+        .iter()
+        .chain(second_params.iter())
+        .map(|x| *x)
+        .collect();
+    let value_list = ValueList::from_slice(&arguments_vec, &mut func.dfg.value_lists);
+
+    func.dfg[first_inst].take_value_list(); // Drop the current list.
+    func.dfg[first_inst].put_value_list(value_list); // Put the new list.
+
+    // Bypass the second jump.
+    // This can disconnect the Ebb containing `second_inst`, to be cleaned up later.
+    let second_dest = func.dfg[second_inst].branch_destination().expect("Dest");
+    func.change_branch_destination(first_inst, second_dest);
+    cfg.recompute_ebb(func, ebb);
+
+    // The previously-intermediary Ebb may now be unreachable. Update CFG.
+    if cfg.pred_iter(first_dest).count() == 0 {
+        // Remove all instructions from that ebb.
+        while let Some(inst) = func.layout.first_inst(first_dest) {
+            func.layout.remove_inst(inst);
+        }
+
+        // Remove the block...
+        cfg.recompute_ebb(func, first_dest); // ...from predecessor lists.
+        func.layout.remove_ebb(first_dest); // ...from the layout.
+    }
+
+    return true;
+}
+
+/// Redirects `jump` instructions that point to other `jump` instructions to the final destination.
+/// This transformation may orphan some blocks.
+fn fold_redundant_jumps(
+    func: &mut Function,
+    cfg: &mut ControlFlowGraph,
+    domtree: &mut DominatorTree,
+) {
+    let mut folded = false;
+
+    // Postorder iteration guarantees that a chain of jumps is visited from
+    // the end of the chain to the start of the chain.
+    for &ebb in domtree.cfg_postorder() {
+        // Only proceed if the first terminator instruction is a single-target branch.
+        let first_inst = func.layout.last_inst(ebb).expect("Ebb has no terminator");
+        folded |= try_fold_redundant_jump(func, cfg, ebb, first_inst);
+
+        // Also try the previous instruction.
+        if let Some(prev_inst) = func.layout.prev_inst(first_inst) {
+            folded |= try_fold_redundant_jump(func, cfg, ebb, prev_inst);
+        }
+    }
+
+    // Folding jumps invalidates the dominator tree.
+    if folded {
+        domtree.compute(func, cfg);
+    }
+}
+
 /// Convert `jump` instructions to `fallthrough` instructions where possible and verify that any
 /// existing `fallthrough` instructions are correct.
 fn fallthroughs(func: &mut Function) {
--- a/cranelift/codegen/src/context.rs
+++ b/cranelift/codegen/src/context.rs
@@ -329,7 +329,7 @@ impl Context {
    /// Run the branch relaxation pass and return information about the function's code and
    /// read-only data.
    pub fn relax_branches(&mut self, isa: &dyn TargetIsa) -> CodegenResult<CodeInfo> {
-        let info = relax_branches(&mut self.func, isa)?;
+        let info = relax_branches(&mut self.func, &mut self.cfg, &mut self.domtree, isa)?;
        self.verify_if(isa)?;
        self.verify_locations_if(isa)?;
        Ok(info)
--- a/cranelift/codegen/src/flowgraph.rs
+++ b/cranelift/codegen/src/flowgraph.rs
@@ -31,7 +31,7 @@ use crate::timing;
 use core::mem;

 /// A basic block denoted by its enclosing Ebb and last instruction.
-#[derive(PartialEq, Eq)]
+#[derive(Debug, PartialEq, Eq)]
 pub struct BasicBlock {
    /// Enclosing Ebb key.
    pub ebb: Ebb,
--- a/cranelift/codegen/src/ir/function.rs
+++ b/cranelift/codegen/src/ir/function.rs
@@ -8,7 +8,7 @@ use crate::entity::{PrimaryMap, SecondaryMap};
 use crate::ir;
 use crate::ir::{DataFlowGraph, ExternalName, Layout, Signature};
 use crate::ir::{
-    Ebb, ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Heap, HeapData, JumpTable,
+    Ebb, ExtFuncData, FuncRef, GlobalValue, GlobalValueData, Heap, HeapData, Inst, JumpTable,
    JumpTableData, SigRef, StackSlot, StackSlotData, Table, TableData,
 };
 use crate::ir::{EbbOffsets, InstEncodings, SourceLocs, StackSlots, ValueLocations};
@@ -20,7 +20,7 @@ use crate::write::write_function;
 use core::fmt;

 #[cfg(feature = "basic-blocks")]
-use crate::ir::{Inst, Opcode};
+use crate::ir::Opcode;

 /// A function.
 ///
@@ -223,6 +223,15 @@ impl Function {
        self.dfg.collect_debug_info();
    }

+    /// Changes the destination of a jump or branch instruction.
+    /// Does nothing if called with a non-jump or non-branch instruction.
+    pub fn change_branch_destination(&mut self, inst: Inst, new_dest: Ebb) {
+        match self.dfg[inst].branch_destination_mut() {
+            None => (),
+            Some(inst_dest) => *inst_dest = new_dest,
+        }
+    }
+
    /// Checks that the specified EBB can be encoded as a basic block.
    ///
    /// On error, returns the first invalid instruction and an error message.
--- a/cranelift/codegen/src/licm.rs
+++ b/cranelift/codegen/src/licm.rs
@@ -88,7 +88,7 @@ fn create_pre_header(
    {
        // We only follow normal edges (not the back edges)
        if !domtree.dominates(header, last_inst, &func.layout) {
-            change_branch_jump_destination(last_inst, pre_header, func);
+            func.change_branch_destination(last_inst, pre_header);
        }
    }
    {
@@ -136,15 +136,6 @@ fn has_pre_header(
    result
 }

-// Change the destination of a jump or branch instruction. Does nothing if called with a non-jump
-// or non-branch instruction.
-fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function) {
-    match func.dfg[inst].branch_destination_mut() {
-        None => (),
-        Some(instruction_dest) => *instruction_dest = new_ebb,
-    }
-}
-
 /// Test whether the given opcode is unsafe to even consider for LICM.
 fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
    opcode.can_store()
--- a/cranelift/filetests/filetests/isa/x86/binary64.clif
+++ b/cranelift/filetests/filetests/isa/x86/binary64.clif
@@ -689,48 +689,48 @@ ebb0:

    ; asm: testq %rcx, %rcx
    ; asm: je ebb1
-    brz v1, ebb1                                ; bin: 48 85 c9 74 1b
+    brz v1, ebb1                                ; bin: 48 85 c9 74 19
    fallthrough ebb3

 ebb3:
    ; asm: testq %rsi, %rsi
    ; asm: je ebb1
-    brz v2, ebb1                                ; bin: 48 85 f6 74 16
+    brz v2, ebb1                                ; bin: 48 85 f6 74 14
    fallthrough ebb4

 ebb4:
    ; asm: testq %r10, %r10
    ; asm: je ebb1
-    brz v3, ebb1                                ; bin: 4d 85 d2 74 11
+    brz v3, ebb1                                ; bin: 4d 85 d2 74 0f
    fallthrough ebb5

 ebb5:
    ; asm: testq %rcx, %rcx
    ; asm: jne ebb1
-    brnz v1, ebb1                               ; bin: 48 85 c9 75 0c
+    brnz v1, ebb1                               ; bin: 48 85 c9 75 0a
    fallthrough ebb6

 ebb6:
    ; asm: testq %rsi, %rsi
    ; asm: jne ebb1
-    brnz v2, ebb1                               ; bin: 48 85 f6 75 07
+    brnz v2, ebb1                               ; bin: 48 85 f6 75 05
    fallthrough ebb7

 ebb7:
    ; asm: testq %r10, %r10
    ; asm: jne ebb1
-    brnz v3, ebb1                               ; bin: 4d 85 d2 75 02
+    brnz v3, ebb1                               ; bin: 4d 85 d2 75 00

    ; asm: jmp ebb2
-    jump ebb2                                   ; bin: eb 01
+    jump ebb2

    ; asm: ebb1:
 ebb1:
-    return                                       ; bin: c3
+    return

    ; asm: ebb2:
 ebb2:
-    jump ebb1                                   ; bin: eb fd
+    jump ebb1
 }

 ; CPU flag instructions.
@@ -1292,40 +1292,41 @@ ebb0:

    ; asm: testl %ecx, %ecx
    ; asm: je ebb1x
-    brz v1, ebb1                                ; bin: 85 c9 74 18
+    brz v1, ebb1                                ; bin: 85 c9 74 16
    fallthrough ebb3

 ebb3:
    ; asm: testl %esi, %esi
    ; asm: je ebb1x
-    brz v2, ebb1                                ; bin: 85 f6 74 14
+    brz v2, ebb1                                ; bin: 85 f6 74 12
    fallthrough ebb4

 ebb4:
    ; asm: testl %r10d, %r10d
    ; asm: je ebb1x
-    brz v3, ebb1                                ; bin: 45 85 d2 74 0f
+    brz v3, ebb1                                ; bin: 45 85 d2 74 0d
    fallthrough ebb5

 ebb5:
    ; asm: testl %ecx, %ecx
    ; asm: jne ebb1x
-    brnz v1, ebb1                               ; bin: 85 c9 75 0b
+    brnz v1, ebb1                               ; bin: 85 c9 75 09
    fallthrough ebb6

 ebb6:
    ; asm: testl %esi, %esi
    ; asm: jne ebb1x
-    brnz v2, ebb1                               ; bin: 85 f6 75 07
+    brnz v2, ebb1                               ; bin: 85 f6 75 05
    fallthrough ebb7

 ebb7:
    ; asm: testl %r10d, %r10d
    ; asm: jne ebb1x
-    brnz v3, ebb1                               ; bin: 45 85 d2 75 02
+    brnz v3, ebb1                               ; bin: 45 85 d2 75 00

    ; asm: jmp ebb2x
-    jump ebb2                                   ; bin: eb 01
+    ; branch relaxation translates this into `fallthrough ebb1`
+    jump ebb2

    ; asm: ebb1x:
 ebb1:
--- a/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif
+++ b/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif
@@ -11,9 +11,9 @@ function u0:0(i64) system_v {
 ebb0(v0: i64):
    v1 = stack_addr.i64 ss0
    v2 = load.i8 v1
-    br_table v2, ebb2, jt0
+    br_table v2, ebb1, jt0
 ; check:     $(oob=$V) = ifcmp_imm $(idx=$V), 1
-; nextln:    brif uge $oob, ebb2
+; nextln:    brif uge $oob, ebb1
 ; nextln:    fallthrough $(inb=$EBB)
 ; check:   $inb:
 ; nextln:    $(final_idx=$V) = uextend.i64 $idx
@@ -22,9 +22,6 @@ ebb0(v0: i64):
 ; nextln:    $(addr=$V) = iadd $base, $rel_addr
 ; nextln:    indirect_jump_table_br $addr, jt0

-ebb2:
-    jump ebb1
-
 ebb1:
    return
 }
--- a/cranelift/filetests/src/test_binemit.rs
+++ b/cranelift/filetests/src/test_binemit.rs
@@ -7,6 +7,8 @@ use crate::match_directive::match_directive;
 use crate::subtest::{Context, SubTest, SubtestResult};
 use cranelift_codegen::binemit::{self, CodeInfo, CodeSink, RegDiversions};
 use cranelift_codegen::dbg::DisplayList;
+use cranelift_codegen::dominator_tree::DominatorTree;
+use cranelift_codegen::flowgraph::ControlFlowGraph;
 use cranelift_codegen::ir;
 use cranelift_codegen::ir::entities::AnyEntity;
 use cranelift_codegen::print_errors::pretty_error;
@@ -166,8 +168,11 @@ impl SubTest for TestBinEmit {
        }

        // Relax branches and compute EBB offsets based on the encodings.
-        let CodeInfo { total_size, .. } = binemit::relax_branches(&mut func, isa)
-            .map_err(|e| pretty_error(&func, context.isa, e))?;
+        let mut cfg = ControlFlowGraph::with_function(&func);
+        let mut domtree = DominatorTree::with_function(&func, &cfg);
+        let CodeInfo { total_size, .. } =
+            binemit::relax_branches(&mut func, &mut cfg, &mut domtree, isa)
+                .map_err(|e| pretty_error(&func, context.isa, e))?;

        // Collect all of the 'bin:' directives on instructions.
        let mut bins = HashMap::new();