Split edges to have a block to add regmove & copy instructions.

When using basic block instructions cannot be added in-between jump instructions which are ending basic blocks. These changes create extra basic blocks such that extra space is available for the spilling and moving registers where they are expected.
2019-08-30 18:44:35 +02:00
parent bb87f1a54a
commit 381578311c
10 changed files with 529 additions and 3 deletions
--- a/cranelift/codegen/src/context.rs
+++ b/cranelift/codegen/src/context.rs
@@ -318,7 +318,7 @@ impl Context {
    /// Run the register allocator.
    pub fn regalloc(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
        self.regalloc
-            .run(isa, &mut self.func, &self.cfg, &mut self.domtree)
+            .run(isa, &mut self.func, &mut self.cfg, &mut self.domtree)
    }

    /// Insert prologue and epilogues after computing the stack frame layout.
--- a/cranelift/codegen/src/regalloc/branch_splitting.rs
+++ b/cranelift/codegen/src/regalloc/branch_splitting.rs
@@ -0,0 +1,197 @@
+//! Split the outgoing edges of conditional branches that pass parameters.
+//!
+//! One of the reason for splitting edges is to be able to insert `copy` and `regmove` instructions
+//! between a conditional branch and the following terminator.
+#![cfg(feature = "basic-blocks")]
+
+use std::vec::Vec;
+
+use crate::cursor::{Cursor, EncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::{Ebb, Function, Inst, InstBuilder, InstructionData, Opcode, ValueList};
+use crate::isa::TargetIsa;
+use crate::topo_order::TopoOrder;
+
+pub fn run(
+    isa: &dyn TargetIsa,
+    func: &mut Function,
+    cfg: &mut ControlFlowGraph,
+    domtree: &mut DominatorTree,
+    topo: &mut TopoOrder,
+) {
+    let mut ctx = Context {
+        has_new_blocks: false,
+        has_fallthrough_return: None,
+        cur: EncCursor::new(func, isa),
+        domtree,
+        topo,
+        cfg,
+    };
+    ctx.run()
+}
+
+struct Context<'a> {
+    /// True if new blocks were inserted.
+    has_new_blocks: bool,
+
+    /// Record whether newly inserted empty blocks should be inserted last, or before the last, to
+    /// avoid disturbing the expected control flow of `fallthroug_return` statements.
+    ///
+    /// This value is computed when needed. The Option wraps the computed value if any.
+    has_fallthrough_return: Option<bool>,
+
+    /// Current instruction as well as reference to function and ISA.
+    cur: EncCursor<'a>,
+
+    /// References to contextual data structures we need.
+    domtree: &'a mut DominatorTree,
+    topo: &'a mut TopoOrder,
+    cfg: &'a mut ControlFlowGraph,
+}
+
+impl<'a> Context<'a> {
+    fn run(&mut self) {
+        // Any ebb order will do.
+        self.topo.reset(self.cur.func.layout.ebbs());
+        while let Some(ebb) = self.topo.next(&self.cur.func.layout, self.domtree) {
+            // Branches can only be at the last or second to last position in an extended basic
+            // block.
+            self.cur.goto_last_inst(ebb);
+            let terminator_inst = self.cur.current_inst().expect("terminator");
+            if let Some(inst) = self.cur.prev_inst() {
+                let opcode = self.cur.func.dfg[inst].opcode();
+                if opcode.is_branch() {
+                    self.visit_conditional_branch(inst, opcode);
+                    self.cur.goto_inst(terminator_inst);
+                    self.visit_terminator_branch(terminator_inst);
+                }
+            }
+        }
+
+        // If blocks were added the cfg and domtree are inconsistent and must be recomputed.
+        if self.has_new_blocks {
+            self.cfg.compute(&self.cur.func);
+            self.domtree.compute(&self.cur.func, self.cfg);
+        }
+    }
+
+    fn visit_conditional_branch(&mut self, branch: Inst, opcode: Opcode) {
+        // TODO: target = dfg[branch].branch_destination().expect("conditional branch");
+        let target = match self.cur.func.dfg[branch] {
+            InstructionData::Branch { destination, .. }
+            | InstructionData::BranchIcmp { destination, .. }
+            | InstructionData::BranchInt { destination, .. }
+            | InstructionData::BranchFloat { destination, .. } => destination,
+            _ => panic!("Unexpected instruction in visit_conditional_branch"),
+        };
+
+        // If there are any parameters, split the edge.
+        if self.should_split_edge(target) {
+            // Create the block the branch will jump to.
+            let new_ebb = self.make_empty_ebb();
+
+            // Extract the arguments of the branch instruction, split the Ebb parameters and the
+            // branch arguments
+            let num_fixed = opcode.constraints().num_fixed_value_arguments();
+            let dfg = &mut self.cur.func.dfg;
+            let old_args: Vec<_> = {
+                let args = dfg[branch].take_value_list().expect("ebb parameters");
+                args.as_slice(&dfg.value_lists).iter().map(|x| *x).collect()
+            };
+            let (branch_args, ebb_params) = old_args.split_at(num_fixed);
+
+            // Replace the branch destination by the new Ebb created with no parameters, and restore
+            // the branch arguments, without the original Ebb parameters.
+            {
+                let branch_args = ValueList::from_slice(branch_args, &mut dfg.value_lists);
+                let data = &mut dfg[branch];
+                *data.branch_destination_mut().expect("branch") = new_ebb;
+                data.put_value_list(branch_args);
+            }
+            let ok = self.cur.func.update_encoding(branch, self.cur.isa).is_ok();
+            debug_assert!(ok);
+
+            // Insert a jump to the original target with its arguments into the new block.
+            self.cur.goto_first_insertion_point(new_ebb);
+            self.cur.ins().jump(target, ebb_params);
+
+            // Reset the cursor to point to the branch.
+            self.cur.goto_inst(branch);
+        }
+    }
+
+    fn visit_terminator_branch(&mut self, inst: Inst) {
+        let inst_data = &self.cur.func.dfg[inst];
+        let opcode = inst_data.opcode();
+        if opcode != Opcode::Jump && opcode != Opcode::Fallthrough {
+            // This opcode is ignored as it does not have any EBB parameters.
+            if opcode != Opcode::IndirectJumpTableBr {
+                debug_assert!(!opcode.is_branch())
+            }
+            return;
+        }
+
+        let target = match inst_data {
+            InstructionData::Jump { destination, .. } => destination,
+            _ => panic!(
+                "Unexpected instruction {} in visit_terminator_branch",
+                self.cur.display_inst(inst)
+            ),
+        };
+        debug_assert!(self.cur.func.dfg[inst].opcode().is_terminator());
+
+        // If there are any parameters, split the edge.
+        if self.should_split_edge(*target) {
+            // Create the block the branch will jump to.
+            let new_ebb = self.cur.func.dfg.make_ebb();
+            self.has_new_blocks = true;
+
+            // Split the current block before its terminator, and insert a new jump instruction to
+            // jump to it.
+            let jump = self.cur.ins().jump(new_ebb, &[]);
+            self.cur.insert_ebb(new_ebb);
+
+            // Reset the cursor to point to new terminator of the old ebb.
+            self.cur.goto_inst(jump);
+        }
+    }
+
+    // A new ebb must be inserted before the last ebb because the last ebb may have a
+    // fallthrough_return and can't have anything after it.
+    fn make_empty_ebb(&mut self) -> Ebb {
+        let last_ebb = self.cur.layout().last_ebb().unwrap();
+        if self.has_fallthrough_return == None {
+            let last_inst = self.cur.layout().last_inst(last_ebb).unwrap();
+            self.has_fallthrough_return =
+                Some(self.cur.func.dfg[last_inst].opcode() == Opcode::FallthroughReturn);
+        }
+        let new_ebb = self.cur.func.dfg.make_ebb();
+        if self.has_fallthrough_return == Some(true) {
+            // Insert before the last block which has a fallthrough_return
+            // instruction.
+            self.cur.layout_mut().insert_ebb(new_ebb, last_ebb);
+        } else {
+            // Insert after the last block.
+            self.cur.layout_mut().insert_ebb_after(new_ebb, last_ebb);
+        }
+        self.has_new_blocks = true;
+        new_ebb
+    }
+
+    /// Returns whether we should introduce a new branch.
+    fn should_split_edge(&self, target: Ebb) -> bool {
+        // We should split the edge if the target has any parameters.
+        if self.cur.func.dfg.ebb_params(target).len() > 0 {
+            return true;
+        };
+
+        // Or, if the target has more than one block reaching it.
+        debug_assert!(self.cfg.pred_iter(target).next() != None);
+        if let Some(_) = self.cfg.pred_iter(target).skip(1).next() {
+            return true;
+        };
+
+        false
+    }
+}
--- a/cranelift/codegen/src/regalloc/context.rs
+++ b/cranelift/codegen/src/regalloc/context.rs
@@ -8,6 +8,8 @@ use crate::dominator_tree::DominatorTree;
 use crate::flowgraph::ControlFlowGraph;
 use crate::ir::Function;
 use crate::isa::TargetIsa;
+#[cfg(feature = "basic-blocks")]
+use crate::regalloc::branch_splitting;
 use crate::regalloc::coalescing::Coalescing;
 use crate::regalloc::coloring::Coloring;
 use crate::regalloc::live_value_tracker::LiveValueTracker;
@@ -78,7 +80,7 @@ impl Context {
        &mut self,
        isa: &dyn TargetIsa,
        func: &mut Function,
-        cfg: &ControlFlowGraph,
+        cfg: &mut ControlFlowGraph,
        domtree: &mut DominatorTree,
    ) -> CodegenResult<()> {
        let _tt = timing::regalloc();
@@ -93,6 +95,12 @@ impl Context {
        // phases.
        self.tracker.clear();

+        // Pass: Split branches, add space where to add copy & regmove instructions.
+        #[cfg(feature = "basic-blocks")]
+        {
+            branch_splitting::run(isa, func, cfg, domtree, &mut self.topo);
+        }
+
        // Pass: Liveness analysis.
        self.liveness.compute(isa, func, cfg);

--- a/cranelift/codegen/src/regalloc/mod.rs
+++ b/cranelift/codegen/src/regalloc/mod.rs
@@ -10,6 +10,7 @@ pub mod register_set;
 pub mod virtregs;

 mod affinity;
+mod branch_splitting;
 mod coalescing;
 mod context;
 mod diversion;
--- a/cranelift/filetests/filetests/regalloc/coalesce-bb.clif
+++ b/cranelift/filetests/filetests/regalloc/coalesce-bb.clif
@@ -0,0 +1,158 @@
+test regalloc
+target riscv32
+feature "basic-blocks"
+
+; Test the coalescer.
+; regex: V=v\d+
+; regex: WS=\s+
+; regex: LOC=%\w+
+; regex: EBB=ebb\d+
+
+; This function is already CSSA, so no copies should be inserted.
+function %cssa(i32) -> i32 {
+ebb0(v0: i32):
+    ; not: copy
+    ; v0 is used by the branch and passed as an arg - that's no conflict.
+    brnz v0, ebb1(v0)
+    jump ebb2
+
+ebb2:
+    ; v0 is live across the branch above. That's no conflict.
+    v1 = iadd_imm v0, 7
+    jump ebb1(v1)
+
+ebb1(v10: i32):
+    v11 = iadd_imm v10, 7
+    return v11
+}
+
+function %trivial(i32) -> i32 {
+ebb0(v0: i32):
+    ; check:    brnz v0, $(splitEdge=$EBB)
+    brnz v0, ebb1(v0)
+    jump ebb2
+
+ebb2:
+    ; not: copy
+    v1 = iadd_imm v0, 7
+    jump ebb1(v1)
+
+    ; check:  $splitEdge:
+    ; nextln:   $(cp1=$V) = copy.i32 v0
+    ; nextln:   jump ebb1($cp1)
+
+ebb1(v10: i32):
+    ; Use v0 in the destination EBB causes a conflict.
+    v11 = iadd v10, v0
+    return v11
+}
+
+; A value is used as an SSA argument twice in the same branch.
+function %dualuse(i32) -> i32 {
+ebb0(v0: i32):
+    ; check:  brnz v0, $(splitEdge=$EBB)
+    brnz v0, ebb1(v0, v0)
+    jump ebb2
+
+ebb2:
+    v1 = iadd_imm v0, 7
+    v2 = iadd_imm v1, 56
+    jump ebb1(v1, v2)
+
+    ; check:  $splitEdge:
+    ; check:    $(cp1=$V) = copy.i32 v0
+    ; nextln:   jump ebb1($cp1, v0)
+
+ebb1(v10: i32, v11: i32):
+    v12 = iadd v10, v11
+    return v12
+}
+
+; Interference away from the branch
+; The interference can be broken with a copy at either branch.
+function %interference(i32) -> i32 {
+ebb0(v0: i32):
+    ; not:    copy
+    ; check:  brnz v0, $(splitEdge=$EBB)
+    ; not:    copy
+    brnz v0, ebb1(v0)
+    jump ebb2
+
+ebb2:
+    v1 = iadd_imm v0, 7
+    ; v1 and v0 interfere here:
+    v2 = iadd_imm v0, 8
+    ; check: $(cp0=$V) = copy v1
+    ; check: jump ebb1($cp0)
+    jump ebb1(v1)
+
+    ; check:  $splitEdge:
+    ; not:      copy
+    ; nextln:   jump ebb1(v0)
+
+ebb1(v10: i32):
+    ; not: copy
+    v11 = iadd_imm v10, 7
+    return v11
+}
+
+; A loop where one induction variable is used as a backedge argument.
+function %fibonacci(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = iconst.i32 1
+    v2 = iconst.i32 2
+    jump ebb1(v1, v2)
+
+ebb1(v10: i32, v11: i32):
+    ; v11 needs to be isolated because it interferes with v10.
+    ; check: ebb1(v10: i32 [$LOC], $(nv11a=$V): i32 [$LOC])
+    ; check: v11 = copy $nv11a
+    v12 = iadd v10, v11
+    v13 = icmp ult v12, v0
+    ; check:  brnz v13, $(splitEdge=$EBB)
+    brnz v13, ebb1(v11, v12)
+    jump ebb2
+
+    ; check:  $splitEdge:
+    ; check:    $(nv11b=$V) = copy.i32 v11
+    ; not:      copy
+    ; check:   jump ebb1($nv11b, v12)
+
+ebb2:
+    return v12
+}
+
+; Function arguments passed on the stack aren't allowed to be part of a virtual
+; register, at least for now. This is because the other values in the virtual
+; register would need to be spilled to the incoming_arg stack slot which we treat
+; as belonging to the caller.
+function %stackarg(i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i32 {
+; check: ss0 = incoming_arg 4
+; not: incoming_arg
+ebb0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32):
+    ; check: fill v8
+    ; not: v8
+    jump ebb1(v8)
+
+ebb1(v10: i32):
+    v11 = iadd_imm v10, 1
+    return v11
+}
+
+function %gvn_unremovable_phi(i32) system_v {
+ebb0(v0: i32):
+    v2 = iconst.i32 0
+    jump ebb2(v2, v0)
+
+ebb2(v3: i32, v4: i32):
+    brnz v3, ebb2(v3, v4)
+    jump ebb3
+
+ebb3:
+    v5 = iconst.i32 1
+    brnz v3, ebb2(v2, v5)
+    jump ebb4
+
+ebb4:
+    return
+}
--- a/cranelift/filetests/filetests/regalloc/coalesce.clif
+++ b/cranelift/filetests/filetests/regalloc/coalesce.clif
@@ -1,5 +1,6 @@
 test regalloc
 target riscv32
+feature !"basic-blocks"

 ; Test the coalescer.
 ; regex: V=v\d+
--- a/cranelift/filetests/filetests/regalloc/reload-208-bb.clif
+++ b/cranelift/filetests/filetests/regalloc/reload-208-bb.clif
@@ -0,0 +1,111 @@
+test regalloc
+target x86_64 haswell
+feature "basic-blocks"
+
+; regex: V=v\d+
+; regex: EBB=ebb\d+
+
+; Filed as https://github.com/CraneStation/cranelift/issues/208
+;
+; The verifier complains about a branch argument that is not in the same virtual register as the
+; corresponding EBB argument.
+;
+; The problem was the reload pass rewriting EBB arguments on "brnz v9, ebb3(v9)"
+
+function %pr208(i64 vmctx [%rdi]) system_v {
+    gv1 = vmctx
+    gv0 = iadd_imm.i64 gv1, -8
+    heap0 = static gv0, min 0, bound 0x5000, offset_guard 0x0040_0000
+    sig0 = (i64 vmctx [%rdi]) -> i32 [%rax] system_v
+    sig1 = (i64 vmctx [%rdi], i32 [%rsi]) system_v
+    fn0 = u0:1 sig0
+    fn1 = u0:3 sig1
+
+ebb0(v0: i64):
+    v1 = iconst.i32 0
+    v2 = call fn0(v0)
+    v20 = iconst.i32 0x4ffe
+    v16 = icmp uge v2, v20
+    brz v16, ebb5
+    jump ebb9
+
+ebb9:
+    trap heap_oob
+
+ebb5:
+    v17 = uextend.i64 v2
+    v18 = iadd_imm.i64 v0, -8
+    v19 = load.i64 v18
+    v3 = iadd v19, v17
+    v4 = load.i32 v3
+    v21 = iconst.i32 0
+    v5 = icmp eq v4, v21
+    v6 = bint.i32 v5
+    brnz v6, ebb2
+    jump ebb3(v4)
+
+ebb3(v7: i32):
+    call fn1(v0, v7)
+    v26 = iconst.i32 0x4ffe
+    v22 = icmp uge v7, v26
+    brz v22, ebb6
+    jump ebb10
+
+ebb10:
+    trap heap_oob
+
+ebb6:
+    v23 = uextend.i64 v7
+    v24 = iadd_imm.i64 v0, -8
+    v25 = load.i64 v24
+    v8 = iadd v25, v23
+    v9 = load.i32 v8+56
+    ; check: v9 = spill
+    ; check: brnz $V, $(splitEdge=$EBB)
+    brnz v9, ebb3(v9)
+    jump ebb4
+
+    ; check: $splitEdge:
+    ; nextln:  jump ebb3(v9)
+
+ebb4:
+    jump ebb2
+
+ebb2:
+    v10 = iconst.i32 0
+    v31 = iconst.i32 0x4ffe
+    v27 = icmp uge v10, v31
+    brz v27, ebb7
+    jump ebb11
+
+ebb11:
+    trap heap_oob
+
+ebb7:
+    v28 = uextend.i64 v10
+    v29 = iadd_imm.i64 v0, -8
+    v30 = load.i64 v29
+    v11 = iadd v30, v28
+    v12 = load.i32 v11+12
+    call fn1(v0, v12)
+    v13 = iconst.i32 0
+    v36 = iconst.i32 0x4ffe
+    v32 = icmp uge v13, v36
+    brz v32, ebb8
+    jump ebb12
+
+ebb12:
+    trap heap_oob
+
+ebb8:
+    v33 = uextend.i64 v13
+    v34 = iadd_imm.i64 v0, -8
+    v35 = load.i64 v34
+    v14 = iadd v35, v33
+    v15 = load.i32 v14+12
+    call fn1(v0, v15)
+    jump ebb1
+
+ebb1:
+    return
+}
--- a/cranelift/filetests/filetests/regalloc/reload-208.clif
+++ b/cranelift/filetests/filetests/regalloc/reload-208.clif
@@ -1,5 +1,6 @@
 test regalloc
 target x86_64 haswell
+feature !"basic-blocks"

 ; regex: V=v\d+

--- a/cranelift/filetests/filetests/regalloc/x86-regres-bb.clif
+++ b/cranelift/filetests/filetests/regalloc/x86-regres-bb.clif
@@ -0,0 +1,49 @@
+test regalloc
+target i686
+feature "basic-blocks"
+
+; regex: V=v\d+
+; regex: EBB=ebb\d+
+
+; The value v9 appears both as the branch control and one of the EBB arguments
+; in the brnz instruction in ebb2. It also happens that v7 and v9 are assigned
+; to the same register, so v9 doesn't need to be moved before the brnz.
+;
+; This ended up confusong the constraint solver which had not made a record of
+; the fixed register assignment for v9 since it was already in the correct
+; register.
+function %pr147(i32) -> i32 system_v {
+ebb0(v0: i32):
+    v1 = iconst.i32 0
+    v2 = iconst.i32 1
+    v3 = iconst.i32 0
+    jump ebb2(v3, v2, v0)
+
+ebb2(v4: i32, v5: i32, v7: i32):
+    ; check: ebb2
+    v6 = iadd v4, v5
+    v8 = iconst.i32 -1
+    ; v7 is killed here and v9 gets the same register.
+    v9 = iadd v7, v8
+    ; check:   v9 = iadd v7, v8
+    ; Here v9 the brnz control appears to interfere with v9 the EBB argument,
+    ; so divert_fixed_input_conflicts() calls add_var(v9), which is ok. The
+    ; add_var sanity checks got confused when no fixed assignment could be
+    ; found for v9.
+    ;
+    ; We should be able to handle this situation without making copies of v9.
+    brnz v9, ebb2(v5, v6, v9)
+    ; check:   brnz v9, $(splitEdge=$EBB)
+    jump ebb3
+
+    ; check: $splitEdge:
+    ; check:   jump ebb2($V, $V, v9)
+ebb3:
+    return v5
+}
+
+function %select_i64(i64, i64, i32) -> i64 {
+ebb0(v0: i64, v1: i64, v2: i32):
+    v3 = select v2, v0, v1
+    return v3
+}
--- a/cranelift/filetests/filetests/regalloc/x86-regres.clif
+++ b/cranelift/filetests/filetests/regalloc/x86-regres.clif
@@ -1,6 +1,6 @@
 test regalloc
-
 target i686
+feature !"basic-blocks"

 ; regex: V=v\d+