diff --git a/cranelift/codegen/src/context.rs b/cranelift/codegen/src/context.rs
index 3953f7e4d4..aeec864d79 100644
--- a/cranelift/codegen/src/context.rs
+++ b/cranelift/codegen/src/context.rs
@@ -318,7 +318,7 @@ impl Context {
     /// Run the register allocator.
     pub fn regalloc(&mut self, isa: &dyn TargetIsa) -> CodegenResult<()> {
         self.regalloc
-            .run(isa, &mut self.func, &self.cfg, &mut self.domtree)
+            .run(isa, &mut self.func, &mut self.cfg, &mut self.domtree)
     }
 
     /// Insert prologue and epilogues after computing the stack frame layout.
diff --git a/cranelift/codegen/src/regalloc/branch_splitting.rs b/cranelift/codegen/src/regalloc/branch_splitting.rs
new file mode 100644
index 0000000000..e44f452296
--- /dev/null
+++ b/cranelift/codegen/src/regalloc/branch_splitting.rs
@@ -0,0 +1,197 @@
+//! Split the outgoing edges of conditional branches that pass parameters.
+//!
+//! One of the reasons for splitting edges is to be able to insert `copy` and `regmove` instructions
+//! between a conditional branch and the following terminator.
+#![cfg(feature = "basic-blocks")]
+
+use std::vec::Vec;
+
+use crate::cursor::{Cursor, EncCursor};
+use crate::dominator_tree::DominatorTree;
+use crate::flowgraph::ControlFlowGraph;
+use crate::ir::{Ebb, Function, Inst, InstBuilder, InstructionData, Opcode, ValueList};
+use crate::isa::TargetIsa;
+use crate::topo_order::TopoOrder;
+
+pub fn run(
+    isa: &dyn TargetIsa,
+    func: &mut Function,
+    cfg: &mut ControlFlowGraph,
+    domtree: &mut DominatorTree,
+    topo: &mut TopoOrder,
+) {
+    let mut ctx = Context {
+        has_new_blocks: false,
+        has_fallthrough_return: None,
+        cur: EncCursor::new(func, isa),
+        domtree,
+        topo,
+        cfg,
+    };
+    ctx.run()
+}
+
+struct Context<'a> {
+    /// True if new blocks were inserted.
+    has_new_blocks: bool,
+
+    /// Record whether newly inserted empty blocks should be inserted last, or before the last
+    /// block, to avoid disturbing the expected control flow of `fallthrough_return` instructions.
+    ///
+    /// This value is computed when needed. The `Option` wraps the computed value, if any.
+    has_fallthrough_return: Option<bool>,
+
+    /// Current instruction as well as reference to function and ISA.
+    cur: EncCursor<'a>,
+
+    /// References to contextual data structures we need.
+    domtree: &'a mut DominatorTree,
+    topo: &'a mut TopoOrder,
+    cfg: &'a mut ControlFlowGraph,
+}
+
+impl<'a> Context<'a> {
+    fn run(&mut self) {
+        // Any ebb order will do.
+        self.topo.reset(self.cur.func.layout.ebbs());
+        while let Some(ebb) = self.topo.next(&self.cur.func.layout, self.domtree) {
+            // Branches can only be at the last or second-to-last position in an extended basic
+            // block.
+            self.cur.goto_last_inst(ebb);
+            let terminator_inst = self.cur.current_inst().expect("terminator");
+            if let Some(inst) = self.cur.prev_inst() {
+                let opcode = self.cur.func.dfg[inst].opcode();
+                if opcode.is_branch() {
+                    self.visit_conditional_branch(inst, opcode);
+                    self.cur.goto_inst(terminator_inst);
+                    self.visit_terminator_branch(terminator_inst);
+                }
+            }
+        }
+
+        // If blocks were added, the cfg and domtree are inconsistent and must be recomputed.
+        if self.has_new_blocks {
+            self.cfg.compute(&self.cur.func);
+            self.domtree.compute(&self.cur.func, self.cfg);
+        }
+    }
+
+    fn visit_conditional_branch(&mut self, branch: Inst, opcode: Opcode) {
+        // TODO: target = dfg[branch].branch_destination().expect("conditional branch");
+        let target = match self.cur.func.dfg[branch] {
+            InstructionData::Branch { destination, .. }
+            | InstructionData::BranchIcmp { destination, .. }
+            | InstructionData::BranchInt { destination, .. }
+            | InstructionData::BranchFloat { destination, .. } => destination,
+            _ => panic!("Unexpected instruction in visit_conditional_branch"),
+        };
+
+        // If there are any parameters, split the edge.
+        if self.should_split_edge(target) {
+            // Create the block the branch will jump to.
+            let new_ebb = self.make_empty_ebb();
+
+            // Extract the arguments of the branch instruction, splitting the Ebb parameters
+            // from the branch arguments.
+            let num_fixed = opcode.constraints().num_fixed_value_arguments();
+            let dfg = &mut self.cur.func.dfg;
+            let old_args: Vec<_> = {
+                let args = dfg[branch].take_value_list().expect("ebb parameters");
+                args.as_slice(&dfg.value_lists).iter().map(|x| *x).collect()
+            };
+            let (branch_args, ebb_params) = old_args.split_at(num_fixed);
+
+            // Replace the branch destination with the new Ebb created with no parameters, and
+            // restore the branch arguments, without the original Ebb parameters.
+            {
+                let branch_args = ValueList::from_slice(branch_args, &mut dfg.value_lists);
+                let data = &mut dfg[branch];
+                *data.branch_destination_mut().expect("branch") = new_ebb;
+                data.put_value_list(branch_args);
+            }
+            let ok = self.cur.func.update_encoding(branch, self.cur.isa).is_ok();
+            debug_assert!(ok);
+
+            // Insert a jump to the original target with its arguments into the new block.
+            self.cur.goto_first_insertion_point(new_ebb);
+            self.cur.ins().jump(target, ebb_params);
+
+            // Reset the cursor to point to the branch.
+            self.cur.goto_inst(branch);
+        }
+    }
+
+    fn visit_terminator_branch(&mut self, inst: Inst) {
+        let inst_data = &self.cur.func.dfg[inst];
+        let opcode = inst_data.opcode();
+        if opcode != Opcode::Jump && opcode != Opcode::Fallthrough {
+            // This opcode is ignored as it does not have any EBB parameters.
+            if opcode != Opcode::IndirectJumpTableBr {
+                debug_assert!(!opcode.is_branch())
+            }
+            return;
+        }
+
+        let target = match inst_data {
+            InstructionData::Jump { destination, .. } => destination,
+            _ => panic!(
+                "Unexpected instruction {} in visit_terminator_branch",
+                self.cur.display_inst(inst)
+            ),
+        };
+        debug_assert!(self.cur.func.dfg[inst].opcode().is_terminator());
+
+        // If there are any parameters, split the edge.
+        if self.should_split_edge(*target) {
+            // Create the block the branch will jump to.
+            let new_ebb = self.cur.func.dfg.make_ebb();
+            self.has_new_blocks = true;
+
+            // Split the current block before its terminator, and insert a new jump instruction
+            // to jump to it.
+            let jump = self.cur.ins().jump(new_ebb, &[]);
+            self.cur.insert_ebb(new_ebb);
+
+            // Reset the cursor to point to the new terminator of the old ebb.
+            self.cur.goto_inst(jump);
+        }
+    }
+
+    // A new ebb must be inserted before the last ebb because the last ebb may have a
+    // fallthrough_return and can't have anything after it.
+    fn make_empty_ebb(&mut self) -> Ebb {
+        let last_ebb = self.cur.layout().last_ebb().unwrap();
+        if self.has_fallthrough_return == None {
+            let last_inst = self.cur.layout().last_inst(last_ebb).unwrap();
+            self.has_fallthrough_return =
+                Some(self.cur.func.dfg[last_inst].opcode() == Opcode::FallthroughReturn);
+        }
+        let new_ebb = self.cur.func.dfg.make_ebb();
+        if self.has_fallthrough_return == Some(true) {
+            // Insert before the last block, which has a fallthrough_return
+            // instruction.
+            self.cur.layout_mut().insert_ebb(new_ebb, last_ebb);
+        } else {
+            // Insert after the last block.
+            self.cur.layout_mut().insert_ebb_after(new_ebb, last_ebb);
+        }
+        self.has_new_blocks = true;
+        new_ebb
+    }
+
+    /// Returns whether we should introduce a new branch.
+    fn should_split_edge(&self, target: Ebb) -> bool {
+        // We should split the edge if the target has any parameters.
+        if self.cur.func.dfg.ebb_params(target).len() > 0 {
+            return true;
+        };
+
+        // Or, if the target has more than one block reaching it.
+        debug_assert!(self.cfg.pred_iter(target).next() != None);
+        if let Some(_) = self.cfg.pred_iter(target).skip(1).next() {
+            return true;
+        };
+
+        false
+    }
+}
diff --git a/cranelift/codegen/src/regalloc/context.rs b/cranelift/codegen/src/regalloc/context.rs
index fe2e702647..84fe139d87 100644
--- a/cranelift/codegen/src/regalloc/context.rs
+++ b/cranelift/codegen/src/regalloc/context.rs
@@ -8,6 +8,8 @@ use crate::dominator_tree::DominatorTree;
 use crate::flowgraph::ControlFlowGraph;
 use crate::ir::Function;
 use crate::isa::TargetIsa;
+#[cfg(feature = "basic-blocks")]
+use crate::regalloc::branch_splitting;
 use crate::regalloc::coalescing::Coalescing;
 use crate::regalloc::coloring::Coloring;
 use crate::regalloc::live_value_tracker::LiveValueTracker;
@@ -78,7 +80,7 @@ impl Context {
         &mut self,
         isa: &dyn TargetIsa,
         func: &mut Function,
-        cfg: &ControlFlowGraph,
+        cfg: &mut ControlFlowGraph,
         domtree: &mut DominatorTree,
     ) -> CodegenResult<()> {
         let _tt = timing::regalloc();
@@ -93,6 +95,12 @@ impl Context {
         // phases.
         self.tracker.clear();
 
+        // Pass: Split branches, making room to insert copy & regmove instructions.
+        #[cfg(feature = "basic-blocks")]
+        {
+            branch_splitting::run(isa, func, cfg, domtree, &mut self.topo);
+        }
+
         // Pass: Liveness analysis.
         self.liveness.compute(isa, func, cfg);
 
diff --git a/cranelift/codegen/src/regalloc/mod.rs b/cranelift/codegen/src/regalloc/mod.rs
index 4df7cc9cef..37fcccb3b0 100644
--- a/cranelift/codegen/src/regalloc/mod.rs
+++ b/cranelift/codegen/src/regalloc/mod.rs
@@ -10,6 +10,7 @@ pub mod register_set;
 pub mod virtregs;
 
 mod affinity;
+mod branch_splitting;
 mod coalescing;
 mod context;
 mod diversion;
diff --git a/cranelift/filetests/filetests/regalloc/coalesce-bb.clif b/cranelift/filetests/filetests/regalloc/coalesce-bb.clif
new file mode 100644
index 0000000000..384ef2ca4c
--- /dev/null
+++ b/cranelift/filetests/filetests/regalloc/coalesce-bb.clif
@@ -0,0 +1,158 @@
+test regalloc
+target riscv32
+feature "basic-blocks"
+
+; Test the coalescer.
+; regex: V=v\d+
+; regex: WS=\s+
+; regex: LOC=%\w+
+; regex: EBB=ebb\d+
+
+; This function is already CSSA, so no copies should be inserted.
+function %cssa(i32) -> i32 {
+ebb0(v0: i32):
+    ; not: copy
+    ; v0 is used by the branch and passed as an arg - that's no conflict.
+    brnz v0, ebb1(v0)
+    jump ebb2
+
+ebb2:
+    ; v0 is live across the branch above. That's no conflict.
+    v1 = iadd_imm v0, 7
+    jump ebb1(v1)
+
+ebb1(v10: i32):
+    v11 = iadd_imm v10, 7
+    return v11
+}
+
+function %trivial(i32) -> i32 {
+ebb0(v0: i32):
+    ; check: brnz v0, $(splitEdge=$EBB)
+    brnz v0, ebb1(v0)
+    jump ebb2
+
+ebb2:
+    ; not: copy
+    v1 = iadd_imm v0, 7
+    jump ebb1(v1)
+
+    ; check: $splitEdge:
+    ; nextln: $(cp1=$V) = copy.i32 v0
+    ; nextln: jump ebb1($cp1)
+
+ebb1(v10: i32):
+    ; Using v0 in the destination EBB causes a conflict.
+    v11 = iadd v10, v0
+    return v11
+}
+
+; A value is used as an SSA argument twice in the same branch.
+function %dualuse(i32) -> i32 {
+ebb0(v0: i32):
+    ; check: brnz v0, $(splitEdge=$EBB)
+    brnz v0, ebb1(v0, v0)
+    jump ebb2
+
+ebb2:
+    v1 = iadd_imm v0, 7
+    v2 = iadd_imm v1, 56
+    jump ebb1(v1, v2)
+
+    ; check: $splitEdge:
+    ; check: $(cp1=$V) = copy.i32 v0
+    ; nextln: jump ebb1($cp1, v0)
+
+ebb1(v10: i32, v11: i32):
+    v12 = iadd v10, v11
+    return v12
+}
+
+; Interference away from the branch
+; The interference can be broken with a copy at either branch.
+function %interference(i32) -> i32 {
+ebb0(v0: i32):
+    ; not: copy
+    ; check: brnz v0, $(splitEdge=$EBB)
+    ; not: copy
+    brnz v0, ebb1(v0)
+    jump ebb2
+
+ebb2:
+    v1 = iadd_imm v0, 7
+    ; v1 and v0 interfere here:
+    v2 = iadd_imm v0, 8
+    ; check: $(cp0=$V) = copy v1
+    ; check: jump ebb1($cp0)
+    jump ebb1(v1)
+
+    ; check: $splitEdge:
+    ; not: copy
+    ; nextln: jump ebb1(v0)
+
+ebb1(v10: i32):
+    ; not: copy
+    v11 = iadd_imm v10, 7
+    return v11
+}
+
+; A loop where one induction variable is used as a backedge argument.
+function %fibonacci(i32) -> i32 {
+ebb0(v0: i32):
+    v1 = iconst.i32 1
+    v2 = iconst.i32 2
+    jump ebb1(v1, v2)
+
+ebb1(v10: i32, v11: i32):
+    ; v11 needs to be isolated because it interferes with v10.
+    ; check: ebb1(v10: i32 [$LOC], $(nv11a=$V): i32 [$LOC])
+    ; check: v11 = copy $nv11a
+    v12 = iadd v10, v11
+    v13 = icmp ult v12, v0
+    ; check: brnz v13, $(splitEdge=$EBB)
+    brnz v13, ebb1(v11, v12)
+    jump ebb2
+
+    ; check: $splitEdge:
+    ; check: $(nv11b=$V) = copy.i32 v11
+    ; not: copy
+    ; check: jump ebb1($nv11b, v12)
+
+ebb2:
+    return v12
+}
+
+; Function arguments passed on the stack aren't allowed to be part of a virtual
+; register, at least for now. This is because the other values in the virtual
+; register would need to be spilled to the incoming_arg stack slot which we treat
+; as belonging to the caller.
+function %stackarg(i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i32 {
+; check: ss0 = incoming_arg 4
+; not: incoming_arg
+ebb0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32):
+    ; check: fill v8
+    ; not: v8
+    jump ebb1(v8)
+
+ebb1(v10: i32):
+    v11 = iadd_imm v10, 1
+    return v11
+}
+
+function %gvn_unremovable_phi(i32) system_v {
+ebb0(v0: i32):
+    v2 = iconst.i32 0
+    jump ebb2(v2, v0)
+
+ebb2(v3: i32, v4: i32):
+    brnz v3, ebb2(v3, v4)
+    jump ebb3
+
+ebb3:
+    v5 = iconst.i32 1
+    brnz v3, ebb2(v2, v5)
+    jump ebb4
+
+ebb4:
+    return
+}
diff --git a/cranelift/filetests/filetests/regalloc/coalesce.clif b/cranelift/filetests/filetests/regalloc/coalesce.clif
index fa9f64ba4d..c78219ea53 100644
--- a/cranelift/filetests/filetests/regalloc/coalesce.clif
+++ b/cranelift/filetests/filetests/regalloc/coalesce.clif
@@ -1,5 +1,6 @@
 test regalloc
 target riscv32
+feature !"basic-blocks"
 
 ; Test the coalescer.
 ; regex: V=v\d+
diff --git a/cranelift/filetests/filetests/regalloc/reload-208-bb.clif b/cranelift/filetests/filetests/regalloc/reload-208-bb.clif
new file mode 100644
index 0000000000..47438d7d61
--- /dev/null
+++ b/cranelift/filetests/filetests/regalloc/reload-208-bb.clif
@@ -0,0 +1,111 @@
+test regalloc
+target x86_64 haswell
+feature "basic-blocks"
+
+; regex: V=v\d+
+; regex: EBB=ebb\d+
+
+; Filed as https://github.com/CraneStation/cranelift/issues/208
+;
+; The verifier complains about a branch argument that is not in the same virtual register as the
+; corresponding EBB argument.
+;
+; The problem was the reload pass rewriting EBB arguments on "brnz v9, ebb3(v9)".
+
+function %pr208(i64 vmctx [%rdi]) system_v {
+    gv1 = vmctx
+    gv0 = iadd_imm.i64 gv1, -8
+    heap0 = static gv0, min 0, bound 0x5000, offset_guard 0x0040_0000
+    sig0 = (i64 vmctx [%rdi]) -> i32 [%rax] system_v
+    sig1 = (i64 vmctx [%rdi], i32 [%rsi]) system_v
+    fn0 = u0:1 sig0
+    fn1 = u0:3 sig1
+
+ebb0(v0: i64):
+    v1 = iconst.i32 0
+    v2 = call fn0(v0)
+    v20 = iconst.i32 0x4ffe
+    v16 = icmp uge v2, v20
+    brz v16, ebb5
+    jump ebb9
+
+ebb9:
+    trap heap_oob
+
+ebb5:
+    v17 = uextend.i64 v2
+    v18 = iadd_imm.i64 v0, -8
+    v19 = load.i64 v18
+    v3 = iadd v19, v17
+    v4 = load.i32 v3
+    v21 = iconst.i32 0
+    v5 = icmp eq v4, v21
+    v6 = bint.i32 v5
+    brnz v6, ebb2
+    jump ebb3(v4)
+
+ebb3(v7: i32):
+    call fn1(v0, v7)
+    v26 = iconst.i32 0x4ffe
+    v22 = icmp uge v7, v26
+    brz v22, ebb6
+    jump ebb10
+
+ebb10:
+    trap heap_oob
+
+ebb6:
+    v23 = uextend.i64 v7
+    v24 = iadd_imm.i64 v0, -8
+    v25 = load.i64 v24
+    v8 = iadd v25, v23
+    v9 = load.i32 v8+56
+    ; check: v9 = spill
+    ; check: brnz $V, $(splitEdge=$EBB)
+    brnz v9, ebb3(v9)
+    jump ebb4
+
+    ; check: $splitEdge:
+    ; nextln: jump ebb3(v9)
+
+ebb4:
+    jump ebb2
+
+ebb2:
+    v10 = iconst.i32 0
+    v31 = iconst.i32 0x4ffe
+    v27 = icmp uge v10, v31
+    brz v27, ebb7
+    jump ebb11
+
+ebb11:
+    trap heap_oob
+
+ebb7:
+    v28 = uextend.i64 v10
+    v29 = iadd_imm.i64 v0, -8
+    v30 = load.i64 v29
+    v11 = iadd v30, v28
+    v12 = load.i32 v11+12
+    call fn1(v0, v12)
+    v13 = iconst.i32 0
+    v36 = iconst.i32 0x4ffe
+    v32 = icmp uge v13, v36
+    brz v32, ebb8
+    jump ebb12
+
+ebb12:
+    trap heap_oob
+
+ebb8:
+    v33 = uextend.i64 v13
+    v34 = iadd_imm.i64 v0, -8
+    v35 = load.i64 v34
+    v14 = iadd v35, v33
+    v15 = load.i32 v14+12
+    call fn1(v0, v15)
+    jump ebb1
+
+ebb1:
+    return
+}
diff --git a/cranelift/filetests/filetests/regalloc/reload-208.clif b/cranelift/filetests/filetests/regalloc/reload-208.clif
index 0c65ffb1e8..c767670252 100644
--- a/cranelift/filetests/filetests/regalloc/reload-208.clif
+++ b/cranelift/filetests/filetests/regalloc/reload-208.clif
@@ -1,5 +1,6 @@
 test regalloc
 target x86_64 haswell
+feature !"basic-blocks"
 
 ; regex: V=v\d+
diff --git a/cranelift/filetests/filetests/regalloc/x86-regres-bb.clif b/cranelift/filetests/filetests/regalloc/x86-regres-bb.clif
new file mode 100644
index 0000000000..28eba22a64
--- /dev/null
+++ b/cranelift/filetests/filetests/regalloc/x86-regres-bb.clif
@@ -0,0 +1,49 @@
+test regalloc
+target i686
+feature "basic-blocks"
+
+; regex: V=v\d+
+; regex: EBB=ebb\d+
+
+; The value v9 appears both as the branch control and one of the EBB arguments
+; in the brnz instruction in ebb2. It also happens that v7 and v9 are assigned
+; to the same register, so v9 doesn't need to be moved before the brnz.
+;
+; This ended up confusing the constraint solver, which had not made a record of
+; the fixed register assignment for v9 since it was already in the correct
+; register.
+function %pr147(i32) -> i32 system_v {
+ebb0(v0: i32):
+    v1 = iconst.i32 0
+    v2 = iconst.i32 1
+    v3 = iconst.i32 0
+    jump ebb2(v3, v2, v0)
+
+ebb2(v4: i32, v5: i32, v7: i32):
+    ; check: ebb2
+    v6 = iadd v4, v5
+    v8 = iconst.i32 -1
+    ; v7 is killed here and v9 gets the same register.
+    v9 = iadd v7, v8
+    ; check: v9 = iadd v7, v8
+    ; Here v9 the brnz control appears to interfere with v9 the EBB argument,
+    ; so divert_fixed_input_conflicts() calls add_var(v9), which is ok. The
+    ; add_var sanity checks got confused when no fixed assignment could be
+    ; found for v9.
+    ;
+    ; We should be able to handle this situation without making copies of v9.
+    brnz v9, ebb2(v5, v6, v9)
+    ; check: brnz v9, $(splitEdge=$EBB)
+    jump ebb3
+
+    ; check: $splitEdge:
+    ; check: jump ebb2($V, $V, v9)
+ebb3:
+    return v5
+}
+
+function %select_i64(i64, i64, i32) -> i64 {
+ebb0(v0: i64, v1: i64, v2: i32):
+    v3 = select v2, v0, v1
+    return v3
+}
diff --git a/cranelift/filetests/filetests/regalloc/x86-regres.clif b/cranelift/filetests/filetests/regalloc/x86-regres.clif
index 66c3d45c2e..ac6e82d66f 100644
--- a/cranelift/filetests/filetests/regalloc/x86-regres.clif
+++ b/cranelift/filetests/filetests/regalloc/x86-regres.clif
@@ -1,6 +1,6 @@
 test regalloc
-
 target i686
+feature !"basic-blocks"
 
 ; regex: V=v\d+