diff --git a/cranelift/filetests/licm/basic.cton b/cranelift/filetests/licm/basic.cton new file mode 100644 index 0000000000..637b910f53 --- /dev/null +++ b/cranelift/filetests/licm/basic.cton @@ -0,0 +1,31 @@ +test licm + +function simple_loop(i32) -> i32 { + +ebb1(v0: i32): + v1 = iconst.i32 1 + v2 = iconst.i32 2 + v3 = iadd v1, v2 + brz v0, ebb2(v0) + v4 = isub v0, v1 + jump ebb1(v4) + +ebb2(v5: i32): + return v5 + +} +; sameln: function simple_loop(i32) -> i32 { +; nextln: ebb2(v6: i32): +; nextln: v1 = iconst.i32 1 +; nextln: v2 = iconst.i32 2 +; nextln: v3 = iadd v1, v2 +; nextln: jump ebb0(v6) +; nextln: +; nextln: ebb0(v0: i32): +; nextln: brz v0, ebb1(v0) +; nextln: v4 = isub v0, v1 +; nextln: jump ebb0(v4) +; nextln: +; nextln: ebb1(v5: i32): +; nextln: return v5 +; nextln: } diff --git a/cranelift/filetests/licm/complex.cton b/cranelift/filetests/licm/complex.cton new file mode 100644 index 0000000000..fead0cf746 --- /dev/null +++ b/cranelift/filetests/licm/complex.cton @@ -0,0 +1,81 @@ +test licm + +function complex(i32) -> i32 { + +ebb0(v0: i32): + v1 = iconst.i32 1 + v19 = iconst.i32 4 + v2 = iadd v1, v0 + brz v0, ebb1(v1) + jump ebb3(v2) + +ebb1(v3: i32): + v4 = iconst.i32 2 + v5 = iadd v3, v2 + v6 = iadd v4, v0 + jump ebb2(v6) + +ebb2(v7: i32): + v8 = iadd v7, v3 + v9 = iadd v0, v2 + brz v0, ebb1(v7) + jump ebb5(v8) + +ebb3(v10: i32): + v11 = iconst.i32 3 + v12 = iadd v10, v11 + v13 = iadd v2, v11 + jump ebb4(v11) + +ebb4(v14: i32): + v15 = iadd v12, v2 + brz v0, ebb3(v14) + jump ebb5(v14) + +ebb5(v16: i32): + v17 = iadd v16, v1 + v18 = iadd v1, v19 + brz v0, ebb0(v18) + return v17 +} + +; sameln: function complex(i32) -> i32 { +; nextln: ebb6(v20: i32): +; nextln: v1 = iconst.i32 1 +; nextln: v2 = iconst.i32 4 +; nextln: v5 = iconst.i32 2 +; nextln: v12 = iconst.i32 3 +; nextln: v19 = iadd v1, v2 +; nextln: jump ebb0(v20) +; nextln: +; nextln: ebb0(v0: i32): +; nextln: v3 = iadd.i32 v1, v0 +; nextln: v7 = iadd.i32 v5, v0 +; nextln: v10 = iadd v0, 
v3 +; nextln: brz v0, ebb1(v1) +; nextln: v14 = iadd v3, v12 +; nextln: jump ebb3(v3) +; nextln: +; nextln: ebb1(v4: i32): +; nextln: v6 = iadd v4, v3 +; nextln: jump ebb2(v7) +; nextln: +; nextln: ebb2(v8: i32): +; nextln: v9 = iadd v8, v4 +; nextln: brz.i32 v0, ebb1(v8) +; nextln: jump ebb5(v9) +; nextln: +; nextln: ebb3(v11: i32): +; nextln: v13 = iadd v11, v12 +; nextln: jump ebb4(v12) +; nextln: +; nextln: ebb4(v15: i32): +; nextln: v16 = iadd.i32 v13, v3 +; nextln: brz.i32 v0, ebb3(v15) +; nextln: jump ebb5(v15) +; nextln: +; nextln: ebb5(v17: i32): +; nextln: v18 = iadd v17, v1 +; nextln: brz.i32 v0, ebb0(v19) +; nextln: return v18 +; nextln: } diff --git a/cranelift/filetests/licm/multiple-blocks.cton b/cranelift/filetests/licm/multiple-blocks.cton new file mode 100644 index 0000000000..54db640501 --- /dev/null +++ b/cranelift/filetests/licm/multiple-blocks.cton @@ -0,0 +1,46 @@ +test licm + +function multiple_blocks(i32) -> i32 { + +ebb0(v0: i32): + jump ebb1(v0) + +ebb1(v10: i32): + v11 = iconst.i32 1 + v12 = iconst.i32 2 + v13 = iadd v11, v12 + brz v10, ebb2(v10) + v15 = isub v10, v11 + brz v15, ebb3(v15) + v14 = isub v10, v11 + jump ebb1(v14) + +ebb2(v20: i32): + return v20 + +ebb3(v30: i32): + v31 = iadd v11, v13 + jump ebb1(v30) + +} +; sameln:function multiple_blocks(i32) -> i32 { +; nextln: ebb0(v0: i32): +; nextln: v2 = iconst.i32 1 +; nextln: v3 = iconst.i32 2 +; nextln: v4 = iadd v2, v3 +; nextln: v9 = iadd v2, v4 +; nextln: jump ebb1(v0) +; nextln: +; nextln: ebb1(v1: i32): +; nextln: brz v1, ebb2(v1) +; nextln: v5 = isub v1, v2 +; nextln: brz v5, ebb3(v5) +; nextln: v6 = isub v1, v2 +; nextln: jump ebb1(v6) +; nextln: +; nextln: ebb2(v7: i32): +; nextln: return v7 +; nextln: +; nextln: ebb3(v8: i32): +; nextln: jump ebb1(v8) +; nextln: } diff --git a/cranelift/filetests/licm/nested_loops.cton b/cranelift/filetests/licm/nested_loops.cton new file mode 100644 index 0000000000..e2d3846a0f --- /dev/null +++ 
b/cranelift/filetests/licm/nested_loops.cton @@ -0,0 +1,52 @@ +test licm + +function nested_loops(i32) -> i32 { + +ebb0(v0: i32): + v1 = iconst.i32 1 + v2 = iconst.i32 2 + v3 = iadd v1, v2 + v4 = isub v0, v1 + jump ebb1(v4,v4) + +ebb1(v10: i32,v11: i32): + brz v11, ebb2(v10) + v12 = iconst.i32 1 + v15 = iadd v12, v4 + v13 = isub v11, v12 + jump ebb1(v10,v13) + +ebb2(v20: i32): + brz v20, ebb3(v20) + jump ebb0(v20) + +ebb3(v30: i32): + return v30 + +} + +; sameln:function nested_loops(i32) -> i32 { +; nextln: ebb4(v12: i32): +; nextln: v1 = iconst.i32 1 +; nextln: v2 = iconst.i32 2 +; nextln: v3 = iadd v1, v2 +; nextln: v7 = iconst.i32 1 +; nextln: jump ebb0(v12) +; nextln: +; nextln: ebb0(v0: i32): +; nextln: v4 = isub v0, v1 +; nextln: v8 = iadd.i32 v7, v4 +; nextln: jump ebb1(v4, v4) +; nextln: +; nextln: ebb1(v5: i32, v6: i32): +; nextln: brz v6, ebb2(v5) +; nextln: v9 = isub v6, v7 +; nextln: jump ebb1(v5, v9) +; nextln: +; nextln: ebb2(v10: i32): +; nextln: brz v10, ebb3(v10) +; nextln: jump ebb0(v10) +; nextln: +; nextln: ebb3(v11: i32): +; nextln: return v11 +; nextln: } diff --git a/cranelift/src/filetest/licm.rs b/cranelift/src/filetest/licm.rs new file mode 100644 index 0000000000..dcde7dd7be --- /dev/null +++ b/cranelift/src/filetest/licm.rs @@ -0,0 +1,51 @@ +//! Test command for testing the LICM pass. +//! +//! The `licm` test command computes the control flow graph of each function and then +//! runs the LICM pass on it, which also verifies the transformed function. +//! +//! The resulting function is sent to `filecheck`. 
+ +use cretonne::ir::Function; +use cretonne; +use cton_reader::TestCommand; +use filetest::subtest::{SubTest, Context, Result, run_filecheck}; +use std::borrow::Cow; +use std::fmt::Write; +use utils::pretty_error; + +struct TestLICM; + +pub fn subtest(parsed: &TestCommand) -> Result> { + assert_eq!(parsed.command, "licm"); + if !parsed.options.is_empty() { + Err(format!("No options allowed on {}", parsed)) + } else { + Ok(Box::new(TestLICM)) + } +} + +impl SubTest for TestLICM { + fn name(&self) -> Cow { + Cow::from("licm") + } + + fn is_mutating(&self) -> bool { + true + } + + fn run(&self, func: Cow, context: &Context) -> Result<()> { + // Create a compilation context, and drop in the function. + let mut comp_ctx = cretonne::Context::new(); + comp_ctx.func = func.into_owned(); + + comp_ctx.flowgraph(); + comp_ctx + .licm() + .map_err(|e| pretty_error(&comp_ctx.func, e))?; + + let mut text = String::new(); + write!(&mut text, "{}", &comp_ctx.func) + .map_err(|e| e.to_string())?; + run_filecheck(&text, context) + } +} diff --git a/cranelift/src/filetest/mod.rs b/cranelift/src/filetest/mod.rs index 961c3ff2f2..9c03cc6d4a 100644 --- a/cranelift/src/filetest/mod.rs +++ b/cranelift/src/filetest/mod.rs @@ -17,6 +17,7 @@ mod binemit; mod concurrent; mod domtree; mod legalizer; +mod licm; mod regalloc; mod runner; mod runone; @@ -61,6 +62,7 @@ fn new_subtest(parsed: &TestCommand) -> subtest::Result> { "domtree" => domtree::subtest(parsed), "verifier" => verifier::subtest(parsed), "legalizer" => legalizer::subtest(parsed), + "licm" => licm::subtest(parsed), "regalloc" => regalloc::subtest(parsed), "binemit" => binemit::subtest(parsed), "simple-gvn" => simple_gvn::subtest(parsed), diff --git a/lib/cretonne/src/context.rs b/lib/cretonne/src/context.rs index 9b0b3b31a7..a67a4bde0e 100644 --- a/lib/cretonne/src/context.rs +++ b/lib/cretonne/src/context.rs @@ -19,6 +19,7 @@ use regalloc; use result::CtonResult; use verifier; use simple_gvn::do_simple_gvn; +use licm::do_licm; 
/// Persistent data structures and compilation pipeline. pub struct Context { @@ -92,6 +93,15 @@ impl Context { self.verify(None).map_err(Into::into) } + /// Perform LICM on the function, then run the verifier on the result. + pub fn licm(&mut self) -> CtonResult { + do_licm(&mut self.func, + &mut self.cfg, + &mut self.domtree, + &mut self.loop_analysis); + self.verify(None).map_err(Into::into) + } + /// Run the register allocator. pub fn regalloc(&mut self, isa: &TargetIsa) -> CtonResult { self.regalloc diff --git a/lib/cretonne/src/lib.rs b/lib/cretonne/src/lib.rs index 5ac263d4ff..6ac2f7993b 100644 --- a/lib/cretonne/src/lib.rs +++ b/lib/cretonne/src/lib.rs @@ -32,6 +32,7 @@ mod constant_hash; mod context; mod iterators; mod legalizer; +mod licm; mod packed_option; mod partition_slice; mod predicates; diff --git a/lib/cretonne/src/licm.rs b/lib/cretonne/src/licm.rs new file mode 100644 index 0000000000..fc4b7e251b --- /dev/null +++ b/lib/cretonne/src/licm.rs @@ -0,0 +1,208 @@ +//! A Loop Invariant Code Motion optimization pass + +use ir::{Function, Ebb, Inst, Value, Cursor, Type, InstBuilder, Layout}; +use flowgraph::ControlFlowGraph; +use std::collections::HashSet; +use dominator_tree::DominatorTree; +use entity_list::{EntityList, ListPool}; +use loop_analysis::{Loop, LoopAnalysis}; + +/// Performs the LICM pass by detecting loops within the CFG and moving +/// loop-invariant instructions out of them. +/// Recomputes the loop analysis up front, and the CFG and domtree once all loops are processed. 
+pub fn do_licm(func: &mut Function, + cfg: &mut ControlFlowGraph, + domtree: &mut DominatorTree, + loop_analysis: &mut LoopAnalysis) { + loop_analysis.compute(func, cfg, domtree); + for lp in loop_analysis.loops() { + // For each loop that we want to optimize we detach its loop-invariant instructions + // from the loop body and collect them + let invariant_inst = remove_loop_invariant_instructions(lp, func, cfg, loop_analysis); + // Then we find or create the loop's pre-header and re-insert the invariant instructions + // there; they have already been removed from the loop body by the call above + if invariant_inst.len() > 0 { + // If the loop has a natural pre-header we use it, otherwise we create it. + let mut pos; + match has_pre_header(&func.layout, + cfg, + domtree, + loop_analysis.loop_header(lp).clone()) { + None => { + let pre_header = create_pre_header(loop_analysis.loop_header(lp).clone(), + func, + cfg, + domtree); + pos = Cursor::new(&mut func.layout); + pos.goto_bottom(pre_header); + pos.prev_inst(); + } + // If there is a natural pre-header we insert new instructions just before the + // related jumping instruction (which is not necessarily at the end). + Some((_, last_inst)) => { + pos = Cursor::new(&mut func.layout); + pos.goto_inst(last_inst); + } + }; + // The last instruction of the pre-header is the termination instruction (usually + // a jump) so we need to insert just before this. + for inst in invariant_inst.iter() { + pos.insert_inst(inst.clone()); + } + } + } + // We have to recompute the CFG and the domtree to account for the changes + cfg.compute(func); + domtree.compute(func, cfg); +} + +// Insert a pre-header before the header and retarget the header's non-back-edge predecessors +// to it. The CFG is only read here, not updated. A jump to the header ends the pre-header. 
+fn create_pre_header(header: Ebb, + func: &mut Function, + cfg: &mut ControlFlowGraph, + domtree: &DominatorTree) + -> Ebb { + let pool = &mut ListPool::<Value>::new(); + // Collect the header's arguments and their types so the pre-header can mirror them. + let header_args_values: Vec<Value> = func.dfg + .ebb_args(header) + .into_iter() + .map(|val| *val) + .collect(); + let header_args_types: Vec<Type> = header_args_values + .clone() + .into_iter() + .map(|val| func.dfg.value_type(val)) + .collect(); + let pre_header = func.dfg.make_ebb(); + let mut pre_header_args_value: EntityList<Value> = EntityList::new(); + for typ in header_args_types { + pre_header_args_value.push(func.dfg.append_ebb_arg(pre_header, typ), pool); + } + for &(_, last_inst) in cfg.get_predecessors(header) { + // We only redirect normal edges (not the back edges), i.e. branches the header doesn't dominate + if !domtree.ebb_dominates(header.clone(), last_inst, &func.layout) { + change_branch_jump_destination(last_inst, pre_header, func); + } + } + { + let mut pos = Cursor::new(&mut func.layout); + pos.goto_top(header); + // Inserts the pre-header at the right place in the layout. + pos.insert_ebb(pre_header); + pos.next_inst(); + // Terminate the pre-header with a jump that forwards its arguments to the header. + func.dfg + .ins(&mut pos) + .jump(header, pre_header_args_value.as_slice(pool)); + } + pre_header +} + +// Detects if a loop header has a natural pre-header. +// +// A loop header has a pre-header if there is only one predecessor that the header doesn't +// dominate. +// Returns the pre-header Ebb and the instruction jumping to the header, or `None` otherwise. 
+fn has_pre_header(layout: &Layout, + cfg: &ControlFlowGraph, + domtree: &DominatorTree, + header: Ebb) + -> Option<(Ebb, Inst)> { + let mut result = None; + let mut found = false; + for &(pred_ebb, last_inst) in cfg.get_predecessors(header) { + // We only count normal edges (not the back edges) + if !domtree.ebb_dominates(header.clone(), last_inst, layout) { + if found { + // We have already found one, so there is more than one + return None; + } else { + result = Some((pred_ebb, last_inst)); + found = true; + } + } + } + result +} + + +// Change the destination of a jump or branch instruction. Does nothing if called with an +// instruction that is neither a jump nor a branch. +fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function) { + match func.dfg[inst].branch_destination_mut() { + None => (), + Some(instruction_dest) => *instruction_dest = new_ebb, + } +} + +// Traverses a loop in reverse post-order from a header EBB and identifies loop-invariant +// instructions. These loop-invariant instructions are then removed from the code and returned +// (in reverse post-order) for later use. +fn remove_loop_invariant_instructions(lp: Loop, + func: &mut Function, + cfg: &ControlFlowGraph, + loop_analysis: &LoopAnalysis) + -> Vec<Inst> { + let mut loop_values: HashSet<Value> = HashSet::new(); + let mut invariant_inst: Vec<Inst> = Vec::new(); + let mut pos = Cursor::new(&mut func.layout); + // We traverse the loop EBBs in reverse post-order. 
+ for ebb in postorder_ebbs_loop(loop_analysis, cfg, lp).iter().rev() { + // Arguments of the EBB are loop values + for val in func.dfg.ebb_args(*ebb) { + loop_values.insert(val.clone()); + } + pos.goto_top(*ebb); + while let Some(inst) = pos.next_inst() { + if func.dfg.has_results(inst) && + func.dfg + .inst_args(inst) + .into_iter() + .all(|arg| !loop_values.contains(arg)) { + // If the instruction has results and none of its arguments is a loop value + // then this instruction is treated as loop-invariant + // NOTE(review): side effects and traps are not considered by this test; confirm intended + invariant_inst.push(inst); + // We remove it from the loop + pos.remove_inst(); + pos.prev_inst(); + } else { + // If the instruction is not loop-invariant we push its results in the set of + // loop values + for out in func.dfg.inst_results(inst) { + loop_values.insert(out.clone()); + } + } + } + } + invariant_inst +} + +/// Return the ebbs of a loop in post-order, starting the traversal from the loop header. +pub fn postorder_ebbs_loop(loop_analysis: &LoopAnalysis, + cfg: &ControlFlowGraph, + lp: Loop) + -> Vec<Ebb> { + let mut grey = HashSet::new(); + let mut black = HashSet::new(); + let mut stack = vec![loop_analysis.loop_header(lp).clone()]; + let mut postorder = Vec::new(); + + while !stack.is_empty() { + let node = stack.pop().unwrap(); + if !grey.contains(&node) { + // This is a white node. Mark it as grey. + grey.insert(node); + stack.push(node); + // Get any children we've never seen before. 
+ for child in cfg.get_successors(node) { + if loop_analysis.is_in_loop(child.clone(), lp) && !grey.contains(child) { + stack.push(child.clone()); + } + } + } else if !black.contains(&node) { + postorder.push(node.clone()); + black.insert(node.clone()); + } + } + postorder +} diff --git a/lib/cretonne/src/loop_analysis.rs b/lib/cretonne/src/loop_analysis.rs index 1e36ced8f9..f67a18e9d0 100644 --- a/lib/cretonne/src/loop_analysis.rs +++ b/lib/cretonne/src/loop_analysis.rs @@ -129,13 +129,13 @@ impl LoopAnalysis { domtree: &DominatorTree, layout: &Layout) { // We traverse the CFg in reverse postorder - for ebb in cfg.postorder_ebbs().iter().rev() { - for &(_, pred_inst) in cfg.get_predecessors(*ebb) { + for &ebb in cfg.postorder_ebbs().iter().rev() { + for &(_, pred_inst) in cfg.get_predecessors(ebb) { // If the ebb dominates one of its predecessors it is a back edge - if domtree.ebb_dominates(ebb.clone(), pred_inst, layout) { + if domtree.ebb_dominates(ebb, pred_inst, layout) { // This ebb is a loop header, so we create its associated loop - let lp = self.loops.push(LoopData::new(*ebb, None)); - self.ebb_loop_map[*ebb] = lp.into(); + let lp = self.loops.push(LoopData::new(ebb, None)); + self.ebb_loop_map[ebb] = lp.into(); break; // We break because we only need one back edge to identify a loop header. }