LICM pass (#87)

* LICM pass * Uses loop analysis to detect loop tree * For each loop (starting with the inner ones), create a pre-header and move there loop-invariant instructions * An instruction is loop invariant if it does not use as argument a value defined earlier in the loop * File tests to check LICM's correctness * Optimized pre-header creation If the loop already has a natural pre-header, we use it instead of creating a new one. The natural pre-header of a loop is the only predecessor of the header it doesn't dominate.
2017-06-07 11:27:22 -07:00
parent 402cb8e1f6
commit e47f4a49fb
10 changed files with 487 additions and 5 deletions
--- a/cranelift/filetests/licm/basic.cton
+++ b/cranelift/filetests/licm/basic.cton
@@ -0,0 +1,31 @@
 test licm
 ; A single-EBB loop whose header is also the first EBB of the function.
 ; v1, v2 and v3 do not depend on any value defined in the loop, so the expected output
 ; below places them in a new pre-header in front of the loop.
 function simple_loop(i32) -> i32 {
 ebb1(v0: i32):
    v1 = iconst.i32 1
    v2 = iconst.i32 2
    v3 = iadd v1, v2
    brz v0, ebb2(v0)
    v4 = isub v0, v1
    jump ebb1(v4)
 ebb2(v5: i32):
    return v5
 }
 ; sameln: function simple_loop(i32) -> i32 {
 ; nextln: ebb2(v6: i32):
 ; nextln:     v1 = iconst.i32 1
 ; nextln:     v2 = iconst.i32 2
 ; nextln:     v3 = iadd v1, v2
 ; nextln:     jump ebb0(v6)
 ; nextln: 
 ; nextln: ebb0(v0: i32):
 ; nextln:     brz v0, ebb1(v0)
 ; nextln:     v4 = isub v0, v1
 ; nextln:     jump ebb0(v4)
 ; nextln: 
 ; nextln: ebb1(v5: i32):
 ; nextln:     return v5
 ; nextln: }
--- a/cranelift/filetests/licm/complex.cton
+++ b/cranelift/filetests/licm/complex.cton
@@ -0,0 +1,81 @@
 test licm
 ; Several loops in one function: ebb2 branches back to ebb1, ebb4 branches back to ebb3,
 ; and ebb5 branches back to ebb0, so the loop headed by ebb0 contains the two smaller loops.
 function complex(i32) -> i32 {
 ebb0(v0: i32):
    v1 = iconst.i32 1
    v19 = iconst.i32 4
    v2 = iadd v1, v0
    brz v0, ebb1(v1)
    jump ebb3(v2)
 ebb1(v3: i32):
    v4 = iconst.i32 2
    v5 = iadd v3, v2
    v6 = iadd v4, v0
    jump ebb2(v6)
 ebb2(v7: i32):
    v8 = iadd v7, v3
    v9 = iadd v0, v2
    brz v0, ebb1(v7)
    jump ebb5(v8)
 ebb3(v10: i32):
    v11 = iconst.i32 3
    v12 = iadd v10, v11
    v13 = iadd v2, v11
    jump ebb4(v11)
 ebb4(v14: i32):
    v15 = iadd v12, v2
    brz v0, ebb3(v14)
    jump ebb5(v14)
 ebb5(v16: i32):
    v17 = iadd v16, v1
    v18 = iadd v1, v19
    brz v0, ebb0(v18)
    return v17
 }
 ; sameln: function complex(i32) -> i32 {
 ; nextln: ebb6(v20: i32):
 ; nextln:     v1 = iconst.i32 1
 ; nextln:     v2 = iconst.i32 4
 ; nextln:     v5 = iconst.i32 2
 ; nextln:     v12 = iconst.i32 3
 ; nextln:     v19 = iadd v1, v2
 ; nextln:     jump ebb0(v20)
 ; nextln: 
 ; nextln: ebb0(v0: i32):
 ; nextln:     v3 = iadd.i32 v1, v0
 ; nextln:     v7 = iadd.i32 v5, v0
 ; nextln:     v10 = iadd v0, v3
 ; nextln:     brz v0, ebb1(v1)
 ; nextln:     v14 = iadd v3, v12
 ; nextln:     jump ebb3(v3)
 ; nextln: 
 ; nextln: ebb1(v4: i32):
 ; nextln:     v6 = iadd v4, v3
 ; nextln:     jump ebb2(v7)
 ; nextln: 
 ; nextln: ebb2(v8: i32):
 ; nextln:     v9 = iadd v8, v4
 ; nextln:     brz.i32 v0, ebb1(v8)
 ; nextln:     jump ebb5(v9)
 ; nextln: 
 ; nextln: ebb3(v11: i32):
 ; nextln:     v13 = iadd v11, v12
 ; nextln:     jump ebb4(v12)
 ; nextln: 
 ; nextln: ebb4(v15: i32):
 ; nextln:     v16 = iadd.i32 v13, v3
 ; nextln:     brz.i32 v0, ebb3(v15)
 ; nextln:     jump ebb5(v15)
 ; nextln: 
 ; nextln: ebb5(v17: i32):
 ; nextln:     v18 = iadd v17, v1
 ; nextln:     brz.i32 v0, ebb0(v19)
 ; nextln:     return v18
 ; nextln: }
--- a/cranelift/filetests/licm/multiple-blocks.cton
+++ b/cranelift/filetests/licm/multiple-blocks.cton
@@ -0,0 +1,46 @@
 test licm
 ; The loop headed by ebb1 spans ebb1 and ebb3, and ebb0 is its only predecessor outside the
 ; loop. The expected output below keeps ebb0 as the pre-header: the invariant instructions
 ; from both loop EBBs appear there, before the jump, and no new EBB is created.
 function multiple_blocks(i32) -> i32 {
 ebb0(v0: i32):
    jump ebb1(v0)
 ebb1(v10: i32):
    v11 = iconst.i32 1
    v12 = iconst.i32 2
    v13 = iadd v11, v12
    brz v10, ebb2(v10)
    v15 = isub v10, v11
    brz v15, ebb3(v15)
    v14 = isub v10, v11
    jump ebb1(v14)
 ebb2(v20: i32):
    return v20
 ebb3(v30: i32):
    v31 = iadd v11, v13
    jump ebb1(v30)
 }
 ; sameln:function multiple_blocks(i32) -> i32 {
 ; nextln: ebb0(v0: i32):
 ; nextln:     v2 = iconst.i32 1
 ; nextln:     v3 = iconst.i32 2
 ; nextln:     v4 = iadd v2, v3
 ; nextln:     v9 = iadd v2, v4
 ; nextln:     jump ebb1(v0)
 ; nextln: 
 ; nextln: ebb1(v1: i32):
 ; nextln:     brz v1, ebb2(v1)
 ; nextln:     v5 = isub v1, v2
 ; nextln:     brz v5, ebb3(v5)
 ; nextln:     v6 = isub v1, v2
 ; nextln:     jump ebb1(v6)
 ; nextln: 
 ; nextln: ebb2(v7: i32):
 ; nextln:     return v7
 ; nextln: 
 ; nextln: ebb3(v8: i32):
 ; nextln:     jump ebb1(v8)
 ; nextln: }
--- a/cranelift/filetests/licm/nested_loops.cton
+++ b/cranelift/filetests/licm/nested_loops.cton
@@ -0,0 +1,52 @@
 test licm
 ; The inner loop on ebb1 sits inside the outer loop ebb0 -> ebb1 -> ebb2 -> ebb0.
 ; In the expected output below, the instructions that do not depend on v0 leave both loops
 ; and land in a new pre-header, while `iadd v12, v4` only leaves the inner loop because v4
 ; is computed from the outer loop's EBB argument.
 function nested_loops(i32) -> i32 {
 ebb0(v0: i32):
    v1 = iconst.i32 1
    v2 = iconst.i32 2
    v3 = iadd v1, v2
    v4 = isub v0, v1
    jump ebb1(v4,v4)
 ebb1(v10: i32,v11: i32):
    brz v11, ebb2(v10)
    v12 = iconst.i32 1
    v15 = iadd v12, v4
    v13 = isub v11, v12
    jump ebb1(v10,v13)
 ebb2(v20: i32):
    brz v20, ebb3(v20)
    jump ebb0(v20)
 ebb3(v30: i32):
    return v30
 }
 ; sameln:function nested_loops(i32) -> i32 {
 ; nextln: ebb4(v12: i32):
 ; nextln:     v1 = iconst.i32 1
 ; nextln:     v2 = iconst.i32 2
 ; nextln:     v3 = iadd v1, v2
 ; nextln:     v7 = iconst.i32 1
 ; nextln:     jump ebb0(v12)
 ; nextln: 
 ; nextln: ebb0(v0: i32):
 ; nextln:     v4 = isub v0, v1
 ; nextln:     v8 = iadd.i32 v7, v4
 ; nextln:     jump ebb1(v4, v4)
 ; nextln: 
 ; nextln: ebb1(v5: i32, v6: i32):
 ; nextln:     brz v6, ebb2(v5)
 ; nextln:     v9 = isub v6, v7
 ; nextln:     jump ebb1(v5, v9)
 ; nextln: 
 ; nextln: ebb2(v10: i32):
 ; nextln:     brz v10, ebb3(v10)
 ; nextln:     jump ebb0(v10)
 ; nextln: 
 ; nextln: ebb3(v11: i32):
 ; nextln:     return v11
 ; nextln: }
--- a/cranelift/src/filetest/licm.rs
+++ b/cranelift/src/filetest/licm.rs
@@ -0,0 +1,51 @@
 //! Test command for testing the LICM pass.
 //!
 //! The `licm` test command runs each function through the LICM pass after ensuring
 //! that all instructions are legal for the target.
 //!
 //! The resulting function is sent to `filecheck`.
 use cretonne::ir::Function;
 use cretonne;
 use cton_reader::TestCommand;
 use filetest::subtest::{SubTest, Context, Result, run_filecheck};
 use std::borrow::Cow;
 use std::fmt::Write;
 use utils::pretty_error;
 /// Stateless sub-test object implementing the `licm` test command.
 struct TestLICM;
 /// Creates the sub-test object for a parsed `licm` test command.
 ///
 /// The command accepts no options: if any are present an error message naming the
 /// offending command is returned instead of a sub-test.
 ///
 /// Panics if `parsed` is not a `licm` command.
 pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
    assert_eq!(parsed.command, "licm");
    if parsed.options.is_empty() {
        Ok(Box::new(TestLICM))
    } else {
        Err(format!("No options allowed on {}", parsed))
    }
 }
 impl SubTest for TestLICM {
    /// Name of this sub-test.
    fn name(&self) -> Cow<str> {
        "licm".into()
    }
    /// Always `true`.
    fn is_mutating(&self) -> bool {
        true
    }
    /// Runs the LICM pass on `func` and checks the printed result with `filecheck`.
    ///
    /// Any error reported by the pass is rendered with `pretty_error` and returned.
    fn run(&self, func: Cow<Function>, context: &Context) -> Result<()> {
        // The pass works on an owned function stored in a fresh compilation context.
        let mut ctx = cretonne::Context::new();
        ctx.func = func.into_owned();
        ctx.flowgraph();
        let licm_result = ctx.licm();
        licm_result
            .map_err(|e| pretty_error(&ctx.func, e))?;
        // Print the transformed function and compare it against the test's directives.
        let mut printed = String::new();
        write!(&mut printed, "{}", &ctx.func)
            .map_err(|e| e.to_string())?;
        run_filecheck(&printed, context)
    }
 }
--- a/cranelift/src/filetest/mod.rs
+++ b/cranelift/src/filetest/mod.rs
@@ -17,6 +17,7 @@ mod binemit;
 mod concurrent;
 mod domtree;
 mod legalizer;
 mod licm;
 mod regalloc;
 mod runner;
 mod runone;
@@ -61,6 +62,7 @@ fn new_subtest(parsed: &TestCommand) -> subtest::Result<Box<subtest::SubTest>> {
        "domtree" => domtree::subtest(parsed),
        "verifier" => verifier::subtest(parsed),
        "legalizer" => legalizer::subtest(parsed),
        "licm" => licm::subtest(parsed),
        "regalloc" => regalloc::subtest(parsed),
        "binemit" => binemit::subtest(parsed),
        "simple-gvn" => simple_gvn::subtest(parsed),
--- a/lib/cretonne/src/context.rs
+++ b/lib/cretonne/src/context.rs
@@ -19,6 +19,7 @@ use regalloc;
 use result::CtonResult;
 use verifier;
 use simple_gvn::do_simple_gvn;
 use licm::do_licm;
 /// Persistent data structures and compilation pipeline.
 pub struct Context {
@@ -92,6 +93,15 @@ impl Context {
        self.verify(None).map_err(Into::into)
    }
    /// Perform LICM (loop-invariant code motion) on the function.
    ///
    /// Runs `do_licm` on this context's function, CFG, dominator tree and loop analysis,
    /// then runs the verifier and returns its result.
    pub fn licm(&mut self) -> CtonResult {
        do_licm(&mut self.func,
                &mut self.cfg,
                &mut self.domtree,
                &mut self.loop_analysis);
        self.verify(None).map_err(Into::into)
    }
    /// Run the register allocator.
    pub fn regalloc(&mut self, isa: &TargetIsa) -> CtonResult {
        self.regalloc
--- a/lib/cretonne/src/lib.rs
+++ b/lib/cretonne/src/lib.rs
@@ -32,6 +32,7 @@ mod constant_hash;
 mod context;
 mod iterators;
 mod legalizer;
 mod licm;
 mod packed_option;
 mod partition_slice;
 mod predicates;
--- a/lib/cretonne/src/licm.rs
+++ b/lib/cretonne/src/licm.rs
@@ -0,0 +1,208 @@
 //! A Loop Invariant Code Motion optimization pass
 use ir::{Function, Ebb, Inst, Value, Cursor, Type, InstBuilder, Layout};
 use flowgraph::ControlFlowGraph;
 use std::collections::HashSet;
 use dominator_tree::DominatorTree;
 use entity_list::{EntityList, ListPool};
 use loop_analysis::{Loop, LoopAnalysis};
 /// Performs the LICM pass by detecting loops within the CFG and moving
 /// loop-invariant instructions out of them.
 ///
 /// The loop analysis is recomputed first. Then, for every loop it reports, the invariant
 /// instructions are removed from the loop body and re-inserted, in the same order, right
 /// before the branch that enters the loop from its pre-header. A natural pre-header is
 /// reused when the loop has one; otherwise a new pre-header is created.
 ///
 /// Changes the CFG and domtree in-place: both are recomputed once all loops are processed.
 pub fn do_licm(func: &mut Function,
               cfg: &mut ControlFlowGraph,
               domtree: &mut DominatorTree,
               loop_analysis: &mut LoopAnalysis) {
    loop_analysis.compute(func, cfg, domtree);
    for lp in loop_analysis.loops() {
        // Detach the loop-invariant instructions from the loop body. They stay out of the
        // layout until they are re-inserted below.
        let invariant_inst = remove_loop_invariant_instructions(lp, func, cfg, loop_analysis);
        if invariant_inst.is_empty() {
            // Nothing to hoist, so the loop needs no pre-header.
            continue;
        }
        let header = loop_analysis.loop_header(lp).clone();
        let natural_pre_header = has_pre_header(&func.layout, cfg, domtree, header);
        // Place the cursor on the instruction that enters the loop; the hoisted
        // instructions are inserted just before it.
        let mut pos;
        match natural_pre_header {
            // With a natural pre-header the insertion point is the branch to the header,
            // which is not necessarily the last instruction of that EBB.
            Some((_, last_inst)) => {
                pos = Cursor::new(&mut func.layout);
                pos.goto_inst(last_inst);
            }
            // A freshly created pre-header only holds its terminating jump, so we step
            // back from the bottom of the EBB onto that jump.
            None => {
                let pre_header = create_pre_header(header, func, cfg, domtree);
                pos = Cursor::new(&mut func.layout);
                pos.goto_bottom(pre_header);
                pos.prev_inst();
            }
        }
        for inst in invariant_inst {
            pos.insert_inst(inst);
        }
    }
    // We have to recompute the CFG and the domtree to account for the changes
    cfg.compute(func);
    domtree.compute(func, cfg);
 }
 // Insert a pre-header right before the header in the function layout and redirect to it
 // every branch that enters the header through a normal edge (back edges keep targeting the
 // header). The pre-header takes arguments of the same types as the header's and ends with a
 // jump forwarding them to the header.
 //
 // `cfg` and `domtree` are only read here: neither is updated for the new EBB, so both are
 // stale for this part of the function until the caller recomputes them.
 //
 // Returns the newly created pre-header.
 fn create_pre_header(header: Ebb,
                     func: &mut Function,
                     cfg: &mut ControlFlowGraph,
                     domtree: &DominatorTree)
                     -> Ebb {
    let pool = &mut ListPool::<Value>::new();
    // Only the types of the header's arguments are needed to mirror them on the pre-header.
    let header_args_types: Vec<Type> = func.dfg
        .ebb_args(header)
        .into_iter()
        .map(|val| func.dfg.value_type(*val))
        .collect();
    let pre_header = func.dfg.make_ebb();
    let mut pre_header_args_value: EntityList<Value> = EntityList::new();
    for typ in header_args_types {
        pre_header_args_value.push(func.dfg.append_ebb_arg(pre_header, typ), pool);
    }
    for &(_, last_inst) in cfg.get_predecessors(header) {
        // We only follow normal edges (not the back edges)
        if !domtree.ebb_dominates(header, last_inst, &func.layout) {
            change_branch_jump_destination(last_inst, pre_header, func);
        }
    }
    {
        let mut pos = Cursor::new(&mut func.layout);
        pos.goto_top(header);
        // Inserts the pre-header at the right place in the layout.
        pos.insert_ebb(pre_header);
        pos.next_inst();
        func.dfg
            .ins(&mut pos)
            .jump(header, pre_header_args_value.as_slice(pool));
    }
    pre_header
 }
 // Looks for a natural pre-header of a loop header.
 //
 // The header has a natural pre-header when exactly one of the branches reaching it is not
 // dominated by the header itself, i.e. when exactly one incoming edge is not a back edge.
 // Returns that predecessor Ebb together with the instruction branching to the header, or
 // `None` when there is no such edge or more than one.
 fn has_pre_header(layout: &Layout,
                   cfg: &ControlFlowGraph,
                   domtree: &DominatorTree,
                   header: Ebb)
                   -> Option<(Ebb, Inst)> {
    let mut candidate = None;
    for &(pred_ebb, branch_inst) in cfg.get_predecessors(header) {
        if domtree.ebb_dominates(header, branch_inst, layout) {
            // Back edge: it comes from inside the loop and is not counted.
            continue;
        }
        if candidate.is_some() {
            // A second normal edge means there is no unique pre-header.
            return None;
        }
        candidate = Some((pred_ebb, branch_inst));
    }
    candidate
 }
 // Retargets a jump or branch instruction to `new_ebb`. Instructions that expose no branch
 // destination are left untouched.
 fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function) {
    if let Some(destination) = func.dfg[inst].branch_destination_mut() {
        *destination = new_ebb;
    }
 }
 // Walks the EBBs of a loop in reverse post-order, starting at the header, and identifies
 // the loop-invariant instructions. These instructions are removed from the layout and
 // returned in the order they were visited, for later re-insertion by the caller.
 //
 // An instruction counts as invariant when it has results and none of its arguments is a
 // loop value, that is an argument of one of the loop's EBBs or a result of a non-invariant
 // instruction visited earlier.
 // NOTE(review): side effects and traps are not looked at here; presumably only pure
 // instructions are expected to reach this pass -- confirm.
 fn remove_loop_invariant_instructions(lp: Loop,
                                       func: &mut Function,
                                       cfg: &ControlFlowGraph,
                                       loop_analysis: &LoopAnalysis)
                                       -> Vec<Inst> {
    let mut loop_values: HashSet<Value> = HashSet::new();
    let mut hoisted: Vec<Inst> = Vec::new();
    let mut cursor = Cursor::new(&mut func.layout);
    let postorder = postorder_ebbs_loop(loop_analysis, cfg, lp);
    for &ebb in postorder.iter().rev() {
        // Every EBB argument inside the loop is a loop value.
        loop_values.extend(func.dfg.ebb_args(ebb).into_iter().cloned());
        cursor.goto_top(ebb);
        while let Some(inst) = cursor.next_inst() {
            let is_invariant = func.dfg.has_results(inst) &&
                               !func.dfg
                                    .inst_args(inst)
                                    .into_iter()
                                    .any(|arg| loop_values.contains(arg));
            if is_invariant {
                hoisted.push(inst);
                // Take the instruction out of the loop, then step back so that the next
                // `next_inst()` call lands on the instruction that followed it.
                cursor.remove_inst();
                cursor.prev_inst();
            } else {
                // The instruction stays in the loop, so everything it defines is a loop value.
                for out in func.dfg.inst_results(inst) {
                    loop_values.insert(out.clone());
                }
            }
        }
    }
    hoisted
 }
 /// Returns the EBBs of loop `lp` in post-order.
 ///
 /// The order comes from an iterative depth-first traversal that starts at the loop header
 /// and only follows successors that belong to the loop.
 pub fn postorder_ebbs_loop(loop_analysis: &LoopAnalysis,
                            cfg: &ControlFlowGraph,
                            lp: Loop)
                            -> Vec<Ebb> {
    // `discovered` holds every EBB seen at least once; `finished` holds those already emitted.
    let mut discovered = HashSet::new();
    let mut finished = HashSet::new();
    let mut postorder = Vec::new();
    let mut stack = vec![loop_analysis.loop_header(lp).clone()];
    while let Some(node) = stack.pop() {
        if discovered.insert(node) {
            // First visit: keep the node on the stack below its children so that it is
            // emitted once they have all been processed.
            stack.push(node);
            for &child in cfg.get_successors(node) {
                if loop_analysis.is_in_loop(child, lp) && !discovered.contains(&child) {
                    stack.push(child);
                }
            }
        } else if finished.insert(node) {
            // Second visit: the children are done, so the node can be emitted.
            postorder.push(node);
        }
    }
    postorder
 }
--- a/lib/cretonne/src/loop_analysis.rs
+++ b/lib/cretonne/src/loop_analysis.rs
@@ -129,13 +129,13 @@ impl LoopAnalysis {
                         domtree: &DominatorTree,
                         layout: &Layout) {
        // We traverse the CFG in reverse postorder
-        for ebb in cfg.postorder_ebbs().iter().rev() {
+        for &ebb in cfg.postorder_ebbs().iter().rev() {
-            for &(_, pred_inst) in cfg.get_predecessors(*ebb) {
+            for &(_, pred_inst) in cfg.get_predecessors(ebb) {
                // If the ebb dominates one of its predecessors it is a back edge
-                if domtree.ebb_dominates(ebb.clone(), pred_inst, layout) {
+                if domtree.ebb_dominates(ebb, pred_inst, layout) {
                    // This ebb is a loop header, so we create its associated loop
-                    let lp = self.loops.push(LoopData::new(*ebb, None));
+                    let lp = self.loops.push(LoopData::new(ebb, None));
-                    self.ebb_loop_map[*ebb] = lp.into();
+                    self.ebb_loop_map[ebb] = lp.into();
                    break;
                    // We break because we only need one back edge to identify a loop header.
                }