LICM pass (#87)

* LICM pass

* Uses loop analysis to detect loop tree
* For each loop (starting with the inner ones), create a pre-header and move the loop-invariant instructions into it
* An instruction is loop-invariant if none of its arguments is a value defined earlier in the loop
* File tests to check LICM's correctness
* Optimized pre-header creation
If the loop already has a natural pre-header, we use it instead of creating a new one.
The natural pre-header of a loop is the header's only predecessor that the header does not dominate.
This commit is contained in:
Denis Merigoux
2017-06-07 11:27:22 -07:00
committed by Jakob Stoklund Olesen
parent 402cb8e1f6
commit e47f4a49fb
10 changed files with 487 additions and 5 deletions

View File

@@ -0,0 +1,31 @@
test licm
; A loop made of a single block. ebb1 jumps back to itself, and v1, v2 and v3 do not
; depend on the block parameter v0. The expected output below places these three
; instructions in a new block that runs once ahead of the loop.
function simple_loop(i32) -> i32 {
ebb1(v0: i32):
v1 = iconst.i32 1
v2 = iconst.i32 2
v3 = iadd v1, v2
brz v0, ebb2(v0)
v4 = isub v0, v1
jump ebb1(v4)
ebb2(v5: i32):
return v5
}
; sameln: function simple_loop(i32) -> i32 {
; nextln: ebb2(v6: i32):
; nextln: v1 = iconst.i32 1
; nextln: v2 = iconst.i32 2
; nextln: v3 = iadd v1, v2
; nextln: jump ebb0(v6)
; nextln:
; nextln: ebb0(v0: i32):
; nextln: brz v0, ebb1(v0)
; nextln: v4 = isub v0, v1
; nextln: jump ebb0(v4)
; nextln:
; nextln: ebb1(v5: i32):
; nextln: return v5
; nextln: }

View File

@@ -0,0 +1,81 @@
test licm
; Several loops that share blocks. ebb2 branches back to ebb1, ebb4 branches back to
; ebb3, and ebb5 branches back to ebb0. The expected output below gathers the four
; constants and the addition of v1 and v19 in a new block placed ahead of ebb0, and
; moves the additions that only depend on v0 and the constants up into ebb0.
function complex(i32) -> i32 {
ebb0(v0: i32):
v1 = iconst.i32 1
v19 = iconst.i32 4
v2 = iadd v1, v0
brz v0, ebb1(v1)
jump ebb3(v2)
ebb1(v3: i32):
v4 = iconst.i32 2
v5 = iadd v3, v2
v6 = iadd v4, v0
jump ebb2(v6)
ebb2(v7: i32):
v8 = iadd v7, v3
v9 = iadd v0, v2
brz v0, ebb1(v7)
jump ebb5(v8)
ebb3(v10: i32):
v11 = iconst.i32 3
v12 = iadd v10, v11
v13 = iadd v2, v11
jump ebb4(v11)
ebb4(v14: i32):
v15 = iadd v12, v2
brz v0, ebb3(v14)
jump ebb5(v14)
ebb5(v16: i32):
v17 = iadd v16, v1
v18 = iadd v1, v19
brz v0, ebb0(v18)
return v17
}
; sameln: function complex(i32) -> i32 {
; nextln: ebb6(v20: i32):
; nextln: v1 = iconst.i32 1
; nextln: v2 = iconst.i32 4
; nextln: v5 = iconst.i32 2
; nextln: v12 = iconst.i32 3
; nextln: v19 = iadd v1, v2
; nextln: jump ebb0(v20)
; nextln:
; nextln: ebb0(v0: i32):
; nextln: v3 = iadd.i32 v1, v0
; nextln: v7 = iadd.i32 v5, v0
; nextln: v10 = iadd v0, v3
; nextln: brz v0, ebb1(v1)
; nextln: v14 = iadd v3, v12
; nextln: jump ebb3(v3)
; nextln:
; nextln: ebb1(v4: i32):
; nextln: v6 = iadd v4, v3
; nextln: jump ebb2(v7)
; nextln:
; nextln: ebb2(v8: i32):
; nextln: v9 = iadd v8, v4
; nextln: brz.i32 v0, ebb1(v8)
; nextln: jump ebb5(v9)
; nextln:
; nextln: ebb3(v11: i32):
; nextln: v13 = iadd v11, v12
; nextln: jump ebb4(v12)
; nextln:
; nextln: ebb4(v15: i32):
; nextln: v16 = iadd.i32 v13, v3
; nextln: brz.i32 v0, ebb3(v15)
; nextln: jump ebb5(v15)
; nextln:
; nextln: ebb5(v17: i32):
; nextln: v18 = iadd v17, v1
; nextln: brz.i32 v0, ebb0(v19)
; nextln: return v18
; nextln: }

View File

@@ -0,0 +1,46 @@
test licm
; A loop spanning more than one block. ebb1 is the header and ebb3 jumps back to it.
; ebb0 does nothing but jump to ebb1, and the expected output below reuses ebb0 to hold
; the hoisted instructions (including v31 from ebb3) instead of adding a new block.
function multiple_blocks(i32) -> i32 {
ebb0(v0: i32):
jump ebb1(v0)
ebb1(v10: i32):
v11 = iconst.i32 1
v12 = iconst.i32 2
v13 = iadd v11, v12
brz v10, ebb2(v10)
v15 = isub v10, v11
brz v15, ebb3(v15)
v14 = isub v10, v11
jump ebb1(v14)
ebb2(v20: i32):
return v20
ebb3(v30: i32):
v31 = iadd v11, v13
jump ebb1(v30)
}
; sameln:function multiple_blocks(i32) -> i32 {
; nextln: ebb0(v0: i32):
; nextln: v2 = iconst.i32 1
; nextln: v3 = iconst.i32 2
; nextln: v4 = iadd v2, v3
; nextln: v9 = iadd v2, v4
; nextln: jump ebb1(v0)
; nextln:
; nextln: ebb1(v1: i32):
; nextln: brz v1, ebb2(v1)
; nextln: v5 = isub v1, v2
; nextln: brz v5, ebb3(v5)
; nextln: v6 = isub v1, v2
; nextln: jump ebb1(v6)
; nextln:
; nextln: ebb2(v7: i32):
; nextln: return v7
; nextln:
; nextln: ebb3(v8: i32):
; nextln: jump ebb1(v8)
; nextln: }

View File

@@ -0,0 +1,52 @@
test licm
; Two nested loops. ebb1 loops on itself and sits inside an outer loop closed by the
; jump from ebb2 back to ebb0. The expected output below moves v1, v2, v3 and v12 into
; a new block ahead of ebb0, and moves v15 out of ebb1 into ebb0 because it depends on
; v4, which is computed in ebb0.
function nested_loops(i32) -> i32 {
ebb0(v0: i32):
v1 = iconst.i32 1
v2 = iconst.i32 2
v3 = iadd v1, v2
v4 = isub v0, v1
jump ebb1(v4,v4)
ebb1(v10: i32,v11: i32):
brz v11, ebb2(v10)
v12 = iconst.i32 1
v15 = iadd v12, v4
v13 = isub v11, v12
jump ebb1(v10,v13)
ebb2(v20: i32):
brz v20, ebb3(v20)
jump ebb0(v20)
ebb3(v30: i32):
return v30
}
; sameln:function nested_loops(i32) -> i32 {
; nextln: ebb4(v12: i32):
; nextln: v1 = iconst.i32 1
; nextln: v2 = iconst.i32 2
; nextln: v3 = iadd v1, v2
; nextln: v7 = iconst.i32 1
; nextln: jump ebb0(v12)
; nextln:
; nextln: ebb0(v0: i32):
; nextln: v4 = isub v0, v1
; nextln: v8 = iadd.i32 v7, v4
; nextln: jump ebb1(v4, v4)
; nextln:
; nextln: ebb1(v5: i32, v6: i32):
; nextln: brz v6, ebb2(v5)
; nextln: v9 = isub v6, v7
; nextln: jump ebb1(v5, v9)
; nextln:
; nextln: ebb2(v10: i32):
; nextln: brz v10, ebb3(v10)
; nextln: jump ebb0(v10)
; nextln:
; nextln: ebb3(v11: i32):
; nextln: return v11
; nextln: }

View File

@@ -0,0 +1,51 @@
//! Test command for testing the LICM pass.
//!
//! The `licm` test command calls `Context::flowgraph()` on each function and then runs it
//! through the LICM pass. No legalization step is performed by this command.
//!
//! The resulting function is sent to `filecheck`.
use cretonne::ir::Function;
use cretonne;
use cton_reader::TestCommand;
use filetest::subtest::{SubTest, Context, Result, run_filecheck};
use std::borrow::Cow;
use std::fmt::Write;
use utils::pretty_error;
/// Stateless sub-test registered under the `licm` test command.
struct TestLICM;
/// Build the `licm` sub-test from a parsed `test licm` command.
///
/// Panics if `parsed.command` is not `"licm"`. The command accepts no options, so any
/// option on the command line is reported as an error.
pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
    assert_eq!(parsed.command, "licm");
    if parsed.options.is_empty() {
        Ok(Box::new(TestLICM))
    } else {
        Err(format!("No options allowed on {}", parsed))
    }
}
impl SubTest for TestLICM {
    /// Name under which this sub-test is registered.
    fn name(&self) -> Cow<str> {
        Cow::from("licm")
    }

    /// The pass rewrites the function, so the test is reported as mutating.
    fn is_mutating(&self) -> bool {
        true
    }

    /// Run LICM on `func` and hand the printed result to `filecheck`.
    fn run(&self, func: Cow<Function>, context: &Context) -> Result<()> {
        // The pass works on an owned function stored inside a compilation context.
        let mut comp_ctx = cretonne::Context::new();
        comp_ctx.func = func.into_owned();
        comp_ctx.flowgraph();

        // Errors coming out of the pass are rendered against the function.
        comp_ctx
            .licm()
            .map_err(|err| pretty_error(&comp_ctx.func, err))?;

        // Print the transformed function and compare it with the test's directives.
        let mut printed = String::new();
        write!(&mut printed, "{}", &comp_ctx.func)
            .map_err(|err| err.to_string())?;
        run_filecheck(&printed, context)
    }
}

View File

@@ -17,6 +17,7 @@ mod binemit;
mod concurrent; mod concurrent;
mod domtree; mod domtree;
mod legalizer; mod legalizer;
mod licm;
mod regalloc; mod regalloc;
mod runner; mod runner;
mod runone; mod runone;
@@ -61,6 +62,7 @@ fn new_subtest(parsed: &TestCommand) -> subtest::Result<Box<subtest::SubTest>> {
"domtree" => domtree::subtest(parsed), "domtree" => domtree::subtest(parsed),
"verifier" => verifier::subtest(parsed), "verifier" => verifier::subtest(parsed),
"legalizer" => legalizer::subtest(parsed), "legalizer" => legalizer::subtest(parsed),
"licm" => licm::subtest(parsed),
"regalloc" => regalloc::subtest(parsed), "regalloc" => regalloc::subtest(parsed),
"binemit" => binemit::subtest(parsed), "binemit" => binemit::subtest(parsed),
"simple-gvn" => simple_gvn::subtest(parsed), "simple-gvn" => simple_gvn::subtest(parsed),

View File

@@ -19,6 +19,7 @@ use regalloc;
use result::CtonResult; use result::CtonResult;
use verifier; use verifier;
use simple_gvn::do_simple_gvn; use simple_gvn::do_simple_gvn;
use licm::do_licm;
/// Persistent data structures and compilation pipeline. /// Persistent data structures and compilation pipeline.
pub struct Context { pub struct Context {
@@ -92,6 +93,15 @@ impl Context {
self.verify(None).map_err(Into::into) self.verify(None).map_err(Into::into)
} }
/// Perform LICM on the function.
///
/// Hands the function, CFG, dominator tree and loop analysis stored in this context to
/// `do_licm`, then runs the verifier on the result and converts any verifier error with
/// `Into`.
pub fn licm(&mut self) -> CtonResult {
do_licm(&mut self.func,
&mut self.cfg,
&mut self.domtree,
&mut self.loop_analysis);
self.verify(None).map_err(Into::into)
}
/// Run the register allocator. /// Run the register allocator.
pub fn regalloc(&mut self, isa: &TargetIsa) -> CtonResult { pub fn regalloc(&mut self, isa: &TargetIsa) -> CtonResult {
self.regalloc self.regalloc

View File

@@ -32,6 +32,7 @@ mod constant_hash;
mod context; mod context;
mod iterators; mod iterators;
mod legalizer; mod legalizer;
mod licm;
mod packed_option; mod packed_option;
mod partition_slice; mod partition_slice;
mod predicates; mod predicates;

208
lib/cretonne/src/licm.rs Normal file
View File

@@ -0,0 +1,208 @@
//! A Loop Invariant Code Motion optimization pass
use ir::{Function, Ebb, Inst, Value, Cursor, Type, InstBuilder, Layout};
use flowgraph::ControlFlowGraph;
use std::collections::HashSet;
use dominator_tree::DominatorTree;
use entity_list::{EntityList, ListPool};
use loop_analysis::{Loop, LoopAnalysis};
/// Performs the LICM pass by detecting loops within the CFG and moving
/// loop-invariant instructions out of them.
///
/// `loop_analysis` is recomputed on entry from `func`, `cfg` and `domtree`. The function
/// layout is edited while the loops are processed, and `cfg` and `domtree` are only
/// recomputed once, after every loop has been handled.
pub fn do_licm(func: &mut Function,
               cfg: &mut ControlFlowGraph,
               domtree: &mut DominatorTree,
               loop_analysis: &mut LoopAnalysis) {
    loop_analysis.compute(func, cfg, domtree);
    for lp in loop_analysis.loops() {
        // Detach the loop-invariant instructions from the loop body. They stay out of the
        // layout until they are re-inserted below.
        let invariant_inst = remove_loop_invariant_instructions(lp, func, cfg, loop_analysis);
        if invariant_inst.is_empty() {
            // Nothing to hoist: do not create a pre-header for this loop.
            continue;
        }

        let header = loop_analysis.loop_header(lp).clone();
        // Position a cursor on the instruction that transfers control to the header, using
        // the natural pre-header when there is one and creating a pre-header otherwise.
        let mut pos = match has_pre_header(&func.layout, cfg, domtree, header) {
            // With a natural pre-header, the hoisted instructions go just before the
            // instruction jumping to the header (which is not necessarily at the end of
            // its EBB).
            Some((_, last_inst)) => {
                let mut pos = Cursor::new(&mut func.layout);
                pos.goto_inst(last_inst);
                pos
            }
            // The last instruction of a freshly created pre-header is its terminating
            // jump, so step back from the bottom to insert just before it.
            None => {
                let pre_header = create_pre_header(header, func, cfg, domtree);
                let mut pos = Cursor::new(&mut func.layout);
                pos.goto_bottom(pre_header);
                pos.prev_inst();
                pos
            }
        };

        // Re-insert the instructions in the order in which they were collected.
        for &inst in &invariant_inst {
            pos.insert_inst(inst);
        }
    }
    // We have to recompute the CFG and the domtree to account for the changes.
    cfg.compute(func);
    domtree.compute(func, cfg);
}
// Build a new EBB that becomes the pre-header of `header` and place it right before the
// header in the layout.
//
// The pre-header receives one argument per header argument, with matching types, and ends
// with a jump that forwards those arguments to the header. Every predecessor branch whose
// instruction is not dominated by the header (i.e. every edge that is not a back edge) is
// redirected to the pre-header. `cfg` is only read here; it is not updated to reflect the
// new block.
fn create_pre_header(header: Ebb,
                     func: &mut Function,
                     cfg: &mut ControlFlowGraph,
                     domtree: &DominatorTree)
                     -> Ebb {
    let mut pool = ListPool::<Value>::new();

    // Types of the header arguments, in order.
    let arg_types: Vec<Type> = {
        let dfg = &func.dfg;
        dfg.ebb_args(header)
            .iter()
            .map(|&arg| dfg.value_type(arg))
            .collect()
    };

    // Mirror the header's signature on the new EBB and remember the fresh arguments so
    // they can be forwarded by the final jump.
    let pre_header = func.dfg.make_ebb();
    let mut forwarded_args: EntityList<Value> = EntityList::new();
    for ty in arg_types {
        let arg = func.dfg.append_ebb_arg(pre_header, ty);
        forwarded_args.push(arg, &mut pool);
    }

    // Loop entry edges now go through the pre-header; back edges are left alone.
    for &(_, branch) in cfg.get_predecessors(header) {
        let is_back_edge = domtree.ebb_dominates(header, branch, &func.layout);
        if !is_back_edge {
            change_branch_jump_destination(branch, pre_header, func);
        }
    }

    {
        let mut pos = Cursor::new(&mut func.layout);
        pos.goto_top(header);
        // Inserting at the top of the header puts the pre-header just before it.
        pos.insert_ebb(pre_header);
        pos.next_inst();
        func.dfg
            .ins(&mut pos)
            .jump(header, forwarded_args.as_slice(&pool));
    }
    pre_header
}
// Detects if a loop header has a natural pre-header.
//
// A loop header has a natural pre-header if exactly one of its predecessor edges comes
// from an instruction that the header doesn't dominate (i.e. exactly one edge is not a
// back edge).
//
// Returns the pre-header Ebb and the instruction jumping to the header, or `None` when
// there is no such edge or more than one.
fn has_pre_header(layout: &Layout,
                  cfg: &ControlFlowGraph,
                  domtree: &DominatorTree,
                  header: Ebb)
                  -> Option<(Ebb, Inst)> {
    let mut result = None;
    for &(pred_ebb, last_inst) in cfg.get_predecessors(header) {
        // We only count normal edges (not the back edges).
        if domtree.ebb_dominates(header, last_inst, layout) {
            continue;
        }
        if result.is_some() {
            // A second entry edge: there is no unique pre-header.
            return None;
        }
        result = Some((pred_ebb, last_inst));
    }
    result
}
// Retarget the jump or branch `inst` so that it goes to `new_ebb`. Instructions that
// report no branch destination are left untouched.
fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function) {
    if let Some(destination) = func.dfg[inst].branch_destination_mut() {
        *destination = new_ebb;
    }
}
// Walks the EBBs of a loop in reverse post-order, starting at the header, and picks out
// the loop-invariant instructions. Each of them is removed from the layout and returned,
// in the order in which it was encountered, so that the caller can re-insert it elsewhere.
//
// An instruction is treated as loop-invariant when it has results and none of its
// arguments is a loop value. Loop values are the arguments of the loop's EBBs and the
// results of every instruction that was left in the loop.
// NOTE(review): only data dependencies are examined here; whether an instruction may
// trap or has other side effects is not checked - confirm this is acceptable for every
// opcode that produces a result.
fn remove_loop_invariant_instructions(lp: Loop,
                                      func: &mut Function,
                                      cfg: &ControlFlowGraph,
                                      loop_analysis: &LoopAnalysis)
                                      -> Vec<Inst> {
    let mut loop_values: HashSet<Value> = HashSet::new();
    let mut invariant_inst: Vec<Inst> = Vec::new();
    let mut pos = Cursor::new(&mut func.layout);
    let loop_ebbs = postorder_ebbs_loop(loop_analysis, cfg, lp);
    // Reverse post-order visits an EBB before the EBBs it leads to inside the loop.
    for &ebb in loop_ebbs.iter().rev() {
        // Arguments of the EBB are loop values.
        loop_values.extend(func.dfg.ebb_args(ebb).iter().cloned());
        pos.goto_top(ebb);
        while let Some(inst) = pos.next_inst() {
            let is_invariant = func.dfg.has_results(inst) &&
                               !func.dfg
                                    .inst_args(inst)
                                    .iter()
                                    .any(|arg| loop_values.contains(arg));
            if !is_invariant {
                // The instruction stays in the loop, so everything it defines is a loop
                // value from now on.
                for result in func.dfg.inst_results(inst) {
                    loop_values.insert(result.clone());
                }
                continue;
            }
            invariant_inst.push(inst);
            // Take the instruction out of the loop, then step back so that the next
            // `next_inst()` call does not skip the instruction that followed it.
            pos.remove_inst();
            pos.prev_inst();
        }
    }
    invariant_inst
}
/// Return the EBBs of loop `lp` in post-order, starting the traversal at the loop header.
///
/// Only successors for which `loop_analysis.is_in_loop(_, lp)` holds are followed, so the
/// result contains the header and the EBBs of the loop reachable from it.
pub fn postorder_ebbs_loop(loop_analysis: &LoopAnalysis,
                           cfg: &ControlFlowGraph,
                           lp: Loop)
                           -> Vec<Ebb> {
    // `grey` holds the EBBs that have been visited at least once, `black` the ones that
    // have already been emitted.
    let mut grey = HashSet::new();
    let mut black = HashSet::new();
    let mut stack = vec![loop_analysis.loop_header(lp).clone()];
    let mut postorder = Vec::new();
    while let Some(node) = stack.pop() {
        if grey.insert(node) {
            // This is a white node, now marked grey. Push it again so that it is emitted
            // once its children have been processed.
            stack.push(node);
            // Get any children inside the loop that we've never seen before.
            for &child in cfg.get_successors(node) {
                if loop_analysis.is_in_loop(child, lp) && !grey.contains(&child) {
                    stack.push(child);
                }
            }
        } else if black.insert(node) {
            // Second visit of a grey node: all of its children are done.
            postorder.push(node);
        }
    }
    postorder
}

View File

@@ -129,13 +129,13 @@ impl LoopAnalysis {
domtree: &DominatorTree, domtree: &DominatorTree,
layout: &Layout) { layout: &Layout) {
// We traverse the CFg in reverse postorder // We traverse the CFg in reverse postorder
for ebb in cfg.postorder_ebbs().iter().rev() { for &ebb in cfg.postorder_ebbs().iter().rev() {
for &(_, pred_inst) in cfg.get_predecessors(*ebb) { for &(_, pred_inst) in cfg.get_predecessors(ebb) {
// If the ebb dominates one of its predecessors it is a back edge // If the ebb dominates one of its predecessors it is a back edge
if domtree.ebb_dominates(ebb.clone(), pred_inst, layout) { if domtree.ebb_dominates(ebb, pred_inst, layout) {
// This ebb is a loop header, so we create its associated loop // This ebb is a loop header, so we create its associated loop
let lp = self.loops.push(LoopData::new(*ebb, None)); let lp = self.loops.push(LoopData::new(ebb, None));
self.ebb_loop_map[*ebb] = lp.into(); self.ebb_loop_map[ebb] = lp.into();
break; break;
// We break because we only need one back edge to identify a loop header. // We break because we only need one back edge to identify a loop header.
} }