LICM pass (#87)
* LICM pass
  * Uses loop analysis to detect the loop tree.
  * For each loop (starting with the inner ones), creates a pre-header and moves the loop-invariant instructions into it.
  * An instruction is loop-invariant if none of its arguments is a value defined earlier in the loop.
  * File tests check LICM's correctness.
* Optimized pre-header creation: if the loop already has a natural pre-header, we use it instead of creating a new one. The natural pre-header of a loop is the unique predecessor of the header that the header does not dominate.
This commit is contained in:
committed by
Jakob Stoklund Olesen
parent
402cb8e1f6
commit
e47f4a49fb
31
cranelift/filetests/licm/basic.cton
Normal file
31
cranelift/filetests/licm/basic.cton
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
test licm
|
||||||
|
|
||||||
|
function simple_loop(i32) -> i32 {
|
||||||
|
|
||||||
|
ebb1(v0: i32):
|
||||||
|
v1 = iconst.i32 1
|
||||||
|
v2 = iconst.i32 2
|
||||||
|
v3 = iadd v1, v2
|
||||||
|
brz v0, ebb2(v0)
|
||||||
|
v4 = isub v0, v1
|
||||||
|
jump ebb1(v4)
|
||||||
|
|
||||||
|
ebb2(v5: i32):
|
||||||
|
return v5
|
||||||
|
|
||||||
|
}
|
||||||
|
; sameln: function simple_loop(i32) -> i32 {
|
||||||
|
; nextln: ebb2(v6: i32):
|
||||||
|
; nextln: v1 = iconst.i32 1
|
||||||
|
; nextln: v2 = iconst.i32 2
|
||||||
|
; nextln: v3 = iadd v1, v2
|
||||||
|
; nextln: jump ebb0(v6)
|
||||||
|
; nextln:
|
||||||
|
; nextln: ebb0(v0: i32):
|
||||||
|
; nextln: brz v0, ebb1(v0)
|
||||||
|
; nextln: v4 = isub v0, v1
|
||||||
|
; nextln: jump ebb0(v4)
|
||||||
|
; nextln:
|
||||||
|
; nextln: ebb1(v5: i32):
|
||||||
|
; nextln: return v5
|
||||||
|
; nextln: }
|
||||||
81
cranelift/filetests/licm/complex.cton
Normal file
81
cranelift/filetests/licm/complex.cton
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
test licm
|
||||||
|
|
||||||
|
function complex(i32) -> i32 {
|
||||||
|
|
||||||
|
ebb0(v0: i32):
|
||||||
|
v1 = iconst.i32 1
|
||||||
|
v19 = iconst.i32 4
|
||||||
|
v2 = iadd v1, v0
|
||||||
|
brz v0, ebb1(v1)
|
||||||
|
jump ebb3(v2)
|
||||||
|
|
||||||
|
ebb1(v3: i32):
|
||||||
|
v4 = iconst.i32 2
|
||||||
|
v5 = iadd v3, v2
|
||||||
|
v6 = iadd v4, v0
|
||||||
|
jump ebb2(v6)
|
||||||
|
|
||||||
|
ebb2(v7: i32):
|
||||||
|
v8 = iadd v7, v3
|
||||||
|
v9 = iadd v0, v2
|
||||||
|
brz v0, ebb1(v7)
|
||||||
|
jump ebb5(v8)
|
||||||
|
|
||||||
|
ebb3(v10: i32):
|
||||||
|
v11 = iconst.i32 3
|
||||||
|
v12 = iadd v10, v11
|
||||||
|
v13 = iadd v2, v11
|
||||||
|
jump ebb4(v11)
|
||||||
|
|
||||||
|
ebb4(v14: i32):
|
||||||
|
v15 = iadd v12, v2
|
||||||
|
brz v0, ebb3(v14)
|
||||||
|
jump ebb5(v14)
|
||||||
|
|
||||||
|
ebb5(v16: i32):
|
||||||
|
v17 = iadd v16, v1
|
||||||
|
v18 = iadd v1, v19
|
||||||
|
brz v0, ebb0(v18)
|
||||||
|
return v17
|
||||||
|
}
|
||||||
|
|
||||||
|
; sameln: function complex(i32) -> i32 {
|
||||||
|
; nextln: ebb6(v20: i32):
|
||||||
|
; nextln: v1 = iconst.i32 1
|
||||||
|
; nextln: v2 = iconst.i32 4
|
||||||
|
; nextln: v5 = iconst.i32 2
|
||||||
|
; nextln: v12 = iconst.i32 3
|
||||||
|
; nextln: v19 = iadd v1, v2
|
||||||
|
; nextln: jump ebb0(v20)
|
||||||
|
; nextln:
|
||||||
|
; nextln: ebb0(v0: i32):
|
||||||
|
; nextln: v3 = iadd.i32 v1, v0
|
||||||
|
; nextln: v7 = iadd.i32 v5, v0
|
||||||
|
; nextln: v10 = iadd v0, v3
|
||||||
|
; nextln: brz v0, ebb1(v1)
|
||||||
|
; nextln: v14 = iadd v3, v12
|
||||||
|
; nextln: jump ebb3(v3)
|
||||||
|
; nextln:
|
||||||
|
; nextln: ebb1(v4: i32):
|
||||||
|
; nextln: v6 = iadd v4, v3
|
||||||
|
; nextln: jump ebb2(v7)
|
||||||
|
; nextln:
|
||||||
|
; nextln: ebb2(v8: i32):
|
||||||
|
; nextln: v9 = iadd v8, v4
|
||||||
|
; nextln: brz.i32 v0, ebb1(v8)
|
||||||
|
; nextln: jump ebb5(v9)
|
||||||
|
; nextln:
|
||||||
|
; nextln: ebb3(v11: i32):
|
||||||
|
; nextln: v13 = iadd v11, v12
|
||||||
|
; nextln: jump ebb4(v12)
|
||||||
|
; nextln:
|
||||||
|
; nextln: ebb4(v15: i32):
|
||||||
|
; nextln: v16 = iadd.i32 v13, v3
|
||||||
|
; nextln: brz.i32 v0, ebb3(v15)
|
||||||
|
; nextln: jump ebb5(v15)
|
||||||
|
; nextln:
|
||||||
|
; nextln: ebb5(v17: i32):
|
||||||
|
; nextln: v18 = iadd v17, v1
|
||||||
|
; nextln: brz.i32 v0, ebb0(v19)
|
||||||
|
; nextln: return v18
|
||||||
|
; nextln: }
|
||||||
46
cranelift/filetests/licm/multiple-blocks.cton
Normal file
46
cranelift/filetests/licm/multiple-blocks.cton
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
test licm
|
||||||
|
|
||||||
|
function multiple_blocks(i32) -> i32 {
|
||||||
|
|
||||||
|
ebb0(v0: i32):
|
||||||
|
jump ebb1(v0)
|
||||||
|
|
||||||
|
ebb1(v10: i32):
|
||||||
|
v11 = iconst.i32 1
|
||||||
|
v12 = iconst.i32 2
|
||||||
|
v13 = iadd v11, v12
|
||||||
|
brz v10, ebb2(v10)
|
||||||
|
v15 = isub v10, v11
|
||||||
|
brz v15, ebb3(v15)
|
||||||
|
v14 = isub v10, v11
|
||||||
|
jump ebb1(v14)
|
||||||
|
|
||||||
|
ebb2(v20: i32):
|
||||||
|
return v20
|
||||||
|
|
||||||
|
ebb3(v30: i32):
|
||||||
|
v31 = iadd v11, v13
|
||||||
|
jump ebb1(v30)
|
||||||
|
|
||||||
|
}
|
||||||
|
; sameln:function multiple_blocks(i32) -> i32 {
|
||||||
|
; nextln: ebb0(v0: i32):
|
||||||
|
; nextln: v2 = iconst.i32 1
|
||||||
|
; nextln: v3 = iconst.i32 2
|
||||||
|
; nextln: v4 = iadd v2, v3
|
||||||
|
; nextln: v9 = iadd v2, v4
|
||||||
|
; nextln: jump ebb1(v0)
|
||||||
|
; nextln:
|
||||||
|
; nextln: ebb1(v1: i32):
|
||||||
|
; nextln: brz v1, ebb2(v1)
|
||||||
|
; nextln: v5 = isub v1, v2
|
||||||
|
; nextln: brz v5, ebb3(v5)
|
||||||
|
; nextln: v6 = isub v1, v2
|
||||||
|
; nextln: jump ebb1(v6)
|
||||||
|
; nextln:
|
||||||
|
; nextln: ebb2(v7: i32):
|
||||||
|
; nextln: return v7
|
||||||
|
; nextln:
|
||||||
|
; nextln: ebb3(v8: i32):
|
||||||
|
; nextln: jump ebb1(v8)
|
||||||
|
; nextln: }
|
||||||
52
cranelift/filetests/licm/nested_loops.cton
Normal file
52
cranelift/filetests/licm/nested_loops.cton
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
test licm
|
||||||
|
|
||||||
|
function nested_loops(i32) -> i32 {
|
||||||
|
|
||||||
|
ebb0(v0: i32):
|
||||||
|
v1 = iconst.i32 1
|
||||||
|
v2 = iconst.i32 2
|
||||||
|
v3 = iadd v1, v2
|
||||||
|
v4 = isub v0, v1
|
||||||
|
jump ebb1(v4,v4)
|
||||||
|
|
||||||
|
ebb1(v10: i32,v11: i32):
|
||||||
|
brz v11, ebb2(v10)
|
||||||
|
v12 = iconst.i32 1
|
||||||
|
v15 = iadd v12, v4
|
||||||
|
v13 = isub v11, v12
|
||||||
|
jump ebb1(v10,v13)
|
||||||
|
|
||||||
|
ebb2(v20: i32):
|
||||||
|
brz v20, ebb3(v20)
|
||||||
|
jump ebb0(v20)
|
||||||
|
|
||||||
|
ebb3(v30: i32):
|
||||||
|
return v30
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
; sameln:function nested_loops(i32) -> i32 {
|
||||||
|
; nextln: ebb4(v12: i32):
|
||||||
|
; nextln: v1 = iconst.i32 1
|
||||||
|
; nextln: v2 = iconst.i32 2
|
||||||
|
; nextln: v3 = iadd v1, v2
|
||||||
|
; nextln: v7 = iconst.i32 1
|
||||||
|
; nextln: jump ebb0(v12)
|
||||||
|
; nextln:
|
||||||
|
; nextln: ebb0(v0: i32):
|
||||||
|
; nextln: v4 = isub v0, v1
|
||||||
|
; nextln: v8 = iadd.i32 v7, v4
|
||||||
|
; nextln: jump ebb1(v4, v4)
|
||||||
|
; nextln:
|
||||||
|
; nextln: ebb1(v5: i32, v6: i32):
|
||||||
|
; nextln: brz v6, ebb2(v5)
|
||||||
|
; nextln: v9 = isub v6, v7
|
||||||
|
; nextln: jump ebb1(v5, v9)
|
||||||
|
; nextln:
|
||||||
|
; nextln: ebb2(v10: i32):
|
||||||
|
; nextln: brz v10, ebb3(v10)
|
||||||
|
; nextln: jump ebb0(v10)
|
||||||
|
; nextln:
|
||||||
|
; nextln: ebb3(v11: i32):
|
||||||
|
; nextln: return v11
|
||||||
|
; nextln: }
|
||||||
51
cranelift/src/filetest/licm.rs
Normal file
51
cranelift/src/filetest/licm.rs
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
//! Test command for testing the LICM pass.
|
||||||
|
//!
|
||||||
|
//! The `licm` test command places each function in a fresh compilation context, calls
//! `flowgraph()` on it and then runs the LICM pass (which verifies the function afterwards).
|
||||||
|
//!
|
||||||
|
//! The resulting function is sent to `filecheck`.
|
||||||
|
|
||||||
|
use cretonne::ir::Function;
|
||||||
|
use cretonne;
|
||||||
|
use cton_reader::TestCommand;
|
||||||
|
use filetest::subtest::{SubTest, Context, Result, run_filecheck};
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::fmt::Write;
|
||||||
|
use utils::pretty_error;
|
||||||
|
|
||||||
|
/// Stateless subtest type implementing `SubTest` for the `licm` test command.
struct TestLICM;
|
||||||
|
|
||||||
|
pub fn subtest(parsed: &TestCommand) -> Result<Box<SubTest>> {
|
||||||
|
assert_eq!(parsed.command, "licm");
|
||||||
|
if !parsed.options.is_empty() {
|
||||||
|
Err(format!("No options allowed on {}", parsed))
|
||||||
|
} else {
|
||||||
|
Ok(Box::new(TestLICM))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SubTest for TestLICM {
|
||||||
|
fn name(&self) -> Cow<str> {
|
||||||
|
Cow::from("licm")
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_mutating(&self) -> bool {
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run(&self, func: Cow<Function>, context: &Context) -> Result<()> {
|
||||||
|
// Create a compilation context, and drop in the function.
|
||||||
|
let mut comp_ctx = cretonne::Context::new();
|
||||||
|
comp_ctx.func = func.into_owned();
|
||||||
|
|
||||||
|
comp_ctx.flowgraph();
|
||||||
|
comp_ctx
|
||||||
|
.licm()
|
||||||
|
.map_err(|e| pretty_error(&comp_ctx.func, e))?;
|
||||||
|
|
||||||
|
let mut text = String::new();
|
||||||
|
write!(&mut text, "{}", &comp_ctx.func)
|
||||||
|
.map_err(|e| e.to_string())?;
|
||||||
|
run_filecheck(&text, context)
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -17,6 +17,7 @@ mod binemit;
|
|||||||
mod concurrent;
|
mod concurrent;
|
||||||
mod domtree;
|
mod domtree;
|
||||||
mod legalizer;
|
mod legalizer;
|
||||||
|
mod licm;
|
||||||
mod regalloc;
|
mod regalloc;
|
||||||
mod runner;
|
mod runner;
|
||||||
mod runone;
|
mod runone;
|
||||||
@@ -61,6 +62,7 @@ fn new_subtest(parsed: &TestCommand) -> subtest::Result<Box<subtest::SubTest>> {
|
|||||||
"domtree" => domtree::subtest(parsed),
|
"domtree" => domtree::subtest(parsed),
|
||||||
"verifier" => verifier::subtest(parsed),
|
"verifier" => verifier::subtest(parsed),
|
||||||
"legalizer" => legalizer::subtest(parsed),
|
"legalizer" => legalizer::subtest(parsed),
|
||||||
|
"licm" => licm::subtest(parsed),
|
||||||
"regalloc" => regalloc::subtest(parsed),
|
"regalloc" => regalloc::subtest(parsed),
|
||||||
"binemit" => binemit::subtest(parsed),
|
"binemit" => binemit::subtest(parsed),
|
||||||
"simple-gvn" => simple_gvn::subtest(parsed),
|
"simple-gvn" => simple_gvn::subtest(parsed),
|
||||||
|
|||||||
@@ -19,6 +19,7 @@ use regalloc;
|
|||||||
use result::CtonResult;
|
use result::CtonResult;
|
||||||
use verifier;
|
use verifier;
|
||||||
use simple_gvn::do_simple_gvn;
|
use simple_gvn::do_simple_gvn;
|
||||||
|
use licm::do_licm;
|
||||||
|
|
||||||
/// Persistent data structures and compilation pipeline.
|
/// Persistent data structures and compilation pipeline.
|
||||||
pub struct Context {
|
pub struct Context {
|
||||||
@@ -92,6 +93,15 @@ impl Context {
|
|||||||
self.verify(None).map_err(Into::into)
|
self.verify(None).map_err(Into::into)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Perform LICM on the function.
|
||||||
|
pub fn licm(&mut self) -> CtonResult {
|
||||||
|
do_licm(&mut self.func,
|
||||||
|
&mut self.cfg,
|
||||||
|
&mut self.domtree,
|
||||||
|
&mut self.loop_analysis);
|
||||||
|
self.verify(None).map_err(Into::into)
|
||||||
|
}
|
||||||
|
|
||||||
/// Run the register allocator.
|
/// Run the register allocator.
|
||||||
pub fn regalloc(&mut self, isa: &TargetIsa) -> CtonResult {
|
pub fn regalloc(&mut self, isa: &TargetIsa) -> CtonResult {
|
||||||
self.regalloc
|
self.regalloc
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ mod constant_hash;
|
|||||||
mod context;
|
mod context;
|
||||||
mod iterators;
|
mod iterators;
|
||||||
mod legalizer;
|
mod legalizer;
|
||||||
|
mod licm;
|
||||||
mod packed_option;
|
mod packed_option;
|
||||||
mod partition_slice;
|
mod partition_slice;
|
||||||
mod predicates;
|
mod predicates;
|
||||||
|
|||||||
208
lib/cretonne/src/licm.rs
Normal file
208
lib/cretonne/src/licm.rs
Normal file
@@ -0,0 +1,208 @@
|
|||||||
|
//! A Loop Invariant Code Motion optimization pass
|
||||||
|
|
||||||
|
use ir::{Function, Ebb, Inst, Value, Cursor, Type, InstBuilder, Layout};
|
||||||
|
use flowgraph::ControlFlowGraph;
|
||||||
|
use std::collections::HashSet;
|
||||||
|
use dominator_tree::DominatorTree;
|
||||||
|
use entity_list::{EntityList, ListPool};
|
||||||
|
use loop_analysis::{Loop, LoopAnalysis};
|
||||||
|
|
||||||
|
/// Performs the LICM pass by detecting loops within the CFG and moving
|
||||||
|
/// loop-invariant instructions out of them.
|
||||||
|
/// Changes the CFG and domtree in-place during the operation.
|
||||||
|
pub fn do_licm(func: &mut Function,
|
||||||
|
cfg: &mut ControlFlowGraph,
|
||||||
|
domtree: &mut DominatorTree,
|
||||||
|
loop_analysis: &mut LoopAnalysis) {
|
||||||
|
loop_analysis.compute(func, cfg, domtree);
|
||||||
|
for lp in loop_analysis.loops() {
|
||||||
|
// For each loop that we want to optimize we determine the set of loop-invariant
|
||||||
|
// instructions
|
||||||
|
let invariant_inst = remove_loop_invariant_instructions(lp, func, cfg, loop_analysis);
|
||||||
|
// Then we create the loop's pre-header and fill it with the invariant instructions
|
||||||
|
// Then we remove the invariant instructions from the loop body
|
||||||
|
if invariant_inst.len() > 0 {
|
||||||
|
// If the loop has a natural pre-header we use it, otherwise we create it.
|
||||||
|
let mut pos;
|
||||||
|
match has_pre_header(&func.layout,
|
||||||
|
cfg,
|
||||||
|
domtree,
|
||||||
|
loop_analysis.loop_header(lp).clone()) {
|
||||||
|
None => {
|
||||||
|
let pre_header = create_pre_header(loop_analysis.loop_header(lp).clone(),
|
||||||
|
func,
|
||||||
|
cfg,
|
||||||
|
domtree);
|
||||||
|
pos = Cursor::new(&mut func.layout);
|
||||||
|
pos.goto_bottom(pre_header);
|
||||||
|
pos.prev_inst();
|
||||||
|
}
|
||||||
|
// If there is a natural pre-header we insert new instructions just before the
|
||||||
|
// related jumping instruction (which is not necessarily at the end).
|
||||||
|
Some((_, last_inst)) => {
|
||||||
|
pos = Cursor::new(&mut func.layout);
|
||||||
|
pos.goto_inst(last_inst);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
// The last instruction of the pre-header is the termination instruction (usually
|
||||||
|
// a jump) so we need to insert just before this.
|
||||||
|
for inst in invariant_inst.iter() {
|
||||||
|
pos.insert_inst(inst.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// We have to recompute the domtree to account for the changes
|
||||||
|
cfg.compute(func);
|
||||||
|
domtree.compute(func, cfg);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert a pre-header before the header, modifying the function layout and CFG to reflect it.
|
||||||
|
// A jump instruction to the header is placed at the end of the pre-header.
|
||||||
|
fn create_pre_header(header: Ebb,
|
||||||
|
func: &mut Function,
|
||||||
|
cfg: &mut ControlFlowGraph,
|
||||||
|
domtree: &DominatorTree)
|
||||||
|
-> Ebb {
|
||||||
|
let pool = &mut ListPool::<Value>::new();
|
||||||
|
let header_args_values: Vec<Value> = func.dfg
|
||||||
|
.ebb_args(header)
|
||||||
|
.into_iter()
|
||||||
|
.map(|val| *val)
|
||||||
|
.collect();
|
||||||
|
let header_args_types: Vec<Type> = header_args_values
|
||||||
|
.clone()
|
||||||
|
.into_iter()
|
||||||
|
.map(|val| func.dfg.value_type(val))
|
||||||
|
.collect();
|
||||||
|
let pre_header = func.dfg.make_ebb();
|
||||||
|
let mut pre_header_args_value: EntityList<Value> = EntityList::new();
|
||||||
|
for typ in header_args_types {
|
||||||
|
pre_header_args_value.push(func.dfg.append_ebb_arg(pre_header, typ), pool);
|
||||||
|
}
|
||||||
|
for &(_, last_inst) in cfg.get_predecessors(header) {
|
||||||
|
// We only follow normal edges (not the back edges)
|
||||||
|
if !domtree.ebb_dominates(header.clone(), last_inst, &func.layout) {
|
||||||
|
change_branch_jump_destination(last_inst, pre_header, func);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
let mut pos = Cursor::new(&mut func.layout);
|
||||||
|
pos.goto_top(header);
|
||||||
|
// Inserts the pre-header at the right place in the layout.
|
||||||
|
pos.insert_ebb(pre_header);
|
||||||
|
pos.next_inst();
|
||||||
|
func.dfg
|
||||||
|
.ins(&mut pos)
|
||||||
|
.jump(header, pre_header_args_value.as_slice(pool));
|
||||||
|
}
|
||||||
|
pre_header
|
||||||
|
}
|
||||||
|
|
||||||
|
// Detects if a loop header has a natural pre-header.
|
||||||
|
//
|
||||||
|
// A loop header has a pre-header if there is only one predecessor that the header doesn't
|
||||||
|
// dominate.
|
||||||
|
// Returns the pre-header Ebb and the instruction jumping to the header.
|
||||||
|
fn has_pre_header(layout: &Layout,
|
||||||
|
cfg: &ControlFlowGraph,
|
||||||
|
domtree: &DominatorTree,
|
||||||
|
header: Ebb)
|
||||||
|
-> Option<(Ebb, Inst)> {
|
||||||
|
let mut result = None;
|
||||||
|
let mut found = false;
|
||||||
|
for &(pred_ebb, last_inst) in cfg.get_predecessors(header) {
|
||||||
|
// We only count normal edges (not the back edges)
|
||||||
|
if !domtree.ebb_dominates(header.clone(), last_inst, layout) {
|
||||||
|
if found {
|
||||||
|
// We have already found one, there are more than one
|
||||||
|
return None;
|
||||||
|
} else {
|
||||||
|
result = Some((pred_ebb, last_inst));
|
||||||
|
found = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Change the destination of a jump or branch instruction. Does nothing if called with a non-jump
|
||||||
|
// or non-branch instruction.
|
||||||
|
fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function) {
|
||||||
|
match func.dfg[inst].branch_destination_mut() {
|
||||||
|
None => (),
|
||||||
|
Some(instruction_dest) => *instruction_dest = new_ebb,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Traverses a loop in reverse post-order from a header EBB and identify lopp-invariant
|
||||||
|
// instructions. Theseloop-invariant instructions are then removed from the code and returned
|
||||||
|
// (in reverse post-order) for later use.
|
||||||
|
fn remove_loop_invariant_instructions(lp: Loop,
|
||||||
|
func: &mut Function,
|
||||||
|
cfg: &ControlFlowGraph,
|
||||||
|
loop_analysis: &LoopAnalysis)
|
||||||
|
-> Vec<Inst> {
|
||||||
|
let mut loop_values: HashSet<Value> = HashSet::new();
|
||||||
|
let mut invariant_inst: Vec<Inst> = Vec::new();
|
||||||
|
let mut pos = Cursor::new(&mut func.layout);
|
||||||
|
// We traverse the loop EBB in reverse post-order.
|
||||||
|
for ebb in postorder_ebbs_loop(loop_analysis, cfg, lp).iter().rev() {
|
||||||
|
// Arguments of the EBB are loop values
|
||||||
|
for val in func.dfg.ebb_args(*ebb) {
|
||||||
|
loop_values.insert(val.clone());
|
||||||
|
}
|
||||||
|
pos.goto_top(*ebb);
|
||||||
|
while let Some(inst) = pos.next_inst() {
|
||||||
|
if func.dfg.has_results(inst) &&
|
||||||
|
func.dfg
|
||||||
|
.inst_args(inst)
|
||||||
|
.into_iter()
|
||||||
|
.all(|arg| !loop_values.contains(arg)) {
|
||||||
|
// If all the instruction's argument are defined outside the loop
|
||||||
|
// then this instruction is loop-invariant
|
||||||
|
invariant_inst.push(inst);
|
||||||
|
// We remove it from the loop
|
||||||
|
pos.remove_inst();
|
||||||
|
pos.prev_inst();
|
||||||
|
} else {
|
||||||
|
// If the instruction is not loop-invariant we push its results in the set of
|
||||||
|
// loop values
|
||||||
|
for out in func.dfg.inst_results(inst) {
|
||||||
|
loop_values.insert(out.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
invariant_inst
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return ebbs from a loop in post-order, starting from an entry point in the block.
|
||||||
|
pub fn postorder_ebbs_loop(loop_analysis: &LoopAnalysis,
|
||||||
|
cfg: &ControlFlowGraph,
|
||||||
|
lp: Loop)
|
||||||
|
-> Vec<Ebb> {
|
||||||
|
let mut grey = HashSet::new();
|
||||||
|
let mut black = HashSet::new();
|
||||||
|
let mut stack = vec![loop_analysis.loop_header(lp).clone()];
|
||||||
|
let mut postorder = Vec::new();
|
||||||
|
|
||||||
|
while !stack.is_empty() {
|
||||||
|
let node = stack.pop().unwrap();
|
||||||
|
if !grey.contains(&node) {
|
||||||
|
// This is a white node. Mark it as gray.
|
||||||
|
grey.insert(node);
|
||||||
|
stack.push(node);
|
||||||
|
// Get any children we've never seen before.
|
||||||
|
for child in cfg.get_successors(node) {
|
||||||
|
if loop_analysis.is_in_loop(child.clone(), lp) && !grey.contains(child) {
|
||||||
|
stack.push(child.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if !black.contains(&node) {
|
||||||
|
postorder.push(node.clone());
|
||||||
|
black.insert(node.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
postorder
|
||||||
|
}
|
||||||
@@ -129,13 +129,13 @@ impl LoopAnalysis {
|
|||||||
domtree: &DominatorTree,
|
domtree: &DominatorTree,
|
||||||
layout: &Layout) {
|
layout: &Layout) {
|
||||||
// We traverse the CFg in reverse postorder
|
// We traverse the CFg in reverse postorder
|
||||||
for ebb in cfg.postorder_ebbs().iter().rev() {
|
for &ebb in cfg.postorder_ebbs().iter().rev() {
|
||||||
for &(_, pred_inst) in cfg.get_predecessors(*ebb) {
|
for &(_, pred_inst) in cfg.get_predecessors(ebb) {
|
||||||
// If the ebb dominates one of its predecessors it is a back edge
|
// If the ebb dominates one of its predecessors it is a back edge
|
||||||
if domtree.ebb_dominates(ebb.clone(), pred_inst, layout) {
|
if domtree.ebb_dominates(ebb, pred_inst, layout) {
|
||||||
// This ebb is a loop header, so we create its associated loop
|
// This ebb is a loop header, so we create its associated loop
|
||||||
let lp = self.loops.push(LoopData::new(*ebb, None));
|
let lp = self.loops.push(LoopData::new(ebb, None));
|
||||||
self.ebb_loop_map[*ebb] = lp.into();
|
self.ebb_loop_map[ebb] = lp.into();
|
||||||
break;
|
break;
|
||||||
// We break because we only need one back edge to identify a loop header.
|
// We break because we only need one back edge to identify a loop header.
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user