Allow readonly nontrapping loads to be hoisted by licm (#727)

Lars T Hansen
2019-04-09 11:40:23 +02:00
committed by Benjamin Bouvier
parent 9062810c34
commit aa926e9097
4 changed files with 163 additions and 3 deletions
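The safety argument behind the change is small: a load may move out of a loop only if executing it early can neither trap (the notrap flag) nor observe a value that a store inside the loop would have produced (the readonly flag). Below is a minimal standalone sketch of that rule in Rust; the MemFlags struct and field names are illustrative stand-ins, not the cranelift-codegen types.

// Standalone model of the hoisting rule in this commit.
// This MemFlags is an illustrative stand-in, not cranelift's type.
#[derive(Clone, Copy)]
struct MemFlags {
    notrap: bool,   // the load is guaranteed not to trap
    readonly: bool, // the loaded memory never changes while accessible
}

// A load is unsafe to hoist unless it is both readonly and nontrapping:
// hoisting a trapping load can introduce a trap the original program never
// executes, and hoisting a non-readonly load can skip past a store in the
// loop that would have changed the loaded value.
fn is_unsafe_load(flags: MemFlags) -> bool {
    !flags.readonly || !flags.notrap
}

fn main() {
    // Truth table; the three filetests added by this commit exercise
    // exactly these rows.
    assert!(!is_unsafe_load(MemFlags { notrap: true, readonly: true })); // hoisted
    assert!(is_unsafe_load(MemFlags { notrap: true, readonly: false })); // stays in loop
    assert!(is_unsafe_load(MemFlags { notrap: false, readonly: true })); // stays in loop
}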

@@ -5,7 +5,9 @@ use crate::dominator_tree::DominatorTree;
 use crate::entity::{EntityList, ListPool};
 use crate::flowgraph::{BasicBlock, ControlFlowGraph};
 use crate::fx::FxHashSet;
-use crate::ir::{DataFlowGraph, Ebb, Function, Inst, InstBuilder, Layout, Opcode, Type, Value};
+use crate::ir::{
+    DataFlowGraph, Ebb, Function, Inst, InstBuilder, InstructionData, Layout, Opcode, Type, Value,
+};
 use crate::isa::TargetIsa;
 use crate::loop_analysis::{Loop, LoopAnalysis};
 use crate::timing;
@@ -145,8 +147,7 @@ fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function)
 /// Test whether the given opcode is unsafe to even consider for LICM.
 fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
-    opcode.can_load()
-        || opcode.can_store()
+    opcode.can_store()
         || opcode.is_call()
        || opcode.is_branch()
        || opcode.is_terminator()
@@ -156,12 +157,25 @@ fn trivially_unsafe_for_licm(opcode: Opcode) -> bool {
         || opcode.writes_cpu_flags()
 }
 
+fn is_unsafe_load(inst_data: &InstructionData) -> bool {
+    match *inst_data {
+        InstructionData::Load { flags, .. } | InstructionData::LoadComplex { flags, .. } => {
+            !flags.readonly() || !flags.notrap()
+        }
+        _ => inst_data.opcode().can_load(),
+    }
+}
+
 /// Test whether the given instruction is loop-invariant.
 fn is_loop_invariant(inst: Inst, dfg: &DataFlowGraph, loop_values: &FxHashSet<Value>) -> bool {
     if trivially_unsafe_for_licm(dfg[inst].opcode()) {
         return false;
     }
+
+    if is_unsafe_load(&dfg[inst]) {
+        return false;
+    }
 
     let inst_args = dfg.inst_args(inst);
     for arg in inst_args {
         let arg = dfg.resolve_aliases(*arg);
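For orientation, the new predicate is only one of several gates: an instruction is hoisted only if its opcode passes trivially_unsafe_for_licm, the instruction itself passes is_unsafe_load (whose catch-all arm keeps every other load format conservatively blocked), and all of its arguments are defined outside the loop. The sketch below models that composition over a toy IR; every name in it is illustrative, not cranelift's.

use std::collections::HashSet;

// Toy IR for illustration only; values are plain u32 ids.
#[derive(Clone, Copy)]
struct MemFlags { notrap: bool, readonly: bool }

enum Inst {
    Load { flags: MemFlags, addr: u32 },
    Store { value: u32, addr: u32 },
    Iadd { a: u32, b: u32 },
}

impl Inst {
    fn args(&self) -> Vec<u32> {
        match *self {
            Inst::Load { addr, .. } => vec![addr],
            Inst::Store { value, addr } => vec![value, addr],
            Inst::Iadd { a, b } => vec![a, b],
        }
    }
}

// Mirrors the shape of the patched is_loop_invariant: stores are rejected
// outright (the opcode-level check), loads are rejected unless they are
// readonly and nontrapping (the new per-instruction check), and finally
// every argument must be defined outside the loop.
fn is_loop_invariant(inst: &Inst, loop_values: &HashSet<u32>) -> bool {
    match *inst {
        Inst::Store { .. } => return false,
        Inst::Load { flags, .. } if !flags.readonly || !flags.notrap => return false,
        _ => {}
    }
    inst.args().iter().all(|a| !loop_values.contains(a))
}

fn main() {
    let loop_values = HashSet::from([2u32]); // v2 is defined inside the loop
    let ro = MemFlags { notrap: true, readonly: true };
    // Readonly nontrapping load from a loop-invariant address: hoistable.
    assert!(is_loop_invariant(&Inst::Load { flags: ro, addr: 5 }, &loop_values));
    // Pure arithmetic that consumes a loop-defined value: not hoistable.
    assert!(!is_loop_invariant(&Inst::Iadd { a: 2, b: 6 }, &loop_values));
}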

@@ -0,0 +1,48 @@
test licm
target x86_64
;; A nontrapping readonly load from an address that is not loop-dependent
;; should be hoisted out of the loop.
function %hoist_load(i32, i64 vmctx) -> i32 {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned readonly gv0
    heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32

ebb0(v0: i32, v1: i64):
    jump ebb1(v0, v1)

ebb1(v2: i32, v3: i64):
    v4 = iconst.i32 1
    v5 = heap_addr.i64 heap0, v4, 1
    v6 = load.i32 notrap aligned readonly v5
    v7 = iadd v2, v6
    brz v2, ebb2(v2)
    v8 = isub v2, v4
    jump ebb1(v8, v3)

ebb2(v9: i32):
    return v9
}
; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast {
; nextln:     gv0 = vmctx
; nextln:     gv1 = load.i64 notrap aligned readonly gv0
; nextln:     heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32
; nextln:
; nextln: ebb0(v0: i32, v1: i64):
; nextln:     v4 = iconst.i32 1
; nextln:     v5 = heap_addr.i64 heap0, v4, 1
; nextln:     v6 = load.i32 notrap aligned readonly v5
; nextln:     jump ebb1(v0, v1)
; nextln:
; nextln: ebb1(v2: i32, v3: i64):
; nextln:     v7 = iadd v2, v6
; nextln:     brz v2, ebb2(v2)
; nextln:     v8 = isub v2, v4
; nextln:     jump ebb1(v8, v3)
; nextln:
; nextln: ebb2(v9: i32):
; nextln:     return v9
; nextln: }

@@ -0,0 +1,49 @@
test licm
target x86_64
;; A nontrapping but possibly-not-readonly load from an address that is
;; not loop-dependent should *not* be hoisted out of the loop, though the
;; address computation can be.
function %hoist_load(i32, i64 vmctx) -> i32 {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned readonly gv0
    heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32

ebb0(v0: i32, v1: i64):
    v4 = iconst.i32 1
    v5 = heap_addr.i64 heap0, v4, 1
    jump ebb1(v0, v1)

ebb1(v2: i32, v3: i64):
    v6 = load.i32 notrap aligned v5
    v7 = iadd v2, v6
    brz v2, ebb2(v2)
    v8 = isub v2, v4
    jump ebb1(v8, v3)

ebb2(v9: i32):
    return v9
}
; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast {
; nextln:     gv0 = vmctx
; nextln:     gv1 = load.i64 notrap aligned readonly gv0
; nextln:     heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32
; nextln:
; nextln: ebb0(v0: i32, v1: i64):
; nextln:     v4 = iconst.i32 1
; nextln:     v5 = heap_addr.i64 heap0, v4, 1
; nextln:     jump ebb1(v0, v1)
; nextln:
; nextln: ebb1(v2: i32, v3: i64):
; nextln:     v6 = load.i32 notrap aligned v5
; nextln:     v7 = iadd v2, v6
; nextln:     brz v2, ebb2(v2)
; nextln:     v8 = isub v2, v4
; nextln:     jump ebb1(v8, v3)
; nextln:
; nextln: ebb2(v9: i32):
; nextln:     return v9
; nextln: }

@@ -0,0 +1,49 @@
test licm
target x86_64
;; A maybe-trapping readonly load from an address that is not
;; loop-dependent should *not* be hoisted out of the loop, though the
;; address computation can be hoisted.
function %hoist_load(i32, i64 vmctx) -> i32 {
    gv0 = vmctx
    gv1 = load.i64 notrap aligned readonly gv0
    heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32

ebb0(v0: i32, v1: i64):
    jump ebb1(v0, v1)

ebb1(v2: i32, v3: i64):
    v4 = iconst.i32 1
    v5 = heap_addr.i64 heap0, v4, 1
    v6 = load.i32 aligned readonly v5
    v7 = iadd v2, v6
    brz v2, ebb2(v2)
    v8 = isub v2, v4
    jump ebb1(v8, v3)

ebb2(v9: i32):
    return v9
}
; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast {
; nextln:     gv0 = vmctx
; nextln:     gv1 = load.i64 notrap aligned readonly gv0
; nextln:     heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32
; nextln:
; nextln: ebb0(v0: i32, v1: i64):
; nextln:     v4 = iconst.i32 1
; nextln:     v5 = heap_addr.i64 heap0, v4, 1
; nextln:     jump ebb1(v0, v1)
; nextln:
; nextln: ebb1(v2: i32, v3: i64):
; nextln:     v6 = load.i32 aligned readonly v5
; nextln:     v7 = iadd v2, v6
; nextln:     brz v2, ebb2(v2)
; nextln:     v8 = isub v2, v4
; nextln:     jump ebb1(v8, v3)
; nextln:
; nextln: ebb2(v9: i32):
; nextln:     return v9
; nextln: }