From aa926e90979fb38f5323867ba39753d5061a14f7 Mon Sep 17 00:00:00 2001 From: Lars T Hansen Date: Tue, 9 Apr 2019 11:40:23 +0200 Subject: [PATCH] Allow readonly nontrapping loads to be hoisted by licm (#727) --- cranelift/codegen/src/licm.rs | 20 ++++++-- .../filetests/licm/load_readonly_notrap.clif | 48 ++++++++++++++++++ .../filetests/licm/reject_load_notrap.clif | 49 +++++++++++++++++++ .../filetests/licm/reject_load_readonly.clif | 49 +++++++++++++++++++ 4 files changed, 163 insertions(+), 3 deletions(-) create mode 100644 cranelift/filetests/filetests/licm/load_readonly_notrap.clif create mode 100644 cranelift/filetests/filetests/licm/reject_load_notrap.clif create mode 100644 cranelift/filetests/filetests/licm/reject_load_readonly.clif diff --git a/cranelift/codegen/src/licm.rs b/cranelift/codegen/src/licm.rs index cb3dbd87a9..14c4630dc7 100644 --- a/cranelift/codegen/src/licm.rs +++ b/cranelift/codegen/src/licm.rs @@ -5,7 +5,9 @@ use crate::dominator_tree::DominatorTree; use crate::entity::{EntityList, ListPool}; use crate::flowgraph::{BasicBlock, ControlFlowGraph}; use crate::fx::FxHashSet; -use crate::ir::{DataFlowGraph, Ebb, Function, Inst, InstBuilder, Layout, Opcode, Type, Value}; +use crate::ir::{ + DataFlowGraph, Ebb, Function, Inst, InstBuilder, InstructionData, Layout, Opcode, Type, Value, +}; use crate::isa::TargetIsa; use crate::loop_analysis::{Loop, LoopAnalysis}; use crate::timing; @@ -145,8 +147,7 @@ fn change_branch_jump_destination(inst: Inst, new_ebb: Ebb, func: &mut Function) /// Test whether the given opcode is unsafe to even consider for LICM. 
fn trivially_unsafe_for_licm(opcode: Opcode) -> bool { - opcode.can_load() - || opcode.can_store() + opcode.can_store() || opcode.is_call() || opcode.is_branch() || opcode.is_terminator() @@ -156,12 +157,25 @@ fn trivially_unsafe_for_licm(opcode: Opcode) -> bool { || opcode.writes_cpu_flags() } +fn is_unsafe_load(inst_data: &InstructionData) -> bool { + match *inst_data { + InstructionData::Load { flags, .. } | InstructionData::LoadComplex { flags, .. } => { + !flags.readonly() || !flags.notrap() + } + _ => inst_data.opcode().can_load(), + } +} + /// Test whether the given instruction is loop-invariant. fn is_loop_invariant(inst: Inst, dfg: &DataFlowGraph, loop_values: &FxHashSet<Value>) -> bool { if trivially_unsafe_for_licm(dfg[inst].opcode()) { return false; } + if is_unsafe_load(&dfg[inst]) { + return false; + } + let inst_args = dfg.inst_args(inst); for arg in inst_args { let arg = dfg.resolve_aliases(*arg); diff --git a/cranelift/filetests/filetests/licm/load_readonly_notrap.clif b/cranelift/filetests/filetests/licm/load_readonly_notrap.clif new file mode 100644 index 0000000000..9a9d2dcbfa --- /dev/null +++ b/cranelift/filetests/filetests/licm/load_readonly_notrap.clif @@ -0,0 +1,48 @@ +test licm + +target x86_64 + +;; Nontrapping readonly load from address that is not loop-dependent +;; should be hoisted out of loop. 
+ +function %hoist_load(i32, i64 vmctx) -> i32 { + gv0 = vmctx + gv1 = load.i64 notrap aligned readonly gv0 + heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 + +ebb0(v0: i32, v1: i64): + jump ebb1(v0, v1) + +ebb1(v2: i32, v3: i64): + v4 = iconst.i32 1 + v5 = heap_addr.i64 heap0, v4, 1 + v6 = load.i32 notrap aligned readonly v5 + v7 = iadd v2, v6 + brz v2, ebb2(v2) + v8 = isub v2, v4 + jump ebb1(v8, v3) + +ebb2(v9: i32): + return v9 +} + +; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast { +; nextln: gv0 = vmctx +; nextln: gv1 = load.i64 notrap aligned readonly gv0 +; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 +; nextln: +; nextln: ebb0(v0: i32, v1: i64): +; nextln: v4 = iconst.i32 1 +; nextln: v5 = heap_addr.i64 heap0, v4, 1 +; nextln: v6 = load.i32 notrap aligned readonly v5 +; nextln: jump ebb1(v0, v1) +; nextln: +; nextln: ebb1(v2: i32, v3: i64): +; nextln: v7 = iadd v2, v6 +; nextln: brz v2, ebb2(v2) +; nextln: v8 = isub v2, v4 +; nextln: jump ebb1(v8, v3) +; nextln: +; nextln: ebb2(v9: i32): +; nextln: return v9 +; nextln: } diff --git a/cranelift/filetests/filetests/licm/reject_load_notrap.clif b/cranelift/filetests/filetests/licm/reject_load_notrap.clif new file mode 100644 index 0000000000..1d26faa71f --- /dev/null +++ b/cranelift/filetests/filetests/licm/reject_load_notrap.clif @@ -0,0 +1,49 @@ +test licm + +target x86_64 + +;; Nontrapping possibly-not-readonly load from address that is not +;; loop-dependent should *not* be hoisted out of loop, though the +;; address computation can be. 
+ +function %hoist_load(i32, i64 vmctx) -> i32 { + gv0 = vmctx + gv1 = load.i64 notrap aligned readonly gv0 + heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 + +ebb0(v0: i32, v1: i64): + v4 = iconst.i32 1 + v5 = heap_addr.i64 heap0, v4, 1 + jump ebb1(v0, v1) + +ebb1(v2: i32, v3: i64): + v6 = load.i32 notrap aligned v5 + v7 = iadd v2, v6 + brz v2, ebb2(v2) + v8 = isub v2, v4 + jump ebb1(v8, v3) + +ebb2(v9: i32): + return v9 +} + +; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast { +; nextln: gv0 = vmctx +; nextln: gv1 = load.i64 notrap aligned readonly gv0 +; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 +; nextln: +; nextln: ebb0(v0: i32, v1: i64): +; nextln: v4 = iconst.i32 1 +; nextln: v5 = heap_addr.i64 heap0, v4, 1 +; nextln: jump ebb1(v0, v1) +; nextln: +; nextln: ebb1(v2: i32, v3: i64): +; nextln: v6 = load.i32 notrap aligned v5 +; nextln: v7 = iadd v2, v6 +; nextln: brz v2, ebb2(v2) +; nextln: v8 = isub v2, v4 +; nextln: jump ebb1(v8, v3) +; nextln: +; nextln: ebb2(v9: i32): +; nextln: return v9 +; nextln: } diff --git a/cranelift/filetests/filetests/licm/reject_load_readonly.clif b/cranelift/filetests/filetests/licm/reject_load_readonly.clif new file mode 100644 index 0000000000..5b6a411712 --- /dev/null +++ b/cranelift/filetests/filetests/licm/reject_load_readonly.clif @@ -0,0 +1,49 @@ +test licm + +target x86_64 + +;; Maybe-trapping readonly load from address that is not +;; loop-dependent should *not* be hoisted out of loop, though the +;; address computation can be hoisted. 
+ +function %hoist_load(i32, i64 vmctx) -> i32 { + gv0 = vmctx + gv1 = load.i64 notrap aligned readonly gv0 + heap0 = static gv1, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32 + +ebb0(v0: i32, v1: i64): + jump ebb1(v0, v1) + +ebb1(v2: i32, v3: i64): + v4 = iconst.i32 1 + v5 = heap_addr.i64 heap0, v4, 1 + v6 = load.i32 aligned readonly v5 + v7 = iadd v2, v6 + brz v2, ebb2(v2) + v8 = isub v2, v4 + jump ebb1(v8, v3) + +ebb2(v9: i32): + return v9 +} + +; sameln: function %hoist_load(i32, i64 vmctx) -> i32 fast { +; nextln: gv0 = vmctx +; nextln: gv1 = load.i64 notrap aligned readonly gv0 +; nextln: heap0 = static gv1, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32 +; nextln: +; nextln: ebb0(v0: i32, v1: i64): +; nextln: v4 = iconst.i32 1 +; nextln: v5 = heap_addr.i64 heap0, v4, 1 +; nextln: jump ebb1(v0, v1) +; nextln: +; nextln: ebb1(v2: i32, v3: i64): +; nextln: v6 = load.i32 aligned readonly v5 +; nextln: v7 = iadd v2, v6 +; nextln: brz v2, ebb2(v2) +; nextln: v8 = isub v2, v4 +; nextln: jump ebb1(v8, v3) +; nextln: +; nextln: ebb2(v9: i32): +; nextln: return v9 +; nextln: }