Cranelift/egraph mid-end: support merging effectful-but-idempotent ops (#5594)
* Support mergeable-but-side-effectful (idempotent) operations in general in the egraph's GVN. This mirrors the similar change made in #5534.
* Add tests for the egraph case.
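The idea is easiest to see with a toy model. The sketch below is not Cranelift code (the `Op` enum, `is_mergeable`, and `gvn` names are made-up stand-ins); it only illustrates the policy this change adds: a GVN table may fold a duplicate instruction even when it has a side effect, provided that side effect is idempotent (for example an identical bounds-check trap), because keeping only the first instance preserves behavior. Non-idempotent effects such as calls must keep every instance.

```rust
// Toy sketch of GVN over effectful-but-idempotent ops; not Cranelift's API.
use std::collections::HashMap;

#[derive(Clone, PartialEq, Eq, Hash, Debug)]
enum Op {
    // Pure arithmetic: always mergeable.
    Add(u32, u32),
    // Traps identically on every duplicate: side-effecting but idempotent.
    TrapIfOutOfBounds(u32),
    // An arbitrary call: side-effecting and *not* idempotent.
    Call(u32),
}

fn is_mergeable(op: &Op) -> bool {
    match op {
        Op::Add(..) => true,
        Op::TrapIfOutOfBounds(..) => true, // idempotent side effect
        Op::Call(..) => false,             // must keep every instance
    }
}

/// For each instruction index, return the index whose result should be used.
fn gvn(insts: &[Op]) -> Vec<usize> {
    let mut map: HashMap<&Op, usize> = HashMap::new();
    insts
        .iter()
        .enumerate()
        .map(|(i, op)| {
            if !is_mergeable(op) {
                return i; // keep this instance as-is
            }
            // Reuse the first identical instance, or register this one.
            *map.entry(op).or_insert(i)
        })
        .collect()
}

fn main() {
    let insts = vec![
        Op::TrapIfOutOfBounds(0),
        Op::Add(1, 2),
        Op::TrapIfOutOfBounds(0), // merged with instruction 0
        Op::Call(3),
        Op::Call(3), // kept: calls are not idempotent
    ];
    assert_eq!(gvn(&insts), vec![0, 1, 0, 3, 4]);
}
```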
@@ -7,7 +7,7 @@ use crate::dominator_tree::DominatorTree;
 use crate::egraph::domtree::DomTreeWithChildren;
 use crate::egraph::elaborate::Elaborator;
 use crate::fx::FxHashSet;
-use crate::inst_predicates::is_pure_for_egraph;
+use crate::inst_predicates::{is_mergeable_for_egraph, is_pure_for_egraph};
 use crate::ir::{
     DataFlowGraph, Function, Inst, InstructionData, Type, Value, ValueDef, ValueListPool,
 };
@@ -284,17 +284,59 @@ where
     /// the layout.
     fn optimize_skeleton_inst(&mut self, inst: Inst) -> bool {
         self.stats.skeleton_inst += 1;
-        // Not pure, but may still be a load or store:
-        // process it to see if we can optimize it.
-        if let Some(new_result) =
+
+        // First, can we try to deduplicate? We need to keep some copy
+        // of the instruction around because it's side-effecting, but
+        // we may be able to reuse an earlier instance of it.
+        if is_mergeable_for_egraph(self.func, inst) {
+            let result = self.func.dfg.inst_results(inst)[0];
+            trace!(" -> mergeable side-effecting op {}", inst);
+            let inst = NewOrExistingInst::Existing(inst);
+            let gvn_context = GVNContext {
+                union_find: self.eclasses,
+                value_lists: &self.func.dfg.value_lists,
+            };
+
+            // Does this instruction already exist? If so, add entries to
+            // the value-map to rewrite uses of its results to the results
+            // of the original (existing) instruction. If not, optimize
+            // the new instruction.
+            let key = inst.get_inst_key(&self.func.dfg);
+            if let Some(&orig_result) = self.gvn_map.get(&key, &gvn_context) {
+                // Hit in GVN map -- reuse value.
+                self.value_to_opt_value[result] = orig_result;
+                self.eclasses.union(orig_result, result);
+                trace!(" -> merges result {} to {}", result, orig_result);
+                true
+            } else {
+                // Otherwise, insert it into the value-map.
+                self.value_to_opt_value[result] = result;
+                self.gvn_map.insert(key, result, &gvn_context);
+                trace!(" -> inserts as new (no GVN)");
+                false
+            }
+        }
+        // Otherwise, if a load or store, process it with the alias
+        // analysis to see if we can optimize it (rewrite in terms of
+        // an earlier load or stored value).
+        else if let Some(new_result) =
             self.alias_analysis
                 .process_inst(self.func, self.alias_analysis_state, inst)
         {
             self.stats.alias_analysis_removed += 1;
             let result = self.func.dfg.first_result(inst);
+            trace!(
+                " -> inst {} has result {} replaced with {}",
+                inst,
+                result,
+                new_result
+            );
             self.value_to_opt_value[result] = new_result;
             true
-        } else {
+        }
+        // Otherwise, generic side-effecting op -- always keep it, and
+        // set its results to identity-map to original values.
+        else {
             // Set all results to identity-map to themselves
             // in the value-to-opt-value map.
             for &result in self.func.dfg.inst_results(inst) {
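For readers unfamiliar with the eclass bookkeeping in the hunk above: when a mergeable side-effecting instruction hits the GVN map, its result is redirected to the original instruction's result and the two values are unioned so later rewrites treat them as one equivalence class. The sketch below shows just that step with a simplified union-find; `Value` and `UnionFind` here are stand-ins, not Cranelift's actual types.

```rust
// Minimal union-find illustrating `self.eclasses.union(orig_result, result)`.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct Value(usize);

struct UnionFind {
    parent: Vec<usize>,
}

impl UnionFind {
    fn new(n: usize) -> Self {
        Self { parent: (0..n).collect() }
    }
    fn find(&mut self, v: Value) -> Value {
        let mut x = v.0;
        while self.parent[x] != x {
            self.parent[x] = self.parent[self.parent[x]]; // path halving
            x = self.parent[x];
        }
        Value(x)
    }
    fn union(&mut self, a: Value, b: Value) {
        let (ra, rb) = (self.find(a), self.find(b));
        if ra != rb {
            self.parent[rb.0] = ra.0;
        }
    }
}

fn main() {
    let mut eclasses = UnionFind::new(4);
    let (orig_result, dup_result) = (Value(1), Value(3));
    // The duplicate hit the GVN map: union its result with the original so
    // later rewrites treat them as the same eclass.
    eclasses.union(orig_result, dup_result);
    assert_eq!(eclasses.find(dup_result), eclasses.find(orig_result));
}
```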
@@ -74,6 +74,25 @@ pub fn is_pure_for_egraph(func: &Function, inst: Inst) -> bool {
     has_one_result && (is_readonly_load || (!op.can_load() && !trivially_has_side_effects(op)))
 }
 
+/// Can the given instruction be merged into another copy of itself?
+/// These instructions may have side-effects, but as long as we retain
+/// the first instance of the instruction, the second and further
+/// instances are redundant if they would produce the same trap or
+/// result.
+pub fn is_mergeable_for_egraph(func: &Function, inst: Inst) -> bool {
+    let op = func.dfg.insts[inst].opcode();
+    // We can only merge one-result operators due to the way that GVN
+    // is structured in the egraph implementation.
+    let has_one_result = func.dfg.inst_results(inst).len() == 1;
+    has_one_result
+        // Loads/stores are handled by alias analysis and not
+        // otherwise mergeable.
+        && !op.can_load()
+        && !op.can_store()
+        // Can only have idempotent side-effects.
+        && (!has_side_effect(func, inst) || op.side_effects_idempotent())
+}
+
 /// Does the given instruction have any side-effect as per [has_side_effect], or else is a load,
 /// but not the get_pinned_reg opcode?
 pub fn has_lowering_side_effect(func: &Function, inst: Inst) -> bool {
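As a hedged illustration of the predicate just added, the toy mirror below (an assumed `OpInfo` struct, not Cranelift's real `Opcode` API) makes the decision concrete: exactly one result, no load or store, and either no side effect or an idempotent one.

```rust
// Toy mirror of the mergeability decision; field names are assumptions.
#[derive(Clone, Copy)]
struct OpInfo {
    results: usize,
    loads: bool,
    stores: bool,
    has_side_effect: bool,
    side_effects_idempotent: bool,
}

fn is_mergeable(op: OpInfo) -> bool {
    op.results == 1
        && !op.loads
        && !op.stores
        && (!op.has_side_effect || op.side_effects_idempotent)
}

fn main() {
    // A trapping bounds-check-style op: side-effecting but idempotent.
    let trapping_check = OpInfo {
        results: 1,
        loads: false,
        stores: false,
        has_side_effect: true,
        side_effects_idempotent: true,
    };
    // A call: side-effecting and not idempotent, so never merged.
    let call = OpInfo { side_effects_idempotent: false, ..trapping_check };
    assert!(is_mergeable(trapping_check));
    assert!(!is_mergeable(call));
}
```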
@@ -0,0 +1,92 @@
+;;! target = "x86_64"
+;;!
+;;! optimize = true
+;;!
+;;! settings = [
+;;!   "enable_heap_access_spectre_mitigation=true",
+;;!   "opt_level=speed_and_size",
+;;!   "use_egraphs=true"
+;;! ]
+;;!
+;;! [globals.vmctx]
+;;! type = "i64"
+;;! vmctx = true
+;;!
+;;! [globals.heap_base]
+;;! type = "i64"
+;;! load = { base = "vmctx", offset = 0 }
+;;!
+;;! [globals.heap_bound]
+;;! type = "i64"
+;;! load = { base = "vmctx", offset = 8 }
+;;!
+;;! [[heaps]]
+;;! base = "heap_base"
+;;! min_size = 0
+;;! offset_guard_size = 0xffffffff
+;;! index_type = "i32"
+;;! style = { kind = "dynamic", bound = "heap_bound" }
+
+(module
+  (memory (export "memory") 0)
+  (func (export "load-without-offset") (param i32) (result i32 i32)
+    local.get 0
+    i32.load
+    local.get 0
+    i32.load
+  )
+  (func (export "load-with-offset") (param i32) (result i32 i32)
+    local.get 0
+    i32.load offset=1234
+    local.get 0
+    i32.load offset=1234
+  )
+)
+
+;; function u0:0(i32, i64 vmctx) -> i32, i32 fast {
+;;     gv0 = vmctx
+;;     gv1 = load.i64 notrap aligned gv0+8
+;;     gv2 = load.i64 notrap aligned gv0
+;;
+;; block0(v0: i32, v1: i64):
+;; @0057   v4 = uextend.i64 v0
+;; @0057   v5 = iconst.i64 4
+;; @0057   v6 = uadd_overflow_trap v4, v5, heap_oob  ; v5 = 4
+;; @0057   v7 = load.i64 notrap aligned v1+8
+;; @0057   v8 = load.i64 notrap aligned v1
+;; @0057   v11 = icmp ugt v6, v7
+;; @0057   v10 = iconst.i64 0
+;; @0057   v9 = iadd v8, v4
+;; @0057   v12 = select_spectre_guard v11, v10, v9  ; v10 = 0
+;; @0057   v13 = load.i32 little heap v12
+;;         v2 -> v13
+;; @005f   jump block1
+;;
+;; block1:
+;; @005f   return v13, v13
+;; }
+;;
+;; function u0:1(i32, i64 vmctx) -> i32, i32 fast {
+;;     gv0 = vmctx
+;;     gv1 = load.i64 notrap aligned gv0+8
+;;     gv2 = load.i64 notrap aligned gv0
+;;
+;; block0(v0: i32, v1: i64):
+;; @0064   v4 = uextend.i64 v0
+;; @0064   v5 = iconst.i64 1238
+;; @0064   v6 = uadd_overflow_trap v4, v5, heap_oob  ; v5 = 1238
+;; @0064   v7 = load.i64 notrap aligned v1+8
+;; @0064   v8 = load.i64 notrap aligned v1
+;; @0064   v12 = icmp ugt v6, v7
+;; @0064   v11 = iconst.i64 0
+;; @0064   v9 = iadd v8, v4
+;;         v26 = iconst.i64 1234
+;; @0064   v10 = iadd v9, v26  ; v26 = 1234
+;; @0064   v13 = select_spectre_guard v12, v11, v10  ; v11 = 0
+;; @0064   v14 = load.i32 little heap v13
+;;         v2 -> v14
+;; @006e   jump block1
+;;
+;; block1:
+;; @006e   return v14, v14
+;; }
@@ -0,0 +1,74 @@
+;;! target = "x86_64"
+;;!
+;;! optimize = true
+;;!
+;;! settings = [
+;;!   "enable_heap_access_spectre_mitigation=true",
+;;!   "opt_level=speed_and_size",
+;;!   "use_egraphs=true"
+;;! ]
+;;!
+;;! [globals.vmctx]
+;;! type = "i64"
+;;! vmctx = true
+;;!
+;;! [globals.heap_base]
+;;! type = "i64"
+;;! load = { base = "vmctx", offset = 0, readonly = true }
+;;!
+;;! [[heaps]]
+;;! base = "heap_base"
+;;! min_size = 0x10000
+;;! offset_guard_size = 0xffffffff
+;;! index_type = "i32"
+;;! style = { kind = "static", bound = 0x10000000 }
+
+(module
+  (memory (export "memory") 1)
+  (func (export "load-without-offset") (param i32) (result i32 i32)
+    local.get 0
+    i32.load
+    local.get 0
+    i32.load
+  )
+  (func (export "load-with-offset") (param i32) (result i32 i32)
+    local.get 0
+    i32.load offset=1234
+    local.get 0
+    i32.load offset=1234
+  )
+)
+
+;; function u0:0(i32, i64 vmctx) -> i32, i32 fast {
+;;     gv0 = vmctx
+;;     gv1 = load.i64 notrap aligned readonly gv0
+;;
+;; block0(v0: i32, v1: i64):
+;; @0057   v5 = load.i64 notrap aligned readonly v1
+;; @0057   v4 = uextend.i64 v0
+;; @0057   v6 = iadd v5, v4
+;; @0057   v7 = load.i32 little heap v6
+;;         v2 -> v7
+;; @005f   jump block1
+;;
+;; block1:
+;; @005f   return v7, v7
+;; }
+;;
+;; function u0:1(i32, i64 vmctx) -> i32, i32 fast {
+;;     gv0 = vmctx
+;;     gv1 = load.i64 notrap aligned readonly gv0
+;;
+;; block0(v0: i32, v1: i64):
+;; @0064   v5 = load.i64 notrap aligned readonly v1
+;; @0064   v4 = uextend.i64 v0
+;; @0064   v6 = iadd v5, v4
+;;         v14 = iconst.i64 1234
+;; @0064   v7 = iadd v6, v14  ; v14 = 1234
+;; @0064   v8 = load.i32 little heap v7
+;;         v2 -> v8
+;; @006e   jump block1
+;;
+;; block1:
+;; @006e   return v8, v8
+;; }