Optimizations to egraph framework (#5391)
* Optimizations to egraph framework: - Save elaborated results by canonical value, not latest value (union value). Previously we were artificially skipping and re-elaborating some values we already had because we were not finding them in the map. - Make some changes to handling of icmp results: when icmp became I8-typed (when bools went away), many uses became `(uextend $I32 (icmp $I8 ...))`, and so patterns in lowering backends were no longer matching. This PR includes an x64-specific change to match `(brz (uextend (icmp ...)))` and similarly for `brnz`, but it also takes advantage of the ability to write rules easily in the egraph mid-end to rewrite selects with icmp inputs appropriately. - Extend constprop to understand selects in the egraph mid-end. With these changes, bz2.wasm sees a ~1% speedup, and spidermonkey.wasm with a fib.js input sees a 16.8% speedup: ``` $ time taskset 1 target/release/wasmtime run --allow-precompiled --dir=. ./spidermonkey.base.cwasm ./fib.js 1346269 taskset 1 target/release/wasmtime run --allow-precompiled --dir=. ./fib.js 2.14s user 0.01s system 99% cpu 2.148 total $ time taskset 1 target/release/wasmtime run --allow-precompiled --dir=. ./spidermonkey.egraphs.cwasm ./fib.js 1346269 taskset 1 target/release/wasmtime run --allow-precompiled --dir=. ./fib.js 1.78s user 0.01s system 99% cpu 1.788 total ``` * Review feedback.
This commit is contained in:
@@ -277,7 +277,7 @@ impl<'a> Elaborator<'a> {
|
|||||||
let value = self.func.dfg.resolve_aliases(value);
|
let value = self.func.dfg.resolve_aliases(value);
|
||||||
|
|
||||||
self.stats.elaborate_visit_node += 1;
|
self.stats.elaborate_visit_node += 1;
|
||||||
let canonical_value = self.eclasses.find(value);
|
let canonical_value = self.eclasses.find_and_update(value);
|
||||||
debug_assert_ne!(canonical_value, Value::reserved_value());
|
debug_assert_ne!(canonical_value, Value::reserved_value());
|
||||||
trace!(
|
trace!(
|
||||||
"elaborate: value {} canonical {} before {}",
|
"elaborate: value {} canonical {} before {}",
|
||||||
@@ -518,8 +518,9 @@ impl<'a> Elaborator<'a> {
|
|||||||
value: new_result,
|
value: new_result,
|
||||||
in_block: insert_block,
|
in_block: insert_block,
|
||||||
};
|
};
|
||||||
|
let canonical_result = self.eclasses.find_and_update(result);
|
||||||
self.value_to_elaborated_value.insert_if_absent_with_depth(
|
self.value_to_elaborated_value.insert_if_absent_with_depth(
|
||||||
result,
|
canonical_result,
|
||||||
elab_value,
|
elab_value,
|
||||||
scope_depth,
|
scope_depth,
|
||||||
);
|
);
|
||||||
@@ -545,8 +546,9 @@ impl<'a> Elaborator<'a> {
|
|||||||
value: result,
|
value: result,
|
||||||
in_block: insert_block,
|
in_block: insert_block,
|
||||||
};
|
};
|
||||||
|
let canonical_result = self.eclasses.find_and_update(result);
|
||||||
self.value_to_elaborated_value.insert_if_absent_with_depth(
|
self.value_to_elaborated_value.insert_if_absent_with_depth(
|
||||||
result,
|
canonical_result,
|
||||||
elab_value,
|
elab_value,
|
||||||
scope_depth,
|
scope_depth,
|
||||||
);
|
);
|
||||||
@@ -623,8 +625,9 @@ impl<'a> Elaborator<'a> {
|
|||||||
// map now.
|
// map now.
|
||||||
for &result in self.func.dfg.inst_results(inst) {
|
for &result in self.func.dfg.inst_results(inst) {
|
||||||
trace!(" -> result {}", result);
|
trace!(" -> result {}", result);
|
||||||
|
let canonical_result = self.eclasses.find_and_update(result);
|
||||||
self.value_to_elaborated_value.insert_if_absent(
|
self.value_to_elaborated_value.insert_if_absent(
|
||||||
result,
|
canonical_result,
|
||||||
ElaboratedValue {
|
ElaboratedValue {
|
||||||
in_block: block,
|
in_block: block,
|
||||||
value: result,
|
value: result,
|
||||||
|
|||||||
@@ -2916,10 +2916,19 @@
|
|||||||
(let ((cmp IcmpCondResult (invert_icmp_cond_result (emit_cmp cc a b))))
|
(let ((cmp IcmpCondResult (invert_icmp_cond_result (emit_cmp cc a b))))
|
||||||
(side_effect (jmp_cond_icmp cmp taken not_taken))))
|
(side_effect (jmp_cond_icmp cmp taken not_taken))))
|
||||||
|
|
||||||
|
(rule 2 (lower_branch (brz (uextend (icmp cc a b)) _ _) (two_targets taken not_taken))
|
||||||
|
(let ((cmp IcmpCondResult (invert_icmp_cond_result (emit_cmp cc a b))))
|
||||||
|
(side_effect (jmp_cond_icmp cmp taken not_taken))))
|
||||||
|
|
||||||
|
|
||||||
(rule 2 (lower_branch (brz (fcmp cc a b) _ _) (two_targets taken not_taken))
|
(rule 2 (lower_branch (brz (fcmp cc a b) _ _) (two_targets taken not_taken))
|
||||||
(let ((cmp FcmpCondResult (emit_fcmp (floatcc_inverse cc) a b)))
|
(let ((cmp FcmpCondResult (emit_fcmp (floatcc_inverse cc) a b)))
|
||||||
(side_effect (jmp_cond_fcmp cmp taken not_taken))))
|
(side_effect (jmp_cond_fcmp cmp taken not_taken))))
|
||||||
|
|
||||||
|
(rule 2 (lower_branch (brz (uextend (fcmp cc a b)) _ _) (two_targets taken not_taken))
|
||||||
|
(let ((cmp FcmpCondResult (emit_fcmp (floatcc_inverse cc) a b)))
|
||||||
|
(side_effect (jmp_cond_fcmp cmp taken not_taken))))
|
||||||
|
|
||||||
(rule 1 (lower_branch (brz val @ (value_type $I128) _ _) (two_targets taken not_taken))
|
(rule 1 (lower_branch (brz val @ (value_type $I128) _ _) (two_targets taken not_taken))
|
||||||
(side_effect (jmp_cond_icmp (cmp_zero_i128 (CC.NZ) val) taken not_taken)))
|
(side_effect (jmp_cond_icmp (cmp_zero_i128 (CC.NZ) val) taken not_taken)))
|
||||||
|
|
||||||
@@ -2936,6 +2945,13 @@
|
|||||||
(let ((cmp FcmpCondResult (emit_fcmp cc a b)))
|
(let ((cmp FcmpCondResult (emit_fcmp cc a b)))
|
||||||
(side_effect (jmp_cond_fcmp cmp taken not_taken))))
|
(side_effect (jmp_cond_fcmp cmp taken not_taken))))
|
||||||
|
|
||||||
|
(rule 2 (lower_branch (brnz (uextend (icmp cc a b)) _ _) (two_targets taken not_taken))
|
||||||
|
(side_effect (jmp_cond_icmp (emit_cmp cc a b) taken not_taken)))
|
||||||
|
|
||||||
|
(rule 2 (lower_branch (brnz (uextend (fcmp cc a b)) _ _) (two_targets taken not_taken))
|
||||||
|
(let ((cmp FcmpCondResult (emit_fcmp cc a b)))
|
||||||
|
(side_effect (jmp_cond_fcmp cmp taken not_taken))))
|
||||||
|
|
||||||
(rule 1 (lower_branch (brnz val @ (value_type $I128) _ _) (two_targets taken not_taken))
|
(rule 1 (lower_branch (brnz val @ (value_type $I128) _ _) (two_targets taken not_taken))
|
||||||
(side_effect (jmp_cond_icmp (cmp_zero_i128 (CC.Z) val) taken not_taken)))
|
(side_effect (jmp_cond_icmp (cmp_zero_i128 (CC.Z) val) taken not_taken)))
|
||||||
|
|
||||||
|
|||||||
@@ -586,6 +586,26 @@ macro_rules! isle_common_prelude_methods {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn intcc_reverse(&mut self, cc: &IntCC) -> IntCC {
|
||||||
|
cc.reverse()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn intcc_inverse(&mut self, cc: &IntCC) -> IntCC {
|
||||||
|
cc.inverse()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn floatcc_reverse(&mut self, cc: &FloatCC) -> FloatCC {
|
||||||
|
cc.reverse()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn floatcc_inverse(&mut self, cc: &FloatCC) -> FloatCC {
|
||||||
|
cc.inverse()
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn unpack_value_array_2(&mut self, arr: &ValueArray2) -> (Value, Value) {
|
fn unpack_value_array_2(&mut self, arr: &ValueArray2) -> (Value, Value) {
|
||||||
let [a, b] = *arr;
|
let [a, b] = *arr;
|
||||||
|
|||||||
@@ -537,26 +537,6 @@ macro_rules! isle_lower_prelude_methods {
|
|||||||
MInst::gen_move(dst, src, ty)
|
MInst::gen_move(dst, src, ty)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn intcc_reverse(&mut self, cc: &IntCC) -> IntCC {
|
|
||||||
cc.reverse()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn intcc_inverse(&mut self, cc: &IntCC) -> IntCC {
|
|
||||||
cc.inverse()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn floatcc_reverse(&mut self, cc: &FloatCC) -> FloatCC {
|
|
||||||
cc.reverse()
|
|
||||||
}
|
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn floatcc_inverse(&mut self, cc: &FloatCC) -> FloatCC {
|
|
||||||
cc.inverse()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Generate the return instruction.
|
/// Generate the return instruction.
|
||||||
fn gen_return(&mut self, (list, off): ValueSlice) {
|
fn gen_return(&mut self, (list, off): ValueSlice) {
|
||||||
let rets = (off..list.len(&self.lower_ctx.dfg().value_lists))
|
let rets = (off..list.len(&self.lower_ctx.dfg().value_lists))
|
||||||
|
|||||||
@@ -185,3 +185,25 @@
|
|||||||
(remat x))
|
(remat x))
|
||||||
(rule (simplify x @ (f64const _ _))
|
(rule (simplify x @ (f64const _ _))
|
||||||
(remat x))
|
(remat x))
|
||||||
|
|
||||||
|
;; Optimize icmp-of-icmp.
|
||||||
|
(rule (simplify (icmp ty
|
||||||
|
(IntCC.NotEqual)
|
||||||
|
(uextend _ inner @ (icmp ty _ _ _))
|
||||||
|
(iconst _ (u64_from_imm64 0))))
|
||||||
|
(subsume inner))
|
||||||
|
|
||||||
|
(rule (simplify (icmp ty
|
||||||
|
(IntCC.Equal)
|
||||||
|
(uextend _ (icmp ty cc x y))
|
||||||
|
(iconst _ (u64_from_imm64 0))))
|
||||||
|
(subsume (icmp ty (intcc_inverse cc) x y)))
|
||||||
|
|
||||||
|
;; Optimize select-of-uextend-of-icmp to select-of-icmp, because
|
||||||
|
;; select can take an I8 condition too.
|
||||||
|
(rule (simplify
|
||||||
|
(select ty (uextend _ c @ (icmp _ _ _ _)) x y))
|
||||||
|
(select ty c x y))
|
||||||
|
(rule (simplify
|
||||||
|
(select ty (uextend _ c @ (icmp _ _ _ _)) x y))
|
||||||
|
(select ty c x y))
|
||||||
|
|||||||
@@ -130,5 +130,12 @@
|
|||||||
(bxor ty (bxor ty x k1 @ (iconst ty _)) k2 @ (iconst ty _)))
|
(bxor ty (bxor ty x k1 @ (iconst ty _)) k2 @ (iconst ty _)))
|
||||||
(bxor ty x (bxor ty k1 k2)))
|
(bxor ty x (bxor ty k1 k2)))
|
||||||
|
|
||||||
|
(rule (simplify
|
||||||
|
(select ty (iconst _ (u64_from_imm64 (u64_nonzero _))) x y))
|
||||||
|
x)
|
||||||
|
(rule (simplify
|
||||||
|
(select ty (iconst _ (u64_from_imm64 0)) x y))
|
||||||
|
y)
|
||||||
|
|
||||||
;; TODO: fadd, fsub, fmul, fdiv, fneg, fabs
|
;; TODO: fadd, fsub, fmul, fdiv, fneg, fabs
|
||||||
|
|
||||||
|
|||||||
@@ -206,6 +206,24 @@
|
|||||||
(decl mem_flags_trusted () MemFlags)
|
(decl mem_flags_trusted () MemFlags)
|
||||||
(extern constructor mem_flags_trusted mem_flags_trusted)
|
(extern constructor mem_flags_trusted mem_flags_trusted)
|
||||||
|
|
||||||
|
;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;; Reverse an IntCC flag.
|
||||||
|
(decl intcc_reverse (IntCC) IntCC)
|
||||||
|
(extern constructor intcc_reverse intcc_reverse)
|
||||||
|
|
||||||
|
;; Invert an IntCC flag.
|
||||||
|
(decl intcc_inverse (IntCC) IntCC)
|
||||||
|
(extern constructor intcc_inverse intcc_inverse)
|
||||||
|
|
||||||
|
;; Reverse an FloatCC flag.
|
||||||
|
(decl floatcc_reverse (FloatCC) FloatCC)
|
||||||
|
(extern constructor floatcc_reverse floatcc_reverse)
|
||||||
|
|
||||||
|
;; Invert an FloatCC flag.
|
||||||
|
(decl floatcc_inverse (FloatCC) FloatCC)
|
||||||
|
(extern constructor floatcc_inverse floatcc_inverse)
|
||||||
|
|
||||||
;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
;; An extractor that only matches types that can fit in 16 bits.
|
;; An extractor that only matches types that can fit in 16 bits.
|
||||||
|
|||||||
@@ -308,23 +308,6 @@
|
|||||||
|
|
||||||
;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
;; Reverse an IntCC flag.
|
|
||||||
(decl intcc_reverse (IntCC) IntCC)
|
|
||||||
(extern constructor intcc_reverse intcc_reverse)
|
|
||||||
|
|
||||||
;; Invert an IntCC flag.
|
|
||||||
(decl intcc_inverse (IntCC) IntCC)
|
|
||||||
(extern constructor intcc_inverse intcc_inverse)
|
|
||||||
|
|
||||||
;; Reverse an FloatCC flag.
|
|
||||||
(decl floatcc_reverse (FloatCC) FloatCC)
|
|
||||||
(extern constructor floatcc_reverse floatcc_reverse)
|
|
||||||
|
|
||||||
;; Invert an FloatCC flag.
|
|
||||||
(decl floatcc_inverse (FloatCC) FloatCC)
|
|
||||||
(extern constructor floatcc_inverse floatcc_inverse)
|
|
||||||
|
|
||||||
|
|
||||||
;; Newtype wrapper around `MInst` for instructions that are used for their
|
;; Newtype wrapper around `MInst` for instructions that are used for their
|
||||||
;; effect on flags.
|
;; effect on flags.
|
||||||
;;
|
;;
|
||||||
|
|||||||
Reference in New Issue
Block a user