From 8c55b813008c03a00fc829e3eea5b0a356cd0d38 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Wed, 7 Dec 2022 13:23:13 -0800 Subject: [PATCH] Optimizations to egraph framework (#5391) * Optimizations to egraph framework: - Save elaborated results by canonical value, not latest value (union value). Previously we were artificially skipping and re-elaborating some values we already had because we were not finding them in the map. - Make some changes to handling of icmp results: when icmp became I8-typed (when bools went away), many uses became `(uextend $I32 (icmp $I8 ...))`, and so patterns in lowering backends were no longer matching. This PR includes an x64-specific change to match `(brz (uextend (icmp ...)))` and similarly for `brnz`, but it also takes advantage of the ability to write rules easily in the egraph mid-end to rewrite selects with icmp inputs appropriately. - Extend constprop to understand selects in the egraph mid-end. With these changes, bz2.wasm sees a ~1% speedup, and spidermonkey.wasm with a fib.js input sees a 16.8% speedup: ``` $ time taskset 1 target/release/wasmtime run --allow-precompiled --dir=. ./spidermonkey.base.cwasm ./fib.js 1346269 taskset 1 target/release/wasmtime run --allow-precompiled --dir=. ./fib.js 2.14s user 0.01s system 99% cpu 2.148 total $ time taskset 1 target/release/wasmtime run --allow-precompiled --dir=. ./spidermonkey.egraphs.cwasm ./fib.js 1346269 taskset 1 target/release/wasmtime run --allow-precompiled --dir=. ./fib.js 1.78s user 0.01s system 99% cpu 1.788 total ``` * Review feedback. --- cranelift/codegen/src/egraph/elaborate.rs | 11 +++++++---- cranelift/codegen/src/isa/x64/lower.isle | 16 ++++++++++++++++ cranelift/codegen/src/isle_prelude.rs | 20 ++++++++++++++++++++ cranelift/codegen/src/machinst/isle.rs | 20 -------------------- cranelift/codegen/src/opts/algebraic.isle | 22 ++++++++++++++++++++++ cranelift/codegen/src/opts/cprop.isle | 7 +++++++ cranelift/codegen/src/prelude.isle | 18 ++++++++++++++++++ cranelift/codegen/src/prelude_lower.isle | 17 ----------------- 8 files changed, 90 insertions(+), 41 deletions(-) diff --git a/cranelift/codegen/src/egraph/elaborate.rs b/cranelift/codegen/src/egraph/elaborate.rs index 2ca59bdb03..ed145ee706 100644 --- a/cranelift/codegen/src/egraph/elaborate.rs +++ b/cranelift/codegen/src/egraph/elaborate.rs @@ -277,7 +277,7 @@ impl<'a> Elaborator<'a> { let value = self.func.dfg.resolve_aliases(value); self.stats.elaborate_visit_node += 1; - let canonical_value = self.eclasses.find(value); + let canonical_value = self.eclasses.find_and_update(value); debug_assert_ne!(canonical_value, Value::reserved_value()); trace!( "elaborate: value {} canonical {} before {}", @@ -518,8 +518,9 @@ impl<'a> Elaborator<'a> { value: new_result, in_block: insert_block, }; + let canonical_result = self.eclasses.find_and_update(result); self.value_to_elaborated_value.insert_if_absent_with_depth( - result, + canonical_result, elab_value, scope_depth, ); @@ -545,8 +546,9 @@ impl<'a> Elaborator<'a> { value: result, in_block: insert_block, }; + let canonical_result = self.eclasses.find_and_update(result); self.value_to_elaborated_value.insert_if_absent_with_depth( - result, + canonical_result, elab_value, scope_depth, ); @@ -623,8 +625,9 @@ impl<'a> Elaborator<'a> { // map now. for &result in self.func.dfg.inst_results(inst) { trace!(" -> result {}", result); + let canonical_result = self.eclasses.find_and_update(result); self.value_to_elaborated_value.insert_if_absent( - result, + canonical_result, ElaboratedValue { in_block: block, value: result, diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index b1cee1c536..93da49d6fd 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -2916,10 +2916,19 @@ (let ((cmp IcmpCondResult (invert_icmp_cond_result (emit_cmp cc a b)))) (side_effect (jmp_cond_icmp cmp taken not_taken)))) +(rule 2 (lower_branch (brz (uextend (icmp cc a b)) _ _) (two_targets taken not_taken)) + (let ((cmp IcmpCondResult (invert_icmp_cond_result (emit_cmp cc a b)))) + (side_effect (jmp_cond_icmp cmp taken not_taken)))) + + (rule 2 (lower_branch (brz (fcmp cc a b) _ _) (two_targets taken not_taken)) (let ((cmp FcmpCondResult (emit_fcmp (floatcc_inverse cc) a b))) (side_effect (jmp_cond_fcmp cmp taken not_taken)))) +(rule 2 (lower_branch (brz (uextend (fcmp cc a b)) _ _) (two_targets taken not_taken)) + (let ((cmp FcmpCondResult (emit_fcmp (floatcc_inverse cc) a b))) + (side_effect (jmp_cond_fcmp cmp taken not_taken)))) + (rule 1 (lower_branch (brz val @ (value_type $I128) _ _) (two_targets taken not_taken)) (side_effect (jmp_cond_icmp (cmp_zero_i128 (CC.NZ) val) taken not_taken))) @@ -2936,6 +2945,13 @@ (let ((cmp FcmpCondResult (emit_fcmp cc a b))) (side_effect (jmp_cond_fcmp cmp taken not_taken)))) +(rule 2 (lower_branch (brnz (uextend (icmp cc a b)) _ _) (two_targets taken not_taken)) + (side_effect (jmp_cond_icmp (emit_cmp cc a b) taken not_taken))) + +(rule 2 (lower_branch (brnz (uextend (fcmp cc a b)) _ _) (two_targets taken not_taken)) + (let ((cmp FcmpCondResult (emit_fcmp cc a b))) + (side_effect (jmp_cond_fcmp cmp taken not_taken)))) + (rule 1 (lower_branch (brnz val @ (value_type $I128) _ _) (two_targets taken not_taken)) (side_effect (jmp_cond_icmp (cmp_zero_i128 (CC.Z) val) taken not_taken))) diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index 4e5dd5f191..3e3c34c575 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -586,6 +586,26 @@ macro_rules! isle_common_prelude_methods { } } + #[inline] + fn intcc_reverse(&mut self, cc: &IntCC) -> IntCC { + cc.reverse() + } + + #[inline] + fn intcc_inverse(&mut self, cc: &IntCC) -> IntCC { + cc.inverse() + } + + #[inline] + fn floatcc_reverse(&mut self, cc: &FloatCC) -> FloatCC { + cc.reverse() + } + + #[inline] + fn floatcc_inverse(&mut self, cc: &FloatCC) -> FloatCC { + cc.inverse() + } + #[inline] fn unpack_value_array_2(&mut self, arr: &ValueArray2) -> (Value, Value) { let [a, b] = *arr; diff --git a/cranelift/codegen/src/machinst/isle.rs b/cranelift/codegen/src/machinst/isle.rs index 4a8bf45e0a..da1391d347 100644 --- a/cranelift/codegen/src/machinst/isle.rs +++ b/cranelift/codegen/src/machinst/isle.rs @@ -537,26 +537,6 @@ macro_rules! isle_lower_prelude_methods { MInst::gen_move(dst, src, ty) } - #[inline] - fn intcc_reverse(&mut self, cc: &IntCC) -> IntCC { - cc.reverse() - } - - #[inline] - fn intcc_inverse(&mut self, cc: &IntCC) -> IntCC { - cc.inverse() - } - - #[inline] - fn floatcc_reverse(&mut self, cc: &FloatCC) -> FloatCC { - cc.reverse() - } - - #[inline] - fn floatcc_inverse(&mut self, cc: &FloatCC) -> FloatCC { - cc.inverse() - } - /// Generate the return instruction. fn gen_return(&mut self, (list, off): ValueSlice) { let rets = (off..list.len(&self.lower_ctx.dfg().value_lists)) diff --git a/cranelift/codegen/src/opts/algebraic.isle b/cranelift/codegen/src/opts/algebraic.isle index caed553ba7..e4638222fe 100644 --- a/cranelift/codegen/src/opts/algebraic.isle +++ b/cranelift/codegen/src/opts/algebraic.isle @@ -185,3 +185,25 @@ (remat x)) (rule (simplify x @ (f64const _ _)) (remat x)) + +;; Optimize icmp-of-icmp. +(rule (simplify (icmp ty + (IntCC.NotEqual) + (uextend _ inner @ (icmp ty _ _ _)) + (iconst _ (u64_from_imm64 0)))) + (subsume inner)) + +(rule (simplify (icmp ty + (IntCC.Equal) + (uextend _ (icmp ty cc x y)) + (iconst _ (u64_from_imm64 0)))) + (subsume (icmp ty (intcc_inverse cc) x y))) + +;; Optimize select-of-uextend-of-icmp to select-of-icmp, because +;; select can take an I8 condition too. +(rule (simplify + (select ty (uextend _ c @ (icmp _ _ _ _)) x y)) + (select ty c x y)) +(rule (simplify + (select ty (uextend _ c @ (icmp _ _ _ _)) x y)) + (select ty c x y)) diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle index ef4e8c28fd..ac6ce778e0 100644 --- a/cranelift/codegen/src/opts/cprop.isle +++ b/cranelift/codegen/src/opts/cprop.isle @@ -130,5 +130,12 @@ (bxor ty (bxor ty x k1 @ (iconst ty _)) k2 @ (iconst ty _))) (bxor ty x (bxor ty k1 k2))) +(rule (simplify + (select ty (iconst _ (u64_from_imm64 (u64_nonzero _))) x y)) + x) +(rule (simplify + (select ty (iconst _ (u64_from_imm64 0)) x y)) + y) + ;; TODO: fadd, fsub, fmul, fdiv, fneg, fabs diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 885056c507..fef9a3ab94 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -206,6 +206,24 @@ (decl mem_flags_trusted () MemFlags) (extern constructor mem_flags_trusted mem_flags_trusted) +;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Reverse an IntCC flag. +(decl intcc_reverse (IntCC) IntCC) +(extern constructor intcc_reverse intcc_reverse) + +;; Invert an IntCC flag. +(decl intcc_inverse (IntCC) IntCC) +(extern constructor intcc_inverse intcc_inverse) + +;; Reverse an FloatCC flag. +(decl floatcc_reverse (FloatCC) FloatCC) +(extern constructor floatcc_reverse floatcc_reverse) + +;; Invert an FloatCC flag. +(decl floatcc_inverse (FloatCC) FloatCC) +(extern constructor floatcc_inverse floatcc_inverse) + ;;;; Helper Clif Extractors ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; An extractor that only matches types that can fit in 16 bits. diff --git a/cranelift/codegen/src/prelude_lower.isle b/cranelift/codegen/src/prelude_lower.isle index 05bae6e6c2..ca89554a66 100644 --- a/cranelift/codegen/src/prelude_lower.isle +++ b/cranelift/codegen/src/prelude_lower.isle @@ -308,23 +308,6 @@ ;;;; Helpers for Working with Flags ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; Reverse an IntCC flag. -(decl intcc_reverse (IntCC) IntCC) -(extern constructor intcc_reverse intcc_reverse) - -;; Invert an IntCC flag. -(decl intcc_inverse (IntCC) IntCC) -(extern constructor intcc_inverse intcc_inverse) - -;; Reverse an FloatCC flag. -(decl floatcc_reverse (FloatCC) FloatCC) -(extern constructor floatcc_reverse floatcc_reverse) - -;; Invert an FloatCC flag. -(decl floatcc_inverse (FloatCC) FloatCC) -(extern constructor floatcc_inverse floatcc_inverse) - - ;; Newtype wrapper around `MInst` for instructions that are used for their ;; effect on flags. ;;