egraph-based midend: draw the rest of the owl (productionized). (#4953)

* egraph-based midend: draw the rest of the owl.

* Rename `egg` submodule of cranelift-codegen to `egraph`.

* Apply some feedback from @jsharp during code walkthrough.

* Remove recursion from find_best_node by doing a single pass.

Rather than recursively computing the lowest-cost node for a given
eclass and memoizing the answer at each eclass node, we can do a single
forward pass: because every eclass node refers only to earlier nodes,
one pass is sufficient. The result may differ slightly from the earlier
behavior because we can no longer short-circuit costs to zero once a
node is elaborated, but in practice this should not matter.
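
As a rough illustration of the single-pass idea (a minimal sketch, not the actual cranelift-egraph API; `EClass`, `find_best_costs`, and the cost model here are hypothetical stand-ins), assume a flat arena where every entry is either an operator node or a union of two earlier entries:

```rust
// Illustrative sketch only; the real arena and cost function in
// cranelift-egraph are more involved. Each entry is either a single
// operator node or a union of two earlier (lower-indexed) entries.
enum EClass {
    Node { op_cost: u32, args: Vec<u32> },
    Union(u32, u32),
}

/// One forward pass over the arena. Every entry refers only to
/// lower-indexed entries, so each argument's best cost is already
/// computed by the time we need it -- no recursion, no memo table.
fn find_best_costs(classes: &[EClass]) -> Vec<u32> {
    let mut best = Vec::with_capacity(classes.len());
    for class in classes {
        let cost = match class {
            EClass::Node { op_cost, args } => {
                op_cost + args.iter().map(|&a| best[a as usize]).sum::<u32>()
            }
            EClass::Union(a, b) => best[*a as usize].min(best[*b as usize]),
        };
        best.push(cost);
    }
    best
}
```

Because union entries take the minimum of two already-computed costs, picking the lowest-cost representative per eclass falls out of the same forward pass.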

* Make elaboration non-recursive.

Use an explicit stack instead (with `ElabStackEntry` entries,
alongside a result stack).
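
A minimal sketch of that pattern follows (illustrative only: the real `ElabStackEntry` carries more state such as result types and rematerialization info, and `Id`/`node_args` here are stand-ins):

```rust
// Illustrative sketch of the explicit-stack pattern used to avoid
// recursion during elaboration; not the actual cranelift-codegen code.
type Id = u32;

enum ElabStackEntry {
    /// First visit: push the node's arguments, then revisit to combine them.
    Start { id: Id },
    /// Arguments already pushed; pop `num_args` results and produce a value.
    PendingNode { id: Id, num_args: usize },
}

fn elaborate(root: Id, node_args: impl Fn(Id) -> Vec<Id>) -> Id {
    let mut elab_stack = vec![ElabStackEntry::Start { id: root }];
    let mut result_stack: Vec<Id> = vec![];
    while let Some(entry) = elab_stack.pop() {
        match entry {
            ElabStackEntry::Start { id } => {
                let args = node_args(id);
                elab_stack.push(ElabStackEntry::PendingNode {
                    id,
                    num_args: args.len(),
                });
                for arg in args {
                    elab_stack.push(ElabStackEntry::Start { id: arg });
                }
            }
            ElabStackEntry::PendingNode { id, num_args } => {
                // Pop the already-elaborated arguments; the real code emits
                // an instruction here from those values and memoizes it.
                let _args = result_stack.split_off(result_stack.len() - num_args);
                result_stack.push(id);
            }
        }
    }
    result_stack.pop().expect("root produces exactly one result")
}
```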

* Make elaboration traversal of the domtree non-recursive/stack-safe.

* Rework the analysis logic in the Cranelift-side egraph glue into a general analysis framework in cranelift-egraph.

* Apply static recursion limit to rule application.

* Fix aarch64 dynamic-vector support, which was broken by a rebase.

* Topo-sort cranelift-egraph before cranelift-codegen in publish script, like the comment instructs me to!

* Fix multi-result call testcase.

* Include `cranelift-egraph` in `PUBLISHED_CRATES`.

* Fix atomic_rmw: not really a load.

* Remove now-unnecessary PartialOrd/Ord derivations.

* Address some code-review comments.

* Review feedback.

* Review feedback.

* No overlap in mid-end rules, because we are defining a multi-constructor.

* rustfmt

* Review feedback.

* Review feedback.

* Review feedback.

* Review feedback.

* Remove redundant `mut`.

* Add comment noting what rules can do.

* Review feedback.

* Clarify comment wording.

* Update `has_memory_fence_semantics`.

* Apply @jameysharp's improved loop-level computation.

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Fix suggestion commit.

* Fix off-by-one in new loop-nest analysis.

* Review feedback.

* Review feedback.

* Review feedback.

* Use `Default`, not `std::default::Default`, as per @fitzgen

Co-authored-by: Nick Fitzgerald <fitzgen@gmail.com>

* Apply @fitzgen's comment elaboration to a doc-comment.

Co-authored-by: Nick Fitzgerald <fitzgen@gmail.com>

* Add stat for hitting the rewrite-depth limit.

* Some code motion in split prelude to make the diff a little clearer wrt `main`.

* Take @jameysharp's suggested `try_into()` usage for blockparam indices.

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Take @jameysharp's suggestion to avoid double-match on load op.

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Fix suggestion (add import).

* Review feedback.

* Fix stack_load handling.

* Remove redundant can_store case.

* Take @jameysharp's suggested improvement to FuncEGraph::build() logic

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Tweaks to FuncEGraph::build() on top of suggestion.

* Take @jameysharp's suggested clarified condition

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Clean up after suggestion (unused variable).

* Fix loop analysis.

* Add loop-level asserts.

* Revert constant-space loop analysis -- edge cases were incorrect, so let's go with the simple thing for now.

* Take @jameysharp's suggestion re: result_tys

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Fix up after suggestion

* Take @jameysharp's suggestion to use fold rather than reduce

Co-authored-by: Jamey Sharp <jamey@minilop.net>

* Fixup after suggestion

* Take @jameysharp's suggestion to remove elaborate_eclass_use's return value.

* Clarifying comment in terminator insts.

Co-authored-by: Jamey Sharp <jamey@minilop.net>
Co-authored-by: Nick Fitzgerald <fitzgen@gmail.com>
Author: Chris Fallin
Date: 2022-10-11 18:15:53 -07:00 (committed by GitHub)
Commit: 2be12a5167 (parent e2f1ced0b6)
59 changed files with 5125 additions and 1580 deletions

@@ -0,0 +1,13 @@
test optimize
set opt_level=none
set use_egraphs=true
target x86_64

function %f(i32) -> i32 {
block0(v0: i32):
    v1 = iconst.i32 2
    v2 = imul v0, v1
    ; check: v1 = iadd v0, v0
    ; nextln: return v1
    return v2
}

@@ -0,0 +1,22 @@
test optimize
set opt_level=none
set use_egraphs=true
target x86_64

function %f(i64) -> i64 {
block0(v0: i64):
    v1 = iconst.i64 0
    v2 = bor.i64 v0, v1
    v3 = load.i64 heap v0
    v4 = load.i64 heap v2
    v5 = band.i64 v3, v4
    store.i64 v0, v5
    v6 = load.i64 v3
    v7 = load.i64 v6
    return v7
}

; check: v1 = load.i64 heap v0
; nextln: store v0, v1
; nextln: v2 = load.i64 v0
; nextln: return v2

@@ -0,0 +1,29 @@
test optimize
set opt_level=none
set use_egraphs=true
target x86_64

function %f(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
    v2 = iadd v0, v1
    brnz v2, block1(v0)
    jump block2(v1)

block1(v3: i32):
    v4 = iadd v0, v1
    v5 = iadd v4, v3
    return v5

block2(v6: i32):
    return v6
}

;; Check that the `iadd` for `v4` is subsumed by `v2`:
; check: block0(v0: i32, v1: i32):
; nextln: v2 = iadd v0, v1
; check: block1:
; nextln: v3 = iadd.i32 v2, v0
; nextln: return v3
; check: block2:
; nextln: return v1

@@ -0,0 +1,40 @@
test optimize
set opt_level=none
set use_egraphs=true
target x86_64

function %f(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
    jump block1(v0)

block1(v2: i32):
    v3 = iconst.i32 1
    v4 = iadd.i32 v1, v3
    v5 = iconst.i32 40
    v6 = icmp eq v2, v5
    v7 = iconst.i32 1
    v8 = iadd.i32 v2, v7
    brnz v6, block2(v4)
    jump block1(v8)

block2(v9: i32):
    return v9
}

; check: block0(v0: i32, v1: i32):
; nextln: jump block1(v0)
; check: block1(v2: i32):
;; constants are not lifted; they are rematerialized in each block where used
; nextln: v3 = iconst.i32 40
; nextln: v4 = icmp eq v2, v3
; nextln: v5 = iconst.i32 1
; nextln: v6 = iadd v2, v5
; nextln: brnz v4, block2
; nextln: jump block1(v6)
; check: block2:
; nextln: v7 = iconst.i32 1
; nextln: v8 = iadd.i32 v1, v7
; nextln: return v8

@@ -0,0 +1,21 @@
test optimize
set opt_level=none
set use_egraphs=true
target x86_64

function %stack_load(i64) -> i64 {
    ss0 = explicit_slot 8

block0(v0: i64):
    stack_store.i64 v0, ss0
    v1 = stack_load.i64 ss0
    return v1
}

; check: function %stack_load(i64) -> i64 fast {
; nextln: ss0 = explicit_slot 8
; check: block0(v0: i64):
; nextln: v1 = stack_addr.i64 ss0
; nextln: store notrap aligned v0, v1
; nextln: return v0
; nextln: }

@@ -0,0 +1,24 @@
test compile precise-output
set use_egraphs=true
target x86_64

;; We want to make sure that this compiles successfully, so we are properly
;; handling multi-value operator nodes.

function u0:359(i64) -> i8, i8 system_v {
    sig0 = (i64) -> i8, i8 system_v
    fn0 = colocated u0:521 sig0

block0(v0: i64):
    v3, v4 = call fn0(v0)
    return v3, v4
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; call User(userextname0)
; movq %rbp, %rsp
; popq %rbp
; ret

@@ -0,0 +1,23 @@
test compile precise-output
set use_egraphs=true
target x86_64

;; `atomic_rmw` is not a load, but it reports `true` to `.can_load()`. We want
;; to make sure the alias analysis machinery doesn't break when we have these odd
;; memory ops in the IR.

function u0:1302(i64) -> i64 system_v {
block0(v0: i64):
    v9 = atomic_rmw.i64 add v0, v0
    return v0
}

; pushq %rbp
; movq %rsp, %rbp
; block0:
; atomically { 64_bits_at_[%r9]) Add= %r10; %rax = old_value_at_[%r9]; %r11, %rflags = trash }
; movq %rdi, %rax
; movq %rbp, %rsp
; popq %rbp
; ret

@@ -0,0 +1,35 @@
test optimize
set opt_level=none
set use_egraphs=true
target x86_64

function %f(i32) -> i32 {
block0(v0: i32):
    v1 = iconst.i32 42
    v2 = iadd.i32 v0, v1
    brnz v2, block1
    jump block2

block1:
    v3 = iconst.i32 84
    v4 = iadd.i32 v2, v3
    return v4

block2:
    return v2
}

; check: block0(v0: i32):
; nextln: v1 = iconst.i32 42
; nextln: v2 = iadd v0, v1
; nextln: brnz v2, block1
; nextln: jump block2
; check: block1:
; nextln: v5 = iconst.i32 126
; nextln: v6 = iadd.i32 v0, v5
; nextln: return v6
; check: block2:
; nextln: v3 = iconst.i32 42
; nextln: v4 = iadd.i32 v0, v3
; nextln: return v4