ISLE: split algebraic.isle into several files (#6140)

* ISLE: split algebraic.isle into several files * delete `algebraic.clif` * Add `README.md` * Remove old `algebraic.clif` tests --------- Co-authored-by: Jamey Sharp <jsharp@fastly.com>
2023-04-11 22:39:18 +01:00
parent 569089e473
commit b9a58148cf
14 changed files with 919 additions and 910 deletions
--- a/cranelift/codegen/build.rs
+++ b/cranelift/codegen/build.rs
@@ -221,9 +221,14 @@ fn get_isle_compilations(
                inputs: vec![
                    prelude_isle.clone(),
                    prelude_opt_isle,
-                    src_opts.join("algebraic.isle"),
-                    src_opts.join("icmp.isle"),
+                    src_opts.join("arithmetic.isle"),
+                    src_opts.join("bitops.isle"),
                    src_opts.join("cprop.isle"),
+                    src_opts.join("extends.isle"),
+                    src_opts.join("icmp.isle"),
+                    src_opts.join("remat.isle"),
+                    src_opts.join("selects.isle"),
+                    src_opts.join("shifts.isle"),
                ],
                untracked_inputs: vec![clif_opt_isle],
            },
--- a/cranelift/codegen/src/opts/README.md
+++ b/cranelift/codegen/src/opts/README.md
@@ -0,0 +1,5 @@
+Rules here are allowed to rewrite pure expressions arbitrarily,
+using the same inputs as the original, or fewer. In other words, we
+cannot pull a new eclass id out of thin air and refer to it, other
+than a piece of the input or a new node that we construct; but we
+can freely rewrite e.g. `x+y-y` to `x`.
--- a/cranelift/codegen/src/opts/algebraic.isle
+++ b/cranelift/codegen/src/opts/algebraic.isle
@@ -1,411 +0,0 @@
-;; Algebraic optimizations.
-
-;; Rules here are allowed to rewrite pure expressions arbitrarily,
-;; using the same inputs as the original, or fewer. In other words, we
-;; cannot pull a new eclass id out of thin air and refer to it, other
-;; than a piece of the input or a new node that we construct; but we
-;; can freely rewrite e.g. `x+y-y` to `x`.
-
-;; Chained `uextend` and `sextend`.
-(rule (simplify (uextend ty (uextend _intermediate_ty x)))
-      (uextend ty x))
-(rule (simplify (sextend ty (sextend _intermediate_ty x)))
-      (sextend ty x))
-
-;; x+0 == 0+x == x.
-(rule (simplify (iadd ty
-                      x
-                      (iconst ty (u64_from_imm64 0))))
-      (subsume x))
-(rule (simplify (iadd ty
-                      (iconst ty (u64_from_imm64 0))
-                      x))
-      (subsume x))
-;; x-0 == x.
-(rule (simplify (isub ty
-                      x
-                      (iconst ty (u64_from_imm64 0))))
-      (subsume x))
-;; 0-x == (ineg x).
-(rule (simplify (isub ty
-                      (iconst ty (u64_from_imm64 0))
-                      x))
-      (ineg ty x))
-
-;; ineg(ineg(x)) == x.
-(rule (simplify (ineg ty (ineg ty x))) (subsume x))
-
-;; ineg(x) * ineg(y) == x*y.
-(rule (simplify (imul ty (ineg ty x) (ineg ty y)))
-      (subsume (imul ty x y)))
-
-;; iabs(ineg(x)) == iabs(x).
-(rule (simplify (iabs ty (ineg ty x)))
-      (iabs ty x))
-
-;; iabs(iabs(x)) == iabs(x).
-(rule (simplify (iabs ty inner @ (iabs ty x)))
-      (subsume inner))
-
-;; x-x == 0.
-(rule (simplify (isub (fits_in_64 (ty_int ty)) x x)) (subsume (iconst ty (imm64 0))))
-
-;; x*1 == 1*x == x.
-(rule (simplify (imul ty
-                      x
-                      (iconst ty (u64_from_imm64 1))))
-      (subsume x))
-(rule (simplify (imul ty
-                      (iconst ty (u64_from_imm64 1))
-                      x))
-      (subsume x))
-
-;; x*0 == 0*x == 0.
-(rule (simplify (imul ty
-                      _
-                      zero @ (iconst ty (u64_from_imm64 0))))
-      (subsume zero))
-(rule (simplify (imul ty
-                      zero @ (iconst ty (u64_from_imm64 0))
-                      _))
-      (subsume zero))
-
-;; x*-1 == -1*x == ineg(x).
-(rule (simplify (imul ty x (iconst ty c)))
-      (if-let -1 (i64_sextend_imm64 ty c))
-      (ineg ty x))
-(rule (simplify (imul ty (iconst ty c) x))
-      (if-let -1 (i64_sextend_imm64 ty c))
-      (ineg ty x))
-
-;; x/1 == x.
-(rule (simplify (sdiv ty
-                      x
-                      (iconst ty (u64_from_imm64 1))))
-      (subsume x))
-(rule (simplify (udiv ty
-                      x
-                      (iconst ty (u64_from_imm64 1))))
-      (subsume x))
-
-;; x>>0 == x<<0 == x rotr 0 == x rotl 0 == x.
-(rule (simplify (ishl ty
-                      x
-                      (iconst ty (u64_from_imm64 0))))
-      (subsume x))
-(rule (simplify (ushr ty
-                      x
-                      (iconst ty (u64_from_imm64 0))))
-      (subsume x))
-(rule (simplify (sshr ty
-                      x
-                      (iconst ty (u64_from_imm64 0))))
-      (subsume x))
-(rule (simplify (rotr ty
-                      x
-                      (iconst ty (u64_from_imm64 0))))
-      (subsume x))
-(rule (simplify (rotl ty
-                      x
-                      (iconst ty (u64_from_imm64 0))))
-      (subsume x))
-
-;; x | 0 == 0 | x == x | x == x.
-(rule (simplify (bor ty
-                     x
-                     (iconst ty (u64_from_imm64 0))))
-      (subsume x))
-(rule (simplify (bor ty
-                     (iconst ty (u64_from_imm64 0))
-                     x))
-      (subsume x))
-(rule (simplify (bor ty x x))
-      (subsume x))
-
-;; x ^ 0 == 0 ^ x == x.
-(rule (simplify (bxor ty
-                     x
-                     (iconst ty (u64_from_imm64 0))))
-      (subsume x))
-(rule (simplify (bxor ty
-                     (iconst ty (u64_from_imm64 0))
-                     x))
-      (subsume x))
-
-;; x ^ x == 0.
-(rule (simplify (bxor (fits_in_64 (ty_int ty)) x x))
-      (subsume (iconst ty (imm64 0))))
-
-;; x ^ not(x) == not(x) ^ x == x | not(x) == not(x) | x == -1.
-;; This identity also holds for non-integer types, vectors, and wider types.
-;; But `iconst` is only valid for integers up to 64 bits wide.
-(rule (simplify (bxor (fits_in_64 (ty_int ty)) x (bnot ty x))) (subsume (iconst ty (imm64 (ty_mask ty)))))
-(rule (simplify (bxor (fits_in_64 (ty_int ty)) (bnot ty x) x)) (subsume (iconst ty (imm64 (ty_mask ty)))))
-(rule (simplify (bor (fits_in_64 (ty_int ty)) x (bnot ty x))) (subsume (iconst ty (imm64 (ty_mask ty)))))
-(rule (simplify (bor (fits_in_64 (ty_int ty)) (bnot ty x) x)) (subsume (iconst ty (imm64 (ty_mask ty)))))
-
-;; x & -1 == -1 & x == x & x == x.
-(rule (simplify (band ty x x)) (subsume x))
-(rule (simplify (band ty x (iconst ty k)))
-      (if-let -1 (i64_sextend_imm64 ty k))
-      (subsume x))
-(rule (simplify (band ty (iconst ty k) x))
-      (if-let -1 (i64_sextend_imm64 ty k))
-      (subsume x))
-
-;; x & 0 == 0 & x == x & not(x) == not(x) & x == 0.
-(rule (simplify (band ty _ zero @ (iconst ty (u64_from_imm64 0)))) (subsume zero))
-(rule (simplify (band ty zero @ (iconst ty (u64_from_imm64 0)) _)) (subsume zero))
-(rule (simplify (band (fits_in_64 (ty_int ty)) x (bnot ty x))) (subsume (iconst ty (imm64 0))))
-(rule (simplify (band (fits_in_64 (ty_int ty)) (bnot ty x) x)) (subsume (iconst ty (imm64 0))))
-
-;; not(not(x)) == x.
-(rule (simplify (bnot ty (bnot ty x))) (subsume x))
-
-;; DeMorgan's rule (two versions):
-;; bnot(bor(x, y)) == band(bnot(x), bnot(y))
-(rule (simplify (bnot ty (bor ty x y)))
-      (band ty (bnot ty x) (bnot ty y)))
-;; bnot(band(x, y)) == bor(bnot(x), bnot(y))
-(rule (simplify (bnot ty (band t x y)))
-      (bor ty (bnot ty x) (bnot ty y)))
-
-;; `or(and(x, y), not(y)) == or(x, not(y))`
-(rule (simplify (bor ty
-                     (band ty x y)
-                     z @ (bnot ty y)))
-      (bor ty x z))
-;; Duplicate the rule but swap the `bor` operands because `bor` is
-;; commutative. We could, of course, add a `simplify` rule to do the commutative
-;; swap for all `bor`s but this will bloat the e-graph with many e-nodes. It is
-;; cheaper to have additional rules, rather than additional e-nodes, because we
-;; amortize their cost via ISLE's smart codegen.
-(rule (simplify (bor ty
-                     z @ (bnot ty y)
-                     (band ty x y)))
-      (bor ty x z))
-
-;; `or(and(x, y), not(y)) == or(x, not(y))` specialized for constants, since
-;; otherwise we may not know that `z == not(y)` since we don't generally expand
-;; constants in the e-graph.
-;;
-;; (No need to duplicate for commutative `bor` for this constant version because
-;; we move constants to the right.)
-(rule (simplify (bor ty
-                     (band ty x (iconst ty (u64_from_imm64 y)))
-                     z @ (iconst ty (u64_from_imm64 zk))))
-      (if-let $true (u64_eq (u64_and (ty_mask ty) zk)
-                            (u64_and (ty_mask ty) (u64_not y))))
-      (bor ty x z))
-
-;; x*2 == 2*x == x+x.
-(rule (simplify (imul ty x (iconst _ (simm32 2))))
-      (iadd ty x x))
-(rule (simplify (imul ty (iconst _ (simm32 2)) x))
-      (iadd ty x x))
-
-;; x*c == x<<log2(c) when c is a power of two.
-;; Note that the type of `iconst` must be the same as the type of `imul`,
-;; so these rules can only fire in situations where it's safe to construct an
-;; `iconst` of that type.
-(rule (simplify (imul ty x (iconst _ (imm64_power_of_two c))))
-      (ishl ty x (iconst ty (imm64 c))))
-(rule (simplify (imul ty (iconst _ (imm64_power_of_two c)) x))
-      (ishl ty x (iconst ty (imm64 c))))
-
-;; TODO: strength reduction: div to shifts
-;; TODO: div/rem by constants -> magic multiplications
-
-
-;; `(x >> k) << k` is the same as masking off the bottom `k` bits (regardless if
-;; this is a signed or unsigned shift right).
-(rule (simplify (ishl (fits_in_64 ty)
-                      (ushr ty x (iconst _ k))
-                      (iconst _ k)))
-      (let ((mask Imm64 (imm64_shl ty (imm64 0xFFFF_FFFF_FFFF_FFFF) k)))
-        (band ty x (iconst ty mask))))
-(rule (simplify (ishl (fits_in_64 ty)
-                      (sshr ty x (iconst _ k))
-                      (iconst _ k)))
-      (let ((mask Imm64 (imm64_shl ty (imm64 0xFFFF_FFFF_FFFF_FFFF) k)))
-        (band ty x (iconst ty mask))))
-
-
-;; For unsigned shifts, `(x << k) >> k` is the same as masking out the top
-;; `k` bits. A similar rule is valid for vectors but this `iconst` mask only
-;; works for scalar integers.
-(rule (simplify (ushr (fits_in_64 (ty_int ty))
-                      (ishl ty x (iconst _ k))
-                      (iconst _ k)))
-      (band ty x (iconst ty (imm64_ushr ty (imm64 (ty_mask ty)) k))))
-
-
-;; For signed shifts, `(x << k) >> k` does sign-extension from `n` bits to
-;; `n+k` bits. In the special case where `x` is the result of either `sextend`
-;; or `uextend` from `n` bits to `n+k` bits, we can implement this using
-;; `sextend`.
-(rule (simplify (sshr wide
-                 (ishl wide
-                  (uextend wide x @ (value_type narrow))
-                  (iconst _ shift))
-                 (iconst _ shift)))
-      (if-let (u64_from_imm64 shift_u64) shift)
-      (if-let $true (u64_eq shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow))))
-      (sextend wide x))
-
-;; If `k` is smaller than the difference in bit widths of the two types, then
-;; the intermediate sign bit comes from the extend op, so the final result is
-;; the same as the original extend op.
-(rule (simplify (sshr wide
-                 (ishl wide
-                  x @ (uextend wide (value_type narrow))
-                  (iconst _ shift))
-                 (iconst _ shift)))
-      (if-let (u64_from_imm64 shift_u64) shift)
-      (if-let $true (u64_lt shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow))))
-      x)
-
-;; If the original extend op was `sextend`, then both of the above cases say
-;; the result should also be `sextend`.
-(rule (simplify (sshr wide
-                 (ishl wide
-                  x @ (sextend wide (value_type narrow))
-                  (iconst _ shift))
-                 (iconst _ shift)))
-      (if-let (u64_from_imm64 shift_u64) shift)
-      (if-let $true (u64_le shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow))))
-      x)
-
-
-;; Masking out any of the top bits of the result of `uextend` is a no-op. (This
-;; is like a cheap version of known-bits analysis.)
-(rule (simplify (band wide x @ (uextend _ (value_type narrow)) (iconst _ (u64_from_imm64 mask))))
-      ; Check that `narrow_mask` has a subset of the bits that `mask` does.
-      (if-let $true (let ((narrow_mask u64 (ty_mask narrow))) (u64_eq narrow_mask (u64_and mask narrow_mask))))
-      x)
-
-;; Masking out the sign-extended bits of an `sextend` turns it into a `uextend`.
-(rule (simplify (band wide (sextend _ x @ (value_type narrow)) (iconst _ (u64_from_imm64 mask))))
-      (if-let $true (u64_eq mask (ty_mask narrow)))
-      (uextend wide x))
-
-
-;; Rematerialize ALU-op-with-imm and iconsts in each block where they're
-;; used. This is neutral (add-with-imm) or positive (iconst) for
-;; register pressure, and these ops are very cheap.
-(rule (simplify x @ (iadd _ (iconst _ _) _))
-      (remat x))
-(rule (simplify x @ (iadd _ _ (iconst _ _)))
-      (remat x))
-(rule (simplify x @ (isub _ (iconst _ _) _))
-      (remat x))
-(rule (simplify x @ (isub _ _ (iconst _ _)))
-      (remat x))
-(rule (simplify x @ (band _ (iconst _ _) _))
-      (remat x))
-(rule (simplify x @ (band _ _ (iconst _ _)))
-      (remat x))
-(rule (simplify x @ (bor _ (iconst _ _) _))
-      (remat x))
-(rule (simplify x @ (bor _ _ (iconst _ _)))
-      (remat x))
-(rule (simplify x @ (bxor _ (iconst _ _) _))
-      (remat x))
-(rule (simplify x @ (bxor _ _ (iconst _ _)))
-      (remat x))
-(rule (simplify x @ (bnot _ _))
-      (remat x))
-(rule (simplify x @ (iconst _ _))
-      (remat x))
-(rule (simplify x @ (f32const _ _))
-      (remat x))
-(rule (simplify x @ (f64const _ _))
-      (remat x))
-
-;; (x ^ -1) can be replaced with the `bnot` instruction
-(rule (simplify (bxor ty x (iconst ty k)))
-  (if-let -1 (i64_sextend_imm64 ty k))
-  (bnot ty x))
-
-;; 32-bit integers zero-extended to 64-bit integers are never negative
-(rule (simplify
-       (slt ty
-             (uextend $I64 x @ (value_type $I32))
-             (iconst _ (u64_from_imm64 0))))
-      (iconst ty (imm64 0)))
-(rule (simplify
-       (sge ty
-             (uextend $I64 x @ (value_type $I32))
-             (iconst _ (u64_from_imm64 0))))
-      (iconst ty (imm64 1)))
-
-;; Transform select-of-icmp into {u,s}{min,max} instructions where possible.
-(rule (simplify (select ty (sgt _ x y) x y)) (smax ty x y))
-(rule (simplify (select ty (sge _ x y) x y)) (smax ty x y))
-(rule (simplify (select ty (ugt _ x y) x y)) (umax ty x y))
-(rule (simplify (select ty (uge _ x y) x y)) (umax ty x y))
-(rule (simplify (select ty (slt _ x y) x y)) (smin ty x y))
-(rule (simplify (select ty (sle _ x y) x y)) (smin ty x y))
-(rule (simplify (select ty (ult _ x y) x y)) (umin ty x y))
-(rule (simplify (select ty (ule _ x y) x y)) (umin ty x y))
-
-;; These are the same rules as above, but when the operands for select are swapped
-(rule (simplify (select ty (slt _ x y) y x)) (smax ty x y))
-(rule (simplify (select ty (sle _ x y) y x)) (smax ty x y))
-(rule (simplify (select ty (ult _ x y) y x)) (umax ty x y))
-(rule (simplify (select ty (ule _ x y) y x)) (umax ty x y))
-(rule (simplify (select ty (sgt _ x y) y x)) (smin ty x y))
-(rule (simplify (select ty (sge _ x y) y x)) (smin ty x y))
-(rule (simplify (select ty (ugt _ x y) y x)) (umin ty x y))
-(rule (simplify (select ty (uge _ x y) y x)) (umin ty x y))
-
-;; Transform bitselect-of-icmp into {u,s}{min,max} instructions where possible.
-(rule (simplify (bitselect ty (sgt _ x y) x y)) (smax ty x y))
-(rule (simplify (bitselect ty (sge _ x y) x y)) (smax ty x y))
-(rule (simplify (bitselect ty (ugt _ x y) x y)) (umax ty x y))
-(rule (simplify (bitselect ty (uge _ x y) x y)) (umax ty x y))
-(rule (simplify (bitselect ty (slt _ x y) x y)) (smin ty x y))
-(rule (simplify (bitselect ty (sle _ x y) x y)) (smin ty x y))
-(rule (simplify (bitselect ty (ult _ x y) x y)) (umin ty x y))
-(rule (simplify (bitselect ty (ule _ x y) x y)) (umin ty x y))
-
-;; These are the same rules as above, but when the operands for select are swapped
-(rule (simplify (bitselect ty (slt _ x y) y x)) (smax ty x y))
-(rule (simplify (bitselect ty (sle _ x y) y x)) (smax ty x y))
-(rule (simplify (bitselect ty (ult _ x y) y x)) (umax ty x y))
-(rule (simplify (bitselect ty (ule _ x y) y x)) (umax ty x y))
-(rule (simplify (bitselect ty (sgt _ x y) y x)) (smin ty x y))
-(rule (simplify (bitselect ty (sge _ x y) y x)) (smin ty x y))
-(rule (simplify (bitselect ty (ugt _ x y) y x)) (umin ty x y))
-(rule (simplify (bitselect ty (uge _ x y) y x)) (umin ty x y))
-
-;; For floats convert fcmp lt into pseudo_min and gt into pseudo_max
-;;
-;; fmax_pseudo docs state:
-;; The behaviour for this operations is defined as  fmax_pseudo(a, b) = (a < b) ? b : a, and the behaviour for zero
-;; or NaN inputs follows from the behaviour of < with such inputs.
-;;
-;; That is exactly the operation that we match here!
-(rule (simplify
-       (select ty (fcmp _ (FloatCC.LessThan) x y) x y))
-      (fmin_pseudo ty x y))
-(rule (simplify
-       (select ty (fcmp _ (FloatCC.GreaterThan) x y) x y))
-      (fmax_pseudo ty x y))
-
-;; TODO: perform this same optimization to `f{min,max}_pseudo` for vectors
-;; with the `bitselect` instruction, but the pattern is a bit more complicated
-;; due to most bitselects-over-floats having bitcasts.
-
-;; fneg(fneg(x)) == x.
-(rule (simplify (fneg ty (fneg ty x))) (subsume x))
-
-;; If both of the multiplied arguments to an `fma` are negated then remove
-;; both of them since they cancel out.
-(rule (simplify (fma ty (fneg ty x) (fneg ty y) z))
-      (fma ty x y z))
-
-;; If both of the multiplied arguments to an `fmul` are negated then remove
-;; both of them since they cancel out.
-(rule (simplify (fmul ty (fneg ty x) (fneg ty y)))
-      (fmul ty x y))
--- a/cranelift/codegen/src/opts/arithmetic.isle
+++ b/cranelift/codegen/src/opts/arithmetic.isle
@@ -0,0 +1,109 @@
+;; Rewrites for integer and floating-point arithmetic,
+;; e.g. `iadd`, `isub`, `ineg`, `imul`, `fadd`, `fsub`, `fmul`.
+
+;; x+0 == 0+x == x.
+(rule (simplify (iadd ty
+                      x
+                      (iconst ty (u64_from_imm64 0))))
+      (subsume x))
+(rule (simplify (iadd ty
+                      (iconst ty (u64_from_imm64 0))
+                      x))
+      (subsume x))
+;; x-0 == x.
+(rule (simplify (isub ty
+                      x
+                      (iconst ty (u64_from_imm64 0))))
+      (subsume x))
+;; 0-x == (ineg x).
+(rule (simplify (isub ty
+                      (iconst ty (u64_from_imm64 0))
+                      x))
+      (ineg ty x))
+
+;; ineg(ineg(x)) == x.
+(rule (simplify (ineg ty (ineg ty x))) (subsume x))
+
+;; ineg(x) * ineg(y) == x*y.
+(rule (simplify (imul ty (ineg ty x) (ineg ty y)))
+      (subsume (imul ty x y)))
+
+;; iabs(ineg(x)) == iabs(x).
+(rule (simplify (iabs ty (ineg ty x)))
+      (iabs ty x))
+
+;; iabs(iabs(x)) == iabs(x).
+(rule (simplify (iabs ty inner @ (iabs ty x)))
+      (subsume inner))
+
+;; x-x == 0.
+(rule (simplify (isub (fits_in_64 (ty_int ty)) x x)) (subsume (iconst ty (imm64 0))))
+
+;; x*1 == 1*x == x.
+(rule (simplify (imul ty
+                      x
+                      (iconst ty (u64_from_imm64 1))))
+      (subsume x))
+(rule (simplify (imul ty
+                      (iconst ty (u64_from_imm64 1))
+                      x))
+      (subsume x))
+
+;; x*0 == 0*x == 0.
+(rule (simplify (imul ty
+                      _
+                      zero @ (iconst ty (u64_from_imm64 0))))
+      (subsume zero))
+(rule (simplify (imul ty
+                      zero @ (iconst ty (u64_from_imm64 0))
+                      _))
+      (subsume zero))
+
+;; x*-1 == -1*x == ineg(x).
+(rule (simplify (imul ty x (iconst ty c)))
+      (if-let -1 (i64_sextend_imm64 ty c))
+      (ineg ty x))
+(rule (simplify (imul ty (iconst ty c) x))
+      (if-let -1 (i64_sextend_imm64 ty c))
+      (ineg ty x))
+
+;; x/1 == x.
+(rule (simplify (sdiv ty
+                      x
+                      (iconst ty (u64_from_imm64 1))))
+      (subsume x))
+(rule (simplify (udiv ty
+                      x
+                      (iconst ty (u64_from_imm64 1))))
+      (subsume x))
+
+;; TODO: strength reduction: div to shifts
+;; TODO: div/rem by constants -> magic multiplications
+
+;; x*2 == 2*x == x+x.
+(rule (simplify (imul ty x (iconst _ (simm32 2))))
+      (iadd ty x x))
+(rule (simplify (imul ty (iconst _ (simm32 2)) x))
+      (iadd ty x x))
+
+;; x*c == x<<log2(c) when c is a power of two.
+;; Note that the type of `iconst` must be the same as the type of `imul`,
+;; so these rules can only fire in situations where it's safe to construct an
+;; `iconst` of that type.
+(rule (simplify (imul ty x (iconst _ (imm64_power_of_two c))))
+      (ishl ty x (iconst ty (imm64 c))))
+(rule (simplify (imul ty (iconst _ (imm64_power_of_two c)) x))
+      (ishl ty x (iconst ty (imm64 c))))
+
+;; fneg(fneg(x)) == x.
+(rule (simplify (fneg ty (fneg ty x))) (subsume x))
+
+;; If both of the multiplied arguments to an `fma` are negated then remove
+;; both of them since they cancel out.
+(rule (simplify (fma ty (fneg ty x) (fneg ty y) z))
+      (fma ty x y z))
+
+;; If both of the multiplied arguments to an `fmul` are negated then remove
+;; both of them since they cancel out.
+(rule (simplify (fmul ty (fneg ty x) (fneg ty y)))
+      (fmul ty x y))
--- a/cranelift/codegen/src/opts/bitops.isle
+++ b/cranelift/codegen/src/opts/bitops.isle
@@ -0,0 +1,94 @@
+;; Rewrites for `band`, `bnot`, `bor`, `bxor`
+
+;; x | 0 == 0 | x == x | x == x.
+(rule (simplify (bor ty
+                     x
+                     (iconst ty (u64_from_imm64 0))))
+      (subsume x))
+(rule (simplify (bor ty
+                     (iconst ty (u64_from_imm64 0))
+                     x))
+      (subsume x))
+(rule (simplify (bor ty x x))
+      (subsume x))
+
+;; x ^ 0 == 0 ^ x == x.
+(rule (simplify (bxor ty
+                     x
+                     (iconst ty (u64_from_imm64 0))))
+      (subsume x))
+(rule (simplify (bxor ty
+                     (iconst ty (u64_from_imm64 0))
+                     x))
+      (subsume x))
+
+;; x ^ x == 0.
+(rule (simplify (bxor (fits_in_64 (ty_int ty)) x x))
+      (subsume (iconst ty (imm64 0))))
+
+;; x ^ not(x) == not(x) ^ x == x | not(x) == not(x) | x == -1.
+;; This identity also holds for non-integer types, vectors, and wider types.
+;; But `iconst` is only valid for integers up to 64 bits wide.
+(rule (simplify (bxor (fits_in_64 (ty_int ty)) x (bnot ty x))) (subsume (iconst ty (imm64 (ty_mask ty)))))
+(rule (simplify (bxor (fits_in_64 (ty_int ty)) (bnot ty x) x)) (subsume (iconst ty (imm64 (ty_mask ty)))))
+(rule (simplify (bor (fits_in_64 (ty_int ty)) x (bnot ty x))) (subsume (iconst ty (imm64 (ty_mask ty)))))
+(rule (simplify (bor (fits_in_64 (ty_int ty)) (bnot ty x) x)) (subsume (iconst ty (imm64 (ty_mask ty)))))
+
+;; x & -1 == -1 & x == x & x == x.
+(rule (simplify (band ty x x)) (subsume x))
+(rule (simplify (band ty x (iconst ty k)))
+      (if-let -1 (i64_sextend_imm64 ty k))
+      (subsume x))
+(rule (simplify (band ty (iconst ty k) x))
+      (if-let -1 (i64_sextend_imm64 ty k))
+      (subsume x))
+
+;; x & 0 == 0 & x == x & not(x) == not(x) & x == 0.
+(rule (simplify (band ty _ zero @ (iconst ty (u64_from_imm64 0)))) (subsume zero))
+(rule (simplify (band ty zero @ (iconst ty (u64_from_imm64 0)) _)) (subsume zero))
+(rule (simplify (band (fits_in_64 (ty_int ty)) x (bnot ty x))) (subsume (iconst ty (imm64 0))))
+(rule (simplify (band (fits_in_64 (ty_int ty)) (bnot ty x) x)) (subsume (iconst ty (imm64 0))))
+
+;; not(not(x)) == x.
+(rule (simplify (bnot ty (bnot ty x))) (subsume x))
+
+;; DeMorgan's rule (two versions); the inner op is matched at the outer `ty`.
+;; bnot(bor(x, y)) == band(bnot(x), bnot(y))
+(rule (simplify (bnot ty (bor ty x y)))
+      (band ty (bnot ty x) (bnot ty y)))
+;; bnot(band(x, y)) == bor(bnot(x), bnot(y))
+(rule (simplify (bnot ty (band ty x y)))
+      (bor ty (bnot ty x) (bnot ty y)))
+
+;; `or(and(x, y), not(y)) == or(x, not(y))`
+(rule (simplify (bor ty
+                     (band ty x y)
+                     z @ (bnot ty y)))
+      (bor ty x z))
+;; Duplicate the rule but swap the `bor` operands because `bor` is
+;; commutative. We could, of course, add a `simplify` rule to do the commutative
+;; swap for all `bor`s but this will bloat the e-graph with many e-nodes. It is
+;; cheaper to have additional rules, rather than additional e-nodes, because we
+;; amortize their cost via ISLE's smart codegen.
+(rule (simplify (bor ty
+                     z @ (bnot ty y)
+                     (band ty x y)))
+      (bor ty x z))
+
+;; `or(and(x, y), not(y)) == or(x, not(y))` specialized for constants, since
+;; otherwise we may not know that `z == not(y)` since we don't generally expand
+;; constants in the e-graph.
+;;
+;; (No need to duplicate for commutative `bor` for this constant version because
+;; we move constants to the right.)
+(rule (simplify (bor ty
+                     (band ty x (iconst ty (u64_from_imm64 y)))
+                     z @ (iconst ty (u64_from_imm64 zk))))
+      (if-let $true (u64_eq (u64_and (ty_mask ty) zk)
+                            (u64_and (ty_mask ty) (u64_not y))))
+      (bor ty x z))
+
+;; (x ^ -1) can be replaced with the `bnot` instruction
+(rule (simplify (bxor ty x (iconst ty k)))
+  (if-let -1 (i64_sextend_imm64 ty k))
+  (bnot ty x))
--- a/cranelift/codegen/src/opts/extends.isle
+++ b/cranelift/codegen/src/opts/extends.isle
@@ -0,0 +1,29 @@
+;; Chained `uextend` and `sextend`.
+(rule (simplify (uextend ty (uextend _intermediate_ty x)))
+      (uextend ty x))
+(rule (simplify (sextend ty (sextend _intermediate_ty x)))
+      (sextend ty x))
+
+;; Masking out any of the top bits of the result of `uextend` is a no-op. (This
+;; is like a cheap version of known-bits analysis.)
+(rule (simplify (band wide x @ (uextend _ (value_type narrow)) (iconst _ (u64_from_imm64 mask))))
+      ;; Fire only when every bit set in `narrow_mask` is also set in `mask`.
+      (if-let $true (let ((narrow_mask u64 (ty_mask narrow))) (u64_eq narrow_mask (u64_and mask narrow_mask))))
+      x)
+
+;; Masking out the sign-extended bits of an `sextend` turns it into a `uextend`.
+(rule (simplify (band wide (sextend _ x @ (value_type narrow)) (iconst _ (u64_from_imm64 mask))))
+      (if-let $true (u64_eq mask (ty_mask narrow)))
+      (uextend wide x))
+
+;; 32-bit integers zero-extended to 64-bit integers are never negative
+(rule (simplify
+       (slt ty
+             (uextend $I64 x @ (value_type $I32))
+             (iconst _ (u64_from_imm64 0))))
+      (iconst ty (imm64 0)))
+(rule (simplify
+       (sge ty
+             (uextend $I64 x @ (value_type $I32))
+             (iconst _ (u64_from_imm64 0))))
+      (iconst ty (imm64 1)))
--- a/cranelift/codegen/src/opts/remat.isle
+++ b/cranelift/codegen/src/opts/remat.isle
@@ -0,0 +1,31 @@
+;; Rematerialize ALU-op-with-imm and iconsts in each block where they're
+;; used. This is neutral (add-with-imm) or positive (iconst) for
+;; register pressure, and these ops are very cheap.
+(rule (simplify x @ (iadd _ (iconst _ _) _))
+      (remat x))
+(rule (simplify x @ (iadd _ _ (iconst _ _)))
+      (remat x))
+(rule (simplify x @ (isub _ (iconst _ _) _))
+      (remat x))
+(rule (simplify x @ (isub _ _ (iconst _ _)))
+      (remat x))
+(rule (simplify x @ (band _ (iconst _ _) _))
+      (remat x))
+(rule (simplify x @ (band _ _ (iconst _ _)))
+      (remat x))
+(rule (simplify x @ (bor _ (iconst _ _) _))
+      (remat x))
+(rule (simplify x @ (bor _ _ (iconst _ _)))
+      (remat x))
+(rule (simplify x @ (bxor _ (iconst _ _) _))
+      (remat x))
+(rule (simplify x @ (bxor _ _ (iconst _ _)))
+      (remat x))
+(rule (simplify x @ (bnot _ _))
+      (remat x))
+(rule (simplify x @ (iconst _ _))
+      (remat x))
+(rule (simplify x @ (f32const _ _))
+      (remat x))
+(rule (simplify x @ (f64const _ _))
+      (remat x))
--- a/cranelift/codegen/src/opts/selects.isle
+++ b/cranelift/codegen/src/opts/selects.isle
@@ -0,0 +1,59 @@
+;; `select`/`bitselect`-related rewrites
+
+;; Transform select-of-icmp into {u,s}{min,max} instructions where possible.
+(rule (simplify (select ty (sgt _ x y) x y)) (smax ty x y))
+(rule (simplify (select ty (sge _ x y) x y)) (smax ty x y))
+(rule (simplify (select ty (ugt _ x y) x y)) (umax ty x y))
+(rule (simplify (select ty (uge _ x y) x y)) (umax ty x y))
+(rule (simplify (select ty (slt _ x y) x y)) (smin ty x y))
+(rule (simplify (select ty (sle _ x y) x y)) (smin ty x y))
+(rule (simplify (select ty (ult _ x y) x y)) (umin ty x y))
+(rule (simplify (select ty (ule _ x y) x y)) (umin ty x y))
+
+;; These are the same rules as above, but when the operands for select are swapped
+(rule (simplify (select ty (slt _ x y) y x)) (smax ty x y))
+(rule (simplify (select ty (sle _ x y) y x)) (smax ty x y))
+(rule (simplify (select ty (ult _ x y) y x)) (umax ty x y))
+(rule (simplify (select ty (ule _ x y) y x)) (umax ty x y))
+(rule (simplify (select ty (sgt _ x y) y x)) (smin ty x y))
+(rule (simplify (select ty (sge _ x y) y x)) (smin ty x y))
+(rule (simplify (select ty (ugt _ x y) y x)) (umin ty x y))
+(rule (simplify (select ty (uge _ x y) y x)) (umin ty x y))
+
+;; Transform bitselect-of-icmp into {u,s}{min,max} instructions where possible.
+(rule (simplify (bitselect ty (sgt _ x y) x y)) (smax ty x y))
+(rule (simplify (bitselect ty (sge _ x y) x y)) (smax ty x y))
+(rule (simplify (bitselect ty (ugt _ x y) x y)) (umax ty x y))
+(rule (simplify (bitselect ty (uge _ x y) x y)) (umax ty x y))
+(rule (simplify (bitselect ty (slt _ x y) x y)) (smin ty x y))
+(rule (simplify (bitselect ty (sle _ x y) x y)) (smin ty x y))
+(rule (simplify (bitselect ty (ult _ x y) x y)) (umin ty x y))
+(rule (simplify (bitselect ty (ule _ x y) x y)) (umin ty x y))
+
+;; These are the same rules as above, but when the operands for select are swapped
+(rule (simplify (bitselect ty (slt _ x y) y x)) (smax ty x y))
+(rule (simplify (bitselect ty (sle _ x y) y x)) (smax ty x y))
+(rule (simplify (bitselect ty (ult _ x y) y x)) (umax ty x y))
+(rule (simplify (bitselect ty (ule _ x y) y x)) (umax ty x y))
+(rule (simplify (bitselect ty (sgt _ x y) y x)) (smin ty x y))
+(rule (simplify (bitselect ty (sge _ x y) y x)) (smin ty x y))
+(rule (simplify (bitselect ty (ugt _ x y) y x)) (umin ty x y))
+(rule (simplify (bitselect ty (uge _ x y) y x)) (umin ty x y))
+
+;; For floats convert fcmp lt into pseudo_min and gt into pseudo_max
+;;
+;; The Cranelift docs define `fmax_pseudo(a, b) = (a < b) ? b : a` and
+;; `fmin_pseudo(a, b) = (b < a) ? b : a`, with the behaviour for zero or NaN
+;; inputs following from the behaviour of `<` with such inputs. The `select`
+;; below returns `y` whenever the comparison is false (including unordered), so
+;; `y` must be the first operand: `select(x < y, x, y) == fmin_pseudo(y, x)`.
+(rule (simplify
+       (select ty (fcmp _ (FloatCC.LessThan) x y) x y))
+      (fmin_pseudo ty y x))
+(rule (simplify
+       (select ty (fcmp _ (FloatCC.GreaterThan) x y) x y))
+      (fmax_pseudo ty y x))
+
+;; TODO: perform this same optimization to `f{min,max}_pseudo` for vectors
+;; with the `bitselect` instruction, but the pattern is a bit more complicated
+;; due to most bitselects-over-floats having bitcasts.
--- a/cranelift/codegen/src/opts/shifts.isle
+++ b/cranelift/codegen/src/opts/shifts.isle
@@ -0,0 +1,80 @@
+;; rewrites for shifts and rotates: `ishl`, `ushr`, `sshr`, `rotl`, `rotr`
+
+;; x>>0 == x<<0 == x rotr 0 == x rotl 0 == x, whatever type the zero amount has.
+(rule (simplify (ishl ty
+                      x
+                      (iconst _ (u64_from_imm64 0))))
+      (subsume x))
+(rule (simplify (ushr ty
+                      x
+                      (iconst _ (u64_from_imm64 0))))
+      (subsume x))
+(rule (simplify (sshr ty
+                      x
+                      (iconst _ (u64_from_imm64 0))))
+      (subsume x))
+(rule (simplify (rotr ty
+                      x
+                      (iconst _ (u64_from_imm64 0))))
+      (subsume x))
+(rule (simplify (rotl ty
+                      x
+                      (iconst _ (u64_from_imm64 0))))
+      (subsume x))
+
+;; `(x >> k) << k` is the same as masking off the bottom `k` bits (regardless if
+;; this is a signed or unsigned shift right). The `iconst` mask is scalar-only.
+(rule (simplify (ishl (fits_in_64 (ty_int ty))
+                      (ushr ty x (iconst _ k))
+                      (iconst _ k)))
+      (let ((mask Imm64 (imm64_shl ty (imm64 0xFFFF_FFFF_FFFF_FFFF) k)))
+        (band ty x (iconst ty mask))))
+(rule (simplify (ishl (fits_in_64 (ty_int ty))
+                      (sshr ty x (iconst _ k))
+                      (iconst _ k)))
+      (let ((mask Imm64 (imm64_shl ty (imm64 0xFFFF_FFFF_FFFF_FFFF) k)))
+        (band ty x (iconst ty mask))))
+
+;; Unsigned `(x << k) >> k` clears the top `k` bits. Vectors admit a similar
+;; rewrite, but an `iconst` mask can only express the scalar-integer case.
+(rule (simplify (ushr (fits_in_64 (ty_int ty))
+                      (ishl ty val (iconst _ amt))
+                      (iconst _ amt)))
+      (let ((mask Imm64 (imm64_ushr ty (imm64 (ty_mask ty)) amt)))
+        (band ty val (iconst ty mask))))
+
+;; A signed `(x << k) >> k` sign-extends the low `n` bits of `x` to `n+k` bits.
+;; When `x` is itself an extension (`sextend` or `uextend`) from `n` bits to
+;; `n+k` bits, the whole sequence is just a `sextend` of the narrow value. This
+;; rule covers the `uextend` case with `k` equal to the width difference.
+(rule (simplify (sshr wide
+                 (ishl wide
+                  (uextend wide inner @ (value_type narrow))
+                  (iconst _ amt))
+                 (iconst _ amt)))
+      (if-let (u64_from_imm64 amt_bits) amt)
+      (if-let $true (u64_eq amt_bits (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow))))
+      (sextend wide inner))
+
+;; When `k` is strictly less than the width difference between the two types,
+;; the bit that becomes the sign bit after the left shift is one of the bits
+;; produced by the extend, so the result is just the original extend op.
+(rule (simplify (sshr wide
+                 (ishl wide
+                  ext @ (uextend wide (value_type narrow))
+                  (iconst _ amt))
+                 (iconst _ amt)))
+      (if-let (u64_from_imm64 amt_bits) amt)
+      (if-let $true (u64_lt amt_bits (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow))))
+      ext)
+
+;; With a `sextend` as the inner op, both the exact-width and the smaller-shift
+;; cases produce that same `sextend`, so a single `<=` test covers them.
+(rule (simplify (sshr wide
+                 (ishl wide
+                  ext @ (sextend wide (value_type narrow))
+                  (iconst _ amt))
+                 (iconst _ amt)))
+      (if-let (u64_from_imm64 amt_bits) amt)
+      (if-let $true (u64_le amt_bits (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow))))
+      ext)
--- a/cranelift/filetests/filetests/egraph/algebraic.clif
+++ b/cranelift/filetests/filetests/egraph/algebraic.clif
@@ -1,497 +0,0 @@
-test optimize
-set opt_level=speed
-target x86_64
-
-function %f0(i32) -> i32 {
-block0(v0: i32):
-    v1 = iconst.i32 2
-    v2 = imul v0, v1
-    ; check: v5 = ishl v0, v4  ; v4 = 1
-    ; check: return v5
-    return v2
-}
-
-function %f1() -> i64 {
-block0:
-  v0 = iconst.i32 0xffff_ffff_9876_5432
-  v1 = uextend.i64 v0
-  return v1
-  ; check: v2 = iconst.i64 0x9876_5432
-  ; check: return v2  ; v2 = 0x9876_5432
-}
-
-function %unsigned_shift_right_shift_left_i8(i8) -> i8 {
-block0(v0: i8):
-    v1 = iconst.i8 5
-    v2 = ushr v0, v1
-    v3 = ishl v2, v1
-    return v3
-    ; check: v4 = iconst.i8 224
-    ; check: v5 = band v0, v4
-    ; check: return v5
-}
-
-function %unsigned_shift_right_shift_left_i32(i32) -> i32 {
-block0(v0: i32):
-    v1 = iconst.i32 5
-    v2 = ushr v0, v1
-    v3 = ishl v2, v1
-    return v3
-    ; check: v4 = iconst.i32 0xffff_ffe0
-    ; check: v5 = band v0, v4
-    ; check: return v5
-}
-
-function %unsigned_shift_right_shift_left_i64(i64) -> i64 {
-block0(v0: i64):
-    v1 = iconst.i64 5
-    v2 = ushr v0, v1
-    v3 = ishl v2, v1
-    return v3
-    ; check: v4 = iconst.i64 -32
-    ; check: v5 = band v0, v4
-    ; check: return v5
-}
-
-function %signed_shift_right_shift_left_i8(i8) -> i8 {
-block0(v0: i8):
-    v1 = iconst.i8 5
-    v2 = sshr v0, v1
-    v3 = ishl v2, v1
-    return v3
-    ; check: v4 = iconst.i8 224
-    ; check: v5 = band v0, v4
-    ; check: return v5
-}
-
-function %signed_shift_right_shift_left_i32(i32) -> i32 {
-block0(v0: i32):
-    v1 = iconst.i32 5
-    v2 = sshr v0, v1
-    v3 = ishl v2, v1
-    return v3
-    ; check: v4 = iconst.i32 0xffff_ffe0
-    ; check: v5 = band v0, v4
-    ; check: return v5
-}
-
-function %signed_shift_right_shift_left_i64(i64) -> i64 {
-block0(v0: i64):
-    v1 = iconst.i64 5
-    v2 = sshr v0, v1
-    v3 = ishl v2, v1
-    return v3
-    ; check: v4 = iconst.i64 -32
-    ; check: v5 = band v0, v4
-    ; check: return v5
-}
-
-function %signed_shift_right_shift_left_i8_mask_rhs(i8) -> i8 {
-block0(v0: i8):
-    v1 = iconst.i8 0xf5
-    v2 = sshr v0, v1
-    v3 = ishl v2, v1
-    return v3
-    ; check: v4 = iconst.i8 224
-    ; check: v5 = band v0, v4
-    ; check: return v5
-}
-
-function %sextend_shift_32_64_unsigned(i32) -> i64 {
-block0(v0: i32):
-    v1 = iconst.i8 32
-    v2 = sextend.i64 v0
-    v3 = ishl v2, v1
-    v4 = ushr v3, v1
-    return v4
-    ; check: v7 = uextend.i64 v0
-    ; check: return v7
-}
-
-function %sextend_shift_32_64_signed(i32) -> i64 {
-block0(v0: i32):
-    v1 = iconst.i8 32
-    v2 = sextend.i64 v0
-    v3 = ishl v2, v1
-    v4 = sshr v3, v1
-    return v4
-    ; check: return v2
-}
-
-function %sextend_undershift_32_64_unsigned(i32) -> i64 {
-block0(v0: i32):
-    v1 = iconst.i8 31
-    v2 = sextend.i64 v0
-    v3 = ishl v2, v1
-    v4 = ushr v3, v1
-    return v4
-    ; check: v5 = iconst.i64 0x0001_ffff_ffff
-    ; check: v6 = band v2, v5
-    ; check: return v6
-}
-
-function %sextend_undershift_32_64_signed(i32) -> i64 {
-block0(v0: i32):
-    v1 = iconst.i8 31
-    v2 = sextend.i64 v0
-    v3 = ishl v2, v1
-    v4 = sshr v3, v1
-    return v4
-    ; check: return v2
-}
-
-function %sextend_shift_8_64_unsigned(i8) -> i64 {
-block0(v0: i8):
-    v1 = iconst.i8 56
-    v2 = sextend.i64 v0
-    v3 = ishl v2, v1
-    v4 = ushr v3, v1
-    return v4
-    ; check: v7 = uextend.i64 v0
-    ; check: return v7
-}
-
-function %sextend_shift_8_64_signed(i8) -> i64 {
-block0(v0: i8):
-    v1 = iconst.i8 56
-    v2 = sextend.i64 v0
-    v3 = ishl v2, v1
-    v4 = sshr v3, v1
-    return v4
-    ; check: return v2
-}
-
-function %uextend_shift_32_64_unsigned(i32) -> i64 {
-block0(v0: i32):
-    v1 = iconst.i8 32
-    v2 = uextend.i64 v0
-    v3 = ishl v2, v1
-    v4 = ushr v3, v1
-    return v4
-    ; check: return v2
-}
-
-function %uextend_shift_32_64_signed(i32) -> i64 {
-block0(v0: i32):
-    v1 = iconst.i8 32
-    v2 = uextend.i64 v0
-    v3 = ishl v2, v1
-    v4 = sshr v3, v1
-    return v4
-    ; check: v5 = sextend.i64 v0
-    ; check: return v5
-}
-
-function %uextend_undershift_32_64_unsigned(i32) -> i64 {
-block0(v0: i32):
-    v1 = iconst.i8 31
-    v2 = uextend.i64 v0
-    v3 = ishl v2, v1
-    v4 = ushr v3, v1
-    return v4
-    ; check: return v2
-}
-
-function %uextend_undershift_32_64_signed(i32) -> i64 {
-block0(v0: i32):
-    v1 = iconst.i8 31
-    v2 = uextend.i64 v0
-    v3 = ishl v2, v1
-    v4 = sshr v3, v1
-    return v4
-    ; check: return v2
-}
-
-function %uextend_shift_8_64_unsigned(i8) -> i64 {
-block0(v0: i8):
-    v1 = iconst.i8 56
-    v2 = uextend.i64 v0
-    v3 = ishl v2, v1
-    v4 = ushr v3, v1
-    return v4
-    ; check: return v2
-}
-
-function %uextend_shift_8_64_signed(i8) -> i64 {
-block0(v0: i8):
-    v1 = iconst.i8 56
-    v2 = uextend.i64 v0
-    v3 = ishl v2, v1
-    v4 = sshr v3, v1
-    return v4
-    ; check: v5 = sextend.i64 v0
-    ; check: return v5
-}
-
-function %double_ineg(i32) -> i32 {
-block0(v0: i32):
-    v1 = ineg v0
-    v2 = ineg v1
-    return v2
-    ; check: return v0
-}
-
-function %imul_ineg_cancel(i32, i32) -> i32 {
-block0(v0: i32, v1: i32):
-    v2 = ineg v0
-    v3 = ineg v1
-    v4 = imul v2, v3
-    return v4
-    ; check: v5 = imul v0, v1
-    ; check: return v5
-}
-
-function %iabs_ineg(i32) -> i32 {
-block0(v0: i32):
-    v1 = ineg v0
-    v2 = iabs v1
-    return v2
-    ; check: v3 = iabs v0
-    ; check: return v3
-}
-
-function %iabs_iabs(i32) -> i32 {
-block0(v0: i32):
-    v1 = iabs v0
-    v2 = iabs v1
-    return v2
-    ; check: return v1
-}
-
-function %isub_self(i32) -> i32 {
-block0(v0: i32):
-    v1 = isub v0, v0
-    return v1
-    ; check: v2 = iconst.i32 0
-    ; check: return v2
-}
-
-function %mul_minus_one(i32) -> i32 {
-block0(v0: i32):
-    v1 = iconst.i32 0xffff_ffff ; -1
-    v2 = imul v0, v1
-    return v2
-    ; check: v3 = ineg v0
-    ; check: v4 -> v3
-    ; check: return v3
-}
-
-function %mul_minus_one_commuted(i32) -> i32 {
-block0(v0: i32):
-    v1 = iconst.i32 0xffff_ffff ; -1
-    v2 = imul v1, v0
-    return v2
-    ; check: v3 = ineg v0
-    ; check: v5 -> v3
-    ; check: v6 -> v3
-    ; check: return v3
-}
-
-function %or_and_y_with_not_y_i8(i8, i8) -> i8 {
-block0(v0: i8, v1: i8):
-    v2 = band v0, v1
-    v3 = bnot v1
-    v4 = bor v2, v3
-    return v4
-    ; check: v5 = bor v0, v3
-    ; check: return v5
-}
-
-function %or_and_constant_with_not_constant_i8(i8) -> i8 {
-block0(v0: i8):
-    v1 = iconst.i8 -4
-    v2 = band v0, v1
-    v3 = iconst.i8 3
-    v4 = bor v2, v3
-    return v4
-    ; check: v5 = bor v0, v3
-    ; check: return v5
-}
-
-function %or_and_y_with_not_y_i8(i8, i8) -> i8 {
-block0(v0: i8, v1: i8):
-    v2 = band v0, v1
-    v3 = bnot v1
-    v4 = bor v3, v2
-    return v4
-    ; check: v5 = bor v0, v3
-    ; check: return v5
-}
-
-function %or_and_constant_with_not_constant_i8(i8) -> i8 {
-block0(v0: i8):
-    v1 = iconst.i8 -4
-    v2 = band v0, v1
-    v3 = iconst.i8 3
-    v4 = bor v3, v2
-    return v4
-    ; check: v6 = bor v0, v3
-    ; check: return v6
-}
-
-function %or_and_constant_with_any_constant_should_not_apply_rule_i8(i8) -> i8 {
-block0(v0: i8):
-    v1 = iconst.i8 -4
-    v2 = band v0, v1
-    ;; `v3` is not `bnot(v1)` so the rewrite should not apply.
-    v3 = iconst.i8 -5
-    v4 = bor v2, v3
-    return v4
-    ; check: return v4
-}
-
-function %or_and_y_with_not_y_i64(i64, i64) -> i64 {
-block0(v0: i64, v1: i64):
-    v2 = band v0, v1
-    v3 = bnot v1
-    v4 = bor v2, v3
-    return v4
-    ; check: v5 = bor v0, v3
-    ; check: return v5
-}
-
-function %or_and_constant_with_not_constant_i64(i64) -> i64 {
-block0(v0: i64):
-    v1 = iconst.i64 -4
-    v2 = band v0, v1
-    v3 = iconst.i64 3
-    v4 = bor v2, v3
-    return v4
-    ; check: v5 = bor v0, v3
-    ; check: return v5
-}
-
-function %or_and_y_with_not_y_i64(i64, i64) -> i64 {
-block0(v0: i64, v1: i64):
-    v2 = band v0, v1
-    v3 = bnot v1
-    v4 = bor v3, v2
-    return v4
-    ; check: v5 = bor v0, v3
-    ; check: return v5
-}
-
-function %or_and_constant_with_not_constant_i64(i64) -> i64 {
-block0(v0: i64):
-    v1 = iconst.i64 -4
-    v2 = band v0, v1
-    v3 = iconst.i64 3
-    v4 = bor v3, v2
-    return v4
-    ; check: v6 = bor v0, v3
-    ; check: return v6
-}
-
-function %or_and_constant_with_any_constant_should_not_apply_rule_i64(i64) -> i64 {
-block0(v0: i64):
-    v1 = iconst.i64 -4
-    v2 = band v0, v1
-    ;; `v3` is not `bnot(v1)` so the rewrite should not apply.
-    v3 = iconst.i64 -5
-    v4 = bor v2, v3
-    return v4
-    ; check: return v4
-}
-
-function %bnot1(i8) -> i8 {
-block0(v1: i8):
-    v2 = iconst.i8 -1
-    v3 = bxor v1, v2
-    return v3
-}
-
-; check: v4 = bnot v1
-; check: return v4
-
-function %bnot2(i64) -> i64 {
-block0(v1: i64):
-    v2 = iconst.i64 -1
-    v3 = bxor v1, v2
-    return v3
-}
-
-; check: v4 = bnot v1
-; check: return v4
-
-function %bnot3(i64) -> i64 {
-block0(v1: i64):
-    v2 = iconst.i64 -1
-    v3 = bxor v2, v1
-    return v3
-}
-
-; check: v5 = bnot v1
-; check: return v5
-
-function %extend_always_above_zero(i32) -> i8 {
-block0(v1: i32):
-    v2 = uextend.i64 v1
-    v3 = iconst.i64 0
-    v4 = icmp slt v2, v3
-    return v4
-}
-
-; check: v5 = iconst.i8 0
-; check: return v5
-
-function %extend_always_above_zero2(i32) -> i8 {
-block0(v1: i32):
-    v2 = uextend.i64 v1
-    v3 = iconst.i64 0
-    v4 = icmp sge v2, v3
-    return v4
-}
-
-; check: v5 = iconst.i8 1
-; check: return v5
-
-function %double_uextend(i16) -> i64 {
-block0(v1: i16):
-    v2 = uextend.i32 v1
-    v3 = uextend.i64 v2
-    return v3
-}
-
-; check: v4 = uextend.i64 v1
-; check: return v4
-
-function %double_sextend(i16) -> i64 {
-block0(v1: i16):
-    v2 = sextend.i32 v1
-    v3 = sextend.i64 v2
-    return v3
-}
-
-; check: v4 = sextend.i64 v1
-; check: return v4
-
-function %double_fneg(f32) -> f32 {
-block0(v1: f32):
-    v2 = fneg v1
-    v3 = fneg v2
-    return v3
-}
-
-; check: return v1
-
-function %fma_double_fneg(f32, f32, f32) -> f32 {
-block0(v1: f32, v2: f32, v3: f32):
-    v4 = fneg v1
-    v5 = fneg v2
-    v6 = fma v4, v5, v3
-    return v6
-}
-
-; check: v7 = fma v1, v2, v3
-; check: return v7
-
-function %fmul_double_fneg(f32, f32) -> f32 {
-block0(v1: f32, v2: f32):
-    v3 = fneg v1
-    v4 = fneg v2
-    v5 = fmul v3, v4
-    return v5
-}
-
-; check: v6 = fmul v1, v2
-; check: return v6
--- a/cranelift/filetests/filetests/egraph/arithmetic.clif
+++ b/cranelift/filetests/filetests/egraph/arithmetic.clif
@@ -0,0 +1,105 @@
+test optimize
+set opt_level=speed
+target x86_64
+
+function %f0(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 2
+    v2 = imul v0, v1
+    ; check: v5 = ishl v0, v4  ; v4 = 1
+    ; check: return v5
+    return v2
+}
+
+function %double_ineg(i32) -> i32 {
+block0(v0: i32):
+    v1 = ineg v0
+    v2 = ineg v1
+    return v2
+    ; check: return v0
+}
+
+function %imul_ineg_cancel(i32, i32) -> i32 {
+block0(v0: i32, v1: i32):
+    v2 = ineg v0
+    v3 = ineg v1
+    v4 = imul v2, v3
+    return v4
+    ; check: v5 = imul v0, v1
+    ; check: return v5
+}
+
+function %iabs_ineg(i32) -> i32 {
+block0(v0: i32):
+    v1 = ineg v0
+    v2 = iabs v1
+    return v2
+    ; check: v3 = iabs v0
+    ; check: return v3
+}
+
+function %iabs_iabs(i32) -> i32 {
+block0(v0: i32):
+    v1 = iabs v0
+    v2 = iabs v1
+    return v2
+    ; check: return v1
+}
+
+function %isub_self(i32) -> i32 {
+block0(v0: i32):
+    v1 = isub v0, v0
+    return v1
+    ; check: v2 = iconst.i32 0
+    ; check: return v2
+}
+
+function %mul_minus_one(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 0xffff_ffff ; -1
+    v2 = imul v0, v1
+    return v2
+    ; check: v3 = ineg v0
+    ; check: v4 -> v3
+    ; check: return v3
+}
+
+function %mul_minus_one_commuted(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 0xffff_ffff ; -1
+    v2 = imul v1, v0
+    return v2
+    ; check: v3 = ineg v0
+    ; check: return v3
+}
+
+function %double_fneg(f32) -> f32 {
+block0(v1: f32):
+    v2 = fneg v1
+    v3 = fneg v2
+    return v3
+}
+
+; check: return v1
+
+function %fma_double_fneg(f32, f32, f32) -> f32 {
+block0(v1: f32, v2: f32, v3: f32):
+    v4 = fneg v1
+    v5 = fneg v2
+    v6 = fma v4, v5, v3
+    return v6
+}
+
+; check: v7 = fma v1, v2, v3
+; check: return v7
+
+function %fmul_double_fneg(f32, f32) -> f32 {
+block0(v1: f32, v2: f32):
+    v3 = fneg v1
+    v4 = fneg v2
+    v5 = fmul v3, v4
+    return v5
+}
+
+; check: v6 = fmul v1, v2
+; check: return v6
--- a/cranelift/filetests/filetests/egraph/bitops.clif
+++ b/cranelift/filetests/filetests/egraph/bitops.clif
@@ -0,0 +1,139 @@
+test optimize
+set opt_level=speed
+target x86_64
+
+function %or_and_y_with_not_y_i8(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+    v2 = band v0, v1
+    v3 = bnot v1
+    v4 = bor v2, v3
+    return v4
+    ; check: v5 = bor v0, v3
+    ; check: return v5
+}
+
+function %or_and_constant_with_not_constant_i8(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 -4
+    v2 = band v0, v1
+    v3 = iconst.i8 3
+    v4 = bor v2, v3
+    return v4
+    ; check: v5 = bor v0, v3
+    ; check: return v5
+}
+
+function %or_and_y_with_not_y_i8_commuted(i8, i8) -> i8 {
+block0(v0: i8, v1: i8):
+    v2 = band v0, v1
+    v3 = bnot v1
+    v4 = bor v3, v2 ; same as %or_and_y_with_not_y_i8, with the `bor` operands swapped
+    return v4
+    ; check: v5 = bor v0, v3
+    ; check: return v5
+}
+
+function %or_and_constant_with_not_constant_i8_commuted(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 -4
+    v2 = band v0, v1
+    v3 = iconst.i8 3
+    v4 = bor v3, v2 ; same as %or_and_constant_with_not_constant_i8, with the `bor` operands swapped
+    return v4
+    ; check: v6 = bor v0, v3
+    ; check: return v6
+}
+
+function %or_and_constant_with_any_constant_should_not_apply_rule_i8(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 -4
+    v2 = band v0, v1
+    ;; `v3` is not `bnot(v1)` so the rewrite should not apply.
+    v3 = iconst.i8 -5
+    v4 = bor v2, v3
+    return v4
+    ; check: return v4
+}
+
+function %or_and_y_with_not_y_i64(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = band v0, v1
+    v3 = bnot v1
+    v4 = bor v2, v3
+    return v4
+    ; check: v5 = bor v0, v3
+    ; check: return v5
+}
+
+function %or_and_constant_with_not_constant_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 -4
+    v2 = band v0, v1
+    v3 = iconst.i64 3
+    v4 = bor v2, v3
+    return v4
+    ; check: v5 = bor v0, v3
+    ; check: return v5
+}
+
+function %or_and_y_with_not_y_i64_commuted(i64, i64) -> i64 {
+block0(v0: i64, v1: i64):
+    v2 = band v0, v1
+    v3 = bnot v1
+    v4 = bor v3, v2 ; same as %or_and_y_with_not_y_i64, with the `bor` operands swapped
+    return v4
+    ; check: v5 = bor v0, v3
+    ; check: return v5
+}
+
+function %or_and_constant_with_not_constant_i64_commuted(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 -4
+    v2 = band v0, v1
+    v3 = iconst.i64 3
+    v4 = bor v3, v2 ; same as %or_and_constant_with_not_constant_i64, with the `bor` operands swapped
+    return v4
+    ; check: v6 = bor v0, v3
+    ; check: return v6
+}
+
+function %or_and_constant_with_any_constant_should_not_apply_rule_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 -4
+    v2 = band v0, v1
+    ;; `v3` is not `bnot(v1)` so the rewrite should not apply.
+    v3 = iconst.i64 -5
+    v4 = bor v2, v3
+    return v4
+    ; check: return v4
+}
+
+function %bnot1(i8) -> i8 {
+block0(v1: i8):
+    v2 = iconst.i8 -1
+    v3 = bxor v1, v2
+    return v3
+}
+
+; check: v4 = bnot v1
+; check: return v4
+
+function %bnot2(i64) -> i64 {
+block0(v1: i64):
+    v2 = iconst.i64 -1
+    v3 = bxor v1, v2
+    return v3
+}
+
+; check: v4 = bnot v1
+; check: return v4
+
+function %bnot3(i64) -> i64 {
+block0(v1: i64):
+    v2 = iconst.i64 -1
+    v3 = bxor v2, v1
+    return v3
+}
+
+; check: v5 = bnot v1
+; check: return v5
--- a/cranelift/filetests/filetests/egraph/extends.clif
+++ b/cranelift/filetests/filetests/egraph/extends.clif
@@ -0,0 +1,55 @@
+test optimize
+set opt_level=speed
+target x86_64
+
+function %f1() -> i64 {
+block0:
+  v0 = iconst.i32 0xffff_ffff_9876_5432 ; literal is wider than i32 on purpose
+  v1 = uextend.i64 v0 ; only the low 32 bits of the literal may reach the i64 result
+  return v1
+  ; check: v2 = iconst.i64 0x9876_5432
+  ; check: return v2  ; v2 = 0x9876_5432
+}
+
+
+function %extend_always_above_zero(i32) -> i8 {
+block0(v1: i32):
+    v2 = uextend.i64 v1
+    v3 = iconst.i64 0
+    v4 = icmp slt v2, v3
+    return v4
+}
+
+; check: v5 = iconst.i8 0
+; check: return v5
+
+function %extend_always_above_zero2(i32) -> i8 {
+block0(v1: i32):
+    v2 = uextend.i64 v1
+    v3 = iconst.i64 0
+    v4 = icmp sge v2, v3
+    return v4
+}
+
+; check: v5 = iconst.i8 1
+; check: return v5
+
+function %double_uextend(i16) -> i64 {
+block0(v1: i16):
+    v2 = uextend.i32 v1
+    v3 = uextend.i64 v2
+    return v3
+}
+
+; check: v4 = uextend.i64 v1
+; check: return v4
+
+function %double_sextend(i16) -> i64 {
+block0(v1: i16):
+    v2 = sextend.i32 v1
+    v3 = sextend.i64 v2
+    return v3
+}
+
+; check: v4 = sextend.i64 v1
+; check: return v4
--- a/cranelift/filetests/filetests/egraph/shifts.clif
+++ b/cranelift/filetests/filetests/egraph/shifts.clif
@@ -0,0 +1,206 @@
+test optimize
+set opt_level=speed
+target x86_64
+
+function %unsigned_shift_right_shift_left_i8(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 5
+    v2 = ushr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i8 224
+    ; check: v5 = band v0, v4
+    ; check: return v5
+}
+
+function %unsigned_shift_right_shift_left_i32(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 5
+    v2 = ushr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i32 0xffff_ffe0
+    ; check: v5 = band v0, v4
+    ; check: return v5
+}
+
+function %unsigned_shift_right_shift_left_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 5
+    v2 = ushr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i64 -32
+    ; check: v5 = band v0, v4
+    ; check: return v5
+}
+
+function %signed_shift_right_shift_left_i8(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 5
+    v2 = sshr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i8 224
+    ; check: v5 = band v0, v4
+    ; check: return v5
+}
+
+function %signed_shift_right_shift_left_i32(i32) -> i32 {
+block0(v0: i32):
+    v1 = iconst.i32 5
+    v2 = sshr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i32 0xffff_ffe0
+    ; check: v5 = band v0, v4
+    ; check: return v5
+}
+
+function %signed_shift_right_shift_left_i64(i64) -> i64 {
+block0(v0: i64):
+    v1 = iconst.i64 5
+    v2 = sshr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i64 -32
+    ; check: v5 = band v0, v4
+    ; check: return v5
+}
+
+function %signed_shift_right_shift_left_i8_mask_rhs(i8) -> i8 {
+block0(v0: i8):
+    v1 = iconst.i8 0xf5 ; out-of-range amount; 0xf5 & 7 == 5
+    v2 = sshr v0, v1
+    v3 = ishl v2, v1
+    return v3
+    ; check: v4 = iconst.i8 224
+    ; check: v5 = band v0, v4
+    ; check: return v5
+}
+
+function %sextend_shift_32_64_unsigned(i32) -> i64 {
+block0(v0: i32):
+    v1 = iconst.i8 32
+    v2 = sextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = ushr v3, v1
+    return v4
+    ; check: v7 = uextend.i64 v0
+    ; check: return v7
+}
+
+function %sextend_shift_32_64_signed(i32) -> i64 {
+block0(v0: i32):
+    v1 = iconst.i8 32
+    v2 = sextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = sshr v3, v1
+    return v4
+    ; check: return v2
+}
+
+function %sextend_undershift_32_64_unsigned(i32) -> i64 {
+block0(v0: i32):
+    v1 = iconst.i8 31
+    v2 = sextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = ushr v3, v1
+    return v4
+    ; check: v5 = iconst.i64 0x0001_ffff_ffff
+    ; check: v6 = band v2, v5
+    ; check: return v6
+}
+
+function %sextend_undershift_32_64_signed(i32) -> i64 {
+block0(v0: i32):
+    v1 = iconst.i8 31
+    v2 = sextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = sshr v3, v1
+    return v4
+    ; check: return v2
+}
+
+function %sextend_shift_8_64_unsigned(i8) -> i64 {
+block0(v0: i8):
+    v1 = iconst.i8 56
+    v2 = sextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = ushr v3, v1
+    return v4
+    ; check: v7 = uextend.i64 v0
+    ; check: return v7
+}
+
+function %sextend_shift_8_64_signed(i8) -> i64 {
+block0(v0: i8):
+    v1 = iconst.i8 56
+    v2 = sextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = sshr v3, v1
+    return v4
+    ; check: return v2
+}
+
+function %uextend_shift_32_64_unsigned(i32) -> i64 {
+block0(v0: i32):
+    v1 = iconst.i8 32
+    v2 = uextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = ushr v3, v1
+    return v4
+    ; check: return v2
+}
+
+function %uextend_shift_32_64_signed(i32) -> i64 {
+block0(v0: i32):
+    v1 = iconst.i8 32
+    v2 = uextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = sshr v3, v1
+    return v4
+    ; check: v5 = sextend.i64 v0
+    ; check: return v5
+}
+
+function %uextend_undershift_32_64_unsigned(i32) -> i64 {
+block0(v0: i32):
+    v1 = iconst.i8 31
+    v2 = uextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = ushr v3, v1
+    return v4
+    ; check: return v2
+}
+
+function %uextend_undershift_32_64_signed(i32) -> i64 {
+block0(v0: i32):
+    v1 = iconst.i8 31
+    v2 = uextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = sshr v3, v1
+    return v4
+    ; check: return v2
+}
+
+function %uextend_shift_8_64_unsigned(i8) -> i64 {
+block0(v0: i8):
+    v1 = iconst.i8 56
+    v2 = uextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = ushr v3, v1
+    return v4
+    ; check: return v2
+}
+
+function %uextend_shift_8_64_signed(i8) -> i64 {
+block0(v0: i8):
+    v1 = iconst.i8 56
+    v2 = uextend.i64 v0
+    v3 = ishl v2, v1
+    v4 = sshr v3, v1
+    return v4
+    ; check: v5 = sextend.i64 v0
+    ; check: return v5
+}