diff --git a/cranelift/codegen/build.rs b/cranelift/codegen/build.rs index 349f3140b9..8ee8b42676 100644 --- a/cranelift/codegen/build.rs +++ b/cranelift/codegen/build.rs @@ -221,9 +221,14 @@ fn get_isle_compilations( inputs: vec![ prelude_isle.clone(), prelude_opt_isle, - src_opts.join("algebraic.isle"), - src_opts.join("icmp.isle"), + src_opts.join("arithmetic.isle"), + src_opts.join("bitops.isle"), src_opts.join("cprop.isle"), + src_opts.join("extends.isle"), + src_opts.join("icmp.isle"), + src_opts.join("remat.isle"), + src_opts.join("selects.isle"), + src_opts.join("shifts.isle"), ], untracked_inputs: vec![clif_opt_isle], }, diff --git a/cranelift/codegen/src/opts/README.md b/cranelift/codegen/src/opts/README.md new file mode 100644 index 0000000000..b4f46420ab --- /dev/null +++ b/cranelift/codegen/src/opts/README.md @@ -0,0 +1,5 @@ +Rules here are allowed to rewrite pure expressions arbitrarily, +using the same inputs as the original, or fewer. In other words, we +cannot pull a new eclass id out of thin air and refer to it, other +than a piece of the input or a new node that we construct; but we +can freely rewrite e.g. `x+y-y` to `x`. diff --git a/cranelift/codegen/src/opts/algebraic.isle b/cranelift/codegen/src/opts/algebraic.isle deleted file mode 100644 index d29dba8b9b..0000000000 --- a/cranelift/codegen/src/opts/algebraic.isle +++ /dev/null @@ -1,411 +0,0 @@ -;; Algebraic optimizations. - -;; Rules here are allowed to rewrite pure expressions arbitrarily, -;; using the same inputs as the original, or fewer. In other words, we -;; cannot pull a new eclass id out of thin air and refer to it, other -;; than a piece of the input or a new node that we construct; but we -;; can freely rewrite e.g. `x+y-y` to `x`. - -;; Chained `uextend` and `sextend`. -(rule (simplify (uextend ty (uextend _intermediate_ty x))) - (uextend ty x)) -(rule (simplify (sextend ty (sextend _intermediate_ty x))) - (sextend ty x)) - -;; x+0 == 0+x == x. -(rule (simplify (iadd ty - x - (iconst ty (u64_from_imm64 0)))) - (subsume x)) -(rule (simplify (iadd ty - (iconst ty (u64_from_imm64 0)) - x)) - (subsume x)) -;; x-0 == x. -(rule (simplify (isub ty - x - (iconst ty (u64_from_imm64 0)))) - (subsume x)) -;; 0-x == (ineg x). -(rule (simplify (isub ty - (iconst ty (u64_from_imm64 0)) - x)) - (ineg ty x)) - -;; ineg(ineg(x)) == x. -(rule (simplify (ineg ty (ineg ty x))) (subsume x)) - -;; ineg(x) * ineg(y) == x*y. -(rule (simplify (imul ty (ineg ty x) (ineg ty y))) - (subsume (imul ty x y))) - -;; iabs(ineg(x)) == iabs(x). -(rule (simplify (iabs ty (ineg ty x))) - (iabs ty x)) - -;; iabs(iabs(x)) == iabs(x). -(rule (simplify (iabs ty inner @ (iabs ty x))) - (subsume inner)) - -;; x-x == 0. -(rule (simplify (isub (fits_in_64 (ty_int ty)) x x)) (subsume (iconst ty (imm64 0)))) - -;; x*1 == 1*x == x. -(rule (simplify (imul ty - x - (iconst ty (u64_from_imm64 1)))) - (subsume x)) -(rule (simplify (imul ty - (iconst ty (u64_from_imm64 1)) - x)) - (subsume x)) - -;; x*0 == 0*x == 0. -(rule (simplify (imul ty - _ - zero @ (iconst ty (u64_from_imm64 0)))) - (subsume zero)) -(rule (simplify (imul ty - zero @ (iconst ty (u64_from_imm64 0)) - _)) - (subsume zero)) - -;; x*-1 == -1*x == ineg(x). -(rule (simplify (imul ty x (iconst ty c))) - (if-let -1 (i64_sextend_imm64 ty c)) - (ineg ty x)) -(rule (simplify (imul ty (iconst ty c) x)) - (if-let -1 (i64_sextend_imm64 ty c)) - (ineg ty x)) - -;; x/1 == x. 
-(rule (simplify (sdiv ty - x - (iconst ty (u64_from_imm64 1)))) - (subsume x)) -(rule (simplify (udiv ty - x - (iconst ty (u64_from_imm64 1)))) - (subsume x)) - -;; x>>0 == x<<0 == x rotr 0 == x rotl 0 == x. -(rule (simplify (ishl ty - x - (iconst ty (u64_from_imm64 0)))) - (subsume x)) -(rule (simplify (ushr ty - x - (iconst ty (u64_from_imm64 0)))) - (subsume x)) -(rule (simplify (sshr ty - x - (iconst ty (u64_from_imm64 0)))) - (subsume x)) -(rule (simplify (rotr ty - x - (iconst ty (u64_from_imm64 0)))) - (subsume x)) -(rule (simplify (rotl ty - x - (iconst ty (u64_from_imm64 0)))) - (subsume x)) - -;; x | 0 == 0 | x == x | x == x. -(rule (simplify (bor ty - x - (iconst ty (u64_from_imm64 0)))) - (subsume x)) -(rule (simplify (bor ty - (iconst ty (u64_from_imm64 0)) - x)) - (subsume x)) -(rule (simplify (bor ty x x)) - (subsume x)) - -;; x ^ 0 == 0 ^ x == x. -(rule (simplify (bxor ty - x - (iconst ty (u64_from_imm64 0)))) - (subsume x)) -(rule (simplify (bxor ty - (iconst ty (u64_from_imm64 0)) - x)) - (subsume x)) - -;; x ^ x == 0. -(rule (simplify (bxor (fits_in_64 (ty_int ty)) x x)) - (subsume (iconst ty (imm64 0)))) - -;; x ^ not(x) == not(x) ^ x == x | not(x) == not(x) | x == -1. -;; This identity also holds for non-integer types, vectors, and wider types. -;; But `iconst` is only valid for integers up to 64 bits wide. -(rule (simplify (bxor (fits_in_64 (ty_int ty)) x (bnot ty x))) (subsume (iconst ty (imm64 (ty_mask ty))))) -(rule (simplify (bxor (fits_in_64 (ty_int ty)) (bnot ty x) x)) (subsume (iconst ty (imm64 (ty_mask ty))))) -(rule (simplify (bor (fits_in_64 (ty_int ty)) x (bnot ty x))) (subsume (iconst ty (imm64 (ty_mask ty))))) -(rule (simplify (bor (fits_in_64 (ty_int ty)) (bnot ty x) x)) (subsume (iconst ty (imm64 (ty_mask ty))))) - -;; x & -1 == -1 & x == x & x == x. -(rule (simplify (band ty x x)) (subsume x)) -(rule (simplify (band ty x (iconst ty k))) - (if-let -1 (i64_sextend_imm64 ty k)) - (subsume x)) -(rule (simplify (band ty (iconst ty k) x)) - (if-let -1 (i64_sextend_imm64 ty k)) - (subsume x)) - -;; x & 0 == 0 & x == x & not(x) == not(x) & x == 0. -(rule (simplify (band ty _ zero @ (iconst ty (u64_from_imm64 0)))) (subsume zero)) -(rule (simplify (band ty zero @ (iconst ty (u64_from_imm64 0)) _)) (subsume zero)) -(rule (simplify (band (fits_in_64 (ty_int ty)) x (bnot ty x))) (subsume (iconst ty (imm64 0)))) -(rule (simplify (band (fits_in_64 (ty_int ty)) (bnot ty x) x)) (subsume (iconst ty (imm64 0)))) - -;; not(not(x)) == x. -(rule (simplify (bnot ty (bnot ty x))) (subsume x)) - -;; DeMorgan's rule (two versions): -;; bnot(bor(x, y)) == band(bnot(x), bnot(y)) -(rule (simplify (bnot ty (bor ty x y))) - (band ty (bnot ty x) (bnot ty y))) -;; bnot(band(x, y)) == bor(bnot(x), bnot(y)) -(rule (simplify (bnot ty (band t x y))) - (bor ty (bnot ty x) (bnot ty y))) - -;; `or(and(x, y), not(y)) == or(x, not(y))` -(rule (simplify (bor ty - (band ty x y) - z @ (bnot ty y))) - (bor ty x z)) -;; Duplicate the rule but swap the `bor` operands because `bor` is -;; commutative. We could, of course, add a `simplify` rule to do the commutative -;; swap for all `bor`s but this will bloat the e-graph with many e-nodes. It is -;; cheaper to have additional rules, rather than additional e-nodes, because we -;; amortize their cost via ISLE's smart codegen. 
-(rule (simplify (bor ty - z @ (bnot ty y) - (band ty x y))) - (bor ty x z)) - -;; `or(and(x, y), not(y)) == or(x, not(y))` specialized for constants, since -;; otherwise we may not know that `z == not(y)` since we don't generally expand -;; constants in the e-graph. -;; -;; (No need to duplicate for commutative `bor` for this constant version because -;; we move constants to the right.) -(rule (simplify (bor ty - (band ty x (iconst ty (u64_from_imm64 y))) - z @ (iconst ty (u64_from_imm64 zk)))) - (if-let $true (u64_eq (u64_and (ty_mask ty) zk) - (u64_and (ty_mask ty) (u64_not y)))) - (bor ty x z)) - -;; x*2 == 2*x == x+x. -(rule (simplify (imul ty x (iconst _ (simm32 2)))) - (iadd ty x x)) -(rule (simplify (imul ty (iconst _ (simm32 2)) x)) - (iadd ty x x)) - -;; x*c == x< magic multiplications - - -;; `(x >> k) << k` is the same as masking off the bottom `k` bits (regardless if -;; this is a signed or unsigned shift right). -(rule (simplify (ishl (fits_in_64 ty) - (ushr ty x (iconst _ k)) - (iconst _ k))) - (let ((mask Imm64 (imm64_shl ty (imm64 0xFFFF_FFFF_FFFF_FFFF) k))) - (band ty x (iconst ty mask)))) -(rule (simplify (ishl (fits_in_64 ty) - (sshr ty x (iconst _ k)) - (iconst _ k))) - (let ((mask Imm64 (imm64_shl ty (imm64 0xFFFF_FFFF_FFFF_FFFF) k))) - (band ty x (iconst ty mask)))) - - -;; For unsigned shifts, `(x << k) >> k` is the same as masking out the top -;; `k` bits. A similar rule is valid for vectors but this `iconst` mask only -;; works for scalar integers. -(rule (simplify (ushr (fits_in_64 (ty_int ty)) - (ishl ty x (iconst _ k)) - (iconst _ k))) - (band ty x (iconst ty (imm64_ushr ty (imm64 (ty_mask ty)) k)))) - - -;; For signed shifts, `(x << k) >> k` does sign-extension from `n` bits to -;; `n+k` bits. In the special case where `x` is the result of either `sextend` -;; or `uextend` from `n` bits to `n+k` bits, we can implement this using -;; `sextend`. -(rule (simplify (sshr wide - (ishl wide - (uextend wide x @ (value_type narrow)) - (iconst _ shift)) - (iconst _ shift))) - (if-let (u64_from_imm64 shift_u64) shift) - (if-let $true (u64_eq shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow)))) - (sextend wide x)) - -;; If `k` is smaller than the difference in bit widths of the two types, then -;; the intermediate sign bit comes from the extend op, so the final result is -;; the same as the original extend op. -(rule (simplify (sshr wide - (ishl wide - x @ (uextend wide (value_type narrow)) - (iconst _ shift)) - (iconst _ shift))) - (if-let (u64_from_imm64 shift_u64) shift) - (if-let $true (u64_lt shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow)))) - x) - -;; If the original extend op was `sextend`, then both of the above cases say -;; the result should also be `sextend`. -(rule (simplify (sshr wide - (ishl wide - x @ (sextend wide (value_type narrow)) - (iconst _ shift)) - (iconst _ shift))) - (if-let (u64_from_imm64 shift_u64) shift) - (if-let $true (u64_le shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow)))) - x) - - -;; Masking out any of the top bits of the result of `uextend` is a no-op. (This -;; is like a cheap version of known-bits analysis.) -(rule (simplify (band wide x @ (uextend _ (value_type narrow)) (iconst _ (u64_from_imm64 mask)))) - ; Check that `narrow_mask` has a subset of the bits that `mask` does. - (if-let $true (let ((narrow_mask u64 (ty_mask narrow))) (u64_eq narrow_mask (u64_and mask narrow_mask)))) - x) - -;; Masking out the sign-extended bits of an `sextend` turns it into a `uextend`. 
-(rule (simplify (band wide (sextend _ x @ (value_type narrow)) (iconst _ (u64_from_imm64 mask)))) - (if-let $true (u64_eq mask (ty_mask narrow))) - (uextend wide x)) - - -;; Rematerialize ALU-op-with-imm and iconsts in each block where they're -;; used. This is neutral (add-with-imm) or positive (iconst) for -;; register pressure, and these ops are very cheap. -(rule (simplify x @ (iadd _ (iconst _ _) _)) - (remat x)) -(rule (simplify x @ (iadd _ _ (iconst _ _))) - (remat x)) -(rule (simplify x @ (isub _ (iconst _ _) _)) - (remat x)) -(rule (simplify x @ (isub _ _ (iconst _ _))) - (remat x)) -(rule (simplify x @ (band _ (iconst _ _) _)) - (remat x)) -(rule (simplify x @ (band _ _ (iconst _ _))) - (remat x)) -(rule (simplify x @ (bor _ (iconst _ _) _)) - (remat x)) -(rule (simplify x @ (bor _ _ (iconst _ _))) - (remat x)) -(rule (simplify x @ (bxor _ (iconst _ _) _)) - (remat x)) -(rule (simplify x @ (bxor _ _ (iconst _ _))) - (remat x)) -(rule (simplify x @ (bnot _ _)) - (remat x)) -(rule (simplify x @ (iconst _ _)) - (remat x)) -(rule (simplify x @ (f32const _ _)) - (remat x)) -(rule (simplify x @ (f64const _ _)) - (remat x)) - -;; (x ^ -1) can be replaced with the `bnot` instruction -(rule (simplify (bxor ty x (iconst ty k))) - (if-let -1 (i64_sextend_imm64 ty k)) - (bnot ty x)) - -;; 32-bit integers zero-extended to 64-bit integers are never negative -(rule (simplify - (slt ty - (uextend $I64 x @ (value_type $I32)) - (iconst _ (u64_from_imm64 0)))) - (iconst ty (imm64 0))) -(rule (simplify - (sge ty - (uextend $I64 x @ (value_type $I32)) - (iconst _ (u64_from_imm64 0)))) - (iconst ty (imm64 1))) - -;; Transform select-of-icmp into {u,s}{min,max} instructions where possible. -(rule (simplify (select ty (sgt _ x y) x y)) (smax ty x y)) -(rule (simplify (select ty (sge _ x y) x y)) (smax ty x y)) -(rule (simplify (select ty (ugt _ x y) x y)) (umax ty x y)) -(rule (simplify (select ty (uge _ x y) x y)) (umax ty x y)) -(rule (simplify (select ty (slt _ x y) x y)) (smin ty x y)) -(rule (simplify (select ty (sle _ x y) x y)) (smin ty x y)) -(rule (simplify (select ty (ult _ x y) x y)) (umin ty x y)) -(rule (simplify (select ty (ule _ x y) x y)) (umin ty x y)) - -;; These are the same rules as above, but when the operands for select are swapped -(rule (simplify (select ty (slt _ x y) y x)) (smax ty x y)) -(rule (simplify (select ty (sle _ x y) y x)) (smax ty x y)) -(rule (simplify (select ty (ult _ x y) y x)) (umax ty x y)) -(rule (simplify (select ty (ule _ x y) y x)) (umax ty x y)) -(rule (simplify (select ty (sgt _ x y) y x)) (smin ty x y)) -(rule (simplify (select ty (sge _ x y) y x)) (smin ty x y)) -(rule (simplify (select ty (ugt _ x y) y x)) (umin ty x y)) -(rule (simplify (select ty (uge _ x y) y x)) (umin ty x y)) - -;; Transform bitselect-of-icmp into {u,s}{min,max} instructions where possible. 
-(rule (simplify (bitselect ty (sgt _ x y) x y)) (smax ty x y)) -(rule (simplify (bitselect ty (sge _ x y) x y)) (smax ty x y)) -(rule (simplify (bitselect ty (ugt _ x y) x y)) (umax ty x y)) -(rule (simplify (bitselect ty (uge _ x y) x y)) (umax ty x y)) -(rule (simplify (bitselect ty (slt _ x y) x y)) (smin ty x y)) -(rule (simplify (bitselect ty (sle _ x y) x y)) (smin ty x y)) -(rule (simplify (bitselect ty (ult _ x y) x y)) (umin ty x y)) -(rule (simplify (bitselect ty (ule _ x y) x y)) (umin ty x y)) - -;; These are the same rules as above, but when the operands for select are swapped -(rule (simplify (bitselect ty (slt _ x y) y x)) (smax ty x y)) -(rule (simplify (bitselect ty (sle _ x y) y x)) (smax ty x y)) -(rule (simplify (bitselect ty (ult _ x y) y x)) (umax ty x y)) -(rule (simplify (bitselect ty (ule _ x y) y x)) (umax ty x y)) -(rule (simplify (bitselect ty (sgt _ x y) y x)) (smin ty x y)) -(rule (simplify (bitselect ty (sge _ x y) y x)) (smin ty x y)) -(rule (simplify (bitselect ty (ugt _ x y) y x)) (umin ty x y)) -(rule (simplify (bitselect ty (uge _ x y) y x)) (umin ty x y)) - -;; For floats convert fcmp lt into pseudo_min and gt into pseudo_max -;; -;; fmax_pseudo docs state: -;; The behaviour for this operations is defined as fmax_pseudo(a, b) = (a < b) ? b : a, and the behaviour for zero -;; or NaN inputs follows from the behaviour of < with such inputs. -;; -;; That is exactly the operation that we match here! -(rule (simplify - (select ty (fcmp _ (FloatCC.LessThan) x y) x y)) - (fmin_pseudo ty x y)) -(rule (simplify - (select ty (fcmp _ (FloatCC.GreaterThan) x y) x y)) - (fmax_pseudo ty x y)) - -;; TODO: perform this same optimization to `f{min,max}_pseudo` for vectors -;; with the `bitselect` instruction, but the pattern is a bit more complicated -;; due to most bitselects-over-floats having bitcasts. - -;; fneg(fneg(x)) == x. -(rule (simplify (fneg ty (fneg ty x))) (subsume x)) - -;; If both of the multiplied arguments to an `fma` are negated then remove -;; both of them since they cancel out. -(rule (simplify (fma ty (fneg ty x) (fneg ty y) z)) - (fma ty x y z)) - -;; If both of the multiplied arguments to an `fmul` are negated then remove -;; both of them since they cancel out. -(rule (simplify (fmul ty (fneg ty x) (fneg ty y))) - (fmul ty x y)) diff --git a/cranelift/codegen/src/opts/arithmetic.isle b/cranelift/codegen/src/opts/arithmetic.isle new file mode 100644 index 0000000000..e0e79980e4 --- /dev/null +++ b/cranelift/codegen/src/opts/arithmetic.isle @@ -0,0 +1,109 @@ +;; rewrites for integer and floating-point arithmetic +;; eg: `iadd`, `isub`, `ineg`, `imul`, `fadd`, `fsub`, `fmul` + +;; x+0 == 0+x == x. +(rule (simplify (iadd ty + x + (iconst ty (u64_from_imm64 0)))) + (subsume x)) +(rule (simplify (iadd ty + (iconst ty (u64_from_imm64 0)) + x)) + (subsume x)) +;; x-0 == x. +(rule (simplify (isub ty + x + (iconst ty (u64_from_imm64 0)))) + (subsume x)) +;; 0-x == (ineg x). +(rule (simplify (isub ty + (iconst ty (u64_from_imm64 0)) + x)) + (ineg ty x)) + +;; ineg(ineg(x)) == x. +(rule (simplify (ineg ty (ineg ty x))) (subsume x)) + +;; ineg(x) * ineg(y) == x*y. +(rule (simplify (imul ty (ineg ty x) (ineg ty y))) + (subsume (imul ty x y))) + +;; iabs(ineg(x)) == iabs(x). +(rule (simplify (iabs ty (ineg ty x))) + (iabs ty x)) + +;; iabs(iabs(x)) == iabs(x). +(rule (simplify (iabs ty inner @ (iabs ty x))) + (subsume inner)) + +;; x-x == 0. +(rule (simplify (isub (fits_in_64 (ty_int ty)) x x)) (subsume (iconst ty (imm64 0)))) + +;; x*1 == 1*x == x. 
+(rule (simplify (imul ty + x + (iconst ty (u64_from_imm64 1)))) + (subsume x)) +(rule (simplify (imul ty + (iconst ty (u64_from_imm64 1)) + x)) + (subsume x)) + +;; x*0 == 0*x == 0. +(rule (simplify (imul ty + _ + zero @ (iconst ty (u64_from_imm64 0)))) + (subsume zero)) +(rule (simplify (imul ty + zero @ (iconst ty (u64_from_imm64 0)) + _)) + (subsume zero)) + +;; x*-1 == -1*x == ineg(x). +(rule (simplify (imul ty x (iconst ty c))) + (if-let -1 (i64_sextend_imm64 ty c)) + (ineg ty x)) +(rule (simplify (imul ty (iconst ty c) x)) + (if-let -1 (i64_sextend_imm64 ty c)) + (ineg ty x)) + +;; x/1 == x. +(rule (simplify (sdiv ty + x + (iconst ty (u64_from_imm64 1)))) + (subsume x)) +(rule (simplify (udiv ty + x + (iconst ty (u64_from_imm64 1)))) + (subsume x)) + +;; TODO: strength reduction: div to shifts +;; TODO: div/rem by constants -> magic multiplications + +;; x*2 == 2*x == x+x. +(rule (simplify (imul ty x (iconst _ (simm32 2)))) + (iadd ty x x)) +(rule (simplify (imul ty (iconst _ (simm32 2)) x)) + (iadd ty x x)) + +;; x*c == x<>0 == x<<0 == x rotr 0 == x rotl 0 == x. +(rule (simplify (ishl ty + x + (iconst ty (u64_from_imm64 0)))) + (subsume x)) +(rule (simplify (ushr ty + x + (iconst ty (u64_from_imm64 0)))) + (subsume x)) +(rule (simplify (sshr ty + x + (iconst ty (u64_from_imm64 0)))) + (subsume x)) +(rule (simplify (rotr ty + x + (iconst ty (u64_from_imm64 0)))) + (subsume x)) +(rule (simplify (rotl ty + x + (iconst ty (u64_from_imm64 0)))) + (subsume x)) + +;; `(x >> k) << k` is the same as masking off the bottom `k` bits (regardless if +;; this is a signed or unsigned shift right). +(rule (simplify (ishl (fits_in_64 ty) + (ushr ty x (iconst _ k)) + (iconst _ k))) + (let ((mask Imm64 (imm64_shl ty (imm64 0xFFFF_FFFF_FFFF_FFFF) k))) + (band ty x (iconst ty mask)))) +(rule (simplify (ishl (fits_in_64 ty) + (sshr ty x (iconst _ k)) + (iconst _ k))) + (let ((mask Imm64 (imm64_shl ty (imm64 0xFFFF_FFFF_FFFF_FFFF) k))) + (band ty x (iconst ty mask)))) + +;; For unsigned shifts, `(x << k) >> k` is the same as masking out the top +;; `k` bits. A similar rule is valid for vectors but this `iconst` mask only +;; works for scalar integers. +(rule (simplify (ushr (fits_in_64 (ty_int ty)) + (ishl ty x (iconst _ k)) + (iconst _ k))) + (band ty x (iconst ty (imm64_ushr ty (imm64 (ty_mask ty)) k)))) + +;; For signed shifts, `(x << k) >> k` does sign-extension from `n` bits to +;; `n+k` bits. In the special case where `x` is the result of either `sextend` +;; or `uextend` from `n` bits to `n+k` bits, we can implement this using +;; `sextend`. +(rule (simplify (sshr wide + (ishl wide + (uextend wide x @ (value_type narrow)) + (iconst _ shift)) + (iconst _ shift))) + (if-let (u64_from_imm64 shift_u64) shift) + (if-let $true (u64_eq shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow)))) + (sextend wide x)) + +;; If `k` is smaller than the difference in bit widths of the two types, then +;; the intermediate sign bit comes from the extend op, so the final result is +;; the same as the original extend op. +(rule (simplify (sshr wide + (ishl wide + x @ (uextend wide (value_type narrow)) + (iconst _ shift)) + (iconst _ shift))) + (if-let (u64_from_imm64 shift_u64) shift) + (if-let $true (u64_lt shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow)))) + x) + +;; If the original extend op was `sextend`, then both of the above cases say +;; the result should also be `sextend`. 
+(rule (simplify (sshr wide + (ishl wide + x @ (sextend wide (value_type narrow)) + (iconst _ shift)) + (iconst _ shift))) + (if-let (u64_from_imm64 shift_u64) shift) + (if-let $true (u64_le shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow)))) + x) diff --git a/cranelift/filetests/filetests/egraph/algebraic.clif b/cranelift/filetests/filetests/egraph/algebraic.clif deleted file mode 100644 index faf5998e16..0000000000 --- a/cranelift/filetests/filetests/egraph/algebraic.clif +++ /dev/null @@ -1,497 +0,0 @@ -test optimize -set opt_level=speed -target x86_64 - -function %f0(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 2 - v2 = imul v0, v1 - ; check: v5 = ishl v0, v4 ; v4 = 1 - ; check: return v5 - return v2 -} - -function %f1() -> i64 { -block0: - v0 = iconst.i32 0xffff_ffff_9876_5432 - v1 = uextend.i64 v0 - return v1 - ; check: v2 = iconst.i64 0x9876_5432 - ; check: return v2 ; v2 = 0x9876_5432 -} - -function %unsigned_shift_right_shift_left_i8(i8) -> i8 { -block0(v0: i8): - v1 = iconst.i8 5 - v2 = ushr v0, v1 - v3 = ishl v2, v1 - return v3 - ; check: v4 = iconst.i8 224 - ; check: v5 = band v0, v4 - ; check: return v5 -} - -function %unsigned_shift_right_shift_left_i32(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 5 - v2 = ushr v0, v1 - v3 = ishl v2, v1 - return v3 - ; check: v4 = iconst.i32 0xffff_ffe0 - ; check: v5 = band v0, v4 - ; check: return v5 -} - -function %unsigned_shift_right_shift_left_i64(i64) -> i64 { -block0(v0: i64): - v1 = iconst.i64 5 - v2 = ushr v0, v1 - v3 = ishl v2, v1 - return v3 - ; check: v4 = iconst.i64 -32 - ; check: v5 = band v0, v4 - ; check: return v5 -} - -function %signed_shift_right_shift_left_i8(i8) -> i8 { -block0(v0: i8): - v1 = iconst.i8 5 - v2 = sshr v0, v1 - v3 = ishl v2, v1 - return v3 - ; check: v4 = iconst.i8 224 - ; check: v5 = band v0, v4 - ; check: return v5 -} - -function %signed_shift_right_shift_left_i32(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 5 - v2 = sshr v0, v1 - v3 = ishl v2, v1 - return v3 - ; check: v4 = iconst.i32 0xffff_ffe0 - ; check: v5 = band v0, v4 - ; check: return v5 -} - -function %signed_shift_right_shift_left_i64(i64) -> i64 { -block0(v0: i64): - v1 = iconst.i64 5 - v2 = sshr v0, v1 - v3 = ishl v2, v1 - return v3 - ; check: v4 = iconst.i64 -32 - ; check: v5 = band v0, v4 - ; check: return v5 -} - -function %signed_shift_right_shift_left_i8_mask_rhs(i8) -> i8 { -block0(v0: i8): - v1 = iconst.i8 0xf5 - v2 = sshr v0, v1 - v3 = ishl v2, v1 - return v3 - ; check: v4 = iconst.i8 224 - ; check: v5 = band v0, v4 - ; check: return v5 -} - -function %sextend_shift_32_64_unsigned(i32) -> i64 { -block0(v0: i32): - v1 = iconst.i8 32 - v2 = sextend.i64 v0 - v3 = ishl v2, v1 - v4 = ushr v3, v1 - return v4 - ; check: v7 = uextend.i64 v0 - ; check: return v7 -} - -function %sextend_shift_32_64_signed(i32) -> i64 { -block0(v0: i32): - v1 = iconst.i8 32 - v2 = sextend.i64 v0 - v3 = ishl v2, v1 - v4 = sshr v3, v1 - return v4 - ; check: return v2 -} - -function %sextend_undershift_32_64_unsigned(i32) -> i64 { -block0(v0: i32): - v1 = iconst.i8 31 - v2 = sextend.i64 v0 - v3 = ishl v2, v1 - v4 = ushr v3, v1 - return v4 - ; check: v5 = iconst.i64 0x0001_ffff_ffff - ; check: v6 = band v2, v5 - ; check: return v6 -} - -function %sextend_undershift_32_64_signed(i32) -> i64 { -block0(v0: i32): - v1 = iconst.i8 31 - v2 = sextend.i64 v0 - v3 = ishl v2, v1 - v4 = sshr v3, v1 - return v4 - ; check: return v2 -} - -function %sextend_shift_8_64_unsigned(i8) -> i64 { -block0(v0: i8): - v1 = iconst.i8 56 - v2 = sextend.i64 v0 - v3 = 
ishl v2, v1 - v4 = ushr v3, v1 - return v4 - ; check: v7 = uextend.i64 v0 - ; check: return v7 -} - -function %sextend_shift_8_64_signed(i8) -> i64 { -block0(v0: i8): - v1 = iconst.i8 56 - v2 = sextend.i64 v0 - v3 = ishl v2, v1 - v4 = sshr v3, v1 - return v4 - ; check: return v2 -} - -function %uextend_shift_32_64_unsigned(i32) -> i64 { -block0(v0: i32): - v1 = iconst.i8 32 - v2 = uextend.i64 v0 - v3 = ishl v2, v1 - v4 = ushr v3, v1 - return v4 - ; check: return v2 -} - -function %uextend_shift_32_64_signed(i32) -> i64 { -block0(v0: i32): - v1 = iconst.i8 32 - v2 = uextend.i64 v0 - v3 = ishl v2, v1 - v4 = sshr v3, v1 - return v4 - ; check: v5 = sextend.i64 v0 - ; check: return v5 -} - -function %uextend_undershift_32_64_unsigned(i32) -> i64 { -block0(v0: i32): - v1 = iconst.i8 31 - v2 = uextend.i64 v0 - v3 = ishl v2, v1 - v4 = ushr v3, v1 - return v4 - ; check: return v2 -} - -function %uextend_undershift_32_64_signed(i32) -> i64 { -block0(v0: i32): - v1 = iconst.i8 31 - v2 = uextend.i64 v0 - v3 = ishl v2, v1 - v4 = sshr v3, v1 - return v4 - ; check: return v2 -} - -function %uextend_shift_8_64_unsigned(i8) -> i64 { -block0(v0: i8): - v1 = iconst.i8 56 - v2 = uextend.i64 v0 - v3 = ishl v2, v1 - v4 = ushr v3, v1 - return v4 - ; check: return v2 -} - -function %uextend_shift_8_64_signed(i8) -> i64 { -block0(v0: i8): - v1 = iconst.i8 56 - v2 = uextend.i64 v0 - v3 = ishl v2, v1 - v4 = sshr v3, v1 - return v4 - ; check: v5 = sextend.i64 v0 - ; check: return v5 -} - -function %double_ineg(i32) -> i32 { -block0(v0: i32): - v1 = ineg v0 - v2 = ineg v1 - return v2 - ; check: return v0 -} - -function %imul_ineg_cancel(i32, i32) -> i32 { -block0(v0: i32, v1: i32): - v2 = ineg v0 - v3 = ineg v1 - v4 = imul v2, v3 - return v4 - ; check: v5 = imul v0, v1 - ; check: return v5 -} - -function %iabs_ineg(i32) -> i32 { -block0(v0: i32): - v1 = ineg v0 - v2 = iabs v1 - return v2 - ; check: v3 = iabs v0 - ; check: return v3 -} - -function %iabs_iabs(i32) -> i32 { -block0(v0: i32): - v1 = iabs v0 - v2 = iabs v1 - return v2 - ; check: return v1 -} - -function %isub_self(i32) -> i32 { -block0(v0: i32): - v1 = isub v0, v0 - return v1 - ; check: v2 = iconst.i32 0 - ; check: return v2 -} - -function %mul_minus_one(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 0xffff_ffff ; -1 - v2 = imul v0, v1 - return v2 - ; check: v3 = ineg v0 - ; check: v4 -> v3 - ; check: return v3 -} - -function %mul_minus_one_commuted(i32) -> i32 { -block0(v0: i32): - v1 = iconst.i32 0xffff_ffff ; -1 - v2 = imul v1, v0 - return v2 - ; check: v3 = ineg v0 - ; check: v5 -> v3 - ; check: v6 -> v3 - ; check: return v3 -} - -function %or_and_y_with_not_y_i8(i8, i8) -> i8 { -block0(v0: i8, v1: i8): - v2 = band v0, v1 - v3 = bnot v1 - v4 = bor v2, v3 - return v4 - ; check: v5 = bor v0, v3 - ; check: return v5 -} - -function %or_and_constant_with_not_constant_i8(i8) -> i8 { -block0(v0: i8): - v1 = iconst.i8 -4 - v2 = band v0, v1 - v3 = iconst.i8 3 - v4 = bor v2, v3 - return v4 - ; check: v5 = bor v0, v3 - ; check: return v5 -} - -function %or_and_y_with_not_y_i8(i8, i8) -> i8 { -block0(v0: i8, v1: i8): - v2 = band v0, v1 - v3 = bnot v1 - v4 = bor v3, v2 - return v4 - ; check: v5 = bor v0, v3 - ; check: return v5 -} - -function %or_and_constant_with_not_constant_i8(i8) -> i8 { -block0(v0: i8): - v1 = iconst.i8 -4 - v2 = band v0, v1 - v3 = iconst.i8 3 - v4 = bor v3, v2 - return v4 - ; check: v6 = bor v0, v3 - ; check: return v6 -} - -function %or_and_constant_with_any_constant_should_not_apply_rule_i8(i8) -> i8 { -block0(v0: i8): - v1 = iconst.i8 
-4 - v2 = band v0, v1 - ;; `v3` is not `bnot(v1)` so the rewrite should not apply. - v3 = iconst.i8 -5 - v4 = bor v2, v3 - return v4 - ; check: return v4 -} - -function %or_and_y_with_not_y_i64(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - v2 = band v0, v1 - v3 = bnot v1 - v4 = bor v2, v3 - return v4 - ; check: v5 = bor v0, v3 - ; check: return v5 -} - -function %or_and_constant_with_not_constant_i64(i64) -> i64 { -block0(v0: i64): - v1 = iconst.i64 -4 - v2 = band v0, v1 - v3 = iconst.i64 3 - v4 = bor v2, v3 - return v4 - ; check: v5 = bor v0, v3 - ; check: return v5 -} - -function %or_and_y_with_not_y_i64(i64, i64) -> i64 { -block0(v0: i64, v1: i64): - v2 = band v0, v1 - v3 = bnot v1 - v4 = bor v3, v2 - return v4 - ; check: v5 = bor v0, v3 - ; check: return v5 -} - -function %or_and_constant_with_not_constant_i64(i64) -> i64 { -block0(v0: i64): - v1 = iconst.i64 -4 - v2 = band v0, v1 - v3 = iconst.i64 3 - v4 = bor v3, v2 - return v4 - ; check: v6 = bor v0, v3 - ; check: return v6 -} - -function %or_and_constant_with_any_constant_should_not_apply_rule_i64(i64) -> i64 { -block0(v0: i64): - v1 = iconst.i64 -4 - v2 = band v0, v1 - ;; `v3` is not `bnot(v1)` so the rewrite should not apply. - v3 = iconst.i64 -5 - v4 = bor v2, v3 - return v4 - ; check: return v4 -} - -function %bnot1(i8) -> i8 { -block0(v1: i8): - v2 = iconst.i8 -1 - v3 = bxor v1, v2 - return v3 -} - -; check: v4 = bnot v1 -; check: return v4 - -function %bnot2(i64) -> i64 { -block0(v1: i64): - v2 = iconst.i64 -1 - v3 = bxor v1, v2 - return v3 -} - -; check: v4 = bnot v1 -; check: return v4 - -function %bnot3(i64) -> i64 { -block0(v1: i64): - v2 = iconst.i64 -1 - v3 = bxor v2, v1 - return v3 -} - -; check: v5 = bnot v1 -; check: return v5 - -function %extend_always_above_zero(i32) -> i8 { -block0(v1: i32): - v2 = uextend.i64 v1 - v3 = iconst.i64 0 - v4 = icmp slt v2, v3 - return v4 -} - -; check: v5 = iconst.i8 0 -; check: return v5 - -function %extend_always_above_zero2(i32) -> i8 { -block0(v1: i32): - v2 = uextend.i64 v1 - v3 = iconst.i64 0 - v4 = icmp sge v2, v3 - return v4 -} - -; check: v5 = iconst.i8 1 -; check: return v5 - -function %double_uextend(i16) -> i64 { -block0(v1: i16): - v2 = uextend.i32 v1 - v3 = uextend.i64 v2 - return v3 -} - -; check: v4 = uextend.i64 v1 -; check: return v4 - -function %double_sextend(i16) -> i64 { -block0(v1: i16): - v2 = sextend.i32 v1 - v3 = sextend.i64 v2 - return v3 -} - -; check: v4 = sextend.i64 v1 -; check: return v4 - -function %double_fneg(f32) -> f32 { -block0(v1: f32): - v2 = fneg v1 - v3 = fneg v2 - return v3 -} - -; check: return v1 - -function %fma_double_fneg(f32, f32, f32) -> f32 { -block0(v1: f32, v2: f32, v3: f32): - v4 = fneg v1 - v5 = fneg v2 - v6 = fma v4, v5, v3 - return v6 -} - -; check: v7 = fma v1, v2, v3 -; check: return v7 - -function %fmul_double_fneg(f32, f32) -> f32 { -block0(v1: f32, v2: f32): - v3 = fneg v1 - v4 = fneg v2 - v5 = fmul v3, v4 - return v5 -} - -; check: v6 = fmul v1, v2 -; check: return v6 diff --git a/cranelift/filetests/filetests/egraph/arithmetic.clif b/cranelift/filetests/filetests/egraph/arithmetic.clif new file mode 100644 index 0000000000..7a014954db --- /dev/null +++ b/cranelift/filetests/filetests/egraph/arithmetic.clif @@ -0,0 +1,105 @@ +test optimize +set opt_level=speed +target x86_64 + +function %f0(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 2 + v2 = imul v0, v1 + ; check: v5 = ishl v0, v4 ; v4 = 1 + ; check: return v5 + return v2 +} + +function %double_ineg(i32) -> i32 { +block0(v0: i32): + v1 = ineg v0 + v2 = ineg v1 + 
return v2 + ; check: return v0 +} + +function %imul_ineg_cancel(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = ineg v0 + v3 = ineg v1 + v4 = imul v2, v3 + return v4 + ; check: v5 = imul v0, v1 + ; check: return v5 +} + +function %iabs_ineg(i32) -> i32 { +block0(v0: i32): + v1 = ineg v0 + v2 = iabs v1 + return v2 + ; check: v3 = iabs v0 + ; check: return v3 +} + +function %iabs_iabs(i32) -> i32 { +block0(v0: i32): + v1 = iabs v0 + v2 = iabs v1 + return v2 + ; check: return v1 +} + +function %isub_self(i32) -> i32 { +block0(v0: i32): + v1 = isub v0, v0 + return v1 + ; check: v2 = iconst.i32 0 + ; check: return v2 +} + +function %mul_minus_one(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 0xffff_ffff ; -1 + v2 = imul v0, v1 + return v2 + ; check: v3 = ineg v0 + ; check: v4 -> v3 + ; check: return v3 +} + +function %mul_minus_one_commuted(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 0xffff_ffff ; -1 + v2 = imul v1, v0 + return v2 + ; check: v3 = ineg v0 + ; check: return v3 +} + +function %double_fneg(f32) -> f32 { +block0(v1: f32): + v2 = fneg v1 + v3 = fneg v2 + return v3 +} + +; check: return v1 + +function %fma_double_fneg(f32, f32, f32) -> f32 { +block0(v1: f32, v2: f32, v3: f32): + v4 = fneg v1 + v5 = fneg v2 + v6 = fma v4, v5, v3 + return v6 +} + +; check: v7 = fma v1, v2, v3 +; check: return v7 + +function %fmul_double_fneg(f32, f32) -> f32 { +block0(v1: f32, v2: f32): + v3 = fneg v1 + v4 = fneg v2 + v5 = fmul v3, v4 + return v5 +} + +; check: v6 = fmul v1, v2 +; check: return v6 diff --git a/cranelift/filetests/filetests/egraph/bitops.clif b/cranelift/filetests/filetests/egraph/bitops.clif new file mode 100644 index 0000000000..88964da5ae --- /dev/null +++ b/cranelift/filetests/filetests/egraph/bitops.clif @@ -0,0 +1,139 @@ +test optimize +set opt_level=speed +target x86_64 + +function %or_and_y_with_not_y_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = band v0, v1 + v3 = bnot v1 + v4 = bor v2, v3 + return v4 + ; check: v5 = bor v0, v3 + ; check: return v5 +} + +function %or_and_constant_with_not_constant_i8(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 -4 + v2 = band v0, v1 + v3 = iconst.i8 3 + v4 = bor v2, v3 + return v4 + ; check: v5 = bor v0, v3 + ; check: return v5 +} + +function %or_and_y_with_not_y_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = band v0, v1 + v3 = bnot v1 + v4 = bor v3, v2 + return v4 + ; check: v5 = bor v0, v3 + ; check: return v5 +} + +function %or_and_constant_with_not_constant_i8(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 -4 + v2 = band v0, v1 + v3 = iconst.i8 3 + v4 = bor v3, v2 + return v4 + ; check: v6 = bor v0, v3 + ; check: return v6 +} + +function %or_and_constant_with_any_constant_should_not_apply_rule_i8(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 -4 + v2 = band v0, v1 + ;; `v3` is not `bnot(v1)` so the rewrite should not apply. 
+ v3 = iconst.i8 -5 + v4 = bor v2, v3 + return v4 + ; check: return v4 +} + +function %or_and_y_with_not_y_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = band v0, v1 + v3 = bnot v1 + v4 = bor v2, v3 + return v4 + ; check: v5 = bor v0, v3 + ; check: return v5 +} + +function %or_and_constant_with_not_constant_i64(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 -4 + v2 = band v0, v1 + v3 = iconst.i64 3 + v4 = bor v2, v3 + return v4 + ; check: v5 = bor v0, v3 + ; check: return v5 +} + +function %or_and_y_with_not_y_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = band v0, v1 + v3 = bnot v1 + v4 = bor v3, v2 + return v4 + ; check: v5 = bor v0, v3 + ; check: return v5 +} + +function %or_and_constant_with_not_constant_i64(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 -4 + v2 = band v0, v1 + v3 = iconst.i64 3 + v4 = bor v3, v2 + return v4 + ; check: v6 = bor v0, v3 + ; check: return v6 +} + +function %or_and_constant_with_any_constant_should_not_apply_rule_i64(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 -4 + v2 = band v0, v1 + ;; `v3` is not `bnot(v1)` so the rewrite should not apply. + v3 = iconst.i64 -5 + v4 = bor v2, v3 + return v4 + ; check: return v4 +} + +function %bnot1(i8) -> i8 { +block0(v1: i8): + v2 = iconst.i8 -1 + v3 = bxor v1, v2 + return v3 +} + +; check: v4 = bnot v1 +; check: return v4 + +function %bnot2(i64) -> i64 { +block0(v1: i64): + v2 = iconst.i64 -1 + v3 = bxor v1, v2 + return v3 +} + +; check: v4 = bnot v1 +; check: return v4 + +function %bnot3(i64) -> i64 { +block0(v1: i64): + v2 = iconst.i64 -1 + v3 = bxor v2, v1 + return v3 +} + +; check: v5 = bnot v1 +; check: return v5 diff --git a/cranelift/filetests/filetests/egraph/extends.clif b/cranelift/filetests/filetests/egraph/extends.clif new file mode 100644 index 0000000000..bfc9876044 --- /dev/null +++ b/cranelift/filetests/filetests/egraph/extends.clif @@ -0,0 +1,55 @@ +test optimize +set opt_level=speed +target x86_64 + +function %f1() -> i64 { +block0: + v0 = iconst.i32 0xffff_ffff_9876_5432 + v1 = uextend.i64 v0 + return v1 + ; check: v2 = iconst.i64 0x9876_5432 + ; check: return v2 ; v2 = 0x9876_5432 +} + + +function %extend_always_above_zero(i32) -> i8 { +block0(v1: i32): + v2 = uextend.i64 v1 + v3 = iconst.i64 0 + v4 = icmp slt v2, v3 + return v4 +} + +; check: v5 = iconst.i8 0 +; check: return v5 + +function %extend_always_above_zero2(i32) -> i8 { +block0(v1: i32): + v2 = uextend.i64 v1 + v3 = iconst.i64 0 + v4 = icmp sge v2, v3 + return v4 +} + +; check: v5 = iconst.i8 1 +; check: return v5 + +function %double_uextend(i16) -> i64 { +block0(v1: i16): + v2 = uextend.i32 v1 + v3 = uextend.i64 v2 + return v3 +} + +; check: v4 = uextend.i64 v1 +; check: return v4 + +function %double_sextend(i16) -> i64 { +block0(v1: i16): + v2 = sextend.i32 v1 + v3 = sextend.i64 v2 + return v3 +} + +; check: v4 = sextend.i64 v1 +; check: return v4 diff --git a/cranelift/filetests/filetests/egraph/shifts.clif b/cranelift/filetests/filetests/egraph/shifts.clif new file mode 100644 index 0000000000..f03d2d41fa --- /dev/null +++ b/cranelift/filetests/filetests/egraph/shifts.clif @@ -0,0 +1,206 @@ +test optimize +set opt_level=speed +target x86_64 + +function %unsigned_shift_right_shift_left_i8(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 5 + v2 = ushr v0, v1 + v3 = ishl v2, v1 + return v3 + ; check: v4 = iconst.i8 224 + ; check: v5 = band v0, v4 + ; check: return v5 +} + +function %unsigned_shift_right_shift_left_i32(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 5 + v2 = ushr v0, v1 + v3 = ishl v2, v1 + 
return v3 + ; check: v4 = iconst.i32 0xffff_ffe0 + ; check: v5 = band v0, v4 + ; check: return v5 +} + +function %unsigned_shift_right_shift_left_i64(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 5 + v2 = ushr v0, v1 + v3 = ishl v2, v1 + return v3 + ; check: v4 = iconst.i64 -32 + ; check: v5 = band v0, v4 + ; check: return v5 +} + +function %signed_shift_right_shift_left_i8(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 5 + v2 = sshr v0, v1 + v3 = ishl v2, v1 + return v3 + ; check: v4 = iconst.i8 224 + ; check: v5 = band v0, v4 + ; check: return v5 +} + +function %signed_shift_right_shift_left_i32(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 5 + v2 = sshr v0, v1 + v3 = ishl v2, v1 + return v3 + ; check: v4 = iconst.i32 0xffff_ffe0 + ; check: v5 = band v0, v4 + ; check: return v5 +} + +function %signed_shift_right_shift_left_i64(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 5 + v2 = sshr v0, v1 + v3 = ishl v2, v1 + return v3 + ; check: v4 = iconst.i64 -32 + ; check: v5 = band v0, v4 + ; check: return v5 +} + +function %signed_shift_right_shift_left_i8_mask_rhs(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 0xf5 + v2 = sshr v0, v1 + v3 = ishl v2, v1 + return v3 + ; check: v4 = iconst.i8 224 + ; check: v5 = band v0, v4 + ; check: return v5 +} + +function %sextend_shift_32_64_unsigned(i32) -> i64 { +block0(v0: i32): + v1 = iconst.i8 32 + v2 = sextend.i64 v0 + v3 = ishl v2, v1 + v4 = ushr v3, v1 + return v4 + ; check: v7 = uextend.i64 v0 + ; check: return v7 +} + +function %sextend_shift_32_64_signed(i32) -> i64 { +block0(v0: i32): + v1 = iconst.i8 32 + v2 = sextend.i64 v0 + v3 = ishl v2, v1 + v4 = sshr v3, v1 + return v4 + ; check: return v2 +} + +function %sextend_undershift_32_64_unsigned(i32) -> i64 { +block0(v0: i32): + v1 = iconst.i8 31 + v2 = sextend.i64 v0 + v3 = ishl v2, v1 + v4 = ushr v3, v1 + return v4 + ; check: v5 = iconst.i64 0x0001_ffff_ffff + ; check: v6 = band v2, v5 + ; check: return v6 +} + +function %sextend_undershift_32_64_signed(i32) -> i64 { +block0(v0: i32): + v1 = iconst.i8 31 + v2 = sextend.i64 v0 + v3 = ishl v2, v1 + v4 = sshr v3, v1 + return v4 + ; check: return v2 +} + +function %sextend_shift_8_64_unsigned(i8) -> i64 { +block0(v0: i8): + v1 = iconst.i8 56 + v2 = sextend.i64 v0 + v3 = ishl v2, v1 + v4 = ushr v3, v1 + return v4 + ; check: v7 = uextend.i64 v0 + ; check: return v7 +} + +function %sextend_shift_8_64_signed(i8) -> i64 { +block0(v0: i8): + v1 = iconst.i8 56 + v2 = sextend.i64 v0 + v3 = ishl v2, v1 + v4 = sshr v3, v1 + return v4 + ; check: return v2 +} + +function %uextend_shift_32_64_unsigned(i32) -> i64 { +block0(v0: i32): + v1 = iconst.i8 32 + v2 = uextend.i64 v0 + v3 = ishl v2, v1 + v4 = ushr v3, v1 + return v4 + ; check: return v2 +} + +function %uextend_shift_32_64_signed(i32) -> i64 { +block0(v0: i32): + v1 = iconst.i8 32 + v2 = uextend.i64 v0 + v3 = ishl v2, v1 + v4 = sshr v3, v1 + return v4 + ; check: v5 = sextend.i64 v0 + ; check: return v5 +} + +function %uextend_undershift_32_64_unsigned(i32) -> i64 { +block0(v0: i32): + v1 = iconst.i8 31 + v2 = uextend.i64 v0 + v3 = ishl v2, v1 + v4 = ushr v3, v1 + return v4 + ; check: return v2 +} + +function %uextend_undershift_32_64_signed(i32) -> i64 { +block0(v0: i32): + v1 = iconst.i8 31 + v2 = uextend.i64 v0 + v3 = ishl v2, v1 + v4 = sshr v3, v1 + return v4 + ; check: return v2 +} + +function %uextend_shift_8_64_unsigned(i8) -> i64 { +block0(v0: i8): + v1 = iconst.i8 56 + v2 = uextend.i64 v0 + v3 = ishl v2, v1 + v4 = ushr v3, v1 + return v4 + ; check: return v2 +} + +function 
%uextend_shift_8_64_signed(i8) -> i64 { +block0(v0: i8): + v1 = iconst.i8 56 + v2 = uextend.i64 v0 + v3 = ishl v2, v1 + v4 = sshr v3, v1 + return v4 + ; check: v5 = sextend.i64 v0 + ; check: return v5 +}
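
The new cranelift/codegen/src/opts/README.md introduced above gives `x+y-y` => `x` as the
canonical example of a legal rewrite: the right-hand side of a `simplify` rule may reuse
values bound by the matched pattern (or fewer of them) and nodes it constructs itself, but
may not refer to any other e-class id. As a minimal sketch (not part of this diff), that
example would be written in the same style as the rules in these files; the repeated pattern
variable `y` is what enforces that both occurrences are the same value, exactly as the
repeated `k` and `x` bindings do in the shift and bnot rules above:

    ;; (x + y) - y == x. Sketch only: the right-hand side reuses `x`, a value
    ;; bound from the matched input, so it satisfies the README's
    ;; "same inputs as the original, or fewer" invariant.
    (rule (simplify (isub ty (iadd ty x y) y))
          (subsume x))

A rule of this kind would naturally live in one of the per-topic files registered in
`get_isle_compilations` (here, arithmetic.isle) and, like the rewrites moved in this patch,
be exercised by a `test optimize` filetest under cranelift/filetests/filetests/egraph/.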