From 244dce93f67b717cb6e77dc8d74d92052be165d8 Mon Sep 17 00:00:00 2001 From: Chris Fallin Date: Fri, 9 Dec 2022 14:29:25 -0800 Subject: [PATCH] Fix optimization rules for narrow types: wrap i8 results to 8 bits. (#5409) * Fix optimization rules for narrow types: wrap i8 results to 8 bits. This fixes #5405. In the egraph mid-end's optimization rules, we were rewriting e.g. imuls of two iconsts to an iconst of the result, but without masking off the high bits (beyond the result type's width). This was producing iconsts with set high bits beyond their types' width, which is not legal. In addition, this PR adds some optimizations to the algebraic rules to recognize e.g. `x == x` (and all other integer comparison operators) and resolve to 1 or 0 as appropriate. * Review feedback. * Review feedback, again. --- cranelift/codegen/src/isle_prelude.rs | 10 +++++ cranelift/codegen/src/opts/algebraic.isle | 33 ++++++++++++++ cranelift/codegen/src/opts/cprop.isle | 29 ++++++------- cranelift/codegen/src/prelude.isle | 4 ++ .../filetests/filetests/egraph/cprop.clif | 43 +++++++++++++++++++ .../filetests/egraph/issue-5405.clif | 16 +++++++ 6 files changed, 120 insertions(+), 15 deletions(-) create mode 100644 cranelift/filetests/filetests/egraph/cprop.clif create mode 100644 cranelift/filetests/filetests/egraph/issue-5405.clif diff --git a/cranelift/codegen/src/isle_prelude.rs b/cranelift/codegen/src/isle_prelude.rs index 23270f0c49..58f42f4868 100644 --- a/cranelift/codegen/src/isle_prelude.rs +++ b/cranelift/codegen/src/isle_prelude.rs @@ -509,6 +509,16 @@ macro_rules! isle_common_prelude_methods { Imm64::new(x as i64) } + #[inline] + fn imm64_masked(&mut self, ty: Type, x: u64) -> Imm64 { + debug_assert!(ty.bits() <= 64); + // Careful: we can't do `(1 << bits) - 1` because that + // would overflow for `bits == 64`. Instead, + // right-shift an all-ones mask. + let mask = u64::MAX >> (64 - ty.bits()); + Imm64::new((x & mask) as i64) + } + #[inline] fn simm32(&mut self, x: Imm64) -> Option { let x64: i64 = x.into(); diff --git a/cranelift/codegen/src/opts/algebraic.isle b/cranelift/codegen/src/opts/algebraic.isle index e4638222fe..d407474131 100644 --- a/cranelift/codegen/src/opts/algebraic.isle +++ b/cranelift/codegen/src/opts/algebraic.isle @@ -207,3 +207,36 @@ (rule (simplify (select ty (uextend _ c @ (icmp _ _ _ _)) x y)) (select ty c x y)) + +;; `x == x` is always true for integers; `x != x` is false. Strict +;; inequalities are false, and loose inequalities are true. +(rule (simplify + (icmp ty (IntCC.Equal) x x)) + (iconst ty (imm64 1))) +(rule (simplify + (icmp ty (IntCC.NotEqual) x x)) + (iconst ty (imm64 0))) +(rule (simplify + (icmp ty (IntCC.UnsignedGreaterThan) x x)) + (iconst ty (imm64 0))) +(rule (simplify + (icmp ty (IntCC.UnsignedGreaterThanOrEqual) x x)) + (iconst ty (imm64 1))) +(rule (simplify + (icmp ty (IntCC.SignedGreaterThan) x x)) + (iconst ty (imm64 0))) +(rule (simplify + (icmp ty (IntCC.SignedGreaterThanOrEqual) x x)) + (iconst ty (imm64 1))) +(rule (simplify + (icmp ty (IntCC.UnsignedLessThan) x x)) + (iconst ty (imm64 0))) +(rule (simplify + (icmp ty (IntCC.UnsignedLessThanOrEqual) x x)) + (iconst ty (imm64 1))) +(rule (simplify + (icmp ty (IntCC.SignedLessThan) x x)) + (iconst ty (imm64 0))) +(rule (simplify + (icmp ty (IntCC.SignedLessThanOrEqual) x x)) + (iconst ty (imm64 1))) diff --git a/cranelift/codegen/src/opts/cprop.isle b/cranelift/codegen/src/opts/cprop.isle index ac6ce778e0..d7ba06dcf6 100644 --- a/cranelift/codegen/src/opts/cprop.isle +++ b/cranelift/codegen/src/opts/cprop.isle @@ -4,56 +4,56 @@ (iadd (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) (iconst ty (u64_from_imm64 k2)))) - (subsume (iconst ty (imm64 (u64_add k1 k2))))) + (subsume (iconst ty (imm64_masked ty (u64_add k1 k2))))) (rule (simplify (isub (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) (iconst ty (u64_from_imm64 k2)))) - (subsume (iconst ty (imm64 (u64_sub k1 k2))))) + (subsume (iconst ty (imm64_masked ty (u64_sub k1 k2))))) (rule (simplify (imul (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) (iconst ty (u64_from_imm64 k2)))) - (subsume (iconst ty (imm64 (u64_mul k1 k2))))) + (subsume (iconst ty (imm64_masked ty (u64_mul k1 k2))))) (rule (simplify (sdiv (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) (iconst ty (u64_from_imm64 k2)))) (if-let d (u64_sdiv k1 k2)) - (subsume (iconst ty (imm64 d)))) + (subsume (iconst ty (imm64_masked ty d)))) (rule (simplify (udiv (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) (iconst ty (u64_from_imm64 k2)))) (if-let d (u64_udiv k1 k2)) - (subsume (iconst ty (imm64 d)))) + (subsume (iconst ty (imm64_masked ty d)))) (rule (simplify (bor (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) (iconst ty (u64_from_imm64 k2)))) - (subsume (iconst ty (imm64 (u64_or k1 k2))))) + (subsume (iconst ty (imm64_masked ty (u64_or k1 k2))))) (rule (simplify (band (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) (iconst ty (u64_from_imm64 k2)))) - (subsume (iconst ty (imm64 (u64_and k1 k2))))) + (subsume (iconst ty (imm64_masked ty (u64_and k1 k2))))) (rule (simplify (bxor (fits_in_64 ty) (iconst ty (u64_from_imm64 k1)) (iconst ty (u64_from_imm64 k2)))) - (subsume (iconst ty (imm64 (u64_xor k1 k2))))) + (subsume (iconst ty (imm64_masked ty (u64_xor k1 k2))))) (rule (simplify (bnot (fits_in_64 ty) (iconst ty (u64_from_imm64 k)))) - (subsume (iconst ty (imm64 (u64_not k))))) + (subsume (iconst ty (imm64_masked ty (u64_not k))))) ;; Canonicalize via commutativity: push immediates to the right. ;; @@ -99,23 +99,23 @@ (rule (simplify (isub ty (isub ty x (iconst ty (u64_from_imm64 k1))) (iconst ty (u64_from_imm64 k2)))) - (isub ty x (iconst ty (imm64 (u64_add k1 k2))))) + (isub ty x (iconst ty (imm64_masked ty (u64_add k1 k2))))) (rule (simplify (isub ty (isub ty (iconst ty (u64_from_imm64 k1)) x) (iconst ty (u64_from_imm64 k2)))) - (isub ty (iconst ty (imm64 (u64_sub k1 k2))) x)) + (isub ty (iconst ty (imm64_masked ty (u64_sub k1 k2))) x)) (rule (simplify (isub ty (iadd ty x (iconst ty (u64_from_imm64 k1))) (iconst ty (u64_from_imm64 k2)))) - (isub ty x (iconst ty (imm64 (u64_sub k2 k1))))) + (isub ty x (iconst ty (imm64_masked ty (u64_sub k2 k1))))) (rule (simplify (iadd ty (isub ty x (iconst ty (u64_from_imm64 k1))) (iconst ty (u64_from_imm64 k2)))) - (iadd ty x (iconst ty (imm64 (u64_sub k2 k1))))) + (iadd ty x (iconst ty (imm64_masked ty (u64_sub k2 k1))))) (rule (simplify (iadd ty (isub ty (iconst ty (u64_from_imm64 k1)) x) (iconst ty (u64_from_imm64 k2)))) - (isub ty (iconst ty (imm64 (u64_add k1 k2))) x)) + (isub ty (iconst ty (imm64_masked ty (u64_add k1 k2))) x)) (rule (simplify (imul ty (imul ty x k1 @ (iconst ty _)) k2 @ (iconst ty _))) @@ -138,4 +138,3 @@ y) ;; TODO: fadd, fsub, fmul, fdiv, fneg, fabs - diff --git a/cranelift/codegen/src/prelude.isle b/cranelift/codegen/src/prelude.isle index 0f17b6b442..5a0aa3376c 100644 --- a/cranelift/codegen/src/prelude.isle +++ b/cranelift/codegen/src/prelude.isle @@ -349,6 +349,10 @@ (decl pure imm64 (u64) Imm64) (extern constructor imm64 imm64) +;; Create a new Imm64, masked to the width of the given type. +(decl pure imm64_masked (Type u64) Imm64) +(extern constructor imm64_masked imm64_masked) + ;; Extract a `u64` from an `Ieee32`. (decl u64_from_ieee32 (u64) Ieee32) (extern extractor infallible u64_from_ieee32 u64_from_ieee32) diff --git a/cranelift/filetests/filetests/egraph/cprop.clif b/cranelift/filetests/filetests/egraph/cprop.clif new file mode 100644 index 0000000000..bc76e1d546 --- /dev/null +++ b/cranelift/filetests/filetests/egraph/cprop.clif @@ -0,0 +1,43 @@ +test optimize +set opt_level=none +set use_egraphs=true +target x86_64 + +function %f0() -> i8 { +block0: + v1 = iconst.i8 51 + v2 = imul.i8 v1, v1 + return v2 +} + +; check: v9 = iconst.i8 41 +; nextln: return v9 + +function %f1() -> i16 { +block0: + v1 = iconst.i16 1 + v2 = bnot.i16 v1 + return v2 +} + +; check: v3 = iconst.i16 0xfffe +; nextln: return v3 + +function %f2(i8) -> i8 { +block0(v1: i8): + v2 = icmp eq v1, v1 + return v2 +} + +; check: v3 = iconst.i8 1 +; check: return v3 + +function %f3(i8) -> i8 { +block0(v1: i8): + v2 = icmp ne v1, v1 + return v2 +} + +; check: v3 = iconst.i8 0 +; check: return v3 + diff --git a/cranelift/filetests/filetests/egraph/issue-5405.clif b/cranelift/filetests/filetests/egraph/issue-5405.clif new file mode 100644 index 0000000000..c453875de5 --- /dev/null +++ b/cranelift/filetests/filetests/egraph/issue-5405.clif @@ -0,0 +1,16 @@ +test interpret +test run +set opt_level=speed_and_size +set use_egraphs=true +target aarch64 + +function %a(i64) -> i8 system_v { +block0(v0: i64): + v6 = iconst.i8 51 + v17 = imul v6, v6 ; v6 = 51, v6 = 51 + v18 = icmp eq v17, v17 + v52 = imul v18, v18 + return v52 +} + +; run: %a(129) == 1