From 9e1ff9726cbcea8547873c511a098ab00c96a20a Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Fri, 14 Apr 2023 19:50:48 +0100 Subject: [PATCH] egraphs: Add `bmask` bit pattern optimization rule (#6196) * egraphs: Add a bmask bit pattern optimization * egraphs: Add more `ineg` rules * egraphs: Add sshr rule * egraphs: Simplify bmask rule * egraphs: Add commutative version of bmask rule * egraphs: Add more testcases * egraphs: Cleanup rule comments * egraphs: Add more `ineg` optimizations --- cranelift/codegen/src/opts/arithmetic.isle | 19 ++++++ cranelift/codegen/src/opts/bitops.isle | 12 ++++ cranelift/codegen/src/opts/shifts.isle | 5 ++ .../filetests/egraph/arithmetic.clif | 68 +++++++++++++++++++ .../filetests/filetests/egraph/bitops.clif | 41 +++++++++++ .../filetests/filetests/egraph/shifts.clif | 11 +++ .../filetests/filetests/runtests/bitops.clif | 27 ++++++++ 7 files changed, 183 insertions(+) diff --git a/cranelift/codegen/src/opts/arithmetic.isle b/cranelift/codegen/src/opts/arithmetic.isle index e0e79980e4..00c56c2409 100644 --- a/cranelift/codegen/src/opts/arithmetic.isle +++ b/cranelift/codegen/src/opts/arithmetic.isle @@ -67,6 +67,25 @@ (if-let -1 (i64_sextend_imm64 ty c)) (ineg ty x)) +;; (!x) + 1 == 1 + (!x) == !(x) - (-1) == ineg(x) +(rule (simplify (iadd ty (bnot ty x) (iconst ty (u64_from_imm64 1)))) + (ineg ty x)) +(rule (simplify (iadd ty (iconst ty (u64_from_imm64 1)) (bnot ty x))) + (ineg ty x)) +(rule (simplify (isub ty (bnot ty x) (iconst ty c))) + (if-let -1 (i64_sextend_imm64 ty c)) + (ineg ty x)) + +;; !(x - 1) == !(x + (-1)) == !((-1) + x) == ineg(x) +(rule (simplify (bnot ty (isub ty x (iconst ty (u64_from_imm64 1))))) + (ineg ty x)) +(rule (simplify (bnot ty (iadd ty x (iconst ty c)))) + (if-let -1 (i64_sextend_imm64 ty c)) + (ineg ty x)) +(rule (simplify (bnot ty (iadd ty (iconst ty c) x))) + (if-let -1 (i64_sextend_imm64 ty c)) + (ineg ty x)) + ;; x/1 == x. 
(rule (simplify (sdiv ty x diff --git a/cranelift/codegen/src/opts/bitops.isle b/cranelift/codegen/src/opts/bitops.isle index bf32251ca0..09836d3a63 100644 --- a/cranelift/codegen/src/opts/bitops.isle +++ b/cranelift/codegen/src/opts/bitops.isle @@ -92,3 +92,15 @@ (rule (simplify (bxor ty x (iconst ty k))) (if-let -1 (i64_sextend_imm64 ty k)) (bnot ty x)) + +;; sshr((x | -x), N) == bmask(x) where N = ty_bits(ty) - 1. +;; +;; (x | -x) sets the sign bit to 1 if x is nonzero, and 0 if x is zero. sshr propagates +;; the sign bit to the rest of the value. +(rule (simplify (sshr ty (bor ty x (ineg ty x)) (iconst ty (u64_from_imm64 shift_amt)))) + (if-let $true (u64_eq shift_amt (u64_sub (ty_bits ty) 1))) + (bmask ty x)) + +(rule (simplify (sshr ty (bor ty (ineg ty x) x) (iconst ty (u64_from_imm64 shift_amt)))) + (if-let $true (u64_eq shift_amt (u64_sub (ty_bits ty) 1))) + (bmask ty x)) diff --git a/cranelift/codegen/src/opts/shifts.isle b/cranelift/codegen/src/opts/shifts.isle index 445f5467c8..44c79a6f8d 100644 --- a/cranelift/codegen/src/opts/shifts.isle +++ b/cranelift/codegen/src/opts/shifts.isle @@ -78,3 +78,8 @@ (if-let (u64_from_imm64 shift_u64) shift) (if-let $true (u64_le shift_u64 (u64_sub (ty_bits_u64 wide) (ty_bits_u64 narrow)))) x) + +;; ineg(ushr(x, k)) == sshr(x, k) when k == ty_bits - 1. 
+(rule (simplify (ineg ty (ushr ty x sconst @ (iconst ty (u64_from_imm64 shift_amt))))) + (if-let $true (u64_eq shift_amt (u64_sub (ty_bits ty) 1))) + (sshr ty x sconst)) diff --git a/cranelift/filetests/filetests/egraph/arithmetic.clif b/cranelift/filetests/filetests/egraph/arithmetic.clif index 7a014954db..630aebeca4 100644 --- a/cranelift/filetests/filetests/egraph/arithmetic.clif +++ b/cranelift/filetests/filetests/egraph/arithmetic.clif @@ -73,6 +73,74 @@ block0(v0: i32): ; check: return v3 } +function %ineg_not_plus_one(i32) -> i32 { +block0(v0: i32): + v1 = bnot v0 + v2 = iconst.i32 1 + v3 = iadd v1, v2 + return v3 +} + +; check: v4 = ineg v0 +; check: return v4 + +function %ineg_not_plus_one_reverse(i32) -> i32 { +block0(v0: i32): + v1 = bnot v0 + v2 = iconst.i32 1 + v3 = iadd v2, v1 + return v3 +} + +; check: v4 = ineg v0 +; check: return v4 + +function %ineg_not_minus_neg_1(i32) -> i32 { +block0(v0: i32): + v1 = bnot v0 + v2 = iconst.i32 -1 + v3 = isub v1, v2 + return v3 +} + +; check: v4 = ineg v0 +; check: return v4 + +function %ineg_not_sub_one(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 1 + v2 = isub v0, v1 + v3 = bnot v2 + return v3 +} + +; check: v4 = ineg v0 +; check: return v4 + +function %ineg_not_add_neg_one(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 -1 + v2 = iadd v0, v1 + v3 = bnot v2 + return v3 +} + +; check: v4 = ineg v0 +; check: return v4 + +function %ineg_not_add_neg_one_reverse(i32) -> i32 { +block0(v0: i32): + v1 = iconst.i32 -1 + v2 = iadd v1, v0 + v3 = bnot v2 + return v3 +} + +; check: v6 = ineg v0 +; check: return v6 + + + function %double_fneg(f32) -> f32 { block0(v1: f32): v2 = fneg v1 diff --git a/cranelift/filetests/filetests/egraph/bitops.clif b/cranelift/filetests/filetests/egraph/bitops.clif index 88964da5ae..9f02eed10c 100644 --- a/cranelift/filetests/filetests/egraph/bitops.clif +++ b/cranelift/filetests/filetests/egraph/bitops.clif @@ -137,3 +137,44 @@ block0(v1: i64): ; check: v5 = bnot v1 ; check: return v5 
+ +function %bitops_bmask(i64) -> i64 { +block0(v0: i64): + v1 = bnot v0 + v2 = iconst.i64 1 + v3 = iadd.i64 v1, v2 + v4 = bor.i64 v0, v3 + v5 = iconst.i64 63 + v6 = ushr.i64 v4, v5 + v7 = iconst.i64 1 + v8 = isub.i64 v6, v7 + v9 = bnot.i64 v8 + return v9 +} + +; check: v14 = bmask.i64 v0 +; check: return v14 + +function %bmask_sshr(i64) -> i64 { +block0(v0: i64): + v1 = ineg v0 + v2 = bor v0, v1 + v3 = iconst.i64 63 + v4 = sshr v2, v3 + return v4 +} + +; check: v5 = bmask.i64 v0 +; check: return v5 + +function %bmask_reverse_sshr(i64) -> i64 { +block0(v0: i64): + v1 = ineg v0 + v2 = bor v1, v0 + v3 = iconst.i64 63 + v4 = sshr v2, v3 + return v4 +} + +; check: v5 = bmask.i64 v0 +; check: return v5 diff --git a/cranelift/filetests/filetests/egraph/shifts.clif b/cranelift/filetests/filetests/egraph/shifts.clif index f03d2d41fa..d9c9da277d 100644 --- a/cranelift/filetests/filetests/egraph/shifts.clif +++ b/cranelift/filetests/filetests/egraph/shifts.clif @@ -204,3 +204,14 @@ block0(v0: i8): ; check: v5 = sextend.i64 v0 ; check: return v5 } + + +function %ineg_ushr_to_sshr(i64) -> i64 { +block0(v0: i64): + v1 = iconst.i64 63 + v2 = ushr v0, v1 + v3 = ineg v2 + return v3 + ; check: v4 = sshr v0, v1 + ; check: return v4 +} diff --git a/cranelift/filetests/filetests/runtests/bitops.clif b/cranelift/filetests/filetests/runtests/bitops.clif index fe7615afb4..3fd366aa57 100644 --- a/cranelift/filetests/filetests/runtests/bitops.clif +++ b/cranelift/filetests/filetests/runtests/bitops.clif @@ -1,4 +1,10 @@ test run +set opt_level=none +target aarch64 +target s390x +target riscv64 +target s390x has_mie2 +set opt_level=speed target aarch64 target s390x target riscv64 @@ -55,3 +61,24 @@ block0(v0: i64, v1: i64, v2: i64): ; run: %bitselect_i64(0, 0, 0xFFFFFFFFFFFFFFFF) == 0xFFFFFFFFFFFFFFFF ; run: %bitselect_i64(0x5555555555555555, 0, 0xFFFFFFFFFFFFFFFF) == 0xAAAAAAAAAAAAAAAA ; run: %bitselect_i64(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0) == 0xFFFFFFFFFFFFFFFF + + +;; We have a 
midend optimization rule that turns this into a bmask. +;; It's easier to have a runtest to ensure that it is correct than to inspect the output. +function %bitops_bmask(i16) -> i16 { +block0(v0: i16): + v1 = bnot v0 + v2 = iconst.i16 1 + v3 = iadd.i16 v1, v2 + v4 = bor.i16 v0, v3 + v5 = iconst.i16 15 + v6 = ushr.i16 v4, v5 + v7 = iconst.i16 1 + v8 = isub.i16 v6, v7 + v9 = bnot.i16 v8 + return v9 +} +; run: %bitops_bmask(0) == 0 +; run: %bitops_bmask(1) == -1 +; run: %bitops_bmask(0xFFFF) == -1 +; run: %bitops_bmask(0x8000) == -1