egraphs: Add bmask bit pattern optimization rule (#6196)

* egraphs: Add a bmask bit pattern optimization

* egraphs: Add more `ineg` rules

* egraphs: Add sshr rule

* egraphs: Simplify bmask rule

* egraphs: Add commutative version of bmask rule

* egraphs: Add more test cases

* egraphs: Cleanup rule comments

* egraphs: Add more `ineg` optimizations
Author: Afonso Bordado
Date: 2023-04-14 19:50:48 +01:00
Committed by: GitHub
Parent: 2d25db047f
Commit: 9e1ff9726c
7 changed files with 183 additions and 0 deletions
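
The `ineg` rewrites below all rest on the two's-complement identity -x == !x + 1 and its rearrangements. As a sanity check, here is a standalone Rust sketch — illustrative only, not part of this patch; the actual rewrites are mid-end egraph rules — confirming each variant the tests exercise, under wrapping arithmetic:

// Two's-complement identities behind the new `ineg` rules
// (illustrative sketch, not code from this commit).
fn main() {
    for x in [0i32, 1, -1, 42, i32::MIN, i32::MAX] {
        // -x == !x + 1           (%ineg_not_plus_one and its reverse)
        assert_eq!(x.wrapping_neg(), (!x).wrapping_add(1));
        // -x == !x - (-1)        (%ineg_not_minus_neg_1)
        assert_eq!(x.wrapping_neg(), (!x).wrapping_sub(-1));
        // -x == !(x - 1)         (%ineg_not_sub_one)
        assert_eq!(x.wrapping_neg(), !(x.wrapping_sub(1)));
        // -x == !(x + (-1))      (%ineg_not_add_neg_one and its reverse)
        assert_eq!(x.wrapping_neg(), !(x.wrapping_add(-1)));
    }
}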


@@ -73,6 +73,74 @@ block0(v0: i32):
; check: return v3
}
function %ineg_not_plus_one(i32) -> i32 {
block0(v0: i32):
v1 = bnot v0
v2 = iconst.i32 1
v3 = iadd v1, v2
return v3
}
; check: v4 = ineg v0
; check: return v4
function %ineg_not_plus_one_reverse(i32) -> i32 {
block0(v0: i32):
v1 = bnot v0
v2 = iconst.i32 1
v3 = iadd v2, v1
return v3
}
; check: v4 = ineg v0
; check: return v4
function %ineg_not_minus_neg_1(i32) -> i32 {
block0(v0: i32):
v1 = bnot v0
v2 = iconst.i32 -1
v3 = isub v1, v2
return v3
}
; check: v4 = ineg v0
; check: return v4
function %ineg_not_sub_one(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 1
v2 = isub v0, v1
v3 = bnot v2
return v3
}
; check: v4 = ineg v0
; check: return v4
function %ineg_not_add_neg_one(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 -1
v2 = iadd v0, v1
v3 = bnot v2
return v3
}
; check: v4 = ineg v0
; check: return v4
function %ineg_not_add_neg_one_reverse(i32) -> i32 {
block0(v0: i32):
v1 = iconst.i32 -1
v2 = iadd v1, v0
v3 = bnot v2
return v3
}
; check: v6 = ineg v0
; check: return v6
function %double_fneg(f32) -> f32 {
block0(v1: f32):
v2 = fneg v1


@@ -137,3 +137,44 @@ block0(v1: i64):
; check: v5 = bnot v1
; check: return v5
function %bitops_bmask(i64) -> i64 {
block0(v0: i64):
v1 = bnot v0
v2 = iconst.i64 1
v3 = iadd.i64 v1, v2
v4 = bor.i64 v0, v3
v5 = iconst.i64 63
v6 = ushr.i64 v4, v5
v7 = iconst.i64 1
v8 = isub.i64 v6, v7
v9 = bnot.i64 v8
return v9
}
; check: v14 = bmask.i64 v0
; check: return v14
function %bmask_sshr(i64) -> i64 {
block0(v0: i64):
v1 = ineg v0
v2 = bor v0, v1
v3 = iconst.i64 63
v4 = sshr v2, v3
return v4
}
; check: v5 = bmask.i64 v0
; check: return v5
function %bmask_reverse_sshr(i64) -> i64 {
block0(v0: i64):
v1 = ineg v0
v2 = bor v1, v0
v3 = iconst.i64 63
v4 = sshr v2, v3
return v4
}
; check: v5 = bmask.i64 v0
; check: return v5
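
Both `bmask` patterns above reduce to the same observation: the sign bit of x | -x is set exactly when x != 0, so shifting it down by bits - 1 yields the all-ones/zero mask (the ushr form takes an extra isub/bnot round-trip to get there). A standalone Rust sketch of the two identities — illustrative only, not code from this commit:

// bmask(x): all ones when x != 0, zero otherwise.
fn bmask(x: i64) -> i64 {
    if x != 0 { -1 } else { 0 }
}

fn main() {
    for x in [0i64, 1, -1, 1 << 62, i64::MIN, i64::MAX] {
        let or = x | x.wrapping_neg(); // sign bit set iff x != 0
        // ushr form (%bitops_bmask): !(((x | -x) >>u 63) - 1)
        let ushr = ((or as u64) >> 63) as i64;
        assert_eq!(bmask(x), !ushr.wrapping_sub(1));
        // sshr form (%bmask_sshr): (x | -x) >>s 63
        assert_eq!(bmask(x), or >> 63);
    }
}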


@@ -204,3 +204,14 @@ block0(v0: i8):
; check: v5 = sextend.i64 v0
; check: return v5
}
function %ineg_ushr_to_sshr(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 63
v2 = ushr v0, v1
v3 = ineg v2
return v3
; check: v4 = sshr v0, v1
; check: return v4
}
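
Here x >>u 63 isolates the sign bit as 0 or 1, and negating that gives 0 or -1 — exactly what x >>s 63 computes, which is why the `ineg`-of-`ushr` can fold into a single `sshr`. In Rust (illustrative sketch, not from the patch):

fn main() {
    for x in [0i64, 1, -1, i64::MIN, i64::MAX] {
        // -(x >>u 63) == x >>s 63 (only holds for a shift of bits - 1)
        let ushr = ((x as u64) >> 63) as i64;
        assert_eq!(ushr.wrapping_neg(), x >> 63);
    }
}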


@@ -1,4 +1,10 @@
test run
set opt_level=none
target aarch64
target s390x
target riscv64
target s390x has_mie2
set opt_level=speed
target aarch64
target s390x
target riscv64
@@ -55,3 +61,24 @@ block0(v0: i64, v1: i64, v2: i64):
; run: %bitselect_i64(0, 0, 0xFFFFFFFFFFFFFFFF) == 0xFFFFFFFFFFFFFFFF
; run: %bitselect_i64(0x5555555555555555, 0, 0xFFFFFFFFFFFFFFFF) == 0xAAAAAAAAAAAAAAAA
; run: %bitselect_i64(0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0) == 0xFFFFFFFFFFFFFFFF
;; We have an optimization rule in the mid-end that turns this pattern into a bmask.
;; It's easier to verify its correctness with a runtest than to inspect the output.
function %bitops_bmask(i16) -> i16 {
block0(v0: i16):
v1 = bnot v0
v2 = iconst.i16 1
v3 = iadd.i16 v1, v2
v4 = bor.i16 v0, v3
v5 = iconst.i16 15
v6 = ushr.i16 v4, v5
v7 = iconst.i16 1
v8 = isub.i16 v6, v7
v9 = bnot.i16 v8
return v9
}
; run: %bitops_bmask(0) == 0
; run: %bitops_bmask(1) == -1
; run: %bitops_bmask(0xFFFF) == -1
; run: %bitops_bmask(0x8000) == -1
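
The same expectations can be cross-checked outside the CLIF interpreter; a hypothetical Rust mirror of the i16 runtest above (illustrative, not part of the patch):

// Evaluates the raw i16 bit pattern from %bitops_bmask and compares it
// against the expected bmask results.
fn pattern(x: i16) -> i16 {
    let or = x | x.wrapping_neg();         // bor v0, (bnot v0 + 1)
    let ushr = ((or as u16) >> 15) as i16; // ushr by bits - 1
    !ushr.wrapping_sub(1)                  // bnot (isub ... 1)
}

fn main() {
    assert_eq!(pattern(0), 0);
    assert_eq!(pattern(1), -1);
    assert_eq!(pattern(0xFFFFu16 as i16), -1);
    assert_eq!(pattern(0x8000u16 as i16), -1);
}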