aarch64: Migrate some bit-ops to ISLE (#3602)

* aarch64: Migrate some bit-ops to ISLE

This commit migrates these instructions to ISLE:

* `bnot`
* `band`
* `bor`
* `bxor`
* `band_not`
* `bor_not`
* `bxor_not`

The translations themselves were relatively straightforward; the
interesting part was reducing the duplication between all of these
instructions. I opted for a route similar to what the lowering does
today: a shared `decl` which takes the `ALUOp` and performs the further
pattern matching internally. This keeps each instruction's lowering
quite simple while still handling all the fancy cases of shifts,
constants, etc., for each instruction; a sketch of the idea follows.
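
For concreteness, here is a minimal ISLE sketch of that shape. The
helper and extractor names below (`alu_rs_imm_logic_commutative`,
`and_op`, `imm_logic_from_imm64`, `lshl_from_imm64`, and friends) are
illustrative assumptions, not necessarily the exact decls this commit
defines:

    ;; Sketch only: every helper named here is a stand-in for whatever
    ;; the backend actually provides.
    ;;
    ;; One shared helper takes the ALUOp and does the operand matching once.
    (decl alu_rs_imm_logic_commutative (ALUOp Type Value Value) Reg)

    ;; Fallback: plain register-register form, e.g. `and x0, x0, x1`.
    (rule (alu_rs_imm_logic_commutative op ty x y)
          (alu_rrr op (put_in_reg x) (put_in_reg y)))

    ;; Fold a logical immediate on either side, e.g. `and x0, x0, #3`.
    (rule (alu_rs_imm_logic_commutative op ty x (iconst (imm_logic_from_imm64 ty k)))
          (alu_rr_imm_logic op (put_in_reg x) k))
    (rule (alu_rs_imm_logic_commutative op ty (iconst (imm_logic_from_imm64 ty k)) y)
          (alu_rr_imm_logic op (put_in_reg y) k))

    ;; Fold a constant left shift into the shifted-register form,
    ;; e.g. `and x0, x0, x1, LSL 3`.
    (rule (alu_rs_imm_logic_commutative op ty x (ishl y (iconst (lshl_from_imm64 ty amt))))
          (alu_rrr_shift op (put_in_reg x) (put_in_reg y) amt))

    ;; Each instruction's lowering then reduces to picking its ALUOp
    ;; (`and_op` stands in for the 32- vs 64-bit op selection).
    (rule (lower (has_type (fits_in_64 ty) (band x y)))
          (value_reg (alu_rs_imm_logic_commutative (and_op ty) ty x y)))

The non-commutative `*_not` variants would get a sibling helper that
only matches the immediate and shift forms on the right-hand operand.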

* Actually delete previous lowerings

* Remove dead code
Author: Alex Crichton
Date: 2021-12-15 10:41:36 -06:00 (committed via GitHub)
Parent: 2cdbf32a06
Commit: 4236319a53
8 changed files, 1204 insertions(+), 223 deletions(-)

@@ -298,6 +298,35 @@ block0:
; nextln: sbfx w0, w0, #0, #1
; nextln: ret
function %bnot_i32(i32) -> i32 {
block0(v0: i32):
v1 = bnot v0
return v1
}
; check: orn w0, wzr, w0
; nextln: ret
function %bnot_i64(i64) -> i64 {
block0(v0: i64):
v1 = bnot v0
return v1
}
; check: orn x0, xzr, x0
; nextln: ret
function %bnot_i64_with_shift(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = ishl.i64 v0, v1
v3 = bnot v2
return v3
}
; check: orn x0, xzr, x0, LSL 3
; nextln: ret
function %bnot_i128(i128) -> i128 {
block0(v0: i128):
v1 = bnot v0
@@ -308,6 +337,33 @@ block0(v0: i128):
; nextln: orn x1, xzr, x1
; nextln: ret
function %bnot_i8x16(i8x16) -> i8x16 {
block0(v0: i8x16):
v1 = bnot v0
return v1
}
; check: mvn v0.16b, v0.16b
; nextln: ret
function %band_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = band v0, v1
return v2
}
; check: and w0, w0, w1
; nextln: ret
function %band_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = band v0, v1
return v2
}
; check: and x0, x0, x1
; nextln: ret
function %band_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = band v0, v1
@@ -318,6 +374,72 @@ block0(v0: i128, v1: i128):
; nextln: and x1, x1, x3
; nextln: ret
function %band_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = band v0, v1
return v2
}
; check: and v0.16b, v0.16b, v1.16b
; nextln: ret
function %band_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = band v0, v1
return v2
}
; check: and x0, x0, #3
; nextln: ret
function %band_i64_constant2(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = band v1, v0
return v2
}
; check: and x0, x0, #3
; nextln: ret
function %band_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = band v0, v3
return v4
}
; check: and x0, x0, x1, LSL 3
; nextln: ret
function %band_i64_constant_shift2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = band v3, v0
return v4
}
; check: and x0, x0, x1, LSL 3
; nextln: ret
function %bor_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bor v0, v1
return v2
}
; check: orr w0, w0, w1
; nextln: ret
function %bor_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bor v0, v1
return v2
}
; check: orr x0, x0, x1
; nextln: ret
function %bor_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bor v0, v1
@@ -328,6 +450,75 @@ block0(v0: i128, v1: i128):
; nextln: orr x1, x1, x3
; nextln: ret
function %bor_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = bor v0, v1
return v2
}
; check: orr v0.16b, v0.16b, v1.16b
; nextln: ret
function %bor_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = bor v0, v1
return v2
}
; check: orr x0, x0, #3
; nextln: ret
function %bor_i64_constant2(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = bor v1, v0
return v2
}
; check: orr x0, x0, #3
; nextln: ret
function %bor_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = bor v0, v3
return v4
}
; check: orr x0, x0, x1, LSL 3
; nextln: ret
function %bor_i64_constant_shift2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = bor v3, v0
return v4
}
; check: orr x0, x0, x1, LSL 3
; nextln: ret
function %bxor_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bxor v0, v1
return v2
}
; check: eor w0, w0, w1
; nextln: ret
function %bxor_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bxor v0, v1
return v2
}
; check: eor x0, x0, x1
; nextln: ret
function %bxor_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bxor v0, v1
@@ -338,6 +529,75 @@ block0(v0: i128, v1: i128):
; nextln: eor x1, x1, x3
; nextln: ret
function %bxor_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = bxor v0, v1
return v2
}
; check: eor v0.16b, v0.16b, v1.16b
; nextln: ret
function %bxor_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = bxor v0, v1
return v2
}
; check: eor x0, x0, #3
; nextln: ret
function %bxor_i64_constant2(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 3
v2 = bxor v1, v0
return v2
}
; check: eor x0, x0, #3
; nextln: ret
function %bxor_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = bxor v0, v3
return v4
}
; check: eor x0, x0, x1, LSL 3
; nextln: ret
function %bxor_i64_constant_shift2(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 3
v3 = ishl.i64 v1, v2
v4 = bxor v3, v0
return v4
}
; check: eor x0, x0, x1, LSL 3
; nextln: ret
function %band_not_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = band_not v0, v1
return v2
}
; check: bic w0, w0, w1
; nextln: ret
function %band_not_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = band_not v0, v1
return v2
}
; check: bic x0, x0, x1
; nextln: ret
function %band_not_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = band_not v0, v1
@@ -348,6 +608,54 @@ block0(v0: i128, v1: i128):
; nextln: bic x1, x1, x3
; nextln: ret
function %band_not_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = band_not v0, v1
return v2
}
; check: bic v0.16b, v0.16b, v1.16b
; nextln: ret
function %band_not_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 4
v2 = band_not v0, v1
return v2
}
; check: bic x0, x0, #4
; nextln: ret
function %band_not_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 4
v3 = ishl.i64 v1, v2
v4 = band_not v0, v3
return v4
}
; check: bic x0, x0, x1, LSL 4
; nextln: ret
function %bor_not_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bor_not v0, v1
return v2
}
; check: orn w0, w0, w1
; nextln: ret
function %bor_not_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bor_not v0, v1
return v2
}
; check: orn x0, x0, x1
; nextln: ret
function %bor_not_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bor_not v0, v1
@@ -358,6 +666,45 @@ block0(v0: i128, v1: i128):
; nextln: orn x1, x1, x3
; nextln: ret
function %bor_not_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 4
v2 = bor_not v0, v1
return v2
}
; check: orn x0, x0, #4
; nextln: ret
function %bor_not_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 4
v3 = ishl.i64 v1, v2
v4 = bor_not v0, v3
return v4
}
; check: orn x0, x0, x1, LSL 4
; nextln: ret
function %bxor_not_i32(i32, i32) -> i32 {
block0(v0: i32, v1: i32):
v2 = bxor_not v0, v1
return v2
}
; check: eon w0, w0, w1
; nextln: ret
function %bxor_not_i64(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = bxor_not v0, v1
return v2
}
; check: eon x0, x0, x1
; nextln: ret
function %bxor_not_i128(i128, i128) -> i128 {
block0(v0: i128, v1: i128):
v2 = bxor_not v0, v1
@@ -368,6 +715,26 @@ block0(v0: i128, v1: i128):
; nextln: eon x1, x1, x3
; nextln: ret
function %bxor_not_i64_constant(i64) -> i64 {
block0(v0: i64):
v1 = iconst.i64 4
v2 = bxor_not v0, v1
return v2
}
; check: eon x0, x0, #4
; nextln: ret
function %bxor_not_i64_constant_shift(i64, i64) -> i64 {
block0(v0: i64, v1: i64):
v2 = iconst.i64 4
v3 = ishl.i64 v1, v2
v4 = bxor_not v0, v3
return v4
}
; check: eon x0, x0, x1, LSL 4
; nextln: ret
function %ishl_i128_i8(i128, i8) -> i128 {
block0(v0: i128, v1: i8):