aarch64: Migrate bitrev/clz/cls/ctz to ISLE (#3658)

This commit migrates these existing instructions to ISLE from the manual lowerings implemented today. This was mostly straightforward, but while I was at it I fixed what appeared to be broken translations of `clz`, `cls`, and `ctz` for I{8,16}. Previously the lowerings would produce results as if the input were 32 bits wide, but now I believe they all correctly account for the bit-width.
2022-01-06 15:18:32 -06:00
parent 7fd78da23f
commit 72e2b7fe80
9 changed files with 1040 additions and 608 deletions
--- a/cranelift/filetests/filetests/isa/aarch64/bitops.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/bitops.clif
@@ -59,6 +59,7 @@ block0(v0: i8):

 ; check: uxtb w0, w0
 ; nextln: clz w0, w0
+; nextln: sub w0, w0, #24
 ; nextln: ret

 function %b(i16) -> i16 {
@@ -69,6 +70,7 @@ block0(v0: i16):

 ; check: uxth w0, w0
 ; nextln: clz w0, w0
+; nextln: sub w0, w0, #16
 ; nextln: ret

 function %b(i32) -> i32 {
@@ -110,6 +112,7 @@ block0(v0: i8):

 ; check: uxtb w0, w0
 ; nextln: cls w0, w0
+; nextln: sub w0, w0, #24
 ; nextln: ret

 function %c(i16) -> i16 {
@@ -120,6 +123,7 @@ block0(v0: i16):

 ; check: uxth w0, w0
 ; nextln: cls w0, w0
+; nextln: sub w0, w0, #16
 ; nextln: ret

 function %c(i32) -> i32 {
@@ -164,7 +168,7 @@ block0(v0: i8):
 }

 ; check: rbit w0, w0
-; nextln: lsr w0, w0, #24
+; nextln: orr w0, w0, #8388608
 ; nextln: clz w0, w0
 ; nextln: ret

@@ -175,7 +179,7 @@ block0(v0: i16):
 }

 ; check: rbit w0, w0
-; nextln: lsr w0, w0, #16
+; nextln: orr w0, w0, #32768
 ; nextln: clz w0, w0
 ; nextln: ret

--- a/cranelift/filetests/filetests/runtests/clz.clif
+++ b/cranelift/filetests/filetests/runtests/clz.clif
@@ -3,6 +3,24 @@ test run
 target aarch64
 target x86_64

+function %clz_i8(i8) -> i8 {
+block0(v0: i8):
+    v1 = clz v0
+    return v1
+}
+; run: %clz_i8(1) == 7
+; run: %clz_i8(0x40) == 1
+; run: %clz_i8(-1) == 0
+
+function %clz_i16(i16) -> i16 {
+block0(v0: i16):
+    v1 = clz v0
+    return v1
+}
+; run: %clz_i16(1) == 15
+; run: %clz_i16(0x4000) == 1
+; run: %clz_i16(-1) == 0
+
 function %clz_i32(i32) -> i32 {
 block0(v0: i32):
    v1 = clz v0
--- a/cranelift/filetests/filetests/runtests/ctz.clif
+++ b/cranelift/filetests/filetests/runtests/ctz.clif
@@ -3,6 +3,24 @@ test run
 target aarch64
 target x86_64

+function %ctz_i8(i8) -> i8 {
+block0(v0: i8):
+    v1 = ctz v0
+    return v1
+}
+; run: %ctz_i8(1) == 0
+; run: %ctz_i8(0x40) == 6
+; run: %ctz_i8(-1) == 0
+
+function %ctz_i16(i16) -> i16 {
+block0(v0: i16):
+    v1 = ctz v0
+    return v1
+}
+; run: %ctz_i16(1) == 0
+; run: %ctz_i16(0x4000) == 14
+; run: %ctz_i16(-1) == 0
+
 function %ctz_i32(i32) -> i32 {
 block0(v0: i32):
    v1 = ctz v0