cranelift: Fix cls for small types on aarch64 (#4305)

The previous `cls` code was producing wrong results when fed with a -1 i8. The fix here is to sign extend instead of zero extending since we want to keep the sign bit as one in order for it to be counted correctly in the cls instruction This also merges the interpreter only tests now that aarch64 correctly supports this instruction
2022-06-27 23:55:02 +01:00
parent aef53784ec
commit 42d4f97b78
4 changed files with 24 additions and 28 deletions
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -1078,10 +1078,10 @@
 ;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 (rule (lower (has_type $I8 (cls x)))
-      (sub_imm $I32 (a64_cls $I32 (put_in_reg_zext32 x)) (u8_into_imm12 24)))
+      (sub_imm $I32 (a64_cls $I32 (put_in_reg_sext32 x)) (u8_into_imm12 24)))
 (rule (lower (has_type $I16 (cls x)))
-      (sub_imm $I32 (a64_cls $I32 (put_in_reg_zext32 x)) (u8_into_imm12 16)))
+      (sub_imm $I32 (a64_cls $I32 (put_in_reg_sext32 x)) (u8_into_imm12 16)))
 ;; cls lo_cls, lo
 ;; cls hi_cls, hi
--- a/cranelift/filetests/filetests/isa/aarch64/bitops.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/bitops.clif
@@ -121,7 +121,7 @@ block0(v0: i8):
 }
 ; block0:
-;   uxtb w3, w0
+;   sxtb w3, w0
 ;   cls w5, w3
 ;   sub w0, w5, #24
 ;   ret
@@ -133,7 +133,7 @@ block0(v0: i16):
 }
 ; block0:
-;   uxth w3, w0
+;   sxth w3, w0
 ;   cls w5, w3
 ;   sub w0, w5, #16
 ;   ret
@@ -928,4 +928,3 @@ block0(v0: i128, v1: i128):
 ;   csel x0, x12, x6, ne
 ;   csel x1, x4, x12, ne
 ;   ret
--- a/cranelift/filetests/filetests/runtests/cls-interpret.clif
+++ b/cranelift/filetests/filetests/runtests/cls-interpret.clif
@@ -1,23 +0,0 @@
 test interpret
 ; aarch64 yields cls_i8(1) == 30, which is incorrect
 function %cls_i8(i8) -> i8 {
 block0(v0: i8):
    v1 = cls v0
    return v1
 }
 ; run: %cls_i8(1) == 6
 ; run: %cls_i8(0x40) == 0
 ; run: %cls_i8(-1) == 7
 ; run: %cls_i8(0) == 7
 function %cls_i16(i16) -> i16 {
 block0(v0: i16):
    v1 = cls v0
    return v1
 }
 ; run: %cls_i16(1) == 14
 ; run: %cls_i16(0x4000) == 0
 ; run: %cls_i16(-1) == 15
 ; run: %cls_i16(0) == 15
--- a/cranelift/filetests/filetests/runtests/cls-aarch64.clif
+++ b/cranelift/filetests/filetests/runtests/cls-aarch64.clif
@@ -3,6 +3,26 @@ test run
 target aarch64
 ; not implemented on `x86_64`
 function %cls_i8(i8) -> i8 {
 block0(v0: i8):
    v1 = cls v0
    return v1
 }
 ; run: %cls_i8(1) == 6
 ; run: %cls_i8(0x40) == 0
 ; run: %cls_i8(-1) == 7
 ; run: %cls_i8(0) == 7
 function %cls_i16(i16) -> i16 {
 block0(v0: i16):
    v1 = cls v0
    return v1
 }
 ; run: %cls_i16(1) == 14
 ; run: %cls_i16(0x4000) == 0
 ; run: %cls_i16(-1) == 15
 ; run: %cls_i16(0) == 15
 function %cls_i32(i32) -> i32 {
 block0(v0: i32):
    v1 = cls v0