cranelift: Fix cls for small types on aarch64 (#4305)

The previous `cls` code produced wrong results when given an i8 value of -1.

The fix here is to sign-extend instead of zero-extending, since the sign
bit must be preserved in the widened value in order for it to be counted
correctly by the `cls` instruction.

This also merges the interpreter-only tests, now that aarch64
correctly supports this instruction.
This commit is contained in:
Afonso Bordado
2022-06-27 23:55:02 +01:00
committed by GitHub
parent aef53784ec
commit 42d4f97b78
4 changed files with 24 additions and 28 deletions

View File

@@ -1078,10 +1078,10 @@
;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type $I8 (cls x))) (rule (lower (has_type $I8 (cls x)))
(sub_imm $I32 (a64_cls $I32 (put_in_reg_zext32 x)) (u8_into_imm12 24))) (sub_imm $I32 (a64_cls $I32 (put_in_reg_sext32 x)) (u8_into_imm12 24)))
(rule (lower (has_type $I16 (cls x))) (rule (lower (has_type $I16 (cls x)))
(sub_imm $I32 (a64_cls $I32 (put_in_reg_zext32 x)) (u8_into_imm12 16))) (sub_imm $I32 (a64_cls $I32 (put_in_reg_sext32 x)) (u8_into_imm12 16)))
;; cls lo_cls, lo ;; cls lo_cls, lo
;; cls hi_cls, hi ;; cls hi_cls, hi

View File

@@ -121,7 +121,7 @@ block0(v0: i8):
} }
; block0: ; block0:
; uxtb w3, w0 ; sxtb w3, w0
; cls w5, w3 ; cls w5, w3
; sub w0, w5, #24 ; sub w0, w5, #24
; ret ; ret
@@ -133,7 +133,7 @@ block0(v0: i16):
} }
; block0: ; block0:
; uxth w3, w0 ; sxth w3, w0
; cls w5, w3 ; cls w5, w3
; sub w0, w5, #16 ; sub w0, w5, #16
; ret ; ret
@@ -928,4 +928,3 @@ block0(v0: i128, v1: i128):
; csel x0, x12, x6, ne ; csel x0, x12, x6, ne
; csel x1, x4, x12, ne ; csel x1, x4, x12, ne
; ret ; ret

View File

@@ -1,23 +0,0 @@
test interpret
; aarch64 yields cls_i8(1) == 30, which is incorrect
function %cls_i8(i8) -> i8 {
block0(v0: i8):
v1 = cls v0
return v1
}
; run: %cls_i8(1) == 6
; run: %cls_i8(0x40) == 0
; run: %cls_i8(-1) == 7
; run: %cls_i8(0) == 7
function %cls_i16(i16) -> i16 {
block0(v0: i16):
v1 = cls v0
return v1
}
; run: %cls_i16(1) == 14
; run: %cls_i16(0x4000) == 0
; run: %cls_i16(-1) == 15
; run: %cls_i16(0) == 15

View File

@@ -3,6 +3,26 @@ test run
target aarch64 target aarch64
; not implemented on `x86_64` ; not implemented on `x86_64`
; Applies `cls` to a single i8 argument and returns the result.
; The directives after the function pin the expected outputs for a small
; positive value (1), a value with only bit 6 set (0x40), the all-ones
; value (-1), and zero.
function %cls_i8(i8) -> i8 {
block0(v0: i8):
v1 = cls v0
return v1
}
; run: %cls_i8(1) == 6
; run: %cls_i8(0x40) == 0
; run: %cls_i8(-1) == 7
; run: %cls_i8(0) == 7
; Applies `cls` to a single i16 argument and returns the result.
; The directives after the function pin the expected outputs for a small
; positive value (1), a value with only bit 14 set (0x4000), the all-ones
; value (-1), and zero.
function %cls_i16(i16) -> i16 {
block0(v0: i16):
v1 = cls v0
return v1
}
; run: %cls_i16(1) == 14
; run: %cls_i16(0x4000) == 0
; run: %cls_i16(-1) == 15
; run: %cls_i16(0) == 15
function %cls_i32(i32) -> i32 { function %cls_i32(i32) -> i32 {
block0(v0: i32): block0(v0: i32):
v1 = cls v0 v1 = cls v0