cranelift: Fix cls for small types on aarch64 (#4305)
The previous `cls` code was producing wrong results when fed with a -1 i8. The fix here is to sign-extend instead of zero-extending, since we want to keep the sign bit as one in order for it to be counted correctly by the `cls` instruction. This also merges the interpreter-only tests into the regular run tests, now that aarch64 correctly supports this instruction.
This commit is contained in:
@@ -1078,10 +1078,10 @@
|
||||
;;;; Rules for `cls` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(rule (lower (has_type $I8 (cls x)))
|
||||
(sub_imm $I32 (a64_cls $I32 (put_in_reg_zext32 x)) (u8_into_imm12 24)))
|
||||
(sub_imm $I32 (a64_cls $I32 (put_in_reg_sext32 x)) (u8_into_imm12 24)))
|
||||
|
||||
(rule (lower (has_type $I16 (cls x)))
|
||||
(sub_imm $I32 (a64_cls $I32 (put_in_reg_zext32 x)) (u8_into_imm12 16)))
|
||||
(sub_imm $I32 (a64_cls $I32 (put_in_reg_sext32 x)) (u8_into_imm12 16)))
|
||||
|
||||
;; cls lo_cls, lo
|
||||
;; cls hi_cls, hi
|
||||
|
||||
@@ -121,7 +121,7 @@ block0(v0: i8):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; uxtb w3, w0
|
||||
; sxtb w3, w0
|
||||
; cls w5, w3
|
||||
; sub w0, w5, #24
|
||||
; ret
|
||||
@@ -133,7 +133,7 @@ block0(v0: i16):
|
||||
}
|
||||
|
||||
; block0:
|
||||
; uxth w3, w0
|
||||
; sxth w3, w0
|
||||
; cls w5, w3
|
||||
; sub w0, w5, #16
|
||||
; ret
|
||||
@@ -928,4 +928,3 @@ block0(v0: i128, v1: i128):
|
||||
; csel x0, x12, x6, ne
|
||||
; csel x1, x4, x12, ne
|
||||
; ret
|
||||
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
test interpret
|
||||
; aarch64 yields cls_i8(1) == 30, which is incorrect
|
||||
|
||||
function %cls_i8(i8) -> i8 {
|
||||
block0(v0: i8):
|
||||
v1 = cls v0
|
||||
return v1
|
||||
}
|
||||
; run: %cls_i8(1) == 6
|
||||
; run: %cls_i8(0x40) == 0
|
||||
; run: %cls_i8(-1) == 7
|
||||
; run: %cls_i8(0) == 7
|
||||
|
||||
function %cls_i16(i16) -> i16 {
|
||||
block0(v0: i16):
|
||||
v1 = cls v0
|
||||
return v1
|
||||
}
|
||||
; run: %cls_i16(1) == 14
|
||||
; run: %cls_i16(0x4000) == 0
|
||||
; run: %cls_i16(-1) == 15
|
||||
; run: %cls_i16(0) == 15
|
||||
|
||||
@@ -3,6 +3,26 @@ test run
|
||||
target aarch64
|
||||
; not implemented on `x86_64`
|
||||
|
||||
function %cls_i8(i8) -> i8 {
|
||||
block0(v0: i8):
|
||||
v1 = cls v0
|
||||
return v1
|
||||
}
|
||||
; run: %cls_i8(1) == 6
|
||||
; run: %cls_i8(0x40) == 0
|
||||
; run: %cls_i8(-1) == 7
|
||||
; run: %cls_i8(0) == 7
|
||||
|
||||
function %cls_i16(i16) -> i16 {
|
||||
block0(v0: i16):
|
||||
v1 = cls v0
|
||||
return v1
|
||||
}
|
||||
; run: %cls_i16(1) == 14
|
||||
; run: %cls_i16(0x4000) == 0
|
||||
; run: %cls_i16(-1) == 15
|
||||
; run: %cls_i16(0) == 15
|
||||
|
||||
function %cls_i32(i32) -> i32 {
|
||||
block0(v0: i32):
|
||||
v1 = cls v0
|
||||
Reference in New Issue
Block a user