cranelift: Implement missing i128 rotates on AArch64 (#4866)

This commit is contained in:
Afonso Bordado
2022-09-07 19:11:47 +01:00
committed by GitHub
parent dd07e354b4
commit f57b4412ec
2 changed files with 123 additions and 6 deletions

View File

@@ -1282,7 +1282,8 @@
;; General 8/16-bit case. ;; General 8/16-bit case.
(rule (lower (has_type (fits_in_16 ty) (rotl x y))) (rule (lower (has_type (fits_in_16 ty) (rotl x y)))
(let ((neg_shift Reg (sub $I32 (zero_reg) y))) (let ((amt Reg (value_regs_get y 0))
(neg_shift Reg (sub $I32 (zero_reg) amt)))
(small_rotr ty (put_in_reg_zext32 x) neg_shift))) (small_rotr ty (put_in_reg_zext32 x) neg_shift)))
;; Specialization for the 8/16-bit case when the rotation amount is an immediate. ;; Specialization for the 8/16-bit case when the rotation amount is an immediate.
@@ -1300,12 +1301,14 @@
;; General 32-bit case. ;; General 32-bit case.
(rule (lower (has_type $I32 (rotl x y))) (rule (lower (has_type $I32 (rotl x y)))
(let ((neg_shift Reg (sub $I32 (zero_reg) y))) (let ((amt Reg (value_regs_get y 0))
(neg_shift Reg (sub $I32 (zero_reg) amt)))
(a64_rotr $I32 x neg_shift))) (a64_rotr $I32 x neg_shift)))
;; General 64-bit case. ;; General 64-bit case.
(rule (lower (has_type $I64 (rotl x y))) (rule (lower (has_type $I64 (rotl x y)))
(let ((neg_shift Reg (sub $I64 (zero_reg) y))) (let ((amt Reg (value_regs_get y 0))
(neg_shift Reg (sub $I64 (zero_reg) amt)))
(a64_rotr $I64 x neg_shift))) (a64_rotr $I64 x neg_shift)))
;; Specialization for the 32-bit case when the rotation amount is an immediate. ;; Specialization for the 32-bit case when the rotation amount is an immediate.
@@ -1338,15 +1341,15 @@
;; General 8/16-bit case. ;; General 8/16-bit case.
(rule (lower (has_type (fits_in_16 ty) (rotr x y))) (rule (lower (has_type (fits_in_16 ty) (rotr x y)))
(small_rotr ty (put_in_reg_zext32 x) y)) (small_rotr ty (put_in_reg_zext32 x) (value_regs_get y 0)))
;; General 32-bit case. ;; General 32-bit case.
(rule (lower (has_type $I32 (rotr x y))) (rule (lower (has_type $I32 (rotr x y)))
(a64_rotr $I32 x y)) (a64_rotr $I32 x (value_regs_get y 0)))
;; General 64-bit case. ;; General 64-bit case.
(rule (lower (has_type $I64 (rotr x y))) (rule (lower (has_type $I64 (rotr x y)))
(a64_rotr $I64 x y)) (a64_rotr $I64 x (value_regs_get y 0)))
;; Specialization for the 8/16-bit case when the rotation amount is an immediate. ;; Specialization for the 8/16-bit case when the rotation amount is an immediate.
(rule (lower (has_type (fits_in_16 ty) (rotr x (iconst k)))) (rule (lower (has_type (fits_in_16 ty) (rotr x (iconst k))))

View File

@@ -51,3 +51,117 @@ block0(v0: i128, v1: i8):
; run: %rotr_amt_i128(0x01010101_01010101_01010101_01010101, 73) == 0x80808080_80808080_80808080_80808080 ; run: %rotr_amt_i128(0x01010101_01010101_01010101_01010101, 73) == 0x80808080_80808080_80808080_80808080
; run: %rotr_amt_i128(0x02020202_02020202_01010101_01010101, 0) == 0x02020202_02020202_01010101_01010101 ; run: %rotr_amt_i128(0x02020202_02020202_01010101_01010101, 0) == 0x02020202_02020202_01010101_01010101
; run: %rotr_amt_i128(0x03030303_03030303_01010101_01010101, 128) == 0x03030303_03030303_01010101_01010101 ; run: %rotr_amt_i128(0x03030303_03030303_01010101_01010101, 128) == 0x03030303_03030303_01010101_01010101
; Rotate an i64 left by an i128-typed amount. The vectors below show the
; amount is reduced mod 64: amounts 0x41 (65) and 0x101 (257) give the same
; result as amount 1, and the non-zero upper i128 half (0x...00000002) of the
; amount is ignored in every case.
function %rotl_i64_i128(i64, i128) -> i64 {
block0(v0: i64, v1: i128):
v2 = rotl.i64 v0, v1
return v2
}
; run: %rotl_i64_i128(0xe0000000_00000000, 0x00000000_00000000_00000000_00000000) == 0xe0000000_00000000
; run: %rotl_i64_i128(0xe0000000_00000000, 0x00000000_00000002_00000000_00000001) == 0xc0000000_00000001
; run: %rotl_i64_i128(0xe000000f_0000000f, 0x00000000_00000002_00000000_00000000) == 0xe000000f_0000000f
; run: %rotl_i64_i128(0xe000000f_0000000f, 0x00000000_00000002_00000000_00000004) == 0x000000f0_000000fe
; run: %rotl_i64_i128(0xe0000000_00000004, 0x00000000_00000002_00000000_00000040) == 0xe0000000_00000004
; run: %rotl_i64_i128(0xe0000000_00000004, 0x00000000_00000002_00000000_00000041) == 0xc0000000_00000009
; run: %rotl_i64_i128(0xe0000000_00000004, 0x00000000_00000002_00000000_00000042) == 0x80000000_00000013
; run: %rotl_i64_i128(0xe0000000_00000004, 0x00000000_00000002_00000000_00000101) == 0xc0000000_00000009
; Rotate an i32 left by an i128-typed amount. The amount is reduced mod 32
; (0x21 and 0x101 behave like 1) and the non-zero upper i128 half of the
; amount is ignored.
function %rotl_i32_i128(i32, i128) -> i32 {
block0(v0: i32, v1: i128):
v2 = rotl.i32 v0, v1
return v2
}
; run: %rotl_i32_i128(0xe0000000, 0x00000000_00000000_00000000_00000000) == 0xe0000000
; run: %rotl_i32_i128(0xe0000000, 0x00000000_00000002_00000000_00000001) == 0xc0000001
; run: %rotl_i32_i128(0xe00f000f, 0x00000000_00000002_00000000_00000000) == 0xe00f000f
; run: %rotl_i32_i128(0xe00f000f, 0x00000000_00000002_00000000_00000004) == 0x00f000fe
; run: %rotl_i32_i128(0xe0000004, 0x00000000_00000002_00000000_00000020) == 0xe0000004
; run: %rotl_i32_i128(0xe0000004, 0x00000000_00000002_00000000_00000021) == 0xc0000009
; run: %rotl_i32_i128(0xe0000004, 0x00000000_00000002_00000000_00000022) == 0x80000013
; run: %rotl_i32_i128(0xe0000004, 0x00000000_00000002_00000000_00000101) == 0xc0000009
; Rotate an i16 left by an i128-typed amount. The amount is reduced mod 16
; (0x11 and 0x101 behave like 1) and the non-zero upper i128 half of the
; amount is ignored.
function %rotl_i16_i128(i16, i128) -> i16 {
block0(v0: i16, v1: i128):
v2 = rotl.i16 v0, v1
return v2
}
; run: %rotl_i16_i128(0xe000, 0x00000000_00000000_00000000_00000000) == 0xe000
; run: %rotl_i16_i128(0xe000, 0x00000000_00000002_00000000_00000001) == 0xc001
; run: %rotl_i16_i128(0xef0f, 0x00000000_00000002_00000000_00000000) == 0xef0f
; run: %rotl_i16_i128(0xef0f, 0x00000000_00000002_00000000_00000004) == 0xf0fe
; run: %rotl_i16_i128(0xe004, 0x00000000_00000002_00000000_00000010) == 0xe004
; run: %rotl_i16_i128(0xe004, 0x00000000_00000002_00000000_00000011) == 0xc009
; run: %rotl_i16_i128(0xe004, 0x00000000_00000002_00000000_00000012) == 0x8013
; run: %rotl_i16_i128(0xe004, 0x00000000_00000002_00000000_00000101) == 0xc009
; Rotate an i8 left by an i128-typed amount. The amount is reduced mod 8
; (0x09 and 0x101 behave like 1) and the non-zero upper i128 half of the
; amount is ignored.
function %rotl_i8_i128(i8, i128) -> i8 {
block0(v0: i8, v1: i128):
v2 = rotl.i8 v0, v1
return v2
}
; run: %rotl_i8_i128(0xe0, 0x00000000_00000000_00000000_00000000) == 0xe0
; run: %rotl_i8_i128(0xe0, 0x00000000_00000002_00000000_00000001) == 0xc1
; run: %rotl_i8_i128(0xef, 0x00000000_00000002_00000000_00000000) == 0xef
; run: %rotl_i8_i128(0xef, 0x00000000_00000002_00000000_00000004) == 0xfe
; run: %rotl_i8_i128(0xe4, 0x00000000_00000002_00000000_00000008) == 0xe4
; run: %rotl_i8_i128(0xe4, 0x00000000_00000002_00000000_00000009) == 0xc9
; run: %rotl_i8_i128(0xe4, 0x00000000_00000002_00000000_0000000A) == 0x93
; run: %rotl_i8_i128(0xe4, 0x00000000_00000002_00000000_00000101) == 0xc9
; Rotate an i64 right by an i128-typed amount. Mirrors %rotl_i64_i128: the
; amount is reduced mod 64 (0x41 and 0x101 behave like 1) and the non-zero
; upper i128 half of the amount is ignored.
function %rotr_i64_i128(i64, i128) -> i64 {
block0(v0: i64, v1: i128):
v2 = rotr.i64 v0, v1
return v2
}
; run: %rotr_i64_i128(0xe0000000_00000000, 0x00000000_00000000_00000000_00000000) == 0xe0000000_00000000
; run: %rotr_i64_i128(0xe0000000_00000000, 0x00000000_00000002_00000000_00000001) == 0x70000000_00000000
; run: %rotr_i64_i128(0xe000000f_0000000f, 0x00000000_00000002_00000000_00000000) == 0xe000000f_0000000f
; run: %rotr_i64_i128(0xe000000f_0000000f, 0x00000000_00000002_00000000_00000004) == 0xfe000000_f0000000
; run: %rotr_i64_i128(0xe0000000_00000004, 0x00000000_00000002_00000000_00000040) == 0xe0000000_00000004
; run: %rotr_i64_i128(0xe0000000_00000004, 0x00000000_00000002_00000000_00000041) == 0x70000000_00000002
; run: %rotr_i64_i128(0xe0000000_00000004, 0x00000000_00000002_00000000_00000042) == 0x38000000_00000001
; run: %rotr_i64_i128(0xe0000000_00000004, 0x00000000_00000002_00000000_00000101) == 0x70000000_00000002
; Rotate an i32 right by an i128-typed amount. The amount is reduced mod 32
; (0x21 and 0x101 behave like 1) and the non-zero upper i128 half of the
; amount is ignored.
function %rotr_i32_i128(i32, i128) -> i32 {
block0(v0: i32, v1: i128):
v2 = rotr.i32 v0, v1
return v2
}
; run: %rotr_i32_i128(0xe0000000, 0x00000000_00000000_00000000_00000000) == 0xe0000000
; run: %rotr_i32_i128(0xe0000000, 0x00000000_00000002_00000000_00000001) == 0x70000000
; run: %rotr_i32_i128(0xe00f000f, 0x00000000_00000002_00000000_00000000) == 0xe00f000f
; run: %rotr_i32_i128(0xe00f000f, 0x00000000_00000002_00000000_00000004) == 0xfe00f000
; run: %rotr_i32_i128(0xe0000004, 0x00000000_00000002_00000000_00000020) == 0xe0000004
; run: %rotr_i32_i128(0xe0000004, 0x00000000_00000002_00000000_00000021) == 0x70000002
; run: %rotr_i32_i128(0xe0000004, 0x00000000_00000002_00000000_00000022) == 0x38000001
; run: %rotr_i32_i128(0xe0000004, 0x00000000_00000002_00000000_00000101) == 0x70000002
; Rotate an i16 right by an i128-typed amount. The amount is reduced mod 16
; (0x11 and 0x101 behave like 1) and the non-zero upper i128 half of the
; amount is ignored.
function %rotr_i16_i128(i16, i128) -> i16 {
block0(v0: i16, v1: i128):
v2 = rotr.i16 v0, v1
return v2
}
; run: %rotr_i16_i128(0xe000, 0x00000000_00000000_00000000_00000000) == 0xe000
; run: %rotr_i16_i128(0xe000, 0x00000000_00000002_00000000_00000001) == 0x7000
; run: %rotr_i16_i128(0xef0f, 0x00000000_00000002_00000000_00000000) == 0xef0f
; run: %rotr_i16_i128(0xef0f, 0x00000000_00000002_00000000_00000004) == 0xfef0
; run: %rotr_i16_i128(0xe004, 0x00000000_00000002_00000000_00000010) == 0xe004
; run: %rotr_i16_i128(0xe004, 0x00000000_00000002_00000000_00000011) == 0x7002
; run: %rotr_i16_i128(0xe004, 0x00000000_00000002_00000000_00000012) == 0x3801
; run: %rotr_i16_i128(0xe004, 0x00000000_00000002_00000000_00000101) == 0x7002
; Rotate an i8 right by an i128-typed amount. The amount is reduced mod 8
; (0x09 and 0x101 behave like 1) and the non-zero upper i128 half of the
; amount is ignored.
function %rotr_i8_i128(i8, i128) -> i8 {
block0(v0: i8, v1: i128):
v2 = rotr.i8 v0, v1
return v2
}
; run: %rotr_i8_i128(0xe0, 0x00000000_00000000_00000000_00000000) == 0xe0
; run: %rotr_i8_i128(0xe0, 0x00000000_00000002_00000000_00000001) == 0x70
; run: %rotr_i8_i128(0xef, 0x00000000_00000002_00000000_00000000) == 0xef
; run: %rotr_i8_i128(0xef, 0x00000000_00000002_00000000_00000004) == 0xfe
; run: %rotr_i8_i128(0xe0, 0x00000000_00000002_00000000_00000008) == 0xe0
; run: %rotr_i8_i128(0xe0, 0x00000000_00000002_00000000_00000009) == 0x70
; run: %rotr_i8_i128(0xe0, 0x00000000_00000002_00000000_0000000A) == 0x38
; run: %rotr_i8_i128(0xe0, 0x00000000_00000002_00000000_00000101) == 0x70