From f57b4412ecb158cd36dba9a14d5496617cfaf820 Mon Sep 17 00:00:00 2001 From: Afonso Bordado Date: Wed, 7 Sep 2022 19:11:47 +0100 Subject: [PATCH] cranelift: Implement missing i128 rotates on AArch64 (#4866) --- cranelift/codegen/src/isa/aarch64/lower.isle | 15 ++- .../filetests/runtests/i128-rotate.clif | 114 ++++++++++++++++++ 2 files changed, 123 insertions(+), 6 deletions(-) diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle index 4979fcb74d..323e9c9af6 100644 --- a/cranelift/codegen/src/isa/aarch64/lower.isle +++ b/cranelift/codegen/src/isa/aarch64/lower.isle @@ -1282,7 +1282,8 @@ ;; General 8/16-bit case. (rule (lower (has_type (fits_in_16 ty) (rotl x y))) - (let ((neg_shift Reg (sub $I32 (zero_reg) y))) + (let ((amt Reg (value_regs_get y 0)) + (neg_shift Reg (sub $I32 (zero_reg) amt))) (small_rotr ty (put_in_reg_zext32 x) neg_shift))) ;; Specialization for the 8/16-bit case when the rotation amount is an immediate. @@ -1300,12 +1301,14 @@ ;; General 32-bit case. (rule (lower (has_type $I32 (rotl x y))) - (let ((neg_shift Reg (sub $I32 (zero_reg) y))) + (let ((amt Reg (value_regs_get y 0)) + (neg_shift Reg (sub $I32 (zero_reg) amt))) (a64_rotr $I32 x neg_shift))) ;; General 64-bit case. (rule (lower (has_type $I64 (rotl x y))) - (let ((neg_shift Reg (sub $I64 (zero_reg) y))) + (let ((amt Reg (value_regs_get y 0)) + (neg_shift Reg (sub $I64 (zero_reg) amt))) (a64_rotr $I64 x neg_shift))) ;; Specialization for the 32-bit case when the rotation amount is an immediate. @@ -1338,15 +1341,15 @@ ;; General 8/16-bit case. (rule (lower (has_type (fits_in_16 ty) (rotr x y))) - (small_rotr ty (put_in_reg_zext32 x) y)) + (small_rotr ty (put_in_reg_zext32 x) (value_regs_get y 0))) ;; General 32-bit case. (rule (lower (has_type $I32 (rotr x y))) - (a64_rotr $I32 x y)) + (a64_rotr $I32 x (value_regs_get y 0))) ;; General 64-bit case. (rule (lower (has_type $I64 (rotr x y))) - (a64_rotr $I64 x y)) + (a64_rotr $I64 x (value_regs_get y 0))) ;; Specialization for the 8/16-bit case when the rotation amount is an immediate. (rule (lower (has_type (fits_in_16 ty) (rotr x (iconst k)))) diff --git a/cranelift/filetests/filetests/runtests/i128-rotate.clif b/cranelift/filetests/filetests/runtests/i128-rotate.clif index 429f29fd84..9fa3488988 100644 --- a/cranelift/filetests/filetests/runtests/i128-rotate.clif +++ b/cranelift/filetests/filetests/runtests/i128-rotate.clif @@ -51,3 +51,117 @@ block0(v0: i128, v1: i8): ; run: %rotr_amt_i128(0x01010101_01010101_01010101_01010101, 73) == 0x80808080_80808080_80808080_80808080 ; run: %rotr_amt_i128(0x02020202_02020202_01010101_01010101, 0) == 0x02020202_02020202_01010101_01010101 ; run: %rotr_amt_i128(0x03030303_03030303_01010101_01010101, 128) == 0x03030303_03030303_01010101_01010101 + + +function %rotl_i64_i128(i64, i128) -> i64 { +block0(v0: i64, v1: i128): + v2 = rotl.i64 v0, v1 + return v2 +} +; run: %rotl_i64_i128(0xe0000000_00000000, 0x00000000_00000000_00000000_00000000) == 0xe0000000_00000000 +; run: %rotl_i64_i128(0xe0000000_00000000, 0x00000000_00000002_00000000_00000001) == 0xc0000000_00000001 +; run: %rotl_i64_i128(0xe000000f_0000000f, 0x00000000_00000002_00000000_00000000) == 0xe000000f_0000000f +; run: %rotl_i64_i128(0xe000000f_0000000f, 0x00000000_00000002_00000000_00000004) == 0x000000f0_000000fe +; run: %rotl_i64_i128(0xe0000000_00000004, 0x00000000_00000002_00000000_00000040) == 0xe0000000_00000004 +; run: %rotl_i64_i128(0xe0000000_00000004, 0x00000000_00000002_00000000_00000041) == 0xc0000000_00000009 +; run: %rotl_i64_i128(0xe0000000_00000004, 0x00000000_00000002_00000000_00000042) == 0x80000000_00000013 +; run: %rotl_i64_i128(0xe0000000_00000004, 0x00000000_00000002_00000000_00000101) == 0xc0000000_00000009 + +function %rotl_i32_i128(i32, i128) -> i32 { +block0(v0: i32, v1: i128): + v2 = rotl.i32 v0, v1 + return v2 +} +; run: %rotl_i32_i128(0xe0000000, 0x00000000_00000000_00000000_00000000) == 0xe0000000 +; run: %rotl_i32_i128(0xe0000000, 0x00000000_00000002_00000000_00000001) == 0xc0000001 +; run: %rotl_i32_i128(0xe00f000f, 0x00000000_00000002_00000000_00000000) == 0xe00f000f +; run: %rotl_i32_i128(0xe00f000f, 0x00000000_00000002_00000000_00000004) == 0x00f000fe +; run: %rotl_i32_i128(0xe0000004, 0x00000000_00000002_00000000_00000020) == 0xe0000004 +; run: %rotl_i32_i128(0xe0000004, 0x00000000_00000002_00000000_00000021) == 0xc0000009 +; run: %rotl_i32_i128(0xe0000004, 0x00000000_00000002_00000000_00000022) == 0x80000013 +; run: %rotl_i32_i128(0xe0000004, 0x00000000_00000002_00000000_00000101) == 0xc0000009 + +function %rotl_i16_i128(i16, i128) -> i16 { +block0(v0: i16, v1: i128): + v2 = rotl.i16 v0, v1 + return v2 +} +; run: %rotl_i16_i128(0xe000, 0x00000000_00000000_00000000_00000000) == 0xe000 +; run: %rotl_i16_i128(0xe000, 0x00000000_00000002_00000000_00000001) == 0xc001 +; run: %rotl_i16_i128(0xef0f, 0x00000000_00000002_00000000_00000000) == 0xef0f +; run: %rotl_i16_i128(0xef0f, 0x00000000_00000002_00000000_00000004) == 0xf0fe +; run: %rotl_i16_i128(0xe004, 0x00000000_00000002_00000000_00000010) == 0xe004 +; run: %rotl_i16_i128(0xe004, 0x00000000_00000002_00000000_00000011) == 0xc009 +; run: %rotl_i16_i128(0xe004, 0x00000000_00000002_00000000_00000012) == 0x8013 +; run: %rotl_i16_i128(0xe004, 0x00000000_00000002_00000000_00000101) == 0xc009 + +function %rotl_i8_i128(i8, i128) -> i8 { +block0(v0: i8, v1: i128): + v2 = rotl.i8 v0, v1 + return v2 +} +; run: %rotl_i8_i128(0xe0, 0x00000000_00000000_00000000_00000000) == 0xe0 +; run: %rotl_i8_i128(0xe0, 0x00000000_00000002_00000000_00000001) == 0xc1 +; run: %rotl_i8_i128(0xef, 0x00000000_00000002_00000000_00000000) == 0xef +; run: %rotl_i8_i128(0xef, 0x00000000_00000002_00000000_00000004) == 0xfe +; run: %rotl_i8_i128(0xe4, 0x00000000_00000002_00000000_00000008) == 0xe4 +; run: %rotl_i8_i128(0xe4, 0x00000000_00000002_00000000_00000009) == 0xc9 +; run: %rotl_i8_i128(0xe4, 0x00000000_00000002_00000000_0000000A) == 0x93 +; run: %rotl_i8_i128(0xe4, 0x00000000_00000002_00000000_00000101) == 0xc9 + + +function %rotr_i64_i128(i64, i128) -> i64 { +block0(v0: i64, v1: i128): + v2 = rotr.i64 v0, v1 + return v2 +} +; run: %rotr_i64_i128(0xe0000000_00000000, 0x00000000_00000000_00000000_00000000) == 0xe0000000_00000000 +; run: %rotr_i64_i128(0xe0000000_00000000, 0x00000000_00000002_00000000_00000001) == 0x70000000_00000000 +; run: %rotr_i64_i128(0xe000000f_0000000f, 0x00000000_00000002_00000000_00000000) == 0xe000000f_0000000f +; run: %rotr_i64_i128(0xe000000f_0000000f, 0x00000000_00000002_00000000_00000004) == 0xfe000000_f0000000 +; run: %rotr_i64_i128(0xe0000000_00000004, 0x00000000_00000002_00000000_00000040) == 0xe0000000_00000004 +; run: %rotr_i64_i128(0xe0000000_00000004, 0x00000000_00000002_00000000_00000041) == 0x70000000_00000002 +; run: %rotr_i64_i128(0xe0000000_00000004, 0x00000000_00000002_00000000_00000042) == 0x38000000_00000001 +; run: %rotr_i64_i128(0xe0000000_00000004, 0x00000000_00000002_00000000_00000101) == 0x70000000_00000002 + +function %rotr_i32_i128(i32, i128) -> i32 { +block0(v0: i32, v1: i128): + v2 = rotr.i32 v0, v1 + return v2 +} +; run: %rotr_i32_i128(0xe0000000, 0x00000000_00000000_00000000_00000000) == 0xe0000000 +; run: %rotr_i32_i128(0xe0000000, 0x00000000_00000002_00000000_00000001) == 0x70000000 +; run: %rotr_i32_i128(0xe00f000f, 0x00000000_00000002_00000000_00000000) == 0xe00f000f +; run: %rotr_i32_i128(0xe00f000f, 0x00000000_00000002_00000000_00000004) == 0xfe00f000 +; run: %rotr_i32_i128(0xe0000004, 0x00000000_00000002_00000000_00000020) == 0xe0000004 +; run: %rotr_i32_i128(0xe0000004, 0x00000000_00000002_00000000_00000021) == 0x70000002 +; run: %rotr_i32_i128(0xe0000004, 0x00000000_00000002_00000000_00000022) == 0x38000001 +; run: %rotr_i32_i128(0xe0000004, 0x00000000_00000002_00000000_00000101) == 0x70000002 + +function %rotr_i16_i128(i16, i128) -> i16 { +block0(v0: i16, v1: i128): + v2 = rotr.i16 v0, v1 + return v2 +} +; run: %rotr_i16_i128(0xe000, 0x00000000_00000000_00000000_00000000) == 0xe000 +; run: %rotr_i16_i128(0xe000, 0x00000000_00000002_00000000_00000001) == 0x7000 +; run: %rotr_i16_i128(0xef0f, 0x00000000_00000002_00000000_00000000) == 0xef0f +; run: %rotr_i16_i128(0xef0f, 0x00000000_00000002_00000000_00000004) == 0xfef0 +; run: %rotr_i16_i128(0xe004, 0x00000000_00000002_00000000_00000010) == 0xe004 +; run: %rotr_i16_i128(0xe004, 0x00000000_00000002_00000000_00000011) == 0x7002 +; run: %rotr_i16_i128(0xe004, 0x00000000_00000002_00000000_00000012) == 0x3801 +; run: %rotr_i16_i128(0xe004, 0x00000000_00000002_00000000_00000101) == 0x7002 + +function %rotr_i8_i128(i8, i128) -> i8 { +block0(v0: i8, v1: i128): + v2 = rotr.i8 v0, v1 + return v2 +} +; run: %rotr_i8_i128(0xe0, 0x00000000_00000000_00000000_00000000) == 0xe0 +; run: %rotr_i8_i128(0xe0, 0x00000000_00000002_00000000_00000001) == 0x70 +; run: %rotr_i8_i128(0xef, 0x00000000_00000002_00000000_00000000) == 0xef +; run: %rotr_i8_i128(0xef, 0x00000000_00000002_00000000_00000004) == 0xfe +; run: %rotr_i8_i128(0xe0, 0x00000000_00000002_00000000_00000008) == 0xe0 +; run: %rotr_i8_i128(0xe0, 0x00000000_00000002_00000000_00000009) == 0x70 +; run: %rotr_i8_i128(0xe0, 0x00000000_00000002_00000000_0000000A) == 0x38 +; run: %rotr_i8_i128(0xe0, 0x00000000_00000002_00000000_00000101) == 0x70