diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 01915b35a9..ccedd0a66d 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -818,31 +818,17 @@ ;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; `i16` and `i8`: we need to extend the shift amount, or mask the -;; constant. +;; `i64` and smaller: we can rely on x86's rotate-amount masking since +;; we operate on the whole register. For const's we mask the constant. -(rule (lower (has_type (ty_8_or_16 ty) (rotl src amt))) - (let ((amt_ Gpr (extend_to_gpr amt $I32 (ExtendKind.Zero)))) - (x64_rotl ty src (gpr_to_imm8_gpr amt_)))) +(rule (lower (has_type (fits_in_64 ty) (rotl src amt))) + (x64_rotl ty src (put_masked_in_imm8_gpr amt ty))) -(rule (lower (has_type (ty_8_or_16 ty) +(rule (lower (has_type (fits_in_64 ty) (rotl src (u64_from_iconst amt)))) (x64_rotl ty src (const_to_type_masked_imm8 amt ty))) -;; `i64` and `i32`: we can rely on x86's rotate-amount masking since -;; we operate on the whole register. - -(rule (lower (has_type (ty_32_or_64 ty) (rotl src amt))) - ;; NB: Only the low bits of `amt` matter since we logically mask the - ;; shift amount to the value's bit width. - (let ((amt_ Gpr (lo_gpr amt))) - (x64_rotl ty src amt_))) - -(rule (lower (has_type (ty_32_or_64 ty) - (rotl src (u64_from_iconst amt)))) - (x64_rotl ty src - (const_to_type_masked_imm8 amt ty))) ;; `i128`. @@ -858,31 +844,17 @@ ;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; `i16` and `i8`: we need to extend the shift amount, or mask the -;; constant. +;; `i64` and smaller: we can rely on x86's rotate-amount masking since +;; we operate on the whole register. For const's we mask the constant. -(rule (lower (has_type (ty_8_or_16 ty) (rotr src amt))) - (let ((amt_ Gpr (extend_to_gpr amt $I32 (ExtendKind.Zero)))) - (x64_rotr ty src amt_))) +(rule (lower (has_type (fits_in_64 ty) (rotr src amt))) + (x64_rotr ty src (put_masked_in_imm8_gpr amt ty))) -(rule (lower (has_type (ty_8_or_16 ty) +(rule (lower (has_type (fits_in_64 ty) (rotr src (u64_from_iconst amt)))) (x64_rotr ty src (const_to_type_masked_imm8 amt ty))) -;; `i64` and `i32`: we can rely on x86's rotate-amount masking since -;; we operate on the whole register. - -(rule (lower (has_type (ty_32_or_64 ty) (rotr src amt))) - ;; NB: Only the low bits of `amt` matter since we logically mask the - ;; shift amount to the value's bit width. - (let ((amt_ Gpr (lo_gpr amt))) - (x64_rotr ty src amt_))) - -(rule (lower (has_type (ty_32_or_64 ty) - (rotr src (u64_from_iconst amt)))) - (x64_rotr ty src - (const_to_type_masked_imm8 amt ty))) ;; `i128`. 
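The rewritten rules lean on the fact that a rotate is periodic in the value's bit width, so a constant amount can be reduced modulo that width before it is encoded as an 8-bit immediate. Below is a minimal Rust sketch of that invariant; the helper name `mask_rotate_amount` is hypothetical and only stands in for whatever masking `const_to_type_masked_imm8` is assumed to perform. The assertions mirror the `%rotl_i8_const_37` regression test added later in this patch.

    // Hypothetical helper: reduce a constant rotate amount modulo the value's
    // bit width (the widths are powers of two, so a mask works).
    fn mask_rotate_amount(amt: u64, bits: u32) -> u32 {
        (amt & (u64::from(bits) - 1)) as u32
    }

    fn main() {
        // Rotating an i8 by 37 behaves like rotating it by 37 & 7 == 5.
        assert_eq!(mask_rotate_amount(37, 8), 5);
        assert_eq!(0x01u8.rotate_left(5), 0x20); // matches %rotl_i8_const_37(0x01) == 0x20
        assert_eq!(0x12u8.rotate_left(5), 0x42); // matches %rotl_i8_const_37(0x12) == 0x42
    }

The same periodicity is why a rotate by the full width (for example a 64-bit rotate by 64) is expected to leave the value unchanged in the runtests below.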
diff --git a/cranelift/filetests/filetests/runtests/i128-rotate.clif b/cranelift/filetests/filetests/runtests/i128-rotate.clif index dac4b567ad..429f29fd84 100644 --- a/cranelift/filetests/filetests/runtests/i128-rotate.clif +++ b/cranelift/filetests/filetests/runtests/i128-rotate.clif @@ -1,3 +1,4 @@ +test interpret test run set enable_llvm_abi_extensions=true target aarch64 diff --git a/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif b/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif index 847a1a9b1c..64fa59c441 100644 --- a/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif +++ b/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif @@ -1,3 +1,4 @@ +test interpret test run target aarch64 target s390x diff --git a/cranelift/filetests/filetests/runtests/i128-shifts.clif b/cranelift/filetests/filetests/runtests/i128-shifts.clif index 1c370e9c85..272c241f44 100644 --- a/cranelift/filetests/filetests/runtests/i128-shifts.clif +++ b/cranelift/filetests/filetests/runtests/i128-shifts.clif @@ -1,3 +1,4 @@ +test interpret test run set enable_llvm_abi_extensions=true target aarch64 diff --git a/cranelift/filetests/filetests/runtests/rotl.clif b/cranelift/filetests/filetests/runtests/rotl.clif new file mode 100644 index 0000000000..cdf8fde2f4 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/rotl.clif @@ -0,0 +1,243 @@ +test interpret +test run +target aarch64 +target x86_64 +target s390x + + +function %rotl_i64_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = rotl.i64 v0, v1 + return v2 +} +; run: %rotl_i64_i64(0xe0000000_00000000, 0) == 0xe0000000_00000000 +; run: %rotl_i64_i64(0xe0000000_00000000, 1) == 0xc0000000_00000001 +; run: %rotl_i64_i64(0xe000000f_0000000f, 0) == 0xe000000f_0000000f +; run: %rotl_i64_i64(0xe000000f_0000000f, 4) == 0x000000f0_000000fe +; run: %rotl_i64_i64(0xe0000000_00000004, 64) == 0xe0000000_00000004 +; run: %rotl_i64_i64(0xe0000000_00000004, 65) == 0xc0000000_00000009 +; run: %rotl_i64_i64(0xe0000000_00000004, 66) == 0x80000000_00000013 +; run: %rotl_i64_i64(0xe0000000_00000004, 257) == 0xc0000000_00000009 + +function %rotl_i64_i32(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = rotl.i64 v0, v1 + return v2 +} +; run: %rotl_i64_i32(0xe0000000_00000000, 0) == 0xe0000000_00000000 +; run: %rotl_i64_i32(0xe0000000_00000000, 1) == 0xc0000000_00000001 +; run: %rotl_i64_i32(0xe000000f_0000000f, 0) == 0xe000000f_0000000f +; run: %rotl_i64_i32(0xe000000f_0000000f, 4) == 0x000000f0_000000fe +; run: %rotl_i64_i32(0xe0000000_00000004, 64) == 0xe0000000_00000004 +; run: %rotl_i64_i32(0xe0000000_00000004, 65) == 0xc0000000_00000009 +; run: %rotl_i64_i32(0xe0000000_00000004, 66) == 0x80000000_00000013 +; run: %rotl_i64_i32(0xe0000000_00000004, 257) == 0xc0000000_00000009 + +function %rotl_i64_i16(i64, i16) -> i64 { +block0(v0: i64, v1: i16): + v2 = rotl.i64 v0, v1 + return v2 +} +; run: %rotl_i64_i16(0xe0000000_00000000, 0) == 0xe0000000_00000000 +; run: %rotl_i64_i16(0xe0000000_00000000, 1) == 0xc0000000_00000001 +; run: %rotl_i64_i16(0xe000000f_0000000f, 0) == 0xe000000f_0000000f +; run: %rotl_i64_i16(0xe000000f_0000000f, 4) == 0x000000f0_000000fe +; run: %rotl_i64_i16(0xe0000000_00000004, 64) == 0xe0000000_00000004 +; run: %rotl_i64_i16(0xe0000000_00000004, 65) == 0xc0000000_00000009 +; run: %rotl_i64_i16(0xe0000000_00000004, 66) == 0x80000000_00000013 +; run: %rotl_i64_i16(0xe0000000_00000004, 257) == 0xc0000000_00000009 + +function %rotl_i64_i8(i64, i8) -> i64 { +block0(v0: i64, v1: i8): 
+ v2 = rotl.i64 v0, v1 + return v2 +} +; run: %rotl_i64_i8(0xe0000000_00000000, 0) == 0xe0000000_00000000 +; run: %rotl_i64_i8(0xe0000000_00000000, 1) == 0xc0000000_00000001 +; run: %rotl_i64_i8(0xe000000f_0000000f, 0) == 0xe000000f_0000000f +; run: %rotl_i64_i8(0xe000000f_0000000f, 4) == 0x000000f0_000000fe +; run: %rotl_i64_i8(0xe0000000_00000004, 64) == 0xe0000000_00000004 +; run: %rotl_i64_i8(0xe0000000_00000004, 65) == 0xc0000000_00000009 +; run: %rotl_i64_i8(0xe0000000_00000004, 66) == 0x80000000_00000013 + + +function %rotl_i32_i64(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = rotl.i32 v0, v1 + return v2 +} +; run: %rotl_i32_i64(0xe0000000, 0) == 0xe0000000 +; run: %rotl_i32_i64(0xe0000000, 1) == 0xc0000001 +; run: %rotl_i32_i64(0xe00f000f, 0) == 0xe00f000f +; run: %rotl_i32_i64(0xe00f000f, 4) == 0x00f000fe +; run: %rotl_i32_i64(0xe0000004, 64) == 0xe0000004 +; run: %rotl_i32_i64(0xe0000004, 65) == 0xc0000009 +; run: %rotl_i32_i64(0xe0000004, 66) == 0x80000013 +; run: %rotl_i32_i64(0xe0000004, 257) == 0xc0000009 + +function %rotl_i32_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = rotl.i32 v0, v1 + return v2 +} +; run: %rotl_i32_i32(0xe0000000, 0) == 0xe0000000 +; run: %rotl_i32_i32(0xe0000000, 1) == 0xc0000001 +; run: %rotl_i32_i32(0xe00f000f, 0) == 0xe00f000f +; run: %rotl_i32_i32(0xe00f000f, 4) == 0x00f000fe +; run: %rotl_i32_i32(0xe0000004, 64) == 0xe0000004 +; run: %rotl_i32_i32(0xe0000004, 65) == 0xc0000009 +; run: %rotl_i32_i32(0xe0000004, 66) == 0x80000013 +; run: %rotl_i32_i32(0xe0000004, 257) == 0xc0000009 + +function %rotl_i32_i16(i32, i16) -> i32 { +block0(v0: i32, v1: i16): + v2 = rotl.i32 v0, v1 + return v2 +} +; run: %rotl_i32_i16(0xe0000000, 0) == 0xe0000000 +; run: %rotl_i32_i16(0xe0000000, 1) == 0xc0000001 +; run: %rotl_i32_i16(0xe00f000f, 0) == 0xe00f000f +; run: %rotl_i32_i16(0xe00f000f, 4) == 0x00f000fe +; run: %rotl_i32_i16(0xe0000004, 64) == 0xe0000004 +; run: %rotl_i32_i16(0xe0000004, 65) == 0xc0000009 +; run: %rotl_i32_i16(0xe0000004, 66) == 0x80000013 +; run: %rotl_i32_i16(0xe0000004, 257) == 0xc0000009 + +function %rotl_i32_i8(i32, i8) -> i32 { +block0(v0: i32, v1: i8): + v2 = rotl.i32 v0, v1 + return v2 +} +; run: %rotl_i32_i8(0xe0000000, 0) == 0xe0000000 +; run: %rotl_i32_i8(0xe0000000, 1) == 0xc0000001 +; run: %rotl_i32_i8(0xe00f000f, 0) == 0xe00f000f +; run: %rotl_i32_i8(0xe00f000f, 4) == 0x00f000fe +; run: %rotl_i32_i8(0xe0000004, 64) == 0xe0000004 +; run: %rotl_i32_i8(0xe0000004, 65) == 0xc0000009 +; run: %rotl_i32_i8(0xe0000004, 66) == 0x80000013 + + +function %rotl_i16_i64(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = rotl.i16 v0, v1 + return v2 +} +; run: %rotl_i16_i64(0xe000, 0) == 0xe000 +; run: %rotl_i16_i64(0xe000, 1) == 0xc001 +; run: %rotl_i16_i64(0xef0f, 0) == 0xef0f +; run: %rotl_i16_i64(0xef0f, 4) == 0xf0fe +; run: %rotl_i16_i64(0xe004, 64) == 0xe004 +; run: %rotl_i16_i64(0xe004, 65) == 0xc009 +; run: %rotl_i16_i64(0xe004, 66) == 0x8013 +; run: %rotl_i16_i64(0xe004, 257) == 0xc009 + +function %rotl_i16_i32(i16, i32) -> i16 { +block0(v0: i16, v1: i32): + v2 = rotl.i16 v0, v1 + return v2 +} +; run: %rotl_i16_i32(0xe000, 0) == 0xe000 +; run: %rotl_i16_i32(0xe000, 1) == 0xc001 +; run: %rotl_i16_i32(0xef0f, 0) == 0xef0f +; run: %rotl_i16_i32(0xef0f, 4) == 0xf0fe +; run: %rotl_i16_i32(0xe004, 64) == 0xe004 +; run: %rotl_i16_i32(0xe004, 65) == 0xc009 +; run: %rotl_i16_i32(0xe004, 66) == 0x8013 +; run: %rotl_i16_i32(0xe004, 257) == 0xc009 + +function %rotl_i16_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = 
rotl.i16 v0, v1 + return v2 +} +; run: %rotl_i16_i16(0xe000, 0) == 0xe000 +; run: %rotl_i16_i16(0xe000, 1) == 0xc001 +; run: %rotl_i16_i16(0xef0f, 0) == 0xef0f +; run: %rotl_i16_i16(0xef0f, 4) == 0xf0fe +; run: %rotl_i16_i16(0xe004, 64) == 0xe004 +; run: %rotl_i16_i16(0xe004, 65) == 0xc009 +; run: %rotl_i16_i16(0xe004, 66) == 0x8013 +; run: %rotl_i16_i16(0xe004, 257) == 0xc009 + +function %rotl_i16_i8(i16, i8) -> i16 { +block0(v0: i16, v1: i8): + v2 = rotl.i16 v0, v1 + return v2 +} +; run: %rotl_i16_i8(0xe000, 0) == 0xe000 +; run: %rotl_i16_i8(0xe000, 1) == 0xc001 +; run: %rotl_i16_i8(0xef0f, 0) == 0xef0f +; run: %rotl_i16_i8(0xef0f, 4) == 0xf0fe +; run: %rotl_i16_i8(0xe004, 64) == 0xe004 +; run: %rotl_i16_i8(0xe004, 65) == 0xc009 +; run: %rotl_i16_i8(0xe004, 66) == 0x8013 + + +function %rotl_i8_i64(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = rotl.i8 v0, v1 + return v2 +} +; run: %rotl_i8_i64(0xe0, 0) == 0xe0 +; run: %rotl_i8_i64(0xe0, 1) == 0xc1 +; run: %rotl_i8_i64(0xef, 0) == 0xef +; run: %rotl_i8_i64(0xef, 4) == 0xfe +; run: %rotl_i8_i64(0xe4, 64) == 0xe4 +; run: %rotl_i8_i64(0xe4, 65) == 0xc9 +; run: %rotl_i8_i64(0xe4, 66) == 0x93 +; run: %rotl_i8_i64(0xe4, 257) == 0xc9 + +function %rotl_i8_i32(i8, i32) -> i8 { +block0(v0: i8, v1: i32): + v2 = rotl.i8 v0, v1 + return v2 +} +; run: %rotl_i8_i32(0xe0, 0) == 0xe0 +; run: %rotl_i8_i32(0xe0, 1) == 0xc1 +; run: %rotl_i8_i32(0xef, 0) == 0xef +; run: %rotl_i8_i32(0xef, 4) == 0xfe +; run: %rotl_i8_i32(0xe4, 64) == 0xe4 +; run: %rotl_i8_i32(0xe4, 65) == 0xc9 +; run: %rotl_i8_i32(0xe4, 66) == 0x93 +; run: %rotl_i8_i32(0xe4, 257) == 0xc9 + +function %rotl_i8_i16(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = rotl.i8 v0, v1 + return v2 +} +; run: %rotl_i8_i16(0xe0, 0) == 0xe0 +; run: %rotl_i8_i16(0xe0, 1) == 0xc1 +; run: %rotl_i8_i16(0xef, 0) == 0xef +; run: %rotl_i8_i16(0xef, 4) == 0xfe +; run: %rotl_i8_i16(0xe4, 64) == 0xe4 +; run: %rotl_i8_i16(0xe4, 65) == 0xc9 +; run: %rotl_i8_i16(0xe4, 66) == 0x93 +; run: %rotl_i8_i16(0xe4, 257) == 0xc9 + +function %rotl_i8_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = rotl.i8 v0, v1 + return v2 +} +; run: %rotl_i8_i8(0xe0, 0) == 0xe0 +; run: %rotl_i8_i8(0xe0, 1) == 0xc1 +; run: %rotl_i8_i8(0xef, 0) == 0xef +; run: %rotl_i8_i8(0xef, 4) == 0xfe +; run: %rotl_i8_i8(0xe4, 64) == 0xe4 +; run: %rotl_i8_i8(0xe4, 65) == 0xc9 +; run: %rotl_i8_i8(0xe4, 66) == 0x93 + + + +;; This is a regression test for rotates on x64 +;; See: https://github.com/bytecodealliance/wasmtime/pull/3610 +function %rotl_i8_const_37(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 37 + v2 = rotl.i8 v0, v1 + return v2 +} +; run: %rotl_i8_const_37(0x00) == 0x00 +; run: %rotl_i8_const_37(0x01) == 0x20 +; run: %rotl_i8_const_37(0x12) == 0x42 diff --git a/cranelift/filetests/filetests/runtests/rotr.clif b/cranelift/filetests/filetests/runtests/rotr.clif new file mode 100644 index 0000000000..5c0236f457 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/rotr.clif @@ -0,0 +1,243 @@ +test interpret +test run +target aarch64 +target x86_64 +target s390x + + +function %rotr_i64_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = rotr.i64 v0, v1 + return v2 +} +; run: %rotr_i64_i64(0xe0000000_00000000, 0) == 0xe0000000_00000000 +; run: %rotr_i64_i64(0xe0000000_00000000, 1) == 0x70000000_00000000 +; run: %rotr_i64_i64(0xe000000f_0000000f, 0) == 0xe000000f_0000000f +; run: %rotr_i64_i64(0xe000000f_0000000f, 4) == 0xfe000000_f0000000 +; run: %rotr_i64_i64(0xe0000000_00000004, 64) == 0xe0000000_00000004 +; run: 
%rotr_i64_i64(0xe0000000_00000004, 65) == 0x70000000_00000002 +; run: %rotr_i64_i64(0xe0000000_00000004, 66) == 0x38000000_00000001 +; run: %rotr_i64_i64(0xe0000000_00000004, 257) == 0x70000000_00000002 + +function %rotr_i64_i32(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = rotr.i64 v0, v1 + return v2 +} +; run: %rotr_i64_i32(0xe0000000_00000000, 0) == 0xe0000000_00000000 +; run: %rotr_i64_i32(0xe0000000_00000000, 1) == 0x70000000_00000000 +; run: %rotr_i64_i32(0xe000000f_0000000f, 0) == 0xe000000f_0000000f +; run: %rotr_i64_i32(0xe000000f_0000000f, 4) == 0xfe000000_f0000000 +; run: %rotr_i64_i32(0xe0000000_00000004, 64) == 0xe0000000_00000004 +; run: %rotr_i64_i32(0xe0000000_00000004, 65) == 0x70000000_00000002 +; run: %rotr_i64_i32(0xe0000000_00000004, 66) == 0x38000000_00000001 +; run: %rotr_i64_i32(0xe0000000_00000004, 257) == 0x70000000_00000002 + +function %rotr_i64_i16(i64, i16) -> i64 { +block0(v0: i64, v1: i16): + v2 = rotr.i64 v0, v1 + return v2 +} +; run: %rotr_i64_i16(0xe0000000_00000000, 0) == 0xe0000000_00000000 +; run: %rotr_i64_i16(0xe0000000_00000000, 1) == 0x70000000_00000000 +; run: %rotr_i64_i16(0xe000000f_0000000f, 0) == 0xe000000f_0000000f +; run: %rotr_i64_i16(0xe000000f_0000000f, 4) == 0xfe000000_f0000000 +; run: %rotr_i64_i16(0xe0000000_00000004, 64) == 0xe0000000_00000004 +; run: %rotr_i64_i16(0xe0000000_00000004, 65) == 0x70000000_00000002 +; run: %rotr_i64_i16(0xe0000000_00000004, 66) == 0x38000000_00000001 +; run: %rotr_i64_i16(0xe0000000_00000004, 257) == 0x70000000_00000002 + +function %rotr_i64_i8(i64, i8) -> i64 { +block0(v0: i64, v1: i8): + v2 = rotr.i64 v0, v1 + return v2 +} +; run: %rotr_i64_i8(0xe0000000_00000000, 0) == 0xe0000000_00000000 +; run: %rotr_i64_i8(0xe0000000_00000000, 1) == 0x70000000_00000000 +; run: %rotr_i64_i8(0xe000000f_0000000f, 0) == 0xe000000f_0000000f +; run: %rotr_i64_i8(0xe000000f_0000000f, 4) == 0xfe000000_f0000000 +; run: %rotr_i64_i8(0xe0000000_00000004, 64) == 0xe0000000_00000004 +; run: %rotr_i64_i8(0xe0000000_00000004, 65) == 0x70000000_00000002 +; run: %rotr_i64_i8(0xe0000000_00000004, 66) == 0x38000000_00000001 + + +function %rotr_i32_i64(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = rotr.i32 v0, v1 + return v2 +} +; run: %rotr_i32_i64(0xe0000000, 0) == 0xe0000000 +; run: %rotr_i32_i64(0xe0000000, 1) == 0x70000000 +; run: %rotr_i32_i64(0xe00f000f, 0) == 0xe00f000f +; run: %rotr_i32_i64(0xe00f000f, 4) == 0xfe00f000 +; run: %rotr_i32_i64(0xe0000004, 64) == 0xe0000004 +; run: %rotr_i32_i64(0xe0000004, 65) == 0x70000002 +; run: %rotr_i32_i64(0xe0000004, 66) == 0x38000001 +; run: %rotr_i32_i64(0xe0000004, 257) == 0x70000002 + +function %rotr_i32_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = rotr.i32 v0, v1 + return v2 +} +; run: %rotr_i32_i32(0xe0000000, 0) == 0xe0000000 +; run: %rotr_i32_i32(0xe0000000, 1) == 0x70000000 +; run: %rotr_i32_i32(0xe00f000f, 0) == 0xe00f000f +; run: %rotr_i32_i32(0xe00f000f, 4) == 0xfe00f000 +; run: %rotr_i32_i32(0xe0000004, 64) == 0xe0000004 +; run: %rotr_i32_i32(0xe0000004, 65) == 0x70000002 +; run: %rotr_i32_i32(0xe0000004, 66) == 0x38000001 +; run: %rotr_i32_i32(0xe0000004, 257) == 0x70000002 + +function %rotr_i32_i16(i32, i16) -> i32 { +block0(v0: i32, v1: i16): + v2 = rotr.i32 v0, v1 + return v2 +} +; run: %rotr_i32_i16(0xe0000000, 0) == 0xe0000000 +; run: %rotr_i32_i16(0xe0000000, 1) == 0x70000000 +; run: %rotr_i32_i16(0xe00f000f, 0) == 0xe00f000f +; run: %rotr_i32_i16(0xe00f000f, 4) == 0xfe00f000 +; run: %rotr_i32_i16(0xe0000004, 64) == 0xe0000004 +; run: 
%rotr_i32_i16(0xe0000004, 65) == 0x70000002 +; run: %rotr_i32_i16(0xe0000004, 66) == 0x38000001 +; run: %rotr_i32_i16(0xe0000004, 257) == 0x70000002 + +function %rotr_i32_i8(i32, i8) -> i32 { +block0(v0: i32, v1: i8): + v2 = rotr.i32 v0, v1 + return v2 +} +; run: %rotr_i32_i8(0xe0000000, 0) == 0xe0000000 +; run: %rotr_i32_i8(0xe0000000, 1) == 0x70000000 +; run: %rotr_i32_i8(0xe00f000f, 0) == 0xe00f000f +; run: %rotr_i32_i8(0xe00f000f, 4) == 0xfe00f000 +; run: %rotr_i32_i8(0xe0000004, 64) == 0xe0000004 +; run: %rotr_i32_i8(0xe0000004, 65) == 0x70000002 +; run: %rotr_i32_i8(0xe0000004, 66) == 0x38000001 + + +function %rotr_i16_i64(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = rotr.i16 v0, v1 + return v2 +} +; run: %rotr_i16_i64(0xe000, 0) == 0xe000 +; run: %rotr_i16_i64(0xe000, 1) == 0x7000 +; run: %rotr_i16_i64(0xef0f, 0) == 0xef0f +; run: %rotr_i16_i64(0xef0f, 4) == 0xfef0 +; run: %rotr_i16_i64(0xe004, 64) == 0xe004 +; run: %rotr_i16_i64(0xe004, 65) == 0x7002 +; run: %rotr_i16_i64(0xe004, 66) == 0x3801 +; run: %rotr_i16_i64(0xe004, 257) == 0x7002 + +function %rotr_i16_i32(i16, i32) -> i16 { +block0(v0: i16, v1: i32): + v2 = rotr.i16 v0, v1 + return v2 +} +; run: %rotr_i16_i32(0xe000, 0) == 0xe000 +; run: %rotr_i16_i32(0xe000, 1) == 0x7000 +; run: %rotr_i16_i32(0xef0f, 0) == 0xef0f +; run: %rotr_i16_i32(0xef0f, 4) == 0xfef0 +; run: %rotr_i16_i32(0xe004, 64) == 0xe004 +; run: %rotr_i16_i32(0xe004, 65) == 0x7002 +; run: %rotr_i16_i32(0xe004, 66) == 0x3801 +; run: %rotr_i16_i32(0xe004, 257) == 0x7002 + +function %rotr_i16_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = rotr.i16 v0, v1 + return v2 +} +; run: %rotr_i16_i16(0xe000, 0) == 0xe000 +; run: %rotr_i16_i16(0xe000, 1) == 0x7000 +; run: %rotr_i16_i16(0xef0f, 0) == 0xef0f +; run: %rotr_i16_i16(0xef0f, 4) == 0xfef0 +; run: %rotr_i16_i16(0xe004, 64) == 0xe004 +; run: %rotr_i16_i16(0xe004, 65) == 0x7002 +; run: %rotr_i16_i16(0xe004, 66) == 0x3801 +; run: %rotr_i16_i16(0xe004, 257) == 0x7002 + +function %rotr_i16_i8(i16, i8) -> i16 { +block0(v0: i16, v1: i8): + v2 = rotr.i16 v0, v1 + return v2 +} +; run: %rotr_i16_i8(0xe000, 0) == 0xe000 +; run: %rotr_i16_i8(0xe000, 1) == 0x7000 +; run: %rotr_i16_i8(0xef0f, 0) == 0xef0f +; run: %rotr_i16_i8(0xef0f, 4) == 0xfef0 +; run: %rotr_i16_i8(0xe004, 64) == 0xe004 +; run: %rotr_i16_i8(0xe004, 65) == 0x7002 +; run: %rotr_i16_i8(0xe004, 66) == 0x3801 + + +function %rotr_i8_i64(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = rotr.i8 v0, v1 + return v2 +} +; run: %rotr_i8_i64(0xe0, 0) == 0xe0 +; run: %rotr_i8_i64(0xe0, 1) == 0x70 +; run: %rotr_i8_i64(0xef, 0) == 0xef +; run: %rotr_i8_i64(0xef, 4) == 0xfe +; run: %rotr_i8_i64(0xe0, 64) == 0xe0 +; run: %rotr_i8_i64(0xe0, 65) == 0x70 +; run: %rotr_i8_i64(0xe0, 66) == 0x38 +; run: %rotr_i8_i64(0xe0, 257) == 0x70 + +function %rotr_i8_i32(i8, i32) -> i8 { +block0(v0: i8, v1: i32): + v2 = rotr.i8 v0, v1 + return v2 +} +; run: %rotr_i8_i32(0xe0, 0) == 0xe0 +; run: %rotr_i8_i32(0xe0, 1) == 0x70 +; run: %rotr_i8_i32(0xef, 0) == 0xef +; run: %rotr_i8_i32(0xef, 4) == 0xfe +; run: %rotr_i8_i32(0xe0, 64) == 0xe0 +; run: %rotr_i8_i32(0xe0, 65) == 0x70 +; run: %rotr_i8_i32(0xe0, 66) == 0x38 +; run: %rotr_i8_i32(0xe0, 257) == 0x70 + +function %rotr_i8_i16(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = rotr.i8 v0, v1 + return v2 +} +; run: %rotr_i8_i16(0xe0, 0) == 0xe0 +; run: %rotr_i8_i16(0xe0, 1) == 0x70 +; run: %rotr_i8_i16(0xef, 0) == 0xef +; run: %rotr_i8_i16(0xef, 4) == 0xfe +; run: %rotr_i8_i16(0xe0, 64) == 0xe0 +; run: %rotr_i8_i16(0xe0, 65) == 
0x70 +; run: %rotr_i8_i16(0xe0, 66) == 0x38 +; run: %rotr_i8_i16(0xe0, 257) == 0x70 + +function %rotr_i8_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = rotr.i8 v0, v1 + return v2 +} +; run: %rotr_i8_i8(0xe0, 0) == 0xe0 +; run: %rotr_i8_i8(0xe0, 1) == 0x70 +; run: %rotr_i8_i8(0xef, 0) == 0xef +; run: %rotr_i8_i8(0xef, 4) == 0xfe +; run: %rotr_i8_i8(0xe0, 64) == 0xe0 +; run: %rotr_i8_i8(0xe0, 65) == 0x70 +; run: %rotr_i8_i8(0xe0, 66) == 0x38 + + + +;; This is a regression test for rotates on x64 +;; See: https://github.com/bytecodealliance/wasmtime/pull/3610 +function %rotr_i8_const_37(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 37 + v2 = rotr.i8 v0, v1 + return v2 +} +; run: %rotr_i8_const_37(0x00) == 0x00 +; run: %rotr_i8_const_37(0x01) == 0x08 +; run: %rotr_i8_const_37(0x12) == 0x90 diff --git a/cranelift/filetests/filetests/runtests/shifts.clif b/cranelift/filetests/filetests/runtests/shifts.clif index 5f66d56191..31fad8da36 100644 --- a/cranelift/filetests/filetests/runtests/shifts.clif +++ b/cranelift/filetests/filetests/runtests/shifts.clif @@ -1,3 +1,4 @@ +test interpret test run target aarch64 target x86_64 @@ -110,6 +111,113 @@ block0(v0: i32, v1: i8): ; run: %ishl_i32_i8(0x00000004, 34) == 0x00000010 +function %ishl_i16_i64(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = ishl.i16 v0, v1 + return v2 +} +; run: %ishl_i16_i64(0x0000, 0) == 0x0000 +; run: %ishl_i16_i64(0x0000, 1) == 0x0000 +; run: %ishl_i16_i64(0x000f, 0) == 0x000f +; run: %ishl_i16_i64(0x000f, 4) == 0x00f0 +; run: %ishl_i16_i64(0x0004, 32) == 0x0004 +; run: %ishl_i16_i64(0x0004, 33) == 0x0008 +; run: %ishl_i16_i64(0x0004, 34) == 0x0010 + +function %ishl_i16_i32(i16, i32) -> i16 { +block0(v0: i16, v1: i32): + v2 = ishl.i16 v0, v1 + return v2 +} +; run: %ishl_i16_i32(0x0000, 0) == 0x0000 +; run: %ishl_i16_i32(0x0000, 1) == 0x0000 +; run: %ishl_i16_i32(0x000f, 0) == 0x000f +; run: %ishl_i16_i32(0x000f, 4) == 0x00f0 +; run: %ishl_i16_i32(0x0004, 32) == 0x0004 +; run: %ishl_i16_i32(0x0004, 33) == 0x0008 +; run: %ishl_i16_i32(0x0004, 34) == 0x0010 + +function %ishl_i16_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = ishl.i16 v0, v1 + return v2 +} +; run: %ishl_i16_i16(0x0000, 0) == 0x0000 +; run: %ishl_i16_i16(0x0000, 1) == 0x0000 +; run: %ishl_i16_i16(0x000f, 0) == 0x000f +; run: %ishl_i16_i16(0x000f, 4) == 0x00f0 +; run: %ishl_i16_i16(0x0004, 32) == 0x0004 +; run: %ishl_i16_i16(0x0004, 33) == 0x0008 +; run: %ishl_i16_i16(0x0004, 34) == 0x0010 + +function %ishl_i16_i8(i16, i8) -> i16 { +block0(v0: i16, v1: i8): + v2 = ishl.i16 v0, v1 + return v2 +} +; run: %ishl_i16_i8(0x0000, 0) == 0x0000 +; run: %ishl_i16_i8(0x0000, 1) == 0x0000 +; run: %ishl_i16_i8(0x000f, 0) == 0x000f +; run: %ishl_i16_i8(0x000f, 4) == 0x00f0 +; run: %ishl_i16_i8(0x0004, 32) == 0x0004 +; run: %ishl_i16_i8(0x0004, 33) == 0x0008 +; run: %ishl_i16_i8(0x0004, 34) == 0x0010 + + +function %ishl_i8_i64(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = ishl.i8 v0, v1 + return v2 +} +; run: %ishl_i8_i64(0x00, 0) == 0x00 +; run: %ishl_i8_i64(0x00, 1) == 0x00 +; run: %ishl_i8_i64(0x0f, 0) == 0x0f +; run: %ishl_i8_i64(0x0f, 4) == 0xf0 +; run: %ishl_i8_i64(0x04, 32) == 0x04 +; run: %ishl_i8_i64(0x04, 33) == 0x08 +; run: %ishl_i8_i64(0x04, 34) == 0x10 + +function %ishl_i8_i32(i8, i32) -> i8 { +block0(v0: i8, v1: i32): + v2 = ishl.i8 v0, v1 + return v2 +} +; run: %ishl_i8_i32(0x00, 0) == 0x00 +; run: %ishl_i8_i32(0x00, 1) == 0x00 +; run: %ishl_i8_i32(0x0f, 0) == 0x0f +; run: %ishl_i8_i32(0x0f, 4) == 0xf0 +; run: %ishl_i8_i32(0x04, 32) == 0x04 +; 
run: %ishl_i8_i32(0x04, 33) == 0x08 +; run: %ishl_i8_i32(0x04, 34) == 0x10 + +function %ishl_i8_i16(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = ishl.i8 v0, v1 + return v2 +} +; run: %ishl_i8_i16(0x00, 0) == 0x00 +; run: %ishl_i8_i16(0x00, 1) == 0x00 +; run: %ishl_i8_i16(0x0f, 0) == 0x0f +; run: %ishl_i8_i16(0x0f, 4) == 0xf0 +; run: %ishl_i8_i16(0x04, 32) == 0x04 +; run: %ishl_i8_i16(0x04, 33) == 0x08 +; run: %ishl_i8_i16(0x04, 34) == 0x10 + +function %ishl_i8_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = ishl.i8 v0, v1 + return v2 +} +; run: %ishl_i8_i8(0x00, 0) == 0x00 +; run: %ishl_i8_i8(0x00, 1) == 0x00 +; run: %ishl_i8_i8(0x0f, 0) == 0x0f +; run: %ishl_i8_i8(0x0f, 4) == 0xf0 +; run: %ishl_i8_i8(0x04, 32) == 0x04 +; run: %ishl_i8_i8(0x04, 33) == 0x08 +; run: %ishl_i8_i8(0x04, 34) == 0x10 + + + function %ushr_i64_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): v2 = ushr.i64 v0, v1 @@ -215,6 +323,113 @@ block0(v0: i32, v1: i8): ; run: %ushr_i32_i8(0x40000000, 34) == 0x10000000 +function %ushr_i16_i64(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = ushr.i16 v0, v1 + return v2 +} +; run: %ushr_i16_i64(0x1000, 0) == 0x1000 +; run: %ushr_i16_i64(0x1000, 1) == 0x0800 +; run: %ushr_i16_i64(0xf000, 0) == 0xf000 +; run: %ushr_i16_i64(0xf000, 4) == 0x0f00 +; run: %ushr_i16_i64(0x4000, 32) == 0x4000 +; run: %ushr_i16_i64(0x4000, 33) == 0x2000 +; run: %ushr_i16_i64(0x4000, 34) == 0x1000 + +function %ushr_i16_i32(i16, i32) -> i16 { +block0(v0: i16, v1: i32): + v2 = ushr.i16 v0, v1 + return v2 +} +; run: %ushr_i16_i32(0x1000, 0) == 0x1000 +; run: %ushr_i16_i32(0x1000, 1) == 0x0800 +; run: %ushr_i16_i32(0xf000, 0) == 0xf000 +; run: %ushr_i16_i32(0xf000, 4) == 0x0f00 +; run: %ushr_i16_i32(0x4000, 32) == 0x4000 +; run: %ushr_i16_i32(0x4000, 33) == 0x2000 +; run: %ushr_i16_i32(0x4000, 34) == 0x1000 + +function %ushr_i16_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = ushr.i16 v0, v1 + return v2 +} +; run: %ushr_i16_i16(0x1000, 0) == 0x1000 +; run: %ushr_i16_i16(0x1000, 1) == 0x0800 +; run: %ushr_i16_i16(0xf000, 0) == 0xf000 +; run: %ushr_i16_i16(0xf000, 4) == 0x0f00 +; run: %ushr_i16_i16(0x4000, 32) == 0x4000 +; run: %ushr_i16_i16(0x4000, 33) == 0x2000 +; run: %ushr_i16_i16(0x4000, 34) == 0x1000 + +function %ushr_i16_i8(i16, i8) -> i16 { +block0(v0: i16, v1: i8): + v2 = ushr.i16 v0, v1 + return v2 +} +; run: %ushr_i16_i8(0x1000, 0) == 0x1000 +; run: %ushr_i16_i8(0x1000, 1) == 0x0800 +; run: %ushr_i16_i8(0xf000, 0) == 0xf000 +; run: %ushr_i16_i8(0xf000, 4) == 0x0f00 +; run: %ushr_i16_i8(0x4000, 32) == 0x4000 +; run: %ushr_i16_i8(0x4000, 33) == 0x2000 +; run: %ushr_i16_i8(0x4000, 34) == 0x1000 + + +function %ushr_i8_i64(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = ushr.i8 v0, v1 + return v2 +} +; run: %ushr_i8_i64(0x10, 0) == 0x10 +; run: %ushr_i8_i64(0x10, 1) == 0x08 +; run: %ushr_i8_i64(0xf0, 0) == 0xf0 +; run: %ushr_i8_i64(0xf0, 4) == 0x0f +; run: %ushr_i8_i64(0x40, 32) == 0x40 +; run: %ushr_i8_i64(0x40, 33) == 0x20 +; run: %ushr_i8_i64(0x40, 34) == 0x10 + +function %ushr_i8_i32(i8, i32) -> i8 { +block0(v0: i8, v1: i32): + v2 = ushr.i8 v0, v1 + return v2 +} +; run: %ushr_i8_i32(0x10, 0) == 0x10 +; run: %ushr_i8_i32(0x10, 1) == 0x08 +; run: %ushr_i8_i32(0xf0, 0) == 0xf0 +; run: %ushr_i8_i32(0xf0, 4) == 0x0f +; run: %ushr_i8_i32(0x40, 32) == 0x40 +; run: %ushr_i8_i32(0x40, 33) == 0x20 +; run: %ushr_i8_i32(0x40, 34) == 0x10 + +function %ushr_i8_i16(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = ushr.i8 v0, v1 + return v2 +} +; run: %ushr_i8_i16(0x10, 0) == 0x10 +; run: 
%ushr_i8_i16(0x10, 1) == 0x08 +; run: %ushr_i8_i16(0xf0, 0) == 0xf0 +; run: %ushr_i8_i16(0xf0, 4) == 0x0f +; run: %ushr_i8_i16(0x40, 32) == 0x40 +; run: %ushr_i8_i16(0x40, 33) == 0x20 +; run: %ushr_i8_i16(0x40, 34) == 0x10 + +function %ushr_i8_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = ushr.i8 v0, v1 + return v2 +} +; run: %ushr_i8_i8(0x10, 0) == 0x10 +; run: %ushr_i8_i8(0x10, 1) == 0x08 +; run: %ushr_i8_i8(0xf0, 0) == 0xf0 +; run: %ushr_i8_i8(0xf0, 4) == 0x0f +; run: %ushr_i8_i8(0x40, 32) == 0x40 +; run: %ushr_i8_i8(0x40, 33) == 0x20 +; run: %ushr_i8_i8(0x40, 34) == 0x10 + + + function %sshr_i64_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): v2 = sshr.i64 v0, v1 @@ -319,24 +534,108 @@ block0(v0: i32, v1: i8): ; run: %sshr_i32_i8(0x40000000, 33) == 0x20000000 ; run: %sshr_i32_i8(0x40000000, 34) == 0x10000000 -function %rotl_i8_const_37(i8) -> i8 { -block0(v0: i8): - v1 = iconst.i8 37 - v2 = rotl.i8 v0, v1 - return v2 + +function %sshr_i16_i64(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = sshr.i16 v0, v1 + return v2 } +; run: %sshr_i16_i64(0x8000, 0) == 0x8000 +; run: %sshr_i16_i64(0x8000, 1) == 0xC000 +; run: %sshr_i16_i64(0xf000, 0) == 0xf000 +; run: %sshr_i16_i64(0xf000, 4) == 0xff00 +; run: %sshr_i16_i64(0x4000, 32) == 0x4000 +; run: %sshr_i16_i64(0x4000, 33) == 0x2000 +; run: %sshr_i16_i64(0x4000, 34) == 0x1000 -; run: %rotl_i8_const_37(0x00) == 0x00 -; run: %rotl_i8_const_37(0x01) == 0x20 -; run: %rotl_i8_const_37(0x12) == 0x42 - -function %rotr_i8_const_37(i8) -> i8 { -block0(v0: i8): - v1 = iconst.i8 37 - v2 = rotr.i8 v0, v1 - return v2 +function %sshr_i16_i32(i16, i32) -> i16 { +block0(v0: i16, v1: i32): + v2 = sshr.i16 v0, v1 + return v2 } +; run: %sshr_i16_i32(0x8000, 0) == 0x8000 +; run: %sshr_i16_i32(0x8000, 1) == 0xC000 +; run: %sshr_i16_i32(0xf000, 0) == 0xf000 +; run: %sshr_i16_i32(0xf000, 4) == 0xff00 +; run: %sshr_i16_i32(0x4000, 32) == 0x4000 +; run: %sshr_i16_i32(0x4000, 33) == 0x2000 +; run: %sshr_i16_i32(0x4000, 34) == 0x1000 -; run: %rotr_i8_const_37(0x00) == 0x00 -; run: %rotr_i8_const_37(0x01) == 0x08 -; run: %rotr_i8_const_37(0x12) == 0x90 +function %sshr_i16_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = sshr.i16 v0, v1 + return v2 +} +; run: %sshr_i16_i16(0x8000, 0) == 0x8000 +; run: %sshr_i16_i16(0x8000, 1) == 0xC000 +; run: %sshr_i16_i16(0xf000, 0) == 0xf000 +; run: %sshr_i16_i16(0xf000, 4) == 0xff00 +; run: %sshr_i16_i16(0x4000, 32) == 0x4000 +; run: %sshr_i16_i16(0x4000, 33) == 0x2000 +; run: %sshr_i16_i16(0x4000, 34) == 0x1000 + +function %sshr_i16_i8(i16, i8) -> i16 { +block0(v0: i16, v1: i8): + v2 = sshr.i16 v0, v1 + return v2 +} +; run: %sshr_i16_i8(0x8000, 0) == 0x8000 +; run: %sshr_i16_i8(0x8000, 1) == 0xC000 +; run: %sshr_i16_i8(0xf000, 0) == 0xf000 +; run: %sshr_i16_i8(0xf000, 4) == 0xff00 +; run: %sshr_i16_i8(0x4000, 32) == 0x4000 +; run: %sshr_i16_i8(0x4000, 33) == 0x2000 +; run: %sshr_i16_i8(0x4000, 34) == 0x1000 + + +function %sshr_i8_i64(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = sshr.i8 v0, v1 + return v2 +} +; run: %sshr_i8_i64(0x80, 0) == 0x80 +; run: %sshr_i8_i64(0x80, 1) == 0xC0 +; run: %sshr_i8_i64(0xf0, 0) == 0xf0 +; run: %sshr_i8_i64(0xf0, 4) == 0xff +; run: %sshr_i8_i64(0x40, 32) == 0x40 +; run: %sshr_i8_i64(0x40, 33) == 0x20 +; run: %sshr_i8_i64(0x40, 34) == 0x10 + +function %sshr_i8_i32(i8, i32) -> i8 { +block0(v0: i8, v1: i32): + v2 = sshr.i8 v0, v1 + return v2 +} +; run: %sshr_i8_i32(0x80, 0) == 0x80 +; run: %sshr_i8_i32(0x80, 1) == 0xC0 +; run: %sshr_i8_i32(0xf0, 0) == 0xf0 +; run: 
%sshr_i8_i32(0xf0, 4) == 0xff +; run: %sshr_i8_i32(0x40, 32) == 0x40 +; run: %sshr_i8_i32(0x40, 33) == 0x20 +; run: %sshr_i8_i32(0x40, 34) == 0x10 + +function %sshr_i8_i16(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = sshr.i8 v0, v1 + return v2 +} +; run: %sshr_i8_i16(0x80, 0) == 0x80 +; run: %sshr_i8_i16(0x80, 1) == 0xC0 +; run: %sshr_i8_i16(0xf0, 0) == 0xf0 +; run: %sshr_i8_i16(0xf0, 4) == 0xff +; run: %sshr_i8_i16(0x40, 32) == 0x40 +; run: %sshr_i8_i16(0x40, 33) == 0x20 +; run: %sshr_i8_i16(0x40, 34) == 0x10 + +function %sshr_i8_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = sshr.i8 v0, v1 + return v2 +} +; run: %sshr_i8_i8(0x80, 0) == 0x80 +; run: %sshr_i8_i8(0x80, 1) == 0xC0 +; run: %sshr_i8_i8(0xf0, 0) == 0xf0 +; run: %sshr_i8_i8(0xf0, 4) == 0xff +; run: %sshr_i8_i8(0x40, 32) == 0x40 +; run: %sshr_i8_i8(0x40, 33) == 0x20 +; run: %sshr_i8_i8(0x40, 34) == 0x10 diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index a578fdfe5f..87c86409de 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -153,6 +153,20 @@ where right: V| -> ValueResult<ControlFlow<V>> { Ok(assign(op(left, right)?)) }; + // Same as `binary`, but converts the values to their unsigned form before the + // operation and back to signed form afterwards. Since Cranelift types have no notion of + // signedness, this enables operations that depend on sign. + let binary_unsigned = + |op: fn(V, V) -> ValueResult<V>, left: V, right: V| -> ValueResult<ControlFlow<V>> { + Ok(assign( + op( + left.convert(ValueConversionKind::ToUnsigned)?, + right.convert(ValueConversionKind::ToUnsigned)?, + ) + .and_then(|v| v.convert(ValueConversionKind::ToSigned))?, + )) + }; + // Similar to `binary` but converts select `ValueError`'s into trap `ControlFlow`'s let binary_can_trap = |op: fn(V, V) -> ValueResult<V>, left: V, @@ -690,10 +704,10 @@ where Opcode::RotlImm => binary(Value::rotl, arg(0)?, imm_as_ctrl_ty()?)?, Opcode::RotrImm => binary(Value::rotr, arg(0)?, imm_as_ctrl_ty()?)?, Opcode::Ishl => binary(Value::shl, arg(0)?, arg(1)?)?, - Opcode::Ushr => binary(Value::ushr, arg(0)?, arg(1)?)?, + Opcode::Ushr => binary_unsigned(Value::ushr, arg(0)?, arg(1)?)?, Opcode::Sshr => binary(Value::ishr, arg(0)?, arg(1)?)?, Opcode::IshlImm => binary(Value::shl, arg(0)?, imm_as_ctrl_ty()?)?, - Opcode::UshrImm => binary(Value::ushr, arg(0)?, imm_as_ctrl_ty()?)?, + Opcode::UshrImm => binary_unsigned(Value::ushr, arg(0)?, imm_as_ctrl_ty()?)?, Opcode::SshrImm => binary(Value::ishr, arg(0)?, imm_as_ctrl_ty()?)?, Opcode::Bitrev => assign(Value::reverse_bits(arg(0)?)?), Opcode::Clz => assign(arg(0)?.leading_zeros()?), diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs index 94d4a11bc9..01974b357f 100644 --- a/cranelift/interpreter/src/value.rs +++ b/cranelift/interpreter/src/value.rs @@ -191,12 +191,19 @@ macro_rules! binary_match { _ => unimplemented!() } }; - ( $op:tt($arg1:expr, $arg2:expr); unsigned integers ) => { + ( $op:tt($arg1:expr, $arg2:expr); [ $( $data_value_ty:ident ),* ]; rhs: $rhs:tt ) => { match ($arg1, $arg2) { - (DataValue::I8(a), DataValue::I8(b)) => { Ok(DataValue::I8((u8::try_from(*a)? $op u8::try_from(*b)?) as i8)) } - (DataValue::I16(a), DataValue::I16(b)) => { Ok(DataValue::I16((u16::try_from(*a)? $op u16::try_from(*b)?) as i16)) } - (DataValue::I32(a), DataValue::I32(b)) => { Ok(DataValue::I32((u32::try_from(*a)? $op u32::try_from(*b)?) as i32)) } - (DataValue::I64(a), DataValue::I64(b)) => { Ok(DataValue::I64((u64::try_from(*a)?
$op u64::try_from(*b)?) as i64)) } + $( (DataValue::$data_value_ty(a), DataValue::$rhs(b)) => { Ok(DataValue::$data_value_ty(a.$op(*b))) } )* + _ => unimplemented!() + } + }; + ( $op:ident($arg1:expr, $arg2:expr); unsigned integers ) => { + match ($arg1, $arg2) { + (DataValue::I8(a), DataValue::I8(b)) => { Ok(DataValue::I8((u8::try_from(*a)?.$op(u8::try_from(*b)?) as i8))) } + (DataValue::I16(a), DataValue::I16(b)) => { Ok(DataValue::I16((u16::try_from(*a)?.$op(u16::try_from(*b)?) as i16))) } + (DataValue::I32(a), DataValue::I32(b)) => { Ok(DataValue::I32((u32::try_from(*a)?.$op(u32::try_from(*b)?) as i32))) } + (DataValue::I64(a), DataValue::I64(b)) => { Ok(DataValue::I64((u64::try_from(*a)?.$op(u64::try_from(*b)?) as i64))) } + (DataValue::I128(a), DataValue::I128(b)) => { Ok(DataValue::I128((u128::try_from(*a)?.$op(u128::try_from(*b)?) as i128))) } _ => { Err(ValueError::InvalidType(ValueTypeClass::Integer, if !($arg1).ty().is_int() { ($arg1).ty() } else { ($arg2).ty() })) } } }; @@ -306,7 +313,9 @@ impl Value for DataValue { Ok(match kind { ValueConversionKind::Exact(ty) => match (self, ty) { // TODO a lot to do here: from bmask to ireduce to raw_bitcast... - (DataValue::I64(n), ty) if ty.is_int() => DataValue::from_integer(n as i128, ty)?, + (val, ty) if val.ty().is_int() && ty.is_int() => { + DataValue::from_integer(val.into_int()?, ty)? + } (DataValue::F32(n), types::I32) => DataValue::I32(n.bits() as i32), (DataValue::F64(n), types::I64) => DataValue::I64(n.bits() as i64), (DataValue::B(b), t) if t.is_bool() => DataValue::B(b), @@ -623,23 +632,38 @@ impl Value for DataValue { fn shl(self, other: Self) -> ValueResult<Self> { - binary_match!(<<(&self, &other); [I8, I16, I32, I64]) + let amt = other + .convert(ValueConversionKind::Exact(types::I32))? + .convert(ValueConversionKind::ToUnsigned)?; + binary_match!(wrapping_shl(&self, &amt); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128]; rhs: U32) } fn ushr(self, other: Self) -> ValueResult<Self> { - binary_match!(>>(&self, &other); unsigned integers) + let amt = other + .convert(ValueConversionKind::Exact(types::I32))? + .convert(ValueConversionKind::ToUnsigned)?; + binary_match!(wrapping_shr(&self, &amt); [U8, U16, U32, U64, U128]; rhs: U32) } fn ishr(self, other: Self) -> ValueResult<Self> { - binary_match!(>>(&self, &other); [I8, I16, I32, I64]) + let amt = other + .convert(ValueConversionKind::Exact(types::I32))? + .convert(ValueConversionKind::ToUnsigned)?; + binary_match!(wrapping_shr(&self, &amt); [I8, I16, I32, I64, I128]; rhs: U32) } - fn rotl(self, _other: Self) -> ValueResult<Self> { - unimplemented!() + fn rotl(self, other: Self) -> ValueResult<Self> { + let amt = other + .convert(ValueConversionKind::Exact(types::I32))? + .convert(ValueConversionKind::ToUnsigned)?; + binary_match!(rotate_left(&self, &amt); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128]; rhs: U32) } - fn rotr(self, _other: Self) -> ValueResult<Self> { - unimplemented!() + fn rotr(self, other: Self) -> ValueResult<Self> { + let amt = other + .convert(ValueConversionKind::Exact(types::I32))? + .convert(ValueConversionKind::ToUnsigned)?; + binary_match!(rotate_right(&self, &amt); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128]; rhs: U32) } fn and(self, other: Self) -> ValueResult<Self> {