diff --git a/cranelift/codegen/src/isa/x64/lower.isle b/cranelift/codegen/src/isa/x64/lower.isle index 01915b35a9..ccedd0a66d 100644 --- a/cranelift/codegen/src/isa/x64/lower.isle +++ b/cranelift/codegen/src/isa/x64/lower.isle @@ -818,31 +818,17 @@ ;;;; Rules for `rotl` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; `i16` and `i8`: we need to extend the shift amount, or mask the -;; constant. +;; `i64` and smaller: we can rely on x86's rotate-amount masking since +;; we operate on the whole register. For const's we mask the constant. -(rule (lower (has_type (ty_8_or_16 ty) (rotl src amt))) - (let ((amt_ Gpr (extend_to_gpr amt $I32 (ExtendKind.Zero)))) - (x64_rotl ty src (gpr_to_imm8_gpr amt_)))) +(rule (lower (has_type (fits_in_64 ty) (rotl src amt))) + (x64_rotl ty src (put_masked_in_imm8_gpr amt ty))) -(rule (lower (has_type (ty_8_or_16 ty) +(rule (lower (has_type (fits_in_64 ty) (rotl src (u64_from_iconst amt)))) (x64_rotl ty src (const_to_type_masked_imm8 amt ty))) -;; `i64` and `i32`: we can rely on x86's rotate-amount masking since -;; we operate on the whole register. - -(rule (lower (has_type (ty_32_or_64 ty) (rotl src amt))) - ;; NB: Only the low bits of `amt` matter since we logically mask the - ;; shift amount to the value's bit width. - (let ((amt_ Gpr (lo_gpr amt))) - (x64_rotl ty src amt_))) - -(rule (lower (has_type (ty_32_or_64 ty) - (rotl src (u64_from_iconst amt)))) - (x64_rotl ty src - (const_to_type_masked_imm8 amt ty))) ;; `i128`. @@ -858,31 +844,17 @@ ;;;; Rules for `rotr` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; `i16` and `i8`: we need to extend the shift amount, or mask the -;; constant. +;; `i64` and smaller: we can rely on x86's rotate-amount masking since +;; we operate on the whole register. For const's we mask the constant. -(rule (lower (has_type (ty_8_or_16 ty) (rotr src amt))) - (let ((amt_ Gpr (extend_to_gpr amt $I32 (ExtendKind.Zero)))) - (x64_rotr ty src amt_))) +(rule (lower (has_type (fits_in_64 ty) (rotr src amt))) + (x64_rotr ty src (put_masked_in_imm8_gpr amt ty))) -(rule (lower (has_type (ty_8_or_16 ty) +(rule (lower (has_type (fits_in_64 ty) (rotr src (u64_from_iconst amt)))) (x64_rotr ty src (const_to_type_masked_imm8 amt ty))) -;; `i64` and `i32`: we can rely on x86's rotate-amount masking since -;; we operate on the whole register. - -(rule (lower (has_type (ty_32_or_64 ty) (rotr src amt))) - ;; NB: Only the low bits of `amt` matter since we logically mask the - ;; shift amount to the value's bit width. - (let ((amt_ Gpr (lo_gpr amt))) - (x64_rotr ty src amt_))) - -(rule (lower (has_type (ty_32_or_64 ty) - (rotr src (u64_from_iconst amt)))) - (x64_rotr ty src - (const_to_type_masked_imm8 amt ty))) ;; `i128`. 
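The rewritten rules lean on the fact that a rotate is periodic in the value's bit width, so a constant amount can be reduced modulo that width before it is encoded as an 8-bit immediate. Below is a minimal Rust sketch of that invariant; the helper name `mask_rotate_amount` is hypothetical and only stands in for whatever masking `const_to_type_masked_imm8` is assumed to perform. The assertions mirror the `%rotl_i8_const_37` regression test added later in this patch.

    // Hypothetical helper: reduce a constant rotate amount modulo the value's
    // bit width (the widths are powers of two, so a mask works).
    fn mask_rotate_amount(amt: u64, bits: u32) -> u32 {
        (amt & (u64::from(bits) - 1)) as u32
    }

    fn main() {
        // Rotating an i8 by 37 behaves like rotating it by 37 & 7 == 5.
        assert_eq!(mask_rotate_amount(37, 8), 5);
        assert_eq!(0x01u8.rotate_left(5), 0x20); // matches %rotl_i8_const_37(0x01) == 0x20
        assert_eq!(0x12u8.rotate_left(5), 0x42); // matches %rotl_i8_const_37(0x12) == 0x42
    }

The same periodicity is why a rotate by the full width (for example a 64-bit rotate by 64) is expected to leave the value unchanged in the runtests below.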
diff --git a/cranelift/filetests/filetests/runtests/i128-rotate.clif b/cranelift/filetests/filetests/runtests/i128-rotate.clif index dac4b567ad..429f29fd84 100644 --- a/cranelift/filetests/filetests/runtests/i128-rotate.clif +++ b/cranelift/filetests/filetests/runtests/i128-rotate.clif @@ -1,3 +1,4 @@ +test interpret test run set enable_llvm_abi_extensions=true target aarch64 diff --git a/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif b/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif index 847a1a9b1c..64fa59c441 100644 --- a/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif +++ b/cranelift/filetests/filetests/runtests/i128-shifts-small-types.clif @@ -1,3 +1,4 @@ +test interpret test run target aarch64 target s390x diff --git a/cranelift/filetests/filetests/runtests/i128-shifts.clif b/cranelift/filetests/filetests/runtests/i128-shifts.clif index 1c370e9c85..272c241f44 100644 --- a/cranelift/filetests/filetests/runtests/i128-shifts.clif +++ b/cranelift/filetests/filetests/runtests/i128-shifts.clif @@ -1,3 +1,4 @@ +test interpret test run set enable_llvm_abi_extensions=true target aarch64 diff --git a/cranelift/filetests/filetests/runtests/rotl.clif b/cranelift/filetests/filetests/runtests/rotl.clif new file mode 100644 index 0000000000..cdf8fde2f4 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/rotl.clif @@ -0,0 +1,243 @@ +test interpret +test run +target aarch64 +target x86_64 +target s390x + + +function %rotl_i64_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = rotl.i64 v0, v1 + return v2 +} +; run: %rotl_i64_i64(0xe0000000_00000000, 0) == 0xe0000000_00000000 +; run: %rotl_i64_i64(0xe0000000_00000000, 1) == 0xc0000000_00000001 +; run: %rotl_i64_i64(0xe000000f_0000000f, 0) == 0xe000000f_0000000f +; run: %rotl_i64_i64(0xe000000f_0000000f, 4) == 0x000000f0_000000fe +; run: %rotl_i64_i64(0xe0000000_00000004, 64) == 0xe0000000_00000004 +; run: %rotl_i64_i64(0xe0000000_00000004, 65) == 0xc0000000_00000009 +; run: %rotl_i64_i64(0xe0000000_00000004, 66) == 0x80000000_00000013 +; run: %rotl_i64_i64(0xe0000000_00000004, 257) == 0xc0000000_00000009 + +function %rotl_i64_i32(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = rotl.i64 v0, v1 + return v2 +} +; run: %rotl_i64_i32(0xe0000000_00000000, 0) == 0xe0000000_00000000 +; run: %rotl_i64_i32(0xe0000000_00000000, 1) == 0xc0000000_00000001 +; run: %rotl_i64_i32(0xe000000f_0000000f, 0) == 0xe000000f_0000000f +; run: %rotl_i64_i32(0xe000000f_0000000f, 4) == 0x000000f0_000000fe +; run: %rotl_i64_i32(0xe0000000_00000004, 64) == 0xe0000000_00000004 +; run: %rotl_i64_i32(0xe0000000_00000004, 65) == 0xc0000000_00000009 +; run: %rotl_i64_i32(0xe0000000_00000004, 66) == 0x80000000_00000013 +; run: %rotl_i64_i32(0xe0000000_00000004, 257) == 0xc0000000_00000009 + +function %rotl_i64_i16(i64, i16) -> i64 { +block0(v0: i64, v1: i16): + v2 = rotl.i64 v0, v1 + return v2 +} +; run: %rotl_i64_i16(0xe0000000_00000000, 0) == 0xe0000000_00000000 +; run: %rotl_i64_i16(0xe0000000_00000000, 1) == 0xc0000000_00000001 +; run: %rotl_i64_i16(0xe000000f_0000000f, 0) == 0xe000000f_0000000f +; run: %rotl_i64_i16(0xe000000f_0000000f, 4) == 0x000000f0_000000fe +; run: %rotl_i64_i16(0xe0000000_00000004, 64) == 0xe0000000_00000004 +; run: %rotl_i64_i16(0xe0000000_00000004, 65) == 0xc0000000_00000009 +; run: %rotl_i64_i16(0xe0000000_00000004, 66) == 0x80000000_00000013 +; run: %rotl_i64_i16(0xe0000000_00000004, 257) == 0xc0000000_00000009 + +function %rotl_i64_i8(i64, i8) -> i64 { +block0(v0: i64, v1: i8): 
+ v2 = rotl.i64 v0, v1 + return v2 +} +; run: %rotl_i64_i8(0xe0000000_00000000, 0) == 0xe0000000_00000000 +; run: %rotl_i64_i8(0xe0000000_00000000, 1) == 0xc0000000_00000001 +; run: %rotl_i64_i8(0xe000000f_0000000f, 0) == 0xe000000f_0000000f +; run: %rotl_i64_i8(0xe000000f_0000000f, 4) == 0x000000f0_000000fe +; run: %rotl_i64_i8(0xe0000000_00000004, 64) == 0xe0000000_00000004 +; run: %rotl_i64_i8(0xe0000000_00000004, 65) == 0xc0000000_00000009 +; run: %rotl_i64_i8(0xe0000000_00000004, 66) == 0x80000000_00000013 + + +function %rotl_i32_i64(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = rotl.i32 v0, v1 + return v2 +} +; run: %rotl_i32_i64(0xe0000000, 0) == 0xe0000000 +; run: %rotl_i32_i64(0xe0000000, 1) == 0xc0000001 +; run: %rotl_i32_i64(0xe00f000f, 0) == 0xe00f000f +; run: %rotl_i32_i64(0xe00f000f, 4) == 0x00f000fe +; run: %rotl_i32_i64(0xe0000004, 64) == 0xe0000004 +; run: %rotl_i32_i64(0xe0000004, 65) == 0xc0000009 +; run: %rotl_i32_i64(0xe0000004, 66) == 0x80000013 +; run: %rotl_i32_i64(0xe0000004, 257) == 0xc0000009 + +function %rotl_i32_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = rotl.i32 v0, v1 + return v2 +} +; run: %rotl_i32_i32(0xe0000000, 0) == 0xe0000000 +; run: %rotl_i32_i32(0xe0000000, 1) == 0xc0000001 +; run: %rotl_i32_i32(0xe00f000f, 0) == 0xe00f000f +; run: %rotl_i32_i32(0xe00f000f, 4) == 0x00f000fe +; run: %rotl_i32_i32(0xe0000004, 64) == 0xe0000004 +; run: %rotl_i32_i32(0xe0000004, 65) == 0xc0000009 +; run: %rotl_i32_i32(0xe0000004, 66) == 0x80000013 +; run: %rotl_i32_i32(0xe0000004, 257) == 0xc0000009 + +function %rotl_i32_i16(i32, i16) -> i32 { +block0(v0: i32, v1: i16): + v2 = rotl.i32 v0, v1 + return v2 +} +; run: %rotl_i32_i16(0xe0000000, 0) == 0xe0000000 +; run: %rotl_i32_i16(0xe0000000, 1) == 0xc0000001 +; run: %rotl_i32_i16(0xe00f000f, 0) == 0xe00f000f +; run: %rotl_i32_i16(0xe00f000f, 4) == 0x00f000fe +; run: %rotl_i32_i16(0xe0000004, 64) == 0xe0000004 +; run: %rotl_i32_i16(0xe0000004, 65) == 0xc0000009 +; run: %rotl_i32_i16(0xe0000004, 66) == 0x80000013 +; run: %rotl_i32_i16(0xe0000004, 257) == 0xc0000009 + +function %rotl_i32_i8(i32, i8) -> i32 { +block0(v0: i32, v1: i8): + v2 = rotl.i32 v0, v1 + return v2 +} +; run: %rotl_i32_i8(0xe0000000, 0) == 0xe0000000 +; run: %rotl_i32_i8(0xe0000000, 1) == 0xc0000001 +; run: %rotl_i32_i8(0xe00f000f, 0) == 0xe00f000f +; run: %rotl_i32_i8(0xe00f000f, 4) == 0x00f000fe +; run: %rotl_i32_i8(0xe0000004, 64) == 0xe0000004 +; run: %rotl_i32_i8(0xe0000004, 65) == 0xc0000009 +; run: %rotl_i32_i8(0xe0000004, 66) == 0x80000013 + + +function %rotl_i16_i64(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = rotl.i16 v0, v1 + return v2 +} +; run: %rotl_i16_i64(0xe000, 0) == 0xe000 +; run: %rotl_i16_i64(0xe000, 1) == 0xc001 +; run: %rotl_i16_i64(0xef0f, 0) == 0xef0f +; run: %rotl_i16_i64(0xef0f, 4) == 0xf0fe +; run: %rotl_i16_i64(0xe004, 64) == 0xe004 +; run: %rotl_i16_i64(0xe004, 65) == 0xc009 +; run: %rotl_i16_i64(0xe004, 66) == 0x8013 +; run: %rotl_i16_i64(0xe004, 257) == 0xc009 + +function %rotl_i16_i32(i16, i32) -> i16 { +block0(v0: i16, v1: i32): + v2 = rotl.i16 v0, v1 + return v2 +} +; run: %rotl_i16_i32(0xe000, 0) == 0xe000 +; run: %rotl_i16_i32(0xe000, 1) == 0xc001 +; run: %rotl_i16_i32(0xef0f, 0) == 0xef0f +; run: %rotl_i16_i32(0xef0f, 4) == 0xf0fe +; run: %rotl_i16_i32(0xe004, 64) == 0xe004 +; run: %rotl_i16_i32(0xe004, 65) == 0xc009 +; run: %rotl_i16_i32(0xe004, 66) == 0x8013 +; run: %rotl_i16_i32(0xe004, 257) == 0xc009 + +function %rotl_i16_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = 
rotl.i16 v0, v1 + return v2 +} +; run: %rotl_i16_i16(0xe000, 0) == 0xe000 +; run: %rotl_i16_i16(0xe000, 1) == 0xc001 +; run: %rotl_i16_i16(0xef0f, 0) == 0xef0f +; run: %rotl_i16_i16(0xef0f, 4) == 0xf0fe +; run: %rotl_i16_i16(0xe004, 64) == 0xe004 +; run: %rotl_i16_i16(0xe004, 65) == 0xc009 +; run: %rotl_i16_i16(0xe004, 66) == 0x8013 +; run: %rotl_i16_i16(0xe004, 257) == 0xc009 + +function %rotl_i16_i8(i16, i8) -> i16 { +block0(v0: i16, v1: i8): + v2 = rotl.i16 v0, v1 + return v2 +} +; run: %rotl_i16_i8(0xe000, 0) == 0xe000 +; run: %rotl_i16_i8(0xe000, 1) == 0xc001 +; run: %rotl_i16_i8(0xef0f, 0) == 0xef0f +; run: %rotl_i16_i8(0xef0f, 4) == 0xf0fe +; run: %rotl_i16_i8(0xe004, 64) == 0xe004 +; run: %rotl_i16_i8(0xe004, 65) == 0xc009 +; run: %rotl_i16_i8(0xe004, 66) == 0x8013 + + +function %rotl_i8_i64(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = rotl.i8 v0, v1 + return v2 +} +; run: %rotl_i8_i64(0xe0, 0) == 0xe0 +; run: %rotl_i8_i64(0xe0, 1) == 0xc1 +; run: %rotl_i8_i64(0xef, 0) == 0xef +; run: %rotl_i8_i64(0xef, 4) == 0xfe +; run: %rotl_i8_i64(0xe4, 64) == 0xe4 +; run: %rotl_i8_i64(0xe4, 65) == 0xc9 +; run: %rotl_i8_i64(0xe4, 66) == 0x93 +; run: %rotl_i8_i64(0xe4, 257) == 0xc9 + +function %rotl_i8_i32(i8, i32) -> i8 { +block0(v0: i8, v1: i32): + v2 = rotl.i8 v0, v1 + return v2 +} +; run: %rotl_i8_i32(0xe0, 0) == 0xe0 +; run: %rotl_i8_i32(0xe0, 1) == 0xc1 +; run: %rotl_i8_i32(0xef, 0) == 0xef +; run: %rotl_i8_i32(0xef, 4) == 0xfe +; run: %rotl_i8_i32(0xe4, 64) == 0xe4 +; run: %rotl_i8_i32(0xe4, 65) == 0xc9 +; run: %rotl_i8_i32(0xe4, 66) == 0x93 +; run: %rotl_i8_i32(0xe4, 257) == 0xc9 + +function %rotl_i8_i16(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = rotl.i8 v0, v1 + return v2 +} +; run: %rotl_i8_i16(0xe0, 0) == 0xe0 +; run: %rotl_i8_i16(0xe0, 1) == 0xc1 +; run: %rotl_i8_i16(0xef, 0) == 0xef +; run: %rotl_i8_i16(0xef, 4) == 0xfe +; run: %rotl_i8_i16(0xe4, 64) == 0xe4 +; run: %rotl_i8_i16(0xe4, 65) == 0xc9 +; run: %rotl_i8_i16(0xe4, 66) == 0x93 +; run: %rotl_i8_i16(0xe4, 257) == 0xc9 + +function %rotl_i8_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = rotl.i8 v0, v1 + return v2 +} +; run: %rotl_i8_i8(0xe0, 0) == 0xe0 +; run: %rotl_i8_i8(0xe0, 1) == 0xc1 +; run: %rotl_i8_i8(0xef, 0) == 0xef +; run: %rotl_i8_i8(0xef, 4) == 0xfe +; run: %rotl_i8_i8(0xe4, 64) == 0xe4 +; run: %rotl_i8_i8(0xe4, 65) == 0xc9 +; run: %rotl_i8_i8(0xe4, 66) == 0x93 + + + +;; This is a regression test for rotates on x64 +;; See: https://github.com/bytecodealliance/wasmtime/pull/3610 +function %rotl_i8_const_37(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 37 + v2 = rotl.i8 v0, v1 + return v2 +} +; run: %rotl_i8_const_37(0x00) == 0x00 +; run: %rotl_i8_const_37(0x01) == 0x20 +; run: %rotl_i8_const_37(0x12) == 0x42 diff --git a/cranelift/filetests/filetests/runtests/rotr.clif b/cranelift/filetests/filetests/runtests/rotr.clif new file mode 100644 index 0000000000..5c0236f457 --- /dev/null +++ b/cranelift/filetests/filetests/runtests/rotr.clif @@ -0,0 +1,243 @@ +test interpret +test run +target aarch64 +target x86_64 +target s390x + + +function %rotr_i64_i64(i64, i64) -> i64 { +block0(v0: i64, v1: i64): + v2 = rotr.i64 v0, v1 + return v2 +} +; run: %rotr_i64_i64(0xe0000000_00000000, 0) == 0xe0000000_00000000 +; run: %rotr_i64_i64(0xe0000000_00000000, 1) == 0x70000000_00000000 +; run: %rotr_i64_i64(0xe000000f_0000000f, 0) == 0xe000000f_0000000f +; run: %rotr_i64_i64(0xe000000f_0000000f, 4) == 0xfe000000_f0000000 +; run: %rotr_i64_i64(0xe0000000_00000004, 64) == 0xe0000000_00000004 +; run: 
%rotr_i64_i64(0xe0000000_00000004, 65) == 0x70000000_00000002 +; run: %rotr_i64_i64(0xe0000000_00000004, 66) == 0x38000000_00000001 +; run: %rotr_i64_i64(0xe0000000_00000004, 257) == 0x70000000_00000002 + +function %rotr_i64_i32(i64, i32) -> i64 { +block0(v0: i64, v1: i32): + v2 = rotr.i64 v0, v1 + return v2 +} +; run: %rotr_i64_i32(0xe0000000_00000000, 0) == 0xe0000000_00000000 +; run: %rotr_i64_i32(0xe0000000_00000000, 1) == 0x70000000_00000000 +; run: %rotr_i64_i32(0xe000000f_0000000f, 0) == 0xe000000f_0000000f +; run: %rotr_i64_i32(0xe000000f_0000000f, 4) == 0xfe000000_f0000000 +; run: %rotr_i64_i32(0xe0000000_00000004, 64) == 0xe0000000_00000004 +; run: %rotr_i64_i32(0xe0000000_00000004, 65) == 0x70000000_00000002 +; run: %rotr_i64_i32(0xe0000000_00000004, 66) == 0x38000000_00000001 +; run: %rotr_i64_i32(0xe0000000_00000004, 257) == 0x70000000_00000002 + +function %rotr_i64_i16(i64, i16) -> i64 { +block0(v0: i64, v1: i16): + v2 = rotr.i64 v0, v1 + return v2 +} +; run: %rotr_i64_i16(0xe0000000_00000000, 0) == 0xe0000000_00000000 +; run: %rotr_i64_i16(0xe0000000_00000000, 1) == 0x70000000_00000000 +; run: %rotr_i64_i16(0xe000000f_0000000f, 0) == 0xe000000f_0000000f +; run: %rotr_i64_i16(0xe000000f_0000000f, 4) == 0xfe000000_f0000000 +; run: %rotr_i64_i16(0xe0000000_00000004, 64) == 0xe0000000_00000004 +; run: %rotr_i64_i16(0xe0000000_00000004, 65) == 0x70000000_00000002 +; run: %rotr_i64_i16(0xe0000000_00000004, 66) == 0x38000000_00000001 +; run: %rotr_i64_i16(0xe0000000_00000004, 257) == 0x70000000_00000002 + +function %rotr_i64_i8(i64, i8) -> i64 { +block0(v0: i64, v1: i8): + v2 = rotr.i64 v0, v1 + return v2 +} +; run: %rotr_i64_i8(0xe0000000_00000000, 0) == 0xe0000000_00000000 +; run: %rotr_i64_i8(0xe0000000_00000000, 1) == 0x70000000_00000000 +; run: %rotr_i64_i8(0xe000000f_0000000f, 0) == 0xe000000f_0000000f +; run: %rotr_i64_i8(0xe000000f_0000000f, 4) == 0xfe000000_f0000000 +; run: %rotr_i64_i8(0xe0000000_00000004, 64) == 0xe0000000_00000004 +; run: %rotr_i64_i8(0xe0000000_00000004, 65) == 0x70000000_00000002 +; run: %rotr_i64_i8(0xe0000000_00000004, 66) == 0x38000000_00000001 + + +function %rotr_i32_i64(i32, i64) -> i32 { +block0(v0: i32, v1: i64): + v2 = rotr.i32 v0, v1 + return v2 +} +; run: %rotr_i32_i64(0xe0000000, 0) == 0xe0000000 +; run: %rotr_i32_i64(0xe0000000, 1) == 0x70000000 +; run: %rotr_i32_i64(0xe00f000f, 0) == 0xe00f000f +; run: %rotr_i32_i64(0xe00f000f, 4) == 0xfe00f000 +; run: %rotr_i32_i64(0xe0000004, 64) == 0xe0000004 +; run: %rotr_i32_i64(0xe0000004, 65) == 0x70000002 +; run: %rotr_i32_i64(0xe0000004, 66) == 0x38000001 +; run: %rotr_i32_i64(0xe0000004, 257) == 0x70000002 + +function %rotr_i32_i32(i32, i32) -> i32 { +block0(v0: i32, v1: i32): + v2 = rotr.i32 v0, v1 + return v2 +} +; run: %rotr_i32_i32(0xe0000000, 0) == 0xe0000000 +; run: %rotr_i32_i32(0xe0000000, 1) == 0x70000000 +; run: %rotr_i32_i32(0xe00f000f, 0) == 0xe00f000f +; run: %rotr_i32_i32(0xe00f000f, 4) == 0xfe00f000 +; run: %rotr_i32_i32(0xe0000004, 64) == 0xe0000004 +; run: %rotr_i32_i32(0xe0000004, 65) == 0x70000002 +; run: %rotr_i32_i32(0xe0000004, 66) == 0x38000001 +; run: %rotr_i32_i32(0xe0000004, 257) == 0x70000002 + +function %rotr_i32_i16(i32, i16) -> i32 { +block0(v0: i32, v1: i16): + v2 = rotr.i32 v0, v1 + return v2 +} +; run: %rotr_i32_i16(0xe0000000, 0) == 0xe0000000 +; run: %rotr_i32_i16(0xe0000000, 1) == 0x70000000 +; run: %rotr_i32_i16(0xe00f000f, 0) == 0xe00f000f +; run: %rotr_i32_i16(0xe00f000f, 4) == 0xfe00f000 +; run: %rotr_i32_i16(0xe0000004, 64) == 0xe0000004 +; run: 
%rotr_i32_i16(0xe0000004, 65) == 0x70000002 +; run: %rotr_i32_i16(0xe0000004, 66) == 0x38000001 +; run: %rotr_i32_i16(0xe0000004, 257) == 0x70000002 + +function %rotr_i32_i8(i32, i8) -> i32 { +block0(v0: i32, v1: i8): + v2 = rotr.i32 v0, v1 + return v2 +} +; run: %rotr_i32_i8(0xe0000000, 0) == 0xe0000000 +; run: %rotr_i32_i8(0xe0000000, 1) == 0x70000000 +; run: %rotr_i32_i8(0xe00f000f, 0) == 0xe00f000f +; run: %rotr_i32_i8(0xe00f000f, 4) == 0xfe00f000 +; run: %rotr_i32_i8(0xe0000004, 64) == 0xe0000004 +; run: %rotr_i32_i8(0xe0000004, 65) == 0x70000002 +; run: %rotr_i32_i8(0xe0000004, 66) == 0x38000001 + + +function %rotr_i16_i64(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = rotr.i16 v0, v1 + return v2 +} +; run: %rotr_i16_i64(0xe000, 0) == 0xe000 +; run: %rotr_i16_i64(0xe000, 1) == 0x7000 +; run: %rotr_i16_i64(0xef0f, 0) == 0xef0f +; run: %rotr_i16_i64(0xef0f, 4) == 0xfef0 +; run: %rotr_i16_i64(0xe004, 64) == 0xe004 +; run: %rotr_i16_i64(0xe004, 65) == 0x7002 +; run: %rotr_i16_i64(0xe004, 66) == 0x3801 +; run: %rotr_i16_i64(0xe004, 257) == 0x7002 + +function %rotr_i16_i32(i16, i32) -> i16 { +block0(v0: i16, v1: i32): + v2 = rotr.i16 v0, v1 + return v2 +} +; run: %rotr_i16_i32(0xe000, 0) == 0xe000 +; run: %rotr_i16_i32(0xe000, 1) == 0x7000 +; run: %rotr_i16_i32(0xef0f, 0) == 0xef0f +; run: %rotr_i16_i32(0xef0f, 4) == 0xfef0 +; run: %rotr_i16_i32(0xe004, 64) == 0xe004 +; run: %rotr_i16_i32(0xe004, 65) == 0x7002 +; run: %rotr_i16_i32(0xe004, 66) == 0x3801 +; run: %rotr_i16_i32(0xe004, 257) == 0x7002 + +function %rotr_i16_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = rotr.i16 v0, v1 + return v2 +} +; run: %rotr_i16_i16(0xe000, 0) == 0xe000 +; run: %rotr_i16_i16(0xe000, 1) == 0x7000 +; run: %rotr_i16_i16(0xef0f, 0) == 0xef0f +; run: %rotr_i16_i16(0xef0f, 4) == 0xfef0 +; run: %rotr_i16_i16(0xe004, 64) == 0xe004 +; run: %rotr_i16_i16(0xe004, 65) == 0x7002 +; run: %rotr_i16_i16(0xe004, 66) == 0x3801 +; run: %rotr_i16_i16(0xe004, 257) == 0x7002 + +function %rotr_i16_i8(i16, i8) -> i16 { +block0(v0: i16, v1: i8): + v2 = rotr.i16 v0, v1 + return v2 +} +; run: %rotr_i16_i8(0xe000, 0) == 0xe000 +; run: %rotr_i16_i8(0xe000, 1) == 0x7000 +; run: %rotr_i16_i8(0xef0f, 0) == 0xef0f +; run: %rotr_i16_i8(0xef0f, 4) == 0xfef0 +; run: %rotr_i16_i8(0xe004, 64) == 0xe004 +; run: %rotr_i16_i8(0xe004, 65) == 0x7002 +; run: %rotr_i16_i8(0xe004, 66) == 0x3801 + + +function %rotr_i8_i64(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = rotr.i8 v0, v1 + return v2 +} +; run: %rotr_i8_i64(0xe0, 0) == 0xe0 +; run: %rotr_i8_i64(0xe0, 1) == 0x70 +; run: %rotr_i8_i64(0xef, 0) == 0xef +; run: %rotr_i8_i64(0xef, 4) == 0xfe +; run: %rotr_i8_i64(0xe0, 64) == 0xe0 +; run: %rotr_i8_i64(0xe0, 65) == 0x70 +; run: %rotr_i8_i64(0xe0, 66) == 0x38 +; run: %rotr_i8_i64(0xe0, 257) == 0x70 + +function %rotr_i8_i32(i8, i32) -> i8 { +block0(v0: i8, v1: i32): + v2 = rotr.i8 v0, v1 + return v2 +} +; run: %rotr_i8_i32(0xe0, 0) == 0xe0 +; run: %rotr_i8_i32(0xe0, 1) == 0x70 +; run: %rotr_i8_i32(0xef, 0) == 0xef +; run: %rotr_i8_i32(0xef, 4) == 0xfe +; run: %rotr_i8_i32(0xe0, 64) == 0xe0 +; run: %rotr_i8_i32(0xe0, 65) == 0x70 +; run: %rotr_i8_i32(0xe0, 66) == 0x38 +; run: %rotr_i8_i32(0xe0, 257) == 0x70 + +function %rotr_i8_i16(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = rotr.i8 v0, v1 + return v2 +} +; run: %rotr_i8_i16(0xe0, 0) == 0xe0 +; run: %rotr_i8_i16(0xe0, 1) == 0x70 +; run: %rotr_i8_i16(0xef, 0) == 0xef +; run: %rotr_i8_i16(0xef, 4) == 0xfe +; run: %rotr_i8_i16(0xe0, 64) == 0xe0 +; run: %rotr_i8_i16(0xe0, 65) == 
0x70 +; run: %rotr_i8_i16(0xe0, 66) == 0x38 +; run: %rotr_i8_i16(0xe0, 257) == 0x70 + +function %rotr_i8_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = rotr.i8 v0, v1 + return v2 +} +; run: %rotr_i8_i8(0xe0, 0) == 0xe0 +; run: %rotr_i8_i8(0xe0, 1) == 0x70 +; run: %rotr_i8_i8(0xef, 0) == 0xef +; run: %rotr_i8_i8(0xef, 4) == 0xfe +; run: %rotr_i8_i8(0xe0, 64) == 0xe0 +; run: %rotr_i8_i8(0xe0, 65) == 0x70 +; run: %rotr_i8_i8(0xe0, 66) == 0x38 + + + +;; This is a regression test for rotates on x64 +;; See: https://github.com/bytecodealliance/wasmtime/pull/3610 +function %rotr_i8_const_37(i8) -> i8 { +block0(v0: i8): + v1 = iconst.i8 37 + v2 = rotr.i8 v0, v1 + return v2 +} +; run: %rotr_i8_const_37(0x00) == 0x00 +; run: %rotr_i8_const_37(0x01) == 0x08 +; run: %rotr_i8_const_37(0x12) == 0x90 diff --git a/cranelift/filetests/filetests/runtests/shifts.clif b/cranelift/filetests/filetests/runtests/shifts.clif index 5f66d56191..31fad8da36 100644 --- a/cranelift/filetests/filetests/runtests/shifts.clif +++ b/cranelift/filetests/filetests/runtests/shifts.clif @@ -1,3 +1,4 @@ +test interpret test run target aarch64 target x86_64 @@ -110,6 +111,113 @@ block0(v0: i32, v1: i8): ; run: %ishl_i32_i8(0x00000004, 34) == 0x00000010 +function %ishl_i16_i64(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = ishl.i16 v0, v1 + return v2 +} +; run: %ishl_i16_i64(0x0000, 0) == 0x0000 +; run: %ishl_i16_i64(0x0000, 1) == 0x0000 +; run: %ishl_i16_i64(0x000f, 0) == 0x000f +; run: %ishl_i16_i64(0x000f, 4) == 0x00f0 +; run: %ishl_i16_i64(0x0004, 32) == 0x0004 +; run: %ishl_i16_i64(0x0004, 33) == 0x0008 +; run: %ishl_i16_i64(0x0004, 34) == 0x0010 + +function %ishl_i16_i32(i16, i32) -> i16 { +block0(v0: i16, v1: i32): + v2 = ishl.i16 v0, v1 + return v2 +} +; run: %ishl_i16_i32(0x0000, 0) == 0x0000 +; run: %ishl_i16_i32(0x0000, 1) == 0x0000 +; run: %ishl_i16_i32(0x000f, 0) == 0x000f +; run: %ishl_i16_i32(0x000f, 4) == 0x00f0 +; run: %ishl_i16_i32(0x0004, 32) == 0x0004 +; run: %ishl_i16_i32(0x0004, 33) == 0x0008 +; run: %ishl_i16_i32(0x0004, 34) == 0x0010 + +function %ishl_i16_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = ishl.i16 v0, v1 + return v2 +} +; run: %ishl_i16_i16(0x0000, 0) == 0x0000 +; run: %ishl_i16_i16(0x0000, 1) == 0x0000 +; run: %ishl_i16_i16(0x000f, 0) == 0x000f +; run: %ishl_i16_i16(0x000f, 4) == 0x00f0 +; run: %ishl_i16_i16(0x0004, 32) == 0x0004 +; run: %ishl_i16_i16(0x0004, 33) == 0x0008 +; run: %ishl_i16_i16(0x0004, 34) == 0x0010 + +function %ishl_i16_i8(i16, i8) -> i16 { +block0(v0: i16, v1: i8): + v2 = ishl.i16 v0, v1 + return v2 +} +; run: %ishl_i16_i8(0x0000, 0) == 0x0000 +; run: %ishl_i16_i8(0x0000, 1) == 0x0000 +; run: %ishl_i16_i8(0x000f, 0) == 0x000f +; run: %ishl_i16_i8(0x000f, 4) == 0x00f0 +; run: %ishl_i16_i8(0x0004, 32) == 0x0004 +; run: %ishl_i16_i8(0x0004, 33) == 0x0008 +; run: %ishl_i16_i8(0x0004, 34) == 0x0010 + + +function %ishl_i8_i64(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = ishl.i8 v0, v1 + return v2 +} +; run: %ishl_i8_i64(0x00, 0) == 0x00 +; run: %ishl_i8_i64(0x00, 1) == 0x00 +; run: %ishl_i8_i64(0x0f, 0) == 0x0f +; run: %ishl_i8_i64(0x0f, 4) == 0xf0 +; run: %ishl_i8_i64(0x04, 32) == 0x04 +; run: %ishl_i8_i64(0x04, 33) == 0x08 +; run: %ishl_i8_i64(0x04, 34) == 0x10 + +function %ishl_i8_i32(i8, i32) -> i8 { +block0(v0: i8, v1: i32): + v2 = ishl.i8 v0, v1 + return v2 +} +; run: %ishl_i8_i32(0x00, 0) == 0x00 +; run: %ishl_i8_i32(0x00, 1) == 0x00 +; run: %ishl_i8_i32(0x0f, 0) == 0x0f +; run: %ishl_i8_i32(0x0f, 4) == 0xf0 +; run: %ishl_i8_i32(0x04, 32) == 0x04 +; 
run: %ishl_i8_i32(0x04, 33) == 0x08 +; run: %ishl_i8_i32(0x04, 34) == 0x10 + +function %ishl_i8_i16(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = ishl.i8 v0, v1 + return v2 +} +; run: %ishl_i8_i16(0x00, 0) == 0x00 +; run: %ishl_i8_i16(0x00, 1) == 0x00 +; run: %ishl_i8_i16(0x0f, 0) == 0x0f +; run: %ishl_i8_i16(0x0f, 4) == 0xf0 +; run: %ishl_i8_i16(0x04, 32) == 0x04 +; run: %ishl_i8_i16(0x04, 33) == 0x08 +; run: %ishl_i8_i16(0x04, 34) == 0x10 + +function %ishl_i8_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = ishl.i8 v0, v1 + return v2 +} +; run: %ishl_i8_i8(0x00, 0) == 0x00 +; run: %ishl_i8_i8(0x00, 1) == 0x00 +; run: %ishl_i8_i8(0x0f, 0) == 0x0f +; run: %ishl_i8_i8(0x0f, 4) == 0xf0 +; run: %ishl_i8_i8(0x04, 32) == 0x04 +; run: %ishl_i8_i8(0x04, 33) == 0x08 +; run: %ishl_i8_i8(0x04, 34) == 0x10 + + + function %ushr_i64_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): v2 = ushr.i64 v0, v1 @@ -215,6 +323,113 @@ block0(v0: i32, v1: i8): ; run: %ushr_i32_i8(0x40000000, 34) == 0x10000000 +function %ushr_i16_i64(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = ushr.i16 v0, v1 + return v2 +} +; run: %ushr_i16_i64(0x1000, 0) == 0x1000 +; run: %ushr_i16_i64(0x1000, 1) == 0x0800 +; run: %ushr_i16_i64(0xf000, 0) == 0xf000 +; run: %ushr_i16_i64(0xf000, 4) == 0x0f00 +; run: %ushr_i16_i64(0x4000, 32) == 0x4000 +; run: %ushr_i16_i64(0x4000, 33) == 0x2000 +; run: %ushr_i16_i64(0x4000, 34) == 0x1000 + +function %ushr_i16_i32(i16, i32) -> i16 { +block0(v0: i16, v1: i32): + v2 = ushr.i16 v0, v1 + return v2 +} +; run: %ushr_i16_i32(0x1000, 0) == 0x1000 +; run: %ushr_i16_i32(0x1000, 1) == 0x0800 +; run: %ushr_i16_i32(0xf000, 0) == 0xf000 +; run: %ushr_i16_i32(0xf000, 4) == 0x0f00 +; run: %ushr_i16_i32(0x4000, 32) == 0x4000 +; run: %ushr_i16_i32(0x4000, 33) == 0x2000 +; run: %ushr_i16_i32(0x4000, 34) == 0x1000 + +function %ushr_i16_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = ushr.i16 v0, v1 + return v2 +} +; run: %ushr_i16_i16(0x1000, 0) == 0x1000 +; run: %ushr_i16_i16(0x1000, 1) == 0x0800 +; run: %ushr_i16_i16(0xf000, 0) == 0xf000 +; run: %ushr_i16_i16(0xf000, 4) == 0x0f00 +; run: %ushr_i16_i16(0x4000, 32) == 0x4000 +; run: %ushr_i16_i16(0x4000, 33) == 0x2000 +; run: %ushr_i16_i16(0x4000, 34) == 0x1000 + +function %ushr_i16_i8(i16, i8) -> i16 { +block0(v0: i16, v1: i8): + v2 = ushr.i16 v0, v1 + return v2 +} +; run: %ushr_i16_i8(0x1000, 0) == 0x1000 +; run: %ushr_i16_i8(0x1000, 1) == 0x0800 +; run: %ushr_i16_i8(0xf000, 0) == 0xf000 +; run: %ushr_i16_i8(0xf000, 4) == 0x0f00 +; run: %ushr_i16_i8(0x4000, 32) == 0x4000 +; run: %ushr_i16_i8(0x4000, 33) == 0x2000 +; run: %ushr_i16_i8(0x4000, 34) == 0x1000 + + +function %ushr_i8_i64(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = ushr.i8 v0, v1 + return v2 +} +; run: %ushr_i8_i64(0x10, 0) == 0x10 +; run: %ushr_i8_i64(0x10, 1) == 0x08 +; run: %ushr_i8_i64(0xf0, 0) == 0xf0 +; run: %ushr_i8_i64(0xf0, 4) == 0x0f +; run: %ushr_i8_i64(0x40, 32) == 0x40 +; run: %ushr_i8_i64(0x40, 33) == 0x20 +; run: %ushr_i8_i64(0x40, 34) == 0x10 + +function %ushr_i8_i32(i8, i32) -> i8 { +block0(v0: i8, v1: i32): + v2 = ushr.i8 v0, v1 + return v2 +} +; run: %ushr_i8_i32(0x10, 0) == 0x10 +; run: %ushr_i8_i32(0x10, 1) == 0x08 +; run: %ushr_i8_i32(0xf0, 0) == 0xf0 +; run: %ushr_i8_i32(0xf0, 4) == 0x0f +; run: %ushr_i8_i32(0x40, 32) == 0x40 +; run: %ushr_i8_i32(0x40, 33) == 0x20 +; run: %ushr_i8_i32(0x40, 34) == 0x10 + +function %ushr_i8_i16(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = ushr.i8 v0, v1 + return v2 +} +; run: %ushr_i8_i16(0x10, 0) == 0x10 +; run: 
%ushr_i8_i16(0x10, 1) == 0x08 +; run: %ushr_i8_i16(0xf0, 0) == 0xf0 +; run: %ushr_i8_i16(0xf0, 4) == 0x0f +; run: %ushr_i8_i16(0x40, 32) == 0x40 +; run: %ushr_i8_i16(0x40, 33) == 0x20 +; run: %ushr_i8_i16(0x40, 34) == 0x10 + +function %ushr_i8_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = ushr.i8 v0, v1 + return v2 +} +; run: %ushr_i8_i8(0x10, 0) == 0x10 +; run: %ushr_i8_i8(0x10, 1) == 0x08 +; run: %ushr_i8_i8(0xf0, 0) == 0xf0 +; run: %ushr_i8_i8(0xf0, 4) == 0x0f +; run: %ushr_i8_i8(0x40, 32) == 0x40 +; run: %ushr_i8_i8(0x40, 33) == 0x20 +; run: %ushr_i8_i8(0x40, 34) == 0x10 + + + function %sshr_i64_i64(i64, i64) -> i64 { block0(v0: i64, v1: i64): v2 = sshr.i64 v0, v1 @@ -319,24 +534,108 @@ block0(v0: i32, v1: i8): ; run: %sshr_i32_i8(0x40000000, 33) == 0x20000000 ; run: %sshr_i32_i8(0x40000000, 34) == 0x10000000 -function %rotl_i8_const_37(i8) -> i8 { -block0(v0: i8): - v1 = iconst.i8 37 - v2 = rotl.i8 v0, v1 - return v2 + +function %sshr_i16_i64(i16, i64) -> i16 { +block0(v0: i16, v1: i64): + v2 = sshr.i16 v0, v1 + return v2 } +; run: %sshr_i16_i64(0x8000, 0) == 0x8000 +; run: %sshr_i16_i64(0x8000, 1) == 0xC000 +; run: %sshr_i16_i64(0xf000, 0) == 0xf000 +; run: %sshr_i16_i64(0xf000, 4) == 0xff00 +; run: %sshr_i16_i64(0x4000, 32) == 0x4000 +; run: %sshr_i16_i64(0x4000, 33) == 0x2000 +; run: %sshr_i16_i64(0x4000, 34) == 0x1000 -; run: %rotl_i8_const_37(0x00) == 0x00 -; run: %rotl_i8_const_37(0x01) == 0x20 -; run: %rotl_i8_const_37(0x12) == 0x42 - -function %rotr_i8_const_37(i8) -> i8 { -block0(v0: i8): - v1 = iconst.i8 37 - v2 = rotr.i8 v0, v1 - return v2 +function %sshr_i16_i32(i16, i32) -> i16 { +block0(v0: i16, v1: i32): + v2 = sshr.i16 v0, v1 + return v2 } +; run: %sshr_i16_i32(0x8000, 0) == 0x8000 +; run: %sshr_i16_i32(0x8000, 1) == 0xC000 +; run: %sshr_i16_i32(0xf000, 0) == 0xf000 +; run: %sshr_i16_i32(0xf000, 4) == 0xff00 +; run: %sshr_i16_i32(0x4000, 32) == 0x4000 +; run: %sshr_i16_i32(0x4000, 33) == 0x2000 +; run: %sshr_i16_i32(0x4000, 34) == 0x1000 -; run: %rotr_i8_const_37(0x00) == 0x00 -; run: %rotr_i8_const_37(0x01) == 0x08 -; run: %rotr_i8_const_37(0x12) == 0x90 +function %sshr_i16_i16(i16, i16) -> i16 { +block0(v0: i16, v1: i16): + v2 = sshr.i16 v0, v1 + return v2 +} +; run: %sshr_i16_i16(0x8000, 0) == 0x8000 +; run: %sshr_i16_i16(0x8000, 1) == 0xC000 +; run: %sshr_i16_i16(0xf000, 0) == 0xf000 +; run: %sshr_i16_i16(0xf000, 4) == 0xff00 +; run: %sshr_i16_i16(0x4000, 32) == 0x4000 +; run: %sshr_i16_i16(0x4000, 33) == 0x2000 +; run: %sshr_i16_i16(0x4000, 34) == 0x1000 + +function %sshr_i16_i8(i16, i8) -> i16 { +block0(v0: i16, v1: i8): + v2 = sshr.i16 v0, v1 + return v2 +} +; run: %sshr_i16_i8(0x8000, 0) == 0x8000 +; run: %sshr_i16_i8(0x8000, 1) == 0xC000 +; run: %sshr_i16_i8(0xf000, 0) == 0xf000 +; run: %sshr_i16_i8(0xf000, 4) == 0xff00 +; run: %sshr_i16_i8(0x4000, 32) == 0x4000 +; run: %sshr_i16_i8(0x4000, 33) == 0x2000 +; run: %sshr_i16_i8(0x4000, 34) == 0x1000 + + +function %sshr_i8_i64(i8, i64) -> i8 { +block0(v0: i8, v1: i64): + v2 = sshr.i8 v0, v1 + return v2 +} +; run: %sshr_i8_i64(0x80, 0) == 0x80 +; run: %sshr_i8_i64(0x80, 1) == 0xC0 +; run: %sshr_i8_i64(0xf0, 0) == 0xf0 +; run: %sshr_i8_i64(0xf0, 4) == 0xff +; run: %sshr_i8_i64(0x40, 32) == 0x40 +; run: %sshr_i8_i64(0x40, 33) == 0x20 +; run: %sshr_i8_i64(0x40, 34) == 0x10 + +function %sshr_i8_i32(i8, i32) -> i8 { +block0(v0: i8, v1: i32): + v2 = sshr.i8 v0, v1 + return v2 +} +; run: %sshr_i8_i32(0x80, 0) == 0x80 +; run: %sshr_i8_i32(0x80, 1) == 0xC0 +; run: %sshr_i8_i32(0xf0, 0) == 0xf0 +; run: 
%sshr_i8_i32(0xf0, 4) == 0xff +; run: %sshr_i8_i32(0x40, 32) == 0x40 +; run: %sshr_i8_i32(0x40, 33) == 0x20 +; run: %sshr_i8_i32(0x40, 34) == 0x10 + +function %sshr_i8_i16(i8, i16) -> i8 { +block0(v0: i8, v1: i16): + v2 = sshr.i8 v0, v1 + return v2 +} +; run: %sshr_i8_i16(0x80, 0) == 0x80 +; run: %sshr_i8_i16(0x80, 1) == 0xC0 +; run: %sshr_i8_i16(0xf0, 0) == 0xf0 +; run: %sshr_i8_i16(0xf0, 4) == 0xff +; run: %sshr_i8_i16(0x40, 32) == 0x40 +; run: %sshr_i8_i16(0x40, 33) == 0x20 +; run: %sshr_i8_i16(0x40, 34) == 0x10 + +function %sshr_i8_i8(i8, i8) -> i8 { +block0(v0: i8, v1: i8): + v2 = sshr.i8 v0, v1 + return v2 +} +; run: %sshr_i8_i8(0x80, 0) == 0x80 +; run: %sshr_i8_i8(0x80, 1) == 0xC0 +; run: %sshr_i8_i8(0xf0, 0) == 0xf0 +; run: %sshr_i8_i8(0xf0, 4) == 0xff +; run: %sshr_i8_i8(0x40, 32) == 0x40 +; run: %sshr_i8_i8(0x40, 33) == 0x20 +; run: %sshr_i8_i8(0x40, 34) == 0x10 diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs index a578fdfe5f..87c86409de 100644 --- a/cranelift/interpreter/src/step.rs +++ b/cranelift/interpreter/src/step.rs @@ -153,6 +153,20 @@ where right: V| -> ValueResult<ControlFlow<V>> { Ok(assign(op(left, right)?)) }; + // Same as `binary`, but converts the values to their unsigned form before the + // operation and back to signed form afterwards. Since Cranelift types have no notion of + // signedness, this enables operations that depend on sign. + let binary_unsigned = + |op: fn(V, V) -> ValueResult<V>, left: V, right: V| -> ValueResult<ControlFlow<V>> { + Ok(assign( + op( + left.convert(ValueConversionKind::ToUnsigned)?, + right.convert(ValueConversionKind::ToUnsigned)?, + ) + .and_then(|v| v.convert(ValueConversionKind::ToSigned))?, + )) + }; + // Similar to `binary` but converts select `ValueError`'s into trap `ControlFlow`'s let binary_can_trap = |op: fn(V, V) -> ValueResult<V>, left: V, @@ -690,10 +704,10 @@ where Opcode::RotlImm => binary(Value::rotl, arg(0)?, imm_as_ctrl_ty()?)?, Opcode::RotrImm => binary(Value::rotr, arg(0)?, imm_as_ctrl_ty()?)?, Opcode::Ishl => binary(Value::shl, arg(0)?, arg(1)?)?, - Opcode::Ushr => binary(Value::ushr, arg(0)?, arg(1)?)?, + Opcode::Ushr => binary_unsigned(Value::ushr, arg(0)?, arg(1)?)?, Opcode::Sshr => binary(Value::ishr, arg(0)?, arg(1)?)?, Opcode::IshlImm => binary(Value::shl, arg(0)?, imm_as_ctrl_ty()?)?, - Opcode::UshrImm => binary(Value::ushr, arg(0)?, imm_as_ctrl_ty()?)?, + Opcode::UshrImm => binary_unsigned(Value::ushr, arg(0)?, imm_as_ctrl_ty()?)?, Opcode::SshrImm => binary(Value::ishr, arg(0)?, imm_as_ctrl_ty()?)?, Opcode::Bitrev => assign(Value::reverse_bits(arg(0)?)?), Opcode::Clz => assign(arg(0)?.leading_zeros()?), diff --git a/cranelift/interpreter/src/value.rs b/cranelift/interpreter/src/value.rs index 94d4a11bc9..01974b357f 100644 --- a/cranelift/interpreter/src/value.rs +++ b/cranelift/interpreter/src/value.rs @@ -191,12 +191,19 @@ macro_rules! binary_match { _ => unimplemented!() } }; - ( $op:tt($arg1:expr, $arg2:expr); unsigned integers ) => { + ( $op:tt($arg1:expr, $arg2:expr); [ $( $data_value_ty:ident ),* ]; rhs: $rhs:tt ) => { match ($arg1, $arg2) { - (DataValue::I8(a), DataValue::I8(b)) => { Ok(DataValue::I8((u8::try_from(*a)? $op u8::try_from(*b)?) as i8)) } - (DataValue::I16(a), DataValue::I16(b)) => { Ok(DataValue::I16((u16::try_from(*a)? $op u16::try_from(*b)?) as i16)) } - (DataValue::I32(a), DataValue::I32(b)) => { Ok(DataValue::I32((u32::try_from(*a)? $op u32::try_from(*b)?) as i32)) } - (DataValue::I64(a), DataValue::I64(b)) => { Ok(DataValue::I64((u64::try_from(*a)?
$op u64::try_from(*b)?) as i64)) } + $( (DataValue::$data_value_ty(a), DataValue::$rhs(b)) => { Ok(DataValue::$data_value_ty(a.$op(*b))) } )* + _ => unimplemented!() + } + }; + ( $op:ident($arg1:expr, $arg2:expr); unsigned integers ) => { + match ($arg1, $arg2) { + (DataValue::I8(a), DataValue::I8(b)) => { Ok(DataValue::I8((u8::try_from(*a)?.$op(u8::try_from(*b)?) as i8))) } + (DataValue::I16(a), DataValue::I16(b)) => { Ok(DataValue::I16((u16::try_from(*a)?.$op(u16::try_from(*b)?) as i16))) } + (DataValue::I32(a), DataValue::I32(b)) => { Ok(DataValue::I32((u32::try_from(*a)?.$op(u32::try_from(*b)?) as i32))) } + (DataValue::I64(a), DataValue::I64(b)) => { Ok(DataValue::I64((u64::try_from(*a)?.$op(u64::try_from(*b)?) as i64))) } + (DataValue::I128(a), DataValue::I128(b)) => { Ok(DataValue::I128((u128::try_from(*a)?.$op(u128::try_from(*b)?) as i128))) } _ => { Err(ValueError::InvalidType(ValueTypeClass::Integer, if !($arg1).ty().is_int() { ($arg1).ty() } else { ($arg2).ty() })) } } }; @@ -306,7 +313,9 @@ impl Value for DataValue { Ok(match kind { ValueConversionKind::Exact(ty) => match (self, ty) { // TODO a lot to do here: from bmask to ireduce to raw_bitcast... - (DataValue::I64(n), ty) if ty.is_int() => DataValue::from_integer(n as i128, ty)?, + (val, ty) if val.ty().is_int() && ty.is_int() => { + DataValue::from_integer(val.into_int()?, ty)? + } (DataValue::F32(n), types::I32) => DataValue::I32(n.bits() as i32), (DataValue::F64(n), types::I64) => DataValue::I64(n.bits() as i64), (DataValue::B(b), t) if t.is_bool() => DataValue::B(b), @@ -623,23 +632,38 @@ impl Value for DataValue { fn shl(self, other: Self) -> ValueResult<Self> { - binary_match!(<<(&self, &other); [I8, I16, I32, I64]) + let amt = other + .convert(ValueConversionKind::Exact(types::I32))? + .convert(ValueConversionKind::ToUnsigned)?; + binary_match!(wrapping_shl(&self, &amt); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128]; rhs: U32) } fn ushr(self, other: Self) -> ValueResult<Self> { - binary_match!(>>(&self, &other); unsigned integers) + let amt = other + .convert(ValueConversionKind::Exact(types::I32))? + .convert(ValueConversionKind::ToUnsigned)?; + binary_match!(wrapping_shr(&self, &amt); [U8, U16, U32, U64, U128]; rhs: U32) } fn ishr(self, other: Self) -> ValueResult<Self> { - binary_match!(>>(&self, &other); [I8, I16, I32, I64]) + let amt = other + .convert(ValueConversionKind::Exact(types::I32))? + .convert(ValueConversionKind::ToUnsigned)?; + binary_match!(wrapping_shr(&self, &amt); [I8, I16, I32, I64, I128]; rhs: U32) } - fn rotl(self, _other: Self) -> ValueResult<Self> { - unimplemented!() + fn rotl(self, other: Self) -> ValueResult<Self> { + let amt = other + .convert(ValueConversionKind::Exact(types::I32))? + .convert(ValueConversionKind::ToUnsigned)?; + binary_match!(rotate_left(&self, &amt); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128]; rhs: U32) } - fn rotr(self, _other: Self) -> ValueResult<Self> { - unimplemented!() + fn rotr(self, other: Self) -> ValueResult<Self> { + let amt = other + .convert(ValueConversionKind::Exact(types::I32))? + .convert(ValueConversionKind::ToUnsigned)?; + binary_match!(rotate_right(&self, &amt); [I8, I16, I32, I64, I128, U8, U16, U32, U64, U128]; rhs: U32) } fn and(self, other: Self) -> ValueResult<Self> {