diff --git a/cranelift/filetests/isa/intel/binary32.cton b/cranelift/filetests/isa/intel/binary32.cton index 14f050c2e7..4cc14b6905 100644 --- a/cranelift/filetests/isa/intel/binary32.cton +++ b/cranelift/filetests/isa/intel/binary32.cton @@ -55,11 +55,14 @@ ebb0: [-,%rsi] v24 = sshr v2, v1 ; bin: d3 fe ; asm: sarl %cl, %ecx [-,%rcx] v25 = sshr v1, v1 ; bin: d3 f9 - - ; asm: movl %esi, %ecx - [-,%rcx] v26 = copy v2 ; bin: 89 f1 - ; asm: movl %ecx, %esi - [-,%rsi] v27 = copy v1 ; bin: 89 ce + ; asm: roll %cl, %esi + [-,%rsi] v26 = rotl v2, v1 ; bin: d3 c6 + ; asm: roll %cl, %ecx + [-,%rcx] v27 = rotl v1, v1 ; bin: d3 c1 + ; asm: rorl %cl, %esi + [-,%rsi] v28 = rotr v2, v1 ; bin: d3 ce + ; asm: rorl %cl, %ecx + [-,%rcx] v29 = rotr v1, v1 ; bin: d3 c9 ; Integer Register - Immediate 8-bit operations. ; The 8-bit immediate is sign-extended. @@ -102,6 +105,13 @@ ebb0: ; asm: xorl $1000000, %esi [-,%rsi] v47 = bxor_imm v2, 1000000 ; bin: 81 f6 000f4240 + ; Register copies. + + ; asm: movl %esi, %ecx + [-,%rcx] v80 = copy v2 ; bin: 89 f1 + ; asm: movl %ecx, %esi + [-,%rsi] v81 = copy v1 ; bin: 89 ce + ; Load/Store instructions. ; Register indirect addressing with no displacement. diff --git a/cranelift/filetests/isa/intel/binary64.cton b/cranelift/filetests/isa/intel/binary64.cton index 48e328b455..e9ee5d3941 100644 --- a/cranelift/filetests/isa/intel/binary64.cton +++ b/cranelift/filetests/isa/intel/binary64.cton @@ -65,13 +65,27 @@ ebb0: ; asm: xorq %rcx, %r10 [-,%r10] v52 = bxor v3, v1 ; bin: 49 31 ca - ; asm: movq %rsi, %rcx - [-,%rcx] v60 = copy v2 ; bin: 48 89 f1 - ; asm: movq %r10, %rsi - [-,%rsi] v61 = copy v3 ; bin: 4c 89 d6 - ; asm: movq %rcx, %r10 - [-,%r10] v62 = copy v1 ; bin: 49 89 ca + ; asm: shlq %cl, %rsi + [-,%rsi] v60 = ishl v2, v1 ; bin: 48 d3 e6 + ; asm: shlq %cl, %r10 + [-,%r10] v61 = ishl v3, v1 ; bin: 49 d3 e2 + ; asm: sarq %cl, %rsi + [-,%rsi] v62 = sshr v2, v1 ; bin: 48 d3 fe + ; asm: sarq %cl, %r10 + [-,%r10] v63 = sshr v3, v1 ; bin: 49 d3 fa + ; asm: shrq %cl, %rsi + [-,%rsi] v64 = ushr v2, v1 ; bin: 48 d3 ee + ; asm: shrq %cl, %r10 + [-,%r10] v65 = ushr v3, v1 ; bin: 49 d3 ea + ; asm: rolq %cl, %rsi + [-,%rsi] v66 = rotl v2, v1 ; bin: 48 d3 c6 + ; asm: rolq %cl, %r10 + [-,%r10] v67 = rotl v3, v1 ; bin: 49 d3 c2 + ; asm: rorq %cl, %rsi + [-,%rsi] v68 = rotr v2, v1 ; bin: 48 d3 ce + ; asm: rorq %cl, %r10 + [-,%r10] v69 = rotr v3, v1 ; bin: 49 d3 ca ; Integer Register-Immediate Operations. ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits. @@ -122,6 +136,15 @@ ebb0: ; asm: xorq $-100, %r14 [-,%r14] v104 = bxor_imm v5, -100 ; bin: 49 83 f6 9c + ; Register copies. 
+ + ; asm: movq %rsi, %rcx + [-,%rcx] v110 = copy v2 ; bin: 48 89 f1 + ; asm: movq %r10, %rsi + [-,%rsi] v111 = copy v3 ; bin: 4c 89 d6 + ; asm: movq %rcx, %r10 + [-,%r10] v112 = copy v1 ; bin: 49 89 ca + return ; bin: c3 } @@ -187,13 +210,27 @@ ebb0: ; asm: xorl %ecx, %r10d [-,%r10] v52 = bxor v3, v1 ; bin: 41 31 ca - ; asm: movl %esi, %ecx - [-,%rcx] v60 = copy v2 ; bin: 40 89 f1 - ; asm: movl %r10d, %esi - [-,%rsi] v61 = copy v3 ; bin: 44 89 d6 - ; asm: movl %ecx, %r10d - [-,%r10] v62 = copy v1 ; bin: 41 89 ca + ; asm: shll %cl, %esi + [-,%rsi] v60 = ishl v2, v1 ; bin: 40 d3 e6 + ; asm: shll %cl, %r10d + [-,%r10] v61 = ishl v3, v1 ; bin: 41 d3 e2 + ; asm: sarl %cl, %esi + [-,%rsi] v62 = sshr v2, v1 ; bin: 40 d3 fe + ; asm: sarl %cl, %r10d + [-,%r10] v63 = sshr v3, v1 ; bin: 41 d3 fa + ; asm: shrl %cl, %esi + [-,%rsi] v64 = ushr v2, v1 ; bin: 40 d3 ee + ; asm: shrl %cl, %r10d + [-,%r10] v65 = ushr v3, v1 ; bin: 41 d3 ea + ; asm: roll %cl, %esi + [-,%rsi] v66 = rotl v2, v1 ; bin: 40 d3 c6 + ; asm: roll %cl, %r10d + [-,%r10] v67 = rotl v3, v1 ; bin: 41 d3 c2 + ; asm: rorl %cl, %esi + [-,%rsi] v68 = rotr v2, v1 ; bin: 40 d3 ce + ; asm: rorl %cl, %r10d + [-,%r10] v69 = rotr v3, v1 ; bin: 41 d3 ca ; Integer Register-Immediate Operations. ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits. @@ -244,5 +281,14 @@ ebb0: ; asm: xorl $-100, %r14d [-,%r14] v104 = bxor_imm v5, -100 ; bin: 41 83 f6 9c + ; Register copies. + + ; asm: movl %esi, %ecx + [-,%rcx] v110 = copy v2 ; bin: 40 89 f1 + ; asm: movl %r10d, %esi + [-,%rsi] v111 = copy v3 ; bin: 44 89 d6 + ; asm: movl %ecx, %r10d + [-,%r10] v112 = copy v1 ; bin: 41 89 ca + return ; bin: c3 } diff --git a/cranelift/filetests/wasm/i32-arith.cton b/cranelift/filetests/wasm/i32-arith.cton index 1096d70ddb..f2fafffee3 100644 --- a/cranelift/filetests/wasm/i32-arith.cton +++ b/cranelift/filetests/wasm/i32-arith.cton @@ -1,7 +1,10 @@ ; Test basic code generation for i32 arithmetic WebAssembly instructions. test compile -set is_64bit +set is_64bit=0 +isa intel + +set is_64bit=1 isa intel ; Constants. @@ -55,8 +58,32 @@ ebb0(v0: i32, v1: i32): return v2 } -; function %i32_shl(i32, i32) -> i32 -; function %i32_shr_s(i32, i32) -> i32 -; function %i32_shr_u(i32, i32) -> i32 -; function %i32_rotl(i32, i32) -> i32 -; function %i32_rotr(i32, i32) -> i32 +function %i32_shl(i32, i32) -> i32 { +ebb0(v0: i32, v1: i32): + v2 = ishl v0, v1 + return v2 +} + +function %i32_shr_s(i32, i32) -> i32 { +ebb0(v0: i32, v1: i32): + v2 = sshr v0, v1 + return v2 +} + +function %i32_shr_u(i32, i32) -> i32 { +ebb0(v0: i32, v1: i32): + v2 = ushr v0, v1 + return v2 +} + +function %i32_rotl(i32, i32) -> i32 { +ebb0(v0: i32, v1: i32): + v2 = rotl v0, v1 + return v2 +} + +function %i32_rotr(i32, i32) -> i32 { +ebb0(v0: i32, v1: i32): + v2 = rotr v0, v1 + return v2 +} diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py index ab32e77303..df6631fe4c 100644 --- a/lib/cretonne/meta/isa/intel/encodings.py +++ b/lib/cretonne/meta/isa/intel/encodings.py @@ -66,12 +66,19 @@ I64.enc(base.iconst.i64, *r.uid.rex(0xc7, rrr=0, w=1)) # Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix. I64.enc(base.iconst.i64, *r.puiq.rex(0xb8, w=1)) -# 32-bit shifts and rotates. +# Shifts and rotates. # Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit # and 16-bit shifts would need explicit masking. 
-I32.enc(base.ishl.i32.i32, *r.rc(0xd3, rrr=4)) -I32.enc(base.ushr.i32.i32, *r.rc(0xd3, rrr=5)) -I32.enc(base.sshr.i32.i32, *r.rc(0xd3, rrr=7)) +for inst, rrr in [ + (base.rotl, 0), + (base.rotr, 1), + (base.ishl, 4), + (base.ushr, 5), + (base.sshr, 7)]: + I32.enc(inst.i32.i32, *r.rc(0xd3, rrr=rrr)) + I64.enc(inst.i64.i64, *r.rc.rex(0xd3, rrr=rrr, w=1)) + I64.enc(inst.i32.i32, *r.rc.rex(0xd3, rrr=rrr)) + I64.enc(inst.i32.i32, *r.rc(0xd3, rrr=rrr)) # Loads and stores. I32.enc(base.store.i32.i32, *r.st(0x89))
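
Note (not part of the patch): the tests and the encodings.py loop above all revolve around the single 0xd3 opcode group, where the ModRM reg field (the rrr value) selects the operation, the ModRM r/m field names the shifted register, and the count is implicitly %cl. The following Python sketch reproduces that byte layout and checks it against a few of the "; bin:" annotations in the test files above; the helper names and register table here are illustrative only and do not exist in the Cretonne code base.

# Illustrative sketch of the 0xd3 shift/rotate group encoding used by the
# rc recipe. Not part of the patch; helper names are hypothetical.

# rrr values, matching the loop added in encodings.py.
GROUP_D3 = {'rotl': 0, 'rotr': 1, 'ishl': 4, 'ushr': 5, 'sshr': 7}

# Hardware register numbers: the low 3 bits go in ModRM.r/m, bit 3 in REX.B.
GPR = {'rcx': 1, 'rsi': 6, 'r10': 10}


def encode_shift(inst, reg, w=0, force_rex=False):
    """Return the bytes for `inst reg, %cl` (register-register form)."""
    rrr = GROUP_D3[inst]
    num = GPR[reg]
    modrm = 0xC0 | (rrr << 3) | (num & 7)          # mod=11, reg=rrr, r/m=reg
    rex = 0x40 | (w << 3) | (num >> 3)              # REX.W and REX.B as needed
    prefix = [rex] if (w or num >= 8 or force_rex) else []
    return bytes(prefix + [0xD3] + [modrm])


# Cross-check against the "; bin:" values in the tests above.
assert encode_shift('rotl', 'rsi') == bytes([0xd3, 0xc6])                # roll %cl, %esi
assert encode_shift('rotr', 'rcx') == bytes([0xd3, 0xc9])                # rorl %cl, %ecx
assert encode_shift('ishl', 'r10', w=1) == bytes([0x49, 0xd3, 0xe2])     # shlq %cl, %r10
assert encode_shift('sshr', 'rsi', w=1) == bytes([0x48, 0xd3, 0xfe])     # sarq %cl, %rsi
assert encode_shift('ushr', 'rsi', force_rex=True) == bytes([0x40, 0xd3, 0xee])  # shrl %cl, %esi (rc.rex)

This is also why the loop emits two I64 encodings for the i32 forms: one plain rc recipe (no REX byte, as in binary32.cton) and one rc.rex recipe that forces a REX prefix so registers like %r10d remain reachable.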