diff --git a/cranelift/filetests/isa/intel/binary64.cton b/cranelift/filetests/isa/intel/binary64.cton
new file mode 100644
index 0000000000..48e328b455
--- /dev/null
+++ b/cranelift/filetests/isa/intel/binary64.cton
@@ -0,0 +1,248 @@
+; binary emission of 64-bit code.
+test binemit
+set is_64bit
+isa intel
+
+; The binary encodings can be verified with the command:
+;
+;   sed -ne 's/^ *; asm: *//p' filetests/isa/intel/binary64.cton | llvm-mc -show-encoding -triple=x86_64
+;
+
+; Tests for i64 instructions.
+function %I64() {
+    fn0 = function %foo()
+    sig0 = signature()
+
+ebb0:
+
+    ; Integer Constants.
+
+    ; asm: movq $0x01020304f1f2f3f4, %rcx
+    [-,%rcx]            v1 = iconst.i64 0x0102_0304_f1f2_f3f4 ; bin: 48 b9 01020304f1f2f3f4
+    ; asm: movq $0x11020304f1f2f3f4, %rsi
+    [-,%rsi]            v2 = iconst.i64 0x1102_0304_f1f2_f3f4 ; bin: 48 be 11020304f1f2f3f4
+    ; asm: movq $0x21020304f1f2f3f4, %r10
+    [-,%r10]            v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4
+    ; asm: movl $0xff001122, %r8d                             # 32-bit zero-extended constant.
+    [-,%r8]             v4 = iconst.i64 0xff00_1122           ; bin: 41 b8 ff001122
+    ; asm: movq $0xffffffff88001122, %r14                     # 32-bit sign-extended constant.
+    [-,%r14]            v5 = iconst.i64 0xffff_ffff_8800_1122 ; bin: 49 c7 c6 88001122
+
+    ; Integer Register-Register Operations.
+
+    ; asm: addq %rsi, %rcx
+    [-,%rcx]             v10 = iadd v1, v2       ; bin: 48 01 f1
+    ; asm: addq %r10, %rsi
+    [-,%rsi]             v11 = iadd v2, v3       ; bin: 4c 01 d6
+    ; asm: addq %rcx, %r10
+    [-,%r10]             v12 = iadd v3, v1       ; bin: 49 01 ca
+
+    ; asm: subq %rsi, %rcx
+    [-,%rcx]             v20 = isub v1, v2       ; bin: 48 29 f1
+    ; asm: subq %r10, %rsi
+    [-,%rsi]             v21 = isub v2, v3       ; bin: 4c 29 d6
+    ; asm: subq %rcx, %r10
+    [-,%r10]             v22 = isub v3, v1       ; bin: 49 29 ca
+
+    ; asm: andq %rsi, %rcx
+    [-,%rcx]             v30 = band v1, v2       ; bin: 48 21 f1
+    ; asm: andq %r10, %rsi
+    [-,%rsi]             v31 = band v2, v3       ; bin: 4c 21 d6
+    ; asm: andq %rcx, %r10
+    [-,%r10]             v32 = band v3, v1       ; bin: 49 21 ca
+
+    ; asm: orq %rsi, %rcx
+    [-,%rcx]             v40 = bor v1, v2       ; bin: 48 09 f1
+    ; asm: orq %r10, %rsi
+    [-,%rsi]             v41 = bor v2, v3       ; bin: 4c 09 d6
+    ; asm: orq %rcx, %r10
+    [-,%r10]             v42 = bor v3, v1       ; bin: 49 09 ca
+
+    ; asm: xorq %rsi, %rcx
+    [-,%rcx]             v50 = bxor v1, v2       ; bin: 48 31 f1
+    ; asm: xorq %r10, %rsi
+    [-,%rsi]             v51 = bxor v2, v3       ; bin: 4c 31 d6
+    ; asm: xorq %rcx, %r10
+    [-,%r10]             v52 = bxor v3, v1       ; bin: 49 31 ca
+
+    ; asm: movq %rsi, %rcx
+    [-,%rcx]             v60 = copy v2           ; bin: 48 89 f1
+    ; asm: movq %r10, %rsi
+    [-,%rsi]             v61 = copy v3           ; bin: 4c 89 d6
+    ; asm: movq %rcx, %r10
+    [-,%r10]             v62 = copy v1           ; bin: 49 89 ca
+
+
+    ; Integer Register-Immediate Operations.
+    ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits.
+    ; Some take 8-bit immediates that are sign-extended to 64 bits.
+
+    ; asm: addq $-100000, %rcx
+    [-,%rcx]     v70 = iadd_imm v1, -100000     ; bin: 48 81 c1 fffe7960
+    ; asm: addq $100000, %rsi
+    [-,%rsi]     v71 = iadd_imm v2, 100000      ; bin: 48 81 c6 000186a0
+    ; asm: addq $0x7fffffff, %r10
+    [-,%r10]     v72 = iadd_imm v3, 0x7fff_ffff ; bin: 49 81 c2 7fffffff
+    ; asm: addq $100, %r8
+    [-,%r8]      v73 = iadd_imm v4, 100         ; bin: 49 83 c0 64
+    ; asm: addq $-100, %r14
+    [-,%r14]     v74 = iadd_imm v5, -100        ; bin: 49 83 c6 9c
+
+    ; asm: andq $-100000, %rcx
+    [-,%rcx]     v80 = band_imm v1, -100000     ; bin: 48 81 e1 fffe7960
+    ; asm: andq $100000, %rsi
+    [-,%rsi]     v81 = band_imm v2, 100000      ; bin: 48 81 e6 000186a0
+    ; asm: andq $0x7fffffff, %r10
+    [-,%r10]     v82 = band_imm v3, 0x7fff_ffff ; bin: 49 81 e2 7fffffff
+    ; asm: andq $100, %r8
+    [-,%r8]      v83 = band_imm v4, 100         ; bin: 49 83 e0 64
+    ; asm: andq $-100, %r14
+    [-,%r14]     v84 = band_imm v5, -100        ; bin: 49 83 e6 9c
+
+    ; asm: orq $-100000, %rcx
+    [-,%rcx]     v90 = bor_imm v1, -100000      ; bin: 48 81 c9 fffe7960
+    ; asm: orq $100000, %rsi
+    [-,%rsi]     v91 = bor_imm v2, 100000       ; bin: 48 81 ce 000186a0
+    ; asm: orq $0x7fffffff, %r10
+    [-,%r10]     v92 = bor_imm v3, 0x7fff_ffff  ; bin: 49 81 ca 7fffffff
+    ; asm: orq $100, %r8
+    [-,%r8]      v93 = bor_imm v4, 100          ; bin: 49 83 c8 64
+    ; asm: orq $-100, %r14
+    [-,%r14]     v94 = bor_imm v5, -100         ; bin: 49 83 ce 9c
+
+    ; asm: xorq $-100000, %rcx
+    [-,%rcx]     v100 = bxor_imm v1, -100000     ; bin: 48 81 f1 fffe7960
+    ; asm: xorq $100000, %rsi
+    [-,%rsi]     v101 = bxor_imm v2, 100000      ; bin: 48 81 f6 000186a0
+    ; asm: xorq $0x7fffffff, %r10
+    [-,%r10]     v102 = bxor_imm v3, 0x7fff_ffff ; bin: 49 81 f2 7fffffff
+    ; asm: xorq $100, %r8
+    [-,%r8]      v103 = bxor_imm v4, 100         ; bin: 49 83 f0 64
+    ; asm: xorq $-100, %r14
+    [-,%r14]     v104 = bxor_imm v5, -100        ; bin: 49 83 f6 9c
+
+    ; asm: ret
+    return                                       ; bin: c3
+}
+
+; Tests for i32 instructions in 64-bit mode.
+;
+; Note that many i32 instructions can be encoded both with and without a REX
+; prefix if they only use the low 8 registers. Here, we are testing the REX
+; encodings which are chosen by default. Switching to non-REX encodings should
+; be done by an instruction shrinking pass.
+function %I32() {
+    fn0 = function %foo()
+    sig0 = signature()
+
+ebb0:
+
+    ; Integer Constants.
+
+    ; asm: movl $0x01020304, %ecx
+    [-,%rcx]            v1 = iconst.i32 0x0102_0304           ; bin: 40 b9 01020304
+    ; asm: movl $0x11020304, %esi
+    [-,%rsi]            v2 = iconst.i32 0x1102_0304           ; bin: 40 be 11020304
+    ; asm: movl $0x21020304, %r10d
+    [-,%r10]            v3 = iconst.i32 0x2102_0304           ; bin: 41 ba 21020304
+    ; asm: movl $0xff001122, %r8d
+    [-,%r8]             v4 = iconst.i32 0xff00_1122           ; bin: 41 b8 ff001122
+    ; asm: movl $0x88001122, %r14d
+    [-,%r14]            v5 = iconst.i32 0xffff_ffff_8800_1122 ; bin: 41 be 88001122
+
+    ; Integer Register-Register Operations.
+
+    ; asm: addl %esi, %ecx
+    [-,%rcx]             v10 = iadd v1, v2       ; bin: 40 01 f1
+    ; asm: addl %r10d, %esi
+    [-,%rsi]             v11 = iadd v2, v3       ; bin: 44 01 d6
+    ; asm: addl %ecx, %r10d
+    [-,%r10]             v12 = iadd v3, v1       ; bin: 41 01 ca
+
+    ; asm: subl %esi, %ecx
+    [-,%rcx]             v20 = isub v1, v2       ; bin: 40 29 f1
+    ; asm: subl %r10d, %esi
+    [-,%rsi]             v21 = isub v2, v3       ; bin: 44 29 d6
+    ; asm: subl %ecx, %r10d
+    [-,%r10]             v22 = isub v3, v1       ; bin: 41 29 ca
+
+    ; asm: andl %esi, %ecx
+    [-,%rcx]             v30 = band v1, v2       ; bin: 40 21 f1
+    ; asm: andl %r10d, %esi
+    [-,%rsi]             v31 = band v2, v3       ; bin: 44 21 d6
+    ; asm: andl %ecx, %r10d
+    [-,%r10]             v32 = band v3, v1       ; bin: 41 21 ca
+
+    ; asm: orl %esi, %ecx
+    [-,%rcx]             v40 = bor v1, v2       ; bin: 40 09 f1
+    ; asm: orl %r10d, %esi
+    [-,%rsi]             v41 = bor v2, v3       ; bin: 44 09 d6
+    ; asm: orl %ecx, %r10d
+    [-,%r10]             v42 = bor v3, v1       ; bin: 41 09 ca
+
+    ; asm: xorl %esi, %ecx
+    [-,%rcx]             v50 = bxor v1, v2       ; bin: 40 31 f1
+    ; asm: xorl %r10d, %esi
+    [-,%rsi]             v51 = bxor v2, v3       ; bin: 44 31 d6
+    ; asm: xorl %ecx, %r10d
+    [-,%r10]             v52 = bxor v3, v1       ; bin: 41 31 ca
+
+    ; asm: movl %esi, %ecx
+    [-,%rcx]             v60 = copy v2           ; bin: 40 89 f1
+    ; asm: movl %r10d, %esi
+    [-,%rsi]             v61 = copy v3           ; bin: 44 89 d6
+    ; asm: movl %ecx, %r10d
+    [-,%r10]             v62 = copy v1           ; bin: 41 89 ca
+
+
+    ; Integer Register-Immediate Operations.
+    ; These 32-bit ops all use a 32-bit immediate.
+    ; Some take 8-bit immediates that are sign-extended to 32 bits.
+
+    ; asm: addl $-100000, %ecx
+    [-,%rcx]     v70 = iadd_imm v1, -100000     ; bin: 40 81 c1 fffe7960
+    ; asm: addl $100000, %esi
+    [-,%rsi]     v71 = iadd_imm v2, 100000      ; bin: 40 81 c6 000186a0
+    ; asm: addl $0x7fffffff, %r10d
+    [-,%r10]     v72 = iadd_imm v3, 0x7fff_ffff ; bin: 41 81 c2 7fffffff
+    ; asm: addl $100, %r8d
+    [-,%r8]      v73 = iadd_imm v4, 100         ; bin: 41 83 c0 64
+    ; asm: addl $-100, %r14d
+    [-,%r14]     v74 = iadd_imm v5, -100        ; bin: 41 83 c6 9c
+
+    ; asm: andl $-100000, %ecx
+    [-,%rcx]     v80 = band_imm v1, -100000     ; bin: 40 81 e1 fffe7960
+    ; asm: andl $100000, %esi
+    [-,%rsi]     v81 = band_imm v2, 100000      ; bin: 40 81 e6 000186a0
+    ; asm: andl $0x7fffffff, %r10d
+    [-,%r10]     v82 = band_imm v3, 0x7fff_ffff ; bin: 41 81 e2 7fffffff
+    ; asm: andl $100, %r8d
+    [-,%r8]      v83 = band_imm v4, 100         ; bin: 41 83 e0 64
+    ; asm: andl $-100, %r14d
+    [-,%r14]     v84 = band_imm v5, -100        ; bin: 41 83 e6 9c
+
+    ; asm: orl $-100000, %ecx
+    [-,%rcx]     v90 = bor_imm v1, -100000      ; bin: 40 81 c9 fffe7960
+    ; asm: orl $100000, %esi
+    [-,%rsi]     v91 = bor_imm v2, 100000       ; bin: 40 81 ce 000186a0
+    ; asm: orl $0x7fffffff, %r10d
+    [-,%r10]     v92 = bor_imm v3, 0x7fff_ffff  ; bin: 41 81 ca 7fffffff
+    ; asm: orl $100, %r8d
+    [-,%r8]      v93 = bor_imm v4, 100          ; bin: 41 83 c8 64
+    ; asm: orl $-100, %r14d
+    [-,%r14]     v94 = bor_imm v5, -100         ; bin: 41 83 ce 9c
+
+    ; asm: xorl $-100000, %ecx
+    [-,%rcx]     v100 = bxor_imm v1, -100000     ; bin: 40 81 f1 fffe7960
+    ; asm: xorl $100000, %esi
+    [-,%rsi]     v101 = bxor_imm v2, 100000      ; bin: 40 81 f6 000186a0
+    ; asm: xorl $0x7fffffff, %r10d
+    [-,%r10]     v102 = bxor_imm v3, 0x7fff_ffff ; bin: 41 81 f2 7fffffff
+    ; asm: xorl $100, %r8d
+    [-,%r8]      v103 = bxor_imm v4, 100         ; bin: 41 83 f0 64
+    ; asm: xorl $-100, %r14d
+    [-,%r14]     v104 = bxor_imm v5, -100        ; bin: 41 83 f6 9c
+
+    ; asm: ret
+    return                                       ; bin: c3
+}
diff --git a/lib/cretonne/meta/isa/intel/encodings.py b/lib/cretonne/meta/isa/intel/encodings.py
index 0b59b32da3..74dd5b5999 100644
--- a/lib/cretonne/meta/isa/intel/encodings.py
+++ b/lib/cretonne/meta/isa/intel/encodings.py
@@ -2,30 +2,65 @@
 Intel Encodings.
 """
 from __future__ import absolute_import
+from cdsl.predicates import IsUnsignedInt
 from base import instructions as base
-from .defs import I32
+from base.formats import UnaryImm
+from .defs import I32, I64
 from . import recipes as r
 
-I32.enc(base.iadd.i32, *r.rr(0x01))
-I32.enc(base.isub.i32, *r.rr(0x29))
+for inst,           opc in [
+        (base.iadd, 0x01),
+        (base.isub, 0x29),
+        (base.band, 0x21),
+        (base.bor,  0x09),
+        (base.bxor, 0x31)]:
+    I32.enc(inst.i32, *r.rr(opc))
 
-I32.enc(base.band.i32, *r.rr(0x21))
-I32.enc(base.bor.i32,  *r.rr(0x09))
-I32.enc(base.bxor.i32, *r.rr(0x31))
+    I64.enc(inst.i64, *r.rr.rex(opc, w=1))
+    I64.enc(inst.i32, *r.rr.rex(opc))
+    # REX-less encoding must come after REX encoding so we don't use it by
+    # default. Otherwise reg-alloc would never use r8 and up.
+    I64.enc(inst.i32, *r.rr(opc))
 
 I32.enc(base.copy.i32, *r.ur(0x89))
 
-# Immediate instructions with sign-extended 8-bit and 32-bit immediate.
-for inst,                   rrr in [
-        (base.iadd_imm.i32, 0),
-        (base.band_imm.i32, 4),
-        (base.bor_imm.i32,  1),
-        (base.bxor_imm.i32, 6)]:
-    I32.enc(inst, *r.rib(0x83, rrr=rrr))
-    I32.enc(inst, *r.rid(0x81, rrr=rrr))
+I64.enc(base.copy.i64, *r.ur.rex(0x89, w=1))
+I64.enc(base.copy.i32, *r.ur.rex(0x89))
+I64.enc(base.copy.i32, *r.ur(0x89))
 
-# Immediate constant.
-I32.enc(base.iconst.i32, *r.uid(0xb8))
+# Immediate instructions with sign-extended 8-bit and 32-bit immediate.
+for inst,               rrr in [
+        (base.iadd_imm, 0),
+        (base.band_imm, 4),
+        (base.bor_imm,  1),
+        (base.bxor_imm, 6)]:
+    I32.enc(inst.i32, *r.rib(0x83, rrr=rrr))
+    I32.enc(inst.i32, *r.rid(0x81, rrr=rrr))
+
+    I64.enc(inst.i64, *r.rib.rex(0x83, rrr=rrr, w=1))
+    I64.enc(inst.i64, *r.rid.rex(0x81, rrr=rrr, w=1))
+    I64.enc(inst.i32, *r.rib.rex(0x83, rrr=rrr))
+    I64.enc(inst.i32, *r.rid.rex(0x81, rrr=rrr))
+    I64.enc(inst.i32, *r.rib(0x83, rrr=rrr))
+    I64.enc(inst.i32, *r.rid(0x81, rrr=rrr))
+
+# TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as
+# band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks.
+
+# Immediate constants. As above, REX encodings precede the REX-less ones.
+I32.enc(base.iconst.i32, *r.puid(0xb8))
+
+I64.enc(base.iconst.i32, *r.puid.rex(0xb8))
+I64.enc(base.iconst.i32, *r.puid(0xb8))
+# movl zero-extends to 64 bits, so it covers unsigned 32-bit i64 constants.
+I64.enc(base.iconst.i64, *r.puid.rex(0xb8),
+        instp=IsUnsignedInt(UnaryImm.imm, 32))
+I64.enc(base.iconst.i64, *r.puid(0xb8),
+        instp=IsUnsignedInt(UnaryImm.imm, 32))
+# Sign-extended 32-bit immediate: 0xc7 /0 with REX.W (the `uid` recipe).
+I64.enc(base.iconst.i64, *r.uid.rex(0xc7, rrr=0, w=1))
+# Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
+I64.enc(base.iconst.i64, *r.puiq.rex(0xb8, w=1))
 
 # 32-bit shifts and rotates.
 # Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
@@ -73,3 +108,4 @@ I32.enc(base.sload8.i32.i32, *r.ldDisp32(0x0f, 0xbe))
 I32.enc(base.call, *r.call_id(0xe8))
 I32.enc(base.call_indirect.i32, *r.call_r(0xff, rrr=2))
 I32.enc(base.x_return, *r.ret(0xc3))
+I64.enc(base.x_return, *r.ret(0xc3))
diff --git a/lib/cretonne/meta/isa/intel/recipes.py b/lib/cretonne/meta/isa/intel/recipes.py
index ebeacc5968..2d1fc62d80 100644
--- a/lib/cretonne/meta/isa/intel/recipes.py
+++ b/lib/cretonne/meta/isa/intel/recipes.py
@@ -160,6 +160,33 @@ class TailRecipe:
                 emit=replace_put_op(self.emit, name))
         return (self.recipes[name], bits)
 
+    def rex(self, *ops, **kwargs):
+        # type: (*int, **int) -> Tuple[EncRecipe, int]
+        """
+        Create a REX encoding recipe and encoding bits for the opcode bytes in
+        `ops`.
+
+        The recipe will always generate a REX prefix, whether it is required or
+        not. For instructions that don't require a REX prefix, two encodings
+        should be added: One with REX and one without.
+        """
+        rrr = kwargs.get('rrr', 0)
+        w = kwargs.get('w', 0)
+        name, bits = decode_ops(ops, rrr, w)
+        name = 'Rex' + name
+        if name not in self.recipes:
+            self.recipes[name] = EncRecipe(
+                name + self.name,
+                self.format,
+                1 + len(ops) + self.size,
+                ins=self.ins,
+                outs=self.outs,
+                branch_range=self.branch_range,
+                instp=self.instp,
+                isap=self.isap,
+                emit=replace_put_op(self.emit, name))
+        return (self.recipes[name], bits)
+
 
 # XX /r
 rr = TailRecipe(
@@ -208,11 +235,21 @@ rid = TailRecipe(
         sink.put4(imm as u32);
         ''')
 
-# XX+rd id unary with 32-bit immediate.
+# XX /n id with 32-bit immediate sign-extended. UnaryImm version.
 uid = TailRecipe(
-        'uid', UnaryImm, size=4, ins=(), outs=GPR,
+        'uid', UnaryImm, size=5, ins=(), outs=GPR,
         instp=IsSignedInt(UnaryImm.imm, 32),
         emit='''
+        PUT_OP(bits, rex1(out_reg0), sink);
+        modrm_r_bits(out_reg0, bits, sink);
+        let imm: i64 = imm.into();
+        sink.put4(imm as u32);
+        ''')
+
+# XX+rd id unary with 32-bit immediate. Note no recipe predicate.
+puid = TailRecipe(
+        'uid', UnaryImm, size=4, ins=(), outs=GPR,
+        emit='''
         // The destination register is encoded in the low bits of the opcode.
         // No ModR/M.
         PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink);
@@ -220,6 +257,15 @@ uid = TailRecipe(
         sink.put4(imm as u32);
         ''')
 
+# XX+rd iq unary with 64-bit immediate.
+puiq = TailRecipe(
+        'uiq', UnaryImm, size=8, ins=(), outs=GPR,
+        emit='''
+        PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink);
+        let imm: i64 = imm.into();
+        sink.put8(imm as u64);
+        ''')
+
 #
 # Store recipes.
 #
diff --git a/lib/cretonne/src/isa/intel/binemit.rs b/lib/cretonne/src/isa/intel/binemit.rs
index e2cb5396bc..9db0ee1234 100644
--- a/lib/cretonne/src/isa/intel/binemit.rs
+++ b/lib/cretonne/src/isa/intel/binemit.rs
@@ -44,6 +44,16 @@ fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
     BASE_REX | b | (r << 2)
 }
 
+// Write the REX prefix byte to `sink`.
+//
+// `rex` carries the R, X, and B bits already computed from the registers by the helpers above;
+// the W bit is taken from bit 15 of `bits` and placed in bit 3 of the prefix byte.
+fn rex_prefix<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(rex & 0xf8, BASE_REX);
+    let rex_w: u8 = if bits & 0x8000 != 0 { 0x08 } else { 0x00 };
+    sink.put1(rex | rex_w);
+}
+
 // Emit a single-byte opcode with no REX prefix.
 fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
     debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*");
@@ -51,6 +61,13 @@ fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
     sink.put1(bits as u8);
 }
 
+// Emit a single-byte opcode with a REX prefix; bit 15 of `bits` (REX.W) is allowed here.
+fn put_rexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
+    debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for RexOp1*");
+    rex_prefix(bits, rex, sink);
+    sink.put1(bits as u8);
+}
+
 // Emit two-byte opcode: 0F XX
 fn put_op2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
     debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*");