Start adding Intel 64-bit encodings.

Add a TailRecipe.rex() method which creates an encoding recipe with a REX prefix. Define I64 encodings with REX.W for i64 operations and with/without REX for i32 ops. Only test the with-REX encodings for now. We don't yet have an instruction shrinking pass that can select the non-REX encodings.
2017-07-10 16:16:22 -07:00
parent 263779ac56
commit 6ae4eb82f8
4 changed files with 365 additions and 18 deletions
--- a/filetests/isa/intel/binary64.cton
+++ b/filetests/isa/intel/binary64.cton
@@ -0,0 +1,248 @@
 ; binary emission of 64-bit code.
 test binemit
 set is_64bit
 isa intel
 ; The binary encodings can be verified with the command:
 ;
 ;   sed -ne 's/^ *; asm: *//p' filetests/isa/intel/binary64.cton | llvm-mc -show-encoding -triple=x86_64
 ;
 ; Tests for i64 instructions.
 function %I64() {
    fn0 = function %foo()
    sig0 = signature()
 ebb0:
    ; Integer Constants.
    ; asm: movq $0x01020304f1f2f3f4, %rcx
    [-,%rcx]            v1 = iconst.i64 0x0102_0304_f1f2_f3f4 ; bin: 48 b9 01020304f1f2f3f4
    ; asm: movq $0x11020304f1f2f3f4, %rsi
    [-,%rsi]            v2 = iconst.i64 0x1102_0304_f1f2_f3f4 ; bin: 48 be 11020304f1f2f3f4
    ; asm: movq $0x21020304f1f2f3f4, %r10
    [-,%r10]            v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4
    ; asm: movl $0xff001122, %r8d                             # 32-bit zero-extended constant.
    [-,%r8]             v4 = iconst.i64 0xff00_1122           ; bin: 41 b8 ff001122
    ; asm: movq $0xffffffff88001122, %r14                     # 32-bit sign-extended constant.
    [-,%r14]            v5 = iconst.i64 0xffff_ffff_8800_1122 ; bin: 49 c7 c6 88001122
    ; Integer Register-Register Operations.
    ; asm: addq %rsi, %rcx
    [-,%rcx]             v10 = iadd v1, v2       ; bin: 48 01 f1
    ; asm: addq %r10, %rsi
    [-,%rsi]             v11 = iadd v2, v3       ; bin: 4c 01 d6
    ; asm: addq %rcx, %r10
    [-,%r10]             v12 = iadd v3, v1       ; bin: 49 01 ca
    ; asm: subq %rsi, %rcx
    [-,%rcx]             v20 = isub v1, v2       ; bin: 48 29 f1
    ; asm: subq %r10, %rsi
    [-,%rsi]             v21 = isub v2, v3       ; bin: 4c 29 d6
    ; asm: subq %rcx, %r10
    [-,%r10]             v22 = isub v3, v1       ; bin: 49 29 ca
    ; asm: andq %rsi, %rcx
    [-,%rcx]             v30 = band v1, v2       ; bin: 48 21 f1
    ; asm: andq %r10, %rsi
    [-,%rsi]             v31 = band v2, v3       ; bin: 4c 21 d6
    ; asm: andq %rcx, %r10
    [-,%r10]             v32 = band v3, v1       ; bin: 49 21 ca
    ; asm: orq %rsi, %rcx
    [-,%rcx]             v40 = bor v1, v2       ; bin: 48 09 f1
    ; asm: orq %r10, %rsi
    [-,%rsi]             v41 = bor v2, v3       ; bin: 4c 09 d6
    ; asm: orq %rcx, %r10
    [-,%r10]             v42 = bor v3, v1       ; bin: 49 09 ca
    ; asm: xorq %rsi, %rcx
    [-,%rcx]             v50 = bxor v1, v2       ; bin: 48 31 f1
    ; asm: xorq %r10, %rsi
    [-,%rsi]             v51 = bxor v2, v3       ; bin: 4c 31 d6
    ; asm: xorq %rcx, %r10
    [-,%r10]             v52 = bxor v3, v1       ; bin: 49 31 ca
    ; asm: movq %rsi, %rcx
    [-,%rcx]             v60 = copy v2           ; bin: 48 89 f1
    ; asm: movq %r10, %rsi
    [-,%rsi]             v61 = copy v3           ; bin: 4c 89 d6
    ; asm: movq %rcx, %r10
    [-,%r10]             v62 = copy v1           ; bin: 49 89 ca
    ; Integer Register-Immediate Operations.
    ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits.
    ; Some take 8-bit immediates that are sign-extended to 64 bits.
    ; asm: addq $-100000, %rcx
    [-,%rcx]     v70 = iadd_imm v1, -100000     ; bin: 48 81 c1 fffe7960
    ; asm: addq $100000, %rsi
    [-,%rsi]     v71 = iadd_imm v2, 100000      ; bin: 48 81 c6 000186a0
    ; asm: addq $0x7fffffff, %r10
    [-,%r10]     v72 = iadd_imm v3, 0x7fff_ffff ; bin: 49 81 c2 7fffffff
    ; asm: addq $100, %r8
    [-,%r8]      v73 = iadd_imm v4, 100         ; bin: 49 83 c0 64
    ; asm: addq $-100, %r14
    [-,%r14]     v74 = iadd_imm v5, -100        ; bin: 49 83 c6 9c
    ; asm: andq $-100000, %rcx
    [-,%rcx]     v80 = band_imm v1, -100000     ; bin: 48 81 e1 fffe7960
    ; asm: andq $100000, %rsi
    [-,%rsi]     v81 = band_imm v2, 100000      ; bin: 48 81 e6 000186a0
    ; asm: andq $0x7fffffff, %r10
    [-,%r10]     v82 = band_imm v3, 0x7fff_ffff ; bin: 49 81 e2 7fffffff
    ; asm: andq $100, %r8
    [-,%r8]      v83 = band_imm v4, 100         ; bin: 49 83 e0 64
    ; asm: andq $-100, %r14
    [-,%r14]     v84 = band_imm v5, -100        ; bin: 49 83 e6 9c
    ; asm: orq $-100000, %rcx
    [-,%rcx]     v90 = bor_imm v1, -100000      ; bin: 48 81 c9 fffe7960
    ; asm: orq $100000, %rsi
    [-,%rsi]     v91 = bor_imm v2, 100000       ; bin: 48 81 ce 000186a0
    ; asm: orq $0x7fffffff, %r10
    [-,%r10]     v92 = bor_imm v3, 0x7fff_ffff  ; bin: 49 81 ca 7fffffff
    ; asm: orq $100, %r8
    [-,%r8]      v93 = bor_imm v4, 100          ; bin: 49 83 c8 64
    ; asm: orq $-100, %r14
    [-,%r14]     v94 = bor_imm v5, -100         ; bin: 49 83 ce 9c
    ; asm: xorq $-100000, %rcx
    [-,%rcx]     v100 = bxor_imm v1, -100000     ; bin: 48 81 f1 fffe7960
    ; asm: xorq $100000, %rsi
    [-,%rsi]     v101 = bxor_imm v2, 100000      ; bin: 48 81 f6 000186a0
    ; asm: xorq $0x7fffffff, %r10
    [-,%r10]     v102 = bxor_imm v3, 0x7fff_ffff ; bin: 49 81 f2 7fffffff
    ; asm: xorq $100, %r8
    [-,%r8]      v103 = bxor_imm v4, 100         ; bin: 49 83 f0 64
    ; asm: xorq $-100, %r14
    [-,%r14]     v104 = bxor_imm v5, -100        ; bin: 49 83 f6 9c
    ; asm: ret
    return                                       ; bin: c3
 }
 ; Tests for i32 instructions in 64-bit mode.
 ;
 ; Note that many i32 instructions can be encoded both with and without a REX
 ; prefix if they only use the low 8 registers. Here, we are testing the REX
 ; encodings which are chosen by default. Switching to non-REX encodings should
 ; be done by an instruction shrinking pass.
 function %I32() {
    fn0 = function %foo()
    sig0 = signature()
 ebb0:
    ; Integer Constants.
    ; asm: movl $0x01020304, %ecx
    [-,%rcx]            v1 = iconst.i32 0x0102_0304           ; bin: 40 b9 01020304
    ; asm: movl $0x11020304, %esi
    [-,%rsi]            v2 = iconst.i32 0x1102_0304           ; bin: 40 be 11020304
    ; asm: movl $0x21020304, %r10d
    [-,%r10]            v3 = iconst.i32 0x2102_0304           ; bin: 41 ba 21020304
    ; asm: movl $0xff001122, %r8d
    [-,%r8]             v4 = iconst.i32 0xff00_1122           ; bin: 41 b8 ff001122
    ; asm: movl $0x88001122, %r14d
    [-,%r14]            v5 = iconst.i32 0xffff_ffff_8800_1122 ; bin: 41 be 88001122
    ; Integer Register-Register Operations.
    ; asm: addl %esi, %ecx
    [-,%rcx]             v10 = iadd v1, v2       ; bin: 40 01 f1
    ; asm: addl %r10d, %esi
    [-,%rsi]             v11 = iadd v2, v3       ; bin: 44 01 d6
    ; asm: addl %ecx, %r10d
    [-,%r10]             v12 = iadd v3, v1       ; bin: 41 01 ca
    ; asm: subl %esi, %ecx
    [-,%rcx]             v20 = isub v1, v2       ; bin: 40 29 f1
    ; asm: subl %r10d, %esi
    [-,%rsi]             v21 = isub v2, v3       ; bin: 44 29 d6
    ; asm: subl %ecx, %r10d
    [-,%r10]             v22 = isub v3, v1       ; bin: 41 29 ca
    ; asm: andl %esi, %ecx
    [-,%rcx]             v30 = band v1, v2       ; bin: 40 21 f1
    ; asm: andl %r10d, %esi
    [-,%rsi]             v31 = band v2, v3       ; bin: 44 21 d6
    ; asm: andl %ecx, %r10d
    [-,%r10]             v32 = band v3, v1       ; bin: 41 21 ca
    ; asm: orl %esi, %ecx
    [-,%rcx]             v40 = bor v1, v2       ; bin: 40 09 f1
    ; asm: orl %r10d, %esi
    [-,%rsi]             v41 = bor v2, v3       ; bin: 44 09 d6
    ; asm: orl %ecx, %r10d
    [-,%r10]             v42 = bor v3, v1       ; bin: 41 09 ca
    ; asm: xorl %esi, %ecx
    [-,%rcx]             v50 = bxor v1, v2       ; bin: 40 31 f1
    ; asm: xorl %r10d, %esi
    [-,%rsi]             v51 = bxor v2, v3       ; bin: 44 31 d6
    ; asm: xorl %ecx, %r10d
    [-,%r10]             v52 = bxor v3, v1       ; bin: 41 31 ca
    ; asm: movl %esi, %ecx
    [-,%rcx]             v60 = copy v2           ; bin: 40 89 f1
    ; asm: movl %r10d, %esi
    [-,%rsi]             v61 = copy v3           ; bin: 44 89 d6
    ; asm: movl %ecx, %r10d
    [-,%r10]             v62 = copy v1           ; bin: 41 89 ca
    ; Integer Register-Immediate Operations.
    ; These 32-bit ops all use a 32-bit immediate.
    ; Some take 8-bit immediates that are sign-extended to 32 bits.
    ; asm: addl $-100000, %ecx
    [-,%rcx]     v70 = iadd_imm v1, -100000     ; bin: 40 81 c1 fffe7960
    ; asm: addl $100000, %esi
    [-,%rsi]     v71 = iadd_imm v2, 100000      ; bin: 40 81 c6 000186a0
    ; asm: addl $0x7fffffff, %r10d
    [-,%r10]     v72 = iadd_imm v3, 0x7fff_ffff ; bin: 41 81 c2 7fffffff
    ; asm: addl $100, %r8d
    [-,%r8]      v73 = iadd_imm v4, 100         ; bin: 41 83 c0 64
    ; asm: addl $-100, %r14d
    [-,%r14]     v74 = iadd_imm v5, -100        ; bin: 41 83 c6 9c
    ; asm: andl $-100000, %ecx
    [-,%rcx]     v80 = band_imm v1, -100000     ; bin: 40 81 e1 fffe7960
    ; asm: andl $100000, %esi
    [-,%rsi]     v81 = band_imm v2, 100000      ; bin: 40 81 e6 000186a0
    ; asm: andl $0x7fffffff, %r10d
    [-,%r10]     v82 = band_imm v3, 0x7fff_ffff ; bin: 41 81 e2 7fffffff
    ; asm: andl $100, %r8d
    [-,%r8]      v83 = band_imm v4, 100         ; bin: 41 83 e0 64
    ; asm: andl $-100, %r14d
    [-,%r14]     v84 = band_imm v5, -100        ; bin: 41 83 e6 9c
    ; asm: orl $-100000, %ecx
    [-,%rcx]     v90 = bor_imm v1, -100000      ; bin: 40 81 c9 fffe7960
    ; asm: orl $100000, %esi
    [-,%rsi]     v91 = bor_imm v2, 100000       ; bin: 40 81 ce 000186a0
    ; asm: orl $0x7fffffff, %r10d
    [-,%r10]     v92 = bor_imm v3, 0x7fff_ffff  ; bin: 41 81 ca 7fffffff
    ; asm: orl $100, %r8d
    [-,%r8]      v93 = bor_imm v4, 100          ; bin: 41 83 c8 64
    ; asm: orl $-100, %r14d
    [-,%r14]     v94 = bor_imm v5, -100         ; bin: 41 83 ce 9c
    ; asm: xorl $-100000, %ecx
    [-,%rcx]     v100 = bxor_imm v1, -100000     ; bin: 40 81 f1 fffe7960
    ; asm: xorl $100000, %esi
    [-,%rsi]     v101 = bxor_imm v2, 100000      ; bin: 40 81 f6 000186a0
    ; asm: xorl $0x7fffffff, %r10d
    [-,%r10]     v102 = bxor_imm v3, 0x7fff_ffff ; bin: 41 81 f2 7fffffff
    ; asm: xorl $100, %r8d
    [-,%r8]      v103 = bxor_imm v4, 100         ; bin: 41 83 f0 64
    ; asm: xorl $-100, %r14d
    [-,%r14]     v104 = bxor_imm v5, -100        ; bin: 41 83 f6 9c
    ; asm: ret
    return                                       ; bin: c3
 }
--- a/lib/cretonne/meta/isa/intel/encodings.py
+++ b/lib/cretonne/meta/isa/intel/encodings.py
@@ -2,30 +2,65 @@
 Intel Encodings.
 """
 from __future__ import absolute_import
 from cdsl.predicates import IsUnsignedInt
 from base import instructions as base
-from .defs import I32
+from base.formats import UnaryImm
 from .defs import I32, I64
 from . import recipes as r
-I32.enc(base.iadd.i32, *r.rr(0x01))
+for inst,           opc in [
-I32.enc(base.isub.i32, *r.rr(0x29))
+        (base.iadd, 0x01),
        (base.isub, 0x29),
        (base.band, 0x21),
        (base.bor,  0x09),
        (base.bxor, 0x31)]:
    I32.enc(inst.i32, *r.rr(opc))
-I32.enc(base.band.i32, *r.rr(0x21))
+    I64.enc(inst.i64, *r.rr.rex(opc, w=1))
-I32.enc(base.bor.i32,  *r.rr(0x09))
+    I64.enc(inst.i32, *r.rr.rex(opc))
-I32.enc(base.bxor.i32, *r.rr(0x31))
+    # REX-less encoding must come after REX encoding so we don't use it by
    # default. Otherwise reg-alloc would never use r8 and up.
    I64.enc(inst.i32, *r.rr(opc))
 I32.enc(base.copy.i32, *r.ur(0x89))
-# Immediate instructions with sign-extended 8-bit and 32-bit immediate.
+I64.enc(base.copy.i64, *r.ur.rex(0x89, w=1))
-for inst,                   rrr in [
+I64.enc(base.copy.i32, *r.ur.rex(0x89))
-        (base.iadd_imm.i32, 0),
+I64.enc(base.copy.i32, *r.ur(0x89))
        (base.band_imm.i32, 4),
        (base.bor_imm.i32,  1),
        (base.bxor_imm.i32, 6)]:
    I32.enc(inst, *r.rib(0x83, rrr=rrr))
    I32.enc(inst, *r.rid(0x81, rrr=rrr))
-# Immediate constant.
+# Immediate instructions with sign-extended 8-bit and 32-bit immediate.
-I32.enc(base.iconst.i32, *r.uid(0xb8))
+for inst,               rrr in [
        (base.iadd_imm, 0),
        (base.band_imm, 4),
        (base.bor_imm,  1),
        (base.bxor_imm, 6)]:
    I32.enc(inst.i32, *r.rib(0x83, rrr=rrr))
    I32.enc(inst.i32, *r.rid(0x81, rrr=rrr))
    I64.enc(inst.i64, *r.rib.rex(0x83, rrr=rrr, w=1))
    I64.enc(inst.i64, *r.rid.rex(0x81, rrr=rrr, w=1))
    I64.enc(inst.i32, *r.rib.rex(0x83, rrr=rrr))
    I64.enc(inst.i32, *r.rid.rex(0x81, rrr=rrr))
    I64.enc(inst.i32, *r.rib(0x83, rrr=rrr))
    I64.enc(inst.i32, *r.rid(0x81, rrr=rrr))
 # TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as
 # band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks.
 # Immediate constants.
 I32.enc(base.iconst.i32, *r.puid(0xb8))
 I64.enc(base.iconst.i32, *r.puid.rex(0xb8))
 I64.enc(base.iconst.i32, *r.puid(0xb8))
 # The 32-bit immediate movl also zero-extends to 64 bits.
 I64.enc(base.iconst.i64, *r.puid.rex(0xb8),
        instp=IsUnsignedInt(UnaryImm.imm, 32))
 I64.enc(base.iconst.i64, *r.puid(0xb8),
        instp=IsUnsignedInt(UnaryImm.imm, 32))
 # Sign-extended 32-bit immediate.
 I64.enc(base.iconst.i64, *r.uid.rex(0xc7, rrr=0, w=1))
 # Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
 I64.enc(base.iconst.i64, *r.puiq.rex(0xb8, w=1))
 # 32-bit shifts and rotates.
 # Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
@@ -73,3 +108,4 @@ I32.enc(base.sload8.i32.i32, *r.ldDisp32(0x0f, 0xbe))
 I32.enc(base.call, *r.call_id(0xe8))
 I32.enc(base.call_indirect.i32, *r.call_r(0xff, rrr=2))
 I32.enc(base.x_return, *r.ret(0xc3))
 I64.enc(base.x_return, *r.ret(0xc3))
--- a/lib/cretonne/meta/isa/intel/recipes.py
+++ b/lib/cretonne/meta/isa/intel/recipes.py
@@ -160,6 +160,33 @@ class TailRecipe:
                emit=replace_put_op(self.emit, name))
        return (self.recipes[name], bits)
    def rex(self, *ops, **kwargs):
        # type: (*int, **int) -> Tuple[EncRecipe, int]
        """
        Return the REX-prefixed variant of this recipe together with the
        encoding bits for the opcode bytes in `ops`.

        The optional keyword arguments `rrr` and `w` both default to 0 and are
        forwarded to `decode_ops`.

        A recipe produced here emits a REX prefix unconditionally, even when
        the instruction does not need one. Instructions that are also
        encodable without REX should therefore get two encodings: one built
        with this method and one built without it.

        Recipes are created on first use and cached in `self.recipes` under
        their `Rex`-prefixed opcode name.
        """
        opcode_name, bits = decode_ops(
                ops, kwargs.get('rrr', 0), kwargs.get('w', 0))
        rex_name = 'Rex' + opcode_name
        try:
            recipe = self.recipes[rex_name]
        except KeyError:
            # One extra byte for the REX prefix, then the opcode bytes, then
            # whatever the tail of the recipe needs.
            total_size = 1 + len(ops) + self.size
            recipe = self.recipes[rex_name] = EncRecipe(
                rex_name + self.name,
                self.format,
                total_size,
                ins=self.ins,
                outs=self.outs,
                branch_range=self.branch_range,
                instp=self.instp,
                isap=self.isap,
                emit=replace_put_op(self.emit, rex_name))
        return (recipe, bits)
 # XX /r
 rr = TailRecipe(
@@ -208,11 +235,21 @@ rid = TailRecipe(
        sink.put4(imm as u32);
        ''')
-# XX+rd id unary with 32-bit immediate.
+# XX /n id with 32-bit immediate sign-extended. UnaryImm version.
 uid = TailRecipe(
-        'uid', UnaryImm, size=4, ins=(), outs=GPR,
+        'uid', UnaryImm, size=5, ins=(), outs=GPR,
        instp=IsSignedInt(UnaryImm.imm, 32),
        emit='''
        PUT_OP(bits, rex1(out_reg0), sink);
        modrm_r_bits(out_reg0, bits, sink);
        let imm: i64 = imm.into();
        sink.put4(imm as u32);
        ''')
 # XX+rd id unary with 32-bit immediate. Note no recipe predicate.
 puid = TailRecipe(
        'uid', UnaryImm, size=4, ins=(), outs=GPR,
        emit='''
        // The destination register is encoded in the low bits of the opcode.
        // No ModR/M.
        PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink);
@@ -220,6 +257,15 @@ uid = TailRecipe(
        sink.put4(imm as u32);
        ''')
 # XX+rd iq unary with 64-bit immediate.
 # The low 3 bits of the destination register are OR'ed into the opcode bits,
 # so no ModR/M byte is emitted. `size=8` covers just the 8-byte immediate
 # written by `put8`.
 puiq = TailRecipe(
        'uiq', UnaryImm, size=8, ins=(), outs=GPR,
        emit='''
        PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink);
        let imm: i64 = imm.into();
        sink.put8(imm as u64);
        ''')
 #
 # Store recipes.
 #
--- a/lib/cretonne/src/isa/intel/binemit.rs
+++ b/lib/cretonne/src/isa/intel/binemit.rs
@@ -44,6 +44,16 @@ fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
    BASE_REX | b | (r << 2)
 }
 // Write a REX prefix byte to `sink`.
 //
 // `rex` already holds the base REX pattern together with the R, X, and B bits that the
 // functions above compute from the register operands. The W bit is not part of `rex`; it is
 // taken from bit 15 of `bits` and merged in as bit 3 of the prefix.
 fn rex_prefix<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    // Only the three low bits may differ from the base pattern; in particular W must be clear.
    debug_assert_eq!(rex & 0xf8, BASE_REX);
    let rex_w: u8 = if (bits & 0x8000) != 0 { 0x08 } else { 0x00 };
    sink.put1(rex | rex_w);
 }
 // Emit a single-byte opcode with no REX prefix.
 fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*");
@@ -51,6 +61,13 @@ fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    sink.put1(bits as u8);
 }
 // Emit a single-byte opcode with REX prefix.
 //
 // The low byte of `bits` is the opcode. Bits 8-11 of `bits` must be clear for this opcode
 // form; bit 15 (the W bit) may be set and is folded into the prefix by `rex_prefix`.
 fn put_rexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for RexOp1*");
    rex_prefix(bits, rex, sink);
    sink.put1(bits as u8);
 }
 // Emit two-byte opcode: 0F XX
 fn put_op2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*");