Start adding Intel 64-bit encodings.

Add a TailRecipe.rex() method which creates an encoding recipe with a
REX prefix.

Define I64 encodings with REX.W for i64 operations and with/without REX
for i32 ops. Only test the with-REX encodings for now. We don't yet have
an instruction shrinking pass that can select the non-REX encodings.
This commit is contained in:
Jakob Stoklund Olesen
2017-07-10 16:16:22 -07:00
parent 263779ac56
commit 6ae4eb82f8
4 changed files with 365 additions and 18 deletions

View File

@@ -0,0 +1,248 @@
; binary emission of 64-bit code.
test binemit
set is_64bit
isa intel
; The binary encodings can be verified with the command:
;
; sed -ne 's/^ *; asm: *//p' filetests/isa/intel/binary64.cton | llvm-mc -show-encoding -triple=x86_64
;
; Tests for i64 instructions.
function %I64() {
fn0 = function %foo()
sig0 = signature()
ebb0:
; Integer Constants.
; asm: movq $0x01020304f1f2f3f4, %rcx
[-,%rcx] v1 = iconst.i64 0x0102_0304_f1f2_f3f4 ; bin: 48 b9 01020304f1f2f3f4
; asm: movq $0x11020304f1f2f3f4, %rsi
[-,%rsi] v2 = iconst.i64 0x1102_0304_f1f2_f3f4 ; bin: 48 be 11020304f1f2f3f4
; asm: movq $0x21020304f1f2f3f4, %r10
[-,%r10] v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4
; asm: movl $0xff001122, %r8d # 32-bit zero-extended constant.
[-,%r8] v4 = iconst.i64 0xff00_1122 ; bin: 41 b8 ff001122
; asm: movq $0xffffffff88001122, %r14 # 32-bit sign-extended constant.
[-,%r14] v5 = iconst.i64 0xffff_ffff_8800_1122 ; bin: 49 c7 c6 88001122
; Integer Register-Register Operations.
; asm: addq %rsi, %rcx
[-,%rcx] v10 = iadd v1, v2 ; bin: 48 01 f1
; asm: addq %r10, %rsi
[-,%rsi] v11 = iadd v2, v3 ; bin: 4c 01 d6
; asm: addq %rcx, %r10
[-,%r10] v12 = iadd v3, v1 ; bin: 49 01 ca
; asm: subq %rsi, %rcx
[-,%rcx] v20 = isub v1, v2 ; bin: 48 29 f1
; asm: subq %r10, %rsi
[-,%rsi] v21 = isub v2, v3 ; bin: 4c 29 d6
; asm: subq %rcx, %r10
[-,%r10] v22 = isub v3, v1 ; bin: 49 29 ca
; asm: andq %rsi, %rcx
[-,%rcx] v30 = band v1, v2 ; bin: 48 21 f1
; asm: andq %r10, %rsi
[-,%rsi] v31 = band v2, v3 ; bin: 4c 21 d6
; asm: andq %rcx, %r10
[-,%r10] v32 = band v3, v1 ; bin: 49 21 ca
; asm: orq %rsi, %rcx
[-,%rcx] v40 = bor v1, v2 ; bin: 48 09 f1
; asm: orq %r10, %rsi
[-,%rsi] v41 = bor v2, v3 ; bin: 4c 09 d6
; asm: orq %rcx, %r10
[-,%r10] v42 = bor v3, v1 ; bin: 49 09 ca
; asm: xorq %rsi, %rcx
[-,%rcx] v50 = bxor v1, v2 ; bin: 48 31 f1
; asm: xorq %r10, %rsi
[-,%rsi] v51 = bxor v2, v3 ; bin: 4c 31 d6
; asm: xorq %rcx, %r10
[-,%r10] v52 = bxor v3, v1 ; bin: 49 31 ca
; asm: movq %rsi, %rcx
[-,%rcx] v60 = copy v2 ; bin: 48 89 f1
; asm: movq %r10, %rsi
[-,%rsi] v61 = copy v3 ; bin: 4c 89 d6
; asm: movq %rcx, %r10
[-,%r10] v62 = copy v1 ; bin: 49 89 ca
; Integer Register-Immediate Operations.
; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits.
; Some take 8-bit immediates that are sign-extended to 64 bits.
; asm: addq $-100000, %rcx
[-,%rcx] v70 = iadd_imm v1, -100000 ; bin: 48 81 c1 fffe7960
; asm: addq $100000, %rsi
[-,%rsi] v71 = iadd_imm v2, 100000 ; bin: 48 81 c6 000186a0
; asm: addq $0x7fffffff, %r10
[-,%r10] v72 = iadd_imm v3, 0x7fff_ffff ; bin: 49 81 c2 7fffffff
; asm: addq $100, %r8
[-,%r8] v73 = iadd_imm v4, 100 ; bin: 49 83 c0 64
; asm: addq $-100, %r14
[-,%r14] v74 = iadd_imm v5, -100 ; bin: 49 83 c6 9c
; asm: andq $-100000, %rcx
[-,%rcx] v80 = band_imm v1, -100000 ; bin: 48 81 e1 fffe7960
; asm: andq $100000, %rsi
[-,%rsi] v81 = band_imm v2, 100000 ; bin: 48 81 e6 000186a0
; asm: andq $0x7fffffff, %r10
[-,%r10] v82 = band_imm v3, 0x7fff_ffff ; bin: 49 81 e2 7fffffff
; asm: andq $100, %r8
[-,%r8] v83 = band_imm v4, 100 ; bin: 49 83 e0 64
; asm: andq $-100, %r14
[-,%r14] v84 = band_imm v5, -100 ; bin: 49 83 e6 9c
; asm: orq $-100000, %rcx
[-,%rcx] v90 = bor_imm v1, -100000 ; bin: 48 81 c9 fffe7960
; asm: orq $100000, %rsi
[-,%rsi] v91 = bor_imm v2, 100000 ; bin: 48 81 ce 000186a0
; asm: orq $0x7fffffff, %r10
[-,%r10] v92 = bor_imm v3, 0x7fff_ffff ; bin: 49 81 ca 7fffffff
; asm: orq $100, %r8
[-,%r8] v93 = bor_imm v4, 100 ; bin: 49 83 c8 64
; asm: orq $-100, %r14
[-,%r14] v94 = bor_imm v5, -100 ; bin: 49 83 ce 9c
; asm: xorq $-100000, %rcx
[-,%rcx] v100 = bxor_imm v1, -100000 ; bin: 48 81 f1 fffe7960
; asm: xorq $100000, %rsi
[-,%rsi] v101 = bxor_imm v2, 100000 ; bin: 48 81 f6 000186a0
; asm: xorq $0x7fffffff, %r10
[-,%r10] v102 = bxor_imm v3, 0x7fff_ffff ; bin: 49 81 f2 7fffffff
; asm: xorq $100, %r8
[-,%r8] v103 = bxor_imm v4, 100 ; bin: 49 83 f0 64
; asm: xorq $-100, %r14
[-,%r14] v104 = bxor_imm v5, -100 ; bin: 49 83 f6 9c
; The `asm:` line for the return is kept directly above it so that the
; extracted assembly stays in the same order as the `bin:` bytes.
; asm: ret
return ; bin: c3
}
; Tests for i32 instructions in 64-bit mode.
;
; Note that many i32 instructions can be encoded both with and without a REX
; prefix if they only use the low 8 registers. Here, we are testing the REX
; encodings which are chosen by default. Switching to non-REX encodings should
; be done by an instruction shrinking pass.
function %I32() {
fn0 = function %foo()
sig0 = signature()
ebb0:
; Integer Constants.
; The leading 40/41/44 byte in each `bin:` below is the REX prefix that these
; encodings always emit, even when only the low 8 registers are used.
; asm: movl $0x01020304, %ecx
[-,%rcx] v1 = iconst.i32 0x0102_0304 ; bin: 40 b9 01020304
; asm: movl $0x11020304, %esi
[-,%rsi] v2 = iconst.i32 0x1102_0304 ; bin: 40 be 11020304
; asm: movl $0x21020304, %r10d
[-,%r10] v3 = iconst.i32 0x2102_0304 ; bin: 41 ba 21020304
; asm: movl $0xff001122, %r8d
[-,%r8] v4 = iconst.i32 0xff00_1122 ; bin: 41 b8 ff001122
; asm: movl $0x88001122, %r14d
[-,%r14] v5 = iconst.i32 0xffff_ffff_8800_1122 ; bin: 41 be 88001122
; Integer Register-Register Operations.
; asm: addl %esi, %ecx
[-,%rcx] v10 = iadd v1, v2 ; bin: 40 01 f1
; asm: addl %r10d, %esi
[-,%rsi] v11 = iadd v2, v3 ; bin: 44 01 d6
; asm: addl %ecx, %r10d
[-,%r10] v12 = iadd v3, v1 ; bin: 41 01 ca
; asm: subl %esi, %ecx
[-,%rcx] v20 = isub v1, v2 ; bin: 40 29 f1
; asm: subl %r10d, %esi
[-,%rsi] v21 = isub v2, v3 ; bin: 44 29 d6
; asm: subl %ecx, %r10d
[-,%r10] v22 = isub v3, v1 ; bin: 41 29 ca
; asm: andl %esi, %ecx
[-,%rcx] v30 = band v1, v2 ; bin: 40 21 f1
; asm: andl %r10d, %esi
[-,%rsi] v31 = band v2, v3 ; bin: 44 21 d6
; asm: andl %ecx, %r10d
[-,%r10] v32 = band v3, v1 ; bin: 41 21 ca
; asm: orl %esi, %ecx
[-,%rcx] v40 = bor v1, v2 ; bin: 40 09 f1
; asm: orl %r10d, %esi
[-,%rsi] v41 = bor v2, v3 ; bin: 44 09 d6
; asm: orl %ecx, %r10d
[-,%r10] v42 = bor v3, v1 ; bin: 41 09 ca
; asm: xorl %esi, %ecx
[-,%rcx] v50 = bxor v1, v2 ; bin: 40 31 f1
; asm: xorl %r10d, %esi
[-,%rsi] v51 = bxor v2, v3 ; bin: 44 31 d6
; asm: xorl %ecx, %r10d
[-,%r10] v52 = bxor v3, v1 ; bin: 41 31 ca
; asm: movl %esi, %ecx
[-,%rcx] v60 = copy v2 ; bin: 40 89 f1
; asm: movl %r10d, %esi
[-,%rsi] v61 = copy v3 ; bin: 44 89 d6
; asm: movl %ecx, %r10d
[-,%r10] v62 = copy v1 ; bin: 41 89 ca
; Integer Register-Immediate Operations.
; These 32-bit ops all use a full 32-bit immediate.
; Some take 8-bit immediates that are sign-extended to 32 bits.
; asm: addl $-100000, %ecx
[-,%rcx] v70 = iadd_imm v1, -100000 ; bin: 40 81 c1 fffe7960
; asm: addl $100000, %esi
[-,%rsi] v71 = iadd_imm v2, 100000 ; bin: 40 81 c6 000186a0
; asm: addl $0x7fffffff, %r10d
[-,%r10] v72 = iadd_imm v3, 0x7fff_ffff ; bin: 41 81 c2 7fffffff
; asm: addl $100, %r8d
[-,%r8] v73 = iadd_imm v4, 100 ; bin: 41 83 c0 64
; asm: addl $-100, %r14d
[-,%r14] v74 = iadd_imm v5, -100 ; bin: 41 83 c6 9c
; asm: andl $-100000, %ecx
[-,%rcx] v80 = band_imm v1, -100000 ; bin: 40 81 e1 fffe7960
; asm: andl $100000, %esi
[-,%rsi] v81 = band_imm v2, 100000 ; bin: 40 81 e6 000186a0
; asm: andl $0x7fffffff, %r10d
[-,%r10] v82 = band_imm v3, 0x7fff_ffff ; bin: 41 81 e2 7fffffff
; asm: andl $100, %r8d
[-,%r8] v83 = band_imm v4, 100 ; bin: 41 83 e0 64
; asm: andl $-100, %r14d
[-,%r14] v84 = band_imm v5, -100 ; bin: 41 83 e6 9c
; asm: orl $-100000, %ecx
[-,%rcx] v90 = bor_imm v1, -100000 ; bin: 40 81 c9 fffe7960
; asm: orl $100000, %esi
[-,%rsi] v91 = bor_imm v2, 100000 ; bin: 40 81 ce 000186a0
; asm: orl $0x7fffffff, %r10d
[-,%r10] v92 = bor_imm v3, 0x7fff_ffff ; bin: 41 81 ca 7fffffff
; asm: orl $100, %r8d
[-,%r8] v93 = bor_imm v4, 100 ; bin: 41 83 c8 64
; asm: orl $-100, %r14d
[-,%r14] v94 = bor_imm v5, -100 ; bin: 41 83 ce 9c
; asm: xorl $-100000, %ecx
[-,%rcx] v100 = bxor_imm v1, -100000 ; bin: 40 81 f1 fffe7960
; asm: xorl $100000, %esi
[-,%rsi] v101 = bxor_imm v2, 100000 ; bin: 40 81 f6 000186a0
; asm: xorl $0x7fffffff, %r10d
[-,%r10] v102 = bxor_imm v3, 0x7fff_ffff ; bin: 41 81 f2 7fffffff
; asm: xorl $100, %r8d
[-,%r8] v103 = bxor_imm v4, 100 ; bin: 41 83 f0 64
; asm: xorl $-100, %r14d
[-,%r14] v104 = bxor_imm v5, -100 ; bin: 41 83 f6 9c
; The `asm:` line for the return is kept directly above it so that the
; extracted assembly stays in the same order as the `bin:` bytes.
; asm: ret
return ; bin: c3
}

View File

@@ -2,30 +2,65 @@
Intel Encodings. Intel Encodings.
""" """
from __future__ import absolute_import from __future__ import absolute_import
from cdsl.predicates import IsUnsignedInt
from base import instructions as base from base import instructions as base
from .defs import I32 from base.formats import UnaryImm
from .defs import I32, I64
from . import recipes as r from . import recipes as r
I32.enc(base.iadd.i32, *r.rr(0x01)) for inst, opc in [
I32.enc(base.isub.i32, *r.rr(0x29)) (base.iadd, 0x01),
(base.isub, 0x29),
(base.band, 0x21),
(base.bor, 0x09),
(base.bxor, 0x31)]:
I32.enc(inst.i32, *r.rr(opc))
I32.enc(base.band.i32, *r.rr(0x21)) I64.enc(inst.i64, *r.rr.rex(opc, w=1))
I32.enc(base.bor.i32, *r.rr(0x09)) I64.enc(inst.i32, *r.rr.rex(opc))
I32.enc(base.bxor.i32, *r.rr(0x31)) # REX-less encoding must come after REX encoding so we don't use it by
# default. Otherwise reg-alloc would never use r8 and up.
I64.enc(inst.i32, *r.rr(opc))
I32.enc(base.copy.i32, *r.ur(0x89)) I32.enc(base.copy.i32, *r.ur(0x89))
# Immediate instructions with sign-extended 8-bit and 32-bit immediate. I64.enc(base.copy.i64, *r.ur.rex(0x89, w=1))
for inst, rrr in [ I64.enc(base.copy.i32, *r.ur.rex(0x89))
(base.iadd_imm.i32, 0), I64.enc(base.copy.i32, *r.ur(0x89))
(base.band_imm.i32, 4),
(base.bor_imm.i32, 1),
(base.bxor_imm.i32, 6)]:
I32.enc(inst, *r.rib(0x83, rrr=rrr))
I32.enc(inst, *r.rid(0x81, rrr=rrr))
# Immediate constant. # Immediate instructions with sign-extended 8-bit and 32-bit immediate.
I32.enc(base.iconst.i32, *r.uid(0xb8)) for inst, rrr in [
(base.iadd_imm, 0),
(base.band_imm, 4),
(base.bor_imm, 1),
(base.bxor_imm, 6)]:
I32.enc(inst.i32, *r.rib(0x83, rrr=rrr))
I32.enc(inst.i32, *r.rid(0x81, rrr=rrr))
I64.enc(inst.i64, *r.rib.rex(0x83, rrr=rrr, w=1))
I64.enc(inst.i64, *r.rid.rex(0x81, rrr=rrr, w=1))
I64.enc(inst.i32, *r.rib.rex(0x83, rrr=rrr))
I64.enc(inst.i32, *r.rid.rex(0x81, rrr=rrr))
I64.enc(inst.i32, *r.rib(0x83, rrr=rrr))
I64.enc(inst.i32, *r.rid(0x81, rrr=rrr))
# TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as
# band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks.
# Immediate constants.
I32.enc(base.iconst.i32, *r.puid(0xb8))
I64.enc(base.iconst.i32, *r.puid.rex(0xb8))
I64.enc(base.iconst.i32, *r.puid(0xb8))
# The 32-bit immediate movl also zero-extends to 64 bits.
I64.enc(base.iconst.i64, *r.puid.rex(0xb8),
instp=IsUnsignedInt(UnaryImm.imm, 32))
I64.enc(base.iconst.i64, *r.puid(0xb8),
instp=IsUnsignedInt(UnaryImm.imm, 32))
# Sign-extended 32-bit immediate.
I64.enc(base.iconst.i64, *r.uid.rex(0xc7, rrr=0, w=1))
# Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
I64.enc(base.iconst.i64, *r.puiq.rex(0xb8, w=1))
# 32-bit shifts and rotates. # 32-bit shifts and rotates.
# Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit # Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
@@ -73,3 +108,4 @@ I32.enc(base.sload8.i32.i32, *r.ldDisp32(0x0f, 0xbe))
I32.enc(base.call, *r.call_id(0xe8)) I32.enc(base.call, *r.call_id(0xe8))
I32.enc(base.call_indirect.i32, *r.call_r(0xff, rrr=2)) I32.enc(base.call_indirect.i32, *r.call_r(0xff, rrr=2))
I32.enc(base.x_return, *r.ret(0xc3)) I32.enc(base.x_return, *r.ret(0xc3))
I64.enc(base.x_return, *r.ret(0xc3))

View File

@@ -160,6 +160,33 @@ class TailRecipe:
emit=replace_put_op(self.emit, name)) emit=replace_put_op(self.emit, name))
return (self.recipes[name], bits) return (self.recipes[name], bits)
def rex(self, *ops, **kwargs):
    # type: (*int, **int) -> Tuple[EncRecipe, int]
    """
    Return the REX-prefixed variant of this recipe for the opcode bytes in
    `ops`, together with the matching encoding bits.

    The optional `rrr` and `w` keyword arguments both default to 0 and are
    passed on to `decode_ops` along with `ops`.

    The returned recipe emits a REX prefix unconditionally, even for
    instructions that could be encoded without one. Instructions that do
    not strictly need REX should therefore be given two encodings: one
    built through this method and one built from the plain recipe.

    Recipes are cached in `self.recipes` under their 'Rex'-prefixed name,
    so repeated calls that decode to the same name share one `EncRecipe`.
    """
    base_name, bits = decode_ops(
        ops, kwargs.get('rrr', 0), kwargs.get('w', 0))
    rex_name = 'Rex' + base_name

    if rex_name in self.recipes:
        return (self.recipes[rex_name], bits)

    # Size is one REX prefix byte, plus the opcode bytes, plus the tail.
    recipe = EncRecipe(
        rex_name + self.name,
        self.format,
        1 + len(ops) + self.size,
        ins=self.ins,
        outs=self.outs,
        branch_range=self.branch_range,
        instp=self.instp,
        isap=self.isap,
        emit=replace_put_op(self.emit, rex_name))
    self.recipes[rex_name] = recipe
    return (recipe, bits)
# XX /r # XX /r
rr = TailRecipe( rr = TailRecipe(
@@ -208,11 +235,21 @@ rid = TailRecipe(
sink.put4(imm as u32); sink.put4(imm as u32);
''') ''')
# XX+rd id unary with 32-bit immediate. # XX /n id with 32-bit immediate sign-extended. UnaryImm version.
uid = TailRecipe( uid = TailRecipe(
'uid', UnaryImm, size=4, ins=(), outs=GPR, 'uid', UnaryImm, size=5, ins=(), outs=GPR,
instp=IsSignedInt(UnaryImm.imm, 32), instp=IsSignedInt(UnaryImm.imm, 32),
emit=''' emit='''
PUT_OP(bits, rex1(out_reg0), sink);
modrm_r_bits(out_reg0, bits, sink);
let imm: i64 = imm.into();
sink.put4(imm as u32);
''')
# XX+rd id unary with 32-bit immediate. Note no recipe predicate.
puid = TailRecipe(
'uid', UnaryImm, size=4, ins=(), outs=GPR,
emit='''
// The destination register is encoded in the low bits of the opcode. // The destination register is encoded in the low bits of the opcode.
// No ModR/M. // No ModR/M.
PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink); PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink);
@@ -220,6 +257,15 @@ uid = TailRecipe(
sink.put4(imm as u32); sink.put4(imm as u32);
''') ''')
# XX+rd iq unary with 64-bit immediate.
#
# The destination register's low three bits are OR'ed into the opcode byte, so
# the emit template writes no ModR/M byte. The opcode is followed by the full
# 8-byte immediate, which is what `size=8` accounts for. No `instp` predicate
# is attached to this recipe.
puiq = TailRecipe(
'uiq', UnaryImm, size=8, ins=(), outs=GPR,
emit='''
PUT_OP(bits | (out_reg0 & 7), rex1(out_reg0), sink);
let imm: i64 = imm.into();
sink.put8(imm as u64);
''')
# #
# Store recipes. # Store recipes.
# #

View File

@@ -44,6 +44,16 @@ fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
BASE_REX | b | (r << 2) BASE_REX | b | (r << 2)
} }
// Write the REX prefix byte to `sink`.
//
// `rex` already holds the base REX pattern with the R, X, and B bits filled in from the
// registers by the helper functions above. Only the W bit is missing; it is taken from the top
// bit (bit 15) of the encoding `bits` and merged in here.
fn rex_prefix<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    // The caller must hand us a well-formed REX byte with W still clear.
    debug_assert_eq!(rex & 0xf8, BASE_REX);
    // REX.W occupies bit 3 of the prefix byte.
    let w_bit = if bits & 0x8000 != 0 { 0x08 } else { 0x00 };
    sink.put1(rex | w_bit);
}
// Emit a single-byte opcode with no REX prefix. // Emit a single-byte opcode with no REX prefix.
fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) { fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*"); debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*");
@@ -51,6 +61,13 @@ fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
sink.put1(bits as u8); sink.put1(bits as u8);
} }
// Emit a single-byte opcode preceded by a REX prefix.
//
// The opcode is the low byte of `bits`. Bits 8-11 of `bits` must be zero for this recipe
// family. Bit 15 is deliberately not checked here because `rex_prefix` reads it as the REX.W
// flag.
fn put_rexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for RexOp1*");
    rex_prefix(bits, rex, sink);
    sink.put1(bits as u8);
}
// Emit two-byte opcode: 0F XX // Emit two-byte opcode: 0F XX
fn put_op2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) { fn put_op2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*"); debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*");