Generate Intel encoding recipes on demand.

Cretonne's encoding recipes need to have a fixed size so we can compute
accurate branch destination addresses. Intel's instruction encoding has
a lot of variance in the number of bytes needed to encode the opcode
which leads to a number of duplicated encoding recipes that only differ
in the opcode size.

Add an Intel-specific TailRecipe Python class which represents an
abstraction over a set of recipes that are identical except for the
opcode encoding. A TailRecipe can then generate specific encoding recipes
for each opcode format.

The opcode format is a prefix of the recipe name, so, for example, the
'rr' TailRecipe will generate the 'Op1rr', 'Op2rr', 'Mp2rr', etc. recipes.

The TailRecipe class provides a __call__ implementation that simply takes
the sequence of opcode bytes as arguments. It then looks up the right
recipe name prefix for the opcode bytes.
This commit is contained in:
Jakob Stoklund Olesen
2017-05-14 11:53:44 -07:00
parent bd8230411a
commit 232fb36d8f
2 changed files with 155 additions and 118 deletions

View File

@@ -4,61 +4,60 @@ Intel Encodings.
from __future__ import absolute_import from __future__ import absolute_import
from base import instructions as base from base import instructions as base
from .defs import I32 from .defs import I32
from . import recipes as rcp from . import recipes as r
from .recipes import OP, OP0F, MP66
I32.enc(base.iadd.i32, rcp.Op1rr, OP(0x01)) I32.enc(base.iadd.i32, *r.rr(0x01))
I32.enc(base.isub.i32, rcp.Op1rr, OP(0x29)) I32.enc(base.isub.i32, *r.rr(0x29))
I32.enc(base.band.i32, rcp.Op1rr, OP(0x21)) I32.enc(base.band.i32, *r.rr(0x21))
I32.enc(base.bor.i32, rcp.Op1rr, OP(0x09)) I32.enc(base.bor.i32, *r.rr(0x09))
I32.enc(base.bxor.i32, rcp.Op1rr, OP(0x31)) I32.enc(base.bxor.i32, *r.rr(0x31))
# Immediate instructions with sign-extended 8-bit and 32-bit immediate. # Immediate instructions with sign-extended 8-bit and 32-bit immediate.
for inst, r in [ for inst, rrr in [
(base.iadd_imm.i32, 0), (base.iadd_imm.i32, 0),
(base.band_imm.i32, 4), (base.band_imm.i32, 4),
(base.bor_imm.i32, 1), (base.bor_imm.i32, 1),
(base.bxor_imm.i32, 6)]: (base.bxor_imm.i32, 6)]:
I32.enc(inst, rcp.Op1rib, OP(0x83, rrr=r)) I32.enc(inst, *r.rib(0x83, rrr=rrr))
I32.enc(inst, rcp.Op1rid, OP(0x81, rrr=r)) I32.enc(inst, *r.rid(0x81, rrr=rrr))
# 32-bit shifts and rotates. # 32-bit shifts and rotates.
# Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit # Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
# and 16-bit shifts would need explicit masking. # and 16-bit shifts would need explicit masking.
I32.enc(base.ishl.i32.i32, rcp.Op1rc, OP(0xd3, rrr=4)) I32.enc(base.ishl.i32.i32, *r.rc(0xd3, rrr=4))
I32.enc(base.ushr.i32.i32, rcp.Op1rc, OP(0xd3, rrr=5)) I32.enc(base.ushr.i32.i32, *r.rc(0xd3, rrr=5))
I32.enc(base.sshr.i32.i32, rcp.Op1rc, OP(0xd3, rrr=7)) I32.enc(base.sshr.i32.i32, *r.rc(0xd3, rrr=7))
# Loads and stores. # Loads and stores.
I32.enc(base.store.i32.i32, rcp.Op1st, OP(0x89)) I32.enc(base.store.i32.i32, *r.st(0x89))
I32.enc(base.store.i32.i32, rcp.Op1stDisp8, OP(0x89)) I32.enc(base.store.i32.i32, *r.stDisp8(0x89))
I32.enc(base.store.i32.i32, rcp.Op1stDisp32, OP(0x89)) I32.enc(base.store.i32.i32, *r.stDisp32(0x89))
I32.enc(base.istore16.i32.i32, rcp.Mp1st, MP66(0x89)) I32.enc(base.istore16.i32.i32, *r.st(0x66, 0x89))
I32.enc(base.istore16.i32.i32, rcp.Mp1stDisp8, MP66(0x89)) I32.enc(base.istore16.i32.i32, *r.stDisp8(0x66, 0x89))
I32.enc(base.istore16.i32.i32, rcp.Mp1stDisp32, MP66(0x89)) I32.enc(base.istore16.i32.i32, *r.stDisp32(0x66, 0x89))
I32.enc(base.istore8.i32.i32, rcp.Op1st_abcd, OP(0x88)) I32.enc(base.istore8.i32.i32, *r.st_abcd(0x88))
I32.enc(base.istore8.i32.i32, rcp.Op1stDisp8_abcd, OP(0x88)) I32.enc(base.istore8.i32.i32, *r.stDisp8_abcd(0x88))
I32.enc(base.istore8.i32.i32, rcp.Op1stDisp32_abcd, OP(0x88)) I32.enc(base.istore8.i32.i32, *r.stDisp32_abcd(0x88))
I32.enc(base.load.i32.i32, rcp.Op1ld, OP(0x8b)) I32.enc(base.load.i32.i32, *r.ld(0x8b))
I32.enc(base.load.i32.i32, rcp.Op1ldDisp8, OP(0x8b)) I32.enc(base.load.i32.i32, *r.ldDisp8(0x8b))
I32.enc(base.load.i32.i32, rcp.Op1ldDisp32, OP(0x8b)) I32.enc(base.load.i32.i32, *r.ldDisp32(0x8b))
I32.enc(base.uload16.i32.i32, rcp.Op2ld, OP0F(0xb7)) I32.enc(base.uload16.i32.i32, *r.ld(0x0f, 0xb7))
I32.enc(base.uload16.i32.i32, rcp.Op2ldDisp8, OP0F(0xb7)) I32.enc(base.uload16.i32.i32, *r.ldDisp8(0x0f, 0xb7))
I32.enc(base.uload16.i32.i32, rcp.Op2ldDisp32, OP0F(0xb7)) I32.enc(base.uload16.i32.i32, *r.ldDisp32(0x0f, 0xb7))
I32.enc(base.sload16.i32.i32, rcp.Op2ld, OP0F(0xbf)) I32.enc(base.sload16.i32.i32, *r.ld(0x0f, 0xbf))
I32.enc(base.sload16.i32.i32, rcp.Op2ldDisp8, OP0F(0xbf)) I32.enc(base.sload16.i32.i32, *r.ldDisp8(0x0f, 0xbf))
I32.enc(base.sload16.i32.i32, rcp.Op2ldDisp32, OP0F(0xbf)) I32.enc(base.sload16.i32.i32, *r.ldDisp32(0x0f, 0xbf))
I32.enc(base.uload8.i32.i32, rcp.Op2ld, OP0F(0xb6)) I32.enc(base.uload8.i32.i32, *r.ld(0x0f, 0xb6))
I32.enc(base.uload8.i32.i32, rcp.Op2ldDisp8, OP0F(0xb6)) I32.enc(base.uload8.i32.i32, *r.ldDisp8(0x0f, 0xb6))
I32.enc(base.uload8.i32.i32, rcp.Op2ldDisp32, OP0F(0xb6)) I32.enc(base.uload8.i32.i32, *r.ldDisp32(0x0f, 0xb6))
I32.enc(base.sload8.i32.i32, rcp.Op2ld, OP0F(0xbe)) I32.enc(base.sload8.i32.i32, *r.ld(0x0f, 0xbe))
I32.enc(base.sload8.i32.i32, rcp.Op2ldDisp8, OP0F(0xbe)) I32.enc(base.sload8.i32.i32, *r.ldDisp8(0x0f, 0xbe))
I32.enc(base.sload8.i32.i32, rcp.Op2ldDisp32, OP0F(0xbe)) I32.enc(base.sload8.i32.i32, *r.ldDisp32(0x0f, 0xbe))

View File

@@ -7,30 +7,41 @@ from cdsl.predicates import IsSignedInt, IsEqual
from base.formats import Binary, BinaryImm, Store, Load from base.formats import Binary, BinaryImm, Store, Load
from .registers import GPR, ABCD from .registers import GPR, ABCD
try:
from typing import Tuple, Dict # noqa
from cdsl.instructions import InstructionFormat # noqa
from cdsl.isa import ConstraintSeq, BranchRange, PredNode # noqa
except ImportError:
pass
# Opcode representation. # Opcode representation.
# #
# Cretonne requires each recipe to have a single encoding size in bytes, and # Cretonne requires each recipe to have a single encoding size in bytes, and
# Intel opcodes are variable length, so we use separate recipes for different # Intel opcodes are variable length, so we use separate recipes for different
# styles of opcodes and prefixes. The opcode format is indicated by the recipe # styles of opcodes and prefixes. The opcode format is indicated by the recipe
# name prefix: # name prefix:
#
# <op> Op1* OP(op) OPCODE_PREFIX = {
# 0F <op> Op2* OP0F(op) # Prefix bytes Name mmpp
# 0F 38 <op> Op3* OP38(op) (): ('Op1', 0b0000),
# 0F 3A <op> Op3* OP3A(op) (0x66,): ('Mp1', 0b0001),
# 66 <op> Mp1* MP66(op) (0xf3,): ('Mp1', 0b0010),
# 66 0F <op> Mp2* MP660F(op) (0xf2,): ('Mp1', 0b0011),
# 66 0F 38 <op> Mp3* MP660F38(op) (0x0f,): ('Op2', 0b0100),
# 66 0F 3A <op> Mp3* MP660F3A(op) (0x66, 0x0f): ('Mp2', 0b0101),
# F2 <op> Mp1* MPF2(op) (0xf3, 0x0f): ('Mp2', 0b0110),
# F2 0F <op> Mp2* MPF20F(op) (0xf2, 0x0f): ('Mp2', 0b0111),
# F2 0F 38 <op> Mp3* MPF20F38(op) (0x0f, 0x38): ('Op3', 0b1000),
# F2 0F 3A <op> Mp3* MPF20F3A(op) (0x66, 0x0f, 0x38): ('Mp3', 0b1001),
# F3 <op> Mp1* MPF3(op) (0xf3, 0x0f, 0x38): ('Mp3', 0b1010),
# F3 0F <op> Mp2* MP0FF3(op) (0xf2, 0x0f, 0x38): ('Mp3', 0b1011),
# F3 0F 38 <op> Mp3* MPF30F38(op) (0x0f, 0x3a): ('Op3', 0b1100),
# F3 0F 3A <op> Mp3* MPF30F3A(op) (0x66, 0x0f, 0x3a): ('Mp3', 0b1101),
# (0xf3, 0x0f, 0x3a): ('Mp3', 0b1110),
(0xf2, 0x0f, 0x3a): ('Mp3', 0b1111)
}
# VEX/XOP and EVEX prefixes are not yet supported. # VEX/XOP and EVEX prefixes are not yet supported.
# #
# The encoding bits are: # The encoding bits are:
@@ -53,40 +64,95 @@ from .registers import GPR, ABCD
# enough bits, and the pp+mm format is ready for supporting VEX prefixes. # enough bits, and the pp+mm format is ready for supporting VEX prefixes.
def decode_ops(ops, rrr=0, w=0):
    # type: (Tuple[int, ...], int, int) -> Tuple[str, int]
    """
    Given a sequence of opcode bytes, compute the recipe name prefix and
    encoding bits.

    All bytes in `ops` except the last are looked up as a key in
    `OPCODE_PREFIX`, which supplies the recipe name prefix and the 4-bit
    `mmpp` field. The last byte is the opcode itself.

    The returned encoding bits are packed as:

        bits 0-7    opcode byte
        bits 8-11   mmpp
        bits 12-14  rrr
        bit  15     w

    :param ops: Opcode bytes, prefix bytes first, opcode byte last.
    :param rrr: 3-bit value stored in bits 12-14.
    :param w: 1-bit value stored in bit 15.
    :returns: A `(name, bits)` tuple.
    :raises KeyError: If the prefix bytes are not present in
                      `OPCODE_PREFIX`.
    """
    assert ops, "at least one opcode byte is required"
    assert rrr <= 0b111
    assert w <= 1
    name, mmpp = OPCODE_PREFIX[ops[:-1]]
    op = ops[-1]
    # The opcode occupies exactly the low 8 bits; anything larger (including
    # 256 itself) would bleed into the mmpp field at bit 8.
    assert op <= 0xff
    return (name, op | (mmpp << 8) | (rrr << 12) | (w << 15))
class TailRecipe(object):
    """
    Generate encoding recipes on demand.

    Intel encodings are somewhat orthogonal with the opcode representation on
    one side and the ModR/M, SIB and immediate fields on the other side.

    A `TailRecipe` represents the part of an encoding that follows the opcode.
    It is used to generate full encoding recipes on demand when combined with
    an opcode.

    The arguments are the same as for an `EncRecipe`, except for `size` which
    does not include the size of the opcode.
    """

    def __init__(
            self,
            name,               # type: str
            format,             # type: InstructionFormat
            size,               # type: int
            ins,                # type: ConstraintSeq
            outs,               # type: ConstraintSeq
            branch_range=None,  # type: BranchRange
            instp=None,         # type: PredNode
            isap=None           # type: PredNode
            ):
        # type: (...) -> None
        # All arguments are stored verbatim and forwarded to `EncRecipe`
        # when a concrete recipe is instantiated in `__call__`.
        self.name = name
        self.format = format
        self.size = size
        self.ins = ins
        self.outs = outs
        self.branch_range = branch_range
        self.instp = instp
        self.isap = isap

        # Cached recipes, keyed by name prefix.
        self.recipes = dict()  # type: Dict[str, EncRecipe]

    def __call__(self, *ops, **kwargs):
        # type: (*int, **int) -> Tuple[EncRecipe, int]
        """
        Create an encoding recipe and encoding bits for the opcode bytes in
        `ops`.

        The only keyword arguments accepted are `rrr` and `w`; both default
        to 0 and are passed on to `decode_ops`.

        :returns: A `(recipe, bits)` tuple. The recipe is named with the
                  opcode-format prefix followed by this tail's name, and is
                  created once per prefix and then reused.
        :raises TypeError: If any keyword other than `rrr` or `w` is given.
        """
        # Reject misspelled keywords (e.g. `rr=4`) instead of silently
        # encoding with the default value.
        unexpected = set(kwargs) - {'rrr', 'w'}
        if unexpected:
            raise TypeError(
                    'unexpected keyword argument(s): {}'
                    .format(', '.join(sorted(unexpected))))

        rrr = kwargs.get('rrr', 0)
        w = kwargs.get('w', 0)
        name, bits = decode_ops(ops, rrr, w)
        if name not in self.recipes:
            # The full recipe size is the opcode bytes plus the tail size.
            self.recipes[name] = EncRecipe(
                name + self.name,
                self.format,
                len(ops) + self.size,
                ins=self.ins,
                outs=self.outs,
                branch_range=self.branch_range,
                instp=self.instp,
                isap=self.isap)
        return (self.recipes[name], bits)
# XX /r # XX /r
Op1rr = EncRecipe('Op1rr', Binary, size=2, ins=(GPR, GPR), outs=0) rr = TailRecipe('rr', Binary, size=1, ins=(GPR, GPR), outs=0)
# XX /n with one arg in %rcx, for shifts. # XX /n with one arg in %rcx, for shifts.
Op1rc = EncRecipe('Op1rc', Binary, size=2, ins=(GPR, GPR.rcx), outs=0) rc = TailRecipe('rc', Binary, size=1, ins=(GPR, GPR.rcx), outs=0)
# XX /n ib with 8-bit immediate sign-extended. # XX /n ib with 8-bit immediate sign-extended.
Op1rib = EncRecipe( rib = TailRecipe(
'Op1rib', BinaryImm, size=3, ins=GPR, outs=0, 'rib', BinaryImm, size=2, ins=GPR, outs=0,
instp=IsSignedInt(BinaryImm.imm, 8)) instp=IsSignedInt(BinaryImm.imm, 8))
# XX /n id with 32-bit immediate sign-extended. # XX /n id with 32-bit immediate sign-extended.
Op1rid = EncRecipe( rid = TailRecipe(
'Op1rid', BinaryImm, size=6, ins=GPR, outs=0, 'rid', BinaryImm, size=5, ins=GPR, outs=0,
instp=IsSignedInt(BinaryImm.imm, 32)) instp=IsSignedInt(BinaryImm.imm, 32))
# #
@@ -94,72 +160,44 @@ Op1rid = EncRecipe(
# #
# XX /r register-indirect store with no offset. # XX /r register-indirect store with no offset.
Op1st = EncRecipe( st = TailRecipe(
'Op1st', Store, size=2, ins=(GPR, GPR), outs=(), 'st', Store, size=1, ins=(GPR, GPR), outs=(),
instp=IsEqual(Store.offset, 0)) instp=IsEqual(Store.offset, 0))
# XX /r register-indirect store with no offset. # XX /r register-indirect store with no offset.
# Only ABCD allowed for stored value. This is for byte stores. # Only ABCD allowed for stored value. This is for byte stores.
Op1st_abcd = EncRecipe( st_abcd = TailRecipe(
'Op1st_abcd', Store, size=2, ins=(ABCD, GPR), outs=(), 'st_abcd', Store, size=1, ins=(ABCD, GPR), outs=(),
instp=IsEqual(Store.offset, 0)) instp=IsEqual(Store.offset, 0))
# XX /r register-indirect store with 8-bit offset. # XX /r register-indirect store with 8-bit offset.
Op1stDisp8 = EncRecipe( stDisp8 = TailRecipe(
'Op1stDisp8', Store, size=3, ins=(GPR, GPR), outs=(), 'stDisp8', Store, size=2, ins=(GPR, GPR), outs=(),
instp=IsSignedInt(Store.offset, 8)) instp=IsSignedInt(Store.offset, 8))
Op1stDisp8_abcd = EncRecipe( stDisp8_abcd = TailRecipe(
'Op1stDisp8_abcd', Store, size=3, ins=(ABCD, GPR), outs=(), 'stDisp8_abcd', Store, size=2, ins=(ABCD, GPR), outs=(),
instp=IsSignedInt(Store.offset, 8)) instp=IsSignedInt(Store.offset, 8))
# XX /r register-indirect store with 32-bit offset. # XX /r register-indirect store with 32-bit offset.
Op1stDisp32 = EncRecipe('Op1stDisp32', Store, size=6, ins=(GPR, GPR), outs=()) stDisp32 = TailRecipe('stDisp32', Store, size=5, ins=(GPR, GPR), outs=())
Op1stDisp32_abcd = EncRecipe( stDisp32_abcd = TailRecipe(
'Op1stDisp32_abcd', Store, size=6, ins=(ABCD, GPR), outs=()) 'stDisp32_abcd', Store, size=5, ins=(ABCD, GPR), outs=())
# PP WW /r register-indirect store with no offset.
Mp1st = EncRecipe(
'Mp1st', Store, size=3, ins=(GPR, GPR), outs=(),
instp=IsEqual(Store.offset, 0))
# PP XX /r register-indirect store with 8-bit offset.
Mp1stDisp8 = EncRecipe(
'Mp1stDisp8', Store, size=4, ins=(GPR, GPR), outs=(),
instp=IsSignedInt(Store.offset, 8))
# PP XX /r register-indirect store with 32-bit offset.
Mp1stDisp32 = EncRecipe('Mp1stDisp32', Store, size=7, ins=(GPR, GPR), outs=())
# #
# Load recipes # Load recipes
# #
# XX /r load with no offset. # XX /r load with no offset.
Op1ld = EncRecipe( ld = TailRecipe(
'Op1ld', Load, size=2, ins=(GPR), outs=(GPR), 'ld', Load, size=1, ins=(GPR), outs=(GPR),
instp=IsEqual(Load.offset, 0)) instp=IsEqual(Load.offset, 0))
# XX /r load with 8-bit offset. # XX /r load with 8-bit offset.
Op1ldDisp8 = EncRecipe( ldDisp8 = TailRecipe(
'Op1ldDisp8', Load, size=3, ins=(GPR), outs=(GPR), 'ldDisp8', Load, size=2, ins=(GPR), outs=(GPR),
instp=IsSignedInt(Load.offset, 8)) instp=IsSignedInt(Load.offset, 8))
# XX /r load with 32-bit offset. # XX /r load with 32-bit offset.
Op1ldDisp32 = EncRecipe( ldDisp32 = TailRecipe(
'Op1ldDisp32', Load, size=6, ins=(GPR), outs=(GPR), 'ldDisp32', Load, size=5, ins=(GPR), outs=(GPR),
instp=IsSignedInt(Load.offset, 32))
# 0F XX /r load with no offset.
Op2ld = EncRecipe(
'Op2ld', Load, size=3, ins=(GPR), outs=(GPR),
instp=IsEqual(Load.offset, 0))
# XX /r load with 8-bit offset.
Op2ldDisp8 = EncRecipe(
'Op2ldDisp8', Load, size=4, ins=(GPR), outs=(GPR),
instp=IsSignedInt(Load.offset, 8))
# XX /r load with 32-bit offset.
Op2ldDisp32 = EncRecipe(
'Op2ldDisp32', Load, size=7, ins=(GPR), outs=(GPR),
instp=IsSignedInt(Load.offset, 32)) instp=IsSignedInt(Load.offset, 32))