966 lines
41 KiB
Python
966 lines
41 KiB
Python
#!/usr/bin/python3
|
|
|
|
import argparse
|
|
import bisect
|
|
from collections import OrderedDict, defaultdict, namedtuple, Counter
|
|
from enum import Enum
|
|
from itertools import product
|
|
import re
|
|
import struct
|
|
from typing import NamedTuple, FrozenSet, List, Tuple, Union, Optional, ByteString
|
|
|
|
INSTR_FLAGS_FIELDS, INSTR_FLAGS_SIZES = zip(*[
|
|
("modrm_idx", 2),
|
|
("modreg_idx", 2),
|
|
("vexreg_idx", 2), # note: vexreg w/o vex prefix is zeroreg_val
|
|
("imm_idx", 2),
|
|
("unused1", 2),
|
|
("zeroreg_val", 1),
|
|
("lock", 1),
|
|
("imm_control", 3),
|
|
("vsib", 1),
|
|
("modrm_size", 2),
|
|
("modreg_size", 2),
|
|
("vexreg_size", 2),
|
|
("imm_size", 2),
|
|
("unused2", 2),
|
|
("size_fix1", 3),
|
|
("size_fix2", 2),
|
|
("instr_width", 1),
|
|
("modrm_ty", 3),
|
|
("modreg_ty", 3),
|
|
("vexreg_ty", 2),
|
|
("imm_ty", 0),
|
|
("unused", 3),
|
|
("opsize", 3),
|
|
("modrm", 1),
|
|
("ign66", 1),
|
|
][::-1])
|
|
class InstrFlags(namedtuple("InstrFlags", INSTR_FLAGS_FIELDS)):
|
|
def __new__(cls, **kwargs):
|
|
init = {**{f: 0 for f in cls._fields}, **kwargs}
|
|
return super(InstrFlags, cls).__new__(cls, **init)
|
|
def _encode(self):
|
|
enc = 0
|
|
for value, size in zip(self, INSTR_FLAGS_SIZES):
|
|
enc = enc << size | (value & ((1 << size) - 1))
|
|
return enc
|
|
|
|
ENCODINGS = {
|
|
"NP": InstrFlags(),
|
|
"M": InstrFlags(modrm=1, modrm_idx=0^3),
|
|
"M1": InstrFlags(modrm=1, modrm_idx=0^3, imm_idx=1^3, imm_control=1),
|
|
"MI": InstrFlags(modrm=1, modrm_idx=0^3, imm_idx=1^3, imm_control=4),
|
|
"MC": InstrFlags(modrm=1, modrm_idx=0^3, vexreg_idx=1^3, zeroreg_val=1),
|
|
"MR": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3),
|
|
"RM": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3),
|
|
"RMA": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, vexreg_idx=2^3),
|
|
"MRI": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3, imm_idx=2^3, imm_control=4),
|
|
"RMI": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, imm_idx=2^3, imm_control=4),
|
|
"MRC": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3, vexreg_idx=2^3, zeroreg_val=1),
|
|
"AM": InstrFlags(modrm=1, modrm_idx=1^3, vexreg_idx=0^3),
|
|
"MA": InstrFlags(modrm=1, modrm_idx=0^3, vexreg_idx=1^3),
|
|
"I": InstrFlags(imm_idx=0^3, imm_control=4),
|
|
"IA": InstrFlags(vexreg_idx=0^3, imm_idx=1^3, imm_control=4),
|
|
"O": InstrFlags(modrm_idx=0^3),
|
|
"OI": InstrFlags(modrm_idx=0^3, imm_idx=1^3, imm_control=4),
|
|
"OA": InstrFlags(modrm_idx=0^3, vexreg_idx=1^3),
|
|
"S": InstrFlags(modreg_idx=0^3), # segment register in bits 3,4,5
|
|
"A": InstrFlags(vexreg_idx=0^3),
|
|
"D": InstrFlags(imm_idx=0^3, imm_control=6),
|
|
"FD": InstrFlags(vexreg_idx=0^3, imm_idx=1^3, imm_control=2),
|
|
"TD": InstrFlags(vexreg_idx=1^3, imm_idx=0^3, imm_control=2),
|
|
|
|
"RVM": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3),
|
|
"RVMI": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=4),
|
|
"RVMR": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=3),
|
|
"RMV": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, vexreg_idx=2^3),
|
|
"VM": InstrFlags(modrm=1, modrm_idx=1^3, vexreg_idx=0^3),
|
|
"VMI": InstrFlags(modrm=1, modrm_idx=1^3, vexreg_idx=0^3, imm_idx=2^3, imm_control=4),
|
|
"MVR": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=2^3, vexreg_idx=1^3),
|
|
"MRV": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3, vexreg_idx=2^3),
|
|
}
|
|
ENCODING_OPTYS = ["modrm", "modreg", "vexreg", "imm"]
|
|
ENCODING_OPORDER = { enc: sorted(ENCODING_OPTYS, key=lambda ty: getattr(ENCODINGS[enc], ty+"_idx")^3) for enc in ENCODINGS}
|
|
|
|
OPKIND_CANONICALIZE = {
|
|
"I": "IMM", # immediate
|
|
"A": "IMM", # Direct address, far jmp
|
|
"J": "IMM", # RIP-relative address
|
|
"M": "MEM", # ModRM.r/m selects memory only
|
|
"O": "MEM", # Direct address, FD/TD encoding
|
|
"R": "GP", # ModRM.r/m selects GP
|
|
"B": "GP", # VEX.vvvv selects GP
|
|
"E": "GP", # ModRM.r/m selects GP or memory
|
|
"G": "GP", # ModRM.reg selects GP
|
|
"P": "MMX", # ModRM.reg selects MMX
|
|
"N": "MMX", # ModRM.r/m selects MMX
|
|
"Q": "MMX", # ModRM.r/m selects MMX or memory
|
|
"V": "XMM", # ModRM.reg selects XMM
|
|
"H": "XMM", # VEX.vvvv selects XMM
|
|
"L": "XMM", # bits7:4 of imm8 select XMM
|
|
"U": "XMM", # ModRM.r/m selects XMM
|
|
"W": "XMM", # ModRM.r/m selects XMM or memory
|
|
"S": "SEG", # ModRM.reg selects SEG
|
|
"C": "CR", # ModRM.reg selects CR
|
|
"D": "DR", # ModRM.reg selects DR
|
|
|
|
# Custom names
|
|
"F": "FPU", # F is used for RFLAGS by Intel
|
|
"K": "MASK",
|
|
"T": "TMM",
|
|
"Z": "BND",
|
|
}
|
|
OPKIND_SIZES = {
|
|
"b": 1,
|
|
"w": 2,
|
|
"d": 4,
|
|
"ss": 4, # Scalar single of XMM (d)
|
|
"q": 8,
|
|
"sd": 8, # Scalar double of XMM (q)
|
|
"t": 10, # FPU/ten-byte
|
|
"dq": 16,
|
|
"qq": 32,
|
|
"oq": 64, # oct-quadword
|
|
"": 0, # for MEMZ
|
|
"v": -1, # operand size (w/d/q)
|
|
"y": -1, # operand size (d/q)
|
|
"z": -1, # w/d (immediates, min(operand size, 4))
|
|
"a": -1, # z:z
|
|
"p": -1, # w:z
|
|
"x": -2, # vector size
|
|
"h": -3, # half x
|
|
"f": -4, # fourth x
|
|
"e": -5, # eighth x
|
|
"pd": -2, # packed double (x)
|
|
"ps": -2, # packed single (x)
|
|
|
|
# Custom names
|
|
"bs": -1, # sign-extended immediate
|
|
"zd": 4, # z-immediate, but always 4-byte operand
|
|
"zq": 8, # z-immediate, but always 8-byte operand
|
|
}
|
|
class OpKind(NamedTuple):
|
|
kind: str
|
|
sizestr: str
|
|
size: int
|
|
|
|
SZ_OP = -1
|
|
SZ_VEC = -2
|
|
SZ_VEC_HALF = -3
|
|
SZ_VEC_QUARTER = -4
|
|
SZ_VEC_EIGHTH = -5
|
|
|
|
def abssize(self, opsz=None, vecsz=None):
|
|
res = opsz if self.size == self.SZ_OP else \
|
|
vecsz if self.size == self.SZ_VEC else \
|
|
vecsz >> 1 if self.size == self.SZ_VEC_HALF else \
|
|
vecsz >> 2 if self.size == self.SZ_VEC_QUARTER else \
|
|
vecsz >> 3 if self.size == self.SZ_VEC_EIGHTH else self.size
|
|
if res is None:
|
|
raise Exception("unspecified operand size")
|
|
return res
|
|
def immsize(self, opsz):
|
|
maxsz = 1 if self.sizestr == "bs" else 4 if self.sizestr[0] == "z" else 8
|
|
return min(maxsz, self.abssize(opsz))
|
|
@classmethod
|
|
def parse(cls, op):
|
|
return cls(OPKIND_CANONICALIZE[op[0]], op[1:], OPKIND_SIZES[op[1:]])
|
|
|
|
class InstrDesc(NamedTuple):
|
|
mnemonic: str
|
|
encoding: str
|
|
operands: Tuple[str, ...]
|
|
flags: FrozenSet[str]
|
|
|
|
OPKIND_REGTYS = {
|
|
("modrm", "GP"): 0, ("modreg", "GP"): 0, ("vexreg", "GP"): 0,
|
|
("modrm", "XMM"): 1, ("modreg", "XMM"): 1, ("vexreg", "XMM"): 1,
|
|
("modrm", "MMX"): 4, ("modreg", "MMX"): 4,
|
|
("modrm", "FPU"): 5, ("vexreg", "FPU"): 3,
|
|
("modrm", "MASK"): 6, ("modreg", "MASK"): 2, ("vexreg", "MASK"): 2,
|
|
("modreg", "SEG"): 5,
|
|
("modreg", "DR"): 0, # handled in code
|
|
("modreg", "CR"): 0, # handled in code
|
|
("modrm", "MEM"): 0,
|
|
("imm", "MEM"): 0, ("imm", "IMM"): 0, ("imm", "XMM"): 0,
|
|
}
|
|
OPKIND_REGTYS_ENC = {"SEG": 3, "FPU": 4, "MMX": 5, "XMM": 6, "BND": 8,
|
|
"CR": 9, "DR": 10}
|
|
OPKIND_SIZES = {
|
|
0: 0, 1: 1, 2: 2, 4: 3, 8: 4, 16: 5, 32: 6, 64: 7, 10: 0,
|
|
}
|
|
|
|
@classmethod
|
|
def parse(cls, desc):
|
|
desc = desc.split()
|
|
mnem_comp = desc[5].split("+", 1)
|
|
desc[5] = mnem_comp[0]
|
|
if len(mnem_comp) > 1 and "w" in mnem_comp[1]:
|
|
desc.append("INSTR_WIDTH")
|
|
if len(mnem_comp) > 1 and "a" in mnem_comp[1]:
|
|
desc.append("U67")
|
|
if len(mnem_comp) > 1 and "s" in mnem_comp[1]:
|
|
desc.append("USEG")
|
|
operands = tuple(OpKind.parse(op) for op in desc[1:5] if op != "-")
|
|
return cls(desc[5], desc[0], operands, frozenset(desc[6:]))
|
|
|
|
def imm_size(self, opsz):
|
|
flags = ENCODINGS[self.encoding]
|
|
if flags.imm_control < 3:
|
|
return 0
|
|
if flags.imm_control == 3:
|
|
return 1
|
|
if self.mnemonic == "ENTER":
|
|
return 3
|
|
return self.operands[flags.imm_idx^3].immsize(opsz)
|
|
|
|
def optype_str(self):
|
|
optypes = ["", "", "", ""]
|
|
flags = ENCODINGS[self.encoding]
|
|
if flags.modrm_idx: optypes[flags.modrm_idx^3] = "rM"[flags.modrm]
|
|
if flags.modreg_idx: optypes[flags.modreg_idx^3] = "r"
|
|
if flags.vexreg_idx: optypes[flags.vexreg_idx^3] = "r"
|
|
if flags.imm_control: optypes[flags.imm_idx^3] = " iariioo"[flags.imm_control]
|
|
return "".join(optypes)
|
|
|
|
def encode_regtys(self, ots, opsz):
|
|
tys = []
|
|
for ot, op in zip(ots, self.operands):
|
|
if ot == "m":
|
|
tys.append(0xf)
|
|
elif ot in "ioa":
|
|
tys.append(0)
|
|
elif op.kind == "GP":
|
|
if (self.mnemonic == "MOVSX" or self.mnemonic == "MOVZX" or
|
|
opsz == 1):
|
|
tys.append(2 if op.abssize(opsz) == 1 else 1)
|
|
else:
|
|
tys.append(1)
|
|
else:
|
|
tys.append(self.OPKIND_REGTYS_ENC[op.kind])
|
|
return sum(ty << (4*i) for i, ty in enumerate(tys))
|
|
|
|
def dynsizes(self):
|
|
dynopsz = set(op.size for op in self.operands if op.size < 0)
|
|
if {"INSTR_WIDTH", "SZ8"} & self.flags: dynopsz.add(OpKind.SZ_OP)
|
|
if OpKind.SZ_OP in dynopsz and len(dynopsz) > 1:
|
|
raise Exception(f"conflicting dynamic operand sizes in {self}")
|
|
return dynopsz
|
|
|
|
def encode(self, mnem, ign66, modrm):
|
|
flags = ENCODINGS[self.encoding]
|
|
extraflags = {}
|
|
|
|
dynopsz = self.dynsizes()
|
|
# Operand size either refers to vectors or GP, but not both
|
|
if dynopsz and OpKind.SZ_OP not in dynopsz: # Vector operand size
|
|
if self.flags & {"SZ8", "D64", "F64", "INSTR_WIDTH", "LOCK", "U66"}:
|
|
raise Exception(f"incompatible flags in {self}")
|
|
# Allow at most the vector size together with one alternative
|
|
dynsizes = [OpKind.SZ_VEC] + list(dynopsz - {OpKind.SZ_VEC})
|
|
extraflags["opsize"] = 4 | (OpKind.SZ_VEC - dynsizes[-1])
|
|
if len(dynsizes) > 2:
|
|
raise Exception(f"conflicting vector operand sizes in {self}")
|
|
else: # either empty or GP operand size
|
|
dynsizes = [OpKind.SZ_OP]
|
|
if "SZ8" in self.flags: extraflags["opsize"] = 1
|
|
if "D64" in self.flags: extraflags["opsize"] = 2
|
|
if "F64" in self.flags: extraflags["opsize"] = 3
|
|
extraflags["instr_width"] = "INSTR_WIDTH" in self.flags
|
|
extraflags["lock"] = "LOCK" in self.flags
|
|
|
|
imm_byte = self.imm_size(4) == 1
|
|
extraflags["imm_control"] = flags.imm_control | imm_byte
|
|
|
|
# Sort fixed sizes encodable in size_fix2 as second element.
|
|
# But: byte-sized immediates are handled specially and don't cost space.
|
|
fixed = set(self.OPKIND_SIZES[op.size] for op in self.operands if
|
|
op.size >= 0 and not (imm_byte and op.kind == "IMM"))
|
|
fixed = sorted(fixed, key=lambda x: 1 <= x <= 4)
|
|
if len(fixed) > 2 or (len(fixed) == 2 and not (1 <= fixed[1] <= 4)):
|
|
raise Exception(f"invalid fixed sizes {fixed} in {self}")
|
|
sizes = (fixed + [1, 1])[:2] + dynsizes # See operand_sizes in decode.c.
|
|
extraflags["size_fix1"] = sizes[0]
|
|
extraflags["size_fix2"] = sizes[1] - 1
|
|
|
|
for i, opkind in enumerate(self.operands):
|
|
sz = self.OPKIND_SIZES[opkind.size] if opkind.size >= 0 else opkind.size
|
|
if opkind.kind == "IMM":
|
|
if imm_byte and sz not in (dynsizes[0], 1):
|
|
raise Exception(f"imm_byte with opsize {sz} in {self}")
|
|
extraflags[f"imm_size"] = sz == 1 if imm_byte else sizes.index(sz)
|
|
else:
|
|
opname = ENCODING_OPORDER[self.encoding][i]
|
|
extraflags[f"{opname}_size"] = sizes.index(sz)
|
|
extraflags[f"{opname}_ty"] = self.OPKIND_REGTYS[opname, opkind.kind]
|
|
|
|
# Miscellaneous Flags
|
|
if "VSIB" in self.flags: extraflags["vsib"] = 1
|
|
if modrm: extraflags["modrm"] = 1
|
|
|
|
if "U66" not in self.flags and (ign66 or "I66" in self.flags):
|
|
extraflags["ign66"] = 1
|
|
|
|
enc = flags._replace(**extraflags)._encode()
|
|
enc = tuple((enc >> i) & 0xffff for i in range(0, 48, 16))
|
|
# First 2 bytes are the mnemonic, last 6 bytes are the encoding.
|
|
return f"{{FDI_{mnem}, {enc[0]}, {enc[1]}, {enc[2]}}}"
|
|
|
|
class EntryKind(Enum):
|
|
NONE = 0
|
|
INSTR = 1
|
|
WEAKINSTR = 9
|
|
TABLE256 = 2
|
|
TABLE16 = 3
|
|
TABLE8E = 4
|
|
TABLE_PREFIX = 5
|
|
TABLE_VEX = 6
|
|
TABLE_ROOT = -1
|
|
@property
|
|
def is_instr(self):
|
|
return self == EntryKind.INSTR or self == EntryKind.WEAKINSTR
|
|
|
|
opcode_regex = re.compile(
|
|
r"^(?:(?P<prefixes>(?P<vex>VEX\.)?(?P<legacy>NP|66|F2|F3|NFx)\." +
|
|
r"(?:W(?P<rexw>[01]|IG)\.)?(?:L(?P<vexl>[01]|IG)\.)?))?" +
|
|
r"(?P<escape>0f38|0f3a|0f|)" +
|
|
r"(?P<opcode>[0-9a-f]{2})" +
|
|
r"(?:/(?P<modreg>[0-7]|[rm]|[0-7][rm])|(?P<opcext>[c-f][0-9a-f]))?(?P<extended>\+)?$")
|
|
|
|
class Opcode(NamedTuple):
|
|
prefix: Union[None, str] # None/NP/66/F2/F3/NFx
|
|
escape: int # [0, 0f, 0f38, 0f3a]
|
|
opc: int
|
|
extended: bool # Extend opc or opcext, if present
|
|
modreg: Union[None, Tuple[Union[None, int], str]] # (modreg, "r"/"m"/"rm"), None
|
|
opcext: Union[None, int] # 0xc0-0xff, or 0
|
|
vex: bool
|
|
vexl: Union[str, None] # 0, 1, IG, None = used, both
|
|
rexw: Union[str, None] # 0, 1, IG, None = used, both
|
|
|
|
@classmethod
|
|
def parse(cls, opcode_string):
|
|
match = opcode_regex.match(opcode_string)
|
|
if match is None:
|
|
raise Exception(opcode_string)
|
|
return None
|
|
|
|
modreg = match.group("modreg")
|
|
if modreg:
|
|
if modreg[0] in "rm":
|
|
modreg = None, modreg[0]
|
|
else:
|
|
modreg = int(modreg[0]), modreg[1] if len(modreg) == 2 else "rm"
|
|
|
|
return cls(
|
|
prefix=match.group("legacy"),
|
|
escape=["", "0f", "0f38", "0f3a"].index(match.group("escape")),
|
|
opc=int(match.group("opcode"), 16),
|
|
extended=match.group("extended") is not None,
|
|
modreg=modreg,
|
|
opcext=int(match.group("opcext") or "0", 16) or None,
|
|
vex=match.group("vex") is not None,
|
|
vexl=match.group("vexl"),
|
|
rexw=match.group("rexw"),
|
|
)
|
|
|
|
class Trie:
|
|
KIND_ORDER = (EntryKind.TABLE_ROOT, EntryKind.TABLE256,
|
|
EntryKind.TABLE_PREFIX, EntryKind.TABLE16,
|
|
EntryKind.TABLE8E, EntryKind.TABLE_VEX)
|
|
TABLE_LENGTH = {
|
|
EntryKind.TABLE_ROOT: 8,
|
|
EntryKind.TABLE256: 256,
|
|
EntryKind.TABLE_PREFIX: 4,
|
|
EntryKind.TABLE16: 16,
|
|
EntryKind.TABLE8E: 8,
|
|
EntryKind.TABLE_VEX: 4,
|
|
}
|
|
|
|
def __init__(self, root_count):
|
|
self.trie = []
|
|
self.trie.append([None] * root_count)
|
|
self.kindmap = defaultdict(list)
|
|
|
|
def _add_table(self, kind):
|
|
self.trie.append([None] * self.TABLE_LENGTH[kind])
|
|
self.kindmap[kind].append(len(self.trie) - 1)
|
|
return len(self.trie) - 1
|
|
|
|
def _clone(self, elem):
|
|
if not elem or elem[0].is_instr:
|
|
return elem
|
|
new_num = self._add_table(elem[0])
|
|
self.trie[new_num] = [self._clone(e) for e in self.trie[elem[1]]]
|
|
return elem[0], new_num
|
|
|
|
def _transform_opcode(self, opc):
|
|
troot = [opc.escape | opc.vex << 2]
|
|
t256 = [opc.opc + i for i in range(8 if opc.extended and not opc.opcext else 1)]
|
|
tprefix, t16, t8e, tvex = None, None, None, None
|
|
if opc.prefix == "NFx":
|
|
tprefix = [0, 1]
|
|
elif opc.prefix:
|
|
tprefix = [["NP", "66", "F3", "F2"].index(opc.prefix)]
|
|
if opc.opcext:
|
|
t16 = [((opc.opcext - 0xc0) >> 3) | 8]
|
|
if not opc.extended:
|
|
t8e = [opc.opcext & 7]
|
|
elif opc.modreg:
|
|
# TODO: optimize for /r and /m specifiers to reduce size
|
|
mod = {"m": [0], "r": [1<<3], "rm": [0, 1<<3]}[opc.modreg[1]]
|
|
reg = [opc.modreg[0]] if opc.modreg[0] is not None else list(range(8))
|
|
t16 = [x + y for x in mod for y in reg]
|
|
if opc.vexl in ("0", "1") or opc.rexw in ("0", "1"):
|
|
rexw = {"0": [0], "1": [1<<0], "IG": [0, 1<<0]}[opc.rexw or "IG"]
|
|
vexl = {"0": [0], "1": [1<<1], "IG": [0, 1<<1]}[opc.vexl or "IG"]
|
|
tvex = list(map(sum, product(rexw, vexl)))
|
|
# Order must match KIND_ORDER.
|
|
return troot, t256, tprefix, t16, t8e, tvex
|
|
|
|
def add_opcode(self, opcode, descidx, root_idx, weak=False):
|
|
opcode = self._transform_opcode(opcode)
|
|
frontier = [(0, root_idx)]
|
|
for elem_kind, elem in zip(self.KIND_ORDER, opcode):
|
|
new_frontier = []
|
|
for entry_num, entry_idx in frontier:
|
|
entry = self.trie[entry_num]
|
|
if elem is None:
|
|
if entry[entry_idx] is None or entry[entry_idx][0] != elem_kind:
|
|
new_frontier.append((entry_num, entry_idx))
|
|
continue
|
|
elem = list(range(self.TABLE_LENGTH[elem_kind]))
|
|
if entry[entry_idx] is None:
|
|
new_num = self._add_table(elem_kind)
|
|
entry[entry_idx] = elem_kind, new_num
|
|
elif entry[entry_idx][0] != elem_kind:
|
|
# Need to add a new node here and copy entry one level below
|
|
new_num = self._add_table(elem_kind)
|
|
# Keep original entry, but clone others recursively
|
|
self.trie[new_num][0] = entry[entry_idx]
|
|
for i in range(1, len(self.trie[new_num])):
|
|
self.trie[new_num][i] = self._clone(entry[entry_idx])
|
|
entry[entry_idx] = elem_kind, new_num
|
|
for elem_idx in elem:
|
|
new_frontier.append((entry[entry_idx][1], elem_idx))
|
|
frontier = new_frontier
|
|
for entry_num, entry_idx in frontier:
|
|
entry = self.trie[entry_num]
|
|
if not entry[entry_idx] or entry[entry_idx][0] == EntryKind.WEAKINSTR:
|
|
kind = EntryKind.INSTR if not weak else EntryKind.WEAKINSTR
|
|
entry[entry_idx] = kind, descidx
|
|
elif not weak:
|
|
raise Exception(f"redundant non-weak {opcode}")
|
|
|
|
def deduplicate(self):
|
|
synonyms = {}
|
|
for kind in self.KIND_ORDER[::-1]:
|
|
entries = {}
|
|
for num in self.kindmap[kind]:
|
|
# Replace previous synonyms
|
|
entry = self.trie[num]
|
|
for i, elem in enumerate(entry):
|
|
if elem and not elem[0].is_instr and elem[1] in synonyms:
|
|
entry[i] = synonyms[elem[1]]
|
|
|
|
unique_entry = tuple(entry)
|
|
if len(set(unique_entry)) == 1:
|
|
# Omit kind if all entries point to the same child
|
|
synonyms[num] = entry[0]
|
|
self.trie[num] = None
|
|
elif unique_entry in entries:
|
|
# Deduplicate entries of this kind
|
|
synonyms[num] = kind, entries[unique_entry]
|
|
self.trie[num] = None
|
|
else:
|
|
entries[unique_entry] = num
|
|
|
|
def compile(self):
|
|
offsets = [None] * len(self.trie)
|
|
last_off = 0
|
|
for num, entry in enumerate(self.trie[1:], start=1):
|
|
if not entry:
|
|
continue
|
|
offsets[num] = last_off
|
|
last_off += (len(entry) + 3) & ~3
|
|
if last_off >= 0x8000:
|
|
raise Exception(f"maximum table size exceeded: {last_off:#x}")
|
|
|
|
data = [0] * last_off
|
|
for off, entry in zip(offsets, self.trie):
|
|
if off is None:
|
|
continue
|
|
for i, elem in enumerate(entry, start=off):
|
|
if elem is not None:
|
|
value = elem[1] << 2 if elem[0].is_instr else offsets[elem[1]]
|
|
data[i] = (value << 1) | (elem[0].value & 7)
|
|
return tuple(data), [offsets[v] for _, v in self.trie[0]]
|
|
|
|
@property
|
|
def stats(self):
|
|
return {k.name: sum(self.trie[e] is not None for e in v)
|
|
for k, v in self.kindmap.items()}
|
|
|
|
|
|
def superstring(strs):
|
|
# This faces the "shortest superstring" problem, which is NP-hard.
|
|
# Preprocessing: remove any strings which are already completely covered
|
|
realstrs = []
|
|
for s in sorted(strs, key=len, reverse=True):
|
|
for s2 in realstrs:
|
|
if s in s2:
|
|
break
|
|
else:
|
|
realstrs.append(s)
|
|
|
|
# Greedy heuristic generally yields acceptable results, though it depends on
|
|
# the order of the menmonics. More compact results are possible, but the
|
|
# expectable gains of an optimal result (probably with O(n!)) are small.
|
|
# First sort strings and later do a binary search for each possible prefix.
|
|
realstrs.sort()
|
|
merged = ""
|
|
while realstrs:
|
|
for i in range(min(16, len(merged)), 0, -1):
|
|
idx = bisect.bisect_left(realstrs, merged[-i:])
|
|
if idx < len(realstrs) and realstrs[idx][:i] == merged[-i:]:
|
|
merged += realstrs.pop(idx)[i:]
|
|
break
|
|
else:
|
|
merged += realstrs.pop()
|
|
return merged
|
|
|
|
def decode_table(entries, args):
|
|
modes = args.modes
|
|
|
|
trie = Trie(root_count=len(modes))
|
|
mnems, descs, desc_map = set(), [], {}
|
|
for weak, opcode, desc in entries:
|
|
ign66 = opcode.prefix in ("NP", "66", "F2", "F3")
|
|
modrm = opcode.modreg or opcode.opcext
|
|
mnem = {
|
|
"PUSH_SEG": "PUSH", "POP_SEG": "POP",
|
|
"MOV_CR2G": "MOV_CR", "MOV_G2CR": "MOV_CR",
|
|
"MOV_DR2G": "MOV_DR", "MOV_G2DR": "MOV_DR",
|
|
"MMX_MOVD_M2G": "MMX_MOVD", "MMX_MOVD_G2M": "MMX_MOVD",
|
|
"MMX_MOVQ_M2G": "MMX_MOVQ", "MMX_MOVQ_G2M": "MMX_MOVQ",
|
|
"SSE_MOVD_X2G": "SSE_MOVD", "SSE_MOVD_G2X": "SSE_MOVD",
|
|
"SSE_MOVQ_X2G": "SSE_MOVQ", "SSE_MOVQ_G2X": "SSE_MOVQ",
|
|
"VMOVD_X2G": "VMOVD", "VMOVD_G2X": "VMOVD",
|
|
"VMOVQ_X2G": "VMOVQ", "VMOVQ_G2X": "VMOVQ",
|
|
}.get(desc.mnemonic, desc.mnemonic)
|
|
mnems.add(mnem)
|
|
descenc = desc.encode(mnem, ign66, modrm)
|
|
desc_idx = desc_map.get(descenc)
|
|
if desc_idx is None:
|
|
desc_idx = desc_map[descenc] = len(descs)
|
|
descs.append(descenc)
|
|
for i, mode in enumerate(modes):
|
|
if "IO"[mode <= 32]+"64" not in desc.flags:
|
|
trie.add_opcode(opcode, desc_idx, i, weak)
|
|
|
|
trie.deduplicate()
|
|
table_data, root_offsets = trie.compile()
|
|
|
|
mnems = sorted(mnems)
|
|
decode_mnems_lines = [f"FD_MNEMONIC({m},{i})\n" for i, m in enumerate(mnems)]
|
|
|
|
mnemonics_intel = [m.replace("SSE_", "").replace("MMX_", "")
|
|
.replace("MOVABS", "MOV").replace("RESERVED_", "")
|
|
.replace("JMPF", "JMP FAR").replace("CALLF", "CALL FAR")
|
|
.replace("_S2G", "").replace("_G2S", "")
|
|
.replace("_CR", "").replace("_DR", "")
|
|
.replace("REP_", "REP ").replace("CMPXCHGD", "CMPXCHG")
|
|
.replace("JCXZ", "JCXZ JECXZJRCXZ")
|
|
.replace("C_SEP", "CWD CDQ CQO")
|
|
.replace("C_EX", "CBW CWDECDQE").replace("XCHG_NOP", "")
|
|
.lower() for m in mnems]
|
|
mnemonics_str = superstring(mnemonics_intel)
|
|
|
|
if args.stats:
|
|
print(f"Decode stats: Descs -- {len(descs)} ({8*len(descs)} bytes); ",
|
|
f"Trie -- {2*len(table_data)} bytes, {trie.stats}; "
|
|
f"Mnems -- {len(mnemonics_str)} + {3*len(mnemonics_intel)} bytes")
|
|
|
|
defines = ["FD_TABLE_OFFSET_%d %d\n"%k for k in zip(modes, root_offsets)]
|
|
|
|
return "".join(decode_mnems_lines), f"""// Auto-generated file -- do not modify!
|
|
#if defined(FD_DECODE_TABLE_DATA)
|
|
{"".join(f"{e:#06x}," for e in table_data)}
|
|
#elif defined(FD_DECODE_TABLE_DESCS)
|
|
{",".join(descs)}
|
|
#elif defined(FD_DECODE_TABLE_STRTAB1)
|
|
"{mnemonics_str}"
|
|
#elif defined(FD_DECODE_TABLE_STRTAB2)
|
|
{",".join(str(mnemonics_str.index(mnem)) for mnem in mnemonics_intel)}
|
|
#elif defined(FD_DECODE_TABLE_STRTAB3)
|
|
{",".join(str(len(mnem)) for mnem in mnemonics_intel)}
|
|
#elif defined(FD_DECODE_TABLE_DEFINES)
|
|
{"".join("#define " + line for line in defines)}
|
|
#else
|
|
#error "unspecified decode table"
|
|
#endif
|
|
"""
|
|
|
|
def encode_mnems(entries):
|
|
# mapping from (mnem, opsize, ots) -> (opcode, desc)
|
|
mnemonics = defaultdict(list)
|
|
for weak, opcode, desc in entries:
|
|
if "I64" in desc.flags or desc.mnemonic[:9] == "RESERVED_":
|
|
continue
|
|
|
|
opsizes, vecsizes = {0}, {0}
|
|
prepend_opsize, prepend_vecsize = False, False
|
|
# Where to put the operand size in the mnemonic
|
|
separate_opsize = "ENC_SEPSZ" in desc.flags
|
|
|
|
if "ENC_NOSZ" in desc.flags or not desc.dynsizes():
|
|
pass
|
|
elif OpKind.SZ_OP in desc.dynsizes():
|
|
if opcode.rexw:
|
|
raise Exception(f"unexpected REXW specifier {desc}")
|
|
opsizes = {8} if "SZ8" in desc.flags else {16, 32, 64}
|
|
if opcode.prefix in ("NP", "66", "F2", "F3") and "U66" not in desc.flags:
|
|
opsizes -= {16}
|
|
if "I66" in desc.flags:
|
|
opsizes -= {16}
|
|
if "D64" in desc.flags:
|
|
opsizes -= {32}
|
|
prepend_opsize = not separate_opsize
|
|
if "F64" in desc.flags:
|
|
opsizes = {64}
|
|
prepend_opsize = False
|
|
elif opcode.vex and opcode.vexl != "IG": # vectors; don't care for SSE
|
|
vecsizes = {128, 256}
|
|
if opcode.vexl:
|
|
vecsizes -= {128 if opcode.vexl == "1" else 256}
|
|
prepend_vecsize = not separate_opsize
|
|
|
|
modrm_type = opcode.modreg[1] if opcode.modreg else "rm"
|
|
optypes_base = desc.optype_str()
|
|
optypes = {optypes_base.replace("M", t) for t in modrm_type}
|
|
|
|
prefixes = [("", "")]
|
|
if "LOCK" in desc.flags:
|
|
prefixes.append(("LOCK_", "LOCK"))
|
|
if "ENC_REP" in desc.flags:
|
|
prefixes.append(("REP_", "F3"))
|
|
if "ENC_REPCC" in desc.flags:
|
|
prefixes.append(("REPNZ_", "F2"))
|
|
prefixes.append(("REPZ_", "F3"))
|
|
|
|
for opsize, vecsize, prefix, ots in product(opsizes, vecsizes, prefixes, optypes):
|
|
if prefix[1] == "LOCK" and ots[0] != "m":
|
|
continue
|
|
|
|
spec_opcode = opcode
|
|
if prefix[1]:
|
|
spec_opcode = spec_opcode._replace(prefix=prefix[1])
|
|
if opsize == 64 and "D64" not in desc.flags and "F64" not in desc.flags:
|
|
spec_opcode = spec_opcode._replace(rexw="1")
|
|
if vecsize == 256:
|
|
spec_opcode = spec_opcode._replace(vexl="1")
|
|
if spec_opcode.vexl == "IG":
|
|
spec_opcode = spec_opcode._replace(vexl="0")
|
|
|
|
# Construct mnemonic name
|
|
mnem_name = {"MOVABS": "MOV", "XCHG_NOP": "XCHG"}.get(desc.mnemonic, desc.mnemonic)
|
|
name = prefix[0] + mnem_name
|
|
if prepend_opsize and not ("D64" in desc.flags and opsize == 64):
|
|
name += f"_{opsize}"[name[-1] not in "0123456789":]
|
|
if prepend_vecsize:
|
|
name += f"_{vecsize}"[name[-1] not in "0123456789":]
|
|
for ot, op in zip(ots, desc.operands):
|
|
name += ot.replace("o", "")
|
|
if separate_opsize:
|
|
name += f"{op.abssize(opsize//8, vecsize//8)*8}"
|
|
mnemonics[name, opsize, ots].append((spec_opcode, desc))
|
|
|
|
for (mnem, opsize, ots), variants in mnemonics.items():
|
|
dedup = OrderedDict()
|
|
for i, (opcode, desc) in enumerate(variants):
|
|
PRIO = ["O", "OA", "AO", "AM", "MA", "IA", "OI"]
|
|
enc_prio = PRIO.index(desc.encoding) if desc.encoding in PRIO else len(PRIO)
|
|
unique = 0 if desc.encoding != "S" else i
|
|
key = desc.imm_size(opsize//8), enc_prio, unique
|
|
if key not in dedup:
|
|
dedup[key] = opcode, desc
|
|
mnemonics[mnem, opsize, ots] = [dedup[k] for k in sorted(dedup.keys())]
|
|
|
|
return dict(mnemonics)
|
|
|
|
def encode_table(entries, args):
|
|
mnemonics = encode_mnems(entries)
|
|
mnemonics["NOP", 0, ""] = [(Opcode.parse("90"), InstrDesc.parse("NP - - - - NOP"))]
|
|
mnem_map = {}
|
|
alt_table = [0] # first entry is unused
|
|
for (mnem, opsize, ots), variants in mnemonics.items():
|
|
supports_high_regs = []
|
|
if variants[0][1].mnemonic in ("MOVSX", "MOVZX") or opsize == 8:
|
|
# Should be the same for all variants
|
|
desc = variants[0][1]
|
|
for i, (ot, op) in enumerate(zip(ots, desc.operands)):
|
|
if ot == "r" and op.kind == "GP" and op.abssize(opsize//8) == 1:
|
|
supports_high_regs.append(i)
|
|
|
|
alt_indices = [i + len(alt_table) for i in range(len(variants) - 1)] + [0]
|
|
enc_opcs = []
|
|
for alt, (opcode, desc) in zip(alt_indices, variants):
|
|
opc_i = opcode.opc
|
|
if opcode.opcext:
|
|
opc_i |= opcode.opcext << 8
|
|
if opcode.modreg and opcode.modreg[0] is not None:
|
|
opc_i |= opcode.modreg[0] << 8
|
|
opc_i |= opcode.escape * 0x10000
|
|
opc_i |= 0x80000 if opcode.prefix == "66" or opsize == 16 else 0
|
|
opc_i |= 0x100000 if opcode.prefix == "F2" else 0
|
|
opc_i |= 0x200000 if opcode.prefix == "F3" else 0
|
|
opc_i |= 0x400000 if opcode.rexw == "1" else 0
|
|
if opcode.prefix == "LOCK":
|
|
opc_i |= 0x800000
|
|
elif opcode.vex:
|
|
opc_i |= 0x1000000 + 0x800000 * int(opcode.vexl or 0)
|
|
opc_i |= 0x8000000 if "VSIB" in desc.flags else 0
|
|
if alt >= 0x100:
|
|
raise Exception("encode alternate bits exhausted")
|
|
opc_i |= sum(1 << i for i in supports_high_regs) << 45
|
|
opc_i |= desc.imm_size(opsize//8) << 47
|
|
opc_i |= ["INVALID",
|
|
"NP", "M", "M1", "MI", "MC", "MR", "RM", "RMA", "MRI", "RMI", "MRC",
|
|
"AM", "MA", "I", "IA", "O", "OI", "OA", "S", "A", "D", "FD", "TD",
|
|
"RVM", "RVMI", "RVMR", "RMV", "VM", "VMI", "MVR", "MRV",
|
|
].index(desc.encoding) << 51
|
|
opc_i |= alt << 56
|
|
enc_opcs.append(opc_i)
|
|
mnem_map[f"FE_{mnem}"] = enc_opcs[0]
|
|
alt_table += enc_opcs[1:]
|
|
|
|
mnem_tab = "".join(f"#define {m} {v:#x}\n" for m, v in mnem_map.items())
|
|
alt_tab = "".join(f"[{i}] = {v:#x},\n" for i, v in enumerate(alt_table))
|
|
return mnem_tab, alt_tab
|
|
|
|
def encode2_table(entries, args):
|
|
mnemonics = encode_mnems(entries)
|
|
|
|
enc_decls, enc_code = "", ""
|
|
for (mnem, opsize, ots), variants in mnemonics.items():
|
|
max_imm_size = max(desc.imm_size(opsize//8) for _, desc in variants)
|
|
|
|
supports_high_regs = []
|
|
if variants[0][1].mnemonic in ("MOVSX", "MOVZX") or opsize == 8:
|
|
# Should be the same for all variants
|
|
desc = variants[0][1]
|
|
for i, (ot, op) in enumerate(zip(ots, desc.operands)):
|
|
if ot == "r" and op.kind == "GP" and op.abssize(opsize//8) == 1:
|
|
supports_high_regs.append(i)
|
|
supports_vsib = "VSIB" in variants[0][1].flags
|
|
|
|
if len({tuple(op.kind for op in v[1].operands) for v in variants}) > 1:
|
|
raise Exception(f"ambiguous operand kinds for {mnem}")
|
|
OPKIND_LUT = {"FPU": "ST", "SEG": "SREG", "MMX": "MM"}
|
|
reg_tys = [OPKIND_LUT.get(op.kind, op.kind) for op in variants[0][1].operands]
|
|
|
|
fnname = f"fe64_{mnem}{'_impl' if supports_high_regs else ''}"
|
|
op_tys = [{
|
|
"i": f"int{max_imm_size*8 if max_imm_size != 3 else 32}_t",
|
|
"a": "uintptr_t",
|
|
"r": f"FeReg{reg_ty if i not in supports_high_regs else 'GPLH'}",
|
|
"m": "FeMem" if not supports_vsib else "FeMemV",
|
|
"o": "const void*",
|
|
}[ot] for i, (ot, reg_ty) in enumerate(zip(ots, reg_tys))]
|
|
fn_opargs = "".join(f", {ty} op{i}" for i, ty in enumerate(op_tys))
|
|
fn_sig = f"unsigned {fnname}(uint8_t* buf, int flags{fn_opargs})"
|
|
enc_decls += f"{fn_sig};\n"
|
|
if supports_high_regs:
|
|
enc_decls += f"#define fe64_{mnem}(buf, flags"
|
|
enc_decls += "".join(f", op{i}" for i in range(len(op_tys)))
|
|
enc_decls += f") {fnname}(buf, flags"
|
|
enc_decls += "".join(f", FE_MAKE_GPLH(op{i})" if i in supports_high_regs else f", op{i}" for i in range(len(op_tys)))
|
|
enc_decls += f")\n"
|
|
|
|
code = f"{fn_sig} {{\n"
|
|
|
|
code += " unsigned idx = 0, rex = 0, memoff;\n"
|
|
if max_imm_size or "a" in ots:
|
|
code += " int64_t imm; unsigned imm_size;\n"
|
|
code += " (void) flags; (void) memoff;\n"
|
|
|
|
neednext = True
|
|
for i, (opcode, desc) in enumerate(variants):
|
|
if not neednext:
|
|
break
|
|
if i > 0:
|
|
code += f"\nnext{i-1}:\n"
|
|
neednext = False
|
|
|
|
imm_size = desc.imm_size(opsize//8)
|
|
flags = ENCODINGS[desc.encoding]
|
|
# Select usable encoding.
|
|
if desc.encoding == "S":
|
|
# Segment encoding is weird.
|
|
code += f" if (op_reg_idx(op0)!={(opcode.opc>>3)&0x7:#x}) goto next{i};\n"
|
|
neednext = True
|
|
if desc.mnemonic == "XCHG_NOP" and opsize == 32:
|
|
# XCHG eax, eax must not be encoded as 90 -- that'd be NOP.
|
|
code += f" if (op_reg_idx(op0)==0&&op_reg_idx(op1)==0) goto next{i};\n"
|
|
neednext = True
|
|
if flags.vexreg_idx and not opcode.vex: # vexreg w/o vex is zeroreg
|
|
code += f" if (op_reg_idx(op{flags.vexreg_idx^3})!={flags.zeroreg_val}) goto next{i};\n"
|
|
neednext = True
|
|
if flags.imm_control:
|
|
if flags.imm_control != 3:
|
|
code += f" imm = (int64_t) op{flags.imm_idx^3};\n"
|
|
else:
|
|
code += f" imm = op_reg_idx(op{flags.imm_idx^3}) << 4;\n"
|
|
code += f" imm_size = {imm_size};\n"
|
|
if flags.imm_control == 1:
|
|
code += f" if (imm != 1) goto next{i};\n"
|
|
neednext = True
|
|
if flags.imm_control == 2:
|
|
code += " imm_size = flags & FE_ADDR32 ? 4 : 8;\n"
|
|
code += " if (imm_size == 4) imm = (int32_t) imm;\n"
|
|
if imm_size < max_imm_size and 2 <= flags.imm_control < 6:
|
|
code += f" if (!op_imm_n(imm, imm_size)) goto next{i};\n"
|
|
neednext = True
|
|
if flags.imm_control == 6:
|
|
# idx is subtracted below.
|
|
code += f" imm -= (int64_t) buf + imm_size;\n"
|
|
if i != len(variants) - 1: # only Jcc+JMP
|
|
code += f" if (flags&FE_JMPL) goto next{i};\n"
|
|
# assume one-byte opcode without escape/prefixes
|
|
code += f" if (!op_imm_n(imm-1, imm_size)) goto next{i};\n"
|
|
neednext = True
|
|
|
|
if opcode.vex:
|
|
rexw, rexr, rexx, rexb = 0x8000, 0x80, 0x40, 0x20
|
|
else:
|
|
rexw, rexr, rexx, rexb = 0x48, 0x44, 0x42, 0x41
|
|
|
|
if not opcode.vex:
|
|
for i in supports_high_regs:
|
|
code += f" if (op_reg_idx(op{i}) >= 4 && op_reg_idx(op{i}) <= 15) rex = 0x40;\n"
|
|
if opcode.rexw == "1":
|
|
code += f" rex |= {rexw:#x};\n"
|
|
if flags.modrm_idx:
|
|
ismem = ots[flags.modrm_idx^3] == "m"
|
|
if ismem:
|
|
code += f" if (op_mem_base(op{flags.modrm_idx^3})&8) rex |= {rexb:#x};\n"
|
|
code += f" if (op_mem_idx(op{flags.modrm_idx^3})&8) rex |= {rexx:#x};\n"
|
|
else:
|
|
if desc.operands[flags.modrm_idx^3].kind in ("GP", "XMM"):
|
|
code += f" if (op_reg_idx(op{flags.modrm_idx^3})&8) rex |= {rexb:#x};\n"
|
|
if flags.modreg_idx:
|
|
if desc.operands[flags.modreg_idx^3].kind in ("GP", "XMM", "CR", "DR"):
|
|
code += f" if (op_reg_idx(op{flags.modreg_idx^3})&8) rex |= {rexr:#x};\n"
|
|
elif flags.modreg_idx: # O encoding
|
|
if desc.operands[flags.modreg_idx^3].kind in ("GP", "XMM"):
|
|
code += f" if (op_reg_idx(op{flags.modreg_idx^3})&8) rex |= {rexb:#x};\n"
|
|
|
|
for i in supports_high_regs:
|
|
code += f" if (rex && op_reg_gph(op{i})) return 0;\n"
|
|
|
|
if "m" in ots or "USEG" in desc.flags:
|
|
code += " if (UNLIKELY(flags & FE_SEG_MASK)) buf[idx++] = enc_seg(flags);\n"
|
|
if "m" in ots or "U67" in desc.flags:
|
|
code += " if (UNLIKELY(flags & FE_ADDR32)) buf[idx++] = 0x67;\n"
|
|
|
|
if opcode.vex:
|
|
ppl = ["NP", "66", "F3", "F2"].index(opcode.prefix)
|
|
ppl |= 4 if opcode.vexl == "1" else 0
|
|
mayvex2 = opcode.rexw != "1" and opcode.escape == 1
|
|
if mayvex2:
|
|
code += " if (!(rex&0x8060)) {\n"
|
|
code += " buf[idx++] = 0xc5;\n"
|
|
code += " rex ^= 0x80;\n"
|
|
code += " } else {\n"
|
|
code += " buf[idx++] = 0xc4;\n"
|
|
code += f" buf[idx++] = {0xe0+opcode.escape:#x}^rex;\n"
|
|
code += " rex >>= 8;\n"
|
|
if mayvex2:
|
|
code += " }\n"
|
|
vexop = 0
|
|
if flags.vexreg_idx:
|
|
vexop = f"op_reg_idx(op{flags.vexreg_idx^3})"
|
|
code += f" buf[idx++] = {ppl}|rex|(({vexop}^15)<<3);\n"
|
|
else:
|
|
if opsize == 16 or opcode.prefix == "66":
|
|
code += " buf[idx++] = 0x66;\n"
|
|
if opcode.prefix in ("F2", "F3"):
|
|
code += f" buf[idx++] = 0x{opcode.prefix};\n"
|
|
if opcode.prefix == "LOCK":
|
|
code += f" buf[idx++] = 0xF0;\n"
|
|
code += f" if (rex) buf[idx++] = rex;\n"
|
|
if opcode.escape:
|
|
code += f" buf[idx++] = 0x0F;\n"
|
|
if opcode.escape == 2:
|
|
code += f" buf[idx++] = 0x38;\n"
|
|
elif opcode.escape == 3:
|
|
code += f" buf[idx++] = 0x3A;\n"
|
|
code += f" buf[idx++] = {opcode.opc:#x};\n"
|
|
if opcode.opcext:
|
|
code += f" buf[idx++] = {opcode.opcext:#x};\n"
|
|
|
|
if flags.modrm:
|
|
modrm = f"op{flags.modrm_idx^3}"
|
|
if flags.modreg_idx:
|
|
modreg = f"op_reg_idx(op{flags.modreg_idx^3})"
|
|
else:
|
|
modreg = int(opcode.modreg[0]) if opcode.modreg else 0
|
|
if ismem:
|
|
imm_size_expr = "imm_size" if flags.imm_control >= 2 else 0
|
|
memfn = "enc_mem_vsib" if "VSIB" in desc.flags else "enc_mem"
|
|
code += f" memoff = {memfn}(buf, idx, {modrm}, {modreg}, {imm_size_expr}, 0);\n"
|
|
code += f" if (!memoff) return 0;\n idx += memoff;\n"
|
|
else:
|
|
modrm = f"op_reg_idx({modrm})"
|
|
code += f" buf[idx++] = 0xC0|(({modreg}&7)<<3)|({modrm}&7);\n"
|
|
elif flags.modrm_idx:
|
|
code += f" buf[idx-1] |= op_reg_idx(op{flags.modrm_idx^3}) & 7;\n"
|
|
|
|
if flags.imm_control >= 2:
|
|
if flags.imm_control == 6:
|
|
code += f" imm -= idx;\n"
|
|
code += f" if (enc_imm(buf+idx, imm, imm_size)) return 0;\n"
|
|
code += f" idx += imm_size;\n"
|
|
code += f" return idx;\n"
|
|
|
|
if neednext:
|
|
code += f"next{len(variants)-1}: return 0;\n"
|
|
code += "}\n"
|
|
|
|
enc_code += code
|
|
|
|
return enc_decls, enc_code
|
|
|
|
|
|
if __name__ == "__main__":
|
|
generators = {
|
|
"decode": decode_table,
|
|
"encode": encode_table,
|
|
"encode2": encode2_table,
|
|
}
|
|
|
|
parser = argparse.ArgumentParser()
|
|
parser.add_argument("--32", dest="modes", action="append_const", const=32)
|
|
parser.add_argument("--64", dest="modes", action="append_const", const=64)
|
|
parser.add_argument("--with-undoc", action="store_true")
|
|
parser.add_argument("--stats", action="store_true")
|
|
parser.add_argument("mode", choices=generators.keys())
|
|
parser.add_argument("table", type=argparse.FileType('r'))
|
|
parser.add_argument("out_public", type=argparse.FileType('w'))
|
|
parser.add_argument("out_private", type=argparse.FileType('w'))
|
|
args = parser.parse_args()
|
|
|
|
entries = []
|
|
for line in args.table.read().splitlines():
|
|
if not line or line[0] == "#": continue
|
|
line, weak = (line, False) if line[0] != "*" else (line[1:], True)
|
|
opcode_string, desc_string = tuple(line.split(maxsplit=1))
|
|
opcode, desc = Opcode.parse(opcode_string), InstrDesc.parse(desc_string)
|
|
if "UNDOC" not in desc.flags or args.with_undoc:
|
|
entries.append((weak, opcode, desc))
|
|
|
|
res_public, res_private = generators[args.mode](entries, args)
|
|
args.out_public.write(res_public)
|
|
args.out_private.write(res_private)
|