#!/usr/bin/python3 import argparse from collections import OrderedDict, defaultdict, namedtuple, Counter from enum import Enum from itertools import product import re import struct from typing import NamedTuple, FrozenSet, List, Tuple, Union, Optional, ByteString INSTR_FLAGS_FIELDS, INSTR_FLAGS_SIZES = zip(*[ ("modrm_idx", 2), ("modreg_idx", 2), ("vexreg_idx", 2), ("zeroreg_idx", 2), ("imm_idx", 2), ("zeroreg_val", 1), ("lock", 1), ("imm_control", 3), ("vsib", 1), ("op0_size", 2), ("op1_size", 2), ("op2_size", 2), ("op3_size", 2), ("opsize", 2), ("size_fix1", 3), ("size_fix2", 2), ("instr_width", 1), ("modrm_ty", 3), ("modreg_ty", 3), ("vexreg_ty", 2), ("zeroreg_ty", 2), ("unused", 4), ("modrm", 1), ("ign66", 1), ][::-1]) class InstrFlags(namedtuple("InstrFlags", INSTR_FLAGS_FIELDS)): def __new__(cls, **kwargs): init = {**{f: 0 for f in cls._fields}, **kwargs} return super(InstrFlags, cls).__new__(cls, **init) def _encode(self): enc = 0 for value, size in zip(self, INSTR_FLAGS_SIZES): enc = enc << size | (value & ((1 << size) - 1)) return enc ENCODINGS = { "NP": InstrFlags(), "M": InstrFlags(modrm=1, modrm_idx=0^3), "M1": InstrFlags(modrm=1, modrm_idx=0^3, imm_idx=1^3, imm_control=1), "MI": InstrFlags(modrm=1, modrm_idx=0^3, imm_idx=1^3, imm_control=4), "MC": InstrFlags(modrm=1, modrm_idx=0^3, zeroreg_idx=1^3, zeroreg_val=1), "MR": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3), "RM": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3), "RMA": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, zeroreg_idx=2^3), "MRI": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3, imm_idx=2^3, imm_control=4), "RMI": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, imm_idx=2^3, imm_control=4), "MRC": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3, zeroreg_idx=2^3, zeroreg_val=1), "AM": InstrFlags(modrm=1, modrm_idx=1^3, zeroreg_idx=0^3), "MA": InstrFlags(modrm=1, modrm_idx=0^3, zeroreg_idx=1^3), "I": InstrFlags(imm_idx=0^3, imm_control=4), "IA": InstrFlags(zeroreg_idx=0^3, imm_idx=1^3, imm_control=4), "O": InstrFlags(modrm_idx=0^3), "OI": InstrFlags(modrm_idx=0^3, imm_idx=1^3, imm_control=4), "OA": InstrFlags(modrm_idx=0^3, zeroreg_idx=1^3), "S": InstrFlags(modreg_idx=0^3), # segment register in bits 3,4,5 "A": InstrFlags(zeroreg_idx=0^3), "D": InstrFlags(imm_idx=0^3, imm_control=6), "FD": InstrFlags(zeroreg_idx=0^3, imm_idx=1^3, imm_control=2), "TD": InstrFlags(zeroreg_idx=1^3, imm_idx=0^3, imm_control=2), "RVM": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3), "RVMI": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=4), "RVMR": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=3), "RMV": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, vexreg_idx=2^3), "VM": InstrFlags(modrm=1, modrm_idx=1^3, vexreg_idx=0^3), "VMI": InstrFlags(modrm=1, modrm_idx=1^3, vexreg_idx=0^3, imm_idx=2^3, imm_control=4), "MVR": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=2^3, vexreg_idx=1^3), } ENCODING_OPTYS = ["modrm", "modreg", "vexreg", "zeroreg", "imm"] ENCODING_OPORDER = { enc: sorted(ENCODING_OPTYS, key=lambda ty: getattr(ENCODINGS[enc], ty+"_idx")^3) for enc in ENCODINGS} OPKIND_CANONICALIZE = { "I": "IMM", # immediate "A": "IMM", # Direct address, far jmp "J": "IMM", # RIP-relative address "M": "MEM", # ModRM.r/m selects memory only "O": "MEM", # Direct address, FD/TD encoding "R": "GP", # ModRM.r/m selects GP "B": "GP", # VEX.vvvv selects GP "E": "GP", # ModRM.r/m selects GP or memory "G": "GP", # ModRM.reg selects GP "P": "MMX", # ModRM.reg selects MMX "N": "MMX", # ModRM.r/m selects MMX "Q": "MMX", # ModRM.r/m selects MMX or memory "V": "XMM", # ModRM.reg selects XMM "H": "XMM", # VEX.vvvv selects XMM "L": "XMM", # bits7:4 of imm8 select XMM "U": "XMM", # ModRM.r/m selects XMM "W": "XMM", # ModRM.r/m selects XMM or memory "S": "SEG", # ModRM.reg selects SEG "C": "CR", # ModRM.reg selects CR "D": "DR", # ModRM.reg selects DR # Custom names "F": "FPU", # F is used for RFLAGS by Intel "K": "MASK", "T": "TMM", "Z": "BND", } OPKIND_SIZES = { "b": 1, "w": 2, "d": 4, "ss": 4, # Scalar single of XMM "q": 8, "sd": 8, # Scalar double of XMM "t": 10, # FPU/ten-byte "dq": 16, "qq": 32, "oq": 64, # oct-quadword "": 0, # for MEMZ "v": -1, "y": -1, # actually, dword or qword "z": -1, # actually, op-size maxed at 4 (immediates) "a": -1, # actually, twice the size "p": -1, # actually, far pointer = SZ_OP + 2 "x": -2, "pd": -2, # packed double "ps": -2, # packed single # Custom names "bs": -1, # sign-extended immediate "zd": 4, # z-immediate, but always 4-byte operand "zq": 8, # z-immediate, but always 8-byte operand } class OpKind(NamedTuple): kind: str sizestr: str size: int SZ_OP = -1 SZ_VEC = -2 def abssize(self, opsz=None, vecsz=None): res = opsz if self.size == self.SZ_OP else \ vecsz if self.size == self.SZ_VEC else self.size if res is None: raise Exception("unspecified operand size") return res def immsize(self, opsz): maxsz = 1 if self.sizestr == "bs" else 4 if self.sizestr[0] == "z" else 8 return min(maxsz, self.abssize(opsz)) @classmethod def parse(cls, op): return cls(OPKIND_CANONICALIZE[op[0]], op[1:], OPKIND_SIZES[op[1:]]) class InstrDesc(NamedTuple): mnemonic: str encoding: str operands: Tuple[str, ...] flags: FrozenSet[str] OPKIND_REGTYS_MODRM = { "GP": 0, "XMM": 1, "MMX": 4, "FPU": 5, "MASK": 6, } OPKIND_REGTYS_MODREG = { "GP": 0, "XMM": 1, "MASK": 2, "MMX": 4, "SEG": 5, "CR": 0, "DR": 0 } # CR/DR handled in code. OPKIND_REGTYS_VEXREG = { "GP": 0, "XMM": 1, "MASK": 2 } OPKIND_REGTYS_ZEROREG = { "GP": 0, "XMM": 1, "FPU": 2 } OPKIND_REGTYS_ENC = {"SEG": 3, "FPU": 4, "MMX": 5, "XMM": 6, "BND": 8, "CR": 9, "DR": 10} OPKIND_SIZES = { 0: 0, 1: 1, 2: 2, 4: 3, 8: 4, 16: 5, 32: 6, 64: 7, 10: 0, OpKind.SZ_OP: -2, OpKind.SZ_VEC: -3, } @classmethod def parse(cls, desc): desc = desc.split() if desc[5][-2:] == "+w": desc[5] = desc[5][:-2] desc.append("INSTR_WIDTH") operands = tuple(OpKind.parse(op) for op in desc[1:5] if op != "-") return cls(desc[5], desc[0], operands, frozenset(desc[6:])) def imm_size(self, opsz): flags = ENCODINGS[self.encoding] if flags.imm_control < 3: return 0 if flags.imm_control == 3: return 1 if self.mnemonic == "ENTER": return 3 return self.operands[flags.imm_idx^3].immsize(opsz) def optype_str(self): optypes = ["", "", "", ""] flags = ENCODINGS[self.encoding] if flags.modrm_idx: optypes[flags.modrm_idx^3] = "rM"[flags.modrm] if flags.modreg_idx: optypes[flags.modreg_idx^3] = "r" if flags.vexreg_idx: optypes[flags.vexreg_idx^3] = "r" if flags.zeroreg_idx: optypes[flags.zeroreg_idx^3] = "r" if flags.imm_control: optypes[flags.imm_idx^3] = " iariioo"[flags.imm_control] return "".join(optypes) def encode_regtys(self, ots, opsz): tys = [] for ot, op in zip(ots, self.operands): if ot == "m": tys.append(0xf) elif ot in "ioa": tys.append(0) elif op.kind == "GP": if (self.mnemonic == "MOVSX" or self.mnemonic == "MOVZX" or opsz == 1): tys.append(2 if op.abssize(opsz) == 1 else 1) else: tys.append(1) else: tys.append(self.OPKIND_REGTYS_ENC[op.kind]) return sum(ty << (4*i) for i, ty in enumerate(tys)) def encode(self, ign66, modrm): flags = ENCODINGS[self.encoding] extraflags = {} opsz = set(self.OPKIND_SIZES[opkind.size] for opkind in self.operands) # Sort fixed sizes encodable in size_fix2 as second element. fixed = sorted((x for x in opsz if x >= 0), key=lambda x: 1 <= x <= 4) if len(fixed) > 2 or (len(fixed) == 2 and not (1 <= fixed[1] <= 4)): raise Exception(f"invalid fixed sizes {fixed} in {self}") sizes = (fixed + [1, 1])[:2] + [-2, -3] # See operand_sizes in decode.c. extraflags["size_fix1"] = sizes[0] extraflags["size_fix2"] = sizes[1] - 1 for i, opkind in enumerate(self.operands): sz = self.OPKIND_SIZES[opkind.size] extraflags["op%d_size"%i] = sizes.index(sz) if i >= 3: continue opname = ENCODING_OPORDER[self.encoding][i] if opname == "modrm": if opkind.kind == "MEM": continue extraflags["modrm_ty"] = self.OPKIND_REGTYS_MODRM[opkind.kind] elif opname == "modreg": extraflags["modreg_ty"] = self.OPKIND_REGTYS_MODREG[opkind.kind] elif opname == "vexreg": extraflags["vexreg_ty"] = self.OPKIND_REGTYS_VEXREG[opkind.kind] elif opname == "zeroreg": extraflags["zeroreg_ty"] = self.OPKIND_REGTYS_ZEROREG[opkind.kind] else: if opkind.kind not in ("IMM", "MEM", "XMM"): raise Exception("invalid regty for op 3, must be VEC") # Miscellaneous Flags if "SZ8" in self.flags: extraflags["opsize"] = 1 if "D64" in self.flags: extraflags["opsize"] = 2 if "F64" in self.flags: extraflags["opsize"] = 3 if "INSTR_WIDTH" in self.flags: extraflags["instr_width"] = 1 if "LOCK" in self.flags: extraflags["lock"] = 1 if "VSIB" in self.flags: extraflags["vsib"] = 1 if modrm: extraflags["modrm"] = 1 if "U66" not in self.flags and (ign66 or "I66" in self.flags): extraflags["ign66"] = 1 if self.imm_size(1 if "SZ8" in self.flags else 8) == 1: extraflags["imm_control"] = flags.imm_control | 1 enc = flags._replace(**extraflags)._encode() enc = tuple((enc >> i) & 0xffff for i in range(0, 48, 16)) # First 2 bytes are the mnemonic, last 6 bytes are the encoding. return ("FDI_"+self.mnemonic,) + enc class EntryKind(Enum): NONE = 0 INSTR = 1 WEAKINSTR = 9 TABLE256 = 2 TABLE16 = 3 TABLE8E = 4 TABLE_PREFIX = 5 TABLE_VEX = 6 TABLE_ROOT = -1 @property def is_instr(self): return self == EntryKind.INSTR or self == EntryKind.WEAKINSTR opcode_regex = re.compile( r"^(?:(?P(?PVEX\.)?(?PNP|66|F2|F3|NFx)\." + r"(?:W(?P[01]|IG)\.)?(?:L(?P[01]|IG)\.)?))?" + r"(?P0f38|0f3a|0f|)" + r"(?P[0-9a-f]{2})" + r"(?:(?P\+)|/(?P[0-7]|[rm]|[0-7][rm])|(?P[c-f][0-9a-f]))?$") class Opcode(NamedTuple): prefix: Union[None, str] # None/NP/66/F2/F3/NFx escape: int # [0, 0f, 0f38, 0f3a] opc: int extended: bool # Extend opc or opcext, if present modreg: Union[None, Tuple[Union[None, int], str]] # (modreg, "r"/"m"/"rm"), None opcext: Union[None, int] # 0xc0-0xff, or 0 vex: bool vexl: Union[str, None] # 0, 1, IG, None = used, both rexw: Union[str, None] # 0, 1, IG, None = used, both @classmethod def parse(cls, opcode_string): match = opcode_regex.match(opcode_string) if match is None: raise Exception(opcode_string) return None modreg = match.group("modreg") if modreg: if modreg[0] in "rm": modreg = None, modreg[0] else: modreg = int(modreg[0]), modreg[1] if len(modreg) == 2 else "rm" return cls( prefix=match.group("legacy"), escape=["", "0f", "0f38", "0f3a"].index(match.group("escape")), opc=int(match.group("opcode"), 16), extended=match.group("extended") is not None, modreg=modreg, opcext=int(match.group("opcext") or "0", 16) or None, vex=match.group("vex") is not None, vexl=match.group("vexl"), rexw=match.group("rexw"), ) class Trie: KIND_ORDER = (EntryKind.TABLE_ROOT, EntryKind.TABLE256, EntryKind.TABLE_PREFIX, EntryKind.TABLE16, EntryKind.TABLE8E, EntryKind.TABLE_VEX) TABLE_LENGTH = { EntryKind.TABLE_ROOT: 8, EntryKind.TABLE256: 256, EntryKind.TABLE_PREFIX: 4, EntryKind.TABLE16: 16, EntryKind.TABLE8E: 8, EntryKind.TABLE_VEX: 4, } def __init__(self, root_count): self.trie = [] self.trie.append([None] * root_count) self.kindmap = defaultdict(list) def _add_table(self, kind): self.trie.append([None] * self.TABLE_LENGTH[kind]) self.kindmap[kind].append(len(self.trie) - 1) return len(self.trie) - 1 def _clone(self, elem): if not elem or elem[0].is_instr: return elem new_num = self._add_table(elem[0]) self.trie[new_num] = [self._clone(e) for e in self.trie[elem[1]]] return elem[0], new_num def _transform_opcode(self, opc): troot = [opc.escape | opc.vex << 2] t256 = [opc.opc + i for i in range(8 if opc.extended else 1)] tprefix, t16, t8e, tvex = None, None, None, None if opc.prefix == "NFx": tprefix = [0, 1] elif opc.prefix: tprefix = [["NP", "66", "F3", "F2"].index(opc.prefix)] if opc.opcext: t16 = [((opc.opcext - 0xc0) >> 3) | 8] t8e = [opc.opcext & 7] elif opc.modreg: # TODO: optimize for /r and /m specifiers to reduce size mod = {"m": [0], "r": [1<<3], "rm": [0, 1<<3]}[opc.modreg[1]] reg = [opc.modreg[0]] if opc.modreg[0] is not None else list(range(8)) t16 = [x + y for x in mod for y in reg] if opc.vexl in ("0", "1") or opc.rexw in ("0", "1"): rexw = {"0": [0], "1": [1<<0], "IG": [0, 1<<0]}[opc.rexw or "IG"] vexl = {"0": [0], "1": [1<<1], "IG": [0, 1<<1]}[opc.vexl or "IG"] tvex = list(map(sum, product(rexw, vexl))) # Order must match KIND_ORDER. return troot, t256, tprefix, t16, t8e, tvex def add_opcode(self, opcode, descidx, root_idx, weak=False): opcode = self._transform_opcode(opcode) frontier = [(0, root_idx)] for elem_kind, elem in zip(self.KIND_ORDER, opcode): new_frontier = [] for entry_num, entry_idx in frontier: entry = self.trie[entry_num] if elem is None: if entry[entry_idx] is None or entry[entry_idx][0] != elem_kind: new_frontier.append((entry_num, entry_idx)) continue elem = list(range(self.TABLE_LENGTH[elem_kind])) if entry[entry_idx] is None: new_num = self._add_table(elem_kind) entry[entry_idx] = elem_kind, new_num elif entry[entry_idx][0] != elem_kind: # Need to add a new node here and copy entry one level below new_num = self._add_table(elem_kind) # Keep original entry, but clone others recursively self.trie[new_num][0] = entry[entry_idx] for i in range(1, len(self.trie[new_num])): self.trie[new_num][i] = self._clone(entry[entry_idx]) entry[entry_idx] = elem_kind, new_num for elem_idx in elem: new_frontier.append((entry[entry_idx][1], elem_idx)) frontier = new_frontier for entry_num, entry_idx in frontier: entry = self.trie[entry_num] if not entry[entry_idx] or entry[entry_idx][0] == EntryKind.WEAKINSTR: kind = EntryKind.INSTR if not weak else EntryKind.WEAKINSTR entry[entry_idx] = kind, descidx elif not weak: raise Exception(f"redundant non-weak {opcode}") def deduplicate(self): synonyms = {} for kind in self.KIND_ORDER[::-1]: entries = {} for num in self.kindmap[kind]: # Replace previous synonyms entry = self.trie[num] for i, elem in enumerate(entry): if elem and not elem[0].is_instr and elem[1] in synonyms: entry[i] = synonyms[elem[1]] unique_entry = tuple(entry) if len(set(unique_entry)) == 1: # Omit kind if all entries point to the same child synonyms[num] = entry[0] self.trie[num] = None elif unique_entry in entries: # Deduplicate entries of this kind synonyms[num] = kind, entries[unique_entry] self.trie[num] = None else: entries[unique_entry] = num def compile(self): offsets = [None] * len(self.trie) last_off = 0 for num, entry in enumerate(self.trie[1:], start=1): if not entry: continue offsets[num] = last_off last_off += (len(entry) + 3) & ~3 if last_off >= 0x8000: raise Exception(f"maximum table size exceeded: {last_off:#x}") data = [0] * last_off for off, entry in zip(offsets, self.trie): if off is None: continue for i, elem in enumerate(entry, start=off): if elem is not None: value = elem[1] << 2 if elem[0].is_instr else offsets[elem[1]] data[i] = (value << 1) | (elem[0].value & 7) stats = {k: len(v) for k, v in self.kindmap.items()} print("%d bytes" % (2*len(data)), stats) return tuple(data), [offsets[v] for _, v in self.trie[0]] def superstring(strs): # This faces the "shortest superstring" problem, which is NP-hard. # Preprocessing: remove any strings which are already completely covered realstrs = [] for s in sorted(strs, key=len, reverse=True): for s2 in realstrs: if s in s2: break else: realstrs.append(s) # Greedy heuristic generally yields acceptable results, though it depends on # the order of the menmonics. More compact results are possible, but the # expectable gains of an optimal result (probably with O(n!)) are small. merged = "" def maxoverlap(s1, s2): for i in range(min(len(s1), len(s2))-1, 0, -1): if s1[:i] == s2[-i:]: return i return 0 while realstrs: s = max(realstrs, key=lambda k: maxoverlap(k, merged)) merged += s[maxoverlap(s, merged):] realstrs.remove(s) return merged def decode_table(entries, modes): mnems = sorted({desc.mnemonic for _, _, desc in entries}) decode_mnems_lines = [f"FD_MNEMONIC({m},{i})\n" for i, m in enumerate(mnems)] trie = Trie(root_count=len(modes)) descs, desc_map = [], {} for weak, opcode, desc in entries: ign66 = opcode.prefix in ("NP", "66", "F2", "F3") modrm = opcode.modreg or opcode.opcext descenc = desc.encode(ign66, modrm) desc_idx = desc_map.get(descenc) if desc_idx is None: desc_idx = desc_map[descenc] = len(descs) descs.append(descenc) for i, mode in enumerate(modes): if "IO"[mode <= 32]+"64" not in desc.flags: trie.add_opcode(opcode, desc_idx, i, weak) trie.deduplicate() table_data, root_offsets = trie.compile() mnemonics_intel = [m.replace("SSE_", "").replace("MMX_", "") .replace("MOVABS", "MOV").replace("RESERVED_", "") .replace("JMPF", "JMP FAR").replace("CALLF", "CALL FAR") .replace("_S2G", "").replace("_G2S", "") .replace("_CR", "").replace("_DR", "") .replace("REP_", "REP ").replace("CMPXCHGD", "CMPXCHG") .replace("JCXZ", "JCXZ JECXZJRCXZ") .replace("C_SEP", "CWD CDQ CQO") .replace("C_EX", "CBW CWDECDQE") .lower() for m in mnems] mnemonics_str = superstring(mnemonics_intel) defines = ["FD_TABLE_OFFSET_%d %d\n"%k for k in zip(modes, root_offsets)] return "".join(decode_mnems_lines), f"""// Auto-generated file -- do not modify! #if defined(FD_DECODE_TABLE_DATA) {"".join(f"{e:#06x}," for e in table_data)} #elif defined(FD_DECODE_TABLE_DESCS) {"".join("{{{0},{1},{2},{3}}},".format(*desc) for desc in descs)} #elif defined(FD_DECODE_TABLE_STRTAB1) "{mnemonics_str}" #elif defined(FD_DECODE_TABLE_STRTAB2) {",".join(str(mnemonics_str.index(mnem)) for mnem in mnemonics_intel)} #elif defined(FD_DECODE_TABLE_STRTAB3) {",".join(str(len(mnem)) for mnem in mnemonics_intel)} #elif defined(FD_DECODE_TABLE_DEFINES) {"".join("#define " + line for line in defines)} #else #error "unspecified decode table" #endif """ def encode_table(entries): mnemonics = defaultdict(list) mnemonics["FE_NOP"].append(("NP", 0, 0, "0x90")) for weak, opcode, desc in entries: if "I64" in desc.flags or desc.mnemonic[:9] == "RESERVED_": continue opsizes = {8} if "SZ8" in desc.flags else {16, 32, 64} hasvex, vecsizes = False, {128} opc_i = opcode.opc if opcode.opcext: opc_i |= opcode.opcext << 8 if opcode.modreg and opcode.modreg[0] is not None: opc_i |= opcode.modreg[0] << 8 opc_flags = "" opc_flags += ["","|OPC_0F","|OPC_0F38","|OPC_0F3A"][opcode.escape] if "VSIB" in desc.flags: opc_flags += "|OPC_VSIB" if opcode.vex: hasvex, vecsizes = True, {128, 256} opc_flags += "|OPC_VEX" if opcode.prefix: if opcode.prefix in ("66", "F2", "F3"): opc_flags += "|OPC_" + opcode.prefix if "U66" not in desc.flags and opcode.prefix != "NFx": opsizes -= {16} if opcode.vexl == "IG": vecsizes = {0} elif opcode.vexl: vecsizes -= {128 if opcode.vexl == "1" else 256} if opcode.vexl == "1": opc_flags += "|OPC_VEXL" if opcode.rexw == "IG": opsizes = {0} elif opcode.rexw: opsizes -= {32 if opcode.rexw == "1" else 64} if opcode.rexw == "1": opc_flags += "|OPC_REXW" if "D64" in desc.flags: opsizes -= {32} if "SZ8" not in desc.flags and "INSTR_WIDTH" not in desc.flags and all(op.size != OpKind.SZ_OP for op in desc.operands): opsizes = {0} if "VSIB" not in desc.flags and all(op.size != OpKind.SZ_VEC for op in desc.operands): vecsizes = {0} # for VEX-encoded general-purpose instructions. if "ENC_NOSZ" in desc.flags: opsizes, vecsizes = {0}, {0} # Where to put the operand size in the mnemonic separate_opsize = "ENC_SEPSZ" in desc.flags prepend_opsize = max(opsizes) > 0 and not separate_opsize prepend_vecsize = hasvex and max(vecsizes) > 0 and not separate_opsize if "F64" in desc.flags: opsizes = {64} prepend_opsize = False modrm_type = opcode.modreg[1] if opcode.modreg else "rm" optypes_base = desc.optype_str() optypes = [optypes_base.replace("M", t) for t in modrm_type] prefixes = [("", "")] if "LOCK" in desc.flags: prefixes.append(("LOCK_", "|OPC_LOCK")) if "ENC_REP" in desc.flags: prefixes.append(("REP_", "|OPC_F3")) if "ENC_REPCC" in desc.flags: prefixes.append(("REPNZ_", "|OPC_F2")) prefixes.append(("REPZ_", "|OPC_F3")) for opsize, vecsize, prefix, ots in product(opsizes, vecsizes, prefixes, optypes): if prefix[1] == "|OPC_LOCK" and ots[0] != "m": continue imm_size = desc.imm_size(opsize//8) tys_i = desc.encode_regtys(ots, opsize//8) opc_s = hex(opc_i) + opc_flags + prefix[1] if opsize == 16: opc_s += "|OPC_66" if vecsize == 256: opc_s += "|OPC_VEXL" if opsize == 64 and "D64" not in desc.flags and "F64" not in desc.flags: opc_s += "|OPC_REXW" # Construct mnemonic name mnem_name = {"MOVABS": "MOV", "XCHG_NOP": "XCHG"}.get(desc.mnemonic, desc.mnemonic) name = "FE_" + prefix[0] + mnem_name if prepend_opsize and not ("D64" in desc.flags and opsize == 64): name += f"_{opsize}"[name[-1] not in "0123456789":] if prepend_vecsize: name += f"_{vecsize}"[name[-1] not in "0123456789":] for ot, op in zip(ots, desc.operands): name += ot.replace("o", "") if separate_opsize: name += f"{op.abssize(opsize//8, vecsize//8)*8}" mnemonics[name].append((desc.encoding, imm_size, tys_i, opc_s)) descs = "" alt_index = 0 for mnem, variants in mnemonics.items(): dedup = [] for variant in variants: # TODO: when adapting to 32-bit mode, handle S encodings. if not any(x[:3] == variant[:3] for x in dedup): dedup.append(variant) enc_prio = ["O", "OA", "OI", "IA", "M", "MI", "MR", "RM"] dedup.sort(key=lambda e: (e[1], e[0] in enc_prio and enc_prio.index(e[0]))) indices = [mnem] + [f"FE_MNEM_MAX+{alt_index+i}" for i in range(len(dedup) - 1)] alt_list = indices[1:] + ["0"] alt_index += len(alt_list) - 1 for idx, alt, (enc, immsz, tys_i, opc_s) in zip(indices, alt_list, dedup): descs += f"[{idx}] = {{ .enc = ENC_{enc}, .immsz = {immsz}, .tys = {tys_i:#x}, .opc = {opc_s}, .alt = {alt} }},\n" mnem_list = sorted(mnemonics.keys()) mnem_tab = "".join(f"FE_MNEMONIC({m},{i})\n" for i, m in enumerate(mnem_list)) return mnem_tab, descs if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--32", dest="modes", action="append_const", const=32) parser.add_argument("--64", dest="modes", action="append_const", const=64) parser.add_argument("--with-undoc", action="store_true") parser.add_argument("table", type=argparse.FileType('r')) parser.add_argument("decode_mnems", type=argparse.FileType('w')) parser.add_argument("decode_table", type=argparse.FileType('w')) parser.add_argument("encode_mnems", type=argparse.FileType('w')) parser.add_argument("encode_table", type=argparse.FileType('w')) args = parser.parse_args() entries = [] for line in args.table.read().splitlines(): if not line or line[0] == "#": continue line, weak = (line, False) if line[0] != "*" else (line[1:], True) opcode_string, desc_string = tuple(line.split(maxsplit=1)) opcode, desc = Opcode.parse(opcode_string), InstrDesc.parse(desc_string) if "UNDOC" not in desc.flags or args.with_undoc: entries.append((weak, opcode, desc)) fd_mnem_list, fd_table = decode_table(entries, args.modes) args.decode_mnems.write(fd_mnem_list) args.decode_table.write(fd_table) fe_mnem_list, fe_code = encode_table(entries) args.encode_mnems.write(fe_mnem_list) args.encode_table.write(fe_code)