parseinstrs: Simplify operand kind parsing

This commit is contained in:
Alexis Engelke
2021-01-22 10:55:11 +01:00
parent bd611902b0
commit 62018556a1
2 changed files with 22 additions and 50 deletions

View File

@@ -1557,7 +1557,7 @@ VEX.66.W0.0f3852 RVM XMM XMM XMM - VPDPWSSD
VEX.66.W0.0f3853 RVM XMM XMM XMM - VPDPWSSDS VEX.66.W0.0f3853 RVM XMM XMM XMM - VPDPWSSDS
# HRESET # HRESET
F3.0f3af0c0 IA IMM8 GP32 - - HRESET #F3.0f3af0c0 IA IMM8 GP32 - - HRESET
# SERIALIZE # SERIALIZE
NP.0f01e8 NP - - - - SERIALIZE NP.0f01e8 NP - - - - SERIALIZE

View File

@@ -4,6 +4,7 @@ import argparse
from collections import OrderedDict, defaultdict, namedtuple, Counter from collections import OrderedDict, defaultdict, namedtuple, Counter
from enum import Enum from enum import Enum
from itertools import product from itertools import product
import re
import struct import struct
from typing import NamedTuple, FrozenSet, List, Tuple, Union, Optional, ByteString from typing import NamedTuple, FrozenSet, List, Tuple, Union, Optional, ByteString
@@ -76,14 +77,15 @@ ENCODINGS = {
"MVR": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=2^3, vexreg_idx=1^3), "MVR": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=2^3, vexreg_idx=1^3),
} }
OPKIND_REGEX = re.compile(r"^([A-Z]+)([0-9]+)?$")
OPKIND_DEFAULTS = {"GP": -1, "IMM": -1, "SEG": -1, "MEM": -1, "XMM": -2, "MMX": 8, "FPU": 10}
OPKIND_KINDS = ("IMM", "MEM", "GP", "MMX", "XMM", "SEG", "FPU", "MEM", "MASK", "CR", "DR", "TMM", "BND")
class OpKind(NamedTuple): class OpKind(NamedTuple):
size: int size: int
kind: str kind: str
SZ_OP = -1 SZ_OP = -1
SZ_VEC = -2 SZ_VEC = -2
K_MEM = "mem"
K_IMM = "imm"
def abssize(self, opsz=None, vecsz=None): def abssize(self, opsz=None, vecsz=None):
res = opsz if self.size == self.SZ_OP else \ res = opsz if self.size == self.SZ_OP else \
@@ -91,48 +93,17 @@ class OpKind(NamedTuple):
if res is None: if res is None:
raise Exception("unspecified operand size") raise Exception("unspecified operand size")
return res return res
@classmethod
OPKINDS = { def parse(cls, op):
# sizeidx (0, fixedsz, opsz, vecsz), fixedsz (log2), regtype op = {"MEMZ": "MEM0", "MEMV": "XMM"}.get(op, op)
"IMM": OpKind(OpKind.SZ_OP, OpKind.K_IMM), match = OPKIND_REGEX.match(op)
"IMM8": OpKind(1, OpKind.K_IMM), if not match:
"IMM16": OpKind(2, OpKind.K_IMM), raise Exception(f"invalid opkind str: {op}")
"IMM32": OpKind(4, OpKind.K_IMM), kind, size = match.groups()
"IMM64": OpKind(8, OpKind.K_IMM), size = int(size) // 8 if size else OPKIND_DEFAULTS.get(kind, 0)
"GP": OpKind(OpKind.SZ_OP, "GP"), if kind not in OPKIND_KINDS:
"GP8": OpKind(1, "GP"), raise Exception(f"invalid opkind kind: {op}")
"GP16": OpKind(2, "GP"), return cls(size, kind)
"GP32": OpKind(4, "GP"),
"GP64": OpKind(8, "GP"),
"MMX": OpKind(8, "MMX"),
"XMM": OpKind(OpKind.SZ_VEC, "XMM"),
"XMM8": OpKind(1, "XMM"),
"XMM16": OpKind(2, "XMM"),
"XMM32": OpKind(4, "XMM"),
"XMM64": OpKind(8, "XMM"),
"XMM128": OpKind(16, "XMM"),
"XMM256": OpKind(32, "XMM"),
"SEG": OpKind(OpKind.SZ_OP, "SEG"),
"SEG16": OpKind(2, "SEG"),
"FPU": OpKind(10, "FPU"),
"MEM": OpKind(OpKind.SZ_OP, OpKind.K_MEM),
"MEMV": OpKind(OpKind.SZ_VEC, OpKind.K_MEM),
"MEMZ": OpKind(0, OpKind.K_MEM),
"MEM8": OpKind(1, OpKind.K_MEM),
"MEM16": OpKind(2, OpKind.K_MEM),
"MEM32": OpKind(4, OpKind.K_MEM),
"MEM64": OpKind(8, OpKind.K_MEM),
"MEM128": OpKind(16, OpKind.K_MEM),
"MEM256": OpKind(32, OpKind.K_MEM),
"MEM512": OpKind(64, OpKind.K_MEM),
"MASK8": OpKind(1, "MASK"),
"MASK16": OpKind(2, "MASK"),
"MASK32": OpKind(4, "MASK"),
"MASK64": OpKind(8, "MASK"),
"BND": OpKind(0, "BND"),
"CR": OpKind(0, "CR"),
"DR": OpKind(0, "DR"),
}
class InstrDesc(NamedTuple): class InstrDesc(NamedTuple):
mnemonic: str mnemonic: str
@@ -150,7 +121,7 @@ class InstrDesc(NamedTuple):
@classmethod @classmethod
def parse(cls, desc): def parse(cls, desc):
desc = desc.split() desc = desc.split()
operands = tuple(OPKINDS[op] for op in desc[1:5] if op != "-") operands = tuple(OpKind.parse(op) for op in desc[1:5] if op != "-")
return cls(desc[5], desc[0], operands, frozenset(desc[6:])) return cls(desc[5], desc[0], operands, frozenset(desc[6:]))
def encode(self, ign66, modrm): def encode(self, ign66, modrm):
@@ -189,7 +160,7 @@ class InstrDesc(NamedTuple):
extraflags["ign66"] = 1 extraflags["ign66"] = 1
if flags.imm_control >= 4: if flags.imm_control >= 4:
imm_op = next(op for op in self.operands if op.kind == OpKind.K_IMM) imm_op = self.operands[flags.imm_idx^3]
if ("IMM_8" in self.flags or imm_op.size == 1 or if ("IMM_8" in self.flags or imm_op.size == 1 or
(imm_op.size == OpKind.SZ_OP and "SIZE_8" in self.flags)): (imm_op.size == OpKind.SZ_OP and "SIZE_8" in self.flags)):
extraflags["imm_control"] = flags.imm_control | 1 extraflags["imm_control"] = flags.imm_control | 1
@@ -229,7 +200,6 @@ class TrieEntry(NamedTuple):
def instr(cls, descidx): def instr(cls, descidx):
return cls(EntryKind.INSTR, (), descidx) return cls(EntryKind.INSTR, (), descidx)
import re
opcode_regex = re.compile( opcode_regex = re.compile(
r"^(?:(?P<prefixes>(?P<vex>VEX\.)?(?P<legacy>NP|66|F2|F3|NFx)\." + r"^(?:(?P<prefixes>(?P<vex>VEX\.)?(?P<legacy>NP|66|F2|F3|NFx)\." +
r"(?:W(?P<rexw>[01]|IG)\.)?(?:L(?P<vexl>[01]|IG)\.)?))?" + r"(?:W(?P<rexw>[01]|IG)\.)?(?:L(?P<vexl>[01]|IG)\.)?))?" +
@@ -583,6 +553,8 @@ def encode_table(entries):
for ot, op in zip(ots, desc.operands): for ot, op in zip(ots, desc.operands):
if ot == "m": if ot == "m":
tys.append(0xf) tys.append(0xf)
elif ot in "io":
tys.append(0)
elif op.kind == "GP": elif op.kind == "GP":
if (desc.mnemonic == "MOVSX" or desc.mnemonic == "MOVZX" or if (desc.mnemonic == "MOVSX" or desc.mnemonic == "MOVZX" or
opsize == 8): opsize == 8):
@@ -591,9 +563,9 @@ def encode_table(entries):
tys.append(1) tys.append(1)
else: else:
tys.append({ tys.append({
"imm": 0, "SEG": 3, "FPU": 4, "MMX": 5, "XMM": 6, "SEG": 3, "FPU": 4, "MMX": 5, "XMM": 6,
"BND": 8, "CR": 9, "DR": 10, "BND": 8, "CR": 9, "DR": 10,
}.get(op.kind, -1)) }[op.kind])
tys_i = sum(ty << (4*i) for i, ty in enumerate(tys)) tys_i = sum(ty << (4*i) for i, ty in enumerate(tys))
opc_s = hex(opc_i) + opc_flags + prefix[1] opc_s = hex(opc_i) + opc_flags + prefix[1]