parseinstr: Simplify opcode naming scheme

This commit is contained in:
Alexis Engelke
2020-11-08 13:46:54 +01:00
parent 9df6ac1788
commit 62b0420147
4 changed files with 278 additions and 280 deletions

View File

@@ -63,12 +63,13 @@ ENCODINGS = {
"MRI": InstrFlags(modrm_idx=0^3, modreg_idx=1^3, imm_idx=2^3, imm_control=4),
"RMI": InstrFlags(modrm_idx=1^3, modreg_idx=0^3, imm_idx=2^3, imm_control=4),
"MRC": InstrFlags(modrm_idx=0^3, modreg_idx=1^3, zeroreg_idx=2^3, zeroreg_val=1),
"AM": InstrFlags(modrm_idx=1^3, zeroreg_idx=0^3),
"MA": InstrFlags(modrm_idx=0^3, zeroreg_idx=1^3),
"I": InstrFlags(imm_idx=0^3, imm_control=4),
"IA": InstrFlags(zeroreg_idx=0^3, imm_idx=1^3, imm_control=4),
"O": InstrFlags(modreg_idx=0^3),
"OI": InstrFlags(modreg_idx=0^3, imm_idx=1^3, imm_control=4),
"OA": InstrFlags(modreg_idx=0^3, zeroreg_idx=1^3),
"AO": InstrFlags(modreg_idx=1^3, zeroreg_idx=0^3),
"A": InstrFlags(zeroreg_idx=0^3),
"D": InstrFlags(imm_idx=0^3, imm_control=6),
"FD": InstrFlags(zeroreg_idx=0^3, imm_idx=1^3, imm_control=2),
@@ -245,17 +246,17 @@ import re
opcode_regex = re.compile(
r"^(?:(?P<prefixes>(?P<vex>VEX\.)?(?P<legacy>NP|66|F2|F3|NFx)\." +
r"(?:W(?P<rexw>[01]|IG)\.)?(?:L(?P<vexl>[01]|IG)\.)?))?" +
r"(?P<escape>(?:0f|0f38|0f3a)?)" +
r"(?P<escape>0f38|0f3a|0f|)" +
r"(?P<opcode>[0-9a-f]{2})" +
r"(?P<modrm>//?[0-7]|//[c-f][0-9a-f])?" +
r"(?P<extended>\+)?$")
r"(?:(?P<extended>\+)|/(?P<modreg>[0-7][rm]?)|(?P<opcext>[c-f][0-9a-f]))?$")
class Opcode(NamedTuple):
prefix: Union[None, Tuple[bool, str]] # (False, NP/66/F2/F3), (True, NP/F2/F3)
prefix: Union[None, str] # None/NP/66/F2/F3/NFx
escape: int # [0, 0f, 0f38, 0f3a]
opc: int
opcext: Union[None, Tuple[bool, int]] # (False, T8), (True, T72), None
extended: bool # Extend opc or opcext, if present
modreg: Union[None, Tuple[int, str]] # (modreg, "r"/"m"/"rm"), None
opcext: Union[None, int] # 0xc0-0xff, or 0
vex: bool
vexl: Union[str, None] # 0, 1, IG, None = used, both
rexw: Union[str, None] # 0, 1, IG, None = used, both
@@ -264,22 +265,20 @@ class Opcode(NamedTuple):
def parse(cls, opcode_string):
match = opcode_regex.match(opcode_string)
if match is None:
raise Exception(opcode_string)
return None
opcext = match.group("modrm")
if opcext:
is72 = opcext[1] == "/"
opcext = is72, int(opcext[1 + is72:], 16)
if match.group("extended") and opcext and not opcext[0]:
raise Exception("invalid opcode extension: {}".format(opcode_string))
modreg = match.group("modreg")
if modreg:
modreg = int(modreg[0]), modreg[1] if len(modreg) == 2 else "rm"
return cls(
prefix=match.group("legacy"),
escape=["", "0f", "0f38", "0f3a"].index(match.group("escape")),
opc=int(match.group("opcode"), 16),
opcext=opcext,
extended=match.group("extended") is not None,
modreg=modreg,
opcext=int(match.group("opcext") or "0", 16) or None,
vex=match.group("vex") is not None,
vexl=match.group("vexl"),
rexw=match.group("rexw"),
@@ -288,7 +287,10 @@ class Opcode(NamedTuple):
def for_trie(self):
opcode = []
opcode.append((EntryKind.TABLE_ROOT, [self.escape | self.vex << 2]))
opcode.append((EntryKind.TABLE256, [self.opc]))
if not self.extended:
opcode.append((EntryKind.TABLE256, [self.opc]))
else:
opcode.append((EntryKind.TABLE256, [self.opc + i for i in range(8)]))
if self.prefix:
if self.prefix == "NFx":
opcode.append((EntryKind.TABLE_PREFIX, [0, 1]))
@@ -296,20 +298,11 @@ class Opcode(NamedTuple):
prefix_val = ["NP", "66", "F3", "F2"].index(self.prefix)
opcode.append((EntryKind.TABLE_PREFIX, [prefix_val]))
if self.opcext:
opcext_val = self.opcext[1]
if not self.opcext[0]:
opcode.append((EntryKind.TABLE16, [opcext_val, opcext_val | 8]))
elif opcext_val < 8:
opcode.append((EntryKind.TABLE16, [opcext_val]))
else:
opcode.append((EntryKind.TABLE16, [((opcext_val - 0xc0) >> 3) | 8]))
if not self.extended:
opcode.append((EntryKind.TABLE8E, [opcext_val & 7]))
else:
opcode.append((EntryKind.TABLE8E, list(range(8))))
if self.extended and not self.opcext:
last_type, last_indices = opcode[-1]
opcode[-1] = last_type, [last_indices[0] + i for i in range(8)]
opcode.append((EntryKind.TABLE16, [((self.opcext - 0xc0) >> 3) | 8]))
opcode.append((EntryKind.TABLE8E, [self.opcext & 7]))
if self.modreg:
mod = {"m": [0], "r": [1<<3], "rm": [0, 1<<3]}[self.modreg[1]]
opcode.append((EntryKind.TABLE16, [self.modreg[0] + x for x in mod]))
if self.vexl in ("0", "1") or self.rexw in ("0", "1"):
rexw = {"0": [0], "1": [1<<0], "IG": [0, 1<<0]}[self.rexw or "IG"]
vexl = {"0": [0], "1": [1<<1], "IG": [0, 1<<1]}[self.vexl or "IG"]
@@ -449,7 +442,11 @@ def encode_table(entries):
opsizes = {8} if "SIZE_8" in desc.flags else {16, 32, 64}
hasvex, vecsizes = False, {128}
opc_i = opcode.opc | (opcode.opcext[1] << 8 if opcode.opcext else 0)
opc_i = opcode.opc
if opcode.opcext:
opc_i |= opcode.opcext << 8
if opcode.modreg:
opc_i |= opcode.modreg[0] << 8
opc_flags = ""
opc_flags += ["","|OPC_0F","|OPC_0F38","|OPC_0F3A"][opcode.escape]
if opcode.vex:
@@ -490,9 +487,10 @@ def encode_table(entries):
if enc.modrm_idx:
if "NOMEM" in desc.flags:
optypes[enc.modrm_idx^3] = "r"
elif ((opcode.opcext and opcode.opcext[0] and opcode.opcext[1] < 8)
or desc.operands[enc.modrm_idx^3].kind == OpKind.K_MEM):
elif desc.operands[enc.modrm_idx^3].kind == OpKind.K_MEM:
optypes[enc.modrm_idx^3] = "m"
elif opcode.modreg:
optypes[enc.modrm_idx^3] = opcode.modreg[1]
else:
optypes[enc.modrm_idx^3] = "rm"
if enc.modreg_idx: optypes[enc.modreg_idx^3] = "r"