parseinstr: Simplify opcode naming scheme

2020-11-08 13:46:54 +01:00
parent 9df6ac1788
commit 62b0420147
4 changed files with 278 additions and 280 deletions
--- a/parseinstrs.py
+++ b/parseinstrs.py
@@ -63,12 +63,13 @@ ENCODINGS = {
    "MRI": InstrFlags(modrm_idx=0^3, modreg_idx=1^3, imm_idx=2^3, imm_control=4),
    "RMI": InstrFlags(modrm_idx=1^3, modreg_idx=0^3, imm_idx=2^3, imm_control=4),
    "MRC": InstrFlags(modrm_idx=0^3, modreg_idx=1^3, zeroreg_idx=2^3, zeroreg_val=1),
+    "AM": InstrFlags(modrm_idx=1^3, zeroreg_idx=0^3),
+    "MA": InstrFlags(modrm_idx=0^3, zeroreg_idx=1^3),
    "I": InstrFlags(imm_idx=0^3, imm_control=4),
    "IA": InstrFlags(zeroreg_idx=0^3, imm_idx=1^3, imm_control=4),
    "O": InstrFlags(modreg_idx=0^3),
    "OI": InstrFlags(modreg_idx=0^3, imm_idx=1^3, imm_control=4),
    "OA": InstrFlags(modreg_idx=0^3, zeroreg_idx=1^3),
-    "AO": InstrFlags(modreg_idx=1^3, zeroreg_idx=0^3),
    "A": InstrFlags(zeroreg_idx=0^3),
    "D": InstrFlags(imm_idx=0^3, imm_control=6),
    "FD": InstrFlags(zeroreg_idx=0^3, imm_idx=1^3, imm_control=2),
@@ -245,17 +246,17 @@ import re
 opcode_regex = re.compile(
    r"^(?:(?P<prefixes>(?P<vex>VEX\.)?(?P<legacy>NP|66|F2|F3|NFx)\." +
                     r"(?:W(?P<rexw>[01]|IG)\.)?(?:L(?P<vexl>[01]|IG)\.)?))?" +
-     r"(?P<escape>(?:0f|0f38|0f3a)?)" +
+     r"(?P<escape>0f38|0f3a|0f|)" +
     r"(?P<opcode>[0-9a-f]{2})" +
-     r"(?P<modrm>//?[0-7]|//[c-f][0-9a-f])?" +
-     r"(?P<extended>\+)?$")
+     r"(?:(?P<extended>\+)|/(?P<modreg>[0-7][rm]?)|(?P<opcext>[c-f][0-9a-f]))?$")

 class Opcode(NamedTuple):
-    prefix: Union[None, Tuple[bool, str]] # (False, NP/66/F2/F3), (True, NP/F2/F3)
+    prefix: Union[None, str] # None/NP/66/F2/F3/NFx
    escape: int # [0, 0f, 0f38, 0f3a]
    opc: int
-    opcext: Union[None, Tuple[bool, int]] # (False, T8), (True, T72), None
    extended: bool # Extend opc or opcext, if present
+    modreg: Union[None, Tuple[int, str]] # (modreg, "r"/"m"/"rm"), None
+    opcext: Union[None, int] # 0xc0-0xff, or 0
    vex: bool
    vexl: Union[str, None] # 0, 1, IG, None = used, both
    rexw: Union[str, None] # 0, 1, IG, None = used, both
@@ -264,22 +265,20 @@ class Opcode(NamedTuple):
    def parse(cls, opcode_string):
        match = opcode_regex.match(opcode_string)
        if match is None:
+            raise Exception(opcode_string)
            return None

-        opcext = match.group("modrm")
-        if opcext:
-            is72 = opcext[1] == "/"
-            opcext = is72, int(opcext[1 + is72:], 16)
-
-        if match.group("extended") and opcext and not opcext[0]:
-            raise Exception("invalid opcode extension: {}".format(opcode_string))
+        modreg = match.group("modreg")
+        if modreg:
+            modreg = int(modreg[0]), modreg[1] if len(modreg) == 2 else "rm"

        return cls(
            prefix=match.group("legacy"),
            escape=["", "0f", "0f38", "0f3a"].index(match.group("escape")),
            opc=int(match.group("opcode"), 16),
-            opcext=opcext,
            extended=match.group("extended") is not None,
+            modreg=modreg,
+            opcext=int(match.group("opcext") or "0", 16) or None,
            vex=match.group("vex") is not None,
            vexl=match.group("vexl"),
            rexw=match.group("rexw"),
@@ -288,7 +287,10 @@ class Opcode(NamedTuple):
    def for_trie(self):
        opcode = []
        opcode.append((EntryKind.TABLE_ROOT, [self.escape | self.vex << 2]))
-        opcode.append((EntryKind.TABLE256, [self.opc]))
+        if not self.extended:
+            opcode.append((EntryKind.TABLE256, [self.opc]))
+        else:
+            opcode.append((EntryKind.TABLE256, [self.opc + i for i in range(8)]))
        if self.prefix:
            if self.prefix == "NFx":
                opcode.append((EntryKind.TABLE_PREFIX, [0, 1]))
@@ -296,20 +298,11 @@ class Opcode(NamedTuple):
                prefix_val = ["NP", "66", "F3", "F2"].index(self.prefix)
                opcode.append((EntryKind.TABLE_PREFIX, [prefix_val]))
        if self.opcext:
-            opcext_val = self.opcext[1]
-            if not self.opcext[0]:
-                opcode.append((EntryKind.TABLE16, [opcext_val, opcext_val | 8]))
-            elif opcext_val < 8:
-                opcode.append((EntryKind.TABLE16, [opcext_val]))
-            else:
-                opcode.append((EntryKind.TABLE16, [((opcext_val - 0xc0) >> 3) | 8]))
-                if not self.extended:
-                    opcode.append((EntryKind.TABLE8E, [opcext_val & 7]))
-                else:
-                    opcode.append((EntryKind.TABLE8E, list(range(8))))
-        if self.extended and not self.opcext:
-            last_type, last_indices = opcode[-1]
-            opcode[-1] = last_type, [last_indices[0] + i for i in range(8)]
+            opcode.append((EntryKind.TABLE16, [((self.opcext - 0xc0) >> 3) | 8]))
+            opcode.append((EntryKind.TABLE8E, [self.opcext & 7]))
+        if self.modreg:
+            mod = {"m": [0], "r": [1<<3], "rm": [0, 1<<3]}[self.modreg[1]]
+            opcode.append((EntryKind.TABLE16, [self.modreg[0] + x for x in mod]))
        if self.vexl in ("0", "1") or self.rexw in ("0", "1"):
            rexw = {"0": [0], "1": [1<<0], "IG": [0, 1<<0]}[self.rexw or "IG"]
            vexl = {"0": [0], "1": [1<<1], "IG": [0, 1<<1]}[self.vexl or "IG"]
@@ -449,7 +442,11 @@ def encode_table(entries):
        opsizes = {8} if "SIZE_8" in desc.flags else {16, 32, 64}
        hasvex, vecsizes = False, {128}

-        opc_i = opcode.opc | (opcode.opcext[1] << 8 if opcode.opcext else 0)
+        opc_i = opcode.opc
+        if opcode.opcext:
+            opc_i |= opcode.opcext << 8
+        if opcode.modreg:
+            opc_i |= opcode.modreg[0] << 8
        opc_flags = ""
        opc_flags += ["","|OPC_0F","|OPC_0F38","|OPC_0F3A"][opcode.escape]
        if opcode.vex:
@@ -490,9 +487,10 @@ def encode_table(entries):
        if enc.modrm_idx:
            if "NOMEM" in desc.flags:
                optypes[enc.modrm_idx^3] = "r"
-            elif ((opcode.opcext and opcode.opcext[0] and opcode.opcext[1] < 8)
-                  or desc.operands[enc.modrm_idx^3].kind == OpKind.K_MEM):
+            elif desc.operands[enc.modrm_idx^3].kind == OpKind.K_MEM:
                optypes[enc.modrm_idx^3] = "m"
+            elif opcode.modreg:
+                optypes[enc.modrm_idx^3] = opcode.modreg[1]
            else:
                optypes[enc.modrm_idx^3] = "rm"
        if enc.modreg_idx: optypes[enc.modreg_idx^3] = "r"