Use argparse
This commit is contained in:
151
parseinstrs.py
151
parseinstrs.py
@@ -1,12 +1,12 @@
|
|||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import argparse
|
||||||
from binascii import unhexlify
|
from binascii import unhexlify
|
||||||
from collections import OrderedDict, defaultdict, namedtuple
|
from collections import OrderedDict, defaultdict, namedtuple, Counter
|
||||||
from copy import copy
|
from copy import copy
|
||||||
from enum import Enum, IntEnum
|
from enum import Enum, IntEnum
|
||||||
from itertools import accumulate
|
from itertools import accumulate
|
||||||
import struct
|
import struct
|
||||||
import sys
|
|
||||||
|
|
||||||
def bitstruct(name, fields):
|
def bitstruct(name, fields):
|
||||||
names, sizes = zip(*(field.split(":") for field in fields))
|
names, sizes = zip(*(field.split(":") for field in fields))
|
||||||
@@ -126,15 +126,33 @@ class EntryKind(Enum):
|
|||||||
TABLE72 = 4
|
TABLE72 = 4
|
||||||
TABLE_PREFIX = 5
|
TABLE_PREFIX = 5
|
||||||
|
|
||||||
@property
|
class TrieEntry(namedtuple("TrieEntry", "kind,items,payload")):
|
||||||
def table_length(self):
|
__slots__ = ()
|
||||||
return {
|
TABLE_LENGTH = {
|
||||||
EntryKind.INSTR: 0,
|
|
||||||
EntryKind.TABLE256: 256,
|
EntryKind.TABLE256: 256,
|
||||||
EntryKind.TABLE8: 8,
|
EntryKind.TABLE8: 8,
|
||||||
EntryKind.TABLE72: 72,
|
EntryKind.TABLE72: 72,
|
||||||
EntryKind.TABLE_PREFIX: 16
|
EntryKind.TABLE_PREFIX: 16
|
||||||
}[self]
|
}
|
||||||
|
@classmethod
|
||||||
|
def table(cls, kind):
|
||||||
|
return cls(kind, [None] * cls.TABLE_LENGTH[kind], b"")
|
||||||
|
@classmethod
|
||||||
|
def instr(cls, payload):
|
||||||
|
return cls(EntryKind.INSTR, [], payload)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def encode_length(self):
|
||||||
|
return len(self.payload) + 2 * len(self.items)
|
||||||
|
def encode(self, encode_item):
|
||||||
|
enc_items = (encode_item(item) if item else 0 for item in self.items)
|
||||||
|
return self.payload + struct.pack("<%dH"%len(self.items), *enc_items)
|
||||||
|
|
||||||
|
def readonly(self):
|
||||||
|
return TrieEntry(self.kind, tuple(self.items), self.payload)
|
||||||
|
def map(self, mapping):
|
||||||
|
mapped_items = (mapping.get(v, v) for v in self.items)
|
||||||
|
return TrieEntry(self.kind, tuple(mapped_items), self.payload)
|
||||||
|
|
||||||
import re
|
import re
|
||||||
opcode_regex = re.compile(r"^(?P<prefixes>(?P<vex>VEX\.)?(?P<legacy>NP|66|F2|F3)\.(?P<rexw>W[01]\.)?(?P<vexl>L[01]\.)?)?(?P<opcode>(?:[0-9a-f]{2})+)(?P<modrm>//?[0-7]|//[c-f][0-9a-f])?(?P<extended>\+)?$")
|
opcode_regex = re.compile(r"^(?P<prefixes>(?P<vex>VEX\.)?(?P<legacy>NP|66|F2|F3)\.(?P<rexw>W[01]\.)?(?P<vexl>L[01]\.)?)?(?P<opcode>(?:[0-9a-f]{2})+)(?P<modrm>//?[0-7]|//[c-f][0-9a-f])?(?P<extended>\+)?$")
|
||||||
@@ -193,49 +211,36 @@ class Table:
|
|||||||
def __init__(self, root_count=1):
|
def __init__(self, root_count=1):
|
||||||
self.data = OrderedDict()
|
self.data = OrderedDict()
|
||||||
for i in range(root_count):
|
for i in range(root_count):
|
||||||
self.data["root%d"%i] = (EntryKind.TABLE256, [None] * 256)
|
self.data["root%d"%i] = TrieEntry.table(EntryKind.TABLE256)
|
||||||
|
self.offsets = {}
|
||||||
|
self.annotations = {}
|
||||||
|
|
||||||
def compile(self, mnemonics_lut):
|
def add_opcode(self, opcode, instr_encoding, root_idx=0):
|
||||||
offsets = {}
|
opcode = list(opcode) + [(None, None)]
|
||||||
annotations = {}
|
opcode = [(opcode[i+1][0], opcode[i][1]) for i in range(len(opcode)-1)]
|
||||||
currentOffset = 0
|
|
||||||
stats = defaultdict(int)
|
name, table = "t%d"%root_idx, self.data["root%d"%root_idx]
|
||||||
for name, (kind, _) in self.data.items():
|
for kind, byte in opcode[:-1]:
|
||||||
annotations[currentOffset] = "%s(%d)" % (name, kind.value)
|
if table.items[byte] is None:
|
||||||
offsets[name] = currentOffset
|
name += "{:02x}".format(byte)
|
||||||
stats[kind] += 1
|
self.data[name] = TrieEntry.table(kind)
|
||||||
if kind.table_length:
|
table.items[byte] = name
|
||||||
currentOffset += kind.table_length * 2
|
|
||||||
else:
|
else:
|
||||||
currentOffset += 6
|
name = table.items[byte]
|
||||||
currentOffset = (currentOffset + 7) & ~7
|
table = self.data[name]
|
||||||
assert currentOffset < 0x10000
|
assert table.kind == kind
|
||||||
|
|
||||||
data = b""
|
# An opcode can occur once only.
|
||||||
for name, (kind, value) in self.data.items():
|
assert table.items[opcode[-1][1]] is None
|
||||||
if len(data) < offsets[name]:
|
|
||||||
data += b"\0" * (offsets[name] - len(data))
|
|
||||||
assert len(data) == offsets[name]
|
|
||||||
if kind == EntryKind.INSTR:
|
|
||||||
data += value
|
|
||||||
else: # Table
|
|
||||||
# count = sum(1 for x in value if x is not None)
|
|
||||||
# print("Table of kind", kind, "with %d/%d entries"%(count, kind.table_length))
|
|
||||||
for i, entry in enumerate(value):
|
|
||||||
if entry is not None:
|
|
||||||
targetKind, _ = self.data[entry]
|
|
||||||
value = (offsets[entry] & ~7) | targetKind.value
|
|
||||||
else:
|
|
||||||
value = 0
|
|
||||||
data += struct.pack("<H", value)
|
|
||||||
|
|
||||||
print("%d bytes" % len(data), stats)
|
name += "{:02x}/{}".format(opcode[-1][1], "??")
|
||||||
return data, annotations
|
table.items[opcode[-1][1]] = name
|
||||||
|
self.data[name] = TrieEntry.instr(instr_encoding)
|
||||||
|
|
||||||
def deduplicate(self):
|
def deduplicate(self):
|
||||||
# Make values hashable
|
# Make values hashable
|
||||||
for n, (k, v) in self.data.items():
|
for name, entry in self.data.items():
|
||||||
self.data[n] = k, (v if k == EntryKind.INSTR else tuple(v))
|
self.data[name] = entry.readonly()
|
||||||
synonyms = True
|
synonyms = True
|
||||||
while synonyms:
|
while synonyms:
|
||||||
entries = {} # Mapping from entry to name
|
entries = {} # Mapping from entry to name
|
||||||
@@ -245,33 +250,33 @@ class Table:
|
|||||||
synonyms[name] = entries[entry]
|
synonyms[name] = entries[entry]
|
||||||
else:
|
else:
|
||||||
entries[entry] = name
|
entries[entry] = name
|
||||||
for name, (kind, value) in self.data.items():
|
for name, entry in self.data.items():
|
||||||
if kind != EntryKind.INSTR:
|
self.data[name] = entry.map(synonyms)
|
||||||
self.data[name] = kind, tuple(synonyms.get(v, v) for v in value)
|
|
||||||
for key in synonyms:
|
for key in synonyms:
|
||||||
del self.data[key]
|
del self.data[key]
|
||||||
|
|
||||||
def add_opcode(self, opcode, instr_encoding, root_idx=0):
|
def calc_offsets(self):
|
||||||
opcode = list(opcode) + [(None, None)]
|
current = 0
|
||||||
opcode = [(opcode[i+1][0], opcode[i][1]) for i in range(len(opcode)-1)]
|
for name, entry in self.data.items():
|
||||||
|
self.annotations[current] = "%s(%d)" % (name, entry.kind.value)
|
||||||
|
self.offsets[name] = current
|
||||||
|
current += (entry.encode_length + 7) & ~7
|
||||||
|
assert current < 0x10000
|
||||||
|
|
||||||
name, table = "t%d"%root_idx, self.data["root%d"%root_idx]
|
def encode_item(self, name):
|
||||||
for kind, byte in opcode[:-1]:
|
return self.offsets[name] | self.data[name].kind.value
|
||||||
if table[1][byte] is None:
|
|
||||||
name += "{:02x}".format(byte)
|
|
||||||
self.data[name] = kind, [None] * kind.table_length
|
|
||||||
table[1][byte] = name
|
|
||||||
else:
|
|
||||||
name = table[1][byte]
|
|
||||||
table = self.data[name]
|
|
||||||
assert table[0] == kind
|
|
||||||
|
|
||||||
# An opcode can occur once only.
|
def compile(self):
|
||||||
assert table[1][opcode[-1][1]] is None
|
self.calc_offsets()
|
||||||
|
ordered = sorted((off, self.data[k]) for k, off in self.offsets.items())
|
||||||
|
|
||||||
name += "{:02x}/{}".format(opcode[-1][1], "??")
|
data = b""
|
||||||
table[1][opcode[-1][1]] = name
|
for off, entry in ordered:
|
||||||
self.data[name] = EntryKind.INSTR, instr_encoding
|
data += b"\x00" * (off - len(data)) + entry.encode(self.encode_item)
|
||||||
|
|
||||||
|
stats = dict(Counter(entry.kind for entry in self.data.values()))
|
||||||
|
print("%d bytes" % len(data), stats)
|
||||||
|
return data, self.annotations
|
||||||
|
|
||||||
def wrap(string):
|
def wrap(string):
|
||||||
return "\n".join(string[i:i+80] for i in range(0, len(string), 80))
|
return "\n".join(string[i:i+80] for i in range(0, len(string), 80))
|
||||||
@@ -299,10 +304,14 @@ template = """// Auto-generated file -- do not modify!
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("table", type=argparse.FileType('r'))
|
||||||
|
parser.add_argument("output", type=argparse.FileType('w'))
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
entries = []
|
entries = []
|
||||||
with open(sys.argv[1], "r") as f:
|
for line in args.table.read().splitlines():
|
||||||
for line in f.read().splitlines():
|
if not line or line[0] == "#": continue
|
||||||
if line and line[0] != "#":
|
|
||||||
opcode_string, desc = tuple(line.split(maxsplit=1))
|
opcode_string, desc = tuple(line.split(maxsplit=1))
|
||||||
for opcode in parse_opcode(opcode_string):
|
for opcode in parse_opcode(opcode_string):
|
||||||
entries.append((opcode, InstrDesc.parse(desc)))
|
entries.append((opcode, InstrDesc.parse(desc)))
|
||||||
@@ -328,12 +337,10 @@ if __name__ == "__main__":
|
|||||||
mnemonic_cstr = '"' + "\\0".join(mnemonics) + '"'
|
mnemonic_cstr = '"' + "\\0".join(mnemonics) + '"'
|
||||||
|
|
||||||
file = template.format(
|
file = template.format(
|
||||||
hex_table32=bytes_to_table(*table32.compile(mnemonics_lut)),
|
hex_table32=bytes_to_table(*table32.compile()),
|
||||||
hex_table64=bytes_to_table(*table64.compile(mnemonics_lut)),
|
hex_table64=bytes_to_table(*table64.compile()),
|
||||||
mnemonic_list="\n".join("FD_MNEMONIC(%s,%d)"%entry for entry in mnemonics_lut.items()),
|
mnemonic_list="\n".join("FD_MNEMONIC(%s,%d)"%entry for entry in mnemonics_lut.items()),
|
||||||
mnemonic_cstr=mnemonic_cstr,
|
mnemonic_cstr=mnemonic_cstr,
|
||||||
mnemonic_offsets=",".join(str(off) for off in mnemonic_tab),
|
mnemonic_offsets=",".join(str(off) for off in mnemonic_tab),
|
||||||
)
|
)
|
||||||
|
args.output.write(file)
|
||||||
with open(sys.argv[2], "w") as f:
|
|
||||||
f.write(file)
|
|
||||||
|
|||||||
Reference in New Issue
Block a user