Use argparse

This commit is contained in:
Alexis Engelke
2019-02-23 15:25:34 +01:00
parent da683d850a
commit 256806e4b6

View File

@@ -1,12 +1,12 @@
#!/usr/bin/python3 #!/usr/bin/python3
import argparse
from binascii import unhexlify from binascii import unhexlify
from collections import OrderedDict, defaultdict, namedtuple from collections import OrderedDict, defaultdict, namedtuple, Counter
from copy import copy from copy import copy
from enum import Enum, IntEnum from enum import Enum, IntEnum
from itertools import accumulate from itertools import accumulate
import struct import struct
import sys
def bitstruct(name, fields): def bitstruct(name, fields):
names, sizes = zip(*(field.split(":") for field in fields)) names, sizes = zip(*(field.split(":") for field in fields))
@@ -126,15 +126,33 @@ class EntryKind(Enum):
TABLE72 = 4 TABLE72 = 4
TABLE_PREFIX = 5 TABLE_PREFIX = 5
class TrieEntry(namedtuple("TrieEntry", "kind,items,payload")):
__slots__ = ()
TABLE_LENGTH = {
EntryKind.TABLE256: 256,
EntryKind.TABLE8: 8,
EntryKind.TABLE72: 72,
EntryKind.TABLE_PREFIX: 16
}
@classmethod
def table(cls, kind):
return cls(kind, [None] * cls.TABLE_LENGTH[kind], b"")
@classmethod
def instr(cls, payload):
return cls(EntryKind.INSTR, [], payload)
@property @property
def table_length(self): def encode_length(self):
return { return len(self.payload) + 2 * len(self.items)
EntryKind.INSTR: 0, def encode(self, encode_item):
EntryKind.TABLE256: 256, enc_items = (encode_item(item) if item else 0 for item in self.items)
EntryKind.TABLE8: 8, return self.payload + struct.pack("<%dH"%len(self.items), *enc_items)
EntryKind.TABLE72: 72,
EntryKind.TABLE_PREFIX: 16 def readonly(self):
}[self] return TrieEntry(self.kind, tuple(self.items), self.payload)
def map(self, mapping):
mapped_items = (mapping.get(v, v) for v in self.items)
return TrieEntry(self.kind, tuple(mapped_items), self.payload)
import re import re
opcode_regex = re.compile(r"^(?P<prefixes>(?P<vex>VEX\.)?(?P<legacy>NP|66|F2|F3)\.(?P<rexw>W[01]\.)?(?P<vexl>L[01]\.)?)?(?P<opcode>(?:[0-9a-f]{2})+)(?P<modrm>//?[0-7]|//[c-f][0-9a-f])?(?P<extended>\+)?$") opcode_regex = re.compile(r"^(?P<prefixes>(?P<vex>VEX\.)?(?P<legacy>NP|66|F2|F3)\.(?P<rexw>W[01]\.)?(?P<vexl>L[01]\.)?)?(?P<opcode>(?:[0-9a-f]{2})+)(?P<modrm>//?[0-7]|//[c-f][0-9a-f])?(?P<extended>\+)?$")
@@ -193,49 +211,36 @@ class Table:
def __init__(self, root_count=1): def __init__(self, root_count=1):
self.data = OrderedDict() self.data = OrderedDict()
for i in range(root_count): for i in range(root_count):
self.data["root%d"%i] = (EntryKind.TABLE256, [None] * 256) self.data["root%d"%i] = TrieEntry.table(EntryKind.TABLE256)
self.offsets = {}
self.annotations = {}
def compile(self, mnemonics_lut): def add_opcode(self, opcode, instr_encoding, root_idx=0):
offsets = {} opcode = list(opcode) + [(None, None)]
annotations = {} opcode = [(opcode[i+1][0], opcode[i][1]) for i in range(len(opcode)-1)]
currentOffset = 0
stats = defaultdict(int) name, table = "t%d"%root_idx, self.data["root%d"%root_idx]
for name, (kind, _) in self.data.items(): for kind, byte in opcode[:-1]:
annotations[currentOffset] = "%s(%d)" % (name, kind.value) if table.items[byte] is None:
offsets[name] = currentOffset name += "{:02x}".format(byte)
stats[kind] += 1 self.data[name] = TrieEntry.table(kind)
if kind.table_length: table.items[byte] = name
currentOffset += kind.table_length * 2
else: else:
currentOffset += 6 name = table.items[byte]
currentOffset = (currentOffset + 7) & ~7 table = self.data[name]
assert currentOffset < 0x10000 assert table.kind == kind
data = b"" # An opcode can occur once only.
for name, (kind, value) in self.data.items(): assert table.items[opcode[-1][1]] is None
if len(data) < offsets[name]:
data += b"\0" * (offsets[name] - len(data))
assert len(data) == offsets[name]
if kind == EntryKind.INSTR:
data += value
else: # Table
# count = sum(1 for x in value if x is not None)
# print("Table of kind", kind, "with %d/%d entries"%(count, kind.table_length))
for i, entry in enumerate(value):
if entry is not None:
targetKind, _ = self.data[entry]
value = (offsets[entry] & ~7) | targetKind.value
else:
value = 0
data += struct.pack("<H", value)
print("%d bytes" % len(data), stats) name += "{:02x}/{}".format(opcode[-1][1], "??")
return data, annotations table.items[opcode[-1][1]] = name
self.data[name] = TrieEntry.instr(instr_encoding)
def deduplicate(self): def deduplicate(self):
# Make values hashable # Make values hashable
for n, (k, v) in self.data.items(): for name, entry in self.data.items():
self.data[n] = k, (v if k == EntryKind.INSTR else tuple(v)) self.data[name] = entry.readonly()
synonyms = True synonyms = True
while synonyms: while synonyms:
entries = {} # Mapping from entry to name entries = {} # Mapping from entry to name
@@ -245,33 +250,33 @@ class Table:
synonyms[name] = entries[entry] synonyms[name] = entries[entry]
else: else:
entries[entry] = name entries[entry] = name
for name, (kind, value) in self.data.items(): for name, entry in self.data.items():
if kind != EntryKind.INSTR: self.data[name] = entry.map(synonyms)
self.data[name] = kind, tuple(synonyms.get(v, v) for v in value)
for key in synonyms: for key in synonyms:
del self.data[key] del self.data[key]
def add_opcode(self, opcode, instr_encoding, root_idx=0): def calc_offsets(self):
opcode = list(opcode) + [(None, None)] current = 0
opcode = [(opcode[i+1][0], opcode[i][1]) for i in range(len(opcode)-1)] for name, entry in self.data.items():
self.annotations[current] = "%s(%d)" % (name, entry.kind.value)
self.offsets[name] = current
current += (entry.encode_length + 7) & ~7
assert current < 0x10000
name, table = "t%d"%root_idx, self.data["root%d"%root_idx] def encode_item(self, name):
for kind, byte in opcode[:-1]: return self.offsets[name] | self.data[name].kind.value
if table[1][byte] is None:
name += "{:02x}".format(byte)
self.data[name] = kind, [None] * kind.table_length
table[1][byte] = name
else:
name = table[1][byte]
table = self.data[name]
assert table[0] == kind
# An opcode can occur once only. def compile(self):
assert table[1][opcode[-1][1]] is None self.calc_offsets()
ordered = sorted((off, self.data[k]) for k, off in self.offsets.items())
name += "{:02x}/{}".format(opcode[-1][1], "??") data = b""
table[1][opcode[-1][1]] = name for off, entry in ordered:
self.data[name] = EntryKind.INSTR, instr_encoding data += b"\x00" * (off - len(data)) + entry.encode(self.encode_item)
stats = dict(Counter(entry.kind for entry in self.data.values()))
print("%d bytes" % len(data), stats)
return data, self.annotations
def wrap(string): def wrap(string):
return "\n".join(string[i:i+80] for i in range(0, len(string), 80)) return "\n".join(string[i:i+80] for i in range(0, len(string), 80))
@@ -299,13 +304,17 @@ template = """// Auto-generated file -- do not modify!
""" """
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("table", type=argparse.FileType('r'))
parser.add_argument("output", type=argparse.FileType('w'))
args = parser.parse_args()
entries = [] entries = []
with open(sys.argv[1], "r") as f: for line in args.table.read().splitlines():
for line in f.read().splitlines(): if not line or line[0] == "#": continue
if line and line[0] != "#": opcode_string, desc = tuple(line.split(maxsplit=1))
opcode_string, desc = tuple(line.split(maxsplit=1)) for opcode in parse_opcode(opcode_string):
for opcode in parse_opcode(opcode_string): entries.append((opcode, InstrDesc.parse(desc)))
entries.append((opcode, InstrDesc.parse(desc)))
mnemonics = sorted({desc.mnemonic for _, desc in entries}) mnemonics = sorted({desc.mnemonic for _, desc in entries})
mnemonics_lut = {name: mnemonics.index(name) for name in mnemonics} mnemonics_lut = {name: mnemonics.index(name) for name in mnemonics}
@@ -328,12 +337,10 @@ if __name__ == "__main__":
mnemonic_cstr = '"' + "\\0".join(mnemonics) + '"' mnemonic_cstr = '"' + "\\0".join(mnemonics) + '"'
file = template.format( file = template.format(
hex_table32=bytes_to_table(*table32.compile(mnemonics_lut)), hex_table32=bytes_to_table(*table32.compile()),
hex_table64=bytes_to_table(*table64.compile(mnemonics_lut)), hex_table64=bytes_to_table(*table64.compile()),
mnemonic_list="\n".join("FD_MNEMONIC(%s,%d)"%entry for entry in mnemonics_lut.items()), mnemonic_list="\n".join("FD_MNEMONIC(%s,%d)"%entry for entry in mnemonics_lut.items()),
mnemonic_cstr=mnemonic_cstr, mnemonic_cstr=mnemonic_cstr,
mnemonic_offsets=",".join(str(off) for off in mnemonic_tab), mnemonic_offsets=",".join(str(off) for off in mnemonic_tab),
) )
args.output.write(file)
with open(sys.argv[2], "w") as f:
f.write(file)