Use argparse

This commit is contained in:
Alexis Engelke
2019-02-23 15:25:34 +01:00
parent da683d850a
commit 256806e4b6

View File

@@ -1,12 +1,12 @@
#!/usr/bin/python3
import argparse
from binascii import unhexlify
from collections import OrderedDict, defaultdict, namedtuple
from collections import OrderedDict, defaultdict, namedtuple, Counter
from copy import copy
from enum import Enum, IntEnum
from itertools import accumulate
import struct
import sys
def bitstruct(name, fields):
names, sizes = zip(*(field.split(":") for field in fields))
@@ -126,15 +126,33 @@ class EntryKind(Enum):
TABLE72 = 4
TABLE_PREFIX = 5
@property
def table_length(self):
return {
EntryKind.INSTR: 0,
class TrieEntry(namedtuple("TrieEntry", "kind,items,payload")):
__slots__ = ()
TABLE_LENGTH = {
EntryKind.TABLE256: 256,
EntryKind.TABLE8: 8,
EntryKind.TABLE72: 72,
EntryKind.TABLE_PREFIX: 16
}[self]
}
@classmethod
def table(cls, kind):
return cls(kind, [None] * cls.TABLE_LENGTH[kind], b"")
@classmethod
def instr(cls, payload):
return cls(EntryKind.INSTR, [], payload)
@property
def encode_length(self):
return len(self.payload) + 2 * len(self.items)
def encode(self, encode_item):
enc_items = (encode_item(item) if item else 0 for item in self.items)
return self.payload + struct.pack("<%dH"%len(self.items), *enc_items)
def readonly(self):
return TrieEntry(self.kind, tuple(self.items), self.payload)
def map(self, mapping):
mapped_items = (mapping.get(v, v) for v in self.items)
return TrieEntry(self.kind, tuple(mapped_items), self.payload)
import re
opcode_regex = re.compile(r"^(?P<prefixes>(?P<vex>VEX\.)?(?P<legacy>NP|66|F2|F3)\.(?P<rexw>W[01]\.)?(?P<vexl>L[01]\.)?)?(?P<opcode>(?:[0-9a-f]{2})+)(?P<modrm>//?[0-7]|//[c-f][0-9a-f])?(?P<extended>\+)?$")
@@ -193,49 +211,36 @@ class Table:
def __init__(self, root_count=1):
self.data = OrderedDict()
for i in range(root_count):
self.data["root%d"%i] = (EntryKind.TABLE256, [None] * 256)
self.data["root%d"%i] = TrieEntry.table(EntryKind.TABLE256)
self.offsets = {}
self.annotations = {}
def compile(self, mnemonics_lut):
offsets = {}
annotations = {}
currentOffset = 0
stats = defaultdict(int)
for name, (kind, _) in self.data.items():
annotations[currentOffset] = "%s(%d)" % (name, kind.value)
offsets[name] = currentOffset
stats[kind] += 1
if kind.table_length:
currentOffset += kind.table_length * 2
def add_opcode(self, opcode, instr_encoding, root_idx=0):
opcode = list(opcode) + [(None, None)]
opcode = [(opcode[i+1][0], opcode[i][1]) for i in range(len(opcode)-1)]
name, table = "t%d"%root_idx, self.data["root%d"%root_idx]
for kind, byte in opcode[:-1]:
if table.items[byte] is None:
name += "{:02x}".format(byte)
self.data[name] = TrieEntry.table(kind)
table.items[byte] = name
else:
currentOffset += 6
currentOffset = (currentOffset + 7) & ~7
assert currentOffset < 0x10000
name = table.items[byte]
table = self.data[name]
assert table.kind == kind
data = b""
for name, (kind, value) in self.data.items():
if len(data) < offsets[name]:
data += b"\0" * (offsets[name] - len(data))
assert len(data) == offsets[name]
if kind == EntryKind.INSTR:
data += value
else: # Table
# count = sum(1 for x in value if x is not None)
# print("Table of kind", kind, "with %d/%d entries"%(count, kind.table_length))
for i, entry in enumerate(value):
if entry is not None:
targetKind, _ = self.data[entry]
value = (offsets[entry] & ~7) | targetKind.value
else:
value = 0
data += struct.pack("<H", value)
# An opcode can occur once only.
assert table.items[opcode[-1][1]] is None
print("%d bytes" % len(data), stats)
return data, annotations
name += "{:02x}/{}".format(opcode[-1][1], "??")
table.items[opcode[-1][1]] = name
self.data[name] = TrieEntry.instr(instr_encoding)
def deduplicate(self):
# Make values hashable
for n, (k, v) in self.data.items():
self.data[n] = k, (v if k == EntryKind.INSTR else tuple(v))
for name, entry in self.data.items():
self.data[name] = entry.readonly()
synonyms = True
while synonyms:
entries = {} # Mapping from entry to name
@@ -245,33 +250,33 @@ class Table:
synonyms[name] = entries[entry]
else:
entries[entry] = name
for name, (kind, value) in self.data.items():
if kind != EntryKind.INSTR:
self.data[name] = kind, tuple(synonyms.get(v, v) for v in value)
for name, entry in self.data.items():
self.data[name] = entry.map(synonyms)
for key in synonyms:
del self.data[key]
def add_opcode(self, opcode, instr_encoding, root_idx=0):
opcode = list(opcode) + [(None, None)]
opcode = [(opcode[i+1][0], opcode[i][1]) for i in range(len(opcode)-1)]
def calc_offsets(self):
current = 0
for name, entry in self.data.items():
self.annotations[current] = "%s(%d)" % (name, entry.kind.value)
self.offsets[name] = current
current += (entry.encode_length + 7) & ~7
assert current < 0x10000
name, table = "t%d"%root_idx, self.data["root%d"%root_idx]
for kind, byte in opcode[:-1]:
if table[1][byte] is None:
name += "{:02x}".format(byte)
self.data[name] = kind, [None] * kind.table_length
table[1][byte] = name
else:
name = table[1][byte]
table = self.data[name]
assert table[0] == kind
def encode_item(self, name):
return self.offsets[name] | self.data[name].kind.value
# An opcode can occur once only.
assert table[1][opcode[-1][1]] is None
def compile(self):
self.calc_offsets()
ordered = sorted((off, self.data[k]) for k, off in self.offsets.items())
name += "{:02x}/{}".format(opcode[-1][1], "??")
table[1][opcode[-1][1]] = name
self.data[name] = EntryKind.INSTR, instr_encoding
data = b""
for off, entry in ordered:
data += b"\x00" * (off - len(data)) + entry.encode(self.encode_item)
stats = dict(Counter(entry.kind for entry in self.data.values()))
print("%d bytes" % len(data), stats)
return data, self.annotations
def wrap(string):
return "\n".join(string[i:i+80] for i in range(0, len(string), 80))
@@ -299,10 +304,14 @@ template = """// Auto-generated file -- do not modify!
"""
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("table", type=argparse.FileType('r'))
parser.add_argument("output", type=argparse.FileType('w'))
args = parser.parse_args()
entries = []
with open(sys.argv[1], "r") as f:
for line in f.read().splitlines():
if line and line[0] != "#":
for line in args.table.read().splitlines():
if not line or line[0] == "#": continue
opcode_string, desc = tuple(line.split(maxsplit=1))
for opcode in parse_opcode(opcode_string):
entries.append((opcode, InstrDesc.parse(desc)))
@@ -328,12 +337,10 @@ if __name__ == "__main__":
mnemonic_cstr = '"' + "\\0".join(mnemonics) + '"'
file = template.format(
hex_table32=bytes_to_table(*table32.compile(mnemonics_lut)),
hex_table64=bytes_to_table(*table64.compile(mnemonics_lut)),
hex_table32=bytes_to_table(*table32.compile()),
hex_table64=bytes_to_table(*table64.compile()),
mnemonic_list="\n".join("FD_MNEMONIC(%s,%d)"%entry for entry in mnemonics_lut.items()),
mnemonic_cstr=mnemonic_cstr,
mnemonic_offsets=",".join(str(off) for off in mnemonic_tab),
)
with open(sys.argv[2], "w") as f:
f.write(file)
args.output.write(file)