Unify mnemonic tables for 32-bit and 64-bit

Because the 32-bit and 64-bit mnemonic tables differ only slightly, they
can be unified into a single table. As a consequence, the instruction
types no longer differ between 32-bit and 64-bit decoding.
This commit is contained in:
Alexis Engelke
2019-01-13 09:47:24 +01:00
parent ec7d27302e
commit 83ea2f0769

View File

@@ -193,8 +193,7 @@ class Table:
self.mnemonics = set() self.mnemonics = set()
self.instrs = {} self.instrs = {}
def compile(self): def compile(self, mnemonics_lut):
mnemonics = sorted(list(self.mnemonics))
offsets = {} offsets = {}
currentOffset = 0 currentOffset = 0
stats = defaultdict(int) stats = defaultdict(int)
@@ -214,7 +213,7 @@ class Table:
data += b"\0" * (offsets[name] - len(data)) data += b"\0" * (offsets[name] - len(data))
assert len(data) == offsets[name] assert len(data) == offsets[name]
if kind == EntryKind.INSTR: if kind == EntryKind.INSTR:
mnemonicIdx = mnemonics.index(value[0]) mnemonicIdx = mnemonics_lut[value[0]]
data += struct.pack("<HL", mnemonicIdx, value[1]) data += struct.pack("<HL", mnemonicIdx, value[1])
else: # Table else: # Table
# count = sum(1 for x in value if x is not None) # count = sum(1 for x in value if x is not None)
@@ -227,8 +226,8 @@ class Table:
value = 0 value = 0
data += struct.pack("<H", value) data += struct.pack("<H", value)
print("%d bytes, %d mnemonics"%(len(data),len(mnemonics)), stats) print("%d bytes" % len(data), stats)
return data, mnemonics return data
def add_opcode(self, opcode, instrData): def add_opcode(self, opcode, instrData):
opcode = list(opcode) + [(None, None)] opcode = list(opcode) + [(None, None)]
@@ -257,32 +256,29 @@ class Table:
self.data[name] = EntryKind.INSTR, instrData self.data[name] = EntryKind.INSTR, instrData
self.instrs[instrData] = name self.instrs[instrData] = name
def generate_cpp_table(table): def bytes_to_table(data):
compiled, mnemonics = table.compile() hexdata = ",".join("0x{:02x}".format(byte) for byte in data)
return "\n".join(hexdata[i:i+80] for i in range(0, len(hexdata), 80))
hexdata = ",".join("0x{:02x}".format(byte) for byte in compiled) template = """// Auto-generated file -- do not modify!
compiled_hex = "\n".join(hexdata[i:i+80] for i in range(0, len(hexdata), 80)) #if defined(DECODE_TABLE_DATA)
#if defined(ARCH_386)
mnemonic_tab = [0] {hex_table32}
for name in mnemonics: #elif defined(ARCH_X86_64)
mnemonic_tab.append(mnemonic_tab[-1] + len(name) + 1) {hex_table64}
mnemonic_cstr = '"' + "\\0".join(mnemonics) + '"' #else
#error "unknown architecture"
file = "" #endif
file += "#if defined(DECODE_TABLE_DATA)\n" #elif defined(DECODE_TABLE_MNEMONICS)
file += compiled_hex + "\n" {mnemonic_list}
file += "#elif defined(DECODE_TABLE_MNEMONICS)\n" #elif defined(DECODE_TABLE_STRTAB1)
for value, name in enumerate(mnemonics): {mnemonic_cstr}
file += "MNEMONIC({}, {})\n".format(name, value) #elif defined(DECODE_TABLE_STRTAB2)
file += "#elif defined(DECODE_TABLE_STRTAB1)\n" {mnemonic_offsets}
file += mnemonic_cstr + "\n" #else
file += "#elif defined(DECODE_TABLE_STRTAB2)\n" #error "unspecified decode table"
file += ",".join(str(off) for off in mnemonic_tab) + "\n" #endif
file += "#else\n" """
file += "#error \"unspecified decode table\"\n"
file += "#endif\n"
return file
if __name__ == "__main__": if __name__ == "__main__":
entries = defaultdict(list) entries = defaultdict(list)
@@ -293,6 +289,7 @@ if __name__ == "__main__":
for opcode in parse_opcode(opcode_string): for opcode in parse_opcode(opcode_string):
entries[opcode].append(desc) entries[opcode].append(desc)
mnemonics = set()
table32 = Table() table32 = Table()
table64 = Table() table64 = Table()
masks = "ONLY64", "ONLY32" masks = "ONLY64", "ONLY32"
@@ -302,15 +299,24 @@ if __name__ == "__main__":
parsed = [desc for desc in parsed if desc is not None] parsed = [desc for desc in parsed if desc is not None]
assert len(parsed) <= 1 assert len(parsed) <= 1
if parsed: if parsed:
mnemonics.add(parsed[0][0])
table.add_opcode(opcode, parsed[0]) table.add_opcode(opcode, parsed[0])
tableFile2 = "" mnemonics = sorted(mnemonics)
tableFile2 += "#if defined(ARCH_386)\n" mnemonics_lut = {name: mnemonics.index(name) for name in mnemonics}
tableFile2 += generate_cpp_table(table32)
tableFile2 += "#elif defined(ARCH_X86_64)\n" mnemonic_tab = [0]
tableFile2 += generate_cpp_table(table64) for name in mnemonics:
tableFile2 += "#else\n" mnemonic_tab.append(mnemonic_tab[-1] + len(name) + 1)
tableFile2 += "#error \"unknown architecture\"\n" mnemonic_cstr = '"' + "\\0".join(mnemonics) + '"'
tableFile2 += "#endif\n"
file = template.format(
hex_table32=bytes_to_table(table32.compile(mnemonics_lut)),
hex_table64=bytes_to_table(table64.compile(mnemonics_lut)),
mnemonic_list="\n".join("MNEMONIC(%s,%d)"%entry for entry in mnemonics_lut.items()),
mnemonic_cstr=mnemonic_cstr,
mnemonic_offsets=",".join(str(off) for off in mnemonic_tab),
)
with open(sys.argv[2], "w") as f: with open(sys.argv[2], "w") as f:
f.write(tableFile2) f.write(file)