parseinstrs: Make superstring function generic

This commit is contained in:
Alexis Engelke
2021-09-11 13:19:17 +02:00
parent 1fcacdeda7
commit e41d6c26f8

View File

@@ -414,53 +414,31 @@ class Trie:
print("%d bytes" % (2*len(data)), stats) print("%d bytes" % (2*len(data)), stats)
return tuple(data), [offsets[v] for _, v in self.trie[0]] return tuple(data), [offsets[v] for _, v in self.trie[0]]
def superstring(strs):
    """Return a short string that contains every string in *strs* as a substring.

    Finding the shortest such string is the "shortest superstring" problem,
    which is NP-hard, so a greedy heuristic is used: starting from an empty
    result, repeatedly append the remaining string whose prefix overlaps the
    current end of the result the most, emitting only the non-overlapping
    tail.  The result depends on the order of the input; more compact results
    are possible, but the expected gains of an optimal search are small.

    Parameters:
        strs: iterable of strings to cover.

    Returns:
        The merged string; "" when *strs* is empty.
    """
    # Preprocessing: drop every string that is already completely covered by
    # another one.  Visiting the longest strings first means each candidate
    # only has to be checked against strings of at least its own length;
    # exact duplicates are dropped by the same substring test.
    realstrs = []
    for s in sorted(strs, key=len, reverse=True):
        if not any(s in s2 for s2 in realstrs):
            realstrs.append(s)

    def maxoverlap(s1, s2):
        """Return the largest i such that s1[:i] == s2[-i:], or 0 if none.

        Only i < min(len(s1), len(s2)) is tried, i.e. full-length overlaps
        are not considered.
        """
        for i in range(min(len(s1), len(s2)) - 1, 0, -1):
            if s1[:i] == s2[-i:]:
                return i
        return 0

    merged = ""
    while realstrs:
        # Compute each overlap once and keep it next to its string.  max()
        # returns the first maximal item, so ties are still resolved in
        # favour of the earliest remaining string.
        overlap, s = max(
            ((maxoverlap(k, merged), k) for k in realstrs),
            key=lambda pair: pair[0],
        )
        merged += s[overlap:]
        realstrs.remove(s)
    return merged


def parse_mnemonics(mnemonics):
    """Pack *mnemonics* into one string table and describe each entry.

    Compatibility wrapper around superstring() that keeps the original
    return shape.

    Parameters:
        mnemonics: iterable of mnemonic strings.

    Returns:
        A 3-tuple of strings:
        the merged string wrapped in double quotes,
        the comma-separated offset of every mnemonic inside the merged
        string (in input order), and
        the comma-separated length of every mnemonic (in input order).
    """
    # Materialize once: the input is traversed for merging and again for
    # computing the per-entry offsets and lengths.
    mnemonics = list(mnemonics)
    merged = superstring(mnemonics)
    offsets = ",".join(str(merged.index(m)) for m in mnemonics)
    lengths = ",".join(str(len(m)) for m in mnemonics)
    return '"' + merged + '"', offsets, lengths
# Skeleton of the auto-generated decode table file.  The text is a
# str.format template with the fields hex_table, descs, defines and
# mnemonics; mnemonics is indexed 0..2, so it must be a sequence with at
# least three items.  Each field sits in its own preprocessor branch, chosen
# by which FD_DECODE_TABLE_* macro is defined when the generated file is
# processed; if none of them is defined, the #error branch is reached.
DECODE_TABLE_TEMPLATE = """// Auto-generated file -- do not modify!
#if defined(FD_DECODE_TABLE_DATA)
{hex_table}
#elif defined(FD_DECODE_TABLE_DESCS)
{descs}
#elif defined(FD_DECODE_TABLE_STRTAB1)
{mnemonics[0]}
#elif defined(FD_DECODE_TABLE_STRTAB2)
{mnemonics[1]}
#elif defined(FD_DECODE_TABLE_STRTAB3)
{mnemonics[2]}
#elif defined(FD_DECODE_TABLE_DEFINES)
{defines}
#else
#error "unspecified decode table"
#endif
"""
def decode_table(entries, modes): def decode_table(entries, modes):
mnems = sorted({desc.mnemonic for _, _, desc in entries}) mnems = sorted({desc.mnemonic for _, _, desc in entries})
@@ -493,15 +471,27 @@ def decode_table(entries, modes):
.replace("C_SEP", "CWD CDQ CQO") .replace("C_SEP", "CWD CDQ CQO")
.replace("C_EX", "CBW CWDECDQE") .replace("C_EX", "CBW CWDECDQE")
.lower() for m in mnems] .lower() for m in mnems]
mnemonics_str = superstring(mnemonics_intel)
defines = ["FD_TABLE_OFFSET_%d %d"%k for k in zip(modes, root_offsets)] defines = ["FD_TABLE_OFFSET_%d %d\n"%k for k in zip(modes, root_offsets)]
return "".join(decode_mnems_lines), DECODE_TABLE_TEMPLATE.format( return "".join(decode_mnems_lines), f"""// Auto-generated file -- do not modify!
hex_table="".join(f"{e:#06x}," for e in table_data), #if defined(FD_DECODE_TABLE_DATA)
descs="\n".join("{{{0},{1},{2},{3}}},".format(*desc) for desc in descs), {"".join(f"{e:#06x}," for e in table_data)}
mnemonics=parse_mnemonics(mnemonics_intel), #elif defined(FD_DECODE_TABLE_DESCS)
defines="\n".join("#define " + line for line in defines), {"".join("{{{0},{1},{2},{3}}},".format(*desc) for desc in descs)}
) #elif defined(FD_DECODE_TABLE_STRTAB1)
"{mnemonics_str}"
#elif defined(FD_DECODE_TABLE_STRTAB2)
{",".join(str(mnemonics_str.index(mnem)) for mnem in mnemonics_intel)}
#elif defined(FD_DECODE_TABLE_STRTAB3)
{",".join(str(len(mnem)) for mnem in mnemonics_intel)}
#elif defined(FD_DECODE_TABLE_DEFINES)
{"".join("#define " + line for line in defines)}
#else
#error "unspecified decode table"
#endif
"""
def encode_table(entries): def encode_table(entries):
mnemonics = defaultdict(list) mnemonics = defaultdict(list)