parseinstrs: Optimize mnemonic compression
As the formatter no longer demands a null-terminated string, mnemonics can arbitrarily overlap and therefore save space. This is the shortest superstring problem, which is NP-hard. This is currently approximated with a greedy heuristic.
This commit is contained in:
@@ -415,22 +415,33 @@ class Trie:
|
|||||||
return tuple(data), [offsets[v] for _, v in self.trie[0]]
|
return tuple(data), [offsets[v] for _, v in self.trie[0]]
|
||||||
|
|
||||||
def parse_mnemonics(mnemonics):
|
def parse_mnemonics(mnemonics):
|
||||||
mktree = lambda: defaultdict(mktree)
|
# This faces the "shortest superstring" problem, which is NP-hard.
|
||||||
tree = mktree()
|
# Preprocessing: remove any strings which are already completely covered
|
||||||
for m in mnemonics:
|
mnems = []
|
||||||
cur = tree
|
for m in sorted(mnemonics, key=len, reverse=True):
|
||||||
for c in m[::-1]:
|
for m2 in mnems:
|
||||||
cur = cur[c]
|
if m in m2:
|
||||||
def tree_walk(tree, cur="\0"):
|
break
|
||||||
if not tree:
|
|
||||||
yield cur
|
|
||||||
else:
|
else:
|
||||||
for el, subtree in tree.items():
|
mnems.append(m)
|
||||||
for path in tree_walk(subtree, el + cur):
|
|
||||||
yield path
|
# Greedy heuristic generally yields acceptable results, though it depends on
|
||||||
merged_str = "".join(sorted(tree_walk(tree)))
|
# the order of the mnemonics. More compact results are possible, but the
|
||||||
cstr = '"' + merged_str[:-1].replace("\0", '\\0') + '"'
|
# expected gains of an optimal result (probably with O(n!)) are small.
|
||||||
tab = [(merged_str.index(m + "\0"), len(m)) for m in mnemonics]
|
merged_str = ""
|
||||||
|
def maxoverlap(m1, m2):
|
||||||
|
# return next((i for i in range(min(len(m1), len(m2))-1, 0, -1) if m1[:i] == m2[-i:]), 0)
|
||||||
|
for i in range(min(len(m1), len(m2))-1, 0, -1):
|
||||||
|
if m1[:i] == m2[-i:]:
|
||||||
|
return i
|
||||||
|
return 0
|
||||||
|
while mnems:
|
||||||
|
mnem = max(mnems, key=lambda k: maxoverlap(k, merged_str))
|
||||||
|
merged_str += mnem[maxoverlap(mnem, merged_str):]
|
||||||
|
mnems.remove(mnem)
|
||||||
|
indices = [str(merged_str.index(m)) for m in mnemonics]
|
||||||
|
cstr = '"' + merged_str + '"'
|
||||||
|
tab = [(merged_str.index(m), len(m)) for m in mnemonics]
|
||||||
return cstr, ",".join(map(lambda e: f"{e[0]}", tab)), ",".join(map(lambda e: f"{e[1]}", tab))
|
return cstr, ",".join(map(lambda e: f"{e[0]}", tab)), ",".join(map(lambda e: f"{e[1]}", tab))
|
||||||
|
|
||||||
DECODE_TABLE_TEMPLATE = """// Auto-generated file -- do not modify!
|
DECODE_TABLE_TEMPLATE = """// Auto-generated file -- do not modify!
|
||||||
|
|||||||
Reference in New Issue
Block a user