parseinstrs: Improve performance of superstring

This algorithm yields slightly worse results, but is substantially
faster for larger string counts.
This commit is contained in:
Alexis Engelke
2022-12-28 11:28:29 +01:00
parent 771d968165
commit 6bf96d6963

View File

@@ -1,6 +1,7 @@
#!/usr/bin/python3 #!/usr/bin/python3
import argparse import argparse
import bisect
from collections import OrderedDict, defaultdict, namedtuple, Counter from collections import OrderedDict, defaultdict, namedtuple, Counter
from enum import Enum from enum import Enum
from itertools import product from itertools import product
@@ -515,16 +516,17 @@ def superstring(strs):
# Greedy heuristic generally yields acceptable results, though it depends on # Greedy heuristic generally yields acceptable results, though it depends on
# the order of the mnemonics. More compact results are possible, but the # the order of the mnemonics. More compact results are possible, but the
# expected gains of an optimal result (probably with O(n!)) are small. # expected gains of an optimal result (probably with O(n!)) are small.
# First sort strings and later do a binary search for each possible prefix.
realstrs.sort()
merged = "" merged = ""
def maxoverlap(s1, s2):
for i in range(min(len(s1), len(s2))-1, 0, -1):
if s1[:i] == s2[-i:]:
return i
return 0
while realstrs: while realstrs:
s = max(realstrs, key=lambda k: maxoverlap(k, merged)) for i in range(min(16, len(merged)), 0, -1):
merged += s[maxoverlap(s, merged):] idx = bisect.bisect_left(realstrs, merged[-i:])
realstrs.remove(s) if idx < len(realstrs) and realstrs[idx][:i] == merged[-i:]:
merged += realstrs.pop(idx)[i:]
break
else:
merged += realstrs.pop()
return merged return merged
def decode_table(entries, args): def decode_table(entries, args):
@@ -570,7 +572,7 @@ def decode_table(entries, args):
.replace("REP_", "REP ").replace("CMPXCHGD", "CMPXCHG") .replace("REP_", "REP ").replace("CMPXCHGD", "CMPXCHG")
.replace("JCXZ", "JCXZ JECXZJRCXZ") .replace("JCXZ", "JCXZ JECXZJRCXZ")
.replace("C_SEP", "CWD CDQ CQO") .replace("C_SEP", "CWD CDQ CQO")
.replace("C_EX", "CBW CWDECDQE") .replace("C_EX", "CBW CWDECDQE").replace("XCHG_NOP", "")
.lower() for m in mnems] .lower() for m in mnems]
mnemonics_str = superstring(mnemonics_intel) mnemonics_str = superstring(mnemonics_intel)