Store 32-bit and 64-bit data in a single table

This commit is contained in:
Alexis Engelke
2019-02-23 16:32:39 +01:00
parent 256806e4b6
commit 67ae0f8de8
3 changed files with 43 additions and 39 deletions

View File

@@ -12,21 +12,16 @@
#define LIKELY(x) __builtin_expect((x), 1) #define LIKELY(x) __builtin_expect((x), 1)
#define UNLIKELY(x) __builtin_expect((x), 0) #define UNLIKELY(x) __builtin_expect((x), 0)
#if defined(ARCH_386) #define FD_DECODE_TABLE_DATA
#define FD_DECODE_TABLE_DATA_32 static const uint8_t _decode_table[] = {
static const uint8_t _decode_table32[] = {
#include <decode-table.inc> #include <decode-table.inc>
}; };
#undef FD_DECODE_TABLE_DATA_32 #undef FD_DECODE_TABLE_DATA
#endif
#if defined(ARCH_X86_64) // Defines FD_TABLE_OFFSET_32 and FD_TABLE_OFFSET_64, if available
#define FD_DECODE_TABLE_DATA_64 #define FD_DECODE_TABLE_DEFINES
static const uint8_t _decode_table64[] = {
#include <decode-table.inc> #include <decode-table.inc>
}; #undef FD_DECODE_TABLE_DEFINES
#undef FD_DECODE_TABLE_DATA_64
#endif
enum DecodeMode { enum DecodeMode {
DECODE_64 = 0, DECODE_64 = 0,
@@ -369,7 +364,7 @@ int
fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address, fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
FdInstr* instr) FdInstr* instr)
{ {
const uint8_t* decode_table = NULL; const uint16_t* table = NULL;
int len = len_sz > 15 ? 15 : len_sz; int len = len_sz > 15 ? 15 : len_sz;
DecodeMode mode = mode_int == 32 ? DECODE_32 : DecodeMode mode = mode_int == 32 ? DECODE_32 :
@@ -378,14 +373,14 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
// Ensure that we can actually handle the decode request // Ensure that we can actually handle the decode request
#if defined(ARCH_386) #if defined(ARCH_386)
if (mode == DECODE_32) if (mode == DECODE_32)
decode_table = _decode_table32; table = (uint16_t*) &_decode_table[FD_TABLE_OFFSET_32];
#endif #endif
#if defined(ARCH_X86_64) #if defined(ARCH_X86_64)
if (mode == DECODE_64) if (mode == DECODE_64)
decode_table = _decode_table64; table = (uint16_t*) &_decode_table[FD_TABLE_OFFSET_64];
#endif #endif
if (UNLIKELY(decode_table == NULL)) if (UNLIKELY(table == NULL))
return -2; return -2;
int retval; int retval;
@@ -402,26 +397,25 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
} }
off += retval; off += retval;
const uint16_t* table = (uint16_t*) decode_table;
uint32_t kind = ENTRY_TABLE256; uint32_t kind = ENTRY_TABLE256;
if (UNLIKELY(prefixes & PREFIX_ESC_MASK)) if (UNLIKELY(prefixes & PREFIX_ESC_MASK))
{ {
uint32_t escape = prefixes & PREFIX_ESC_MASK; uint32_t escape = prefixes & PREFIX_ESC_MASK;
table = (uint16_t*) &decode_table[table[0x0F] & ~7]; table = (uint16_t*) &_decode_table[table[0x0F] & ~7];
if (escape == PREFIX_ESC_0F38) if (escape == PREFIX_ESC_0F38)
{ {
table = (uint16_t*) &decode_table[table[0x38] & ~7]; table = (uint16_t*) &_decode_table[table[0x38] & ~7];
} }
else if (escape == PREFIX_ESC_0F3A) else if (escape == PREFIX_ESC_0F3A)
{ {
table = (uint16_t*) &decode_table[table[0x3A] & ~7]; table = (uint16_t*) &_decode_table[table[0x3A] & ~7];
} }
} }
// First walk through full-byte opcodes. We do at most three iterations. // First walk through full-byte opcodes. We do at most three iterations.
while (kind == ENTRY_TABLE256 && LIKELY(off < len)) while (kind == ENTRY_TABLE256 && LIKELY(off < len))
ENTRY_UNPACK(table, kind, decode_table, table[buffer[off++]]); ENTRY_UNPACK(table, kind, _decode_table, table[buffer[off++]]);
// Then, walk through ModR/M-encoded opcode extensions. // Then, walk through ModR/M-encoded opcode extensions.
if ((kind == ENTRY_TABLE8 || kind == ENTRY_TABLE72) && LIKELY(off < len)) if ((kind == ENTRY_TABLE8 || kind == ENTRY_TABLE72) && LIKELY(off < len))
@@ -438,7 +432,7 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
else else
entry = table[(buffer[off] >> 3) & 7]; entry = table[(buffer[off] >> 3) & 7];
ENTRY_UNPACK(table, kind, decode_table, entry); ENTRY_UNPACK(table, kind, _decode_table, entry);
} }
// Finally, handle mandatory prefixes (which behave like an opcode ext.). // Finally, handle mandatory prefixes (which behave like an opcode ext.).
@@ -452,7 +446,7 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
// for the 0x66 prefix, which could otherwise override the operand // for the 0x66 prefix, which could otherwise override the operand
// size of general purpose registers. // size of general purpose registers.
prefixes &= ~(PREFIX_OPSZ | PREFIX_REPNZ | PREFIX_REP); prefixes &= ~(PREFIX_OPSZ | PREFIX_REPNZ | PREFIX_REP);
ENTRY_UNPACK(table, kind, decode_table, table[index]); ENTRY_UNPACK(table, kind, _decode_table, table[index]);
} }
if (UNLIKELY(kind != ENTRY_INSTR)) if (UNLIKELY(kind != ENTRY_INSTR))

View File

@@ -46,8 +46,15 @@ if not decode_32 and not decode_64
error('no architecture mode') error('no architecture mode')
endif endif
generate_args = []
if decode_32
generate_args += ['--32']
endif
if decode_64
generate_args += ['--64']
endif
instr_data = custom_target('tables', instr_data = custom_target('tables',
command: [python3, '@INPUT0@', '@INPUT1@', '@OUTPUT@'], command: [python3, '@INPUT0@', '@INPUT1@', '@OUTPUT@'] + generate_args,
input: files('parseinstrs.py', 'instrs.txt'), input: files('parseinstrs.py', 'instrs.txt'),
output: ['decode-table.inc']) output: ['decode-table.inc'])

View File

@@ -210,6 +210,7 @@ def parse_opcode(opcode_string):
class Table: class Table:
def __init__(self, root_count=1): def __init__(self, root_count=1):
self.data = OrderedDict() self.data = OrderedDict()
self.roots = ["root%d"%i for i in range(root_count)]
for i in range(root_count): for i in range(root_count):
self.data["root%d"%i] = TrieEntry.table(EntryKind.TABLE256) self.data["root%d"%i] = TrieEntry.table(EntryKind.TABLE256)
self.offsets = {} self.offsets = {}
@@ -276,7 +277,7 @@ class Table:
stats = dict(Counter(entry.kind for entry in self.data.values())) stats = dict(Counter(entry.kind for entry in self.data.values()))
print("%d bytes" % len(data), stats) print("%d bytes" % len(data), stats)
return data, self.annotations return data, self.annotations, [self.offsets[k] for k in self.roots]
def wrap(string): def wrap(string):
return "\n".join(string[i:i+80] for i in range(0, len(string), 80)) return "\n".join(string[i:i+80] for i in range(0, len(string), 80))
@@ -288,16 +289,16 @@ def bytes_to_table(data, notes):
for p, c in zip(offs, offs[1:])) for p, c in zip(offs, offs[1:]))
template = """// Auto-generated file -- do not modify! template = """// Auto-generated file -- do not modify!
#if defined(FD_DECODE_TABLE_DATA_32) #if defined(FD_DECODE_TABLE_DATA)
{hex_table32} {hex_table}
#elif defined(FD_DECODE_TABLE_DATA_64)
{hex_table64}
#elif defined(FD_DECODE_TABLE_MNEMONICS) #elif defined(FD_DECODE_TABLE_MNEMONICS)
{mnemonic_list} {mnemonic_list}
#elif defined(FD_DECODE_TABLE_STRTAB1) #elif defined(FD_DECODE_TABLE_STRTAB1)
{mnemonic_cstr} {mnemonic_cstr}
#elif defined(FD_DECODE_TABLE_STRTAB2) #elif defined(FD_DECODE_TABLE_STRTAB2)
{mnemonic_offsets} {mnemonic_offsets}
#elif defined(FD_DECODE_TABLE_DEFINES)
{defines}
#else #else
#error "unspecified decode table" #error "unspecified decode table"
#endif #endif
@@ -305,6 +306,8 @@ template = """// Auto-generated file -- do not modify!
if __name__ == "__main__": if __name__ == "__main__":
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--32", dest="modes", action="append_const", const=32)
parser.add_argument("--64", dest="modes", action="append_const", const=64)
parser.add_argument("table", type=argparse.FileType('r')) parser.add_argument("table", type=argparse.FileType('r'))
parser.add_argument("output", type=argparse.FileType('w')) parser.add_argument("output", type=argparse.FileType('w'))
args = parser.parse_args() args = parser.parse_args()
@@ -319,28 +322,28 @@ if __name__ == "__main__":
mnemonics = sorted({desc.mnemonic for _, desc in entries}) mnemonics = sorted({desc.mnemonic for _, desc in entries})
mnemonics_lut = {name: mnemonics.index(name) for name in mnemonics} mnemonics_lut = {name: mnemonics.index(name) for name in mnemonics}
table32 = Table() modes = [32, 64]
table64 = Table() table = Table(root_count=len(args.modes))
masks = "ONLY64", "ONLY32"
for opcode, desc in entries: for opcode, desc in entries:
if "ONLY64" not in desc.flags: for i, mode in enumerate(args.modes):
table32.add_opcode(opcode, desc.encode(mnemonics_lut)) if "ONLY%d"%(96-mode) not in desc.flags:
if "ONLY32" not in desc.flags: table.add_opcode(opcode, desc.encode(mnemonics_lut), i)
table64.add_opcode(opcode, desc.encode(mnemonics_lut))
table32.deduplicate() table.deduplicate()
table64.deduplicate() table_data, annotations, root_offsets = table.compile()
mnemonic_tab = [0] mnemonic_tab = [0]
for name in mnemonics: for name in mnemonics:
mnemonic_tab.append(mnemonic_tab[-1] + len(name) + 1) mnemonic_tab.append(mnemonic_tab[-1] + len(name) + 1)
mnemonic_cstr = '"' + "\\0".join(mnemonics) + '"' mnemonic_cstr = '"' + "\\0".join(mnemonics) + '"'
defines = ["FD_TABLE_OFFSET_%d %d"%k for k in zip(args.modes, root_offsets)]
file = template.format( file = template.format(
hex_table32=bytes_to_table(*table32.compile()), hex_table=bytes_to_table(table_data, annotations),
hex_table64=bytes_to_table(*table64.compile()),
mnemonic_list="\n".join("FD_MNEMONIC(%s,%d)"%entry for entry in mnemonics_lut.items()), mnemonic_list="\n".join("FD_MNEMONIC(%s,%d)"%entry for entry in mnemonics_lut.items()),
mnemonic_cstr=mnemonic_cstr, mnemonic_cstr=mnemonic_cstr,
mnemonic_offsets=",".join(str(off) for off in mnemonic_tab), mnemonic_offsets=",".join(str(off) for off in mnemonic_tab),
defines="\n".join("#define " + line for line in defines),
) )
args.output.write(file) args.output.write(file)