From ed53b4a54d36de7e8eb441e1a68947bab6651260 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Sun, 13 Jan 2019 11:58:59 +0100 Subject: [PATCH] Support 32 bit and 64 bit decoding with one binary It is possible to configure the build process such that decoding of 32 bit and 64 bit instructions can be chosen at runtime using an additional parameter of the decode function. The header file is now entirely architecture-independent and no longer requires any previous defines. Decoding x86-64 still requires a 64-bit pointer size. --- README.md | 4 +- decode.c | 108 +++++++++++++++++++++++++++++++--------------- decode.h | 22 ++++------ meson.build | 16 +++++-- meson_options.txt | 2 +- parseinstrs.py | 8 +--- tests/driver.c | 30 ++++++++++--- tests/meson.build | 8 +++- tests/test.py | 18 +++++--- 9 files changed, 138 insertions(+), 78 deletions(-) diff --git a/README.md b/README.md index 753e516..8d61cf5 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,6 @@ # libx86decode -A fast and lightweight decoder for x86 and x86-64. *This is not a disassembler, it does not intend to procude valid assembly.* To meet the goal of speed, lookup tables are used to map the opcode the (internal) description of the instruction encoding. This table currently has a size of roughly 21 kiB. - -Currently, decoding 32-bit assembly required the `ARCH_386` macro to be defined and is only tested when compiling as 32-bit binary. Decoding of 64-bit instruction requires the macro `ARCH_X86_64` and is only supported in 64-bit mode. This restriction might change in future. +A fast and lightweight decoder for x86 and x86-64. *This is not a disassembler, it does not intend to produce valid assembly.* To meet the goal of speed, lookup tables are used to map the opcode the (internal) description of the instruction encoding. This table currently has a size of roughly 21 kiB. ### Known issues - An implicit `FWAIT` in FPU instructions is decoded as a separate instruction. 
For example, the instruction `FINIT` is decoded as an `FWAIT` followed by an `FINIT` where as `FNINIT` is decoded as a plain `FINIT` instruction. diff --git a/decode.c b/decode.c index 97bbd99..dd9bae8 100644 --- a/decode.c +++ b/decode.c @@ -5,14 +5,28 @@ #include +#if defined(ARCH_X86_64) && __SIZEOF_POINTER__ < 8 +#error "Decoding x86-64 requires a 64-bit architecture" +#endif + #define LIKELY(x) __builtin_expect((x), 1) #define UNLIKELY(x) __builtin_expect((x), 0) -#define DECODE_TABLE_DATA -static const uint8_t _decode_table[] = { +#if defined(ARCH_386) +#define DECODE_TABLE_DATA_32 +static const uint8_t _decode_table32[] = { #include }; -#undef DECODE_TABLE_DATA +#undef DECODE_TABLE_DATA_32 +#endif + +#if defined(ARCH_X86_64) +#define DECODE_TABLE_DATA_64 +static const uint8_t _decode_table64[] = { +#include +}; +#undef DECODE_TABLE_DATA_64 +#endif #define ENTRY_NONE 0 @@ -52,8 +66,8 @@ static const uint8_t _decode_table[] = { static int -decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes, - uint8_t* out_vex_operand) +decode_prefixes(const uint8_t* buffer, int len, DecodeMode mode, + PrefixSet* out_prefixes, uint8_t* out_vex_operand) { int off = 0; PrefixSet prefixes = 0; @@ -102,7 +116,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes, prefixes |= PREFIX_REP; } #if defined(ARCH_X86_64) - else if (LIKELY(prefix >= 0x40 && prefix <= 0x4F)) + else if (mode == DECODE_64 && LIKELY(prefix >= 0x40 && prefix <= 0x4F)) { prefixes |= PREFIX_REX; if (prefix & 0x1) @@ -134,7 +148,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes, return -1; } #if defined(ARCH_386) - if ((buffer[off + 1] & 0xc0) != 0xc0) + if (mode == DECODE_32 && (buffer[off + 1] & 0xc0) != 0xc0) { break; } @@ -144,15 +158,15 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes, prefixes |= PREFIX_VEX; #if defined(ARCH_X86_64) - if ((byte2 & 0x80) == 0) + if (mode == DECODE_64 && (byte2 & 0x80) == 0) { 
prefixes |= PREFIX_REXR; } - if ((byte2 & 0x40) == 0) + if (mode == DECODE_64 && (byte2 & 0x40) == 0) { prefixes |= PREFIX_REXX; } - if ((byte2 & 0x20) == 0) + if (mode == DECODE_64 && (byte2 & 0x20) == 0) { prefixes |= PREFIX_REXB; } @@ -172,7 +186,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes, // SDM Vol 2A 2-16 (Dec. 2016) // - "In 32-bit modes, VEX.W is silently ignored." // - VEX.W either replaces REX.W, is don't care or is reserved. - if (byte3 & 0x80) + if (mode == DECODE_64 && (byte3 & 0x80)) { prefixes |= PREFIX_REXW; } @@ -198,7 +212,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes, return -1; } #if defined(ARCH_386) - if ((buffer[off + 1] & 0xc0) != 0xc0) + if (mode == DECODE_32 && (buffer[off + 1] & 0xc0) != 0xc0) { break; } @@ -207,7 +221,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes, prefixes |= PREFIX_VEX | PREFIX_ESC_0F; #if defined(ARCH_X86_64) - if ((byte & 0x80) == 0) + if (mode == DECODE_64 && (byte & 0x80) == 0) { prefixes |= PREFIX_REXR; } @@ -246,7 +260,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes, static int -decode_modrm(const uint8_t* buffer, int len, Instr* instr, +decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr, struct Operand* out_o1, struct Operand* out_o2) { int off = 0; @@ -336,10 +350,11 @@ decode_modrm(const uint8_t* buffer, int len, Instr* instr, if (mod == 0 && rm == 5) { #if defined(ARCH_X86_64) - out_o1->reg = RI_IP; -#else - out_o1->reg = REG_NONE; + if (mode == DECODE_64) + out_o1->reg = RI_IP; + else #endif + out_o1->reg = REG_NONE; return off; } @@ -402,8 +417,23 @@ struct InstrDesc #define DESC_IMM_BYTE(desc) (((desc)->immediate >> 7) & 1) int -decode(const uint8_t* buffer, int len, Instr* instr) +decode(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr) { + const uint8_t* decode_table = NULL; + + // Ensure that we can actually handle the decode request +#if 
defined(ARCH_386) + if (mode == DECODE_32) + decode_table = _decode_table32; +#endif +#if defined(ARCH_X86_64) + if (mode == DECODE_64) + decode_table = _decode_table64; +#endif + + if (decode_table == NULL) + return -2; + int retval; int off = 0; uint8_t vex_operand = 0; @@ -411,27 +441,28 @@ decode(const uint8_t* buffer, int len, Instr* instr) __builtin_memset(instr->operands, 0, sizeof(instr->operands)); - retval = decode_prefixes(buffer + off, len - off, &prefixes, &vex_operand); + retval = decode_prefixes(buffer + off, len - off, mode, &prefixes, + &vex_operand); if (UNLIKELY(retval < 0 || off + retval >= len)) { return -1; } off += retval; - uint16_t* table = (uint16_t*) _decode_table; + const uint16_t* table = (uint16_t*) decode_table; uint32_t kind = ENTRY_TABLE256; if (UNLIKELY(prefixes & PREFIX_ESC_MASK)) { uint32_t escape = prefixes & PREFIX_ESC_MASK; - table = (uint16_t*) &_decode_table[table[0x0F] & ~7]; + table = (uint16_t*) &decode_table[table[0x0F] & ~7]; if (escape == PREFIX_ESC_0F38) { - table = (uint16_t*) &_decode_table[table[0x38] & ~7]; + table = (uint16_t*) &decode_table[table[0x38] & ~7]; } else if (escape == PREFIX_ESC_0F3A) { - table = (uint16_t*) &_decode_table[table[0x3A] & ~7]; + table = (uint16_t*) &decode_table[table[0x3A] & ~7]; } } @@ -497,7 +528,7 @@ decode(const uint8_t* buffer, int len, Instr* instr) } kind = entry & ENTRY_MASK; - table = (uint16_t*) &_decode_table[entry & ~7]; + table = (uint16_t*) &decode_table[entry & ~7]; } while (LIKELY(off < len)); if (UNLIKELY(kind != ENTRY_INSTR)) @@ -552,7 +583,7 @@ decode(const uint8_t* buffer, int len, Instr* instr) op_size = 2; } #if defined(ARCH_X86_64) - else if (desc->gp_size_def64) + else if (mode == DECODE_64 && desc->gp_size_def64) { op_size = 8; } @@ -603,7 +634,7 @@ decode(const uint8_t* buffer, int len, Instr* instr) { operand2 = &instr->operands[DESC_MODREG_IDX(desc)]; } - retval = decode_modrm(buffer + off, len - off, instr, + retval = decode_modrm(buffer + off, len - off, 
mode, instr, operand1, operand2); if (UNLIKELY(retval < 0)) @@ -649,19 +680,26 @@ decode(const uint8_t* buffer, int len, Instr* instr) instr->scale = 0; // TODO: Address size overrides #if defined(ARCH_386) - if (UNLIKELY(off + 4 > len)) + if (mode == DECODE_32) { - return -1; + if (UNLIKELY(off + 4 > len)) + { + return -1; + } + instr->disp = LOAD_LE_4(&buffer[off]); + off += 4; } - instr->disp = LOAD_LE_4(&buffer[off]); - off += 4; -#else - if (UNLIKELY(off + 8 > len)) +#endif +#if defined(ARCH_X86_64) + if (mode == DECODE_64) { - return -1; + if (UNLIKELY(off + 8 > len)) + { + return -1; + } + instr->disp = LOAD_LE_8(&buffer[off]); + off += 8; } - instr->disp = LOAD_LE_8(&buffer[off]); - off += 8; #endif } else if (UNLIKELY(imm_control != 0)) diff --git a/decode.h b/decode.h index 848fbe0..94f3318 100644 --- a/decode.h +++ b/decode.h @@ -5,10 +5,6 @@ #include #include -#if defined(ARCH_X86_64) && __SIZEOF_POINTER__ < 8 -#error "Decoding x86-64 requires a 64-bit architecture" -#endif - #ifndef ssize_t #define ssize_t intptr_t #endif @@ -22,6 +18,13 @@ enum #undef DECODE_TABLE_MNEMONICS #undef MNEMONIC +enum DecodeMode { + DECODE_64 = 0, + DECODE_32 = 1, +}; + +typedef enum DecodeMode DecodeMode; + enum RegIndex { RI_AL = 0, RI_CL, @@ -40,7 +43,6 @@ enum RegIndex { RI_BP, RI_SI, RI_DI, -#if defined(ARCH_X86_64) RI_R8, RI_R9, RI_R10, @@ -49,12 +51,9 @@ enum RegIndex { RI_R13, RI_R14, RI_R15, -#endif // EIP cannot be encoded in Protected/Compatibility Mode -#if defined(ARCH_X86_64) RI_IP = 0x10, -#endif RI_ES = 0, RI_CS, @@ -82,13 +81,11 @@ enum PrefixSet PREFIX_LOCK = 1 << 4, PREFIX_REPNZ = 1 << 5, PREFIX_REP = 1 << 6, -#if defined(ARCH_X86_64) PREFIX_REX = 1 << 7, PREFIX_REXB = 1 << 8, PREFIX_REXX = 1 << 9, PREFIX_REXR = 1 << 10, PREFIX_REXW = 1 << 11, -#endif PREFIX_ESC_NONE = 0 << 13, PREFIX_ESC_0F = 1 << 13, PREFIX_ESC_0F38 = 2 << 13, @@ -144,12 +141,9 @@ typedef struct Instr Instr; #define INSTR_HAS_REPNZ(instr) ((instr)->prefixes & PREFIX_REPNZ) #define 
INSTR_HAS_LOCK(instr) ((instr)->prefixes & PREFIX_LOCK) #define INSTR_HAS_ADDRSZ(instr) ((instr)->prefixes & PREFIX_ADDRSZ) - -#if defined(ARCH_X86_64) #define INSTR_HAS_REX(instr) ((instr)->prefixes & PREFIX_REX) -#endif -int decode(const uint8_t* buffer, int len, Instr* out_instr); +int decode(const uint8_t* buffer, int len, DecodeMode mode, Instr* out_instr); void instr_format(const Instr* instr, char buffer[128]); void instr_print(const Instr* instr) __attribute__((deprecated)); diff --git a/meson.build b/meson.build index bf3c7ae..a111bd9 100644 --- a/meson.build +++ b/meson.build @@ -29,13 +29,21 @@ if get_option('warning_level').to_int() >= 3 language: 'c') endif +decode_32 = false +decode_64 = false + archmode = get_option('archmode') -if archmode == '32' +if archmode == 'only32' or archmode == 'both' add_project_arguments(['-DARCH_386'], language: 'c') -elif archmode == '64' + decode_32 = true +endif +if archmode == 'only64' or archmode == 'both' add_project_arguments(['-DARCH_X86_64'], language: 'c') -else - error('Invalid architecture mode') + decode_64 = true +endif + +if not decode_32 and not decode_64 + error('no architecture mode') endif instr_data = custom_target('tables', diff --git a/meson_options.txt b/meson_options.txt index ce5a124..5f6a767 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -1 +1 @@ -option('archmode', type: 'combo', choices: ['32', '64']) +option('archmode', type: 'combo', choices: ['both', 'only32', 'only64']) diff --git a/parseinstrs.py b/parseinstrs.py index c127004..6b68a5c 100644 --- a/parseinstrs.py +++ b/parseinstrs.py @@ -261,14 +261,10 @@ def bytes_to_table(data): return "\n".join(hexdata[i:i+80] for i in range(0, len(hexdata), 80)) template = """// Auto-generated file -- do not modify! 
-#if defined(DECODE_TABLE_DATA) -#if defined(ARCH_386) +#if defined(DECODE_TABLE_DATA_32) {hex_table32} -#elif defined(ARCH_X86_64) +#elif defined(DECODE_TABLE_DATA_64) {hex_table64} -#else -#error "unknown architecture" -#endif #elif defined(DECODE_TABLE_MNEMONICS) {mnemonic_list} #elif defined(DECODE_TABLE_STRTAB1) diff --git a/tests/driver.c b/tests/driver.c index 6df9197..1132ff0 100644 --- a/tests/driver.c +++ b/tests/driver.c @@ -24,24 +24,40 @@ parse_nibble(const char nibble) int main(int argc, char** argv) { - if (argc != 2 && argc != 3) + if (argc != 3 && argc != 4) { - printf("usage: %s [instruction bytes] ([repetitions])\n", argv[0]); + printf("usage: %s [mode] [instruction bytes] ([repetitions])\n", argv[0]); return -1; } + DecodeMode mode; + size_t mode_input = strtoul(argv[1], NULL, 0); + if (mode_input == 32) + { + mode = DECODE_32; + } + else if (mode_input == 64) + { + mode = DECODE_64; + } + else + { + printf("Unknown decode mode\n"); + return 1; + } + // Avoid allocation by transforming hex to binary in-place. 
- uint8_t* code = (uint8_t*) argv[1]; + uint8_t* code = (uint8_t*) argv[2]; uint8_t* code_end = code; - char* hex = argv[1]; + char* hex = argv[2]; for (; *hex; hex += 2, code_end++) *code_end = (parse_nibble(hex[0]) << 4) | parse_nibble(hex[1]); size_t length = (size_t) (code_end - code); size_t repetitions = 1; - if (argc >= 3) - repetitions = strtoul(argv[2], NULL, 0); + if (argc >= 4) + repetitions = strtoul(argv[3], NULL, 0); struct timespec time_start; struct timespec time_end; @@ -56,7 +72,7 @@ main(int argc, char** argv) while (current_off != length) { size_t remaining = length - current_off; - int retval = decode(code + current_off, remaining, &instr); + int retval = decode(code + current_off, remaining, mode, &instr); if (retval < 0) goto fail; current_off += retval; diff --git a/tests/meson.build b/tests/meson.build index 89e9437..36fa3a7 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -2,7 +2,13 @@ test_driver = executable('test_driver', 'driver.c', dependencies: libx86decode, c_args: ['-D_GNU_SOURCE']) -test_args = [files('test.py'), test_driver, archmode] +test_args = [files('test.py'), test_driver] +if decode_32 + test_args += ['--32'] +endif +if decode_64 + test_args += ['--64'] +endif ## Test cases diff --git a/tests/test.py b/tests/test.py index b60cebf..771268e 100644 --- a/tests/test.py +++ b/tests/test.py @@ -5,12 +5,12 @@ import statistics import subprocess import sys -def run(args, code, expected): +def run(args, mode, code, expected): inner_reps = 10000000 if args.benchmark else 1 outer_reps = 3 if args.benchmark else 1 times = [] for _ in range(outer_reps): - output = subprocess.check_output([args.driver, code, str(inner_reps)], + output = subprocess.check_output([args.driver, str(mode), code, str(inner_reps)], universal_newlines=True) instr, time = tuple(output.split("\n", 1)) if instr != expected: @@ -22,26 +22,29 @@ def run(args, code, expected): if args.benchmark: mean = statistics.mean(times) stdev = statistics.stdev(times) - 
print("{:53} {:6.3f} ns (std: {:6.3f} ns)".format(expected, mean, stdev)) + print("{:2} {:50} {:6.3f} ns (std: {:6.3f} ns)".format(mode, expected, mean, stdev)) return times if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--benchmark", action="store_true") + parser.add_argument("--32", dest="test_modes", action="append_const", const=32) + parser.add_argument("--64", dest="test_modes", action="append_const", const=64) parser.add_argument("driver") - parser.add_argument("archmode", choices=[32, 64], type=int) parser.add_argument("cases", nargs="+", type=argparse.FileType('r')) args = parser.parse_args() failed, total = 0, 0 total_times = [] + test_modes = frozenset(args.test_modes if args.test_modes else [32, 64]) + for file in args.cases: cases = [tuple(ln.strip().split(maxsplit=2)) for ln in file.readlines()] for op, code, expected in cases: - if op == "decode32" and args.archmode != 32: continue - if op == "decode64" and args.archmode != 64: continue + case_modes = {"decode":{32,64},"decode32":{32},"decode64":{64}}[op] + if not case_modes & test_modes: continue # Compatibility with old test system if expected[0] == '"' and expected[-1] == '"': @@ -49,7 +52,8 @@ if __name__ == "__main__": try: total += 1 - total_times += run(args, code, expected) + for mode in case_modes & test_modes: + total_times += run(args, mode, code, expected) except Exception as e: failed += 1 print("FAILED: %s" % e)