diff --git a/README.md b/README.md index 753e516..8d61cf5 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,6 @@ # libx86decode -A fast and lightweight decoder for x86 and x86-64. *This is not a disassembler, it does not intend to procude valid assembly.* To meet the goal of speed, lookup tables are used to map the opcode the (internal) description of the instruction encoding. This table currently has a size of roughly 21 kiB. - -Currently, decoding 32-bit assembly required the `ARCH_386` macro to be defined and is only tested when compiling as 32-bit binary. Decoding of 64-bit instruction requires the macro `ARCH_X86_64` and is only supported in 64-bit mode. This restriction might change in future. +A fast and lightweight decoder for x86 and x86-64. *This is not a disassembler, it does not intend to produce valid assembly.* To meet the goal of speed, lookup tables are used to map the opcode to the (internal) description of the instruction encoding. This table currently has a size of roughly 21 kiB. ### Known issues - An implicit `FWAIT` in FPU instructions is decoded as a separate instruction. For example, the instruction `FINIT` is decoded as an `FWAIT` followed by an `FINIT` where as `FNINIT` is decoded as a plain `FINIT` instruction. 
diff --git a/decode.c b/decode.c index 97bbd99..dd9bae8 100644 --- a/decode.c +++ b/decode.c @@ -5,14 +5,28 @@ #include +#if defined(ARCH_X86_64) && __SIZEOF_POINTER__ < 8 +#error "Decoding x86-64 requires a 64-bit architecture" +#endif + #define LIKELY(x) __builtin_expect((x), 1) #define UNLIKELY(x) __builtin_expect((x), 0) -#define DECODE_TABLE_DATA -static const uint8_t _decode_table[] = { +#if defined(ARCH_386) +#define DECODE_TABLE_DATA_32 +static const uint8_t _decode_table32[] = { #include }; -#undef DECODE_TABLE_DATA +#undef DECODE_TABLE_DATA_32 +#endif + +#if defined(ARCH_X86_64) +#define DECODE_TABLE_DATA_64 +static const uint8_t _decode_table64[] = { +#include +}; +#undef DECODE_TABLE_DATA_64 +#endif #define ENTRY_NONE 0 @@ -52,8 +66,8 @@ static const uint8_t _decode_table[] = { static int -decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes, - uint8_t* out_vex_operand) +decode_prefixes(const uint8_t* buffer, int len, DecodeMode mode, + PrefixSet* out_prefixes, uint8_t* out_vex_operand) { int off = 0; PrefixSet prefixes = 0; @@ -102,7 +116,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes, prefixes |= PREFIX_REP; } #if defined(ARCH_X86_64) - else if (LIKELY(prefix >= 0x40 && prefix <= 0x4F)) + else if (mode == DECODE_64 && LIKELY(prefix >= 0x40 && prefix <= 0x4F)) { prefixes |= PREFIX_REX; if (prefix & 0x1) @@ -134,7 +148,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes, return -1; } #if defined(ARCH_386) - if ((buffer[off + 1] & 0xc0) != 0xc0) + if (mode == DECODE_32 && (buffer[off + 1] & 0xc0) != 0xc0) { break; } @@ -144,15 +158,15 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes, prefixes |= PREFIX_VEX; #if defined(ARCH_X86_64) - if ((byte2 & 0x80) == 0) + if (mode == DECODE_64 && (byte2 & 0x80) == 0) { prefixes |= PREFIX_REXR; } - if ((byte2 & 0x40) == 0) + if (mode == DECODE_64 && (byte2 & 0x40) == 0) { prefixes |= PREFIX_REXX; } - if ((byte2 & 0x20) 
== 0) + if (mode == DECODE_64 && (byte2 & 0x20) == 0) { prefixes |= PREFIX_REXB; } @@ -172,7 +186,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes, // SDM Vol 2A 2-16 (Dec. 2016) // - "In 32-bit modes, VEX.W is silently ignored." // - VEX.W either replaces REX.W, is don't care or is reserved. - if (byte3 & 0x80) + if (mode == DECODE_64 && (byte3 & 0x80)) { prefixes |= PREFIX_REXW; } @@ -198,7 +212,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes, return -1; } #if defined(ARCH_386) - if ((buffer[off + 1] & 0xc0) != 0xc0) + if (mode == DECODE_32 && (buffer[off + 1] & 0xc0) != 0xc0) { break; } @@ -207,7 +221,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes, prefixes |= PREFIX_VEX | PREFIX_ESC_0F; #if defined(ARCH_X86_64) - if ((byte & 0x80) == 0) + if (mode == DECODE_64 && (byte & 0x80) == 0) { prefixes |= PREFIX_REXR; } @@ -246,7 +260,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes, static int -decode_modrm(const uint8_t* buffer, int len, Instr* instr, +decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr, struct Operand* out_o1, struct Operand* out_o2) { int off = 0; @@ -336,10 +350,11 @@ decode_modrm(const uint8_t* buffer, int len, Instr* instr, if (mod == 0 && rm == 5) { #if defined(ARCH_X86_64) - out_o1->reg = RI_IP; -#else - out_o1->reg = REG_NONE; + if (mode == DECODE_64) + out_o1->reg = RI_IP; + else #endif + out_o1->reg = REG_NONE; return off; } @@ -402,8 +417,23 @@ struct InstrDesc #define DESC_IMM_BYTE(desc) (((desc)->immediate >> 7) & 1) int -decode(const uint8_t* buffer, int len, Instr* instr) +decode(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr) { + const uint8_t* decode_table = NULL; + + // Ensure that we can actually handle the decode request +#if defined(ARCH_386) + if (mode == DECODE_32) + decode_table = _decode_table32; +#endif +#if defined(ARCH_X86_64) + if (mode == DECODE_64) + decode_table = 
_decode_table64; +#endif + + if (decode_table == NULL) + return -2; + int retval; int off = 0; uint8_t vex_operand = 0; @@ -411,27 +441,28 @@ decode(const uint8_t* buffer, int len, Instr* instr) __builtin_memset(instr->operands, 0, sizeof(instr->operands)); - retval = decode_prefixes(buffer + off, len - off, &prefixes, &vex_operand); + retval = decode_prefixes(buffer + off, len - off, mode, &prefixes, + &vex_operand); if (UNLIKELY(retval < 0 || off + retval >= len)) { return -1; } off += retval; - uint16_t* table = (uint16_t*) _decode_table; + const uint16_t* table = (uint16_t*) decode_table; uint32_t kind = ENTRY_TABLE256; if (UNLIKELY(prefixes & PREFIX_ESC_MASK)) { uint32_t escape = prefixes & PREFIX_ESC_MASK; - table = (uint16_t*) &_decode_table[table[0x0F] & ~7]; + table = (uint16_t*) &decode_table[table[0x0F] & ~7]; if (escape == PREFIX_ESC_0F38) { - table = (uint16_t*) &_decode_table[table[0x38] & ~7]; + table = (uint16_t*) &decode_table[table[0x38] & ~7]; } else if (escape == PREFIX_ESC_0F3A) { - table = (uint16_t*) &_decode_table[table[0x3A] & ~7]; + table = (uint16_t*) &decode_table[table[0x3A] & ~7]; } } @@ -497,7 +528,7 @@ decode(const uint8_t* buffer, int len, Instr* instr) } kind = entry & ENTRY_MASK; - table = (uint16_t*) &_decode_table[entry & ~7]; + table = (uint16_t*) &decode_table[entry & ~7]; } while (LIKELY(off < len)); if (UNLIKELY(kind != ENTRY_INSTR)) @@ -552,7 +583,7 @@ decode(const uint8_t* buffer, int len, Instr* instr) op_size = 2; } #if defined(ARCH_X86_64) - else if (desc->gp_size_def64) + else if (mode == DECODE_64 && desc->gp_size_def64) { op_size = 8; } @@ -603,7 +634,7 @@ decode(const uint8_t* buffer, int len, Instr* instr) { operand2 = &instr->operands[DESC_MODREG_IDX(desc)]; } - retval = decode_modrm(buffer + off, len - off, instr, + retval = decode_modrm(buffer + off, len - off, mode, instr, operand1, operand2); if (UNLIKELY(retval < 0)) @@ -649,19 +680,26 @@ decode(const uint8_t* buffer, int len, Instr* instr) instr->scale = 0; 
// TODO: Address size overrides #if defined(ARCH_386) - if (UNLIKELY(off + 4 > len)) + if (mode == DECODE_32) { - return -1; + if (UNLIKELY(off + 4 > len)) + { + return -1; + } + instr->disp = LOAD_LE_4(&buffer[off]); + off += 4; } - instr->disp = LOAD_LE_4(&buffer[off]); - off += 4; -#else - if (UNLIKELY(off + 8 > len)) +#endif +#if defined(ARCH_X86_64) + if (mode == DECODE_64) { - return -1; + if (UNLIKELY(off + 8 > len)) + { + return -1; + } + instr->disp = LOAD_LE_8(&buffer[off]); + off += 8; } - instr->disp = LOAD_LE_8(&buffer[off]); - off += 8; #endif } else if (UNLIKELY(imm_control != 0)) diff --git a/decode.h b/decode.h index 848fbe0..94f3318 100644 --- a/decode.h +++ b/decode.h @@ -5,10 +5,6 @@ #include #include -#if defined(ARCH_X86_64) && __SIZEOF_POINTER__ < 8 -#error "Decoding x86-64 requires a 64-bit architecture" -#endif - #ifndef ssize_t #define ssize_t intptr_t #endif @@ -22,6 +18,13 @@ enum #undef DECODE_TABLE_MNEMONICS #undef MNEMONIC +enum DecodeMode { + DECODE_64 = 0, + DECODE_32 = 1, +}; + +typedef enum DecodeMode DecodeMode; + enum RegIndex { RI_AL = 0, RI_CL, @@ -40,7 +43,6 @@ enum RegIndex { RI_BP, RI_SI, RI_DI, -#if defined(ARCH_X86_64) RI_R8, RI_R9, RI_R10, @@ -49,12 +51,9 @@ enum RegIndex { RI_R13, RI_R14, RI_R15, -#endif // EIP cannot be encoded in Protected/Compatibility Mode -#if defined(ARCH_X86_64) RI_IP = 0x10, -#endif RI_ES = 0, RI_CS, @@ -82,13 +81,11 @@ enum PrefixSet PREFIX_LOCK = 1 << 4, PREFIX_REPNZ = 1 << 5, PREFIX_REP = 1 << 6, -#if defined(ARCH_X86_64) PREFIX_REX = 1 << 7, PREFIX_REXB = 1 << 8, PREFIX_REXX = 1 << 9, PREFIX_REXR = 1 << 10, PREFIX_REXW = 1 << 11, -#endif PREFIX_ESC_NONE = 0 << 13, PREFIX_ESC_0F = 1 << 13, PREFIX_ESC_0F38 = 2 << 13, @@ -144,12 +141,9 @@ typedef struct Instr Instr; #define INSTR_HAS_REPNZ(instr) ((instr)->prefixes & PREFIX_REPNZ) #define INSTR_HAS_LOCK(instr) ((instr)->prefixes & PREFIX_LOCK) #define INSTR_HAS_ADDRSZ(instr) ((instr)->prefixes & PREFIX_ADDRSZ) - -#if defined(ARCH_X86_64) 
#define INSTR_HAS_REX(instr) ((instr)->prefixes & PREFIX_REX) -#endif -int decode(const uint8_t* buffer, int len, Instr* out_instr); +int decode(const uint8_t* buffer, int len, DecodeMode mode, Instr* out_instr); void instr_format(const Instr* instr, char buffer[128]); void instr_print(const Instr* instr) __attribute__((deprecated)); diff --git a/meson.build b/meson.build index bf3c7ae..a111bd9 100644 --- a/meson.build +++ b/meson.build @@ -29,13 +29,21 @@ if get_option('warning_level').to_int() >= 3 language: 'c') endif +decode_32 = false +decode_64 = false + archmode = get_option('archmode') -if archmode == '32' +if archmode == 'only32' or archmode == 'both' add_project_arguments(['-DARCH_386'], language: 'c') -elif archmode == '64' + decode_32 = true +endif +if archmode == 'only64' or archmode == 'both' add_project_arguments(['-DARCH_X86_64'], language: 'c') -else - error('Invalid architecture mode') + decode_64 = true +endif + +if not decode_32 and not decode_64 + error('no architecture mode') endif instr_data = custom_target('tables', diff --git a/meson_options.txt b/meson_options.txt index ce5a124..5f6a767 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -1 +1 @@ -option('archmode', type: 'combo', choices: ['32', '64']) +option('archmode', type: 'combo', choices: ['both', 'only32', 'only64']) diff --git a/parseinstrs.py b/parseinstrs.py index c127004..6b68a5c 100644 --- a/parseinstrs.py +++ b/parseinstrs.py @@ -261,14 +261,10 @@ def bytes_to_table(data): return "\n".join(hexdata[i:i+80] for i in range(0, len(hexdata), 80)) template = """// Auto-generated file -- do not modify! 
-#if defined(DECODE_TABLE_DATA) -#if defined(ARCH_386) +#if defined(DECODE_TABLE_DATA_32) {hex_table32} -#elif defined(ARCH_X86_64) +#elif defined(DECODE_TABLE_DATA_64) {hex_table64} -#else -#error "unknown architecture" -#endif #elif defined(DECODE_TABLE_MNEMONICS) {mnemonic_list} #elif defined(DECODE_TABLE_STRTAB1) diff --git a/tests/driver.c b/tests/driver.c index 6df9197..1132ff0 100644 --- a/tests/driver.c +++ b/tests/driver.c @@ -24,24 +24,40 @@ parse_nibble(const char nibble) int main(int argc, char** argv) { - if (argc != 2 && argc != 3) + if (argc != 3 && argc != 4) { - printf("usage: %s [instruction bytes] ([repetitions])\n", argv[0]); + printf("usage: %s [mode] [instruction bytes] ([repetitions])\n", argv[0]); return -1; } + DecodeMode mode; + size_t mode_input = strtoul(argv[1], NULL, 0); + if (mode_input == 32) + { + mode = DECODE_32; + } + else if (mode_input == 64) + { + mode = DECODE_64; + } + else + { + printf("Unknown decode mode\n"); + return 1; + } + // Avoid allocation by transforming hex to binary in-place. 
- uint8_t* code = (uint8_t*) argv[1]; + uint8_t* code = (uint8_t*) argv[2]; uint8_t* code_end = code; - char* hex = argv[1]; + char* hex = argv[2]; for (; *hex; hex += 2, code_end++) *code_end = (parse_nibble(hex[0]) << 4) | parse_nibble(hex[1]); size_t length = (size_t) (code_end - code); size_t repetitions = 1; - if (argc >= 3) - repetitions = strtoul(argv[2], NULL, 0); + if (argc >= 4) + repetitions = strtoul(argv[3], NULL, 0); struct timespec time_start; struct timespec time_end; @@ -56,7 +72,7 @@ main(int argc, char** argv) while (current_off != length) { size_t remaining = length - current_off; - int retval = decode(code + current_off, remaining, &instr); + int retval = decode(code + current_off, remaining, mode, &instr); if (retval < 0) goto fail; current_off += retval; diff --git a/tests/meson.build b/tests/meson.build index 89e9437..36fa3a7 100644 --- a/tests/meson.build +++ b/tests/meson.build @@ -2,7 +2,13 @@ test_driver = executable('test_driver', 'driver.c', dependencies: libx86decode, c_args: ['-D_GNU_SOURCE']) -test_args = [files('test.py'), test_driver, archmode] +test_args = [files('test.py'), test_driver] +if decode_32 + test_args += ['--32'] +endif +if decode_64 + test_args += ['--64'] +endif ## Test cases diff --git a/tests/test.py b/tests/test.py index b60cebf..771268e 100644 --- a/tests/test.py +++ b/tests/test.py @@ -5,12 +5,12 @@ import statistics import subprocess import sys -def run(args, code, expected): +def run(args, mode, code, expected): inner_reps = 10000000 if args.benchmark else 1 outer_reps = 3 if args.benchmark else 1 times = [] for _ in range(outer_reps): - output = subprocess.check_output([args.driver, code, str(inner_reps)], + output = subprocess.check_output([args.driver, str(mode), code, str(inner_reps)], universal_newlines=True) instr, time = tuple(output.split("\n", 1)) if instr != expected: @@ -22,26 +22,29 @@ def run(args, code, expected): if args.benchmark: mean = statistics.mean(times) stdev = statistics.stdev(times) - 
print("{:53} {:6.3f} ns (std: {:6.3f} ns)".format(expected, mean, stdev)) + print("{:2} {:50} {:6.3f} ns (std: {:6.3f} ns)".format(mode, expected, mean, stdev)) return times if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--benchmark", action="store_true") + parser.add_argument("--32", dest="test_modes", action="append_const", const=32) + parser.add_argument("--64", dest="test_modes", action="append_const", const=64) parser.add_argument("driver") - parser.add_argument("archmode", choices=[32, 64], type=int) parser.add_argument("cases", nargs="+", type=argparse.FileType('r')) args = parser.parse_args() failed, total = 0, 0 total_times = [] + test_modes = frozenset(args.test_modes if args.test_modes else [32, 64]) + for file in args.cases: cases = [tuple(ln.strip().split(maxsplit=2)) for ln in file.readlines()] for op, code, expected in cases: - if op == "decode32" and args.archmode != 32: continue - if op == "decode64" and args.archmode != 64: continue + case_modes = {"decode":{32,64},"decode32":{32},"decode64":{64}}[op] + if not case_modes & test_modes: continue # Compatibility with old test system if expected[0] == '"' and expected[-1] == '"': @@ -49,7 +52,8 @@ if __name__ == "__main__": try: total += 1 - total_times += run(args, code, expected) + for mode in case_modes & test_modes: + total_times += run(args, mode, code, expected) except Exception as e: failed += 1 print("FAILED: %s" % e)