Support 32 bit and 64 bit decoding with one binary

It is possible to configure the build process such that decoding of 32
bit and 64 bit instructions can be chosen at runtime using an additional
parameter of the decode function. The header file is now entirely
architecture-independent and no longer requires any previous defines.

Decoding x86-64 still requires a 64-bit pointer size.
This commit is contained in:
Alexis Engelke
2019-01-13 11:58:59 +01:00
parent 83ea2f0769
commit ed53b4a54d
9 changed files with 138 additions and 78 deletions

View File

@@ -1,8 +1,6 @@
# libx86decode
A fast and lightweight decoder for x86 and x86-64. *This is not a disassembler, it does not intend to procude valid assembly.* To meet the goal of speed, lookup tables are used to map the opcode the (internal) description of the instruction encoding. This table currently has a size of roughly 21 kiB.
Currently, decoding 32-bit assembly required the `ARCH_386` macro to be defined and is only tested when compiling as 32-bit binary. Decoding of 64-bit instruction requires the macro `ARCH_X86_64` and is only supported in 64-bit mode. This restriction might change in future.
A fast and lightweight decoder for x86 and x86-64. *This is not a disassembler, it does not intend to produce valid assembly.* To meet the goal of speed, lookup tables are used to map the opcode to the (internal) description of the instruction encoding. This table currently has a size of roughly 21 kiB.
### Known issues
- An implicit `FWAIT` in FPU instructions is decoded as a separate instruction. For example, the instruction `FINIT` is decoded as an `FWAIT` followed by an `FINIT`, whereas `FNINIT` is decoded as a plain `FINIT` instruction.

108
decode.c
View File

@@ -5,14 +5,28 @@
#include <decode.h>
#if defined(ARCH_X86_64) && __SIZEOF_POINTER__ < 8
#error "Decoding x86-64 requires a 64-bit architecture"
#endif
#define LIKELY(x) __builtin_expect((x), 1)
#define UNLIKELY(x) __builtin_expect((x), 0)
#define DECODE_TABLE_DATA
static const uint8_t _decode_table[] = {
#if defined(ARCH_386)
#define DECODE_TABLE_DATA_32
static const uint8_t _decode_table32[] = {
#include <decode-table.inc>
};
#undef DECODE_TABLE_DATA
#undef DECODE_TABLE_DATA_32
#endif
#if defined(ARCH_X86_64)
#define DECODE_TABLE_DATA_64
static const uint8_t _decode_table64[] = {
#include <decode-table.inc>
};
#undef DECODE_TABLE_DATA_64
#endif
#define ENTRY_NONE 0
@@ -52,8 +66,8 @@ static const uint8_t _decode_table[] = {
static
int
decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes,
uint8_t* out_vex_operand)
decode_prefixes(const uint8_t* buffer, int len, DecodeMode mode,
PrefixSet* out_prefixes, uint8_t* out_vex_operand)
{
int off = 0;
PrefixSet prefixes = 0;
@@ -102,7 +116,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes,
prefixes |= PREFIX_REP;
}
#if defined(ARCH_X86_64)
else if (LIKELY(prefix >= 0x40 && prefix <= 0x4F))
else if (mode == DECODE_64 && LIKELY(prefix >= 0x40 && prefix <= 0x4F))
{
prefixes |= PREFIX_REX;
if (prefix & 0x1)
@@ -134,7 +148,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes,
return -1;
}
#if defined(ARCH_386)
if ((buffer[off + 1] & 0xc0) != 0xc0)
if (mode == DECODE_32 && (buffer[off + 1] & 0xc0) != 0xc0)
{
break;
}
@@ -144,15 +158,15 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes,
prefixes |= PREFIX_VEX;
#if defined(ARCH_X86_64)
if ((byte2 & 0x80) == 0)
if (mode == DECODE_64 && (byte2 & 0x80) == 0)
{
prefixes |= PREFIX_REXR;
}
if ((byte2 & 0x40) == 0)
if (mode == DECODE_64 && (byte2 & 0x40) == 0)
{
prefixes |= PREFIX_REXX;
}
if ((byte2 & 0x20) == 0)
if (mode == DECODE_64 && (byte2 & 0x20) == 0)
{
prefixes |= PREFIX_REXB;
}
@@ -172,7 +186,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes,
// SDM Vol 2A 2-16 (Dec. 2016)
// - "In 32-bit modes, VEX.W is silently ignored."
// - VEX.W either replaces REX.W, is don't care or is reserved.
if (byte3 & 0x80)
if (mode == DECODE_64 && (byte3 & 0x80))
{
prefixes |= PREFIX_REXW;
}
@@ -198,7 +212,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes,
return -1;
}
#if defined(ARCH_386)
if ((buffer[off + 1] & 0xc0) != 0xc0)
if (mode == DECODE_32 && (buffer[off + 1] & 0xc0) != 0xc0)
{
break;
}
@@ -207,7 +221,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes,
prefixes |= PREFIX_VEX | PREFIX_ESC_0F;
#if defined(ARCH_X86_64)
if ((byte & 0x80) == 0)
if (mode == DECODE_64 && (byte & 0x80) == 0)
{
prefixes |= PREFIX_REXR;
}
@@ -246,7 +260,7 @@ decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes,
static
int
decode_modrm(const uint8_t* buffer, int len, Instr* instr,
decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr,
struct Operand* out_o1, struct Operand* out_o2)
{
int off = 0;
@@ -336,10 +350,11 @@ decode_modrm(const uint8_t* buffer, int len, Instr* instr,
if (mod == 0 && rm == 5)
{
#if defined(ARCH_X86_64)
out_o1->reg = RI_IP;
#else
out_o1->reg = REG_NONE;
if (mode == DECODE_64)
out_o1->reg = RI_IP;
else
#endif
out_o1->reg = REG_NONE;
return off;
}
@@ -402,8 +417,23 @@ struct InstrDesc
#define DESC_IMM_BYTE(desc) (((desc)->immediate >> 7) & 1)
int
decode(const uint8_t* buffer, int len, Instr* instr)
decode(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr)
{
const uint8_t* decode_table = NULL;
// Ensure that we can actually handle the decode request
#if defined(ARCH_386)
if (mode == DECODE_32)
decode_table = _decode_table32;
#endif
#if defined(ARCH_X86_64)
if (mode == DECODE_64)
decode_table = _decode_table64;
#endif
if (decode_table == NULL)
return -2;
int retval;
int off = 0;
uint8_t vex_operand = 0;
@@ -411,27 +441,28 @@ decode(const uint8_t* buffer, int len, Instr* instr)
__builtin_memset(instr->operands, 0, sizeof(instr->operands));
retval = decode_prefixes(buffer + off, len - off, &prefixes, &vex_operand);
retval = decode_prefixes(buffer + off, len - off, mode, &prefixes,
&vex_operand);
if (UNLIKELY(retval < 0 || off + retval >= len))
{
return -1;
}
off += retval;
uint16_t* table = (uint16_t*) _decode_table;
const uint16_t* table = (uint16_t*) decode_table;
uint32_t kind = ENTRY_TABLE256;
if (UNLIKELY(prefixes & PREFIX_ESC_MASK))
{
uint32_t escape = prefixes & PREFIX_ESC_MASK;
table = (uint16_t*) &_decode_table[table[0x0F] & ~7];
table = (uint16_t*) &decode_table[table[0x0F] & ~7];
if (escape == PREFIX_ESC_0F38)
{
table = (uint16_t*) &_decode_table[table[0x38] & ~7];
table = (uint16_t*) &decode_table[table[0x38] & ~7];
}
else if (escape == PREFIX_ESC_0F3A)
{
table = (uint16_t*) &_decode_table[table[0x3A] & ~7];
table = (uint16_t*) &decode_table[table[0x3A] & ~7];
}
}
@@ -497,7 +528,7 @@ decode(const uint8_t* buffer, int len, Instr* instr)
}
kind = entry & ENTRY_MASK;
table = (uint16_t*) &_decode_table[entry & ~7];
table = (uint16_t*) &decode_table[entry & ~7];
} while (LIKELY(off < len));
if (UNLIKELY(kind != ENTRY_INSTR))
@@ -552,7 +583,7 @@ decode(const uint8_t* buffer, int len, Instr* instr)
op_size = 2;
}
#if defined(ARCH_X86_64)
else if (desc->gp_size_def64)
else if (mode == DECODE_64 && desc->gp_size_def64)
{
op_size = 8;
}
@@ -603,7 +634,7 @@ decode(const uint8_t* buffer, int len, Instr* instr)
{
operand2 = &instr->operands[DESC_MODREG_IDX(desc)];
}
retval = decode_modrm(buffer + off, len - off, instr,
retval = decode_modrm(buffer + off, len - off, mode, instr,
operand1, operand2);
if (UNLIKELY(retval < 0))
@@ -649,19 +680,26 @@ decode(const uint8_t* buffer, int len, Instr* instr)
instr->scale = 0;
// TODO: Address size overrides
#if defined(ARCH_386)
if (UNLIKELY(off + 4 > len))
if (mode == DECODE_32)
{
return -1;
if (UNLIKELY(off + 4 > len))
{
return -1;
}
instr->disp = LOAD_LE_4(&buffer[off]);
off += 4;
}
instr->disp = LOAD_LE_4(&buffer[off]);
off += 4;
#else
if (UNLIKELY(off + 8 > len))
#endif
#if defined(ARCH_X86_64)
if (mode == DECODE_64)
{
return -1;
if (UNLIKELY(off + 8 > len))
{
return -1;
}
instr->disp = LOAD_LE_8(&buffer[off]);
off += 8;
}
instr->disp = LOAD_LE_8(&buffer[off]);
off += 8;
#endif
}
else if (UNLIKELY(imm_control != 0))

View File

@@ -5,10 +5,6 @@
#include <stddef.h>
#include <stdint.h>
#if defined(ARCH_X86_64) && __SIZEOF_POINTER__ < 8
#error "Decoding x86-64 requires a 64-bit architecture"
#endif
#ifndef ssize_t
#define ssize_t intptr_t
#endif
@@ -22,6 +18,13 @@ enum
#undef DECODE_TABLE_MNEMONICS
#undef MNEMONIC
// Selects at runtime which instruction set decode() interprets the input
// bytes as. decode() returns -2 if the requested mode was not compiled in
// (ARCH_X86_64 / ARCH_386 not defined at build time).
enum DecodeMode {
// Decode the buffer as x86-64 code (uses the 64-bit decode table).
DECODE_64 = 0,
// Decode the buffer as 32-bit x86 code (uses the 32-bit decode table).
DECODE_32 = 1,
};
// Convenience alias so callers can write `DecodeMode` without the `enum` tag.
typedef enum DecodeMode DecodeMode;
enum RegIndex {
RI_AL = 0,
RI_CL,
@@ -40,7 +43,6 @@ enum RegIndex {
RI_BP,
RI_SI,
RI_DI,
#if defined(ARCH_X86_64)
RI_R8,
RI_R9,
RI_R10,
@@ -49,12 +51,9 @@ enum RegIndex {
RI_R13,
RI_R14,
RI_R15,
#endif
// EIP cannot be encoded in Protected/Compatibility Mode
#if defined(ARCH_X86_64)
RI_IP = 0x10,
#endif
RI_ES = 0,
RI_CS,
@@ -82,13 +81,11 @@ enum PrefixSet
PREFIX_LOCK = 1 << 4,
PREFIX_REPNZ = 1 << 5,
PREFIX_REP = 1 << 6,
#if defined(ARCH_X86_64)
PREFIX_REX = 1 << 7,
PREFIX_REXB = 1 << 8,
PREFIX_REXX = 1 << 9,
PREFIX_REXR = 1 << 10,
PREFIX_REXW = 1 << 11,
#endif
PREFIX_ESC_NONE = 0 << 13,
PREFIX_ESC_0F = 1 << 13,
PREFIX_ESC_0F38 = 2 << 13,
@@ -144,12 +141,9 @@ typedef struct Instr Instr;
#define INSTR_HAS_REPNZ(instr) ((instr)->prefixes & PREFIX_REPNZ)
#define INSTR_HAS_LOCK(instr) ((instr)->prefixes & PREFIX_LOCK)
#define INSTR_HAS_ADDRSZ(instr) ((instr)->prefixes & PREFIX_ADDRSZ)
#if defined(ARCH_X86_64)
#define INSTR_HAS_REX(instr) ((instr)->prefixes & PREFIX_REX)
#endif
int decode(const uint8_t* buffer, int len, Instr* out_instr);
int decode(const uint8_t* buffer, int len, DecodeMode mode, Instr* out_instr);
void instr_format(const Instr* instr, char buffer[128]);
void instr_print(const Instr* instr) __attribute__((deprecated));

View File

@@ -29,13 +29,21 @@ if get_option('warning_level').to_int() >= 3
language: 'c')
endif
decode_32 = false
decode_64 = false
archmode = get_option('archmode')
if archmode == '32'
if archmode == 'only32' or archmode == 'both'
add_project_arguments(['-DARCH_386'], language: 'c')
elif archmode == '64'
decode_32 = true
endif
if archmode == 'only64' or archmode == 'both'
add_project_arguments(['-DARCH_X86_64'], language: 'c')
else
error('Invalid architecture mode')
decode_64 = true
endif
if not decode_32 and not decode_64
error('no architecture mode')
endif
instr_data = custom_target('tables',

View File

@@ -1 +1 @@
option('archmode', type: 'combo', choices: ['32', '64'])
option('archmode', type: 'combo', choices: ['both', 'only32', 'only64'])

View File

@@ -261,14 +261,10 @@ def bytes_to_table(data):
return "\n".join(hexdata[i:i+80] for i in range(0, len(hexdata), 80))
template = """// Auto-generated file -- do not modify!
#if defined(DECODE_TABLE_DATA)
#if defined(ARCH_386)
#if defined(DECODE_TABLE_DATA_32)
{hex_table32}
#elif defined(ARCH_X86_64)
#elif defined(DECODE_TABLE_DATA_64)
{hex_table64}
#else
#error "unknown architecture"
#endif
#elif defined(DECODE_TABLE_MNEMONICS)
{mnemonic_list}
#elif defined(DECODE_TABLE_STRTAB1)

View File

@@ -24,24 +24,40 @@ parse_nibble(const char nibble)
int
main(int argc, char** argv)
{
if (argc != 2 && argc != 3)
if (argc != 3 && argc != 4)
{
printf("usage: %s [instruction bytes] ([repetitions])\n", argv[0]);
printf("usage: %s [mode] [instruction bytes] ([repetitions])\n", argv[0]);
return -1;
}
DecodeMode mode;
size_t mode_input = strtoul(argv[1], NULL, 0);
if (mode_input == 32)
{
mode = DECODE_32;
}
else if (mode_input == 64)
{
mode = DECODE_64;
}
else
{
printf("Unknown decode mode\n");
return 1;
}
// Avoid allocation by transforming hex to binary in-place.
uint8_t* code = (uint8_t*) argv[1];
uint8_t* code = (uint8_t*) argv[2];
uint8_t* code_end = code;
char* hex = argv[1];
char* hex = argv[2];
for (; *hex; hex += 2, code_end++)
*code_end = (parse_nibble(hex[0]) << 4) | parse_nibble(hex[1]);
size_t length = (size_t) (code_end - code);
size_t repetitions = 1;
if (argc >= 3)
repetitions = strtoul(argv[2], NULL, 0);
if (argc >= 4)
repetitions = strtoul(argv[3], NULL, 0);
struct timespec time_start;
struct timespec time_end;
@@ -56,7 +72,7 @@ main(int argc, char** argv)
while (current_off != length)
{
size_t remaining = length - current_off;
int retval = decode(code + current_off, remaining, &instr);
int retval = decode(code + current_off, remaining, mode, &instr);
if (retval < 0)
goto fail;
current_off += retval;

View File

@@ -2,7 +2,13 @@
test_driver = executable('test_driver', 'driver.c',
dependencies: libx86decode,
c_args: ['-D_GNU_SOURCE'])
test_args = [files('test.py'), test_driver, archmode]
test_args = [files('test.py'), test_driver]
if decode_32
test_args += ['--32']
endif
if decode_64
test_args += ['--64']
endif
## Test cases

View File

@@ -5,12 +5,12 @@ import statistics
import subprocess
import sys
def run(args, code, expected):
def run(args, mode, code, expected):
inner_reps = 10000000 if args.benchmark else 1
outer_reps = 3 if args.benchmark else 1
times = []
for _ in range(outer_reps):
output = subprocess.check_output([args.driver, code, str(inner_reps)],
output = subprocess.check_output([args.driver, str(mode), code, str(inner_reps)],
universal_newlines=True)
instr, time = tuple(output.split("\n", 1))
if instr != expected:
@@ -22,26 +22,29 @@ def run(args, code, expected):
if args.benchmark:
mean = statistics.mean(times)
stdev = statistics.stdev(times)
print("{:53} {:6.3f} ns (std: {:6.3f} ns)".format(expected, mean, stdev))
print("{:2} {:50} {:6.3f} ns (std: {:6.3f} ns)".format(mode, expected, mean, stdev))
return times
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--benchmark", action="store_true")
parser.add_argument("--32", dest="test_modes", action="append_const", const=32)
parser.add_argument("--64", dest="test_modes", action="append_const", const=64)
parser.add_argument("driver")
parser.add_argument("archmode", choices=[32, 64], type=int)
parser.add_argument("cases", nargs="+", type=argparse.FileType('r'))
args = parser.parse_args()
failed, total = 0, 0
total_times = []
test_modes = frozenset(args.test_modes if args.test_modes else [32, 64])
for file in args.cases:
cases = [tuple(ln.strip().split(maxsplit=2)) for ln in file.readlines()]
for op, code, expected in cases:
if op == "decode32" and args.archmode != 32: continue
if op == "decode64" and args.archmode != 64: continue
case_modes = {"decode":{32,64},"decode32":{32},"decode64":{64}}[op]
if not case_modes & test_modes: continue
# Compatibility with old test system
if expected[0] == '"' and expected[-1] == '"':
@@ -49,7 +52,8 @@ if __name__ == "__main__":
try:
total += 1
total_times += run(args, code, expected)
for mode in case_modes & test_modes:
total_times += run(args, mode, code, expected)
except Exception as e:
failed += 1
print("FAILED: %s" % e)