Files
fadec/decode.c
Alexis Engelke ed53b4a54d Support 32 bit and 64 bit decoding with one binary
It is possible to configure the build process such that decoding of 32
bit and 64 bit instructions can be chosen at runtime using an additional
parameter of the decode function. The header file is now entirely
architecture-independent and no longer requires any previous defines.

Decoding x86-64 still requires a 64-bit pointer size.
2019-01-13 11:58:59 +01:00

786 lines
19 KiB
C

#include <stddef.h>
#include <stdint.h>
#include <decode.h>
// Refuse to build the x86-64 decoder for hosts whose pointers are narrower
// than 64 bits.
#if defined(ARCH_X86_64) && __SIZEOF_POINTER__ < 8
#error "Decoding x86-64 requires a 64-bit architecture"
#endif

// Branch-prediction hints (GCC/Clang builtin).
#define LIKELY(x) __builtin_expect((x), 1)
#define UNLIKELY(x) __builtin_expect((x), 0)

// Generated decode tables, one per supported mode. The same include file is
// expanded twice; DECODE_TABLE_DATA_32/_64 selects which data set it emits.
//
// The data is stored as bytes, but decode() walks it through uint16_t
// pointers. A plain uint8_t array only guarantees 1-byte alignment, so the
// alignment is requested explicitly to keep those 16-bit loads aligned.
#if defined(ARCH_386)
#define DECODE_TABLE_DATA_32
static const uint8_t _decode_table32[] __attribute__((aligned(16))) = {
#include <decode-table.inc>
};
#undef DECODE_TABLE_DATA_32
#endif

#if defined(ARCH_X86_64)
#define DECODE_TABLE_DATA_64
static const uint8_t _decode_table64[] __attribute__((aligned(16))) = {
#include <decode-table.inc>
};
#undef DECODE_TABLE_DATA_64
#endif
// Kind tag of a decode table entry. decode() extracts it with
// `entry & ENTRY_MASK`; the remaining bits of the entry are used as a byte
// offset into the decode table.
enum
{
    ENTRY_NONE         = 0,
    ENTRY_INSTR        = 1,
    ENTRY_TABLE256     = 2,
    ENTRY_TABLE8       = 3,
    ENTRY_TABLE72      = 4,
    ENTRY_TABLE_PREFIX = 5,
    ENTRY_MASK         = 7
};

// True for every kind value from ENTRY_TABLE256 upwards.
#define ENTRY_IS_TABLE(kind) ((kind) >= ENTRY_TABLE256)

// Instruction encoding flag bits. They are not referenced by the code
// visible in this file -- presumably shared with the table generator;
// verify before changing. Note that MODRM_BOTH and OPCODE share a value.
enum
{
    INSTR_ENC_ADDR         = 0x08,
    INSTR_ENC_IMM          = 0x10,
    INSTR_ENC_MODRM        = 0x80,
    INSTR_ENC_MODRM_BOTH   = 0x40,
    INSTR_ENC_FLIP         = 0x20,
    INSTR_ENC_OPCODE       = 0x40,
    INSTR_ENC_IMPLICIT_REG = 0x04
};
// Little-endian loads of 1, 2, 4 and 8 bytes starting at `buf`, assembled
// byte by byte so they work for any alignment and host byte order. The
// result has type size_t and is zero-extended; callers cast to a signed
// type of the loaded width when they need sign extension.
//
// `buf` is parenthesised before the cast so that pointer expressions such as
// `p + 1` keep their own element size, and the cast preserves const-ness.
// Note that `buf` is evaluated once per loaded byte.
#define LOAD_LE_1(buf) (((size_t) ((const uint8_t*) (buf))[0]))
#define LOAD_LE_2(buf) (((size_t) ((const uint8_t*) (buf))[0]) | \
                        ((size_t) ((const uint8_t*) (buf))[1] << 8))
#define LOAD_LE_4(buf) (((size_t) ((const uint8_t*) (buf))[0]) | \
                        ((size_t) ((const uint8_t*) (buf))[1] << 8) | \
                        ((size_t) ((const uint8_t*) (buf))[2] << 16) | \
                        ((size_t) ((const uint8_t*) (buf))[3] << 24))
#if defined(ARCH_X86_64)
// Only available in x86-64 builds, where size_t is known to be 64 bits wide
// (enforced by the pointer-size #error check in this file).
#define LOAD_LE_8(buf) (((size_t) ((const uint8_t*) (buf))[0]) | \
                        ((size_t) ((const uint8_t*) (buf))[1] << 8) | \
                        ((size_t) ((const uint8_t*) (buf))[2] << 16) | \
                        ((size_t) ((const uint8_t*) (buf))[3] << 24) | \
                        ((size_t) ((const uint8_t*) (buf))[4] << 32) | \
                        ((size_t) ((const uint8_t*) (buf))[5] << 40) | \
                        ((size_t) ((const uint8_t*) (buf))[6] << 48) | \
                        ((size_t) ((const uint8_t*) (buf))[7] << 56))
#endif
/**
 * Scan the prefix bytes at the start of an instruction.
 *
 * Recognizes the segment overrides, 0x66/0x67, LOCK/REPNZ/REP, the REX
 * prefix (64-bit mode only) and the 2- and 3-byte VEX prefixes. Scanning
 * stops at the first byte that is not a prefix, or directly after a REX or
 * VEX prefix.
 *
 * buffer, len:     bytes to scan; at most len bytes are read
 * mode:            DECODE_32 or DECODE_64
 * out_prefixes:    receives the accumulated PREFIX_* flags; may be NULL
 * out_vex_operand: receives the (un-inverted) VEX.vvvv register number; only
 *                  written when a VEX prefix was found; may be NULL
 *
 * Returns the number of prefix bytes consumed (possibly len itself), or -1
 * if a VEX prefix is truncated or selects an unsupported opcode map.
 */
static
int
decode_prefixes(const uint8_t* buffer, int len, DecodeMode mode,
                PrefixSet* out_prefixes, uint8_t* out_vex_operand)
{
    int off = 0;
    PrefixSet prefixes = 0;

    while (LIKELY(off < len))
    {
        uint8_t prefix = buffer[off];
        if (prefix == 0x2E)
        {
            prefixes |= PREFIX_SEG_CS;
        }
        else if (prefix == 0x26)
        {
            prefixes |= PREFIX_SEG_ES;
        }
        else if (prefix == 0x3E)
        {
            prefixes |= PREFIX_SEG_DS;
        }
        else if (prefix == 0x64)
        {
            prefixes |= PREFIX_SEG_FS;
        }
        else if (prefix == 0x65)
        {
            prefixes |= PREFIX_SEG_GS;
        }
        else if (prefix == 0x66)
        {
            prefixes |= PREFIX_OPSZ;
        }
        else if (prefix == 0x67)
        {
            prefixes |= PREFIX_ADDRSZ;
        }
        else if (prefix == 0xF0)
        {
            prefixes |= PREFIX_LOCK;
        }
        else if (prefix == 0xF2)
        {
            prefixes |= PREFIX_REPNZ;
        }
        else if (prefix == 0xF3)
        {
            prefixes |= PREFIX_REP;
        }
#if defined(ARCH_X86_64)
        else if (mode == DECODE_64 && LIKELY(prefix >= 0x40 && prefix <= 0x4F))
        {
            prefixes |= PREFIX_REX;
            if (prefix & 0x1)
            {
                prefixes |= PREFIX_REXB;
            }
            if (prefix & 0x2)
            {
                prefixes |= PREFIX_REXX;
            }
            if (prefix & 0x4)
            {
                prefixes |= PREFIX_REXR;
            }
            if (prefix & 0x8)
            {
                prefixes |= PREFIX_REXW;
            }

            // REX prefix is the last prefix.
            off++;
            break;
        }
#endif
        else if (UNLIKELY(prefix == 0xc4))
        {
            // 3-byte VEX. The second byte is needed first to tell VEX apart
            // from the legacy instruction sharing this opcode.
            if (UNLIKELY(off + 1 >= len))
            {
                return -1;
            }
#if defined(ARCH_386)
            // In 32-bit mode the byte after 0xc4 is an ordinary ModRM byte
            // unless its top two bits are set; then 0xc4 is an opcode, not a
            // prefix. This must be decided before demanding a third byte,
            // because the legacy form may be only two bytes long.
            if (mode == DECODE_32 && (buffer[off + 1] & 0xc0) != 0xc0)
            {
                break;
            }
#endif
            if (UNLIKELY(off + 2 >= len))
            {
                return -1;
            }

            uint8_t byte2 = buffer[off + 1];
            uint8_t byte3 = buffer[off + 2];

            prefixes |= PREFIX_VEX;
#if defined(ARCH_X86_64)
            // R, X and B are stored inverted in the VEX prefix.
            if (mode == DECODE_64 && (byte2 & 0x80) == 0)
            {
                prefixes |= PREFIX_REXR;
            }
            if (mode == DECODE_64 && (byte2 & 0x40) == 0)
            {
                prefixes |= PREFIX_REXX;
            }
            if (mode == DECODE_64 && (byte2 & 0x20) == 0)
            {
                prefixes |= PREFIX_REXB;
            }
#endif
            // Opcode map selector; anything but 0F, 0F38, 0F3A is rejected.
            switch (byte2 & 0x1f)
            {
            case 0x01: prefixes |= PREFIX_ESC_0F; break;
            case 0x02: prefixes |= PREFIX_ESC_0F38; break;
            case 0x03: prefixes |= PREFIX_ESC_0F3A; break;
            default: return -1;
            }
            if (byte3 & 0x04)
            {
                prefixes |= PREFIX_VEXL;
            }
#if defined(ARCH_X86_64)
            // SDM Vol 2A 2-16 (Dec. 2016)
            //  - "In 32-bit modes, VEX.W is silently ignored."
            //  - VEX.W either replaces REX.W, is don't care or is reserved.
            if (mode == DECODE_64 && (byte3 & 0x80))
            {
                prefixes |= PREFIX_REXW;
            }
#endif
            // vvvv is stored inverted as well.
            if (out_vex_operand != NULL)
            {
                *out_vex_operand = ((byte3 & 0x78) >> 3) ^ 0xf;
            }
            // Implied legacy prefix (pp field).
            switch (byte3 & 0x03)
            {
            case 1: prefixes |= PREFIX_OPSZ; break;
            case 2: prefixes |= PREFIX_REP; break;
            case 3: prefixes |= PREFIX_REPNZ; break;
            default: break;
            }

            // VEX prefix is always the last prefix.
            off += 3;
            break;
        }
        else if (UNLIKELY(prefix == 0xc5))
        {
            // 2-byte VEX
            if (UNLIKELY(off + 1 >= len))
            {
                return -1;
            }
#if defined(ARCH_386)
            // Same disambiguation as for 0xc4: in 32-bit mode this is only a
            // VEX prefix if the top two bits of the next byte are set.
            if (mode == DECODE_32 && (buffer[off + 1] & 0xc0) != 0xc0)
            {
                break;
            }
#endif
            uint8_t byte = buffer[off + 1];

            // The 2-byte form always selects the 0F opcode map.
            prefixes |= PREFIX_VEX | PREFIX_ESC_0F;
#if defined(ARCH_X86_64)
            if (mode == DECODE_64 && (byte & 0x80) == 0)
            {
                prefixes |= PREFIX_REXR;
            }
#endif
            if (byte & 0x04)
            {
                prefixes |= PREFIX_VEXL;
            }
            if (out_vex_operand != NULL)
            {
                *out_vex_operand = ((byte & 0x78) >> 3) ^ 0xf;
            }
            switch (byte & 0x03)
            {
            case 1: prefixes |= PREFIX_OPSZ; break;
            case 2: prefixes |= PREFIX_REP; break;
            case 3: prefixes |= PREFIX_REPNZ; break;
            default: break;
            }

            // VEX prefix is always the last prefix.
            off += 2;
            break;
        }
        else
        {
            // Not a prefix: this is the first opcode byte.
            break;
        }

        off++;
    }

    if (out_prefixes != NULL)
    {
        *out_prefixes = prefixes;
    }

    return off;
}
// Register-number extension contributed by a REX/VEX bit: 8 if `bit` is set
// in the instruction's prefixes, otherwise 0. Always 0 in builds without
// x86-64 support.
#if defined(ARCH_X86_64)
#define MODRM_REX_EXT(instr, bit) (((instr)->prefixes & (bit)) ? 8 : 0)
#else
#define MODRM_REX_EXT(instr, bit) 0
#endif

/**
 * Decode a ModRM byte together with an optional SIB byte and displacement.
 *
 * buffer, len: bytes starting at the ModRM byte
 * mode:        decides whether mod=0/rm=5 without SIB is IP-relative
 * instr:       prefixes are read; disp, scale and sreg are written for
 *              memory operands
 * out_o1:      receives the r/m operand (register or memory)
 * out_o2:      receives the reg-field operand; NULL when the reg field is
 *              used as opcode extension
 *
 * Returns the number of bytes consumed, or -1 if the buffer is too short.
 */
static
int
decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr,
             struct Operand* out_o1, struct Operand* out_o2)
{
    if (UNLIKELY(len <= 0))
    {
        return -1;
    }

    int pos = 0;
    const uint8_t modrm = buffer[pos++];
    const uint8_t mod = modrm >> 6;
    const uint8_t reg_field = (modrm >> 3) & 7;
    const uint8_t rm = modrm & 7;

    if (out_o2 != NULL)
    {
        out_o2->type = OT_REG;
        out_o2->reg = (uint8_t) (reg_field + MODRM_REX_EXT(instr, PREFIX_REXR));
    }

    // mod == 3: r/m names a register, nothing else follows.
    if (mod == 3)
    {
        out_o1->type = OT_REG;
        out_o1->reg = (uint8_t) (rm + MODRM_REX_EXT(instr, PREFIX_REXB));
        return pos;
    }

    // r/m == 4 announces a SIB byte. `scale` is kept as shift + 1 so that
    // zero can mean "no scaled index".
    const int has_sib = (rm == 4);
    uint8_t scale = 0;
    uint8_t index = 0;
    uint8_t base = 0;
    if (has_sib)
    {
        if (UNLIKELY(pos >= len))
        {
            return -1;
        }
        const uint8_t sib = buffer[pos++];
        scale = (uint8_t) ((sib >> 6) + 1);
        index = (uint8_t) (((sib >> 3) & 7) + MODRM_REX_EXT(instr, PREFIX_REXX));
        base = sib & 7;
    }

    // Displacement: 8 bit for mod 1; 32 bit for mod 2 and for the mod 0
    // forms without base register (rm == 5 or SIB base == 5).
    const int wants_disp32 = (mod == 2) || (mod == 0 && (rm == 5 || base == 5));
    if (mod == 1)
    {
        if (UNLIKELY(pos + 1 > len))
        {
            return -1;
        }
        instr->disp = (int8_t) LOAD_LE_1(&buffer[pos]);
        pos += 1;
    }
    else if (wants_disp32)
    {
        if (UNLIKELY(pos + 4 > len))
        {
            return -1;
        }
        instr->disp = (int32_t) LOAD_LE_4(&buffer[pos]);
        pos += 4;
    }
    else
    {
        instr->disp = 0;
    }

    out_o1->type = OT_MEM;
    instr->scale = scale;

    if (!has_sib)
    {
        if (mod == 0 && rm == 5)
        {
            // disp32 only; relative to the instruction pointer in 64-bit
            // mode, absolute otherwise.
#if defined(ARCH_X86_64)
            if (mode == DECODE_64)
            {
                out_o1->reg = RI_IP;
                return pos;
            }
#endif
            out_o1->reg = REG_NONE;
            return pos;
        }

        out_o1->reg = (uint8_t) (rm + MODRM_REX_EXT(instr, PREFIX_REXB));
        return pos;
    }

    // With SIB: index value 4 (after applying REX.X) means "no index".
    if (index != 4)
    {
        instr->sreg = index;
    }
    else
    {
        instr->scale = 0;
    }

    // Base field 5 with mod 0 means "no base"; checked on the raw field,
    // before REX.B is applied.
    if (mod == 0 && base == 5)
    {
        out_o1->reg = REG_NONE;
    }
    else
    {
        out_o1->reg = (uint8_t) (base + MODRM_REX_EXT(instr, PREFIX_REXB));
    }

    return pos;
}

#undef MODRM_REX_EXT
// Instruction descriptor. decode() obtains it by casting a location inside
// the generated decode table, so the field order, widths and packing must
// stay exactly as they are.
struct InstrDesc
{
    uint16_t type;              // copied to Instr.type
    uint8_t operand_indices;    // four 2-bit fields, see DESC_*_IDX below
    uint8_t operand_sizes;      // four 2-bit size selectors, one per operand
    uint8_t immediate;          // see DESC_IMM_* below
    uint32_t gp_size_8 : 1;     // operand size is forced to 1 byte
    uint32_t gp_size_def64 : 1; // operand size defaults to 8 in 64-bit mode
    uint32_t gp_instr_width : 1; // store the operand size in Instr.width
    uint32_t gp_fixed_operand_size : 3; // log2 of the fixed operand size
} __attribute__((packed));

// operand_indices holds one 2-bit field per operand source (ModRM r/m at
// bit 0, ModRM reg at bit 2, VEX.vvvv at bit 4, implicit register at bit 6).
// A field value of zero means "not present"; otherwise the operand slot is
// the field value XOR 3.
#define DESC_OPIDX_FIELD(desc, shift) (((desc)->operand_indices >> (shift)) & 3)
#define DESC_OPIDX_PRESENT(desc, shift) (DESC_OPIDX_FIELD(desc, shift) != 0)
#define DESC_OPIDX_SLOT(desc, shift) (DESC_OPIDX_FIELD(desc, shift) ^ 3)

#define DESC_HAS_MODRM(desc) DESC_OPIDX_PRESENT(desc, 0)
#define DESC_MODRM_IDX(desc) DESC_OPIDX_SLOT(desc, 0)
#define DESC_HAS_MODREG(desc) DESC_OPIDX_PRESENT(desc, 2)
#define DESC_MODREG_IDX(desc) DESC_OPIDX_SLOT(desc, 2)
#define DESC_HAS_VEXREG(desc) DESC_OPIDX_PRESENT(desc, 4)
#define DESC_VEXREG_IDX(desc) DESC_OPIDX_SLOT(desc, 4)
#define DESC_HAS_IMPLICIT(desc) DESC_OPIDX_PRESENT(desc, 6)
#define DESC_IMPLICIT_IDX(desc) DESC_OPIDX_SLOT(desc, 6)

// immediate: bits 0-1 operand slot (XOR 3), bits 4-6 control code,
// bit 7 set when the immediate is a single byte.
#define DESC_IMM_CONTROL(desc) (((desc)->immediate >> 4) & 0x7)
#define DESC_IMM_IDX(desc) (((desc)->immediate & 3) ^ 3)
#define DESC_IMM_BYTE(desc) (((desc)->immediate >> 7) & 1)
/**
 * Decode a single instruction.
 *
 * buffer, len: bytes to decode; at most len bytes are read
 * mode:        DECODE_32 or DECODE_64; must be supported by this build
 * instr:       receives the decoded instruction; its operands are cleared
 *              first, other fields are only written when they apply
 *
 * Returns the instruction length in bytes (also stored in instr->size),
 * -1 if the bytes do not form a known instruction or the buffer is too
 * short, or -2 if the requested mode was not compiled in.
 */
int
decode(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr)
{
    const uint8_t* decode_table = NULL;

    // Ensure that we can actually handle the decode request
#if defined(ARCH_386)
    if (mode == DECODE_32)
        decode_table = _decode_table32;
#endif
#if defined(ARCH_X86_64)
    if (mode == DECODE_64)
        decode_table = _decode_table64;
#endif

    if (decode_table == NULL)
        return -2;

    int retval;
    int off = 0;
    uint8_t vex_operand = 0;
    PrefixSet prefixes = 0;

    __builtin_memset(instr->operands, 0, sizeof(instr->operands));

    retval = decode_prefixes(buffer + off, len - off, mode, &prefixes,
                             &vex_operand);
    // At least one opcode byte has to follow the prefixes.
    if (UNLIKELY(retval < 0 || off + retval >= len))
    {
        return -1;
    }
    off += retval;

    // The table consists of 16-bit entries: the low three bits give the
    // entry kind, the remaining bits the byte offset of the next table or of
    // the instruction descriptor.
    const uint16_t* table = (const uint16_t*) decode_table;
    uint32_t kind = ENTRY_TABLE256;

    // A VEX prefix encodes the opcode map itself, so descend into the
    // matching escape table(s) without reading escape bytes.
    if (UNLIKELY(prefixes & PREFIX_ESC_MASK))
    {
        uint32_t escape = prefixes & PREFIX_ESC_MASK;
        table = (const uint16_t*) &decode_table[table[0x0F] & ~7];
        if (escape == PREFIX_ESC_0F38)
        {
            table = (const uint16_t*) &decode_table[table[0x38] & ~7];
        }
        else if (escape == PREFIX_ESC_0F3A)
        {
            table = (const uint16_t*) &decode_table[table[0x3A] & ~7];
        }
    }

    // Walk the tables until something other than a table is reached. The
    // buffer bound is checked only by the table kinds that actually look at
    // another byte: a prefix table needs no input, so it must still be
    // resolved when the opcode was the last byte of the buffer.
    for (;;)
    {
        uint16_t entry = 0;
        if (kind == ENTRY_TABLE_PREFIX)
        {
            uint8_t index = 0;
            if (prefixes & PREFIX_OPSZ)
            {
                index = 1;
            }
            else if (prefixes & PREFIX_REP)
            {
                index = 2;
            }
            else if (prefixes & PREFIX_REPNZ)
            {
                index = 3;
            }
#if defined(ARCH_X86_64)
            index |= (prefixes & PREFIX_REXW) ? (1 << 2) : 0;
#endif
            index |= (prefixes & PREFIX_VEX) ? (1 << 3) : 0;
            // If a prefix is mandatory and used as opcode extension, it has no
            // further effect on the instruction. This is especially important
            // for the 0x66 prefix, which could otherwise override the operand
            // size of general purpose registers.
            prefixes &= ~(PREFIX_OPSZ | PREFIX_REPNZ | PREFIX_REP);
            entry = table[index];
        }
        else if (kind == ENTRY_TABLE256 || kind == ENTRY_TABLE8 ||
                 kind == ENTRY_TABLE72)
        {
            if (UNLIKELY(off >= len))
            {
                return -1;
            }
            if (kind == ENTRY_TABLE256)
            {
                // Indexed by a full opcode byte, which is consumed.
                entry = table[buffer[off++]];
            }
            else if (kind == ENTRY_TABLE8)
            {
                // Indexed by the ModRM reg field; the byte is not consumed.
                entry = table[(buffer[off] >> 3) & 7];
            }
            else
            {
                // ENTRY_TABLE72: 8 entries indexed by the reg field followed
                // by 64 entries for the byte values 0xc0..0xff. The latter
                // take precedence and consume the byte.
                if ((buffer[off] & 0xc0) == 0xc0)
                {
                    entry = table[buffer[off] - 0xb8];
                    if ((entry & ENTRY_MASK) != ENTRY_NONE)
                    {
                        off++;
                    }
                    else
                    {
                        entry = table[(buffer[off] >> 3) & 7];
                    }
                }
                else
                {
                    entry = table[(buffer[off] >> 3) & 7];
                }
            }
        }
        else
        {
            break;
        }

        kind = entry & ENTRY_MASK;
        table = (const uint16_t*) &decode_table[entry & ~7];
    }

    if (UNLIKELY(kind != ENTRY_INSTR))
    {
        return -1;
    }

    const struct InstrDesc* desc = (const struct InstrDesc*) table;

    instr->type = desc->type;
    instr->prefixes = prefixes;
    instr->address = (uintptr_t) buffer;

    // Segment selection; with several override prefixes the order of this
    // chain decides. Without any override, DS is reported.
    if (prefixes & PREFIX_SEG_FS)
    {
        instr->segment = RI_FS;
    }
    else if (prefixes & PREFIX_SEG_GS)
    {
        instr->segment = RI_GS;
    }
    else if (prefixes & PREFIX_SEG_CS)
    {
        instr->segment = RI_CS;
    }
    else if (prefixes & PREFIX_SEG_DS)
    {
        instr->segment = RI_DS;
    }
    else if (prefixes & PREFIX_SEG_ES)
    {
        instr->segment = RI_ES;
    }
    else
    {
        instr->segment = RI_DS;
    }

    // General purpose operand size; REX.W takes precedence over 0x66.
    uint8_t op_size = 0;
    if (desc->gp_size_8)
    {
        op_size = 1;
    }
#if defined(ARCH_X86_64)
    else if (prefixes & PREFIX_REXW)
    {
        op_size = 8;
    }
#endif
    else if (prefixes & PREFIX_OPSZ)
    {
        op_size = 2;
    }
#if defined(ARCH_X86_64)
    else if (mode == DECODE_64 && desc->gp_size_def64)
    {
        op_size = 8;
    }
#endif
    else
    {
        op_size = 4;
    }

    if (UNLIKELY(desc->gp_instr_width))
    {
        instr->width = op_size;
    }
    else
    {
        instr->width = 0;
    }

    uint8_t vec_size = 16;
    if (prefixes & PREFIX_VEXL)
    {
        vec_size = 32;
    }

    // Size selector of each operand: none, fixed, GP size, vector size.
    uint8_t operand_sizes[4] = {
        0, 1 << desc->gp_fixed_operand_size, op_size, vec_size
    };

    for (int i = 0; i < 4; i++)
    {
        uint8_t enc_size = (desc->operand_sizes >> 2 * i) & 3;
        instr->operands[i].size = operand_sizes[enc_size];
    }

    if (UNLIKELY(DESC_HAS_IMPLICIT(desc)))
    {
        // Implicit operand: always register number 0.
        struct Operand* operand = &instr->operands[DESC_IMPLICIT_IDX(desc)];
        operand->type = OT_REG;
        operand->reg = 0;
    }

    if (DESC_HAS_MODRM(desc))
    {
        struct Operand* operand1 = &instr->operands[DESC_MODRM_IDX(desc)];

        // Without a reg operand, the reg field is an opcode extension.
        struct Operand* operand2 = NULL;
        if (DESC_HAS_MODREG(desc))
        {
            operand2 = &instr->operands[DESC_MODREG_IDX(desc)];
        }
        retval = decode_modrm(buffer + off, len - off, mode, instr,
                              operand1, operand2);

        if (UNLIKELY(retval < 0))
        {
            return -1;
        }

        off += retval;
    }
    else if (UNLIKELY(DESC_HAS_MODREG(desc)))
    {
        // If there is no ModRM, but a Mod-Reg, its opcode-encoded.
        struct Operand* operand = &instr->operands[DESC_MODREG_IDX(desc)];
        uint8_t reg_idx = buffer[off - 1] & 7;
#if defined(ARCH_X86_64)
        reg_idx += (prefixes & PREFIX_REXB) ? 8 : 0;
#endif
        operand->type = OT_REG;
        operand->reg = reg_idx;
    }

    if (UNLIKELY(DESC_HAS_VEXREG(desc)))
    {
        struct Operand* operand = &instr->operands[DESC_VEXREG_IDX(desc)];
        operand->type = OT_REG;
        operand->reg = vex_operand;
    }

    uint32_t imm_control = DESC_IMM_CONTROL(desc);
    if (UNLIKELY(imm_control == 1))
    {
        // Constant operand 1; no immediate bytes are encoded.
        struct Operand* operand = &instr->operands[DESC_IMM_IDX(desc)];
        operand->type = OT_IMM;
        operand->size = 1;
        instr->immediate = 1;
    }
    else if (UNLIKELY(imm_control == 2))
    {
        // Memory operand given by an absolute address in the instruction.
        struct Operand* operand = &instr->operands[DESC_IMM_IDX(desc)];
        operand->type = OT_MEM;
        operand->reg = REG_NONE;
        operand->size = op_size;
        instr->scale = 0;
        // TODO: Address size overrides
#if defined(ARCH_386)
        if (mode == DECODE_32)
        {
            if (UNLIKELY(off + 4 > len))
            {
                return -1;
            }
            instr->disp = LOAD_LE_4(&buffer[off]);
            off += 4;
        }
#endif
#if defined(ARCH_X86_64)
        if (mode == DECODE_64)
        {
            if (UNLIKELY(off + 8 > len))
            {
                return -1;
            }
            instr->disp = LOAD_LE_8(&buffer[off]);
            off += 8;
        }
#endif
    }
    else if (UNLIKELY(imm_control != 0))
    {
        // REX.W overrides the 0x66 prefix for the operand size (see op_size
        // above), so 0x66 must not shrink the immediate when REX.W is set.
        int rexw = 0;
#if defined(ARCH_X86_64)
        rexw = (prefixes & PREFIX_REXW) != 0;
#endif

        uint8_t imm_size;
        if (DESC_IMM_BYTE(desc))
        {
            imm_size = 1;
        }
        else if (UNLIKELY(instr->type == IT_RET_IMM))
        {
            imm_size = 2;
        }
        else if (UNLIKELY(instr->type == IT_ENTER))
        {
            imm_size = 3;
        }
        else if ((prefixes & PREFIX_OPSZ) && !rexw)
        {
            imm_size = 2;
        }
#if defined(ARCH_X86_64)
        else if (rexw && instr->type == IT_MOVABS_IMM)
        {
            imm_size = 8;
        }
#endif
        else
        {
            imm_size = 4;
        }

        if (UNLIKELY(off + imm_size > len))
        {
            return -1;
        }

        // 1-, 2-, 4- and 8-byte immediates are sign-extended; the 3-byte
        // immediate is stored as-is.
        if (imm_size == 1)
        {
            instr->immediate = (int8_t) LOAD_LE_1(&buffer[off]);
        }
        else if (imm_size == 2)
        {
            instr->immediate = (int16_t) LOAD_LE_2(&buffer[off]);
        }
        else if (imm_size == 3)
        {
            instr->immediate = LOAD_LE_2(&buffer[off]);
            instr->immediate |= LOAD_LE_1(&buffer[off + 2]) << 16;
        }
        else if (imm_size == 4)
        {
            instr->immediate = (int32_t) LOAD_LE_4(&buffer[off]);
        }
#if defined(ARCH_X86_64)
        else if (imm_size == 8)
        {
            instr->immediate = (int64_t) LOAD_LE_8(&buffer[off]);
        }
#endif
        off += imm_size;

        if (imm_control == 4)
        {
            // Relative offset: turn it into an absolute address based on the
            // end of the instruction in the buffer.
            instr->immediate += (uintptr_t) buffer + off;
        }

        struct Operand* operand = &instr->operands[DESC_IMM_IDX(desc)];
        if (UNLIKELY(imm_control == 5))
        {
            // Register operand encoded in bits 7:4 of the immediate.
            operand->type = OT_REG;
            operand->reg = (instr->immediate & 0xf0) >> 4;
        }
        else
        {
            operand->type = OT_IMM;
        }
    }

    instr->size = off;

    return off;
}