Major rework of API and improved documentation

This commit is contained in:
Alexis Engelke
2019-01-23 20:03:40 +01:00
parent a045588999
commit 3abf29d63e
6 changed files with 320 additions and 273 deletions

138
decode.c
View File

@@ -2,7 +2,7 @@
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <decode.h> #include <fadec.h>
#if defined(ARCH_X86_64) && __SIZEOF_POINTER__ < 8 #if defined(ARCH_X86_64) && __SIZEOF_POINTER__ < 8
@@ -13,21 +13,27 @@
#define UNLIKELY(x) __builtin_expect((x), 0) #define UNLIKELY(x) __builtin_expect((x), 0)
#if defined(ARCH_386) #if defined(ARCH_386)
#define DECODE_TABLE_DATA_32 #define FD_DECODE_TABLE_DATA_32
static const uint8_t _decode_table32[] = { static const uint8_t _decode_table32[] = {
#include <decode-table.inc> #include <decode-table.inc>
}; };
#undef DECODE_TABLE_DATA_32 #undef FD_DECODE_TABLE_DATA_32
#endif #endif
#if defined(ARCH_X86_64) #if defined(ARCH_X86_64)
#define DECODE_TABLE_DATA_64 #define FD_DECODE_TABLE_DATA_64
static const uint8_t _decode_table64[] = { static const uint8_t _decode_table64[] = {
#include <decode-table.inc> #include <decode-table.inc>
}; };
#undef DECODE_TABLE_DATA_64 #undef FD_DECODE_TABLE_DATA_64
#endif #endif
enum DecodeMode {
DECODE_64 = 0,
DECODE_32 = 1,
};
typedef enum DecodeMode DecodeMode;
#define ENTRY_NONE 0 #define ENTRY_NONE 0
#define ENTRY_INSTR 1 #define ENTRY_INSTR 1
@@ -72,11 +78,11 @@ static const uint8_t _decode_table64[] = {
enum PrefixSet enum PrefixSet
{ {
PREFIX_LOCK = INSTR_FLAG_LOCK, PREFIX_LOCK = FD_FLAG_LOCK,
PREFIX_REP = INSTR_FLAG_REP, PREFIX_REP = FD_FLAG_REP,
PREFIX_REPNZ = INSTR_FLAG_REPNZ, PREFIX_REPNZ = FD_FLAG_REPNZ,
PREFIX_REX = INSTR_FLAG_REX, PREFIX_REX = FD_FLAG_REX,
PREFIX_VEXL = INSTR_FLAG_VEXL, PREFIX_VEXL = FD_FLAG_VEXL,
PREFIX_OPSZ = 1 << 13, PREFIX_OPSZ = 1 << 13,
PREFIX_ADDRSZ = 1 << 14, PREFIX_ADDRSZ = 1 << 14,
PREFIX_REXB = 1 << 15, PREFIX_REXB = 1 << 15,
@@ -104,7 +110,7 @@ decode_prefixes(const uint8_t* buffer, int len, DecodeMode mode,
uint8_t rep = 0; uint8_t rep = 0;
*out_mandatory = 0; *out_mandatory = 0;
*out_segment = RI_NONE; *out_segment = FD_REG_NONE;
while (LIKELY(off < len)) while (LIKELY(off < len))
{ {
@@ -113,11 +119,11 @@ decode_prefixes(const uint8_t* buffer, int len, DecodeMode mode,
{ {
default: goto out; default: goto out;
// From segment overrides, the last one wins. // From segment overrides, the last one wins.
case 0x26: *out_segment = RI_ES; off++; break; case 0x26: *out_segment = FD_REG_ES; off++; break;
case 0x2e: *out_segment = RI_CS; off++; break; case 0x2e: *out_segment = FD_REG_CS; off++; break;
case 0x3e: *out_segment = RI_DS; off++; break; case 0x3e: *out_segment = FD_REG_DS; off++; break;
case 0x64: *out_segment = RI_FS; off++; break; case 0x64: *out_segment = FD_REG_FS; off++; break;
case 0x65: *out_segment = RI_GS; off++; break; case 0x65: *out_segment = FD_REG_GS; off++; break;
case 0x67: prefixes |= PREFIX_ADDRSZ; off++; break; case 0x67: prefixes |= PREFIX_ADDRSZ; off++; break;
case 0xf0: prefixes |= PREFIX_LOCK; off++; break; case 0xf0: prefixes |= PREFIX_LOCK; off++; break;
case 0x66: prefixes |= PREFIX_OPSZ; off++; break; case 0x66: prefixes |= PREFIX_OPSZ; off++; break;
@@ -201,8 +207,8 @@ out:
static static
int int
decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr, decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, FdInstr* instr,
PrefixSet prefixes, struct Operand* out_o1, struct Operand* out_o2) PrefixSet prefixes, FdOp* out_o1, FdOp* out_o2)
{ {
int off = 0; int off = 0;
@@ -223,7 +229,7 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr,
#if defined(ARCH_X86_64) #if defined(ARCH_X86_64)
reg_idx += prefixes & PREFIX_REXR ? 8 : 0; reg_idx += prefixes & PREFIX_REXR ? 8 : 0;
#endif #endif
out_o2->type = OT_REG; out_o2->type = FD_OP_REG;
out_o2->reg = reg_idx; out_o2->reg = reg_idx;
} }
@@ -233,7 +239,7 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr,
#if defined(ARCH_X86_64) #if defined(ARCH_X86_64)
reg_idx += prefixes & PREFIX_REXB ? 8 : 0; reg_idx += prefixes & PREFIX_REXB ? 8 : 0;
#endif #endif
out_o1->type = OT_REG; out_o1->type = FD_OP_REG;
out_o1->reg = reg_idx; out_o1->reg = reg_idx;
return off; return off;
} }
@@ -250,7 +256,7 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr,
} }
uint8_t sib = buffer[off++]; uint8_t sib = buffer[off++];
scale = ((sib & 0xc0) >> 6) + 1; scale = (sib & 0xc0) >> 6;
idx = (sib & 0x38) >> 3; idx = (sib & 0x38) >> 3;
#if defined(ARCH_X86_64) #if defined(ARCH_X86_64)
idx += prefixes & PREFIX_REXX ? 8 : 0; idx += prefixes & PREFIX_REXX ? 8 : 0;
@@ -283,19 +289,20 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr,
instr->disp = 0; instr->disp = 0;
} }
out_o1->type = OT_MEM; out_o1->type = FD_OP_MEM;
instr->scale = scale; instr->idx_scale = scale;
if (scale == 0) // If there was no SIB byte.
if (rm != 4)
{ {
if (mod == 0 && rm == 5) if (mod == 0 && rm == 5)
{ {
#if defined(ARCH_X86_64) #if defined(ARCH_X86_64)
if (mode == DECODE_64) if (mode == DECODE_64)
out_o1->reg = RI_IP; out_o1->reg = FD_REG_IP;
else else
#endif #endif
out_o1->reg = REG_NONE; out_o1->reg = FD_REG_NONE;
return off; return off;
} }
@@ -304,21 +311,22 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr,
reg_idx += prefixes & PREFIX_REXB ? 8 : 0; reg_idx += prefixes & PREFIX_REXB ? 8 : 0;
#endif #endif
out_o1->reg = reg_idx; out_o1->reg = reg_idx;
instr->idx_reg = FD_REG_NONE;
return off; return off;
} }
if (idx == 4) if (idx == 4)
{ {
instr->scale = 0; instr->idx_reg = FD_REG_NONE;
} }
else else
{ {
instr->sreg = idx; instr->idx_reg = idx;
} }
if (base == 5 && mod == 0) if (base == 5 && mod == 0)
{ {
out_o1->reg = REG_NONE; out_o1->reg = FD_REG_NONE;
} }
else else
{ {
@@ -358,11 +366,15 @@ struct InstrDesc
#define DESC_IMM_BYTE(desc) (((desc)->immediate >> 7) & 1) #define DESC_IMM_BYTE(desc) (((desc)->immediate >> 7) & 1)
int int
decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address, fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
Instr* instr) FdInstr* instr)
{ {
const uint8_t* decode_table = NULL; const uint8_t* decode_table = NULL;
int len = len_sz > 15 ? 15 : len_sz;
DecodeMode mode = mode_int == 32 ? DECODE_32 :
mode_int == 64 ? DECODE_64 : -1;
// Ensure that we can actually handle the decode request // Ensure that we can actually handle the decode request
#if defined(ARCH_386) #if defined(ARCH_386)
if (mode == DECODE_32) if (mode == DECODE_32)
@@ -453,7 +465,7 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
instr->type = desc->type; instr->type = desc->type;
instr->flags = prefixes & 0x7f; instr->flags = prefixes & 0x7f;
if (mode == DECODE_64) if (mode == DECODE_64)
instr->flags |= INSTR_FLAG_64; instr->flags |= FD_FLAG_64;
instr->address = address; instr->address = address;
uint8_t op_size = 0; uint8_t op_size = 0;
@@ -468,7 +480,7 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
else else
op_size = 4; op_size = 4;
instr->op_size = desc->gp_instr_width ? op_size : 0; instr->operandsz = desc->gp_instr_width ? op_size : 0;
uint8_t vec_size = 16; uint8_t vec_size = 16;
if (prefixes & PREFIX_VEXL) if (prefixes & PREFIX_VEXL)
@@ -480,7 +492,7 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
uint8_t addr_size = mode == DECODE_64 ? 8 : 4; uint8_t addr_size = mode == DECODE_64 ? 8 : 4;
if (prefixes & PREFIX_ADDRSZ) if (prefixes & PREFIX_ADDRSZ)
addr_size >>= 1; addr_size >>= 1;
instr->addr_size = addr_size; instr->addrsz = addr_size;
uint8_t operand_sizes[4] = { uint8_t operand_sizes[4] = {
0, 1 << desc->gp_fixed_operand_size, op_size, vec_size 0, 1 << desc->gp_fixed_operand_size, op_size, vec_size
@@ -495,16 +507,16 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
if (DESC_HAS_IMPLICIT(desc)) if (DESC_HAS_IMPLICIT(desc))
{ {
struct Operand* operand = &instr->operands[DESC_IMPLICIT_IDX(desc)]; FdOp* operand = &instr->operands[DESC_IMPLICIT_IDX(desc)];
operand->type = OT_REG; operand->type = FD_OP_REG;
operand->reg = 0; operand->reg = 0;
} }
if (DESC_HAS_MODRM(desc)) if (DESC_HAS_MODRM(desc))
{ {
struct Operand* operand1 = &instr->operands[DESC_MODRM_IDX(desc)]; FdOp* operand1 = &instr->operands[DESC_MODRM_IDX(desc)];
struct Operand* operand2 = NULL; FdOp* operand2 = NULL;
if (DESC_HAS_MODREG(desc)) if (DESC_HAS_MODREG(desc))
{ {
operand2 = &instr->operands[DESC_MODREG_IDX(desc)]; operand2 = &instr->operands[DESC_MODREG_IDX(desc)];
@@ -522,37 +534,37 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
else if (DESC_HAS_MODREG(desc)) else if (DESC_HAS_MODREG(desc))
{ {
// If there is no ModRM, but a Mod-Reg, its opcode-encoded. // If there is no ModRM, but a Mod-Reg, its opcode-encoded.
struct Operand* operand = &instr->operands[DESC_MODREG_IDX(desc)]; FdOp* operand = &instr->operands[DESC_MODREG_IDX(desc)];
uint8_t reg_idx = buffer[off - 1] & 7; uint8_t reg_idx = buffer[off - 1] & 7;
#if defined(ARCH_X86_64) #if defined(ARCH_X86_64)
reg_idx += prefixes & PREFIX_REXB ? 8 : 0; reg_idx += prefixes & PREFIX_REXB ? 8 : 0;
#endif #endif
operand->type = OT_REG; operand->type = FD_OP_REG;
operand->reg = reg_idx; operand->reg = reg_idx;
} }
if (UNLIKELY(DESC_HAS_VEXREG(desc))) if (UNLIKELY(DESC_HAS_VEXREG(desc)))
{ {
struct Operand* operand = &instr->operands[DESC_VEXREG_IDX(desc)]; FdOp* operand = &instr->operands[DESC_VEXREG_IDX(desc)];
operand->type = OT_REG; operand->type = FD_OP_REG;
operand->reg = vex_operand; operand->reg = vex_operand;
} }
uint32_t imm_control = DESC_IMM_CONTROL(desc); uint32_t imm_control = DESC_IMM_CONTROL(desc);
if (imm_control == 1) if (imm_control == 1)
{ {
struct Operand* operand = &instr->operands[DESC_IMM_IDX(desc)]; FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)];
operand->type = OT_IMM; operand->type = FD_OP_IMM;
operand->size = 1; operand->size = 1;
instr->immediate = 1; instr->imm = 1;
} }
else if (imm_control == 2) else if (imm_control == 2)
{ {
struct Operand* operand = &instr->operands[DESC_IMM_IDX(desc)]; FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)];
operand->type = OT_MEM; operand->type = FD_OP_MEM;
operand->reg = REG_NONE; operand->reg = FD_REG_NONE;
operand->size = op_size; operand->size = op_size;
instr->scale = 0; instr->idx_reg = FD_REG_NONE;
if (UNLIKELY(off + addr_size > len)) if (UNLIKELY(off + addr_size > len))
return -1; return -1;
@@ -570,18 +582,18 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
} }
else if (imm_control != 0) else if (imm_control != 0)
{ {
struct Operand* operand = &instr->operands[DESC_IMM_IDX(desc)]; FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)];
uint8_t imm_size; uint8_t imm_size;
if (DESC_IMM_BYTE(desc)) if (DESC_IMM_BYTE(desc))
{ {
imm_size = 1; imm_size = 1;
} }
else if (UNLIKELY(instr->type == IT_RET_IMM)) else if (UNLIKELY(instr->type == FDI_RET_IMM))
{ {
imm_size = 2; imm_size = 2;
} }
else if (UNLIKELY(instr->type == IT_ENTER)) else if (UNLIKELY(instr->type == FDI_ENTER))
{ {
imm_size = 3; imm_size = 3;
} }
@@ -599,7 +611,7 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
} }
#if defined(ARCH_X86_64) #if defined(ARCH_X86_64)
else if (mode == DECODE_64 && (prefixes & PREFIX_REXW) && else if (mode == DECODE_64 && (prefixes & PREFIX_REXW) &&
instr->type == IT_MOVABS_IMM) instr->type == FDI_MOVABS_IMM)
{ {
imm_size = 8; imm_size = 8;
} }
@@ -616,42 +628,42 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
if (imm_size == 1) if (imm_size == 1)
{ {
instr->immediate = (int8_t) LOAD_LE_1(&buffer[off]); instr->imm = (int8_t) LOAD_LE_1(&buffer[off]);
} }
else if (imm_size == 2) else if (imm_size == 2)
{ {
instr->immediate = (int16_t) LOAD_LE_2(&buffer[off]); instr->imm = (int16_t) LOAD_LE_2(&buffer[off]);
} }
else if (imm_size == 3) else if (imm_size == 3)
{ {
instr->immediate = LOAD_LE_2(&buffer[off]); instr->imm = LOAD_LE_2(&buffer[off]);
instr->immediate |= LOAD_LE_1(&buffer[off + 2]) << 16; instr->imm |= LOAD_LE_1(&buffer[off + 2]) << 16;
} }
else if (imm_size == 4) else if (imm_size == 4)
{ {
instr->immediate = (int32_t) LOAD_LE_4(&buffer[off]); instr->imm = (int32_t) LOAD_LE_4(&buffer[off]);
} }
#if defined(ARCH_X86_64) #if defined(ARCH_X86_64)
else if (imm_size == 8) else if (imm_size == 8)
{ {
instr->immediate = (int64_t) LOAD_LE_8(&buffer[off]); instr->imm = (int64_t) LOAD_LE_8(&buffer[off]);
} }
#endif #endif
off += imm_size; off += imm_size;
if (imm_control == 4) if (imm_control == 4)
{ {
instr->immediate += instr->address + off; instr->imm += instr->address + off;
} }
if (UNLIKELY(imm_control == 5)) if (UNLIKELY(imm_control == 5))
{ {
operand->type = OT_REG; operand->type = FD_OP_REG;
operand->reg = (instr->immediate & 0xf0) >> 4; operand->reg = (instr->imm & 0xf0) >> 4;
} }
else else
{ {
operand->type = OT_IMM; operand->type = FD_OP_IMM;
} }
} }

138
decode.h
View File

@@ -1,138 +0,0 @@
#ifndef ARMX86_DECODE_H
#define ARMX86_DECODE_H
#include <stddef.h>
#include <stdint.h>
// Fallback: make ssize_t available as an alias for intptr_t when no macro of
// that name has been defined yet.
// NOTE(review): #ifndef only detects macros. If a system header provides
// ssize_t as a typedef, this #define still takes effect -- confirm intended.
#ifndef ssize_t
#define ssize_t intptr_t
#endif
// Instruction type constants. With DECODE_TABLE_MNEMONICS defined, the
// generated decode-table.inc expands to a list of MNEMONIC(name,value)
// entries; each entry becomes an enumerator IT_<name> = value.
// Both helper macros are undefined again afterwards so that they do not leak
// into code including this header.
#define DECODE_TABLE_MNEMONICS
#define MNEMONIC(name,value) IT_ ## name = value,
enum
{
#include <decode-table.inc>
};
#undef DECODE_TABLE_MNEMONICS
#undef MNEMONIC
// Processor mode the instruction bytes are decoded for.
enum DecodeMode {
// 64-bit mode.
DECODE_64 = 0,
// 32-bit mode.
DECODE_32 = 1,
};
typedef enum DecodeMode DecodeMode;
// Register indices. Several groups of names deliberately share the same
// numeric values (each group restarts at 0), so an index alone does not
// identify a register; the register class follows from the context in which
// the index is used.
enum RegIndex {
// Byte registers (indices 0-7).
RI_AL = 0,
RI_CL,
RI_DL,
RI_BL,
RI_AH,
RI_CH,
RI_DH,
RI_BH,
// General purpose registers (indices 0-15).
RI_AX = 0,
RI_CX,
RI_DX,
RI_BX,
RI_SP,
RI_BP,
RI_SI,
RI_DI,
RI_R8,
RI_R9,
RI_R10,
RI_R11,
RI_R12,
RI_R13,
RI_R14,
RI_R15,
// EIP cannot be encoded in Protected/Compatibility Mode
RI_IP = 0x10,
// Segment registers (indices 0-5).
RI_ES = 0,
RI_CS,
RI_SS,
RI_DS,
RI_FS,
RI_GS,
// No register specified
RI_NONE = 0x3f
};
// Storage type for a register index.
typedef uint8_t Reg;
// Returns the numeric index of a register (identity).
#define reg_index(reg) (reg)
// Evaluates to non-zero if the register is the "no register" marker.
#define reg_is_none(reg) ((reg) == REG_NONE)
// Alias for RI_NONE.
#define REG_NONE RI_NONE
// Bit flags stored in Instr::flags. They are tested by the INSTR_HAS_* and
// INSTR_IS64 macros.
enum
{
// Instruction has a LOCK prefix.
INSTR_FLAG_LOCK = 1 << 0,
// Instruction has a REP prefix.
INSTR_FLAG_REP = 1 << 1,
// Instruction has a REPNZ prefix.
INSTR_FLAG_REPNZ = 1 << 2,
// Instruction has a REX prefix.
INSTR_FLAG_REX = 1 << 3,
// VEX.L is set.
INSTR_FLAG_VEXL = 1 << 4,
// Instruction was decoded in 64-bit mode.
INSTR_FLAG_64 = 1 << 7,
};
// Kind of an operand; the values fit into the two-bit Operand::type field.
enum OperandType
{
// Operand slot is unused.
OT_NONE = 0,
// Register operand.
OT_REG = 1,
// Immediate operand.
OT_IMM = 2,
// Memory operand.
OT_MEM = 3,
};
// A single decoded operand.
struct Operand
{
// One of the OperandType values.
uint8_t type : 2;
// Register index (six bits, so RI_NONE = 0x3f is the largest value). For
// memory operands this holds the base register.
uint8_t reg : 6;
// Operand size (presumably in bytes -- confirm against the decoder).
uint8_t size;
};
// A decoded instruction. Scale, index register, displacement and immediate
// are stored once per instruction, not per operand.
struct Instr
{
// Instruction type, one of the IT_* constants.
uint16_t type;
// Up to four operands; unused slots have type OT_NONE.
struct Operand operands[4];
// Combination of INSTR_FLAG_* bits.
uint8_t flags;
// Segment override register index, or RI_NONE.
uint8_t segment;
// Operation width, see INSTR_WIDTH.
uint8_t op_size;
// Address size, see INSTR_ADDRSZ.
uint8_t addr_size;
/**
* Encoded as 1 << (scale - 1) **or** no scaled register at all if zero.
**/
uint8_t scale : 3;
// Index register of the memory operand; only meaningful if scale != 0.
uint8_t sreg : 5;
// Immediate value of the instruction.
size_t immediate;
// Displacement of the memory operand.
intptr_t disp;
// Address passed to decode().
uintptr_t address;
// NOTE(review): presumably the encoded length of the instruction in bytes
// (four bits suffice for the 15-byte maximum) -- confirm in decode().
uint32_t size : 4;
};
typedef struct Instr Instr;
// Accessor macros for struct Instr. All of them take a pointer to the
// instruction.
// Segment override register index, or RI_NONE.
#define INSTR_SEGMENT(instr) ((instr)->segment)
// Operation width (op_size field); zero if not applicable.
#define INSTR_WIDTH(instr) ((instr)->op_size)
// Address size (addr_size field).
#define INSTR_ADDRSZ(instr) ((instr)->addr_size)
// The following macros evaluate to non-zero if the respective flag is set.
#define INSTR_IS64(instr) ((instr)->flags & INSTR_FLAG_64)
#define INSTR_HAS_REP(instr) ((instr)->flags & INSTR_FLAG_REP)
#define INSTR_HAS_REPNZ(instr) ((instr)->flags & INSTR_FLAG_REPNZ)
#define INSTR_HAS_LOCK(instr) ((instr)->flags & INSTR_FLAG_LOCK)
#define INSTR_HAS_REX(instr) ((instr)->flags & INSTR_FLAG_REX)
#define INSTR_HAS_VEXL(instr) ((instr)->flags & INSTR_FLAG_VEXL)
// Decodes one instruction from buffer (len bytes available) in the given mode
// and stores the result in out_instr; address is recorded in the instruction.
// Returns the number of bytes consumed, or a negative value on error.
int decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
Instr* out_instr);
// Formats the instruction as text into buffer, which must provide 128 bytes.
void instr_format(const Instr* instr, char buffer[128]);
#endif

182
fadec.h Normal file
View File

@@ -0,0 +1,182 @@
#ifndef FD_FADEC_H_
#define FD_FADEC_H_
#include <stddef.h>
#include <stdint.h>
/** Register indices. Several groups of names deliberately share the same
* numeric values, so an index alone does not identify a register; the register
* class depends on the instruction type and the operand. **/
typedef enum {
FD_REG_R0 = 0, FD_REG_R1, FD_REG_R2, FD_REG_R3,
FD_REG_R4, FD_REG_R5, FD_REG_R6, FD_REG_R7,
FD_REG_R8, FD_REG_R9, FD_REG_R10, FD_REG_R11,
FD_REG_R12, FD_REG_R13, FD_REG_R14, FD_REG_R15,
// Alternative names for byte registers
FD_REG_AL = 0, FD_REG_CL, FD_REG_DL, FD_REG_BL,
FD_REG_AH, FD_REG_CH, FD_REG_DH, FD_REG_BH,
// Alternative names for general purpose registers
FD_REG_AX = 0, FD_REG_CX, FD_REG_DX, FD_REG_BX,
FD_REG_SP, FD_REG_BP, FD_REG_SI, FD_REG_DI,
// FD_REG_IP can only be accessed in long mode (64-bit)
FD_REG_IP = 0x10,
// Segment register values
FD_REG_ES = 0, FD_REG_CS, FD_REG_SS, FD_REG_DS, FD_REG_FS, FD_REG_GS,
// No register specified
FD_REG_NONE = 0x3f
} FdReg;
/** Instruction types/mnemonics. With FD_DECODE_TABLE_MNEMONICS defined, the
* generated decode-table.inc expands to a list of FD_MNEMONIC(name,value)
* entries; each entry becomes an enumerator FDI_<name> = value. Both helper
* macros are undefined again afterwards. **/
typedef enum {
#define FD_DECODE_TABLE_MNEMONICS
#define FD_MNEMONIC(name,value) FDI_ ## name = value,
#include <decode-table.inc>
#undef FD_DECODE_TABLE_MNEMONICS
#undef FD_MNEMONIC
} FdInstrType;
/** Internal use only. Bit flags stored in FdInstr.flags; use the FD_HAS_* and
* FD_IS64 macros to query them. **/
enum {
// Instruction has a LOCK prefix.
FD_FLAG_LOCK = 1 << 0,
// Instruction has a REP prefix.
FD_FLAG_REP = 1 << 1,
// Instruction has a REPNZ prefix.
FD_FLAG_REPNZ = 1 << 2,
// Instruction has a REX prefix.
FD_FLAG_REX = 1 << 3,
// VEX.L is set.
FD_FLAG_VEXL = 1 << 4,
// Instruction was decoded in 64-bit mode.
FD_FLAG_64 = 1 << 7,
};
/** Kind of an operand, as returned by FD_OP_TYPE. **/
typedef enum {
// Operand slot is unused.
FD_OP_NONE = 0,
// Register operand.
FD_OP_REG = 1,
// Immediate operand.
FD_OP_IMM = 2,
// Memory operand.
FD_OP_MEM = 3,
} FdOpType;
/** A single decoded operand. Use the FD_OP_* macros instead of accessing the
* fields directly. **/
typedef struct {
// One of the FdOpType values.
uint8_t type;
// Operand size in bytes, see FD_OP_SIZE for exceptions.
uint8_t size;
// Register index for register operands, base register (or FD_REG_NONE) for
// memory operands.
int8_t reg;
} FdOp;
/** A decoded instruction. Use the FD_* accessor macros instead of accessing
* the fields directly. Index register, scale, displacement and immediate are
* stored once per instruction, not per operand. **/
typedef struct {
// Instruction type, an FdInstrType value.
uint16_t type;
// Combination of FD_FLAG_* bits.
uint8_t flags;
// Segment override register, or FD_REG_NONE.
uint8_t segment;
// Address size in bytes.
uint8_t addrsz;
// Operation width in bytes, or zero; see FD_OPSIZE.
uint8_t operandsz;
// Up to four operands; unused slots have type FD_OP_NONE.
FdOp operands[4];
// Index register of the memory operand, or FD_REG_NONE.
uint8_t idx_reg;
// Left-shift amount (0-3) applied to the index register.
uint8_t idx_scale;
// Size of the instruction in bytes.
uint8_t size;
// Displacement of the memory operand.
intptr_t disp;
// Immediate value of the instruction.
intptr_t imm;
// Address passed to fd_decode().
uintptr_t address;
} FdInstr;
/** Decode an instruction.
* \param buf Buffer for instruction bytes.
* \param len Length of the buffer (in bytes). An instruction is not longer than
* 15 bytes on all x86 architectures, so at most the first 15 bytes of the
* buffer are considered.
* \param mode Decoding mode, either 32 for protected/compatibility mode or 64
* for long mode. 16-bit mode is not supported.
* \param address Virtual address of the decoded instruction. This is used
* for computing jump targets and segment-offset-relative memory
* operations (MOV with moffs* encoding) and stored in the instruction.
* \param out_instr Pointer to the instruction buffer. Note that this may get
* partially written even if an error is returned.
* \return The number of bytes consumed by the instruction, or a negative number
* indicating an error.
**/
int fd_decode(const uint8_t* buf, size_t len, int mode, uintptr_t address,
FdInstr* out_instr);
/** Format an instruction to a string.
* \param instr The instruction, as filled in by fd_decode.
* \param buf The buffer to hold the formatted string.
* \param len The length of the buffer in bytes.
**/
void fd_format(const FdInstr* instr, char* buf, size_t len);
/** Gets the type/mnemonic of the instruction. **/
#define FD_TYPE(instr) ((FdInstrType) (instr)->type)
/** Gets the address of the instruction. **/
#define FD_ADDRESS(instr) ((instr)->address)
/** Gets the size of the instruction in bytes. **/
#define FD_SIZE(instr) ((instr)->size)
/** Gets the specified segment override, or FD_REG_NONE for default segment. **/
#define FD_SEGMENT(instr) ((FdReg) (instr)->segment)
/** Gets the address size attribute of the instruction in bytes. **/
#define FD_ADDRSIZE(instr) ((instr)->addrsz)
/** Gets the operation width in bytes of the instruction if this is not encoded
* in the operands, for example for the string instruction (e.g. MOVS). **/
#define FD_OPSIZE(instr) ((instr)->operandsz)
/** Indicates whether the instruction was encoded with a REP prefix. Needed for:
* (1) Handling the instructions MOVS, STOS, LODS, INS and OUTS properly.
* (2) Handling the instructions SCAS and CMPS, for which this means REPZ.
* (3) Distinguishing the instructions BSF (no REP) vs. TZCNT (REP) and the
* instructions BSR (no REP) vs. LZCNT (REP). **/
#define FD_HAS_REP(instr) ((instr)->flags & FD_FLAG_REP)
/** Indicates whether the instruction was encoded with a REPNZ prefix. Needed for:
* (1) Handling the instructions SCAS and CMPS.
* (2) Distinguishing the instructions MOVBE (no REPNZ) vs. CRC32 (REPNZ). **/
#define FD_HAS_REPNZ(instr) ((instr)->flags & FD_FLAG_REPNZ)
/** Indicates whether the instruction was encoded with a LOCK prefix. Note that
* it is not checked whether the LOCK prefix is valid for the instruction. **/
#define FD_HAS_LOCK(instr) ((instr)->flags & FD_FLAG_LOCK)
/** Indicates whether the instruction was encoded with a VEX.L prefix. **/
#define FD_HAS_VEXL(instr) ((instr)->flags & FD_FLAG_VEXL)
/** Indicates whether the instruction was decoded in 64-bit mode. **/
#define FD_IS64(instr) ((instr)->flags & FD_FLAG_64)
/** Gets the type of an operand at the given index. **/
#define FD_OP_TYPE(instr,idx) ((FdOpType) (instr)->operands[idx].type)
/** Gets the size in bytes of an operand. However, there are a few exceptions:
* (1) For some register types, e.g., segment registers, or x87 registers, the
* size is zero. (This allows some simplifications internally.)
* (2) On some vector instructions this may be only an approximation of the
* actually needed operand size (that is, an instruction may/must only use
* a smaller part than specified here). The real operand size is always
* fully recoverable in combination with the instruction type. **/
#define FD_OP_SIZE(instr,idx) ((instr)->operands[idx].size)
/** Gets the accessed register index of a register operand. Note that /only/ the
* index is returned, no further interpretation of the index (which depends on
* the instruction type) is done. When an instruction accesses an 8-bit general
* purpose register with an index in the range 4-7, it needs to be determined
* explicitly whether a high-byte register is accessed (using FD_OP_REG_HIGH).
* If that is the case, the index needs to be decreased by 4.
* Only valid if FD_OP_TYPE == FD_OP_REG **/
#define FD_OP_REG(instr,idx) ((FdReg) (instr)->operands[idx].reg)
/** Returns whether the accessed register is actually a high-byte register (AH,
 * CH, DH or BH) when used on a general purpose instruction. In that case, the
 * register index has to be decreased by 4.
 * High-byte registers only exist for one-byte operands with an index in the
 * range 4-7 on instructions without a REX prefix; any other combination,
 * including indices of 8 and above, is never reported as a high-byte register.
 * Only valid if FD_OP_TYPE == FD_OP_REG and the operand refers to a general
 * purpose register (depends on the instruction type) **/
#define FD_OP_REG_HIGH(instr,idx) ( \
    (instr)->operands[idx].size == 1 && \
    (instr)->operands[idx].reg >= 4 && \
    (instr)->operands[idx].reg < 8 && \
    ((instr)->flags & FD_FLAG_REX) == 0 \
)
/** Gets the index of the base register from a memory operand, or FD_REG_NONE,
* if the memory operand has no base register. This is the only case where the
* 64-bit register RIP can be returned, in which case the operand also has no
* scaled index register.
* Only valid if FD_OP_TYPE == FD_OP_MEM **/
#define FD_OP_BASE(instr,idx) ((FdReg) (instr)->operands[idx].reg)
/** Gets the index of the index register from a memory operand, or FD_REG_NONE,
* if the memory operand has no scaled index register.
* Only valid if FD_OP_TYPE == FD_OP_MEM **/
#define FD_OP_INDEX(instr,idx) ((FdReg) (instr)->idx_reg)
/** Gets the scale of the index register from a memory operand when existent.
* This does /not/ return the scale in an absolute value but returns the amount
* of bits the index register is shifted to the left (i.e. the value is in the
* range 0-3). The actual scale can be computed easily using 1<<FD_OP_SCALE.
* Only valid if FD_OP_TYPE == FD_OP_MEM and FD_OP_INDEX != FD_REG_NONE **/
#define FD_OP_SCALE(instr,idx) ((instr)->idx_scale)
/** Gets the sign-extended displacement of a memory operand.
* Only valid if FD_OP_TYPE == FD_OP_MEM **/
#define FD_OP_DISP(instr,idx) ((instr)->disp)
/** Gets the (sign-extended) encoded constant for an immediate operand.
* Only valid if FD_OP_TYPE == FD_OP_IMM **/
#define FD_OP_IMM(instr,idx) ((instr)->imm)
#endif

View File

@@ -1,22 +1,23 @@
#include <stdbool.h>
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <decode.h> #include <fadec.h>
#define DECODE_TABLE_STRTAB1 #define FD_DECODE_TABLE_STRTAB1
static const char* _mnemonic_str = static const char* _mnemonic_str =
#include <decode-table.inc> #include <decode-table.inc>
; ;
#undef DECODE_TABLE_STRTAB1 #undef FD_DECODE_TABLE_STRTAB1
#define DECODE_TABLE_STRTAB2 #define FD_DECODE_TABLE_STRTAB2
static const uint16_t _mnemonic_offs[] = { static const uint16_t _mnemonic_offs[] = {
#include <decode-table.inc> #include <decode-table.inc>
}; };
#undef DECODE_TABLE_STRTAB2 #undef FD_DECODE_TABLE_STRTAB2
#define FMT_CONCAT(buf, end, ...) do { \ #define FMT_CONCAT(buf, end, ...) do { \
buf += snprintf(buf, end - buf, __VA_ARGS__); \ buf += snprintf(buf, end - buf, __VA_ARGS__); \
@@ -25,79 +26,83 @@ static const uint16_t _mnemonic_offs[] = {
} while (0) } while (0)
void void
instr_format(const Instr* instr, char buffer[128]) fd_format(const FdInstr* instr, char* buffer, size_t len)
{ {
char* buf = buffer; char* buf = buffer;
char* end = buffer + 128; char* end = buffer + len;
FMT_CONCAT(buf, end, "["); FMT_CONCAT(buf, end, "[");
if (INSTR_HAS_REP(instr)) if (FD_HAS_REP(instr))
FMT_CONCAT(buf, end, "rep:"); FMT_CONCAT(buf, end, "rep:");
if (INSTR_HAS_REPNZ(instr)) if (FD_HAS_REPNZ(instr))
FMT_CONCAT(buf, end, "repnz:"); FMT_CONCAT(buf, end, "repnz:");
if (INSTR_SEGMENT(instr) < 6) if (FD_SEGMENT(instr) < 6)
FMT_CONCAT(buf, end, "%cs:", "ecsdfg"[INSTR_SEGMENT(instr)]); FMT_CONCAT(buf, end, "%cs:", "ecsdfg"[FD_SEGMENT(instr)]);
if (INSTR_IS64(instr) && INSTR_ADDRSZ(instr) == 4) if (FD_IS64(instr) && FD_ADDRSIZE(instr) == 4)
FMT_CONCAT(buf, end, "addr32:"); FMT_CONCAT(buf, end, "addr32:");
if (!INSTR_IS64(instr) && INSTR_ADDRSZ(instr) == 2) if (!FD_IS64(instr) && FD_ADDRSIZE(instr) == 2)
FMT_CONCAT(buf, end, "addr16:"); FMT_CONCAT(buf, end, "addr16:");
if (INSTR_HAS_LOCK(instr)) if (FD_HAS_LOCK(instr))
FMT_CONCAT(buf, end, "lock:"); FMT_CONCAT(buf, end, "lock:");
FMT_CONCAT(buf, end, "%s", &_mnemonic_str[_mnemonic_offs[instr->type]]); FMT_CONCAT(buf, end, "%s", &_mnemonic_str[_mnemonic_offs[FD_TYPE(instr)]]);
if (INSTR_WIDTH(instr)) if (FD_OPSIZE(instr))
FMT_CONCAT(buf, end, "_%u", INSTR_WIDTH(instr)); FMT_CONCAT(buf, end, "_%u", FD_OPSIZE(instr));
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
const struct Operand* operand = &instr->operands[i]; FdOpType op_type = FD_OP_TYPE(instr, i);
if (operand->type == OT_NONE) if (op_type == FD_OP_NONE)
break; break;
const char* op_type_name = "reg\0imm\0mem" + operand->type * 4 - 4; const char* op_type_name = "reg\0imm\0mem" + op_type * 4 - 4;
FMT_CONCAT(buf, end, " %s%u:", op_type_name, operand->size); FMT_CONCAT(buf, end, " %s%u:", op_type_name, FD_OP_SIZE(instr, i));
switch (operand->type) switch (op_type)
{ {
size_t immediate; size_t immediate;
case OT_REG: bool has_base;
if (operand->size == 1 && !INSTR_HAS_REX(instr) && bool has_idx;
operand->reg >= 4 && operand->reg < 8) bool has_disp;
FMT_CONCAT(buf, end, "r%uh", operand->reg - 4); case FD_OP_REG:
if (FD_OP_REG_HIGH(instr, i))
FMT_CONCAT(buf, end, "r%uh", FD_OP_REG(instr, i) - 4);
else else
FMT_CONCAT(buf, end, "r%u", operand->reg); FMT_CONCAT(buf, end, "r%u", FD_OP_REG(instr, i));
break; break;
case OT_IMM: case FD_OP_IMM:
immediate = instr->immediate; immediate = FD_OP_IMM(instr, i);
if (operand->size == 1) if (FD_OP_SIZE(instr, i) == 1)
immediate &= 0xff; immediate &= 0xff;
else if (operand->size == 2) else if (FD_OP_SIZE(instr, i) == 2)
immediate &= 0xffff; immediate &= 0xffff;
else if (operand->size == 4) else if (FD_OP_SIZE(instr, i) == 4)
immediate &= 0xffffffff; immediate &= 0xffffffff;
FMT_CONCAT(buf, end, "0x%lx", immediate); FMT_CONCAT(buf, end, "0x%lx", immediate);
break; break;
case OT_MEM: case FD_OP_MEM:
if (!reg_is_none(operand->reg)) has_base = FD_OP_BASE(instr, i) != FD_REG_NONE;
has_idx = FD_OP_INDEX(instr, i) != FD_REG_NONE;
has_disp = FD_OP_DISP(instr, i) != 0;
if (has_base)
{ {
FMT_CONCAT(buf, end, "r%u", operand->reg); FMT_CONCAT(buf, end, "r%u", FD_OP_BASE(instr, i));
if (instr->scale != 0 || instr->disp > 0) if (has_idx || has_disp)
FMT_CONCAT(buf, end, "+"); FMT_CONCAT(buf, end, "+");
} }
if (instr->scale != 0) if (has_idx)
{ {
FMT_CONCAT(buf, end, "%u*r%u", 1 << (instr->scale - 1), FMT_CONCAT(buf, end, "%u*r%u", 1 << FD_OP_SCALE(instr, i),
instr->sreg); FD_OP_INDEX(instr, i));
if (instr->disp > 0) if (has_disp)
FMT_CONCAT(buf, end, "+"); FMT_CONCAT(buf, end, "+");
} }
if (instr->disp < 0) if (FD_OP_DISP(instr, i) < 0)
FMT_CONCAT(buf, end, "-0x%lx", -instr->disp); FMT_CONCAT(buf, end, "-0x%lx", -FD_OP_DISP(instr, i));
else if ((reg_is_none(operand->reg) && instr->scale == 0) || else if (has_disp || (!has_base && !has_idx))
instr->disp > 0) FMT_CONCAT(buf, end, "0x%lx", FD_OP_DISP(instr, i));
FMT_CONCAT(buf, end, "0x%lx", instr->disp);
break; break;
case OT_NONE: case FD_OP_NONE:
default: default:
break; break;
} }

View File

@@ -266,15 +266,15 @@ def bytes_to_table(data, notes):
for p, c in zip(offs, offs[1:])) for p, c in zip(offs, offs[1:]))
template = """// Auto-generated file -- do not modify! template = """// Auto-generated file -- do not modify!
#if defined(DECODE_TABLE_DATA_32) #if defined(FD_DECODE_TABLE_DATA_32)
{hex_table32} {hex_table32}
#elif defined(DECODE_TABLE_DATA_64) #elif defined(FD_DECODE_TABLE_DATA_64)
{hex_table64} {hex_table64}
#elif defined(DECODE_TABLE_MNEMONICS) #elif defined(FD_DECODE_TABLE_MNEMONICS)
{mnemonic_list} {mnemonic_list}
#elif defined(DECODE_TABLE_STRTAB1) #elif defined(FD_DECODE_TABLE_STRTAB1)
{mnemonic_cstr} {mnemonic_cstr}
#elif defined(DECODE_TABLE_STRTAB2) #elif defined(FD_DECODE_TABLE_STRTAB2)
{mnemonic_offsets} {mnemonic_offsets}
#else #else
#error "unspecified decode table" #error "unspecified decode table"
@@ -314,7 +314,7 @@ if __name__ == "__main__":
file = template.format( file = template.format(
hex_table32=bytes_to_table(*table32.compile(mnemonics_lut)), hex_table32=bytes_to_table(*table32.compile(mnemonics_lut)),
hex_table64=bytes_to_table(*table64.compile(mnemonics_lut)), hex_table64=bytes_to_table(*table64.compile(mnemonics_lut)),
mnemonic_list="\n".join("MNEMONIC(%s,%d)"%entry for entry in mnemonics_lut.items()), mnemonic_list="\n".join("FD_MNEMONIC(%s,%d)"%entry for entry in mnemonics_lut.items()),
mnemonic_cstr=mnemonic_cstr, mnemonic_cstr=mnemonic_cstr,
mnemonic_offsets=",".join(str(off) for off in mnemonic_tab), mnemonic_offsets=",".join(str(off) for off in mnemonic_tab),
) )

View File

@@ -4,7 +4,7 @@
#include <inttypes.h> #include <inttypes.h>
#include <time.h> #include <time.h>
#include <decode.h> #include <fadec.h>
static static
@@ -30,21 +30,7 @@ main(int argc, char** argv)
return -1; return -1;
} }
DecodeMode mode; size_t mode = strtoul(argv[1], NULL, 0);
size_t mode_input = strtoul(argv[1], NULL, 0);
if (mode_input == 32)
{
mode = DECODE_32;
}
else if (mode_input == 64)
{
mode = DECODE_64;
}
else
{
printf("Unknown decode mode\n");
return 1;
}
// Avoid allocation by transforming hex to binary in-place. // Avoid allocation by transforming hex to binary in-place.
uint8_t* code = (uint8_t*) argv[2]; uint8_t* code = (uint8_t*) argv[2];
@@ -62,7 +48,7 @@ main(int argc, char** argv)
struct timespec time_start; struct timespec time_start;
struct timespec time_end; struct timespec time_end;
Instr instr; FdInstr instr;
__asm__ volatile("" : : : "memory"); __asm__ volatile("" : : : "memory");
clock_gettime(CLOCK_MONOTONIC, &time_start); clock_gettime(CLOCK_MONOTONIC, &time_start);
@@ -72,8 +58,8 @@ main(int argc, char** argv)
while (current_off != length) while (current_off != length)
{ {
size_t remaining = length - current_off; size_t remaining = length - current_off;
int retval = decode(code + current_off, remaining, mode, 0x1234000, int retval = fd_decode(code + current_off, remaining, mode,
&instr); 0x1234000, &instr);
if (retval < 0) if (retval < 0)
goto fail; goto fail;
current_off += retval; current_off += retval;
@@ -83,7 +69,7 @@ main(int argc, char** argv)
__asm__ volatile("" : : : "memory"); __asm__ volatile("" : : : "memory");
char format_buffer[128]; char format_buffer[128];
instr_format(&instr, format_buffer); fd_format(&instr, format_buffer, sizeof(format_buffer));
printf("%s\n", format_buffer); printf("%s\n", format_buffer);
if (repetitions > 1) if (repetitions > 1)