Major rework of API and improved documentation

This commit is contained in:
Alexis Engelke
2019-01-23 20:03:40 +01:00
parent a045588999
commit 3abf29d63e
6 changed files with 320 additions and 273 deletions

138
decode.c
View File

@@ -2,7 +2,7 @@
#include <stddef.h>
#include <stdint.h>
#include <decode.h>
#include <fadec.h>
#if defined(ARCH_X86_64) && __SIZEOF_POINTER__ < 8
@@ -13,21 +13,27 @@
#define UNLIKELY(x) __builtin_expect((x), 0)
#if defined(ARCH_386)
#define DECODE_TABLE_DATA_32
#define FD_DECODE_TABLE_DATA_32
static const uint8_t _decode_table32[] = {
#include <decode-table.inc>
};
#undef DECODE_TABLE_DATA_32
#undef FD_DECODE_TABLE_DATA_32
#endif
#if defined(ARCH_X86_64)
#define DECODE_TABLE_DATA_64
#define FD_DECODE_TABLE_DATA_64
static const uint8_t _decode_table64[] = {
#include <decode-table.inc>
};
#undef DECODE_TABLE_DATA_64
#undef FD_DECODE_TABLE_DATA_64
#endif
enum DecodeMode {
DECODE_64 = 0,
DECODE_32 = 1,
};
typedef enum DecodeMode DecodeMode;
#define ENTRY_NONE 0
#define ENTRY_INSTR 1
@@ -72,11 +78,11 @@ static const uint8_t _decode_table64[] = {
enum PrefixSet
{
PREFIX_LOCK = INSTR_FLAG_LOCK,
PREFIX_REP = INSTR_FLAG_REP,
PREFIX_REPNZ = INSTR_FLAG_REPNZ,
PREFIX_REX = INSTR_FLAG_REX,
PREFIX_VEXL = INSTR_FLAG_VEXL,
PREFIX_LOCK = FD_FLAG_LOCK,
PREFIX_REP = FD_FLAG_REP,
PREFIX_REPNZ = FD_FLAG_REPNZ,
PREFIX_REX = FD_FLAG_REX,
PREFIX_VEXL = FD_FLAG_VEXL,
PREFIX_OPSZ = 1 << 13,
PREFIX_ADDRSZ = 1 << 14,
PREFIX_REXB = 1 << 15,
@@ -104,7 +110,7 @@ decode_prefixes(const uint8_t* buffer, int len, DecodeMode mode,
uint8_t rep = 0;
*out_mandatory = 0;
*out_segment = RI_NONE;
*out_segment = FD_REG_NONE;
while (LIKELY(off < len))
{
@@ -113,11 +119,11 @@ decode_prefixes(const uint8_t* buffer, int len, DecodeMode mode,
{
default: goto out;
// From segment overrides, the last one wins.
case 0x26: *out_segment = RI_ES; off++; break;
case 0x2e: *out_segment = RI_CS; off++; break;
case 0x3e: *out_segment = RI_DS; off++; break;
case 0x64: *out_segment = RI_FS; off++; break;
case 0x65: *out_segment = RI_GS; off++; break;
case 0x26: *out_segment = FD_REG_ES; off++; break;
case 0x2e: *out_segment = FD_REG_CS; off++; break;
case 0x3e: *out_segment = FD_REG_DS; off++; break;
case 0x64: *out_segment = FD_REG_FS; off++; break;
case 0x65: *out_segment = FD_REG_GS; off++; break;
case 0x67: prefixes |= PREFIX_ADDRSZ; off++; break;
case 0xf0: prefixes |= PREFIX_LOCK; off++; break;
case 0x66: prefixes |= PREFIX_OPSZ; off++; break;
@@ -201,8 +207,8 @@ out:
static
int
decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr,
PrefixSet prefixes, struct Operand* out_o1, struct Operand* out_o2)
decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, FdInstr* instr,
PrefixSet prefixes, FdOp* out_o1, FdOp* out_o2)
{
int off = 0;
@@ -223,7 +229,7 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr,
#if defined(ARCH_X86_64)
reg_idx += prefixes & PREFIX_REXR ? 8 : 0;
#endif
out_o2->type = OT_REG;
out_o2->type = FD_OP_REG;
out_o2->reg = reg_idx;
}
@@ -233,7 +239,7 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr,
#if defined(ARCH_X86_64)
reg_idx += prefixes & PREFIX_REXB ? 8 : 0;
#endif
out_o1->type = OT_REG;
out_o1->type = FD_OP_REG;
out_o1->reg = reg_idx;
return off;
}
@@ -250,7 +256,7 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr,
}
uint8_t sib = buffer[off++];
scale = ((sib & 0xc0) >> 6) + 1;
scale = (sib & 0xc0) >> 6;
idx = (sib & 0x38) >> 3;
#if defined(ARCH_X86_64)
idx += prefixes & PREFIX_REXX ? 8 : 0;
@@ -283,19 +289,20 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr,
instr->disp = 0;
}
out_o1->type = OT_MEM;
instr->scale = scale;
out_o1->type = FD_OP_MEM;
instr->idx_scale = scale;
if (scale == 0)
// If there was no SIB byte.
if (rm != 4)
{
if (mod == 0 && rm == 5)
{
#if defined(ARCH_X86_64)
if (mode == DECODE_64)
out_o1->reg = RI_IP;
out_o1->reg = FD_REG_IP;
else
#endif
out_o1->reg = REG_NONE;
out_o1->reg = FD_REG_NONE;
return off;
}
@@ -304,21 +311,22 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr,
reg_idx += prefixes & PREFIX_REXB ? 8 : 0;
#endif
out_o1->reg = reg_idx;
instr->idx_reg = FD_REG_NONE;
return off;
}
if (idx == 4)
{
instr->scale = 0;
instr->idx_reg = FD_REG_NONE;
}
else
{
instr->sreg = idx;
instr->idx_reg = idx;
}
if (base == 5 && mod == 0)
{
out_o1->reg = REG_NONE;
out_o1->reg = FD_REG_NONE;
}
else
{
@@ -358,11 +366,15 @@ struct InstrDesc
#define DESC_IMM_BYTE(desc) (((desc)->immediate >> 7) & 1)
int
decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
Instr* instr)
fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
FdInstr* instr)
{
const uint8_t* decode_table = NULL;
int len = len_sz > 15 ? 15 : len_sz;
DecodeMode mode = mode_int == 32 ? DECODE_32 :
mode_int == 64 ? DECODE_64 : -1;
// Ensure that we can actually handle the decode request
#if defined(ARCH_386)
if (mode == DECODE_32)
@@ -453,7 +465,7 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
instr->type = desc->type;
instr->flags = prefixes & 0x7f;
if (mode == DECODE_64)
instr->flags |= INSTR_FLAG_64;
instr->flags |= FD_FLAG_64;
instr->address = address;
uint8_t op_size = 0;
@@ -468,7 +480,7 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
else
op_size = 4;
instr->op_size = desc->gp_instr_width ? op_size : 0;
instr->operandsz = desc->gp_instr_width ? op_size : 0;
uint8_t vec_size = 16;
if (prefixes & PREFIX_VEXL)
@@ -480,7 +492,7 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
uint8_t addr_size = mode == DECODE_64 ? 8 : 4;
if (prefixes & PREFIX_ADDRSZ)
addr_size >>= 1;
instr->addr_size = addr_size;
instr->addrsz = addr_size;
uint8_t operand_sizes[4] = {
0, 1 << desc->gp_fixed_operand_size, op_size, vec_size
@@ -495,16 +507,16 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
if (DESC_HAS_IMPLICIT(desc))
{
struct Operand* operand = &instr->operands[DESC_IMPLICIT_IDX(desc)];
operand->type = OT_REG;
FdOp* operand = &instr->operands[DESC_IMPLICIT_IDX(desc)];
operand->type = FD_OP_REG;
operand->reg = 0;
}
if (DESC_HAS_MODRM(desc))
{
struct Operand* operand1 = &instr->operands[DESC_MODRM_IDX(desc)];
FdOp* operand1 = &instr->operands[DESC_MODRM_IDX(desc)];
struct Operand* operand2 = NULL;
FdOp* operand2 = NULL;
if (DESC_HAS_MODREG(desc))
{
operand2 = &instr->operands[DESC_MODREG_IDX(desc)];
@@ -522,37 +534,37 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
else if (DESC_HAS_MODREG(desc))
{
// If there is no ModRM, but a Mod-Reg, its opcode-encoded.
struct Operand* operand = &instr->operands[DESC_MODREG_IDX(desc)];
FdOp* operand = &instr->operands[DESC_MODREG_IDX(desc)];
uint8_t reg_idx = buffer[off - 1] & 7;
#if defined(ARCH_X86_64)
reg_idx += prefixes & PREFIX_REXB ? 8 : 0;
#endif
operand->type = OT_REG;
operand->type = FD_OP_REG;
operand->reg = reg_idx;
}
if (UNLIKELY(DESC_HAS_VEXREG(desc)))
{
struct Operand* operand = &instr->operands[DESC_VEXREG_IDX(desc)];
operand->type = OT_REG;
FdOp* operand = &instr->operands[DESC_VEXREG_IDX(desc)];
operand->type = FD_OP_REG;
operand->reg = vex_operand;
}
uint32_t imm_control = DESC_IMM_CONTROL(desc);
if (imm_control == 1)
{
struct Operand* operand = &instr->operands[DESC_IMM_IDX(desc)];
operand->type = OT_IMM;
FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)];
operand->type = FD_OP_IMM;
operand->size = 1;
instr->immediate = 1;
instr->imm = 1;
}
else if (imm_control == 2)
{
struct Operand* operand = &instr->operands[DESC_IMM_IDX(desc)];
operand->type = OT_MEM;
operand->reg = REG_NONE;
FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)];
operand->type = FD_OP_MEM;
operand->reg = FD_REG_NONE;
operand->size = op_size;
instr->scale = 0;
instr->idx_reg = FD_REG_NONE;
if (UNLIKELY(off + addr_size > len))
return -1;
@@ -570,18 +582,18 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
}
else if (imm_control != 0)
{
struct Operand* operand = &instr->operands[DESC_IMM_IDX(desc)];
FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)];
uint8_t imm_size;
if (DESC_IMM_BYTE(desc))
{
imm_size = 1;
}
else if (UNLIKELY(instr->type == IT_RET_IMM))
else if (UNLIKELY(instr->type == FDI_RET_IMM))
{
imm_size = 2;
}
else if (UNLIKELY(instr->type == IT_ENTER))
else if (UNLIKELY(instr->type == FDI_ENTER))
{
imm_size = 3;
}
@@ -599,7 +611,7 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
}
#if defined(ARCH_X86_64)
else if (mode == DECODE_64 && (prefixes & PREFIX_REXW) &&
instr->type == IT_MOVABS_IMM)
instr->type == FDI_MOVABS_IMM)
{
imm_size = 8;
}
@@ -616,42 +628,42 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
if (imm_size == 1)
{
instr->immediate = (int8_t) LOAD_LE_1(&buffer[off]);
instr->imm = (int8_t) LOAD_LE_1(&buffer[off]);
}
else if (imm_size == 2)
{
instr->immediate = (int16_t) LOAD_LE_2(&buffer[off]);
instr->imm = (int16_t) LOAD_LE_2(&buffer[off]);
}
else if (imm_size == 3)
{
instr->immediate = LOAD_LE_2(&buffer[off]);
instr->immediate |= LOAD_LE_1(&buffer[off + 2]) << 16;
instr->imm = LOAD_LE_2(&buffer[off]);
instr->imm |= LOAD_LE_1(&buffer[off + 2]) << 16;
}
else if (imm_size == 4)
{
instr->immediate = (int32_t) LOAD_LE_4(&buffer[off]);
instr->imm = (int32_t) LOAD_LE_4(&buffer[off]);
}
#if defined(ARCH_X86_64)
else if (imm_size == 8)
{
instr->immediate = (int64_t) LOAD_LE_8(&buffer[off]);
instr->imm = (int64_t) LOAD_LE_8(&buffer[off]);
}
#endif
off += imm_size;
if (imm_control == 4)
{
instr->immediate += instr->address + off;
instr->imm += instr->address + off;
}
if (UNLIKELY(imm_control == 5))
{
operand->type = OT_REG;
operand->reg = (instr->immediate & 0xf0) >> 4;
operand->type = FD_OP_REG;
operand->reg = (instr->imm & 0xf0) >> 4;
}
else
{
operand->type = OT_IMM;
operand->type = FD_OP_IMM;
}
}

138
decode.h
View File

@@ -1,138 +0,0 @@
#ifndef ARMX86_DECODE_H
#define ARMX86_DECODE_H
#include <stddef.h>
#include <stdint.h>
#ifndef ssize_t
#define ssize_t intptr_t
#endif
#define DECODE_TABLE_MNEMONICS
#define MNEMONIC(name,value) IT_ ## name = value,
enum
{
#include <decode-table.inc>
};
#undef DECODE_TABLE_MNEMONICS
#undef MNEMONIC
enum DecodeMode {
DECODE_64 = 0,
DECODE_32 = 1,
};
typedef enum DecodeMode DecodeMode;
enum RegIndex {
RI_AL = 0,
RI_CL,
RI_DL,
RI_BL,
RI_AH,
RI_CH,
RI_DH,
RI_BH,
RI_AX = 0,
RI_CX,
RI_DX,
RI_BX,
RI_SP,
RI_BP,
RI_SI,
RI_DI,
RI_R8,
RI_R9,
RI_R10,
RI_R11,
RI_R12,
RI_R13,
RI_R14,
RI_R15,
// EIP cannot be encoded in Protected/Compatibility Mode
RI_IP = 0x10,
RI_ES = 0,
RI_CS,
RI_SS,
RI_DS,
RI_FS,
RI_GS,
// No register specified
RI_NONE = 0x3f
};
typedef uint8_t Reg;
#define reg_index(reg) (reg)
#define reg_is_none(reg) ((reg) == REG_NONE)
#define REG_NONE RI_NONE
enum
{
INSTR_FLAG_LOCK = 1 << 0,
INSTR_FLAG_REP = 1 << 1,
INSTR_FLAG_REPNZ = 1 << 2,
INSTR_FLAG_REX = 1 << 3,
INSTR_FLAG_VEXL = 1 << 4,
INSTR_FLAG_64 = 1 << 7,
};
enum OperandType
{
OT_NONE = 0,
OT_REG = 1,
OT_IMM = 2,
OT_MEM = 3,
};
struct Operand
{
uint8_t type : 2;
uint8_t reg : 6;
uint8_t size;
};
struct Instr
{
uint16_t type;
struct Operand operands[4];
uint8_t flags;
uint8_t segment;
uint8_t op_size;
uint8_t addr_size;
/**
* Encoded as 1 << (scale - 1) **or** no scaled register at all if zero.
**/
uint8_t scale : 3;
uint8_t sreg : 5;
size_t immediate;
intptr_t disp;
uintptr_t address;
uint32_t size : 4;
};
typedef struct Instr Instr;
#define INSTR_SEGMENT(instr) ((instr)->segment)
#define INSTR_WIDTH(instr) ((instr)->op_size)
#define INSTR_ADDRSZ(instr) ((instr)->addr_size)
#define INSTR_IS64(instr) ((instr)->flags & INSTR_FLAG_64)
#define INSTR_HAS_REP(instr) ((instr)->flags & INSTR_FLAG_REP)
#define INSTR_HAS_REPNZ(instr) ((instr)->flags & INSTR_FLAG_REPNZ)
#define INSTR_HAS_LOCK(instr) ((instr)->flags & INSTR_FLAG_LOCK)
#define INSTR_HAS_REX(instr) ((instr)->flags & INSTR_FLAG_REX)
#define INSTR_HAS_VEXL(instr) ((instr)->flags & INSTR_FLAG_VEXL)
int decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address,
Instr* out_instr);
void instr_format(const Instr* instr, char buffer[128]);
#endif

182
fadec.h Normal file
View File

@@ -0,0 +1,182 @@
#ifndef FD_FADEC_H_
#define FD_FADEC_H_
#include <stddef.h>
#include <stdint.h>
/** Register indices as used in decoded operands.
 * Several naming schemes share the same numeric values (for example
 * FD_REG_R0, FD_REG_AL, FD_REG_AX and FD_REG_ES are all 0), so a value alone
 * does not identify which kind of register is meant. **/
typedef enum {
FD_REG_R0 = 0, FD_REG_R1, FD_REG_R2, FD_REG_R3,
FD_REG_R4, FD_REG_R5, FD_REG_R6, FD_REG_R7,
FD_REG_R8, FD_REG_R9, FD_REG_R10, FD_REG_R11,
FD_REG_R12, FD_REG_R13, FD_REG_R14, FD_REG_R15,
// Alternative names for byte registers (values 0-7)
FD_REG_AL = 0, FD_REG_CL, FD_REG_DL, FD_REG_BL,
FD_REG_AH, FD_REG_CH, FD_REG_DH, FD_REG_BH,
// Alternative names for general purpose registers (values 0-7)
FD_REG_AX = 0, FD_REG_CX, FD_REG_DX, FD_REG_BX,
FD_REG_SP, FD_REG_BP, FD_REG_SI, FD_REG_DI,
// FD_REG_IP can only be accessed in long mode (64-bit)
FD_REG_IP = 0x10,
// Segment register values (values 0-5)
FD_REG_ES = 0, FD_REG_CS, FD_REG_SS, FD_REG_DS, FD_REG_FS, FD_REG_GS,
// No register specified
FD_REG_NONE = 0x3f
} FdReg;
/** Instruction types (mnemonics).
 * The enumerators are not written out here: with FD_DECODE_TABLE_MNEMONICS
 * defined, including decode-table.inc expands one FD_MNEMONIC(name,value)
 * entry per mnemonic, each of which becomes FDI_<name> = value. **/
typedef enum {
#define FD_DECODE_TABLE_MNEMONICS
#define FD_MNEMONIC(name,value) FDI_ ## name = value,
#include <decode-table.inc>
// Undefine the helper macros again so they do not leak to includers.
#undef FD_DECODE_TABLE_MNEMONICS
#undef FD_MNEMONIC
} FdInstrType;
/** Internal use only.
 * Bit flags stored in the flags field of FdInstr. Use the FD_HAS_REP,
 * FD_HAS_REPNZ, FD_HAS_LOCK, FD_HAS_VEXL and FD_IS64 macros to query them
 * instead of testing these bits directly. **/
enum {
FD_FLAG_LOCK = 1 << 0,
FD_FLAG_REP = 1 << 1,
FD_FLAG_REPNZ = 1 << 2,
FD_FLAG_REX = 1 << 3,
FD_FLAG_VEXL = 1 << 4,
FD_FLAG_64 = 1 << 7,
};
/** Kind of an operand, as returned by FD_OP_TYPE. **/
typedef enum {
// No operand in this slot
FD_OP_NONE = 0,
// Register operand; query the register with FD_OP_REG
FD_OP_REG = 1,
// Immediate operand; query the value with FD_OP_IMM
FD_OP_IMM = 2,
// Memory operand; query with FD_OP_BASE, FD_OP_INDEX, FD_OP_SCALE, FD_OP_DISP
FD_OP_MEM = 3,
} FdOpType;
/** A single operand of a decoded instruction. **/
typedef struct {
// Operand kind, holds an FdOpType value (see FD_OP_TYPE)
uint8_t type;
// Operand size in bytes (see FD_OP_SIZE for the exceptions)
uint8_t size;
// Register index for register operands (FD_OP_REG), or the base register
// for memory operands (FD_OP_BASE); FD_REG_NONE if there is no base
int8_t reg;
} FdOp;
/** A decoded instruction, filled in by fd_decode.
 * The FD_* accessor macros in this header read these fields. **/
typedef struct {
// Instruction type, holds an FdInstrType value (see FD_TYPE)
uint16_t type;
// FD_FLAG_* bits (see FD_HAS_REP, FD_HAS_REPNZ, FD_HAS_LOCK, FD_HAS_VEXL, FD_IS64)
uint8_t flags;
// Segment override register, or FD_REG_NONE (see FD_SEGMENT)
uint8_t segment;
// Address size attribute in bytes (see FD_ADDRSIZE)
uint8_t addrsz;
// Operation width in bytes, or zero (see FD_OPSIZE)
uint8_t operandsz;
// Operand slots; the type field of each entry tells whether it is used
FdOp operands[4];
// Index register of the memory operand, or FD_REG_NONE (see FD_OP_INDEX)
uint8_t idx_reg;
// Left-shift amount applied to the index register (see FD_OP_SCALE)
uint8_t idx_scale;
// Size of the instruction in bytes (see FD_SIZE)
uint8_t size;
// Displacement of the memory operand (see FD_OP_DISP)
intptr_t disp;
// Immediate value (see FD_OP_IMM)
intptr_t imm;
// Address of the instruction as passed to fd_decode (see FD_ADDRESS)
uintptr_t address;
} FdInstr;
/** Decode an instruction.
 * \param buf Buffer for instruction bytes.
 * \param len Length of the buffer (in bytes). An instruction is not longer than
 *        15 bytes on all x86 architectures, so at most the first 15 bytes of
 *        the buffer are considered.
 * \param mode Decoding mode, either 32 for protected/compatibility mode or 64
 *        for long mode. 16-bit mode is not supported.
 * \param address Virtual address of the decoded instruction. This is used
 *        for computing jump targets and segment-offset-relative memory
 *        operations (MOV with moffs* encoding) and stored in the instruction.
 * \param out_instr Pointer to the instruction buffer. Note that this may get
 *        partially written even if an error is returned.
 * \return The number of bytes consumed by the instruction, or a negative number
 *         indicating an error.
 **/
int fd_decode(const uint8_t* buf, size_t len, int mode, uintptr_t address,
FdInstr* out_instr);
/** Format an instruction to a string.
 * \param instr The instruction, as previously filled in by fd_decode.
 * \param buf The buffer to hold the formatted string.
 * \param len The length of the buffer in bytes; formatting does not write
 *        beyond buf + len.
 **/
void fd_format(const FdInstr* instr, char* buf, size_t len);
/** Gets the type/mnemonic of the instruction. **/
#define FD_TYPE(instr) ((FdInstrType) (instr)->type)
/** Gets the address of the instruction. **/
#define FD_ADDRESS(instr) ((instr)->address)
/** Gets the size of the instruction in bytes. **/
#define FD_SIZE(instr) ((instr)->size)
/** Gets the specified segment override, or FD_REG_NONE for default segment. **/
#define FD_SEGMENT(instr) ((FdReg) (instr)->segment)
/** Gets the address size attribute of the instruction in bytes. **/
#define FD_ADDRSIZE(instr) ((instr)->addrsz)
/** Gets the operation width in bytes of the instruction if this is not encoded
 * in the operands, for example for the string instructions (e.g. MOVS). **/
#define FD_OPSIZE(instr) ((instr)->operandsz)
/** Indicates whether the instruction was encoded with a REP prefix. Needed for:
 * (1) Handling the instructions MOVS, STOS, LODS, INS and OUTS properly.
 * (2) Handling the instructions SCAS and CMPS, for which this means REPZ.
 * (3) Distinguishing the instructions BSF (no REP) vs. TZCNT (REP) and the
 *     instructions BSR (no REP) vs. LZCNT (REP). **/
#define FD_HAS_REP(instr) ((instr)->flags & FD_FLAG_REP)
/** Indicates whether the instruction was encoded with a REPNZ prefix. Needed
 * for:
 * (1) Handling the instructions SCAS and CMPS.
 * (2) Distinguishing the instructions MOVBE (no REPNZ) vs. CRC32 (REPNZ). **/
#define FD_HAS_REPNZ(instr) ((instr)->flags & FD_FLAG_REPNZ)
/** Indicates whether the instruction was encoded with a LOCK prefix. Note that
 * it is not checked whether the LOCK prefix is valid for the instruction. **/
#define FD_HAS_LOCK(instr) ((instr)->flags & FD_FLAG_LOCK)
/** Indicates whether the instruction was encoded with a VEX.L prefix. **/
#define FD_HAS_VEXL(instr) ((instr)->flags & FD_FLAG_VEXL)
/** Indicates whether the instruction was decoded in 64-bit mode. **/
#define FD_IS64(instr) ((instr)->flags & FD_FLAG_64)
/** Gets the type of an operand at the given index. **/
#define FD_OP_TYPE(instr,idx) ((FdOpType) (instr)->operands[idx].type)
/** Gets the size in bytes of an operand. However, there are a few exceptions:
 * (1) For some register types, e.g., segment registers, or x87 registers, the
 *     size is zero. (This allows some simplifications internally.)
 * (2) On some vector instructions this may be only an approximation of the
 *     actually needed operand size (that is, an instruction may/must only use
 *     a smaller part than specified here). The real operand size is always
 *     fully recoverable in combination with the instruction type. **/
#define FD_OP_SIZE(instr,idx) ((instr)->operands[idx].size)
/** Gets the accessed register index of a register operand. Note that /only/ the
 * index is returned, no further interpretation of the index (which depends on
 * the instruction type) is done. When an instruction accesses an 8-bit general
 * purpose register with an index in the range 4-7, it needs to be determined
 * explicitly whether a high-byte register is accessed (using FD_OP_REG_HIGH).
 * If that is the case, the index needs to be decreased by 4.
 * Only valid if FD_OP_TYPE == FD_OP_REG **/
#define FD_OP_REG(instr,idx) ((FdReg) (instr)->operands[idx].reg)
/** Returns whether the accessed register is actually a high-byte register
 * (AH, CH, DH or BH) when used on a general purpose instruction. In that case,
 * the register index has to be decreased by 4.
 * This holds exactly when the operand is one byte wide, the register index is
 * in the range 4-7, and the instruction was encoded without a REX prefix.
 * Indices of 8 and above never denote a high-byte register.
 * Note that both arguments are evaluated more than once.
 * Only valid if FD_OP_TYPE == FD_OP_REG and the operand refers to a general
 * purpose register (depends on the instruction type) **/
#define FD_OP_REG_HIGH(instr,idx) ( \
    (instr)->operands[idx].size == 1 && \
    (instr)->operands[idx].reg >= 4 && \
    (instr)->operands[idx].reg < 8 && \
    ((instr)->flags & FD_FLAG_REX) == 0 \
)
/** Gets the index of the base register from a memory operand, or FD_REG_NONE,
 * if the memory operand has no base register. This is the only case where the
 * 64-bit register RIP can be returned, in which case the operand also has no
 * scaled index register.
 * Only valid if FD_OP_TYPE == FD_OP_MEM **/
#define FD_OP_BASE(instr,idx) ((FdReg) (instr)->operands[idx].reg)
/** Gets the index of the index register from a memory operand, or FD_REG_NONE,
 * if the memory operand has no scaled index register.
 * Only valid if FD_OP_TYPE == FD_OP_MEM **/
#define FD_OP_INDEX(instr,idx) ((FdReg) (instr)->idx_reg)
/** Gets the scale of the index register from a memory operand when existent.
 * This does /not/ return the scale as an absolute value but returns the number
 * of bits the index register is shifted to the left (i.e. the value is in the
 * range 0-3). The actual scale can be computed easily using 1<<FD_OP_SCALE.
 * Only valid if FD_OP_TYPE == FD_OP_MEM and FD_OP_INDEX != FD_REG_NONE **/
#define FD_OP_SCALE(instr,idx) ((instr)->idx_scale)
/** Gets the sign-extended displacement of a memory operand.
 * Only valid if FD_OP_TYPE == FD_OP_MEM **/
#define FD_OP_DISP(instr,idx) ((instr)->disp)
/** Gets the (sign-extended) encoded constant for an immediate operand.
 * Only valid if FD_OP_TYPE == FD_OP_IMM **/
#define FD_OP_IMM(instr,idx) ((instr)->imm)
#endif

View File

@@ -1,22 +1,23 @@
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <decode.h>
#include <fadec.h>
#define DECODE_TABLE_STRTAB1
#define FD_DECODE_TABLE_STRTAB1
static const char* _mnemonic_str =
#include <decode-table.inc>
;
#undef DECODE_TABLE_STRTAB1
#undef FD_DECODE_TABLE_STRTAB1
#define DECODE_TABLE_STRTAB2
#define FD_DECODE_TABLE_STRTAB2
static const uint16_t _mnemonic_offs[] = {
#include <decode-table.inc>
};
#undef DECODE_TABLE_STRTAB2
#undef FD_DECODE_TABLE_STRTAB2
#define FMT_CONCAT(buf, end, ...) do { \
buf += snprintf(buf, end - buf, __VA_ARGS__); \
@@ -25,79 +26,83 @@ static const uint16_t _mnemonic_offs[] = {
} while (0)
void
instr_format(const Instr* instr, char buffer[128])
fd_format(const FdInstr* instr, char* buffer, size_t len)
{
char* buf = buffer;
char* end = buffer + 128;
char* end = buffer + len;
FMT_CONCAT(buf, end, "[");
if (INSTR_HAS_REP(instr))
if (FD_HAS_REP(instr))
FMT_CONCAT(buf, end, "rep:");
if (INSTR_HAS_REPNZ(instr))
if (FD_HAS_REPNZ(instr))
FMT_CONCAT(buf, end, "repnz:");
if (INSTR_SEGMENT(instr) < 6)
FMT_CONCAT(buf, end, "%cs:", "ecsdfg"[INSTR_SEGMENT(instr)]);
if (INSTR_IS64(instr) && INSTR_ADDRSZ(instr) == 4)
if (FD_SEGMENT(instr) < 6)
FMT_CONCAT(buf, end, "%cs:", "ecsdfg"[FD_SEGMENT(instr)]);
if (FD_IS64(instr) && FD_ADDRSIZE(instr) == 4)
FMT_CONCAT(buf, end, "addr32:");
if (!INSTR_IS64(instr) && INSTR_ADDRSZ(instr) == 2)
if (!FD_IS64(instr) && FD_ADDRSIZE(instr) == 2)
FMT_CONCAT(buf, end, "addr16:");
if (INSTR_HAS_LOCK(instr))
if (FD_HAS_LOCK(instr))
FMT_CONCAT(buf, end, "lock:");
FMT_CONCAT(buf, end, "%s", &_mnemonic_str[_mnemonic_offs[instr->type]]);
if (INSTR_WIDTH(instr))
FMT_CONCAT(buf, end, "_%u", INSTR_WIDTH(instr));
FMT_CONCAT(buf, end, "%s", &_mnemonic_str[_mnemonic_offs[FD_TYPE(instr)]]);
if (FD_OPSIZE(instr))
FMT_CONCAT(buf, end, "_%u", FD_OPSIZE(instr));
for (int i = 0; i < 4; i++)
{
const struct Operand* operand = &instr->operands[i];
if (operand->type == OT_NONE)
FdOpType op_type = FD_OP_TYPE(instr, i);
if (op_type == FD_OP_NONE)
break;
const char* op_type_name = "reg\0imm\0mem" + operand->type * 4 - 4;
FMT_CONCAT(buf, end, " %s%u:", op_type_name, operand->size);
const char* op_type_name = "reg\0imm\0mem" + op_type * 4 - 4;
FMT_CONCAT(buf, end, " %s%u:", op_type_name, FD_OP_SIZE(instr, i));
switch (operand->type)
switch (op_type)
{
size_t immediate;
case OT_REG:
if (operand->size == 1 && !INSTR_HAS_REX(instr) &&
operand->reg >= 4 && operand->reg < 8)
FMT_CONCAT(buf, end, "r%uh", operand->reg - 4);
bool has_base;
bool has_idx;
bool has_disp;
case FD_OP_REG:
if (FD_OP_REG_HIGH(instr, i))
FMT_CONCAT(buf, end, "r%uh", FD_OP_REG(instr, i) - 4);
else
FMT_CONCAT(buf, end, "r%u", operand->reg);
FMT_CONCAT(buf, end, "r%u", FD_OP_REG(instr, i));
break;
case OT_IMM:
immediate = instr->immediate;
if (operand->size == 1)
case FD_OP_IMM:
immediate = FD_OP_IMM(instr, i);
if (FD_OP_SIZE(instr, i) == 1)
immediate &= 0xff;
else if (operand->size == 2)
else if (FD_OP_SIZE(instr, i) == 2)
immediate &= 0xffff;
else if (operand->size == 4)
else if (FD_OP_SIZE(instr, i) == 4)
immediate &= 0xffffffff;
FMT_CONCAT(buf, end, "0x%lx", immediate);
break;
case OT_MEM:
if (!reg_is_none(operand->reg))
case FD_OP_MEM:
has_base = FD_OP_BASE(instr, i) != FD_REG_NONE;
has_idx = FD_OP_INDEX(instr, i) != FD_REG_NONE;
has_disp = FD_OP_DISP(instr, i) != 0;
if (has_base)
{
FMT_CONCAT(buf, end, "r%u", operand->reg);
if (instr->scale != 0 || instr->disp > 0)
FMT_CONCAT(buf, end, "r%u", FD_OP_BASE(instr, i));
if (has_idx || has_disp)
FMT_CONCAT(buf, end, "+");
}
if (instr->scale != 0)
if (has_idx)
{
FMT_CONCAT(buf, end, "%u*r%u", 1 << (instr->scale - 1),
instr->sreg);
if (instr->disp > 0)
FMT_CONCAT(buf, end, "%u*r%u", 1 << FD_OP_SCALE(instr, i),
FD_OP_INDEX(instr, i));
if (has_disp)
FMT_CONCAT(buf, end, "+");
}
if (instr->disp < 0)
FMT_CONCAT(buf, end, "-0x%lx", -instr->disp);
else if ((reg_is_none(operand->reg) && instr->scale == 0) ||
instr->disp > 0)
FMT_CONCAT(buf, end, "0x%lx", instr->disp);
if (FD_OP_DISP(instr, i) < 0)
FMT_CONCAT(buf, end, "-0x%lx", -FD_OP_DISP(instr, i));
else if (has_disp || (!has_base && !has_idx))
FMT_CONCAT(buf, end, "0x%lx", FD_OP_DISP(instr, i));
break;
case OT_NONE:
case FD_OP_NONE:
default:
break;
}

View File

@@ -266,15 +266,15 @@ def bytes_to_table(data, notes):
for p, c in zip(offs, offs[1:]))
template = """// Auto-generated file -- do not modify!
#if defined(DECODE_TABLE_DATA_32)
#if defined(FD_DECODE_TABLE_DATA_32)
{hex_table32}
#elif defined(DECODE_TABLE_DATA_64)
#elif defined(FD_DECODE_TABLE_DATA_64)
{hex_table64}
#elif defined(DECODE_TABLE_MNEMONICS)
#elif defined(FD_DECODE_TABLE_MNEMONICS)
{mnemonic_list}
#elif defined(DECODE_TABLE_STRTAB1)
#elif defined(FD_DECODE_TABLE_STRTAB1)
{mnemonic_cstr}
#elif defined(DECODE_TABLE_STRTAB2)
#elif defined(FD_DECODE_TABLE_STRTAB2)
{mnemonic_offsets}
#else
#error "unspecified decode table"
@@ -314,7 +314,7 @@ if __name__ == "__main__":
file = template.format(
hex_table32=bytes_to_table(*table32.compile(mnemonics_lut)),
hex_table64=bytes_to_table(*table64.compile(mnemonics_lut)),
mnemonic_list="\n".join("MNEMONIC(%s,%d)"%entry for entry in mnemonics_lut.items()),
mnemonic_list="\n".join("FD_MNEMONIC(%s,%d)"%entry for entry in mnemonics_lut.items()),
mnemonic_cstr=mnemonic_cstr,
mnemonic_offsets=",".join(str(off) for off in mnemonic_tab),
)

View File

@@ -4,7 +4,7 @@
#include <inttypes.h>
#include <time.h>
#include <decode.h>
#include <fadec.h>
static
@@ -30,21 +30,7 @@ main(int argc, char** argv)
return -1;
}
DecodeMode mode;
size_t mode_input = strtoul(argv[1], NULL, 0);
if (mode_input == 32)
{
mode = DECODE_32;
}
else if (mode_input == 64)
{
mode = DECODE_64;
}
else
{
printf("Unknown decode mode\n");
return 1;
}
size_t mode = strtoul(argv[1], NULL, 0);
// Avoid allocation by transforming hex to binary in-place.
uint8_t* code = (uint8_t*) argv[2];
@@ -62,7 +48,7 @@ main(int argc, char** argv)
struct timespec time_start;
struct timespec time_end;
Instr instr;
FdInstr instr;
__asm__ volatile("" : : : "memory");
clock_gettime(CLOCK_MONOTONIC, &time_start);
@@ -72,8 +58,8 @@ main(int argc, char** argv)
while (current_off != length)
{
size_t remaining = length - current_off;
int retval = decode(code + current_off, remaining, mode, 0x1234000,
&instr);
int retval = fd_decode(code + current_off, remaining, mode,
0x1234000, &instr);
if (retval < 0)
goto fail;
current_off += retval;
@@ -83,7 +69,7 @@ main(int argc, char** argv)
__asm__ volatile("" : : : "memory");
char format_buffer[128];
instr_format(&instr, format_buffer);
fd_format(&instr, format_buffer, sizeof(format_buffer));
printf("%s\n", format_buffer);
if (repetitions > 1)