Files
test-repo/fadec.h
Alexis Engelke afc574503f Decode jump targets as offset if address is NULL
Addresses relative to the actual address of the instruction are decoded
as new offset operand, where the RIP has to be added to obtain the real
value. For backwards compatibility, the new behavior is only exposed if
the address of the instruction is specified as zero.
2020-03-07 14:30:07 +01:00

215 lines
8.0 KiB
C

#ifndef FD_FADEC_H_
#define FD_FADEC_H_
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef enum {
FD_REG_R0 = 0, FD_REG_R1, FD_REG_R2, FD_REG_R3,
FD_REG_R4, FD_REG_R5, FD_REG_R6, FD_REG_R7,
FD_REG_R8, FD_REG_R9, FD_REG_R10, FD_REG_R11,
FD_REG_R12, FD_REG_R13, FD_REG_R14, FD_REG_R15,
// Alternative names for byte registers
FD_REG_AL = 0, FD_REG_CL, FD_REG_DL, FD_REG_BL,
FD_REG_AH, FD_REG_CH, FD_REG_DH, FD_REG_BH,
// Alternative names for general purpose registers
FD_REG_AX = 0, FD_REG_CX, FD_REG_DX, FD_REG_BX,
FD_REG_SP, FD_REG_BP, FD_REG_SI, FD_REG_DI,
// FD_REG_IP can only be accessed in long mode (64-bit)
FD_REG_IP = 0x10,
// Segment register values
FD_REG_ES = 0, FD_REG_CS, FD_REG_SS, FD_REG_DS, FD_REG_FS, FD_REG_GS,
// No register specified
FD_REG_NONE = 0x3f
} FdReg;
typedef enum {
#define FD_DECODE_TABLE_MNEMONICS
#define FD_MNEMONIC(name,value) FDI_ ## name = value,
#include <fadec-decode-table.inc>
#undef FD_DECODE_TABLE_MNEMONICS
#undef FD_MNEMONIC
} FdInstrType;
/** Internal use only. **/
enum {
FD_FLAG_LOCK = 1 << 0,
FD_FLAG_REP = 1 << 1,
FD_FLAG_REPNZ = 1 << 2,
FD_FLAG_REX = 1 << 3,
FD_FLAG_64 = 1 << 7,
};
typedef enum {
FD_OT_NONE = 0,
FD_OT_REG = 1,
FD_OT_IMM = 2,
FD_OT_MEM = 3,
FD_OT_OFF = 4,
} FdOpType;
typedef enum {
/** Register type is encoded in mnemonic **/
FD_RT_IMP = 0,
/** Low general purpose register **/
FD_RT_GPL = 1,
/** High-byte general purpose register **/
FD_RT_GPH = 2,
/** Segment register **/
FD_RT_SEG = 3,
/** FPU register ST(n) **/
FD_RT_FPU = 4,
/** MMX register MMn **/
FD_RT_MMX = 5,
/** Vector (SSE/AVX) register XMMn/YMMn/ZMMn **/
FD_RT_VEC = 6,
/** Vector mask (AVX-512) register Kn **/
FD_RT_MASK = 7,
/** Bound register BNDn **/
FD_RT_BND = 8,
/** Control Register CRn **/
FD_RT_CR = 9,
/** Debug Register DRn **/
FD_RT_DR = 10,
/** Must be a memory operand **/
FD_RT_MEM = 15,
} FdRegType;
typedef struct {
uint8_t type;
uint8_t size;
int8_t reg;
uint8_t misc;
} FdOp;
typedef struct {
uint16_t type;
uint8_t flags;
uint8_t segment;
uint8_t addrsz;
uint8_t operandsz;
FdOp operands[4];
uint8_t idx_reg;
uint8_t idx_scale;
uint8_t size;
intptr_t disp;
intptr_t imm;
uintptr_t address;
} FdInstr;
typedef enum {
FD_ERR_UD = -1,
FD_ERR_INTERNAL = -2,
FD_ERR_PARTIAL = -3,
} FdErr;
/** Decode an instruction.
* \param buf Buffer for instruction bytes.
* \param len Length of the buffer (in bytes). An instruction is not longer than
* 15 bytes on all x86 architectures.
* \param mode Decoding mode, either 32 for protected/compatibility mode or 64
* for long mode. 16-bit mode is not supported.
* \param address Virtual address where the decoded instruction. This is used
* for computing jump targets and segment-offset-relative memory
* operations (MOV with moffs* encoding) and stored in the instruction.
* \param out_instr Pointer to the instruction buffer. Note that this may get
* partially written even if an error is returned.
* \return The number of bytes consumed by the instruction, or a negative number
* indicating an error.
**/
int fd_decode(const uint8_t* buf, size_t len, int mode, uintptr_t address,
FdInstr* out_instr);
/** Format an instruction to a string.
* \param instr The instruction.
* \param buf The buffer to hold the formatted string.
* \param len The length of the buffer.
**/
void fd_format(const FdInstr* instr, char* buf, size_t len);
/** Gets the type/mnemonic of the instruction. **/
#define FD_TYPE(instr) ((FdInstrType) (instr)->type)
/** Gets the address of the instruction. **/
#define FD_ADDRESS(instr) ((instr)->address)
/** Gets the size of the instruction in bytes. **/
#define FD_SIZE(instr) ((instr)->size)
/** Gets the specified segment override, or FD_REG_NONE for default segment. **/
#define FD_SEGMENT(instr) ((FdReg) (instr)->segment)
/** Gets the address size attribute of the instruction in bytes. **/
#define FD_ADDRSIZE(instr) ((instr)->addrsz)
/** Gets the operation width in bytes of the instruction if this is not encoded
* in the operands, for example for the string instruction (e.g. MOVS). **/
#define FD_OPSIZE(instr) ((instr)->operandsz)
/** Indicates whether the instruction was encoded with a REP prefix. Needed for:
* (1) Handling the instructions MOVS, STOS, LODS, INS and OUTS properly.
* (2) Handling the instructions SCAS and CMPS, for which this means REPZ. **/
#define FD_HAS_REP(instr) ((instr)->flags & FD_FLAG_REP)
/** Indicates whether the instruction was encoded with a REPNZ prefix. **/
#define FD_HAS_REPNZ(instr) ((instr)->flags & FD_FLAG_REPNZ)
/** Indicates whether the instruction was encoded with a LOCK prefix. Note that
* it is not checked whether the LOCK prefix is valid for the instruction. **/
#define FD_HAS_LOCK(instr) ((instr)->flags & FD_FLAG_LOCK)
#define FD_IS64(instr) ((instr)->flags & FD_FLAG_64)
/** Gets the type of an operand at the given index. **/
#define FD_OP_TYPE(instr,idx) ((FdOpType) (instr)->operands[idx].type)
/** Gets the size in bytes of an operand. However, there are a few exceptions:
* (1) For some register types, e.g., segment registers, or x87 registers, the
* size is zero. (This allows some simplifications internally.)
* (2) On some vector instructions this may be only an approximation of the
* actually needed operand size (that is, an instruction may/must only use
* a smaller part than specified here). The real operand size is always
* fully recoverable in combination with the instruction type. **/
#define FD_OP_SIZE(instr,idx) ((instr)->operands[idx].size)
/** Gets the accessed register index of a register operand. Note that /only/ the
* index is returned, no further interpretation of the index (which depends on
* the instruction type) is done. The register type can be fetches using
* FD_OP_REG_TYPE, e.g. for distinguishing high-byte registers.
* Only valid if FD_OP_TYPE == FD_OT_REG **/
#define FD_OP_REG(instr,idx) ((FdReg) (instr)->operands[idx].reg)
/** Gets the type of the accessed register.
* Only valid if FD_OP_TYPE == FD_OT_REG **/
#define FD_OP_REG_TYPE(instr,idx) ((FdRegType) (instr)->operands[idx].misc)
/** DEPRECATED: use FD_OP_REG_TYPE() == FD_RT_GPH instead.
* Returns whether the accessed register is a high-byte register. In that case,
* the register index has to be decreased by 4.
* Only valid if FD_OP_TYPE == FD_OT_REG **/
#define FD_OP_REG_HIGH(instr,idx) (FD_OP_REG_TYPE(instr,idx) == FD_RT_GPH)
/** Gets the index of the base register from a memory operand, or FD_REG_NONE,
* if the memory operand has no base register. This is the only case where the
* 64-bit register RIP can be returned, in which case the operand also has no
* scaled index register.
* Only valid if FD_OP_TYPE == FD_OT_MEM **/
#define FD_OP_BASE(instr,idx) ((FdReg) (instr)->operands[idx].reg)
/** Gets the index of the index register from a memory operand, or FD_REG_NONE,
* if the memory operand has no scaled index register.
* Only valid if FD_OP_TYPE == FD_OT_MEM **/
#define FD_OP_INDEX(instr,idx) ((FdReg) (instr)->idx_reg)
/** Gets the scale of the index register from a memory operand when existent.
* This does /not/ return the scale in an absolute value but returns the amount
* of bits the index register is shifted to the left (i.e. the value in in the
* range 0-3). The actual scale can be computed easily using 1<<FD_OP_SCALE.
* Only valid if FD_OP_TYPE == FD_OT_MEM and FD_OP_INDEX != FD_REG_NONE **/
#define FD_OP_SCALE(instr,idx) ((instr)->idx_scale)
/** Gets the sign-extended displacement of a memory operand.
* Only valid if FD_OP_TYPE == FD_OT_MEM **/
#define FD_OP_DISP(instr,idx) ((instr)->disp)
/** Gets the (sign-extended) encoded constant for an immediate operand.
* Only valid if FD_OP_TYPE == FD_OT_IMM **/
#define FD_OP_IMM(instr,idx) ((instr)->imm)
#ifdef __cplusplus
}
#endif
#endif