Files
fadec/fadec.h
Alexis Engelke 0a36604c81 decode: Change REP flag values
The new values allow for a more optimizable computation of the required
flags from the decoded prefix.
2023-04-23 08:57:08 +02:00

285 lines
11 KiB
C

#ifndef FD_FADEC_H_
#define FD_FADEC_H_
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef enum {
FD_REG_R0 = 0, FD_REG_R1, FD_REG_R2, FD_REG_R3,
FD_REG_R4, FD_REG_R5, FD_REG_R6, FD_REG_R7,
FD_REG_R8, FD_REG_R9, FD_REG_R10, FD_REG_R11,
FD_REG_R12, FD_REG_R13, FD_REG_R14, FD_REG_R15,
// Alternative names for byte registers
FD_REG_AL = 0, FD_REG_CL, FD_REG_DL, FD_REG_BL,
FD_REG_AH, FD_REG_CH, FD_REG_DH, FD_REG_BH,
// Alternative names for general purpose registers
FD_REG_AX = 0, FD_REG_CX, FD_REG_DX, FD_REG_BX,
FD_REG_SP, FD_REG_BP, FD_REG_SI, FD_REG_DI,
// FD_REG_IP can only be accessed in long mode (64-bit)
FD_REG_IP = 0x10,
// Segment register values
FD_REG_ES = 0, FD_REG_CS, FD_REG_SS, FD_REG_DS, FD_REG_FS, FD_REG_GS,
// No register specified
FD_REG_NONE = 0x3f
} FdReg;
typedef enum {
#define FD_MNEMONIC(name,value) FDI_ ## name = value,
#include <fadec-decode-public.inc>
#undef FD_MNEMONIC
} FdInstrType;
/** Internal use only. **/
enum {
FD_FLAG_LOCK = 1 << 0,
FD_FLAG_REP = 1 << 2,
FD_FLAG_REPNZ = 1 << 1,
FD_FLAG_64 = 1 << 7,
};
/** Operand types. **/
typedef enum {
FD_OT_NONE = 0,
FD_OT_REG = 1,
FD_OT_IMM = 2,
FD_OT_MEM = 3,
FD_OT_OFF = 4,
FD_OT_MEMBCST = 5,
} FdOpType;
typedef enum {
/** Vector (SSE/AVX) register XMMn/YMMn/ZMMn **/
FD_RT_VEC = 0,
/** Low general purpose register **/
FD_RT_GPL = 1,
/** High-byte general purpose register **/
FD_RT_GPH = 2,
/** Segment register **/
FD_RT_SEG = 3,
/** FPU register ST(n) **/
FD_RT_FPU = 4,
/** MMX register MMn **/
FD_RT_MMX = 5,
/** Vector mask (AVX-512) register Kn **/
FD_RT_MASK = 7,
/** Bound register BNDn **/
FD_RT_BND = 8,
/** Control Register CRn **/
FD_RT_CR = 9,
/** Debug Register DRn **/
FD_RT_DR = 10,
/** Must be a memory operand **/
FD_RT_MEM = 15,
} FdRegType;
/** Do not depend on the actual enum values. **/
typedef enum {
/** Round to nearest (even) **/
FD_RC_RN = 1,
/** Round down **/
FD_RC_RD = 3,
/** Round up **/
FD_RC_RU = 5,
/** Round to zero (truncate) **/
FD_RC_RZ = 7,
/** Rounding mode as specified in MXCSR **/
FD_RC_MXCSR = 0,
/** Rounding mode irrelevant, but SAE **/
FD_RC_SAE = 6,
} FdRoundControl;
/** Internal use only. **/
typedef struct {
uint8_t type;
uint8_t size;
uint8_t reg;
uint8_t misc;
} FdOp;
/** Never(!) access struct fields directly. Use the macros defined below. **/
typedef struct {
uint16_t type;
uint8_t flags;
uint8_t segment;
uint8_t addrsz;
uint8_t operandsz;
uint8_t size;
uint8_t evex;
FdOp operands[4];
int64_t disp;
int64_t imm;
uint64_t address;
} FdInstr;
typedef enum {
FD_ERR_UD = -1,
FD_ERR_INTERNAL = -2,
FD_ERR_PARTIAL = -3,
} FdErr;
/** Decode an instruction.
* \param buf Buffer for instruction bytes.
* \param len Length of the buffer (in bytes). An instruction is not longer than
* 15 bytes on all x86 architectures.
* \param mode Decoding mode, either 32 for protected/compatibility mode or 64
* for long mode. 16-bit mode is not supported.
* \param address Virtual address where the decoded instruction. This is used
* for computing jump targets. If "0" is passed, operands which require
* adding EIP/RIP will be stored as FD_OT_OFF operands.
* DEPRECATED: Strongly prefer passing 0 and using FD_OT_OFF operands.
* \param out_instr Pointer to the instruction buffer. Note that this may get
* partially written even if an error is returned.
* \return The number of bytes consumed by the instruction, or a negative number
* indicating an error.
**/
int fd_decode(const uint8_t* buf, size_t len, int mode, uintptr_t address,
FdInstr* out_instr);
/** Format an instruction to a string.
* \param instr The instruction.
* \param buf The buffer to hold the formatted string.
* \param len The length of the buffer.
**/
void fd_format(const FdInstr* instr, char* buf, size_t len);
/** Format an instruction to a string.
* NOTE: API stability is currently not guaranteed for this function; its name
* and/or signature may change in future.
*
* \param instr The instruction.
* \param addr The base address to use for printing FD_OT_OFF operands.
* \param buf The buffer to hold the formatted string.
* \param len The length of the buffer.
**/
void fd_format_abs(const FdInstr* instr, uint64_t addr, char* buf, size_t len);
/** Get the stringified name of an instruction type.
* NOTE: API stability is currently not guaranteed for this function; changes
* to the signature and/or the returned string can be expected. E.g., a future
* version may take an extra parameter for the instruction operand size; or may
* take a complete decoded instruction as first parameter and return the
* mnemonic returned by fd_format.
*
* \param ty An instruction type
* \return The instruction type as string, or "(invalid)".
**/
const char* fdi_name(FdInstrType ty);
/** Gets the type/mnemonic of the instruction.
* ABI STABILITY NOTE: different versions or builds of the library may use
* different values. When linking as shared library, any interpretation of this
* value is meaningless; in such cases use fdi_name.
*
* API STABILITY NOTE: a future version of this library may decode string
* instructions prefixed with REP/REPNZ and instructions prefixed with LOCK as
* separate instruction types. **/
#define FD_TYPE(instr) ((FdInstrType) (instr)->type)
/** DEPRECATED: This functionality is obsolete in favor of FD_OT_OFF.
* Gets the address of the instruction. Invalid if decoded address == 0. **/
#define FD_ADDRESS(instr) ((instr)->address)
/** Gets the size of the instruction in bytes. **/
#define FD_SIZE(instr) ((instr)->size)
/** Gets the specified segment override, or FD_REG_NONE for default segment. **/
#define FD_SEGMENT(instr) ((FdReg) (instr)->segment & 0x3f)
/** Gets the address size attribute of the instruction in bytes. **/
#define FD_ADDRSIZE(instr) (1 << (instr)->addrsz)
/** Get the logarithmic address size; FD_ADDRSIZE == 1 << FD_ADDRSIZELG **/
#define FD_ADDRSIZELG(instr) ((instr)->addrsz)
/** Gets the operation width in bytes of the instruction if this is not encoded
* in the operands, for example for the string instruction (e.g. MOVS). **/
#define FD_OPSIZE(instr) (1 << (instr)->operandsz)
/** Get the logarithmic operand size; FD_OPSIZE == 1 << FD_OPSIZELG iff
* FD_OPSIZE is valid. **/
#define FD_OPSIZELG(instr) ((instr)->operandsz)
/** Indicates whether the instruction was encoded with a REP prefix. Needed for:
* (1) Handling the instructions MOVS, STOS, LODS, INS and OUTS properly.
* (2) Handling the instructions SCAS and CMPS, for which this means REPZ. **/
#define FD_HAS_REP(instr) ((instr)->flags & FD_FLAG_REP)
/** Indicates whether the instruction was encoded with a REPNZ prefix. **/
#define FD_HAS_REPNZ(instr) ((instr)->flags & FD_FLAG_REPNZ)
/** Indicates whether the instruction was encoded with a LOCK prefix. **/
#define FD_HAS_LOCK(instr) ((instr)->flags & FD_FLAG_LOCK)
/** Do not use. **/
#define FD_IS64(instr) ((instr)->flags & FD_FLAG_64)
/** Gets the type of an operand at the given index. **/
#define FD_OP_TYPE(instr,idx) ((FdOpType) (instr)->operands[idx].type)
/** Gets the size in bytes of an operand. However, there are a few exceptions:
* (1) For some register types, e.g., segment registers, or x87 registers, the
* size is zero. (This allows some simplifications internally.)
* (2) On some vector instructions this may be only an approximation of the
* actually needed operand size (that is, an instruction may/must only use
* a smaller part than specified here). The real operand size is always
* fully recoverable in combination with the instruction type. **/
#define FD_OP_SIZE(instr,idx) (1 << (instr)->operands[idx].size >> 1)
/** Get the logarithmic size of an operand; see FD_OP_SIZE for special cases.
* The following equality holds: FD_OP_SIZE == 1 << (FD_OP_SIZELG + 1) >> 1
* Note that typically FD_OP_SIZE == 1 << FD_OP_SIZELG unless a zero-sized
* memory operand, FPU register, or mask register is involved. **/
#define FD_OP_SIZELG(instr,idx) ((instr)->operands[idx].size - 1)
/** Gets the accessed register index of a register operand. Note that /only/ the
* index is returned, no further interpretation of the index (which depends on
* the instruction type) is done. The register type can be fetched using
* FD_OP_REG_TYPE, e.g. for distinguishing high-byte registers.
* Only valid if FD_OP_TYPE == FD_OT_REG **/
#define FD_OP_REG(instr,idx) ((FdReg) (instr)->operands[idx].reg)
/** Gets the type of the accessed register.
* Only valid if FD_OP_TYPE == FD_OT_REG **/
#define FD_OP_REG_TYPE(instr,idx) ((FdRegType) (instr)->operands[idx].misc)
/** DEPRECATED: use FD_OP_REG_TYPE() == FD_RT_GPH instead.
* Returns whether the accessed register is a high-byte register. In that case,
* the register index has to be decreased by 4.
* Only valid if FD_OP_TYPE == FD_OT_REG **/
#define FD_OP_REG_HIGH(instr,idx) (FD_OP_REG_TYPE(instr,idx) == FD_RT_GPH)
/** Gets the index of the base register from a memory operand, or FD_REG_NONE,
* if the memory operand has no base register. This is the only case where the
* 64-bit register RIP can be returned, in which case the operand also has no
* scaled index register.
* Only valid if FD_OP_TYPE == FD_OT_MEM/MEMBCST **/
#define FD_OP_BASE(instr,idx) ((FdReg) (instr)->operands[idx].reg)
/** Gets the index of the index register from a memory operand, or FD_REG_NONE,
* if the memory operand has no scaled index register.
* Only valid if FD_OP_TYPE == FD_OT_MEM/MEMBCST **/
#define FD_OP_INDEX(instr,idx) ((FdReg) (instr)->operands[idx].misc & 0x3f)
/** Gets the scale of the index register from a memory operand when existent.
* This does /not/ return the scale in an absolute value but returns the amount
* of bits the index register is shifted to the left (i.e. the value in in the
* range 0-3). The actual scale can be computed easily using 1<<FD_OP_SCALE.
* Only valid if FD_OP_TYPE == FD_OT_MEM/MEMBCST and FD_OP_INDEX != NONE **/
#define FD_OP_SCALE(instr,idx) ((instr)->operands[idx].misc >> 6)
/** Gets the sign-extended displacement of a memory operand.
* Only valid if FD_OP_TYPE == FD_OT_MEM/MEMBCST **/
#define FD_OP_DISP(instr,idx) ((int64_t) (instr)->disp)
/** Get memory broadcast size in bytes.
* Only valid if FD_OP_TYPE == FD_OT_MEMBCST **/
#define FD_OP_BCSTSZ(instr,idx) (1 << FD_OP_BCSTSZLG(instr,idx))
/** Get logarithmic memory broadcast size (1 = 2-byte; 2=4-byte; 3=8-byte).
* Only valid if FD_OP_TYPE == FD_OT_MEMBCST **/
#define FD_OP_BCSTSZLG(instr,idx) ((instr)->segment >> 6)
/** Gets the (sign-extended) encoded constant for an immediate operand.
* Only valid if FD_OP_TYPE == FD_OT_IMM or FD_OP_TYPE == FD_OT_OFF **/
#define FD_OP_IMM(instr,idx) ((instr)->imm)
/** Get the opmask register for EVEX-encoded instructions; 0 for no mask. **/
#define FD_MASKREG(instr) ((instr)->evex & 0x07)
/** Get whether zero masking shall be used. Only valid if FD_MASKREG != 0. **/
#define FD_MASKZERO(instr) ((instr)->evex & 0x80)
/** Get rounding mode for EVEX-encoded instructions. See FdRoundControl. **/
#define FD_ROUNDCONTROL(instr) ((FdRoundControl) (((instr)->evex & 0x70) >> 4))
#ifdef __cplusplus
}
#endif
#endif