From 3abf29d63ef55a39f5f6ec32720af229f1f74f3e Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Wed, 23 Jan 2019 20:03:40 +0100 Subject: [PATCH] Major rework of API and improved documentation --- decode.c | 138 ++++++++++++++++++++----------------- decode.h | 138 ------------------------------------- fadec.h | 182 +++++++++++++++++++++++++++++++++++++++++++++++++ format.c | 97 +++++++++++++------------- parseinstrs.py | 12 ++-- tests/driver.c | 26 ++----- 6 files changed, 320 insertions(+), 273 deletions(-) delete mode 100644 decode.h create mode 100644 fadec.h diff --git a/decode.c b/decode.c index 93e54b7..e8e294a 100644 --- a/decode.c +++ b/decode.c @@ -2,7 +2,7 @@ #include #include -#include +#include #if defined(ARCH_X86_64) && __SIZEOF_POINTER__ < 8 @@ -13,21 +13,27 @@ #define UNLIKELY(x) __builtin_expect((x), 0) #if defined(ARCH_386) -#define DECODE_TABLE_DATA_32 +#define FD_DECODE_TABLE_DATA_32 static const uint8_t _decode_table32[] = { #include }; -#undef DECODE_TABLE_DATA_32 +#undef FD_DECODE_TABLE_DATA_32 #endif #if defined(ARCH_X86_64) -#define DECODE_TABLE_DATA_64 +#define FD_DECODE_TABLE_DATA_64 static const uint8_t _decode_table64[] = { #include }; -#undef DECODE_TABLE_DATA_64 +#undef FD_DECODE_TABLE_DATA_64 #endif +enum DecodeMode { + DECODE_64 = 0, + DECODE_32 = 1, +}; + +typedef enum DecodeMode DecodeMode; #define ENTRY_NONE 0 #define ENTRY_INSTR 1 @@ -72,11 +78,11 @@ static const uint8_t _decode_table64[] = { enum PrefixSet { - PREFIX_LOCK = INSTR_FLAG_LOCK, - PREFIX_REP = INSTR_FLAG_REP, - PREFIX_REPNZ = INSTR_FLAG_REPNZ, - PREFIX_REX = INSTR_FLAG_REX, - PREFIX_VEXL = INSTR_FLAG_VEXL, + PREFIX_LOCK = FD_FLAG_LOCK, + PREFIX_REP = FD_FLAG_REP, + PREFIX_REPNZ = FD_FLAG_REPNZ, + PREFIX_REX = FD_FLAG_REX, + PREFIX_VEXL = FD_FLAG_VEXL, PREFIX_OPSZ = 1 << 13, PREFIX_ADDRSZ = 1 << 14, PREFIX_REXB = 1 << 15, @@ -104,7 +110,7 @@ decode_prefixes(const uint8_t* buffer, int len, DecodeMode mode, uint8_t rep = 0; *out_mandatory = 0; - *out_segment = 
RI_NONE; + *out_segment = FD_REG_NONE; while (LIKELY(off < len)) { @@ -113,11 +119,11 @@ decode_prefixes(const uint8_t* buffer, int len, DecodeMode mode, { default: goto out; // From segment overrides, the last one wins. - case 0x26: *out_segment = RI_ES; off++; break; - case 0x2e: *out_segment = RI_CS; off++; break; - case 0x3e: *out_segment = RI_DS; off++; break; - case 0x64: *out_segment = RI_FS; off++; break; - case 0x65: *out_segment = RI_GS; off++; break; + case 0x26: *out_segment = FD_REG_ES; off++; break; + case 0x2e: *out_segment = FD_REG_CS; off++; break; + case 0x3e: *out_segment = FD_REG_DS; off++; break; + case 0x64: *out_segment = FD_REG_FS; off++; break; + case 0x65: *out_segment = FD_REG_GS; off++; break; case 0x67: prefixes |= PREFIX_ADDRSZ; off++; break; case 0xf0: prefixes |= PREFIX_LOCK; off++; break; case 0x66: prefixes |= PREFIX_OPSZ; off++; break; @@ -201,8 +207,8 @@ out: static int -decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr, - PrefixSet prefixes, struct Operand* out_o1, struct Operand* out_o2) +decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, FdInstr* instr, + PrefixSet prefixes, FdOp* out_o1, FdOp* out_o2) { int off = 0; @@ -223,7 +229,7 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr, #if defined(ARCH_X86_64) reg_idx += prefixes & PREFIX_REXR ? 8 : 0; #endif - out_o2->type = OT_REG; + out_o2->type = FD_OP_REG; out_o2->reg = reg_idx; } @@ -233,7 +239,7 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr, #if defined(ARCH_X86_64) reg_idx += prefixes & PREFIX_REXB ? 8 : 0; #endif - out_o1->type = OT_REG; + out_o1->type = FD_OP_REG; out_o1->reg = reg_idx; return off; } @@ -250,7 +256,7 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr, } uint8_t sib = buffer[off++]; - scale = ((sib & 0xc0) >> 6) + 1; + scale = (sib & 0xc0) >> 6; idx = (sib & 0x38) >> 3; #if defined(ARCH_X86_64) idx += prefixes & PREFIX_REXX ? 
8 : 0; @@ -283,19 +289,20 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr, instr->disp = 0; } - out_o1->type = OT_MEM; - instr->scale = scale; + out_o1->type = FD_OP_MEM; + instr->idx_scale = scale; - if (scale == 0) + // If there was no SIB byte. + if (rm != 4) { if (mod == 0 && rm == 5) { #if defined(ARCH_X86_64) if (mode == DECODE_64) - out_o1->reg = RI_IP; + out_o1->reg = FD_REG_IP; else #endif - out_o1->reg = REG_NONE; + out_o1->reg = FD_REG_NONE; return off; } @@ -304,21 +311,22 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, Instr* instr, reg_idx += prefixes & PREFIX_REXB ? 8 : 0; #endif out_o1->reg = reg_idx; + instr->idx_reg = FD_REG_NONE; return off; } if (idx == 4) { - instr->scale = 0; + instr->idx_reg = FD_REG_NONE; } else { - instr->sreg = idx; + instr->idx_reg = idx; } if (base == 5 && mod == 0) { - out_o1->reg = REG_NONE; + out_o1->reg = FD_REG_NONE; } else { @@ -358,11 +366,15 @@ struct InstrDesc #define DESC_IMM_BYTE(desc) (((desc)->immediate >> 7) & 1) int -decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address, - Instr* instr) +fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address, + FdInstr* instr) { const uint8_t* decode_table = NULL; + int len = len_sz > 15 ? 15 : len_sz; + DecodeMode mode = mode_int == 32 ? DECODE_32 : + mode_int == 64 ? DECODE_64 : -1; + // Ensure that we can actually handle the decode request #if defined(ARCH_386) if (mode == DECODE_32) @@ -453,7 +465,7 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address, instr->type = desc->type; instr->flags = prefixes & 0x7f; if (mode == DECODE_64) - instr->flags |= INSTR_FLAG_64; + instr->flags |= FD_FLAG_64; instr->address = address; uint8_t op_size = 0; @@ -468,7 +480,7 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address, else op_size = 4; - instr->op_size = desc->gp_instr_width ? op_size : 0; + instr->operandsz = desc->gp_instr_width ? 
op_size : 0; uint8_t vec_size = 16; if (prefixes & PREFIX_VEXL) @@ -480,7 +492,7 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address, uint8_t addr_size = mode == DECODE_64 ? 8 : 4; if (prefixes & PREFIX_ADDRSZ) addr_size >>= 1; - instr->addr_size = addr_size; + instr->addrsz = addr_size; uint8_t operand_sizes[4] = { 0, 1 << desc->gp_fixed_operand_size, op_size, vec_size @@ -495,16 +507,16 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address, if (DESC_HAS_IMPLICIT(desc)) { - struct Operand* operand = &instr->operands[DESC_IMPLICIT_IDX(desc)]; - operand->type = OT_REG; + FdOp* operand = &instr->operands[DESC_IMPLICIT_IDX(desc)]; + operand->type = FD_OP_REG; operand->reg = 0; } if (DESC_HAS_MODRM(desc)) { - struct Operand* operand1 = &instr->operands[DESC_MODRM_IDX(desc)]; + FdOp* operand1 = &instr->operands[DESC_MODRM_IDX(desc)]; - struct Operand* operand2 = NULL; + FdOp* operand2 = NULL; if (DESC_HAS_MODREG(desc)) { operand2 = &instr->operands[DESC_MODREG_IDX(desc)]; @@ -522,37 +534,37 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address, else if (DESC_HAS_MODREG(desc)) { // If there is no ModRM, but a Mod-Reg, its opcode-encoded. - struct Operand* operand = &instr->operands[DESC_MODREG_IDX(desc)]; + FdOp* operand = &instr->operands[DESC_MODREG_IDX(desc)]; uint8_t reg_idx = buffer[off - 1] & 7; #if defined(ARCH_X86_64) reg_idx += prefixes & PREFIX_REXB ? 
8 : 0; #endif - operand->type = OT_REG; + operand->type = FD_OP_REG; operand->reg = reg_idx; } if (UNLIKELY(DESC_HAS_VEXREG(desc))) { - struct Operand* operand = &instr->operands[DESC_VEXREG_IDX(desc)]; - operand->type = OT_REG; + FdOp* operand = &instr->operands[DESC_VEXREG_IDX(desc)]; + operand->type = FD_OP_REG; operand->reg = vex_operand; } uint32_t imm_control = DESC_IMM_CONTROL(desc); if (imm_control == 1) { - struct Operand* operand = &instr->operands[DESC_IMM_IDX(desc)]; - operand->type = OT_IMM; + FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)]; + operand->type = FD_OP_IMM; operand->size = 1; - instr->immediate = 1; + instr->imm = 1; } else if (imm_control == 2) { - struct Operand* operand = &instr->operands[DESC_IMM_IDX(desc)]; - operand->type = OT_MEM; - operand->reg = REG_NONE; + FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)]; + operand->type = FD_OP_MEM; + operand->reg = FD_REG_NONE; operand->size = op_size; - instr->scale = 0; + instr->idx_reg = FD_REG_NONE; if (UNLIKELY(off + addr_size > len)) return -1; @@ -570,18 +582,18 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address, } else if (imm_control != 0) { - struct Operand* operand = &instr->operands[DESC_IMM_IDX(desc)]; + FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)]; uint8_t imm_size; if (DESC_IMM_BYTE(desc)) { imm_size = 1; } - else if (UNLIKELY(instr->type == IT_RET_IMM)) + else if (UNLIKELY(instr->type == FDI_RET_IMM)) { imm_size = 2; } - else if (UNLIKELY(instr->type == IT_ENTER)) + else if (UNLIKELY(instr->type == FDI_ENTER)) { imm_size = 3; } @@ -599,7 +611,7 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address, } #if defined(ARCH_X86_64) else if (mode == DECODE_64 && (prefixes & PREFIX_REXW) && - instr->type == IT_MOVABS_IMM) + instr->type == FDI_MOVABS_IMM) { imm_size = 8; } @@ -616,42 +628,42 @@ decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address, if (imm_size == 1) { - instr->immediate = (int8_t) 
LOAD_LE_1(&buffer[off]); + instr->imm = (int8_t) LOAD_LE_1(&buffer[off]); } else if (imm_size == 2) { - instr->immediate = (int16_t) LOAD_LE_2(&buffer[off]); + instr->imm = (int16_t) LOAD_LE_2(&buffer[off]); } else if (imm_size == 3) { - instr->immediate = LOAD_LE_2(&buffer[off]); - instr->immediate |= LOAD_LE_1(&buffer[off + 2]) << 16; + instr->imm = LOAD_LE_2(&buffer[off]); + instr->imm |= LOAD_LE_1(&buffer[off + 2]) << 16; } else if (imm_size == 4) { - instr->immediate = (int32_t) LOAD_LE_4(&buffer[off]); + instr->imm = (int32_t) LOAD_LE_4(&buffer[off]); } #if defined(ARCH_X86_64) else if (imm_size == 8) { - instr->immediate = (int64_t) LOAD_LE_8(&buffer[off]); + instr->imm = (int64_t) LOAD_LE_8(&buffer[off]); } #endif off += imm_size; if (imm_control == 4) { - instr->immediate += instr->address + off; + instr->imm += instr->address + off; } if (UNLIKELY(imm_control == 5)) { - operand->type = OT_REG; - operand->reg = (instr->immediate & 0xf0) >> 4; + operand->type = FD_OP_REG; + operand->reg = (instr->imm & 0xf0) >> 4; } else { - operand->type = OT_IMM; + operand->type = FD_OP_IMM; } } diff --git a/decode.h b/decode.h deleted file mode 100644 index d92e749..0000000 --- a/decode.h +++ /dev/null @@ -1,138 +0,0 @@ - -#ifndef ARMX86_DECODE_H -#define ARMX86_DECODE_H - -#include -#include - -#ifndef ssize_t -#define ssize_t intptr_t -#endif - -#define DECODE_TABLE_MNEMONICS -#define MNEMONIC(name,value) IT_ ## name = value, -enum -{ -#include -}; -#undef DECODE_TABLE_MNEMONICS -#undef MNEMONIC - -enum DecodeMode { - DECODE_64 = 0, - DECODE_32 = 1, -}; - -typedef enum DecodeMode DecodeMode; - -enum RegIndex { - RI_AL = 0, - RI_CL, - RI_DL, - RI_BL, - RI_AH, - RI_CH, - RI_DH, - RI_BH, - - RI_AX = 0, - RI_CX, - RI_DX, - RI_BX, - RI_SP, - RI_BP, - RI_SI, - RI_DI, - RI_R8, - RI_R9, - RI_R10, - RI_R11, - RI_R12, - RI_R13, - RI_R14, - RI_R15, - - // EIP cannot be encoded in Protected/Compatibility Mode - RI_IP = 0x10, - - RI_ES = 0, - RI_CS, - RI_SS, - RI_DS, - RI_FS, - 
RI_GS, - - // No register specified - RI_NONE = 0x3f -}; - -typedef uint8_t Reg; - -#define reg_index(reg) (reg) -#define reg_is_none(reg) ((reg) == REG_NONE) -#define REG_NONE RI_NONE - -enum -{ - INSTR_FLAG_LOCK = 1 << 0, - INSTR_FLAG_REP = 1 << 1, - INSTR_FLAG_REPNZ = 1 << 2, - INSTR_FLAG_REX = 1 << 3, - INSTR_FLAG_VEXL = 1 << 4, - INSTR_FLAG_64 = 1 << 7, -}; - -enum OperandType -{ - OT_NONE = 0, - OT_REG = 1, - OT_IMM = 2, - OT_MEM = 3, -}; - -struct Operand -{ - uint8_t type : 2; - uint8_t reg : 6; - uint8_t size; -}; - -struct Instr -{ - uint16_t type; - struct Operand operands[4]; - uint8_t flags; - uint8_t segment; - uint8_t op_size; - uint8_t addr_size; - - /** - * Encoded as 1 << (scale - 1) **or** no scaled register at all if zero. - **/ - uint8_t scale : 3; - uint8_t sreg : 5; - - size_t immediate; - intptr_t disp; - - uintptr_t address; - uint32_t size : 4; -}; - -typedef struct Instr Instr; - -#define INSTR_SEGMENT(instr) ((instr)->segment) -#define INSTR_WIDTH(instr) ((instr)->op_size) -#define INSTR_ADDRSZ(instr) ((instr)->addr_size) -#define INSTR_IS64(instr) ((instr)->flags & INSTR_FLAG_64) -#define INSTR_HAS_REP(instr) ((instr)->flags & INSTR_FLAG_REP) -#define INSTR_HAS_REPNZ(instr) ((instr)->flags & INSTR_FLAG_REPNZ) -#define INSTR_HAS_LOCK(instr) ((instr)->flags & INSTR_FLAG_LOCK) -#define INSTR_HAS_REX(instr) ((instr)->flags & INSTR_FLAG_REX) -#define INSTR_HAS_VEXL(instr) ((instr)->flags & INSTR_FLAG_VEXL) - -int decode(const uint8_t* buffer, int len, DecodeMode mode, uintptr_t address, - Instr* out_instr); -void instr_format(const Instr* instr, char buffer[128]); - -#endif diff --git a/fadec.h b/fadec.h new file mode 100644 index 0000000..22ebdbe --- /dev/null +++ b/fadec.h @@ -0,0 +1,182 @@ + +#ifndef FD_FADEC_H_ +#define FD_FADEC_H_ + +#include +#include + +typedef enum { + FD_REG_R0 = 0, FD_REG_R1, FD_REG_R2, FD_REG_R3, + FD_REG_R4, FD_REG_R5, FD_REG_R6, FD_REG_R7, + FD_REG_R8, FD_REG_R9, FD_REG_R10, FD_REG_R11, + FD_REG_R12, FD_REG_R13, 
FD_REG_R14, FD_REG_R15, + // Alternative names for byte registers + FD_REG_AL = 0, FD_REG_CL, FD_REG_DL, FD_REG_BL, + FD_REG_AH, FD_REG_CH, FD_REG_DH, FD_REG_BH, + // Alternative names for general purpose registers + FD_REG_AX = 0, FD_REG_CX, FD_REG_DX, FD_REG_BX, + FD_REG_SP, FD_REG_BP, FD_REG_SI, FD_REG_DI, + // FD_REG_IP can only be accessed in long mode (64-bit) + FD_REG_IP = 0x10, + // Segment register values + FD_REG_ES = 0, FD_REG_CS, FD_REG_SS, FD_REG_DS, FD_REG_FS, FD_REG_GS, + // No register specified + FD_REG_NONE = 0x3f +} FdReg; + +typedef enum { +#define FD_DECODE_TABLE_MNEMONICS +#define FD_MNEMONIC(name,value) FDI_ ## name = value, +#include +#undef FD_DECODE_TABLE_MNEMONICS +#undef FD_MNEMONIC +} FdInstrType; + +/** Internal use only. **/ +enum { + FD_FLAG_LOCK = 1 << 0, + FD_FLAG_REP = 1 << 1, + FD_FLAG_REPNZ = 1 << 2, + FD_FLAG_REX = 1 << 3, + FD_FLAG_VEXL = 1 << 4, + FD_FLAG_64 = 1 << 7, +}; + +typedef enum { + FD_OP_NONE = 0, + FD_OP_REG = 1, + FD_OP_IMM = 2, + FD_OP_MEM = 3, +} FdOpType; + +typedef struct { + uint8_t type; + uint8_t size; + int8_t reg; +} FdOp; + +typedef struct { + uint16_t type; + uint8_t flags; + uint8_t segment; + uint8_t addrsz; + uint8_t operandsz; + FdOp operands[4]; + + uint8_t idx_reg; + uint8_t idx_scale; + uint8_t size; + intptr_t disp; + intptr_t imm; + + uintptr_t address; +} FdInstr; + + +/** Decode an instruction. + * \param buf Buffer for instruction bytes. + * \param len Length of the buffer (in bytes). An instruction is not longer than + * 15 bytes on all x86 architectures. + * \param mode Decoding mode, either 32 for protected/compatibility mode or 64 + * for long mode. 16-bit mode is not supported. + * \param address Virtual address of the decoded instruction. This is used + * for computing jump targets and segment-offset-relative memory + * operations (MOV with moffs* encoding) and stored in the instruction. + * \param out_instr Pointer to the instruction buffer. 
Note that this may get + * partially written even if an error is returned. + * \return The number of bytes consumed by the instruction, or a negative number + * indicating an error. + **/ +int fd_decode(const uint8_t* buf, size_t len, int mode, uintptr_t address, + FdInstr* out_instr); + +/** Format an instruction to a string. + * \param instr The instruction. + * \param buf The buffer to hold the formatted string. + * \param len The length of the buffer. + **/ +void fd_format(const FdInstr* instr, char* buf, size_t len); + + +/** Gets the type/mnemonic of the instruction. **/ +#define FD_TYPE(instr) ((FdInstrType) (instr)->type) +/** Gets the address of the instruction. **/ +#define FD_ADDRESS(instr) ((instr)->address) +/** Gets the size of the instruction in bytes. **/ +#define FD_SIZE(instr) ((instr)->size) +/** Gets the specified segment override, or FD_REG_NONE for default segment. **/ +#define FD_SEGMENT(instr) ((FdReg) (instr)->segment) +/** Gets the address size attribute of the instruction in bytes. **/ +#define FD_ADDRSIZE(instr) ((instr)->addrsz) +/** Gets the operation width in bytes of the instruction if this is not encoded + * in the operands, for example for string instructions (e.g. MOVS). **/ +#define FD_OPSIZE(instr) ((instr)->operandsz) +/** Indicates whether the instruction was encoded with a REP prefix. Needed for: + * (1) Handling the instructions MOVS, STOS, LODS, INS and OUTS properly. + * (2) Handling the instructions SCAS and CMPS, for which this means REPZ. + * (3) Distinguishing the instructions BSF (no REP) vs. TZCNT (REP) and the + * instructions BSR (no REP) vs. LZCNT (REP). **/ +#define FD_HAS_REP(instr) ((instr)->flags & FD_FLAG_REP) +/** Indicates whether the instruction was encoded with a REPNZ prefix. Needed for: + * (1) Handling the instructions SCAS and CMPS. + * (2) Distinguishing the instructions MOVBE (no REPNZ) vs. CRC32 (REPNZ). 
**/ +#define FD_HAS_REPNZ(instr) ((instr)->flags & FD_FLAG_REPNZ) +/** Indicates whether the instruction was encoded with a LOCK prefix. Note that + * it is not checked whether the LOCK prefix is valid for the instruction. **/ +#define FD_HAS_LOCK(instr) ((instr)->flags & FD_FLAG_LOCK) +/** Indicates whether the instruction was encoded with a VEX.L prefix. **/ +#define FD_HAS_VEXL(instr) ((instr)->flags & FD_FLAG_VEXL) +#define FD_IS64(instr) ((instr)->flags & FD_FLAG_64) + +/** Gets the type of an operand at the given index. **/ +#define FD_OP_TYPE(instr,idx) ((FdOpType) (instr)->operands[idx].type) +/** Gets the size in bytes of an operand. However, there are a few exceptions: + * (1) For some register types, e.g., segment registers, or x87 registers, the + * size is zero. (This allows some simplifications internally.) + * (2) On some vector instructions this may be only an approximation of the + * actually needed operand size (that is, an instruction may/must only use + * a smaller part than specified here). The real operand size is always + * fully recoverable in combination with the instruction type. **/ +#define FD_OP_SIZE(instr,idx) ((instr)->operands[idx].size) +/** Gets the accessed register index of a register operand. Note that /only/ the + * index is returned, no further interpretation of the index (which depends on + * the instruction type) is done. When an instruction accesses an 8-bit general + * purpose register with an index in the range 4-7, it needs to be determined + * explicitly whether a high-byte register is accessed (using FD_OP_REG_HIGH). + * If that is the case, the index needs to be decreased by 4. + * Only valid if FD_OP_TYPE == FD_OP_REG **/ +#define FD_OP_REG(instr,idx) ((FdReg) (instr)->operands[idx].reg) +/** Returns whether the accessed register is actually a high-byte register when + * used on a general purpose instruction. In that case, the register index has + * to be decreased by 4. 
+ * Only valid if FD_OP_TYPE == FD_OP_REG and the operand refers to a general + * purpose register (depends on the instruction type) **/ +#define FD_OP_REG_HIGH(instr,idx) ( \ + (instr)->operands[idx].size == 1 && \ + (instr)->operands[idx].reg >= 4 && \ + ((instr)->flags & FD_FLAG_REX) == 0 \ + ) +/** Gets the index of the base register from a memory operand, or FD_REG_NONE, + * if the memory operand has no base register. This is the only case where the + * 64-bit register RIP can be returned, in which case the operand also has no + * scaled index register. + * Only valid if FD_OP_TYPE == FD_OP_MEM **/ +#define FD_OP_BASE(instr,idx) ((FdReg) (instr)->operands[idx].reg) +/** Gets the index of the index register from a memory operand, or FD_REG_NONE, + * if the memory operand has no scaled index register. + * Only valid if FD_OP_TYPE == FD_OP_MEM **/ +#define FD_OP_INDEX(instr,idx) ((FdReg) (instr)->idx_reg) +/** Gets the scale of the index register from a memory operand when existent. + * This does /not/ return the scale in an absolute value but returns the amount + * of bits the index register is shifted to the left (i.e. the value is in the + * range 0-3). The actual scale can be computed easily using 1<<FD_OP_SCALE. + * Only valid if FD_OP_TYPE == FD_OP_MEM and FD_OP_INDEX != FD_REG_NONE **/ +#define FD_OP_SCALE(instr,idx) ((instr)->idx_scale) +/** Gets the sign-extended displacement of a memory operand. + * Only valid if FD_OP_TYPE == FD_OP_MEM **/ +#define FD_OP_DISP(instr,idx) ((instr)->disp) +/** Gets the (sign-extended) encoded constant for an immediate operand. 
+ * Only valid if FD_OP_TYPE == FD_OP_IMM **/ +#define FD_OP_IMM(instr,idx) ((instr)->imm) + +#endif diff --git a/format.c b/format.c index 2ceee57..e9485d2 100644 --- a/format.c +++ b/format.c @@ -1,22 +1,23 @@ +#include #include #include #include -#include +#include -#define DECODE_TABLE_STRTAB1 +#define FD_DECODE_TABLE_STRTAB1 static const char* _mnemonic_str = #include ; -#undef DECODE_TABLE_STRTAB1 +#undef FD_DECODE_TABLE_STRTAB1 -#define DECODE_TABLE_STRTAB2 +#define FD_DECODE_TABLE_STRTAB2 static const uint16_t _mnemonic_offs[] = { #include }; -#undef DECODE_TABLE_STRTAB2 +#undef FD_DECODE_TABLE_STRTAB2 #define FMT_CONCAT(buf, end, ...) do { \ buf += snprintf(buf, end - buf, __VA_ARGS__); \ @@ -25,79 +26,83 @@ static const uint16_t _mnemonic_offs[] = { } while (0) void -instr_format(const Instr* instr, char buffer[128]) +fd_format(const FdInstr* instr, char* buffer, size_t len) { char* buf = buffer; - char* end = buffer + 128; + char* end = buffer + len; FMT_CONCAT(buf, end, "["); - if (INSTR_HAS_REP(instr)) + if (FD_HAS_REP(instr)) FMT_CONCAT(buf, end, "rep:"); - if (INSTR_HAS_REPNZ(instr)) + if (FD_HAS_REPNZ(instr)) FMT_CONCAT(buf, end, "repnz:"); - if (INSTR_SEGMENT(instr) < 6) - FMT_CONCAT(buf, end, "%cs:", "ecsdfg"[INSTR_SEGMENT(instr)]); - if (INSTR_IS64(instr) && INSTR_ADDRSZ(instr) == 4) + if (FD_SEGMENT(instr) < 6) + FMT_CONCAT(buf, end, "%cs:", "ecsdfg"[FD_SEGMENT(instr)]); + if (FD_IS64(instr) && FD_ADDRSIZE(instr) == 4) FMT_CONCAT(buf, end, "addr32:"); - if (!INSTR_IS64(instr) && INSTR_ADDRSZ(instr) == 2) + if (!FD_IS64(instr) && FD_ADDRSIZE(instr) == 2) FMT_CONCAT(buf, end, "addr16:"); - if (INSTR_HAS_LOCK(instr)) + if (FD_HAS_LOCK(instr)) FMT_CONCAT(buf, end, "lock:"); - FMT_CONCAT(buf, end, "%s", &_mnemonic_str[_mnemonic_offs[instr->type]]); - if (INSTR_WIDTH(instr)) - FMT_CONCAT(buf, end, "_%u", INSTR_WIDTH(instr)); + FMT_CONCAT(buf, end, "%s", &_mnemonic_str[_mnemonic_offs[FD_TYPE(instr)]]); + if (FD_OPSIZE(instr)) + FMT_CONCAT(buf, end, 
"_%u", FD_OPSIZE(instr)); for (int i = 0; i < 4; i++) { - const struct Operand* operand = &instr->operands[i]; - if (operand->type == OT_NONE) + FdOpType op_type = FD_OP_TYPE(instr, i); + if (op_type == FD_OP_NONE) break; - const char* op_type_name = "reg\0imm\0mem" + operand->type * 4 - 4; - FMT_CONCAT(buf, end, " %s%u:", op_type_name, operand->size); + const char* op_type_name = "reg\0imm\0mem" + op_type * 4 - 4; + FMT_CONCAT(buf, end, " %s%u:", op_type_name, FD_OP_SIZE(instr, i)); - switch (operand->type) + switch (op_type) { size_t immediate; - case OT_REG: - if (operand->size == 1 && !INSTR_HAS_REX(instr) && - operand->reg >= 4 && operand->reg < 8) - FMT_CONCAT(buf, end, "r%uh", operand->reg - 4); + bool has_base; + bool has_idx; + bool has_disp; + case FD_OP_REG: + if (FD_OP_REG_HIGH(instr, i)) + FMT_CONCAT(buf, end, "r%uh", FD_OP_REG(instr, i) - 4); else - FMT_CONCAT(buf, end, "r%u", operand->reg); + FMT_CONCAT(buf, end, "r%u", FD_OP_REG(instr, i)); break; - case OT_IMM: - immediate = instr->immediate; - if (operand->size == 1) + case FD_OP_IMM: + immediate = FD_OP_IMM(instr, i); + if (FD_OP_SIZE(instr, i) == 1) immediate &= 0xff; - else if (operand->size == 2) + else if (FD_OP_SIZE(instr, i) == 2) immediate &= 0xffff; - else if (operand->size == 4) + else if (FD_OP_SIZE(instr, i) == 4) immediate &= 0xffffffff; FMT_CONCAT(buf, end, "0x%lx", immediate); break; - case OT_MEM: - if (!reg_is_none(operand->reg)) + case FD_OP_MEM: + has_base = FD_OP_BASE(instr, i) != FD_REG_NONE; + has_idx = FD_OP_INDEX(instr, i) != FD_REG_NONE; + has_disp = FD_OP_DISP(instr, i) != 0; + if (has_base) { - FMT_CONCAT(buf, end, "r%u", operand->reg); - if (instr->scale != 0 || instr->disp > 0) + FMT_CONCAT(buf, end, "r%u", FD_OP_BASE(instr, i)); + if (has_idx || has_disp) FMT_CONCAT(buf, end, "+"); } - if (instr->scale != 0) + if (has_idx) { - FMT_CONCAT(buf, end, "%u*r%u", 1 << (instr->scale - 1), - instr->sreg); - if (instr->disp > 0) + FMT_CONCAT(buf, end, "%u*r%u", 1 << 
FD_OP_SCALE(instr, i), + FD_OP_INDEX(instr, i)); + if (has_disp) FMT_CONCAT(buf, end, "+"); } - if (instr->disp < 0) - FMT_CONCAT(buf, end, "-0x%lx", -instr->disp); - else if ((reg_is_none(operand->reg) && instr->scale == 0) || - instr->disp > 0) - FMT_CONCAT(buf, end, "0x%lx", instr->disp); + if (FD_OP_DISP(instr, i) < 0) + FMT_CONCAT(buf, end, "-0x%lx", -FD_OP_DISP(instr, i)); + else if (has_disp || (!has_base && !has_idx)) + FMT_CONCAT(buf, end, "0x%lx", FD_OP_DISP(instr, i)); break; - case OT_NONE: + case FD_OP_NONE: default: break; } diff --git a/parseinstrs.py b/parseinstrs.py index b9cd277..ee68364 100644 --- a/parseinstrs.py +++ b/parseinstrs.py @@ -266,15 +266,15 @@ def bytes_to_table(data, notes): for p, c in zip(offs, offs[1:])) template = """// Auto-generated file -- do not modify! -#if defined(DECODE_TABLE_DATA_32) +#if defined(FD_DECODE_TABLE_DATA_32) {hex_table32} -#elif defined(DECODE_TABLE_DATA_64) +#elif defined(FD_DECODE_TABLE_DATA_64) {hex_table64} -#elif defined(DECODE_TABLE_MNEMONICS) +#elif defined(FD_DECODE_TABLE_MNEMONICS) {mnemonic_list} -#elif defined(DECODE_TABLE_STRTAB1) +#elif defined(FD_DECODE_TABLE_STRTAB1) {mnemonic_cstr} -#elif defined(DECODE_TABLE_STRTAB2) +#elif defined(FD_DECODE_TABLE_STRTAB2) {mnemonic_offsets} #else #error "unspecified decode table" @@ -314,7 +314,7 @@ if __name__ == "__main__": file = template.format( hex_table32=bytes_to_table(*table32.compile(mnemonics_lut)), hex_table64=bytes_to_table(*table64.compile(mnemonics_lut)), - mnemonic_list="\n".join("MNEMONIC(%s,%d)"%entry for entry in mnemonics_lut.items()), + mnemonic_list="\n".join("FD_MNEMONIC(%s,%d)"%entry for entry in mnemonics_lut.items()), mnemonic_cstr=mnemonic_cstr, mnemonic_offsets=",".join(str(off) for off in mnemonic_tab), ) diff --git a/tests/driver.c b/tests/driver.c index 8c922d4..b4b0a14 100644 --- a/tests/driver.c +++ b/tests/driver.c @@ -4,7 +4,7 @@ #include #include -#include +#include static @@ -30,21 +30,7 @@ main(int argc, char** argv) 
return -1; } - DecodeMode mode; - size_t mode_input = strtoul(argv[1], NULL, 0); - if (mode_input == 32) - { - mode = DECODE_32; - } - else if (mode_input == 64) - { - mode = DECODE_64; - } - else - { - printf("Unknown decode mode\n"); - return 1; - } + size_t mode = strtoul(argv[1], NULL, 0); // Avoid allocation by transforming hex to binary in-place. uint8_t* code = (uint8_t*) argv[2]; @@ -62,7 +48,7 @@ main(int argc, char** argv) struct timespec time_start; struct timespec time_end; - Instr instr; + FdInstr instr; __asm__ volatile("" : : : "memory"); clock_gettime(CLOCK_MONOTONIC, &time_start); @@ -72,8 +58,8 @@ main(int argc, char** argv) while (current_off != length) { size_t remaining = length - current_off; - int retval = decode(code + current_off, remaining, mode, 0x1234000, - &instr); + int retval = fd_decode(code + current_off, remaining, mode, + 0x1234000, &instr); if (retval < 0) goto fail; current_off += retval; @@ -83,7 +69,7 @@ main(int argc, char** argv) __asm__ volatile("" : : : "memory"); char format_buffer[128]; - instr_format(&instr, format_buffer); + fd_format(&instr, format_buffer, sizeof(format_buffer)); printf("%s\n", format_buffer); if (repetitions > 1)