diff --git a/decode.c b/decode.c index 29d5a21..38e12a9 100644 --- a/decode.c +++ b/decode.c @@ -277,6 +277,7 @@ struct InstrDesc uint8_t gp_fixed_operand_size : 3; uint8_t lock : 1; uint8_t vsib : 1; + uint16_t reg_types; } __attribute__((packed)); #define DESC_HAS_MODRM(desc) (((desc)->operand_indices & (3 << 0)) != 0) @@ -573,6 +574,17 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address, if ((prefixes & PREFIX_LOCK) && instr->operands[0].type != FD_OT_MEM) return -1; + for (int i = 0; i < 4; i++) + { + if (instr->operands[i].type != FD_OT_REG) + continue; + uint32_t reg_type = (desc->reg_types >> 4 * i) & 0xf; + if (reg_type == FD_RT_GPL && !(prefixes & PREFIX_REX) && + instr->operands[i].size == 1 && instr->operands[i].reg >= 4) + reg_type = FD_RT_GPH; + instr->operands[i].misc = reg_type; + } + instr->size = off; return off; diff --git a/fadec.h b/fadec.h index 1516f4b..49df189 100644 --- a/fadec.h +++ b/fadec.h @@ -48,10 +48,36 @@ typedef enum { FD_OT_MEM = 3, } FdOpType; +typedef enum { + /** Register type is encoded in mnemonic **/ + FD_RT_IMP = 0, + /** Low general purpose register **/ + FD_RT_GPL = 1, + /** High-byte general purpose register **/ + FD_RT_GPH = 2, + /** Segment register **/ + FD_RT_SEG = 3, + /** FPU register ST(n) **/ + FD_RT_FPU = 4, + /** MMX register MMn **/ + FD_RT_MMX = 5, + /** Vector (SSE/AVX) register XMMn/YMMn/ZMMn **/ + FD_RT_VEC = 6, + /** Vector mask (AVX-512) register Kn **/ + FD_RT_MASK = 7, + /** Bound register BNDn **/ + FD_RT_BND = 8, + /** Control Register CRn **/ + FD_RT_CR = 9, + /** Debug Register DRn **/ + FD_RT_DR = 10, +} FdRegType; + typedef struct { uint8_t type; uint8_t size; int8_t reg; + uint8_t misc; } FdOp; typedef struct { @@ -137,22 +163,18 @@ void fd_format(const FdInstr* instr, char* buf, size_t len); #define FD_OP_SIZE(instr,idx) ((instr)->operands[idx].size) /** Gets the accessed register index of a register operand. 
Note that /only/ the * index is returned, no further interpretation of the index (which depends on - * the instruction type) is done. When an instruction accesses an 8-bit general - * purpose register with an index in the range 4-7, it needs to be determined - * explicitly whether a high-byte register is accessed (using FD_OP_REG_HIGH). - * If that is the case, the index needs to be decreased by 4. + * the instruction type) is done. The register type can be fetched using + * FD_OP_REG_TYPE, e.g. for distinguishing high-byte registers. * Only valid if FD_OP_TYPE == FD_OT_REG **/ #define FD_OP_REG(instr,idx) ((FdReg) (instr)->operands[idx].reg) -/** Returns whether the accessed register is a actually high-byte register when - * used on a general purpose instruction. In that case, the register index has - * to be decreased by 4. - * Only valid if FD_OP_TYPE == FD_OT_REG and the operand refers to a general - * purpose register (depends on the instruction type) **/ -#define FD_OP_REG_HIGH(instr,idx) ( \ - (instr)->operands[idx].size == 1 && \ - (instr)->operands[idx].reg >= 4 && \ - ((instr)->flags & FD_FLAG_REX) == 0 \ - ) +/** Gets the type of the accessed register. + * Only valid if FD_OP_TYPE == FD_OT_REG **/ +#define FD_OP_REG_TYPE(instr,idx) ((FdRegType) (instr)->operands[idx].misc) +/** DEPRECATED: use FD_OP_REG_TYPE() == FD_RT_GPH instead. + * Returns whether the accessed register is a high-byte register. In that case, + * the register index has to be decreased by 4. + * Only valid if FD_OP_TYPE == FD_OT_REG **/ +#define FD_OP_REG_HIGH(instr,idx) (FD_OP_REG_TYPE(instr,idx) == FD_RT_GPH) /** Gets the index of the base register from a memory operand, or FD_REG_NONE, * if the memory operand has no base register. 
This is the only case where the * 64-bit register RIP can be returned, in which case the operand also has no diff --git a/parseinstrs.py b/parseinstrs.py index 631753c..ed5a203 100644 --- a/parseinstrs.py +++ b/parseinstrs.py @@ -40,6 +40,7 @@ InstrFlags = bitstruct("InstrFlags", [ "gp_fixed_operand_size:3", "lock:1", "vsib:1", + "reg_types:16", ]) ENCODINGS = { @@ -72,26 +73,28 @@ ENCODINGS = { } OPKIND_LOOKUP = { - "-": (0, 0), - "IMM": (2, 0), - "IMM8": (1, 0), - "IMM16": (1, 1), - "IMM32": (1, 2), - "GP": (2, 0), - "GP8": (1, 0), - "GP16": (1, 1), - "GP32": (1, 2), - "GP64": (1, 3), - "XMM": (3, 0), - "XMM8": (1, 0), - "XMM16": (1, 1), - "XMM32": (1, 2), - "XMM64": (1, 3), - "XMM128": (1, 4), - "XMM256": (1, 5), - "SREG": (0, 0), - "FPU": (0, 0), - "MEMZ": (0, 0), + # sizeidx (0, fixedsz, opsz, vecsz), fixedsz (log2), regtype + "-": (0, 0, 0), + "IMM": (2, 0, 0), + "IMM8": (1, 0, 0), + "IMM16": (1, 1, 0), + "IMM32": (1, 2, 0), + "GP": (2, 0, 1), + "GP8": (1, 0, 1), + "GP16": (1, 1, 1), + "GP32": (1, 2, 1), + "GP64": (1, 3, 1), + "XMM": (3, 0, 6), + "XMM8": (1, 0, 6), + "XMM16": (1, 1, 6), + "XMM32": (1, 2, 6), + "XMM64": (1, 3, 6), + "XMM128": (1, 4, 6), + "XMM256": (1, 5, 6), + "SREG": (0, 0, 3), + "FPU": (0, 0, 4), + "MEMZ": (0, 0, 0), + "BND": (0, 0, 0), } class InstrDesc(namedtuple("InstrDesc", "mnemonic,flags,encoding")): @@ -102,13 +105,16 @@ class InstrDesc(namedtuple("InstrDesc", "mnemonic,flags,encoding")): fixed_opsz = set() opsizes = 0 + reg_types = 0 for i, opkind in enumerate(desc[1:5]): - enc_size, fixed_size = OPKIND_LOOKUP[opkind] + enc_size, fixed_size, reg_type = OPKIND_LOOKUP[opkind] if enc_size == 1: fixed_opsz.add(fixed_size) opsizes |= enc_size << 2 * i + reg_types |= reg_type << 4 * i flags = copy(ENCODINGS[desc[0]]) flags.operand_sizes = opsizes + flags.reg_types = reg_types if fixed_opsz: flags.gp_fixed_operand_size = next(iter(fixed_opsz)) # Miscellaneous Flags