From 50f052488d7fd83b71e5bfd2e29e9634b5011205 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Fri, 2 Apr 2021 14:23:18 +0200 Subject: [PATCH] decode: More precise register types --- decode.c | 27 ++++++++++++++++----------- instrs.txt | 8 ++++---- parseinstrs.py | 41 +++++++++++++++++++++++++++++------------ 3 files changed, 49 insertions(+), 27 deletions(-) diff --git a/decode.c b/decode.c index ccba6f3..31c37b2 100644 --- a/decode.c +++ b/decode.c @@ -86,6 +86,10 @@ struct InstrDesc #define DESC_INSTR_WIDTH(desc) (((desc)->operand_sizes >> 15) & 1) #define DESC_MODRM(desc) (((desc)->reg_types >> 14) & 1) #define DESC_IGN66(desc) (((desc)->reg_types >> 15) & 1) +#define DESC_REGTY_MODRM(desc) (((desc)->reg_types >> 0) & 7) +#define DESC_REGTY_MODREG(desc) (((desc)->reg_types >> 3) & 7) +#define DESC_REGTY_VEXREG(desc) (((desc)->reg_types >> 6) & 3) +#define DESC_REGTY_ZEROREG(desc) (((desc)->reg_types >> 8) & 3) int fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address, @@ -304,14 +308,6 @@ prefix_end: __builtin_memset(instr->operands, 0, sizeof(instr->operands)); - // Reg operand 4 is only possible with RVMR encoding, which implies VEC. - for (int i = 0; i < 3; i++) - { - uint32_t reg_type = (desc->reg_types >> 3 * i) & 0x7; - // GPL FPU VEC MSK MMX BND SEG NVR - instr->operands[i].misc = (0xf3857641 >> (4 * reg_type)) & 0xf; - } - if (DESC_MODRM(desc) && UNLIKELY(off++ >= len)) return FD_ERR_PARTIAL; unsigned op_byte = buffer[off - 1] | (!DESC_MODRM(desc) ? 0xc0 : 0); @@ -341,13 +337,17 @@ prefix_end: FdOp* operand = &instr->operands[DESC_IMPLICIT_IDX(desc)]; operand->type = FD_OT_REG; operand->reg = DESC_IMPLICIT_VAL(desc); + unsigned reg_ty = DESC_REGTY_ZEROREG(desc); // GPL VEC FPU + operand->misc = (0461 >> (3 * reg_ty)) & 0x7; } if (DESC_HAS_MODREG(desc)) { FdOp* op_modreg = &instr->operands[DESC_MODREG_IDX(desc)]; unsigned reg_idx = (op_byte & 0x38) >> 3; - if (!UNLIKELY(op_modreg->misc == FD_RT_MMX || op_modreg->misc == FD_RT_SEG)) + unsigned reg_ty = DESC_REGTY_MODREG(desc); // GPL VEC MSK - MMX SEG + op_modreg->misc = (0350761 >> (3 * reg_ty)) & 0x7; + if (LIKELY(!(reg_ty & 4))) reg_idx += prefix_rex & PREFIX_REXR ? 8 : 0; op_modreg->type = FD_OT_REG; op_modreg->reg = reg_idx; @@ -362,7 +362,9 @@ prefix_end: if (mod == 3) { uint8_t reg_idx = rm; - if (LIKELY(op_modrm->misc == FD_RT_GPL || op_modrm->misc == FD_RT_VEC)) + unsigned reg_ty = DESC_REGTY_MODRM(desc); // GPL VEC - - MMX FPU MSK + op_modrm->misc = (07450061 >> (3 * reg_ty)) & 0x7; + if (LIKELY(!(reg_ty & 4))) reg_idx += prefix_rex & PREFIX_REXB ? 8 : 0; op_modrm->type = FD_OT_REG; op_modrm->reg = reg_idx; @@ -433,6 +435,9 @@ skip_modrm: if (mode == DECODE_32) vex_operand &= 0x7; operand->reg = vex_operand; + + unsigned reg_ty = DESC_REGTY_VEXREG(desc); // GPL VEC MSK + operand->misc = (0761 >> (3 * reg_ty)) & 0x7; } else if (vex_operand != 0) { @@ -498,7 +503,7 @@ skip_modrm: instr->type == FDI_SSE_EXTRQ || instr->type == FDI_SSE_INSERTQ)) imm_size = 2; - else if (UNLIKELY(desc->type == FDI_JMPF || desc->type == FDI_CALLF)) + else if (UNLIKELY(instr->type == FDI_JMPF || instr->type == FDI_CALLF)) imm_size = op_size + 2; else if (UNLIKELY(instr->type == FDI_ENTER)) imm_size = 3; diff --git a/instrs.txt b/instrs.txt index 74a3936..950480b 100644 --- a/instrs.txt +++ b/instrs.txt @@ -164,10 +164,10 @@ 9d NP - - - - POPF DEF64 INSTR_WIDTH 9e NP - - - - SAHF 9f NP - - - - LAHF -a0 FD GP GP - - MOV SIZE_8 -a1 FD GP GP - - MOV -a2 TD GP GP - - MOV SIZE_8 -a3 TD GP GP - - MOV +a0 FD GP MEM - - MOV SIZE_8 +a1 FD GP MEM - - MOV +a2 TD MEM GP - - MOV SIZE_8 +a3 TD MEM GP - - MOV a4 NP - - - - MOVS SIZE_8 INSTR_WIDTH ENC_REP a5 NP - - - - MOVS INSTR_WIDTH ENC_REP a6 NP - - - - CMPS SIZE_8 INSTR_WIDTH ENC_REPCC diff --git a/parseinstrs.py b/parseinstrs.py index dddd54a..1cf61fe 100644 --- a/parseinstrs.py +++ b/parseinstrs.py @@ -26,10 +26,11 @@ INSTR_FLAGS_FIELDS, INSTR_FLAGS_SIZES = zip(*[ ("size_fix1", 3), ("size_fix2", 2), ("instr_width", 1), - ("op0_regty", 3), - ("op1_regty", 3), - ("op2_regty", 3), - ("unused", 5), + ("modrm_ty", 3), + ("modreg_ty", 3), + ("vexreg_ty", 2), + ("zeroreg_ty", 2), + ("unused", 4), ("modrm", 1), ("ign66", 1), ][::-1]) @@ -76,6 +77,8 @@ ENCODINGS = { "VMI": InstrFlags(modrm=1, modrm_idx=1^3, vexreg_idx=0^3, imm_idx=2^3, imm_control=4), "MVR": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=2^3, vexreg_idx=1^3), } +ENCODING_OPTYS = ["modrm", "modreg", "vexreg", "zeroreg", "imm"] +ENCODING_OPORDER = { enc: sorted(ENCODING_OPTYS, key=lambda ty: getattr(ENCODINGS[enc], ty+"_idx")^3) for enc in ENCODINGS} OPKIND_REGEX = re.compile(r"^([A-Z]+)([0-9]+)?$") OPKIND_DEFAULTS = {"GP": -1, "IMM": -1, "SEG": -1, "MEM": -1, "XMM": -2, "MMX": 8, "FPU": 10} @@ -111,8 +114,11 @@ class InstrDesc(NamedTuple): operands: Tuple[str, ...] flags: FrozenSet[str] - OPKIND_REGTYS = {"GP": 0, "FPU": 1, "XMM": 2, "MASK": 3, "MMX": 4, "BND": 5, - "SEG": 6} + OPKIND_REGTYS_MODRM = { "GP": 0, "XMM": 1, "MMX": 4, "FPU": 5, "MASK": 6, } + OPKIND_REGTYS_MODREG = { "GP": 0, "XMM": 1, "MASK": 2, "MMX": 4, "SEG": 5, + "CR": 0, "DR": 0 } # CR/DR handled in code. + OPKIND_REGTYS_VEXREG = { "GP": 0, "XMM": 1, "MASK": 2 } + OPKIND_REGTYS_ZEROREG = { "GP": 0, "XMM": 1, "FPU": 2 } OPKIND_REGTYS_ENC = {"SEG": 3, "FPU": 4, "MMX": 5, "XMM": 6, "BND": 8, "CR": 9, "DR": 10} OPKIND_SIZES = { @@ -175,19 +181,30 @@ class InstrDesc(NamedTuple): # Sort fixed sizes encodable in size_fix2 as second element. fixed = sorted((x for x in opsz if x >= 0), key=lambda x: 1 <= x <= 4) if len(fixed) > 2 or (len(fixed) == 2 and not (1 <= fixed[1] <= 4)): - raise Exception("invalid fixed operand sizes: %r"%fixed) + raise Exception(f"invalid fixed sizes {fixed} in {self}") sizes = (fixed + [1, 1])[:2] + [-2, -3] # See operand_sizes in decode.c. extraflags["size_fix1"] = sizes[0] extraflags["size_fix2"] = sizes[1] - 1 for i, opkind in enumerate(self.operands): sz = self.OPKIND_SIZES[opkind.size] - reg_type = self.OPKIND_REGTYS.get(opkind.kind, 7) extraflags["op%d_size"%i] = sizes.index(sz) - if i < 3: - extraflags["op%d_regty"%i] = reg_type - elif reg_type not in (7, 2): - raise Exception("invalid regty for op 3, must be VEC") + if i >= 3: + continue + opname = ENCODING_OPORDER[self.encoding][i] + if opname == "modrm": + if opkind.kind == "MEM": + continue + extraflags["modrm_ty"] = self.OPKIND_REGTYS_MODRM[opkind.kind] + elif opname == "modreg": + extraflags["modreg_ty"] = self.OPKIND_REGTYS_MODREG[opkind.kind] + elif opname == "vexreg": + extraflags["vexreg_ty"] = self.OPKIND_REGTYS_VEXREG[opkind.kind] + elif opname == "zeroreg": + extraflags["zeroreg_ty"] = self.OPKIND_REGTYS_ZEROREG[opkind.kind] + else: + if opkind.kind not in ("IMM", "MEM", "XMM"): + raise Exception("invalid regty for op 3, must be VEC") # Miscellaneous Flags if "SIZE_8" in self.flags: extraflags["opsize"] = 1