diff --git a/decode.c b/decode.c index f47c78a..6a21d9d 100644 --- a/decode.c +++ b/decode.c @@ -39,6 +39,7 @@ typedef enum DecodeMode DecodeMode; #define ENTRY_TABLE_PREFIX 5 #define ENTRY_TABLE_VEX 6 #define ENTRY_TABLE_PREFIX_REP 7 +#define ENTRY_TABLE_ROOT 8 #define ENTRY_MASK 7 #define ENTRY_UNPACK(table,kind,entry) do { \ @@ -348,25 +349,24 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address, return FD_ERR_PARTIAL; off += retval; - uint32_t kind = ENTRY_TABLE256; + uint32_t kind = ENTRY_TABLE_ROOT; - // "Legacy" walk through table and escape opcodes - if (LIKELY(opcode_escape < 0)) - while (kind == ENTRY_TABLE256 && LIKELY(off < len)) - ENTRY_UNPACK(table, kind, table[buffer[off++]]); - // VEX/EVEX compact escapes; the prefix precedes the single opcode byte - else if (opcode_escape == 1 || opcode_escape == 2 || opcode_escape == 3) + if (LIKELY(!(prefixes & PREFIX_VEX))) { - ENTRY_UNPACK(table, kind, table[0x0F]); - if (opcode_escape == 2) - ENTRY_UNPACK(table, kind, table[0x38]); - if (opcode_escape == 3) - ENTRY_UNPACK(table, kind, table[0x3A]); - if (LIKELY(off < len)) + // "Legacy" walk through table and escape opcodes + ENTRY_UNPACK(table, kind, table[0]); + while (kind == ENTRY_TABLE256 && LIKELY(off < len)) ENTRY_UNPACK(table, kind, table[buffer[off++]]); } else - return FD_ERR_UD; + { + // VEX/EVEX compact escapes; the prefix precedes the single opcode byte + if (opcode_escape < 0 || opcode_escape > 3) + return FD_ERR_UD; + ENTRY_UNPACK(table, kind, table[4 | opcode_escape]); + if (LIKELY(off < len)) + ENTRY_UNPACK(table, kind, table[buffer[off++]]); + } // Then, walk through ModR/M-encoded opcode extensions. if ((kind == ENTRY_TABLE8 || kind == ENTRY_TABLE72) && LIKELY(off < len)) @@ -389,18 +389,12 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address, // Handle mandatory prefixes (which behave like an opcode ext.). if (kind == ENTRY_TABLE_PREFIX) { - uint8_t index = mandatory_prefix; - index |= prefixes & PREFIX_VEX ? (1 << 2) : 0; // If a prefix is mandatory and used as opcode extension, it has no // further effect on the instruction. This is especially important // for the 0x66 prefix, which could otherwise override the operand // size of general purpose registers. prefixes &= ~(PREFIX_OPSZ | PREFIX_REPNZ | PREFIX_REP); - ENTRY_UNPACK(table, kind, table[index]); - } - else if (prefixes & PREFIX_VEX) - { - return FD_ERR_UD; + ENTRY_UNPACK(table, kind, table[mandatory_prefix]); } if (kind == ENTRY_TABLE_PREFIX_REP) @@ -519,7 +513,6 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address, { FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)]; operand->type = FD_OT_IMM; - operand->size = 1; instr->imm = 1; } else if (imm_control == 2) @@ -527,7 +520,6 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address, FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)]; operand->type = FD_OT_MEM; operand->reg = FD_REG_NONE; - operand->size = op_size; instr->idx_reg = FD_REG_NONE; if (UNLIKELY(off + addr_size > len)) diff --git a/parseinstrs.py b/parseinstrs.py index 274bb2f..33e4ae1 100644 --- a/parseinstrs.py +++ b/parseinstrs.py @@ -142,6 +142,7 @@ class EntryKind(Enum): TABLE_PREFIX = 5 TABLE_VEX = 6 TABLE_PREFIX_REP = 7 + TABLE_ROOT = -1 class TrieEntry(namedtuple("TrieEntry", "kind,items,payload")): __slots__ = () @@ -149,9 +150,10 @@ class TrieEntry(namedtuple("TrieEntry", "kind,items,payload")): EntryKind.TABLE256: 256, EntryKind.TABLE8: 8, EntryKind.TABLE72: 72, - EntryKind.TABLE_PREFIX: 8, + EntryKind.TABLE_PREFIX: 4, EntryKind.TABLE_VEX: 4, EntryKind.TABLE_PREFIX_REP: 4, + EntryKind.TABLE_ROOT: 8, } @classmethod def table(cls, kind): @@ -184,7 +186,19 @@ def parse_opcode(opcode_string): if match is None: raise Exception("invalid opcode: '%s'" % opcode_string) - opcode = [(EntryKind.TABLE256, [x]) for x in unhexlify(match.group("opcode"))] + opcode = [] + opcode_bytes = unhexlify(match.group("opcode")) + + # root table, VEX prefix already consumes escape opcode bytes + if match.group("vex"): + idx = [b"", b"\x0f", b"\x0f\x38", b"\x0f\x3a"].index(opcode_bytes[:-1]) + opcode.append((EntryKind.TABLE_ROOT, [4 | idx])) + opcode_bytes = opcode_bytes[-1:] + else: + opcode.append((EntryKind.TABLE_ROOT, [0])) + + # normal opcode bytes + opcode += [(EntryKind.TABLE256, [x]) for x in opcode_bytes] opcext = match.group("modrm") if opcext: @@ -208,8 +222,7 @@ def parse_opcode(opcode_string): if match.group("prefixes"): legacy = {"NP": 0, "66": 1, "F3": 2, "F2": 3}[match.group("legacy")] - entry = legacy | ((1 << 2) if match.group("vex") else 0) - opcode.append((EntryKind.TABLE_PREFIX, [entry])) + opcode.append((EntryKind.TABLE_PREFIX, [legacy])) if match.group("vexl") or match.group("rexw"): rexw = match.group("rexw") @@ -230,7 +243,10 @@ def format_opcode(opcode): opcode_string = "" prefix = "" for kind, byte in opcode: - if kind == EntryKind.TABLE256: + if kind == EntryKind.TABLE_ROOT: + opcode_string += ["", "0f", "0f38", "0f3a"][byte & 3] + prefix += ["", "VEX."][byte >> 2] + elif kind == EntryKind.TABLE256: opcode_string += "{:02x}".format(byte) elif kind in (EntryKind.TABLE8, EntryKind.TABLE72): opcode_string += "/{:x}".format(byte) @@ -251,7 +267,7 @@ class Table: self.data = OrderedDict() self.roots = ["root%d"%i for i in range(root_count)] for i in range(root_count): - self.data["root%d"%i] = TrieEntry.table(EntryKind.TABLE256) + self.data["root%d"%i] = TrieEntry.table(EntryKind.TABLE_ROOT) self.offsets = {} self.annotations = {} @@ -273,7 +289,7 @@ class Table: # An opcode can occur once only. if table.items[opcode[-1][1]]: - raise Exception("opcode_collision for {}".format(name)) + raise Exception("opcode collision for {}".format(name)) table.items[opcode[-1][1]] = name self.data[name] = TrieEntry.instr(instr_encoding) diff --git a/tests/decode-sse-movq.sh b/tests/decode-sse-movq.sh index f528fa7..e0edbb1 100644 --- a/tests/decode-sse-movq.sh +++ b/tests/decode-sse-movq.sh @@ -2,3 +2,7 @@ decode f30f7e5c2408 [SSE_MOVQ_X2X reg8:r3 mem8:r4+0x8] decode c5f96ec8 [VMOVD_G2X reg4:r1 reg4:r0] decode64 c4e1f96ec8 [VMOVQ_G2X reg8:r1 reg8:r0] decode32 c4e1f96ec8 [VMOVD_G2X reg4:r1 reg4:r0] +decode c5f22ac0 [VCVTSI2SS reg16:r0 reg16:r1 reg4:r0] +decode32 c4e1f22ac0 [VCVTSI2SS reg16:r0 reg16:r1 reg4:r0] +decode64 c4e1f22ac0 [VCVTSI2SS reg16:r0 reg16:r1 reg8:r0] +decode64 c4e2759004e7 [VPGATHERDD reg32:r0 mem32:r7+8*r4 reg32:r1]