From 2e7e396325f2a089d7ef2386e5dc5c24a3777ef3 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Sun, 8 Nov 2020 10:03:18 +0100 Subject: [PATCH] decode: Remove TABLE_PREFIX_REP and use NFx prefix --- decode.c | 21 ++++++------ instrs.txt | 78 ++++++++++++++++++++++----------------------- parseinstrs.py | 41 ++++++++++++------------ tests/test_decode.c | 34 ++++++++++++++++++++ 4 files changed, 102 insertions(+), 72 deletions(-) diff --git a/decode.c b/decode.c index 33214bb..8e81238 100644 --- a/decode.c +++ b/decode.c @@ -34,7 +34,6 @@ typedef enum DecodeMode DecodeMode; #define ENTRY_TABLE72 4 #define ENTRY_TABLE_PREFIX 5 #define ENTRY_TABLE_VEX 6 -#define ENTRY_TABLE_PREFIX_REP 7 #define ENTRY_TABLE_ROOT 8 #define ENTRY_MASK 7 @@ -326,6 +325,7 @@ struct InstrDesc #define DESC_SIZE_FIX1(desc) (((desc)->operand_sizes >> 10) & 7) #define DESC_SIZE_FIX2(desc) (((desc)->operand_sizes >> 13) & 3) #define DESC_INSTR_WIDTH(desc) (((desc)->operand_sizes >> 15) & 1) +#define DESC_IGN66(desc) (((desc)->reg_types >> 15) & 1) int fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address, @@ -409,18 +409,12 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address, if (kind == ENTRY_TABLE_PREFIX) { // If a prefix is mandatory and used as opcode extension, it has no - // further effect on the instruction. This is especially important - // for the 0x66 prefix, which could otherwise override the operand - // size of general purpose registers. - prefixes &= ~(PREFIX_OPSZ | PREFIX_REPNZ | PREFIX_REP); - ENTRY_UNPACK(table, kind, table[mandatory_prefix]); - } - else if (kind == ENTRY_TABLE_PREFIX_REP) - { - // Discard 66h mandatory prefix - uint8_t index = mandatory_prefix != 1 ? mandatory_prefix : 0; + // further effect on the instruction. This, however, does not completely + // apply to the 66 prefix: in rare cases it may affect the size of + // general purpose registers. 
The instruction descriptor encodes whether + // the 66 prefix has an effect on the instruction (IGN66). prefixes &= ~(PREFIX_REPNZ | PREFIX_REP); - ENTRY_UNPACK(table, kind, table[index]); + ENTRY_UNPACK(table, kind, table[mandatory_prefix]); } // For VEX prefix, we have to distinguish between VEX.W and VEX.L which may @@ -438,6 +432,9 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address, struct InstrDesc* desc = (struct InstrDesc*) table; + if (DESC_IGN66(desc)) + prefixes &= ~PREFIX_OPSZ; + instr->type = desc->type; instr->flags = prefixes & 0x7f; if (mode == DECODE_64) diff --git a/instrs.txt b/instrs.txt index 65e871e..d077bb5 100644 --- a/instrs.txt +++ b/instrs.txt @@ -297,35 +297,35 @@ ff/6 M GP - - - PUSH DEF64 0f00/3 M GP16 - - - LTR 0f00/4 M GP16 - - - VERR 0f00/5 M GP16 - - - VERW -0f01//0 M MEMZ - - - SGDT -0f01//1 M MEMZ - - - SIDT -0f01//2 M MEMZ - - - LGDT -0f01//3 M MEMZ - - - LIDT -0f01//4 M GP16 - - - SMSW -0f01//6 M GP16 - - - LMSW -0f01//7 M GP - - - INVLPG SIZE_8 -0f01//c8 NP - - - - MONITOR -0f01//c9 NP - - - - MWAIT -0f01//ca NP - - - - CLAC -0f01//cb NP - - - - STAC -0f01//cf NP - - - - ENCLS -0f01//d0 NP - - - - XGETBV -0f01//d1 NP - - - - XSETBV -0f01//d5 NP - - - - XEND -0f01//d6 NP - - - - XTEST -0f01//d7 NP - - - - ENCLU -0f01//e0+ O GP - - - SMSW -0f01//f0+ O GP16 - - - LMSW -0f01//f8 NP - - - - SWAPGS ONLY64 -0f01//f9 NP - - - - RDTSCP +NFx.0f01//0 M MEMZ - - - SGDT +NFx.0f01//1 M MEMZ - - - SIDT +NFx.0f01//2 M MEMZ - - - LGDT +NFx.0f01//3 M MEMZ - - - LIDT +NFx.0f01//4 M GP16 - - - SMSW +NFx.0f01//6 M GP16 - - - LMSW +NFx.0f01//7 M GP - - - INVLPG SIZE_8 +NFx.0f01//c8 NP - - - - MONITOR +NFx.0f01//c9 NP - - - - MWAIT +NP.0f01//ca NP - - - - CLAC +NP.0f01//cb NP - - - - STAC +NP.0f01//cf NP - - - - ENCLS +NP.0f01//d0 NP - - - - XGETBV +NP.0f01//d1 NP - - - - XSETBV +NP.0f01//d5 NP - - - - XEND +NP.0f01//d6 NP - - - - XTEST +NP.0f01//d7 NP - - - - ENCLU +NFx.0f01//e0+ O GP - - - SMSW +NFx.0f01//f0+ O GP16 
- - - LMSW +NFx.0f01//f8 NP - - - - SWAPGS ONLY64 +NFx.0f01//f9 NP - - - - RDTSCP 0f02 RM GP GP16 - - LAR 0f03 RM GP GP16 - - LSL 0f05 NP - - - - SYSCALL ONLY64 0f06 NP - - - - CLTS 0f07 NP - - - - SYSRET ONLY64 0f08 NP - - - - INVD -RNP.0f09 NP - - - - WBINVD -RF3.0f09 NP - - - - WBINVD +NFx.0f09 NP - - - - WBINVD +F2.0f09 NP - - - - WBINVD 0f0b NP - - - - UD2 0f0d//0 M MEM8 - - - PREFETCH 0f0d//1 M MEM8 - - - PREFETCHW @@ -437,32 +437,32 @@ NP.0f37 NP - - - - GETSEC 0fb5 RM GP MEM - - LGS 0fb6 RM GP GP8 - - MOVZX ENC_SEPSZ 0fb7 RM GP GP16 - - MOVZX ENC_SEPSZ -RF3.0fb8 RM GP GP - - POPCNT +F3.0fb8 RM GP GP - - POPCNT USE66 0fb9 RM GP GP - - UD1 0fba/4 MI GP IMM8 - - BT 0fba/5 MI GP IMM8 - - BTS LOCK 0fba/6 MI GP IMM8 - - BTR LOCK 0fba/7 MI GP IMM8 - - BTC LOCK 0fbb MR GP GP - - BTC LOCK -RNP.0fbc RM GP GP - - BSF -RF2.0fbc RM GP GP - - BSF -RF3.0fbc RM GP GP - - TZCNT -RNP.0fbd RM GP GP - - BSR -RF2.0fbd RM GP GP - - BSR -RF3.0fbd RM GP GP - - LZCNT +NFx.0fbc RM GP GP - - BSF +F2.0fbc RM GP GP - - BSF USE66 +F3.0fbc RM GP GP - - TZCNT USE66 +NFx.0fbd RM GP GP - - BSR +F2.0fbd RM GP GP - - BSR USE66 +F3.0fbd RM GP GP - - LZCNT USE66 0fbe RM GP GP8 - - MOVSX ENC_SEPSZ 0fbf RM GP GP16 - - MOVSX ENC_SEPSZ 0fc0 MR GP GP - - XADD SIZE_8 LOCK 0fc1 MR GP GP - - XADD LOCK NP.0fc3 MR MEM GP - - MOVNTI -0fc7//1 M MEMZ - - - CMPXCHGD LOCK +NP.0fc7//1 M MEMZ - - - CMPXCHGD LOCK 0fc8+ O GP - - - BSWAP 0fff RM GP GP - - UD0 # -RNP.0f38f0 RM GP MEM - - MOVBE -RF2.0f38f0 RM GP32 GP - - CRC32 SIZE_8 -RNP.0f38f1 MR MEM GP - - MOVBE -RF2.0f38f1 RM GP32 GP - - CRC32 +NFx.0f38f0 RM GP MEM - - MOVBE +F2.0f38f0 RM GP32 GP - - CRC32 USE66 SIZE_8 +NFx.0f38f1 MR MEM GP - - MOVBE +F2.0f38f1 RM GP32 GP - - CRC32 USE66 # # MMX NP.0f2a RM XMM64 MMX - - MMX_CVTPI2PS @@ -1443,7 +1443,7 @@ F3.0f38f8 RM GP MEMZ - - ENQCMDS NP.0f01//c5 NP - - - - PCONFIG # WBNOINVD -RF2.0f09 NP - - - - WBNOINVD +F3.0f09 NP - - - - WBNOINVD NP.0f01//ee NP - - - - RDPKRU NP.0f01//ef NP - - - - WRPKRU @@ -1460,9 
+1460,9 @@ NP.0fae//7 M MEM8 - - - CLFLUSH NP.0fc7//3 M MEMZ - - - XRSTORS INSTR_WIDTH NP.0fc7//4 M MEMZ - - - XSAVEC INSTR_WIDTH NP.0fc7//5 M MEMZ - - - XSAVES INSTR_WIDTH -RNP.0fc7//f0+ O GP - - - RDRAND -RNP.0fc7//f8+ O GP - - - RDSEED -RF3.0fc7//f8+ O GP - - - RDPID DEF64 +NFx.0fc7//f0+ O GP - - - RDRAND +NFx.0fc7//f8+ O GP - - - RDSEED +F3.0fc7//f8+ O GP - - - RDPID DEF64 66.0f3882 RM GP MEMZ - - INVPCID DEF64 NP.0f38c8 RM XMM XMM - - SHA1NEXTE NP.0f38c9 RM XMM XMM - - SHA1MSG1 diff --git a/parseinstrs.py b/parseinstrs.py index b1fc140..12324c8 100644 --- a/parseinstrs.py +++ b/parseinstrs.py @@ -47,7 +47,8 @@ InstrFlags = bitstruct("InstrFlags", [ "op0_regty:3", "op1_regty:3", "op2_regty:3", - "_unused:7", + "_unused:6", + "ign66:1", ]) ENCODINGS = { @@ -154,7 +155,7 @@ class InstrDesc(NamedTuple): operands = tuple(OPKINDS[op] for op in desc[1:5] if op != "-") return cls(desc[5], desc[0], operands, frozenset(desc[6:])) - def encode(self): + def encode(self, ign66): flags = copy(ENCODINGS[self.encoding]) opsz = set(self.OPKIND_SIZES[opkind.size] for opkind in self.operands) @@ -183,6 +184,9 @@ class InstrDesc(NamedTuple): if "LOCK" in self.flags: flags.lock = 1 if "VSIB" in self.flags: flags.vsib = 1 + if "USE66" not in self.flags and (ign66 or "IGN66" in self.flags): + flags.ign66 = 1 + if flags.imm_control >= 4: imm_op = next(op for op in self.operands if op.kind == OpKind.K_IMM) if ("IMM_8" in self.flags or imm_op.size == 1 or @@ -202,7 +206,6 @@ class EntryKind(Enum): TABLE72 = 4 TABLE_PREFIX = 5 TABLE_VEX = 6 - TABLE_PREFIX_REP = 7 TABLE_ROOT = -1 class TrieEntry(NamedTuple): @@ -216,7 +219,6 @@ class TrieEntry(NamedTuple): EntryKind.TABLE72: 72, EntryKind.TABLE_PREFIX: 4, EntryKind.TABLE_VEX: 4, - EntryKind.TABLE_PREFIX_REP: 4, EntryKind.TABLE_ROOT: 8, } @classmethod @@ -241,9 +243,8 @@ class TrieEntry(NamedTuple): import re opcode_regex = re.compile( - r"^(?:(?P<prefixes>(?P<vex>VEX\.)?(?P<legacy>NP|66|F2|F3)\."
+ - r"(?:W(?P<rexw>[01]|IG)\.)?(?:L(?P<vexl>[01]|IG)\.)?)" + - r"|R(?P<repprefix>NP|F2|F3).)?" + + r"^(?:(?P<prefixes>(?P<vex>VEX\.)?(?P<legacy>NP|66|F2|F3|NFx)\." + + r"(?:W(?P<rexw>[01]|IG)\.)?(?:L(?P<vexl>[01]|IG)\.)?))?" + r"(?P<opcode>(?:[0-9a-f]{2})+)" + r"(?P<modrm>//?[0-7]|//[c-f][0-9a-f])?" + r"(?P<extended>\+)?$") @@ -272,13 +273,8 @@ class Opcode(NamedTuple): if match.group("extended") and opcext and not opcext[0]: raise Exception("invalid opcode extension: {}".format(opcode_string)) - prefix_strs = match.group("legacy"), match.group("repprefix") - prefix = prefix_strs[0] or prefix_strs[1] - if prefix: - prefix = prefix_strs[1] is not None, ["NP", "66", "F3", "F2"].index(prefix) - return cls( - prefix=prefix, + prefix=match.group("legacy"), escape=["", "0f", "0f38", "0f3a"].index(match.group("opcode")[:-2]), opc=int(match.group("opcode")[-2:], 16), opcext=opcext, @@ -300,9 +296,11 @@ class Opcode(NamedTuple): last_type, last_indices = opcode[-1] opcode[-1] = last_type, [last_indices[0] + i for i in range(8)] if self.prefix: - prefix_kind = [EntryKind.TABLE_PREFIX, EntryKind.TABLE_PREFIX_REP][self.prefix[0]] - prefix_val = self.prefix[1] - opcode.append((prefix_kind, [prefix_val])) + if self.prefix == "NFx": + opcode.append((EntryKind.TABLE_PREFIX, [0, 1])) + else: + prefix_val = ["NP", "66", "F3", "F2"].index(self.prefix) + opcode.append((EntryKind.TABLE_PREFIX, [prefix_val])) if self.vexl in ("0", "1") or self.rexw in ("0", "1"): rexw = {"0": [0], "1": [1<<0], "IG": [0, 1<<0]}[self.rexw or "IG"] vexl = {"0": [0], "1": [1<<1], "IG": [0, 1<<1]}[self.vexl or "IG"] @@ -327,8 +325,6 @@ def format_opcode(opcode): if byte & 4: prefix += "VEX."
prefix += ["NP.", "66.", "F3.", "F2."][byte&3] - elif kind == EntryKind.TABLE_PREFIX_REP: - prefix += ["RNP.", "??.", "RF3.", "RF2."][byte&3] elif kind == EntryKind.TABLE_VEX: prefix += "W{}.L{}.".format(byte & 1, byte >> 1) else: @@ -449,8 +445,10 @@ def encode_table(entries): hasvex, vecsizes = True, {128, 256} opc_flags += "|OPC_VEX" if opcode.prefix: - opc_flags += ["", "|OPC_66", "|OPC_F3", "|OPC_F2"][opcode.prefix[1]] - if not opcode.prefix[0]: opsizes -= {16} + if opcode.prefix in ("66", "F2", "F3"): + opc_flags += "|OPC_" + opcode.prefix + if "USE66" not in desc.flags and opcode.prefix != "NFx": + opsizes -= {16} if opcode.vexl == "IG": vecsizes = {0} elif opcode.vexl: @@ -596,8 +594,9 @@ if __name__ == "__main__": for opcode, desc in entries: for i, mode in enumerate(args.modes): if "ONLY%d"%(96-mode) not in desc.flags: + ign66 = opcode.prefix in ("NP", "66", "F2", "F3") for opcode_path in opcode.for_trie(): - table.add_opcode(opcode_path, desc.encode(), i) + table.add_opcode(opcode_path, desc.encode(ign66), i) table.deduplicate() table_data, annotations, root_offsets = table.compile() diff --git a/tests/test_decode.c b/tests/test_decode.c index 6a81e38..93ed295 100644 --- a/tests/test_decode.c +++ b/tests/test_decode.c @@ -80,7 +80,12 @@ main(int argc, char** argv) TEST("\x0f\x38", "PARTIAL"); TEST("\x0f\x3a", "PARTIAL"); TEST("\x80", "PARTIAL"); + TEST("\x0F\x01\x22", "[SMSW mem2:r2]"); + TEST64("\x48\x0F\x01\x22", "[SMSW mem2:r2]"); + TEST("\x66\x0F\x01\x22", "[SMSW mem2:r2]"); TEST("\x0F\x01\xE2", "[SMSW reg4:r2]"); + TEST("\x66\x0F\x01\xE2", "[SMSW reg2:r2]"); + TEST64("\x66\x48\x0F\x01\xE2", "[SMSW reg8:r2]"); TEST64("\x66\x0f\x20\x00", "[MOV_CR reg8:r0 reg0:r0]"); TEST64("\x0f\x20\xc8", "UD"); TEST64("\x0f\x20\xd0", "[MOV_CR reg8:r0 reg0:r2]"); @@ -209,6 +214,35 @@ main(int argc, char** argv) TEST64("\xc2\x0d\x00", "[RET_8 imm2:0xd]"); TEST64("\xc2\x0d\xff", "[RET_8 imm2:0xff0d]"); + // NFx/66+F2/F3 combinations + TEST("\x0f\xc7\xf0", "[RDRAND 
reg4:r0]"); + TEST64("\x48\x0f\xc7\xf0", "[RDRAND reg8:r0]"); + TEST("\x66\x0f\xc7\xf0", "[RDRAND reg2:r0]"); + TEST64("\x66\x48\x0f\xc7\xf0", "[RDRAND reg8:r0]"); + TEST("\x0f\xc7\xf8", "[RDSEED reg4:r0]"); + TEST64("\x48\x0f\xc7\xf8", "[RDSEED reg8:r0]"); + TEST("\x66\x0f\xc7\xf8", "[RDSEED reg2:r0]"); + TEST64("\x66\x48\x0f\xc7\xf8", "[RDSEED reg8:r0]"); + TEST32("\xf3\x0f\xc7\xf8", "[RDPID reg4:r0]"); + TEST32("\x66\xf3\x0f\xc7\xf8", "[RDPID reg4:r0]"); + TEST32("\xf3\x66\x0f\xc7\xf8", "[RDPID reg4:r0]"); + TEST64("\xf3\x0f\xc7\xf8", "[RDPID reg8:r0]"); + TEST64("\x66\xf3\x0f\xc7\xf8", "[RDPID reg8:r0]"); + TEST64("\xf3\x66\x0f\xc7\xf8", "[RDPID reg8:r0]"); + TEST64("\xf3\x0f\xc7\x00", "UD"); + TEST64("\x0f\xc7\x30", "[VMPTRLD mem0:r0]"); + TEST64("\x66\x0f\xc7\x30", "[VMCLEAR mem0:r0]"); + TEST64("\xf3\x0f\xc7\x30", "[VMXON mem0:r0]"); + + TEST64("\x0f\x09", "[WBINVD]"); + TEST64("\xf3\x0f\x09", "[WBNOINVD]"); + + TEST("\xf3\x0f\x2a\xc1", "[SSE_CVTSI2SS reg4:r0 reg4:r1]"); + TEST("\xf3\x66\x0f\x2a\xc1", "[SSE_CVTSI2SS reg4:r0 reg4:r1]"); + TEST("\x66\xf3\x0f\x2a\xc1", "[SSE_CVTSI2SS reg4:r0 reg4:r1]"); + TEST64("\xf3\x48\x0f\x2a\xc1", "[SSE_CVTSI2SS reg4:r0 reg8:r1]"); + TEST64("\x66\xf3\x48\x0f\x2a\xc1", "[SSE_CVTSI2SS reg4:r0 reg8:r1]"); + TEST64("\x66\x0f\x50\xc1", "[SSE_MOVMSKPD reg8:r0 reg16:r1]"); TEST("\x66\x0f\xc6\xc0\x01", "[SSE_SHUFPD reg16:r0 reg16:r0 imm1:0x1]"); TEST("\xf3\x0f\x7e\x5c\x24\x08", "[SSE_MOVQ reg16:r3 mem8:r4+0x8]");