decode: Remove TABLE_PREFIX_REP and use NFx prefix

This commit is contained in:
Alexis Engelke
2020-11-08 10:03:18 +01:00
parent 69ce124354
commit 2e7e396325
4 changed files with 102 additions and 72 deletions

View File

@@ -34,7 +34,6 @@ typedef enum DecodeMode DecodeMode;
#define ENTRY_TABLE72 4
#define ENTRY_TABLE_PREFIX 5
#define ENTRY_TABLE_VEX 6
#define ENTRY_TABLE_PREFIX_REP 7
#define ENTRY_TABLE_ROOT 8
#define ENTRY_MASK 7
@@ -326,6 +325,7 @@ struct InstrDesc
#define DESC_SIZE_FIX1(desc) (((desc)->operand_sizes >> 10) & 7)
#define DESC_SIZE_FIX2(desc) (((desc)->operand_sizes >> 13) & 3)
#define DESC_INSTR_WIDTH(desc) (((desc)->operand_sizes >> 15) & 1)
#define DESC_IGN66(desc) (((desc)->reg_types >> 15) & 1)
int
fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
@@ -409,18 +409,12 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
if (kind == ENTRY_TABLE_PREFIX)
{
// If a prefix is mandatory and used as opcode extension, it has no
// further effect on the instruction. This is especially important
// for the 0x66 prefix, which could otherwise override the operand
// size of general purpose registers.
prefixes &= ~(PREFIX_OPSZ | PREFIX_REPNZ | PREFIX_REP);
ENTRY_UNPACK(table, kind, table[mandatory_prefix]);
}
else if (kind == ENTRY_TABLE_PREFIX_REP)
{
// Discard 66h mandatory prefix
uint8_t index = mandatory_prefix != 1 ? mandatory_prefix : 0;
// further effect on the instruction. This, however, does not completely
// apply to the 66 prefix: in rare cases it may affect the size of
// general purpose registers. The instruction descriptor encodes whether
// the 66 prefix has an effect on the instruction (IGN66).
prefixes &= ~(PREFIX_REPNZ | PREFIX_REP);
ENTRY_UNPACK(table, kind, table[index]);
ENTRY_UNPACK(table, kind, table[mandatory_prefix]);
}
// For VEX prefix, we have to distinguish between VEX.W and VEX.L which may
@@ -438,6 +432,9 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
struct InstrDesc* desc = (struct InstrDesc*) table;
if (DESC_IGN66(desc))
prefixes &= ~PREFIX_OPSZ;
instr->type = desc->type;
instr->flags = prefixes & 0x7f;
if (mode == DECODE_64)

View File

@@ -297,35 +297,35 @@ ff/6 M GP - - - PUSH DEF64
0f00/3 M GP16 - - - LTR
0f00/4 M GP16 - - - VERR
0f00/5 M GP16 - - - VERW
0f01//0 M MEMZ - - - SGDT
0f01//1 M MEMZ - - - SIDT
0f01//2 M MEMZ - - - LGDT
0f01//3 M MEMZ - - - LIDT
0f01//4 M GP16 - - - SMSW
0f01//6 M GP16 - - - LMSW
0f01//7 M GP - - - INVLPG SIZE_8
0f01//c8 NP - - - - MONITOR
0f01//c9 NP - - - - MWAIT
0f01//ca NP - - - - CLAC
0f01//cb NP - - - - STAC
0f01//cf NP - - - - ENCLS
0f01//d0 NP - - - - XGETBV
0f01//d1 NP - - - - XSETBV
0f01//d5 NP - - - - XEND
0f01//d6 NP - - - - XTEST
0f01//d7 NP - - - - ENCLU
0f01//e0+ O GP - - - SMSW
0f01//f0+ O GP16 - - - LMSW
0f01//f8 NP - - - - SWAPGS ONLY64
0f01//f9 NP - - - - RDTSCP
NFx.0f01//0 M MEMZ - - - SGDT
NFx.0f01//1 M MEMZ - - - SIDT
NFx.0f01//2 M MEMZ - - - LGDT
NFx.0f01//3 M MEMZ - - - LIDT
NFx.0f01//4 M GP16 - - - SMSW
NFx.0f01//6 M GP16 - - - LMSW
NFx.0f01//7 M GP - - - INVLPG SIZE_8
NFx.0f01//c8 NP - - - - MONITOR
NFx.0f01//c9 NP - - - - MWAIT
NP.0f01//ca NP - - - - CLAC
NP.0f01//cb NP - - - - STAC
NP.0f01//cf NP - - - - ENCLS
NP.0f01//d0 NP - - - - XGETBV
NP.0f01//d1 NP - - - - XSETBV
NP.0f01//d5 NP - - - - XEND
NP.0f01//d6 NP - - - - XTEST
NP.0f01//d7 NP - - - - ENCLU
NFx.0f01//e0+ O GP - - - SMSW
NFx.0f01//f0+ O GP16 - - - LMSW
NFx.0f01//f8 NP - - - - SWAPGS ONLY64
NFx.0f01//f9 NP - - - - RDTSCP
0f02 RM GP GP16 - - LAR
0f03 RM GP GP16 - - LSL
0f05 NP - - - - SYSCALL ONLY64
0f06 NP - - - - CLTS
0f07 NP - - - - SYSRET ONLY64
0f08 NP - - - - INVD
RNP.0f09 NP - - - - WBINVD
RF3.0f09 NP - - - - WBINVD
NFx.0f09 NP - - - - WBINVD
F2.0f09 NP - - - - WBINVD
0f0b NP - - - - UD2
0f0d//0 M MEM8 - - - PREFETCH
0f0d//1 M MEM8 - - - PREFETCHW
@@ -437,32 +437,32 @@ NP.0f37 NP - - - - GETSEC
0fb5 RM GP MEM - - LGS
0fb6 RM GP GP8 - - MOVZX ENC_SEPSZ
0fb7 RM GP GP16 - - MOVZX ENC_SEPSZ
RF3.0fb8 RM GP GP - - POPCNT
F3.0fb8 RM GP GP - - POPCNT USE66
0fb9 RM GP GP - - UD1
0fba/4 MI GP IMM8 - - BT
0fba/5 MI GP IMM8 - - BTS LOCK
0fba/6 MI GP IMM8 - - BTR LOCK
0fba/7 MI GP IMM8 - - BTC LOCK
0fbb MR GP GP - - BTC LOCK
RNP.0fbc RM GP GP - - BSF
RF2.0fbc RM GP GP - - BSF
RF3.0fbc RM GP GP - - TZCNT
RNP.0fbd RM GP GP - - BSR
RF2.0fbd RM GP GP - - BSR
RF3.0fbd RM GP GP - - LZCNT
NFx.0fbc RM GP GP - - BSF
F2.0fbc RM GP GP - - BSF USE66
F3.0fbc RM GP GP - - TZCNT USE66
NFx.0fbd RM GP GP - - BSR
F2.0fbd RM GP GP - - BSR USE66
F3.0fbd RM GP GP - - LZCNT USE66
0fbe RM GP GP8 - - MOVSX ENC_SEPSZ
0fbf RM GP GP16 - - MOVSX ENC_SEPSZ
0fc0 MR GP GP - - XADD SIZE_8 LOCK
0fc1 MR GP GP - - XADD LOCK
NP.0fc3 MR MEM GP - - MOVNTI
0fc7//1 M MEMZ - - - CMPXCHGD LOCK
NP.0fc7//1 M MEMZ - - - CMPXCHGD LOCK
0fc8+ O GP - - - BSWAP
0fff RM GP GP - - UD0
#
RNP.0f38f0 RM GP MEM - - MOVBE
RF2.0f38f0 RM GP32 GP - - CRC32 SIZE_8
RNP.0f38f1 MR MEM GP - - MOVBE
RF2.0f38f1 RM GP32 GP - - CRC32
NFx.0f38f0 RM GP MEM - - MOVBE
F2.0f38f0 RM GP32 GP - - CRC32 USE66 SIZE_8
NFx.0f38f1 MR MEM GP - - MOVBE
F2.0f38f1 RM GP32 GP - - CRC32 USE66
#
# MMX
NP.0f2a RM XMM64 MMX - - MMX_CVTPI2PS
@@ -1443,7 +1443,7 @@ F3.0f38f8 RM GP MEMZ - - ENQCMDS
NP.0f01//c5 NP - - - - PCONFIG
# WBNOINVD
RF2.0f09 NP - - - - WBNOINVD
F3.0f09 NP - - - - WBNOINVD
NP.0f01//ee NP - - - - RDPKRU
NP.0f01//ef NP - - - - WRPKRU
@@ -1460,9 +1460,9 @@ NP.0fae//7 M MEM8 - - - CLFLUSH
NP.0fc7//3 M MEMZ - - - XRSTORS INSTR_WIDTH
NP.0fc7//4 M MEMZ - - - XSAVEC INSTR_WIDTH
NP.0fc7//5 M MEMZ - - - XSAVES INSTR_WIDTH
RNP.0fc7//f0+ O GP - - - RDRAND
RNP.0fc7//f8+ O GP - - - RDSEED
RF3.0fc7//f8+ O GP - - - RDPID DEF64
NFx.0fc7//f0+ O GP - - - RDRAND
NFx.0fc7//f8+ O GP - - - RDSEED
F3.0fc7//f8+ O GP - - - RDPID DEF64
66.0f3882 RM GP MEMZ - - INVPCID DEF64
NP.0f38c8 RM XMM XMM - - SHA1NEXTE
NP.0f38c9 RM XMM XMM - - SHA1MSG1

View File

@@ -47,7 +47,8 @@ InstrFlags = bitstruct("InstrFlags", [
"op0_regty:3",
"op1_regty:3",
"op2_regty:3",
"_unused:7",
"_unused:6",
"ign66:1",
])
ENCODINGS = {
@@ -154,7 +155,7 @@ class InstrDesc(NamedTuple):
operands = tuple(OPKINDS[op] for op in desc[1:5] if op != "-")
return cls(desc[5], desc[0], operands, frozenset(desc[6:]))
def encode(self):
def encode(self, ign66):
flags = copy(ENCODINGS[self.encoding])
opsz = set(self.OPKIND_SIZES[opkind.size] for opkind in self.operands)
@@ -183,6 +184,9 @@ class InstrDesc(NamedTuple):
if "LOCK" in self.flags: flags.lock = 1
if "VSIB" in self.flags: flags.vsib = 1
if "USE66" not in self.flags and (ign66 or "IGN66" in self.flags):
flags.ign66 = 1
if flags.imm_control >= 4:
imm_op = next(op for op in self.operands if op.kind == OpKind.K_IMM)
if ("IMM_8" in self.flags or imm_op.size == 1 or
@@ -202,7 +206,6 @@ class EntryKind(Enum):
TABLE72 = 4
TABLE_PREFIX = 5
TABLE_VEX = 6
TABLE_PREFIX_REP = 7
TABLE_ROOT = -1
class TrieEntry(NamedTuple):
@@ -216,7 +219,6 @@ class TrieEntry(NamedTuple):
EntryKind.TABLE72: 72,
EntryKind.TABLE_PREFIX: 4,
EntryKind.TABLE_VEX: 4,
EntryKind.TABLE_PREFIX_REP: 4,
EntryKind.TABLE_ROOT: 8,
}
@classmethod
@@ -241,9 +243,8 @@ class TrieEntry(NamedTuple):
import re
opcode_regex = re.compile(
r"^(?:(?P<prefixes>(?P<vex>VEX\.)?(?P<legacy>NP|66|F2|F3)\." +
r"(?:W(?P<rexw>[01]|IG)\.)?(?:L(?P<vexl>[01]|IG)\.)?)" +
r"|R(?P<repprefix>NP|F2|F3).)?" +
r"^(?:(?P<prefixes>(?P<vex>VEX\.)?(?P<legacy>NP|66|F2|F3|NFx)\." +
r"(?:W(?P<rexw>[01]|IG)\.)?(?:L(?P<vexl>[01]|IG)\.)?))?" +
r"(?P<opcode>(?:[0-9a-f]{2})+)" +
r"(?P<modrm>//?[0-7]|//[c-f][0-9a-f])?" +
r"(?P<extended>\+)?$")
@@ -272,13 +273,8 @@ class Opcode(NamedTuple):
if match.group("extended") and opcext and not opcext[0]:
raise Exception("invalid opcode extension: {}".format(opcode_string))
prefix_strs = match.group("legacy"), match.group("repprefix")
prefix = prefix_strs[0] or prefix_strs[1]
if prefix:
prefix = prefix_strs[1] is not None, ["NP", "66", "F3", "F2"].index(prefix)
return cls(
prefix=prefix,
prefix=match.group("legacy"),
escape=["", "0f", "0f38", "0f3a"].index(match.group("opcode")[:-2]),
opc=int(match.group("opcode")[-2:], 16),
opcext=opcext,
@@ -300,9 +296,11 @@ class Opcode(NamedTuple):
last_type, last_indices = opcode[-1]
opcode[-1] = last_type, [last_indices[0] + i for i in range(8)]
if self.prefix:
prefix_kind = [EntryKind.TABLE_PREFIX, EntryKind.TABLE_PREFIX_REP][self.prefix[0]]
prefix_val = self.prefix[1]
opcode.append((prefix_kind, [prefix_val]))
if self.prefix == "NFx":
opcode.append((EntryKind.TABLE_PREFIX, [0, 1]))
else:
prefix_val = ["NP", "66", "F3", "F2"].index(self.prefix)
opcode.append((EntryKind.TABLE_PREFIX, [prefix_val]))
if self.vexl in ("0", "1") or self.rexw in ("0", "1"):
rexw = {"0": [0], "1": [1<<0], "IG": [0, 1<<0]}[self.rexw or "IG"]
vexl = {"0": [0], "1": [1<<1], "IG": [0, 1<<1]}[self.vexl or "IG"]
@@ -327,8 +325,6 @@ def format_opcode(opcode):
if byte & 4:
prefix += "VEX."
prefix += ["NP.", "66.", "F3.", "F2."][byte&3]
elif kind == EntryKind.TABLE_PREFIX_REP:
prefix += ["RNP.", "??.", "RF3.", "RF2."][byte&3]
elif kind == EntryKind.TABLE_VEX:
prefix += "W{}.L{}.".format(byte & 1, byte >> 1)
else:
@@ -449,8 +445,10 @@ def encode_table(entries):
hasvex, vecsizes = True, {128, 256}
opc_flags += "|OPC_VEX"
if opcode.prefix:
opc_flags += ["", "|OPC_66", "|OPC_F3", "|OPC_F2"][opcode.prefix[1]]
if not opcode.prefix[0]: opsizes -= {16}
if opcode.prefix in ("66", "F2", "F3"):
opc_flags += "|OPC_" + opcode.prefix
if "USE66" not in desc.flags and opcode.prefix != "NFx":
opsizes -= {16}
if opcode.vexl == "IG":
vecsizes = {0}
elif opcode.vexl:
@@ -596,8 +594,9 @@ if __name__ == "__main__":
for opcode, desc in entries:
for i, mode in enumerate(args.modes):
if "ONLY%d"%(96-mode) not in desc.flags:
ign66 = opcode.prefix in ("NP", "66", "F2", "F3")
for opcode_path in opcode.for_trie():
table.add_opcode(opcode_path, desc.encode(), i)
table.add_opcode(opcode_path, desc.encode(ign66), i)
table.deduplicate()
table_data, annotations, root_offsets = table.compile()

View File

@@ -80,7 +80,12 @@ main(int argc, char** argv)
TEST("\x0f\x38", "PARTIAL");
TEST("\x0f\x3a", "PARTIAL");
TEST("\x80", "PARTIAL");
TEST("\x0F\x01\x22", "[SMSW mem2:r2]");
TEST64("\x48\x0F\x01\x22", "[SMSW mem2:r2]");
TEST("\x66\x0F\x01\x22", "[SMSW mem2:r2]");
TEST("\x0F\x01\xE2", "[SMSW reg4:r2]");
TEST("\x66\x0F\x01\xE2", "[SMSW reg2:r2]");
TEST64("\x66\x48\x0F\x01\xE2", "[SMSW reg8:r2]");
TEST64("\x66\x0f\x20\x00", "[MOV_CR reg8:r0 reg0:r0]");
TEST64("\x0f\x20\xc8", "UD");
TEST64("\x0f\x20\xd0", "[MOV_CR reg8:r0 reg0:r2]");
@@ -209,6 +214,35 @@ main(int argc, char** argv)
TEST64("\xc2\x0d\x00", "[RET_8 imm2:0xd]");
TEST64("\xc2\x0d\xff", "[RET_8 imm2:0xff0d]");
// NFx/66+F2/F3 combinations
TEST("\x0f\xc7\xf0", "[RDRAND reg4:r0]");
TEST64("\x48\x0f\xc7\xf0", "[RDRAND reg8:r0]");
TEST("\x66\x0f\xc7\xf0", "[RDRAND reg2:r0]");
TEST64("\x66\x48\x0f\xc7\xf0", "[RDRAND reg8:r0]");
TEST("\x0f\xc7\xf8", "[RDSEED reg4:r0]");
TEST64("\x48\x0f\xc7\xf8", "[RDSEED reg8:r0]");
TEST("\x66\x0f\xc7\xf8", "[RDSEED reg2:r0]");
TEST64("\x66\x48\x0f\xc7\xf8", "[RDSEED reg8:r0]");
TEST32("\xf3\x0f\xc7\xf8", "[RDPID reg4:r0]");
TEST32("\x66\xf3\x0f\xc7\xf8", "[RDPID reg4:r0]");
TEST32("\xf3\x66\x0f\xc7\xf8", "[RDPID reg4:r0]");
TEST64("\xf3\x0f\xc7\xf8", "[RDPID reg8:r0]");
TEST64("\x66\xf3\x0f\xc7\xf8", "[RDPID reg8:r0]");
TEST64("\xf3\x66\x0f\xc7\xf8", "[RDPID reg8:r0]");
TEST64("\xf3\x0f\xc7\x00", "UD");
TEST64("\x0f\xc7\x30", "[VMPTRLD mem0:r0]");
TEST64("\x66\x0f\xc7\x30", "[VMCLEAR mem0:r0]");
TEST64("\xf3\x0f\xc7\x30", "[VMXON mem0:r0]");
TEST64("\x0f\x09", "[WBINVD]");
TEST64("\xf3\x0f\x09", "[WBNOINVD]");
TEST("\xf3\x0f\x2a\xc1", "[SSE_CVTSI2SS reg4:r0 reg4:r1]");
TEST("\xf3\x66\x0f\x2a\xc1", "[SSE_CVTSI2SS reg4:r0 reg4:r1]");
TEST("\x66\xf3\x0f\x2a\xc1", "[SSE_CVTSI2SS reg4:r0 reg4:r1]");
TEST64("\xf3\x48\x0f\x2a\xc1", "[SSE_CVTSI2SS reg4:r0 reg8:r1]");
TEST64("\x66\xf3\x48\x0f\x2a\xc1", "[SSE_CVTSI2SS reg4:r0 reg8:r1]");
TEST64("\x66\x0f\x50\xc1", "[SSE_MOVMSKPD reg8:r0 reg16:r1]");
TEST("\x66\x0f\xc6\xc0\x01", "[SSE_SHUFPD reg16:r0 reg16:r0 imm1:0x1]");
TEST("\xf3\x0f\x7e\x5c\x24\x08", "[SSE_MOVQ reg16:r3 mem8:r4+0x8]");