parseinstrs: Add separate ModRM indicator to desc
Some instructions have no ModRM operand and no extended opcode but still consume a ModRM byte.
This commit is contained in:
95
decode.c
95
decode.c
@@ -84,6 +84,7 @@ struct InstrDesc
|
||||
#define DESC_SIZE_FIX1(desc) (((desc)->operand_sizes >> 10) & 7)
|
||||
#define DESC_SIZE_FIX2(desc) (((desc)->operand_sizes >> 13) & 3)
|
||||
#define DESC_INSTR_WIDTH(desc) (((desc)->operand_sizes >> 15) & 1)
|
||||
#define DESC_MODRM(desc) (((desc)->reg_types >> 14) & 1)
|
||||
#define DESC_IGN66(desc) (((desc)->reg_types >> 15) & 1)
|
||||
|
||||
int
|
||||
@@ -251,7 +252,7 @@ prefix_end:
|
||||
unsigned isreg = (buffer[off] & 0xc0) == 0xc0 ? 8 : 0;
|
||||
table_idx = table_walk(table_idx, ((buffer[off] >> 3) & 7) | isreg, &kind);
|
||||
if (kind == ENTRY_TABLE8E)
|
||||
table_idx = table_walk(table_idx, buffer[off++] & 7, &kind);
|
||||
table_idx = table_walk(table_idx, buffer[off] & 7, &kind);
|
||||
}
|
||||
|
||||
// For VEX prefix, we have to distinguish between VEX.W and VEX.L which may
|
||||
@@ -308,8 +309,32 @@ prefix_end:
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
uint32_t reg_type = (desc->reg_types >> 3 * i) & 0x7;
|
||||
// GPL FPU VEC MSK MMX BND ??? NVR
|
||||
instr->operands[i].misc = (0xf0857641 >> (4 * reg_type)) & 0xf;
|
||||
// GPL FPU VEC MSK MMX BND SEG NVR
|
||||
instr->operands[i].misc = (0xf3857641 >> (4 * reg_type)) & 0xf;
|
||||
}
|
||||
|
||||
if (DESC_MODRM(desc) && UNLIKELY(off++ >= len))
|
||||
return FD_ERR_PARTIAL;
|
||||
unsigned op_byte = buffer[off - 1] | (!DESC_MODRM(desc) ? 0xc0 : 0);
|
||||
|
||||
if (UNLIKELY(instr->type == FDI_MOV_CR || instr->type == FDI_MOV_DR)) {
|
||||
unsigned modreg = (op_byte >> 3) & 0x7;
|
||||
unsigned modrm = op_byte & 0x7;
|
||||
|
||||
FdOp* op_modreg = &instr->operands[DESC_MODREG_IDX(desc)];
|
||||
op_modreg->type = FD_OT_REG;
|
||||
op_modreg->reg = modreg | (prefix_rex & PREFIX_REXR ? 8 : 0);
|
||||
op_modreg->misc = instr->type == FDI_MOV_CR ? FD_RT_CR : FD_RT_DR;
|
||||
if (instr->type == FDI_MOV_CR && (~0x011d >> op_modreg->reg) & 1)
|
||||
return FD_ERR_UD;
|
||||
else if (instr->type == FDI_MOV_DR && prefix_rex & PREFIX_REXR)
|
||||
return FD_ERR_UD;
|
||||
|
||||
FdOp* op_modrm = &instr->operands[DESC_MODRM_IDX(desc)];
|
||||
op_modrm->type = FD_OT_REG;
|
||||
op_modrm->reg = modrm | (prefix_rex & PREFIX_REXB ? 8 : 0);
|
||||
op_modrm->misc = FD_RT_GPL;
|
||||
goto op_sizes;
|
||||
}
|
||||
|
||||
if (UNLIKELY(DESC_HAS_IMPLICIT(desc)))
|
||||
@@ -319,44 +344,23 @@ prefix_end:
|
||||
operand->reg = DESC_IMPLICIT_VAL(desc);
|
||||
}
|
||||
|
||||
if (DESC_HAS_MODREG(desc))
|
||||
{
|
||||
FdOp* op_modreg = &instr->operands[DESC_MODREG_IDX(desc)];
|
||||
unsigned reg_idx = (op_byte & 0x38) >> 3;
|
||||
if (!UNLIKELY(op_modreg->misc == FD_RT_MMX || op_modreg->misc == FD_RT_SEG))
|
||||
reg_idx += prefix_rex & PREFIX_REXR ? 8 : 0;
|
||||
op_modreg->type = FD_OT_REG;
|
||||
op_modreg->reg = reg_idx;
|
||||
}
|
||||
|
||||
if (DESC_HAS_MODRM(desc))
|
||||
{
|
||||
if (UNLIKELY(off >= len))
|
||||
return FD_ERR_PARTIAL;
|
||||
unsigned modrm = buffer[off++];
|
||||
unsigned mod = (modrm & 0xc0) >> 6;
|
||||
unsigned mod_reg = (modrm & 0x38) >> 3;
|
||||
unsigned rm = modrm & 0x07;
|
||||
|
||||
bool is_seg = UNLIKELY(instr->type == FDI_MOV_G2S || instr->type == FDI_MOV_S2G);
|
||||
bool is_cr = UNLIKELY(instr->type == FDI_MOV_CR);
|
||||
bool is_dr = UNLIKELY(instr->type == FDI_MOV_DR);
|
||||
|
||||
if (DESC_HAS_MODREG(desc))
|
||||
{
|
||||
FdOp* op_modreg = &instr->operands[DESC_MODREG_IDX(desc)];
|
||||
unsigned reg_idx = mod_reg;
|
||||
if (!is_seg && !UNLIKELY(op_modreg->misc == FD_RT_MMX))
|
||||
reg_idx += prefix_rex & PREFIX_REXR ? 8 : 0;
|
||||
|
||||
if (is_cr && (~0x011d >> reg_idx) & 1)
|
||||
return FD_ERR_UD;
|
||||
else if (is_dr && reg_idx >= 8)
|
||||
return FD_ERR_UD;
|
||||
|
||||
op_modreg->type = FD_OT_REG;
|
||||
op_modreg->reg = reg_idx;
|
||||
if (is_cr)
|
||||
op_modreg->misc = FD_RT_CR;
|
||||
else if (is_dr)
|
||||
op_modreg->misc = FD_RT_DR;
|
||||
else if (is_seg)
|
||||
op_modreg->misc = FD_RT_SEG;
|
||||
}
|
||||
|
||||
FdOp* op_modrm = &instr->operands[DESC_MODRM_IDX(desc)];
|
||||
|
||||
if (mod == 3 || is_cr || is_dr)
|
||||
unsigned mod = (op_byte & 0xc0) >> 6;
|
||||
unsigned rm = op_byte & 0x07;
|
||||
if (mod == 3)
|
||||
{
|
||||
uint8_t reg_idx = rm;
|
||||
if (LIKELY(op_modrm->misc == FD_RT_GPL || op_modrm->misc == FD_RT_VEC))
|
||||
@@ -421,22 +425,6 @@ prefix_end:
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (DESC_HAS_MODREG(desc))
|
||||
{
|
||||
// If there is no ModRM, but a Mod-Reg, it's opcode-encoded.
|
||||
FdOp* operand = &instr->operands[DESC_MODREG_IDX(desc)];
|
||||
operand->type = FD_OT_REG;
|
||||
if (LIKELY(!DESC_VSIB(desc)))
|
||||
{
|
||||
// Only used for GP registers, therefore always apply REX.B.
|
||||
operand->reg = (buffer[off - 1] & 7) + (prefix_rex & PREFIX_REXB ? 8 : 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
operand->misc = FD_RT_SEG;
|
||||
operand->reg = (buffer[off - 1] >> 3) & 7;
|
||||
}
|
||||
}
|
||||
|
||||
if (UNLIKELY(DESC_HAS_VEXREG(desc)))
|
||||
{
|
||||
@@ -576,6 +564,7 @@ prefix_end:
|
||||
instr->flags |= FD_FLAG_LOCK;
|
||||
}
|
||||
|
||||
op_sizes:;
|
||||
uint8_t operand_sizes[4] = {
|
||||
1 << DESC_SIZE_FIX1(desc) >> 1, 1 << DESC_SIZE_FIX2(desc), op_size, vec_size
|
||||
};
|
||||
|
||||
@@ -726,9 +726,9 @@ NP.0fae/0m M MEMZ - - - FXSAVE INSTR_WIDTH
|
||||
NP.0fae/1m M MEMZ - - - FXRSTOR INSTR_WIDTH
|
||||
NP.0fae/2m M MEM32 - - - LDMXCSR
|
||||
NP.0fae/3m M MEM32 - - - STMXCSR
|
||||
NP.0faee8 NP - - - - LFENCE
|
||||
NP.0faef0 NP - - - - MFENCE
|
||||
NP.0faef8 NP - - - - SFENCE
|
||||
NP.0fae/5r NP - - - - LFENCE
|
||||
NP.0fae/6r NP - - - - MFENCE
|
||||
NP.0fae/7r NP - - - - SFENCE
|
||||
NP.0fc2 RMI XMM XMM IMM8 - SSE_CMPPS
|
||||
66.0fc2 RMI XMM XMM IMM8 - SSE_CMPPD
|
||||
F3.0fc2 RMI XMM XMM32 IMM8 - SSE_CMPSS
|
||||
|
||||
@@ -28,7 +28,8 @@ INSTR_FLAGS_FIELDS, INSTR_FLAGS_SIZES = zip(*[
|
||||
("op0_regty", 3),
|
||||
("op1_regty", 3),
|
||||
("op2_regty", 3),
|
||||
("unused", 6),
|
||||
("unused", 5),
|
||||
("modrm", 1),
|
||||
("ign66", 1),
|
||||
][::-1])
|
||||
class InstrFlags(namedtuple("InstrFlags", INSTR_FLAGS_FIELDS)):
|
||||
@@ -43,36 +44,36 @@ class InstrFlags(namedtuple("InstrFlags", INSTR_FLAGS_FIELDS)):
|
||||
|
||||
ENCODINGS = {
|
||||
"NP": InstrFlags(),
|
||||
"M": InstrFlags(modrm_idx=0^3),
|
||||
"M1": InstrFlags(modrm_idx=0^3, imm_idx=1^3, imm_control=1),
|
||||
"MI": InstrFlags(modrm_idx=0^3, imm_idx=1^3, imm_control=4),
|
||||
"MC": InstrFlags(modrm_idx=0^3, zeroreg_idx=1^3, zeroreg_val=1),
|
||||
"MR": InstrFlags(modrm_idx=0^3, modreg_idx=1^3),
|
||||
"RM": InstrFlags(modrm_idx=1^3, modreg_idx=0^3),
|
||||
"RMA": InstrFlags(modrm_idx=1^3, modreg_idx=0^3, zeroreg_idx=2^3),
|
||||
"MRI": InstrFlags(modrm_idx=0^3, modreg_idx=1^3, imm_idx=2^3, imm_control=4),
|
||||
"RMI": InstrFlags(modrm_idx=1^3, modreg_idx=0^3, imm_idx=2^3, imm_control=4),
|
||||
"MRC": InstrFlags(modrm_idx=0^3, modreg_idx=1^3, zeroreg_idx=2^3, zeroreg_val=1),
|
||||
"AM": InstrFlags(modrm_idx=1^3, zeroreg_idx=0^3),
|
||||
"MA": InstrFlags(modrm_idx=0^3, zeroreg_idx=1^3),
|
||||
"M": InstrFlags(modrm=1, modrm_idx=0^3),
|
||||
"M1": InstrFlags(modrm=1, modrm_idx=0^3, imm_idx=1^3, imm_control=1),
|
||||
"MI": InstrFlags(modrm=1, modrm_idx=0^3, imm_idx=1^3, imm_control=4),
|
||||
"MC": InstrFlags(modrm=1, modrm_idx=0^3, zeroreg_idx=1^3, zeroreg_val=1),
|
||||
"MR": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3),
|
||||
"RM": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3),
|
||||
"RMA": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, zeroreg_idx=2^3),
|
||||
"MRI": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3, imm_idx=2^3, imm_control=4),
|
||||
"RMI": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, imm_idx=2^3, imm_control=4),
|
||||
"MRC": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3, zeroreg_idx=2^3, zeroreg_val=1),
|
||||
"AM": InstrFlags(modrm=1, modrm_idx=1^3, zeroreg_idx=0^3),
|
||||
"MA": InstrFlags(modrm=1, modrm_idx=0^3, zeroreg_idx=1^3),
|
||||
"I": InstrFlags(imm_idx=0^3, imm_control=4),
|
||||
"IA": InstrFlags(zeroreg_idx=0^3, imm_idx=1^3, imm_control=4),
|
||||
"O": InstrFlags(modreg_idx=0^3),
|
||||
"OI": InstrFlags(modreg_idx=0^3, imm_idx=1^3, imm_control=4),
|
||||
"OA": InstrFlags(modreg_idx=0^3, zeroreg_idx=1^3),
|
||||
"S": InstrFlags(modreg_idx=0^3, vsib=1), # segment register in bits 3,4,5
|
||||
"O": InstrFlags(modrm_idx=0^3),
|
||||
"OI": InstrFlags(modrm_idx=0^3, imm_idx=1^3, imm_control=4),
|
||||
"OA": InstrFlags(modrm_idx=0^3, zeroreg_idx=1^3),
|
||||
"S": InstrFlags(modreg_idx=0^3), # segment register in bits 3,4,5
|
||||
"A": InstrFlags(zeroreg_idx=0^3),
|
||||
"D": InstrFlags(imm_idx=0^3, imm_control=6),
|
||||
"FD": InstrFlags(zeroreg_idx=0^3, imm_idx=1^3, imm_control=2),
|
||||
"TD": InstrFlags(zeroreg_idx=1^3, imm_idx=0^3, imm_control=2),
|
||||
|
||||
"RVM": InstrFlags(modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3),
|
||||
"RVMI": InstrFlags(modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=4),
|
||||
"RVMR": InstrFlags(modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=3),
|
||||
"RMV": InstrFlags(modrm_idx=1^3, modreg_idx=0^3, vexreg_idx=2^3),
|
||||
"VM": InstrFlags(modrm_idx=1^3, vexreg_idx=0^3),
|
||||
"VMI": InstrFlags(modrm_idx=1^3, vexreg_idx=0^3, imm_idx=2^3, imm_control=4),
|
||||
"MVR": InstrFlags(modrm_idx=0^3, modreg_idx=2^3, vexreg_idx=1^3),
|
||||
"RVM": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3),
|
||||
"RVMI": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=4),
|
||||
"RVMR": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=3),
|
||||
"RMV": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, vexreg_idx=2^3),
|
||||
"VM": InstrFlags(modrm=1, modrm_idx=1^3, vexreg_idx=0^3),
|
||||
"VMI": InstrFlags(modrm=1, modrm_idx=1^3, vexreg_idx=0^3, imm_idx=2^3, imm_control=4),
|
||||
"MVR": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=2^3, vexreg_idx=1^3),
|
||||
}
|
||||
|
||||
class OpKind(NamedTuple):
|
||||
@@ -139,7 +140,8 @@ class InstrDesc(NamedTuple):
|
||||
operands: Tuple[str, ...]
|
||||
flags: FrozenSet[str]
|
||||
|
||||
OPKIND_REGTYS = {"GP": 0, "FPU": 1, "XMM": 2, "MASK": 3, "MMX": 4, "BND": 5}
|
||||
OPKIND_REGTYS = {"GP": 0, "FPU": 1, "XMM": 2, "MASK": 3, "MMX": 4, "BND": 5,
|
||||
"SEG": 6}
|
||||
OPKIND_SIZES = {
|
||||
0: 0, 1: 1, 2: 2, 4: 3, 8: 4, 16: 5, 32: 6, 64: 7, 10: 0,
|
||||
OpKind.SZ_OP: -2, OpKind.SZ_VEC: -3,
|
||||
@@ -151,7 +153,7 @@ class InstrDesc(NamedTuple):
|
||||
operands = tuple(OPKINDS[op] for op in desc[1:5] if op != "-")
|
||||
return cls(desc[5], desc[0], operands, frozenset(desc[6:]))
|
||||
|
||||
def encode(self, ign66):
|
||||
def encode(self, ign66, modrm):
|
||||
flags = ENCODINGS[self.encoding]
|
||||
extraflags = {}
|
||||
|
||||
@@ -181,6 +183,7 @@ class InstrDesc(NamedTuple):
|
||||
if "INSTR_WIDTH" in self.flags: extraflags["instr_width"] = 1
|
||||
if "LOCK" in self.flags: extraflags["lock"] = 1
|
||||
if "VSIB" in self.flags: extraflags["vsib"] = 1
|
||||
if modrm: extraflags["modrm"] = 1
|
||||
|
||||
if "USE66" not in self.flags and (ign66 or "IGN66" in self.flags):
|
||||
extraflags["ign66"] = 1
|
||||
@@ -666,11 +669,12 @@ if __name__ == "__main__":
|
||||
for i, mode in enumerate(args.modes):
|
||||
if "ONLY%d"%(96-mode) not in desc.flags:
|
||||
ign66 = opcode.prefix in ("NP", "66", "F2", "F3")
|
||||
modrm = opcode.modreg or opcode.opcext
|
||||
for opcode_path in opcode.for_trie():
|
||||
if weak:
|
||||
weak_opcodes.append((opcode_path, desc.encode(ign66), i))
|
||||
weak_opcodes.append((opcode_path, desc.encode(ign66, modrm), i))
|
||||
else:
|
||||
table.add_opcode(opcode_path, desc.encode(ign66), i)
|
||||
table.add_opcode(opcode_path, desc.encode(ign66, modrm), i)
|
||||
for k in weak_opcodes:
|
||||
table.fill_free(*k)
|
||||
|
||||
|
||||
@@ -85,6 +85,7 @@ main(int argc, char** argv)
|
||||
TEST64("\x0f\xc7\x0f", "cmpxchg8b qword ptr [rdi]");
|
||||
TEST64("\x48\x0f\xc7\x0f", "cmpxchg16b xmmword ptr [rdi]");
|
||||
TEST("\x66", "PARTIAL");
|
||||
TEST("\xf0", "PARTIAL");
|
||||
TEST("\x0f", "PARTIAL");
|
||||
TEST("\x0f\x38", "PARTIAL");
|
||||
TEST("\x0f\x3a", "PARTIAL");
|
||||
@@ -113,6 +114,7 @@ main(int argc, char** argv)
|
||||
TEST("\x8e\xc8", "UD"); // No mov cs, eax
|
||||
TEST("\x0f\x1e\xc0", "nop eax, eax"); // reserved nop
|
||||
TEST("\x0f\x1e\x04\x25\x01\x00\x00\x00", "nop dword ptr [0x1], eax"); // reserved nop
|
||||
TEST64("\xf3\x4f\x0f\x1e\xfc", "nop r12, r15"); // reserved nop
|
||||
TEST("\xd8\xc1", "fadd st(0), st(1)");
|
||||
TEST("\xdc\xc1", "fadd st(1), st(0)");
|
||||
TEST64("\x41\xd8\xc1", "fadd st(0), st(1)"); // REX.B ignored
|
||||
@@ -303,7 +305,14 @@ main(int argc, char** argv)
|
||||
// TEST32("\x67\x66\x0f\x38\xf8\x01", "movdir64b ax, zmmword ptr [cx]");
|
||||
// TEST64("\x67\x66\x0f\x38\xf8\x01", "movdir64b eax, zmmword ptr [ecx]");
|
||||
|
||||
TEST64("\x0f\xae\xe8", "lfence");
|
||||
TEST("\x0f\xae\xe8", "lfence");
|
||||
TEST("\x0f\xae\xe9", "lfence");
|
||||
TEST("\x0f\xae\xef", "lfence");
|
||||
TEST("\x0f\xae\xf0", "mfence");
|
||||
TEST("\x0f\xae\xf7", "mfence");
|
||||
TEST("\x0f\xae\xf8", "sfence");
|
||||
TEST("\x0f\xae\xf9", "sfence");
|
||||
TEST("\x0f\xae\xff", "sfence");
|
||||
|
||||
TEST("\x0f\x70\xc0\x85", "pshufw mm0, mm0, 0x85");
|
||||
|
||||
|
||||
Reference in New Issue
Block a user