parseinstrs: Add separate ModRM indicator to desc

Some instructions have no ModRM operand and no extended opcode but still
consume a ModRM byte.
This commit is contained in:
Alexis Engelke
2021-01-16 14:37:30 +01:00
parent 8561d77c91
commit bd611902b0
4 changed files with 87 additions and 85 deletions

View File

@@ -84,6 +84,7 @@ struct InstrDesc
// Bit-field accessors; `desc` is a pointer whose fields are read with `->`.
// Bits 10-12 of operand_sizes (3-bit field).
#define DESC_SIZE_FIX1(desc) (((desc)->operand_sizes >> 10) & 7)
// Bits 13-14 of operand_sizes (2-bit field).
#define DESC_SIZE_FIX2(desc) (((desc)->operand_sizes >> 13) & 3)
// Bit 15 of operand_sizes (single-bit flag).
#define DESC_INSTR_WIDTH(desc) (((desc)->operand_sizes >> 15) & 1)
// Bit 14 of reg_types: non-zero when the instruction consumes a ModRM byte,
// independent of whether it has a ModRM operand.
#define DESC_MODRM(desc) (((desc)->reg_types >> 14) & 1)
// Bit 15 of reg_types (single-bit flag).
// NOTE(review): presumably "ignore the 0x66 prefix" -- confirm against the table generator.
#define DESC_IGN66(desc) (((desc)->reg_types >> 15) & 1)
int
@@ -251,7 +252,7 @@ prefix_end:
unsigned isreg = (buffer[off] & 0xc0) == 0xc0 ? 8 : 0;
table_idx = table_walk(table_idx, ((buffer[off] >> 3) & 7) | isreg, &kind);
if (kind == ENTRY_TABLE8E)
table_idx = table_walk(table_idx, buffer[off++] & 7, &kind);
table_idx = table_walk(table_idx, buffer[off] & 7, &kind);
}
// For VEX prefix, we have to distinguish between VEX.W and VEX.L which may
@@ -308,8 +309,32 @@ prefix_end:
for (int i = 0; i < 3; i++)
{
uint32_t reg_type = (desc->reg_types >> 3 * i) & 0x7;
// GPL FPU VEC MSK MMX BND ??? NVR
instr->operands[i].misc = (0xf0857641 >> (4 * reg_type)) & 0xf;
// GPL FPU VEC MSK MMX BND SEG NVR
instr->operands[i].misc = (0xf3857641 >> (4 * reg_type)) & 0xf;
}
if (DESC_MODRM(desc) && UNLIKELY(off++ >= len))
return FD_ERR_PARTIAL;
unsigned op_byte = buffer[off - 1] | (!DESC_MODRM(desc) ? 0xc0 : 0);
if (UNLIKELY(instr->type == FDI_MOV_CR || instr->type == FDI_MOV_DR)) {
unsigned modreg = (op_byte >> 3) & 0x7;
unsigned modrm = op_byte & 0x7;
FdOp* op_modreg = &instr->operands[DESC_MODREG_IDX(desc)];
op_modreg->type = FD_OT_REG;
op_modreg->reg = modreg | (prefix_rex & PREFIX_REXR ? 8 : 0);
op_modreg->misc = instr->type == FDI_MOV_CR ? FD_RT_CR : FD_RT_DR;
if (instr->type == FDI_MOV_CR && (~0x011d >> op_modreg->reg) & 1)
return FD_ERR_UD;
else if (instr->type == FDI_MOV_DR && prefix_rex & PREFIX_REXR)
return FD_ERR_UD;
FdOp* op_modrm = &instr->operands[DESC_MODRM_IDX(desc)];
op_modrm->type = FD_OT_REG;
op_modrm->reg = modrm | (prefix_rex & PREFIX_REXB ? 8 : 0);
op_modrm->misc = FD_RT_GPL;
goto op_sizes;
}
if (UNLIKELY(DESC_HAS_IMPLICIT(desc)))
@@ -319,44 +344,23 @@ prefix_end:
operand->reg = DESC_IMPLICIT_VAL(desc);
}
if (DESC_HAS_MODREG(desc))
{
FdOp* op_modreg = &instr->operands[DESC_MODREG_IDX(desc)];
unsigned reg_idx = (op_byte & 0x38) >> 3;
if (!UNLIKELY(op_modreg->misc == FD_RT_MMX || op_modreg->misc == FD_RT_SEG))
reg_idx += prefix_rex & PREFIX_REXR ? 8 : 0;
op_modreg->type = FD_OT_REG;
op_modreg->reg = reg_idx;
}
if (DESC_HAS_MODRM(desc))
{
if (UNLIKELY(off >= len))
return FD_ERR_PARTIAL;
unsigned modrm = buffer[off++];
unsigned mod = (modrm & 0xc0) >> 6;
unsigned mod_reg = (modrm & 0x38) >> 3;
unsigned rm = modrm & 0x07;
bool is_seg = UNLIKELY(instr->type == FDI_MOV_G2S || instr->type == FDI_MOV_S2G);
bool is_cr = UNLIKELY(instr->type == FDI_MOV_CR);
bool is_dr = UNLIKELY(instr->type == FDI_MOV_DR);
if (DESC_HAS_MODREG(desc))
{
FdOp* op_modreg = &instr->operands[DESC_MODREG_IDX(desc)];
unsigned reg_idx = mod_reg;
if (!is_seg && !UNLIKELY(op_modreg->misc == FD_RT_MMX))
reg_idx += prefix_rex & PREFIX_REXR ? 8 : 0;
if (is_cr && (~0x011d >> reg_idx) & 1)
return FD_ERR_UD;
else if (is_dr && reg_idx >= 8)
return FD_ERR_UD;
op_modreg->type = FD_OT_REG;
op_modreg->reg = reg_idx;
if (is_cr)
op_modreg->misc = FD_RT_CR;
else if (is_dr)
op_modreg->misc = FD_RT_DR;
else if (is_seg)
op_modreg->misc = FD_RT_SEG;
}
FdOp* op_modrm = &instr->operands[DESC_MODRM_IDX(desc)];
if (mod == 3 || is_cr || is_dr)
unsigned mod = (op_byte & 0xc0) >> 6;
unsigned rm = op_byte & 0x07;
if (mod == 3)
{
uint8_t reg_idx = rm;
if (LIKELY(op_modrm->misc == FD_RT_GPL || op_modrm->misc == FD_RT_VEC))
@@ -421,22 +425,6 @@ prefix_end:
}
}
}
else if (DESC_HAS_MODREG(desc))
{
// If there is no ModRM, but a Mod-Reg, its opcode-encoded.
FdOp* operand = &instr->operands[DESC_MODREG_IDX(desc)];
operand->type = FD_OT_REG;
if (LIKELY(!DESC_VSIB(desc)))
{
// Only used for GP registers, therefore always apply REX.B.
operand->reg = (buffer[off - 1] & 7) + (prefix_rex & PREFIX_REXB ? 8 : 0);
}
else
{
operand->misc = FD_RT_SEG;
operand->reg = (buffer[off - 1] >> 3) & 7;
}
}
if (UNLIKELY(DESC_HAS_VEXREG(desc)))
{
@@ -576,6 +564,7 @@ prefix_end:
instr->flags |= FD_FLAG_LOCK;
}
op_sizes:;
uint8_t operand_sizes[4] = {
1 << DESC_SIZE_FIX1(desc) >> 1, 1 << DESC_SIZE_FIX2(desc), op_size, vec_size
};

View File

@@ -726,9 +726,9 @@ NP.0fae/0m M MEMZ - - - FXSAVE INSTR_WIDTH
NP.0fae/1m M MEMZ - - - FXRSTOR INSTR_WIDTH
NP.0fae/2m M MEM32 - - - LDMXCSR
NP.0fae/3m M MEM32 - - - STMXCSR
NP.0faee8 NP - - - - LFENCE
NP.0faef0 NP - - - - MFENCE
NP.0faef8 NP - - - - SFENCE
NP.0fae/5r NP - - - - LFENCE
NP.0fae/6r NP - - - - MFENCE
NP.0fae/7r NP - - - - SFENCE
NP.0fc2 RMI XMM XMM IMM8 - SSE_CMPPS
66.0fc2 RMI XMM XMM IMM8 - SSE_CMPPD
F3.0fc2 RMI XMM XMM32 IMM8 - SSE_CMPSS

View File

@@ -28,7 +28,8 @@ INSTR_FLAGS_FIELDS, INSTR_FLAGS_SIZES = zip(*[
("op0_regty", 3),
("op1_regty", 3),
("op2_regty", 3),
("unused", 6),
("unused", 5),
("modrm", 1),
("ign66", 1),
][::-1])
class InstrFlags(namedtuple("InstrFlags", INSTR_FLAGS_FIELDS)):
@@ -43,36 +44,36 @@ class InstrFlags(namedtuple("InstrFlags", INSTR_FLAGS_FIELDS)):
ENCODINGS = {
"NP": InstrFlags(),
"M": InstrFlags(modrm_idx=0^3),
"M1": InstrFlags(modrm_idx=0^3, imm_idx=1^3, imm_control=1),
"MI": InstrFlags(modrm_idx=0^3, imm_idx=1^3, imm_control=4),
"MC": InstrFlags(modrm_idx=0^3, zeroreg_idx=1^3, zeroreg_val=1),
"MR": InstrFlags(modrm_idx=0^3, modreg_idx=1^3),
"RM": InstrFlags(modrm_idx=1^3, modreg_idx=0^3),
"RMA": InstrFlags(modrm_idx=1^3, modreg_idx=0^3, zeroreg_idx=2^3),
"MRI": InstrFlags(modrm_idx=0^3, modreg_idx=1^3, imm_idx=2^3, imm_control=4),
"RMI": InstrFlags(modrm_idx=1^3, modreg_idx=0^3, imm_idx=2^3, imm_control=4),
"MRC": InstrFlags(modrm_idx=0^3, modreg_idx=1^3, zeroreg_idx=2^3, zeroreg_val=1),
"AM": InstrFlags(modrm_idx=1^3, zeroreg_idx=0^3),
"MA": InstrFlags(modrm_idx=0^3, zeroreg_idx=1^3),
"M": InstrFlags(modrm=1, modrm_idx=0^3),
"M1": InstrFlags(modrm=1, modrm_idx=0^3, imm_idx=1^3, imm_control=1),
"MI": InstrFlags(modrm=1, modrm_idx=0^3, imm_idx=1^3, imm_control=4),
"MC": InstrFlags(modrm=1, modrm_idx=0^3, zeroreg_idx=1^3, zeroreg_val=1),
"MR": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3),
"RM": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3),
"RMA": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, zeroreg_idx=2^3),
"MRI": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3, imm_idx=2^3, imm_control=4),
"RMI": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, imm_idx=2^3, imm_control=4),
"MRC": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3, zeroreg_idx=2^3, zeroreg_val=1),
"AM": InstrFlags(modrm=1, modrm_idx=1^3, zeroreg_idx=0^3),
"MA": InstrFlags(modrm=1, modrm_idx=0^3, zeroreg_idx=1^3),
"I": InstrFlags(imm_idx=0^3, imm_control=4),
"IA": InstrFlags(zeroreg_idx=0^3, imm_idx=1^3, imm_control=4),
"O": InstrFlags(modreg_idx=0^3),
"OI": InstrFlags(modreg_idx=0^3, imm_idx=1^3, imm_control=4),
"OA": InstrFlags(modreg_idx=0^3, zeroreg_idx=1^3),
"S": InstrFlags(modreg_idx=0^3, vsib=1), # segment register in bits 3,4,5
"O": InstrFlags(modrm_idx=0^3),
"OI": InstrFlags(modrm_idx=0^3, imm_idx=1^3, imm_control=4),
"OA": InstrFlags(modrm_idx=0^3, zeroreg_idx=1^3),
"S": InstrFlags(modreg_idx=0^3), # segment register in bits 3,4,5
"A": InstrFlags(zeroreg_idx=0^3),
"D": InstrFlags(imm_idx=0^3, imm_control=6),
"FD": InstrFlags(zeroreg_idx=0^3, imm_idx=1^3, imm_control=2),
"TD": InstrFlags(zeroreg_idx=1^3, imm_idx=0^3, imm_control=2),
"RVM": InstrFlags(modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3),
"RVMI": InstrFlags(modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=4),
"RVMR": InstrFlags(modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=3),
"RMV": InstrFlags(modrm_idx=1^3, modreg_idx=0^3, vexreg_idx=2^3),
"VM": InstrFlags(modrm_idx=1^3, vexreg_idx=0^3),
"VMI": InstrFlags(modrm_idx=1^3, vexreg_idx=0^3, imm_idx=2^3, imm_control=4),
"MVR": InstrFlags(modrm_idx=0^3, modreg_idx=2^3, vexreg_idx=1^3),
"RVM": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3),
"RVMI": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=4),
"RVMR": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=3),
"RMV": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, vexreg_idx=2^3),
"VM": InstrFlags(modrm=1, modrm_idx=1^3, vexreg_idx=0^3),
"VMI": InstrFlags(modrm=1, modrm_idx=1^3, vexreg_idx=0^3, imm_idx=2^3, imm_control=4),
"MVR": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=2^3, vexreg_idx=1^3),
}
class OpKind(NamedTuple):
@@ -139,7 +140,8 @@ class InstrDesc(NamedTuple):
operands: Tuple[str, ...]
flags: FrozenSet[str]
OPKIND_REGTYS = {"GP": 0, "FPU": 1, "XMM": 2, "MASK": 3, "MMX": 4, "BND": 5}
OPKIND_REGTYS = {"GP": 0, "FPU": 1, "XMM": 2, "MASK": 3, "MMX": 4, "BND": 5,
"SEG": 6}
OPKIND_SIZES = {
0: 0, 1: 1, 2: 2, 4: 3, 8: 4, 16: 5, 32: 6, 64: 7, 10: 0,
OpKind.SZ_OP: -2, OpKind.SZ_VEC: -3,
@@ -151,7 +153,7 @@ class InstrDesc(NamedTuple):
operands = tuple(OPKINDS[op] for op in desc[1:5] if op != "-")
return cls(desc[5], desc[0], operands, frozenset(desc[6:]))
def encode(self, ign66):
def encode(self, ign66, modrm):
flags = ENCODINGS[self.encoding]
extraflags = {}
@@ -181,6 +183,7 @@ class InstrDesc(NamedTuple):
if "INSTR_WIDTH" in self.flags: extraflags["instr_width"] = 1
if "LOCK" in self.flags: extraflags["lock"] = 1
if "VSIB" in self.flags: extraflags["vsib"] = 1
if modrm: extraflags["modrm"] = 1
if "USE66" not in self.flags and (ign66 or "IGN66" in self.flags):
extraflags["ign66"] = 1
@@ -666,11 +669,12 @@ if __name__ == "__main__":
for i, mode in enumerate(args.modes):
if "ONLY%d"%(96-mode) not in desc.flags:
ign66 = opcode.prefix in ("NP", "66", "F2", "F3")
modrm = opcode.modreg or opcode.opcext
for opcode_path in opcode.for_trie():
if weak:
weak_opcodes.append((opcode_path, desc.encode(ign66), i))
weak_opcodes.append((opcode_path, desc.encode(ign66, modrm), i))
else:
table.add_opcode(opcode_path, desc.encode(ign66), i)
table.add_opcode(opcode_path, desc.encode(ign66, modrm), i)
for k in weak_opcodes:
table.fill_free(*k)

View File

@@ -85,6 +85,7 @@ main(int argc, char** argv)
TEST64("\x0f\xc7\x0f", "cmpxchg8b qword ptr [rdi]");
TEST64("\x48\x0f\xc7\x0f", "cmpxchg16b xmmword ptr [rdi]");
TEST("\x66", "PARTIAL");
TEST("\xf0", "PARTIAL");
TEST("\x0f", "PARTIAL");
TEST("\x0f\x38", "PARTIAL");
TEST("\x0f\x3a", "PARTIAL");
@@ -113,6 +114,7 @@ main(int argc, char** argv)
TEST("\x8e\xc8", "UD"); // No mov cs, eax
TEST("\x0f\x1e\xc0", "nop eax, eax"); // reserved nop
TEST("\x0f\x1e\x04\x25\x01\x00\x00\x00", "nop dword ptr [0x1], eax"); // reserved nop
TEST64("\xf3\x4f\x0f\x1e\xfc", "nop r12, r15"); // reserved nop
TEST("\xd8\xc1", "fadd st(0), st(1)");
TEST("\xdc\xc1", "fadd st(1), st(0)");
TEST64("\x41\xd8\xc1", "fadd st(0), st(1)"); // REX.B ignored
@@ -303,7 +305,14 @@ main(int argc, char** argv)
// TEST32("\x67\x66\x0f\x38\xf8\x01", "movdir64b ax, zmmword ptr [cx]");
// TEST64("\x67\x66\x0f\x38\xf8\x01", "movdir64b eax, zmmword ptr [ecx]");
TEST64("\x0f\xae\xe8", "lfence");
TEST("\x0f\xae\xe8", "lfence");
TEST("\x0f\xae\xe9", "lfence");
TEST("\x0f\xae\xef", "lfence");
TEST("\x0f\xae\xf0", "mfence");
TEST("\x0f\xae\xf7", "mfence");
TEST("\x0f\xae\xf8", "sfence");
TEST("\x0f\xae\xf9", "sfence");
TEST("\x0f\xae\xff", "sfence");
TEST("\x0f\x70\xc0\x85", "pshufw mm0, mm0, 0x85");