decode: Store op types early and compact encoding

* The encoding of operand types in the decode table now only requires 9
  bits instead of the previous 16 bits.
* Operand types are decoded before the operands themselves are stored.
  This makes it possible to ignore the REX.R/REX.B prefix bits for
  specific register types.
This commit is contained in:
Alexis Engelke
2020-06-24 08:41:30 +02:00
parent 5e1bb1871f
commit 3ad518e22e
2 changed files with 66 additions and 54 deletions

View File

@@ -210,9 +210,8 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, FdInstr* instr,
if (out_o2) if (out_o2)
{ {
uint8_t reg_idx = mod_reg; uint8_t reg_idx = mod_reg;
// FIXME: don't apply REX.R to MMX registers.
#if defined(ARCH_X86_64) #if defined(ARCH_X86_64)
if (!is_seg) if (!is_seg && !UNLIKELY(out_o2->misc == FD_RT_MMX))
reg_idx += prefixes & PREFIX_REXR ? 8 : 0; reg_idx += prefixes & PREFIX_REXR ? 8 : 0;
#endif #endif
@@ -227,13 +226,22 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, FdInstr* instr,
out_o2->type = FD_OT_REG; out_o2->type = FD_OT_REG;
out_o2->reg = reg_idx; out_o2->reg = reg_idx;
if (is_cr)
out_o2->misc = FD_RT_CR;
else if (is_dr)
out_o2->misc = FD_RT_DR;
else if (is_seg)
out_o2->misc = FD_RT_SEG;
} }
if (mod == 3 || is_cr || is_dr) if (mod == 3 || is_cr || is_dr)
{ {
if (out_o1->misc == FD_RT_MEM)
return FD_ERR_UD;
uint8_t reg_idx = rm; uint8_t reg_idx = rm;
// FIXME: don't apply REX.B to MMX and MASK registers.
#if defined(ARCH_X86_64) #if defined(ARCH_X86_64)
if (!UNLIKELY(out_o1->misc == FD_RT_MMX || out_o1->misc == FD_RT_MASK))
reg_idx += prefixes & PREFIX_REXB ? 8 : 0; reg_idx += prefixes & PREFIX_REXB ? 8 : 0;
#endif #endif
out_o1->type = FD_OT_REG; out_o1->type = FD_OT_REG;
@@ -457,6 +465,14 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
__builtin_memset(instr->operands, 0, sizeof(instr->operands)); __builtin_memset(instr->operands, 0, sizeof(instr->operands));
// Reg operand 4 is only possible with RVMR encoding, which implies VEC.
for (int i = 0; i < 3; i++)
{
uint32_t reg_type = (desc->reg_types >> 3 * i) & 0x7;
// GPL FPU VEC MSK MMX BND ??? NVR
instr->operands[i].misc = (0xf0857641 >> (4 * reg_type)) & 0xf;
}
if (DESC_HAS_IMPLICIT(desc)) if (DESC_HAS_IMPLICIT(desc))
{ {
FdOp* operand = &instr->operands[DESC_IMPLICIT_IDX(desc)]; FdOp* operand = &instr->operands[DESC_IMPLICIT_IDX(desc)];
@@ -484,6 +500,7 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
uint8_t reg_idx = buffer[off - 1] & 7; uint8_t reg_idx = buffer[off - 1] & 7;
// FIXME: don't apply REX.B to FPU, MMX, and MASK registers. // FIXME: don't apply REX.B to FPU, MMX, and MASK registers.
#if defined(ARCH_X86_64) #if defined(ARCH_X86_64)
if (!UNLIKELY(operand->misc == FD_RT_FPU))
reg_idx += prefixes & PREFIX_REXB ? 8 : 0; reg_idx += prefixes & PREFIX_REXB ? 8 : 0;
#endif #endif
operand->type = FD_OT_REG; operand->type = FD_OT_REG;
@@ -534,6 +551,7 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
// 3 = register in imm8[7:4], used for RVMR encoding with VBLENDVP[SD] // 3 = register in imm8[7:4], used for RVMR encoding with VBLENDVP[SD]
FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)]; FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)];
operand->type = FD_OT_REG; operand->type = FD_OT_REG;
operand->misc = FD_RT_VEC;
if (UNLIKELY(off + 1 > len)) if (UNLIKELY(off + 1 > len))
return FD_ERR_PARTIAL; return FD_ERR_PARTIAL;
@@ -622,25 +640,16 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
if (instr->operands[i].type == FD_OT_NONE) FdOp* operand = &instr->operands[i];
if (operand->type == FD_OT_NONE)
break; break;
uint8_t enc_size = (desc->operand_sizes >> 2 * i) & 3; operand->size = operand_sizes[(desc->operand_sizes >> 2 * i) & 3];
instr->operands[i].size = operand_sizes[enc_size];
uint32_t reg_type = (desc->reg_types >> 4 * i) & 0xf; // if (operand->type == FD_OT_REG && operand->misc == FD_RT_GPL &&
uint32_t reg_idx = instr->operands[i].reg; // !(prefixes & PREFIX_REX) && operand->size == 1 && operand->reg >= 4)
if (reg_type == FD_RT_MEM && instr->operands[i].type != FD_OT_MEM) if (!(prefixes & PREFIX_REX) && (LOAD_LE_4(operand) & 0xfffcffff) == 0x01040101)
return FD_ERR_UD; operand->misc = FD_RT_GPH;
if (instr->operands[i].type != FD_OT_REG)
continue;
if (reg_type == FD_RT_GPL && !(prefixes & PREFIX_REX) &&
instr->operands[i].size == 1 && reg_idx >= 4)
reg_type = FD_RT_GPH;
// Fixup eager application of REX prefix
if ((reg_type == FD_RT_MMX || reg_type == FD_RT_FPU) && reg_idx >= 8)
instr->operands[i].reg -= 8;
instr->operands[i].misc = reg_type;
} }
instr->size = off; instr->size = off;

View File

@@ -45,10 +45,10 @@ InstrFlags = bitstruct("InstrFlags", [
"gp_fixed_operand_size:3", "gp_fixed_operand_size:3",
"lock:1", "lock:1",
"vsib:1", "vsib:1",
"op0_regty:4", "op0_regty:3",
"op1_regty:4", "op1_regty:3",
"op2_regty:4", "op2_regty:3",
"op3_regty:4", "_unused:7",
]) ])
ENCODINGS = { ENCODINGS = {
@@ -84,34 +84,34 @@ ENCODINGS = {
OPKIND_LOOKUP = { OPKIND_LOOKUP = {
# sizeidx (0, fixedsz, opsz, vecsz), fixedsz (log2), regtype # sizeidx (0, fixedsz, opsz, vecsz), fixedsz (log2), regtype
"-": (0, 0, 0), "-": (0, 0, 7),
"IMM": (2, 0, 0), "IMM": (2, 0, 7),
"IMM8": (1, 0, 0), "IMM8": (1, 0, 7),
"IMM16": (1, 1, 0), "IMM16": (1, 1, 7),
"IMM32": (1, 2, 0), "IMM32": (1, 2, 7),
"GP": (2, 0, 1), "GP": (2, 0, 0),
"GP8": (1, 0, 1), "GP8": (1, 0, 0),
"GP16": (1, 1, 1), "GP16": (1, 1, 0),
"GP32": (1, 2, 1), "GP32": (1, 2, 0),
"GP64": (1, 3, 1), "GP64": (1, 3, 0),
"MMX": (1, 3, 5), "MMX": (1, 3, 4),
"XMM": (3, 0, 6), "XMM": (3, 0, 2),
"XMM8": (1, 0, 6), "XMM8": (1, 0, 2),
"XMM16": (1, 1, 6), "XMM16": (1, 1, 2),
"XMM32": (1, 2, 6), "XMM32": (1, 2, 2),
"XMM64": (1, 3, 6), "XMM64": (1, 3, 2),
"XMM128": (1, 4, 6), "XMM128": (1, 4, 2),
"XMM256": (1, 5, 6), "XMM256": (1, 5, 2),
"SREG": (0, 0, 3), "SREG": (0, 0, 7),
"FPU": (0, 0, 4), "FPU": (0, 0, 1),
"MEMZ": (0, 0, 0), "MEMZ": (0, 0, 7),
"MEM8": (1, 0, 0), "MEM8": (1, 0, 7),
"MEM16": (1, 1, 0), "MEM16": (1, 1, 7),
"MEM32": (1, 2, 0), "MEM32": (1, 2, 7),
"MEM64": (1, 3, 0), "MEM64": (1, 3, 7),
"BND": (0, 0, 8), "BND": (0, 0, 5),
"CR": (0, 0, 9), "CR": (0, 0, 7),
"DR": (0, 0, 10), "DR": (0, 0, 7),
} }
class InstrDesc(NamedTuple): class InstrDesc(NamedTuple):
@@ -134,7 +134,10 @@ class InstrDesc(NamedTuple):
enc_size, fixed_size, reg_type = OPKIND_LOOKUP[opkind] enc_size, fixed_size, reg_type = OPKIND_LOOKUP[opkind]
if enc_size == 1: fixed_opsz.add(fixed_size) if enc_size == 1: fixed_opsz.add(fixed_size)
setattr(flags, "op%d_size"%i, enc_size) setattr(flags, "op%d_size"%i, enc_size)
if i < 3:
setattr(flags, "op%d_regty"%i, reg_type) setattr(flags, "op%d_regty"%i, reg_type)
elif reg_type not in (7, 2):
raise Exception("invalid regty for op 3, must be VEC")
if fixed_opsz: flags.gp_fixed_operand_size = next(iter(fixed_opsz)) if fixed_opsz: flags.gp_fixed_operand_size = next(iter(fixed_opsz))