decode: Store op types early and compact encoding

* The encoding of operand types in the decode table now only requires 9
  bits instead of the previous 16 bits.
* Operand types are decoded before the operands themselves are stored.
  This allows the REX.R and REX.B prefix bits to be ignored for specific
  register types.
This commit is contained in:
Alexis Engelke
2020-06-24 08:41:30 +02:00
parent 5e1bb1871f
commit 3ad518e22e
2 changed files with 66 additions and 54 deletions

View File

@@ -210,9 +210,8 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, FdInstr* instr,
if (out_o2)
{
uint8_t reg_idx = mod_reg;
// FIXME: don't apply REX.R to MMX registers.
#if defined(ARCH_X86_64)
if (!is_seg)
if (!is_seg && !UNLIKELY(out_o2->misc == FD_RT_MMX))
reg_idx += prefixes & PREFIX_REXR ? 8 : 0;
#endif
@@ -227,14 +226,23 @@ decode_modrm(const uint8_t* buffer, int len, DecodeMode mode, FdInstr* instr,
out_o2->type = FD_OT_REG;
out_o2->reg = reg_idx;
if (is_cr)
out_o2->misc = FD_RT_CR;
else if (is_dr)
out_o2->misc = FD_RT_DR;
else if (is_seg)
out_o2->misc = FD_RT_SEG;
}
if (mod == 3 || is_cr || is_dr)
{
if (out_o1->misc == FD_RT_MEM)
return FD_ERR_UD;
uint8_t reg_idx = rm;
// FIXME: don't apply REX.B to MMX and MASK registers.
#if defined(ARCH_X86_64)
reg_idx += prefixes & PREFIX_REXB ? 8 : 0;
if (!UNLIKELY(out_o1->misc == FD_RT_MMX || out_o1->misc == FD_RT_MASK))
reg_idx += prefixes & PREFIX_REXB ? 8 : 0;
#endif
out_o1->type = FD_OT_REG;
out_o1->reg = reg_idx;
@@ -457,6 +465,14 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
__builtin_memset(instr->operands, 0, sizeof(instr->operands));
// Reg operand 4 is only possible with RVMR encoding, which implies VEC.
for (int i = 0; i < 3; i++)
{
uint32_t reg_type = (desc->reg_types >> 3 * i) & 0x7;
// GPL FPU VEC MSK MMX BND ??? NVR
instr->operands[i].misc = (0xf0857641 >> (4 * reg_type)) & 0xf;
}
if (DESC_HAS_IMPLICIT(desc))
{
FdOp* operand = &instr->operands[DESC_IMPLICIT_IDX(desc)];
@@ -484,7 +500,8 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
uint8_t reg_idx = buffer[off - 1] & 7;
// FIXME: don't apply REX.B to FPU, MMX, and MASK registers.
#if defined(ARCH_X86_64)
reg_idx += prefixes & PREFIX_REXB ? 8 : 0;
if (!UNLIKELY(operand->misc == FD_RT_FPU))
reg_idx += prefixes & PREFIX_REXB ? 8 : 0;
#endif
operand->type = FD_OT_REG;
operand->reg = reg_idx;
@@ -534,6 +551,7 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
// 3 = register in imm8[7:4], used for RVMR encoding with VBLENDVP[SD]
FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)];
operand->type = FD_OT_REG;
operand->misc = FD_RT_VEC;
if (UNLIKELY(off + 1 > len))
return FD_ERR_PARTIAL;
@@ -622,25 +640,16 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address,
for (int i = 0; i < 4; i++)
{
if (instr->operands[i].type == FD_OT_NONE)
FdOp* operand = &instr->operands[i];
if (operand->type == FD_OT_NONE)
break;
uint8_t enc_size = (desc->operand_sizes >> 2 * i) & 3;
instr->operands[i].size = operand_sizes[enc_size];
operand->size = operand_sizes[(desc->operand_sizes >> 2 * i) & 3];
uint32_t reg_type = (desc->reg_types >> 4 * i) & 0xf;
uint32_t reg_idx = instr->operands[i].reg;
if (reg_type == FD_RT_MEM && instr->operands[i].type != FD_OT_MEM)
return FD_ERR_UD;
if (instr->operands[i].type != FD_OT_REG)
continue;
if (reg_type == FD_RT_GPL && !(prefixes & PREFIX_REX) &&
instr->operands[i].size == 1 && reg_idx >= 4)
reg_type = FD_RT_GPH;
// Fixup eager application of REX prefix
if ((reg_type == FD_RT_MMX || reg_type == FD_RT_FPU) && reg_idx >= 8)
instr->operands[i].reg -= 8;
instr->operands[i].misc = reg_type;
// if (operand->type == FD_OT_REG && operand->misc == FD_RT_GPL &&
// !(prefixes & PREFIX_REX) && operand->size == 1 && operand->reg >= 4)
if (!(prefixes & PREFIX_REX) && (LOAD_LE_4(operand) & 0xfffcffff) == 0x01040101)
operand->misc = FD_RT_GPH;
}
instr->size = off;

View File

@@ -45,10 +45,10 @@ InstrFlags = bitstruct("InstrFlags", [
"gp_fixed_operand_size:3",
"lock:1",
"vsib:1",
"op0_regty:4",
"op1_regty:4",
"op2_regty:4",
"op3_regty:4",
"op0_regty:3",
"op1_regty:3",
"op2_regty:3",
"_unused:7",
])
ENCODINGS = {
@@ -84,34 +84,34 @@ ENCODINGS = {
OPKIND_LOOKUP = {
# sizeidx (0, fixedsz, opsz, vecsz), fixedsz (log2), regtype
"-": (0, 0, 0),
"IMM": (2, 0, 0),
"IMM8": (1, 0, 0),
"IMM16": (1, 1, 0),
"IMM32": (1, 2, 0),
"GP": (2, 0, 1),
"GP8": (1, 0, 1),
"GP16": (1, 1, 1),
"GP32": (1, 2, 1),
"GP64": (1, 3, 1),
"MMX": (1, 3, 5),
"XMM": (3, 0, 6),
"XMM8": (1, 0, 6),
"XMM16": (1, 1, 6),
"XMM32": (1, 2, 6),
"XMM64": (1, 3, 6),
"XMM128": (1, 4, 6),
"XMM256": (1, 5, 6),
"SREG": (0, 0, 3),
"FPU": (0, 0, 4),
"MEMZ": (0, 0, 0),
"MEM8": (1, 0, 0),
"MEM16": (1, 1, 0),
"MEM32": (1, 2, 0),
"MEM64": (1, 3, 0),
"BND": (0, 0, 8),
"CR": (0, 0, 9),
"DR": (0, 0, 10),
"-": (0, 0, 7),
"IMM": (2, 0, 7),
"IMM8": (1, 0, 7),
"IMM16": (1, 1, 7),
"IMM32": (1, 2, 7),
"GP": (2, 0, 0),
"GP8": (1, 0, 0),
"GP16": (1, 1, 0),
"GP32": (1, 2, 0),
"GP64": (1, 3, 0),
"MMX": (1, 3, 4),
"XMM": (3, 0, 2),
"XMM8": (1, 0, 2),
"XMM16": (1, 1, 2),
"XMM32": (1, 2, 2),
"XMM64": (1, 3, 2),
"XMM128": (1, 4, 2),
"XMM256": (1, 5, 2),
"SREG": (0, 0, 7),
"FPU": (0, 0, 1),
"MEMZ": (0, 0, 7),
"MEM8": (1, 0, 7),
"MEM16": (1, 1, 7),
"MEM32": (1, 2, 7),
"MEM64": (1, 3, 7),
"BND": (0, 0, 5),
"CR": (0, 0, 7),
"DR": (0, 0, 7),
}
class InstrDesc(NamedTuple):
@@ -134,7 +134,10 @@ class InstrDesc(NamedTuple):
enc_size, fixed_size, reg_type = OPKIND_LOOKUP[opkind]
if enc_size == 1: fixed_opsz.add(fixed_size)
setattr(flags, "op%d_size"%i, enc_size)
setattr(flags, "op%d_regty"%i, reg_type)
if i < 3:
setattr(flags, "op%d_regty"%i, reg_type)
elif reg_type not in (7, 2):
raise Exception("invalid regty for op 3, must be VEC")
if fixed_opsz: flags.gp_fixed_operand_size = next(iter(fixed_opsz))