decode: Support half vector size

This is realized by two changes: first, GP and vector operand sizes are
completely separated using one extra bit. Second, if the operand size of an
instruction is derived from VEX.L (or EVEX.L'L), then the "opsize" bits
indicate how to derive a smaller vector size (half/quarter/eighth).
This commit is contained in:
Alexis Engelke
2022-11-27 18:04:15 +01:00
parent f565f09f9d
commit 4abad24610
3 changed files with 79 additions and 61 deletions

View File

@@ -83,7 +83,7 @@ struct InstrDesc
#define DESC_ZEROREG_VAL(desc) (((desc)->operand_indices >> 10) & 1)
#define DESC_LOCK(desc) (((desc)->operand_indices >> 11) & 1)
#define DESC_VSIB(desc) (((desc)->operand_indices >> 15) & 1)
#define DESC_OPSIZE(desc) (((desc)->operand_sizes >> 8) & 3)
#define DESC_OPSIZE(desc) (((desc)->reg_types >> 11) & 7)
#define DESC_SIZE_FIX1(desc) (((desc)->operand_sizes >> 10) & 7)
#define DESC_SIZE_FIX2(desc) (((desc)->operand_sizes >> 13) & 3)
#define DESC_INSTR_WIDTH(desc) (((desc)->operand_sizes >> 15) & 1)
@@ -288,21 +288,6 @@ prefix_end:
instr->flags |= FD_FLAG_64;
instr->address = address;
unsigned op_size;
if (DESC_OPSIZE(desc) == 1)
op_size = 1;
else if (mode == DECODE_64)
op_size = ((prefix_rex & PREFIX_REXW) || DESC_OPSIZE(desc) == 3) ? 4 :
UNLIKELY(prefix_66 && !DESC_IGN66(desc)) ? 2 :
DESC_OPSIZE(desc) ? 4 :
3;
else
op_size = UNLIKELY(prefix_66 && !DESC_IGN66(desc)) ? 2 : 3;
uint8_t vec_size = 5;
if (prefix_rex & PREFIX_VEXL)
vec_size = 6;
for (unsigned i = 0; i < sizeof(instr->operands) / sizeof(FdOp); i++)
instr->operands[i] = (FdOp) {0};
@@ -310,6 +295,23 @@ prefix_end:
return FD_ERR_PARTIAL;
unsigned op_byte = buffer[off - 1] | (!DESC_MODRM(desc) ? 0xc0 : 0);
unsigned op_size;
unsigned op_size_alt = 0;
if (!(DESC_OPSIZE(desc) & 4)) {
if (DESC_OPSIZE(desc) == 1)
op_size = 1;
else if (mode == DECODE_64)
op_size = ((prefix_rex & PREFIX_REXW) || DESC_OPSIZE(desc) == 3) ? 4 :
UNLIKELY(prefix_66 && !DESC_IGN66(desc)) ? 2 :
DESC_OPSIZE(desc) ? 4 :
3;
else
op_size = UNLIKELY(prefix_66 && !DESC_IGN66(desc)) ? 2 : 3;
} else {
op_size = 5 + !!(prefix_rex & PREFIX_VEXL);
op_size_alt = op_size - (DESC_OPSIZE(desc) & 3);
}
if (UNLIKELY(instr->type == FDI_MOV_CR || instr->type == FDI_MOV_DR)) {
unsigned modreg = (op_byte >> 3) & 0x7;
unsigned modrm = op_byte & 0x7;
@@ -562,7 +564,7 @@ skip_modrm:
}
uint8_t operand_sizes[4] = {
DESC_SIZE_FIX1(desc), DESC_SIZE_FIX2(desc) + 1, op_size, vec_size
DESC_SIZE_FIX1(desc), DESC_SIZE_FIX2(desc) + 1, op_size, op_size_alt
};
for (int i = 0; i < 4; i++)

View File

@@ -964,9 +964,8 @@ VEX.NP.0f59 RVM Vps Hps Wps - VMULPS F=AVX
VEX.66.0f59 RVM Vpd Hpd Wpd - VMULPD F=AVX
VEX.F3.LIG.0f59 RVM Vdq Hdq Wss - VMULSS F=AVX
VEX.F2.LIG.0f59 RVM Vdq Hdq Wsd - VMULSD F=AVX
VEX.NP.L0.0f5a RM Vpd Wq - - VCVTPS2PD F=AVX
VEX.NP.L1.0f5a RM Vpd Wdq - - VCVTPS2PD F=AVX
VEX.66.0f5a RM Vdq Wpd - - VCVTPD2PS F=AVX
VEX.NP.0f5a RM Vpd Wh - - VCVTPS2PD F=AVX
VEX.66.0f5a RM Vh Wpd - - VCVTPD2PS F=AVX
VEX.F3.LIG.0f5a RVM Vdq Hdq Wss - VCVTSS2SD F=AVX
VEX.F2.LIG.0f5a RVM Vdq Hdq Wsd - VCVTSD2SS F=AVX
VEX.NP.0f5b RM Vps Vx - - VCVTDQ2PS F=AVX
@@ -1069,10 +1068,9 @@ VEX.66.0fe2 RVM Vx Hx Wx - VPSRAD F=AVX
VEX.66.0fe3 RVM Vx Hx Wx - VPAVGW F=AVX
VEX.66.0fe4 RVM Vx Hx Wx - VPMULHUW F=AVX
VEX.66.0fe5 RVM Vx Hx Wx - VPMULHW F=AVX
VEX.66.0fe6 RM Vdq Wx - - VCVTTPD2DQ F=AVX
VEX.F3.L0.0fe6 RM Vx Wq - - VCVTDQ2PD F=AVX
VEX.F3.L1.0fe6 RM Vx Wdq - - VCVTDQ2PD F=AVX
VEX.F2.0fe6 RM Vdq Wx - - VCVTPD2DQ F=AVX
VEX.66.0fe6 RM Vh Wx - - VCVTTPD2DQ F=AVX
VEX.F3.0fe6 RM Vx Wh - - VCVTDQ2PD F=AVX
VEX.F2.0fe6 RM Vh Wx - - VCVTPD2DQ F=AVX
VEX.66.0fe7/m MR Mx Vx - - VMOVNTDQ F=AVX
VEX.66.0fe8 RVM Vx Hx Wx - VPSUBSB F=AVX
VEX.66.0fe9 RVM Vx Hx Wx - VPSUBSW F=AVX
@@ -1113,8 +1111,7 @@ VEX.66.W0.0f380c RVM Vx Hx Wx - VPERMILPS F=AVX
VEX.66.W0.0f380d RVM Vx Hx Wx - VPERMILPD F=AVX
VEX.66.W0.0f380e RM Vx Wx - - VTESTPS F=AVX
VEX.66.W0.0f380f RM Vx Wx - - VTESTPD F=AVX
# TODO: Wx is actually half the size
VEX.66.W0.0f3813 RM Vx Wx - - VCVTPH2PS F=F16C
VEX.66.W0.0f3813 RM Vx Wh - - VCVTPH2PS F=F16C
VEX.66.W0.L1.0f3816 RVM Vx Hx Wx - VPERMPS F=AVX2
VEX.66.0f3817 RM Vx Wx - - VPTEST F=AVX EFL=0--0m00m
VEX.66.W0.0f3818 RM Vx Wd - - VBROADCASTSS F=AVX
@@ -1172,13 +1169,11 @@ VEX.66.W0.0f388e MVR Wx Hx Vx - VPMASKMOVD F=AVX2
VEX.66.W1.0f388e MVR Wx Hx Vx - VPMASKMOVQ F=AVX2
VEX.66.W0.0f3890/m RMV Vx Md Hx - VPGATHERDD VSIB F=AVX2
VEX.66.W1.0f3890/m RMV Vx Mq Hx - VPGATHERDQ VSIB F=AVX2
VEX.66.W0.L0.0f3891/m RMV Vq Md Hq - VPGATHERQD VSIB F=AVX2
VEX.66.W0.L1.0f3891/m RMV Vdq Md Hdq - VPGATHERQD VSIB F=AVX2
VEX.66.W0.0f3891/m RMV Vh Md Hh - VPGATHERQD VSIB F=AVX2
VEX.66.W1.0f3891/m RMV Vx Mq Hx - VPGATHERQQ VSIB F=AVX2
VEX.66.W0.0f3892/m RMV Vx Md Hx - VGATHERDPS VSIB F=AVX2
VEX.66.W1.0f3892/m RMV Vx Mq Hx - VGATHERDPD VSIB F=AVX2
VEX.66.W0.L0.0f3893/m RMV Vq Md Hq - VGATHERQPS VSIB F=AVX2
VEX.66.W0.L1.0f3893/m RMV Vdq Md Hdq - VGATHERQPS VSIB F=AVX2
VEX.66.W0.0f3893/m RMV Vh Md Hh - VGATHERQPS VSIB F=AVX2
VEX.66.W1.0f3893/m RMV Vx Mq Hx - VGATHERQPD VSIB F=AVX2
VEX.66.W0.0f3896 RVM Vx Hx Wx - VFMADDSUB132PS F=FMA
VEX.66.W1.0f3896 RVM Vx Hx Wx - VFMADDSUB132PD F=FMA
@@ -1265,8 +1260,7 @@ VEX.66.W1.L0.0f3a16 MRI Eq Vx Ib - VPEXTRQ O64 F=AVX ENC_NOSZ
VEX.66.L0.0f3a17 MRI Ed Vx Ib - VEXTRACTPS F=AVX ENC_NOSZ
VEX.66.W0.L1.0f3a18 RVMI Vx Hx Wdq Ib VINSERTF128 F=AVX ENC_NOSZ
VEX.66.W0.L1.0f3a19 MRI Wdq Vx Ib - VEXTRACTF128 F=AVX ENC_NOSZ
# TODO: Wx is actually half the size
VEX.66.W0.0f3a1d MRI Wx Hx Ib - VCVTPS2PH F=F16C
VEX.66.W0.0f3a1d MRI Wh Hx Ib - VCVTPS2PH F=F16C
VEX.66.WIG.L0.0f3a20 RVMI Vx Hx Eb Ib VPINSRB F=AVX ENC_NOSZ
VEX.66.L0.0f3a21 RVMI Vx Hx Wd Ib VINSERTPS F=AVX ENC_NOSZ
VEX.66.W0.L0.0f3a22 RVMI Vdq Hdq Ey Ib VPINSRD F=AVX ENC_NOSZ
@@ -1613,8 +1607,7 @@ VEX.F3.W0.0f38b0/m RM Vx Mx - - VCVTNEEBF162PS F=AVX-NE-CONVERT
VEX.F2.W0.0f38b0/m RM Vx Mx - - VCVTNEOBF162PS F=AVX-NE-CONVERT
VEX.66.W0.0f38b1/m RM Vx Mw - - VBCSTNESH2PS F=AVX-NE-CONVERT
VEX.F3.W0.0f38b1/m RM Vx Mw - - VBCSTNEBF162PS F=AVX-NE-CONVERT
# TODO: Vdq is actually half the vector size
VEX.F3.W0.0f3872 RM Vdq Wps - - VCVTNEPS2BF16 F=AVX-NE-CONVERT
VEX.F3.W0.0f3872 RM Vh Wps - - VCVTNEPS2BF16 F=AVX-NE-CONVERT
# AVX-IFMA
VEX.66.W1.0f38b4 RVM Vx Hx Wx - VPMADD52LUQ F=AVX-IFMA

View File

@@ -22,14 +22,15 @@ INSTR_FLAGS_FIELDS, INSTR_FLAGS_SIZES = zip(*[
("op1_size", 2),
("op2_size", 2),
("op3_size", 2),
("opsize", 2),
("unused2", 2),
("size_fix1", 3),
("size_fix2", 2),
("instr_width", 1),
("modrm_ty", 3),
("modreg_ty", 3),
("vexreg_ty", 2),
("unused", 6),
("unused", 3),
("opsize", 3),
("modrm", 1),
("ign66", 1),
][::-1])
@@ -112,22 +113,25 @@ OPKIND_SIZES = {
"b": 1,
"w": 2,
"d": 4,
"ss": 4, # Scalar single of XMM
"ss": 4, # Scalar single of XMM (d)
"q": 8,
"sd": 8, # Scalar double of XMM
"sd": 8, # Scalar double of XMM (q)
"t": 10, # FPU/ten-byte
"dq": 16,
"qq": 32,
"oq": 64, # oct-quadword
"": 0, # for MEMZ
"v": -1,
"y": -1, # actually, dword or qword
"z": -1, # actually, op-size maxed at 4 (immediates)
"a": -1, # actually, twice the size
"p": -1, # actually, far pointer = SZ_OP + 2
"x": -2,
"pd": -2, # packed double
"ps": -2, # packed single
"v": -1, # operand size (w/d/q)
"y": -1, # operand size (d/q)
"z": -1, # w/d (immediates, min(operand size, 4))
"a": -1, # z:z
"p": -1, # w:z
"x": -2, # vector size
"h": -3, # half x
"f": -4, # fourth x
"e": -5, # eighth x
"pd": -2, # packed double (x)
"ps": -2, # packed single (x)
# Custom names
"bs": -1, # sign-extended immediate
@@ -141,10 +145,16 @@ class OpKind(NamedTuple):
SZ_OP = -1
SZ_VEC = -2
SZ_VEC_HALF = -3
SZ_VEC_QUARTER = -4
SZ_VEC_EIGHTH = -5
def abssize(self, opsz=None, vecsz=None):
res = opsz if self.size == self.SZ_OP else \
vecsz if self.size == self.SZ_VEC else self.size
vecsz if self.size == self.SZ_VEC else \
vecsz >> 1 if self.size == self.SZ_VEC_HALF else \
vecsz >> 2 if self.size == self.SZ_VEC_QUARTER else \
vecsz >> 3 if self.size == self.SZ_VEC_EIGHTH else self.size
if res is None:
raise Exception("unspecified operand size")
return res
@@ -169,7 +179,6 @@ class InstrDesc(NamedTuple):
"CR": 9, "DR": 10}
OPKIND_SIZES = {
0: 0, 1: 1, 2: 2, 4: 3, 8: 4, 16: 5, 32: 6, 64: 7, 10: 0,
OpKind.SZ_OP: -2, OpKind.SZ_VEC: -3,
}
@classmethod
@@ -222,28 +231,47 @@ class InstrDesc(NamedTuple):
tys.append(self.OPKIND_REGTYS_ENC[op.kind])
return sum(ty << (4*i) for i, ty in enumerate(tys))
# Collect the set of dynamic operand-size codes used by this instruction.
# Returns a set of negative size codes; raises if the GP operand size
# (OpKind.SZ_OP) is combined with any other dynamic size.
def dynsizes(self):
# Negative sizes are placeholders (operand size, vector size and its
# fractions) rather than fixed byte counts.
dynopsz = set(op.size for op in self.operands if op.size < 0)
# The INSTR_WIDTH and SZ8 flags count as a use of the GP operand size, even
# when no operand carries that size itself.
if {"INSTR_WIDTH", "SZ8"} & self.flags: dynopsz.add(OpKind.SZ_OP)
# The GP operand size must be the only dynamic size when it is present.
if OpKind.SZ_OP in dynopsz and len(dynopsz) > 1:
raise Exception(f"conflicting dynamic operand sizes in {self}")
return dynopsz
def encode(self, mnem, ign66, modrm):
flags = ENCODINGS[self.encoding]
extraflags = {}
opsz = set(self.OPKIND_SIZES[opkind.size] for opkind in self.operands)
dynopsz = self.dynsizes()
# Operand size either refers to vectors or GP, but not both
if -2 in opsz and -3 in opsz:
raise Exception(f"conflicting gp vs. vec operand size in {self}")
if dynopsz and OpKind.SZ_OP not in dynopsz: # Vector operand size
if self.flags & {"SZ8", "D64", "F64", "INSTR_WIDTH", "LOCK", "U66"}:
raise Exception(f"incompatible flags in {self}")
# Allow at most the vector size together with one alternative
dynsizes = [OpKind.SZ_VEC] + list(dynopsz - {OpKind.SZ_VEC})
extraflags["opsize"] = 4 | (OpKind.SZ_VEC - dynsizes[-1])
if len(dynsizes) > 2:
raise Exception(f"conflicting vector operand sizes in {self}")
else: # either empty or GP operand size
dynsizes = [OpKind.SZ_OP]
if "SZ8" in self.flags: extraflags["opsize"] = 1
if "D64" in self.flags: extraflags["opsize"] = 2
if "F64" in self.flags: extraflags["opsize"] = 3
extraflags["instr_width"] = "INSTR_WIDTH" in self.flags
extraflags["lock"] = "LOCK" in self.flags
# Sort fixed sizes encodable in size_fix2 as second element.
fixed = sorted((x for x in opsz if x >= 0), key=lambda x: 1 <= x <= 4)
fixed = set(self.OPKIND_SIZES[op.size] for op in self.operands if op.size >= 0)
fixed = sorted(fixed, key=lambda x: 1 <= x <= 4)
if len(fixed) > 2 or (len(fixed) == 2 and not (1 <= fixed[1] <= 4)):
raise Exception(f"invalid fixed sizes {fixed} in {self}")
sizes = (fixed + [1, 1])[:2] + [-2, -3] # See operand_sizes in decode.c.
sizes = (fixed + [1, 1])[:2] + dynsizes # See operand_sizes in decode.c.
extraflags["size_fix1"] = sizes[0]
extraflags["size_fix2"] = sizes[1] - 1
for i, opkind in enumerate(self.operands):
sz = self.OPKIND_SIZES[opkind.size]
sz = self.OPKIND_SIZES[opkind.size] if opkind.size >= 0 else opkind.size
extraflags["op%d_size"%i] = sizes.index(sz)
if i >= 3:
continue
opname = ENCODING_OPORDER[self.encoding][i]
if opname == "modrm":
if opkind.kind == "MEM":
@@ -258,11 +286,6 @@ class InstrDesc(NamedTuple):
raise Exception("invalid regty for op 3, must be VEC")
# Miscellaneous Flags
if "SZ8" in self.flags: extraflags["opsize"] = 1
if "D64" in self.flags: extraflags["opsize"] = 2
if "F64" in self.flags: extraflags["opsize"] = 3
if "INSTR_WIDTH" in self.flags: extraflags["instr_width"] = 1
if "LOCK" in self.flags: extraflags["lock"] = 1
if "VSIB" in self.flags: extraflags["vsib"] = 1
if modrm: extraflags["modrm"] = 1