From 4abad246100cf1eab0e2a9f9b311c6b08adc790b Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Sun, 27 Nov 2022 18:04:15 +0100 Subject: [PATCH] decode: Support half vector size This is realized by two changes: first, GP and vector operand size are completely separated using one extra bit. Second, if the operand size of an instruction is derived from VEX.L (or EVEX.L'L), then the "opsize" bits indicate how to derive a smaller vector size (half/quarter/eighth). --- decode.c | 36 ++++++++++++----------- instrs.txt | 27 +++++++----------- parseinstrs.py | 77 ++++++++++++++++++++++++++++++++------------------ 3 files changed, 79 insertions(+), 61 deletions(-) diff --git a/decode.c b/decode.c index 54b6f8c..276572a 100644 --- a/decode.c +++ b/decode.c @@ -83,7 +83,7 @@ struct InstrDesc #define DESC_ZEROREG_VAL(desc) (((desc)->operand_indices >> 10) & 1) #define DESC_LOCK(desc) (((desc)->operand_indices >> 11) & 1) #define DESC_VSIB(desc) (((desc)->operand_indices >> 15) & 1) -#define DESC_OPSIZE(desc) (((desc)->operand_sizes >> 8) & 3) +#define DESC_OPSIZE(desc) (((desc)->reg_types >> 11) & 7) #define DESC_SIZE_FIX1(desc) (((desc)->operand_sizes >> 10) & 7) #define DESC_SIZE_FIX2(desc) (((desc)->operand_sizes >> 13) & 3) #define DESC_INSTR_WIDTH(desc) (((desc)->operand_sizes >> 15) & 1) @@ -288,21 +288,6 @@ prefix_end: instr->flags |= FD_FLAG_64; instr->address = address; - unsigned op_size; - if (DESC_OPSIZE(desc) == 1) - op_size = 1; - else if (mode == DECODE_64) - op_size = ((prefix_rex & PREFIX_REXW) || DESC_OPSIZE(desc) == 3) ? 4 : - UNLIKELY(prefix_66 && !DESC_IGN66(desc)) ? 2 : - DESC_OPSIZE(desc) ? 4 : - 3; - else - op_size = UNLIKELY(prefix_66 && !DESC_IGN66(desc)) ? 2 : 3; - - uint8_t vec_size = 5; - if (prefix_rex & PREFIX_VEXL) - vec_size = 6; - for (unsigned i = 0; i < sizeof(instr->operands) / sizeof(FdOp); i++) instr->operands[i] = (FdOp) {0}; @@ -310,6 +295,23 @@ prefix_end: return FD_ERR_PARTIAL; unsigned op_byte = buffer[off - 1] | (!DESC_MODRM(desc) ? 
0xc0 : 0); + unsigned op_size; + unsigned op_size_alt = 0; + if (!(DESC_OPSIZE(desc) & 4)) { + if (DESC_OPSIZE(desc) == 1) + op_size = 1; + else if (mode == DECODE_64) + op_size = ((prefix_rex & PREFIX_REXW) || DESC_OPSIZE(desc) == 3) ? 4 : + UNLIKELY(prefix_66 && !DESC_IGN66(desc)) ? 2 : + DESC_OPSIZE(desc) ? 4 : + 3; + else + op_size = UNLIKELY(prefix_66 && !DESC_IGN66(desc)) ? 2 : 3; + } else { + op_size = 5 + !!(prefix_rex & PREFIX_VEXL); + op_size_alt = op_size - (DESC_OPSIZE(desc) & 3); + } + if (UNLIKELY(instr->type == FDI_MOV_CR || instr->type == FDI_MOV_DR)) { unsigned modreg = (op_byte >> 3) & 0x7; unsigned modrm = op_byte & 0x7; @@ -562,7 +564,7 @@ skip_modrm: } uint8_t operand_sizes[4] = { - DESC_SIZE_FIX1(desc), DESC_SIZE_FIX2(desc) + 1, op_size, vec_size + DESC_SIZE_FIX1(desc), DESC_SIZE_FIX2(desc) + 1, op_size, op_size_alt }; for (int i = 0; i < 4; i++) diff --git a/instrs.txt b/instrs.txt index 66e3ff5..ae78286 100644 --- a/instrs.txt +++ b/instrs.txt @@ -964,9 +964,8 @@ VEX.NP.0f59 RVM Vps Hps Wps - VMULPS F=AVX VEX.66.0f59 RVM Vpd Hpd Wpd - VMULPD F=AVX VEX.F3.LIG.0f59 RVM Vdq Hdq Wss - VMULSS F=AVX VEX.F2.LIG.0f59 RVM Vdq Hdq Wsd - VMULSD F=AVX -VEX.NP.L0.0f5a RM Vpd Wq - - VCVTPS2PD F=AVX -VEX.NP.L1.0f5a RM Vpd Wdq - - VCVTPS2PD F=AVX -VEX.66.0f5a RM Vdq Wpd - - VCVTPD2PS F=AVX +VEX.NP.0f5a RM Vpd Wh - - VCVTPS2PD F=AVX +VEX.66.0f5a RM Vh Wpd - - VCVTPD2PS F=AVX VEX.F3.LIG.0f5a RVM Vdq Hdq Wss - VCVTSS2SD F=AVX VEX.F2.LIG.0f5a RVM Vdq Hdq Wsd - VCVTSD2SS F=AVX VEX.NP.0f5b RM Vps Vx - - VCVTDQ2PS F=AVX @@ -1069,10 +1068,9 @@ VEX.66.0fe2 RVM Vx Hx Wx - VPSRAD F=AVX VEX.66.0fe3 RVM Vx Hx Wx - VPAVGW F=AVX VEX.66.0fe4 RVM Vx Hx Wx - VPMULHUW F=AVX VEX.66.0fe5 RVM Vx Hx Wx - VPMULHW F=AVX -VEX.66.0fe6 RM Vdq Wx - - VCVTTPD2DQ F=AVX -VEX.F3.L0.0fe6 RM Vx Wq - - VCVTDQ2PD F=AVX -VEX.F3.L1.0fe6 RM Vx Wdq - - VCVTDQ2PD F=AVX -VEX.F2.0fe6 RM Vdq Wx - - VCVTPD2DQ F=AVX +VEX.66.0fe6 RM Vh Wx - - VCVTTPD2DQ F=AVX +VEX.F3.0fe6 RM Vx Wh - - VCVTDQ2PD F=AVX 
+VEX.F2.0fe6 RM Vh Wx - - VCVTPD2DQ F=AVX VEX.66.0fe7/m MR Mx Vx - - VMOVNTDQ F=AVX VEX.66.0fe8 RVM Vx Hx Wx - VPSUBSB F=AVX VEX.66.0fe9 RVM Vx Hx Wx - VPSUBSW F=AVX @@ -1113,8 +1111,7 @@ VEX.66.W0.0f380c RVM Vx Hx Wx - VPERMILPS F=AVX VEX.66.W0.0f380d RVM Vx Hx Wx - VPERMILPD F=AVX VEX.66.W0.0f380e RM Vx Wx - - VTESTPS F=AVX VEX.66.W0.0f380f RM Vx Wx - - VTESTPD F=AVX -# TODO: Wx is actually half the size -VEX.66.W0.0f3813 RM Vx Wx - - VCVTPH2PS F=F16C +VEX.66.W0.0f3813 RM Vx Wh - - VCVTPH2PS F=F16C VEX.66.W0.L1.0f3816 RVM Vx Hx Wx - VPERMPS F=AVX2 VEX.66.0f3817 RM Vx Wx - - VPTEST F=AVX EFL=0--0m00m VEX.66.W0.0f3818 RM Vx Wd - - VBROADCASTSS F=AVX @@ -1172,13 +1169,11 @@ VEX.66.W0.0f388e MVR Wx Hx Vx - VPMASKMOVD F=AVX2 VEX.66.W1.0f388e MVR Wx Hx Vx - VPMASKMOVQ F=AVX2 VEX.66.W0.0f3890/m RMV Vx Md Hx - VPGATHERDD VSIB F=AVX2 VEX.66.W1.0f3890/m RMV Vx Mq Hx - VPGATHERDQ VSIB F=AVX2 -VEX.66.W0.L0.0f3891/m RMV Vq Md Hq - VPGATHERQD VSIB F=AVX2 -VEX.66.W0.L1.0f3891/m RMV Vdq Md Hdq - VPGATHERQD VSIB F=AVX2 +VEX.66.W0.0f3891/m RMV Vh Md Hh - VPGATHERQD VSIB F=AVX2 VEX.66.W1.0f3891/m RMV Vx Mq Hx - VPGATHERQQ VSIB F=AVX2 VEX.66.W0.0f3892/m RMV Vx Md Hx - VGATHERDPS VSIB F=AVX2 VEX.66.W1.0f3892/m RMV Vx Mq Hx - VGATHERDPD VSIB F=AVX2 -VEX.66.W0.L0.0f3893/m RMV Vq Md Hq - VGATHERQPS VSIB F=AVX2 -VEX.66.W0.L1.0f3893/m RMV Vdq Md Hdq - VGATHERQPS VSIB F=AVX2 +VEX.66.W0.0f3893/m RMV Vh Md Hh - VGATHERQPS VSIB F=AVX2 VEX.66.W1.0f3893/m RMV Vx Mq Hx - VGATHERQPD VSIB F=AVX2 VEX.66.W0.0f3896 RVM Vx Hx Wx - VFMADDSUB132PS F=FMA VEX.66.W1.0f3896 RVM Vx Hx Wx - VFMADDSUB132PD F=FMA @@ -1265,8 +1260,7 @@ VEX.66.W1.L0.0f3a16 MRI Eq Vx Ib - VPEXTRQ O64 F=AVX ENC_NOSZ VEX.66.L0.0f3a17 MRI Ed Vx Ib - VEXTRACTPS F=AVX ENC_NOSZ VEX.66.W0.L1.0f3a18 RVMI Vx Hx Wdq Ib VINSERTF128 F=AVX ENC_NOSZ VEX.66.W0.L1.0f3a19 MRI Wdq Vx Ib - VEXTRACTF128 F=AVX ENC_NOSZ -# TODO: Wx is actually half the size -VEX.66.W0.0f3a1d MRI Wx Hx Ib - VCVTPS2PH F=F16C +VEX.66.W0.0f3a1d MRI Wh Hx Ib - VCVTPS2PH 
F=F16C VEX.66.WIG.L0.0f3a20 RVMI Vx Hx Eb Ib VPINSRB F=AVX ENC_NOSZ VEX.66.L0.0f3a21 RVMI Vx Hx Wd Ib VINSERTPS F=AVX ENC_NOSZ VEX.66.W0.L0.0f3a22 RVMI Vdq Hdq Ey Ib VPINSRD F=AVX ENC_NOSZ @@ -1613,8 +1607,7 @@ VEX.F3.W0.0f38b0/m RM Vx Mx - - VCVTNEEBF162PS F=AVX-NE-CONVERT VEX.F2.W0.0f38b0/m RM Vx Mx - - VCVTNEOBF162PS F=AVX-NE-CONVERT VEX.66.W0.0f38b1/m RM Vx Mw - - VBCSTNESH2PS F=AVX-NE-CONVERT VEX.F3.W0.0f38b1/m RM Vx Mw - - VBCSTNEBF162PS F=AVX-NE-CONVERT -# TODO: Vdq is actually half the vector size -VEX.F3.W0.0f3872 RM Vdq Wps - - VCVTNEPS2BF16 F=AVX-NE-CONVERT +VEX.F3.W0.0f3872 RM Vh Wps - - VCVTNEPS2BF16 F=AVX-NE-CONVERT # AVX-IFMA VEX.66.W1.0f38b4 RVM Vx Hx Wx - VPMADD52LUQ F=AVX-IFMA diff --git a/parseinstrs.py b/parseinstrs.py index 918215e..dc4f733 100644 --- a/parseinstrs.py +++ b/parseinstrs.py @@ -22,14 +22,15 @@ INSTR_FLAGS_FIELDS, INSTR_FLAGS_SIZES = zip(*[ ("op1_size", 2), ("op2_size", 2), ("op3_size", 2), - ("opsize", 2), + ("unused2", 2), ("size_fix1", 3), ("size_fix2", 2), ("instr_width", 1), ("modrm_ty", 3), ("modreg_ty", 3), ("vexreg_ty", 2), - ("unused", 6), + ("unused", 3), + ("opsize", 3), ("modrm", 1), ("ign66", 1), ][::-1]) @@ -112,22 +113,25 @@ OPKIND_SIZES = { "b": 1, "w": 2, "d": 4, - "ss": 4, # Scalar single of XMM + "ss": 4, # Scalar single of XMM (d) "q": 8, - "sd": 8, # Scalar double of XMM + "sd": 8, # Scalar double of XMM (q) "t": 10, # FPU/ten-byte "dq": 16, "qq": 32, "oq": 64, # oct-quadword "": 0, # for MEMZ - "v": -1, - "y": -1, # actually, dword or qword - "z": -1, # actually, op-size maxed at 4 (immediates) - "a": -1, # actually, twice the size - "p": -1, # actually, far pointer = SZ_OP + 2 - "x": -2, - "pd": -2, # packed double - "ps": -2, # packed single + "v": -1, # operand size (w/d/q) + "y": -1, # operand size (d/q) + "z": -1, # w/d (immediates, min(operand size, 4)) + "a": -1, # z:z + "p": -1, # w:z + "x": -2, # vector size + "h": -3, # half x + "f": -4, # fourth x + "e": -5, # eighth x + "pd": -2, # packed double 
(x) + "ps": -2, # packed single (x) # Custom names "bs": -1, # sign-extended immediate @@ -141,10 +145,16 @@ class OpKind(NamedTuple): SZ_OP = -1 SZ_VEC = -2 + SZ_VEC_HALF = -3 + SZ_VEC_QUARTER = -4 + SZ_VEC_EIGHTH = -5 def abssize(self, opsz=None, vecsz=None): res = opsz if self.size == self.SZ_OP else \ - vecsz if self.size == self.SZ_VEC else self.size + vecsz if self.size == self.SZ_VEC else \ + vecsz >> 1 if self.size == self.SZ_VEC_HALF else \ + vecsz >> 2 if self.size == self.SZ_VEC_QUARTER else \ + vecsz >> 3 if self.size == self.SZ_VEC_EIGHTH else self.size if res is None: raise Exception("unspecified operand size") return res @@ -169,7 +179,6 @@ class InstrDesc(NamedTuple): "CR": 9, "DR": 10} OPKIND_SIZES = { 0: 0, 1: 1, 2: 2, 4: 3, 8: 4, 16: 5, 32: 6, 64: 7, 10: 0, - OpKind.SZ_OP: -2, OpKind.SZ_VEC: -3, } @classmethod @@ -222,28 +231,47 @@ class InstrDesc(NamedTuple): tys.append(self.OPKIND_REGTYS_ENC[op.kind]) return sum(ty << (4*i) for i, ty in enumerate(tys)) + def dynsizes(self): + dynopsz = set(op.size for op in self.operands if op.size < 0) + if {"INSTR_WIDTH", "SZ8"} & self.flags: dynopsz.add(OpKind.SZ_OP) + if OpKind.SZ_OP in dynopsz and len(dynopsz) > 1: + raise Exception(f"conflicting dynamic operand sizes in {self}") + return dynopsz + def encode(self, mnem, ign66, modrm): flags = ENCODINGS[self.encoding] extraflags = {} - opsz = set(self.OPKIND_SIZES[opkind.size] for opkind in self.operands) + dynopsz = self.dynsizes() # Operand size either refers to vectors or GP, but not both - if -2 in opsz and -3 in opsz: - raise Exception(f"conflicting gp vs. 
vec operand size in {self}") + if dynopsz and OpKind.SZ_OP not in dynopsz: # Vector operand size + if self.flags & {"SZ8", "D64", "F64", "INSTR_WIDTH", "LOCK", "U66"}: + raise Exception(f"incompatible flags in {self}") + # Allow at most the vector size together with one alternative + dynsizes = [OpKind.SZ_VEC] + list(dynopsz - {OpKind.SZ_VEC}) + extraflags["opsize"] = 4 | (OpKind.SZ_VEC - dynsizes[-1]) + if len(dynsizes) > 2: + raise Exception(f"conflicting vector operand sizes in {self}") + else: # either empty or GP operand size + dynsizes = [OpKind.SZ_OP] + if "SZ8" in self.flags: extraflags["opsize"] = 1 + if "D64" in self.flags: extraflags["opsize"] = 2 + if "F64" in self.flags: extraflags["opsize"] = 3 + extraflags["instr_width"] = "INSTR_WIDTH" in self.flags + extraflags["lock"] = "LOCK" in self.flags # Sort fixed sizes encodable in size_fix2 as second element. - fixed = sorted((x for x in opsz if x >= 0), key=lambda x: 1 <= x <= 4) + fixed = set(self.OPKIND_SIZES[op.size] for op in self.operands if op.size >= 0) + fixed = sorted(fixed, key=lambda x: 1 <= x <= 4) if len(fixed) > 2 or (len(fixed) == 2 and not (1 <= fixed[1] <= 4)): raise Exception(f"invalid fixed sizes {fixed} in {self}") - sizes = (fixed + [1, 1])[:2] + [-2, -3] # See operand_sizes in decode.c. + sizes = (fixed + [1, 1])[:2] + dynsizes # See operand_sizes in decode.c. 
extraflags["size_fix1"] = sizes[0] extraflags["size_fix2"] = sizes[1] - 1 for i, opkind in enumerate(self.operands): - sz = self.OPKIND_SIZES[opkind.size] + sz = self.OPKIND_SIZES[opkind.size] if opkind.size >= 0 else opkind.size extraflags["op%d_size"%i] = sizes.index(sz) - if i >= 3: - continue opname = ENCODING_OPORDER[self.encoding][i] if opname == "modrm": if opkind.kind == "MEM": @@ -258,11 +286,6 @@ class InstrDesc(NamedTuple): raise Exception("invalid regty for op 3, must be VEC") # Miscellaneous Flags - if "SZ8" in self.flags: extraflags["opsize"] = 1 - if "D64" in self.flags: extraflags["opsize"] = 2 - if "F64" in self.flags: extraflags["opsize"] = 3 - if "INSTR_WIDTH" in self.flags: extraflags["instr_width"] = 1 - if "LOCK" in self.flags: extraflags["lock"] = 1 if "VSIB" in self.flags: extraflags["vsib"] = 1 if modrm: extraflags["modrm"] = 1