diff --git a/decode.c b/decode.c index 54b6f8c..276572a 100644 --- a/decode.c +++ b/decode.c @@ -83,7 +83,7 @@ struct InstrDesc #define DESC_ZEROREG_VAL(desc) (((desc)->operand_indices >> 10) & 1) #define DESC_LOCK(desc) (((desc)->operand_indices >> 11) & 1) #define DESC_VSIB(desc) (((desc)->operand_indices >> 15) & 1) -#define DESC_OPSIZE(desc) (((desc)->operand_sizes >> 8) & 3) +#define DESC_OPSIZE(desc) (((desc)->reg_types >> 11) & 7) #define DESC_SIZE_FIX1(desc) (((desc)->operand_sizes >> 10) & 7) #define DESC_SIZE_FIX2(desc) (((desc)->operand_sizes >> 13) & 3) #define DESC_INSTR_WIDTH(desc) (((desc)->operand_sizes >> 15) & 1) @@ -288,21 +288,6 @@ prefix_end: instr->flags |= FD_FLAG_64; instr->address = address; - unsigned op_size; - if (DESC_OPSIZE(desc) == 1) - op_size = 1; - else if (mode == DECODE_64) - op_size = ((prefix_rex & PREFIX_REXW) || DESC_OPSIZE(desc) == 3) ? 4 : - UNLIKELY(prefix_66 && !DESC_IGN66(desc)) ? 2 : - DESC_OPSIZE(desc) ? 4 : - 3; - else - op_size = UNLIKELY(prefix_66 && !DESC_IGN66(desc)) ? 2 : 3; - - uint8_t vec_size = 5; - if (prefix_rex & PREFIX_VEXL) - vec_size = 6; - for (unsigned i = 0; i < sizeof(instr->operands) / sizeof(FdOp); i++) instr->operands[i] = (FdOp) {0}; @@ -310,6 +295,23 @@ prefix_end: return FD_ERR_PARTIAL; unsigned op_byte = buffer[off - 1] | (!DESC_MODRM(desc) ? 0xc0 : 0); + unsigned op_size; + unsigned op_size_alt = 0; + if (!(DESC_OPSIZE(desc) & 4)) { + if (DESC_OPSIZE(desc) == 1) + op_size = 1; + else if (mode == DECODE_64) + op_size = ((prefix_rex & PREFIX_REXW) || DESC_OPSIZE(desc) == 3) ? 4 : + UNLIKELY(prefix_66 && !DESC_IGN66(desc)) ? 2 : + DESC_OPSIZE(desc) ? 4 : + 3; + else + op_size = UNLIKELY(prefix_66 && !DESC_IGN66(desc)) ? 2 : 3; + } else { + op_size = 5 + !!(prefix_rex & PREFIX_VEXL); + op_size_alt = op_size - (DESC_OPSIZE(desc) & 3); + } + if (UNLIKELY(instr->type == FDI_MOV_CR || instr->type == FDI_MOV_DR)) { unsigned modreg = (op_byte >> 3) & 0x7; unsigned modrm = op_byte & 0x7; @@ -562,7 +564,7 @@ skip_modrm: } uint8_t operand_sizes[4] = { - DESC_SIZE_FIX1(desc), DESC_SIZE_FIX2(desc) + 1, op_size, vec_size + DESC_SIZE_FIX1(desc), DESC_SIZE_FIX2(desc) + 1, op_size, op_size_alt }; for (int i = 0; i < 4; i++) diff --git a/instrs.txt b/instrs.txt index 66e3ff5..ae78286 100644 --- a/instrs.txt +++ b/instrs.txt @@ -964,9 +964,8 @@ VEX.NP.0f59 RVM Vps Hps Wps - VMULPS F=AVX VEX.66.0f59 RVM Vpd Hpd Wpd - VMULPD F=AVX VEX.F3.LIG.0f59 RVM Vdq Hdq Wss - VMULSS F=AVX VEX.F2.LIG.0f59 RVM Vdq Hdq Wsd - VMULSD F=AVX -VEX.NP.L0.0f5a RM Vpd Wq - - VCVTPS2PD F=AVX -VEX.NP.L1.0f5a RM Vpd Wdq - - VCVTPS2PD F=AVX -VEX.66.0f5a RM Vdq Wpd - - VCVTPD2PS F=AVX +VEX.NP.0f5a RM Vpd Wh - - VCVTPS2PD F=AVX +VEX.66.0f5a RM Vh Wpd - - VCVTPD2PS F=AVX VEX.F3.LIG.0f5a RVM Vdq Hdq Wss - VCVTSS2SD F=AVX VEX.F2.LIG.0f5a RVM Vdq Hdq Wsd - VCVTSD2SS F=AVX VEX.NP.0f5b RM Vps Vx - - VCVTDQ2PS F=AVX @@ -1069,10 +1068,9 @@ VEX.66.0fe2 RVM Vx Hx Wx - VPSRAD F=AVX VEX.66.0fe3 RVM Vx Hx Wx - VPAVGW F=AVX VEX.66.0fe4 RVM Vx Hx Wx - VPMULHUW F=AVX VEX.66.0fe5 RVM Vx Hx Wx - VPMULHW F=AVX -VEX.66.0fe6 RM Vdq Wx - - VCVTTPD2DQ F=AVX -VEX.F3.L0.0fe6 RM Vx Wq - - VCVTDQ2PD F=AVX -VEX.F3.L1.0fe6 RM Vx Wdq - - VCVTDQ2PD F=AVX -VEX.F2.0fe6 RM Vdq Wx - - VCVTPD2DQ F=AVX +VEX.66.0fe6 RM Vh Wx - - VCVTTPD2DQ F=AVX +VEX.F3.0fe6 RM Vx Wh - - VCVTDQ2PD F=AVX +VEX.F2.0fe6 RM Vh Wx - - VCVTPD2DQ F=AVX VEX.66.0fe7/m MR Mx Vx - - VMOVNTDQ F=AVX VEX.66.0fe8 RVM Vx Hx Wx - VPSUBSB F=AVX VEX.66.0fe9 RVM Vx Hx Wx - VPSUBSW F=AVX @@ -1113,8 +1111,7 @@ VEX.66.W0.0f380c RVM Vx Hx Wx - VPERMILPS F=AVX VEX.66.W0.0f380d RVM Vx Hx Wx - VPERMILPD F=AVX VEX.66.W0.0f380e RM Vx Wx - - VTESTPS F=AVX VEX.66.W0.0f380f RM Vx Wx - - VTESTPD F=AVX -# TODO: Wx is actually half the size -VEX.66.W0.0f3813 RM Vx Wx - - VCVTPH2PS F=F16C +VEX.66.W0.0f3813 RM Vx Wh - - VCVTPH2PS F=F16C VEX.66.W0.L1.0f3816 RVM Vx Hx Wx - VPERMPS F=AVX2 VEX.66.0f3817 RM Vx Wx - - VPTEST F=AVX EFL=0--0m00m VEX.66.W0.0f3818 RM Vx Wd - - VBROADCASTSS F=AVX @@ -1172,13 +1169,11 @@ VEX.66.W0.0f388e MVR Wx Hx Vx - VPMASKMOVD F=AVX2 VEX.66.W1.0f388e MVR Wx Hx Vx - VPMASKMOVQ F=AVX2 VEX.66.W0.0f3890/m RMV Vx Md Hx - VPGATHERDD VSIB F=AVX2 VEX.66.W1.0f3890/m RMV Vx Mq Hx - VPGATHERDQ VSIB F=AVX2 -VEX.66.W0.L0.0f3891/m RMV Vq Md Hq - VPGATHERQD VSIB F=AVX2 -VEX.66.W0.L1.0f3891/m RMV Vdq Md Hdq - VPGATHERQD VSIB F=AVX2 +VEX.66.W0.0f3891/m RMV Vh Md Hh - VPGATHERQD VSIB F=AVX2 VEX.66.W1.0f3891/m RMV Vx Mq Hx - VPGATHERQQ VSIB F=AVX2 VEX.66.W0.0f3892/m RMV Vx Md Hx - VGATHERDPS VSIB F=AVX2 VEX.66.W1.0f3892/m RMV Vx Mq Hx - VGATHERDPD VSIB F=AVX2 -VEX.66.W0.L0.0f3893/m RMV Vq Md Hq - VGATHERQPS VSIB F=AVX2 -VEX.66.W0.L1.0f3893/m RMV Vdq Md Hdq - VGATHERQPS VSIB F=AVX2 +VEX.66.W0.0f3893/m RMV Vh Md Hh - VGATHERQPS VSIB F=AVX2 VEX.66.W1.0f3893/m RMV Vx Mq Hx - VGATHERQPD VSIB F=AVX2 VEX.66.W0.0f3896 RVM Vx Hx Wx - VFMADDSUB132PS F=FMA VEX.66.W1.0f3896 RVM Vx Hx Wx - VFMADDSUB132PD F=FMA @@ -1265,8 +1260,7 @@ VEX.66.W1.L0.0f3a16 MRI Eq Vx Ib - VPEXTRQ O64 F=AVX ENC_NOSZ VEX.66.L0.0f3a17 MRI Ed Vx Ib - VEXTRACTPS F=AVX ENC_NOSZ VEX.66.W0.L1.0f3a18 RVMI Vx Hx Wdq Ib VINSERTF128 F=AVX ENC_NOSZ VEX.66.W0.L1.0f3a19 MRI Wdq Vx Ib - VEXTRACTF128 F=AVX ENC_NOSZ -# TODO: Wx is actually half the size -VEX.66.W0.0f3a1d MRI Wx Hx Ib - VCVTPS2PH F=F16C +VEX.66.W0.0f3a1d MRI Wh Hx Ib - VCVTPS2PH F=F16C VEX.66.WIG.L0.0f3a20 RVMI Vx Hx Eb Ib VPINSRB F=AVX ENC_NOSZ VEX.66.L0.0f3a21 RVMI Vx Hx Wd Ib VINSERTPS F=AVX ENC_NOSZ VEX.66.W0.L0.0f3a22 RVMI Vdq Hdq Ey Ib VPINSRD F=AVX ENC_NOSZ @@ -1613,8 +1607,7 @@ VEX.F3.W0.0f38b0/m RM Vx Mx - - VCVTNEEBF162PS F=AVX-NE-CONVERT VEX.F2.W0.0f38b0/m RM Vx Mx - - VCVTNEOBF162PS F=AVX-NE-CONVERT VEX.66.W0.0f38b1/m RM Vx Mw - - VBCSTNESH2PS F=AVX-NE-CONVERT VEX.F3.W0.0f38b1/m RM Vx Mw - - VBCSTNEBF162PS F=AVX-NE-CONVERT -# TODO: Vdq is actually half the vector size -VEX.F3.W0.0f3872 RM Vdq Wps - - VCVTNEPS2BF16 F=AVX-NE-CONVERT +VEX.F3.W0.0f3872 RM Vh Wps - - VCVTNEPS2BF16 F=AVX-NE-CONVERT # AVX-IFMA VEX.66.W1.0f38b4 RVM Vx Hx Wx - VPMADD52LUQ F=AVX-IFMA diff --git a/parseinstrs.py b/parseinstrs.py index 918215e..dc4f733 100644 --- a/parseinstrs.py +++ b/parseinstrs.py @@ -22,14 +22,15 @@ INSTR_FLAGS_FIELDS, INSTR_FLAGS_SIZES = zip(*[ ("op1_size", 2), ("op2_size", 2), ("op3_size", 2), - ("opsize", 2), + ("unused2", 2), ("size_fix1", 3), ("size_fix2", 2), ("instr_width", 1), ("modrm_ty", 3), ("modreg_ty", 3), ("vexreg_ty", 2), - ("unused", 6), + ("unused", 3), + ("opsize", 3), ("modrm", 1), ("ign66", 1), ][::-1]) @@ -112,22 +113,25 @@ OPKIND_SIZES = { "b": 1, "w": 2, "d": 4, - "ss": 4, # Scalar single of XMM + "ss": 4, # Scalar single of XMM (d) "q": 8, - "sd": 8, # Scalar double of XMM + "sd": 8, # Scalar double of XMM (q) "t": 10, # FPU/ten-byte "dq": 16, "qq": 32, "oq": 64, # oct-quadword "": 0, # for MEMZ - "v": -1, - "y": -1, # actually, dword or qword - "z": -1, # actually, op-size maxed at 4 (immediates) - "a": -1, # actually, twice the size - "p": -1, # actually, far pointer = SZ_OP + 2 - "x": -2, - "pd": -2, # packed double - "ps": -2, # packed single + "v": -1, # operand size (w/d/q) + "y": -1, # operand size (d/q) + "z": -1, # w/d (immediates, min(operand size, 4)) + "a": -1, # z:z + "p": -1, # w:z + "x": -2, # vector size + "h": -3, # half x + "f": -4, # fourth x + "e": -5, # eighth x + "pd": -2, # packed double (x) + "ps": -2, # packed single (x) # Custom names "bs": -1, # sign-extended immediate @@ -141,10 +145,16 @@ class OpKind(NamedTuple): SZ_OP = -1 SZ_VEC = -2 + SZ_VEC_HALF = -3 + SZ_VEC_QUARTER = -4 + SZ_VEC_EIGHTH = -5 def abssize(self, opsz=None, vecsz=None): res = opsz if self.size == self.SZ_OP else \ - vecsz if self.size == self.SZ_VEC else self.size + vecsz if self.size == self.SZ_VEC else \ + vecsz >> 1 if self.size == self.SZ_VEC_HALF else \ + vecsz >> 2 if self.size == self.SZ_VEC_QUARTER else \ + vecsz >> 3 if self.size == self.SZ_VEC_EIGHTH else self.size if res is None: raise Exception("unspecified operand size") return res @@ -169,7 +179,6 @@ class InstrDesc(NamedTuple): "CR": 9, "DR": 10} OPKIND_SIZES = { 0: 0, 1: 1, 2: 2, 4: 3, 8: 4, 16: 5, 32: 6, 64: 7, 10: 0, - OpKind.SZ_OP: -2, OpKind.SZ_VEC: -3, } @classmethod @@ -222,28 +231,47 @@ class InstrDesc(NamedTuple): tys.append(self.OPKIND_REGTYS_ENC[op.kind]) return sum(ty << (4*i) for i, ty in enumerate(tys)) + def dynsizes(self): + dynopsz = set(op.size for op in self.operands if op.size < 0) + if {"INSTR_WIDTH", "SZ8"} & self.flags: dynopsz.add(OpKind.SZ_OP) + if OpKind.SZ_OP in dynopsz and len(dynopsz) > 1: + raise Exception(f"conflicting dynamic operand sizes in {self}") + return dynopsz + def encode(self, mnem, ign66, modrm): flags = ENCODINGS[self.encoding] extraflags = {} - opsz = set(self.OPKIND_SIZES[opkind.size] for opkind in self.operands) + dynopsz = self.dynsizes() # Operand size either refers to vectors or GP, but not both - if -2 in opsz and -3 in opsz: - raise Exception(f"conflicting gp vs. vec operand size in {self}") + if dynopsz and OpKind.SZ_OP not in dynopsz: # Vector operand size + if self.flags & {"SZ8", "D64", "F64", "INSTR_WIDTH", "LOCK", "U66"}: + raise Exception(f"incompatible flags in {self}") + # Allow at most the vector size together with one alternative + dynsizes = [OpKind.SZ_VEC] + list(dynopsz - {OpKind.SZ_VEC}) + extraflags["opsize"] = 4 | (OpKind.SZ_VEC - dynsizes[-1]) + if len(dynsizes) > 2: + raise Exception(f"conflicting vector operand sizes in {self}") + else: # either empty or GP operand size + dynsizes = [OpKind.SZ_OP] + if "SZ8" in self.flags: extraflags["opsize"] = 1 + if "D64" in self.flags: extraflags["opsize"] = 2 + if "F64" in self.flags: extraflags["opsize"] = 3 + extraflags["instr_width"] = "INSTR_WIDTH" in self.flags + extraflags["lock"] = "LOCK" in self.flags # Sort fixed sizes encodable in size_fix2 as second element. - fixed = sorted((x for x in opsz if x >= 0), key=lambda x: 1 <= x <= 4) + fixed = set(self.OPKIND_SIZES[op.size] for op in self.operands if op.size >= 0) + fixed = sorted(fixed, key=lambda x: 1 <= x <= 4) if len(fixed) > 2 or (len(fixed) == 2 and not (1 <= fixed[1] <= 4)): raise Exception(f"invalid fixed sizes {fixed} in {self}") - sizes = (fixed + [1, 1])[:2] + [-2, -3] # See operand_sizes in decode.c. + sizes = (fixed + [1, 1])[:2] + dynsizes # See operand_sizes in decode.c. extraflags["size_fix1"] = sizes[0] extraflags["size_fix2"] = sizes[1] - 1 for i, opkind in enumerate(self.operands): - sz = self.OPKIND_SIZES[opkind.size] + sz = self.OPKIND_SIZES[opkind.size] if opkind.size >= 0 else opkind.size extraflags["op%d_size"%i] = sizes.index(sz) - if i >= 3: - continue opname = ENCODING_OPORDER[self.encoding][i] if opname == "modrm": if opkind.kind == "MEM": @@ -258,11 +286,6 @@ class InstrDesc(NamedTuple): raise Exception("invalid regty for op 3, must be VEC") # Miscellaneous Flags - if "SZ8" in self.flags: extraflags["opsize"] = 1 - if "D64" in self.flags: extraflags["opsize"] = 2 - if "F64" in self.flags: extraflags["opsize"] = 3 - if "INSTR_WIDTH" in self.flags: extraflags["instr_width"] = 1 - if "LOCK" in self.flags: extraflags["lock"] = 1 if "VSIB" in self.flags: extraflags["vsib"] = 1 if modrm: extraflags["modrm"] = 1