From e04aff73dc3680e17f144d4c03dde29956460d16 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Sun, 2 Oct 2022 11:57:39 +0200 Subject: [PATCH] decode: Add AVX-512 support --- decode-test.c | 1266 ++++++++++++++++++++++++++++++++++++++++++++++++ decode.c | 94 +++- fadec-enc.h | 1 + fadec-enc2.h | 27 ++ fadec.h | 37 +- format.c | 70 ++- instrs.txt | 688 +++++++++++++++++++++++++- parseinstrs.py | 140 ++++-- 8 files changed, 2265 insertions(+), 58 deletions(-) diff --git a/decode-test.c b/decode-test.c index 14c5d7e..5bd353c 100644 --- a/decode-test.c +++ b/decode-test.c @@ -615,6 +615,15 @@ main(int argc, char** argv) TEST64("\x48\x0f\x6e\xc0", "movq mm0, rax"); TEST("\x0f\x70\xc0\x85", "pshufw mm0, mm0, 0x85"); + TEST("\x0f\x58\xc1", "addps xmm0, xmm1"); + TEST64("\x40\x0f\x58\xc1", "addps xmm0, xmm1"); + TEST64("\x41\x0f\x58\xc1", "addps xmm0, xmm9"); + TEST64("\x42\x0f\x58\xc1", "addps xmm0, xmm1"); // REX.X ignored + TEST64("\x43\x0f\x58\xc1", "addps xmm0, xmm9"); // REX.X ignored + TEST64("\x44\x0f\x58\xc1", "addps xmm8, xmm1"); + TEST64("\x45\x0f\x58\xc1", "addps xmm8, xmm9"); + TEST64("\x46\x0f\x58\xc1", "addps xmm8, xmm1"); // REX.X ignored + TEST64("\x47\x0f\x58\xc1", "addps xmm8, xmm9"); // REX.X ignored TEST("\xf3\x0f\x2a\xc1", "cvtsi2ss xmm0, ecx"); TEST("\xf3\x66\x0f\x2a\xc1", "cvtsi2ss xmm0, ecx"); TEST("\x66\xf3\x0f\x2a\xc1", "cvtsi2ss xmm0, ecx"); @@ -758,6 +767,17 @@ main(int argc, char** argv) TEST32("\xc5\xff\xf0\x11", "vlddqu ymm2, ymmword ptr [ecx]"); TEST64("\xc5\xff\xf0\x11", "vlddqu ymm2, ymmword ptr [rcx]"); + // VMOVDDUP with L0 has smaller second operand size. + TEST32("\xf2\x0f\x12\x08", "movddup xmm1, qword ptr [eax]"); + TEST64("\xf2\x0f\x12\x08", "movddup xmm1, qword ptr [rax]"); + TEST("\xf2\x0f\x12\xc8", "movddup xmm1, xmm0"); + TEST32("\xc5\xfb\x12\x08", "vmovddup xmm1, qword ptr [eax]"); + TEST64("\xc5\xfb\x12\x08", "vmovddup xmm1, qword ptr [rax]"); + TEST("\xc5\xfb\x12\xc8", "vmovddup xmm1, xmm0"); + TEST32("\xc5\xff\x12\x08", "vmovddup ymm1, ymmword ptr [eax]"); + TEST64("\xc5\xff\x12\x08", "vmovddup ymm1, ymmword ptr [rax]"); + TEST("\xc5\xff\x12\xc8", "vmovddup ymm1, ymm0"); + TEST("\xc5\xf1\xe1\xc2", "vpsraw xmm0, xmm1, xmm2"); TEST32("\xc5\xf1\xe1\x00", "vpsraw xmm0, xmm1, xmmword ptr [eax]"); TEST64("\xc5\xf1\xe1\x00", "vpsraw xmm0, xmm1, xmmword ptr [rax]"); @@ -1473,6 +1493,1252 @@ main(int argc, char** argv) // Maximum instruction length is 15 bytes. 
TEST("\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x66\x90", "PARTIAL"); + // Complete test of VADDPS and all encoding options + TEST("\x62\xf1\x74\x18\x58\xc2", "vaddps zmm0, zmm1, zmm2, {rn-sae}"); + TEST("\x62\xf1\x74\x38\x58\xc2", "vaddps zmm0, zmm1, zmm2, {rd-sae}"); + TEST("\x62\xf1\x74\x58\x58\xc2", "vaddps zmm0, zmm1, zmm2, {ru-sae}"); + TEST("\x62\xf1\x74\x78\x58\xc2", "vaddps zmm0, zmm1, zmm2, {rz-sae}"); + TEST("\x62\xf1\x74\x08\x58\xc2", "vaddps xmm0, xmm1, xmm2"); + TEST("\x62\xf1\x74\x09\x58\xc2", "vaddps xmm0{k1}, xmm1, xmm2"); + TEST("\x62\xf1\x74\x89\x58\xc2", "vaddps xmm0{k1}{z}, xmm1, xmm2"); + TEST("\x62\xf1\x74\x88\x58\xc2", "vaddps xmm0, xmm1, xmm2"); // EVEX.z = 1 + TEST("\x62\xf1\x74\x28\x58\xc2", "vaddps ymm0, ymm1, ymm2"); + TEST("\x62\xf1\x74\x29\x58\xc2", "vaddps ymm0{k1}, ymm1, ymm2"); + TEST("\x62\xf1\x74\xa9\x58\xc2", "vaddps ymm0{k1}{z}, ymm1, ymm2"); + TEST("\x62\xf1\x74\xa8\x58\xc2", "vaddps ymm0, ymm1, ymm2"); // EVEX.z = 1 + TEST("\x62\xf1\x74\x48\x58\xc2", "vaddps zmm0, zmm1, zmm2"); + TEST("\x62\xf1\x74\x49\x58\xc2", "vaddps zmm0{k1}, zmm1, zmm2"); + TEST("\x62\xf1\x74\xc9\x58\xc2", "vaddps zmm0{k1}{z}, zmm1, zmm2"); + TEST("\x62\xf1\x74\xc8\x58\xc2", "vaddps zmm0, zmm1, zmm2"); // EVEX.z = 1 + TEST("\x62\xf1\x74\x68\x58\xc2", "UD"); // EVEX.L'Lb = 110 + TEST64("\x62\xf1\x74\x08\x58\x00", "vaddps xmm0, xmm1, xmmword ptr [rax]"); + TEST64("\x62\xf1\x74\x0a\x58\x00", "vaddps xmm0{k2}, xmm1, xmmword ptr [rax]"); + TEST64("\x62\xf1\x74\x8a\x58\x00", "vaddps xmm0{k2}{z}, xmm1, xmmword ptr [rax]"); + TEST64("\x62\xf1\x74\x88\x58\x00", "vaddps xmm0, xmm1, xmmword ptr [rax]"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x18\x58\x00", "vaddps xmm0, xmm1, dword ptr [rax]{1to4}"); + TEST64("\x62\xf1\x74\x1a\x58\x00", "vaddps xmm0{k2}, xmm1, dword ptr [rax]{1to4}"); + TEST64("\x62\xf1\x74\x9a\x58\x00", "vaddps xmm0{k2}{z}, xmm1, dword ptr [rax]{1to4}"); + TEST64("\x62\xf1\x74\x98\x58\x00", "vaddps xmm0, xmm1, dword ptr [rax]{1to4}"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x28\x58\x00", "vaddps ymm0, ymm1, ymmword ptr [rax]"); + TEST64("\x62\xf1\x74\x2a\x58\x00", "vaddps ymm0{k2}, ymm1, ymmword ptr [rax]"); + TEST64("\x62\xf1\x74\xaa\x58\x00", "vaddps ymm0{k2}{z}, ymm1, ymmword ptr [rax]"); + TEST64("\x62\xf1\x74\xa8\x58\x00", "vaddps ymm0, ymm1, ymmword ptr [rax]"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x38\x58\x00", "vaddps ymm0, ymm1, dword ptr [rax]{1to8}"); + TEST64("\x62\xf1\x74\x3a\x58\x00", "vaddps ymm0{k2}, ymm1, dword ptr [rax]{1to8}"); + TEST64("\x62\xf1\x74\xba\x58\x00", "vaddps ymm0{k2}{z}, ymm1, dword ptr [rax]{1to8}"); + TEST64("\x62\xf1\x74\xb8\x58\x00", "vaddps ymm0, ymm1, dword ptr [rax]{1to8}"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x48\x58\x00", "vaddps zmm0, zmm1, zmmword ptr [rax]"); + TEST64("\x62\xf1\x74\x4a\x58\x00", "vaddps zmm0{k2}, zmm1, zmmword ptr [rax]"); + TEST64("\x62\xf1\x74\xca\x58\x00", "vaddps zmm0{k2}{z}, zmm1, zmmword ptr [rax]"); + TEST64("\x62\xf1\x74\xc8\x58\x00", "vaddps zmm0, zmm1, zmmword ptr [rax]"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x58\x58\x00", "vaddps zmm0, zmm1, dword ptr [rax]{1to16}"); + TEST64("\x62\xf1\x74\x5a\x58\x00", "vaddps zmm0{k2}, zmm1, dword ptr [rax]{1to16}"); + TEST64("\x62\xf1\x74\xda\x58\x00", "vaddps zmm0{k2}{z}, zmm1, dword ptr [rax]{1to16}"); + TEST64("\x62\xf1\x74\xd8\x58\x00", "vaddps zmm0, zmm1, dword ptr [rax]{1to16}"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x68\x58\x00", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\x6a\x58\x00", "UD"); // EVEX.L'L = 11 + 
TEST64("\x62\xf1\x74\xea\x58\x00", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\xe8\x58\x00", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\x78\x58\x00", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\x7a\x58\x00", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\xfa\x58\x00", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\xf8\x58\x00", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\x08\x58\x40\x01", "vaddps xmm0, xmm1, xmmword ptr [rax+0x10]"); + TEST64("\x62\xf1\x74\x0a\x58\x40\x01", "vaddps xmm0{k2}, xmm1, xmmword ptr [rax+0x10]"); + TEST64("\x62\xf1\x74\x8a\x58\x40\x01", "vaddps xmm0{k2}{z}, xmm1, xmmword ptr [rax+0x10]"); + TEST64("\x62\xf1\x74\x88\x58\x40\x01", "vaddps xmm0, xmm1, xmmword ptr [rax+0x10]"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x18\x58\x40\x01", "vaddps xmm0, xmm1, dword ptr [rax+0x4]{1to4}"); + TEST64("\x62\xf1\x74\x1a\x58\x40\x01", "vaddps xmm0{k2}, xmm1, dword ptr [rax+0x4]{1to4}"); + TEST64("\x62\xf1\x74\x9a\x58\x40\x01", "vaddps xmm0{k2}{z}, xmm1, dword ptr [rax+0x4]{1to4}"); + TEST64("\x62\xf1\x74\x98\x58\x40\x01", "vaddps xmm0, xmm1, dword ptr [rax+0x4]{1to4}"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x28\x58\x40\x01", "vaddps ymm0, ymm1, ymmword ptr [rax+0x20]"); + TEST64("\x62\xf1\x74\x2a\x58\x40\x01", "vaddps ymm0{k2}, ymm1, ymmword ptr [rax+0x20]"); + TEST64("\x62\xf1\x74\xaa\x58\x40\x01", "vaddps ymm0{k2}{z}, ymm1, ymmword ptr [rax+0x20]"); + TEST64("\x62\xf1\x74\xa8\x58\x40\x01", "vaddps ymm0, ymm1, ymmword ptr [rax+0x20]"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x38\x58\x40\x01", "vaddps ymm0, ymm1, dword ptr [rax+0x4]{1to8}"); + TEST64("\x62\xf1\x74\x3a\x58\x40\x01", "vaddps ymm0{k2}, ymm1, dword ptr [rax+0x4]{1to8}"); + TEST64("\x62\xf1\x74\xba\x58\x40\x01", "vaddps ymm0{k2}{z}, ymm1, dword ptr [rax+0x4]{1to8}"); + TEST64("\x62\xf1\x74\xb8\x58\x40\x01", "vaddps ymm0, ymm1, dword ptr [rax+0x4]{1to8}"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x48\x58\x40\x01", "vaddps zmm0, zmm1, zmmword ptr [rax+0x40]"); + TEST64("\x62\xf1\x74\x4a\x58\x40\x01", "vaddps zmm0{k2}, zmm1, zmmword ptr [rax+0x40]"); + TEST64("\x62\xf1\x74\xca\x58\x40\x01", "vaddps zmm0{k2}{z}, zmm1, zmmword ptr [rax+0x40]"); + TEST64("\x62\xf1\x74\xc8\x58\x40\x01", "vaddps zmm0, zmm1, zmmword ptr [rax+0x40]"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x58\x58\x40\x01", "vaddps zmm0, zmm1, dword ptr [rax+0x4]{1to16}"); + TEST64("\x62\xf1\x74\x5a\x58\x40\x01", "vaddps zmm0{k2}, zmm1, dword ptr [rax+0x4]{1to16}"); + TEST64("\x62\xf1\x74\xda\x58\x40\x01", "vaddps zmm0{k2}{z}, zmm1, dword ptr [rax+0x4]{1to16}"); + TEST64("\x62\xf1\x74\xd8\x58\x40\x01", "vaddps zmm0, zmm1, dword ptr [rax+0x4]{1to16}"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x68\x58\x40\x01", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\x6a\x58\x40\x01", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\xea\x58\x40\x01", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\xe8\x58\x40\x01", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\x78\x58\x40\x01", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\x7a\x58\x40\x01", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\xfa\x58\x40\x01", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\xf8\x58\x40\x01", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\x08\x58\x40\xff", "vaddps xmm0, xmm1, xmmword ptr [rax-0x10]"); + TEST64("\x62\xf1\x74\x0a\x58\x40\xff", "vaddps xmm0{k2}, xmm1, xmmword ptr [rax-0x10]"); + TEST64("\x62\xf1\x74\x8a\x58\x40\xff", "vaddps xmm0{k2}{z}, xmm1, xmmword ptr [rax-0x10]"); + TEST64("\x62\xf1\x74\x88\x58\x40\xff", "vaddps xmm0, xmm1, xmmword ptr [rax-0x10]"); // EVEX.z = 1 + 
TEST64("\x62\xf1\x74\x18\x58\x40\xff", "vaddps xmm0, xmm1, dword ptr [rax-0x4]{1to4}"); + TEST64("\x62\xf1\x74\x1a\x58\x40\xff", "vaddps xmm0{k2}, xmm1, dword ptr [rax-0x4]{1to4}"); + TEST64("\x62\xf1\x74\x9a\x58\x40\xff", "vaddps xmm0{k2}{z}, xmm1, dword ptr [rax-0x4]{1to4}"); + TEST64("\x62\xf1\x74\x98\x58\x40\xff", "vaddps xmm0, xmm1, dword ptr [rax-0x4]{1to4}"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x28\x58\x40\xff", "vaddps ymm0, ymm1, ymmword ptr [rax-0x20]"); + TEST64("\x62\xf1\x74\x2a\x58\x40\xff", "vaddps ymm0{k2}, ymm1, ymmword ptr [rax-0x20]"); + TEST64("\x62\xf1\x74\xaa\x58\x40\xff", "vaddps ymm0{k2}{z}, ymm1, ymmword ptr [rax-0x20]"); + TEST64("\x62\xf1\x74\xa8\x58\x40\xff", "vaddps ymm0, ymm1, ymmword ptr [rax-0x20]"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x38\x58\x40\xff", "vaddps ymm0, ymm1, dword ptr [rax-0x4]{1to8}"); + TEST64("\x62\xf1\x74\x3a\x58\x40\xff", "vaddps ymm0{k2}, ymm1, dword ptr [rax-0x4]{1to8}"); + TEST64("\x62\xf1\x74\xba\x58\x40\xff", "vaddps ymm0{k2}{z}, ymm1, dword ptr [rax-0x4]{1to8}"); + TEST64("\x62\xf1\x74\xb8\x58\x40\xff", "vaddps ymm0, ymm1, dword ptr [rax-0x4]{1to8}"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x48\x58\x40\xff", "vaddps zmm0, zmm1, zmmword ptr [rax-0x40]"); + TEST64("\x62\xf1\x74\x4a\x58\x40\xff", "vaddps zmm0{k2}, zmm1, zmmword ptr [rax-0x40]"); + TEST64("\x62\xf1\x74\xca\x58\x40\xff", "vaddps zmm0{k2}{z}, zmm1, zmmword ptr [rax-0x40]"); + TEST64("\x62\xf1\x74\xc8\x58\x40\xff", "vaddps zmm0, zmm1, zmmword ptr [rax-0x40]"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x58\x58\x40\xff", "vaddps zmm0, zmm1, dword ptr [rax-0x4]{1to16}"); + TEST64("\x62\xf1\x74\x5a\x58\x40\xff", "vaddps zmm0{k2}, zmm1, dword ptr [rax-0x4]{1to16}"); + TEST64("\x62\xf1\x74\xda\x58\x40\xff", "vaddps zmm0{k2}{z}, zmm1, dword ptr [rax-0x4]{1to16}"); + TEST64("\x62\xf1\x74\xd8\x58\x40\xff", "vaddps zmm0, zmm1, dword ptr [rax-0x4]{1to16}"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x68\x58\x40\xff", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\x6a\x58\x40\xff", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\xea\x58\x40\xff", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\xe8\x58\x40\xff", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\x78\x58\x40\xff", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\x7a\x58\x40\xff", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\xfa\x58\x40\xff", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\xf8\x58\x40\xff", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\x08\x58\x80\x01\x00\x00\x00", "vaddps xmm0, xmm1, xmmword ptr [rax+0x1]"); + TEST64("\x62\xf1\x74\x0a\x58\x80\x01\x00\x00\x00", "vaddps xmm0{k2}, xmm1, xmmword ptr [rax+0x1]"); + TEST64("\x62\xf1\x74\x8a\x58\x80\x01\x00\x00\x00", "vaddps xmm0{k2}{z}, xmm1, xmmword ptr [rax+0x1]"); + TEST64("\x62\xf1\x74\x88\x58\x80\x01\x00\x00\x00", "vaddps xmm0, xmm1, xmmword ptr [rax+0x1]"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x18\x58\x80\x01\x00\x00\x00", "vaddps xmm0, xmm1, dword ptr [rax+0x1]{1to4}"); + TEST64("\x62\xf1\x74\x1a\x58\x80\x01\x00\x00\x00", "vaddps xmm0{k2}, xmm1, dword ptr [rax+0x1]{1to4}"); + TEST64("\x62\xf1\x74\x9a\x58\x80\x01\x00\x00\x00", "vaddps xmm0{k2}{z}, xmm1, dword ptr [rax+0x1]{1to4}"); + TEST64("\x62\xf1\x74\x98\x58\x80\x01\x00\x00\x00", "vaddps xmm0, xmm1, dword ptr [rax+0x1]{1to4}"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x28\x58\x80\x01\x00\x00\x00", "vaddps ymm0, ymm1, ymmword ptr [rax+0x1]"); + TEST64("\x62\xf1\x74\x2a\x58\x80\x01\x00\x00\x00", "vaddps ymm0{k2}, ymm1, ymmword ptr [rax+0x1]"); + TEST64("\x62\xf1\x74\xaa\x58\x80\x01\x00\x00\x00", "vaddps ymm0{k2}{z}, ymm1, 
ymmword ptr [rax+0x1]"); + TEST64("\x62\xf1\x74\xa8\x58\x80\x01\x00\x00\x00", "vaddps ymm0, ymm1, ymmword ptr [rax+0x1]"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x38\x58\x80\x01\x00\x00\x00", "vaddps ymm0, ymm1, dword ptr [rax+0x1]{1to8}"); + TEST64("\x62\xf1\x74\x3a\x58\x80\x01\x00\x00\x00", "vaddps ymm0{k2}, ymm1, dword ptr [rax+0x1]{1to8}"); + TEST64("\x62\xf1\x74\xba\x58\x80\x01\x00\x00\x00", "vaddps ymm0{k2}{z}, ymm1, dword ptr [rax+0x1]{1to8}"); + TEST64("\x62\xf1\x74\xb8\x58\x80\x01\x00\x00\x00", "vaddps ymm0, ymm1, dword ptr [rax+0x1]{1to8}"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x48\x58\x80\x01\x00\x00\x00", "vaddps zmm0, zmm1, zmmword ptr [rax+0x1]"); + TEST64("\x62\xf1\x74\x4a\x58\x80\x01\x00\x00\x00", "vaddps zmm0{k2}, zmm1, zmmword ptr [rax+0x1]"); + TEST64("\x62\xf1\x74\xca\x58\x80\x01\x00\x00\x00", "vaddps zmm0{k2}{z}, zmm1, zmmword ptr [rax+0x1]"); + TEST64("\x62\xf1\x74\xc8\x58\x80\x01\x00\x00\x00", "vaddps zmm0, zmm1, zmmword ptr [rax+0x1]"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x58\x58\x80\x01\x00\x00\x00", "vaddps zmm0, zmm1, dword ptr [rax+0x1]{1to16}"); + TEST64("\x62\xf1\x74\x5a\x58\x80\x01\x00\x00\x00", "vaddps zmm0{k2}, zmm1, dword ptr [rax+0x1]{1to16}"); + TEST64("\x62\xf1\x74\xda\x58\x80\x01\x00\x00\x00", "vaddps zmm0{k2}{z}, zmm1, dword ptr [rax+0x1]{1to16}"); + TEST64("\x62\xf1\x74\xd8\x58\x80\x01\x00\x00\x00", "vaddps zmm0, zmm1, dword ptr [rax+0x1]{1to16}"); // EVEX.z = 1 + TEST64("\x62\xf1\x74\x68\x58\x80\x01\x00\x00\x00", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\x6a\x58\x80\x01\x00\x00\x00", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\xea\x58\x80\x01\x00\x00\x00", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\xe8\x58\x80\x01\x00\x00\x00", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\x78\x58\x80\x01\x00\x00\x00", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\x7a\x58\x80\x01\x00\x00\x00", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\xfa\x58\x80\x01\x00\x00\x00", "UD"); // EVEX.L'L = 11 + TEST64("\x62\xf1\x74\xf8\x58\x80\x01\x00\x00\x00", "UD"); // EVEX.L'L = 11 + + // Test register extensions encoded in EVEX prefixs + TEST32("\x62\xf1\x74\x08\x58\xc2", "vaddps xmm0, xmm1, xmm2"); + TEST64("\x62\xf1\x74\x08\x58\xc2", "vaddps xmm0, xmm1, xmm2"); + TEST32("\x62\xd1\x74\x08\x58\xc2", "vaddps xmm0, xmm1, xmm2"); + TEST64("\x62\xd1\x74\x08\x58\xc2", "vaddps xmm0, xmm1, xmm10"); + TEST64("\x62\xb1\x74\x08\x58\xc2", "vaddps xmm0, xmm1, xmm18"); + TEST64("\x62\x91\x74\x08\x58\xc2", "vaddps xmm0, xmm1, xmm26"); + TEST32("\x62\xf1\x34\x08\x58\xc2", "vaddps xmm0, xmm1, xmm2"); + TEST64("\x62\xf1\x34\x08\x58\xc2", "vaddps xmm0, xmm9, xmm2"); + TEST32("\x62\xd1\x34\x08\x58\xc2", "vaddps xmm0, xmm1, xmm2"); + TEST64("\x62\xd1\x34\x08\x58\xc2", "vaddps xmm0, xmm9, xmm10"); + TEST64("\x62\xb1\x34\x08\x58\xc2", "vaddps xmm0, xmm9, xmm18"); + TEST64("\x62\x91\x34\x08\x58\xc2", "vaddps xmm0, xmm9, xmm26"); + TEST32("\x62\xf1\x74\x00\x58\xc2", "UD"); // EVEX.V' = 0 + TEST64("\x62\xf1\x74\x00\x58\xc2", "vaddps xmm0, xmm17, xmm2"); + TEST32("\x62\xd1\x74\x00\x58\xc2", "UD"); // EVEX.V' = 0 + TEST64("\x62\xd1\x74\x00\x58\xc2", "vaddps xmm0, xmm17, xmm10"); + TEST64("\x62\xb1\x74\x00\x58\xc2", "vaddps xmm0, xmm17, xmm18"); + TEST64("\x62\x91\x74\x00\x58\xc2", "vaddps xmm0, xmm17, xmm26"); + TEST32("\x62\xf1\x34\x00\x58\xc2", "UD"); // EVEX.V' = 0 + TEST64("\x62\xf1\x34\x00\x58\xc2", "vaddps xmm0, xmm25, xmm2"); + TEST32("\x62\xd1\x34\x00\x58\xc2", "UD"); // EVEX.V' = 0 + TEST64("\x62\xd1\x34\x00\x58\xc2", "vaddps xmm0, xmm25, xmm10"); + TEST64("\x62\xb1\x34\x00\x58\xc2", 
"vaddps xmm0, xmm25, xmm18"); + TEST64("\x62\x91\x34\x00\x58\xc2", "vaddps xmm0, xmm25, xmm26"); + TEST64("\x62\x71\x74\x08\x58\xc2", "vaddps xmm8, xmm1, xmm2"); + TEST64("\x62\x51\x74\x08\x58\xc2", "vaddps xmm8, xmm1, xmm10"); + TEST64("\x62\x31\x74\x08\x58\xc2", "vaddps xmm8, xmm1, xmm18"); + TEST64("\x62\x11\x74\x08\x58\xc2", "vaddps xmm8, xmm1, xmm26"); + TEST64("\x62\x71\x34\x08\x58\xc2", "vaddps xmm8, xmm9, xmm2"); + TEST64("\x62\x51\x34\x08\x58\xc2", "vaddps xmm8, xmm9, xmm10"); + TEST64("\x62\x31\x34\x08\x58\xc2", "vaddps xmm8, xmm9, xmm18"); + TEST64("\x62\x11\x34\x08\x58\xc2", "vaddps xmm8, xmm9, xmm26"); + TEST64("\x62\x71\x74\x00\x58\xc2", "vaddps xmm8, xmm17, xmm2"); + TEST64("\x62\x51\x74\x00\x58\xc2", "vaddps xmm8, xmm17, xmm10"); + TEST64("\x62\x31\x74\x00\x58\xc2", "vaddps xmm8, xmm17, xmm18"); + TEST64("\x62\x11\x74\x00\x58\xc2", "vaddps xmm8, xmm17, xmm26"); + TEST64("\x62\x71\x34\x00\x58\xc2", "vaddps xmm8, xmm25, xmm2"); + TEST64("\x62\x51\x34\x00\x58\xc2", "vaddps xmm8, xmm25, xmm10"); + TEST64("\x62\x31\x34\x00\x58\xc2", "vaddps xmm8, xmm25, xmm18"); + TEST64("\x62\x11\x34\x00\x58\xc2", "vaddps xmm8, xmm25, xmm26"); + TEST32("\x62\xe1\x74\x08\x58\xc2", "vaddps xmm0, xmm1, xmm2"); + TEST64("\x62\xe1\x74\x08\x58\xc2", "vaddps xmm16, xmm1, xmm2"); + TEST32("\x62\xc1\x74\x08\x58\xc2", "vaddps xmm0, xmm1, xmm2"); + TEST64("\x62\xc1\x74\x08\x58\xc2", "vaddps xmm16, xmm1, xmm10"); + TEST64("\x62\xa1\x74\x08\x58\xc2", "vaddps xmm16, xmm1, xmm18"); + TEST64("\x62\x81\x74\x08\x58\xc2", "vaddps xmm16, xmm1, xmm26"); + TEST32("\x62\xe1\x34\x08\x58\xc2", "vaddps xmm0, xmm1, xmm2"); + TEST64("\x62\xe1\x34\x08\x58\xc2", "vaddps xmm16, xmm9, xmm2"); + TEST32("\x62\xc1\x34\x08\x58\xc2", "vaddps xmm0, xmm1, xmm2"); + TEST64("\x62\xc1\x34\x08\x58\xc2", "vaddps xmm16, xmm9, xmm10"); + TEST64("\x62\xa1\x34\x08\x58\xc2", "vaddps xmm16, xmm9, xmm18"); + TEST64("\x62\x81\x34\x08\x58\xc2", "vaddps xmm16, xmm9, xmm26"); + TEST32("\x62\xe1\x74\x00\x58\xc2", "UD"); // EVEX.V' = 0 + TEST64("\x62\xe1\x74\x00\x58\xc2", "vaddps xmm16, xmm17, xmm2"); + TEST32("\x62\xc1\x74\x00\x58\xc2", "UD"); // EVEX.V' = 0 + TEST64("\x62\xc1\x74\x00\x58\xc2", "vaddps xmm16, xmm17, xmm10"); + TEST64("\x62\xa1\x74\x00\x58\xc2", "vaddps xmm16, xmm17, xmm18"); + TEST64("\x62\x81\x74\x00\x58\xc2", "vaddps xmm16, xmm17, xmm26"); + TEST32("\x62\xe1\x34\x00\x58\xc2", "UD"); // EVEX.V' = 0 + TEST64("\x62\xe1\x34\x00\x58\xc2", "vaddps xmm16, xmm25, xmm2"); + TEST32("\x62\xc1\x34\x00\x58\xc2", "UD"); // EVEX.V' = 0 + TEST64("\x62\xc1\x34\x00\x58\xc2", "vaddps xmm16, xmm25, xmm10"); + TEST64("\x62\xa1\x34\x00\x58\xc2", "vaddps xmm16, xmm25, xmm18"); + TEST64("\x62\x81\x34\x00\x58\xc2", "vaddps xmm16, xmm25, xmm26"); + TEST64("\x62\x61\x74\x08\x58\xc2", "vaddps xmm24, xmm1, xmm2"); + TEST64("\x62\x41\x74\x08\x58\xc2", "vaddps xmm24, xmm1, xmm10"); + TEST64("\x62\x21\x74\x08\x58\xc2", "vaddps xmm24, xmm1, xmm18"); + TEST64("\x62\x01\x74\x08\x58\xc2", "vaddps xmm24, xmm1, xmm26"); + TEST64("\x62\x61\x34\x08\x58\xc2", "vaddps xmm24, xmm9, xmm2"); + TEST64("\x62\x41\x34\x08\x58\xc2", "vaddps xmm24, xmm9, xmm10"); + TEST64("\x62\x21\x34\x08\x58\xc2", "vaddps xmm24, xmm9, xmm18"); + TEST64("\x62\x01\x34\x08\x58\xc2", "vaddps xmm24, xmm9, xmm26"); + TEST64("\x62\x61\x74\x00\x58\xc2", "vaddps xmm24, xmm17, xmm2"); + TEST64("\x62\x41\x74\x00\x58\xc2", "vaddps xmm24, xmm17, xmm10"); + TEST64("\x62\x21\x74\x00\x58\xc2", "vaddps xmm24, xmm17, xmm18"); + TEST64("\x62\x01\x74\x00\x58\xc2", "vaddps xmm24, xmm17, xmm26"); + 
TEST64("\x62\x61\x34\x00\x58\xc2", "vaddps xmm24, xmm25, xmm2"); + TEST64("\x62\x41\x34\x00\x58\xc2", "vaddps xmm24, xmm25, xmm10"); + TEST64("\x62\x21\x34\x00\x58\xc2", "vaddps xmm24, xmm25, xmm18"); + TEST64("\x62\x01\x34\x00\x58\xc2", "vaddps xmm24, xmm25, xmm26"); + + // VMOVDDUP has special tuple size for L0. + TEST32("\x62\xf1\xff\x08\x12\x48\x01", "vmovddup xmm1, qword ptr [eax+0x8]"); + TEST64("\x62\xf1\xff\x08\x12\x48\x01", "vmovddup xmm1, qword ptr [rax+0x8]"); + TEST("\x62\xf1\xff\x08\x12\xc8", "vmovddup xmm1, xmm0"); + TEST32("\x62\xf1\xff\x28\x12\x48\x01", "vmovddup ymm1, ymmword ptr [eax+0x20]"); + TEST64("\x62\xf1\xff\x28\x12\x48\x01", "vmovddup ymm1, ymmword ptr [rax+0x20]"); + TEST("\x62\xf1\xff\x28\x12\xc8", "vmovddup ymm1, ymm0"); + TEST32("\x62\xf1\xff\x48\x12\x48\x01", "vmovddup zmm1, zmmword ptr [eax+0x40]"); + TEST64("\x62\xf1\xff\x48\x12\x48\x01", "vmovddup zmm1, zmmword ptr [rax+0x40]"); + TEST("\x62\xf1\xff\x48\x12\xc8", "vmovddup zmm1, zmm0"); + + // Check EVEX.L'L constraints + TEST32("\x62\xf2\x7d\x08\x18\x48\x01", "vbroadcastss xmm1, dword ptr [eax+0x4]"); + TEST64("\x62\xf2\x7d\x08\x18\x48\x01", "vbroadcastss xmm1, dword ptr [rax+0x4]"); + TEST("\x62\xf2\x7d\x08\x18\xc8", "vbroadcastss xmm1, xmm0"); + TEST32("\x62\xf2\x7d\x28\x18\x48\x01", "vbroadcastss ymm1, dword ptr [eax+0x4]"); + TEST64("\x62\xf2\x7d\x28\x18\x48\x01", "vbroadcastss ymm1, dword ptr [rax+0x4]"); + TEST("\x62\xf2\x7d\x28\x18\xc8", "vbroadcastss ymm1, xmm0"); + TEST32("\x62\xf2\x7d\x48\x18\x48\x01", "vbroadcastss zmm1, dword ptr [eax+0x4]"); + TEST64("\x62\xf2\x7d\x48\x18\x48\x01", "vbroadcastss zmm1, dword ptr [rax+0x4]"); + TEST("\x62\xf2\x7d\x48\x18\xc8", "vbroadcastss zmm1, xmm0"); + TEST("\x62\xf2\x7d\x68\x18\x48\x01", "UD"); // EVEX.L'L = 3 + TEST("\x62\xf2\x7d\x68\x18\xc8", "UD"); // EVEX.L'L = 3 + TEST("\x62\xf2\x7d\x08\x19\x48\x01", "UD"); // EVEX.L'L = 0 + TEST("\x62\xf2\x7d\x08\x19\xc8", "UD"); // EVEX.L'L = 0 + TEST32("\x62\xf2\x7d\x28\x19\x48\x01", "vbroadcastf32x2 ymm1, qword ptr [eax+0x8]"); + TEST64("\x62\xf2\x7d\x28\x19\x48\x01", "vbroadcastf32x2 ymm1, qword ptr [rax+0x8]"); + TEST("\x62\xf2\x7d\x28\x19\xc8", "vbroadcastf32x2 ymm1, xmm0"); + TEST32("\x62\xf2\x7d\x48\x19\x48\x01", "vbroadcastf32x2 zmm1, qword ptr [eax+0x8]"); + TEST64("\x62\xf2\x7d\x48\x19\x48\x01", "vbroadcastf32x2 zmm1, qword ptr [rax+0x8]"); + TEST("\x62\xf2\x7d\x48\x19\xc8", "vbroadcastf32x2 zmm1, xmm0"); + TEST("\x62\xf2\x7d\x68\x19\x48\x01", "UD"); // EVEX.L'L = 3 + TEST("\x62\xf2\x7d\x68\x19\xc8", "UD"); // EVEX.L'L = 3 + TEST("\x62\xf2\xfd\x08\x19\x48\x01", "UD"); // EVEX.L'L = 0 + TEST("\x62\xf2\xfd\x08\x19\xc8", "UD"); // EVEX.L'L = 0 + TEST32("\x62\xf2\xfd\x28\x19\x48\x01", "vbroadcastsd ymm1, qword ptr [eax+0x8]"); + TEST64("\x62\xf2\xfd\x28\x19\x48\x01", "vbroadcastsd ymm1, qword ptr [rax+0x8]"); + TEST("\x62\xf2\xfd\x28\x19\xc8", "vbroadcastsd ymm1, xmm0"); + TEST32("\x62\xf2\xfd\x48\x19\x48\x01", "vbroadcastsd zmm1, qword ptr [eax+0x8]"); + TEST64("\x62\xf2\xfd\x48\x19\x48\x01", "vbroadcastsd zmm1, qword ptr [rax+0x8]"); + TEST("\x62\xf2\xfd\x48\x19\xc8", "vbroadcastsd zmm1, xmm0"); + TEST("\x62\xf2\xfd\x68\x19\x48\x01", "UD"); // EVEX.L'L = 3 + TEST("\x62\xf2\xfd\x68\x19\xc8", "UD"); // EVEX.L'L = 3 + TEST("\x62\xf2\x7d\x08\x1a\x48\x01", "UD"); // EVEX.L'L = 0 + TEST("\x62\xf2\x7d\x08\x1a\xc8", "UD"); // EVEX.L'L = 0 + TEST32("\x62\xf2\x7d\x28\x1a\x48\x01", "vbroadcastf32x4 ymm1, xmmword ptr [eax+0x10]"); + TEST64("\x62\xf2\x7d\x28\x1a\x48\x01", "vbroadcastf32x4 ymm1, xmmword ptr [rax+0x10]"); 
+ TEST("\x62\xf2\x7d\x28\x1a\xc8", "UD"); // must have a memory operand + TEST32("\x62\xf2\x7d\x48\x1a\x48\x01", "vbroadcastf32x4 zmm1, xmmword ptr [eax+0x10]"); + TEST64("\x62\xf2\x7d\x48\x1a\x48\x01", "vbroadcastf32x4 zmm1, xmmword ptr [rax+0x10]"); + TEST("\x62\xf2\x7d\x48\x1a\xc8", "UD"); // must have a memory operand + TEST("\x62\xf2\x7d\x68\x1a\x48\x01", "UD"); // EVEX.L'L = 3 + TEST("\x62\xf2\x7d\x68\x1a\xc8", "UD"); // EVEX.L'L = 3 + TEST("\x62\xf2\xfd\x08\x1a\x48\x01", "UD"); // EVEX.L'L = 0 + TEST("\x62\xf2\xfd\x08\x1a\xc8", "UD"); // EVEX.L'L = 0 + TEST32("\x62\xf2\xfd\x28\x1a\x48\x01", "vbroadcastf64x2 ymm1, xmmword ptr [eax+0x10]"); + TEST64("\x62\xf2\xfd\x28\x1a\x48\x01", "vbroadcastf64x2 ymm1, xmmword ptr [rax+0x10]"); + TEST("\x62\xf2\xfd\x28\x1a\xc8", "UD"); // must have a memory operand + TEST32("\x62\xf2\xfd\x48\x1a\x48\x01", "vbroadcastf64x2 zmm1, xmmword ptr [eax+0x10]"); + TEST64("\x62\xf2\xfd\x48\x1a\x48\x01", "vbroadcastf64x2 zmm1, xmmword ptr [rax+0x10]"); + TEST("\x62\xf2\xfd\x48\x1a\xc8", "UD"); // must have a memory operand + TEST("\x62\xf2\xfd\x68\x1a\x48\x01", "UD"); // EVEX.L'L = 3 + TEST("\x62\xf2\xfd\x68\x1a\xc8", "UD"); // EVEX.L'L = 3 + TEST("\x62\xf2\x7d\x08\x1b\x48\x01", "UD"); // EVEX.L'L = 0 + TEST("\x62\xf2\x7d\x08\x1b\xc8", "UD"); // EVEX.L'L = 0 + TEST32("\x62\xf2\x7d\x28\x1b\x48\x01", "UD"); // EVEX.L'L = 1 + TEST64("\x62\xf2\x7d\x28\x1b\x48\x01", "UD"); // EVEX.L'L = 1 + TEST("\x62\xf2\x7d\x28\x1b\xc8", "UD"); // EVEX.L'L = 1 + TEST32("\x62\xf2\x7d\x48\x1b\x48\x01", "vbroadcastf32x8 zmm1, ymmword ptr [eax+0x20]"); + TEST64("\x62\xf2\x7d\x48\x1b\x48\x01", "vbroadcastf32x8 zmm1, ymmword ptr [rax+0x20]"); + TEST("\x62\xf2\x7d\x48\x1b\xc8", "UD"); // must have a memory operand + TEST("\x62\xf2\x7d\x68\x1b\x48\x01", "UD"); // EVEX.L'L = 3 + TEST("\x62\xf2\x7d\x68\x1b\xc8", "UD"); // EVEX.L'L = 3 + TEST("\x62\xf2\xfd\x08\x1b\x48\x01", "UD"); // EVEX.L'L = 0 + TEST("\x62\xf2\xfd\x08\x1b\xc8", "UD"); // EVEX.L'L = 0 + TEST32("\x62\xf2\xfd\x28\x1b\x48\x01", "UD"); // EVEX.L'L = 1 + TEST64("\x62\xf2\xfd\x28\x1b\x48\x01", "UD"); // EVEX.L'L = 1 + TEST("\x62\xf2\xfd\x28\x1b\xc8", "UD"); // EVEX.L'L = 1 + TEST32("\x62\xf2\xfd\x48\x1b\x48\x01", "vbroadcastf64x4 zmm1, ymmword ptr [eax+0x20]"); + TEST64("\x62\xf2\xfd\x48\x1b\x48\x01", "vbroadcastf64x4 zmm1, ymmword ptr [rax+0x20]"); + TEST("\x62\xf2\xfd\x48\x1b\xc8", "UD"); // must have a memory operand + TEST("\x62\xf2\xfd\x68\x1b\x48\x01", "UD"); // EVEX.L'L = 3 + TEST("\x62\xf2\xfd\x68\x1b\xc8", "UD"); // EVEX.L'L = 3 + + // EVEX PEXTR/PINSR/MOV_G2X/MOV_X2G/PBROADCAST ignore EVEX.W in 32-bit mode + // and have different mnemonics on due to this distinction. 
+ TEST32("\x62\xf3\x7d\x08\x14\x00\x01", "vpextrb byte ptr [eax], xmm0, 0x1"); + TEST64("\x62\xf3\x7d\x08\x14\x00\x01", "vpextrb byte ptr [rax], xmm0, 0x1"); + TEST32("\x62\xf3\x7d\x08\x14\x40\x01\x01", "vpextrb byte ptr [eax+0x1], xmm0, 0x1"); + TEST64("\x62\xf3\x7d\x08\x14\x40\x01\x01", "vpextrb byte ptr [rax+0x1], xmm0, 0x1"); + TEST("\x62\xf3\x7d\x08\x14\xc0\x01", "vpextrb eax, xmm0, 0x1"); + TEST("\x62\xf3\x7d\x18\x14\xc0\x01", "UD"); // EVEX.b != 0 + TEST("\x62\xf3\x7d\x28\x14\xc0\x01", "UD"); // EVEX.L'L != 0 + TEST("\x62\xf3\x7d\x48\x14\xc0\x01", "UD"); // EVEX.L'L != 0 + TEST("\x62\xf3\x7d\x88\x14\xc0\x01", "UD"); // EVEX.z != 0 + TEST("\x62\xf3\x7d\x09\x14\xc0\x01", "UD"); // EVEX.aaa != 0 + TEST32("\x62\xf3\x7d\x08\x15\x00\x01", "vpextrw word ptr [eax], xmm0, 0x1"); + TEST64("\x62\xf3\x7d\x08\x15\x00\x01", "vpextrw word ptr [rax], xmm0, 0x1"); + TEST32("\x62\xf3\x7d\x08\x15\x40\x01\x01", "vpextrw word ptr [eax+0x2], xmm0, 0x1"); + TEST64("\x62\xf3\x7d\x08\x15\x40\x01\x01", "vpextrw word ptr [rax+0x2], xmm0, 0x1"); + TEST("\x62\xf3\x7d\x08\x15\xc0\x01", "vpextrw eax, xmm0, 0x1"); + TEST("\x62\xf1\x7d\x08\xc5\xc0\x01", "vpextrw eax, xmm0, 0x1"); + TEST("\x62\xf1\x7d\x08\xc5\x00\x01", "UD"); // must have register operand + TEST32("\x62\xf3\x7d\x08\x16\x00\x01", "vpextrd dword ptr [eax], xmm0, 0x1"); + TEST64("\x62\xf3\x7d\x08\x16\x00\x01", "vpextrd dword ptr [rax], xmm0, 0x1"); + TEST32("\x62\xf3\x7d\x08\x16\x40\x01\x01", "vpextrd dword ptr [eax+0x4], xmm0, 0x1"); + TEST64("\x62\xf3\x7d\x08\x16\x40\x01\x01", "vpextrd dword ptr [rax+0x4], xmm0, 0x1"); + TEST("\x62\xf3\x7d\x08\x16\xc0\x01", "vpextrd eax, xmm0, 0x1"); + TEST32("\x62\xf3\xfd\x08\x16\x00\x01", "vpextrd dword ptr [eax], xmm0, 0x1"); // EVEX.W ignored + TEST64("\x62\xf3\xfd\x08\x16\x00\x01", "vpextrq qword ptr [rax], xmm0, 0x1"); + TEST32("\x62\xf3\xfd\x08\x16\x40\x01\x01", "vpextrd dword ptr [eax+0x4], xmm0, 0x1"); // EVEX.W ignored + TEST64("\x62\xf3\xfd\x08\x16\x40\x01\x01", "vpextrq qword ptr [rax+0x8], xmm0, 0x1"); + TEST32("\x62\xf3\xfd\x08\x16\xc0\x01", "vpextrd eax, xmm0, 0x1"); // EVEX.W ignored + TEST64("\x62\xf3\xfd\x08\x16\xc0\x01", "vpextrq rax, xmm0, 0x1"); + TEST32("\x62\xf3\x75\x08\x20\x00\x01", "vpinsrb xmm0, xmm1, byte ptr [eax], 0x1"); + TEST64("\x62\xf3\x75\x08\x20\x00\x01", "vpinsrb xmm0, xmm1, byte ptr [rax], 0x1"); + TEST32("\x62\xf3\x75\x08\x20\x40\x01\x01", "vpinsrb xmm0, xmm1, byte ptr [eax+0x1], 0x1"); + TEST64("\x62\xf3\x75\x08\x20\x40\x01\x01", "vpinsrb xmm0, xmm1, byte ptr [rax+0x1], 0x1"); + TEST("\x62\xf3\x75\x08\x20\xc0\x01", "vpinsrb xmm0, xmm1, al, 0x1"); + TEST32("\x62\xf1\x75\x08\xc4\x00\x01", "vpinsrw xmm0, xmm1, word ptr [eax], 0x1"); + TEST64("\x62\xf1\x75\x08\xc4\x00\x01", "vpinsrw xmm0, xmm1, word ptr [rax], 0x1"); + TEST32("\x62\xf1\x75\x08\xc4\x40\x01\x01", "vpinsrw xmm0, xmm1, word ptr [eax+0x2], 0x1"); + TEST64("\x62\xf1\x75\x08\xc4\x40\x01\x01", "vpinsrw xmm0, xmm1, word ptr [rax+0x2], 0x1"); + TEST("\x62\xf1\x75\x08\xc4\xc0\x01", "vpinsrw xmm0, xmm1, ax, 0x1"); + TEST32("\x62\xf3\x75\x08\x22\x00\x01", "vpinsrd xmm0, xmm1, dword ptr [eax], 0x1"); + TEST64("\x62\xf3\x75\x08\x22\x00\x01", "vpinsrd xmm0, xmm1, dword ptr [rax], 0x1"); + TEST32("\x62\xf3\x75\x08\x22\x40\x01\x01", "vpinsrd xmm0, xmm1, dword ptr [eax+0x4], 0x1"); + TEST64("\x62\xf3\x75\x08\x22\x40\x01\x01", "vpinsrd xmm0, xmm1, dword ptr [rax+0x4], 0x1"); + TEST("\x62\xf3\x75\x08\x22\xc0\x01", "vpinsrd xmm0, xmm1, eax, 0x1"); + TEST32("\x62\xf3\xf5\x08\x22\x00\x01", "vpinsrd xmm0, xmm1, dword ptr [eax], 0x1"); // 
EVEX.W ignored + TEST64("\x62\xf3\xf5\x08\x22\x00\x01", "vpinsrq xmm0, xmm1, qword ptr [rax], 0x1"); + TEST32("\x62\xf3\xf5\x08\x22\x40\x01\x01", "vpinsrd xmm0, xmm1, dword ptr [eax+0x4], 0x1"); // EVEX.W ignored + TEST64("\x62\xf3\xf5\x08\x22\x40\x01\x01", "vpinsrq xmm0, xmm1, qword ptr [rax+0x8], 0x1"); + TEST32("\x62\xf3\xf5\x08\x22\xc0\x01", "vpinsrd xmm0, xmm1, eax, 0x1"); // EVEX.W ignored + TEST64("\x62\xf3\xf5\x08\x22\xc0\x01", "vpinsrq xmm0, xmm1, rax, 0x1"); + TEST32("\x62\xf1\x7d\x08\x6e\x40\x01", "vmovd xmm0, dword ptr [eax+0x4]"); + TEST64("\x62\xf1\x7d\x08\x6e\x40\x01", "vmovd xmm0, dword ptr [rax+0x4]"); + TEST("\x62\xf1\x7d\x28\x6e\x40\x01", "UD"); // EVEX.L'L = 1 + TEST("\x62\xf1\x7d\x48\x6e\x40\x01", "UD"); // EVEX.L'L = 2 + TEST("\x62\xf1\x7d\x68\x6e\x40\x01", "UD"); // EVEX.L'L = 3 + TEST("\x62\xf1\x7d\x08\x6e\xc1", "vmovd xmm0, ecx"); + TEST("\x62\xf1\x7d\x28\x6e\xc1", "UD"); // EVEX.L'L = 1 + TEST("\x62\xf1\x7d\x48\x6e\xc1", "UD"); // EVEX.L'L = 2 + TEST("\x62\xf1\x7d\x68\x6e\xc1", "UD"); // EVEX.L'L = 3 + TEST32("\x62\xf1\xfd\x08\x6e\x40\x01", "vmovd xmm0, dword ptr [eax+0x4]"); // EVEX.W ignored + TEST64("\x62\xf1\xfd\x08\x6e\x40\x01", "vmovq xmm0, qword ptr [rax+0x8]"); + TEST("\x62\xf1\xfd\x28\x6e\x40\x01", "UD"); // EVEX.L'L = 1 + TEST("\x62\xf1\xfd\x48\x6e\x40\x01", "UD"); // EVEX.L'L = 2 + TEST("\x62\xf1\xfd\x68\x6e\x40\x01", "UD"); // EVEX.L'L = 3 + TEST32("\x62\xf1\xfd\x08\x6e\xc1", "vmovd xmm0, ecx"); // EVEX.W ignored + TEST64("\x62\xf1\xfd\x08\x6e\xc1", "vmovq xmm0, rcx"); + TEST("\x62\xf1\xfd\x28\x6e\xc1", "UD"); // EVEX.L'L = 1 + TEST("\x62\xf1\xfd\x48\x6e\xc1", "UD"); // EVEX.L'L = 2 + TEST("\x62\xf1\xfd\x68\x6e\xc1", "UD"); // EVEX.L'L = 3 + TEST32("\x62\xf1\x7d\x08\x7e\x40\x01", "vmovd dword ptr [eax+0x4], xmm0"); + TEST64("\x62\xf1\x7d\x08\x7e\x40\x01", "vmovd dword ptr [rax+0x4], xmm0"); + TEST("\x62\xf1\x7d\x28\x7e\x40\x01", "UD"); // EVEX.L'L = 1 + TEST("\x62\xf1\x7d\x48\x7e\x40\x01", "UD"); // EVEX.L'L = 2 + TEST("\x62\xf1\x7d\x68\x7e\x40\x01", "UD"); // EVEX.L'L = 3 + TEST("\x62\xf1\x7d\x08\x7e\xc1", "vmovd ecx, xmm0"); + TEST("\x62\xf1\x7d\x28\x7e\xc1", "UD"); // EVEX.L'L = 1 + TEST("\x62\xf1\x7d\x48\x7e\xc1", "UD"); // EVEX.L'L = 2 + TEST("\x62\xf1\x7d\x68\x7e\xc1", "UD"); // EVEX.L'L = 3 + TEST32("\x62\xf1\xfd\x08\x7e\x40\x01", "vmovd dword ptr [eax+0x4], xmm0"); // EVEX.W ignored + TEST64("\x62\xf1\xfd\x08\x7e\x40\x01", "vmovq qword ptr [rax+0x8], xmm0"); + TEST("\x62\xf1\xfd\x28\x7e\x40\x01", "UD"); // EVEX.L'L = 1 + TEST("\x62\xf1\xfd\x48\x7e\x40\x01", "UD"); // EVEX.L'L = 2 + TEST("\x62\xf1\xfd\x68\x7e\x40\x01", "UD"); // EVEX.L'L = 3 + TEST32("\x62\xf1\xfd\x08\x7e\xc1", "vmovd ecx, xmm0"); // EVEX.W ignored + TEST64("\x62\xf1\xfd\x08\x7e\xc1", "vmovq rcx, xmm0"); + TEST("\x62\xf1\xfd\x28\x7e\xc1", "UD"); // EVEX.L'L = 1 + TEST("\x62\xf1\xfd\x48\x7e\xc1", "UD"); // EVEX.L'L = 2 + TEST("\x62\xf1\xfd\x68\x7e\xc1", "UD"); // EVEX.L'L = 3 + TEST("\x62\xf2\x7d\x08\x7a\x00", "UD"); // Must have register operand + TEST("\x62\xf2\x7d\x08\x7a\xc0", "vpbroadcastb xmm0, al"); + TEST("\x62\xf2\x7d\x28\x7a\xc0", "vpbroadcastb ymm0, al"); + TEST("\x62\xf2\x7d\x48\x7a\xc0", "vpbroadcastb zmm0, al"); + TEST("\x62\xf2\xfd\x08\x7a\xc0", "UD"); // EVEX.W = 1 + TEST("\x62\xf2\x7d\x18\x7a\xc0", "UD"); // EVEX.b = 1 + TEST("\x62\xf2\x7d\x09\x7a\xc0", "vpbroadcastb xmm0{k1}, al"); + TEST("\x62\xf2\x7d\x89\x7a\xc0", "vpbroadcastb xmm0{k1}{z}, al"); + TEST("\x62\xf2\x7d\x08\x7b\x00", "UD"); // Must have register operand + TEST("\x62\xf2\x7d\x08\x7b\xc0", 
"vpbroadcastw xmm0, ax"); + TEST("\x62\xf2\x7d\x28\x7b\xc0", "vpbroadcastw ymm0, ax"); + TEST("\x62\xf2\x7d\x48\x7b\xc0", "vpbroadcastw zmm0, ax"); + TEST("\x62\xf2\xfd\x08\x7b\xc0", "UD"); // EVEX.W = 1 + TEST("\x62\xf2\x7d\x18\x7b\xc0", "UD"); // EVEX.b = 1 + TEST("\x62\xf2\x7d\x09\x7b\xc0", "vpbroadcastw xmm0{k1}, ax"); + TEST("\x62\xf2\x7d\x89\x7b\xc0", "vpbroadcastw xmm0{k1}{z}, ax"); + TEST("\x62\xf2\x7d\x08\x7c\x00", "UD"); // Must have register operand + TEST("\x62\xf2\x7d\x08\x7c\xc0", "vpbroadcastd xmm0, eax"); + TEST("\x62\xf2\x7d\x28\x7c\xc0", "vpbroadcastd ymm0, eax"); + TEST("\x62\xf2\x7d\x48\x7c\xc0", "vpbroadcastd zmm0, eax"); + TEST("\x62\xf2\x7d\x18\x7c\xc0", "UD"); // EVEX.b = 1 + TEST("\x62\xf2\x7d\x09\x7c\xc0", "vpbroadcastd xmm0{k1}, eax"); + TEST("\x62\xf2\x7d\x89\x7c\xc0", "vpbroadcastd xmm0{k1}{z}, eax"); + TEST("\x62\xf2\xfd\x08\x7c\x00", "UD"); // Must have register operand + TEST32("\x62\xf2\xfd\x08\x7c\xc0", "vpbroadcastd xmm0, eax"); // EVEX.W ignored + TEST64("\x62\xf2\xfd\x08\x7c\xc0", "vpbroadcastq xmm0, rax"); + TEST32("\x62\xf2\xfd\x28\x7c\xc0", "vpbroadcastd ymm0, eax"); // EVEX.W ignored + TEST64("\x62\xf2\xfd\x28\x7c\xc0", "vpbroadcastq ymm0, rax"); + TEST32("\x62\xf2\xfd\x48\x7c\xc0", "vpbroadcastd zmm0, eax"); // EVEX.W ignored + TEST64("\x62\xf2\xfd\x48\x7c\xc0", "vpbroadcastq zmm0, rax"); + TEST("\x62\xf2\xfd\x18\x7c\xc0", "UD"); // EVEX.b = 1 + TEST32("\x62\xf2\xfd\x09\x7c\xc0", "vpbroadcastd xmm0{k1}, eax"); // EVEX.W ignored + TEST64("\x62\xf2\xfd\x09\x7c\xc0", "vpbroadcastq xmm0{k1}, rax"); + TEST32("\x62\xf2\xfd\x89\x7c\xc0", "vpbroadcastd xmm0{k1}{z}, eax"); // EVEX.W ignored + TEST64("\x62\xf2\xfd\x89\x7c\xc0", "vpbroadcastq xmm0{k1}{z}, rax"); + + // EVEX.z with memory or mask destination is UD + TEST32("\x62\xf2\x7d\x08\x63\x40\x01", "vpcompressb byte ptr [eax+0x1], xmm0"); + TEST64("\x62\xf2\x7d\x08\x63\x40\x01", "vpcompressb byte ptr [rax+0x1], xmm0"); + TEST32("\x62\xf2\x7d\x88\x63\x40\x01", "UD"); // EVEX.z = 1 + TEST64("\x62\xf2\x7d\x88\x63\x40\x01", "UD"); // EVEX.z = 1 + TEST32("\x62\xf2\x7d\x09\x63\x40\x01", "vpcompressb byte ptr [eax+0x1]{k1}, xmm0"); + TEST64("\x62\xf2\x7d\x09\x63\x40\x01", "vpcompressb byte ptr [rax+0x1]{k1}, xmm0"); + TEST32("\x62\xf2\x7d\x89\x63\x40\x01", "UD"); // EVEX.z = 1 + TEST64("\x62\xf2\x7d\x89\x63\x40\x01", "UD"); // EVEX.z = 1 + TEST("\x62\xf2\x7d\x08\x63\xc1", "vpcompressb xmm1, xmm0"); + TEST("\x62\xf2\x7d\x09\x63\xc1", "vpcompressb xmm1{k1}, xmm0"); + TEST("\x62\xf2\x7d\x88\x63\xc1", "vpcompressb xmm1, xmm0"); // EVEX.z ignored? + TEST("\x62\xf2\x7d\x89\x63\xc1", "vpcompressb xmm1{k1}{z}, xmm0"); + TEST("\x62\xf1\x75\x08\x74\xc2", "vpcmpeqb k0, xmm1, xmm2"); + TEST("\x62\xf1\x75\x09\x74\xc2", "vpcmpeqb k0{k1}, xmm1, xmm2"); + TEST("\x62\xf1\x75\x88\x74\xc2", "UD"); // EVEX.z = 1 + TEST("\x62\xf1\x75\x89\x74\xc2", "UD"); // EVEX.z = 1 + + // CVT(T?S[SD]2U?SI|U?SI2S[SD]) ignore EVEX.W in 32-bit mode. 
+ TEST32("\x62\xf1\x7e\x08\x2c\x40\x01", "vcvttss2si eax, dword ptr [eax+0x4]"); + TEST64("\x62\xf1\x7e\x08\x2c\x40\x01", "vcvttss2si eax, dword ptr [rax+0x4]"); + TEST("\x62\xf1\x7e\x18\x2c\x40\x01", "UD"); // EVEX.b with memory operand + TEST("\x62\xf1\x7e\x08\x2c\xc0", "vcvttss2si eax, xmm0"); + TEST("\x62\xf1\x7e\x18\x2c\xc0", "vcvttss2si eax, xmm0, {sae}"); + TEST32("\x62\xf1\xfe\x08\x2c\x40\x01", "vcvttss2si eax, dword ptr [eax+0x4]"); // EVEX.W ignored + TEST64("\x62\xf1\xfe\x08\x2c\x40\x01", "vcvttss2si rax, dword ptr [rax+0x4]"); + TEST32("\x62\xf1\xfe\x08\x2c\xc0", "vcvttss2si eax, xmm0"); // EVEX.W ignored + TEST64("\x62\xf1\xfe\x08\x2c\xc0", "vcvttss2si rax, xmm0"); + TEST32("\x62\xf1\xfe\x18\x2c\xc0", "vcvttss2si eax, xmm0, {sae}"); // EVEX.W ignored + TEST64("\x62\xf1\xfe\x18\x2c\xc0", "vcvttss2si rax, xmm0, {sae}"); + TEST32("\x62\xf1\x7f\x08\x2c\x40\x01", "vcvttsd2si eax, qword ptr [eax+0x8]"); + TEST64("\x62\xf1\x7f\x08\x2c\x40\x01", "vcvttsd2si eax, qword ptr [rax+0x8]"); + TEST("\x62\xf1\x7f\x18\x2c\x40\x01", "UD"); // EVEX.b with memory operand + TEST("\x62\xf1\x7f\x08\x2c\xc0", "vcvttsd2si eax, xmm0"); + TEST("\x62\xf1\x7f\x18\x2c\xc0", "vcvttsd2si eax, xmm0, {sae}"); + TEST32("\x62\xf1\xff\x08\x2c\x40\x01", "vcvttsd2si eax, qword ptr [eax+0x8]"); // EVEX.W ignored + TEST64("\x62\xf1\xff\x08\x2c\x40\x01", "vcvttsd2si rax, qword ptr [rax+0x8]"); + TEST32("\x62\xf1\xff\x08\x2c\xc0", "vcvttsd2si eax, xmm0"); // EVEX.W ignored + TEST64("\x62\xf1\xff\x08\x2c\xc0", "vcvttsd2si rax, xmm0"); + TEST32("\x62\xf1\xff\x18\x2c\xc0", "vcvttsd2si eax, xmm0, {sae}"); // EVEX.W ignored + TEST64("\x62\xf1\xff\x18\x2c\xc0", "vcvttsd2si rax, xmm0, {sae}"); + TEST32("\x62\xf1\x7e\x08\x2d\x40\x01", "vcvtss2si eax, dword ptr [eax+0x4]"); + TEST64("\x62\xf1\x7e\x08\x2d\x40\x01", "vcvtss2si eax, dword ptr [rax+0x4]"); + TEST("\x62\xf1\x7e\x18\x2d\x40\x01", "UD"); // EVEX.b with memory operand + TEST("\x62\xf1\x7e\x08\x2d\xc0", "vcvtss2si eax, xmm0"); + TEST("\x62\xf1\x7e\x18\x2d\xc0", "vcvtss2si eax, xmm0, {rn-sae}"); + TEST32("\x62\xf1\xfe\x08\x2d\x40\x01", "vcvtss2si eax, dword ptr [eax+0x4]"); // EVEX.W ignored + TEST64("\x62\xf1\xfe\x08\x2d\x40\x01", "vcvtss2si rax, dword ptr [rax+0x4]"); + TEST32("\x62\xf1\xfe\x08\x2d\xc0", "vcvtss2si eax, xmm0"); // EVEX.W ignored + TEST64("\x62\xf1\xfe\x08\x2d\xc0", "vcvtss2si rax, xmm0"); + TEST32("\x62\xf1\xfe\x18\x2d\xc0", "vcvtss2si eax, xmm0, {rn-sae}"); // EVEX.W ignored + TEST64("\x62\xf1\xfe\x18\x2d\xc0", "vcvtss2si rax, xmm0, {rn-sae}"); + TEST32("\x62\xf1\x7f\x08\x2d\x40\x01", "vcvtsd2si eax, qword ptr [eax+0x8]"); + TEST64("\x62\xf1\x7f\x08\x2d\x40\x01", "vcvtsd2si eax, qword ptr [rax+0x8]"); + TEST("\x62\xf1\x7f\x18\x2d\x40\x01", "UD"); // EVEX.b with memory operand + TEST("\x62\xf1\x7f\x08\x2d\xc0", "vcvtsd2si eax, xmm0"); + TEST("\x62\xf1\x7f\x18\x2d\xc0", "vcvtsd2si eax, xmm0, {rn-sae}"); + TEST32("\x62\xf1\xff\x08\x2d\x40\x01", "vcvtsd2si eax, qword ptr [eax+0x8]"); // EVEX.W ignored + TEST64("\x62\xf1\xff\x08\x2d\x40\x01", "vcvtsd2si rax, qword ptr [rax+0x8]"); + TEST32("\x62\xf1\xff\x08\x2d\xc0", "vcvtsd2si eax, xmm0"); // EVEX.W ignored + TEST64("\x62\xf1\xff\x08\x2d\xc0", "vcvtsd2si rax, xmm0"); + TEST32("\x62\xf1\xff\x18\x2d\xc0", "vcvtsd2si eax, xmm0, {rn-sae}"); // EVEX.W ignored + TEST64("\x62\xf1\xff\x18\x2d\xc0", "vcvtsd2si rax, xmm0, {rn-sae}"); + TEST32("\x62\xf1\x7e\x08\x78\x40\x01", "vcvttss2usi eax, dword ptr [eax+0x4]"); + TEST64("\x62\xf1\x7e\x08\x78\x40\x01", "vcvttss2usi eax, dword ptr [rax+0x4]"); + 
TEST("\x62\xf1\x7e\x18\x78\x40\x01", "UD"); // EVEX.b with memory operand + TEST("\x62\xf1\x7e\x08\x78\xc0", "vcvttss2usi eax, xmm0"); + TEST("\x62\xf1\x7e\x18\x78\xc0", "vcvttss2usi eax, xmm0, {sae}"); + TEST32("\x62\xf1\xfe\x08\x78\x40\x01", "vcvttss2usi eax, dword ptr [eax+0x4]"); // EVEX.W ignored + TEST64("\x62\xf1\xfe\x08\x78\x40\x01", "vcvttss2usi rax, dword ptr [rax+0x4]"); + TEST32("\x62\xf1\xfe\x08\x78\xc0", "vcvttss2usi eax, xmm0"); // EVEX.W ignored + TEST64("\x62\xf1\xfe\x08\x78\xc0", "vcvttss2usi rax, xmm0"); + TEST32("\x62\xf1\xfe\x18\x78\xc0", "vcvttss2usi eax, xmm0, {sae}"); // EVEX.W ignored + TEST64("\x62\xf1\xfe\x18\x78\xc0", "vcvttss2usi rax, xmm0, {sae}"); + TEST32("\x62\xf1\x7f\x08\x78\x40\x01", "vcvttsd2usi eax, qword ptr [eax+0x8]"); + TEST64("\x62\xf1\x7f\x08\x78\x40\x01", "vcvttsd2usi eax, qword ptr [rax+0x8]"); + TEST("\x62\xf1\x7f\x18\x78\x40\x01", "UD"); // EVEX.b with memory operand + TEST("\x62\xf1\x7f\x08\x78\xc0", "vcvttsd2usi eax, xmm0"); + TEST("\x62\xf1\x7f\x18\x78\xc0", "vcvttsd2usi eax, xmm0, {sae}"); + TEST32("\x62\xf1\xff\x08\x78\x40\x01", "vcvttsd2usi eax, qword ptr [eax+0x8]"); // EVEX.W ignored + TEST64("\x62\xf1\xff\x08\x78\x40\x01", "vcvttsd2usi rax, qword ptr [rax+0x8]"); + TEST32("\x62\xf1\xff\x08\x78\xc0", "vcvttsd2usi eax, xmm0"); // EVEX.W ignored + TEST64("\x62\xf1\xff\x08\x78\xc0", "vcvttsd2usi rax, xmm0"); + TEST32("\x62\xf1\xff\x18\x78\xc0", "vcvttsd2usi eax, xmm0, {sae}"); // EVEX.W ignored + TEST64("\x62\xf1\xff\x18\x78\xc0", "vcvttsd2usi rax, xmm0, {sae}"); + TEST32("\x62\xf1\x7e\x08\x79\x40\x01", "vcvtss2usi eax, dword ptr [eax+0x4]"); + TEST64("\x62\xf1\x7e\x08\x79\x40\x01", "vcvtss2usi eax, dword ptr [rax+0x4]"); + TEST("\x62\xf1\x7e\x18\x79\x40\x01", "UD"); // EVEX.b with memory operand + TEST("\x62\xf1\x7e\x08\x79\xc0", "vcvtss2usi eax, xmm0"); + TEST("\x62\xf1\x7e\x18\x79\xc0", "vcvtss2usi eax, xmm0, {rn-sae}"); + TEST32("\x62\xf1\xfe\x08\x79\x40\x01", "vcvtss2usi eax, dword ptr [eax+0x4]"); // EVEX.W ignored + TEST64("\x62\xf1\xfe\x08\x79\x40\x01", "vcvtss2usi rax, dword ptr [rax+0x4]"); + TEST32("\x62\xf1\xfe\x08\x79\xc0", "vcvtss2usi eax, xmm0"); // EVEX.W ignored + TEST64("\x62\xf1\xfe\x08\x79\xc0", "vcvtss2usi rax, xmm0"); + TEST32("\x62\xf1\xfe\x18\x79\xc0", "vcvtss2usi eax, xmm0, {rn-sae}"); // EVEX.W ignored + TEST64("\x62\xf1\xfe\x18\x79\xc0", "vcvtss2usi rax, xmm0, {rn-sae}"); + TEST32("\x62\xf1\x7f\x08\x79\x40\x01", "vcvtsd2usi eax, qword ptr [eax+0x8]"); + TEST64("\x62\xf1\x7f\x08\x79\x40\x01", "vcvtsd2usi eax, qword ptr [rax+0x8]"); + TEST("\x62\xf1\x7f\x18\x79\x40\x01", "UD"); // EVEX.b with memory operand + TEST("\x62\xf1\x7f\x08\x79\xc0", "vcvtsd2usi eax, xmm0"); + TEST("\x62\xf1\x7f\x18\x79\xc0", "vcvtsd2usi eax, xmm0, {rn-sae}"); + TEST32("\x62\xf1\xff\x08\x79\x40\x01", "vcvtsd2usi eax, qword ptr [eax+0x8]"); // EVEX.W ignored + TEST64("\x62\xf1\xff\x08\x79\x40\x01", "vcvtsd2usi rax, qword ptr [rax+0x8]"); + TEST32("\x62\xf1\xff\x08\x79\xc0", "vcvtsd2usi eax, xmm0"); // EVEX.W ignored + TEST64("\x62\xf1\xff\x08\x79\xc0", "vcvtsd2usi rax, xmm0"); + TEST32("\x62\xf1\xff\x18\x79\xc0", "vcvtsd2usi eax, xmm0, {rn-sae}"); // EVEX.W ignored + TEST64("\x62\xf1\xff\x18\x79\xc0", "vcvtsd2usi rax, xmm0, {rn-sae}"); + TEST32("\x62\xf1\x6e\x08\x2a\x40\x01", "vcvtsi2ss xmm0, xmm2, dword ptr [eax+0x4]"); + TEST64("\x62\xf1\x6e\x08\x2a\x40\x01", "vcvtsi2ss xmm0, xmm2, dword ptr [rax+0x4]"); + TEST("\x62\xf1\x6e\x18\x2a\x40\x01", "UD"); // EVEX.b with memory operand + TEST("\x62\xf1\x6e\x08\x2a\xc0", "vcvtsi2ss xmm0, xmm2, 
eax"); + TEST("\x62\xf1\x6e\x18\x2a\xc0", "vcvtsi2ss xmm0, xmm2, eax, {rn-sae}"); + TEST32("\x62\xf1\xee\x08\x2a\x40\x01", "vcvtsi2ss xmm0, xmm2, dword ptr [eax+0x4]"); // EVEX.W ignored + TEST64("\x62\xf1\xee\x08\x2a\x40\x01", "vcvtsi2ss xmm0, xmm2, qword ptr [rax+0x8]"); + TEST32("\x62\xf1\xee\x08\x2a\xc0", "vcvtsi2ss xmm0, xmm2, eax"); // EVEX.W ignored + TEST64("\x62\xf1\xee\x08\x2a\xc0", "vcvtsi2ss xmm0, xmm2, rax"); + TEST32("\x62\xf1\xee\x18\x2a\xc0", "vcvtsi2ss xmm0, xmm2, eax, {rn-sae}"); // EVEX.W ignored + TEST64("\x62\xf1\xee\x18\x2a\xc0", "vcvtsi2ss xmm0, xmm2, rax, {rn-sae}"); + TEST32("\x62\xf1\x6f\x08\x2a\x40\x01", "vcvtsi2sd xmm0, xmm2, dword ptr [eax+0x4]"); + TEST64("\x62\xf1\x6f\x08\x2a\x40\x01", "vcvtsi2sd xmm0, xmm2, dword ptr [rax+0x4]"); + TEST("\x62\xf1\x6f\x18\x2a\x40\x01", "UD"); // EVEX.b with memory operand + TEST("\x62\xf1\x6f\x08\x2a\xc0", "vcvtsi2sd xmm0, xmm2, eax"); + TEST("\x62\xf1\x6f\x18\x2a\xc0", "vcvtsi2sd xmm0, xmm2, eax, {rn-sae}"); + TEST32("\x62\xf1\xef\x08\x2a\x40\x01", "vcvtsi2sd xmm0, xmm2, dword ptr [eax+0x4]"); // EVEX.W ignored + TEST64("\x62\xf1\xef\x08\x2a\x40\x01", "vcvtsi2sd xmm0, xmm2, qword ptr [rax+0x8]"); + TEST32("\x62\xf1\xef\x08\x2a\xc0", "vcvtsi2sd xmm0, xmm2, eax"); // EVEX.W ignored + TEST64("\x62\xf1\xef\x08\x2a\xc0", "vcvtsi2sd xmm0, xmm2, rax"); + TEST32("\x62\xf1\xef\x18\x2a\xc0", "vcvtsi2sd xmm0, xmm2, eax, {rn-sae}"); // EVEX.W ignored + TEST64("\x62\xf1\xef\x18\x2a\xc0", "vcvtsi2sd xmm0, xmm2, rax, {rn-sae}"); + TEST32("\x62\xf1\x6e\x08\x7b\x40\x01", "vcvtusi2ss xmm0, xmm2, dword ptr [eax+0x4]"); + TEST64("\x62\xf1\x6e\x08\x7b\x40\x01", "vcvtusi2ss xmm0, xmm2, dword ptr [rax+0x4]"); + TEST("\x62\xf1\x6e\x18\x7b\x40\x01", "UD"); // EVEX.b with memory operand + TEST("\x62\xf1\x6e\x08\x7b\xc0", "vcvtusi2ss xmm0, xmm2, eax"); + TEST("\x62\xf1\x6e\x18\x7b\xc0", "vcvtusi2ss xmm0, xmm2, eax, {rn-sae}"); + TEST32("\x62\xf1\xee\x08\x7b\x40\x01", "vcvtusi2ss xmm0, xmm2, dword ptr [eax+0x4]"); // EVEX.W ignored + TEST64("\x62\xf1\xee\x08\x7b\x40\x01", "vcvtusi2ss xmm0, xmm2, qword ptr [rax+0x8]"); + TEST32("\x62\xf1\xee\x08\x7b\xc0", "vcvtusi2ss xmm0, xmm2, eax"); // EVEX.W ignored + TEST64("\x62\xf1\xee\x08\x7b\xc0", "vcvtusi2ss xmm0, xmm2, rax"); + TEST32("\x62\xf1\xee\x18\x7b\xc0", "vcvtusi2ss xmm0, xmm2, eax, {rn-sae}"); // EVEX.W ignored + TEST64("\x62\xf1\xee\x18\x7b\xc0", "vcvtusi2ss xmm0, xmm2, rax, {rn-sae}"); + TEST32("\x62\xf1\x6f\x08\x7b\x40\x01", "vcvtusi2sd xmm0, xmm2, dword ptr [eax+0x4]"); + TEST64("\x62\xf1\x6f\x08\x7b\x40\x01", "vcvtusi2sd xmm0, xmm2, dword ptr [rax+0x4]"); + TEST("\x62\xf1\x6f\x18\x7b\x40\x01", "UD"); // EVEX.b with memory operand + TEST("\x62\xf1\x6f\x08\x7b\xc0", "vcvtusi2sd xmm0, xmm2, eax"); + TEST("\x62\xf1\x6f\x18\x7b\xc0", "vcvtusi2sd xmm0, xmm2, eax, {rn-sae}"); + TEST32("\x62\xf1\xef\x08\x7b\x40\x01", "vcvtusi2sd xmm0, xmm2, dword ptr [eax+0x4]"); // EVEX.W ignored + TEST64("\x62\xf1\xef\x08\x7b\x40\x01", "vcvtusi2sd xmm0, xmm2, qword ptr [rax+0x8]"); + TEST32("\x62\xf1\xef\x08\x7b\xc0", "vcvtusi2sd xmm0, xmm2, eax"); // EVEX.W ignored + TEST64("\x62\xf1\xef\x08\x7b\xc0", "vcvtusi2sd xmm0, xmm2, rax"); + TEST32("\x62\xf1\xef\x18\x7b\xc0", "vcvtusi2sd xmm0, xmm2, eax, {rn-sae}"); // EVEX.W ignored + TEST64("\x62\xf1\xef\x18\x7b\xc0", "vcvtusi2sd xmm0, xmm2, rax, {rn-sae}"); + + // 32-bit mode: no UD constraints for K-reg + // 64-bit mode: EVEX.R/EVEX.vvvv=0xxx causes UD for K-reg; EVEX.B is ignored + TEST("\xc5\xed\x41\x00", "UD"); // Must have register operand + TEST("\xc5\xed\x41\xcb", 
"kandb k1, k2, k3"); + TEST("\xc4\xe1\x6d\x41\xcb", "kandb k1, k2, k3"); // 3-byte VEX encoding + TEST("\xc4\xc1\x6d\x41\xcb", "kandb k1, k2, k3"); // VEX.B is ignored + TEST64("\xc4\x61\x6d\x41\xcb", "UD"); // VEX.R is UD + TEST32("\xc4\xe1\x2d\x41\xcb", "kandb k1, k2, k3"); // VEX.vvvv MSB is ignored + TEST64("\xc4\xe1\x2d\x41\xcb", "UD"); // VEX.vvvv = 0xxx + TEST64("\xc5\xad\x41\xcb", "UD"); // VEX.vvvv = 0xxx + + TEST("\x62\xf2\x7e\x08\x28\x00", "UD"); // Must have register operand + TEST("\x62\xf2\x7e\x08\x28\xc1", "vpmovm2b xmm0, k1"); + TEST32("\x62\xe2\x7e\x08\x28\xc1", "vpmovm2b xmm0, k1"); // EVEX.R' ignored + TEST64("\x62\xe2\x7e\x08\x28\xc1", "vpmovm2b xmm16, k1"); + TEST("\x62\xd2\x7e\x08\x28\xc1", "vpmovm2b xmm0, k1"); // EVEX.B ignored + TEST64("\x62\xb2\x7e\x08\x28\xc1", "vpmovm2b xmm0, k1"); // EVEX.X ignored + TEST64("\x62\x72\x7e\x08\x28\xc1", "vpmovm2b xmm8, k1"); + TEST("\x62\xf2\xfe\x08\x28\x00", "UD"); // Must have register operand + TEST("\x62\xf2\xfe\x08\x28\xc1", "vpmovm2w xmm0, k1"); + TEST32("\x62\xe2\xfe\x08\x28\xc1", "vpmovm2w xmm0, k1"); // EVEX.R' ignored + TEST64("\x62\xe2\xfe\x08\x28\xc1", "vpmovm2w xmm16, k1"); + TEST("\x62\xd2\xfe\x08\x28\xc1", "vpmovm2w xmm0, k1"); // EVEX.B ignored + TEST64("\x62\xb2\xfe\x08\x28\xc1", "vpmovm2w xmm0, k1"); // EVEX.X ignored + TEST64("\x62\x72\xfe\x08\x28\xc1", "vpmovm2w xmm8, k1"); + TEST("\x62\xf2\x7e\x08\x38\x00", "UD"); // Must have register operand + TEST("\x62\xf2\x7e\x08\x38\xc1", "vpmovm2d xmm0, k1"); + TEST32("\x62\xe2\x7e\x08\x38\xc1", "vpmovm2d xmm0, k1"); // EVEX.R' ignored + TEST64("\x62\xe2\x7e\x08\x38\xc1", "vpmovm2d xmm16, k1"); + TEST("\x62\xd2\x7e\x08\x38\xc1", "vpmovm2d xmm0, k1"); // EVEX.B ignored + TEST64("\x62\xb2\x7e\x08\x38\xc1", "vpmovm2d xmm0, k1"); // EVEX.X ignored + TEST64("\x62\x72\x7e\x08\x38\xc1", "vpmovm2d xmm8, k1"); + TEST("\x62\xf2\xfe\x08\x38\x00", "UD"); // Must have register operand + TEST("\x62\xf2\xfe\x08\x38\xc1", "vpmovm2q xmm0, k1"); + TEST32("\x62\xe2\xfe\x08\x38\xc1", "vpmovm2q xmm0, k1"); // EVEX.R' ignored + TEST64("\x62\xe2\xfe\x08\x38\xc1", "vpmovm2q xmm16, k1"); + TEST("\x62\xd2\xfe\x08\x38\xc1", "vpmovm2q xmm0, k1"); // EVEX.B ignored + TEST64("\x62\xb2\xfe\x08\x38\xc1", "vpmovm2q xmm0, k1"); // EVEX.X ignored + TEST64("\x62\x72\xfe\x08\x38\xc1", "vpmovm2q xmm8, k1"); + + TEST("\x62\xf2\x7e\x08\x29\x00", "UD"); // Must have register operand + TEST("\x62\xf2\x7e\x08\x29\xc1", "vpmovb2m k0, xmm1"); + TEST32("\x62\xe2\x7e\x08\x29\xc1", "vpmovb2m k0, xmm1"); // EVEX.R' ignored + TEST64("\x62\xe2\x7e\x08\x29\xc1", "UD"); // EVEX.R' for mask is UD + TEST32("\x62\xd2\x7e\x08\x29\xc1", "vpmovb2m k0, xmm1"); // EVEX.B ignored + TEST64("\x62\xd2\x7e\x08\x29\xc1", "vpmovb2m k0, xmm9"); + TEST64("\x62\xb2\x7e\x08\x29\xc1", "vpmovb2m k0, xmm17"); + TEST64("\x62\x72\x7e\x08\x29\xc1", "UD"); // EVEX.R for mask is UD + TEST("\x62\xf2\xfe\x08\x29\x00", "UD"); // Must have register operand + TEST("\x62\xf2\xfe\x08\x29\xc1", "vpmovw2m k0, xmm1"); + TEST32("\x62\xe2\xfe\x08\x29\xc1", "vpmovw2m k0, xmm1"); // EVEX.R' ignored + TEST64("\x62\xe2\xfe\x08\x29\xc1", "UD"); // EVEX.R' for mask is UD + TEST32("\x62\xd2\xfe\x08\x29\xc1", "vpmovw2m k0, xmm1"); // EVEX.B ignored + TEST64("\x62\xd2\xfe\x08\x29\xc1", "vpmovw2m k0, xmm9"); + TEST64("\x62\xb2\xfe\x08\x29\xc1", "vpmovw2m k0, xmm17"); + TEST64("\x62\x72\xfe\x08\x29\xc1", "UD"); // EVEX.R for mask is UD + TEST("\x62\xf2\x7e\x08\x39\x00", "UD"); // Must have register operand + TEST("\x62\xf2\x7e\x08\x39\xc1", "vpmovd2m k0, xmm1"); + 
TEST32("\x62\xe2\x7e\x08\x39\xc1", "vpmovd2m k0, xmm1"); // EVEX.R' ignored + TEST64("\x62\xe2\x7e\x08\x39\xc1", "UD"); // EVEX.R' for mask is UD + TEST32("\x62\xd2\x7e\x08\x39\xc1", "vpmovd2m k0, xmm1"); // EVEX.B ignored + TEST64("\x62\xd2\x7e\x08\x39\xc1", "vpmovd2m k0, xmm9"); + TEST64("\x62\xb2\x7e\x08\x39\xc1", "vpmovd2m k0, xmm17"); + TEST64("\x62\x72\x7e\x08\x39\xc1", "UD"); // EVEX.R for mask is UD + TEST("\x62\xf2\xfe\x08\x39\x00", "UD"); // Must have register operand + TEST("\x62\xf2\xfe\x08\x39\xc1", "vpmovq2m k0, xmm1"); + TEST32("\x62\xe2\xfe\x08\x39\xc1", "vpmovq2m k0, xmm1"); // EVEX.R' ignored + TEST64("\x62\xe2\xfe\x08\x39\xc1", "UD"); // EVEX.R' for mask is UD + TEST32("\x62\xd2\xfe\x08\x39\xc1", "vpmovq2m k0, xmm1"); // EVEX.B ignored + TEST64("\x62\xd2\xfe\x08\x39\xc1", "vpmovq2m k0, xmm9"); + TEST64("\x62\xb2\xfe\x08\x39\xc1", "vpmovq2m k0, xmm17"); + TEST64("\x62\x72\xfe\x08\x39\xc1", "UD"); // EVEX.R for mask is UD + + // VSIB encoding, test all combinations of EVEX.RXBR'V' once + TEST("\x62\xf2\x7d\x0a\xa2\xcc", "UD"); // Must have memory operand + TEST("\x62\xf2\x7d\x0a\xa2\x01", "UD"); // Must have SIB byte + TEST("\x62\xf2\x7d\x08\xa2\x0c\xe7", "UD"); // EVEX.aaa = 000 + TEST32("\x62\xf2\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [edi+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xf2\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*xmm4]{k2}, xmm1"); + TEST32("\x62\xd2\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [edi+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xd2\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xb2\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*xmm12]{k2}, xmm1"); + TEST64("\x62\x92\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*xmm12]{k2}, xmm1"); + TEST32("\x62\xf2\x7d\x02\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xf2\x7d\x02\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*xmm20]{k2}, xmm1"); + TEST32("\x62\xd2\x7d\x02\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xd2\x7d\x02\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*xmm20]{k2}, xmm1"); + TEST64("\x62\xb2\x7d\x02\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*xmm28]{k2}, xmm1"); + TEST64("\x62\x92\x7d\x02\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*xmm28]{k2}, xmm1"); + TEST64("\x62\x72\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*xmm4]{k2}, xmm9"); + TEST64("\x62\x52\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*xmm4]{k2}, xmm9"); + TEST64("\x62\x32\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*xmm12]{k2}, xmm9"); + TEST64("\x62\x12\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*xmm12]{k2}, xmm9"); + TEST64("\x62\x72\x7d\x02\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*xmm20]{k2}, xmm9"); + TEST64("\x62\x52\x7d\x02\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*xmm20]{k2}, xmm9"); + TEST64("\x62\x32\x7d\x02\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*xmm28]{k2}, xmm9"); + TEST64("\x62\x12\x7d\x02\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*xmm28]{k2}, xmm9"); + TEST32("\x62\xe2\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [edi+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xe2\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*xmm4]{k2}, xmm17"); + TEST32("\x62\xc2\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [edi+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xc2\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*xmm4]{k2}, xmm17"); + TEST64("\x62\xa2\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*xmm12]{k2}, xmm17"); + TEST64("\x62\x82\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*xmm12]{k2}, xmm17"); + 
TEST32("\x62\xe2\x7d\x02\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xe2\x7d\x02\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*xmm20]{k2}, xmm17"); + TEST32("\x62\xc2\x7d\x02\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xc2\x7d\x02\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*xmm20]{k2}, xmm17"); + TEST64("\x62\xa2\x7d\x02\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*xmm28]{k2}, xmm17"); + TEST64("\x62\x82\x7d\x02\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*xmm28]{k2}, xmm17"); + TEST64("\x62\x62\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*xmm4]{k2}, xmm25"); + TEST64("\x62\x42\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*xmm4]{k2}, xmm25"); + TEST64("\x62\x22\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*xmm12]{k2}, xmm25"); + TEST64("\x62\x02\x7d\x0a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*xmm12]{k2}, xmm25"); + TEST64("\x62\x62\x7d\x02\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*xmm20]{k2}, xmm25"); + TEST64("\x62\x42\x7d\x02\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*xmm20]{k2}, xmm25"); + TEST64("\x62\x22\x7d\x02\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*xmm28]{k2}, xmm25"); + TEST64("\x62\x02\x7d\x02\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*xmm28]{k2}, xmm25"); + TEST32("\x62\xf2\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [edi+8*ymm4]{k2}, ymm1"); + TEST64("\x62\xf2\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*ymm4]{k2}, ymm1"); + TEST32("\x62\xd2\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [edi+8*ymm4]{k2}, ymm1"); + TEST64("\x62\xd2\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*ymm4]{k2}, ymm1"); + TEST64("\x62\xb2\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*ymm12]{k2}, ymm1"); + TEST64("\x62\x92\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*ymm12]{k2}, ymm1"); + TEST32("\x62\xf2\x7d\x22\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xf2\x7d\x22\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*ymm20]{k2}, ymm1"); + TEST32("\x62\xd2\x7d\x22\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xd2\x7d\x22\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*ymm20]{k2}, ymm1"); + TEST64("\x62\xb2\x7d\x22\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*ymm28]{k2}, ymm1"); + TEST64("\x62\x92\x7d\x22\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*ymm28]{k2}, ymm1"); + TEST64("\x62\x72\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*ymm4]{k2}, ymm9"); + TEST64("\x62\x52\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*ymm4]{k2}, ymm9"); + TEST64("\x62\x32\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*ymm12]{k2}, ymm9"); + TEST64("\x62\x12\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*ymm12]{k2}, ymm9"); + TEST64("\x62\x72\x7d\x22\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*ymm20]{k2}, ymm9"); + TEST64("\x62\x52\x7d\x22\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*ymm20]{k2}, ymm9"); + TEST64("\x62\x32\x7d\x22\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*ymm28]{k2}, ymm9"); + TEST64("\x62\x12\x7d\x22\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*ymm28]{k2}, ymm9"); + TEST32("\x62\xe2\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [edi+8*ymm4]{k2}, ymm1"); + TEST64("\x62\xe2\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*ymm4]{k2}, ymm17"); + TEST32("\x62\xc2\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [edi+8*ymm4]{k2}, ymm1"); + TEST64("\x62\xc2\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*ymm4]{k2}, ymm17"); + TEST64("\x62\xa2\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*ymm12]{k2}, ymm17"); + TEST64("\x62\x82\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*ymm12]{k2}, ymm17"); + 
TEST32("\x62\xe2\x7d\x22\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xe2\x7d\x22\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*ymm20]{k2}, ymm17"); + TEST32("\x62\xc2\x7d\x22\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xc2\x7d\x22\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*ymm20]{k2}, ymm17"); + TEST64("\x62\xa2\x7d\x22\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*ymm28]{k2}, ymm17"); + TEST64("\x62\x82\x7d\x22\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*ymm28]{k2}, ymm17"); + TEST64("\x62\x62\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*ymm4]{k2}, ymm25"); + TEST64("\x62\x42\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*ymm4]{k2}, ymm25"); + TEST64("\x62\x22\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*ymm12]{k2}, ymm25"); + TEST64("\x62\x02\x7d\x2a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*ymm12]{k2}, ymm25"); + TEST64("\x62\x62\x7d\x22\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*ymm20]{k2}, ymm25"); + TEST64("\x62\x42\x7d\x22\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*ymm20]{k2}, ymm25"); + TEST64("\x62\x22\x7d\x22\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*ymm28]{k2}, ymm25"); + TEST64("\x62\x02\x7d\x22\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*ymm28]{k2}, ymm25"); + TEST32("\x62\xf2\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [edi+8*zmm4]{k2}, zmm1"); + TEST64("\x62\xf2\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*zmm4]{k2}, zmm1"); + TEST32("\x62\xd2\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [edi+8*zmm4]{k2}, zmm1"); + TEST64("\x62\xd2\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*zmm4]{k2}, zmm1"); + TEST64("\x62\xb2\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*zmm12]{k2}, zmm1"); + TEST64("\x62\x92\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*zmm12]{k2}, zmm1"); + TEST32("\x62\xf2\x7d\x42\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xf2\x7d\x42\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*zmm20]{k2}, zmm1"); + TEST32("\x62\xd2\x7d\x42\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xd2\x7d\x42\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*zmm20]{k2}, zmm1"); + TEST64("\x62\xb2\x7d\x42\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*zmm28]{k2}, zmm1"); + TEST64("\x62\x92\x7d\x42\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*zmm28]{k2}, zmm1"); + TEST64("\x62\x72\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*zmm4]{k2}, zmm9"); + TEST64("\x62\x52\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*zmm4]{k2}, zmm9"); + TEST64("\x62\x32\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*zmm12]{k2}, zmm9"); + TEST64("\x62\x12\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*zmm12]{k2}, zmm9"); + TEST64("\x62\x72\x7d\x42\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*zmm20]{k2}, zmm9"); + TEST64("\x62\x52\x7d\x42\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*zmm20]{k2}, zmm9"); + TEST64("\x62\x32\x7d\x42\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*zmm28]{k2}, zmm9"); + TEST64("\x62\x12\x7d\x42\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*zmm28]{k2}, zmm9"); + TEST32("\x62\xe2\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [edi+8*zmm4]{k2}, zmm1"); + TEST64("\x62\xe2\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*zmm4]{k2}, zmm17"); + TEST32("\x62\xc2\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [edi+8*zmm4]{k2}, zmm1"); + TEST64("\x62\xc2\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*zmm4]{k2}, zmm17"); + TEST64("\x62\xa2\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*zmm12]{k2}, zmm17"); + TEST64("\x62\x82\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*zmm12]{k2}, zmm17"); + 
TEST32("\x62\xe2\x7d\x42\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xe2\x7d\x42\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*zmm20]{k2}, zmm17"); + TEST32("\x62\xc2\x7d\x42\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xc2\x7d\x42\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*zmm20]{k2}, zmm17"); + TEST64("\x62\xa2\x7d\x42\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*zmm28]{k2}, zmm17"); + TEST64("\x62\x82\x7d\x42\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*zmm28]{k2}, zmm17"); + TEST64("\x62\x62\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*zmm4]{k2}, zmm25"); + TEST64("\x62\x42\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*zmm4]{k2}, zmm25"); + TEST64("\x62\x22\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*zmm12]{k2}, zmm25"); + TEST64("\x62\x02\x7d\x4a\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*zmm12]{k2}, zmm25"); + TEST64("\x62\x62\x7d\x42\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*zmm20]{k2}, zmm25"); + TEST64("\x62\x42\x7d\x42\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*zmm20]{k2}, zmm25"); + TEST64("\x62\x22\x7d\x42\xa2\x0c\xe7", "vscatterdps dword ptr [rdi+8*zmm28]{k2}, zmm25"); + TEST64("\x62\x02\x7d\x42\xa2\x0c\xe7", "vscatterdps dword ptr [r15+8*zmm28]{k2}, zmm25"); + TEST32("\x62\xf2\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [edi+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xf2\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*xmm4]{k2}, xmm1"); + TEST32("\x62\xd2\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [edi+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xd2\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xb2\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*xmm12]{k2}, xmm1"); + TEST64("\x62\x92\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*xmm12]{k2}, xmm1"); + TEST32("\x62\xf2\x7d\x02\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xf2\x7d\x02\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*xmm20]{k2}, xmm1"); + TEST32("\x62\xd2\x7d\x02\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xd2\x7d\x02\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*xmm20]{k2}, xmm1"); + TEST64("\x62\xb2\x7d\x02\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*xmm28]{k2}, xmm1"); + TEST64("\x62\x92\x7d\x02\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*xmm28]{k2}, xmm1"); + TEST64("\x62\x72\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*xmm4]{k2}, xmm9"); + TEST64("\x62\x52\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*xmm4]{k2}, xmm9"); + TEST64("\x62\x32\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*xmm12]{k2}, xmm9"); + TEST64("\x62\x12\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*xmm12]{k2}, xmm9"); + TEST64("\x62\x72\x7d\x02\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*xmm20]{k2}, xmm9"); + TEST64("\x62\x52\x7d\x02\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*xmm20]{k2}, xmm9"); + TEST64("\x62\x32\x7d\x02\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*xmm28]{k2}, xmm9"); + TEST64("\x62\x12\x7d\x02\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*xmm28]{k2}, xmm9"); + TEST32("\x62\xe2\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [edi+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xe2\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*xmm4]{k2}, xmm17"); + TEST32("\x62\xc2\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [edi+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xc2\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*xmm4]{k2}, xmm17"); + TEST64("\x62\xa2\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*xmm12]{k2}, xmm17"); + TEST64("\x62\x82\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*xmm12]{k2}, xmm17"); + 
TEST32("\x62\xe2\x7d\x02\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xe2\x7d\x02\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*xmm20]{k2}, xmm17"); + TEST32("\x62\xc2\x7d\x02\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xc2\x7d\x02\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*xmm20]{k2}, xmm17"); + TEST64("\x62\xa2\x7d\x02\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*xmm28]{k2}, xmm17"); + TEST64("\x62\x82\x7d\x02\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*xmm28]{k2}, xmm17"); + TEST64("\x62\x62\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*xmm4]{k2}, xmm25"); + TEST64("\x62\x42\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*xmm4]{k2}, xmm25"); + TEST64("\x62\x22\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*xmm12]{k2}, xmm25"); + TEST64("\x62\x02\x7d\x0a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*xmm12]{k2}, xmm25"); + TEST64("\x62\x62\x7d\x02\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*xmm20]{k2}, xmm25"); + TEST64("\x62\x42\x7d\x02\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*xmm20]{k2}, xmm25"); + TEST64("\x62\x22\x7d\x02\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*xmm28]{k2}, xmm25"); + TEST64("\x62\x02\x7d\x02\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*xmm28]{k2}, xmm25"); + TEST32("\x62\xf2\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [edi+8*ymm4]{k2}, xmm1"); + TEST64("\x62\xf2\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*ymm4]{k2}, xmm1"); + TEST32("\x62\xd2\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [edi+8*ymm4]{k2}, xmm1"); + TEST64("\x62\xd2\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*ymm4]{k2}, xmm1"); + TEST64("\x62\xb2\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*ymm12]{k2}, xmm1"); + TEST64("\x62\x92\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*ymm12]{k2}, xmm1"); + TEST32("\x62\xf2\x7d\x22\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xf2\x7d\x22\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*ymm20]{k2}, xmm1"); + TEST32("\x62\xd2\x7d\x22\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xd2\x7d\x22\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*ymm20]{k2}, xmm1"); + TEST64("\x62\xb2\x7d\x22\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*ymm28]{k2}, xmm1"); + TEST64("\x62\x92\x7d\x22\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*ymm28]{k2}, xmm1"); + TEST64("\x62\x72\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*ymm4]{k2}, xmm9"); + TEST64("\x62\x52\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*ymm4]{k2}, xmm9"); + TEST64("\x62\x32\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*ymm12]{k2}, xmm9"); + TEST64("\x62\x12\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*ymm12]{k2}, xmm9"); + TEST64("\x62\x72\x7d\x22\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*ymm20]{k2}, xmm9"); + TEST64("\x62\x52\x7d\x22\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*ymm20]{k2}, xmm9"); + TEST64("\x62\x32\x7d\x22\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*ymm28]{k2}, xmm9"); + TEST64("\x62\x12\x7d\x22\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*ymm28]{k2}, xmm9"); + TEST32("\x62\xe2\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [edi+8*ymm4]{k2}, xmm1"); + TEST64("\x62\xe2\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*ymm4]{k2}, xmm17"); + TEST32("\x62\xc2\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [edi+8*ymm4]{k2}, xmm1"); + TEST64("\x62\xc2\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*ymm4]{k2}, xmm17"); + TEST64("\x62\xa2\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*ymm12]{k2}, xmm17"); + TEST64("\x62\x82\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*ymm12]{k2}, xmm17"); + 
TEST32("\x62\xe2\x7d\x22\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xe2\x7d\x22\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*ymm20]{k2}, xmm17"); + TEST32("\x62\xc2\x7d\x22\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xc2\x7d\x22\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*ymm20]{k2}, xmm17"); + TEST64("\x62\xa2\x7d\x22\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*ymm28]{k2}, xmm17"); + TEST64("\x62\x82\x7d\x22\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*ymm28]{k2}, xmm17"); + TEST64("\x62\x62\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*ymm4]{k2}, xmm25"); + TEST64("\x62\x42\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*ymm4]{k2}, xmm25"); + TEST64("\x62\x22\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*ymm12]{k2}, xmm25"); + TEST64("\x62\x02\x7d\x2a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*ymm12]{k2}, xmm25"); + TEST64("\x62\x62\x7d\x22\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*ymm20]{k2}, xmm25"); + TEST64("\x62\x42\x7d\x22\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*ymm20]{k2}, xmm25"); + TEST64("\x62\x22\x7d\x22\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*ymm28]{k2}, xmm25"); + TEST64("\x62\x02\x7d\x22\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*ymm28]{k2}, xmm25"); + TEST32("\x62\xf2\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [edi+8*zmm4]{k2}, ymm1"); + TEST64("\x62\xf2\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*zmm4]{k2}, ymm1"); + TEST32("\x62\xd2\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [edi+8*zmm4]{k2}, ymm1"); + TEST64("\x62\xd2\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*zmm4]{k2}, ymm1"); + TEST64("\x62\xb2\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*zmm12]{k2}, ymm1"); + TEST64("\x62\x92\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*zmm12]{k2}, ymm1"); + TEST32("\x62\xf2\x7d\x42\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xf2\x7d\x42\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*zmm20]{k2}, ymm1"); + TEST32("\x62\xd2\x7d\x42\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xd2\x7d\x42\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*zmm20]{k2}, ymm1"); + TEST64("\x62\xb2\x7d\x42\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*zmm28]{k2}, ymm1"); + TEST64("\x62\x92\x7d\x42\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*zmm28]{k2}, ymm1"); + TEST64("\x62\x72\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*zmm4]{k2}, ymm9"); + TEST64("\x62\x52\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*zmm4]{k2}, ymm9"); + TEST64("\x62\x32\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*zmm12]{k2}, ymm9"); + TEST64("\x62\x12\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*zmm12]{k2}, ymm9"); + TEST64("\x62\x72\x7d\x42\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*zmm20]{k2}, ymm9"); + TEST64("\x62\x52\x7d\x42\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*zmm20]{k2}, ymm9"); + TEST64("\x62\x32\x7d\x42\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*zmm28]{k2}, ymm9"); + TEST64("\x62\x12\x7d\x42\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*zmm28]{k2}, ymm9"); + TEST32("\x62\xe2\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [edi+8*zmm4]{k2}, ymm1"); + TEST64("\x62\xe2\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*zmm4]{k2}, ymm17"); + TEST32("\x62\xc2\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [edi+8*zmm4]{k2}, ymm1"); + TEST64("\x62\xc2\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*zmm4]{k2}, ymm17"); + TEST64("\x62\xa2\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*zmm12]{k2}, ymm17"); + TEST64("\x62\x82\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*zmm12]{k2}, ymm17"); + 
TEST32("\x62\xe2\x7d\x42\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xe2\x7d\x42\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*zmm20]{k2}, ymm17"); + TEST32("\x62\xc2\x7d\x42\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xc2\x7d\x42\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*zmm20]{k2}, ymm17"); + TEST64("\x62\xa2\x7d\x42\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*zmm28]{k2}, ymm17"); + TEST64("\x62\x82\x7d\x42\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*zmm28]{k2}, ymm17"); + TEST64("\x62\x62\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*zmm4]{k2}, ymm25"); + TEST64("\x62\x42\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*zmm4]{k2}, ymm25"); + TEST64("\x62\x22\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*zmm12]{k2}, ymm25"); + TEST64("\x62\x02\x7d\x4a\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*zmm12]{k2}, ymm25"); + TEST64("\x62\x62\x7d\x42\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*zmm20]{k2}, ymm25"); + TEST64("\x62\x42\x7d\x42\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*zmm20]{k2}, ymm25"); + TEST64("\x62\x22\x7d\x42\xa3\x0c\xe7", "vscatterqps dword ptr [rdi+8*zmm28]{k2}, ymm25"); + TEST64("\x62\x02\x7d\x42\xa3\x0c\xe7", "vscatterqps dword ptr [r15+8*zmm28]{k2}, ymm25"); + TEST32("\x62\xf2\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [edi+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xf2\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm4]{k2}, xmm1"); + TEST32("\x62\xd2\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [edi+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xd2\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xb2\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm12]{k2}, xmm1"); + TEST64("\x62\x92\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm12]{k2}, xmm1"); + TEST32("\x62\xf2\xfd\x02\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xf2\xfd\x02\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm20]{k2}, xmm1"); + TEST32("\x62\xd2\xfd\x02\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xd2\xfd\x02\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm20]{k2}, xmm1"); + TEST64("\x62\xb2\xfd\x02\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm28]{k2}, xmm1"); + TEST64("\x62\x92\xfd\x02\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm28]{k2}, xmm1"); + TEST64("\x62\x72\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm4]{k2}, xmm9"); + TEST64("\x62\x52\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm4]{k2}, xmm9"); + TEST64("\x62\x32\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm12]{k2}, xmm9"); + TEST64("\x62\x12\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm12]{k2}, xmm9"); + TEST64("\x62\x72\xfd\x02\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm20]{k2}, xmm9"); + TEST64("\x62\x52\xfd\x02\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm20]{k2}, xmm9"); + TEST64("\x62\x32\xfd\x02\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm28]{k2}, xmm9"); + TEST64("\x62\x12\xfd\x02\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm28]{k2}, xmm9"); + TEST32("\x62\xe2\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [edi+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xe2\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm4]{k2}, xmm17"); + TEST32("\x62\xc2\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [edi+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xc2\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm4]{k2}, xmm17"); + TEST64("\x62\xa2\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm12]{k2}, xmm17"); + TEST64("\x62\x82\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm12]{k2}, xmm17"); + 
TEST32("\x62\xe2\xfd\x02\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xe2\xfd\x02\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm20]{k2}, xmm17"); + TEST32("\x62\xc2\xfd\x02\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xc2\xfd\x02\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm20]{k2}, xmm17"); + TEST64("\x62\xa2\xfd\x02\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm28]{k2}, xmm17"); + TEST64("\x62\x82\xfd\x02\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm28]{k2}, xmm17"); + TEST64("\x62\x62\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm4]{k2}, xmm25"); + TEST64("\x62\x42\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm4]{k2}, xmm25"); + TEST64("\x62\x22\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm12]{k2}, xmm25"); + TEST64("\x62\x02\xfd\x0a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm12]{k2}, xmm25"); + TEST64("\x62\x62\xfd\x02\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm20]{k2}, xmm25"); + TEST64("\x62\x42\xfd\x02\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm20]{k2}, xmm25"); + TEST64("\x62\x22\xfd\x02\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm28]{k2}, xmm25"); + TEST64("\x62\x02\xfd\x02\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm28]{k2}, xmm25"); + TEST32("\x62\xf2\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [edi+8*xmm4]{k2}, ymm1"); + TEST64("\x62\xf2\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm4]{k2}, ymm1"); + TEST32("\x62\xd2\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [edi+8*xmm4]{k2}, ymm1"); + TEST64("\x62\xd2\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm4]{k2}, ymm1"); + TEST64("\x62\xb2\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm12]{k2}, ymm1"); + TEST64("\x62\x92\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm12]{k2}, ymm1"); + TEST32("\x62\xf2\xfd\x22\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xf2\xfd\x22\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm20]{k2}, ymm1"); + TEST32("\x62\xd2\xfd\x22\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xd2\xfd\x22\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm20]{k2}, ymm1"); + TEST64("\x62\xb2\xfd\x22\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm28]{k2}, ymm1"); + TEST64("\x62\x92\xfd\x22\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm28]{k2}, ymm1"); + TEST64("\x62\x72\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm4]{k2}, ymm9"); + TEST64("\x62\x52\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm4]{k2}, ymm9"); + TEST64("\x62\x32\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm12]{k2}, ymm9"); + TEST64("\x62\x12\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm12]{k2}, ymm9"); + TEST64("\x62\x72\xfd\x22\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm20]{k2}, ymm9"); + TEST64("\x62\x52\xfd\x22\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm20]{k2}, ymm9"); + TEST64("\x62\x32\xfd\x22\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm28]{k2}, ymm9"); + TEST64("\x62\x12\xfd\x22\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm28]{k2}, ymm9"); + TEST32("\x62\xe2\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [edi+8*xmm4]{k2}, ymm1"); + TEST64("\x62\xe2\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm4]{k2}, ymm17"); + TEST32("\x62\xc2\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [edi+8*xmm4]{k2}, ymm1"); + TEST64("\x62\xc2\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm4]{k2}, ymm17"); + TEST64("\x62\xa2\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm12]{k2}, ymm17"); + TEST64("\x62\x82\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm12]{k2}, ymm17"); + 
TEST32("\x62\xe2\xfd\x22\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xe2\xfd\x22\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm20]{k2}, ymm17"); + TEST32("\x62\xc2\xfd\x22\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xc2\xfd\x22\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm20]{k2}, ymm17"); + TEST64("\x62\xa2\xfd\x22\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm28]{k2}, ymm17"); + TEST64("\x62\x82\xfd\x22\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm28]{k2}, ymm17"); + TEST64("\x62\x62\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm4]{k2}, ymm25"); + TEST64("\x62\x42\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm4]{k2}, ymm25"); + TEST64("\x62\x22\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm12]{k2}, ymm25"); + TEST64("\x62\x02\xfd\x2a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm12]{k2}, ymm25"); + TEST64("\x62\x62\xfd\x22\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm20]{k2}, ymm25"); + TEST64("\x62\x42\xfd\x22\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm20]{k2}, ymm25"); + TEST64("\x62\x22\xfd\x22\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*xmm28]{k2}, ymm25"); + TEST64("\x62\x02\xfd\x22\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*xmm28]{k2}, ymm25"); + TEST32("\x62\xf2\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [edi+8*ymm4]{k2}, zmm1"); + TEST64("\x62\xf2\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*ymm4]{k2}, zmm1"); + TEST32("\x62\xd2\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [edi+8*ymm4]{k2}, zmm1"); + TEST64("\x62\xd2\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*ymm4]{k2}, zmm1"); + TEST64("\x62\xb2\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*ymm12]{k2}, zmm1"); + TEST64("\x62\x92\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*ymm12]{k2}, zmm1"); + TEST32("\x62\xf2\xfd\x42\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xf2\xfd\x42\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*ymm20]{k2}, zmm1"); + TEST32("\x62\xd2\xfd\x42\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xd2\xfd\x42\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*ymm20]{k2}, zmm1"); + TEST64("\x62\xb2\xfd\x42\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*ymm28]{k2}, zmm1"); + TEST64("\x62\x92\xfd\x42\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*ymm28]{k2}, zmm1"); + TEST64("\x62\x72\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*ymm4]{k2}, zmm9"); + TEST64("\x62\x52\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*ymm4]{k2}, zmm9"); + TEST64("\x62\x32\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*ymm12]{k2}, zmm9"); + TEST64("\x62\x12\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*ymm12]{k2}, zmm9"); + TEST64("\x62\x72\xfd\x42\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*ymm20]{k2}, zmm9"); + TEST64("\x62\x52\xfd\x42\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*ymm20]{k2}, zmm9"); + TEST64("\x62\x32\xfd\x42\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*ymm28]{k2}, zmm9"); + TEST64("\x62\x12\xfd\x42\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*ymm28]{k2}, zmm9"); + TEST32("\x62\xe2\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [edi+8*ymm4]{k2}, zmm1"); + TEST64("\x62\xe2\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*ymm4]{k2}, zmm17"); + TEST32("\x62\xc2\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [edi+8*ymm4]{k2}, zmm1"); + TEST64("\x62\xc2\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*ymm4]{k2}, zmm17"); + TEST64("\x62\xa2\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*ymm12]{k2}, zmm17"); + TEST64("\x62\x82\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*ymm12]{k2}, zmm17"); + 
TEST32("\x62\xe2\xfd\x42\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xe2\xfd\x42\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*ymm20]{k2}, zmm17"); + TEST32("\x62\xc2\xfd\x42\xa2\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xc2\xfd\x42\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*ymm20]{k2}, zmm17"); + TEST64("\x62\xa2\xfd\x42\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*ymm28]{k2}, zmm17"); + TEST64("\x62\x82\xfd\x42\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*ymm28]{k2}, zmm17"); + TEST64("\x62\x62\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*ymm4]{k2}, zmm25"); + TEST64("\x62\x42\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*ymm4]{k2}, zmm25"); + TEST64("\x62\x22\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*ymm12]{k2}, zmm25"); + TEST64("\x62\x02\xfd\x4a\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*ymm12]{k2}, zmm25"); + TEST64("\x62\x62\xfd\x42\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*ymm20]{k2}, zmm25"); + TEST64("\x62\x42\xfd\x42\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*ymm20]{k2}, zmm25"); + TEST64("\x62\x22\xfd\x42\xa2\x0c\xe7", "vscatterdpd qword ptr [rdi+8*ymm28]{k2}, zmm25"); + TEST64("\x62\x02\xfd\x42\xa2\x0c\xe7", "vscatterdpd qword ptr [r15+8*ymm28]{k2}, zmm25"); + TEST32("\x62\xf2\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [edi+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xf2\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*xmm4]{k2}, xmm1"); + TEST32("\x62\xd2\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [edi+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xd2\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xb2\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*xmm12]{k2}, xmm1"); + TEST64("\x62\x92\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*xmm12]{k2}, xmm1"); + TEST32("\x62\xf2\xfd\x02\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xf2\xfd\x02\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*xmm20]{k2}, xmm1"); + TEST32("\x62\xd2\xfd\x02\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xd2\xfd\x02\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*xmm20]{k2}, xmm1"); + TEST64("\x62\xb2\xfd\x02\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*xmm28]{k2}, xmm1"); + TEST64("\x62\x92\xfd\x02\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*xmm28]{k2}, xmm1"); + TEST64("\x62\x72\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*xmm4]{k2}, xmm9"); + TEST64("\x62\x52\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*xmm4]{k2}, xmm9"); + TEST64("\x62\x32\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*xmm12]{k2}, xmm9"); + TEST64("\x62\x12\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*xmm12]{k2}, xmm9"); + TEST64("\x62\x72\xfd\x02\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*xmm20]{k2}, xmm9"); + TEST64("\x62\x52\xfd\x02\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*xmm20]{k2}, xmm9"); + TEST64("\x62\x32\xfd\x02\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*xmm28]{k2}, xmm9"); + TEST64("\x62\x12\xfd\x02\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*xmm28]{k2}, xmm9"); + TEST32("\x62\xe2\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [edi+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xe2\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*xmm4]{k2}, xmm17"); + TEST32("\x62\xc2\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [edi+8*xmm4]{k2}, xmm1"); + TEST64("\x62\xc2\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*xmm4]{k2}, xmm17"); + TEST64("\x62\xa2\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*xmm12]{k2}, xmm17"); + TEST64("\x62\x82\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*xmm12]{k2}, xmm17"); + 
TEST32("\x62\xe2\xfd\x02\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xe2\xfd\x02\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*xmm20]{k2}, xmm17"); + TEST32("\x62\xc2\xfd\x02\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xc2\xfd\x02\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*xmm20]{k2}, xmm17"); + TEST64("\x62\xa2\xfd\x02\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*xmm28]{k2}, xmm17"); + TEST64("\x62\x82\xfd\x02\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*xmm28]{k2}, xmm17"); + TEST64("\x62\x62\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*xmm4]{k2}, xmm25"); + TEST64("\x62\x42\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*xmm4]{k2}, xmm25"); + TEST64("\x62\x22\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*xmm12]{k2}, xmm25"); + TEST64("\x62\x02\xfd\x0a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*xmm12]{k2}, xmm25"); + TEST64("\x62\x62\xfd\x02\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*xmm20]{k2}, xmm25"); + TEST64("\x62\x42\xfd\x02\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*xmm20]{k2}, xmm25"); + TEST64("\x62\x22\xfd\x02\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*xmm28]{k2}, xmm25"); + TEST64("\x62\x02\xfd\x02\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*xmm28]{k2}, xmm25"); + TEST32("\x62\xf2\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [edi+8*ymm4]{k2}, ymm1"); + TEST64("\x62\xf2\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*ymm4]{k2}, ymm1"); + TEST32("\x62\xd2\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [edi+8*ymm4]{k2}, ymm1"); + TEST64("\x62\xd2\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*ymm4]{k2}, ymm1"); + TEST64("\x62\xb2\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*ymm12]{k2}, ymm1"); + TEST64("\x62\x92\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*ymm12]{k2}, ymm1"); + TEST32("\x62\xf2\xfd\x22\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xf2\xfd\x22\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*ymm20]{k2}, ymm1"); + TEST32("\x62\xd2\xfd\x22\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xd2\xfd\x22\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*ymm20]{k2}, ymm1"); + TEST64("\x62\xb2\xfd\x22\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*ymm28]{k2}, ymm1"); + TEST64("\x62\x92\xfd\x22\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*ymm28]{k2}, ymm1"); + TEST64("\x62\x72\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*ymm4]{k2}, ymm9"); + TEST64("\x62\x52\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*ymm4]{k2}, ymm9"); + TEST64("\x62\x32\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*ymm12]{k2}, ymm9"); + TEST64("\x62\x12\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*ymm12]{k2}, ymm9"); + TEST64("\x62\x72\xfd\x22\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*ymm20]{k2}, ymm9"); + TEST64("\x62\x52\xfd\x22\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*ymm20]{k2}, ymm9"); + TEST64("\x62\x32\xfd\x22\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*ymm28]{k2}, ymm9"); + TEST64("\x62\x12\xfd\x22\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*ymm28]{k2}, ymm9"); + TEST32("\x62\xe2\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [edi+8*ymm4]{k2}, ymm1"); + TEST64("\x62\xe2\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*ymm4]{k2}, ymm17"); + TEST32("\x62\xc2\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [edi+8*ymm4]{k2}, ymm1"); + TEST64("\x62\xc2\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*ymm4]{k2}, ymm17"); + TEST64("\x62\xa2\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*ymm12]{k2}, ymm17"); + TEST64("\x62\x82\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*ymm12]{k2}, ymm17"); + 
TEST32("\x62\xe2\xfd\x22\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xe2\xfd\x22\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*ymm20]{k2}, ymm17"); + TEST32("\x62\xc2\xfd\x22\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xc2\xfd\x22\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*ymm20]{k2}, ymm17"); + TEST64("\x62\xa2\xfd\x22\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*ymm28]{k2}, ymm17"); + TEST64("\x62\x82\xfd\x22\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*ymm28]{k2}, ymm17"); + TEST64("\x62\x62\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*ymm4]{k2}, ymm25"); + TEST64("\x62\x42\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*ymm4]{k2}, ymm25"); + TEST64("\x62\x22\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*ymm12]{k2}, ymm25"); + TEST64("\x62\x02\xfd\x2a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*ymm12]{k2}, ymm25"); + TEST64("\x62\x62\xfd\x22\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*ymm20]{k2}, ymm25"); + TEST64("\x62\x42\xfd\x22\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*ymm20]{k2}, ymm25"); + TEST64("\x62\x22\xfd\x22\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*ymm28]{k2}, ymm25"); + TEST64("\x62\x02\xfd\x22\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*ymm28]{k2}, ymm25"); + TEST32("\x62\xf2\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [edi+8*zmm4]{k2}, zmm1"); + TEST64("\x62\xf2\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*zmm4]{k2}, zmm1"); + TEST32("\x62\xd2\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [edi+8*zmm4]{k2}, zmm1"); + TEST64("\x62\xd2\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*zmm4]{k2}, zmm1"); + TEST64("\x62\xb2\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*zmm12]{k2}, zmm1"); + TEST64("\x62\x92\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*zmm12]{k2}, zmm1"); + TEST32("\x62\xf2\xfd\x42\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xf2\xfd\x42\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*zmm20]{k2}, zmm1"); + TEST32("\x62\xd2\xfd\x42\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xd2\xfd\x42\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*zmm20]{k2}, zmm1"); + TEST64("\x62\xb2\xfd\x42\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*zmm28]{k2}, zmm1"); + TEST64("\x62\x92\xfd\x42\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*zmm28]{k2}, zmm1"); + TEST64("\x62\x72\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*zmm4]{k2}, zmm9"); + TEST64("\x62\x52\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*zmm4]{k2}, zmm9"); + TEST64("\x62\x32\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*zmm12]{k2}, zmm9"); + TEST64("\x62\x12\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*zmm12]{k2}, zmm9"); + TEST64("\x62\x72\xfd\x42\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*zmm20]{k2}, zmm9"); + TEST64("\x62\x52\xfd\x42\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*zmm20]{k2}, zmm9"); + TEST64("\x62\x32\xfd\x42\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*zmm28]{k2}, zmm9"); + TEST64("\x62\x12\xfd\x42\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*zmm28]{k2}, zmm9"); + TEST32("\x62\xe2\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [edi+8*zmm4]{k2}, zmm1"); + TEST64("\x62\xe2\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*zmm4]{k2}, zmm17"); + TEST32("\x62\xc2\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [edi+8*zmm4]{k2}, zmm1"); + TEST64("\x62\xc2\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*zmm4]{k2}, zmm17"); + TEST64("\x62\xa2\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*zmm12]{k2}, zmm17"); + TEST64("\x62\x82\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*zmm12]{k2}, zmm17"); + 
TEST32("\x62\xe2\xfd\x42\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xe2\xfd\x42\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*zmm20]{k2}, zmm17"); + TEST32("\x62\xc2\xfd\x42\xa3\x0c\xe7", "UD"); // EVEX.V' == 0 + TEST64("\x62\xc2\xfd\x42\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*zmm20]{k2}, zmm17"); + TEST64("\x62\xa2\xfd\x42\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*zmm28]{k2}, zmm17"); + TEST64("\x62\x82\xfd\x42\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*zmm28]{k2}, zmm17"); + TEST64("\x62\x62\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*zmm4]{k2}, zmm25"); + TEST64("\x62\x42\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*zmm4]{k2}, zmm25"); + TEST64("\x62\x22\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*zmm12]{k2}, zmm25"); + TEST64("\x62\x02\xfd\x4a\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*zmm12]{k2}, zmm25"); + TEST64("\x62\x62\xfd\x42\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*zmm20]{k2}, zmm25"); + TEST64("\x62\x42\xfd\x42\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*zmm20]{k2}, zmm25"); + TEST64("\x62\x22\xfd\x42\xa3\x0c\xe7", "vscatterqpd qword ptr [rdi+8*zmm28]{k2}, zmm25"); + TEST64("\x62\x02\xfd\x42\xa3\x0c\xe7", "vscatterqpd qword ptr [r15+8*zmm28]{k2}, zmm25"); + + // All EVEX-VSIB instructions. VSCATTER* cases additionally test scaled offset. + TEST32("\x62\xf2\x7d\x09\xa2\x44\xe7\x01", "vscatterdps dword ptr [edi+8*xmm4+0x4]{k1}, xmm0"); + TEST64("\x62\xf2\x7d\x09\xa2\x44\xe7\x01", "vscatterdps dword ptr [rdi+8*xmm4+0x4]{k1}, xmm0"); + TEST32("\x62\xf2\x7d\x29\xa2\x44\xe7\x01", "vscatterdps dword ptr [edi+8*ymm4+0x4]{k1}, ymm0"); + TEST64("\x62\xf2\x7d\x29\xa2\x44\xe7\x01", "vscatterdps dword ptr [rdi+8*ymm4+0x4]{k1}, ymm0"); + TEST32("\x62\xf2\x7d\x49\xa2\x44\xe7\x01", "vscatterdps dword ptr [edi+8*zmm4+0x4]{k1}, zmm0"); + TEST64("\x62\xf2\x7d\x49\xa2\x44\xe7\x01", "vscatterdps dword ptr [rdi+8*zmm4+0x4]{k1}, zmm0"); + TEST32("\x62\xf2\x7d\x09\xa3\x44\xe7\x01", "vscatterqps dword ptr [edi+8*xmm4+0x4]{k1}, xmm0"); + TEST64("\x62\xf2\x7d\x09\xa3\x44\xe7\x01", "vscatterqps dword ptr [rdi+8*xmm4+0x4]{k1}, xmm0"); + TEST32("\x62\xf2\x7d\x29\xa3\x44\xe7\x01", "vscatterqps dword ptr [edi+8*ymm4+0x4]{k1}, xmm0"); + TEST64("\x62\xf2\x7d\x29\xa3\x44\xe7\x01", "vscatterqps dword ptr [rdi+8*ymm4+0x4]{k1}, xmm0"); + TEST32("\x62\xf2\x7d\x49\xa3\x44\xe7\x01", "vscatterqps dword ptr [edi+8*zmm4+0x4]{k1}, ymm0"); + TEST64("\x62\xf2\x7d\x49\xa3\x44\xe7\x01", "vscatterqps dword ptr [rdi+8*zmm4+0x4]{k1}, ymm0"); + TEST32("\x62\xf2\xfd\x09\xa2\x44\xe7\x01", "vscatterdpd qword ptr [edi+8*xmm4+0x8]{k1}, xmm0"); + TEST64("\x62\xf2\xfd\x09\xa2\x44\xe7\x01", "vscatterdpd qword ptr [rdi+8*xmm4+0x8]{k1}, xmm0"); + TEST32("\x62\xf2\xfd\x29\xa2\x44\xe7\x01", "vscatterdpd qword ptr [edi+8*xmm4+0x8]{k1}, ymm0"); + TEST64("\x62\xf2\xfd\x29\xa2\x44\xe7\x01", "vscatterdpd qword ptr [rdi+8*xmm4+0x8]{k1}, ymm0"); + TEST32("\x62\xf2\xfd\x49\xa2\x44\xe7\x01", "vscatterdpd qword ptr [edi+8*ymm4+0x8]{k1}, zmm0"); + TEST64("\x62\xf2\xfd\x49\xa2\x44\xe7\x01", "vscatterdpd qword ptr [rdi+8*ymm4+0x8]{k1}, zmm0"); + TEST32("\x62\xf2\xfd\x09\xa3\x44\xe7\x01", "vscatterqpd qword ptr [edi+8*xmm4+0x8]{k1}, xmm0"); + TEST64("\x62\xf2\xfd\x09\xa3\x44\xe7\x01", "vscatterqpd qword ptr [rdi+8*xmm4+0x8]{k1}, xmm0"); + TEST32("\x62\xf2\xfd\x29\xa3\x44\xe7\x01", "vscatterqpd qword ptr [edi+8*ymm4+0x8]{k1}, ymm0"); + TEST64("\x62\xf2\xfd\x29\xa3\x44\xe7\x01", "vscatterqpd qword ptr [rdi+8*ymm4+0x8]{k1}, ymm0"); + TEST32("\x62\xf2\xfd\x49\xa3\x44\xe7\x01", "vscatterqpd qword ptr 
[edi+8*zmm4+0x8]{k1}, zmm0"); + TEST64("\x62\xf2\xfd\x49\xa3\x44\xe7\x01", "vscatterqpd qword ptr [rdi+8*zmm4+0x8]{k1}, zmm0"); + TEST32("\x62\xf2\x7d\x09\xa0\x44\xe7\x01", "vpscatterdd dword ptr [edi+8*xmm4+0x4]{k1}, xmm0"); + TEST64("\x62\xf2\x7d\x09\xa0\x44\xe7\x01", "vpscatterdd dword ptr [rdi+8*xmm4+0x4]{k1}, xmm0"); + TEST32("\x62\xf2\x7d\x29\xa0\x44\xe7\x01", "vpscatterdd dword ptr [edi+8*ymm4+0x4]{k1}, ymm0"); + TEST64("\x62\xf2\x7d\x29\xa0\x44\xe7\x01", "vpscatterdd dword ptr [rdi+8*ymm4+0x4]{k1}, ymm0"); + TEST32("\x62\xf2\x7d\x49\xa0\x44\xe7\x01", "vpscatterdd dword ptr [edi+8*zmm4+0x4]{k1}, zmm0"); + TEST64("\x62\xf2\x7d\x49\xa0\x44\xe7\x01", "vpscatterdd dword ptr [rdi+8*zmm4+0x4]{k1}, zmm0"); + TEST32("\x62\xf2\x7d\x09\xa1\x44\xe7\x01", "vpscatterqd dword ptr [edi+8*xmm4+0x4]{k1}, xmm0"); + TEST64("\x62\xf2\x7d\x09\xa1\x44\xe7\x01", "vpscatterqd dword ptr [rdi+8*xmm4+0x4]{k1}, xmm0"); + TEST32("\x62\xf2\x7d\x29\xa1\x44\xe7\x01", "vpscatterqd dword ptr [edi+8*ymm4+0x4]{k1}, xmm0"); + TEST64("\x62\xf2\x7d\x29\xa1\x44\xe7\x01", "vpscatterqd dword ptr [rdi+8*ymm4+0x4]{k1}, xmm0"); + TEST32("\x62\xf2\x7d\x49\xa1\x44\xe7\x01", "vpscatterqd dword ptr [edi+8*zmm4+0x4]{k1}, ymm0"); + TEST64("\x62\xf2\x7d\x49\xa1\x44\xe7\x01", "vpscatterqd dword ptr [rdi+8*zmm4+0x4]{k1}, ymm0"); + TEST32("\x62\xf2\xfd\x09\xa0\x44\xe7\x01", "vpscatterdq qword ptr [edi+8*xmm4+0x8]{k1}, xmm0"); + TEST64("\x62\xf2\xfd\x09\xa0\x44\xe7\x01", "vpscatterdq qword ptr [rdi+8*xmm4+0x8]{k1}, xmm0"); + TEST32("\x62\xf2\xfd\x29\xa0\x44\xe7\x01", "vpscatterdq qword ptr [edi+8*xmm4+0x8]{k1}, ymm0"); + TEST64("\x62\xf2\xfd\x29\xa0\x44\xe7\x01", "vpscatterdq qword ptr [rdi+8*xmm4+0x8]{k1}, ymm0"); + TEST32("\x62\xf2\xfd\x49\xa0\x44\xe7\x01", "vpscatterdq qword ptr [edi+8*ymm4+0x8]{k1}, zmm0"); + TEST64("\x62\xf2\xfd\x49\xa0\x44\xe7\x01", "vpscatterdq qword ptr [rdi+8*ymm4+0x8]{k1}, zmm0"); + TEST32("\x62\xf2\xfd\x09\xa1\x44\xe7\x01", "vpscatterqq qword ptr [edi+8*xmm4+0x8]{k1}, xmm0"); + TEST64("\x62\xf2\xfd\x09\xa1\x44\xe7\x01", "vpscatterqq qword ptr [rdi+8*xmm4+0x8]{k1}, xmm0"); + TEST32("\x62\xf2\xfd\x29\xa1\x44\xe7\x01", "vpscatterqq qword ptr [edi+8*ymm4+0x8]{k1}, ymm0"); + TEST64("\x62\xf2\xfd\x29\xa1\x44\xe7\x01", "vpscatterqq qword ptr [rdi+8*ymm4+0x8]{k1}, ymm0"); + TEST32("\x62\xf2\xfd\x49\xa1\x44\xe7\x01", "vpscatterqq qword ptr [edi+8*zmm4+0x8]{k1}, zmm0"); + TEST64("\x62\xf2\xfd\x49\xa1\x44\xe7\x01", "vpscatterqq qword ptr [rdi+8*zmm4+0x8]{k1}, zmm0"); + TEST32("\x62\xf2\x7d\x09\x90\x44\xe7\x01", "vpgatherdd xmm0{k1}, dword ptr [edi+8*xmm4+0x4]"); + TEST64("\x62\xf2\x7d\x09\x90\x44\xe7\x01", "vpgatherdd xmm0{k1}, dword ptr [rdi+8*xmm4+0x4]"); + TEST32("\x62\xf2\x7d\x29\x90\x44\xe7\x01", "vpgatherdd ymm0{k1}, dword ptr [edi+8*ymm4+0x4]"); + TEST64("\x62\xf2\x7d\x29\x90\x44\xe7\x01", "vpgatherdd ymm0{k1}, dword ptr [rdi+8*ymm4+0x4]"); + TEST32("\x62\xf2\x7d\x49\x90\x44\xe7\x01", "vpgatherdd zmm0{k1}, dword ptr [edi+8*zmm4+0x4]"); + TEST64("\x62\xf2\x7d\x49\x90\x44\xe7\x01", "vpgatherdd zmm0{k1}, dword ptr [rdi+8*zmm4+0x4]"); + TEST32("\x62\xf2\x7d\x09\x91\x44\xe7\x01", "vpgatherqd xmm0{k1}, dword ptr [edi+8*xmm4+0x4]"); + TEST64("\x62\xf2\x7d\x09\x91\x44\xe7\x01", "vpgatherqd xmm0{k1}, dword ptr [rdi+8*xmm4+0x4]"); + TEST32("\x62\xf2\x7d\x29\x91\x44\xe7\x01", "vpgatherqd xmm0{k1}, dword ptr [edi+8*ymm4+0x4]"); + TEST64("\x62\xf2\x7d\x29\x91\x44\xe7\x01", "vpgatherqd xmm0{k1}, dword ptr [rdi+8*ymm4+0x4]"); + TEST32("\x62\xf2\x7d\x49\x91\x44\xe7\x01", "vpgatherqd ymm0{k1}, dword ptr [edi+8*zmm4+0x4]"); + 
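+    // The +0x4/+0x8 offsets in this block exercise EVEX compressed disp8:
+    // the encoded 8-bit displacement (0x01 in these tests) is scaled by the
+    // element size before use. Illustration (not the decoder's exact code):
+    //   disp = (int8_t)0x01 << 2; // dword element -> 0x4 (qword: << 3 -> 0x8)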
TEST64("\x62\xf2\x7d\x49\x91\x44\xe7\x01", "vpgatherqd ymm0{k1}, dword ptr [rdi+8*zmm4+0x4]"); + TEST32("\x62\xf2\xfd\x09\x90\x44\xe7\x01", "vpgatherdq xmm0{k1}, qword ptr [edi+8*xmm4+0x8]"); + TEST64("\x62\xf2\xfd\x09\x90\x44\xe7\x01", "vpgatherdq xmm0{k1}, qword ptr [rdi+8*xmm4+0x8]"); + TEST32("\x62\xf2\xfd\x29\x90\x44\xe7\x01", "vpgatherdq ymm0{k1}, qword ptr [edi+8*xmm4+0x8]"); + TEST64("\x62\xf2\xfd\x29\x90\x44\xe7\x01", "vpgatherdq ymm0{k1}, qword ptr [rdi+8*xmm4+0x8]"); + TEST32("\x62\xf2\xfd\x49\x90\x44\xe7\x01", "vpgatherdq zmm0{k1}, qword ptr [edi+8*ymm4+0x8]"); + TEST64("\x62\xf2\xfd\x49\x90\x44\xe7\x01", "vpgatherdq zmm0{k1}, qword ptr [rdi+8*ymm4+0x8]"); + TEST32("\x62\xf2\xfd\x09\x91\x44\xe7\x01", "vpgatherqq xmm0{k1}, qword ptr [edi+8*xmm4+0x8]"); + TEST64("\x62\xf2\xfd\x09\x91\x44\xe7\x01", "vpgatherqq xmm0{k1}, qword ptr [rdi+8*xmm4+0x8]"); + TEST32("\x62\xf2\xfd\x29\x91\x44\xe7\x01", "vpgatherqq ymm0{k1}, qword ptr [edi+8*ymm4+0x8]"); + TEST64("\x62\xf2\xfd\x29\x91\x44\xe7\x01", "vpgatherqq ymm0{k1}, qword ptr [rdi+8*ymm4+0x8]"); + TEST32("\x62\xf2\xfd\x49\x91\x44\xe7\x01", "vpgatherqq zmm0{k1}, qword ptr [edi+8*zmm4+0x8]"); + TEST64("\x62\xf2\xfd\x49\x91\x44\xe7\x01", "vpgatherqq zmm0{k1}, qword ptr [rdi+8*zmm4+0x8]"); + TEST32("\x62\xf2\x7d\x09\x92\x44\xe7\x01", "vgatherdps xmm0{k1}, dword ptr [edi+8*xmm4+0x4]"); + TEST64("\x62\xf2\x7d\x09\x92\x44\xe7\x01", "vgatherdps xmm0{k1}, dword ptr [rdi+8*xmm4+0x4]"); + TEST32("\x62\xf2\x7d\x29\x92\x44\xe7\x01", "vgatherdps ymm0{k1}, dword ptr [edi+8*ymm4+0x4]"); + TEST64("\x62\xf2\x7d\x29\x92\x44\xe7\x01", "vgatherdps ymm0{k1}, dword ptr [rdi+8*ymm4+0x4]"); + TEST32("\x62\xf2\x7d\x49\x92\x44\xe7\x01", "vgatherdps zmm0{k1}, dword ptr [edi+8*zmm4+0x4]"); + TEST64("\x62\xf2\x7d\x49\x92\x44\xe7\x01", "vgatherdps zmm0{k1}, dword ptr [rdi+8*zmm4+0x4]"); + TEST32("\x62\xf2\x7d\x09\x93\x44\xe7\x01", "vgatherqps xmm0{k1}, dword ptr [edi+8*xmm4+0x4]"); + TEST64("\x62\xf2\x7d\x09\x93\x44\xe7\x01", "vgatherqps xmm0{k1}, dword ptr [rdi+8*xmm4+0x4]"); + TEST32("\x62\xf2\x7d\x29\x93\x44\xe7\x01", "vgatherqps xmm0{k1}, dword ptr [edi+8*ymm4+0x4]"); + TEST64("\x62\xf2\x7d\x29\x93\x44\xe7\x01", "vgatherqps xmm0{k1}, dword ptr [rdi+8*ymm4+0x4]"); + TEST32("\x62\xf2\x7d\x49\x93\x44\xe7\x01", "vgatherqps ymm0{k1}, dword ptr [edi+8*zmm4+0x4]"); + TEST64("\x62\xf2\x7d\x49\x93\x44\xe7\x01", "vgatherqps ymm0{k1}, dword ptr [rdi+8*zmm4+0x4]"); + TEST32("\x62\xf2\xfd\x09\x92\x44\xe7\x01", "vgatherdpd xmm0{k1}, qword ptr [edi+8*xmm4+0x8]"); + TEST64("\x62\xf2\xfd\x09\x92\x44\xe7\x01", "vgatherdpd xmm0{k1}, qword ptr [rdi+8*xmm4+0x8]"); + TEST32("\x62\xf2\xfd\x29\x92\x44\xe7\x01", "vgatherdpd ymm0{k1}, qword ptr [edi+8*xmm4+0x8]"); + TEST64("\x62\xf2\xfd\x29\x92\x44\xe7\x01", "vgatherdpd ymm0{k1}, qword ptr [rdi+8*xmm4+0x8]"); + TEST32("\x62\xf2\xfd\x49\x92\x44\xe7\x01", "vgatherdpd zmm0{k1}, qword ptr [edi+8*ymm4+0x8]"); + TEST64("\x62\xf2\xfd\x49\x92\x44\xe7\x01", "vgatherdpd zmm0{k1}, qword ptr [rdi+8*ymm4+0x8]"); + TEST32("\x62\xf2\xfd\x09\x93\x44\xe7\x01", "vgatherqpd xmm0{k1}, qword ptr [edi+8*xmm4+0x8]"); + TEST64("\x62\xf2\xfd\x09\x93\x44\xe7\x01", "vgatherqpd xmm0{k1}, qword ptr [rdi+8*xmm4+0x8]"); + TEST32("\x62\xf2\xfd\x29\x93\x44\xe7\x01", "vgatherqpd ymm0{k1}, qword ptr [edi+8*ymm4+0x8]"); + TEST64("\x62\xf2\xfd\x29\x93\x44\xe7\x01", "vgatherqpd ymm0{k1}, qword ptr [rdi+8*ymm4+0x8]"); + TEST32("\x62\xf2\xfd\x49\x93\x44\xe7\x01", "vgatherqpd zmm0{k1}, qword ptr [edi+8*zmm4+0x8]"); + TEST64("\x62\xf2\xfd\x49\x93\x44\xe7\x01", "vgatherqpd 
zmm0{k1}, qword ptr [rdi+8*zmm4+0x8]"); + puts(failed ? "Some tests FAILED" : "All tests PASSED"); return failed ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/decode.c b/decode.c index e9b09d9..ef32461 100644 --- a/decode.c +++ b/decode.c @@ -81,6 +81,8 @@ struct InstrDesc #define DESC_VEXREG_IDX(desc) ((((desc)->operand_indices >> 4) & 3) ^ 3) #define DESC_IMM_CONTROL(desc) (((desc)->operand_indices >> 12) & 0x7) #define DESC_IMM_IDX(desc) ((((desc)->operand_indices >> 6) & 3) ^ 3) +#define DESC_EVEX_BCST(desc) (((desc)->operand_indices >> 8) & 1) +#define DESC_EVEX_MASK(desc) (((desc)->operand_indices >> 9) & 1) #define DESC_ZEROREG_VAL(desc) (((desc)->operand_indices >> 10) & 1) #define DESC_LOCK(desc) (((desc)->operand_indices >> 11) & 1) #define DESC_VSIB(desc) (((desc)->operand_indices >> 15) & 1) @@ -90,6 +92,8 @@ struct InstrDesc #define DESC_INSTR_WIDTH(desc) (((desc)->operand_sizes >> 15) & 1) #define DESC_MODRM(desc) (((desc)->reg_types >> 14) & 1) #define DESC_IGN66(desc) (((desc)->reg_types >> 15) & 1) +#define DESC_EVEX_SAE(desc) (((desc)->reg_types >> 8) & 1) +#define DESC_EVEX_ER(desc) (((desc)->reg_types >> 9) & 1) #define DESC_REGTY_MODRM(desc) (((desc)->reg_types >> 0) & 7) #define DESC_REGTY_MODREG(desc) (((desc)->reg_types >> 3) & 7) #define DESC_REGTY_VEXREG(desc) (((desc)->reg_types >> 6) & 3) @@ -257,9 +261,12 @@ prefix_end: if (UNLIKELY(off + 3 >= len)) return FD_ERR_PARTIAL; byte = buffer[off + 3]; + // prefix_evex is z:L'L/RC:b:V':aaa prefix_evex = byte | 0x08; // Ensure that prefix_evex is non-zero. - if (mode == DECODE_64) // V' ignored in 32-bit mode + if (mode == DECODE_64) // V' causes UD in 32-bit mode vex_operand |= byte & 0x08 ? 0 : 0x10; // V' + else if (!(byte & 0x08)) + return FD_ERR_UD; off += 4; } else // VEX @@ -299,6 +306,9 @@ prefix_end: uint8_t index = 0; index |= prefix_rex & PREFIX_REXW ? (1 << 0) : 0; index |= prefix_rex & PREFIX_VEXL ? (1 << 1) : 0; + // When EVEX.L'L is the rounding mode, the instruction must not have + // L'L constraints. + index |= (prefix_evex >> 4) & 6; table_idx = table_walk(table_idx, index, &kind); } @@ -327,6 +337,39 @@ prefix_end: return FD_ERR_PARTIAL; unsigned op_byte = buffer[off - 1] | (!DESC_MODRM(desc) ? 0xc0 : 0); + unsigned vexl = !!(prefix_rex & PREFIX_VEXL); + if (UNLIKELY(prefix_evex)) { + // VSIB inst (gather/scatter) without mask register or w/EVEX.z is UD + if (DESC_VSIB(desc) && (!(prefix_evex & 0x07) || (prefix_evex & 0x80))) + return FD_ERR_UD; + // Inst doesn't support masking, so EVEX.z or EVEX.aaa is UD + if (!DESC_EVEX_MASK(desc) && (prefix_evex & 0x87)) + return FD_ERR_UD; + + vexl = (prefix_evex >> 5) & 3; + // Cases for SAE/RC (reg operands only): + // - ER supported -> all ok + // - SAE supported -> assume L'L is RC, but ignored (undocumented) + // - Neither supported -> b == 0 + if ((prefix_evex & 0x10) && (op_byte & 0xc0) == 0xc0) { // EVEX.b+reg + if (!DESC_EVEX_SAE(desc)) + return FD_ERR_UD; + vexl = 2; + if (DESC_EVEX_ER(desc)) + instr->evex = prefix_evex; + else + instr->evex = (prefix_evex & 0x87) | 0x60; // set RC, clear B + } else { + if (UNLIKELY(vexl == 3)) // EVEX.L'L == 11b is UD + return FD_ERR_UD; + // Update V' to REX.W, s.t. broadcast size is exposed + unsigned rexw = prefix_rex & PREFIX_REXW ? 0x08 : 0x00; + instr->evex = (prefix_evex & 0x87) | rexw; + } + } else { + instr->evex = 0; + } + unsigned op_size; unsigned op_size_alt = 0; if (!(DESC_OPSIZE(desc) & 4)) { @@ -340,7 +383,7 @@ prefix_end: else op_size = UNLIKELY(prefix_66 && !DESC_IGN66(desc)) ? 
2 : 3; } else { - op_size = 5 + !!(prefix_rex & PREFIX_VEXL); + op_size = 5 + vexl; op_size_alt = op_size - (DESC_OPSIZE(desc) & 3); } @@ -378,8 +421,14 @@ prefix_end: op_modreg->misc = (0350761 >> (3 * reg_ty)) & 0x7; if (LIKELY(!(reg_ty & 4))) reg_idx += prefix_rex & PREFIX_REXR ? 8 : 0; - // TODO-EVEX/64-bit: UD if PREFIX_REXR and misc == FD_RT_MASK - // TODO-EVEX/64-bit: UD if PREFIX_EVEXR2 and misc != FD_RT_VEC, otw. +16 + if (reg_ty == 2 && reg_idx >= 8) // REXR can't be set in 32-bit mode + return FD_ERR_UD; + if (reg_ty == 2 && (prefix_evex & 0x80)) // EVEX.z with mask as dest + return FD_ERR_UD; + if (reg_ty == 1) // PREFIX_REXRR ignored above in 32-bit mode + reg_idx += prefix_rex & PREFIX_REXRR ? 16 : 0; + else if (prefix_rex & PREFIX_REXRR) + return FD_ERR_UD; op_modreg->type = FD_OT_REG; op_modreg->size = operand_sizes[(desc->operand_sizes >> 2) & 3]; op_modreg->reg = reg_idx; @@ -399,7 +448,8 @@ prefix_end: op_modrm->misc = (07450061 >> (3 * reg_ty)) & 0x7; if (LIKELY(!(reg_ty & 4))) reg_idx += prefix_rex & PREFIX_REXB ? 8 : 0; - // TODO-EVEX/64-bit: Add PREFIX_REXX (+16) if FD_RT_VEC, ignore otw. + if (prefix_evex && reg_ty == 1) // vector registers only + reg_idx += prefix_rex & PREFIX_REXX ? 16 : 0; op_modrm->type = FD_OT_REG; op_modrm->reg = reg_idx; } @@ -418,9 +468,12 @@ prefix_end: unsigned idx = (sib & 0x38) >> 3; idx += prefix_rex & PREFIX_REXX ? 8 : 0; base = sib & 0x07; - // TODO-EVEX/64-bit: respect EVEX.V' as extra bit if (!vsib && idx == 4) idx = FD_REG_NONE; + if (vsib) { + idx |= vex_operand & 0x10; + vex_operand &= 0xf; + } op_modrm->misc = scale | idx; } else @@ -431,7 +484,20 @@ prefix_end: op_modrm->misc = FD_REG_NONE; } - op_modrm->type = FD_OT_MEM; + // EVEX.z for memory destination operand is UD. + if (DESC_MODRM_IDX(desc) == 0 && (prefix_evex & 0x80)) + return FD_ERR_UD; + + // EVEX.b for memory-operand without broadcast support is UD. + unsigned scale = op_modrm->size - 1; + if (UNLIKELY(prefix_evex & 0x10)) { + if (UNLIKELY(!DESC_EVEX_BCST(desc))) + return FD_ERR_UD; + scale = prefix_rex & PREFIX_REXW ? 3 : 2; + op_modrm->type = FD_OT_MEMBCST; + } else { + op_modrm->type = FD_OT_MEM; + } // RIP-relative addressing only if SIB-byte is absent if (mod == 0 && rm == 5 && mode == DECODE_64) @@ -445,8 +511,9 @@ prefix_end: { if (UNLIKELY(off + 1 > len)) return FD_ERR_PARTIAL; - // TODO-EVEX: scale by tupletype. instr->disp = (int8_t) LOAD_LE_1(&buffer[off]); + if (prefix_evex) + instr->disp <<= scale; off += 1; } else if (mod == 0x80 || (mod == 0 && base == 5)) @@ -462,7 +529,6 @@ prefix_end: } } } -skip_modrm: if (UNLIKELY(DESC_HAS_VEXREG(desc))) { @@ -472,12 +538,15 @@ skip_modrm: operand->size = operand_sizes[(desc->operand_sizes >> 4) & 3]; if (mode == DECODE_32) vex_operand &= 0x7; - // TODO-EVEX/64-bit: UD if FD_RT_MASK and vex_operand&8 != 0 - // TODO-EVEX/64-bit: UD if not FD_RT_VEC and vex_operand&16 != 0 - // Note: 32-bit will never UD here. + // Note: 32-bit will never UD here. EVEX.V' is caught above already. + // Note: UD if > 16 for non-VEC. No EVEX-encoded instruction uses + // EVEX.vvvv to refer to non-vector registers. Verified in parseinstrs. 
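+        // Illustration (a sketch, not decoder API): vvvv and V' are stored
+        // inverted, so the 5-bit register number decoded here amounts to
+        //   reg = (~EVEX.vvvv & 0xf) | (EVEX.V' ? 0 : 0x10);
+        // e.g. vvvv = 0b0110 with V' = 0 selects register 25.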
         operand->reg = vex_operand | DESC_ZEROREG_VAL(desc);
         unsigned reg_ty = DESC_REGTY_VEXREG(desc); // GPL VEC MSK FPU
+        // In 64-bit mode: UD if FD_RT_MASK and vex_operand&8 != 0
+        if (reg_ty == 2 && vex_operand >= 8)
+            return FD_ERR_UD;
         operand->misc = (04761 >> (3 * reg_ty)) & 0x7;
     } else if (vex_operand != 0)
@@ -615,6 +684,7 @@ skip_modrm:
             return FD_ERR_UD;
     }
 
+skip_modrm:
     if (UNLIKELY(prefix_lock)) {
         if (!DESC_LOCK(desc) || instr->operands[0].type != FD_OT_MEM)
             return FD_ERR_UD;
diff --git a/fadec-enc.h b/fadec-enc.h
index d865f51..30336c1 100644
--- a/fadec-enc.h
+++ b/fadec-enc.h
@@ -18,6 +18,7 @@ typedef enum {
     FE_MM0 = 0x500, FE_MM1, FE_MM2, FE_MM3, FE_MM4, FE_MM5, FE_MM6, FE_MM7,
     FE_XMM0 = 0x600, FE_XMM1, FE_XMM2, FE_XMM3, FE_XMM4, FE_XMM5, FE_XMM6, FE_XMM7,
     FE_XMM8, FE_XMM9, FE_XMM10, FE_XMM11, FE_XMM12, FE_XMM13, FE_XMM14, FE_XMM15,
+    FE_K0 = 0x700, FE_K1, FE_K2, FE_K3, FE_K4, FE_K5, FE_K6, FE_K7,
 } FeReg;
 
 typedef int64_t FeOp;
diff --git a/fadec-enc2.h b/fadec-enc2.h
index 6e21076..ac08589 100644
--- a/fadec-enc2.h
+++ b/fadec-enc2.h
@@ -92,6 +92,33 @@ typedef struct FeRegXMM { unsigned char idx; } FeRegXMM;
 #define FE_XMM12 FE_XMM(12)
 #define FE_XMM13 FE_XMM(13)
 #define FE_XMM14 FE_XMM(14)
+#define FE_XMM15 FE_XMM(15)
+#define FE_XMM16 FE_XMM(16)
+#define FE_XMM17 FE_XMM(17)
+#define FE_XMM18 FE_XMM(18)
+#define FE_XMM19 FE_XMM(19)
+#define FE_XMM20 FE_XMM(20)
+#define FE_XMM21 FE_XMM(21)
+#define FE_XMM22 FE_XMM(22)
+#define FE_XMM23 FE_XMM(23)
+#define FE_XMM24 FE_XMM(24)
+#define FE_XMM25 FE_XMM(25)
+#define FE_XMM26 FE_XMM(26)
+#define FE_XMM27 FE_XMM(27)
+#define FE_XMM28 FE_XMM(28)
+#define FE_XMM29 FE_XMM(29)
+#define FE_XMM30 FE_XMM(30)
+#define FE_XMM31 FE_XMM(31)
+typedef struct FeRegMASK { unsigned char idx; } FeRegMASK;
+#define FE_K(idx) (FE_STRUCT(FeRegMASK) { idx })
+#define FE_K0 FE_K(0)
+#define FE_K1 FE_K(1)
+#define FE_K2 FE_K(2)
+#define FE_K3 FE_K(3)
+#define FE_K4 FE_K(4)
+#define FE_K5 FE_K(5)
+#define FE_K6 FE_K(6)
+#define FE_K7 FE_K(7)
 typedef struct FeRegCR { unsigned char idx; } FeRegCR;
 #define FE_CR(idx) (FE_STRUCT(FeRegCR) { idx })
 typedef struct FeRegDR { unsigned char idx; } FeRegDR;
diff --git a/fadec.h b/fadec.h
index 4274920..afaaa80 100644
--- a/fadec.h
+++ b/fadec.h
@@ -49,6 +49,7 @@ typedef enum {
     FD_OT_IMM = 2,
     FD_OT_MEM = 3,
     FD_OT_OFF = 4,
+    FD_OT_MEMBCST = 5,
 } FdOpType;
 
 typedef enum {
@@ -78,6 +79,22 @@ typedef enum {
     FD_RT_MEM = 15,
 } FdRegType;
 
+/** Do not depend on the actual enum values. **/
+typedef enum {
+    /** Round to nearest (even) **/
+    FD_RC_RN = 1,
+    /** Round down **/
+    FD_RC_RD = 3,
+    /** Round up **/
+    FD_RC_RU = 5,
+    /** Round to zero (truncate) **/
+    FD_RC_RZ = 7,
+    /** Rounding mode as specified in MXCSR **/
+    FD_RC_MXCSR = 0,
+    /** Rounding mode irrelevant, but SAE **/
+    FD_RC_SAE = 6,
+} FdRoundControl;
+
 /** Internal use only. **/
 typedef struct {
     uint8_t type;
@@ -94,7 +111,7 @@ typedef struct {
     uint8_t addrsz;
     uint8_t operandsz;
     uint8_t size;
-    uint8_t _pad0;
+    uint8_t evex;
 
     FdOp operands[4];
 
@@ -230,25 +247,35 @@ const char* fdi_name(FdInstrType ty);
  * if the memory operand has no base register. This is the only case where the
  * 64-bit register RIP can be returned, in which case the operand also has no
  * scaled index register.
- * Only valid if FD_OP_TYPE == FD_OT_MEM **/
+ * Only valid if FD_OP_TYPE == FD_OT_MEM/MEMBCST **/
 #define FD_OP_BASE(instr,idx) ((FdReg) (instr)->operands[idx].reg)
 /** Gets the index of the index register from a memory operand, or FD_REG_NONE,
  * if the memory operand has no scaled index register.
- * Only valid if FD_OP_TYPE == FD_OT_MEM **/
+ * Only valid if FD_OP_TYPE == FD_OT_MEM/MEMBCST **/
 #define FD_OP_INDEX(instr,idx) ((FdReg) (instr)->operands[idx].misc & 0x3f)
 /** Gets the scale of the index register from a memory operand when existent.
  * This does /not/ return the scale in an absolute value but returns the amount
  * of bits the index register is shifted to the left (i.e. the value in in the
  * range 0-3). The actual scale can be computed easily using 1<<FD_OP_SCALE.
- * Only valid if FD_OP_TYPE == FD_OT_MEM **/
+ * Only valid if FD_OP_TYPE == FD_OT_MEM/MEMBCST **/
 #define FD_OP_SCALE(instr,idx) ((instr)->operands[idx].misc >> 6)
 /** Gets the sign-extended displacement of a memory operand.
- * Only valid if FD_OP_TYPE == FD_OT_MEM **/
+ * Only valid if FD_OP_TYPE == FD_OT_MEM/MEMBCST **/
 #define FD_OP_DISP(instr,idx) ((int64_t) (instr)->disp)
+/** Get whether the memory broadcast is 64-bit (otherwise: 32-bit).
+ * Only valid if FD_OP_TYPE == FD_OT_MEMBCST **/
+#define FD_OP_BCST64(instr,idx) (!!((instr)->evex & 0x08))
 /** Gets the (sign-extended) encoded constant for an immediate operand.
  * Only valid if FD_OP_TYPE == FD_OT_IMM or FD_OP_TYPE == FD_OT_OFF **/
 #define FD_OP_IMM(instr,idx) ((instr)->imm)
+/** Get the opmask register for EVEX-encoded instructions; 0 for no mask. **/
+#define FD_MASKREG(instr) ((instr)->evex & 0x07)
+/** Get whether zero masking shall be used. Only valid if FD_MASKREG != 0. **/
+#define FD_MASKZERO(instr) ((instr)->evex & 0x80)
+/** Get rounding mode for EVEX-encoded instructions. See FdRoundControl. **/
+#define FD_ROUNDCONTROL(instr) ((FdRoundControl) (((instr)->evex & 0x70) >> 4))
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/format.c b/format.c
index 29dc92f..38f61a0 100644
--- a/format.c
+++ b/format.c
@@ -248,6 +248,20 @@ fd_mnemonic(char buf[DECLARE_RESTRICTED_ARRAY_SIZE(48)], const FdInstr* instr) {
         if (FD_OPSIZELG(instr) == 3)
             sizesuffix[0] = '6', sizesuffix[1] = '4', sizesuffixlen = 2;
         break;
+    case FDI_EVX_MOV_G2X:
+    case FDI_EVX_MOV_X2G:
+    case FDI_EVX_PEXTR:
+        sizesuffix[0] = "bwdq"[FD_OP_SIZELG(instr, 0)];
+        sizesuffixlen = 1;
+        break;
+    case FDI_EVX_PBROADCAST:
+        sizesuffix[0] = "bwdq"[FD_OP_SIZELG(instr, 1)];
+        sizesuffixlen = 1;
+        break;
+    case FDI_EVX_PINSR:
+        sizesuffix[0] = "bwdq"[FD_OP_SIZELG(instr, 2)];
+        sizesuffixlen = 1;
+        break;
     case FDI_RET:
     case FDI_ENTER:
     case FDI_LEAVE:
@@ -329,7 +343,7 @@ fd_format_impl(char buf[DECLARE_RESTRICTED_ARRAY_SIZE(128)], const FdInstr* inst
             unsigned type = FD_OP_REG_TYPE(instr, i);
             unsigned idx = FD_OP_REG(instr, i);
             buf = fd_strpcatreg(buf, type, idx, size);
-        } else if (op_type == FD_OT_MEM) {
+        } else if (op_type == FD_OT_MEM || op_type == FD_OT_MEMBCST) {
             unsigned idx_rt = FD_RT_GPL;
             unsigned idx_sz = FD_ADDRSIZELG(instr);
             switch (FD_TYPE(instr)) {
@@ -352,24 +366,52 @@ fd_format_impl(char buf[DECLARE_RESTRICTED_ARRAY_SIZE(128)], const FdInstr* inst
                 break;
             case FDI_VPGATHERQD:
             case FDI_VGATHERQPS:
+            case FDI_EVX_PGATHERQD:
+            case FDI_EVX_GATHERQPS:
                 idx_rt = FD_RT_VEC;
                 idx_sz = FD_OP_SIZELG(instr, 0) + 1;
                 break;
+            case FDI_EVX_PSCATTERQD:
+            case FDI_EVX_SCATTERQPS:
+                idx_rt = FD_RT_VEC;
+                idx_sz = FD_OP_SIZELG(instr, 1) + 1;
+                break;
             case FDI_VPGATHERDQ:
             case FDI_VGATHERDPD:
+            case FDI_EVX_PGATHERDQ:
+            case FDI_EVX_GATHERDPD:
                 idx_rt = FD_RT_VEC;
                 idx_sz = FD_OP_SIZELG(instr, 0) - 1;
                 break;
+            case FDI_EVX_PSCATTERDQ:
+            case FDI_EVX_SCATTERDPD:
idx_rt = FD_RT_VEC; + idx_sz = FD_OP_SIZELG(instr, 1) - 1; + break; case FDI_VPGATHERDD: case FDI_VPGATHERQQ: case FDI_VGATHERDPS: case FDI_VGATHERQPD: + case FDI_EVX_PGATHERDD: + case FDI_EVX_PGATHERQQ: + case FDI_EVX_GATHERDPS: + case FDI_EVX_GATHERQPD: idx_rt = FD_RT_VEC; idx_sz = FD_OP_SIZELG(instr, 0); break; + case FDI_EVX_PSCATTERDD: + case FDI_EVX_PSCATTERQQ: + case FDI_EVX_SCATTERDPS: + case FDI_EVX_SCATTERQPD: + idx_rt = FD_RT_VEC; + idx_sz = FD_OP_SIZELG(instr, 1); + break; default: break; } + if (op_type == FD_OT_MEMBCST) + size = FD_OP_BCST64(instr, i) ? 3 : 2; + const char* ptrsizes = "\00 " "\11byte ptr " @@ -417,6 +459,14 @@ fd_format_impl(char buf[DECLARE_RESTRICTED_ARRAY_SIZE(128)], const FdInstr* inst if (disp || (!has_base && !has_idx)) buf = fd_strpcatnum(buf, disp); *buf++ = ']'; + + if (UNLIKELY(op_type == FD_OT_MEMBCST)) { + // {1toX}, X = FD_OP_SIZE(instr, i) / size (=> 2/4/8/16) + unsigned bcstszidx = FD_OP_SIZE(instr, i) >> (FD_OP_BCST64(instr, i) + 1); + const char* bcstsizes = "\6{1to2} \6{1to4} \6{1to8} \0 \7{1to16} "; + const char* bcstsize = bcstsizes + bcstszidx; + buf = fd_strpcat(buf, (struct FdStr) { bcstsize+1, *bcstsize }); + } } else if (op_type == FD_OT_IMM || op_type == FD_OT_OFF) { size_t immediate = FD_OP_IMM(instr, i); // Some instructions have actually two immediate operands which are @@ -454,6 +504,24 @@ fd_format_impl(char buf[DECLARE_RESTRICTED_ARRAY_SIZE(128)], const FdInstr* inst immediate &= 0xffffffff; buf = fd_strpcatnum(buf, immediate); } + + if (i == 0 && FD_MASKREG(instr)) { + *buf++ = '{'; + buf = fd_strpcatreg(buf, FD_RT_MASK, FD_MASKREG(instr), 0); + *buf++ = '}'; + if (FD_MASKZERO(instr)) + buf = fd_strpcat(buf, fd_stre("{z}")); + } + } + if (UNLIKELY(FD_ROUNDCONTROL(instr) != FD_RC_MXCSR)) { + switch (FD_ROUNDCONTROL(instr)) { + case FD_RC_RN: buf = fd_strpcat(buf, fd_stre(", {rn-sae}")); break; + case FD_RC_RD: buf = fd_strpcat(buf, fd_stre(", {rd-sae}")); break; + case FD_RC_RU: buf = fd_strpcat(buf, fd_stre(", {ru-sae}")); break; + case FD_RC_RZ: buf = fd_strpcat(buf, fd_stre(", {rz-sae}")); break; + case FD_RC_SAE: buf = fd_strpcat(buf, fd_stre(", {sae}")); break; + default: break; // should not happen + } } *buf++ = '\0'; return buf; diff --git a/instrs.txt b/instrs.txt index 5f37e0f..ec4e8c2 100644 --- a/instrs.txt +++ b/instrs.txt @@ -209,8 +209,8 @@ c1/7 MI Ev Ib - - SAR EFL=m--mmumm # RET immediate size handled in code c2 I Iw - - - RET+w F64 c3 NP - - - - RET+w F64 -c4/m RM Gz Mp - - LES I64 -c5/m RM Gz Mp - - LDS I64 +c4/m RM Gv Mp - - LES I64 +c5/m RM Gv Mp - - LDS I64 c6/0 MI Eb Ib - - MOV SZ8 c6f8 I Ib - - - XABORT F=HLERTM c7/0 MI Ev Iz - - MOV @@ -367,6 +367,8 @@ NP.0f01d7 NP - - - - ENCLU F=SGX 0f18/1m M Mb - - - PREFETCHT0 F=SSE 0f18/2m M Mb - - - PREFETCHT1 F=SSE 0f18/3m M Mb - - - PREFETCHT2 F=SSE +0f18/6m M Mb - - - PREFETCHIT1 O64 F=PREFETCHI +0f18/7m M Mb - - - PREFETCHIT0 O64 F=PREFETCHI # Reserved NOPs are weak, they can be overridden by other instructions. *0f18 MR Ev Gv - - RESERVED_NOP *0f19 MR Ev Gv - - RESERVED_NOP @@ -880,14 +882,10 @@ NP.0f38f9/m MR My Gy - - MOVDIRI F=MOVDIRI 66.0f3adf RMI Vdq Wdq Ib - AESKEYGENASSIST F=AESNI VEX.66.L0.0f38db RM Vdq Wdq - - VAESIMC F=AESNI,AVX # 256-bit encodings require VAES. 
-VEX.66.L0.0f38dc RVM Vx Hx Wx - VAESENC F=AESNI,AVX -VEX.66.L1.0f38dc RVM Vx Hx Wx - VAESENC F=AESNI,VAES,AVX -VEX.66.L0.0f38dd RVM Vx Hx Wx - VAESENCLAST F=AESNI,AVX -VEX.66.L1.0f38dd RVM Vx Hx Wx - VAESENCLAST F=AESNI,VAES,AVX -VEX.66.L0.0f38de RVM Vx Hx Wx - VAESDEC F=AESNI,AVX -VEX.66.L1.0f38de RVM Vx Hx Wx - VAESDEC F=AESNI,VAES,AVX -VEX.66.L0.0f38df RVM Vx Hx Wx - VAESDECLAST F=AESNI,AVX -VEX.66.L1.0f38df RVM Vx Hx Wx - VAESDECLAST F=AESNI,VAES,AVX +VEX.66.0f38dc RVM Vx Hx Wx - VAESENC F=AESNI,AVX +VEX.66.0f38dd RVM Vx Hx Wx - VAESENCLAST F=AESNI,AVX +VEX.66.0f38de RVM Vx Hx Wx - VAESDEC F=AESNI,AVX +VEX.66.0f38df RVM Vx Hx Wx - VAESDECLAST F=AESNI,AVX VEX.66.L0.0f3adf RMI Vdq Wdq Ib - VAESKEYGENASSIST F=AESNI,AVX # # AVX @@ -1265,7 +1263,7 @@ VEX.66.W1.L0.0f3a22 RVMI Vdq Hdq Ey Ib VPINSRQ O64 F=AVX ENC_NOSZ VEX.66.W0.L1.0f3a38 RVMI Vx Hx Wdq Ib VINSERTI128 F=AVX2 ENC_NOSZ VEX.66.W0.L1.0f3a39 MRI Wdq Vx Ib - VEXTRACTI128 F=AVX2 ENC_NOSZ VEX.66.0f3a40 RVMI Vx Hx Wx Ib VDPPS F=AVX -VEX.66.0f3a41 RVMI Vx Hx Wx Ib VDPPD F=AVX +VEX.66.L0.0f3a41 RVMI Vx Hx Wx Ib VDPPD F=AVX VEX.66.0f3a42 RVMI Vx Hx Wx Ib VMPSADBW F=AVX VEX.66.0f3a44 RVMI Vx Hx Wx Ib VPCLMULQDQ F=PCLMULQDQ,AVX VEX.66.W0.L1.0f3a46 RVMI Vx Hx Wx Ib VPERM2I128 F=AVX2 @@ -1533,6 +1531,12 @@ F3.0fae/4 M Ey - - - PTWRITE F=PTWRITE 66.0f38cf RM Vx Wx - - GF2P8MULB F=GFNI 66.0f3ace RMI Vx Wx Ib - GF2P8AFFINEQB F=GFNI 66.0f3acf RMI Vx Wx Ib - GF2P8AFFINEINVQB F=GFNI +VEX.66.W0.0f38cf RVM Vx Hx Wx - VGF2P8MULB F=AVX,GFNI +VEX.66.W1.0f3ace RVMI Vx Hx Wx Ib VGF2P8AFFINEQB F=AVX,GFNI +VEX.66.W1.0f3acf RVMI Vx Hx Wx Ib VGF2P8AFFINEINVQB F=AVX,GFNI +EVEX.66.W0.0f38cf RVM Vx Hx Wx - EVX_GF2P8MULB+k F=AVX512F,GFNI TUPLE_FULL_MEM +EVEX.66.W1.0f3ace RVMI Vx Hx Wx Ib EVX_GF2P8AFFINEQB+kb F=AVX512F,GFNI TUPLE_FULL_64 +EVEX.66.W1.0f3acf RVMI Vx Hx Wx Ib EVX_GF2P8AFFINEINVQB+kb F=AVX512F,GFNI TUPLE_FULL_64 # ENQCMD # TODO: Gy operands are address-sized @@ -1611,7 +1615,7 @@ VEX.66.W1.0f38b4 RVM Vx Hx Wx - VPMADD52LUQ F=AVX-IFMA VEX.66.W1.0f38b5 RVM Vx Hx Wx - VPMADD52HUQ F=AVX-IFMA # HRESET -#F3.0f3af0c0 IA Ib Rd - - HRESET F=HRESET +F3.0f3af0c0 I Ib - - - HRESET F=HRESET # SERIALIZE NP.0f01e8 NP - - - - SERIALIZE F=SERIALIZE @@ -1671,3 +1675,663 @@ F3.0f38fb/r RM Gd Rd - - ENCODEKEY256 F=AESKLE F2.0f00/6 M Ew - - - LKGS F=FRED F3.0f01ca NP - - - - ERETU F=FRED F2.0f01ca NP - - - - ERETS F=FRED + +# AVX512 +EVEX.NP.W0.0f58 RVM Vps Hps Wps - EVX_ADDPS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f58 RVM Vpd Hpd Wpd - EVX_ADDPD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.F3.W0.LIG.0f58 RVM Vdq Hdq Wss - EVX_ADDSS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.F2.W1.LIG.0f58 RVM Vdq Hdq Wsd - EVX_ADDSD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.WIG.0f38dc RVM Vx Hx Wx - EVX_AESENC F=AVX512F,VAES TUPLE_FULL_MEM +EVEX.66.WIG.0f38dd RVM Vx Hx Wx - EVX_AESENCLAST F=AVX512F,VAES TUPLE_FULL_MEM +EVEX.66.WIG.0f38de RVM Vx Hx Wx - EVX_AESDEC F=AVX512F,VAES TUPLE_FULL_MEM +EVEX.66.WIG.0f38df RVM Vx Hx Wx - EVX_AESDECLAST F=AVX512F,VAES TUPLE_FULL_MEM +EVEX.NP.W0.0f54 RVM Vps Hps Wps - EVX_ANDPS+kb F=AVX512DQ TUPLE_FULL_32 +EVEX.66.W1.0f54 RVM Vpd Hpd Wpd - EVX_ANDPD+kb F=AVX512DQ TUPLE_FULL_64 +EVEX.NP.W0.0f55 RVM Vps Hps Wps - EVX_ANDNPS+kb F=AVX512DQ TUPLE_FULL_32 +EVEX.66.W1.0f55 RVM Vpd Hpd Wpd - EVX_ANDNPD+kb F=AVX512DQ TUPLE_FULL_64 +EVEX.NP.W0.0fc2 RVMI Kb Hps Wps Ib EVX_CMPPS+kbe F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0fc2 RVMI Kb Hpd Wpd Ib EVX_CMPPD+kbe F=AVX512F TUPLE_FULL_64 +EVEX.F3.W0.LIG.0fc2 RVMI Kb Hss Wss Ib EVX_CMPSS+ke F=AVX512F TUPLE1_SCALAR_32 +EVEX.F2.W1.LIG.0fc2 RVMI
Kb Hsd Wsd Ib EVX_CMPSD+ke F=AVX512F TUPLE1_SCALAR_64 +EVEX.NP.W0.LIG.0f2f RM Vss Wss - - EVX_COMISS+e F=AVX512F TUPLE1_SCALAR_32 EFL=0--0m0mm +EVEX.66.W1.LIG.0f2f RM Vsd Wsd - - EVX_COMISD+e F=AVX512F TUPLE1_SCALAR_64 EFL=0--0m0mm +# Note: SAE is ignored +EVEX.F3.W0.0fe6 RM Vpd Wh - - EVX_CVTDQ2PD+kbe F=AVX512F TUPLE_HALF_32 +EVEX.F2.W1.0fe6 RM Vh Wpd - - EVX_CVTPD2DQ+kbr F=AVX512F TUPLE_FULL_64 +EVEX.NP.W0.0f5b RM Vps Wps - - EVX_CVTDQ2PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W0.0f5b RM Vps Wps - - EVX_CVTPS2DQ+kbr F=AVX512F TUPLE_FULL_32 +EVEX.NP.W0.0f5a RM Vpd Wh - - EVX_CVTPS2PD+kbe F=AVX512F TUPLE_HALF_32 +EVEX.66.W1.0f5a RM Vh Wpd - - EVX_CVTPD2PS+kbr F=AVX512F TUPLE_FULL_64 +EVEX.F3.LIG.0f2d RM Gy Wss - - EVX_CVTSS2SI+r F=AVX512F TUPLE1_FIXED_32 +EVEX.F2.LIG.0f2d RM Gy Wsd - - EVX_CVTSD2SI+r F=AVX512F TUPLE1_FIXED_64 +EVEX.F3.W0.LIG.0f5a RVM Vdq Hdq Wss - EVX_CVTSS2SD+ke F=AVX512F TUPLE1_SCALAR_32 +EVEX.F2.W1.LIG.0f5a RVM Vdq Hdq Wsd - EVX_CVTSD2SS+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.F3.LIG.0f2a RVM Vdq Hdq Ey - EVX_CVTSI2SS+r F=AVX512F TUPLE1_SCALAR_OPSZ +# Note: for W0, ER is ignored (i.e., will not UD, according to Intel SDM) +EVEX.F2.LIG.0f2a RVM Vdq Hdq Ey - EVX_CVTSI2SD+r F=AVX512F TUPLE1_SCALAR_OPSZ +EVEX.66.W1.0fe6 RM Vps Wpd - - EVX_CVTTPD2DQ+kbe F=AVX512F TUPLE_FULL_64 +EVEX.F3.W0.0f5b RM Vps Wps - - EVX_CVTTPS2DQ+kbe F=AVX512F TUPLE_FULL_32 +EVEX.F2.LIG.0f2c RM Gy Wsd - - EVX_CVTTSD2SI+e F=AVX512F TUPLE1_FIXED_64 +EVEX.F3.LIG.0f2c RM Gy Wss - - EVX_CVTTSS2SI+e F=AVX512F TUPLE1_FIXED_32 +EVEX.NP.W0.0f5e RVM Vps Hps Wps - EVX_DIVPS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f5e RVM Vpd Hpd Wpd - EVX_DIVPD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.F3.W0.LIG.0f5e RVM Vdq Hdq Wss - EVX_DIVSS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.F2.W1.LIG.0f5e RVM Vdq Hdq Wsd - EVX_DIVSD+kr F=AVX512F TUPLE1_SCALAR_64 +# Note: tuple size is actually fixed at 32 bits, regardless of EVEX.W +EVEX.66.WIG.L0.0f3a17 MRI Ess Vps Ib - EVX_EXTRACTPS F=AVX512F TUPLE1_FIXED_32 +EVEX.66.W0.L0.0f3a21 RVMI Vps Hps Wss Ib EVX_INSERTPS F=AVX512F TUPLE1_SCALAR_32 +EVEX.NP.W0.0f5f RVM Vps Hps Wps - EVX_MAXPS+kbe F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f5f RVM Vpd Hpd Wpd - EVX_MAXPD+kbe F=AVX512F TUPLE_FULL_64 +EVEX.F3.W0.LIG.0f5f RVM Vdq Hdq Wss - EVX_MAXSS+ke F=AVX512F TUPLE1_SCALAR_32 +EVEX.F2.W1.LIG.0f5f RVM Vdq Hdq Wsd - EVX_MAXSD+ke F=AVX512F TUPLE1_SCALAR_64 +EVEX.NP.W0.0f5d RVM Vps Hps Wps - EVX_MINPS+kbe F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f5d RVM Vpd Hpd Wpd - EVX_MINPD+kbe F=AVX512F TUPLE_FULL_64 +EVEX.F3.W0.LIG.0f5d RVM Vdq Hdq Wss - EVX_MINSS+ke F=AVX512F TUPLE1_SCALAR_32 +EVEX.F2.W1.LIG.0f5d RVM Vdq Hdq Wsd - EVX_MINSD+ke F=AVX512F TUPLE1_SCALAR_64 +EVEX.NP.W0.0f28 RM Vps Wps - - EVX_MOVAPS+k F=AVX512F TUPLE_FULL_MEM +EVEX.66.W1.0f28 RM Vpd Wpd - - EVX_MOVAPD+k F=AVX512F TUPLE_FULL_MEM +EVEX.NP.W0.0f29 MR Wps Vps - - EVX_MOVAPS+k F=AVX512F TUPLE_FULL_MEM +EVEX.66.W1.0f29 MR Wpd Vpd - - EVX_MOVAPD+k F=AVX512F TUPLE_FULL_MEM +EVEX.66.L0.0f7e MR Ey Vy - - EVX_MOV_X2G F=AVX512F TUPLE1_SCALAR_OPSZ +EVEX.66.L0.0f6e RM Vy Ey - - EVX_MOV_G2X F=AVX512F TUPLE1_SCALAR_OPSZ +EVEX.F2.W1.L0.0f12 RM Vpd Wq - - EVX_MOVDDUP+k F=AVX512F TUPLE1_SCALAR_64 +EVEX.F2.W1.L12.0f12 RM Vpd Wpd - - EVX_MOVDDUP+k F=AVX512F TUPLE_MOVDDUP +EVEX.66.W0.0f6f RM Vx Wx - - EVX_MOVDQA32+k F=AVX512F TUPLE_FULL_MEM +EVEX.66.W1.0f6f RM Vx Wx - - EVX_MOVDQA64+k F=AVX512F TUPLE_FULL_MEM +EVEX.66.W0.0f7f MR Wx Vx - - EVX_MOVDQA32+k F=AVX512F TUPLE_FULL_MEM +EVEX.66.W1.0f7f MR Wx Vx - - EVX_MOVDQA64+k F=AVX512F TUPLE_FULL_MEM +EVEX.F3.W0.0f6f RM Vx Wx - - 
EVX_MOVDQU32+k F=AVX512F TUPLE_FULL_MEM +EVEX.F3.W1.0f6f RM Vx Wx - - EVX_MOVDQU64+k F=AVX512F TUPLE_FULL_MEM +EVEX.F3.W0.0f7f MR Wx Vx - - EVX_MOVDQU32+k F=AVX512F TUPLE_FULL_MEM +EVEX.F3.W1.0f7f MR Wx Vx - - EVX_MOVDQU64+k F=AVX512F TUPLE_FULL_MEM +EVEX.F2.W0.0f6f RM Vx Wx - - EVX_MOVDQU8+k F=AVX512BW TUPLE_FULL_MEM +EVEX.F2.W1.0f6f RM Vx Wx - - EVX_MOVDQU16+k F=AVX512BW TUPLE_FULL_MEM +EVEX.F2.W0.0f7f MR Wx Vx - - EVX_MOVDQU8+k F=AVX512BW TUPLE_FULL_MEM +EVEX.F2.W1.0f7f MR Wx Vx - - EVX_MOVDQU16+k F=AVX512BW TUPLE_FULL_MEM +EVEX.NP.W0.L0.0f12/m RVM Vps Hps Mq - EVX_MOVLPS F=AVX512F TUPLE2_32 +EVEX.NP.W0.L0.0f12/r RVM Vps Hps Ups - EVX_MOVHLPS F=AVX512F +EVEX.66.W1.L0.0f12/m RVM Vpd Hpd Msd - EVX_MOVLPD F=AVX512F TUPLE1_SCALAR_64 +EVEX.NP.W0.L0.0f13/m MR Mq Vq - - EVX_MOVLPS F=AVX512F TUPLE2_32 +EVEX.66.W1.L0.0f13/m MR Msd Vsd - - EVX_MOVLPD F=AVX512F TUPLE1_SCALAR_64 +EVEX.NP.W0.L0.0f16/m RVM Vps Hq Mq - EVX_MOVHPS F=AVX512F TUPLE2_32 +EVEX.NP.W0.L0.0f16/r RVM Vps Hq Uq - EVX_MOVLHPS F=AVX512F +EVEX.66.W1.L0.0f16/m RVM Vpd Hsd Msd - EVX_MOVHPD F=AVX512F TUPLE1_SCALAR_64 +EVEX.NP.W0.L0.0f17/m MR Mq Vq - - EVX_MOVHPS F=AVX512F TUPLE2_32 +EVEX.66.W1.L0.0f17/m MR Msd Vpd - - EVX_MOVHPD F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f382a/m RM Vx Mx - - EVX_MOVNTDQA F=AVX512F TUPLE_FULL_MEM +EVEX.66.W0.0fe7/m MR Mx Vx - - EVX_MOVNTDQ F=AVX512F TUPLE_FULL_MEM +EVEX.NP.W0.0f2b/m MR Mps Vps - - EVX_MOVNTPS F=AVX512F TUPLE_FULL_MEM +EVEX.66.W1.0f2b/m MR Mpd Vpd - - EVX_MOVNTPD F=AVX512F TUPLE_FULL_MEM +EVEX.F3.W1.L0.0f7e RM Vq Wq - - EVX_MOVQ F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W1.L0.0fd6 MR Wq Vq - - EVX_MOVQ F=AVX512F TUPLE1_SCALAR_64 +EVEX.F3.W0.LIG.0f10/m RM Vdq Mss - - EVX_MOVSS+k F=AVX512F TUPLE1_SCALAR_32 +EVEX.F3.W0.LIG.0f10/r RVM Vdq Hdq Uss - EVX_MOVSS+k F=AVX512F +EVEX.F2.W1.LIG.0f10/m RM Vdq Msd - - EVX_MOVSD+k F=AVX512F TUPLE1_SCALAR_64 +EVEX.F2.W1.LIG.0f10/r RVM Vdq Hdq Usd - EVX_MOVSD+k F=AVX512F +EVEX.F3.W0.LIG.0f11/m MR Mss Vss - - EVX_MOVSS+k F=AVX512F TUPLE1_SCALAR_32 +EVEX.F3.W0.LIG.0f11/r MVR Udq Hdq Vss - EVX_MOVSS+k F=AVX512F +EVEX.F2.W1.LIG.0f11/m MR Msd Vsd - - EVX_MOVSD+k F=AVX512F TUPLE1_SCALAR_64 +EVEX.F2.W1.LIG.0f11/r MVR Udq Hdq Vsd - EVX_MOVSD+k F=AVX512F +EVEX.F3.W0.0f12 RM Vps Wps - - EVX_MOVSLDUP+k F=AVX512F TUPLE_FULL_MEM +EVEX.F3.W0.0f16 RM Vps Wps - - EVX_MOVSHDUP+k F=AVX512F TUPLE_FULL_MEM +EVEX.NP.W0.0f10 RM Vps Wps - - EVX_MOVUPS+k F=AVX512F TUPLE_FULL_MEM +EVEX.66.W1.0f10 RM Vpd Wpd - - EVX_MOVUPD+k F=AVX512F TUPLE_FULL_MEM +EVEX.NP.W0.0f11 MR Wps Vps - - EVX_MOVUPS+k F=AVX512F TUPLE_FULL_MEM +EVEX.66.W1.0f11 MR Wpd Vpd - - EVX_MOVUPD+k F=AVX512F TUPLE_FULL_MEM +EVEX.NP.W0.0f59 RVM Vps Hps Wps - EVX_MULPS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f59 RVM Vpd Hpd Wpd - EVX_MULPD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.F3.W0.LIG.0f59 RVM Vdq Hdq Wss - EVX_MULSS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.F2.W1.LIG.0f59 RVM Vdq Hdq Wsd - EVX_MULSD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.NP.W0.0f56 RVM Vps Hps Wps - EVX_ORPS+kb F=AVX512DQ TUPLE_FULL_32 +EVEX.66.W1.0f56 RVM Vpd Hpd Wpd - EVX_ORPD+kb F=AVX512DQ TUPLE_FULL_64 +EVEX.66.WIG.0f381c RM Vx Wx - - EVX_PABSB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0f381d RM Vx Wx - - EVX_PABSW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f381e RM Vx Wx - - EVX_PABSD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f381f RM Vx Wx - - EVX_PABSQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.WIG.0f63 RVM Vx Hx Wx - EVX_PACKSSWB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0f67 RVM Vx Hx Wx - EVX_PACKUSWB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f6b RVM Vx Hx Wx - 
EVX_PACKSSDW+kb F=AVX512BW TUPLE_FULL_32 +EVEX.66.W0.0f382b RVM Vx Hx Wx - EVX_PACKUSDW+kb F=AVX512BW TUPLE_FULL_32 +EVEX.66.WIG.0ffc RVM Vx Hx Wx - EVX_PADDB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0ffd RVM Vx Hx Wx - EVX_PADDW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0ffe RVM Vx Hx Wx - EVX_PADDD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0fd4 RVM Vx Hx Wx - EVX_PADDQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.WIG.0fec RVM Vx Hx Wx - EVX_PADDSB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0fed RVM Vx Hx Wx - EVX_PADDSW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0fdc RVM Vx Hx Wx - EVX_PADDUSB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0fdd RVM Vx Hx Wx - EVX_PADDUSW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0f3a0f RVMI Vx Hx Wx Ib EVX_PALIGNR+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0fdb RVM Vx Hx Wx - EVX_PANDD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0fdb RVM Vx Hx Wx - EVX_PANDQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0fdf RVM Vx Hx Wx - EVX_PANDND+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0fdf RVM Vx Hx Wx - EVX_PANDNQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.WIG.0fe0 RVM Vx Hx Wx - EVX_PAVGB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0fe3 RVM Vx Hx Wx - EVX_PAVGW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0f3a44 RVMI Vx Hx Wx Ib EVX_PCLMULQDQ F=AVX512F,VPCLMULQDQ TUPLE_FULL_MEM +EVEX.66.WIG.0f74 RVM K Hx Wx - EVX_PCMPEQB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0f75 RVM K Hx Wx - EVX_PCMPEQW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f76 RVM K Hx Wx - EVX_PCMPEQD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3829 RVM K Hx Wx - EVX_PCMPEQQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.WIG.0f64 RVM K Hx Wx - EVX_PCMPGTB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0f65 RVM K Hx Wx - EVX_PCMPGTW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f66 RVM K Hx Wx - EVX_PCMPGTD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3837 RVM K Hx Wx - EVX_PCMPGTQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.WIG.L0.0f3a14/m MRI Mb Vx Ib - EVX_PEXTRB F=AVX512BW TUPLE1_SCALAR_8 +EVEX.66.WIG.L0.0f3a14/r MRI Rd Vx Ib - EVX_PEXTRB F=AVX512BW +EVEX.66.WIG.L0.0fc5/r RMI Gd Ux Ib - EVX_PEXTRW F=AVX512BW +EVEX.66.WIG.L0.0f3a15/m MRI Mw Vx Ib - EVX_PEXTRW F=AVX512BW TUPLE1_SCALAR_16 +EVEX.66.WIG.L0.0f3a15/r MRI Rd Vx Ib - EVX_PEXTRW F=AVX512BW +EVEX.66.L0.0f3a16 MRI Ey Vdq Ib - EVX_PEXTR F=AVX512DQ TUPLE1_SCALAR_OPSZ +EVEX.66.WIG.L0.0f3a20 RVMI Vx Hx Eb Ib EVX_PINSR F=AVX512BW TUPLE1_SCALAR_8 +EVEX.66.WIG.L0.0fc4 RVMI Vx Hx Ew Ib EVX_PINSR F=AVX512BW TUPLE1_SCALAR_16 +EVEX.66.L0.0f3a22 RVMI Vdq Hdq Ey Ib EVX_PINSR F=AVX512DQ TUPLE1_SCALAR_OPSZ +EVEX.66.WIG.0f3804 RVM Vx Hx Wx - EVX_PMADDUBSW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0ff5 RVM Vx Hx Wx - EVX_PMADDWD+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0fda RVM Vx Hx Wx - EVX_PMINUB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0fde RVM Vx Hx Wx - EVX_PMAXUB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0fea RVM Vx Hx Wx - EVX_PMINSW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0fee RVM Vx Hx Wx - EVX_PMAXSW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0f3838 RVM Vx Hx Wx - EVX_PMINSB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f3839 RVM Vx Hx Wx - EVX_PMINSD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3839 RVM Vx Hx Wx - EVX_PMINSQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.WIG.0f383a RVM Vx Hx Wx - EVX_PMINUW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f383b RVM Vx Hx Wx - EVX_PMINUD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f383b RVM Vx Hx Wx - EVX_PMINUQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.WIG.0f383c RVM Vx Hx Wx - EVX_PMAXSB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f383d RVM Vx Hx Wx - EVX_PMAXSD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f383d RVM Vx Hx Wx - EVX_PMAXSQ+kb
F=AVX512F TUPLE_FULL_64 +EVEX.66.WIG.0f383e RVM Vx Hx Wx - EVX_PMAXUW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f383f RVM Vx Hx Wx - EVX_PMAXUD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f383f RVM Vx Hx Wx - EVX_PMAXUQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.WIG.0f3820 RM Vx Wh - - EVX_PMOVSXBW+k F=AVX512BW TUPLE_HALF_MEM +EVEX.66.WIG.0f3821 RM Vx Wf - - EVX_PMOVSXBD+k F=AVX512F TUPLE_QUARTER_MEM +EVEX.66.WIG.0f3822 RM Vx We - - EVX_PMOVSXBQ+k F=AVX512F TUPLE_EIGHTH_MEM +EVEX.66.WIG.0f3823 RM Vx Wh - - EVX_PMOVSXWD+k F=AVX512F TUPLE_HALF_MEM +EVEX.66.WIG.0f3824 RM Vx Wf - - EVX_PMOVSXWQ+k F=AVX512F TUPLE_QUARTER_MEM +EVEX.66.W0.0f3825 RM Vx Wh - - EVX_PMOVSXDQ+k F=AVX512F TUPLE_HALF_MEM +EVEX.66.WIG.0f3830 RM Vx Wh - - EVX_PMOVZXBW+k F=AVX512BW TUPLE_HALF_MEM +EVEX.66.WIG.0f3831 RM Vx Wf - - EVX_PMOVZXBD+k F=AVX512F TUPLE_QUARTER_MEM +EVEX.66.WIG.0f3832 RM Vx We - - EVX_PMOVZXBQ+k F=AVX512F TUPLE_EIGHTH_MEM +EVEX.66.WIG.0f3833 RM Vx Wh - - EVX_PMOVZXWD+k F=AVX512F TUPLE_HALF_MEM +EVEX.66.WIG.0f3834 RM Vx Wf - - EVX_PMOVZXWQ+k F=AVX512F TUPLE_QUARTER_MEM +EVEX.66.W0.0f3835 RM Vx Wh - - EVX_PMOVZXDQ+k F=AVX512F TUPLE_HALF_MEM +EVEX.66.W1.0f3828 RVM Vx Hx Wx - EVX_PMULDQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.WIG.0f380b RVM Vx Hx Wx - EVX_PMULHRSW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0fe4 RVM Vx Hx Wx - EVX_PMULHUW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0fe5 RVM Vx Hx Wx - EVX_PMULHW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0fd5 RVM Vx Hx Wx - EVX_PMULLW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f3840 RVM Vx Hx Wx - EVX_PMULLD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3840 RVM Vx Hx Wx - EVX_PMULLQ+kb F=AVX512DQ TUPLE_FULL_64 +EVEX.66.W1.0ff4 RVM Vx Hx Wx - EVX_PMULUDQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0feb RVM Vx Hx Wx - EVX_PORD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0feb RVM Vx Hx Wx - EVX_PORQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.WIG.0ff6 RVM Vx Hx Wx - EVX_PSADBW F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0f3800 RVM Vx Hx Wx - EVX_PSHUFB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f70 RMI Vx Wx Ib - EVX_PSHUFD+kb F=AVX512F TUPLE_FULL_32 +EVEX.F3.WIG.0f70 RMI Vx Wx Ib - EVX_PSHUFHW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.F2.WIG.0f70 RMI Vx Wx Ib - EVX_PSHUFLW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0f71/2 VMI Hx Wx Ib - EVX_PSRLW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0f71/4 VMI Hx Wx Ib - EVX_PSRAW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0f71/6 VMI Hx Wx Ib - EVX_PSLLW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f72/2 VMI Hx Wx Ib - EVX_PSRLD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W0.0f72/4 VMI Hx Wx Ib - EVX_PSRAD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W0.0f72/6 VMI Hx Wx Ib - EVX_PSLLD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f73/2 VMI Hx Wx Ib - EVX_PSRLQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W1.0f72/4 VMI Hx Wx Ib - EVX_PSRAQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W1.0f73/6 VMI Hx Wx Ib - EVX_PSLLQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.WIG.0fd1 RVM Vx Hx Wdq - EVX_PSRLW+k F=AVX512BW TUPLE_MEM128 +EVEX.66.W0.0fd2 RVM Vx Hx Wdq - EVX_PSRLD+k F=AVX512F TUPLE_MEM128 +EVEX.66.W1.0fd3 RVM Vx Hx Wdq - EVX_PSRLQ+k F=AVX512F TUPLE_MEM128 +EVEX.66.WIG.0fe1 RVM Vx Hx Wdq - EVX_PSRAW+k F=AVX512BW TUPLE_MEM128 +EVEX.66.W0.0fe2 RVM Vx Hx Wdq - EVX_PSRAD+k F=AVX512F TUPLE_MEM128 +EVEX.66.W1.0fe2 RVM Vx Hx Wdq - EVX_PSRAQ+k F=AVX512F TUPLE_MEM128 +EVEX.66.WIG.0ff1 RVM Vx Hx Wdq - EVX_PSLLW+k F=AVX512BW TUPLE_MEM128 +EVEX.66.W0.0ff2 RVM Vx Hx Wdq - EVX_PSLLD+k F=AVX512F TUPLE_MEM128 +EVEX.66.W1.0ff3 RVM Vx Hx Wdq - EVX_PSLLQ+k F=AVX512F TUPLE_MEM128 +EVEX.66.WIG.0f73/3 VMI Hx Ux Ib - EVX_PSRLDQ F=AVX512BW TUPLE_FULL_MEM
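The +k/+b/+e/+r mnemonic suffixes used throughout these entries declare opmask, broadcast, SAE, and rounding support; parseinstrs.py (below) turns them into the evex_mask/evex_bcst/evex_rc descriptor flags, and decoded instructions expose that state through the new evex byte and FD_* accessors added to fadec.h. A minimal consumer sketch, assuming only the accessors added above plus fadec's existing fd_decode/FD_OP_TYPE API:

#include <stdio.h>
#include "fadec.h"

// Print the EVEX-specific properties of one decoded instruction. All FD_*
// names below are either pre-existing fadec API (FD_OP_TYPE) or added by
// this patch (FD_OT_MEMBCST, FD_OP_BCST64, FD_MASKREG, FD_MASKZERO,
// FD_ROUNDCONTROL, FD_RC_MXCSR).
static void dump_evex_info(const FdInstr* instr) {
    if (FD_MASKREG(instr)) // 0 = unmasked; k0 is never a write mask
        printf("opmask: k%u (%s)\n", (unsigned) FD_MASKREG(instr),
               FD_MASKZERO(instr) ? "zeroing" : "merging");
    for (unsigned i = 0; i < 4; i++)
        if (FD_OP_TYPE(instr, i) == FD_OT_MEMBCST)
            printf("operand %u: memory broadcast of %u-bit elements\n",
                   i, FD_OP_BCST64(instr, i) ? 64 : 32);
    if (FD_ROUNDCONTROL(instr) != FD_RC_MXCSR) // static rounding or SAE
        printf("round control: %d\n", (int) FD_ROUNDCONTROL(instr));
}

After a successful fd_decode(code, len, 64, 0, &instr), dump_evex_info(&instr) reports the same state that the format.c changes render as {k2}, {1toX}, and {rn-sae} suffixes.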
+EVEX.66.WIG.0f73/7 VMI Hx Ux Ib - EVX_PSLLDQ F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0ff8 RVM Vx Hx Wx - EVX_PSUBB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0ff9 RVM Vx Hx Wx - EVX_PSUBW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0ffa RVM Vx Hx Wx - EVX_PSUBD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0ffb RVM Vx Hx Wx - EVX_PSUBQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.WIG.0fe8 RVM Vx Hx Wx - EVX_PSUBSB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0fe9 RVM Vx Hx Wx - EVX_PSUBSW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0fd8 RVM Vx Hx Wx - EVX_PSUBUSB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0fd9 RVM Vx Hx Wx - EVX_PSUBUSW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0f60 RVM Vx Hx Wx - EVX_PUNPCKLBW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0f61 RVM Vx Hx Wx - EVX_PUNPCKLWD+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f62 RVM Vx Hx Wx - EVX_PUNPCKLDQ+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f6c RVM Vx Hx Wx - EVX_PUNPCKLQDQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.WIG.0f68 RVM Vx Hx Wx - EVX_PUNPCKHBW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.WIG.0f69 RVM Vx Hx Wx - EVX_PUNPCKHWD+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f6a RVM Vx Hx Wx - EVX_PUNPCKHDQ+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f6d RVM Vx Hx Wx - EVX_PUNPCKHQDQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0fef RVM Vx Hx Wx - EVX_PXORD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0fef RVM Vx Hx Wx - EVX_PXORQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.NP.W0.0fc6 RVMI Vx Hx Wx Ib EVX_SHUFPS+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0fc6 RVMI Vx Hx Wx Ib EVX_SHUFPD+kb F=AVX512F TUPLE_FULL_64 +EVEX.NP.W0.0f51 RM Vps Wps - - EVX_SQRTPS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f51 RM Vpd Wpd - - EVX_SQRTPD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.F3.W0.LIG.0f51 RVM Vdq Hdq Wss - EVX_SQRTSS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.F2.W1.LIG.0f51 RVM Vdq Hdq Wsd - EVX_SQRTSD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.NP.W0.0f5c RVM Vps Hps Wps - EVX_SUBPS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f5c RVM Vpd Hpd Wpd - EVX_SUBPD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.F3.W0.LIG.0f5c RVM Vdq Hdq Wss - EVX_SUBSS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.F2.W1.LIG.0f5c RVM Vdq Hdq Wsd - EVX_SUBSD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.NP.W0.LIG.0f2e RM Vss Wss - - EVX_UCOMISS+e F=AVX512F TUPLE1_SCALAR_32 EFL=0--0m0mm +EVEX.66.W1.LIG.0f2e RM Vsd Wsd - - EVX_UCOMISD+e F=AVX512F TUPLE1_SCALAR_64 EFL=0--0m0mm +EVEX.NP.W0.0f14 RVM Vps Hps Wps - EVX_UNPCKLPS+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f14 RVM Vpd Hpd Wpd - EVX_UNPCKLPD+kb F=AVX512F TUPLE_FULL_64 +EVEX.NP.W0.0f15 RVM Vps Hps Wps - EVX_UNPCKHPS+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f15 RVM Vpd Hpd Wpd - EVX_UNPCKHPD+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f3a03 RVMI Vx Hx Wx Ib EVX_ALIGND+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3a03 RVMI Vx Hx Wx Ib EVX_ALIGNQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f3865 RVM Vx Hx Wx - EVX_BLENDMPS+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3865 RVM Vx Hx Wx - EVX_BLENDMPD+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f3818 RM Vx Wd - - EVX_BROADCASTSS+k F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W0.L12.0f3819 RM Vx Wq - - EVX_BROADCASTF32X2+k F=AVX512DQ TUPLE2_32 +EVEX.66.W1.L12.0f3819 RM Vx Wq - - EVX_BROADCASTSD+k F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.L12.0f381a/m RM Vx Wdq - - EVX_BROADCASTF32X4+k F=AVX512F TUPLE4_32 +EVEX.66.W1.L12.0f381a/m RM Vx Wdq - - EVX_BROADCASTF64X2+k F=AVX512DQ TUPLE2_64 +EVEX.66.W0.L2.0f381b/m RM Vx Wqq - - EVX_BROADCASTF32X8+k F=AVX512DQ TUPLE8_32 +EVEX.66.W1.L2.0f381b/m RM Vx Wqq - - EVX_BROADCASTF64X4+k F=AVX512F TUPLE4_64 +# Note tuple type, scale is not memory size but element size +EVEX.66.W0.0f388a/m MR Md Vx - - 
EVX_COMPRESSPS+k F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W0.0f388a/r MR Ux Vx - - EVX_COMPRESSPS+k F=AVX512F +EVEX.66.W1.0f388a/m MR Mq Vx - - EVX_COMPRESSPD+k F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W1.0f388a/r MR Ux Vx - - EVX_COMPRESSPD+k F=AVX512F +EVEX.F2.W0.0f3872 RVM Vx Hx Wx - EVX_CVTNE2PS2BF16+kb F=AVX512_BF16 TUPLE_FULL_32 +EVEX.F3.W0.0f3872 RM Vh Wx - - EVX_CVTNEPS2BF16+kb F=AVX512_BF16 TUPLE_FULL_32 +EVEX.66.W0.0f7b RM Vx Wh - - EVX_CVTPS2QQ+kbr F=AVX512DQ TUPLE_HALF_32 +EVEX.66.W1.0f7b RM Vx Wx - - EVX_CVTPD2QQ+kbr F=AVX512DQ TUPLE_FULL_64 +EVEX.NP.W0.0f79 RM Vx Wx - - EVX_CVTPS2UDQ+kbr F=AVX512F TUPLE_FULL_32 +EVEX.NP.W1.0f79 RM Vh Wx - - EVX_CVTPD2UDQ+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f79 RM Vx Wh - - EVX_CVTPS2UQQ+kbr F=AVX512DQ TUPLE_HALF_32 +EVEX.66.W1.0f79 RM Vx Wx - - EVX_CVTPD2UQQ+kbr F=AVX512DQ TUPLE_FULL_64 +EVEX.66.W0.0f3813 RM Vx Wh - - EVX_CVTPH2PS+ke F=AVX512F TUPLE_HALF_MEM +EVEX.66.W0.0f3a1d MRI Wh Vx Ib - EVX_CVTPS2PH+ke F=AVX512F TUPLE_HALF_MEM +EVEX.F3.W1.0fe6 RM Vx Wx - - EVX_CVTQQ2PD+kbr F=AVX512DQ TUPLE_FULL_64 +EVEX.NP.W1.0f5b RM Vh Wx - - EVX_CVTQQ2PS+kbr F=AVX512DQ TUPLE_FULL_64 +EVEX.F2.LIG.0f79 RM Gy Wsd - - EVX_CVTSD2USI+r F=AVX512F TUPLE1_FIXED_64 +EVEX.F3.LIG.0f79 RM Gy Wss - - EVX_CVTSS2USI+r F=AVX512F TUPLE1_FIXED_32 +# Note: for W0, ER is ignored (i.e., will not UD, according to Intel SDM) +EVEX.F2.LIG.0f7b RVM Vdq Hdq Ey - EVX_CVTUSI2SD+r F=AVX512F TUPLE1_SCALAR_OPSZ +EVEX.F3.LIG.0f7b RVM Vdq Hdq Ey - EVX_CVTUSI2SS+r F=AVX512F TUPLE1_SCALAR_OPSZ +EVEX.66.W0.0f7a RM Vx Wh - - EVX_CVTTPS2QQ+kbe F=AVX512DQ TUPLE_HALF_32 +EVEX.66.W1.0f7a RM Vx Wx - - EVX_CVTTPD2QQ+kbe F=AVX512DQ TUPLE_FULL_64 +EVEX.NP.W0.0f78 RM Vx Wx - - EVX_CVTTPS2UDQ+kbe F=AVX512F TUPLE_FULL_32 +EVEX.NP.W1.0f78 RM Vh Wx - - EVX_CVTTPD2UDQ+kbe F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f78 RM Vx Wh - - EVX_CVTTPS2UQQ+kbe F=AVX512DQ TUPLE_HALF_32 +EVEX.66.W1.0f78 RM Vx Wx - - EVX_CVTTPD2UQQ+kbe F=AVX512DQ TUPLE_FULL_64 +EVEX.F2.LIG.0f78 RM Gy Wsd - - EVX_CVTTSD2USI+e F=AVX512F TUPLE1_FIXED_64 +EVEX.F3.LIG.0f78 RM Gy Wss - - EVX_CVTTSS2USI+e F=AVX512F TUPLE1_FIXED_32 +# Note: SAE is ignored.
+EVEX.F3.W0.0f7a RM Vx Wh - - EVX_CVTUDQ2PD+kbe F=AVX512F TUPLE_HALF_32 +EVEX.F2.W0.0f7a RM Vx Wx - - EVX_CVTUDQ2PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.F3.W1.0f7a RM Vx Wx - - EVX_CVTUQQ2PD+kbr F=AVX512DQ TUPLE_FULL_64 +EVEX.F2.W1.0f7a RM Vh Wx - - EVX_CVTUQQ2PS+kbr F=AVX512DQ TUPLE_FULL_64 +EVEX.66.W0.0f3a42 RVMI Vx Hx Wx Ib EVX_DBPSADBW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.F3.W0.0f3852 RVM Vx Hx Wx - EVX_DPBF16PS+kb F=AVX512_BF16 TUPLE_FULL_32 +# Note tuple type, scale is not memory size but element size +EVEX.66.W0.0f3888/m RM Vx Md - - EVX_EXPANDPS+k F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W0.0f3888/r RM Vx Ux - - EVX_EXPANDPS+k F=AVX512F +EVEX.66.W1.0f3888/m RM Vx Mq - - EVX_EXPANDPD+k F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W1.0f3888/r RM Vx Ux - - EVX_EXPANDPD+k F=AVX512F +EVEX.66.W0.L12.0f3a19 MRI Wdq Vx Ib - EVX_EXTRACTF32X4+k F=AVX512F TUPLE4_32 +EVEX.66.W1.L12.0f3a19 MRI Wdq Vx Ib - EVX_EXTRACTF64X2+k F=AVX512DQ TUPLE2_64 +EVEX.66.W0.L2.0f3a1b MRI Wqq Vx Ib - EVX_EXTRACTF32X8+k F=AVX512DQ TUPLE8_32 +EVEX.66.W1.L2.0f3a1b MRI Wqq Vx Ib - EVX_EXTRACTF64X4+k F=AVX512F TUPLE4_64 +EVEX.66.W0.L12.0f3a39 MRI Wdq Vx Ib - EVX_EXTRACTI32X4+k F=AVX512F TUPLE4_32 +EVEX.66.W1.L12.0f3a39 MRI Wdq Vx Ib - EVX_EXTRACTI64X2+k F=AVX512DQ TUPLE2_64 +EVEX.66.W0.L2.0f3a3b MRI Wqq Vx Ib - EVX_EXTRACTI32X8+k F=AVX512DQ TUPLE8_32 +EVEX.66.W1.L2.0f3a3b MRI Wqq Vx Ib - EVX_EXTRACTI64X4+k F=AVX512F TUPLE4_64 +EVEX.66.W0.0f3a54 RVMI Vps Hps Wps Ib EVX_FIXUPIMMPS+kbe F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3a54 RVMI Vpd Hpd Wpd Ib EVX_FIXUPIMMPD+kbe F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f3a55 RVMI Vdq Hdq Wss Ib EVX_FIXUPIMMSS+ke F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f3a55 RVMI Vdq Hdq Wsd Ib EVX_FIXUPIMMSD+ke F=AVX512F TUPLE1_SCALAR_64 +# TODO: verify these; they are just copied from AVX/FMA.
+EVEX.66.W0.0f3896 RVM Vx Hx Wx - EVX_FMADDSUB132PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3896 RVM Vx Hx Wx - EVX_FMADDSUB132PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f3897 RVM Vx Hx Wx - EVX_FMSUBADD132PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3897 RVM Vx Hx Wx - EVX_FMSUBADD132PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f3898 RVM Vx Hx Wx - EVX_FMADD132PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3898 RVM Vx Hx Wx - EVX_FMADD132PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f3899 RVM Vdq Hdq Wss - EVX_FMADD132SS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f3899 RVM Vdq Hdq Wsd - EVX_FMADD132SD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f389a RVM Vx Hx Wx - EVX_FMSUB132PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f389a RVM Vx Hx Wx - EVX_FMSUB132PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f389b RVM Vdq Hdq Wss - EVX_FMSUB132SS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f389b RVM Vdq Hdq Wsd - EVX_FMSUB132SD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f389c RVM Vx Hx Wx - EVX_FNMADD132PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f389c RVM Vx Hx Wx - EVX_FNMADD132PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f389d RVM Vdq Hdq Wss - EVX_FNMADD132SS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f389d RVM Vdq Hdq Wsd - EVX_FNMADD132SD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f389e RVM Vx Hx Wx - EVX_FNMSUB132PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f389e RVM Vx Hx Wx - EVX_FNMSUB132PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f389f RVM Vdq Hdq Wss - EVX_FNMSUB132SS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f389f RVM Vdq Hdq Wsd - EVX_FNMSUB132SD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f38a6 RVM Vx Hx Wx - EVX_FMADDSUB213PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f38a6 RVM Vx Hx Wx - EVX_FMADDSUB213PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f38a7 RVM Vx Hx Wx - EVX_FMSUBADD213PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f38a7 RVM Vx Hx Wx - EVX_FMSUBADD213PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f38a8 RVM Vx Hx Wx - EVX_FMADD213PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f38a8 RVM Vx Hx Wx - EVX_FMADD213PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f38a9 RVM Vdq Hdq Wss - EVX_FMADD213SS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f38a9 RVM Vdq Hdq Wsd - EVX_FMADD213SD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f38aa RVM Vx Hx Wx - EVX_FMSUB213PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f38aa RVM Vx Hx Wx - EVX_FMSUB213PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f38ab RVM Vdq Hdq Wss - EVX_FMSUB213SS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f38ab RVM Vdq Hdq Wsd - EVX_FMSUB213SD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f38ac RVM Vx Hx Wx - EVX_FNMADD213PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f38ac RVM Vx Hx Wx - EVX_FNMADD213PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f38ad RVM Vdq Hdq Wss - EVX_FNMADD213SS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f38ad RVM Vdq Hdq Wsd - EVX_FNMADD213SD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f38ae RVM Vx Hx Wx - EVX_FNMSUB213PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f38ae RVM Vx Hx Wx - EVX_FNMSUB213PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f38af RVM Vdq Hdq Wss - EVX_FNMSUB213SS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f38af RVM Vdq Hdq Wsd - EVX_FNMSUB213SD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f38b6 RVM Vx Hx Wx - EVX_FMADDSUB231PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f38b6 RVM Vx Hx Wx - EVX_FMADDSUB231PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f38b7 RVM Vx Hx Wx - EVX_FMSUBADD231PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f38b7 RVM Vx Hx Wx - 
EVX_FMSUBADD231PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f38b8 RVM Vx Hx Wx - EVX_FMADD231PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f38b8 RVM Vx Hx Wx - EVX_FMADD231PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f38b9 RVM Vdq Hdq Wss - EVX_FMADD231SS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f38b9 RVM Vdq Hdq Wsd - EVX_FMADD231SD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f38ba RVM Vx Hx Wx - EVX_FMSUB231PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f38ba RVM Vx Hx Wx - EVX_FMSUB231PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f38bb RVM Vdq Hdq Wss - EVX_FMSUB231SS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f38bb RVM Vdq Hdq Wsd - EVX_FMSUB231SD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f38bc RVM Vx Hx Wx - EVX_FNMADD231PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f38bc RVM Vx Hx Wx - EVX_FNMADD231PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f38bd RVM Vdq Hdq Wss - EVX_FNMADD231SS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f38bd RVM Vdq Hdq Wsd - EVX_FNMADD231SD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f38be RVM Vx Hx Wx - EVX_FNMSUB231PS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f38be RVM Vx Hx Wx - EVX_FNMSUB231PD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f38bf RVM Vdq Hdq Wss - EVX_FNMSUB231SS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f38bf RVM Vdq Hdq Wsd - EVX_FNMSUB231SD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f3a66 RMI Kb Wps Ib - EVX_FPCLASSPS+kb F=AVX512DQ TUPLE_FULL_32 +EVEX.66.W1.0f3a66 RMI Kb Wpd Ib - EVX_FPCLASSPD+kb F=AVX512DQ TUPLE_FULL_64 +EVEX.66.W0.LIG.0f3a67 RMI Kb Wss Ib - EVX_FPCLASSSS+k F=AVX512DQ TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f3a67 RMI Kb Wsd Ib - EVX_FPCLASSSD+k F=AVX512DQ TUPLE1_SCALAR_64 +EVEX.66.W0.0f3892/m RM Vx Md - - EVX_GATHERDPS+k F=AVX512F VSIB TUPLE1_SCALAR_32 +EVEX.66.W1.0f3892/m RM Vx Mq - - EVX_GATHERDPD+k F=AVX512F VSIB TUPLE1_SCALAR_64 +EVEX.66.W0.0f3893/m RM Vh Md - - EVX_GATHERQPS+k F=AVX512F VSIB TUPLE1_SCALAR_32 +EVEX.66.W1.0f3893/m RM Vx Mq - - EVX_GATHERQPD+k F=AVX512F VSIB TUPLE1_SCALAR_64 +EVEX.66.W0.0f3842 RM Vps Wps - - EVX_GETEXPPS+kbe F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3842 RM Vpd Wpd - - EVX_GETEXPPD+kbe F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f3843 RVM Vdq Hdq Wss - EVX_GETEXPSS+ke F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f3843 RVM Vdq Hdq Wsd - EVX_GETEXPSD+ke F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f3a26 RMI Vps Wps Ib - EVX_GETMANTPS+kbe F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3a26 RMI Vpd Wpd Ib - EVX_GETMANTPD+kbe F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f3a27 RVMI Vdq Hdq Wss Ib EVX_GETMANTSS+ke F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f3a27 RVMI Vdq Hdq Wsd Ib EVX_GETMANTSD+ke F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.L12.0f3a18 RVMI Vx Hx Wdq Ib EVX_INSERTF32X4+k F=AVX512F TUPLE4_32 +EVEX.66.W1.L12.0f3a18 RVMI Vx Hx Wdq Ib EVX_INSERTF64X2+k F=AVX512DQ TUPLE2_64 +EVEX.66.W0.L2.0f3a1a RVMI Vx Hx Wqq Ib EVX_INSERTF32X8+k F=AVX512DQ TUPLE8_32 +EVEX.66.W1.L2.0f3a1a RVMI Vx Hx Wqq Ib EVX_INSERTF64X4+k F=AVX512F TUPLE4_64 +EVEX.66.W0.L12.0f3a38 RVMI Vx Hx Wdq Ib EVX_INSERTI32X4+k F=AVX512F TUPLE4_32 +EVEX.66.W1.L12.0f3a38 RVMI Vx Hx Wdq Ib EVX_INSERTI64X2+k F=AVX512DQ TUPLE2_64 +EVEX.66.W0.L2.0f3a3a RVMI Vx Hx Wqq Ib EVX_INSERTI32X8+k F=AVX512DQ TUPLE8_32 +EVEX.66.W1.L2.0f3a3a RVMI Vx Hx Wqq Ib EVX_INSERTI64X4+k F=AVX512F TUPLE4_64 +EVEX.F2.W0.0f3868 RVM K Hx Wx - EVX_P2INTERSECTD+b F=AVX512_VP2INTERSECT TUPLE_FULL_32 +EVEX.F2.W1.0f3868 RVM K Hx Wx - EVX_P2INTERSECTQ+b F=AVX512_VP2INTERSECT TUPLE_FULL_64 +EVEX.66.W0.0f3866 RVM Vx Hx Wx - EVX_PBLENDMB+k F=AVX512BW TUPLE_FULL_MEM 
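All of these EVEX entries carry a TUPLE_* class because EVEX encodings compress 8-bit displacements: disp8 is implicitly multiplied by a tuple-dependent factor N, which verifyOpcodeDesc in parseinstrs.py (below) cross-checks against the declared operand sizes. As a sketch of the SDM rule for the broadcast-capable FULL tuples used above (an illustration, not code from this patch):

// disp8*N scale for TUPLE_FULL_32/TUPLE_FULL_64 per the Intel SDM rule:
// with EVEX.b set only one element is accessed, so disp8 scales by the
// element size; otherwise it scales by the full vector length.
// vlen: vector length in bytes (16/32/64); elem: element size (4 or 8).
static unsigned disp8_scale_full(unsigned vlen, int bcst, unsigned elem) {
    return bcst ? elem : vlen;
}

So a disp8 of 1 on a non-broadcast 512-bit operand addresses base+64, while the {1toX} broadcast forms scale by 4 or 8; the TUPLE1_SCALAR/TUPLE2/TUPLE4/TUPLE8 classes fix N to an element or sub-vector size instead, which is what the "scale is not memory size but element size" notes are pointing out.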
+EVEX.66.W1.0f3866 RVM Vx Hx Wx - EVX_PBLENDMW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f3864 RVM Vx Hx Wx - EVX_PBLENDMD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3864 RVM Vx Hx Wx - EVX_PBLENDMQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f387a/r RM Vx Rb - - EVX_PBROADCAST+k F=AVX512BW +EVEX.66.W0.0f387b/r RM Vx Rw - - EVX_PBROADCAST+k F=AVX512BW +EVEX.66.W0.0f387c/r RM Vx Rd - - EVX_PBROADCAST+k F=AVX512F +EVEX.66.W1.0f387c/r RM Vx Rd - - EVX_PBROADCAST+k I64 F=AVX512F +EVEX.66.W1.0f387c/r RM Vx Rq - - EVX_PBROADCAST+k O64 F=AVX512F +EVEX.66.W0.0f3878 RM Vx Wb - - EVX_PBROADCASTB+k F=AVX512BW TUPLE1_SCALAR_8 +EVEX.66.W0.0f3879 RM Vx Ww - - EVX_PBROADCASTW+k F=AVX512BW TUPLE1_SCALAR_16 +EVEX.66.W0.0f3858 RM Vx Wd - - EVX_PBROADCASTD+k F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.0f3859 RM Vx Wq - - EVX_PBROADCASTQ+k F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f3859 RM Vx Wq - - EVX_BROADCASTI32X2+k F=AVX512DQ TUPLE2_32 +EVEX.66.W0.L12.0f385a/m RM Vx Wdq - - EVX_BROADCASTI32X4+k F=AVX512F TUPLE4_32 +EVEX.66.W1.L12.0f385a/m RM Vx Wdq - - EVX_BROADCASTI64X2+k F=AVX512DQ TUPLE2_64 +EVEX.66.W0.L2.0f385b/m RM Vx Wqq - - EVX_BROADCASTI32X8+k F=AVX512DQ TUPLE8_32 +EVEX.66.W1.L2.0f385b/m RM Vx Wqq - - EVX_BROADCASTI64X4+k F=AVX512F TUPLE4_64 +EVEX.F3.W1.0f382a/r RM Vx K - - EVX_PBROADCASTMB2Q F=AVX512CD +EVEX.F3.W0.0f383a/r RM Vx K - - EVX_PBROADCASTMW2D F=AVX512CD +EVEX.66.W0.0f3a1e RVMI K Hx Wx Ib EVX_PCMPUD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W0.0f3a1f RVMI K Hx Wx Ib EVX_PCMPD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3a1e RVMI K Hx Wx Ib EVX_PCMPUQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W1.0f3a1f RVMI K Hx Wx Ib EVX_PCMPQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f3a3e RVMI K Hx Wx Ib EVX_PCMPUB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f3a3f RVMI K Hx Wx Ib EVX_PCMPB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W1.0f3a3e RVMI K Hx Wx Ib EVX_PCMPUW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W1.0f3a3f RVMI K Hx Wx Ib EVX_PCMPW+k F=AVX512BW TUPLE_FULL_MEM +# Note tuple type, scale is not memory size but element size +EVEX.66.W0.0f3863/m MR Mb Vx - - EVX_PCOMPRESSB+k F=AVX512_VBMI2 TUPLE1_SCALAR_8 +EVEX.66.W0.0f3863/r MR Ux Vx - - EVX_PCOMPRESSB+k F=AVX512_VBMI2 +EVEX.66.W1.0f3863/m MR Mw Vx - - EVX_PCOMPRESSW+k F=AVX512_VBMI2 TUPLE1_SCALAR_16 +EVEX.66.W1.0f3863/r MR Ux Vx - - EVX_PCOMPRESSW+k F=AVX512_VBMI2 +EVEX.66.W0.0f388b/m MR Md Vx - - EVX_PCOMPRESSD+k F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W0.0f388b/r MR Ux Vx - - EVX_PCOMPRESSD+k F=AVX512F +EVEX.66.W1.0f388b/m MR Mq Vx - - EVX_PCOMPRESSQ+k F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W1.0f388b/r MR Ux Vx - - EVX_PCOMPRESSQ+k F=AVX512F +EVEX.66.W0.0f38c4 RM Vx Wx - - EVX_PCONFLICTD+kb F=AVX512CD TUPLE_FULL_32 +EVEX.66.W1.0f38c4 RM Vx Wx - - EVX_PCONFLICTQ+kb F=AVX512CD TUPLE_FULL_64 +EVEX.66.W0.0f3850 RVM Vx Hx Wx - EVX_PDPBUSD+kb F=AVX512_VNNI TUPLE_FULL_32 +EVEX.66.W0.0f3851 RVM Vx Hx Wx - EVX_PDPBUSDS+kb F=AVX512_VNNI TUPLE_FULL_32 +EVEX.66.W0.0f3852 RVM Vx Hx Wx - EVX_PDPWSSD+kb F=AVX512_VNNI TUPLE_FULL_32 +EVEX.66.W0.0f3853 RVM Vx Hx Wx - EVX_PDPWSSDS+kb F=AVX512_VNNI TUPLE_FULL_32 +EVEX.66.W0.0f388d RVM Vx Hx Wx - EVX_PERMB+k F=AVX512_VBMI TUPLE_FULL_MEM +EVEX.66.W1.0f388d RVM Vx Hx Wx - EVX_PERMW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.L12.0f3836 RVM Vx Hx Wx - EVX_PERMD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W0.0f3875 RVM Vx Hx Wx - EVX_PERMI2B+k F=AVX512_VBMI TUPLE_FULL_MEM +EVEX.66.W1.0f3875 RVM Vx Hx Wx - EVX_PERMI2W+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f3876 RVM Vx Hx Wx - EVX_PERMI2D+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3876 RVM Vx Hx Wx - EVX_PERMI2Q+kb F=AVX512F
TUPLE_FULL_64 +EVEX.66.W0.0f3877 RVM Vx Hx Wx - EVX_PERMI2PS+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3877 RVM Vx Hx Wx - EVX_PERMI2PD+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f380c RVM Vx Hx Wx - EVX_PERMILPS+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f380d RVM Vx Hx Wx - EVX_PERMILPD+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f3a04 RMI Vx Wx Ib - EVX_PERMILPS+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3a05 RMI Vx Wx Ib - EVX_PERMILPD+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.L12.0f3816 RVM Vx Hx Wx - EVX_PERMPS+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.L12.0f3816 RVM Vx Hx Wx - EVX_PERMPD+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W1.L12.0f3836 RVM Vx Hx Wx - EVX_PERMQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W1.L12.0f3a00 RMI Vx Wx Ib - EVX_PERMQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W1.L12.0f3a01 RMI Vx Wx Ib - EVX_PERMPD+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f387d RVM Vx Hx Wx - EVX_PERMT2B+k F=AVX512_VBMI TUPLE_FULL_MEM +EVEX.66.W1.0f387d RVM Vx Hx Wx - EVX_PERMT2W+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f387e RVM Vx Hx Wx - EVX_PERMT2D+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f387e RVM Vx Hx Wx - EVX_PERMT2Q+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f387f RVM Vx Hx Wx - EVX_PERMT2PS+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f387f RVM Vx Hx Wx - EVX_PERMT2PD+kb F=AVX512F TUPLE_FULL_64 +# Note tuple type, scale is not memory size but element size +EVEX.66.W0.0f3862/m RM Vx Mb - - EVX_PEXPANDB+k F=AVX512_VBMI2 TUPLE1_SCALAR_8 +EVEX.66.W0.0f3862/r RM Vx Ux - - EVX_PEXPANDB+k F=AVX512_VBMI2 +EVEX.66.W1.0f3862/m RM Vx Mw - - EVX_PEXPANDW+k F=AVX512_VBMI2 TUPLE1_SCALAR_16 +EVEX.66.W1.0f3862/r RM Vx Ux - - EVX_PEXPANDW+k F=AVX512_VBMI2 +EVEX.66.W0.0f3889/m RM Vx Md - - EVX_PEXPANDD+k F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W0.0f3889/r RM Vx Ux - - EVX_PEXPANDD+k F=AVX512F +EVEX.66.W1.0f3889/m RM Vx Mq - - EVX_PEXPANDQ+k F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W1.0f3889/r RM Vx Ux - - EVX_PEXPANDQ+k F=AVX512F +EVEX.66.W0.0f3890/m RM Vx Md - - EVX_PGATHERDD+k F=AVX512F VSIB TUPLE1_SCALAR_32 +EVEX.66.W1.0f3890/m RM Vx Mq - - EVX_PGATHERDQ+k F=AVX512F VSIB TUPLE1_SCALAR_64 +EVEX.66.W0.0f3891/m RM Vh Md - - EVX_PGATHERQD+k F=AVX512F VSIB TUPLE1_SCALAR_32 +EVEX.66.W1.0f3891/m RM Vx Mq - - EVX_PGATHERQQ+k F=AVX512F VSIB TUPLE1_SCALAR_64 +EVEX.66.W0.0f3844 RM Vx Wx - - EVX_PLZCNTD+kb F=AVX512CD TUPLE_FULL_32 +EVEX.66.W1.0f3844 RM Vx Wx - - EVX_PLZCNTQ+kb F=AVX512CD TUPLE_FULL_64 +EVEX.66.W1.0f38b4 RVM Vx Hx Wx - EVX_PMADD52LUQ+kb F=AVX512_IFMA TUPLE_FULL_64 +EVEX.66.W1.0f38b5 RVM Vx Hx Wx - EVX_PMADD52HUQ+kb F=AVX512_IFMA TUPLE_FULL_64 +EVEX.F3.W0.0f3829/r RM K Ux - - EVX_PMOVB2M F=AVX512BW +EVEX.F3.W1.0f3829/r RM K Ux - - EVX_PMOVW2M F=AVX512BW +EVEX.F3.W0.0f3839/r RM K Ux - - EVX_PMOVD2M F=AVX512DQ +EVEX.F3.W1.0f3839/r RM K Ux - - EVX_PMOVQ2M F=AVX512DQ +EVEX.F3.W0.0f3828/r RM Vx K - - EVX_PMOVM2B F=AVX512BW +EVEX.F3.W1.0f3828/r RM Vx K - - EVX_PMOVM2W F=AVX512BW +EVEX.F3.W0.0f3838/r RM Vx K - - EVX_PMOVM2D F=AVX512DQ +EVEX.F3.W1.0f3838/r RM Vx K - - EVX_PMOVM2Q F=AVX512DQ +EVEX.F3.W0.0f3830 MR Wh Vx - - EVX_PMOVWB+k F=AVX512BW TUPLE_HALF_MEM +EVEX.F3.W0.0f3820 MR Wh Vx - - EVX_PMOVSWB+k F=AVX512BW TUPLE_HALF_MEM +EVEX.F3.W0.0f3810 MR Wh Vx - - EVX_PMOVUSWB+k F=AVX512BW TUPLE_HALF_MEM +EVEX.F3.W0.0f3831 MR Wf Vx - - EVX_PMOVDB+k F=AVX512F TUPLE_QUARTER_MEM +EVEX.F3.W0.0f3821 MR Wf Vx - - EVX_PMOVSDB+k F=AVX512F TUPLE_QUARTER_MEM +EVEX.F3.W0.0f3811 MR Wf Vx - - EVX_PMOVUSDB+k F=AVX512F TUPLE_QUARTER_MEM +EVEX.F3.W0.0f3832 MR We Vx - - EVX_PMOVQB+k F=AVX512F TUPLE_EIGHTH_MEM +EVEX.F3.W0.0f3822 MR We Vx - - EVX_PMOVSQB+k F=AVX512F 
TUPLE_EIGHTH_MEM +EVEX.F3.W0.0f3812 MR We Vx - - EVX_PMOVUSQB+k F=AVX512F TUPLE_EIGHTH_MEM +EVEX.F3.W0.0f3833 MR Wh Vx - - EVX_PMOVDW+k F=AVX512F TUPLE_HALF_MEM +EVEX.F3.W0.0f3823 MR Wh Vx - - EVX_PMOVSDW+k F=AVX512F TUPLE_HALF_MEM +EVEX.F3.W0.0f3813 MR Wh Vx - - EVX_PMOVUSDW+k F=AVX512F TUPLE_HALF_MEM +EVEX.F3.W0.0f3834 MR Wf Vx - - EVX_PMOVQW+k F=AVX512F TUPLE_QUARTER_MEM +EVEX.F3.W0.0f3824 MR Wf Vx - - EVX_PMOVSQW+k F=AVX512F TUPLE_QUARTER_MEM +EVEX.F3.W0.0f3814 MR Wf Vx - - EVX_PMOVUSQW+k F=AVX512F TUPLE_QUARTER_MEM +EVEX.F3.W0.0f3835 MR Wh Vx - - EVX_PMOVQD+k F=AVX512F TUPLE_HALF_MEM +EVEX.F3.W0.0f3825 MR Wh Vx - - EVX_PMOVSQD+k F=AVX512F TUPLE_HALF_MEM +EVEX.F3.W0.0f3815 MR Wh Vx - - EVX_PMOVUSQD+k F=AVX512F TUPLE_HALF_MEM +EVEX.66.W1.0f3883 RVM Vx Hx Wx - EVX_PMULTISHIFTQB+kb F=AVX512_VBMI TUPLE_FULL_64 +EVEX.66.W0.0f3854 RM Vx Wx - - EVX_POPCNTB+k F=AVX512_BITALG TUPLE_FULL_MEM +EVEX.66.W1.0f3854 RM Vx Wx - - EVX_POPCNTW+k F=AVX512_BITALG TUPLE_FULL_MEM +EVEX.66.W0.0f3855 RM Vx Wx - - EVX_POPCNTD+kb F=AVX512_VPOPCNTDQ TUPLE_FULL_32 +EVEX.66.W1.0f3855 RM Vx Wx - - EVX_POPCNTQ+kb F=AVX512_VPOPCNTDQ TUPLE_FULL_64 +EVEX.66.W0.0f3814 RVM Vx Hx Wx - EVX_PRORVD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3814 RVM Vx Hx Wx - EVX_PRORVQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f72/0 VMI Hx Wx Ib - EVX_PRORD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f72/0 VMI Hx Wx Ib - EVX_PRORQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f3815 RVM Vx Hx Wx - EVX_PROLVD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3815 RVM Vx Hx Wx - EVX_PROLVQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f72/1 VMI Hx Wx Ib - EVX_PROLD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f72/1 VMI Hx Wx Ib - EVX_PROLQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f38a0/m MR Md Vx - - EVX_PSCATTERDD+k F=AVX512F VSIB TUPLE1_SCALAR_32 +EVEX.66.W1.0f38a0/m MR Mq Vx - - EVX_PSCATTERDQ+k F=AVX512F VSIB TUPLE1_SCALAR_64 +EVEX.66.W0.0f38a1/m MR Md Vh - - EVX_PSCATTERQD+k F=AVX512F VSIB TUPLE1_SCALAR_32 +EVEX.66.W1.0f38a1/m MR Mq Vx - - EVX_PSCATTERQQ+k F=AVX512F VSIB TUPLE1_SCALAR_64 +EVEX.66.W1.0f3a70 RVMI Vx Hx Wx Ib EVX_PSHLDW+k F=AVX512_VBMI2 TUPLE_FULL_MEM +EVEX.66.W0.0f3a71 RVMI Vx Hx Wx Ib EVX_PSHLDD+kb F=AVX512_VBMI2 TUPLE_FULL_32 +EVEX.66.W1.0f3a71 RVMI Vx Hx Wx Ib EVX_PSHLDQ+kb F=AVX512_VBMI2 TUPLE_FULL_64 +EVEX.66.W1.0f3870 RVM Vx Hx Wx - EVX_PSHLDVW+k F=AVX512_VBMI2 TUPLE_FULL_MEM +EVEX.66.W0.0f3871 RVM Vx Hx Wx - EVX_PSHLDVD+kb F=AVX512_VBMI2 TUPLE_FULL_32 +EVEX.66.W1.0f3871 RVM Vx Hx Wx - EVX_PSHLDVQ+kb F=AVX512_VBMI2 TUPLE_FULL_64 +EVEX.66.W1.0f3a72 RVMI Vx Hx Wx Ib EVX_PSHRDW+k F=AVX512_VBMI2 TUPLE_FULL_MEM +EVEX.66.W0.0f3a73 RVMI Vx Hx Wx Ib EVX_PSHRDD+kb F=AVX512_VBMI2 TUPLE_FULL_32 +EVEX.66.W1.0f3a73 RVMI Vx Hx Wx Ib EVX_PSHRDQ+kb F=AVX512_VBMI2 TUPLE_FULL_64 +EVEX.66.W1.0f3872 RVM Vx Hx Wx - EVX_PSHRDVW+k F=AVX512_VBMI2 TUPLE_FULL_MEM +EVEX.66.W0.0f3873 RVM Vx Hx Wx - EVX_PSHRDVD+kb F=AVX512_VBMI2 TUPLE_FULL_32 +EVEX.66.W1.0f3873 RVM Vx Hx Wx - EVX_PSHRDVQ+kb F=AVX512_VBMI2 TUPLE_FULL_64 +EVEX.66.W0.0f388f RVM K Hx Wx - EVX_PSHUFBITQMB+k F=AVX512_BITALG TUPLE_FULL_MEM +EVEX.66.W1.0f3812 RVM Vx Hx Wx - EVX_PSLLVW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f3847 RVM Vx Hx Wx - EVX_PSLLVD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3847 RVM Vx Hx Wx - EVX_PSLLVQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W1.0f3811 RVM Vx Hx Wx - EVX_PSRAVW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f3846 RVM Vx Hx Wx - EVX_PSRAVD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3846 RVM Vx Hx Wx - EVX_PSRAVQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W1.0f3810 RVM Vx Hx Wx - EVX_PSRLVW+k 
F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f3845 RVM Vx Hx Wx - EVX_PSRLVD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3845 RVM Vx Hx Wx - EVX_PSRLVQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f3a25 RVMI Vx Hx Wx Ib EVX_PTERNLOGD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3a25 RVMI Vx Hx Wx Ib EVX_PTERNLOGQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f3826 RVM K Hx Wx - EVX_PTESTMB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W1.0f3826 RVM K Hx Wx - EVX_PTESTMW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.66.W0.0f3827 RVM K Hx Wx - EVX_PTESTMD+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3827 RVM K Hx Wx - EVX_PTESTMQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.F3.W0.0f3826 RVM K Hx Wx - EVX_PTESTNMB+k F=AVX512BW TUPLE_FULL_MEM +EVEX.F3.W1.0f3826 RVM K Hx Wx - EVX_PTESTNMW+k F=AVX512BW TUPLE_FULL_MEM +EVEX.F3.W0.0f3827 RVM K Hx Wx - EVX_PTESTNMD+kb F=AVX512F TUPLE_FULL_32 +EVEX.F3.W1.0f3827 RVM K Hx Wx - EVX_PTESTNMQ+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.0f3a50 RVMI Vps Hps Wps Ib EVX_RANGEPS+kbe F=AVX512DQ TUPLE_FULL_32 +EVEX.66.W1.0f3a50 RVMI Vpd Hpd Wpd Ib EVX_RANGEPD+kbe F=AVX512DQ TUPLE_FULL_64 +EVEX.66.W0.LIG.0f3a51 RVMI Vdq Hdq Wss Ib EVX_RANGESS+ke F=AVX512DQ TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f3a51 RVMI Vdq Hdq Wsd Ib EVX_RANGESD+ke F=AVX512DQ TUPLE1_SCALAR_64 +EVEX.66.W0.0f384c RM Vps Wps - - EVX_RCP14PS+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f384c RM Vpd Wpd - - EVX_RCP14PD+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f384d RVM Vdq Hdq Wss - EVX_RCP14SS+k F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f384d RVM Vdq Hdq Wsd - EVX_RCP14SD+k F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f3a56 RMI Vps Wps Ib - EVX_REDUCEPS+kbe F=AVX512DQ TUPLE_FULL_32 +EVEX.66.W1.0f3a56 RMI Vpd Wpd Ib - EVX_REDUCEPD+kbe F=AVX512DQ TUPLE_FULL_64 +EVEX.66.W0.LIG.0f3a57 RVMI Vdq Hdq Wss Ib EVX_REDUCESS+ke F=AVX512DQ TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f3a57 RVMI Vdq Hdq Wsd Ib EVX_REDUCESD+ke F=AVX512DQ TUPLE1_SCALAR_64 +EVEX.66.W0.0f3a08 RMI Vps Wps Ib - EVX_RNDSCALEPS+kbe F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f3a09 RMI Vpd Wpd Ib - EVX_RNDSCALEPD+kbe F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f3a0a RVMI Vdq Hdq Wss Ib EVX_RNDSCALESS+ke F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f3a0b RVMI Vdq Hdq Wsd Ib EVX_RNDSCALESD+ke F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f384e RM Vps Wps - - EVX_RSQRT14PS+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f384e RM Vpd Wpd - - EVX_RSQRT14PD+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f384f RVM Vdq Hdq Wss - EVX_RSQRT14SS+k F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f384f RVM Vdq Hdq Wsd - EVX_RSQRT14SD+k F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f382c RVM Vps Hps Wps - EVX_SCALEFPS+kbr F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.0f382c RVM Vpd Hpd Wpd - EVX_SCALEFPD+kbr F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.LIG.0f382d RVM Vdq Hdq Wss - EVX_SCALEFSS+kr F=AVX512F TUPLE1_SCALAR_32 +EVEX.66.W1.LIG.0f382d RVM Vdq Hdq Wsd - EVX_SCALEFSD+kr F=AVX512F TUPLE1_SCALAR_64 +EVEX.66.W0.0f38a2/m MR Md Vx - - EVX_SCATTERDPS+k F=AVX512F VSIB TUPLE1_SCALAR_32 +EVEX.66.W1.0f38a2/m MR Mq Vx - - EVX_SCATTERDPD+k F=AVX512F VSIB TUPLE1_SCALAR_64 +EVEX.66.W0.0f38a3/m MR Md Vh - - EVX_SCATTERQPS+k F=AVX512F VSIB TUPLE1_SCALAR_32 +EVEX.66.W1.0f38a3/m MR Mq Vx - - EVX_SCATTERQPD+k F=AVX512F VSIB TUPLE1_SCALAR_64 +EVEX.66.W0.L12.0f3a23 RVMI Vps Hps Wps Ib EVX_SHUFF32X4+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.L12.0f3a23 RVMI Vpd Hpd Wpd Ib EVX_SHUFF64X2+kb F=AVX512F TUPLE_FULL_64 +EVEX.66.W0.L12.0f3a43 RVMI Vx Hx Wx Ib EVX_SHUFI32X4+kb F=AVX512F TUPLE_FULL_32 +EVEX.66.W1.L12.0f3a43 RVMI Vx Hx Wx Ib EVX_SHUFI64X2+kb F=AVX512F TUPLE_FULL_64 +EVEX.NP.W0.0f57 RVM Vps Hps Wps - 
EVX_XORPS+kb F=AVX512DQ TUPLE_FULL_32 +EVEX.66.W1.0f57 RVM Vpd Hpd Wpd - EVX_XORPD+kb F=AVX512DQ TUPLE_FULL_64 + + +# AVX512 Mask instructions +VEX.66.W0.L1.0f41/r RVM Kb Kb Kb - KANDB F=AVX512DQ +VEX.NP.W0.L1.0f41/r RVM Kw Kw Kw - KANDW F=AVX512F +VEX.66.W1.L1.0f41/r RVM Kd Kd Kd - KANDD F=AVX512BW +VEX.NP.W1.L1.0f41/r RVM Kq Kq Kq - KANDQ F=AVX512BW +VEX.66.W0.L1.0f42/r RVM Kb Kb Kb - KANDNB F=AVX512DQ +VEX.NP.W0.L1.0f42/r RVM Kw Kw Kw - KANDNW F=AVX512F +VEX.66.W1.L1.0f42/r RVM Kd Kd Kd - KANDND F=AVX512BW +VEX.NP.W1.L1.0f42/r RVM Kq Kq Kq - KANDNQ F=AVX512BW +VEX.66.W0.L0.0f44/r RM Kb Kb - - KNOTB F=AVX512DQ +VEX.NP.W0.L0.0f44/r RM Kw Kw - - KNOTW F=AVX512F +VEX.66.W1.L0.0f44/r RM Kd Kd - - KNOTD F=AVX512BW +VEX.NP.W1.L0.0f44/r RM Kq Kq - - KNOTQ F=AVX512BW +VEX.66.W0.L1.0f45/r RVM Kb Kb Kb - KORB F=AVX512DQ +VEX.NP.W0.L1.0f45/r RVM Kw Kw Kw - KORW F=AVX512F +VEX.66.W1.L1.0f45/r RVM Kd Kd Kd - KORD F=AVX512BW +VEX.NP.W1.L1.0f45/r RVM Kq Kq Kq - KORQ F=AVX512BW +VEX.66.W0.L1.0f46/r RVM Kb Kb Kb - KXNORB F=AVX512DQ +VEX.NP.W0.L1.0f46/r RVM Kw Kw Kw - KXNORW F=AVX512F +VEX.66.W1.L1.0f46/r RVM Kd Kd Kd - KXNORD F=AVX512BW +VEX.NP.W1.L1.0f46/r RVM Kq Kq Kq - KXNORQ F=AVX512BW +VEX.66.W0.L1.0f47/r RVM Kb Kb Kb - KXORB F=AVX512DQ +VEX.NP.W0.L1.0f47/r RVM Kw Kw Kw - KXORW F=AVX512F +VEX.66.W1.L1.0f47/r RVM Kd Kd Kd - KXORD F=AVX512BW +VEX.NP.W1.L1.0f47/r RVM Kq Kq Kq - KXORQ F=AVX512BW +VEX.66.W0.L1.0f4a/r RVM Kb Kb Kb - KADDB F=AVX512DQ +VEX.NP.W0.L1.0f4a/r RVM Kw Kw Kw - KADDW F=AVX512DQ +VEX.66.W1.L1.0f4a/r RVM Kd Kd Kd - KADDD F=AVX512BW +VEX.NP.W1.L1.0f4a/r RVM Kq Kq Kq - KADDQ F=AVX512BW +VEX.66.W0.L1.0f4b/r RVM Kw Kb Kb - KUNPCKBW F=AVX512F +VEX.NP.W0.L1.0f4b/r RVM Kd Kw Kw - KUNPCKWD F=AVX512BW +VEX.NP.W1.L1.0f4b/r RVM Kq Kd Kd - KUNPCKDQ F=AVX512BW +VEX.66.W0.L0.0f98/r RM Kb Kb - - KORTESTB F=AVX512DQ EFL=0--0m00m +VEX.NP.W0.L0.0f98/r RM Kw Kw - - KORTESTW F=AVX512F EFL=0--0m00m +VEX.66.W1.L0.0f98/r RM Kd Kd - - KORTESTD F=AVX512BW EFL=0--0m00m +VEX.NP.W1.L0.0f98/r RM Kq Kq - - KORTESTQ F=AVX512BW EFL=0--0m00m +VEX.66.W0.L0.0f90 RM Kb Kb - - KMOVB F=AVX512DQ +VEX.NP.W0.L0.0f90 RM Kw Kw - - KMOVW F=AVX512F +VEX.66.W1.L0.0f90 RM Kd Kd - - KMOVD F=AVX512BW +VEX.NP.W1.L0.0f90 RM Kq Kq - - KMOVQ F=AVX512BW +VEX.66.W0.L0.0f91/m MR Mb Kb - - KMOVB F=AVX512DQ +VEX.NP.W0.L0.0f91/m MR Mw Kw - - KMOVW F=AVX512F +VEX.66.W1.L0.0f91/m MR Md Kd - - KMOVD F=AVX512BW +VEX.NP.W1.L0.0f91/m MR Mq Kq - - KMOVQ F=AVX512BW +VEX.66.W0.L0.0f92/r RM Kb Rd - - KMOVB F=AVX512DQ +VEX.NP.W0.L0.0f92/r RM Kw Rd - - KMOVW F=AVX512F +VEX.F2.W0.L0.0f92/r RM Kd Rd - - KMOVD F=AVX512BW +VEX.F2.W1.L0.0f92/r RM Kq Rq - - KMOVQ O64 F=AVX512BW +VEX.66.W0.L0.0f93/r RM Gd Kb - - KMOVB F=AVX512DQ +VEX.NP.W0.L0.0f93/r RM Gd Kw - - KMOVW F=AVX512F +VEX.F2.W0.L0.0f93/r RM Gd Kd - - KMOVD F=AVX512BW +VEX.F2.W1.L0.0f93/r RM Gq Kq - - KMOVQ O64 F=AVX512BW +VEX.66.W0.L0.0f99/r RM Kb Kb - - KTESTB F=AVX512DQ EFL=0--0m00m +VEX.NP.W0.L0.0f99/r RM Kw Kw - - KTESTW F=AVX512DQ EFL=0--0m00m +VEX.66.W1.L0.0f99/r RM Kd Kd - - KTESTD F=AVX512BW EFL=0--0m00m +VEX.NP.W1.L0.0f99/r RM Kq Kq - - KTESTQ F=AVX512BW EFL=0--0m00m +VEX.66.W0.L0.0f3a30/r RMI Kb Kb Ib - KSHIFTRB F=AVX512DQ +VEX.66.W1.L0.0f3a30/r RMI Kw Kw Ib - KSHIFTRW F=AVX512F +VEX.66.W0.L0.0f3a31/r RMI Kd Kd Ib - KSHIFTRD F=AVX512BW +VEX.66.W1.L0.0f3a31/r RMI Kq Kq Ib - KSHIFTRQ F=AVX512BW +VEX.66.W0.L0.0f3a32/r RMI Kb Kb Ib - KSHIFTLB F=AVX512DQ +VEX.66.W1.L0.0f3a32/r RMI Kw Kw Ib - KSHIFTLW F=AVX512F +VEX.66.W0.L0.0f3a33/r RMI Kd Kd Ib - KSHIFTLD F=AVX512BW +VEX.66.W1.L0.0f3a33/r RMI 
Kq Kq Ib - KSHIFTLQ F=AVX512BW diff --git a/parseinstrs.py b/parseinstrs.py index f263d02..16def04 100644 --- a/parseinstrs.py +++ b/parseinstrs.py @@ -14,7 +14,8 @@ INSTR_FLAGS_FIELDS, INSTR_FLAGS_SIZES = zip(*[ ("modreg_idx", 2), ("vexreg_idx", 2), # note: vexreg w/o vex prefix is zeroreg_val ("imm_idx", 2), - ("unused1", 2), + ("evex_bcst", 1), + ("evex_mask", 1), ("zeroreg_val", 1), ("lock", 1), ("imm_control", 3), @@ -31,7 +32,8 @@ INSTR_FLAGS_FIELDS, INSTR_FLAGS_SIZES = zip(*[ ("modreg_ty", 3), ("vexreg_ty", 2), ("imm_ty", 0), - ("unused", 3), + ("evex_rc", 2), + ("unused", 1), ("opsize", 3), ("modrm", 1), ("ign66", 1), @@ -141,9 +143,8 @@ OPKIND_SIZES = { "zq": 8, # z-immediate, but always 8-byte operand } class OpKind(NamedTuple): - kind: str + regkind: str sizestr: str - size: int SZ_OP = -1 SZ_VEC = -2 @@ -163,9 +164,15 @@ class OpKind(NamedTuple): def immsize(self, opsz): maxsz = 1 if self.sizestr == "bs" else 4 if self.sizestr[0] == "z" else 8 return min(maxsz, self.abssize(opsz)) + @property + def kind(self): + return OPKIND_CANONICALIZE[self.regkind] + @property + def size(self): + return OPKIND_SIZES[self.sizestr] @classmethod def parse(cls, op): - return cls(OPKIND_CANONICALIZE[op[0]], op[1:], OPKIND_SIZES[op[1:]]) + return cls(op[0], op[1:]) class InstrDesc(NamedTuple): mnemonic: str @@ -185,25 +192,28 @@ class InstrDesc(NamedTuple): ("modrm", "MEM"): 0, ("imm", "MEM"): 0, ("imm", "IMM"): 0, ("imm", "XMM"): 0, } - OPKIND_REGTYS_ENC = {"SEG": 3, "FPU": 4, "MMX": 5, "XMM": 6, "BND": 8, - "CR": 9, "DR": 10} + OPKIND_REGTYS_ENC = {"SEG": 3, "FPU": 4, "MMX": 5, "XMM": 6, "MASK": 7, + "BND": 8, "CR": 9, "DR": 10} OPKIND_SIZES = { 0: 0, 1: 1, 2: 2, 4: 3, 8: 4, 16: 5, 32: 6, 64: 7, 10: 0, + # OpKind.SZ_OP: -2, OpKind.SZ_VEC: -3, OpKind.SZ_HALFVEC: -4, } @classmethod def parse(cls, desc): desc = desc.split() - mnem_comp = desc[5].split("+", 1) - desc[5] = mnem_comp[0] - if len(mnem_comp) > 1 and "w" in mnem_comp[1]: - desc.append("INSTR_WIDTH") - if len(mnem_comp) > 1 and "a" in mnem_comp[1]: - desc.append("U67") - if len(mnem_comp) > 1 and "s" in mnem_comp[1]: - desc.append("USEG") + mnem, _, compactDesc = desc[5].partition("+") + flags = frozenset(desc[6:] + [{ + "w": "INSTR_WIDTH", + "a": "U67", + "s": "USEG", + "k": "MASK", + "b": "BCST", + "e": "SAE", + "r": "ER", + }[c] for c in compactDesc]) operands = tuple(OpKind.parse(op) for op in desc[1:5] if op != "-") - return cls(desc[5], desc[0], operands, frozenset(desc[6:])) + return cls(mnem, desc[0], operands, flags) def imm_size(self, opsz): flags = ENCODINGS[self.encoding] @@ -297,6 +307,10 @@ class InstrDesc(NamedTuple): # Miscellaneous Flags if "VSIB" in self.flags: extraflags["vsib"] = 1 + if "BCST" in self.flags: extraflags["evex_bcst"] = 1 + if "MASK" in self.flags: extraflags["evex_mask"] = 1 + if "SAE" in self.flags: extraflags["evex_rc"] = 1 + if "ER" in self.flags: extraflags["evex_rc"] = 3 if modrm: extraflags["modrm"] = 1 if "U66" not in self.flags and (ign66 or "I66" in self.flags): @@ -322,8 +336,8 @@ class EntryKind(Enum): return self == EntryKind.INSTR or self == EntryKind.WEAKINSTR opcode_regex = re.compile( - r"^(?:(?P<prefixes>(?P<vex>VEX\.)?(?P<prefix>NP|66|F2|F3|NFx)\." + - r"(?:W(?P<rexw>[01]|IG)\.)?(?:L(?P<vexl>[01]|IG)\.)?))?" + + r"^(?:(?P<prefixes>(?P<vex>E?VEX\.)?(?P<prefix>NP|66|F2|F3|NFx)\." + + r"(?:W(?P<rexw>[01]|IG)\.)?(?:L(?P<vexl>0|1|12|2|IG)\.)?))?"
+ r"(?P0f38|0f3a|0f|)" + r"(?P[0-9a-f]{2})" + r"(?:/(?P[0-7]|[rm]|[0-7][rm])|(?P[c-f][0-9a-f]))?(?P\+)?$") @@ -335,8 +349,8 @@ class Opcode(NamedTuple): extended: bool # Extend opc or opcext, if present modreg: Union[None, Tuple[Union[None, int], str]] # (modreg, "r"/"m"/"rm"), None opcext: Union[None, int] # 0xc0-0xff, or 0 - vex: bool - vexl: Union[str, None] # 0, 1, IG, None = used, both + vex: int # 0 = legacy, 1 = VEX, 2 = EVEX + vexl: Union[str, None] # 0, 1, 12, 2, IG, None = used, both rexw: Union[str, None] # 0, 1, IG, None = used, both @classmethod @@ -360,11 +374,71 @@ class Opcode(NamedTuple): extended=match.group("extended") is not None, modreg=modreg, opcext=int(match.group("opcext") or "0", 16) or None, - vex=match.group("vex") is not None, + vex=[None, "VEX.", "EVEX."].index(match.group("vex")), vexl=match.group("vexl"), rexw=match.group("rexw"), ) +def verifyOpcodeDesc(opcode, desc): + flags = ENCODINGS[desc.encoding] + if opcode.escape == 2 and flags.imm_control != 0: + raise Exception(f"0f38 has no immediate operand {opcode}, {desc}") + if opcode.escape == 3 and desc.imm_size(4) != 1: + raise Exception(f"0f3a must have immediate byte {opcode}, {desc}") + if opcode.vexl == "IG" and desc.dynsizes() - {OpKind.SZ_OP}: + raise Exception(f"(E)VEX.LIG with dynamic vector size {opcode}, {desc}") + if "VSIB" in desc.flags and (not opcode.modreg or opcode.modreg[1] != "m"): + raise Exception(f"VSIB for non-memory opcode {opcode}, {desc}") + if opcode.vex == 2 and flags.vexreg_idx: + # Checking this here allows to omit check for V' in decoder. + if desc.operands[flags.vexreg_idx ^ 3].kind != "XMM": + raise Exception(f"EVEX.vvvv must refer to XMM {opcode}, {desc}") + if opcode.vex == 2 and flags.modreg_idx and flags.modreg_idx ^ 3 != 0: + # EVEX.z=0 is only checked for mask operands in ModReg + if desc.operands[flags.modreg_idx ^ 3].kind == "MASK": + raise Exception(f"ModRM.reg mask not first operand {opcode}, {desc}") + # Verify tuple type + if opcode.vex == 2 and (not opcode.modreg or "m" in opcode.modreg[1]): + tts = [s for s in desc.flags if s.startswith("TUPLE")] + if len(tts) != 1: + raise Exception(f"missing tuple type in {opcode}, {desc}") + if flags.modrm_idx == 3 ^ 3: + raise Exception(f"missing memory operand {opcode}, {desc}") + # From Intel SDM + bcst, evexw, vszs = { + "TUPLE_FULL_32": (True, "0", ( 16, 32, 64)), + "TUPLE_FULL_64": (True, "1", ( 16, 32, 64)), + "TUPLE_HALF_32": (True, "0", ( 8, 16, 32)), + "TUPLE_HALF_64": (True, "1", ( 8, 16, 32)), + "TUPLE_FULL_MEM": (False, None, ( 16, 32, 64)), + "TUPLE_HALF_MEM": (False, None, ( 8, 16, 32)), + "TUPLE_QUARTER_MEM": (False, None, ( 4, 8, 16)), + "TUPLE_EIGHTH_MEM": (False, None, ( 2, 4, 8)), + "TUPLE1_SCALAR_8": (False, None, ( 1, 1, 1)), + "TUPLE1_SCALAR_16": (False, None, ( 2, 2, 2)), + "TUPLE1_SCALAR_32": (False, "0", ( 4, 4, 4)), + "TUPLE1_SCALAR_64": (False, "1", ( 8, 8, 8)), + "TUPLE1_SCALAR_OPSZ": (False, None, ( 0, 0, 0)), + "TUPLE1_FIXED_32": (False, None, ( 4, 4, 4)), + "TUPLE1_FIXED_64": (False, None, ( 8, 8, 8)), + "TUPLE2_32": (False, "0", ( 8, 8, 8)), + "TUPLE2_64": (False, "1", (None, 16, 16)), + "TUPLE4_32": (False, "0", (None, 16, 16)), + "TUPLE4_64": (False, "1", (None, None, 32)), + "TUPLE8_32": (False, "0", (None, None, 32)), + "TUPLE_MEM128": (False, None, ( 16, 16, 16)), + # TODO: Fix MOVDDUP tuple size :( + "TUPLE_MOVDDUP": (False, None, ( 16, 32, 64)), + }[tts[0]] + if "BCST" in desc.flags and not bcst: + raise Exception(f"broadcast on incompatible type {opcode}, {desc}") + if evexw and 
opcode.rexw != evexw: + raise Exception(f"incompatible EVEX.W {opcode}, {desc}") + for l, tupsz in enumerate(vszs): + opsz = desc.operands[flags.modrm_idx ^ 3].abssize(0, 16 << l) + if tupsz is not None and opsz != tupsz: + raise Exception(f"memory size {opsz} != {tupsz} {opcode}, {desc}") + class Trie: KIND_ORDER = (EntryKind.TABLE_ROOT, EntryKind.TABLE256, EntryKind.TABLE_PREFIX, EntryKind.TABLE16, @@ -375,7 +449,7 @@ class Trie: EntryKind.TABLE_PREFIX: 4, EntryKind.TABLE16: 16, EntryKind.TABLE8E: 8, - EntryKind.TABLE_VEX: 4, + EntryKind.TABLE_VEX: 8, } def __init__(self, root_count): @@ -412,9 +486,12 @@ class Trie: mod = {"m": [0], "r": [1<<3], "rm": [0, 1<<3]}[opc.modreg[1]] reg = [opc.modreg[0]] if opc.modreg[0] is not None else list(range(8)) t16 = [x + y for x in mod for y in reg] - if opc.vexl in ("0", "1") or opc.rexw in ("0", "1"): + if (opc.rexw or "IG") != "IG" or (opc.vexl or "IG") != "IG": rexw = {"0": [0], "1": [1<<0], "IG": [0, 1<<0]}[opc.rexw or "IG"] - vexl = {"0": [0], "1": [1<<1], "IG": [0, 1<<1]}[opc.vexl or "IG"] + if opc.vex < 2: + vexl = {"0": [0], "1": [1<<1], "IG": [0, 1<<1]}[opc.vexl or "IG"] + else: + vexl = {"0": [0], "12": [1<<1, 2<<1], "2": [2<<1], "IG": [0, 1<<1, 2<<1, 3<<1]}[opc.vexl or "IG"] tvex = list(map(sum, product(rexw, vexl))) # Order must match KIND_ORDER. return troot, t256, tprefix, t16, t8e, tvex @@ -566,9 +643,11 @@ def decode_table(entries, args): decode_mnems_lines = [f"FD_MNEMONIC({m},{i})\n" for i, m in enumerate(mnems)] mnemonics_intel = [m.replace("SSE_", "").replace("MMX_", "") + .replace("EVX_", "V") .replace("MOVABS", "MOV").replace("RESERVED_", "") .replace("JMPF", "JMP FAR").replace("CALLF", "CALL FAR") .replace("_S2G", "").replace("_G2S", "") + .replace("_X2G", "").replace("_G2X", "") .replace("_CR", "").replace("_DR", "") .replace("REP_", "REP ").replace("CMPXCHGD", "CMPXCHG") .replace("JCXZ", "JCXZ JECXZJRCXZ") @@ -608,6 +687,8 @@ def encode_mnems(entries): for weak, opcode, desc in entries: if "I64" in desc.flags or desc.mnemonic[:9] == "RESERVED_": continue + if opcode.vex == 2: # EVEX not implemented + continue opsizes, vecsizes = {0}, {0} prepend_opsize, prepend_vecsize = False, False @@ -631,7 +712,7 @@ def encode_mnems(entries): opsizes = {64} prepend_opsize = False elif opcode.vex and opcode.vexl != "IG": # vectors; don't care for SSE - vecsizes = {128, 256} + vecsizes = {128, 256} # TODO-EVEX if opcode.vexl: vecsizes -= {128 if opcode.vexl == "1" else 256} prepend_vecsize = not separate_opsize @@ -718,8 +799,10 @@ def encode_table(entries, args): opc_i |= 0x400000 if opcode.rexw == "1" else 0 if opcode.prefix == "LOCK": opc_i |= 0x800000 - elif opcode.vex: + elif opcode.vex == 1: opc_i |= 0x1000000 + 0x800000 * int(opcode.vexl or 0) + elif opcode.vex == 2: # TODO-EVEX + opc_i |= 0x2000000 + 0x800000 * int(opcode.vexl or 0) opc_i |= 0x8000000 if "VSIB" in desc.flags else 0 if alt >= 0x100: raise Exception("encode alternate bits exhausted") @@ -831,7 +914,7 @@ def encode2_table(entries, args): code += f" if (!op_imm_n(imm-1, imm_size)) goto next{i};\n" neednext = True - if opcode.vex: + if opcode.vex: # TODO-EVEX rexw, rexr, rexx, rexb = 0x8000, 0x80, 0x40, 0x20 else: rexw, rexr, rexx, rexb = 0x48, 0x44, 0x42, 0x41 @@ -864,7 +947,7 @@ def encode2_table(entries, args): if "m" in ots or "U67" in desc.flags: code += " if (UNLIKELY(flags & FE_ADDR32)) buf[idx++] = 0x67;\n" - if opcode.vex: + if opcode.vex: # TODO-EVEX ppl = ["NP", "66", "F3", "F2"].index(opcode.prefix) ppl |= 4 if opcode.vexl == "1" else 0 mayvex2 = opcode.rexw 
!= "1" and opcode.escape == 1 @@ -957,6 +1040,7 @@ if __name__ == "__main__": line, weak = (line, False) if line[0] != "*" else (line[1:], True) opcode_string, desc_string = tuple(line.split(maxsplit=1)) opcode, desc = Opcode.parse(opcode_string), InstrDesc.parse(desc_string) + verifyOpcodeDesc(opcode, desc) if "UNDOC" not in desc.flags or args.with_undoc: entries.append((weak, opcode, desc))