tests/decode: Rewrite in C, speed-up is >100x

This commit is contained in:
Alexis Engelke
2020-06-27 17:31:07 +02:00
parent 9556d34a8a
commit ceea786c7f
15 changed files with 206 additions and 348 deletions

View File

@@ -1,13 +0,0 @@
decode 90 [NOP]
decode 0fcd [BSWAP reg4:r5]
decode 660fcd [BSWAP reg2:r5]
decode 6650 [PUSH reg2:r0]
decode a5 [MOVS_4]
decode f3a5 [rep:MOVS_4]
decode 66a5 [MOVS_2]
decode f366a5 [rep:MOVS_2]
decode f7d7 [NOT reg4:r7]
decode f717 [NOT mem4:r7]
decode f7142f [NOT mem4:r7+1*r5]
decode f7542f12 [NOT mem4:r7+1*r5+0x12]
decode f7942f34120000 [NOT mem4:r7+1*r5+0x1234]

View File

@@ -1,11 +0,0 @@
decode 66c8000000 [ENTER_2 imm4:0x0]
decode 66c8000f00 [ENTER_2 imm4:0xf00]
decode 66c8000001 [ENTER_2 imm4:0x10000]
decode32 c8000000 [ENTER_4 imm4:0x0]
decode32 c8000f00 [ENTER_4 imm4:0xf00]
decode32 c8000001 [ENTER_4 imm4:0x10000]
decode64 c8000000 [ENTER_8 imm4:0x0]
decode64 c8000f00 [ENTER_8 imm4:0xf00]
decode64 c8000001 [ENTER_8 imm4:0x10000]
decode64 d3e0 [SHL reg4:r0 reg1:r1]
decode64 0fa5d0 [SHLD reg4:r0 reg4:r2 reg1:r1]

View File

@@ -1,2 +0,0 @@
decode 69C708010000 [IMUL reg4:r0 reg4:r7 imm4:0x108]
decode 6BC708 [IMUL reg4:r0 reg4:r7 imm4:0x8]

View File

@@ -1,14 +0,0 @@
decode32 40 [INC reg4:r0]
decode32 43 [INC reg4:r3]
decode32 6647 [INC reg2:r7]
decode fec0 [INC reg1:r0]
decode fec4 [INC reg1:r0h]
decode ffc0 [INC reg4:r0]
decode ffc4 [INC reg4:r4]
decode ff00 [INC mem4:r0]
decode f0ff00 [lock:INC mem4:r0]
decode 66ffc0 [INC reg2:r0]
decode 66ffc4 [INC reg2:r4]
decode64 48ffc0 [INC reg8:r0]
decode64 48ffc4 [INC reg8:r4]
decode64 49ffc7 [INC reg8:r15]

View File

@@ -1,6 +0,0 @@
decode32 e900000000 [JMP off4:eip+0x0]
decode32 66e90100 [JMP off2:ip+0x1]
decode64 e900000000 [JMP off8:rip+0x0]
decode64 66e900000000 [JMP off8:rip+0x0]
decode 66e9000000 PARTIAL
decode 66e9 PARTIAL

View File

@@ -1,5 +0,0 @@
decode 660fbec2 [MOVSX reg2:r0 reg1:r2]
decode 0fbec2 [MOVSX reg4:r0 reg1:r2]
decode 0fbfc2 [MOVSX reg4:r0 reg2:r2]
decode64 480fbfc2 [MOVSX reg8:r0 reg2:r2]
decode64 4863c2 [MOVSX reg8:r0 reg4:r2]

View File

@@ -1,12 +0,0 @@
decode 66c3 [RET_2]
decode 66c20000 [RET_2 imm2:0x0]
decode 66c20d00 [RET_2 imm2:0xd]
decode 66c20dff [RET_2 imm2:0xff0d]
decode32 c3 [RET_4]
decode32 c20000 [RET_4 imm2:0x0]
decode32 c20d00 [RET_4 imm2:0xd]
decode32 c20dff [RET_4 imm2:0xff0d]
decode64 c3 [RET_8]
decode64 c20000 [RET_8 imm2:0x0]
decode64 c20d00 [RET_8 imm2:0xd]
decode64 c20dff [RET_8 imm2:0xff0d]

View File

@@ -1,8 +0,0 @@
decode f30f7e5c2408 [SSE_MOVQ reg16:r3 mem8:r4+0x8]
decode c5f96ec8 [VMOVD reg4:r1 reg4:r0]
decode64 c4e1f96ec8 [VMOVQ reg8:r1 reg8:r0]
decode32 c4e1f96ec8 [VMOVD reg4:r1 reg4:r0]
decode c5f22ac0 [VCVTSI2SS reg16:r0 reg16:r1 reg4:r0]
decode32 c4e1f22ac0 [VCVTSI2SS reg16:r0 reg16:r1 reg4:r0]
decode64 c4e1f22ac0 [VCVTSI2SS reg16:r0 reg16:r1 reg8:r0]
decode64 c4e2759004e7 [VPGATHERDD reg32:r0 mem32:r7+8*r4 reg32:r1]

View File

@@ -1 +0,0 @@
decode 660fc6c001 [SSE_SHUFPD reg16:r0 reg16:r0 imm1:0x1]

View File

@@ -1,95 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <inttypes.h>
#include <time.h>
#include <fadec.h>
// Convert a single ASCII hexadecimal digit (0-9, a-f, A-F) to its numeric
// value in the range 0..15. For any other character a diagnostic is printed
// to stdout and the process terminates with exit status 1.
static
uint8_t
parse_nibble(const char nibble)
{
    uint8_t value;

    if ('0' <= nibble && nibble <= '9') {
        value = (uint8_t) (nibble - '0');
    } else if ('a' <= nibble && nibble <= 'f') {
        value = (uint8_t) (nibble - 'a' + 10);
    } else if ('A' <= nibble && nibble <= 'F') {
        value = (uint8_t) (nibble - 'A' + 10);
    } else {
        // Not a hex digit: report the offending character code and give up.
        printf("Invalid hexadecimal number: %x\n", nibble);
        exit(1);
    }

    return value;
}
// Command-line decode driver.
//
// Usage: driver [mode] [instruction bytes] ([repetitions])
//   mode               passed through to fd_decode (parsed with strtoul)
//   instruction bytes  hex string, two digits per byte
//   repetitions        optional; how often the whole buffer is decoded
//
// Prints the formatted result of the last decode ("UD" / "PARTIAL" for the
// corresponding errors) and, when repetitions > 1, the elapsed time in ns on
// a second line. Returns -1 on invalid arguments, 0 otherwise.
int
main(int argc, char** argv)
{
    if (argc != 3 && argc != 4)
    {
        printf("usage: %s [mode] [instruction bytes] ([repetitions])\n", argv[0]);
        return -1;
    }

    size_t mode = strtoul(argv[1], NULL, 0);

    // Avoid allocation by transforming hex to binary in-place.
    // Byte i is written only after hex digits 2i and 2i+1 were read, so the
    // output never overtakes the input. An odd number of digits makes
    // parse_nibble see the terminating NUL and exit.
    uint8_t* code = (uint8_t*) argv[2];
    uint8_t* code_end = code;
    char* hex = argv[2];
    for (; *hex; hex += 2, code_end++)
        *code_end = (parse_nibble(hex[0]) << 4) | parse_nibble(hex[1]);

    size_t length = (size_t) (code_end - code);

    size_t repetitions = 1;
    if (argc >= 4)
        repetitions = strtoul(argv[3], NULL, 0);

    // With no bytes or no repetitions fd_decode would never run, retval would
    // stay 0 and the uninitialized instr would be handed to fd_format below.
    if (length == 0 || repetitions == 0)
    {
        fprintf(stderr, "%s: need at least one instruction byte and one repetition\n",
                argv[0]);
        return -1;
    }

    struct timespec time_start;
    struct timespec time_end;

    FdInstr instr;
    int retval = 0;

    // Compiler barriers keep the decode loop between the two clock reads.
    __asm__ volatile("" : : : "memory");
    clock_gettime(CLOCK_MONOTONIC, &time_start);
    for (size_t i = 0; i < repetitions; i++)
    {
        size_t current_off = 0;
        while (current_off != length)
        {
            size_t remaining = length - current_off;
            retval = fd_decode(code + current_off, remaining, mode, 0, &instr);
            if (retval < 0)
                break;
            current_off += retval;
        }
    }
    clock_gettime(CLOCK_MONOTONIC, &time_end);
    __asm__ volatile("" : : : "memory");

    // Report the outcome of the last decode attempt only.
    if (retval >= 0)
    {
        char format_buffer[128];
        fd_format(&instr, format_buffer, sizeof(format_buffer));
        printf("%s\n", format_buffer);
    }
    else if (retval == FD_ERR_UD)
    {
        printf("UD\n");
    }
    else if (retval == FD_ERR_PARTIAL)
    {
        printf("PARTIAL\n");
    }

    if (repetitions > 1)
    {
        uint64_t nsecs = 1000000000ull * (time_end.tv_sec - time_start.tv_sec) +
                         (time_end.tv_nsec - time_start.tv_nsec);
        printf("%" PRIu64 " ns\n", nsecs);
    }

    return 0;
}

View File

@@ -1,46 +1,4 @@
# Build the command-line driver from driver.c, linked against fadec; it is
# handed to test.py as its driver argument below.
test_driver = executable('test_driver', 'driver.c',
dependencies: fadec,
c_args: ['-D_GNU_SOURCE'])
# Base arguments for the Python runner: the script itself, then the driver.
test_args = [files('test.py'), test_driver]
# Forward --32/--64 to test.py depending on decode_32/decode_64. Both
# variables are defined outside this file (presumably the enabled decoder
# modes -- confirm in the parent build file).
if decode_32
test_args += ['--32']
endif
if decode_64
test_args += ['--64']
endif
## Test cases
# Each entry is [test name, case file]; one meson test is registered per entry.
testcases = [
['prefixes', 'prefixes.txt'],
['modrm', 'modrm.txt'],
['enter', 'decode-enter.sh'],
['imul', 'decode-imul.sh'],
['inc', 'decode-inc.sh'],
['jmp', 'decode-jmp.txt'],
['movsx', 'decode-movsx.sh'],
['ret', 'decode-ret.sh'],
['sse-shufpd', 'decode-sse-shufpd.sh'],
['sse-movq', 'decode-sse-movq.sh'],
]
foreach case : testcases
test(case[0], python3, args: test_args + files(case[1]))
endforeach
## Benchmarks
#
# Note that we don't use meson's benchmark function here, because it doesn't
# give us the output we need by default.
benchmarks = [
'benchmarks.txt',
]
# Manual target: runs test.py with --benchmark over the benchmark case files.
run_target('benchmark_decode',
command: [python3, test_args, '--benchmark', files(benchmarks)])
# Self-contained C test program built from test_decode.c, registered as the
# single 'decode' test; it takes no arguments.
decode_test = executable('test_decode', 'test_decode.c',
dependencies: fadec)
test('decode', decode_test)

View File

@@ -1,34 +0,0 @@
# reg
decode 01c0 [ADD reg4:r0 reg4:r0]
decode 01c1 [ADD reg4:r1 reg4:r0]
decode 01d0 [ADD reg4:r0 reg4:r2]
decode 01ff [ADD reg4:r7 reg4:r7]
decode64 4101d0 [ADD reg4:r8 reg4:r2]
decode64 4501d0 [ADD reg4:r8 reg4:r10]
decode64 4501ff [ADD reg4:r15 reg4:r15]
# [reg]
decode 0100 [ADD mem4:r0 reg4:r0]
decode 0108 [ADD mem4:r0 reg4:r1]
decode 0101 [ADD mem4:r1 reg4:r0]
decode 0107 [ADD mem4:r7 reg4:r0]
decode 0138 [ADD mem4:r0 reg4:r7]
decode 010424 [ADD mem4:r4 reg4:r0]
decode64 410100 [ADD mem4:r8 reg4:r0]
decode64 440108 [ADD mem4:r0 reg4:r9]
decode64 450100 [ADD mem4:r8 reg4:r8]
decode64 410107 [ADD mem4:r15 reg4:r0]
decode64 41010424 [ADD mem4:r12 reg4:r0]
# [disp32]
decode32 010501000000 [ADD mem4:0x1 reg4:r0]
decode32 0105ffffffff [ADD mem4:-0x1 reg4:r0]
decode 01042501000000 [ADD mem4:0x1 reg4:r0]
decode64 4101042501000000 [ADD mem4:0x1 reg4:r0]
# [rip+disp32]
decode64 010501000000 [ADD mem4:r16+0x1 reg4:r0]
decode64 41010501000000 [ADD mem4:r16+0x1 reg4:r0]
# [reg+disp32]
decode 018001000000 [ADD mem4:r0+0x1 reg4:r0]
# [reg+eiz+disp32]
decode 01842501000000 [ADD mem4:r5+0x1 reg4:r0]
# [reg+s*reg+disp32]
decode64 4201842501000000 [ADD mem4:r5+1*r12+0x1 reg4:r0]

View File

@@ -1,32 +0,0 @@
decode 90 [NOP]
decode 2e90 [cs:NOP]
decode 2e2e90 [cs:NOP]
decode 2e2690 [es:NOP]
decode 262e90 [cs:NOP]
decode 266590 [gs:NOP]
decode 652690 [es:NOP]
decode 0f10c1 [SSE_MOVUPS reg16:r0 reg16:r1]
decode 660f10c1 [SSE_MOVUPD reg16:r0 reg16:r1]
decode f2660f10c1 [SSE_MOVSD reg16:r0 reg8:r1]
decode f3660f10c1 [SSE_MOVSS reg16:r0 reg4:r1]
decode f3f2660f10c1 [SSE_MOVSD reg16:r0 reg8:r1]
decode f266f3660f10c1 [SSE_MOVSS reg16:r0 reg4:r1]
decode64 4890 [NOP]
decode64 4990 [XCHG reg8:r8 reg8:r0]
decode64 6690 [NOP]
decode 66 PARTIAL
decode 0f PARTIAL
decode 80 PARTIAL
decode 0F01E2 [SMSW reg4:r2]
decode64 660f2000 [MOV_CR reg8:r0 reg0:r0]
decode64 0f20c8 UD
decode64 0f20d0 [MOV_CR reg8:r0 reg0:r2]
decode64 440f2008 UD
decode64 440f2100 UD
decode 8cc0 [MOV_S2G reg4:r0 reg0:r0]
decode64 448cc0 [MOV_S2G reg4:r0 reg0:r0]
decode 8ec0 [MOV_G2S reg0:r0 reg4:r0]
decode 8ec8 UD
decode d8c1 [FADD reg0:r0 reg0:r1]
decode64 41d8c1 [FADD reg0:r0 reg0:r1]
decode64 41dfe0 [FSTSW reg2:r0]

View File

@@ -1,70 +0,0 @@
#!/usr/bin/python3
import argparse
import statistics
import subprocess
import sys
def run(args, mode, code, expected):
    """Run the driver on one test case and check the decoded instruction.

    The driver is invoked as ``[args.driver, mode, code, repetitions]``. The
    first line of its output must equal ``expected``; otherwise an Exception
    describing the mismatch is raised.

    In benchmark mode (``args.benchmark``) the driver is run 3 times with
    10000000 inner repetitions each; the second output line is parsed as a
    time in ns, a per-case summary is printed, and the per-decode times are
    returned. Outside benchmark mode the returned list is empty.
    """
    inner_reps = 10000000 if args.benchmark else 1
    outer_reps = 3 if args.benchmark else 1
    times = []
    for _ in range(outer_reps):
        output = subprocess.check_output([args.driver, str(mode), code, str(inner_reps)],
                                         universal_newlines=True)
        # partition() always yields three parts, so a driver that prints
        # nothing is reported as a "wrong result" below instead of failing
        # with an opaque tuple-unpacking error.
        instr, _newline, timing = output.partition("\n")
        if instr != expected:
            raise Exception('wrong result, expected %r got %r (code %r)' %
                            (expected, instr, code))
        if args.benchmark:
            # Driver reports the total time; normalise to ns per decode.
            times.append(float(timing.split()[0]) / inner_reps)
    if args.benchmark:
        mean = statistics.mean(times)
        stdev = statistics.stdev(times)
        print("{:2} {:50} {:6.3f} ns (std: {:6.3f} ns)".format(mode, expected, mean, stdev))
    return times
if __name__ == "__main__":
    # Command line: [--benchmark] [--32] [--64] driver cases...
    parser = argparse.ArgumentParser()
    parser.add_argument("--benchmark", action="store_true")
    parser.add_argument("--32", dest="test_modes", action="append_const", const=32)
    parser.add_argument("--64", dest="test_modes", action="append_const", const=64)
    parser.add_argument("driver")
    parser.add_argument("cases", nargs="+", type=argparse.FileType('r'))
    args = parser.parse_args()

    failed, total = 0, 0
    total_times = []
    # Without an explicit --32/--64 both modes are tested.
    test_modes = frozenset(args.test_modes if args.test_modes else [32, 64])

    for file in args.cases:
        # Strip before filtering: a blank line is "\n" (truthy) when read from
        # the file and would otherwise yield an empty tuple that cannot be
        # unpacked into (op, code, expected).
        stripped = (ln.strip() for ln in file.readlines())
        cases = [tuple(ln.split(maxsplit=2)) for ln in stripped
                 if ln and ln[0] != "#"]
        for op, code, expected in cases:
            case_modes = {"decode":{32,64},"decode32":{32},"decode64":{64}}[op]
            if not case_modes & test_modes: continue

            # Compatibility with old test system
            if expected[0] == '"' and expected[-1] == '"':
                expected = expected[1:-1]

            try:
                total += 1
                for mode in case_modes & test_modes:
                    total_times += run(args, mode, code, expected)
            except Exception as e:
                failed += 1
                print("FAILED: %s" % e)

    if failed:
        print("FAILED %d/%d tests" % (failed, total))
        sys.exit(1)
    else:
        print("PASSED %d tests" % total)

    if args.benchmark:
        mean = statistics.mean(total_times)
        stdev = statistics.stdev(total_times)
        print("Average: {:6.3f} ns (std: {:6.3f} ns)".format(mean, stdev))

203
tests/test_decode.c Normal file
View File

@@ -0,0 +1,203 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <fadec.h>
// Write len bytes starting at buf to stdout as lowercase hex, two digits per
// byte, with no separators and no trailing newline.
static
void
print_hex(const uint8_t* buf, size_t len)
{
    while (len-- > 0)
        printf("%02x", *buf++);
}
// Decode one instruction from buf (buf_len bytes) in the given mode and
// compare the outcome with exp_fmt.
//
// The outcome string is "PARTIAL" or "UD" for the corresponding decoder
// errors, otherwise the fd_format output. A case passes when the strings
// match and, for a successful decode, the decoder consumed exactly buf_len
// bytes. Returns 0 on pass (or when fd_decode reports FD_ERR_INTERNAL),
// -1 on failure after printing the bytes plus expected and actual results.
static
int
test(const void* buf, size_t buf_len, unsigned mode, const char* exp_fmt)
{
    FdInstr instr;
    char fmt[128];

    const int retval = fd_decode(buf, buf_len, mode, 0, &instr);

    // not compiled with this arch-mode (32/64 bit)
    if (retval == FD_ERR_INTERNAL)
        return 0;

    if (retval == FD_ERR_PARTIAL)
        strcpy(fmt, "PARTIAL");
    else if (retval == FD_ERR_UD)
        strcpy(fmt, "UD");
    else
        fd_format(&instr, fmt, sizeof(fmt));

    // Errors carry no length; a successful decode must use the whole buffer.
    const int length_ok = retval < 0 || (unsigned) retval == buf_len;
    if (length_ok && strcmp(fmt, exp_fmt) == 0)
        return 0;

    printf("Failed case ");
    print_hex(buf, buf_len);
    printf("\n Exp (%2zu): %s", buf_len, exp_fmt);
    printf("\n Got (%2d): %s\n", retval, fmt);
    return -1;
}
// Run one case in the given mode. buf must be a string literal (or array),
// not a pointer: sizeof(buf)-1 is its byte count without the terminating NUL,
// so embedded \x00 bytes are counted correctly.
#define TEST1(mode, buf, exp_fmt) test(buf, sizeof(buf)-1, mode, exp_fmt)
// The wrappers below OR the result into a variable named `failed`, which must
// be in scope where they are used. TEST32/TEST64 run a single mode.
#define TEST32(...) failed |= TEST1(32, __VA_ARGS__)
#define TEST64(...) failed |= TEST1(64, __VA_ARGS__)
// Runs both modes; bitwise | (not ||) means neither call is skipped when the
// other one fails.
#define TEST(...) failed |= TEST1(32, __VA_ARGS__) | TEST1(64, __VA_ARGS__)
// Runs every decode test case below, prints a one-line summary and returns
// EXIT_FAILURE if at least one case failed, EXIT_SUCCESS otherwise.
// Command-line arguments are ignored.
int
main(int argc, char** argv)
{
(void) argc; (void) argv;
// Accumulates the results of all cases; updated by the TEST* macros.
int failed = 0;
// NOTE(review): the next two cases are identical.
TEST("\x90", "[NOP]");
TEST("\x90", "[NOP]");
// Segment prefixes: the expected strings name only the last one given.
TEST("\x2e\x90", "[cs:NOP]");
TEST("\x2e\x2e\x90", "[cs:NOP]");
TEST("\x2e\x26\x90", "[es:NOP]");
TEST("\x26\x2e\x90", "[cs:NOP]");
TEST("\x26\x65\x90", "[gs:NOP]");
TEST("\x65\x26\x90", "[es:NOP]");
// Opcode 0f 10 with combinations of 66/f2/f3 prefixes.
TEST("\x0f\x10\xc1", "[SSE_MOVUPS reg16:r0 reg16:r1]");
TEST("\x66\x0f\x10\xc1", "[SSE_MOVUPD reg16:r0 reg16:r1]");
TEST("\xf2\x66\x0f\x10\xc1", "[SSE_MOVSD reg16:r0 reg8:r1]");
TEST("\xf3\x66\x0f\x10\xc1", "[SSE_MOVSS reg16:r0 reg4:r1]");
TEST("\xf3\xf2\x66\x0f\x10\xc1", "[SSE_MOVSD reg16:r0 reg8:r1]");
TEST("\xf2\x66\xf3\x66\x0f\x10\xc1", "[SSE_MOVSS reg16:r0 reg4:r1]");
// 64-bit only: 90/91 preceded by 48/49/66 prefix bytes.
TEST64("\x48\x90", "[NOP]");
TEST64("\x49\x90", "[XCHG reg8:r8 reg8:r0]");
TEST64("\x48\x91", "[XCHG reg8:r1 reg8:r0]");
TEST64("\x48\x26\x91", "[es:XCHG reg4:r1 reg4:r0]");
TEST64("\x66\x90", "[NOP]");
// Truncated encodings must be reported as PARTIAL.
TEST("\x66", "PARTIAL");
TEST("\x0f", "PARTIAL");
TEST("\x0f\x38", "PARTIAL");
TEST("\x0f\x3a", "PARTIAL");
TEST("\x80", "PARTIAL");
TEST("\x0F\x01\xE2", "[SMSW reg4:r2]");
// Control/debug register moves, including encodings expected to be UD.
TEST64("\x66\x0f\x20\x00", "[MOV_CR reg8:r0 reg0:r0]");
TEST64("\x0f\x20\xc8", "UD");
TEST64("\x0f\x20\xd0", "[MOV_CR reg8:r0 reg0:r2]");
TEST64("\x44\x0f\x20\x08", "UD");
TEST64("\x44\x0f\x21\x00", "UD");
// Segment register moves.
TEST("\x8c\xc0", "[MOV_S2G reg4:r0 reg0:r0]");
TEST64("\x44\x8c\xc0", "[MOV_S2G reg4:r0 reg0:r0]");
TEST("\x8e\xc0", "[MOV_G2S reg0:r0 reg4:r0]");
TEST("\x8e\xc8", "UD"); // No mov cs, eax
// x87 instructions, with and without a 41 prefix byte.
TEST("\xd8\xc1", "[FADD reg0:r0 reg0:r1]");
TEST64("\x41\xd8\xc1", "[FADD reg0:r0 reg0:r1]");
TEST64("\x41\xdf\xe0", "[FSTSW reg2:r0]");
// ModRM Test cases
// reg
TEST("\x01\xc0", "[ADD reg4:r0 reg4:r0]");
TEST("\x01\xc1", "[ADD reg4:r1 reg4:r0]");
TEST("\x01\xd0", "[ADD reg4:r0 reg4:r2]");
TEST("\x01\xff", "[ADD reg4:r7 reg4:r7]");
TEST64("\x41\x01\xd0", "[ADD reg4:r8 reg4:r2]");
TEST64("\x45\x01\xd0", "[ADD reg4:r8 reg4:r10]");
TEST64("\x45\x01\xff", "[ADD reg4:r15 reg4:r15]");
// [reg]
TEST("\x01\x00", "[ADD mem4:r0 reg4:r0]");
TEST("\x01\x08", "[ADD mem4:r0 reg4:r1]");
TEST("\x01\x01", "[ADD mem4:r1 reg4:r0]");
TEST("\x01\x07", "[ADD mem4:r7 reg4:r0]");
TEST("\x01\x38", "[ADD mem4:r0 reg4:r7]");
TEST("\x01\x04\x24", "[ADD mem4:r4 reg4:r0]");
TEST64("\x41\x01\x00", "[ADD mem4:r8 reg4:r0]");
TEST64("\x44\x01\x08", "[ADD mem4:r0 reg4:r9]");
TEST64("\x45\x01\x00", "[ADD mem4:r8 reg4:r8]");
TEST64("\x41\x01\x07", "[ADD mem4:r15 reg4:r0]");
TEST64("\x41\x01\x04\x24", "[ADD mem4:r12 reg4:r0]");
// [disp32]
TEST32("\x01\x05\x01\x00\x00\x00", "[ADD mem4:0x1 reg4:r0]");
TEST32("\x01\x05\xff\xff\xff\xff", "[ADD mem4:-0x1 reg4:r0]");
TEST("\x01\x04\x25\x01\x00\x00\x00", "[ADD mem4:0x1 reg4:r0]");
TEST64("\x41\x01\x04\x25\x01\x00\x00\x00", "[ADD mem4:0x1 reg4:r0]");
// [rip+disp32]
TEST64("\x01\x05\x01\x00\x00\x00", "[ADD mem4:r16+0x1 reg4:r0]");
TEST64("\x41\x01\x05\x01\x00\x00\x00", "[ADD mem4:r16+0x1 reg4:r0]");
// [reg+disp32]
TEST("\x01\x80\x01\x00\x00\x00", "[ADD mem4:r0+0x1 reg4:r0]");
// [reg+eiz+disp32]
TEST("\x01\x84\x25\x01\x00\x00\x00", "[ADD mem4:r5+0x1 reg4:r0]");
// [reg+s*reg+disp32]
TEST64("\x42\x01\x84\x25\x01\x00\x00\x00", "[ADD mem4:r5+1*r12+0x1 reg4:r0]");
// ENTER with 2/4/8-byte operand size, then SHL/SHLD with a reg1:r1 operand.
TEST("\x66\xc8\x00\x00\x00", "[ENTER_2 imm4:0x0]");
TEST("\x66\xc8\x00\x0f\x00", "[ENTER_2 imm4:0xf00]");
TEST("\x66\xc8\x00\x00\x01", "[ENTER_2 imm4:0x10000]");
TEST32("\xc8\x00\x00\x00", "[ENTER_4 imm4:0x0]");
TEST32("\xc8\x00\x0f\x00", "[ENTER_4 imm4:0xf00]");
TEST32("\xc8\x00\x00\x01", "[ENTER_4 imm4:0x10000]");
TEST64("\xc8\x00\x00\x00", "[ENTER_8 imm4:0x0]");
TEST64("\xc8\x00\x0f\x00", "[ENTER_8 imm4:0xf00]");
TEST64("\xc8\x00\x00\x01", "[ENTER_8 imm4:0x10000]");
TEST64("\xd3\xe0", "[SHL reg4:r0 reg1:r1]");
TEST64("\x0f\xa5\xd0", "[SHLD reg4:r0 reg4:r2 reg1:r1]");
// IMUL with a 4-byte and a 1-byte encoded immediate.
TEST("\x69\xC7\x08\x01\x00\x00", "[IMUL reg4:r0 reg4:r7 imm4:0x108]");
TEST("\x6B\xC7\x08", "[IMUL reg4:r0 reg4:r7 imm4:0x8]");
// INC: one-byte 40+r forms (32-bit only) and fe/ff forms.
TEST32("\x40", "[INC reg4:r0]");
TEST32("\x43", "[INC reg4:r3]");
TEST32("\x66\x47", "[INC reg2:r7]");
TEST("\xfe\xc0", "[INC reg1:r0]");
TEST("\xfe\xc4", "[INC reg1:r0h]");
TEST("\xff\xc0", "[INC reg4:r0]");
TEST("\xff\xc4", "[INC reg4:r4]");
TEST("\xff\x00", "[INC mem4:r0]");
TEST("\xf0\xff\x00", "[lock:INC mem4:r0]");
TEST("\x66\xff\xc0", "[INC reg2:r0]");
TEST("\x66\xff\xc4", "[INC reg2:r4]");
TEST64("\x48\xff\xc0", "[INC reg8:r0]");
TEST64("\x48\xff\xc4", "[INC reg8:r4]");
TEST64("\x49\xff\xc7", "[INC reg8:r15]");
// JMP rel: offset size per mode, plus truncated encodings.
TEST32("\xe9\x00\x00\x00\x00", "[JMP off4:eip+0x0]");
TEST32("\x66\xe9\x01\x00", "[JMP off2:ip+0x1]");
TEST64("\xe9\x00\x00\x00\x00", "[JMP off8:rip+0x0]");
TEST64("\x66\xe9\x00\x00\x00\x00", "[JMP off8:rip+0x0]");
TEST("\x66\xe9\x00", "PARTIAL");
TEST("\x66\xe9", "PARTIAL");
// MOVSX with different source/destination sizes.
TEST("\x66\x0f\xbe\xc2", "[MOVSX reg2:r0 reg1:r2]");
TEST("\x0f\xbe\xc2", "[MOVSX reg4:r0 reg1:r2]");
TEST("\x0f\xbf\xc2", "[MOVSX reg4:r0 reg2:r2]");
TEST64("\x48\x0f\xbf\xc2", "[MOVSX reg8:r0 reg2:r2]");
TEST64("\x48\x63\xc2", "[MOVSX reg8:r0 reg4:r2]");
// RET with and without immediate, for 2/4/8-byte operand size.
TEST("\x66\xc3", "[RET_2]");
TEST("\x66\xc2\x00\x00", "[RET_2 imm2:0x0]");
TEST("\x66\xc2\x0d\x00", "[RET_2 imm2:0xd]");
TEST("\x66\xc2\x0d\xff", "[RET_2 imm2:0xff0d]");
TEST32("\xc3", "[RET_4]");
TEST32("\xc2\x00\x00", "[RET_4 imm2:0x0]");
TEST32("\xc2\x0d\x00", "[RET_4 imm2:0xd]");
TEST32("\xc2\x0d\xff", "[RET_4 imm2:0xff0d]");
TEST64("\xc3", "[RET_8]");
TEST64("\xc2\x00\x00", "[RET_8 imm2:0x0]");
TEST64("\xc2\x0d\x00", "[RET_8 imm2:0xd]");
TEST64("\xc2\x0d\xff", "[RET_8 imm2:0xff0d]");
// SSE and VEX-encoded (c4/c5) instructions.
TEST("\x66\x0f\xc6\xc0\x01", "[SSE_SHUFPD reg16:r0 reg16:r0 imm1:0x1]");
TEST("\xf3\x0f\x7e\x5c\x24\x08", "[SSE_MOVQ reg16:r3 mem8:r4+0x8]");
TEST("\xc5\xf9\x6e\xc8", "[VMOVD reg4:r1 reg4:r0]");
TEST64("\xc4\xe1\xf9\x6e\xc8", "[VMOVQ reg8:r1 reg8:r0]");
TEST32("\xc4\xe1\xf9\x6e\xc8", "[VMOVD reg4:r1 reg4:r0]");
TEST("\xc5\xf2\x2a\xc0", "[VCVTSI2SS reg16:r0 reg16:r1 reg4:r0]");
TEST32("\xc4\xe1\xf2\x2a\xc0", "[VCVTSI2SS reg16:r0 reg16:r1 reg4:r0]");
TEST64("\xc4\xe1\xf2\x2a\xc0", "[VCVTSI2SS reg16:r0 reg16:r1 reg8:r0]");
TEST64("\xc4\xe2\x75\x90\x04\xe7", "[VPGATHERDD reg32:r0 mem32:r7+8*r4 reg32:r1]");
puts(failed ? "Some tests FAILED" : "All tests PASSED");
return failed ? EXIT_FAILURE : EXIT_SUCCESS;
}