From 1458bf9673ad4fe55e2b875447f542461ae5ac17 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Sun, 10 Jan 2021 16:03:40 +0100 Subject: [PATCH] encode: Support VEX-encoded instructions --- encode.c | 89 +++++++++++++++++++++++++++++++++------------ parseinstrs.py | 3 ++ tests/test_encode.c | 49 +++++++++++++++++++++++++ 3 files changed, 117 insertions(+), 24 deletions(-) diff --git a/encode.c b/encode.c index a42363d..106d2e8 100644 --- a/encode.c +++ b/encode.c @@ -15,6 +15,7 @@ enum { OPC_0F38 = 2 << 16, OPC_0F3A = 3 << 16, OPC_ESCAPE_MSK = 3 << 16, + OPC_VSIB = 1 << 18, OPC_66 = 1 << 19, OPC_F2 = 1 << 20, OPC_F3 = 1 << 21, @@ -31,6 +32,8 @@ enum { #define OPC_SEG_IDX 31 #define OPC_SEG_MSK (0x7l << OPC_SEG_IDX) +#define OPC_VEXOP_IDX 34 +#define OPC_VEXOP_MSK (0xfl << OPC_VEXOP_IDX) static bool op_mem(FeOp op) { return op < 0; } static bool op_reg(FeOp op) { return op >= 0; } @@ -56,16 +59,22 @@ static unsigned opc_size(uint64_t opc) { - if (opc & OPC_VEX) return 0; // TODO: support VEX encoding unsigned res = 1; + if (opc & OPC_VEX) { + if ((opc & (OPC_REXW|OPC_REXX|OPC_REXB|OPC_ESCAPE_MSK)) != OPC_0F) + res += 3; + else + res += 2; + } else { + if (opc & OPC_66) res++; + if (opc & OPC_F2) res++; + if (opc & OPC_F3) res++; + if (opc & (OPC_REX|OPC_REXW|OPC_REXR|OPC_REXX|OPC_REXB)) res++; + if (opc & OPC_ESCAPE_MSK) res++; + if ((opc & OPC_ESCAPE_MSK) == OPC_0F38 || (opc & OPC_ESCAPE_MSK) == OPC_0F3A) res++; + } if (opc & OPC_SEG_MSK) res++; if (opc & OPC_67) res++; - if (opc & OPC_66) res++; - if (opc & OPC_F2) res++; - if (opc & OPC_F3) res++; - if (opc & (OPC_REX|OPC_REXW|OPC_REXR|OPC_REXX|OPC_REXB)) res++; - if (opc & OPC_ESCAPE_MSK) res++; - if ((opc & OPC_ESCAPE_MSK) == OPC_0F38 || (opc & OPC_ESCAPE_MSK) == OPC_0F3A) res++; if ((opc & 0xc000) == 0xc000) res++; return res; } @@ -74,25 +83,46 @@ static int enc_opc(uint8_t** restrict buf, uint64_t opc) { - if (opc & OPC_VEX) return -1; // TODO: support VEX encoding if (opc & OPC_SEG_MSK) *(*buf)++ = (0x65643e362e2600 >> (8 * ((opc >> OPC_SEG_IDX) & 7))) & 0xff; if (opc & OPC_67) *(*buf)++ = 0x67; - if (opc & OPC_66) *(*buf)++ = 0x66; - if (opc & OPC_F2) *(*buf)++ = 0xF2; - if (opc & OPC_F3) *(*buf)++ = 0xF3; - if (opc & (OPC_REX|OPC_REXW|OPC_REXR|OPC_REXX|OPC_REXB)) - { - unsigned rex = 0x40; - if (opc & OPC_REXW) rex |= 8; - if (opc & OPC_REXR) rex |= 4; - if (opc & OPC_REXX) rex |= 2; - if (opc & OPC_REXB) rex |= 1; - *(*buf)++ = rex; + if (opc & OPC_VEX) { + bool vex3 = (opc & (OPC_REXW|OPC_REXX|OPC_REXB|OPC_ESCAPE_MSK)) != OPC_0F; + unsigned pp = 0; + if (opc & OPC_66) pp = 1; + if (opc & OPC_F3) pp = 2; + if (opc & OPC_F2) pp = 3; + *(*buf)++ = 0xc4 | !vex3; + unsigned b2 = pp | (opc & OPC_VEXL ? 0x4 : 0); + if (vex3) { + unsigned b1 = (opc & OPC_ESCAPE_MSK) >> 16; + if (!(opc & OPC_REXR)) b1 |= 0x80; + if (!(opc & OPC_REXX)) b1 |= 0x40; + if (!(opc & OPC_REXB)) b1 |= 0x20; + *(*buf)++ = b1; + if (opc & OPC_REXW) b2 |= 0x80; + } else { + if (!(opc & OPC_REXR)) b2 |= 0x80; + } + b2 |= (~((opc & OPC_VEXOP_MSK) >> OPC_VEXOP_IDX) & 0xf) << 3; + *(*buf)++ = b2; + } else { + if (opc & OPC_66) *(*buf)++ = 0x66; + if (opc & OPC_F2) *(*buf)++ = 0xF2; + if (opc & OPC_F3) *(*buf)++ = 0xF3; + if (opc & (OPC_REX|OPC_REXW|OPC_REXR|OPC_REXX|OPC_REXB)) + { + unsigned rex = 0x40; + if (opc & OPC_REXW) rex |= 8; + if (opc & OPC_REXR) rex |= 4; + if (opc & OPC_REXX) rex |= 2; + if (opc & OPC_REXB) rex |= 1; + *(*buf)++ = rex; + } + if (opc & OPC_ESCAPE_MSK) *(*buf)++ = 0x0F; + if ((opc & OPC_ESCAPE_MSK) == OPC_0F38) *(*buf)++ = 0x38; + if ((opc & OPC_ESCAPE_MSK) == OPC_0F3A) *(*buf)++ = 0x3A; } - if (opc & OPC_ESCAPE_MSK) *(*buf)++ = 0x0F; - if ((opc & OPC_ESCAPE_MSK) == OPC_0F38) *(*buf)++ = 0x38; - if ((opc & OPC_ESCAPE_MSK) == OPC_0F3A) *(*buf)++ = 0x3A; *(*buf)++ = opc & 0xff; if ((opc & 0xc000) == 0xc000) *(*buf)++ = (opc >> 8) & 0xff; return 0; @@ -150,10 +180,18 @@ enc_mr(uint8_t** restrict buf, uint64_t opc, uint64_t op0, uint64_t op1, off = op_mem_offset(op0); if (!!op_mem_idx(op0) != !!op_mem_scale(op0)) return -1; + if (!op_mem_idx(op0) && (opc & OPC_VSIB)) return -1; if (op_mem_idx(op0)) { - if (!op_reg_gpl(op_mem_idx(op0))) return -1; - if (op_reg_idx(op_mem_idx(op0)) == 4) return -1; + if (opc & OPC_VSIB) + { + if (!op_reg_xmm(op_mem_idx(op0))) return -1; + } + else + { + if (!op_reg_gpl(op_mem_idx(op0))) return -1; + if (op_reg_idx(op_mem_idx(op0)) == 4) return -1; + } idx = op_mem_idx(op0) & 7; int scalabs = op_mem_scale(op0); if (scalabs & (scalabs - 1)) return -1; @@ -323,6 +361,9 @@ fe_enc64_impl(uint8_t** restrict buf, uint64_t mnem, FeOp op0, FeOp op1, // NOP has no operands, so this must be the 32-bit OA XCHG if ((desc->opc & ~7) == 0x90 && ops[0] == FE_AX) goto next; + if (ei->vexreg) + opc |= ((uint64_t) op_reg_idx(ops[ei->vexreg^3])) << OPC_VEXOP_IDX; + if (ei->modrm) { FeOp modreg = ei->modreg ? ops[ei->modreg^3] : (opc & 0xff00) >> 8; if (enc_mr(buf, opc, ops[ei->modrm^3], modreg, desc->immsz)) goto fail; diff --git a/parseinstrs.py b/parseinstrs.py index d201b61..63f8878 100644 --- a/parseinstrs.py +++ b/parseinstrs.py @@ -484,6 +484,8 @@ def encode_table(entries): opc_i |= opcode.modreg[0] << 8 opc_flags = "" opc_flags += ["","|OPC_0F","|OPC_0F38","|OPC_0F3A"][opcode.escape] + if "VSIB" in desc.flags: + opc_flags += "|OPC_VSIB" if opcode.vex: hasvex, vecsizes = True, {128, 256} opc_flags += "|OPC_VEX" @@ -574,6 +576,7 @@ def encode_table(entries): tys_i = sum(ty << (4*i) for i, ty in enumerate(tys)) opc_s = hex(opc_i) + opc_flags + prefix[1] if opsize == 16: opc_s += "|OPC_66" + if vecsize == 256: opc_s += "|OPC_VEXL" if opsize == 64 and "DEF64" not in desc.flags and "FORCE64" not in desc.flags: opc_s += "|OPC_REXW" # Construct mnemonic name diff --git a/tests/test_encode.c b/tests/test_encode.c index a7b8498..dce7233 100644 --- a/tests/test_encode.c +++ b/tests/test_encode.c @@ -148,6 +148,55 @@ main(int argc, char** argv) TEST("\x66\x0f\x3a\x14\xc1\x02", FE_SSE_PEXTRBrri, FE_CX, FE_XMM0, 2); TEST("", FE_SSE_PEXTRBrri, FE_CH, FE_XMM0, 2); + // Test VEX encoding + TEST("\xc5\xfc\x77", FE_VZEROALL); + TEST("\xc5\xf8\x77", FE_VZEROUPPER); + TEST("\xc5\xf2\x58\xc2", FE_VADDSSrrr, FE_XMM0, FE_XMM1, FE_XMM2); + TEST("\xc5\xf0\x58\xc2", FE_VADDPS128rrr, FE_XMM0, FE_XMM1, FE_XMM2); + TEST("\xc5\xf4\x58\xc2", FE_VADDPS256rrr, FE_XMM0, FE_XMM1, FE_XMM2); + TEST("\xc4\xc1\x74\x58\xc0", FE_VADDPS256rrr, FE_XMM0, FE_XMM1, FE_XMM8); + TEST("\xc4\x62\x7d\x19\xc2", FE_VBROADCASTSD256rr, FE_XMM8, FE_XMM2); + TEST("\xc4\x62\x7d\x1a\xc2", FE_VBROADCASTF128_256rr, FE_XMM8, FE_XMM2); + TEST("\xc4\xe2\x71\x9d\xc2", FE_VFNMADD132SSrrr, FE_XMM0, FE_XMM1, FE_XMM2); + TEST("\xc4\xe2\xf1\x9d\xc2", FE_VFNMADD132SDrrr, FE_XMM0, FE_XMM1, FE_XMM2); + + // VSIB encodings + TEST("", FE_VGATHERDPS128rmr, FE_XMM0, FE_XMM0, FE_XMM1); // must have memory operand + TEST("", FE_VGATHERDPS128rmr, FE_XMM0, FE_MEM(FE_DI, 8, 0, 0), FE_XMM1); // must have SIB + TEST("", FE_VGATHERDPS128rmr, FE_XMM0, FE_MEM(FE_IP, 0, 0, 0), FE_XMM1); // must have SIB + TEST("\xc4\xe2\x71\x92\x04\xff", FE_VGATHERDPS128rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM7, 0), FE_XMM1); + TEST("\xc4\xe2\x71\x92\x04\xe7", FE_VGATHERDPS128rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM4, 0), FE_XMM1); + TEST("\xc4\xe2\x75\x92\x04\xff", FE_VGATHERDPS256rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM7, 0), FE_XMM1); + TEST("\xc4\xe2\x75\x92\x04\xe7", FE_VGATHERDPS256rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM4, 0), FE_XMM1); + TEST("\xc4\xe2\x71\x93\x04\xff", FE_VGATHERQPS128rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM7, 0), FE_XMM1); + TEST("\xc4\xe2\x71\x93\x04\xe7", FE_VGATHERQPS128rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM4, 0), FE_XMM1); + TEST("\xc4\xe2\x75\x93\x04\xff", FE_VGATHERQPS256rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM7, 0), FE_XMM1); + TEST("\xc4\xe2\x75\x93\x04\xe7", FE_VGATHERQPS256rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM4, 0), FE_XMM1); + TEST("\xc4\xe2\xf1\x92\x04\xff", FE_VGATHERDPD128rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM7, 0), FE_XMM1); + TEST("\xc4\xe2\xf1\x92\x04\xe7", FE_VGATHERDPD128rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM4, 0), FE_XMM1); + TEST("\xc4\xe2\xf5\x92\x04\xff", FE_VGATHERDPD256rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM7, 0), FE_XMM1); + TEST("\xc4\xe2\xf5\x92\x04\xe7", FE_VGATHERDPD256rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM4, 0), FE_XMM1); + TEST("\xc4\xe2\xf1\x93\x04\xff", FE_VGATHERQPD128rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM7, 0), FE_XMM1); + TEST("\xc4\xe2\xf1\x93\x04\xe7", FE_VGATHERQPD128rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM4, 0), FE_XMM1); + TEST("\xc4\xe2\xf5\x93\x04\xff", FE_VGATHERQPD256rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM7, 0), FE_XMM1); + TEST("\xc4\xe2\xf5\x93\x04\xe7", FE_VGATHERQPD256rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM4, 0), FE_XMM1); + TEST("\xc4\xe2\x71\x90\x04\xff", FE_VPGATHERDD128rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM7, 0), FE_XMM1); + TEST("\xc4\xe2\x71\x90\x04\xe7", FE_VPGATHERDD128rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM4, 0), FE_XMM1); + TEST("\xc4\xe2\x75\x90\x04\xff", FE_VPGATHERDD256rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM7, 0), FE_XMM1); + TEST("\xc4\xe2\x75\x90\x04\xe7", FE_VPGATHERDD256rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM4, 0), FE_XMM1); + TEST("\xc4\xe2\x71\x91\x04\xff", FE_VPGATHERQD128rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM7, 0), FE_XMM1); + TEST("\xc4\xe2\x71\x91\x04\xe7", FE_VPGATHERQD128rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM4, 0), FE_XMM1); + TEST("\xc4\xe2\x75\x91\x04\xff", FE_VPGATHERQD256rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM7, 0), FE_XMM1); + TEST("\xc4\xe2\x75\x91\x04\xe7", FE_VPGATHERQD256rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM4, 0), FE_XMM1); + TEST("\xc4\xe2\xf1\x90\x04\xff", FE_VPGATHERDQ128rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM7, 0), FE_XMM1); + TEST("\xc4\xe2\xf1\x90\x04\xe7", FE_VPGATHERDQ128rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM4, 0), FE_XMM1); + TEST("\xc4\xe2\xf5\x90\x04\xff", FE_VPGATHERDQ256rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM7, 0), FE_XMM1); + TEST("\xc4\xe2\xf5\x90\x04\xe7", FE_VPGATHERDQ256rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM4, 0), FE_XMM1); + TEST("\xc4\xe2\xf1\x91\x04\xff", FE_VPGATHERQQ128rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM7, 0), FE_XMM1); + TEST("\xc4\xe2\xf1\x91\x04\xe7", FE_VPGATHERQQ128rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM4, 0), FE_XMM1); + TEST("\xc4\xe2\xf5\x91\x04\xff", FE_VPGATHERQQ256rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM7, 0), FE_XMM1); + TEST("\xc4\xe2\xf5\x91\x04\xe7", FE_VPGATHERQQ256rmr, FE_XMM0, FE_MEM(FE_DI, 8, FE_XMM4, 0), FE_XMM1); + // Test ModRM encoding TEST("\x01\x00", FE_ADD32mr, FE_MEM(FE_AX, 0, 0, 0), FE_AX); TEST("\x01\x04\x24", FE_ADD32mr, FE_MEM(FE_SP, 0, 0, 0), FE_AX);