encode: Add library for x86-64 encoding

This commit is contained in:
Alexis Engelke
2020-06-24 11:20:50 +02:00
parent 4e95c8d152
commit 69ce124354
7 changed files with 817 additions and 110 deletions

342
encode.c Normal file
View File

@@ -0,0 +1,342 @@
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <fadec-enc.h>
// Branch hints built on the GCC/Clang builtin __builtin_expect: the second
// argument is the value the condition is expected to have.
#define LIKELY(x) __builtin_expect((x), 1)
#define UNLIKELY(x) __builtin_expect((x), 0)
/**
 * Flag bits OR-ed into a 64-bit opcode descriptor.
 *
 * Bits 0..7 of a descriptor hold the primary opcode byte and bits 8..15 hold
 * either a second opcode byte or a constant for ModRM.reg; everything from
 * bit 16 upwards is one of the flags below.
 */
enum {
    OPC_0F = 1 << 16,                /* 0x0F escape byte */
    OPC_0F38 = (1 << 17) | OPC_0F,   /* 0x0F 0x38 escape */
    OPC_0F3A = (1 << 18) | OPC_0F,   /* 0x0F 0x3A escape */
    OPC_66 = 1 << 19,                /* 0x66 prefix */
    OPC_F2 = 1 << 20,                /* 0xF2 prefix */
    OPC_F3 = 1 << 21,                /* 0xF3 prefix */
    OPC_REXW = 1 << 22,              /* REX.W */
    OPC_LOCK = 1 << 23,              /* set by the LOCK_* mnemonic variants */
    OPC_VEX = 1 << 24,               /* VEX-encoded instruction */
    OPC_VEXL = 1 << 25,              /* VEX.L */
    OPC_REXR = 1 << 28,              /* REX.R */
    OPC_REXX = 1 << 27,              /* REX.X */
    OPC_REXB = 1 << 26,              /* REX.B */
    OPC_REX = 1 << 29,               /* force a REX prefix even without W/R/X/B */
    OPC_67 = 1 << 30,                /* 0x67 address-size prefix */
};
/*
 * Three-bit segment-override selector in bits 31..33. This does not fit into
 * an int, so it cannot portably be an enumerator, and shifting a long by 32
 * is undefined where long is 32 bits wide. Hence a 64-bit macro constant.
 */
#define OPC_SEG (UINT64_C(7) << 31)
/* True if op, with the bits in idx_mask cleared, equals class_base. */
static bool op_reg_in(FeOp op, FeOp class_base, FeOp idx_mask)
{
    return (op & ~idx_mask) == class_base;
}

/* Extract the bit field selected by mask after shifting op right by shift. */
static unsigned op_bits(FeOp op, unsigned shift, unsigned mask)
{
    return (unsigned) ((op >> shift) & mask);
}

/* Operand kind: negative values are memory operands, all others registers. */
static bool op_mem(FeOp op)
{
    return op < 0;
}
static bool op_reg(FeOp op)
{
    return !(op < 0);
}

/* Register-class tests; each class occupies its own 0x100-aligned range. */
static bool op_reg_gpl(FeOp op)
{
    return op_reg_in(op, 0x100, 0xf);
}
static bool op_reg_gph(FeOp op)
{
    return op_reg_in(op, 0x204, 0x3);
}
static bool op_reg_seg(FeOp op)
{
    /* Only six of the eight slots in the 0x300 range are accepted. */
    return op_reg_in(op, 0x300, 0x7) && (op & 7) <= 5;
}
static bool op_reg_fpu(FeOp op)
{
    return op_reg_in(op, 0x400, 0x7);
}
static bool op_reg_mmx(FeOp op)
{
    return op_reg_in(op, 0x500, 0x7);
}
static bool op_reg_xmm(FeOp op)
{
    return op_reg_in(op, 0x600, 0xf);
}

/* Memory-operand fields: low 32 bits are the sign-extended displacement. */
static int64_t op_mem_offset(FeOp op)
{
    int32_t disp = (int32_t) op;
    return disp;
}
static unsigned op_mem_base(FeOp op)
{
    return op_bits(op, 32, 0xfff);
}
static unsigned op_mem_idx(FeOp op)
{
    return op_bits(op, 44, 0xfff);
}
static unsigned op_mem_scale(FeOp op)
{
    return op_bits(op, 56, 0xf);
}

/* Index of a register within its class (low byte of the operand). */
static unsigned op_reg_idx(FeOp op)
{
    return op_bits(op, 0, 0xff);
}
/**
 * Check whether imm is unchanged by truncation to immsz bytes followed by
 * sign extension. Only sizes 1, 2 and 4 are checked; any other size is
 * accepted unconditionally.
 */
static bool op_imm_n(FeOp imm, unsigned immsz)
{
    switch (immsz) {
    case 1:
        return (int8_t) imm == imm;
    case 2:
        return (int16_t) imm == imm;
    case 4:
        return (int32_t) imm == imm;
    default:
        return true;
    }
}
static
unsigned
opc_size(uint64_t opc)
{
if (opc & OPC_VEX) return 0; // TODO: support VEX encoding
unsigned res = 1;
if (opc & OPC_SEG) res++;
if (opc & OPC_67) res++;
if (opc & OPC_66) res++;
if (opc & OPC_F2) res++;
if (opc & OPC_F3) res++;
if (opc & (OPC_REX|OPC_REXW|OPC_REXR|OPC_REXX|OPC_REXB)) res++;
if (opc & OPC_0F) res++;
if ((opc & OPC_0F38) == OPC_0F38) res++;
if ((opc & OPC_0F3A) == OPC_0F3A) res++;
if ((opc & 0xc000) == 0xc000) res++;
return res;
}
static
int
enc_opc(uint8_t** restrict buf, uint64_t opc)
{
if (opc & OPC_VEX) return -1; // TODO: support VEX encoding
if (opc & OPC_SEG)
*(*buf)++ = (0x65643e362e2600 >> (8 * ((opc >> 31) & 7))) & 0xff;
if (opc & OPC_67) *(*buf)++ = 0x67;
if (opc & OPC_66) *(*buf)++ = 0x66;
if (opc & OPC_F2) *(*buf)++ = 0xF2;
if (opc & OPC_F3) *(*buf)++ = 0xF3;
if (opc & (OPC_REX|OPC_REXW|OPC_REXR|OPC_REXX|OPC_REXB))
{
unsigned rex = 0x40;
if (opc & OPC_REXW) rex |= 8;
if (opc & OPC_REXR) rex |= 4;
if (opc & OPC_REXX) rex |= 2;
if (opc & OPC_REXB) rex |= 1;
*(*buf)++ = rex;
}
if (opc & OPC_0F) *(*buf)++ = 0x0F;
if ((opc & OPC_0F38) == OPC_0F38) *(*buf)++ = 0x38;
if ((opc & OPC_0F3A) == OPC_0F3A) *(*buf)++ = 0x3A;
*(*buf)++ = opc & 0xff;
if ((opc & 0xc000) == 0xc000) *(*buf)++ = (opc >> 8) & 0xff;
return 0;
}
/**
 * Append the low immsz bytes of imm to *buf in little-endian order and
 * advance *buf. Returns -1 without writing if op_imm_n() rejects the value
 * for this size, 0 otherwise.
 */
static
int
enc_imm(uint8_t** restrict buf, uint64_t imm, unsigned immsz)
{
    if (!op_imm_n(imm, immsz))
        return -1;

    uint8_t* out = *buf;
    for (unsigned left = immsz; left > 0; left--)
    {
        *out++ = (uint8_t) imm;
        imm >>= 8;
    }
    *buf = out;
    return 0;
}
/**
 * Encode an instruction whose register operand is embedded in the low three
 * bits of the last opcode byte. Bit 3 of the register index goes to REX.B.
 *
 * @return 0 on success, -1 if a high-byte register is combined with a REX
 *         prefix or the opcode cannot be encoded.
 */
static
int
enc_o(uint8_t** restrict buf, uint64_t opc, uint64_t op0)
{
    const uint64_t rex_bits = OPC_REX|OPC_REXW|OPC_REXR|OPC_REXX|OPC_REXB;
    const unsigned regno = op_reg_idx(op0);

    if (regno & 0x8)
        opc |= OPC_REXB;
    if ((opc & rex_bits) != 0 && op_reg_gph(op0))
        return -1;
    if (enc_opc(buf, opc) != 0)
        return -1;

    // Patch the register number into the byte enc_opc() wrote last.
    uint8_t* last = *buf - 1;
    *last = (*last & 0xf8) | (regno & 0x7);
    return 0;
}
/**
 * Encode opcode, ModRM, optional SIB and optional displacement.
 *
 * @param buf   Output cursor; advanced past the bytes written.
 * @param opc   Opcode descriptor; REX.B/X/R are added here as needed.
 * @param op0   Operand for ModRM.rm: a register or a memory operand.
 * @param op1   Operand for ModRM.reg: a register, or a small constant whose
 *              low three bits are placed in ModRM.reg.
 * @param immsz Size of an immediate the caller appends afterwards; only used
 *              to adjust RIP-relative displacements.
 * @return 0 on success, -1 if the operand combination cannot be encoded.
 */
static
int
enc_mr(uint8_t** restrict buf, uint64_t opc, uint64_t op0, uint64_t op1,
unsigned immsz)
{
// op1 is either a register or a constant value for ModRM.reg; in both cases
// only the low bits are inspected below.
// Bit 3 of each register index is carried in the REX prefix.
if (op_reg(op0) && (op_reg_idx(op0) & 0x8)) opc |= OPC_REXB;
if (op_mem(op0) && (op_mem_base(op0) & 0x8)) opc |= OPC_REXB;
if (op_mem(op0) && (op_mem_idx(op0) & 0x8)) opc |= OPC_REXX;
if (op_reg(op1) && op_reg_idx(op1) & 0x8) opc |= OPC_REXR;
// High-byte registers cannot be combined with any REX prefix.
bool has_rex = !!(opc & (OPC_REX|OPC_REXW|OPC_REXR|OPC_REXX|OPC_REXB));
if (has_rex && (op_reg_gph(op0) || op_reg_gph(op1))) return -1;
int mod = 0, reg = op1 & 7, rm;
// SIB defaults: idx = 4 means "no index register".
int scale = 0, idx = 4, base = 0;
int32_t off = 0;
// mod0off: a 32-bit displacement is emitted although mod stays 0
// (no base register, or RIP-relative).
bool withsib = false, mod0off = false;
if (op_reg(op0))
{
// Register-direct operand.
mod = 3;
rm = op_reg_idx(op0) & 7;
}
else
{
off = op_mem_offset(op0);
// Index register and scale must be given together or not at all.
if (!!op_mem_idx(op0) != !!op_mem_scale(op0)) return -1;
if (op_mem_idx(op0))
{
if (!op_reg_gpl(op_mem_idx(op0))) return -1;
// Register index 4 is rejected as index; 4 in SIB.index means "none".
if (op_reg_idx(op_mem_idx(op0)) == 4) return -1;
idx = op_mem_idx(op0) & 7;
int scalabs = op_mem_scale(op0);
// Scale must be a power of two.
if (scalabs & (scalabs - 1)) return -1;
// Map 1/2/4/8 to the two-bit SIB.scale value 0/1/2/3.
scale = (scalabs & 0xA ? 1 : 0) | (scalabs & 0xC ? 2 : 0);
withsib = true;
}
if (!op_mem_base(op0))
{
// No base register: SIB with base = 5 and mod = 0, followed by disp32.
rm = 5;
mod0off = true;
withsib = true;
}
else if (op_mem_base(op0) == FE_IP)
{
// RIP-relative: ModRM.rm = 5 with mod = 0, followed by disp32.
rm = 5;
mod0off = true;
// Adjust offset, caller doesn't know instruction length.
// The 5 accounts for the ModRM byte plus the 4-byte displacement.
off -= opc_size(opc) + 5 + immsz;
// An index register cannot be combined with a RIP base.
if (withsib) return -1;
}
else
{
if (!op_reg_gpl(op_mem_base(op0))) return -1;
rm = op_reg_idx(op_mem_base(op0)) & 7;
// rm = 5 with mod = 0 is taken by the disp32 forms above, so such a
// base always gets at least an 8-bit displacement.
if (rm == 5) mod = 1;
}
// Choose the displacement size: 8 bit if it fits, otherwise 32 bit.
if (off && op_imm_n(off, 1) && !mod0off)
mod = 1;
else if (off && !mod0off)
mod = 2;
// rm = 4 selects a SIB byte, so a base with low bits 4 needs one as well.
if (withsib || rm == 4)
{
base = rm;
rm = 4;
}
}
if (enc_opc(buf, opc)) return -1;
*(*buf)++ = (mod << 6) | (reg << 3) | rm;
if (mod != 3 && rm == 4)
*(*buf)++ = (scale << 6) | (idx << 3) | base;
if (mod == 1) return enc_imm(buf, off, 1);
if (mod == 2 || mod0off) return enc_imm(buf, off, 4);
return 0;
}
/**
 * Operand layouts of an instruction; each value indexes encoding_infos.
 * ENC_INVALID (0) marks a descriptor that cannot be encoded; ENC_MAX is the
 * number of entries and not a layout itself.
 */
typedef enum {
ENC_INVALID = 0,
ENC_NP,
ENC_M, ENC_M1, ENC_MI, ENC_MC, ENC_MR, ENC_RM, ENC_RMA, ENC_MRI, ENC_RMI, ENC_MRC,
ENC_I, ENC_IA, ENC_O, ENC_OI, ENC_OA, ENC_AO, ENC_A, ENC_D, ENC_FD, ENC_TD,
ENC_RVM, ENC_RVMI, ENC_RVMR, ENC_RMV, ENC_VM, ENC_VMI, ENC_MVR,
ENC_MAX
} Encoding;
/**
 * Describes which operand goes where for one Encoding.
 *
 * modrm, modreg, vexreg and zregidx store the operand index XOR 3, so that
 * the value 0 means "not present"; immidx stores the plain operand index.
 */
struct EncodingInfo {
uint8_t modrm : 2; // operand encoded in ModRM.rm (index ^ 3)
uint8_t modreg : 2; // operand encoded in ModRM.reg or in the opcode byte (index ^ 3)
uint8_t vexreg : 2; // not read by the encoder yet; presumably the VEX.vvvv operand -- TODO confirm
uint8_t immidx : 2; // operand index of the immediate
// Immediate handling: 0 = none; 1 = operand must equal 1 and is not encoded;
// >= 2 = range-checked and encoded; 3 = value not taken from the operands;
// 6 = jump target, converted to an offset relative to the instruction end.
uint8_t immctl : 3;
uint8_t zregidx : 2; // operand whose register index must equal zregval (index ^ 3)
uint8_t zregval : 1; // required register index for the zregidx operand
};
// Operand placement for every Encoding. Operand indices are written as
// "index^3" to make the XOR-3 storage convention visible (0^3 is operand 0).
// NOTE(review): this table has external linkage but is not declared in the
// header shown here; consider making it static if no other file uses it.
const struct EncodingInfo encoding_infos[ENC_MAX] = {
[ENC_INVALID] = { 0 },
[ENC_NP] = { 0 },
[ENC_M] = { .modrm = 0^3 },
[ENC_M1] = { .modrm = 0^3, .immctl = 1, .immidx = 1 },
[ENC_MI] = { .modrm = 0^3, .immctl = 4, .immidx = 1 },
[ENC_MC] = { .modrm = 0^3, .zregidx = 1^3, .zregval = 1 },
[ENC_MR] = { .modrm = 0^3, .modreg = 1^3 },
[ENC_RM] = { .modrm = 1^3, .modreg = 0^3 },
[ENC_RMA] = { .modrm = 1^3, .modreg = 0^3, .zregidx = 2^3, .zregval = 0 },
[ENC_MRI] = { .modrm = 0^3, .modreg = 1^3, .immctl = 4, .immidx = 2 },
[ENC_RMI] = { .modrm = 1^3, .modreg = 0^3, .immctl = 4, .immidx = 2 },
[ENC_MRC] = { .modrm = 0^3, .modreg = 1^3, .zregidx = 2^3, .zregval = 1 },
[ENC_I] = { .immctl = 4, .immidx = 0 },
[ENC_IA] = { .zregidx = 0^3, .zregval = 0, .immctl = 4, .immidx = 1 },
[ENC_O] = { .modreg = 0^3 },
[ENC_OI] = { .modreg = 0^3, .immctl = 4, .immidx = 1 },
[ENC_OA] = { .modreg = 0^3, .zregidx = 1^3, .zregval = 0 },
[ENC_AO] = { .modreg = 1^3, .zregidx = 0^3, .zregval = 0 },
[ENC_A] = { .zregidx = 0^3, .zregval = 0 },
[ENC_D] = { .immctl = 6, .immidx = 0 },
[ENC_FD] = { .immctl = 2, .immidx = 1 },
[ENC_TD] = { .immctl = 2, .immidx = 0 },
[ENC_RVM] = { .modrm = 2^3, .modreg = 0^3, .vexreg = 1^3 },
[ENC_RVMI] = { .modrm = 2^3, .modreg = 0^3, .vexreg = 1^3, .immctl = 4, .immidx = 3 },
[ENC_RVMR] = { .modrm = 2^3, .modreg = 0^3, .vexreg = 1^3, .immctl = 3, .immidx = 3 },
[ENC_RMV] = { .modrm = 1^3, .modreg = 0^3, .vexreg = 2^3 },
[ENC_VM] = { .modrm = 1^3, .vexreg = 0^3 },
[ENC_VMI] = { .modrm = 1^3, .vexreg = 0^3, .immctl = 4, .immidx = 2 },
[ENC_MVR] = { .modrm = 0^3, .modreg = 1^3, .vexreg = 1^3 },
};
/**
 * One encodable instruction variant, packed into 64 bits.
 * Variants of the same mnemonic form a chain linked through alt.
 */
struct EncodeDesc {
uint64_t opc : 26; // opcode bytes plus OPC_* flags up to and including OPC_VEXL
uint64_t enc : 5; // Encoding value, index into encoding_infos
uint64_t immsz : 4; // size of the encoded immediate in bytes
uint64_t alt : 13; // index of the next variant to try; 0 terminates the chain
uint64_t tys : 16; // four bits of operand-type code per operand, operand 0 lowest
};
// Descriptor table, indexed by mnemonic value and by the alt links; the
// entries come from the generated file fadec-enc-cases.inc.
static const struct EncodeDesc descs[] = {
#include <fadec-enc-cases.inc>
};
/**
 * Encode a single instruction.
 *
 * Walks the chain of descriptors for the mnemonic and uses the first one whose
 * operand types, fixed-register constraints and immediate size match.
 *
 * @param buf  In/out cursor into the output buffer. Advanced past the encoded
 *             instruction on success, restored to its entry value on failure.
 * @param mnem Mnemonic in the low 16 bits, optionally combined with
 *             FE_ADDR32, FE_JMPL and a FE_SEG() override.
 * @param op0..op3 Operands; unused positions are 0.
 * @return 0 on success, -1 if no descriptor matches or encoding fails.
 */
int
fe_enc64_impl(uint8_t** restrict buf, uint64_t mnem, FeOp op0, FeOp op1,
              FeOp op2, FeOp op3)
{
    uint8_t* buf_start = *buf;
    uint64_t ops[4] = {op0, op1, op2, op3};

    uint64_t desc_idx = mnem & FE_MNEM_MASK;
    if (UNLIKELY(desc_idx >= FE_MNEM_MAX)) goto fail;

    do
    {
        const struct EncodeDesc* desc = &descs[desc_idx];
        const struct EncodingInfo* ei = &encoding_infos[desc->enc];
        uint64_t opc = desc->opc;
        // Initialised because immctl == 3 does not load a value from the
        // operands but still reaches the size check and enc_imm() below.
        int64_t imm = 0;

        if (UNLIKELY(desc->enc == ENC_INVALID)) goto fail;

        // Encodings with an implicit register only accept that register.
        if (ei->zregidx)
        {
            if (op_reg_idx(ops[ei->zregidx^3]) != ei->zregval) goto next;
        }

        for (int i = 0; i < 4; i++) {
            unsigned ty = (desc->tys >> (4 * i)) & 0xf;
            FeOp op = ops[i];
            if (ty == 0x0) continue;
            if (ty == 0xf && !op_mem(op)) goto next;
            if (ty == 0x1 && !op_reg_gpl(op)) goto next;
            if (ty == 0x2 && !op_reg_gpl(op) && !op_reg_gph(op)) goto next;
            // Type 2 operands from FE_SP upwards get a forced REX prefix.
            if (ty == 0x2 && op_reg_gpl(op) && op >= FE_SP) opc |= OPC_REX;
            if (ty == 0x3 && !op_reg_seg(op)) goto next;
            if (ty == 0x4 && !op_reg_fpu(op)) goto next;
            if (ty == 0x5 && !op_reg_mmx(op)) goto next;
            if (ty == 0x6 && !op_reg_xmm(op)) goto next;
            if (UNLIKELY(ty >= 7 && ty < 0xf)) goto next; // TODO: support BND, CR, DR
        }

        if (UNLIKELY(mnem & FE_ADDR32))
            opc |= OPC_67;
        if (UNLIKELY(mnem & FE_SEG_MASK))
            opc |= (mnem & FE_SEG_MASK) << (31 - 16);

        if (ei->immctl && ei->immctl != 3)
            imm = ops[ei->immidx];
        if (ei->immctl == 6) {
            // With FE_JMPL, skip every variant that still has an alternative.
            if (UNLIKELY(mnem & FE_JMPL) && desc->alt) goto next;
            // Turn the absolute target into an offset from the end of the
            // instruction.
            imm -= (int64_t) (intptr_t) *buf + opc_size(opc) + desc->immsz;
        }
        if (UNLIKELY(ei->immctl == 1) && imm != 1) goto next;
        if (ei->immctl >= 2 && !op_imm_n(imm, desc->immsz)) goto next;

        // NOP has no operands, so this must be the 32-bit OA XCHG
        if ((desc->opc & ~7) == 0x90 && ops[0] == FE_AX) goto next;

        if (ei->modrm) {
            // Without a register operand, ModRM.reg comes from opcode bits 8..15.
            FeOp modreg = ei->modreg ? ops[ei->modreg^3] : (opc & 0xff00) >> 8;
            if (enc_mr(buf, opc, ops[ei->modrm^3], modreg, desc->immsz)) goto fail;
        } else if (ei->modreg) {
            if (enc_o(buf, opc, ops[ei->modreg^3])) goto fail;
        } else {
            if (enc_opc(buf, opc)) goto fail;
        }

        if (ei->immctl >= 2)
            if (enc_imm(buf, imm, desc->immsz)) goto fail;

        return 0;

    next:
        desc_idx = desc->alt;
    } while (desc_idx != 0);

fail:
    // Don't advance buffer on error; though we shouldn't write anything.
    *buf = buf_start;
    return -1;
}

52
fadec-enc.h Normal file
View File

@@ -0,0 +1,52 @@
#ifndef FD_FADEC_ENC_H_
#define FD_FADEC_ENC_H_
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
 * Register operands. The upper bits select the register class (each class
 * starts at its own multiple of 0x100), the low bits are the index within
 * the class. FE_IP directly follows FE_R15.
 */
typedef enum {
FE_AX = 0x100, FE_CX, FE_DX, FE_BX, FE_SP, FE_BP, FE_SI, FE_DI,
FE_R8, FE_R9, FE_R10, FE_R11, FE_R12, FE_R13, FE_R14, FE_R15, FE_IP,
FE_AH = 0x204, FE_CH, FE_DH, FE_BH,
FE_ES = 0x300, FE_CS, FE_SS, FE_DS, FE_FS, FE_GS,
FE_ST0 = 0x400, FE_ST1, FE_ST2, FE_ST3, FE_ST4, FE_ST5, FE_ST6, FE_ST7,
FE_MM0 = 0x500, FE_MM1, FE_MM2, FE_MM3, FE_MM4, FE_MM5, FE_MM6, FE_MM7,
FE_XMM0 = 0x600, FE_XMM1, FE_XMM2, FE_XMM3, FE_XMM4, FE_XMM5, FE_XMM6, FE_XMM7,
FE_XMM8, FE_XMM9, FE_XMM10, FE_XMM11, FE_XMM12, FE_XMM13, FE_XMM14, FE_XMM15,
} FeReg;
/** An instruction operand: a FeReg value, an immediate, or a FE_MEM() value. **/
typedef int64_t FeOp;
/** Build a memory operand. The sign bit is always set; the base register
 * occupies bits 32..43, the index register bits 44..55, the scale bits
 * 56..59, and the displacement the low 32 bits. Pass 0 for an absent base or
 * index register. **/
#define FE_MEM(base,sc,idx,off) (INT64_MIN | ((int64_t) ((base) & 0xfff) << 32) | ((int64_t) ((idx) & 0xfff) << 44) | ((int64_t) ((sc) & 0xf) << 56) | ((off) & 0xffffffff))
/** Segment override, OR-ed into the mnemonic; takes a segment register and
 * stores its index plus one in bits 16..18. **/
#define FE_SEG(seg) ((((seg) & 0x7) + 1) << 16)
/** Mask covering the bits set by FE_SEG(). **/
#define FE_SEG_MASK 0x70000
/** OR-ed into the mnemonic to request the 0x67 address-size prefix. **/
#define FE_ADDR32 0x80000
/** Used together with a RIP-relative (conditional) jump, this will force the
 * use of the encoding with the largest distance. Useful for reserving a jump
 * when the target offset is still unknown; if the jump is re-encoded later on,
 * FE_JMPL must be specified there, too, so that the encoding lengths match. **/
#define FE_JMPL 0x100000
/** Mask selecting the mnemonic number from the mnemonic argument. **/
#define FE_MNEM_MASK 0xffff
/** Mnemonic constants, taken from the generated file fadec-enc-mnems.inc.
 * FE_MNEM_MAX follows the last entry of that list. **/
enum {
#define FE_MNEMONIC(name,value) name = value,
#include <fadec-enc-mnems.inc>
#undef FE_MNEMONIC
FE_MNEM_MAX
};
/** Helper for fe_enc64: picks the first four operands and drops the padding. **/
#define fe_enc64_1(buf, mnem, op0, op1, op2, op3, ...) fe_enc64_impl(buf, mnem, op0, op1, op2, op3)
/** Encode one instruction: fe_enc64(&buf, mnemonic, operands...). Accepts
 * zero to four operands; missing operands are passed as 0. **/
#define fe_enc64(buf, ...) fe_enc64_1(buf, __VA_ARGS__, 0, 0, 0, 0, 0)
/** Implementation behind fe_enc64. Returns 0 on success and -1 on failure;
 * on failure *buf is left at its original value. **/
int fe_enc64_impl(uint8_t** buf, uint64_t mnem, FeOp op0, FeOp op1, FeOp op2, FeOp op3);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -70,7 +70,7 @@
61 NP - - - - POPA ONLY32 INSTR_WIDTH
62 RM GP MEM - - BOUND ONLY32
63 MR GP16 GP16 - - ARPL ONLY32
63 RM GP GP32 - - MOVSX ONLY64
63 RM GP GP32 - - MOVSX ONLY64 ENC_SEPSZ
#64 SEG=FS prefix
#65 SEG=GS prefix
#66 operand size prefix
@@ -79,10 +79,10 @@
69 RMI GP GP IMM - IMUL
6a I IMM - - - PUSH DEF64 IMM_8
6b RMI GP GP IMM - IMUL IMM_8
6c NP - - - - INS SIZE_8 INSTR_WIDTH
6d NP - - - - INS INSTR_WIDTH
6e NP - - - - OUTS SIZE_8 INSTR_WIDTH
6f NP - - - - OUTS INSTR_WIDTH
6c NP - - - - INS SIZE_8 INSTR_WIDTH ENC_REP
6d NP - - - - INS INSTR_WIDTH ENC_REP
6e NP - - - - OUTS SIZE_8 INSTR_WIDTH ENC_REP
6f NP - - - - OUTS INSTR_WIDTH ENC_REP
70 D IMM - - - JO DEF64 IMM_8
71 D IMM - - - JNO DEF64 IMM_8
72 D IMM - - - JC DEF64 IMM_8
@@ -145,14 +145,7 @@
8f/0 M GP - - - POP DEF64
# Against frequent belief, only, XCHG (r/e)AX, (r)AX with 90 is NOP.
# As a lacking REX.B cannot be specified here, this is hardcoded.
90 OA GP GP - - XCHG_NOP
91 OA GP GP - - XCHG
92 OA GP GP - - XCHG
93 OA GP GP - - XCHG
94 OA GP GP - - XCHG
95 OA GP GP - - XCHG
96 OA GP GP - - XCHG
97 OA GP GP - - XCHG
90+ OA GP GP - - XCHG_NOP
98 NP - - - - C_EX INSTR_WIDTH
99 NP - - - - C_SEP INSTR_WIDTH
#9a CALLF TODO
@@ -165,18 +158,18 @@ a0 FD GP GP - - MOV SIZE_8
a1 FD GP GP - - MOV
a2 TD GP GP - - MOV SIZE_8
a3 TD GP GP - - MOV
a4 NP - - - - MOVS SIZE_8 INSTR_WIDTH
a5 NP - - - - MOVS INSTR_WIDTH
a6 NP - - - - CMPS SIZE_8 INSTR_WIDTH
a7 NP - - - - CMPS INSTR_WIDTH
a4 NP - - - - MOVS SIZE_8 INSTR_WIDTH ENC_REP
a5 NP - - - - MOVS INSTR_WIDTH ENC_REP
a6 NP - - - - CMPS SIZE_8 INSTR_WIDTH ENC_REPCC
a7 NP - - - - CMPS INSTR_WIDTH ENC_REPCC
a8 IA GP IMM - - TEST SIZE_8
a9 IA GP IMM - - TEST
aa NP - - - - STOS SIZE_8 INSTR_WIDTH
ab NP - - - - STOS INSTR_WIDTH
ac NP - - - - LODS SIZE_8 INSTR_WIDTH
ad NP - - - - LODS INSTR_WIDTH
ae NP - - - - SCAS SIZE_8 INSTR_WIDTH
af NP - - - - SCAS INSTR_WIDTH
aa NP - - - - STOS SIZE_8 INSTR_WIDTH ENC_REP
ab NP - - - - STOS INSTR_WIDTH ENC_REP
ac NP - - - - LODS SIZE_8 INSTR_WIDTH ENC_REP
ad NP - - - - LODS INSTR_WIDTH ENC_REP
ae NP - - - - SCAS SIZE_8 INSTR_WIDTH ENC_REPCC
af NP - - - - SCAS INSTR_WIDTH ENC_REPCC
b0+ OI GP IMM8 - - MOVABS SIZE_8
b8+ OI GP IMM - - MOVABS
c0/0 MI GP IMM8 - - ROL SIZE_8
@@ -442,8 +435,8 @@ NP.0f37 NP - - - - GETSEC
0fb3 MR GP GP - - BTR LOCK
0fb4 RM GP MEM - - LFS
0fb5 RM GP MEM - - LGS
0fb6 RM GP GP8 - - MOVZX
0fb7 RM GP GP16 - - MOVZX
0fb6 RM GP GP8 - - MOVZX ENC_SEPSZ
0fb7 RM GP GP16 - - MOVZX ENC_SEPSZ
RF3.0fb8 RM GP GP - - POPCNT
0fb9 RM GP GP - - UD1
0fba/4 MI GP IMM8 - - BT
@@ -457,8 +450,8 @@ RF3.0fbc RM GP GP - - TZCNT
RNP.0fbd RM GP GP - - BSR
RF2.0fbd RM GP GP - - BSR
RF3.0fbd RM GP GP - - LZCNT
0fbe RM GP GP8 - - MOVSX
0fbf RM GP GP16 - - MOVSX
0fbe RM GP GP8 - - MOVSX ENC_SEPSZ
0fbf RM GP GP16 - - MOVSX ENC_SEPSZ
0fc0 MR GP GP - - XADD SIZE_8 LOCK
0fc1 MR GP GP - - XADD LOCK
NP.0fc3 MR MEM GP - - MOVNTI
@@ -508,7 +501,7 @@ NP.0f77 NP - - - - MMX_EMMS
NP.W0.0f7e MR GP32 MMX - - MMX_MOVD
NP.W1.0f7e MR GP64 MMX - - MMX_MOVQ
NP.0f7f MR MMX MMX - - MMX_MOVQ
NP.0fc4 RMI MMX GP IMM8 - MMX_PINSRW
NP.0fc4 RMI MMX GP IMM8 - MMX_PINSRW ENC_NOSZ
NP.0fc5 RMI GP MMX IMM8 - MMX_PEXTRW DEF64 NOMEM
NP.0fd1 RM MMX MMX - - MMX_PSRLW
NP.0fd2 RM MMX MMX - - MMX_PSRLD
@@ -968,9 +961,9 @@ VEX.66.0f6a RVM XMM XMM XMM - VPUNPCKHDQ
VEX.66.0f6b RVM XMM XMM XMM - VPACKSSDW
VEX.66.0f6c RVM XMM XMM XMM - VPUNPCKLQDQ
VEX.66.0f6d RVM XMM XMM XMM - VPUNPCKHQDQ
VEX.66.W0.L0.0f6e RM XMM32 GP - - VMOVD
VEX.66.W1.L0.0f6e RM XMM32 GP - - VMOVD ONLY32
VEX.66.W1.L0.0f6e RM XMM64 GP - - VMOVQ ONLY64
VEX.66.W0.L0.0f6e RM XMM32 GP - - VMOVD ENC_NOSZ
VEX.66.W1.L0.0f6e RM XMM32 GP - - VMOVD ONLY32 ENC_NOSZ
VEX.66.W1.L0.0f6e RM XMM64 GP - - VMOVQ ONLY64 ENC_NOSZ
VEX.66.0f6f RM XMM XMM - - VMOVDQA
VEX.F3.0f6f RM XMM XMM - - VMOVDQU
VEX.66.0f70 RMI XMM XMM IMM8 - VPSHUFD
@@ -995,10 +988,10 @@ VEX.66.0f7c RVM XMM XMM XMM - VHADDPD
VEX.F2.0f7c RVM XMM XMM XMM - VHADDPS
VEX.66.0f7d RVM XMM XMM XMM - VHSUBPD
VEX.F2.0f7d RVM XMM XMM XMM - VHSUBPS
VEX.66.W0.L0.0f7e MR GP XMM32 - - VMOVD
VEX.66.W1.L0.0f7e MR GP XMM32 - - VMOVQ ONLY32
VEX.66.W1.L0.0f7e MR GP XMM64 - - VMOVQ ONLY64
VEX.F3.L0.0f7e RM XMM64 XMM64 - - VMOVQ
VEX.66.W0.L0.0f7e MR GP XMM32 - - VMOVD ENC_NOSZ
VEX.66.W1.L0.0f7e MR GP XMM32 - - VMOVQ ONLY32 ENC_NOSZ
VEX.66.W1.L0.0f7e MR GP XMM64 - - VMOVQ ONLY64 ENC_NOSZ
VEX.F3.L0.0f7e RM XMM64 XMM64 - - VMOVQ ENC_NOSZ
VEX.66.0f7f MR XMM XMM - - VMOVDQA
VEX.F3.0f7f MR XMM XMM - - VMOVDQU
VEX.NP.0fae//2 M GP32 - - - VLDMXCSR
@@ -1007,8 +1000,8 @@ VEX.NP.0fc2 RVMI XMM XMM XMM IMM8 VCMPPS
VEX.66.0fc2 RVMI XMM XMM XMM IMM8 VCMPPD
VEX.F3.LIG.0fc2 RVMI XMM XMM XMM32 IMM8 VCMPSS
VEX.F2.LIG.0fc2 RVMI XMM XMM XMM64 IMM8 VCMPSD
VEX.66.WIG.L0.0fc4 RVMI XMM XMM GP16 IMM8 VPINSRW
VEX.66.WIG.L0.0fc5 RMI GP XMM IMM8 - VPEXTRW DEF64 NOMEM
VEX.66.WIG.L0.0fc4 RVMI XMM XMM GP16 IMM8 VPINSRW ENC_NOSZ
VEX.66.WIG.L0.0fc5 RMI GP XMM IMM8 - VPEXTRW DEF64 NOMEM ENC_NOSZ
VEX.NP.0fc6 RVMI XMM XMM XMM IMM8 VSHUFPS
VEX.66.0fc6 RVMI XMM XMM XMM IMM8 VSHUFPD
VEX.NP.0fd0 RVM XMM XMM XMM - VADDSUBPS
@@ -1129,7 +1122,7 @@ VEX.66.W0.0f3847 RVM XMM XMM XMM - VPSLLVD
VEX.66.W1.0f3847 RVM XMM XMM XMM - VPSLLVQ
VEX.66.W0.0f3858 RM XMM XMM32 - - VPBROADCASTD
VEX.66.W0.0f3859 RM XMM XMM64 - - VPBROADCASTQ
VEX.66.W0.L1.0f385a RM XMM MEM128 - - VBROADCASTI128
VEX.66.W0.L1.0f385a RM XMM MEM128 - - VBROADCASTI128 ENC_NOSZ
VEX.66.W0.0f3878 RM XMM XMM8 - - VPBROADCASTB
VEX.66.W0.0f3879 RM XMM XMM16 - - VPBROADCASTW
VEX.66.W0.0f388c RVM XMM XMM XMM - VPMASKMOVD
@@ -1218,32 +1211,32 @@ VEX.66.0f3a0c RVMI XMM XMM XMM IMM8 VBLENDPS
VEX.66.0f3a0d RVMI XMM XMM XMM IMM8 VBLENDPD
VEX.66.0f3a0e RVMI XMM XMM XMM IMM8 VPBLENDW
VEX.66.0f3a0f RVMI XMM XMM XMM IMM8 VPALIGNR
VEX.66.WIG.L0.0f3a14 MRI GP8 XMM IMM8 - VPEXTRB
VEX.66.WIG.L0.0f3a14 MRI GP8 XMM IMM8 - VPEXTRB ENC_NOSZ
# TODO: also WIG for PEXTRW?
VEX.66.WIG.L0.0f3a15 MRI GP16 XMM IMM8 - VPEXTRW
VEX.66.W0.L0.0f3a16 MRI GP XMM IMM8 - VPEXTRD
VEX.66.W1.L0.0f3a16 MRI GP XMM IMM8 - VPEXTRD ONLY32
VEX.66.W1.L0.0f3a16 MRI GP XMM IMM8 - VPEXTRQ ONLY64
VEX.66.L0.0f3a17 MRI GP32 XMM IMM8 - VEXTRACTPS
VEX.66.W0.L1.0f3a18 RVMI XMM XMM XMM128 IMM8 VINSERTF128
VEX.66.W0.L1.0f3a19 MRI XMM128 XMM IMM8 - VEXTRACTF128
VEX.66.WIG.L0.0f3a15 MRI GP16 XMM IMM8 - VPEXTRW ENC_NOSZ
VEX.66.W0.L0.0f3a16 MRI GP XMM IMM8 - VPEXTRD ENC_NOSZ
VEX.66.W1.L0.0f3a16 MRI GP XMM IMM8 - VPEXTRD ONLY32 ENC_NOSZ
VEX.66.W1.L0.0f3a16 MRI GP XMM IMM8 - VPEXTRQ ONLY64 ENC_NOSZ
VEX.66.L0.0f3a17 MRI GP32 XMM IMM8 - VEXTRACTPS ENC_NOSZ
VEX.66.W0.L1.0f3a18 RVMI XMM XMM XMM128 IMM8 VINSERTF128 ENC_NOSZ
VEX.66.W0.L1.0f3a19 MRI XMM128 XMM IMM8 - VEXTRACTF128 ENC_NOSZ
VEX.66.W0.L1.0f3a1d MRI XMM XMM IMM8 - VCVTPS2PH
VEX.66.WIG.L0.0f3a20 RVMI XMM XMM GP8 IMM8 VPINSRB
VEX.66.L0.0f3a21 RVMI XMM XMM XMM32 IMM8 VINSERTPS
VEX.66.W0.L0.0f3a22 RVMI XMM XMM GP IMM8 VPINSRD
VEX.66.W1.L0.0f3a22 RVMI XMM XMM GP IMM8 VPINSRD ONLY32
VEX.66.W1.L0.0f3a22 RVMI XMM XMM GP IMM8 VPINSRQ ONLY64
VEX.66.W0.L1.0f3a38 RVMI XMM XMM XMM128 IMM8 VINSERTI128
VEX.66.W0.L1.0f3a39 MRI XMM128 XMM IMM8 - VEXTRACTI128
VEX.66.WIG.L0.0f3a20 RVMI XMM XMM GP8 IMM8 VPINSRB ENC_NOSZ
VEX.66.L0.0f3a21 RVMI XMM XMM XMM32 IMM8 VINSERTPS ENC_NOSZ
VEX.66.W0.L0.0f3a22 RVMI XMM XMM GP IMM8 VPINSRD ENC_NOSZ
VEX.66.W1.L0.0f3a22 RVMI XMM XMM GP IMM8 VPINSRD ONLY32 ENC_NOSZ
VEX.66.W1.L0.0f3a22 RVMI XMM XMM GP IMM8 VPINSRQ ONLY64 ENC_NOSZ
VEX.66.W0.L1.0f3a38 RVMI XMM XMM XMM128 IMM8 VINSERTI128 ENC_NOSZ
VEX.66.W0.L1.0f3a39 MRI XMM128 XMM IMM8 - VEXTRACTI128 ENC_NOSZ
VEX.66.0f3a40 RVMI XMM XMM XMM IMM8 VDPPS
VEX.66.0f3a41 RVMI XMM XMM XMM IMM8 VDPPD
VEX.66.0f3a42 RVMI XMM XMM XMM IMM8 VMPSADBW
VEX.66.0f3a44 RVMI XMM XMM XMM IMM8 VPCLMULQDQ
VEX.66.W0.L1.0f3a46 RVMI XMM XMM XMM IMM8 VPERM2I128
VEX.66.L0.0f3a60 RMI XMM XMM IMM8 - VPCMPESTRM
VEX.66.L0.0f3a61 RMI XMM XMM IMM8 - VPCMPESTRI
VEX.66.L0.0f3a62 RMI XMM XMM IMM8 - VPCMPISTRM
VEX.66.L0.0f3a63 RMI XMM XMM IMM8 - VPCMPISTRI
VEX.66.L0.0f3a60 RMI XMM XMM IMM8 - VPCMPESTRM ENC_NOSZ
VEX.66.L0.0f3a61 RMI XMM XMM IMM8 - VPCMPESTRI ENC_NOSZ
VEX.66.L0.0f3a62 RMI XMM XMM IMM8 - VPCMPISTRM ENC_NOSZ
VEX.66.L0.0f3a63 RMI XMM XMM IMM8 - VPCMPISTRI ENC_NOSZ
#
# BMI1
VEX.NP.L0.0f38f2 RVM GP GP GP - ANDN
@@ -1265,14 +1258,14 @@ VEX.F3.L0.0f38f7 RMV GP GP GP - SARX
F3.0f38f6 RM GP GP - - ADOX
#
# FPU
d8//0 M MEM32 - - - FADD
d8//1 M MEM32 - - - FMUL
d8//2 M MEM32 - - - FCOM
d8//3 M MEM32 - - - FCOMP
d8//4 M MEM32 - - - FSUB
d8//5 M MEM32 - - - FSUBR
d8//6 M MEM32 - - - FDIV
d8//7 M MEM32 - - - FDIVR
d8//0 M MEM32 - - - FADD ENC_SEPSZ
d8//1 M MEM32 - - - FMUL ENC_SEPSZ
d8//2 M MEM32 - - - FCOM ENC_SEPSZ
d8//3 M MEM32 - - - FCOMP ENC_SEPSZ
d8//4 M MEM32 - - - FSUB ENC_SEPSZ
d8//5 M MEM32 - - - FSUBR ENC_SEPSZ
d8//6 M MEM32 - - - FDIV ENC_SEPSZ
d8//7 M MEM32 - - - FDIVR ENC_SEPSZ
d8//c0+ AO FPU FPU - - FADD
d8//c8+ AO FPU FPU - - FMUL
d8//d0+ AO FPU FPU - - FCOM
@@ -1281,9 +1274,9 @@ d8//e0+ AO FPU FPU - - FSUB
d8//e8+ AO FPU FPU - - FSUBR
d8//f0+ AO FPU FPU - - FDIV
d8//f8+ AO FPU FPU - - FDIVR
d9//0 M MEM32 - - - FLD
d9//2 M MEM32 - - - FST
d9//3 M MEM32 - - - FSTP
d9//0 M MEM32 - - - FLD ENC_SEPSZ
d9//2 M MEM32 - - - FST ENC_SEPSZ
d9//3 M MEM32 - - - FSTP ENC_SEPSZ
d9//4 M MEMZ - - - FLDENV
d9//5 M MEM16 - - - FLDCW
d9//6 M MEMZ - - - FSTENV
@@ -1317,23 +1310,23 @@ d9//fc NP - - - - FRNDINT
d9//fd NP - - - - FSCALE
d9//fe NP - - - - FSIN
d9//ff NP - - - - FCOS
da//0 M MEM32 - - - FIADD
da//1 M MEM32 - - - FIMUL
da//2 M MEM32 - - - FICOM
da//3 M MEM32 - - - FICOMP
da//4 M MEM32 - - - FISUB
da//5 M MEM32 - - - FISUBR
da//6 M MEM32 - - - FIDIV
da//7 M MEM32 - - - FIDIVR
da//0 M MEM32 - - - FIADD ENC_SEPSZ
da//1 M MEM32 - - - FIMUL ENC_SEPSZ
da//2 M MEM32 - - - FICOM ENC_SEPSZ
da//3 M MEM32 - - - FICOMP ENC_SEPSZ
da//4 M MEM32 - - - FISUB ENC_SEPSZ
da//5 M MEM32 - - - FISUBR ENC_SEPSZ
da//6 M MEM32 - - - FIDIV ENC_SEPSZ
da//7 M MEM32 - - - FIDIVR ENC_SEPSZ
da//c0+ O FPU - - - FCMOVB
da//c8+ O FPU - - - FCMOVE
da//d0+ O FPU - - - FCMOVBE
da//d8+ O FPU - - - FCMOVU
da//e9 NP - - - - FUCOMPP
db//0 M MEM32 - - - FILD
db//1 M MEM32 - - - FISTTP
db//2 M MEM32 - - - FIST
db//3 M MEM32 - - - FISTP
db//0 M MEM32 - - - FILD ENC_SEPSZ
db//1 M MEM32 - - - FISTTP ENC_SEPSZ
db//2 M MEM32 - - - FIST ENC_SEPSZ
db//3 M MEM32 - - - FISTP ENC_SEPSZ
db//5 M FPU - - - FLD
db//7 M FPU - - - FSTP
db//c0+ O FPU - - - FCMOVNB
@@ -1344,24 +1337,24 @@ db//e2 NP - - - - FCLEX
db//e3 NP - - - - FINIT
db//e8+ O FPU - - - FUCOMI
db//f0+ O FPU - - - FCOMI
dc//0 M MEM64 - - - FADD
dc//1 M MEM64 - - - FMUL
dc//2 M MEM64 - - - FCOM
dc//3 M MEM64 - - - FCOMP
dc//4 M MEM64 - - - FSUB
dc//5 M MEM64 - - - FSUBR
dc//6 M MEM64 - - - FDIV
dc//7 M MEM64 - - - FDIVR
dc//0 M MEM64 - - - FADD ENC_SEPSZ
dc//1 M MEM64 - - - FMUL ENC_SEPSZ
dc//2 M MEM64 - - - FCOM ENC_SEPSZ
dc//3 M MEM64 - - - FCOMP ENC_SEPSZ
dc//4 M MEM64 - - - FSUB ENC_SEPSZ
dc//5 M MEM64 - - - FSUBR ENC_SEPSZ
dc//6 M MEM64 - - - FDIV ENC_SEPSZ
dc//7 M MEM64 - - - FDIVR ENC_SEPSZ
dc//c0+ OA FPU FPU - - FADD
dc//c8+ OA FPU FPU - - FMUL
dc//e0+ OA FPU FPU - - FSUBR
dc//e8+ OA FPU FPU - - FSUB
dc//f0+ OA FPU FPU - - FDIVR
dc//f8+ OA FPU FPU - - FDIV
dd//0 M MEM64 - - - FLD
dd//1 M MEM64 - - - FISTTP
dd//2 M MEM64 - - - FST
dd//3 M MEM64 - - - FSTP
dd//0 M MEM64 - - - FLD ENC_SEPSZ
dd//1 M MEM64 - - - FISTTP ENC_SEPSZ
dd//2 M MEM64 - - - FST ENC_SEPSZ
dd//3 M MEM64 - - - FSTP ENC_SEPSZ
dd//4 M MEMZ - - - FRSTOR
dd//6 M MEMZ - - - FSAVE
dd//7 M MEM16 - - - FSTSW
@@ -1370,14 +1363,14 @@ dd//d0+ O FPU - - - FST
dd//d8+ O FPU - - - FSTP
dd//e0+ O FPU - - - FUCOM
dd//e8+ O FPU - - - FUCOMP
de//0 M MEM16 - - - FIADD
de//1 M MEM16 - - - FIMUL
de//2 M MEM16 - - - FICOM
de//3 M MEM16 - - - FICOMP
de//4 M MEM16 - - - FISUB
de//5 M MEM16 - - - FISUBR
de//6 M MEM16 - - - FIDIV
de//7 M MEM16 - - - FIDIVR
de//0 M MEM16 - - - FIADD ENC_SEPSZ
de//1 M MEM16 - - - FIMUL ENC_SEPSZ
de//2 M MEM16 - - - FICOM ENC_SEPSZ
de//3 M MEM16 - - - FICOMP ENC_SEPSZ
de//4 M MEM16 - - - FISUB ENC_SEPSZ
de//5 M MEM16 - - - FISUBR ENC_SEPSZ
de//6 M MEM16 - - - FIDIV ENC_SEPSZ
de//7 M MEM16 - - - FIDIVR ENC_SEPSZ
de//c0+ OA FPU FPU - - FADDP
de//c8+ OA FPU FPU - - FMULP
de//d9 NP - - - - FCOMPP
@@ -1385,14 +1378,14 @@ de//e0+ OA FPU FPU - - FSUBRP
de//e8+ OA FPU FPU - - FSUBP
de//f0+ OA FPU FPU - - FDIVRP
de//f8+ OA FPU FPU - - FDIVP
df//0 M MEM16 - - - FILD
df//1 M MEM16 - - - FISTTP
df//2 M MEM16 - - - FIST
df//3 M MEM16 - - - FISTP
df//0 M MEM16 - - - FILD ENC_SEPSZ
df//1 M MEM16 - - - FISTTP ENC_SEPSZ
df//2 M MEM16 - - - FIST ENC_SEPSZ
df//3 M MEM16 - - - FISTP ENC_SEPSZ
df//4 M FPU - - - FBLD
df//5 M MEM64 - - - FILD
df//5 M MEM64 - - - FILD ENC_SEPSZ
df//6 M FPU - - - FBSTP
df//7 M MEM64 - - - FISTP
df//7 M MEM64 - - - FISTP ENC_SEPSZ
# FSTSW AX
df//e0 A GP16 - - - FSTSW
df//f0+ AO FPU FPU - - FCOMIP

View File

@@ -58,12 +58,16 @@ instr_data = custom_target('tables',
command: [python3, '@INPUT0@', '@INPUT1@', '@OUTPUT@'] + generate_args,
input: files('parseinstrs.py', 'instrs.txt'),
output: [
'fadec-mnems.inc', 'fadec-table.inc'
'fadec-mnems.inc', 'fadec-table.inc',
'fadec-enc-mnems.inc', 'fadec-enc-cases.inc',
],
install: true,
install_dir: [get_option('includedir'), false])
install_dir: [
get_option('includedir'), false,
get_option('includedir'), false,
])
libfadec = static_library('fadec', 'decode.c', 'format.c', instr_data,
libfadec = static_library('fadec', 'decode.c', 'encode.c', 'format.c', instr_data,
install: true)
fadec = declare_dependency(link_with: libfadec,
include_directories: include_directories('.'),
@@ -71,7 +75,7 @@ fadec = declare_dependency(link_with: libfadec,
subdir('tests')
install_headers('fadec.h')
install_headers('fadec.h', 'fadec-enc.h')
pkg = import('pkgconfig')
pkg.generate(libraries: libfadec,

View File

@@ -91,6 +91,13 @@ class OpKind(NamedTuple):
K_MEM = "mem"
K_IMM = "imm"
def abssize(self, opsz=None, vecsz=None):
    """Resolve this operand's size to a concrete value.

    Returns ``opsz`` when the size is ``SZ_OP``, ``vecsz`` when it is
    ``SZ_VEC``, and the stored size otherwise. Raises ``Exception`` if the
    selected value is ``None``.
    """
    if self.size == self.SZ_OP:
        resolved = opsz
    elif self.size == self.SZ_VEC:
        resolved = vecsz
    else:
        resolved = self.size
    if resolved is None:
        raise Exception("unspecified operand size")
    return resolved
OPKINDS = {
# sizeidx (0, fixedsz, opsz, vecsz), fixedsz (log2), regtype
"IMM": OpKind(OpKind.SZ_OP, OpKind.K_IMM),
@@ -423,6 +430,144 @@ template = """// Auto-generated file -- do not modify!
#endif
"""
def encode_table(entries):
    """Generate the encoder tables from the parsed instruction entries.

    For every (opcode, desc) pair all combinations of operand size, vector
    size, prefix variant and operand form are expanded into named mnemonics
    such as ``FE_ADD8rr``. Entries with a ``RESERVED_`` mnemonic or the
    ``ONLY32``/``UNDOC`` flags are skipped.

    Returns a tuple ``(mnemonics_tab, descs)``: the ``FE_MNEMONIC(name,idx)``
    list and the designated-initializer lines of the descriptor table.
    """
    mnemonics = defaultdict(list)
    mnemonics["FE_NOP"].append(("NP", 0, 0, "0x90"))
    for opcode, desc in entries:
        if desc.mnemonic[:9] == "RESERVED_":
            continue
        if "ONLY32" in desc.flags or "UNDOC" in desc.flags:
            continue

        opsizes = {8} if "SIZE_8" in desc.flags else {16, 32, 64}
        hasvex, vecsizes = False, {128}

        opc_i = opcode.opc | (opcode.opcext[1] << 8 if opcode.opcext else 0)
        opc_flags = ""
        opc_flags += ["","|OPC_0F","|OPC_0F38","|OPC_0F3A"][opcode.escape]
        if opcode.vex:
            hasvex, vecsizes = True, {128, 256}
            opc_flags += "|OPC_VEX"
        if opcode.prefix:
            opc_flags += ["", "|OPC_66", "|OPC_F3", "|OPC_F2"][opcode.prefix[1]]
            if not opcode.prefix[0]: opsizes -= {16}
        if opcode.vexl == "IG":
            vecsizes = {0}
        elif opcode.vexl:
            vecsizes -= {128 if opcode.vexl == "1" else 256}
            if opcode.vexl == "1": opc_flags += "|OPC_VEXL"
        if opcode.rexw == "IG":
            opsizes = {0}
        elif opcode.rexw:
            opsizes -= {32 if opcode.rexw == "1" else 64}
            if opcode.rexw == "1": opc_flags += "|OPC_REXW"

        if "DEF64" in desc.flags:
            opsizes -= {32}
        if "INSTR_WIDTH" not in desc.flags and all(op.size != OpKind.SZ_OP for op in desc.operands):
            opsizes = {0}
        if all(op.size != OpKind.SZ_VEC for op in desc.operands):
            vecsizes = {0} # for VEX-encoded general-purpose instructions.
        if "ENC_NOSZ" in desc.flags:
            opsizes, vecsizes = {0}, {0}

        # Where to put the operand size in the mnemonic
        separate_opsize = "ENC_SEPSZ" in desc.flags
        prepend_opsize = max(opsizes) > 0 and not separate_opsize
        prepend_vecsize = hasvex and max(vecsizes) > 0 and not separate_opsize

        # One string of possible forms per operand: r = register, m = memory,
        # i/a/o = immediate-like operands as selected by imm_control.
        optypes = ["", "", "", ""]
        enc = ENCODINGS[desc.encoding]
        if enc.modrm_idx:
            if "NOMEM" in desc.flags:
                optypes[enc.modrm_idx^3] = "r"
            elif ((opcode.opcext and opcode.opcext[0] and opcode.opcext[1] < 8)
                  or desc.operands[enc.modrm_idx^3].kind == OpKind.K_MEM):
                optypes[enc.modrm_idx^3] = "m"
            else:
                optypes[enc.modrm_idx^3] = "rm"
        if enc.modreg_idx: optypes[enc.modreg_idx^3] = "r"
        if enc.vexreg_idx: optypes[enc.vexreg_idx^3] = "r"
        if enc.zeroreg_idx: optypes[enc.zeroreg_idx^3] = "r"
        if enc.imm_control: optypes[enc.imm_idx^3] = " iariioo"[enc.imm_control]
        optypes = product(*(ot for ot in optypes if ot))

        prefixes = [("", "")]
        if "LOCK" in desc.flags:
            prefixes.append(("LOCK_", "|OPC_LOCK"))
        if "ENC_REP" in desc.flags:
            prefixes.append(("REP_", "|OPC_F3"))
        if "ENC_REPCC" in desc.flags:
            prefixes.append(("REPNZ_", "|OPC_F2"))
            prefixes.append(("REPZ_", "|OPC_F3"))

        for opsize, vecsize, prefix, ots in product(opsizes, vecsizes, prefixes, optypes):
            # The LOCK variant is only generated for a memory first operand.
            if prefix[1] == "|OPC_LOCK" and ots[0] != "m":
                continue

            imm_size = 0
            if enc.imm_control >= 4:
                if desc.mnemonic == "ENTER":
                    imm_size = 3
                elif "IMM_8" in desc.flags:
                    imm_size = 1
                else:
                    max_imm_size = 4 if desc.mnemonic != "MOVABS" else 8
                    imm_opsize = desc.operands[enc.imm_idx^3].abssize(opsize//8)
                    imm_size = min(max_imm_size, imm_opsize)

            tys = [] # operands that require special handling
            for ot, op in zip(ots, desc.operands):
                if ot == "m":
                    tys.append(0xf)
                elif op.kind == "GP":
                    tys.append(2 if op.abssize(opsize//8) == 1 else 1)
                else:
                    tys.append({
                        "imm": 0, "SEG": 3, "FPU": 4, "MMX": 5, "XMM": 6,
                        "BND": 8, "CR": 9, "DR": 10,
                    }.get(op.kind, -1))

            tys_i = sum(ty << (4*i) for i, ty in enumerate(tys))
            opc_s = hex(opc_i) + opc_flags + prefix[1]
            if opsize == 16: opc_s += "|OPC_66"
            if opsize == 64 and "DEF64" not in desc.flags: opc_s += "|OPC_REXW"

            # Construct mnemonic name
            mnem_name = {"MOVABS": "MOV", "XCHG_NOP": "XCHG"}.get(desc.mnemonic, desc.mnemonic)
            name = "FE_" + prefix[0] + mnem_name
            # The slice keeps the "_" separator only when the name already
            # ends in a digit, so the size cannot merge with it.
            if prepend_opsize and not ("DEF64" in desc.flags and opsize == 64):
                name += f"_{opsize}"[name[-1] not in "0123456789":]
            if prepend_vecsize:
                name += f"_{vecsize}"[name[-1] not in "0123456789":]
            for ot, op in zip(ots, desc.operands):
                name += ot.replace("o", "")
                if separate_opsize:
                    name += f"{op.abssize(opsize//8, vecsize//8)*8}"
            mnemonics[name].append((desc.encoding, imm_size, tys_i, opc_s))

    desc_lines = []
    alt_index = 0
    for mnem, variants in sorted(mnemonics.items()):
        # Drop variants that agree in encoding, immediate size and types.
        dedup = []
        for variant in variants:
            if not any(x[:3] == variant[:3] for x in dedup):
                dedup.append(variant)

        # Try small immediates first, then order by encoding priority.
        enc_prio = ["O", "OA", "OI", "IA", "M", "MI", "MR", "RM"]
        dedup.sort(key=lambda e: (e[1], e[0] in enc_prio and enc_prio.index(e[0])))

        # The first variant sits at the mnemonic's own index; the others are
        # placed after FE_MNEM_MAX and chained through .alt (0 ends a chain).
        indices = [mnem] + [f"FE_MNEM_MAX+{alt_index+i}" for i in range(len(dedup) - 1)]
        alt_list = indices[1:] + ["0"]
        alt_index += len(alt_list) - 1
        for idx, alt, (enc, immsz, tys_i, opc_s) in zip(indices, alt_list, dedup):
            desc_lines.append(f"[{idx}] = {{ .enc = ENC_{enc}, .immsz = {immsz}, .tys = {tys_i:#x}, .opc = {opc_s}, .alt = {alt} }},\n")
    descs = "".join(desc_lines)

    # Mnemonics are numbered by their position in sorted order.
    mnemonics_tab = "\n".join(f"FE_MNEMONIC({mnem},{idx})"
                              for idx, mnem in enumerate(sorted(mnemonics)))
    return mnemonics_tab, descs
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--32", dest="modes", action="append_const", const=32)
@@ -430,6 +575,8 @@ if __name__ == "__main__":
parser.add_argument("table", type=argparse.FileType('r'))
parser.add_argument("decode_mnems", type=argparse.FileType('w'))
parser.add_argument("decode_table", type=argparse.FileType('w'))
parser.add_argument("encode_mnems", type=argparse.FileType('w'))
parser.add_argument("encode_table", type=argparse.FileType('w'))
args = parser.parse_args()
entries = []
@@ -469,3 +616,7 @@ if __name__ == "__main__":
defines="\n".join("#define " + line for line in defines),
)
args.decode_table.write(decode_table)
fe_mnem_list, fe_code = encode_table(entries)
args.encode_mnems.write(fe_mnem_list)
args.encode_table.write(fe_code)

View File

@@ -2,3 +2,8 @@
decode_test = executable('test_decode', 'test_decode.c',
dependencies: fadec)
test('decode', decode_test)
encode_test = executable('test_encode', 'test_encode.c',
dependencies: fadec,
c_args: ['-D_GNU_SOURCE'])
test('encode', encode_test)

160
tests/test_encode.c Normal file
View File

@@ -0,0 +1,160 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <time.h>
#include <fadec-enc.h>
/*
 * Print `len` bytes starting at `buf` to stdout as two-digit lowercase hex
 * values, back to back, with no separator and no trailing newline.
 * Nothing is printed (and `buf` is not read) when `len` is 0.
 */
static
void
print_hex(const uint8_t* buf, size_t len)
{
    while (len-- > 0)
        printf("%02x", *buf++);
}
/*
 * Run a single encoder test case.
 *
 * Clears the first 16 bytes of `buf` (main passes a 16-byte array), asks
 * fe_enc64 to encode `mnem` with operands `op0`..`op3` at `buf`, and compares
 * the outcome with the expectation:
 *   - fe_enc64 must return non-zero exactly when `exp_len` is 0, i.e. an
 *     empty expectation means the encoding is expected to be rejected;
 *   - the number of bytes the write pointer advanced must equal `exp_len`;
 *   - the bytes written must equal the `exp_len` bytes at `exp`.
 *
 * `name` is only used to label the diagnostic output.
 *
 * Returns 0 if the case passed; otherwise prints the expected and the
 * actual bytes to stdout and returns -1.
 */
static
int
test(uint8_t* buf, const char* name, uint64_t mnem, uint64_t op0, uint64_t op1, uint64_t op2, uint64_t op3, const void* exp, size_t exp_len)
{
    memset(buf, 0, 16);

    uint8_t* inst = buf;
    int res = fe_enc64(&inst, mnem, op0, op1, op2, op3);
    // Number of bytes the encoder advanced the write pointer by.
    ptrdiff_t got_len = inst - buf;

    if ((res != 0) != (exp_len == 0)) goto fail;
    if (got_len != (ptrdiff_t) exp_len) goto fail;
    if (memcmp(buf, exp, exp_len)) goto fail;
    return 0;

fail:
    printf("Failed case %s:\n", name);
    printf(" Exp (%2zu): ", exp_len);
    print_hex(exp, exp_len);
    // got_len is a ptrdiff_t, so it needs the 't' length modifier.
    printf("\n Got (%2td): ", got_len);
    // Never turn a negative difference into a huge unsigned dump length.
    if (got_len > 0)
        print_hex(buf, (size_t) got_len);
    printf("\n");
    return -1;
}
/*
 * Test-case macros. Use TEST(exp, mnem[, op0[, op1[, op2[, op3]]]]).
 *
 * TEST2 performs the actual call to test(); it picks the mnemonic and the
 * four operands from its arguments and discards anything that follows.
 * It relies on a variable named `buf` in the calling scope.
 */
#define TEST2(str, exp, exp_len, mnem, op0, op1, op2, op3, ...) test(buf, str, mnem, op0, op1, op2, op3, exp, exp_len)
/*
 * TEST1 derives the expected length from `exp` as sizeof(exp)-1, so `exp`
 * has to be a string literal (the -1 drops the terminating NUL). It appends
 * five zeros to the argument list so that omitted operands become 0 and
 * TEST2's trailing `...` always receives at least one argument.
 */
#define TEST1(str, exp, ...) TEST2(str, exp, sizeof(exp)-1, __VA_ARGS__, 0, 0, 0, 0, 0)
/*
 * TEST stringizes the mnemonic/operand list to use it as the case name and
 * ORs the result of the case into a variable named `failed` in the calling
 * scope. An empty `exp` ("") gives an expected length of 0, which test()
 * treats as "the encoder must report an error".
 */
#define TEST(exp, ...) failed |= TEST1(#__VA_ARGS__, exp, __VA_ARGS__)
/*
 * Runs every encoder test case, prints a one-line summary and returns
 * EXIT_SUCCESS if all cases passed, EXIT_FAILURE otherwise.
 * Command-line arguments are ignored.
 */
int
main(int argc, char** argv)
{
(void) argc; (void) argv;
// The TEST macro refers to these two variables by name: `failed`
// accumulates the case results, `buf` is where every case is encoded.
int failed = 0;
uint8_t buf[16];
// 8-bit register operands. An empty expected string means the encoder is
// expected to reject the combination.
TEST("\00\xe0", FE_ADD8rr, FE_AX, FE_AH);
TEST("", FE_ADD8rr, FE_SI, FE_AH);
// Jumps take an absolute target address. Every case is encoded at buf, so
// the expected displacement is target - (buf + instruction length).
TEST("\xeb\xfe", FE_JMP, (intptr_t) buf);
TEST("\xeb\x7f", FE_JMP, (intptr_t) buf + 129);
TEST("\xe9\xfb\xff\xff\xff", FE_JMP|FE_JMPL, (intptr_t) buf);
TEST("\xe9\x00\x00\x00\x00", FE_JMP|FE_JMPL, (intptr_t) buf + 5);
TEST("\x75\x00", FE_JNZ, (intptr_t) buf + 2);
TEST("\x0f\x85\x00\x00\x00\x00", FE_JNZ|FE_JMPL, (intptr_t) buf + 6);
TEST("\xe3\xfc", FE_JCXZ, (intptr_t) buf - 2);
TEST("\x67\xe3\xfb", FE_JCXZ|FE_ADDR32, (intptr_t) buf - 2);
// With FE_JMPL the expected JCXZ encoding is still the 2-byte form.
TEST("\xe3\xfc", FE_JCXZ|FE_JMPL, (intptr_t) buf - 2);
// Instructions without explicit operands and the FE_ADDR32 flag (0x67 prefix).
TEST("\xac", FE_LODS8);
TEST("\x67\xac", FE_LODS8|FE_ADDR32);
// Register encoded in the opcode byte.
TEST("\x50", FE_PUSHr, FE_AX);
TEST("\x66\x50", FE_PUSH16r, FE_AX);
TEST("\x54", FE_PUSHr, FE_SP);
TEST("\x41\x57", FE_PUSHr, FE_R15);
TEST("\x41\x50", FE_PUSHr, FE_R8);
TEST("\x9c", FE_PUSHF);
// Shifts: by register (only FE_CX is expected to be accepted as count)
// and by immediate (a count of 1 is expected to use the form without an
// immediate byte).
TEST("\xd2\xe4", FE_SHL8rr, FE_AH, FE_CX);
TEST("", FE_SHL8rr, FE_AH, FE_DX);
TEST("\xd0\xe0", FE_SHL8ri, FE_AX, 1);
TEST("\xc0\xe0\x02", FE_SHL8ri, FE_AX, 2);
TEST("\xc1\xe0\x02", FE_SHL32ri, FE_AX, 2);
TEST("\x48\xc1\xe0\x02", FE_SHL64ri, FE_AX, 2);
TEST("\x48\xf7\x28", FE_IMUL64m, FE_MEM(FE_AX, 0, 0, 0));
// (XCHG16rr with FE_AX, FE_AX is covered with the other XCHG cases below.)
TEST("\xc2\x00\x00", FE_RETi, 0);
TEST("\xff\xd0", FE_CALLr, FE_AX);
TEST("\x66\xff\xd0", FE_CALL16r, FE_AX);
// Immediate operands of different widths.
TEST("\x05\x00\x01\x00\x00", FE_ADD32ri, FE_AX, 0x100);
TEST("\x66\x05\x00\x01", FE_ADD16ri, FE_AX, 0x100);
TEST("\xb8\x05\x00\x01\x00", FE_MOV32ri, FE_AX, 0x10005);
// MOV64ri: immediates that equal their sign-extended low 32 bits are
// expected in the 7-byte c7 form, all others in the 10-byte b8 form.
TEST("\x48\xb8\x05\x00\x01\x00\xff\x00\x00\x00", FE_MOV64ri, FE_AX, 0xff00010005);
TEST("\x48\xc7\xc0\x00\x00\x00\x00", FE_MOV64ri, FE_AX, 0x0);
TEST("\x48\xc7\xc0\x00\x00\x00\x80", FE_MOV64ri, FE_AX, (int32_t) 0x80000000);
TEST("\x48\xb8\x00\x00\x00\x00\x00\x00\x00\x80", FE_MOV64ri, FE_AX, INT64_MIN);
TEST("\x48\xb8\x00\x00\x00\x80\x00\x00\x00\x00", FE_MOV64ri, FE_AX, 0x80000000);
TEST("\xb0\xff", FE_MOV8ri, FE_AX, (int8_t) 0xff);
TEST("\xb4\xff", FE_MOV8ri, FE_AH, -1);
TEST("\xb7\x64", FE_MOV8ri, FE_BH, 0x64);
TEST("\xc8\x33\x22\x11", FE_ENTERi, 0x112233);
TEST("\x0f\x05", FE_SYSCALL);
// 8-bit destination register: FE_AH and FE_SP share the same ModRM byte
// here and are told apart by the 0x40 prefix byte.
TEST("\x0f\x90\xc4", FE_SETO8r, FE_AH);
TEST("\x40\x0f\x90\xc4", FE_SETO8r, FE_SP);
TEST("\x41\x0f\x90\xc4", FE_SETO8r, FE_R12);
// Expected order of the 0x66, 0xf3 and 0x48 prefix bytes before the opcode.
TEST("\xf3\x0f\xb8\xc2", FE_POPCNT32rr, FE_AX, FE_DX);
TEST("\x66\xf3\x0f\xb8\xc2", FE_POPCNT16rr, FE_AX, FE_DX);
TEST("\xf3\x48\x0f\xb8\xc2", FE_POPCNT64rr, FE_AX, FE_DX);
TEST("\x0f\xbc\xc2", FE_BSF32rr, FE_AX, FE_DX);
TEST("\x66\x0f\xbc\xc2", FE_BSF16rr, FE_AX, FE_DX);
TEST("\xf3\x0f\xbc\xc2", FE_TZCNT32rr, FE_AX, FE_DX);
TEST("\x66\xf3\x0f\xbc\xc2", FE_TZCNT16rr, FE_AX, FE_DX);
TEST("\x0f\x01\xd0", FE_XGETBV);
// XCHG with FE_AX: the single-opcode-byte 0x90+reg form is expected, except
// for the 32-bit FE_AX, FE_AX case, which is expected in the ModRM form
// (0x87).
TEST("\x41\x90", FE_XCHG32rr, FE_R8, FE_AX);
TEST("\x91", FE_XCHG32rr, FE_CX, FE_AX);
TEST("\x66\x90", FE_XCHG16rr, FE_AX, FE_AX);
TEST("\x87\xc0", FE_XCHG32rr, FE_AX, FE_AX);
TEST("\x48\x90", FE_XCHG64rr, FE_AX, FE_AX);
TEST("\x90", FE_NOP);
TEST("\x0f\x1f\xc0", FE_NOP32r, FE_AX);
// Segment overrides requested with FE_SEG: one prefix byte ahead of the
// opcode.
TEST("\x26\x01\x00", FE_ADD32mr|FE_SEG(FE_ES), FE_MEM(FE_AX, 0, 0, 0), FE_AX);
TEST("\x2e\x01\x00", FE_ADD32mr|FE_SEG(FE_CS), FE_MEM(FE_AX, 0, 0, 0), FE_AX);
TEST("\x36\x01\x00", FE_ADD32mr|FE_SEG(FE_SS), FE_MEM(FE_AX, 0, 0, 0), FE_AX);
TEST("\x3e\x01\x00", FE_ADD32mr|FE_SEG(FE_DS), FE_MEM(FE_AX, 0, 0, 0), FE_AX);
TEST("\x64\x01\x00", FE_ADD32mr|FE_SEG(FE_FS), FE_MEM(FE_AX, 0, 0, 0), FE_AX);
TEST("\x65\x01\x00", FE_ADD32mr|FE_SEG(FE_GS), FE_MEM(FE_AX, 0, 0, 0), FE_AX);
TEST("\x8e\xc0", FE_MOV_G2Srr, FE_ES, FE_AX);
// String instructions, plain and with the REP/REPZ/REPNZ mnemonics.
TEST("\xae", FE_SCAS8);
TEST("\xf2\xae", FE_REPNZ_SCAS8);
TEST("\xf3\xae", FE_REPZ_SCAS8);
TEST("\x66\xab", FE_STOS16);
TEST("\x66\xf3\xab", FE_REP_STOS16);
TEST("\xab", FE_STOS32);
TEST("\xf3\xab", FE_REP_STOS32);
TEST("\x48\xab", FE_STOS64);
TEST("\xf3\x48\xab", FE_REP_STOS64);
// Test ModRM encoding
// Base register with no, 8-bit and 32-bit displacement.
TEST("\x01\x00", FE_ADD32mr, FE_MEM(FE_AX, 0, 0, 0), FE_AX);
TEST("\x01\x04\x24", FE_ADD32mr, FE_MEM(FE_SP, 0, 0, 0), FE_AX);
TEST("\x01\x45\x00", FE_ADD32mr, FE_MEM(FE_BP, 0, 0, 0), FE_AX);
TEST("\x41\x01\x45\x00", FE_ADD32mr, FE_MEM(FE_R13, 0, 0, 0), FE_AX);
TEST("\x41\x01\x45\x80", FE_ADD32mr, FE_MEM(FE_R13, 0, 0, -0x80), FE_AX);
TEST("\x41\x01\x85\x80\x00\x00\x00", FE_ADD32mr, FE_MEM(FE_R13, 0, 0, 0x80), FE_AX);
// No base register: a 32-bit displacement is always expected.
TEST("\x01\x04\x25\x01\x00\x00\x00", FE_ADD32mr, FE_MEM(0, 0, 0, 0x1), FE_AX);
TEST("\x01\x04\x25\x00\x00\x00\x00", FE_ADD32mr, FE_MEM(0, 0, 0, 0), FE_AX);
// Index register: a scale of 0, 3 or 5, or FE_SP as index, is expected to
// be rejected.
TEST("", FE_ADD32mr, FE_MEM(0, 0, FE_AX, 0), FE_AX);
TEST("", FE_ADD32mr, FE_MEM(0, 3, FE_AX, 0), FE_AX);
TEST("", FE_ADD32mr, FE_MEM(0, 5, FE_AX, 0), FE_AX);
TEST("\x01\x04\x05\x00\x00\x00\x00", FE_ADD32mr, FE_MEM(0, 1, FE_AX, 0), FE_AX);
TEST("\x01\x04\xc5\x00\x00\x00\x00", FE_ADD32mr, FE_MEM(0, 8, FE_AX, 0), FE_AX);
TEST("", FE_ADD32mr, FE_MEM(0, 8, FE_SP, 0), FE_AX);
TEST("\x42\x01\x04\x05\x00\x00\x00\x00", FE_ADD32mr, FE_MEM(0, 1, FE_R8, 0), FE_AX);
// RIP-relative addressing: the FE_MEM offset is relative to the start of
// the instruction, so the expected displacement is the offset minus the
// length of the whole instruction (including any immediate). An index
// register together with FE_IP is expected to be rejected.
TEST("\x01\x05\x01\x00\x00\x00", FE_ADD32mr, FE_MEM(FE_IP, 0, 0, 0x7), FE_AX);
TEST("", FE_ADD32mr, FE_MEM(FE_IP, 1, FE_AX, 0x7), FE_AX);
TEST("\x0f\xaf\x05\xf9\xff\xff\xff", FE_IMUL32rm, FE_AX, FE_MEM(FE_IP, 0, 0, 0));
TEST("\x6b\x05\xf9\xff\xff\xff\x02", FE_IMUL32rmi, FE_AX, FE_MEM(FE_IP, 0, 0, 0), 2);
TEST("\x66\x6b\x05\xf8\xff\xff\xff\x02", FE_IMUL16rmi, FE_AX, FE_MEM(FE_IP, 0, 0, 0), 2);
// 0x80 is expected in the form with a full-size immediate (0x69), which
// makes the instruction longer and changes the expected displacement.
TEST("\x69\x05\xf6\xff\xff\xff\x80\x00\x00\x00", FE_IMUL32rmi, FE_AX, FE_MEM(FE_IP, 0, 0, 0), 0x80);
TEST("\x66\x69\x05\xf7\xff\xff\xff\x80\x00", FE_IMUL16rmi, FE_AX, FE_MEM(FE_IP, 0, 0, 0), 0x80);
puts(failed ? "Some tests FAILED" : "All tests PASSED");
return failed ? EXIT_FAILURE : EXIT_SUCCESS;
}