From 6b8c2968c1c8160b79e3b11bc3336c9e0840effa Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Sun, 20 Feb 2022 17:21:04 +0100 Subject: [PATCH] encode2: Add new encoder API, one func per instr. This is an *experimental* (read: unstable) API which exposes encoding functionality as one function per instruction. This makes the encoding process itself significantly faster, at the cost of a much larger binary size (~1 MiB of code, no data) and much higher compilation time. --- .build.yml | 14 ++- encode2-test.c | 63 +++++++++++ encode2.c | 135 ++++++++++++++++++++++ fadec-enc2.h | 148 ++++++++++++++++++++++++ meson.build | 7 +- meson_options.txt | 2 + parseinstrs.py | 283 ++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 646 insertions(+), 6 deletions(-) create mode 100644 encode2-test.c create mode 100644 encode2.c create mode 100644 fadec-enc2.h diff --git a/.build.yml b/.build.yml index 233fdee..0f6334d 100644 --- a/.build.yml +++ b/.build.yml @@ -5,8 +5,12 @@ packages: - meson tasks: - build: | - mkdir fadec-build - meson fadec-build fadec - ninja -C fadec-build -- check: | - ninja -C fadec-build test + mkdir fadec-build1 + meson fadec-build1 fadec + ninja -C fadec-build1 + ninja -C fadec-build1 test + # Complete test with encode2 API. + mkdir fadec-build2 + meson fadec-build2 fadec -Dwith_encode2=true + ninja -C fadec-build2 + ninja -C fadec-build2 test diff --git a/encode2-test.c b/encode2-test.c new file mode 100644 index 0000000..091d282 --- /dev/null +++ b/encode2-test.c @@ -0,0 +1,63 @@ + +#include +#include +#include +#include + +#include + + +static +void print_hex(const uint8_t* buf, size_t len) { + for (size_t i = 0; i < len; i++) + printf("%02x", buf[i]); +} + +static int +check(const uint8_t* buf, const void* exp, size_t exp_len, unsigned res, const char* name) { + if (__builtin_expect(res == exp_len && !memcmp(buf, exp, exp_len), 1)) + return 0; + printf("Failed case (new) %s:\n", name); + printf(" Exp (%2zu): ", exp_len); + print_hex((const uint8_t*)exp, exp_len); + printf("\n Got (%2u): ", res); + print_hex(buf, res); + printf("\n"); + return -1; +} + +#define TEST1(str, exp, name, ...) do { \ + memset(buf, 0, sizeof buf); \ + unsigned res = fe64_ ## name(buf, __VA_ARGS__); \ + failed |= check(buf, exp, sizeof(exp) - 1, res, str); \ + } while (0) +#define TEST(exp, ...) TEST1(#__VA_ARGS__, exp, __VA_ARGS__) + +int +main(void) { + int failed = 0; + uint8_t buf[16]; + + // This API is type safe and prohibits compilation of reg-type mismatches +#define ENC_TEST_TYPESAFE + // Silence -Warray-bounds with double cast +#define FE_PTR(off) (const void*) ((uintptr_t) buf + (off)) +#include "encode-test.inc" + + TEST("\x90", NOP, 0); + TEST("\x90", NOP, 1); + TEST("\x66\x90", NOP, 2); + TEST("\x0f\x1f\x00", NOP, 3); + TEST("\x0f\x1f\x40\x00", NOP, 4); + TEST("\x0f\x1f\x44\x00\x00", NOP, 5); + TEST("\x66\x0f\x1f\x44\x00\x00", NOP, 6); + TEST("\x0f\x1f\x80\x00\x00\x00\x00", NOP, 7); + TEST("\x0f\x1f\x84\x00\x00\x00\x00\x00", NOP, 8); + TEST("\x66\x0f\x1f\x84\x00\x00\x00\x00\x00", NOP, 9); + TEST("\x66\x0f\x1f\x84\x00\x00\x00\x00\x00\x90", NOP, 10); + TEST("\x66\x0f\x1f\x84\x00\x00\x00\x00\x00\x66\x90", NOP, 11); + TEST("\x66\x0f\x1f\x84\x00\x00\x00\x00\x00\x0f\x1f\x00", NOP, 12); + + puts(failed ? "Some tests FAILED" : "All tests PASSED"); + return failed ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/encode2.c b/encode2.c new file mode 100644 index 0000000..e5c7ac3 --- /dev/null +++ b/encode2.c @@ -0,0 +1,135 @@ + +#include +#include +#include + +#include + + +#define LIKELY(x) __builtin_expect((x), 1) +#define UNLIKELY(x) __builtin_expect((x), 0) + +#define op_reg_idx(op) (op).idx +#define op_reg_gph(op) (((op).idx & ~0x3) == 0x24) +#define op_mem_base(mem) op_reg_idx((mem).base) +#define op_mem_idx(mem) op_reg_idx((mem).idx) + +static bool +op_imm_n(int64_t imm, unsigned immsz) { + if (immsz == 0 && !imm) return true; + if (immsz == 1 && (int8_t) imm == imm) return true; + if (immsz == 2 && (int16_t) imm == imm) return true; + if (immsz == 3 && (imm&0xffffff) == imm) return true; + if (immsz == 4 && (int32_t) imm == imm) return true; + if (immsz == 8 && (int64_t) imm == imm) return true; + return false; +} + +static __attribute__((cold)) __attribute__((const)) uint8_t +enc_seg(int flags) { + return (0x65643e362e2600 >> (8 * (flags & FE_SEG_MASK))) & 0xff; +} + +static int +enc_imm(uint8_t* restrict buf, uint64_t imm, unsigned immsz) { + if (!op_imm_n(imm, immsz)) + return -1; + for (unsigned i = 0; i < immsz; i++) + *buf++ = imm >> 8 * i; + return 0; +} + +static int +enc_mem(uint8_t* restrict buf, unsigned bufidx, FeMem op0, uint64_t op1, + unsigned immsz, unsigned vsib) { + int mod = 0, reg = op1 & 7, rm; + int scale = 0, idx = 4, base = 0; + bool withsib = false, mod0off = false; + unsigned dispsz = 0; + int32_t off = op0.off; + + if ((op_reg_idx(op0.idx) != op_reg_idx(FE_NOREG)) != !!op0.scale) + return 0; + if (vsib || op_reg_idx(op0.idx) != op_reg_idx(FE_NOREG)) { + if (!vsib && op_reg_idx(op0.idx) == 4) + return 0; + idx = op_reg_idx(op0.idx) & 7; + int scalabs = op0.scale; + if (scalabs & (scalabs - 1)) + return 0; + scale = (scalabs & 0xA ? 1 : 0) | (scalabs & 0xC ? 2 : 0); + withsib = true; + } + + if (op0.base.idx == op_reg_idx(FE_NOREG)) { + rm = 5; + mod0off = true; + withsib = true; + } else if (op0.base.idx == FE_IP.idx) { + if (withsib) + return 0; + rm = 5; + mod0off = true; + // Adjust offset, caller doesn't know instruction length. + off -= bufidx + 5 + immsz; + } else { + rm = op_reg_idx(op0.base) & 7; + if (rm == 5) + mod = 1; + } + + if (off && op_imm_n(off, 1) && !mod0off) + mod = 1; + else if (off && !mod0off) + mod = 2; + + if (withsib || rm == 4) { + base = rm; + rm = 4; + } + + dispsz = mod == 1 ? 1 : (mod == 2 || mod0off) ? 4 : 0; + if (bufidx + 1 + (mod != 3 && rm == 4) + dispsz + immsz > 15) + return 0; + + buf[bufidx++] = (mod << 6) | (reg << 3) | rm; + if (mod != 3 && rm == 4) + buf[bufidx++] = (scale << 6) | (idx << 3) | base; + if (enc_imm(buf + bufidx, off, dispsz)) + return 0; + return 1 + (mod != 3 && rm == 4) + dispsz; +} + +static int +enc_mem_vsib(uint8_t* restrict buf, unsigned bufidx, FeMemV op0, uint64_t op1, + unsigned immsz, unsigned vsib) { + (void) vsib; + FeMem mem = FE_MEM(op0.base, op0.scale, FE_GP(op0.idx.idx), op0.off); + return enc_mem(buf, bufidx, mem, op1, immsz, 1); +} + +unsigned fe64_NOP(uint8_t* buf, unsigned flags) { + unsigned len = flags ? flags : 1; + // Taken from Intel SDM + static const uint8_t tbl[] = { + 0x90, + 0x66, 0x90, + 0x0f, 0x1f, 0x00, + 0x0f, 0x1f, 0x40, 0x00, + 0x0f, 0x1f, 0x44, 0x00, 0x00, + 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, + 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, + }; + unsigned remain = len; + for (; remain > 9; remain -= 9) + for (unsigned i = 0; i < 9; i++) + *(buf++) = tbl[36 + i]; + const uint8_t* src = tbl + (remain * (remain - 1)) / 2; + for (unsigned i = 0; i < remain; i++) + *(buf++) = src[i]; + return len; +} + +#include diff --git a/fadec-enc2.h b/fadec-enc2.h new file mode 100644 index 0000000..6e21076 --- /dev/null +++ b/fadec-enc2.h @@ -0,0 +1,148 @@ + +#ifndef FD_FADEC_ENC2_H_ +#define FD_FADEC_ENC2_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __cplusplus +#define FE_STRUCT(name) name +#else +#define FE_STRUCT(name) (name) +#endif + +// Flags +#define FE_JMPL 0x8 +#define FE_ADDR32 0x10 +#define FE_SEG_MASK 0x7 +#define FE_SEG(seg) (((seg).idx + 1) & FE_SEG_MASK) + +typedef struct FeRegGP { unsigned char idx; } FeRegGP; +#define FE_GP(idx) (FE_STRUCT(FeRegGP) { idx }) +#define FE_AX FE_GP(0) +#define FE_CX FE_GP(1) +#define FE_DX FE_GP(2) +#define FE_BX FE_GP(3) +#define FE_SP FE_GP(4) +#define FE_BP FE_GP(5) +#define FE_SI FE_GP(6) +#define FE_DI FE_GP(7) +#define FE_R8 FE_GP(8) +#define FE_R9 FE_GP(9) +#define FE_R10 FE_GP(10) +#define FE_R11 FE_GP(11) +#define FE_R12 FE_GP(12) +#define FE_R13 FE_GP(13) +#define FE_R14 FE_GP(14) +#define FE_R15 FE_GP(15) +#define FE_IP FE_GP(16) +#define FE_NOREG FE_GP(0x80) +typedef struct FeRegGPH { unsigned char idx; } FeRegGPH; +#define FE_GPH(idx) (FE_STRUCT(FeRegGPH) { idx }) +#define FE_AH FE_GPH(4) +#define FE_CH FE_GPH(5) +#define FE_DH FE_GPH(6) +#define FE_BH FE_GPH(7) +typedef struct FeRegSREG { unsigned char idx; } FeRegSREG; +#define FE_SREG(idx) (FE_STRUCT(FeRegSREG) { idx }) +#define FE_ES FE_SREG(0) +#define FE_CS FE_SREG(1) +#define FE_SS FE_SREG(2) +#define FE_DS FE_SREG(3) +#define FE_FS FE_SREG(4) +#define FE_GS FE_SREG(5) +typedef struct FeRegST { unsigned char idx; } FeRegST; +#define FE_ST(idx) (FE_STRUCT(FeRegST) { idx }) +#define FE_ST0 FE_ST(0) +#define FE_ST1 FE_ST(1) +#define FE_ST2 FE_ST(2) +#define FE_ST3 FE_ST(3) +#define FE_ST4 FE_ST(4) +#define FE_ST5 FE_ST(5) +#define FE_ST6 FE_ST(6) +#define FE_ST7 FE_ST(7) +typedef struct FeRegMM { unsigned char idx; } FeRegMM; +#define FE_MM(idx) (FE_STRUCT(FeRegMM) { idx }) +#define FE_MM0 FE_MM(0) +#define FE_MM1 FE_MM(1) +#define FE_MM2 FE_MM(2) +#define FE_MM3 FE_MM(3) +#define FE_MM4 FE_MM(4) +#define FE_MM5 FE_MM(5) +#define FE_MM6 FE_MM(6) +#define FE_MM7 FE_MM(7) +typedef struct FeRegXMM { unsigned char idx; } FeRegXMM; +#define FE_XMM(idx) (FE_STRUCT(FeRegXMM) { idx }) +#define FE_XMM0 FE_XMM(0) +#define FE_XMM1 FE_XMM(1) +#define FE_XMM2 FE_XMM(2) +#define FE_XMM3 FE_XMM(3) +#define FE_XMM4 FE_XMM(4) +#define FE_XMM5 FE_XMM(5) +#define FE_XMM6 FE_XMM(6) +#define FE_XMM7 FE_XMM(7) +#define FE_XMM8 FE_XMM(8) +#define FE_XMM9 FE_XMM(9) +#define FE_XMM10 FE_XMM(10) +#define FE_XMM11 FE_XMM(11) +#define FE_XMM12 FE_XMM(12) +#define FE_XMM13 FE_XMM(13) +#define FE_XMM14 FE_XMM(14) +typedef struct FeRegCR { unsigned char idx; } FeRegCR; +#define FE_CR(idx) (FE_STRUCT(FeRegCR) { idx }) +typedef struct FeRegDR { unsigned char idx; } FeRegDR; +#define FE_DR(idx) (FE_STRUCT(FeRegDR) { idx }) + +// Internal only +typedef struct FeRegGPLH { unsigned char idx; } FeRegGPLH; +#define FE_GPLH(idx) (FE_STRUCT(FeRegGPLH) { idx }) +// Disambiguate GP and GPH -- C++ uses overloading; C uses _Generic. +#ifdef __cplusplus +} +namespace { + static constexpr inline FeRegGPLH FE_MAKE_GPLH(FeRegGP reg) { + return FE_GPLH(reg.idx); + } + static constexpr inline FeRegGPLH FE_MAKE_GPLH(FeRegGPH reg) { + return FE_GPLH(reg.idx + 0x20); + } +} +extern "C" { +#else +#define FE_MAKE_GPLH(reg) FE_GPLH(_Generic((reg), FeRegGPH: 0x20, FeRegGP: 0) | (reg).idx) +#endif + +typedef struct FeMem { + uint8_t flags; + FeRegGP base; + unsigned char scale; + // union { + FeRegGP idx; + // FeRegXMM idx_xmm; + // }; + int32_t off; +} FeMem; +#define FE_MEM(base,sc,idx,off) (FE_STRUCT(FeMem) { 0, base, sc, idx, off }) +typedef struct FeMemV { + uint8_t flags; + FeRegGP base; + unsigned char scale; + FeRegXMM idx; + int32_t off; +} FeMemV; +#define FE_MEMV(base,sc,idx,off) (FE_STRUCT(FeMemV) { 0, base, sc, idx, off }) + +// NOP is special: flags is interpreted as the length in bytes, 0 = 1 byte, too. +unsigned fe64_NOP(uint8_t* buf, unsigned flags); + +#include + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/meson.build b/meson.build index c0d2a1a..d7e030b 100644 --- a/meson.build +++ b/meson.build @@ -1,4 +1,4 @@ -project('fadec', ['c'], default_options: ['warning_level=3', 'c_std=c99'], +project('fadec', ['c'], default_options: ['warning_level=3', 'c_std=c11'], meson_version: '>=0.40') python3 = find_program('python3') @@ -45,6 +45,11 @@ if get_option('with_encode') headers += files('fadec-enc.h') sources += files('encode.c') endif +if get_option('with_encode2') + components += 'encode2' + headers += files('fadec-enc2.h') + sources += files('encode2.c') +endif generate_args = [] if get_option('archmode') != 'only64' diff --git a/meson_options.txt b/meson_options.txt index 7383a74..3e8dd68 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -2,3 +2,5 @@ option('archmode', type: 'combo', choices: ['both', 'only32', 'only64']) option('with_undoc', type: 'boolean', value: false) option('with_decode', type: 'boolean', value: true) option('with_encode', type: 'boolean', value: true) +# encode2 is off-by-default to reduce size and compile-time +option('with_encode2', type: 'boolean', value: false) diff --git a/parseinstrs.py b/parseinstrs.py index 2ecfaf9..93b505a 100644 --- a/parseinstrs.py +++ b/parseinstrs.py @@ -687,10 +687,293 @@ def encode_table(entries, args): mnem_tab = "".join(f"FE_MNEMONIC({m},{i})\n" for i, m in enumerate(mnem_list)) return mnem_tab, descs +def encode2_table(entries, args): + mnemonics = defaultdict(list) + for weak, opcode, desc in entries: + if "I64" in desc.flags or desc.mnemonic[:9] == "RESERVED_": + continue + + opsizes = {8} if "SZ8" in desc.flags else {16, 32, 64} + hasvex, vecsizes = False, {128} + + if opcode.vex: + hasvex, vecsizes = True, {128, 256} + if opcode.prefix in ("66", "F2", "F3") and "U66" not in desc.flags: + opsizes -= {16} + if opcode.vexl == "IG": + vecsizes = {0} + elif opcode.vexl: + vecsizes -= {128 if opcode.vexl == "1" else 256} + if opcode.rexw == "IG": + opsizes = {0} + elif opcode.rexw: + opsizes -= {32 if opcode.rexw == "1" else 64} + + if "I66" in desc.flags: + opsizes -= {16} + if "D64" in desc.flags: + opsizes -= {32} + if "SZ8" not in desc.flags and "INSTR_WIDTH" not in desc.flags and all(op.size != OpKind.SZ_OP for op in desc.operands): + opsizes = {0} + if "VSIB" not in desc.flags and all(op.size != OpKind.SZ_VEC for op in desc.operands): + vecsizes = {0} # for VEX-encoded general-purpose instructions. + if "ENC_NOSZ" in desc.flags: + opsizes, vecsizes = {0}, {0} + + # Where to put the operand size in the mnemonic + separate_opsize = "ENC_SEPSZ" in desc.flags + prepend_opsize = max(opsizes) > 0 and not separate_opsize + prepend_vecsize = hasvex and max(vecsizes) > 0 and not separate_opsize + + if "F64" in desc.flags: + opsizes = {64} + prepend_opsize = False + + modrm_type = opcode.modreg[1] if opcode.modreg else "rm" + optypes_base = desc.optype_str() + optypes = {optypes_base.replace("M", t) for t in modrm_type} + + prefixes = [("", "")] + if "LOCK" in desc.flags: + prefixes.append(("LOCK_", "LOCK")) + if "ENC_REP" in desc.flags: + prefixes.append(("REP_", "F3")) + if "ENC_REPCC" in desc.flags: + prefixes.append(("REPNZ_", "F2")) + prefixes.append(("REPZ_", "F3")) + + for opsize, vecsize, prefix, ots in product(opsizes, vecsizes, prefixes, optypes): + if prefix[1] == "LOCK" and ots[0] != "m": + continue + + spec_opcode = opcode + if prefix[1]: + spec_opcode = spec_opcode._replace(prefix=prefix[1]) + if opsize == 64 and "D64" not in desc.flags and "F64" not in desc.flags: + spec_opcode = spec_opcode._replace(rexw="1") + if vecsize == 256: + spec_opcode = spec_opcode._replace(vexl="1") + + # Construct mnemonic name + mnem_name = {"MOVABS": "MOV", "XCHG_NOP": "XCHG"}.get(desc.mnemonic, desc.mnemonic) + name = prefix[0] + mnem_name + if prepend_opsize and not ("D64" in desc.flags and opsize == 64): + name += f"_{opsize}"[name[-1] not in "0123456789":] + if prepend_vecsize: + name += f"_{vecsize}"[name[-1] not in "0123456789":] + for ot, op in zip(ots, desc.operands): + name += ot.replace("o", "") + if separate_opsize: + name += f"{op.abssize(opsize//8, vecsize//8)*8}" + mnemonics[name, opsize, ots].append((spec_opcode, desc)) + + enc_decls, enc_code = "", "" + for (mnem, opsize, ots), variants in mnemonics.items(): + dedup = OrderedDict() + for i, (opcode, desc) in enumerate(variants): + PRIO = ["O", "OA", "AO", "AM", "MA", "IA", "OI"] + enc_prio = PRIO.index(desc.encoding) if desc.encoding in PRIO else len(PRIO) + unique = 0 if desc.encoding != "S" else i + key = desc.imm_size(opsize//8), enc_prio, unique + if key not in dedup: + dedup[key] = opcode, desc + if desc.encoding == "S": + print(mnem, key, desc, dedup) + variants = [dedup[k] for k in sorted(dedup.keys())] + max_imm_size = max(k[0] for k in dedup.keys()) + + supports_high_regs = [] + if variants[0][1].mnemonic in ("MOVSX", "MOVZX") or opsize == 8: + # Should be the same for all variants + desc = variants[0][1] + for i, (ot, op) in enumerate(zip(ots, desc.operands)): + if ot == "r" and op.kind == "GP" and op.abssize(opsize//8) == 1: + supports_high_regs.append(i) + supports_vsib = "VSIB" in variants[0][1].flags + + if len({tuple(op.kind for op in v[1].operands) for v in variants}) > 1: + raise Exception(f"ambiguous operand kinds for {mnem}") + OPKIND_LUT = {"FPU": "ST", "SEG": "SREG", "MMX": "MM"} + reg_tys = [OPKIND_LUT.get(op.kind, op.kind) for op in variants[0][1].operands] + + fnname = f"fe64_{mnem}{'_impl' if supports_high_regs else ''}" + op_tys = [{ + "i": f"int{max_imm_size*8 if max_imm_size != 3 else 32}_t", + "a": "uintptr_t", + "r": f"FeReg{reg_ty if i not in supports_high_regs else 'GPLH'}", + "m": "FeMem" if not supports_vsib else "FeMemV", + "o": "const void*", + }[ot] for i, (ot, reg_ty) in enumerate(zip(ots, reg_tys))] + fn_opargs = "".join(f", {ty} op{i}" for i, ty in enumerate(op_tys)) + fn_sig = f"unsigned {fnname}(uint8_t* buf, int flags{fn_opargs})" + enc_decls += f"{fn_sig};\n" + if supports_high_regs: + enc_decls += f"#define fe64_{mnem}(buf, flags" + enc_decls += "".join(f", op{i}" for i in range(len(op_tys))) + enc_decls += f") {fnname}(buf, flags" + enc_decls += "".join(f", FE_MAKE_GPLH(op{i})" if i in supports_high_regs else f", op{i}" for i in range(len(op_tys))) + enc_decls += f")\n" + + code = f"{fn_sig} {{\n" + + code += " unsigned idx = 0, rex = 0, memoff;\n" + if max_imm_size or "a" in ots: + code += " int64_t imm; unsigned imm_size;\n" + code += " (void) flags; (void) memoff;\n" + + neednext = True + for i, (opcode, desc) in enumerate(variants): + if not neednext: + break + if i > 0: + code += f"\nnext{i-1}:\n" + neednext = False + + imm_size = desc.imm_size(opsize//8) + flags = ENCODINGS[desc.encoding] + # Select usable encoding. + if desc.encoding == "S": + # Segment encoding is weird. + code += f" if (op_reg_idx(op0)!={(opcode.opc>>3)&0x7:#x}) goto next{i};\n" + neednext = True + if desc.mnemonic == "XCHG_NOP" and opsize == 32: + # XCHG eax, eax must not be encoded as 90 -- that'd be NOP. + code += f" if (op_reg_idx(op0)==0&&op_reg_idx(op1)==0) goto next{i};\n" + neednext = True + if flags.zeroreg_idx: + code += f" if (op_reg_idx(op{flags.zeroreg_idx^3})!={flags.zeroreg_val}) goto next{i};\n" + neednext = True + if flags.imm_control: + if flags.imm_control != 3: + code += f" imm = (int64_t) op{flags.imm_idx^3};\n" + else: + code += f" imm = op_reg_idx(op{flags.imm_idx^3}) << 4;\n" + code += f" imm_size = {imm_size};\n" + if flags.imm_control == 1: + code += f" if (imm != 1) goto next{i};\n" + neednext = True + if flags.imm_control == 2: + code += " imm_size = flags & FE_ADDR32 ? 4 : 8;\n" + code += " if (imm_size == 4) imm = (int32_t) imm;\n" + if imm_size < max_imm_size and 2 <= flags.imm_control < 6: + code += f" if (!op_imm_n(imm, imm_size)) goto next{i};\n" + neednext = True + if flags.imm_control == 6: + # idx is subtracted below. + code += f" imm -= (int64_t) buf + imm_size;\n" + if i != len(variants) - 1: # only Jcc+JMP + code += f" if (flags&FE_JMPL) goto next{i};\n" + # assume one-byte opcode without escape/prefixes + code += f" if (!op_imm_n(imm-1, imm_size)) goto next{i};\n" + neednext = True + + if opcode.vex: + rexw, rexr, rexx, rexb = 0x8000, 0x80, 0x40, 0x20 + else: + rexw, rexr, rexx, rexb = 0x48, 0x44, 0x42, 0x41 + + if not opcode.vex: + for i in supports_high_regs: + code += f" if (op_reg_idx(op{i}) >= 4 && op_reg_idx(op{i}) <= 15) rex = 0x40;\n" + if opcode.rexw == "1": + code += f" rex |= {rexw:#x};\n" + if flags.modrm_idx: + ismem = ots[flags.modrm_idx^3] == "m" + if ismem: + code += f" if (op_mem_base(op{flags.modrm_idx^3})&8) rex |= {rexb:#x};\n" + code += f" if (op_mem_idx(op{flags.modrm_idx^3})&8) rex |= {rexx:#x};\n" + else: + if desc.operands[flags.modrm_idx^3].kind in ("GP", "XMM"): + code += f" if (op_reg_idx(op{flags.modrm_idx^3})&8) rex |= {rexb:#x};\n" + if flags.modreg_idx: + if desc.operands[flags.modreg_idx^3].kind in ("GP", "XMM", "CR", "DR"): + code += f" if (op_reg_idx(op{flags.modreg_idx^3})&8) rex |= {rexr:#x};\n" + elif flags.modreg_idx: # O encoding + if desc.operands[flags.modreg_idx^3].kind in ("GP", "XMM"): + code += f" if (op_reg_idx(op{flags.modreg_idx^3})&8) rex |= {rexb:#x};\n" + + for i in supports_high_regs: + code += f" if (rex && op_reg_gph(op{i})) return 0;\n" + + if "m" in ots or "USEG" in desc.flags: + code += " if (UNLIKELY(flags & FE_SEG_MASK)) buf[idx++] = enc_seg(flags);\n" + if "m" in ots or "U67" in desc.flags: + code += " if (UNLIKELY(flags & FE_ADDR32)) buf[idx++] = 0x67;\n" + + if opcode.vex: + ppl = ["NP", "66", "F3", "F2"].index(opcode.prefix) + ppl |= 4 if opcode.vexl == "1" else 0 + mayvex2 = opcode.rexw != "1" and opcode.escape == 1 + if mayvex2: + code += " if (!(rex&0x8060)) {\n" + code += " buf[idx++] = 0xc5;\n" + code += " rex ^= 0x80;\n" + code += " } else {\n" + code += " buf[idx++] = 0xc4;\n" + code += f" buf[idx++] = {0xe0+opcode.escape:#x}^rex;\n" + code += " rex >>= 8;\n" + if mayvex2: + code += " }\n" + vexop = 0 + if flags.vexreg_idx: + vexop = f"op_reg_idx(op{flags.vexreg_idx^3})" + code += f" buf[idx++] = {ppl}|rex|(({vexop}^15)<<3);\n" + else: + if opsize == 16 or opcode.prefix == "66": + code += " buf[idx++] = 0x66;\n" + if opcode.prefix in ("F2", "F3"): + code += f" buf[idx++] = 0x{opcode.prefix};\n" + if opcode.prefix == "LOCK": + code += f" buf[idx++] = 0xF0;\n" + code += f" if (rex) buf[idx++] = rex;\n" + if opcode.escape: + code += f" buf[idx++] = 0x0F;\n" + if opcode.escape == 2: + code += f" buf[idx++] = 0x38;\n" + elif opcode.escape == 3: + code += f" buf[idx++] = 0x3A;\n" + code += f" buf[idx++] = {opcode.opc:#x};\n" + if opcode.opcext: + code += f" buf[idx++] = {opcode.opcext:#x};\n" + + if flags.modrm: + modrm = f"op{flags.modrm_idx^3}" + if flags.modreg_idx: + modreg = f"op_reg_idx(op{flags.modreg_idx^3})" + else: + modreg = int(opcode.modreg[0]) if opcode.modreg else 0 + if ismem: + imm_size_expr = "imm_size" if flags.imm_control >= 2 else 0 + memfn = "enc_mem_vsib" if "VSIB" in desc.flags else "enc_mem" + code += f" memoff = {memfn}(buf, idx, {modrm}, {modreg}, {imm_size_expr}, 0);\n" + code += f" if (!memoff) return 0;\n idx += memoff;\n" + else: + modrm = f"op_reg_idx({modrm})" + code += f" buf[idx++] = 0xC0|(({modreg}&7)<<3)|({modrm}&7);\n" + elif flags.modrm_idx: + code += f" buf[idx-1] |= op_reg_idx(op{flags.modrm_idx^3}) & 7;\n" + + if flags.imm_control >= 2: + if flags.imm_control == 6: + code += f" imm -= idx;\n" + code += f" if (enc_imm(buf+idx, imm, imm_size)) return 0;\n" + code += f" idx += imm_size;\n" + code += f" return idx;\n" + + if neednext: + code += f"next{len(variants)-1}: return 0;\n" + code += "}\n" + + enc_code += code + + return enc_decls, enc_code + + if __name__ == "__main__": generators = { "decode": decode_table, "encode": encode_table, + "encode2": encode2_table, } parser = argparse.ArgumentParser()