Initial commit

This commit is contained in:
Alexis Engelke
2018-04-08 13:16:49 +00:00
commit a3f77dbf49
19 changed files with 2804 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
/build/

28
LICENSE Normal file
View File

@@ -0,0 +1,28 @@
Copyright (c) 2018, Alexis Engelke
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

18
README.md Normal file
View File

@@ -0,0 +1,18 @@
# libx86decode
A fast and lightweight decoder for x86 and x86-64. *This is not a disassembler; it does not intend to produce valid assembly.* To meet the goal of speed, lookup tables are used to map the opcode to the (internal) description of the instruction encoding. This table currently has a size of roughly 21 kiB.
Currently, decoding 32-bit assembly requires the `ARCH_386` macro to be defined and is only tested when compiling as a 32-bit binary. Decoding of 64-bit instructions requires the macro `ARCH_X86_64` and is only supported in 64-bit mode. This restriction might change in the future.
### Known issues
- An implicit `FWAIT` in FPU instructions is decoded as a separate instruction. For example, the instruction `FINIT` is decoded as an `FWAIT` followed by an `FINIT`, whereas `FNINIT` is decoded as a plain `FINIT` instruction.
- The AVX VSIB encoding is not supported yet, all instructions using this will result in a decode error.
- A mandatory L0 or L1 in the VEX prefix is currently ignored to reduce the size of the prefix tables. The only instructions where this has an effect are `VZEROALL` (L1) and `VZEROUPPER` (L0); both are currently decoded as `VZERO`, and the vector length prefix can be used to determine the actual instruction.
- The EVEX prefix (AVX-512) is not supported (yet).
- No ABI stability as the value associated with the mnemonics will change if further instructions are added. When using this library, please link it statically.
- The instruction formatter does not include prefixes. (Help needed.)
- The layout of entries in the tables can be improved to improve usage of caches. (Help needed.)
- Low test coverage. (Help needed.)
- No benchmarking has been performed yet. (Help needed.)
If you find any other issues, please report a bug. Or, even better, send a patch fixing the issue.

727
decode.c Normal file
View File

@@ -0,0 +1,727 @@
#include <stddef.h>
#include <stdint.h>
#include <decode.h>
// Branch-prediction hints; thin wrappers around the compiler builtin.
#define LIKELY(x) __builtin_expect((x), 1)
#define UNLIKELY(x) __builtin_expect((x), 0)
// Selects the raw table bytes section of the generated include file.
#define DECODE_TABLE_DATA
// Generated lookup data. decode() reads it as 16-bit table entries and as
// struct InstrDesc records, addressed by byte offset into this array.
static const uint8_t _decode_table[] = {
#include <decode-table.inc>
};
#undef DECODE_TABLE_DATA
// Kind of a table entry, stored in the low three bits (ENTRY_MASK) of each
// 16-bit entry; the remaining bits are the byte offset of the target inside
// _decode_table. The numeric values mirror EntryKind in parseinstrs.py.
#define ENTRY_NONE 0
#define ENTRY_INSTR 1
#define ENTRY_TABLE256 2
#define ENTRY_TABLE8 3
#define ENTRY_TABLE72 4
#define ENTRY_TABLE_PREFIX 5
#define ENTRY_MASK 7
#define ENTRY_IS_TABLE(kind) ((kind) >= ENTRY_TABLE256)
// NOTE(review): the INSTR_ENC_* flags below are not referenced by the code
// visible in this file; INSTR_ENC_MODRM_BOTH and INSTR_ENC_OPCODE share the
// value 0x40 -- confirm whether they are still needed.
#define INSTR_ENC_ADDR 0x08
#define INSTR_ENC_IMM 0x10
#define INSTR_ENC_MODRM 0x80
#define INSTR_ENC_MODRM_BOTH 0x40
#define INSTR_ENC_FLIP 0x20
#define INSTR_ENC_OPCODE 0x40
#define INSTR_ENC_IMPLICIT_REG 0x04
// Little-endian loads of 1, 2, 4 (and, on x86-64, 8) bytes starting at `buf`.
// The result has type size_t and is NOT sign-extended; callers cast it to the
// signed type they need. The argument is parenthesized before the cast so
// that pointer expressions such as `p + 1` are evaluated in the caller's
// pointer type, and the cast keeps const-qualification intact.
// Note: `buf` is evaluated once per loaded byte; do not pass expressions with
// side effects.
#define LOAD_LE_1(buf) (((size_t) ((const uint8_t*) (buf))[0]))
#define LOAD_LE_2(buf) (((size_t) ((const uint8_t*) (buf))[0]) | \
                        ((size_t) ((const uint8_t*) (buf))[1] << 8))
#define LOAD_LE_4(buf) (((size_t) ((const uint8_t*) (buf))[0]) | \
                        ((size_t) ((const uint8_t*) (buf))[1] << 8) | \
                        ((size_t) ((const uint8_t*) (buf))[2] << 16) | \
                        ((size_t) ((const uint8_t*) (buf))[3] << 24))
#if defined(ARCH_X86_64)
// Only available where size_t is 64 bits wide.
#define LOAD_LE_8(buf) (((size_t) ((const uint8_t*) (buf))[0]) | \
                        ((size_t) ((const uint8_t*) (buf))[1] << 8) | \
                        ((size_t) ((const uint8_t*) (buf))[2] << 16) | \
                        ((size_t) ((const uint8_t*) (buf))[3] << 24) | \
                        ((size_t) ((const uint8_t*) (buf))[4] << 32) | \
                        ((size_t) ((const uint8_t*) (buf))[5] << 40) | \
                        ((size_t) ((const uint8_t*) (buf))[6] << 48) | \
                        ((size_t) ((const uint8_t*) (buf))[7] << 56))
#endif
static
int
decode_prefixes(const uint8_t* buffer, int len, PrefixSet* out_prefixes,
uint8_t* out_vex_operand)
{
int off = 0;
PrefixSet prefixes = 0;
while (LIKELY(off < len))
{
uint8_t prefix = buffer[off];
if (prefix == 0x2E)
{
prefixes |= PREFIX_SEG_CS;
}
else if (prefix == 0x64)
{
prefixes |= PREFIX_SEG_FS;
}
else if (prefix == 0x65)
{
prefixes |= PREFIX_SEG_GS;
}
else if (prefix == 0x66)
{
prefixes |= PREFIX_OPSZ;
}
else if (prefix == 0x67)
{
prefixes |= PREFIX_ADDRSZ;
}
else if (prefix == 0xF0)
{
prefixes |= PREFIX_LOCK;
}
else if (prefix == 0xF2)
{
prefixes |= PREFIX_REPNZ;
}
else if (prefix == 0xF3)
{
prefixes |= PREFIX_REP;
}
#if defined(ARCH_X86_64)
else if (LIKELY(prefix >= 0x40 && prefix <= 0x4F))
{
prefixes |= PREFIX_REX;
if (prefix & 0x1)
{
prefixes |= PREFIX_REXB;
}
if (prefix & 0x2)
{
prefixes |= PREFIX_REXX;
}
if (prefix & 0x4)
{
prefixes |= PREFIX_REXR;
}
if (prefix & 0x8)
{
prefixes |= PREFIX_REXW;
}
// REX prefix is the last prefix.
off++;
break;
}
#endif
else if (UNLIKELY(prefix == 0xc4))
{
// 3-byte VEX
if (UNLIKELY(off + 2 >= len))
{
return -1;
}
#if defined(ARCH_386)
if ((buffer[off + 1] & 0xc0) != 0xc0)
{
break;
}
#endif
uint8_t byte2 = buffer[off + 1];
uint8_t byte3 = buffer[off + 2];
prefixes |= PREFIX_VEX;
#if defined(ARCH_X86_64)
if ((byte2 & 0x80) == 0)
{
prefixes |= PREFIX_REXR;
}
if ((byte2 & 0x40) == 0)
{
prefixes |= PREFIX_REXX;
}
if ((byte2 & 0x20) == 0)
{
prefixes |= PREFIX_REXB;
}
#endif
switch (byte2 & 0x1f)
{
case 0x01: prefixes |= PREFIX_ESC_0F; break;
case 0x02: prefixes |= PREFIX_ESC_0F38; break;
case 0x03: prefixes |= PREFIX_ESC_0F3A; break;
default: return -1;
}
if (byte3 & 0x04)
{
prefixes |= PREFIX_VEXL;
}
#if defined(ARCH_X86_64)
// SDM Vol 2A 2-16 (Dec. 2016)
// - "In 32-bit modes, VEX.W is silently ignored."
// - VEX.W either replaces REX.W, is don't care or is reserved.
if (byte3 & 0x80)
{
prefixes |= PREFIX_REXW;
}
#endif
*out_vex_operand = ((byte3 & 0x78) >> 3) ^ 0xf;
switch (byte3 & 0x03)
{
case 1: prefixes |= PREFIX_OPSZ; break;
case 2: prefixes |= PREFIX_REP; break;
case 3: prefixes |= PREFIX_REPNZ; break;
default: break;
}
// VEX prefix is always the last prefix.
off += 3;
break;
}
else if (UNLIKELY(prefix == 0xc5))
{
// 2-byte VEX
if (UNLIKELY(off + 1 >= len))
{
return -1;
}
#if defined(ARCH_386)
if ((buffer[off + 1] & 0xc0) != 0xc0)
{
break;
}
#endif
uint8_t byte = buffer[off + 1];
prefixes |= PREFIX_VEX | PREFIX_ESC_0F;
#if defined(ARCH_X86_64)
if ((byte & 0x80) == 0)
{
prefixes |= PREFIX_REXR;
}
#endif
if (byte & 0x04)
{
prefixes |= PREFIX_VEXL;
}
*out_vex_operand = ((byte & 0x78) >> 3) ^ 0xf;
switch (byte & 0x03)
{
case 1: prefixes |= PREFIX_OPSZ; break;
case 2: prefixes |= PREFIX_REP; break;
case 3: prefixes |= PREFIX_REPNZ; break;
default: break;
}
// VEX prefix is always the last prefix.
off += 2;
break;
}
else
{
break;
}
off++;
}
if (out_prefixes != NULL)
{
*out_prefixes = prefixes;
}
return off;
}
// Decode a ModRM byte together with an optional SIB byte and displacement.
//
//   buffer, len  bytes starting at the ModRM byte
//   instr        instr->prefixes is read (x86-64 builds) for the REX
//                extension bits; disp, scale and sreg are written for memory
//                operands
//   out_o1       receives the r/m operand (register or memory)
//   out_o2       receives the reg-field operand; may be NULL when the reg
//                field is an opcode extension
//
// Returns the number of bytes consumed, or -1 if the buffer is too short.
static
int
decode_modrm(const uint8_t* buffer, int len, Instr* instr,
             struct Operand* out_o1, struct Operand* out_o2)
{
    if (UNLIKELY(len <= 0))
    {
        return -1;
    }

    int pos = 1;
    const uint8_t modrm = buffer[0];
    const uint8_t mod = (modrm >> 6) & 0x3;
    const uint8_t reg_field = (modrm >> 3) & 0x7;
    const uint8_t rm = modrm & 0x7;

#if defined(ARCH_X86_64)
    const uint8_t ext_r = (instr->prefixes & PREFIX_REXR) ? 8 : 0;
    const uint8_t ext_x = (instr->prefixes & PREFIX_REXX) ? 8 : 0;
    const uint8_t ext_b = (instr->prefixes & PREFIX_REXB) ? 8 : 0;
#else
    const uint8_t ext_r = 0;
    const uint8_t ext_x = 0;
    const uint8_t ext_b = 0;
#endif

    if (out_o2 != NULL)
    {
        out_o2->type = OT_REG;
        out_o2->reg = reg_field + ext_r;
    }

    // mod == 3: r/m names a register, nothing else follows.
    if (mod == 3)
    {
        out_o1->type = OT_REG;
        out_o1->reg = rm + ext_b;
        return pos;
    }

    // rm == 4 announces a SIB byte. `scale` stays 0 when there is none,
    // which is how the code below tells the two cases apart.
    uint8_t scale = 0;
    uint8_t index = 0;
    uint8_t base = 0;
    if (rm == 4)
    {
        if (UNLIKELY(pos >= len))
        {
            return -1;
        }
        const uint8_t sib = buffer[pos++];
        scale = (uint8_t) ((sib >> 6) + 1);
        index = (uint8_t) (((sib >> 3) & 0x7) + ext_x);
        base = sib & 0x7;
    }

    const int disp32 = (mod == 2) || (mod == 0 && (rm == 5 || base == 5));
    if (mod == 1)
    {
        if (UNLIKELY(pos + 1 > len))
        {
            return -1;
        }
        instr->disp = (int8_t) LOAD_LE_1(&buffer[pos]);
        pos += 1;
    }
    else if (disp32)
    {
        if (UNLIKELY(pos + 4 > len))
        {
            return -1;
        }
        instr->disp = (int32_t) LOAD_LE_4(&buffer[pos]);
        pos += 4;
    }
    else
    {
        instr->disp = 0;
    }

    out_o1->type = OT_MEM;

    if (scale == 0)
    {
        // No SIB byte: plain base register, or disp32-only for mod=0/rm=5.
        instr->scale = 0;
        if (mod == 0 && rm == 5)
        {
#if defined(ARCH_X86_64)
            out_o1->reg = RI_IP;
#else
            out_o1->reg = REG_NONE;
#endif
        }
        else
        {
            out_o1->reg = rm + ext_b;
        }
        return pos;
    }

    // Index 4 (without REX.X) means "no index register".
    if (index == 4)
    {
        instr->scale = 0;
    }
    else
    {
        instr->scale = scale;
        instr->sreg = index;
    }

    // Base 5 with mod == 0 means "no base register, disp32 only".
    if (base == 5 && mod == 0)
    {
        out_o1->reg = REG_NONE;
    }
    else
    {
        out_o1->reg = base + ext_b;
    }
    return pos;
}
// In-table description of one instruction encoding. decode() casts a byte
// offset inside _decode_table to this struct, so the layout has to match what
// parseinstrs.py emits: a 16-bit mnemonic index followed by the 32-bit
// InstrFlags word (struct.pack("<HL", ...)).
struct InstrDesc
{
// Mnemonic index; copied to Instr.type.
uint16_t type;
// Four 2-bit operand slots: ModRM r/m (bits 0-1), ModRM reg (2-3),
// VEX.vvvv (4-5), implicit register (6-7). See DESC_*_IDX below.
uint8_t operand_indices;
// Four 2-bit size selectors, one per operand, indexing the operand_sizes
// array built in decode().
uint8_t operand_sizes;
// Immediate description: operand slot (bits 0-1), control (bits 4-6),
// byte-sized flag (bit 7).
uint8_t immediate;
// Operand size is fixed to one byte.
uint32_t gp_size_8 : 1;
// Operand size defaults to 8 bytes on x86-64 builds.
uint32_t gp_size_def64 : 1;
// Report the operand size in Instr.width.
uint32_t gp_instr_width : 1;
// log2 of the size used for operands with size selector 1.
uint32_t gp_fixed_operand_size : 3;
} __attribute__((packed));
// Operand slots are stored XOR 3, so that a stored value of 0 means
// "operand not present" and the *_IDX macros recover the real index.
#define DESC_HAS_MODRM(desc) (((desc)->operand_indices & (3 << 0)) != 0)
#define DESC_MODRM_IDX(desc) ((((desc)->operand_indices >> 0) & 3) ^ 3)
#define DESC_HAS_MODREG(desc) (((desc)->operand_indices & (3 << 2)) != 0)
#define DESC_MODREG_IDX(desc) ((((desc)->operand_indices >> 2) & 3) ^ 3)
#define DESC_HAS_VEXREG(desc) (((desc)->operand_indices & (3 << 4)) != 0)
#define DESC_VEXREG_IDX(desc) ((((desc)->operand_indices >> 4) & 3) ^ 3)
#define DESC_HAS_IMPLICIT(desc) (((desc)->operand_indices & (3 << 6)) != 0)
#define DESC_IMPLICIT_IDX(desc) ((((desc)->operand_indices >> 6) & 3) ^ 3)
// Immediate control value; decode() treats 0 as "no immediate".
#define DESC_IMM_CONTROL(desc) (((desc)->immediate >> 4) & 0x7)
#define DESC_IMM_IDX(desc) (((desc)->immediate & 3) ^ 3)
#define DESC_IMM_BYTE(desc) (((desc)->immediate >> 7) & 1)
/**
 * Decode a single instruction.
 *
 * \param buffer Bytes to decode; decoding starts at buffer[0].
 * \param len Number of readable bytes in buffer.
 * \param instr Receives the decoded instruction. The operands and the
 *        scale/sreg/immediate/disp fields are reset on entry, so its
 *        contents are not meaningful when decoding fails.
 * \returns The number of bytes consumed, or -1 if the bytes do not form a
 *          known instruction or the buffer ends before the instruction does.
 **/
int
decode(const uint8_t* buffer, int len, Instr* instr)
{
    int retval;
    int off = 0;
    uint8_t vex_operand = 0;
    PrefixSet prefixes = 0;

    __builtin_memset(instr->operands, 0, sizeof(instr->operands));
    // These fields are only written for some encodings; give them a defined
    // value so callers never read stale data.
    instr->scale = 0;
    instr->sreg = 0;
    instr->immediate = 0;
    instr->disp = 0;

    retval = decode_prefixes(buffer + off, len - off, &prefixes, &vex_operand);
    // At least one opcode byte has to follow the prefixes.
    if (UNLIKELY(retval < 0 || off + retval >= len))
    {
        return -1;
    }
    off += retval;

    const uint16_t* table = (const uint16_t*) _decode_table;
    uint32_t kind = ENTRY_TABLE256;

    // A VEX prefix implies the escape bytes; start in the matching table.
    if (UNLIKELY(prefixes & PREFIX_ESC_MASK))
    {
        uint32_t escape = prefixes & PREFIX_ESC_MASK;
        table = (const uint16_t*) &_decode_table[table[0x0F] & ~7];
        if (escape == PREFIX_ESC_0F38)
        {
            table = (const uint16_t*) &_decode_table[table[0x38] & ~7];
        }
        else if (escape == PREFIX_ESC_0F3A)
        {
            table = (const uint16_t*) &_decode_table[table[0x3A] & ~7];
        }
    }

    // Walk the lookup tables until an instruction description is reached.
    // Only the byte-indexed tables need another input byte; a prefix table
    // is resolved from the already-decoded prefixes, so it must also work
    // when the opcode was the last byte of the buffer.
    while (kind != ENTRY_INSTR)
    {
        uint16_t entry = 0;

        if (kind == ENTRY_TABLE_PREFIX)
        {
            uint8_t index = 0;
            if (prefixes & PREFIX_OPSZ)
            {
                index = 1;
            }
            else if (prefixes & PREFIX_REP)
            {
                index = 2;
            }
            else if (prefixes & PREFIX_REPNZ)
            {
                index = 3;
            }
#if defined(ARCH_X86_64)
            index |= prefixes & PREFIX_REXW ? (1 << 2) : 0;
#endif
            index |= prefixes & PREFIX_VEX ? (1 << 3) : 0;
            // If a prefix is mandatory and used as opcode extension, it has no
            // further effect on the instruction. This is especially important
            // for the 0x66 prefix, which could otherwise override the operand
            // size of general purpose registers.
            prefixes &= ~(PREFIX_OPSZ | PREFIX_REPNZ | PREFIX_REP);
            entry = table[index];
        }
        else if (kind == ENTRY_TABLE256 || kind == ENTRY_TABLE8 ||
                 kind == ENTRY_TABLE72)
        {
            if (UNLIKELY(off >= len))
            {
                return -1;
            }

            if (kind == ENTRY_TABLE256)
            {
                entry = table[buffer[off++]];
            }
            else if (kind == ENTRY_TABLE8)
            {
                // Indexed by ModRM.reg; the ModRM byte is consumed later.
                entry = table[(buffer[off] >> 3) & 7];
            }
            else
            {
                // Entries 8..71 cover the ModRM bytes 0xc0..0xff, which are
                // consumed as part of the opcode. Everything else falls back
                // to the ModRM.reg lookup.
                if ((buffer[off] & 0xc0) == 0xc0)
                {
                    entry = table[buffer[off] - 0xb8];
                    if ((entry & ENTRY_MASK) != ENTRY_NONE)
                    {
                        off++;
                    }
                    else
                    {
                        entry = table[(buffer[off] >> 3) & 7];
                    }
                }
                else
                {
                    entry = table[(buffer[off] >> 3) & 7];
                }
            }
        }
        else
        {
            // ENTRY_NONE (or an unknown kind): no such instruction.
            return -1;
        }

        kind = entry & ENTRY_MASK;
        table = (const uint16_t*) &_decode_table[entry & ~7];
    }

    const struct InstrDesc* desc = (const struct InstrDesc*) table;

    instr->type = desc->type;
    instr->prefixes = prefixes;
    instr->address = (uintptr_t) buffer;

    if (prefixes & PREFIX_SEG_FS)
    {
        instr->segment = RI_FS;
    }
    else if (prefixes & PREFIX_SEG_GS)
    {
        instr->segment = RI_GS;
    }
    else
    {
        instr->segment = RI_DS;
    }

    uint8_t op_size = 0;
    if (desc->gp_size_8)
    {
        op_size = 1;
    }
#if defined(ARCH_X86_64)
    else if (prefixes & PREFIX_REXW)
    {
        op_size = 8;
    }
#endif
    else if (prefixes & PREFIX_OPSZ)
    {
        op_size = 2;
    }
#if defined(ARCH_X86_64)
    else if (desc->gp_size_def64)
    {
        op_size = 8;
    }
#endif
    else
    {
        op_size = 4;
    }

    if (UNLIKELY(desc->gp_instr_width))
    {
        instr->width = op_size;
    }
    else
    {
        instr->width = 0;
    }

    uint8_t vec_size = 16;
    if (prefixes & PREFIX_VEXL)
    {
        vec_size = 32;
    }

    // Operand size selectors: none, fixed, general-purpose, vector.
    uint8_t operand_sizes[4] = {
        0, 1 << desc->gp_fixed_operand_size, op_size, vec_size
    };

    for (int i = 0; i < 4; i++)
    {
        uint8_t enc_size = (desc->operand_sizes >> 2 * i) & 3;
        instr->operands[i].size = operand_sizes[enc_size];
    }

    if (UNLIKELY(DESC_HAS_IMPLICIT(desc)))
    {
        struct Operand* operand = &instr->operands[DESC_IMPLICIT_IDX(desc)];
        operand->type = OT_REG;
        operand->reg = 0;
    }

    if (DESC_HAS_MODRM(desc))
    {
        struct Operand* operand1 = &instr->operands[DESC_MODRM_IDX(desc)];
        struct Operand* operand2 = NULL;
        if (DESC_HAS_MODREG(desc))
        {
            operand2 = &instr->operands[DESC_MODREG_IDX(desc)];
        }
        retval = decode_modrm(buffer + off, len - off, instr,
                              operand1, operand2);
        if (UNLIKELY(retval < 0))
        {
            return -1;
        }
        off += retval;
    }
    else if (UNLIKELY(DESC_HAS_MODREG(desc)))
    {
        // If there is no ModRM, but a Mod-Reg, its opcode-encoded.
        struct Operand* operand = &instr->operands[DESC_MODREG_IDX(desc)];
        uint8_t reg_idx = buffer[off - 1] & 7;
#if defined(ARCH_X86_64)
        reg_idx += prefixes & PREFIX_REXB ? 8 : 0;
#endif
        operand->type = OT_REG;
        operand->reg = reg_idx;
    }

    if (UNLIKELY(DESC_HAS_VEXREG(desc)))
    {
        struct Operand* operand = &instr->operands[DESC_VEXREG_IDX(desc)];
        operand->type = OT_REG;
        operand->reg = vex_operand;
    }

    uint32_t imm_control = DESC_IMM_CONTROL(desc);
    if (UNLIKELY(imm_control == 1))
    {
        // Implicit constant 1; no bytes are consumed.
        struct Operand* operand = &instr->operands[DESC_IMM_IDX(desc)];
        operand->type = OT_IMM;
        operand->size = 1;
        instr->immediate = 1;
    }
    else if (UNLIKELY(imm_control == 2))
    {
        // Absolute memory offset encoded in place of an immediate.
        struct Operand* operand = &instr->operands[DESC_IMM_IDX(desc)];
        operand->type = OT_MEM;
        operand->reg = REG_NONE;
        operand->size = op_size;
        instr->scale = 0;
        // TODO: Address size overrides
#if defined(ARCH_386)
        if (UNLIKELY(off + 4 > len))
        {
            return -1;
        }
        instr->disp = LOAD_LE_4(&buffer[off]);
        off += 4;
#else
        if (UNLIKELY(off + 8 > len))
        {
            return -1;
        }
        instr->disp = LOAD_LE_8(&buffer[off]);
        off += 8;
#endif
    }
    else if (UNLIKELY(imm_control != 0))
    {
        uint8_t imm_size;
        if (DESC_IMM_BYTE(desc))
        {
            imm_size = 1;
        }
        else if (UNLIKELY(instr->type == IT_RET_IMM))
        {
            imm_size = 2;
        }
        else if (UNLIKELY(instr->type == IT_ENTER))
        {
            imm_size = 3;
        }
#if defined(ARCH_X86_64)
        // REX.W takes precedence over the 0x66 prefix, exactly as in the
        // operand size selection above. Only MOVABS carries a full 64-bit
        // immediate; everything else uses a 32-bit one.
        else if (prefixes & PREFIX_REXW)
        {
            imm_size = instr->type == IT_MOVABS_IMM ? 8 : 4;
        }
#endif
        else if (prefixes & PREFIX_OPSZ)
        {
            imm_size = 2;
        }
        else
        {
            imm_size = 4;
        }

        if (UNLIKELY(off + imm_size > len))
        {
            return -1;
        }

        // Immediates are sign-extended, except for the 3-byte ENTER form.
        if (imm_size == 1)
        {
            instr->immediate = (int8_t) LOAD_LE_1(&buffer[off]);
        }
        else if (imm_size == 2)
        {
            instr->immediate = (int16_t) LOAD_LE_2(&buffer[off]);
        }
        else if (imm_size == 3)
        {
            instr->immediate = LOAD_LE_2(&buffer[off]);
            instr->immediate |= LOAD_LE_1(&buffer[off + 2]) << 16;
        }
        else if (imm_size == 4)
        {
            instr->immediate = (int32_t) LOAD_LE_4(&buffer[off]);
        }
#if defined(ARCH_X86_64)
        else if (imm_size == 8)
        {
            instr->immediate = (int64_t) LOAD_LE_8(&buffer[off]);
        }
#endif
        off += imm_size;

        // Relative offset: make it absolute using the address of the byte
        // following the immediate.
        if (imm_control == 4)
        {
            instr->immediate += (uintptr_t) buffer + off;
        }

        struct Operand* operand = &instr->operands[DESC_IMM_IDX(desc)];
        if (UNLIKELY(imm_control == 5))
        {
            // Register operand encoded in the upper four immediate bits.
            operand->type = OT_REG;
            operand->reg = (instr->immediate & 0xf0) >> 4;
        }
        else
        {
            operand->type = OT_IMM;
        }
    }

    instr->size = off;

    return off;
}

150
decode.h Normal file
View File

@@ -0,0 +1,150 @@
#ifndef ARMX86_DECODE_H
#define ARMX86_DECODE_H
#include <stddef.h>
#include <stdint.h>
// Fallback definition of ssize_t.
// NOTE(review): #ifndef only sees macros, so this also takes effect when
// ssize_t is provided as a typedef by a system header -- confirm this is
// intended. ssize_t is not used in the visible part of this header.
#ifndef ssize_t
#define ssize_t intptr_t
#endif
// Select the mnemonic section of the generated include file and turn every
// MNEMONIC(name, value) line into an enumerator IT_<name> = value.
#define DECODE_TABLE_MNEMONICS
#define MNEMONIC(name,value) IT_ ## name = value,
enum
{
#include <decode-table.inc>
};
#undef DECODE_TABLE_MNEMONICS
#undef MNEMONIC
// Register numbers as stored in Operand.reg and Instr.segment. The enum holds
// several overlapping numbering spaces that each restart at 0: the byte
// registers, the general purpose registers and the segment registers.
enum RegIndex {
// Byte registers.
RI_AL = 0,
RI_CL,
RI_DL,
RI_BL,
RI_AH,
RI_CH,
RI_DH,
RI_BH,
// General purpose registers.
RI_AX = 0,
RI_CX,
RI_DX,
RI_BX,
RI_SP,
RI_BP,
RI_SI,
RI_DI,
#if defined(ARCH_X86_64)
RI_R8,
RI_R9,
RI_R10,
RI_R11,
RI_R12,
RI_R13,
RI_R14,
RI_R15,
#endif
// EIP cannot be encoded in Protected/Compatibility Mode
#if defined(ARCH_X86_64)
// Used by decode_modrm() as base register of RIP-relative operands.
RI_IP = 0x10,
#endif
// Segment registers.
RI_ES = 0,
RI_CS,
RI_SS,
RI_DS,
RI_FS,
RI_GS,
};
typedef uint8_t Reg;
#define reg_index(reg) (reg)
#define reg_is_none(reg) ((reg) == REG_NONE)
// Marks an absent register; all six bits of Operand.reg are set.
#define REG_NONE (0x3f)
// Bit set of the prefixes found in front of an instruction, as collected by
// decode_prefixes(). Fields of a VEX prefix are reported through the
// equivalent REX, escape and legacy prefix bits.
enum PrefixSet
{
PREFIX_SEG_FS = 1 << 0,
PREFIX_SEG_GS = 1 << 1,
PREFIX_SEG_CS = 1 << 12,
PREFIX_OPSZ = 1 << 2,
PREFIX_ADDRSZ = 1 << 3,
PREFIX_LOCK = 1 << 4,
PREFIX_REPNZ = 1 << 5,
PREFIX_REP = 1 << 6,
#if defined(ARCH_X86_64)
// PREFIX_REX is set for any REX prefix; the four bits after it mirror
// REX.B/X/R/W (also set from an inverted VEX.B/X/R and from VEX.W).
PREFIX_REX = 1 << 7,
PREFIX_REXB = 1 << 8,
PREFIX_REXX = 1 << 9,
PREFIX_REXR = 1 << 10,
PREFIX_REXW = 1 << 11,
#endif
// Two-bit field (bits 13-14) naming the opcode map selected by a VEX
// prefix; extract it with PREFIX_ESC_MASK.
PREFIX_ESC_NONE = 0 << 13,
PREFIX_ESC_0F = 1 << 13,
PREFIX_ESC_0F38 = 2 << 13,
PREFIX_ESC_0F3A = 3 << 13,
PREFIX_ESC_MASK = 3 << 13,
PREFIX_VEX = 1 << 15,
// VEX.L; decode() selects a vector size of 32 instead of 16 bytes.
PREFIX_VEXL = 1 << 16,
};
typedef enum PrefixSet PrefixSet;
// Kind of an operand. OT_NONE is 0, so zero-filled operands (as produced at
// the start of decode()) are absent.
enum OperandType
{
OT_NONE = 0,
OT_REG = 1,
OT_IMM = 2,
OT_MEM = 3,
};
// One decoded operand, two bytes in size.
struct Operand
{
// One of enum OperandType.
uint8_t type : 2;
// Register number for OT_REG, base register for OT_MEM (REG_NONE if there
// is none).
uint8_t reg : 6;
// Operand size in bytes; 0 if no size is associated with the operand.
uint8_t size;
};
// A decoded instruction, as filled in by decode().
struct Instr
{
// Mnemonic, one of the IT_* enumerators.
uint16_t type;
// Up to four operands; unused slots have type OT_NONE.
struct Operand operands[4];
// Segment register: RI_FS or RI_GS if such an override prefix is present,
// RI_DS otherwise.
uint8_t segment : 3;
// Operand size in bytes for instructions flagged INSTR_WIDTH in the
// instruction table, 0 for all others.
uint8_t width : 5;
/**
* Encoded as 1 << (scale - 1) **or** no scaled register at all if zero.
**/
uint8_t scale : 3;
// Index register of a memory operand; only meaningful if scale is non-zero.
uint8_t sreg : 5;
// Prefixes of the instruction. Mandatory prefixes that selected the
// instruction in a prefix table are removed by decode().
PrefixSet prefixes;
// Immediate value; for relative operands decode() adds the address of the
// following instruction.
size_t immediate;
// Displacement of the memory operand.
intptr_t disp;
// Address of the first byte of the instruction (the buffer pointer passed
// to decode()).
uintptr_t address;
// Number of bytes decode() consumed, stored in four bits.
uint32_t size : 4;
};
typedef struct Instr Instr;
// Accessors for struct Instr. The INSTR_HAS_* macros evaluate to the masked
// prefix bit, i.e. to a non-zero value (not necessarily 1) if the prefix is
// present.
#define INSTR_SEGMENT(instr) ((instr)->segment)
#define INSTR_WIDTH(instr) ((instr)->width)
#define INSTR_HAS_REP(instr) ((instr)->prefixes & PREFIX_REP)
#define INSTR_HAS_REPNZ(instr) ((instr)->prefixes & PREFIX_REPNZ)
#define INSTR_HAS_LOCK(instr) ((instr)->prefixes & PREFIX_LOCK)
#define INSTR_HAS_ADDRSZ(instr) ((instr)->prefixes & PREFIX_ADDRSZ)
#if defined(ARCH_X86_64)
#define INSTR_HAS_REX(instr) ((instr)->prefixes & PREFIX_REX)
#endif
// Decodes one instruction from buffer (at most len bytes) into out_instr.
// Returns the number of bytes consumed, or -1 on failure.
int decode(const uint8_t* buffer, int len, Instr* out_instr);
// Writes a NUL-terminated textual form of instr into buffer.
void instr_format(const Instr* instr, char buffer[128]);
// NOTE(review): no definition of instr_print is visible in this commit's
// sources -- confirm it exists before relying on it.
void instr_print(const Instr* instr) __attribute__((deprecated));
#endif

168
format.c Normal file
View File

@@ -0,0 +1,168 @@
#include <stddef.h>
#include <stdint.h>
#include <decode.h>
// All mnemonic names in one string literal, separated by NUL characters
// (generated by parseinstrs.py).
#define DECODE_TABLE_STRTAB1
static const char* _mnemonic_str =
#include <decode-table.inc>
;
#undef DECODE_TABLE_STRTAB1
// Start offset of every mnemonic inside _mnemonic_str, indexed by the
// mnemonic value stored in Instr.type.
#define DECODE_TABLE_STRTAB2
static const uint16_t _mnemonic_offs[] = {
#include <decode-table.inc>
};
#undef DECODE_TABLE_STRTAB2
// Append the decimal representation of `value` at *cur and advance *cur past
// the written digits. No NUL terminator is written.
static
void
instr_format_decimal(char** cur, uint32_t value)
{
    char digits[32];
    char* const end = digits + sizeof(digits);
    char* first = end;

    // Produce the digits back to front; the do/while also covers zero.
    do
    {
        *(--first) = (char) ('0' + value % 10);
        value /= 10;
    } while (value != 0);

    size_t count = (size_t) (end - first);
    __builtin_memcpy(*cur, first, count);
    *cur += count;
}
// Append `value` as lower-case hexadecimal with a "0x" prefix at *cur and
// advance *cur past the written characters. No NUL terminator is written.
static
void
instr_format_hex(char** cur, size_t value)
{
    static const char hex_digits[] = "0123456789abcdef";
    char text[32];
    char* const end = text + sizeof(text);
    char* first = end;

    // Produce the nibbles back to front; the do/while also covers zero.
    do
    {
        *(--first) = hex_digits[value & 0xf];
        value >>= 4;
    } while (value != 0);

    *(--first) = 'x';
    *(--first) = '0';

    size_t count = (size_t) (end - first);
    __builtin_memcpy(*cur, first, count);
    *cur += count;
}
/**
 * Format a decoded instruction as text.
 *
 * The output has the form "[MNEMONIC_width KIND size:value ...]", where KIND
 * is REG, IMM or MEM. Registers are printed as decimal numbers, immediates
 * and displacements in hexadecimal. Memory operands are printed as
 * "base:scale*index:disp"; the base and the scaled index are omitted if
 * absent. Prefixes are not part of the output.
 *
 * \param instr The instruction to format.
 * \param buffer Receives the NUL-terminated string. In builds without NDEBUG
 *        the function traps if more than 128 bytes were written.
 **/
void
instr_format(const Instr* instr, char buffer[128])
{
    char* cur = buffer;
    *(cur++) = '[';

    const char* mnemonic = &_mnemonic_str[_mnemonic_offs[instr->type]];
    while (*mnemonic)
    {
        *(cur++) = *(mnemonic++);
    }

    if (instr->width != 0)
    {
        *(cur++) = '_';
        instr_format_decimal(&cur, instr->width);
    }

    for (int i = 0; i < 4; i++)
    {
        const struct Operand* operand = &instr->operands[i];
        if (operand->type == OT_NONE)
        {
            break;
        }

        // The operand type (1..3) selects one of the 4-character tags.
        __builtin_memcpy(cur, " REG IMM MEM" + operand->type * 4 - 4, 4);
        cur += 4;
        instr_format_decimal(&cur, operand->size);
        *(cur++) = ':';

        switch (operand->type)
        {
        case OT_REG:
            instr_format_decimal(&cur, reg_index(operand->reg));
            break;
        case OT_IMM:
        {
            // Cut the sign-extended immediate down to the operand size.
            size_t immediate = instr->immediate;
            if (operand->size == 1)
            {
                immediate &= 0xff;
            }
            else if (operand->size == 2)
            {
                immediate &= 0xffff;
            }
#if defined(ARCH_X86_64)
            else if (operand->size == 4)
            {
                immediate &= 0xffffffff;
            }
#endif
            instr_format_hex(&cur, immediate);
            break;
        }
        case OT_MEM:
            if (!reg_is_none(operand->reg))
            {
                instr_format_decimal(&cur, reg_index(operand->reg));
                *(cur++) = ':';
            }
            if (instr->scale != 0)
            {
                uint8_t scale = 1 << (instr->scale - 1);
                instr_format_decimal(&cur, scale);
                *(cur++) = '*';
                instr_format_decimal(&cur, reg_index(instr->sreg));
                *(cur++) = ':';
            }
            if (instr->disp < 0)
            {
                *(cur++) = '-';
                // Negate in unsigned arithmetic: negating the most negative
                // displacement as a signed value would overflow.
                instr_format_hex(&cur, (size_t) 0 - (size_t) instr->disp);
            }
            else
            {
                instr_format_hex(&cur, instr->disp);
            }
            break;
        case OT_NONE:
        default:
            break;
        }
    }

    *(cur++) = ']';
    *(cur++) = '\0';

#ifndef NDEBUG
    if (cur - buffer > 128)
    {
        __builtin_trap();
    }
#endif
}

1177
instrs.txt Normal file

File diff suppressed because it is too large Load Diff

52
meson.build Normal file
View File

@@ -0,0 +1,52 @@
# Build definition for libx86decode: generates the decode tables with
# parseinstrs.py and builds a static library from decode.c and format.c.
project('libx86decode', ['c'], default_options: ['warning_level=3', 'c_std=c99'])
# Required to run the table generator.
python3 = find_program('python3')
# Extra diagnostics on top of the highest warning level.
if get_option('warning_level').to_int() >= 3
add_project_arguments(['-Wmissing-field-initializers',
'-Wunused-parameter',
'-Wold-style-definition',
'-Wmissing-declarations',
'-Wmissing-prototypes',
'-Wmissing-noreturn',
'-Wshadow',
'-Wpointer-arith',
'-Wcast-align',
'-Wwrite-strings',
'-Winline',
'-Wformat-nonliteral',
'-Wformat-security',
'-Wswitch-default',
'-Winit-self',
'-Wnested-externs',
'-Wstrict-prototypes',
'-Wmissing-include-dirs',
'-Wundef',
'-Waggregate-return',
'-Wredundant-decls',
'-Wno-overlength-strings',
'-Wmissing-format-attribute'],
language: 'c')
endif
# The decoder is built for exactly one architecture, selected by the pointer
# size of the target: 4 bytes -> ARCH_386, 8 bytes -> ARCH_X86_64.
c_compiler = meson.get_compiler('c')
pointer_size = c_compiler.sizeof('void*')
if pointer_size == 4
add_project_arguments(['-DARCH_386'], language: 'c')
elif pointer_size == 8
add_project_arguments(['-DARCH_X86_64'], language: 'c')
else
error('Invalid pointer size')
endif
# Runs "python3 parseinstrs.py instrs.txt decode-table.inc".
instr_data = custom_target('tables',
command: [python3, '@INPUT0@', '@INPUT1@', '@OUTPUT@'],
input: files('parseinstrs.py', 'instrs.txt'),
output: ['decode-table.inc'])
libdecode = static_library('x86decode', 'decode.c', 'format.c', instr_data)
# Dependency object for users of the library; it carries the generated table
# as a source because decode.h includes it.
libx86decode = declare_dependency(link_with: libdecode,
include_directories: include_directories('.'),
sources: instr_data)
subdir('tests')

316
parseinstrs.py Normal file
View File

@@ -0,0 +1,316 @@
#!/usr/bin/python3
from binascii import unhexlify
from collections import OrderedDict, defaultdict
from copy import copy
from enum import Enum, IntEnum
from itertools import accumulate
import struct
import sys
def bitstruct(name, fields):
    """Create a class describing a packed sequence of bit fields.

    ``fields`` is a sequence of ``"name:width"`` strings, listed from the
    least significant bit upwards. Instances accept the field values as
    keyword arguments (missing fields default to 0) and expose them as
    attributes; ``_encode()`` packs them into one integer, truncating every
    value to its field width. The class attribute ``_encode_size`` holds the
    total number of bits.
    """
    field_names, widths = zip(*(spec.split(":") for spec in fields))
    widths = tuple(int(width) for width in widths)
    # Start bit of every field, followed by the total width.
    starts = (0,) + tuple(accumulate(widths))

    class _BitStruct:
        def __init__(self, **kwargs):
            for field_name in field_names:
                setattr(self, field_name, kwargs.get(field_name, 0))

        def _encode(self):
            packed = 0
            for field_name, width, start in zip(field_names, widths, starts):
                mask = (1 << width) - 1
                packed += (getattr(self, field_name) & mask) << start
            return packed

    _BitStruct.__name__ = name
    _BitStruct._encode_size = starts[-1]
    return _BitStruct
# Flags word stored after the 16-bit mnemonic index of every instruction
# description. The field order (least significant bit first) has to match
# struct InstrDesc in decode.c: one byte of operand indices, one byte of
# operand sizes, one byte describing the immediate, then the size flags.
InstrFlags = bitstruct("InstrFlags", [
# Operand indices, stored XOR 3 so that 0 means "not present".
"modrm_idx:2",
"modreg_idx:2",
"vexreg_idx:2",
"zeroreg_idx:2",
# Four 2-bit size selectors, one per operand.
"operand_sizes:8",
"imm_idx:2",
# NOTE(review): imm_size is never set in this script and is not read by the
# DESC_* macros in decode.c -- confirm whether it is still needed.
"imm_size:2",
"imm_control:3",
"imm_byte:1",
"gp_size_8:1",
"gp_size_def64:1",
"gp_instr_width:1",
"gp_fixed_operand_size:3",
])
# The flags are written as a 32-bit value.
assert InstrFlags._encode_size <= 32
# Operand encodings, keyed by the name used in the first column of an
# instruction description. Operand indices are written as "index ^ 3" so that
# the default 0 means "operand not present".
#
# imm_control values, as interpreted by decode() in decode.c:
#   1  implicit constant 1, no immediate bytes
#   2  absolute memory offset in place of an immediate
#   3  plain immediate
#   4  immediate relative to the end of the instruction
#   5  register encoded in bits 7:4 of an immediate byte
ENCODINGS = {
"NP": InstrFlags(),
"M": InstrFlags(modrm_idx=0^3),
"M1": InstrFlags(modrm_idx=0^3, imm_idx=1^3, imm_control=1),
"MI": InstrFlags(modrm_idx=0^3, imm_idx=1^3, imm_control=3),
"MR": InstrFlags(modrm_idx=0^3, modreg_idx=1^3),
"RM": InstrFlags(modrm_idx=1^3, modreg_idx=0^3),
"RMA": InstrFlags(modrm_idx=1^3, modreg_idx=0^3, zeroreg_idx=2^3),
"MRI": InstrFlags(modrm_idx=0^3, modreg_idx=1^3, imm_idx=2^3, imm_control=3),
"RMI": InstrFlags(modrm_idx=1^3, modreg_idx=0^3, imm_idx=2^3, imm_control=3),
"I": InstrFlags(imm_idx=0^3, imm_control=3),
"IA": InstrFlags(zeroreg_idx=0^3, imm_idx=1^3, imm_control=3),
# Without a ModRM operand, modreg_idx denotes a register encoded in the low
# three bits of the opcode byte.
"O": InstrFlags(modreg_idx=0^3),
"OI": InstrFlags(modreg_idx=0^3, imm_idx=1^3, imm_control=3),
"OA": InstrFlags(modreg_idx=0^3, zeroreg_idx=1^3),
"AO": InstrFlags(modreg_idx=1^3, zeroreg_idx=0^3),
"D": InstrFlags(imm_idx=0^3, imm_control=4),
"FD": InstrFlags(zeroreg_idx=0^3, imm_idx=1^3, imm_control=2),
"TD": InstrFlags(zeroreg_idx=1^3, imm_idx=0^3, imm_control=2),
# Encodings using the VEX.vvvv register operand.
"RVM": InstrFlags(modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3),
"RVMI": InstrFlags(modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=3, imm_byte=1),
"RVMR": InstrFlags(modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=5, imm_byte=1),
"RMV": InstrFlags(modrm_idx=1^3, modreg_idx=0^3, vexreg_idx=2^3),
"VM": InstrFlags(modrm_idx=1^3, vexreg_idx=0^3),
"VMI": InstrFlags(modrm_idx=1^3, vexreg_idx=0^3, imm_idx=2^3, imm_control=3, imm_byte=1),
"MVR": InstrFlags(modrm_idx=0^3, modreg_idx=2^3, vexreg_idx=1^3),
}
# Operand kinds, mapped to (size selector, fixed size).
#
# The size selector is the 2-bit value stored per operand in operand_sizes;
# decode() in decode.c resolves it as: 0 = no size, 1 = fixed size,
# 2 = general purpose operand size, 3 = vector size. For selector 1 the
# second element is log2 of the size in bytes (stored in
# gp_fixed_operand_size); it is unused for the other selectors.
OPKIND_LOOKUP = {
"-": (0, 0),
"IMM": (2, 0),
"IMM8": (1, 0),
"IMM16": (1, 1),
"IMM32": (1, 2),
"GP": (2, 0),
"GP8": (1, 0),
"GP16": (1, 1),
"GP32": (1, 2),
"GP64": (1, 3),
"XMM": (3, 0),
"XMM8": (1, 0),
"XMM16": (1, 1),
"XMM32": (1, 2),
"XMM64": (1, 3),
"XMM128": (1, 4),
"XMM256": (1, 5),
"SREG": (0, 0),
"FPU": (0, 0),
}
def parse_desc(desc, ignore_flag):
    """Parse the description part of an instruction line.

    The whitespace-separated columns are: encoding name, four operand kinds,
    mnemonic, and any number of flags. Returns ``None`` if ``ignore_flag`` is
    among the flags, otherwise a ``(mnemonic, encoded flags)`` tuple.
    """
    columns = desc.split()
    misc_flags = columns[6:]
    if ignore_flag in misc_flags:
        return None

    fixed_sizes = set()
    size_selectors = 0
    for position, kind in enumerate(columns[1:5]):
        selector, fixed_size = OPKIND_LOOKUP[kind]
        if selector == 1:
            fixed_sizes.add(fixed_size)
        size_selectors |= selector << (2 * position)

    flags = copy(ENCODINGS[columns[0]])
    flags.operand_sizes = size_selectors
    if fixed_sizes:
        # There is room for a single fixed size per instruction only.
        flags.gp_fixed_operand_size = next(iter(fixed_sizes))

    # Miscellaneous flags, each setting one single-bit field.
    for flag_name, field in (("DEF64", "gp_size_def64"),
                             ("SIZE_8", "gp_size_8"),
                             ("INSTR_WIDTH", "gp_instr_width"),
                             ("IMM_8", "imm_byte")):
        if flag_name in misc_flags:
            setattr(flags, field, 1)

    return columns[5], flags._encode()
class EntryKind(Enum):
    """Kind of a decode table entry.

    The values are stored in the low three bits of every table entry and
    correspond to the ENTRY_* constants in decode.c.
    """

    NONE = 0
    INSTR = 1
    TABLE256 = 2
    TABLE8 = 3
    TABLE72 = 4
    TABLE_PREFIX = 5

    @property
    def table_length(self):
        """Number of entries of a table of this kind (0 for INSTR).

        Raises KeyError for NONE, which has no length.
        """
        lengths = {
            EntryKind.INSTR: 0,
            EntryKind.TABLE256: 256,
            EntryKind.TABLE8: 8,
            EntryKind.TABLE72: 72,
            EntryKind.TABLE_PREFIX: 16,
        }
        return lengths[self]
import re

# Opcode syntax: optional "[VEX.]<NP|66|F2|F3>.[W0.|W1.][L0.|L1.]" prefix
# part, the opcode bytes in hex, an optional ModRM extension ("/r" for the reg
# field, "//r" or "//xx" for the 72-entry table) and an optional "+" for a
# register encoded in the low three opcode bits.
opcode_regex = re.compile(r"^(?P<prefixes>(?P<vex>VEX\.)?(?P<legacy>NP|66|F2|F3)\.(?P<rexw>W[01]\.)?(?P<vexl>L[01]\.)?)?(?P<opcode>(?:[0-9a-f]{2})+)(?P<modrm>//?[0-7]|//[c-f][0-9a-f])?(?P<extended>\+)?$")


def parse_opcode(opcode_string):
    """
    Parse opcode string into a list of table paths.

    Every path is a tuple of (EntryKind, index) steps. More than one path is
    returned if the opcode covers several table slots: both REX.W variants of
    a prefixed opcode that does not specify W, or the eight register variants
    of a "+" opcode.
    """
    match = opcode_regex.match(opcode_string)
    if match is None:
        raise Exception("invalid opcode: '%s'" % opcode_string)
    parts = match.groupdict()

    extended = parts["extended"] is not None
    path = [(EntryKind.TABLE256, byte) for byte in unhexlify(parts["opcode"])]

    modrm_ext = parts["modrm"]
    if modrm_ext:
        if modrm_ext.startswith("//"):
            index = int(modrm_ext[2:], 16)
            assert (0 <= index <= 7) or (0xc0 <= index <= 0xff)
            # ModRM bytes 0xc0..0xff occupy the slots 8..71.
            if index >= 0xc0:
                index -= 0xb8
            path.append((EntryKind.TABLE72, index))
        else:
            path.append((EntryKind.TABLE8, int(modrm_ext[1:], 16)))

    if parts["prefixes"]:
        assert not extended
        # Prefix table index: bits 0-1 legacy prefix, bit 2 REX.W, bit 3 VEX.
        slot = {"NP": 0, "66": 1, "F3": 2, "F2": 3}[parts["legacy"]]
        if parts["vex"]:
            slot |= 1 << 3
        if parts["vexl"]:
            print("ignored mandatory VEX.L prefix for:", opcode_string)

        rexw = parts["rexw"]
        if rexw:
            slots = [slot | (1 << 2) if "W1" in rexw else slot]
        else:
            slots = [slot, slot | (1 << 2)]
        return [tuple(path) + ((EntryKind.TABLE_PREFIX, entry),)
                for entry in slots]

    if not extended:
        return [tuple(path)]

    last_kind, last_index = path[-1]
    assert last_kind in (EntryKind.TABLE256, EntryKind.TABLE72)
    assert last_index & 7 == 0

    head = tuple(path[:-1])
    return [head + ((last_kind, last_index + reg),) for reg in range(8)]
class Table:
    """Collects opcodes and lays them out as the binary decode table.

    ``data`` maps an internal name to ``(kind, payload)``: for tables the
    payload is a list of names (or None) per slot, for instructions it is the
    ``(mnemonic, flags)`` tuple. The insertion order is the layout order, and
    the root table comes first.
    """

    def __init__(self):
        self.data = OrderedDict()
        self.data["root"] = (EntryKind.TABLE256, [None] * 256)
        self.mnemonics = set()
        self.instrs = {}

    def compile(self):
        """Return ``(table bytes, sorted list of mnemonics)``."""
        mnemonics = sorted(self.mnemonics)

        # First pass: assign an 8-byte aligned offset to every item. Tables
        # take two bytes per slot, instruction descriptions six bytes.
        offsets = {}
        next_offset = 0
        stats = defaultdict(int)
        for name, (kind, _) in self.data.items():
            offsets[name] = next_offset
            stats[kind] += 1
            length = kind.table_length
            next_offset += length * 2 if length else 6
            next_offset = (next_offset + 7) & ~7
            # Offsets have to fit into a 16-bit table entry.
            assert next_offset < 0x10000

        # Second pass: emit the items, padding up to their offset.
        blob = bytearray()
        for name, (kind, payload) in self.data.items():
            if len(blob) < offsets[name]:
                blob += b"\0" * (offsets[name] - len(blob))
            assert len(blob) == offsets[name]

            if kind == EntryKind.INSTR:
                mnemonic, flags = payload[0], payload[1]
                blob += struct.pack("<HL", mnemonics.index(mnemonic), flags)
                continue

            for target in payload:
                if target is None:
                    word = 0
                else:
                    target_kind, _ = self.data[target]
                    word = (offsets[target] & ~7) | target_kind.value
                blob += struct.pack("<H", word)

        data = bytes(blob)
        print("%d bytes, %d mnemonics"%(len(data),len(mnemonics)), stats)
        return data, mnemonics

    def add_opcode(self, opcode, instrData):
        """Register ``instrData`` under the table path ``opcode``.

        ``opcode`` is a sequence of ``(kind, index)`` steps as produced by
        parse_opcode(); ``instrData`` is the ``(mnemonic, flags)`` tuple.
        Identical instruction data is stored once and shared.
        """
        steps = list(opcode)
        indices = [index for _, index in steps]
        # The kind of a step describes the table its index is looked up in,
        # so the table created for slot i has the kind of step i + 1.
        child_kinds = [kind for kind, _ in steps[1:]]
        leaf_index = indices[-1]

        name, table = ">", self.data["root"]
        for kind, byte in zip(child_kinds, indices):
            slots = table[1]
            if slots[byte] is None:
                name += "_{:02x}".format(byte)
                self.data[name] = kind, [None] * kind.table_length
                slots[byte] = name
            else:
                name = slots[byte]
            table = self.data[name]
            assert table[0] == kind

        leaf_slots = table[1]
        # An opcode can occur once only.
        assert leaf_slots[leaf_index] is None

        if instrData in self.instrs:
            leaf_slots[leaf_index] = self.instrs[instrData]
            return

        name += "_l{:02x}".format(leaf_index)
        leaf_slots[leaf_index] = name
        self.mnemonics.add(instrData[0])
        self.data[name] = EntryKind.INSTR, instrData
        self.instrs[instrData] = name
def generate_cpp_table(table):
    """Render a compiled table as preprocessor-selectable C source text.

    ``table.compile()`` must return ``(data, mnemonics)``. The returned string
    contains four sections, each guarded by its own macro:

    * ``DECODE_TABLE_DATA``: the table bytes as comma-separated ``0xNN`` items;
    * ``DECODE_TABLE_MNEMONICS``: one ``MNEMONIC(name, index)`` line per name;
    * ``DECODE_TABLE_STRTAB1``: all names in one string literal, joined by a
      literal ``\\0`` escape;
    * ``DECODE_TABLE_STRTAB2``: the start offset of every name inside that
      string, followed by the offset just past the last name.

    If none of the macros is defined, the emitted code triggers ``#error``.
    """
    blob, names = table.compile()

    # The comma-joined text is wrapped every 80 characters.
    hex_text = ",".join("0x{:02x}".format(octet) for octet in blob)
    hex_rows = [hex_text[pos:pos + 80] for pos in range(0, len(hex_text), 80)]

    # Running offsets; the "+ 1" accounts for the separator after each name.
    string_offsets = [0]
    for mnemonic in names:
        string_offsets.append(string_offsets[-1] + len(mnemonic) + 1)

    pieces = [
        "#if defined(DECODE_TABLE_DATA)\n",
        "\n".join(hex_rows) + "\n",
        "#elif defined(DECODE_TABLE_MNEMONICS)\n",
    ]
    for index, mnemonic in enumerate(names):
        pieces.append("MNEMONIC({}, {})\n".format(mnemonic, index))
    pieces += [
        "#elif defined(DECODE_TABLE_STRTAB1)\n",
        '"' + "\\0".join(names) + '"' + "\n",
        "#elif defined(DECODE_TABLE_STRTAB2)\n",
        ",".join(str(offset) for offset in string_offsets) + "\n",
        "#else\n",
        "#error \"unspecified decode table\"\n",
        "#endif\n",
    ]
    return "".join(pieces)
if __name__ == "__main__":
    # Usage: <script> <instruction list> <output file>
    # Fail with a readable message instead of an IndexError traceback.
    if len(sys.argv) < 3:
        sys.exit("usage: {} <instruction list> <output file>".format(sys.argv[0]))

    # Group the description strings by every opcode they expand to.
    entries = defaultdict(list)
    with open(sys.argv[1], "r") as f:
        for line in f.read().splitlines():
            # Skip blank lines (including whitespace-only ones, which would
            # otherwise break the unpacking below) and '#' comment lines.
            if not line.strip() or line[0] == "#":
                continue
            opcode_string, desc = line.split(maxsplit=1)
            for opcode in parse_opcode(opcode_string):
                entries[opcode].append(desc)

    # One table per architecture. Each table is paired with the mask that is
    # passed to parse_desc as ignore_mask: "ONLY64" for the 32-bit table and
    # "ONLY32" for the 64-bit table.
    # NOTE(review): parse_desc presumably returns None for descriptions that
    # carry the ignored mask -- confirm against its definition.
    table32 = Table()
    table64 = Table()
    masks = "ONLY64", "ONLY32"
    for opcode, descs in entries.items():
        for table, ignore_mask in zip((table32, table64), masks):
            parsed = [parse_desc(desc, ignore_mask) for desc in descs]
            parsed = [desc for desc in parsed if desc is not None]
            # At most one description may remain per opcode and table.
            assert len(parsed) <= 1
            if parsed:
                table.add_opcode(opcode, parsed[0])

    # Emit both tables into one file, selected by an architecture macro.
    tableFile2 = ""
    tableFile2 += "#if defined(ARCH_386)\n"
    tableFile2 += generate_cpp_table(table32)
    tableFile2 += "#elif defined(ARCH_X86_64)\n"
    tableFile2 += generate_cpp_table(table64)
    tableFile2 += "#else\n"
    tableFile2 += "#error \"unknown architecture\"\n"
    tableFile2 += "#endif\n"

    with open(sys.argv[2], "w") as f:
        f.write(tableFile2)

31
tests/common.sh Normal file
View File

@@ -0,0 +1,31 @@
# Positional arguments: $1 is the command run for every test case,
# $2 is the value compared against 32 and 64 by decode32/decode64.
driver="$1"
bits="$2"

# Running tallies of executed and failed cases.
total=0
failed=0
# decode HEX EXPECTED
# Run the driver on HEX and count the case as failed when the driver exits
# with a non-zero status or its output differs from EXPECTED. On failure the
# case arguments and the actual output are printed.
decode() {
    # Quoted so that a driver path or argument containing whitespace or glob
    # characters is passed through unchanged.
    output=$("$driver" "$1")
    result=$?
    total=$((total+1))
    if [ "$result" -ne 0 ] || [ "$output" != "$2" ]
    then
        failed=$((failed+1))
        echo "FAIL: decode $*"
        echo "======================================="
        echo "$output"
        echo "======================================="
    fi
}
# Like decode, but the case only runs when $bits is 32.
# $bits is quoted so that an empty value does not produce a malformed test.
decode32() {
    if [ "$bits" = 32 ]; then
        decode "$@"
    fi
}
# Like decode, but the case only runs when $bits is 64.
# $bits is quoted so that an empty value does not produce a malformed test.
decode64() {
    if [ "$bits" = 64 ]; then
        decode "$@"
    fi
}
# Source the case file given as $3; it calls decode/decode32/decode64.
# Quoted so that a path containing whitespace is sourced as one file.
. "$3"

# Summarize and report failure through the exit status.
if [ "$failed" -ne 0 ]
then
    echo "FAILED: ${failed}/${total} cases"
    exit 1
else
    echo "PASS: ${total} cases passed"
fi

9
tests/decode-enter.sh Normal file
View File

@@ -0,0 +1,9 @@
# ENTER (opcode c8) test vectors: hex input followed by the expected output.
# With the 66 prefix ENTER_2 is expected regardless of the tested width;
# without it ENTER_4 is expected for 32 bits and ENTER_8 for 64 bits.
# The three bytes after the opcode are expected as one little-endian IMM4.
decode 66c8000000 "[ENTER_2 IMM4:0x0]"
decode 66c8000f00 "[ENTER_2 IMM4:0xf00]"
decode 66c8000001 "[ENTER_2 IMM4:0x10000]"
decode32 c8000000 "[ENTER_4 IMM4:0x0]"
decode32 c8000f00 "[ENTER_4 IMM4:0xf00]"
decode32 c8000001 "[ENTER_4 IMM4:0x10000]"
decode64 c8000000 "[ENTER_8 IMM4:0x0]"
decode64 c8000f00 "[ENTER_8 IMM4:0xf00]"
decode64 c8000001 "[ENTER_8 IMM4:0x10000]"

2
tests/decode-imul.sh Normal file
View File

@@ -0,0 +1,2 @@
# IMUL with immediate: opcode 69 is followed by a four-byte immediate,
# opcode 6B by a one-byte immediate; both are expected to print as IMM4.
decode 69C708010000 "[IMUL3 REG4:0 REG4:7 IMM4:0x108]"
decode 6BC708 "[IMUL3 REG4:0 REG4:7 IMM4:0x8]"

12
tests/decode-inc.sh Normal file
View File

@@ -0,0 +1,12 @@
# INC test vectors.
# The single-byte forms (40, 43, 66 47) are only checked for 32 bits.
# The fe/ff forms are checked for both widths; the 48/49-prefixed forms
# expecting REG8 are only checked for 64 bits.
decode32 40 "[INC REG4:0]"
decode32 43 "[INC REG4:3]"
decode32 6647 "[INC REG2:7]"
decode fec0 "[INC REG1:0]"
decode fec4 "[INC REG1:4]"
decode ffc0 "[INC REG4:0]"
decode ffc4 "[INC REG4:4]"
decode 66ffc0 "[INC REG2:0]"
decode 66ffc4 "[INC REG2:4]"
decode64 48ffc0 "[INC REG8:0]"
decode64 48ffc4 "[INC REG8:4]"
decode64 49ffc7 "[INC REG8:15]"

5
tests/decode-movsx.sh Normal file
View File

@@ -0,0 +1,5 @@
# MOVSX test vectors: opcodes 0f be and 0f bf, with and without the 66
# prefix; the 48-prefixed forms (including opcode 63) expect a REG8
# destination and are only checked for 64 bits.
decode 660fbec2 "[MOVSX REG2:0 REG1:2]"
decode 0fbec2 "[MOVSX REG4:0 REG1:2]"
decode 0fbfc2 "[MOVSX REG4:0 REG2:2]"
decode64 480fbfc2 "[MOVSX REG8:0 REG2:2]"
decode64 4863c2 "[MOVSX REG8:0 REG4:2]"

12
tests/decode-ret.sh Normal file
View File

@@ -0,0 +1,12 @@
# RET (c3) and RET with immediate (c2) test vectors.
# With the 66 prefix the _2 variants are expected regardless of the tested
# width; without it _4 is expected for 32 bits and _8 for 64 bits.
# The two bytes after c2 are expected as one little-endian IMM2.
decode 66c3 "[RET_2]"
decode 66c20000 "[RET_IMM_2 IMM2:0x0]"
decode 66c20d00 "[RET_IMM_2 IMM2:0xd]"
decode 66c20dff "[RET_IMM_2 IMM2:0xff0d]"
decode32 c3 "[RET_4]"
decode32 c20000 "[RET_IMM_4 IMM2:0x0]"
decode32 c20d00 "[RET_IMM_4 IMM2:0xd]"
decode32 c20dff "[RET_IMM_4 IMM2:0xff0d]"
decode64 c3 "[RET_8]"
decode64 c20000 "[RET_IMM_8 IMM2:0x0]"
decode64 c20d00 "[RET_IMM_8 IMM2:0xd]"
decode64 c20dff "[RET_IMM_8 IMM2:0xff0d]"

1
tests/decode-sse-movq.sh Normal file
View File

@@ -0,0 +1 @@
# SSE MOVQ (f3 0f 7e) with a memory source operand; the expected output
# reports the operand as MEM8:4:0x8.
decode f30f7e5c2408 "[SSE_MOVQ_X2X REG8:3 MEM8:4:0x8]"

View File

@@ -0,0 +1 @@
# SSE SHUFPD (66 0f c6) with two register operands and a one-byte immediate.
decode 660fc6c001 "[SSE_SHUFPD REG16:0 REG16:0 IMM1:0x1]"

72
tests/driver.c Normal file
View File

@@ -0,0 +1,72 @@
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <decode.h>
/**
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Accepts '0'-'9', 'a'-'f' and 'A'-'F'. For any other character a message
 * containing the character code is printed and the process exits with
 * status 1.
 *
 * @param nibble the character to convert
 * @return the value of the digit, in the range 0 to 15
 */
static
uint8_t
parse_nibble(const char nibble)
{
    if (nibble >= '0' && nibble <= '9')
    {
        return (uint8_t) (nibble - '0');
    }
    else if (nibble >= 'a' && nibble <= 'f')
    {
        return (uint8_t) (nibble - 'a' + 10);
    }
    else if (nibble >= 'A' && nibble <= 'F')
    {
        return (uint8_t) (nibble - 'A' + 10);
    }

    // Go through unsigned char so that %x receives an unsigned value and a
    // byte >= 0x80 is not printed sign-extended where char is signed.
    printf("Invalid hexadecimal number: %x\n",
           (unsigned int) (unsigned char) nibble);
    exit(1);
}
/**
 * Test driver: decode the single instruction given as a hex string in
 * argv[1] and print its formatted representation.
 *
 * Returns 0 when decode() consumed exactly the given bytes. Returns -1 on a
 * usage error, when the buffer cannot be mapped, when the input does not fit
 * into the buffer, when decode() reports a negative result, or when the
 * number of bytes it reports differs from the input length.
 */
int
main(int argc, char** argv)
{
    if (argc != 2)
    {
        printf("usage: %s [instruction bytes]\n", argv[0]);
        return -1;
    }

    // NOTE(review): the fixed mapping address presumably keeps decoded
    // addresses reproducible between runs -- confirm.
    const size_t code_capacity = 0x2000;
    void* code = mmap((void*) 0x1238000, code_capacity, PROT_READ|PROT_WRITE,
                      MAP_PRIVATE|MAP_ANONYMOUS|MAP_FIXED, -1, 0);
    if (code == MAP_FAILED)
    {
        perror("mmap");
        return -1;
    }

    uint8_t* current_code = code;
    const uint8_t* code_end = current_code + code_capacity;
    char* hex = argv[1];

    // Two hex digits form one byte; a trailing unpaired digit is ignored.
    for (; *hex && *(hex + 1); hex += 2, current_code++)
    {
        // Never write past the end of the mapping.
        if (current_code == code_end)
        {
            puts("Too many instruction bytes.");
            return -1;
        }
        *current_code = (uint8_t) ((parse_nibble(hex[0]) << 4) | parse_nibble(hex[1]));
    }

    size_t length = (size_t) (current_code - (uint8_t*) code);

    Instr instr;
    int result = decode(code, length, &instr);
    if (result < 0)
    {
        puts("Decode failed.");
        return -1;
    }
    else if ((size_t) result != length)
    {
        printf("Decode used %u bytes, not %u.\n", (unsigned int) result, (unsigned int) length);
        return -1;
    }

    // NOTE(review): instr_format receives no buffer size; this assumes that
    // 128 bytes always suffice -- confirm against its implementation.
    char buffer[128];
    instr_format(&instr, buffer);
    puts(buffer);

    return 0;
}

22
tests/meson.build Normal file
View File

@@ -0,0 +1,22 @@
# Shell used to run the test scripts.
sh = find_program('sh')
# [test name, case script] pairs; one test is registered per entry below.
cases = [
['enter', 'decode-enter.sh'],
['imul', 'decode-imul.sh'],
['inc', 'decode-inc.sh'],
['movsx', 'decode-movsx.sh'],
['ret', 'decode-ret.sh'],
['sse-shufpd', 'decode-sse-shufpd.sh'],
['sse-movq', 'decode-sse-movq.sh'],
]
# Driver executable built from driver.c. libx86decode is not defined in this
# file and has to be provided by the build file that includes this one.
test_driver = executable('test_driver', 'driver.c',
dependencies: libx86decode,
c_args: ['-D_GNU_SOURCE'])
# Arguments for sh: common.sh as the script, then the driver path and
# pointer_size * 8 as a string (read by common.sh as $1 and $2).
# pointer_size is likewise not defined in this file.
test_args = files('common.sh') + [test_driver.full_path(), '@0@'.format(pointer_size * 8)]
# The case script is appended as the last argument ($3 in common.sh).
foreach case : cases
test(case[0], sh, args: test_args + files(case[1]))
endforeach