Files
test-repo/format.c
Alexis Engelke 9245a97248 instrs: Add several AMD-only instructions
- 3DNow! instructions have a trailing immediate byte which indicates the
  opcode. Decoding this with the existing table structure requires more
  effort (in particular, a new lookup table after decoding ModRM would
  be required). Given that AMD even removed 3DNow! over 10 years ago, it
  appears unlikely that this will ever be fully supported. Adding the
  RMI-encoded pseudo-instruction "3DNOW" just to support that opcode.
- FEMMS is a legacy 3DNow! instruction.
- EXTRQ/INSERTQ are instructions with an "unusual" encoding and
  operation mode. This is another instance of 16-bit immediates.
- SVM (AMD's variant of VMX) and SNP instructions are AMD-only.
2021-01-10 15:18:44 +01:00

383 lines
12 KiB
C

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <fadec.h>
// Look up the textual name of a register.
//
// rt:   register type (one of the FD_RT_* values handled below).
// ri:   register index within that type.
// size: register size in bytes; only consulted for FD_RT_GPL and FD_RT_VEC.
//
// Returns a pointer to a static string. Unknown register types give
// "(inv-ty)", unsupported sizes "(inv-sz)" and out-of-range indices
// "(inv-idx)"; some tables additionally contain "(inv)" placeholder entries.
static const char*
reg_name(unsigned rt, unsigned ri, unsigned size)
{
    // All names are stored in fixed 6-byte slots (5 characters + NUL).
    // The 16/32/64-bit general-purpose tables carry the instruction pointer
    // as an extra entry at index 16.
    static const char gpl8[][6] = {
        "al","cl","dl","bl","spl","bpl","sil","dil",
        "r8b","r9b","r10b","r11b","r12b","r13b","r14b","r15b" };
    static const char gpl16[][6] = {
        "ax","cx","dx","bx","sp","bp","si","di",
        "r8w","r9w","r10w","r11w","r12w","r13w","r14w","r15w","ip" };
    static const char gpl32[][6] = {
        "eax","ecx","edx","ebx","esp","ebp","esi","edi",
        "r8d","r9d","r10d","r11d","r12d","r13d","r14d","r15d","eip" };
    static const char gpl64[][6] = {
        "rax","rcx","rdx","rbx","rsp","rbp","rsi","rdi",
        "r8","r9","r10","r11","r12","r13","r14","r15","rip" };
    static const char gph[][6] = {
        "(inv)","(inv)","(inv)","(inv)","ah","ch","dh","bh" };
    static const char seg[][6] = { "es","cs","ss","ds","fs","gs" };
    static const char fpu[][6] = {
        "st(0)","st(1)","st(2)","st(3)","st(4)","st(5)","st(6)","st(7)" };
    static const char mmx[][6] = {
        "mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7" };
    static const char cr[][6] = {
        "cr0","(inv)","cr2","cr3","cr4","(inv)","(inv)","(inv)","cr8" };
    static const char dr[][6] = {
        "dr0","dr1","dr2","dr3","dr4","dr5","dr6","dr7" };
    static const char xmm[][6] = {
        "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7",
        "xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15" };
    static const char ymm[][6] = {
        "ymm0","ymm1","ymm2","ymm3","ymm4","ymm5","ymm6","ymm7",
        "ymm8","ymm9","ymm10","ymm11","ymm12","ymm13","ymm14","ymm15" };

    const char (* names)[6];
    unsigned count;
// Select a table together with its number of entries.
#define PICK(tab) (names = (tab), count = sizeof(tab) / sizeof(*(tab)))

    if (rt == FD_RT_GPL) {
        if (size == 1)
            PICK(gpl8);
        else if (size == 2)
            PICK(gpl16);
        else if (size == 4)
            PICK(gpl32);
        else if (size == 8)
            PICK(gpl64);
        else
            return "(inv-sz)";
    } else if (rt == FD_RT_GPH) {
        PICK(gph);
    } else if (rt == FD_RT_SEG) {
        PICK(seg);
    } else if (rt == FD_RT_FPU) {
        PICK(fpu);
    } else if (rt == FD_RT_MMX) {
        PICK(mmx);
    } else if (rt == FD_RT_CR) {
        PICK(cr);
    } else if (rt == FD_RT_DR) {
        PICK(dr);
    } else if (rt == FD_RT_VEC) {
        // Vector operands of 16 bytes or less are all named as xmm registers.
        if (size == 32)
            PICK(ymm);
        else if (size == 1 || size == 2 || size == 4 || size == 8 || size == 16)
            PICK(xmm);
        else
            return "(inv-sz)";
    } else {
        return "(inv-ty)";
    }
#undef PICK

    if (ri >= count)
        return "(inv-idx)";
    return names[ri];
}
// Mnemonic string storage. fadec-table.inc is included with
// FD_DECODE_TABLE_STRTAB1 defined so that it supplies the initializer
// expression of _mnemonic_str. fdi_name() returns pointers into this storage,
// so it presumably holds the NUL-terminated mnemonics back to back.
#define FD_DECODE_TABLE_STRTAB1
static const char* _mnemonic_str =
#include <fadec-table.inc>
;
#undef FD_DECODE_TABLE_STRTAB1
// Mnemonic offset table. The same file is included a second time with
// FD_DECODE_TABLE_STRTAB2 defined so that it supplies the array elements.
// fdi_name() indexes this array by instruction type and uses the element as
// an offset into _mnemonic_str.
#define FD_DECODE_TABLE_STRTAB2
static const uint16_t _mnemonic_offs[] = {
#include <fadec-table.inc>
};
#undef FD_DECODE_TABLE_STRTAB2
// Bounded string copy that returns the position of the terminator.
//
// Copies at most size - 1 characters from src to dst and, when size is
// non-zero, writes a terminating NUL. The returned pointer addresses that NUL
// (or dst itself when size is zero), so calls can be chained to append
// further text with the remaining capacity.
static char*
fd_strplcpy(char* dst, const char* src, size_t size)
{
    // No room at all: nothing may be written, not even the terminator.
    if (size == 0)
        return dst;

    size_t room = size - 1; // one byte is reserved for the NUL
    size_t copied = 0;
    while (copied < room && src[copied] != '\0') {
        dst[copied] = src[copied];
        copied++;
    }
    dst[copied] = '\0';
    return dst + copied;
}
// Render val as lowercase hexadecimal without prefix or leading zeros.
//
// The digits are written right-aligned into buf (at least 17 bytes: up to 16
// digits plus the terminating NUL). Returns a pointer to the first digit,
// which lies inside buf; zero is rendered as "0".
static char*
fd_format_hex(uint64_t val, char buf[static 17]) {
    static const char digits[] = "0123456789abcdef";
    char* cursor = buf + 16;
    *cursor = '\0';
    // Emit the least significant nibble first, moving towards the front.
    do {
        *--cursor = digits[val & 0xf];
        val >>= 4;
    } while (val != 0);
    return cursor;
}
// Return the mnemonic string for an instruction type.
//
// ty is used as an index into _mnemonic_offs; the entry found there is the
// offset of the name inside _mnemonic_str. Values beyond the end of the
// offset table yield "(invalid)".
const char*
fdi_name(FdInstrType ty) {
    const size_t count = sizeof(_mnemonic_offs) / sizeof(_mnemonic_offs[0]);
    if (ty < count)
        return _mnemonic_str + _mnemonic_offs[ty];
    return "(invalid)";
}
// Format a decoded instruction into buffer (capacity len bytes).
//
// Equivalent to fd_format_abs() with an address of zero.
void
fd_format(const FdInstr* instr, char* buffer, size_t len)
{
    const uint64_t no_address = 0;
    fd_format_abs(instr, no_address, buffer, len);
}
// Format a decoded instruction as text.
//
// instr:  decoded instruction to print.
// addr:   value that is added, together with FD_SIZE(instr), to FD_OT_OFF
//         operands before they are printed.
// buffer: destination for the text.
// len:    capacity of buffer in bytes. Output that does not fit is silently
//         truncated; when len is non-zero the result is NUL-terminated, when
//         len is zero nothing is written.
void
fd_format_abs(const FdInstr* instr, uint64_t addr, char* buffer, size_t len)
{
    // Scratch space for numbers: fd_format_hex() is handed tmp + 2 so that
    // the "0x" prefix can be prepended in place in front of the digits.
    char tmp[21];
    char* buf = buffer;
    char* end = buffer + len;

    if (FD_HAS_REP(instr))
        buf = fd_strplcpy(buf, "rep ", end-buf);
    if (FD_HAS_REPNZ(instr))
        buf = fd_strplcpy(buf, "repnz ", end-buf);
    if (FD_HAS_LOCK(instr))
        buf = fd_strplcpy(buf, "lock ", end-buf);

    const char* mnemonic = fdi_name(FD_TYPE(instr));

    bool prefix_addrsize = false;
    bool prefix_segment = false;

    // Suffix appended directly to the mnemonic; two characters are needed
    // for the "64" suffix, the third byte keeps the string terminated.
    char sizesuffix[3] = {0, 0, 0};
    switch (FD_OPSIZE(instr)) {
    default: break;
    case 1: sizesuffix[0] = 'b'; break;
    case 2: sizesuffix[0] = 'w'; break;
    case 4: sizesuffix[0] = 'd'; break;
    case 8: sizesuffix[0] = 'q'; break;
    }

    if (FD_OP_TYPE(instr, 0) == FD_OT_OFF && FD_OP_SIZE(instr, 0) == 2)
        sizesuffix[0] = 'w';

    // Per-instruction adjustments of mnemonic, suffix and prefixes.
    switch (FD_TYPE(instr)) {
    case FDI_C_SEP:
        // The mnemonic itself depends on the operand size; no suffix.
        switch (FD_OPSIZE(instr)) {
        default: break;
        case 2: mnemonic = "cwd"; break;
        case 4: mnemonic = "cdq"; break;
        case 8: mnemonic = "cqo"; break;
        }
        sizesuffix[0] = 0;
        break;
    case FDI_C_EX:
        switch (FD_OPSIZE(instr)) {
        default: break;
        case 2: mnemonic = "cbw"; break;
        case 4: mnemonic = "cwde"; break;
        case 8: mnemonic = "cdqe"; break;
        }
        sizesuffix[0] = 0;
        break;
    case FDI_CMPXCHGD:
        switch (FD_OPSIZE(instr)) {
        default: break;
        case 4: mnemonic = "cmpxchg8b"; break;
        case 8: mnemonic = "cmpxchg16b"; break;
        }
        sizesuffix[0] = 0;
        break;
    case FDI_JCXZ:
        // Here the name is selected by the address size.
        switch (FD_ADDRSIZE(instr)) {
        default: break;
        case 4: mnemonic = "jecxz"; break;
        case 8: mnemonic = "jrcxz"; break;
        }
        break;
    case FDI_PUSH:
        if (FD_OP_SIZE(instr, 0) == 2 && FD_OP_TYPE(instr, 0) == FD_OT_IMM)
            sizesuffix[0] = 'w';
        // FALLTHROUGH
    case FDI_POP:
        if (FD_OP_SIZE(instr, 0) == 2 && FD_OP_TYPE(instr, 0) == FD_OT_REG &&
            FD_OP_REG_TYPE(instr, 0) == FD_RT_SEG)
            sizesuffix[0] = 'w';
        break;
    case FDI_FXSAVE:
    case FDI_FXRSTOR:
    case FDI_XSAVE:
    case FDI_XSAVEC:
    case FDI_XSAVEOPT:
    case FDI_XSAVES:
    case FDI_XRSTOR:
    case FDI_XRSTORS:
        // Only the 8-byte operand size is marked, with a "64" suffix.
        if (FD_OPSIZE(instr) == 8)
            sizesuffix[0] = '6', sizesuffix[1] = '4';
        else
            sizesuffix[0] = 0;
        break;
    case FDI_RET:
    case FDI_ENTER:
    case FDI_LEAVE:
        // No suffix when the operand size is 8 in 64-bit mode or 4 otherwise.
        if (FD_OPSIZE(instr) == (FD_IS64(instr) ? 8 : 4))
            sizesuffix[0] = 0;
        break;
    case FDI_LODS:
    case FDI_MOVS:
    case FDI_CMPS:
    case FDI_OUTS:
        prefix_segment = true;
        // FALLTHROUGH
    case FDI_STOS:
    case FDI_SCAS:
    case FDI_INS:
        prefix_addrsize = true;
        break;
    default: break;
    }

    if (prefix_addrsize) {
        if (FD_IS64(instr) && FD_ADDRSIZE(instr) == 4)
            buf = fd_strplcpy(buf, "addr32 ", end-buf);
        if (!FD_IS64(instr) && FD_ADDRSIZE(instr) == 2)
            buf = fd_strplcpy(buf, "addr16 ", end-buf);
    }
    if (prefix_segment && FD_SEGMENT(instr) != FD_REG_NONE) {
        buf = fd_strplcpy(buf, reg_name(FD_RT_SEG, FD_SEGMENT(instr), 2), end-buf);
        buf = fd_strplcpy(buf, " ", end-buf);
    }

    buf = fd_strplcpy(buf, mnemonic, end-buf);
    buf = fd_strplcpy(buf, sizesuffix, end-buf);

    for (int i = 0; i < 4; i++)
    {
        FdOpType op_type = FD_OP_TYPE(instr, i);
        if (op_type == FD_OT_NONE)
            break;

        // The first operand is preceded by " ", all later ones by ", ".
        buf = fd_strplcpy(buf, ", " + (i == 0), end-buf);

        unsigned size = FD_OP_SIZE(instr, i);

        if (op_type == FD_OT_REG) {
            unsigned type = FD_OP_REG_TYPE(instr, i);
            unsigned idx = FD_OP_REG(instr, i);
            buf = fd_strplcpy(buf, reg_name(type, idx, size), end-buf);
        } else if (op_type == FD_OT_MEM) {
            // Register type and size used to name the index register;
            // the gather instructions below use a vector register instead.
            unsigned idx_rt = FD_RT_GPL;
            unsigned idx_sz = FD_ADDRSIZE(instr);
            // Adjust the size shown in the "... ptr" annotation.
            switch (FD_TYPE(instr)) {
            case FDI_CMPXCHGD: size = 2 * FD_OPSIZE(instr); break;
            case FDI_BOUND: size = 2 * size; break;
            case FDI_JMPF:
            case FDI_CALLF:
            case FDI_LDS:
            case FDI_LES:
            case FDI_LFS:
            case FDI_LGS:
            case FDI_LSS:
                size += 2;
                break;
            case FDI_FLD:
            case FDI_FSTP:
            case FDI_FBLD:
            case FDI_FBSTP:
                size = size != 0 ? size : 10;
                break;
            case FDI_VPGATHERQD:
            case FDI_VGATHERQPS:
                idx_rt = FD_RT_VEC;
                idx_sz = FD_OP_SIZE(instr, 0) * 2;
                break;
            case FDI_VPGATHERDQ:
            case FDI_VGATHERDPD:
                idx_rt = FD_RT_VEC;
                idx_sz = FD_OP_SIZE(instr, 0) / 2;
                break;
            case FDI_VPGATHERDD:
            case FDI_VPGATHERQQ:
            case FDI_VGATHERDPS:
            case FDI_VGATHERQPD:
                idx_rt = FD_RT_VEC;
                idx_sz = FD_OP_SIZE(instr, 0);
                break;
            default: break;
            }

            // Sizes without an entry here get no annotation.
            switch (size) {
            default: break;
            case 1: buf = fd_strplcpy(buf, "byte ptr ", end-buf); break;
            case 2: buf = fd_strplcpy(buf, "word ptr ", end-buf); break;
            case 4: buf = fd_strplcpy(buf, "dword ptr ", end-buf); break;
            case 6: buf = fd_strplcpy(buf, "fword ptr ", end-buf); break;
            case 8: buf = fd_strplcpy(buf, "qword ptr ", end-buf); break;
            case 10: buf = fd_strplcpy(buf, "tbyte ptr ", end-buf); break;
            case 16: buf = fd_strplcpy(buf, "xmmword ptr ", end-buf); break;
            case 32: buf = fd_strplcpy(buf, "ymmword ptr ", end-buf); break;
            case 64: buf = fd_strplcpy(buf, "zmmword ptr ", end-buf); break;
            }

            unsigned seg = FD_SEGMENT(instr);
            if (seg != FD_REG_NONE) {
                buf = fd_strplcpy(buf, reg_name(FD_RT_SEG, seg, 2), end-buf);
                buf = fd_strplcpy(buf, ":", end-buf);
            }
            buf = fd_strplcpy(buf, "[", end-buf);

            bool has_base = FD_OP_BASE(instr, i) != FD_REG_NONE;
            bool has_idx = FD_OP_INDEX(instr, i) != FD_REG_NONE;
            if (has_base)
                buf = fd_strplcpy(buf, reg_name(FD_RT_GPL, FD_OP_BASE(instr, i), FD_ADDRSIZE(instr)), end-buf);
            if (has_idx) {
                if (has_base)
                    buf = fd_strplcpy(buf, "+", end-buf);
                // The literal holds four NUL-terminated 3-byte entries
                // ("1*", "2*", "4*", "8*"); the scale value selects one.
                buf = fd_strplcpy(buf, "1*\0002*\0004*\0008*" + 3*FD_OP_SCALE(instr, i), end-buf);
                buf = fd_strplcpy(buf, reg_name(idx_rt, FD_OP_INDEX(instr, i), idx_sz), end-buf);
            }

            // With a base or index register the displacement is shown as
            // sign and magnitude; without one it is shown as-is.
            uint64_t disp = FD_OP_DISP(instr, i);
            if (disp && (has_base || has_idx)) {
                buf = fd_strplcpy(buf, (int64_t) disp < 0 ? "-" : "+", end-buf);
                if ((int64_t) disp < 0)
                    disp = -disp;
            }
            // Truncate to the address size.
            if (FD_ADDRSIZE(instr) == 2)
                disp &= 0xffff;
            else if (FD_ADDRSIZE(instr) == 4)
                disp &= 0xffffffff;
            if (disp || (!has_base && !has_idx)) {
                char* fmt = fd_format_hex(disp, tmp + 2);
                *--fmt = 'x';
                *--fmt = '0';
                buf = fd_strplcpy(buf, fmt, end-buf);
            }
            buf = fd_strplcpy(buf, "]", end-buf);
        } else if (op_type == FD_OT_IMM || op_type == FD_OT_OFF) {
            // Kept as uint64_t rather than size_t: addr is 64 bits wide and
            // the far-pointer split below shifts by up to 32 bits, so a
            // 32-bit size_t would truncate the value (and make that shift
            // undefined).
            uint64_t immediate = FD_OP_IMM(instr, i);
            // Some instructions have actually two immediate operands which are
            // decoded as a single operand. Split them here appropriately.
            uint64_t splitimm = 0;
            const char* splitsep = ", ";
            char* fmt;
            switch (FD_TYPE(instr)) {
            default:
                goto nosplitimm;
            case FDI_SSE_EXTRQ:
            case FDI_SSE_INSERTQ:
                splitimm = immediate & 0xff;
                immediate = (immediate >> 8) & 0xff;
                break;
            case FDI_ENTER:
                splitimm = immediate & 0xffff;
                immediate = (immediate >> 16) & 0xff;
                break;
            case FDI_JMPF:
            case FDI_CALLF:
                // Printed as "<upper 16 bits>:<low size bytes>".
                splitsep = ":";
                splitimm = (immediate >> 8*size) & 0xffff;
                // immediate is masked below.
                break;
            }
            fmt = fd_format_hex(splitimm, tmp + 2);
            *--fmt = 'x';
            *--fmt = '0';
            buf = fd_strplcpy(buf, fmt, end-buf);
            buf = fd_strplcpy(buf, splitsep, end-buf);

        nosplitimm:
            // Offset operands are made absolute using the supplied address
            // and the instruction size.
            if (op_type == FD_OT_OFF)
                immediate += addr + FD_SIZE(instr);
            // Truncate to the operand size; other sizes are printed in full.
            if (size == 1)
                immediate &= 0xff;
            else if (size == 2)
                immediate &= 0xffff;
            else if (size == 4)
                immediate &= 0xffffffff;
            fmt = fd_format_hex(immediate, tmp + 2);
            *--fmt = 'x';
            *--fmt = '0';
            buf = fd_strplcpy(buf, fmt, end-buf);
        }
    }
}