From 8fdfe5382264618c9622f8032140df2ee2ce32fa Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Sun, 13 Jan 2019 20:38:15 +0100 Subject: [PATCH] Output more available information in formatter The formatter now includes the following information: - Segment overrides - Address-size overrides - REP/REPNZ prefixes - LOCK prefix - High-byte registers (determined using presence of REX prefix) --- format.c | 195 +++++++++++++------------------------ tests/benchmarks.txt | 20 ++-- tests/decode-enter.sh | 18 ++-- tests/decode-imul.sh | 4 +- tests/decode-inc.sh | 26 ++--- tests/decode-movsx.sh | 10 +- tests/decode-ret.sh | 24 ++--- tests/decode-sse-movq.sh | 2 +- tests/decode-sse-shufpd.sh | 2 +- 9 files changed, 121 insertions(+), 180 deletions(-) diff --git a/format.c b/format.c index 9838dad..2ceee57 100644 --- a/format.c +++ b/format.c @@ -1,6 +1,7 @@ #include #include +#include #include @@ -17,152 +18,90 @@ static const uint16_t _mnemonic_offs[] = { }; #undef DECODE_TABLE_STRTAB2 -static -void -instr_format_decimal(char** cur, uint32_t value) -{ - char buffer[32]; - size_t buf_idx = sizeof(buffer) - 1; - if (value == 0) - { - buffer[buf_idx] = '0'; - } - else - { - while (value > 0) - { - uint32_t digit = value % 10; - buffer[buf_idx--] = '0' + digit; - value /= 10; - } - buf_idx++; - } - - size_t length = sizeof(buffer) - buf_idx; - __builtin_memcpy(*cur, buffer + buf_idx, length); - *cur += length; -} - -static -void -instr_format_hex(char** cur, size_t value) -{ - char buffer[32]; - size_t buf_idx = sizeof(buffer) - 1; - if (value == 0) - { - buffer[buf_idx] = '0'; - } - else - { - while (value > 0) - { - uint32_t nibble = value & 0xf; - buffer[buf_idx--] = "0123456789abcdef"[nibble]; - value >>= 4; - } - buf_idx++; - } - buffer[--buf_idx] = 'x'; - buffer[--buf_idx] = '0'; - - size_t length = sizeof(buffer) - buf_idx; - __builtin_memcpy(*cur, buffer + buf_idx, length); - *cur += length; -} +#define FMT_CONCAT(buf, end, ...) do { \ + buf += snprintf(buf, end - buf, __VA_ARGS__); \ + if (buf > end) \ + buf = end; \ + } while (0) void instr_format(const Instr* instr, char buffer[128]) { - char* cur = buffer; - *(cur++) = '['; + char* buf = buffer; + char* end = buffer + 128; - const char* mnemonic = &_mnemonic_str[_mnemonic_offs[instr->type]]; - while (*mnemonic) - { - *(cur++) = *(mnemonic++); - } + FMT_CONCAT(buf, end, "["); + if (INSTR_HAS_REP(instr)) + FMT_CONCAT(buf, end, "rep:"); + if (INSTR_HAS_REPNZ(instr)) + FMT_CONCAT(buf, end, "repnz:"); + if (INSTR_SEGMENT(instr) < 6) + FMT_CONCAT(buf, end, "%cs:", "ecsdfg"[INSTR_SEGMENT(instr)]); + if (INSTR_IS64(instr) && INSTR_ADDRSZ(instr) == 4) + FMT_CONCAT(buf, end, "addr32:"); + if (!INSTR_IS64(instr) && INSTR_ADDRSZ(instr) == 2) + FMT_CONCAT(buf, end, "addr16:"); + if (INSTR_HAS_LOCK(instr)) + FMT_CONCAT(buf, end, "lock:"); + FMT_CONCAT(buf, end, "%s", &_mnemonic_str[_mnemonic_offs[instr->type]]); if (INSTR_WIDTH(instr)) - { - *(cur++) = '_'; - instr_format_decimal(&cur, INSTR_WIDTH(instr)); - } + FMT_CONCAT(buf, end, "_%u", INSTR_WIDTH(instr)); for (int i = 0; i < 4; i++) { const struct Operand* operand = &instr->operands[i]; if (operand->type == OT_NONE) - { break; - } - __builtin_memcpy(cur, " REG IMM MEM" + operand->type * 4 - 4, 4); - cur += 4; - instr_format_decimal(&cur, operand->size); - *(cur++) = ':'; + const char* op_type_name = "reg\0imm\0mem" + operand->type * 4 - 4; + FMT_CONCAT(buf, end, " %s%u:", op_type_name, operand->size); switch (operand->type) { - size_t immediate; - case OT_REG: - instr_format_decimal(&cur, reg_index(operand->reg)); - break; - case OT_IMM: - immediate = instr->immediate; - if (operand->size == 1) - { - immediate &= 0xff; - } - else if (operand->size == 2) - { - immediate &= 0xffff; - } -#if defined(ARCH_X86_64) - else if (operand->size == 4) - { - immediate &= 0xffffffff; - } -#endif - instr_format_hex(&cur, immediate); - break; - case OT_MEM: - if (!reg_is_none(operand->reg)) - { - instr_format_decimal(&cur, reg_index(operand->reg)); - *(cur++) = ':'; - } - if (instr->scale != 0) - { - uint8_t scale = 1 << (instr->scale - 1); - instr_format_decimal(&cur, scale); - *(cur++) = '*'; - instr_format_decimal(&cur, reg_index(instr->sreg)); - *(cur++) = ':'; - } - if (instr->disp < 0) - { - *(cur++) = '-'; - instr_format_hex(&cur, -instr->disp); - } - else - { - instr_format_hex(&cur, instr->disp); - } - break; - case OT_NONE: - default: - break; + size_t immediate; + case OT_REG: + if (operand->size == 1 && !INSTR_HAS_REX(instr) && + operand->reg >= 4 && operand->reg < 8) + FMT_CONCAT(buf, end, "r%uh", operand->reg - 4); + else + FMT_CONCAT(buf, end, "r%u", operand->reg); + break; + case OT_IMM: + immediate = instr->immediate; + if (operand->size == 1) + immediate &= 0xff; + else if (operand->size == 2) + immediate &= 0xffff; + else if (operand->size == 4) + immediate &= 0xffffffff; + FMT_CONCAT(buf, end, "0x%lx", immediate); + break; + case OT_MEM: + if (!reg_is_none(operand->reg)) + { + FMT_CONCAT(buf, end, "r%u", operand->reg); + if (instr->scale != 0 || instr->disp > 0) + FMT_CONCAT(buf, end, "+"); + } + if (instr->scale != 0) + { + FMT_CONCAT(buf, end, "%u*r%u", 1 << (instr->scale - 1), + instr->sreg); + if (instr->disp > 0) + FMT_CONCAT(buf, end, "+"); + } + if (instr->disp < 0) + FMT_CONCAT(buf, end, "-0x%lx", -instr->disp); + else if ((reg_is_none(operand->reg) && instr->scale == 0) || + instr->disp > 0) + FMT_CONCAT(buf, end, "0x%lx", instr->disp); + break; + case OT_NONE: + default: + break; } } - *(cur++) = ']'; - *(cur++) = '\0'; - -#ifndef NDEBUG - if (cur - buffer > 128) - { - __builtin_trap(); - } -#endif + FMT_CONCAT(buf, end, "]"); } diff --git a/tests/benchmarks.txt b/tests/benchmarks.txt index ff5f040..5e422f2 100644 --- a/tests/benchmarks.txt +++ b/tests/benchmarks.txt @@ -1,13 +1,13 @@ decode 90 [NOP] -decode 0fcd [BSWAP REG4:5] -decode 660fcd [BSWAP REG2:5] -decode 6650 [PUSH REG2:0] +decode 0fcd [BSWAP reg4:r5] +decode 660fcd [BSWAP reg2:r5] +decode 6650 [PUSH reg2:r0] decode a5 [MOVS_4] -decode f3a5 [MOVS_4] +decode f3a5 [rep:MOVS_4] decode 66a5 [MOVS_2] -decode f366a5 [MOVS_2] -decode f7d7 [NOT REG4:7] -decode f717 [NOT MEM4:7:0x0] -decode f7142f [NOT MEM4:7:1*5:0x0] -decode f7542f12 [NOT MEM4:7:1*5:0x12] -decode f7942f34120000 [NOT MEM4:7:1*5:0x1234] +decode f366a5 [rep:MOVS_2] +decode f7d7 [NOT reg4:r7] +decode f717 [NOT mem4:r7] +decode f7142f [NOT mem4:r7+1*r5] +decode f7542f12 [NOT mem4:r7+1*r5+0x12] +decode f7942f34120000 [NOT mem4:r7+1*r5+0x1234] diff --git a/tests/decode-enter.sh b/tests/decode-enter.sh index 5800c5e..cca995e 100644 --- a/tests/decode-enter.sh +++ b/tests/decode-enter.sh @@ -1,9 +1,9 @@ -decode 66c8000000 "[ENTER_2 IMM4:0x0]" -decode 66c8000f00 "[ENTER_2 IMM4:0xf00]" -decode 66c8000001 "[ENTER_2 IMM4:0x10000]" -decode32 c8000000 "[ENTER_4 IMM4:0x0]" -decode32 c8000f00 "[ENTER_4 IMM4:0xf00]" -decode32 c8000001 "[ENTER_4 IMM4:0x10000]" -decode64 c8000000 "[ENTER_8 IMM4:0x0]" -decode64 c8000f00 "[ENTER_8 IMM4:0xf00]" -decode64 c8000001 "[ENTER_8 IMM4:0x10000]" +decode 66c8000000 [ENTER_2 imm4:0x0] +decode 66c8000f00 [ENTER_2 imm4:0xf00] +decode 66c8000001 [ENTER_2 imm4:0x10000] +decode32 c8000000 [ENTER_4 imm4:0x0] +decode32 c8000f00 [ENTER_4 imm4:0xf00] +decode32 c8000001 [ENTER_4 imm4:0x10000] +decode64 c8000000 [ENTER_8 imm4:0x0] +decode64 c8000f00 [ENTER_8 imm4:0xf00] +decode64 c8000001 [ENTER_8 imm4:0x10000] diff --git a/tests/decode-imul.sh b/tests/decode-imul.sh index b2ea7c5..df9e685 100644 --- a/tests/decode-imul.sh +++ b/tests/decode-imul.sh @@ -1,2 +1,2 @@ -decode 69C708010000 "[IMUL3 REG4:0 REG4:7 IMM4:0x108]" -decode 6BC708 "[IMUL3 REG4:0 REG4:7 IMM4:0x8]" +decode 69C708010000 [IMUL3 reg4:r0 reg4:r7 imm4:0x108] +decode 6BC708 [IMUL3 reg4:r0 reg4:r7 imm4:0x8] diff --git a/tests/decode-inc.sh b/tests/decode-inc.sh index 5547f1c..c11dde5 100644 --- a/tests/decode-inc.sh +++ b/tests/decode-inc.sh @@ -1,12 +1,14 @@ -decode32 40 "[INC REG4:0]" -decode32 43 "[INC REG4:3]" -decode32 6647 "[INC REG2:7]" -decode fec0 "[INC REG1:0]" -decode fec4 "[INC REG1:4]" -decode ffc0 "[INC REG4:0]" -decode ffc4 "[INC REG4:4]" -decode 66ffc0 "[INC REG2:0]" -decode 66ffc4 "[INC REG2:4]" -decode64 48ffc0 "[INC REG8:0]" -decode64 48ffc4 "[INC REG8:4]" -decode64 49ffc7 "[INC REG8:15]" +decode32 40 [INC reg4:r0] +decode32 43 [INC reg4:r3] +decode32 6647 [INC reg2:r7] +decode fec0 [INC reg1:r0] +decode fec4 [INC reg1:r0h] +decode ffc0 [INC reg4:r0] +decode ffc4 [INC reg4:r4] +decode ff00 [INC mem4:r0] +decode f0ff00 [lock:INC mem4:r0] +decode 66ffc0 [INC reg2:r0] +decode 66ffc4 [INC reg2:r4] +decode64 48ffc0 [INC reg8:r0] +decode64 48ffc4 [INC reg8:r4] +decode64 49ffc7 [INC reg8:r15] diff --git a/tests/decode-movsx.sh b/tests/decode-movsx.sh index fe39ef9..cc99227 100644 --- a/tests/decode-movsx.sh +++ b/tests/decode-movsx.sh @@ -1,5 +1,5 @@ -decode 660fbec2 "[MOVSX REG2:0 REG1:2]" -decode 0fbec2 "[MOVSX REG4:0 REG1:2]" -decode 0fbfc2 "[MOVSX REG4:0 REG2:2]" -decode64 480fbfc2 "[MOVSX REG8:0 REG2:2]" -decode64 4863c2 "[MOVSX REG8:0 REG4:2]" +decode 660fbec2 [MOVSX reg2:r0 reg1:r2] +decode 0fbec2 [MOVSX reg4:r0 reg1:r2] +decode 0fbfc2 [MOVSX reg4:r0 reg2:r2] +decode64 480fbfc2 [MOVSX reg8:r0 reg2:r2] +decode64 4863c2 [MOVSX reg8:r0 reg4:r2] diff --git a/tests/decode-ret.sh b/tests/decode-ret.sh index 3097072..745d85b 100644 --- a/tests/decode-ret.sh +++ b/tests/decode-ret.sh @@ -1,12 +1,12 @@ -decode 66c3 "[RET_2]" -decode 66c20000 "[RET_IMM_2 IMM2:0x0]" -decode 66c20d00 "[RET_IMM_2 IMM2:0xd]" -decode 66c20dff "[RET_IMM_2 IMM2:0xff0d]" -decode32 c3 "[RET_4]" -decode32 c20000 "[RET_IMM_4 IMM2:0x0]" -decode32 c20d00 "[RET_IMM_4 IMM2:0xd]" -decode32 c20dff "[RET_IMM_4 IMM2:0xff0d]" -decode64 c3 "[RET_8]" -decode64 c20000 "[RET_IMM_8 IMM2:0x0]" -decode64 c20d00 "[RET_IMM_8 IMM2:0xd]" -decode64 c20dff "[RET_IMM_8 IMM2:0xff0d]" +decode 66c3 [RET_2] +decode 66c20000 [RET_IMM_2 imm2:0x0] +decode 66c20d00 [RET_IMM_2 imm2:0xd] +decode 66c20dff [RET_IMM_2 imm2:0xff0d] +decode32 c3 [RET_4] +decode32 c20000 [RET_IMM_4 imm2:0x0] +decode32 c20d00 [RET_IMM_4 imm2:0xd] +decode32 c20dff [RET_IMM_4 imm2:0xff0d] +decode64 c3 [RET_8] +decode64 c20000 [RET_IMM_8 imm2:0x0] +decode64 c20d00 [RET_IMM_8 imm2:0xd] +decode64 c20dff [RET_IMM_8 imm2:0xff0d] diff --git a/tests/decode-sse-movq.sh b/tests/decode-sse-movq.sh index c237f9d..235f0cd 100644 --- a/tests/decode-sse-movq.sh +++ b/tests/decode-sse-movq.sh @@ -1 +1 @@ -decode f30f7e5c2408 "[SSE_MOVQ_X2X REG8:3 MEM8:4:0x8]" +decode f30f7e5c2408 [SSE_MOVQ_X2X reg8:r3 mem8:r4+0x8] diff --git a/tests/decode-sse-shufpd.sh b/tests/decode-sse-shufpd.sh index 598093a..a3286ea 100644 --- a/tests/decode-sse-shufpd.sh +++ b/tests/decode-sse-shufpd.sh @@ -1 +1 @@ -decode 660fc6c001 "[SSE_SHUFPD REG16:0 REG16:0 IMM1:0x1]" +decode 660fc6c001 [SSE_SHUFPD reg16:r0 reg16:r0 imm1:0x1]