diff --git a/decode.c b/decode.c index c224a4d..133eaa6 100644 --- a/decode.c +++ b/decode.c @@ -276,8 +276,7 @@ struct InstrDesc #define DESC_IMPLICIT_VAL(desc) (((desc)->immediate >> 2) & 1) #define DESC_LOCK(desc) (((desc)->immediate >> 3) & 1) #define DESC_VSIB(desc) (((desc)->immediate >> 7) & 1) -#define DESC_SIZE8(desc) (((desc)->operand_sizes >> 8) & 1) -#define DESC_SIZED64(desc) (((desc)->operand_sizes >> 9) & 1) +#define DESC_OPSIZE(desc) (((desc)->operand_sizes >> 8) & 3) #define DESC_SIZE_FIX1(desc) (((desc)->operand_sizes >> 10) & 7) #define DESC_SIZE_FIX2(desc) (((desc)->operand_sizes >> 13) & 3) #define DESC_INSTR_WIDTH(desc) (((desc)->operand_sizes >> 15) & 1) @@ -411,27 +410,21 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address, struct InstrDesc* desc = (struct InstrDesc*) &_decode_table[table_idx]; - if (DESC_IGN66(desc)) - prefixes &= ~PREFIX_OPSZ; - instr->type = desc->type; instr->flags = prefixes & 0x7f; if (mode == DECODE_64) instr->flags |= FD_FLAG_64; instr->address = address; - uint8_t op_size = 0; - if (DESC_SIZE8(desc)) - op_size = 1; - else if (mode == DECODE_64 && (prefixes & PREFIX_REXW)) + unsigned op_size = 4; + if (DESC_OPSIZE(desc) == 2 && mode == DECODE_64) // DEF64 op_size = 8; - else if (prefixes & PREFIX_OPSZ) + if ((prefixes & PREFIX_OPSZ) && !DESC_IGN66(desc)) // opsize override op_size = 2; - else if (mode == DECODE_64 && DESC_SIZED64(desc)) + if (mode == DECODE_64 && (prefixes & PREFIX_REXW || DESC_OPSIZE(desc) == 3)) op_size = 8; - else - op_size = 4; - // Note: operand size is updates for jumps when handling immediate operands. + if (DESC_OPSIZE(desc) == 1) // force byte + op_size = 1; uint8_t vec_size = 16; if (prefixes & PREFIX_VEXL) @@ -551,10 +544,6 @@ fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address, // 6/7 = offset, operand-sized/8 bit (used for jumps/calls) int imm_byte = imm_control & 1; int imm_offset = imm_control & 2; - // Jumps are always 8 or 32 bit on x86-64, and the operand size is - // forced to 64 bit. - if (mode == DECODE_64 && UNLIKELY(imm_offset)) - op_size = 8; uint8_t imm_size; if (imm_byte) diff --git a/instrs.txt b/instrs.txt index 2ba3406..c90c444 100644 --- a/instrs.txt +++ b/instrs.txt @@ -83,22 +83,22 @@ 6d NP - - - - INS INSTR_WIDTH ENC_REP 6e NP - - - - OUTS SIZE_8 INSTR_WIDTH ENC_REP 6f NP - - - - OUTS INSTR_WIDTH ENC_REP -70 D IMM - - - JO DEF64 IMM_8 -71 D IMM - - - JNO DEF64 IMM_8 -72 D IMM - - - JC DEF64 IMM_8 -73 D IMM - - - JNC DEF64 IMM_8 -74 D IMM - - - JZ DEF64 IMM_8 -75 D IMM - - - JNZ DEF64 IMM_8 -76 D IMM - - - JBE DEF64 IMM_8 -77 D IMM - - - JA DEF64 IMM_8 -78 D IMM - - - JS DEF64 IMM_8 -79 D IMM - - - JNS DEF64 IMM_8 -7a D IMM - - - JP DEF64 IMM_8 -7b D IMM - - - JNP DEF64 IMM_8 -7c D IMM - - - JL DEF64 IMM_8 -7d D IMM - - - JGE DEF64 IMM_8 -7e D IMM - - - JLE DEF64 IMM_8 -7f D IMM - - - JG DEF64 IMM_8 +70 D IMM - - - JO FORCE64 IMM_8 +71 D IMM - - - JNO FORCE64 IMM_8 +72 D IMM - - - JC FORCE64 IMM_8 +73 D IMM - - - JNC FORCE64 IMM_8 +74 D IMM - - - JZ FORCE64 IMM_8 +75 D IMM - - - JNZ FORCE64 IMM_8 +76 D IMM - - - JBE FORCE64 IMM_8 +77 D IMM - - - JA FORCE64 IMM_8 +78 D IMM - - - JS FORCE64 IMM_8 +79 D IMM - - - JNS FORCE64 IMM_8 +7a D IMM - - - JP FORCE64 IMM_8 +7b D IMM - - - JNP FORCE64 IMM_8 +7c D IMM - - - JL FORCE64 IMM_8 +7d D IMM - - - JGE FORCE64 IMM_8 +7e D IMM - - - JLE FORCE64 IMM_8 +7f D IMM - - - JG FORCE64 IMM_8 80/0 MI GP IMM - - ADD SIZE_8 LOCK 80/1 MI GP IMM - - OR SIZE_8 LOCK 80/2 MI GP IMM - - ADC SIZE_8 LOCK @@ -187,14 +187,15 @@ c1/4 MI GP IMM8 - - SHL c1/5 MI GP IMM8 - - SHR c1/7 MI GP IMM8 - - SAR # RET immediate size handled in code -c2 I IMM16 - - - RET DEF64 INSTR_WIDTH -c3 NP - - - - RET DEF64 INSTR_WIDTH +c2 I IMM16 - - - RET FORCE64 INSTR_WIDTH +c3 NP - - - - RET FORCE64 INSTR_WIDTH c4/m RM GP MEMZ - - LES ONLY32 c5/m RM GP MEMZ - - LDS ONLY32 c6/0 MI GP IMM - - MOV SIZE_8 c6f8 I IMM8 - - - XABORT c7/0 MI GP IMM - - MOV -c7f8 D IMM - - - XBEGIN +c7f8 D IMM32 - - - XBEGIN ONLY32 +c7f8 D IMM64 - - - XBEGIN ONLY64 # ENTER immediate handled in code c8 I IMM32 - - - ENTER DEF64 INSTR_WIDTH c9 NP - - - - LEAVE DEF64 INSTR_WIDTH @@ -238,18 +239,18 @@ d5 I IMM - - - AAD ONLY32 SIZE_8 #d6 unused d7 NP - - - - XLATB #d8-df FPU Escape -e0 D IMM - - - LOOPNZ DEF64 IMM_8 -e1 D IMM - - - LOOPZ DEF64 IMM_8 -e2 D IMM - - - LOOP DEF64 IMM_8 -e3 D IMM - - - JCXZ DEF64 IMM_8 +e0 D IMM - - - LOOPNZ FORCE64 IMM_8 +e1 D IMM - - - LOOPZ FORCE64 IMM_8 +e2 D IMM - - - LOOP FORCE64 IMM_8 +e3 D IMM - - - JCXZ FORCE64 IMM_8 e4 IA GP IMM8 - - IN SIZE_8 e5 IA GP IMM8 - - IN e6 IA GP IMM8 - - OUT SIZE_8 e7 IA GP IMM8 - - OUT -e8 D IMM - - - CALL DEF64 -e9 D IMM - - - JMP DEF64 +e8 D IMM - - - CALL FORCE64 +e9 D IMM - - - JMP FORCE64 #ea JMPf TODO, ONLY32 -eb D IMM - - - JMP DEF64 IMM_8 +eb D IMM - - - JMP FORCE64 IMM_8 ec NP - - - - IN SIZE_8 INSTR_WIDTH ed NP - - - - IN INSTR_WIDTH ee NP - - - - OUT SIZE_8 INSTR_WIDTH @@ -284,9 +285,9 @@ fe/0 M GP - - - INC SIZE_8 LOCK fe/1 M GP - - - DEC SIZE_8 LOCK ff/0 M GP - - - INC LOCK ff/1 M GP - - - DEC LOCK -ff/2 M GP - - - CALL DEF64 +ff/2 M GP - - - CALL FORCE64 ff/3m M MEMZ - - - CALLF -ff/4 M GP - - - JMP DEF64 +ff/4 M GP - - - JMP FORCE64 ff/5m M MEMZ - - - JMPF ff/6 M GP - - - PUSH DEF64 0f00/0 M GP16 - - - SLDT @@ -377,22 +378,22 @@ NP.0f37 NP - - - - GETSEC 0f4d RM GP GP - - CMOVGE 0f4e RM GP GP - - CMOVLE 0f4f RM GP GP - - CMOVG -0f80 D IMM - - - JO DEF64 -0f81 D IMM - - - JNO DEF64 -0f82 D IMM - - - JC DEF64 -0f83 D IMM - - - JNC DEF64 -0f84 D IMM - - - JZ DEF64 -0f85 D IMM - - - JNZ DEF64 -0f86 D IMM - - - JBE DEF64 -0f87 D IMM - - - JA DEF64 -0f88 D IMM - - - JS DEF64 -0f89 D IMM - - - JNS DEF64 -0f8a D IMM - - - JP DEF64 -0f8b D IMM - - - JNP DEF64 -0f8c D IMM - - - JL DEF64 -0f8d D IMM - - - JGE DEF64 -0f8e D IMM - - - JLE DEF64 -0f8f D IMM - - - JG DEF64 +0f80 D IMM - - - JO FORCE64 +0f81 D IMM - - - JNO FORCE64 +0f82 D IMM - - - JC FORCE64 +0f83 D IMM - - - JNC FORCE64 +0f84 D IMM - - - JZ FORCE64 +0f85 D IMM - - - JNZ FORCE64 +0f86 D IMM - - - JBE FORCE64 +0f87 D IMM - - - JA FORCE64 +0f88 D IMM - - - JS FORCE64 +0f89 D IMM - - - JNS FORCE64 +0f8a D IMM - - - JP FORCE64 +0f8b D IMM - - - JNP FORCE64 +0f8c D IMM - - - JL FORCE64 +0f8d D IMM - - - JGE FORCE64 +0f8e D IMM - - - JLE FORCE64 +0f8f D IMM - - - JG FORCE64 0f90 M GP - - - SETO SIZE_8 0f91 M GP - - - SETNO SIZE_8 0f92 M GP - - - SETC SIZE_8 diff --git a/parseinstrs.py b/parseinstrs.py index 3738106..6211866 100644 --- a/parseinstrs.py +++ b/parseinstrs.py @@ -39,8 +39,7 @@ InstrFlags = bitstruct("InstrFlags", [ "op1_size:2", "op2_size:2", "op3_size:2", - "size8:1", - "sized64:1", + "opsize:2", "size_fix1:3", "size_fix2:2", "instr_width:1", @@ -106,6 +105,7 @@ OPKINDS = { "IMM8": OpKind(1, OpKind.K_IMM), "IMM16": OpKind(2, OpKind.K_IMM), "IMM32": OpKind(4, OpKind.K_IMM), + "IMM64": OpKind(8, OpKind.K_IMM), "GP": OpKind(OpKind.SZ_OP, "GP"), "GP8": OpKind(1, "GP"), "GP16": OpKind(2, "GP"), @@ -179,8 +179,9 @@ class InstrDesc(NamedTuple): raise Exception("invalid regty for op 3, must be VEC") # Miscellaneous Flags - if "DEF64" in self.flags: flags.sized64 = 1 - if "SIZE_8" in self.flags: flags.size8 = 1 + if "SIZE_8" in self.flags: flags.opsize = 1 + if "DEF64" in self.flags: flags.opsize = 2 + if "FORCE64" in self.flags: flags.opsize = 3 if "INSTR_WIDTH" in self.flags: flags.instr_width = 1 if "LOCK" in self.flags: flags.lock = 1 if "VSIB" in self.flags: flags.vsib = 1 @@ -191,7 +192,7 @@ class InstrDesc(NamedTuple): if flags.imm_control >= 4: imm_op = next(op for op in self.operands if op.kind == OpKind.K_IMM) if ("IMM_8" in self.flags or imm_op.size == 1 or - (imm_op.size == OpKind.SZ_OP and flags.size8)): + (imm_op.size == OpKind.SZ_OP and "SIZE_8" in self.flags)): flags.imm_control |= 1 enc = flags._encode(6) @@ -487,6 +488,10 @@ def encode_table(entries): prepend_opsize = max(opsizes) > 0 and not separate_opsize prepend_vecsize = hasvex and max(vecsizes) > 0 and not separate_opsize + if "FORCE64" in desc.flags: + opsizes = {64} + prepend_opsize = False + optypes = ["", "", "", ""] enc = ENCODINGS[desc.encoding] if enc.modrm_idx: @@ -536,7 +541,7 @@ def encode_table(entries): tys_i = sum(ty << (4*i) for i, ty in enumerate(tys)) opc_s = hex(opc_i) + opc_flags + prefix[1] if opsize == 16: opc_s += "|OPC_66" - if opsize == 64 and "DEF64" not in desc.flags: opc_s += "|OPC_REXW" + if opsize == 64 and "DEF64" not in desc.flags and "FORCE64" not in desc.flags: opc_s += "|OPC_REXW" # Construct mnemonic name mnem_name = {"MOVABS": "MOV", "XCHG_NOP": "XCHG"}.get(desc.mnemonic, desc.mnemonic) diff --git a/tests/test_decode.c b/tests/test_decode.c index 6ddf3c8..df921c9 100644 --- a/tests/test_decode.c +++ b/tests/test_decode.c @@ -204,6 +204,11 @@ main(int argc, char** argv) TEST64("\x66\xeb\xff", "[JMP off8:rip+0xffffffffffffffff]"); TEST("\x66\xe9\x00", "PARTIAL"); TEST("\x66\xe9", "PARTIAL"); + TEST32("\xc7\xf8\xd3\x9c\xff\xff", "[XBEGIN off4:eip+0xffff9cd3]"); + TEST32("\x66\xc7\xf8\xd3\x9c", "[XBEGIN off4:eip+0xffff9cd3]"); + TEST64("\xc7\xf8\xd3\x9c\xff\xff", "[XBEGIN off8:rip+0xffffffffffff9cd3]"); + TEST64("\x66\xc7\xf8\xd3\x9c", "[XBEGIN off8:rip+0xffffffffffff9cd3]"); + TEST("\xa5", "[MOVS_4]"); TEST("\x66\xa5", "[MOVS_2]"); @@ -216,10 +221,14 @@ main(int argc, char** argv) TEST64("\x48\x0f\xbf\xc2", "[MOVSX reg8:r0 reg2:r2]"); TEST64("\x48\x63\xc2", "[MOVSX reg8:r0 reg4:r2]"); - TEST("\x66\xc3", "[RET_2]"); - TEST("\x66\xc2\x00\x00", "[RET_2 imm2:0x0]"); - TEST("\x66\xc2\x0d\x00", "[RET_2 imm2:0xd]"); - TEST("\x66\xc2\x0d\xff", "[RET_2 imm2:0xff0d]"); + TEST32("\x66\xc3", "[RET_2]"); + TEST32("\x66\xc2\x00\x00", "[RET_2 imm2:0x0]"); + TEST32("\x66\xc2\x0d\x00", "[RET_2 imm2:0xd]"); + TEST32("\x66\xc2\x0d\xff", "[RET_2 imm2:0xff0d]"); + TEST64("\x66\xc3", "[RET_8]"); + TEST64("\x66\xc2\x00\x00", "[RET_8 imm2:0x0]"); + TEST64("\x66\xc2\x0d\x00", "[RET_8 imm2:0xd]"); + TEST64("\x66\xc2\x0d\xff", "[RET_8 imm2:0xff0d]"); TEST32("\xc3", "[RET_4]"); TEST32("\xc2\x00\x00", "[RET_4 imm2:0x0]"); TEST32("\xc2\x0d\x00", "[RET_4 imm2:0xd]"); diff --git a/tests/test_encode.c b/tests/test_encode.c index 65d3f81..940b6a1 100644 --- a/tests/test_encode.c +++ b/tests/test_encode.c @@ -81,7 +81,6 @@ main(int argc, char** argv) // TEST("\x66\x90", FE_XCHG16rr, FE_AX, FE_AX); TEST("\xc2\x00\x00", FE_RETi, 0); TEST("\xff\xd0", FE_CALLr, FE_AX); - TEST("\x66\xff\xd0", FE_CALL16r, FE_AX); TEST("\x05\x00\x01\x00\x00", FE_ADD32ri, FE_AX, 0x100); TEST("\x66\x05\x00\x01", FE_ADD16ri, FE_AX, 0x100); TEST("\xb8\x05\x00\x01\x00", FE_MOV32ri, FE_AX, 0x10005);