diff --git a/decode.c b/decode.c index 501d0a0..05f16de 100644 --- a/decode.c +++ b/decode.c @@ -428,11 +428,17 @@ prefix_end: { // If there is no ModRM, but a Mod-Reg, its opcode-encoded. FdOp* operand = &instr->operands[DESC_MODREG_IDX(desc)]; - uint8_t reg_idx = buffer[off - 1] & 7; - // Only used for GP registers, therefore always apply REX.B. - reg_idx += prefix_rex & PREFIX_REXB ? 8 : 0; operand->type = FD_OT_REG; - operand->reg = reg_idx; + if (LIKELY(!DESC_VSIB(desc))) + { + // Only used for GP registers, therefore always apply REX.B. + operand->reg = (buffer[off - 1] & 7) + (prefix_rex & PREFIX_REXB ? 8 : 0); + } + else + { + operand->misc = FD_RT_SEG; + operand->reg = (buffer[off - 1] >> 3) & 7; + } } if (UNLIKELY(DESC_HAS_VEXREG(desc))) diff --git a/encode.c b/encode.c index 69d0f98..c44bb5f 100644 --- a/encode.c +++ b/encode.c @@ -205,7 +205,7 @@ typedef enum { ENC_NP, ENC_M, ENC_M1, ENC_MI, ENC_MC, ENC_MR, ENC_RM, ENC_RMA, ENC_MRI, ENC_RMI, ENC_MRC, ENC_AM, ENC_MA, - ENC_I, ENC_IA, ENC_O, ENC_OI, ENC_OA, ENC_A, ENC_D, ENC_FD, ENC_TD, + ENC_I, ENC_IA, ENC_O, ENC_OI, ENC_OA, ENC_S, ENC_A, ENC_D, ENC_FD, ENC_TD, ENC_RVM, ENC_RVMI, ENC_RVMR, ENC_RMV, ENC_VM, ENC_VMI, ENC_MVR, ENC_MAX } Encoding; @@ -240,6 +240,7 @@ const struct EncodingInfo encoding_infos[ENC_MAX] = { [ENC_O] = { .modreg = 0^3 }, [ENC_OI] = { .modreg = 0^3, .immctl = 4, .immidx = 1 }, [ENC_OA] = { .modreg = 0^3, .zregidx = 1^3, .zregval = 0 }, + [ENC_S] = { 0 }, [ENC_A] = { .zregidx = 0^3, .zregval = 0 }, [ENC_D] = { .immctl = 6, .immidx = 0 }, [ENC_FD] = { .immctl = 2, .immidx = 1 }, @@ -324,6 +325,8 @@ fe_enc64_impl(uint8_t** restrict buf, uint64_t mnem, FeOp op0, FeOp op1, if (enc_mr(buf, opc, ops[ei->modrm^3], modreg, desc->immsz)) goto fail; } else if (ei->modreg) { if (enc_o(buf, opc, ops[ei->modreg^3])) goto fail; + } else if (UNLIKELY(desc->enc == ENC_S)) { + if (enc_opc(buf, opc | (op_reg_idx(op0) << 3))) goto fail; } else { if (enc_opc(buf, opc)) goto fail; } diff --git a/instrs.txt b/instrs.txt index dc9e882..9ad98cd 100644 --- a/instrs.txt +++ b/instrs.txt @@ -4,15 +4,15 @@ 03 RM GP GP - - ADD 04 IA GP IMM - - ADD SIZE_8 05 IA GP IMM - - ADD -06 NP - - - - PUSH_ES ONLY32 INSTR_WIDTH -07 NP - - - - POP_ES ONLY32 INSTR_WIDTH +06 S SEG - - - PUSH ONLY32 +07 S SEG - - - POP ONLY32 08 MR GP GP - - OR SIZE_8 LOCK 09 MR GP GP - - OR LOCK 0a RM GP GP - - OR SIZE_8 0b RM GP GP - - OR 0c IA GP IMM - - OR SIZE_8 0d IA GP IMM - - OR -0e NP - - - - PUSH_CS ONLY32 INSTR_WIDTH +0e S SEG - - - PUSH ONLY32 #0f escape opcode 10 MR GP GP - - ADC SIZE_8 LOCK 11 MR GP GP - - ADC LOCK @@ -20,16 +20,16 @@ 13 RM GP GP - - ADC 14 IA GP IMM - - ADC SIZE_8 15 IA GP IMM - - ADC -16 NP - - - - PUSH_SS ONLY32 INSTR_WIDTH -17 NP - - - - POP_SS ONLY32 INSTR_WIDTH +16 S SEG - - - PUSH ONLY32 +17 S SEG - - - POP ONLY32 18 MR GP GP - - SBB SIZE_8 LOCK 19 MR GP GP - - SBB LOCK 1a RM GP GP - - SBB SIZE_8 1b RM GP GP - - SBB 1c IA GP IMM - - SBB SIZE_8 1d IA GP IMM - - SBB -1e NP - - - - PUSH_DS ONLY32 INSTR_WIDTH -1f NP - - - - POP_DS ONLY32 INSTR_WIDTH +1e S SEG - - - PUSH ONLY32 +1f S SEG - - - POP ONLY32 20 MR GP GP - - AND SIZE_8 LOCK 21 MR GP GP - - AND LOCK 22 RM GP GP - - AND SIZE_8 @@ -139,18 +139,18 @@ 89 MR GP GP - - MOV 8a RM GP GP - - MOV SIZE_8 8b RM GP GP - - MOV -8c/0 MR GP16 SREG - - MOV_S2G -8c/1 MR GP16 SREG - - MOV_S2G -8c/2 MR GP16 SREG - - MOV_S2G -8c/3 MR GP16 SREG - - MOV_S2G -8c/4 MR GP16 SREG - - MOV_S2G -8c/5 MR GP16 SREG - - MOV_S2G +8c/0 MR GP16 SEG16 - - MOV_S2G +8c/1 MR GP16 SEG16 - - MOV_S2G +8c/2 MR GP16 SEG16 - - MOV_S2G +8c/3 MR GP16 SEG16 - - MOV_S2G +8c/4 MR GP16 SEG16 - - MOV_S2G +8c/5 MR GP16 SEG16 - - MOV_S2G 8d/m RM GP MEMZ - - LEA -8e/0 RM SREG GP16 - - MOV_G2S -8e/2 RM SREG GP16 - - MOV_G2S -8e/3 RM SREG GP16 - - MOV_G2S -8e/4 RM SREG GP16 - - MOV_G2S -8e/5 RM SREG GP16 - - MOV_G2S +8e/0 RM SEG16 GP16 - - MOV_G2S +8e/2 RM SEG16 GP16 - - MOV_G2S +8e/3 RM SEG16 GP16 - - MOV_G2S +8e/4 RM SEG16 GP16 - - MOV_G2S +8e/5 RM SEG16 GP16 - - MOV_G2S 8f/0 M GP - - - POP DEF64 # Against frequent belief, only, XCHG (r/e)AX, (r)AX with 90 is NOP. # As a lacking REX.B cannot be specified here, this is hardcoded. @@ -415,14 +415,14 @@ NP.0f37 NP - - - - GETSEC 0f9d M GP - - - SETGE SIZE_8 0f9e M GP - - - SETLE SIZE_8 0f9f M GP - - - SETG SIZE_8 -0fa0 NP - - - - PUSH_FS DEF64 INSTR_WIDTH -0fa1 NP - - - - POP_FS DEF64 INSTR_WIDTH +0fa0 S SEG - - - PUSH DEF64 +0fa1 S SEG - - - POP DEF64 0fa2 NP - - - - CPUID 0fa3 MR GP GP - - BT 0fa4 MRI GP GP IMM8 - SHLD 0fa5 MRC GP GP GP8 - SHLD -0fa8 NP - - - - PUSH_GS DEF64 INSTR_WIDTH -0fa9 NP - - - - POP_GS DEF64 INSTR_WIDTH +0fa8 S SEG - - - PUSH DEF64 +0fa9 S SEG - - - POP DEF64 0faa NP - - - - RSM 0fab MR GP GP - - BTS LOCK 0fac MRI GP GP IMM8 - SHRD diff --git a/parseinstrs.py b/parseinstrs.py index 2b72dfe..9454e48 100644 --- a/parseinstrs.py +++ b/parseinstrs.py @@ -60,6 +60,7 @@ ENCODINGS = { "O": InstrFlags(modreg_idx=0^3), "OI": InstrFlags(modreg_idx=0^3, imm_idx=1^3, imm_control=4), "OA": InstrFlags(modreg_idx=0^3, zeroreg_idx=1^3), + "S": InstrFlags(modreg_idx=0^3, vsib=1), # segment register in bits 3,4,5 "A": InstrFlags(zeroreg_idx=0^3), "D": InstrFlags(imm_idx=0^3, imm_control=6), "FD": InstrFlags(zeroreg_idx=0^3, imm_idx=1^3, imm_control=2), @@ -110,7 +111,8 @@ OPKINDS = { "XMM64": OpKind(8, "XMM"), "XMM128": OpKind(16, "XMM"), "XMM256": OpKind(32, "XMM"), - "SREG": OpKind(2, "SEG"), + "SEG": OpKind(OpKind.SZ_OP, "SEG"), + "SEG16": OpKind(2, "SEG"), "FPU": OpKind(10, "FPU"), "MEM": OpKind(OpKind.SZ_OP, OpKind.K_MEM), "MEMV": OpKind(OpKind.SZ_VEC, OpKind.K_MEM), @@ -586,6 +588,7 @@ def encode_table(entries): for mnem, variants in mnemonics.items(): dedup = [] for variant in variants: + # TODO: when adapting to 32-bit mode, handle S encodings. if not any(x[:3] == variant[:3] for x in dedup): dedup.append(variant) diff --git a/tests/test_encode.c b/tests/test_encode.c index 940b6a1..5e1dd26 100644 --- a/tests/test_encode.c +++ b/tests/test_encode.c @@ -70,6 +70,10 @@ main(int argc, char** argv) TEST("\x54", FE_PUSHr, FE_SP); TEST("\x41\x57", FE_PUSHr, FE_R15); TEST("\x41\x50", FE_PUSHr, FE_R8); + TEST("\x0f\xa0", FE_PUSHr, FE_FS); + TEST("\x66\x0f\xa0", FE_PUSH16r, FE_FS); + TEST("\x0f\xa8", FE_PUSHr, FE_GS); + TEST("\x66\x0f\xa8", FE_PUSH16r, FE_GS); TEST("\x9c", FE_PUSHF); TEST("\xd2\xe4", FE_SHL8rr, FE_AH, FE_CX); TEST("", FE_SHL8rr, FE_AH, FE_DX);