diff --git a/encode-test.c b/encode-test.c index 5c97fdb..8fd3e37 100644 --- a/encode-test.c +++ b/encode-test.c @@ -69,18 +69,18 @@ main(int argc, char** argv) TEST("\x54", FE_PUSHr, FE_SP); TEST("\x41\x57", FE_PUSHr, FE_R15); TEST("\x41\x50", FE_PUSHr, FE_R8); - TEST("", FE_PUSHr, FE_ES); - TEST("", FE_PUSH16r, FE_ES); - TEST("", FE_PUSHr, FE_CS); - TEST("", FE_PUSH16r, FE_CS); - TEST("", FE_PUSHr, FE_SS); - TEST("", FE_PUSH16r, FE_SS); - TEST("", FE_PUSHr, FE_DS); - TEST("", FE_PUSH16r, FE_DS); - TEST("\x0f\xa0", FE_PUSHr, FE_FS); - TEST("\x66\x0f\xa0", FE_PUSH16r, FE_FS); - TEST("\x0f\xa8", FE_PUSHr, FE_GS); - TEST("\x66\x0f\xa8", FE_PUSH16r, FE_GS); + TEST("", FE_PUSH_SEGr, FE_ES); + TEST("", FE_PUSH_SEG16r, FE_ES); + TEST("", FE_PUSH_SEGr, FE_CS); + TEST("", FE_PUSH_SEG16r, FE_CS); + TEST("", FE_PUSH_SEGr, FE_SS); + TEST("", FE_PUSH_SEG16r, FE_SS); + TEST("", FE_PUSH_SEGr, FE_DS); + TEST("", FE_PUSH_SEG16r, FE_DS); + TEST("\x0f\xa0", FE_PUSH_SEGr, FE_FS); + TEST("\x66\x0f\xa0", FE_PUSH_SEG16r, FE_FS); + TEST("\x0f\xa8", FE_PUSH_SEGr, FE_GS); + TEST("\x66\x0f\xa8", FE_PUSH_SEG16r, FE_GS); TEST("\xff\x30", FE_PUSHm, FE_MEM(FE_AX, 0, 0, 0)); TEST("\xff\x31", FE_PUSHm, FE_MEM(FE_CX, 0, 0, 0)); TEST("\x9c", FE_PUSHF); diff --git a/instrs.txt b/instrs.txt index 83bb338..0cb8ad7 100644 --- a/instrs.txt +++ b/instrs.txt @@ -11,15 +11,15 @@ 03 RM Gv Ev - - ADD EFL=m--mmmmm 04 IA Rb Ib - - ADD SZ8 EFL=m--mmmmm 05 IA Rv Iz - - ADD EFL=m--mmmmm -06 S Sv - - - PUSH I64 -07 S Sv - - - POP I64 +06 S Sv - - - PUSH_SEG I64 +07 S Sv - - - POP_SEG I64 08 MR Eb Gb - - OR LOCK SZ8 EFL=0--mmum0 09 MR Ev Gv - - OR LOCK EFL=0--mmum0 0a RM Gb Eb - - OR SZ8 EFL=0--mmum0 0b RM Gv Ev - - OR EFL=0--mmum0 0c IA Rb Ib - - OR SZ8 EFL=0--mmum0 0d IA Rv Iz - - OR EFL=0--mmum0 -0e S Sv - - - PUSH I64 +0e S Sv - - - PUSH_SEG I64 #0f escape opcode 10 MR Eb Gb - - ADC LOCK SZ8 EFL=m--mmmmM 11 MR Ev Gv - - ADC LOCK EFL=m--mmmmM @@ -27,16 +27,16 @@ 13 RM Gv Ev - - ADC EFL=m--mmmmM 14 IA Rb Ib - - ADC SZ8 EFL=m--mmmmM 15 IA Rv Iz - - ADC EFL=m--mmmmM -16 S Sv - - - PUSH I64 -17 S Sv - - - POP I64 +16 S Sv - - - PUSH_SEG I64 +17 S Sv - - - POP_SEG I64 18 MR Eb Gb - - SBB LOCK SZ8 EFL=m--mmmmM 19 MR Ev Gv - - SBB LOCK EFL=m--mmmmM 1a RM Gb Eb - - SBB SZ8 EFL=m--mmmmM 1b RM Gv Ev - - SBB EFL=m--mmmmM 1c IA Rb Ib - - SBB SZ8 EFL=m--mmmmM 1d IA Rv Iz - - SBB EFL=m--mmmmM -1e S Sv - - - PUSH I64 -1f S Sv - - - POP I64 +1e S Sv - - - PUSH_SEG I64 +1f S Sv - - - POP_SEG I64 20 MR Eb Gb - - AND LOCK SZ8 EFL=0--mmum0 21 MR Ev Gv - - AND LOCK EFL=0--mmum0 22 RM Gb Eb - - AND SZ8 EFL=0--mmum0 @@ -377,10 +377,10 @@ NP.0f01d7 NP - - - - ENCLU F=SGX *0f1e MR Ev Gv - - RESERVED_NOP *0f1f MR Ev Gv - - RESERVED_NOP 0f1f/0 M Ev - - - NOP -0f20 MR Ry Cy - - MOV_CR I66 D64 CPL0 EFL=u--uuuuu -0f21 MR Ry Dy - - MOV_DR I66 D64 CPL0 EFL=u--uuuuu -0f22 RM Cy Ry - - MOV_CR I66 D64 CPL0 EFL=u--uuuuu -0f23 RM Dy Ry - - MOV_DR I66 D64 CPL0 EFL=u--uuuuu +0f20 MR Ry Cy - - MOV_CR2G I66 D64 CPL0 EFL=u--uuuuu +0f21 MR Ry Dy - - MOV_DR2G I66 D64 CPL0 EFL=u--uuuuu +0f22 RM Cy Ry - - MOV_G2CR I66 D64 CPL0 EFL=u--uuuuu +0f23 RM Dy Ry - - MOV_G2DR I66 D64 CPL0 EFL=u--uuuuu 0f30 NP - - - - WRMSR F=586 CPL0 0f31 NP - - - - RDTSC F=586 0f32 NP - - - - RDMSR F=586 CPL0 @@ -437,14 +437,14 @@ NP.0f37 NP - - - - GETSEC F=SMX EFL=MMMMMM 0f9d M Eb - - - SETGE SZ8 EFL=t--t---- 0f9e M Eb - - - SETLE SZ8 EFL=t--tt--- 0f9f M Eb - - - SETG SZ8 EFL=t--tt--- -0fa0 S Sv - - - PUSH D64 -0fa1 S Sv - - - POP D64 +0fa0 S Sv - - - PUSH_SEG D64 +0fa1 S Sv - - - POP_SEG D64 0fa2 NP - - - - CPUID F=586 0fa3 MR Ev Gv - - BT EFL=u--u-uum 0fa4 MRI Ev Gv Ib - SHLD EFL=u--mmumm 0fa5 MRC Ev Gv Rb - SHLD EFL=u--mmumm -0fa8 S Sv - - - PUSH D64 -0fa9 S Sv - - - POP D64 +0fa8 S Sv - - - PUSH_SEG D64 +0fa9 S Sv - - - POP_SEG D64 0faa NP - - - - RSM F=586 0fab MR Ev Gv - - BTS LOCK EFL=u--u-uum 0fac MRI Ev Gv Ib - SHRD EFL=u--mmumm @@ -502,8 +502,8 @@ NP.0f68 RM Pq Qd - - MMX_PUNPCKHBW F=MMX NP.0f69 RM Pq Qd - - MMX_PUNPCKHWD F=MMX NP.0f6a RM Pq Qd - - MMX_PUNPCKHDQ F=MMX NP.0f6b RM Pq Qd - - MMX_PACKSSDW F=MMX -NP.W0.0f6e RM Pq Ey - - MMX_MOVD F=MMX ENC_NOSZ -NP.W1.0f6e RM Pq Ey - - MMX_MOVQ F=MMX ENC_NOSZ +NP.W0.0f6e RM Pq Ey - - MMX_MOVD_G2M F=MMX ENC_NOSZ +NP.W1.0f6e RM Pq Ey - - MMX_MOVQ_G2M F=MMX ENC_NOSZ NP.0f6f RM Pq Qq - - MMX_MOVQ F=MMX NP.0f70 RMI Qq Pq Ib - MMX_PSHUFW F=SSE NP.0f71/2r MI Nq Ib - - MMX_PSRLW F=MMX @@ -518,8 +518,8 @@ NP.0f74 RM Pq Qq - - MMX_PCMPEQB F=MMX NP.0f75 RM Pq Qq - - MMX_PCMPEQW F=MMX NP.0f76 RM Pq Qq - - MMX_PCMPEQD F=MMX NP.0f77 NP - - - - MMX_EMMS F=MMX -NP.W0.0f7e MR Ey Py - - MMX_MOVD F=MMX ENC_NOSZ -NP.W1.0f7e MR Ey Py - - MMX_MOVQ F=MMX ENC_NOSZ +NP.W0.0f7e MR Ey Py - - MMX_MOVD_M2G F=MMX ENC_NOSZ +NP.W1.0f7e MR Ey Py - - MMX_MOVQ_M2G F=MMX ENC_NOSZ NP.0f7f MR Qq Pq - - MMX_MOVQ F=MMX # TODO: Ey operand is actually Ry/Mw NP.0fc4 RMI Pq Ey Ib - MMX_PINSRW F=SSE ENC_NOSZ @@ -694,8 +694,8 @@ F2.0f5f RM Vsd Wsd - - SSE_MAXSD F=SSE2 66.0f6b RM Vx Wx - - SSE_PACKSSDW F=SSE2 66.0f6c RM Vx Wx - - SSE_PUNPCKLQDQ F=SSE2 66.0f6d RM Vx Wx - - SSE_PUNPCKHQDQ F=SSE2 -66.W0.0f6e RM Vx Ey - - SSE_MOVD F=SSE2 ENC_NOSZ -66.W1.0f6e RM Vx Ey - - SSE_MOVQ F=SSE2 ENC_NOSZ +66.W0.0f6e RM Vx Ey - - SSE_MOVD_G2X F=SSE2 ENC_NOSZ +66.W1.0f6e RM Vx Ey - - SSE_MOVQ_G2X F=SSE2 ENC_NOSZ 66.0f6f RM Vx Wx - - SSE_MOVDQA F=SSE2 F3.0f6f RM Vx Wx - - SSE_MOVDQU F=SSE2 66.0f70 RMI Vx Wx Ib - SSE_PSHUFD F=SSE2 @@ -723,8 +723,8 @@ F2.0f79/r RM Vx Wx - - SSE_INSERTQ F=SSE4A ONLYAMD F2.0f7c RM Vx Wx - - SSE_HADDPS F=SSE3 66.0f7d RM Vx Wx - - SSE_HSUBPD F=SSE3 F2.0f7d RM Vx Wx - - SSE_HSUBPS F=SSE3 -66.W0.0f7e MR Ey Vy - - SSE_MOVD F=SSE2 ENC_NOSZ -66.W1.0f7e MR Ey Vy - - SSE_MOVQ F=SSE2 ENC_NOSZ +66.W0.0f7e MR Ey Vy - - SSE_MOVD_X2G F=SSE2 ENC_NOSZ +66.W1.0f7e MR Ey Vy - - SSE_MOVQ_X2G F=SSE2 ENC_NOSZ F3.0f7e RM Vx Wq - - SSE_MOVQ F=SSE2 66.0f7f MR Wx Vx - - SSE_MOVDQA F=SSE2 F3.0f7f MR Wx Vx - - SSE_MOVDQU F=SSE2 @@ -997,9 +997,9 @@ VEX.66.0f6a RVM Vx Hx Wx - VPUNPCKHDQ F=AVX VEX.66.0f6b RVM Vx Hx Wx - VPACKSSDW F=AVX VEX.66.0f6c RVM Vx Hx Wx - VPUNPCKLQDQ F=AVX VEX.66.0f6d RVM Vx Hx Wx - VPUNPCKHQDQ F=AVX -VEX.66.W0.L0.0f6e RM Vy Ey - - VMOVD F=AVX ENC_NOSZ -VEX.66.W1.L0.0f6e RM Vy Ey - - VMOVD I64 F=AVX ENC_NOSZ -VEX.66.W1.L0.0f6e RM Vy Ey - - VMOVQ O64 F=AVX ENC_NOSZ +VEX.66.W0.L0.0f6e RM Vy Ey - - VMOVD_G2X F=AVX ENC_NOSZ +VEX.66.W1.L0.0f6e RM Vy Ey - - VMOVD_G2X I64 F=AVX ENC_NOSZ +VEX.66.W1.L0.0f6e RM Vy Ey - - VMOVQ_G2X O64 F=AVX ENC_NOSZ VEX.66.0f6f RM Vx Wx - - VMOVDQA F=AVX VEX.F3.0f6f RM Vx Wx - - VMOVDQU F=AVX VEX.66.0f70 RMI Vx Wx Ib - VPSHUFD F=AVX @@ -1024,9 +1024,9 @@ VEX.66.0f7c RVM Vx Hx Wx - VHADDPD F=AVX VEX.F2.0f7c RVM Vx Hx Wx - VHADDPS F=AVX VEX.66.0f7d RVM Vx Hx Wx - VHSUBPD F=AVX VEX.F2.0f7d RVM Vx Hx Wx - VHSUBPS F=AVX -VEX.66.W0.L0.0f7e MR Ey Vy - - VMOVD F=AVX ENC_NOSZ -VEX.66.W1.L0.0f7e MR Ey Vy - - VMOVQ I64 F=AVX ENC_NOSZ -VEX.66.W1.L0.0f7e MR Ey Vy - - VMOVQ O64 F=AVX ENC_NOSZ +VEX.66.W0.L0.0f7e MR Ey Vy - - VMOVD_X2G F=AVX ENC_NOSZ +VEX.66.W1.L0.0f7e MR Ey Vy - - VMOVQ_X2G I64 F=AVX ENC_NOSZ +VEX.66.W1.L0.0f7e MR Ey Vy - - VMOVQ_X2G O64 F=AVX ENC_NOSZ VEX.F3.L0.0f7e RM Vq Wq - - VMOVQ F=AVX ENC_NOSZ VEX.66.0f7f MR Wx Vx - - VMOVDQA F=AVX VEX.F3.0f7f MR Wx Vx - - VMOVDQU F=AVX diff --git a/parseinstrs.py b/parseinstrs.py index 43c264f..6a99bbb 100644 --- a/parseinstrs.py +++ b/parseinstrs.py @@ -224,7 +224,7 @@ class InstrDesc(NamedTuple): tys.append(self.OPKIND_REGTYS_ENC[op.kind]) return sum(ty << (4*i) for i, ty in enumerate(tys)) - def encode(self, ign66, modrm): + def encode(self, mnem, ign66, modrm): flags = ENCODINGS[self.encoding] extraflags = {} @@ -276,7 +276,7 @@ class InstrDesc(NamedTuple): enc = flags._replace(**extraflags)._encode() enc = tuple((enc >> i) & 0xffff for i in range(0, 48, 16)) # First 2 bytes are the mnemonic, last 6 bytes are the encoding. - return f"{{FDI_{self.mnemonic}, {enc[0]}, {enc[1]}, {enc[2]}}}" + return f"{{FDI_{mnem}, {enc[0]}, {enc[1]}, {enc[2]}}}" class EntryKind(Enum): NONE = 0 @@ -500,15 +500,24 @@ def superstring(strs): return merged def decode_table(entries, modes): - mnems = sorted({desc.mnemonic for _, _, desc in entries}) - decode_mnems_lines = [f"FD_MNEMONIC({m},{i})\n" for i, m in enumerate(mnems)] - trie = Trie(root_count=len(modes)) - descs, desc_map = [], {} + mnems, descs, desc_map = set(), [], {} for weak, opcode, desc in entries: ign66 = opcode.prefix in ("NP", "66", "F2", "F3") modrm = opcode.modreg or opcode.opcext - descenc = desc.encode(ign66, modrm) + mnem = { + "PUSH_SEG": "PUSH", "POP_SEG": "POP", + "MOV_CR2G": "MOV_CR", "MOV_G2CR": "MOV_CR", + "MOV_DR2G": "MOV_DR", "MOV_G2DR": "MOV_DR", + "MMX_MOVD_M2G": "MMX_MOVD", "MMX_MOVD_G2M": "MMX_MOVD", + "MMX_MOVQ_M2G": "MMX_MOVQ", "MMX_MOVQ_G2M": "MMX_MOVQ", + "SSE_MOVD_X2G": "SSE_MOVD", "SSE_MOVD_G2X": "SSE_MOVD", + "SSE_MOVQ_X2G": "SSE_MOVQ", "SSE_MOVQ_G2X": "SSE_MOVQ", + "VMOVD_X2G": "VMOVD", "VMOVD_G2X": "VMOVD", + "VMOVQ_X2G": "VMOVQ", "VMOVQ_G2X": "VMOVQ", + }.get(desc.mnemonic, desc.mnemonic) + mnems.add(mnem) + descenc = desc.encode(mnem, ign66, modrm) desc_idx = desc_map.get(descenc) if desc_idx is None: desc_idx = desc_map[descenc] = len(descs) @@ -520,6 +529,9 @@ def decode_table(entries, modes): trie.deduplicate() table_data, root_offsets = trie.compile() + mnems = sorted(mnems) + decode_mnems_lines = [f"FD_MNEMONIC({m},{i})\n" for i, m in enumerate(mnems)] + mnemonics_intel = [m.replace("SSE_", "").replace("MMX_", "") .replace("MOVABS", "MOV").replace("RESERVED_", "") .replace("JMPF", "JMP FAR").replace("CALLF", "CALL FAR")