instrs: Disambiguate instr type by mnemonic

This commit is contained in:
Alexis Engelke
2022-02-20 15:37:23 +01:00
parent e78a89b610
commit 87fe6314b8
3 changed files with 60 additions and 48 deletions

View File

@@ -69,18 +69,18 @@ main(int argc, char** argv)
TEST("\x54", FE_PUSHr, FE_SP);
TEST("\x41\x57", FE_PUSHr, FE_R15);
TEST("\x41\x50", FE_PUSHr, FE_R8);
TEST("", FE_PUSHr, FE_ES);
TEST("", FE_PUSH16r, FE_ES);
TEST("", FE_PUSHr, FE_CS);
TEST("", FE_PUSH16r, FE_CS);
TEST("", FE_PUSHr, FE_SS);
TEST("", FE_PUSH16r, FE_SS);
TEST("", FE_PUSHr, FE_DS);
TEST("", FE_PUSH16r, FE_DS);
TEST("\x0f\xa0", FE_PUSHr, FE_FS);
TEST("\x66\x0f\xa0", FE_PUSH16r, FE_FS);
TEST("\x0f\xa8", FE_PUSHr, FE_GS);
TEST("\x66\x0f\xa8", FE_PUSH16r, FE_GS);
TEST("", FE_PUSH_SEGr, FE_ES);
TEST("", FE_PUSH_SEG16r, FE_ES);
TEST("", FE_PUSH_SEGr, FE_CS);
TEST("", FE_PUSH_SEG16r, FE_CS);
TEST("", FE_PUSH_SEGr, FE_SS);
TEST("", FE_PUSH_SEG16r, FE_SS);
TEST("", FE_PUSH_SEGr, FE_DS);
TEST("", FE_PUSH_SEG16r, FE_DS);
TEST("\x0f\xa0", FE_PUSH_SEGr, FE_FS);
TEST("\x66\x0f\xa0", FE_PUSH_SEG16r, FE_FS);
TEST("\x0f\xa8", FE_PUSH_SEGr, FE_GS);
TEST("\x66\x0f\xa8", FE_PUSH_SEG16r, FE_GS);
TEST("\xff\x30", FE_PUSHm, FE_MEM(FE_AX, 0, 0, 0));
TEST("\xff\x31", FE_PUSHm, FE_MEM(FE_CX, 0, 0, 0));
TEST("\x9c", FE_PUSHF);

View File

@@ -11,15 +11,15 @@
03 RM Gv Ev - - ADD EFL=m--mmmmm
04 IA Rb Ib - - ADD SZ8 EFL=m--mmmmm
05 IA Rv Iz - - ADD EFL=m--mmmmm
06 S Sv - - - PUSH I64
07 S Sv - - - POP I64
06 S Sv - - - PUSH_SEG I64
07 S Sv - - - POP_SEG I64
08 MR Eb Gb - - OR LOCK SZ8 EFL=0--mmum0
09 MR Ev Gv - - OR LOCK EFL=0--mmum0
0a RM Gb Eb - - OR SZ8 EFL=0--mmum0
0b RM Gv Ev - - OR EFL=0--mmum0
0c IA Rb Ib - - OR SZ8 EFL=0--mmum0
0d IA Rv Iz - - OR EFL=0--mmum0
0e S Sv - - - PUSH I64
0e S Sv - - - PUSH_SEG I64
#0f escape opcode
10 MR Eb Gb - - ADC LOCK SZ8 EFL=m--mmmmM
11 MR Ev Gv - - ADC LOCK EFL=m--mmmmM
@@ -27,16 +27,16 @@
13 RM Gv Ev - - ADC EFL=m--mmmmM
14 IA Rb Ib - - ADC SZ8 EFL=m--mmmmM
15 IA Rv Iz - - ADC EFL=m--mmmmM
16 S Sv - - - PUSH I64
17 S Sv - - - POP I64
16 S Sv - - - PUSH_SEG I64
17 S Sv - - - POP_SEG I64
18 MR Eb Gb - - SBB LOCK SZ8 EFL=m--mmmmM
19 MR Ev Gv - - SBB LOCK EFL=m--mmmmM
1a RM Gb Eb - - SBB SZ8 EFL=m--mmmmM
1b RM Gv Ev - - SBB EFL=m--mmmmM
1c IA Rb Ib - - SBB SZ8 EFL=m--mmmmM
1d IA Rv Iz - - SBB EFL=m--mmmmM
1e S Sv - - - PUSH I64
1f S Sv - - - POP I64
1e S Sv - - - PUSH_SEG I64
1f S Sv - - - POP_SEG I64
20 MR Eb Gb - - AND LOCK SZ8 EFL=0--mmum0
21 MR Ev Gv - - AND LOCK EFL=0--mmum0
22 RM Gb Eb - - AND SZ8 EFL=0--mmum0
@@ -377,10 +377,10 @@ NP.0f01d7 NP - - - - ENCLU F=SGX
*0f1e MR Ev Gv - - RESERVED_NOP
*0f1f MR Ev Gv - - RESERVED_NOP
0f1f/0 M Ev - - - NOP
0f20 MR Ry Cy - - MOV_CR I66 D64 CPL0 EFL=u--uuuuu
0f21 MR Ry Dy - - MOV_DR I66 D64 CPL0 EFL=u--uuuuu
0f22 RM Cy Ry - - MOV_CR I66 D64 CPL0 EFL=u--uuuuu
0f23 RM Dy Ry - - MOV_DR I66 D64 CPL0 EFL=u--uuuuu
0f20 MR Ry Cy - - MOV_CR2G I66 D64 CPL0 EFL=u--uuuuu
0f21 MR Ry Dy - - MOV_DR2G I66 D64 CPL0 EFL=u--uuuuu
0f22 RM Cy Ry - - MOV_G2CR I66 D64 CPL0 EFL=u--uuuuu
0f23 RM Dy Ry - - MOV_G2DR I66 D64 CPL0 EFL=u--uuuuu
0f30 NP - - - - WRMSR F=586 CPL0
0f31 NP - - - - RDTSC F=586
0f32 NP - - - - RDMSR F=586 CPL0
@@ -437,14 +437,14 @@ NP.0f37 NP - - - - GETSEC F=SMX EFL=MMMMMM
0f9d M Eb - - - SETGE SZ8 EFL=t--t----
0f9e M Eb - - - SETLE SZ8 EFL=t--tt---
0f9f M Eb - - - SETG SZ8 EFL=t--tt---
0fa0 S Sv - - - PUSH D64
0fa1 S Sv - - - POP D64
0fa0 S Sv - - - PUSH_SEG D64
0fa1 S Sv - - - POP_SEG D64
0fa2 NP - - - - CPUID F=586
0fa3 MR Ev Gv - - BT EFL=u--u-uum
0fa4 MRI Ev Gv Ib - SHLD EFL=u--mmumm
0fa5 MRC Ev Gv Rb - SHLD EFL=u--mmumm
0fa8 S Sv - - - PUSH D64
0fa9 S Sv - - - POP D64
0fa8 S Sv - - - PUSH_SEG D64
0fa9 S Sv - - - POP_SEG D64
0faa NP - - - - RSM F=586
0fab MR Ev Gv - - BTS LOCK EFL=u--u-uum
0fac MRI Ev Gv Ib - SHRD EFL=u--mmumm
@@ -502,8 +502,8 @@ NP.0f68 RM Pq Qd - - MMX_PUNPCKHBW F=MMX
NP.0f69 RM Pq Qd - - MMX_PUNPCKHWD F=MMX
NP.0f6a RM Pq Qd - - MMX_PUNPCKHDQ F=MMX
NP.0f6b RM Pq Qd - - MMX_PACKSSDW F=MMX
NP.W0.0f6e RM Pq Ey - - MMX_MOVD F=MMX ENC_NOSZ
NP.W1.0f6e RM Pq Ey - - MMX_MOVQ F=MMX ENC_NOSZ
NP.W0.0f6e RM Pq Ey - - MMX_MOVD_G2M F=MMX ENC_NOSZ
NP.W1.0f6e RM Pq Ey - - MMX_MOVQ_G2M F=MMX ENC_NOSZ
NP.0f6f RM Pq Qq - - MMX_MOVQ F=MMX
NP.0f70 RMI Qq Pq Ib - MMX_PSHUFW F=SSE
NP.0f71/2r MI Nq Ib - - MMX_PSRLW F=MMX
@@ -518,8 +518,8 @@ NP.0f74 RM Pq Qq - - MMX_PCMPEQB F=MMX
NP.0f75 RM Pq Qq - - MMX_PCMPEQW F=MMX
NP.0f76 RM Pq Qq - - MMX_PCMPEQD F=MMX
NP.0f77 NP - - - - MMX_EMMS F=MMX
NP.W0.0f7e MR Ey Py - - MMX_MOVD F=MMX ENC_NOSZ
NP.W1.0f7e MR Ey Py - - MMX_MOVQ F=MMX ENC_NOSZ
NP.W0.0f7e MR Ey Py - - MMX_MOVD_M2G F=MMX ENC_NOSZ
NP.W1.0f7e MR Ey Py - - MMX_MOVQ_M2G F=MMX ENC_NOSZ
NP.0f7f MR Qq Pq - - MMX_MOVQ F=MMX
# TODO: Ey operand is actually Ry/Mw
NP.0fc4 RMI Pq Ey Ib - MMX_PINSRW F=SSE ENC_NOSZ
@@ -694,8 +694,8 @@ F2.0f5f RM Vsd Wsd - - SSE_MAXSD F=SSE2
66.0f6b RM Vx Wx - - SSE_PACKSSDW F=SSE2
66.0f6c RM Vx Wx - - SSE_PUNPCKLQDQ F=SSE2
66.0f6d RM Vx Wx - - SSE_PUNPCKHQDQ F=SSE2
66.W0.0f6e RM Vx Ey - - SSE_MOVD F=SSE2 ENC_NOSZ
66.W1.0f6e RM Vx Ey - - SSE_MOVQ F=SSE2 ENC_NOSZ
66.W0.0f6e RM Vx Ey - - SSE_MOVD_G2X F=SSE2 ENC_NOSZ
66.W1.0f6e RM Vx Ey - - SSE_MOVQ_G2X F=SSE2 ENC_NOSZ
66.0f6f RM Vx Wx - - SSE_MOVDQA F=SSE2
F3.0f6f RM Vx Wx - - SSE_MOVDQU F=SSE2
66.0f70 RMI Vx Wx Ib - SSE_PSHUFD F=SSE2
@@ -723,8 +723,8 @@ F2.0f79/r RM Vx Wx - - SSE_INSERTQ F=SSE4A ONLYAMD
F2.0f7c RM Vx Wx - - SSE_HADDPS F=SSE3
66.0f7d RM Vx Wx - - SSE_HSUBPD F=SSE3
F2.0f7d RM Vx Wx - - SSE_HSUBPS F=SSE3
66.W0.0f7e MR Ey Vy - - SSE_MOVD F=SSE2 ENC_NOSZ
66.W1.0f7e MR Ey Vy - - SSE_MOVQ F=SSE2 ENC_NOSZ
66.W0.0f7e MR Ey Vy - - SSE_MOVD_X2G F=SSE2 ENC_NOSZ
66.W1.0f7e MR Ey Vy - - SSE_MOVQ_X2G F=SSE2 ENC_NOSZ
F3.0f7e RM Vx Wq - - SSE_MOVQ F=SSE2
66.0f7f MR Wx Vx - - SSE_MOVDQA F=SSE2
F3.0f7f MR Wx Vx - - SSE_MOVDQU F=SSE2
@@ -997,9 +997,9 @@ VEX.66.0f6a RVM Vx Hx Wx - VPUNPCKHDQ F=AVX
VEX.66.0f6b RVM Vx Hx Wx - VPACKSSDW F=AVX
VEX.66.0f6c RVM Vx Hx Wx - VPUNPCKLQDQ F=AVX
VEX.66.0f6d RVM Vx Hx Wx - VPUNPCKHQDQ F=AVX
VEX.66.W0.L0.0f6e RM Vy Ey - - VMOVD F=AVX ENC_NOSZ
VEX.66.W1.L0.0f6e RM Vy Ey - - VMOVD I64 F=AVX ENC_NOSZ
VEX.66.W1.L0.0f6e RM Vy Ey - - VMOVQ O64 F=AVX ENC_NOSZ
VEX.66.W0.L0.0f6e RM Vy Ey - - VMOVD_G2X F=AVX ENC_NOSZ
VEX.66.W1.L0.0f6e RM Vy Ey - - VMOVD_G2X I64 F=AVX ENC_NOSZ
VEX.66.W1.L0.0f6e RM Vy Ey - - VMOVQ_G2X O64 F=AVX ENC_NOSZ
VEX.66.0f6f RM Vx Wx - - VMOVDQA F=AVX
VEX.F3.0f6f RM Vx Wx - - VMOVDQU F=AVX
VEX.66.0f70 RMI Vx Wx Ib - VPSHUFD F=AVX
@@ -1024,9 +1024,9 @@ VEX.66.0f7c RVM Vx Hx Wx - VHADDPD F=AVX
VEX.F2.0f7c RVM Vx Hx Wx - VHADDPS F=AVX
VEX.66.0f7d RVM Vx Hx Wx - VHSUBPD F=AVX
VEX.F2.0f7d RVM Vx Hx Wx - VHSUBPS F=AVX
VEX.66.W0.L0.0f7e MR Ey Vy - - VMOVD F=AVX ENC_NOSZ
VEX.66.W1.L0.0f7e MR Ey Vy - - VMOVQ I64 F=AVX ENC_NOSZ
VEX.66.W1.L0.0f7e MR Ey Vy - - VMOVQ O64 F=AVX ENC_NOSZ
VEX.66.W0.L0.0f7e MR Ey Vy - - VMOVD_X2G F=AVX ENC_NOSZ
VEX.66.W1.L0.0f7e MR Ey Vy - - VMOVQ_X2G I64 F=AVX ENC_NOSZ
VEX.66.W1.L0.0f7e MR Ey Vy - - VMOVQ_X2G O64 F=AVX ENC_NOSZ
VEX.F3.L0.0f7e RM Vq Wq - - VMOVQ F=AVX ENC_NOSZ
VEX.66.0f7f MR Wx Vx - - VMOVDQA F=AVX
VEX.F3.0f7f MR Wx Vx - - VMOVDQU F=AVX

View File

@@ -224,7 +224,7 @@ class InstrDesc(NamedTuple):
tys.append(self.OPKIND_REGTYS_ENC[op.kind])
return sum(ty << (4*i) for i, ty in enumerate(tys))
def encode(self, ign66, modrm):
def encode(self, mnem, ign66, modrm):
flags = ENCODINGS[self.encoding]
extraflags = {}
@@ -276,7 +276,7 @@ class InstrDesc(NamedTuple):
enc = flags._replace(**extraflags)._encode()
enc = tuple((enc >> i) & 0xffff for i in range(0, 48, 16))
# First 2 bytes are the mnemonic, last 6 bytes are the encoding.
return f"{{FDI_{self.mnemonic}, {enc[0]}, {enc[1]}, {enc[2]}}}"
return f"{{FDI_{mnem}, {enc[0]}, {enc[1]}, {enc[2]}}}"
class EntryKind(Enum):
NONE = 0
@@ -500,15 +500,24 @@ def superstring(strs):
return merged
def decode_table(entries, modes):
mnems = sorted({desc.mnemonic for _, _, desc in entries})
decode_mnems_lines = [f"FD_MNEMONIC({m},{i})\n" for i, m in enumerate(mnems)]
trie = Trie(root_count=len(modes))
descs, desc_map = [], {}
mnems, descs, desc_map = set(), [], {}
for weak, opcode, desc in entries:
ign66 = opcode.prefix in ("NP", "66", "F2", "F3")
modrm = opcode.modreg or opcode.opcext
descenc = desc.encode(ign66, modrm)
mnem = {
"PUSH_SEG": "PUSH", "POP_SEG": "POP",
"MOV_CR2G": "MOV_CR", "MOV_G2CR": "MOV_CR",
"MOV_DR2G": "MOV_DR", "MOV_G2DR": "MOV_DR",
"MMX_MOVD_M2G": "MMX_MOVD", "MMX_MOVD_G2M": "MMX_MOVD",
"MMX_MOVQ_M2G": "MMX_MOVQ", "MMX_MOVQ_G2M": "MMX_MOVQ",
"SSE_MOVD_X2G": "SSE_MOVD", "SSE_MOVD_G2X": "SSE_MOVD",
"SSE_MOVQ_X2G": "SSE_MOVQ", "SSE_MOVQ_G2X": "SSE_MOVQ",
"VMOVD_X2G": "VMOVD", "VMOVD_G2X": "VMOVD",
"VMOVQ_X2G": "VMOVQ", "VMOVQ_G2X": "VMOVQ",
}.get(desc.mnemonic, desc.mnemonic)
mnems.add(mnem)
descenc = desc.encode(mnem, ign66, modrm)
desc_idx = desc_map.get(descenc)
if desc_idx is None:
desc_idx = desc_map[descenc] = len(descs)
@@ -520,6 +529,9 @@ def decode_table(entries, modes):
trie.deduplicate()
table_data, root_offsets = trie.compile()
mnems = sorted(mnems)
decode_mnems_lines = [f"FD_MNEMONIC({m},{i})\n" for i, m in enumerate(mnems)]
mnemonics_intel = [m.replace("SSE_", "").replace("MMX_", "")
.replace("MOVABS", "MOV").replace("RESERVED_", "")
.replace("JMPF", "JMP FAR").replace("CALLF", "CALL FAR")