From e78a89b61042a370e87c06b594cb5d6815f77d0c Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Thu, 6 Jan 2022 10:31:09 +0100 Subject: [PATCH] instrs: Annotate 67h and segment override usage Some instructions honor an address-size override or a segment override, even in the absence of a directly encoded memory operand. These annotations are not yet used, but may be used in future to optimize the size of encoded instructions. --- instrs.txt | 52 +++++++++++++++++++++++++------------------------- parseinstrs.py | 9 +++++++-- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/instrs.txt b/instrs.txt index e5462a0..83bb338 100644 --- a/instrs.txt +++ b/instrs.txt @@ -86,10 +86,10 @@ 69 RMI Gv Ev Iz - IMUL EFL=m--uuuum 6a I Ibs - - - PUSH D64 6b RMI Gv Ev Ibs - IMUL EFL=m--uuuum -6c NP - - - - INS+w SZ8 ENC_REP EFL=-t------ -6d NP - - - - INS+w ENC_REP EFL=-t------ -6e NP - - - - OUTS+w SZ8 ENC_REP EFL=-t------ -6f NP - - - - OUTS+w ENC_REP EFL=-t------ +6c NP - - - - INS+ws SZ8 ENC_REP EFL=-t------ +6d NP - - - - INS+ws ENC_REP EFL=-t------ +6e NP - - - - OUTS+was SZ8 ENC_REP EFL=-t------ +6f NP - - - - OUTS+was ENC_REP EFL=-t------ 70 D Jbs - - - JO F64 EFL=t------- 71 D Jbs - - - JNO F64 EFL=t------- 72 D Jbs - - - JC F64 EFL=-------t @@ -172,22 +172,22 @@ 9d NP - - - - POPF+w D64 EFL=mmmmmmmm 9e NP - - - - SAHF EFL=---mmmmm 9f NP - - - - LAHF EFL=---ttttt -a0 FD Rb Ob - - MOV SZ8 -a1 FD Rv Ov - - MOV -a2 TD Ob Rb - - MOV SZ8 -a3 TD Ov Rv - - MOV -a4 NP - - - - MOVS+w SZ8 ENC_REP EFL=-t------ -a5 NP - - - - MOVS+w ENC_REP EFL=-t------ -a6 NP - - - - CMPS+w SZ8 ENC_REPCC EFL=mt-mmmmm -a7 NP - - - - CMPS+w ENC_REPCC EFL=mt-mmmmm +a0 FD Rb Ob - - MOV+as SZ8 +a1 FD Rv Ov - - MOV+as +a2 TD Ob Rb - - MOV+as SZ8 +a3 TD Ov Rv - - MOV+as +a4 NP - - - - MOVS+was SZ8 ENC_REP EFL=-t------ +a5 NP - - - - MOVS+was ENC_REP EFL=-t------ +a6 NP - - - - CMPS+was SZ8 ENC_REPCC EFL=mt-mmmmm +a7 NP - - - - CMPS+was ENC_REPCC EFL=mt-mmmmm a8 IA Rb Ib - - TEST SZ8 EFL=0--mmum0 a9 IA Rv Iz - - TEST EFL=0--mmum0 -aa NP - - - - STOS+w SZ8 ENC_REP EFL=-t------ -ab NP - - - - STOS+w ENC_REP EFL=-t------ -ac NP - - - - LODS+w SZ8 ENC_REP EFL=-t------ -ad NP - - - - LODS+w ENC_REP EFL=-t------ -ae NP - - - - SCAS+w SZ8 ENC_REPCC EFL=mt-mmmmm -af NP - - - - SCAS+w ENC_REPCC EFL=mt-mmmmm +aa NP - - - - STOS+wa SZ8 ENC_REP EFL=-t------ +ab NP - - - - STOS+wa ENC_REP EFL=-t------ +ac NP - - - - LODS+was SZ8 ENC_REP EFL=-t------ +ad NP - - - - LODS+was ENC_REP EFL=-t------ +ae NP - - - - SCAS+wa SZ8 ENC_REPCC EFL=mt-mmmmm +af NP - - - - SCAS+wa ENC_REPCC EFL=mt-mmmmm b0+ OI Rb Ib - - MOVABS SZ8 b8+ OI Rv Iv - - MOVABS c0/0 MI Eb Ib - - ROL SZ8 EFL=m------m @@ -261,12 +261,12 @@ d3/7 MC Ev Rb - - SAR EFL=m--mmumm d4 I Ib - - - AAM I64 SZ8 EFL=u--mmumu d5 I Ib - - - AAD I64 SZ8 EFL=u--mmumu d6 NP - - - - SALC I64 UNDOC -d7 NP - - - - XLATB +d7 NP - - - - XLATB+as #d8-df FPU Escape -e0 D Jbs - - - LOOPNZ F64 EFL=----t--- -e1 D Jbs - - - LOOPZ F64 EFL=----t--- -e2 D Jbs - - - LOOP F64 -e3 D Jbs - - - JCXZ F64 +e0 D Jbs - - - LOOPNZ+a F64 EFL=----t--- +e1 D Jbs - - - LOOPZ+a F64 EFL=----t--- +e2 D Jbs - - - LOOP+a F64 +e3 D Jbs - - - JCXZ+a F64 e4 IA Rb Ib - - IN SZ8 e5 IA Rz Ib - - IN e6 IA Rb Ib - - OUT SZ8 @@ -561,7 +561,7 @@ NP.0ff3 RM Pq Qq - - MMX_PSLLQ F=MMX NP.0ff4 RM Pq Qq - - MMX_PMULUDQ F=MMX NP.0ff5 RM Pq Qq - - MMX_PMADDWD F=MMX NP.0ff6 RM Pq Qq - - MMX_PSADBW F=SSE -NP.0ff7/r RM Pq Nq - - MMX_MASKMOVQ F=SSE +NP.0ff7/r RM Pq Nq - - MMX_MASKMOVQ+as F=SSE NP.0ff8 RM Pq Qq - - MMX_PSUBB F=MMX NP.0ff9 RM Pq Qq - - MMX_PSUBW F=MMX NP.0ffa RM Pq Qq - - MMX_PSUBD F=MMX @@ -786,7 +786,7 @@ F2.0ff0/m RM Vx Mx - - SSE_LDDQU F=SSE3 66.0ff4 RM Vx Wx - - SSE_PMULUDQ F=SSE2 66.0ff5 RM Vx Wx - - SSE_PMADDWD F=SSE2 66.0ff6 RM Vx Wx - - SSE_PSADBW F=SSE2 -66.0ff7/r RM Vx Wx - - SSE_MASKMOVDQU F=SSE2 +66.0ff7/r RM Vx Wx - - SSE_MASKMOVDQU+as F=SSE2 66.0ff8 RM Vx Wx - - SSE_PSUBB F=SSE2 66.0ff9 RM Vx Wx - - SSE_PSUBW F=SSE2 66.0ffa RM Vx Wx - - SSE_PSUBD F=SSE2 @@ -1084,7 +1084,7 @@ VEX.66.0ff3 RVM Vx Hx Wx - VPSLLQ F=AVX VEX.66.0ff4 RVM Vx Hx Wx - VPMULUDQ F=AVX VEX.66.0ff5 RVM Vx Hx Wx - VPMADDWD F=AVX VEX.66.0ff6 RVM Vx Hx Wx - VPSADBW F=AVX -VEX.NP.L0.0ff7 RM Vx Wx - - VMASKMOVDQU F=AVX +VEX.NP.L0.0ff7 RM Vx Wx - - VMASKMOVDQU+as F=AVX VEX.66.0ff8 RVM Vx Hx Wx - VPSUBB F=AVX VEX.66.0ff9 RVM Vx Hx Wx - VPSUBW F=AVX VEX.66.0ffa RVM Vx Hx Wx - VPSUBD F=AVX diff --git a/parseinstrs.py b/parseinstrs.py index 6402cc7..43c264f 100644 --- a/parseinstrs.py +++ b/parseinstrs.py @@ -176,9 +176,14 @@ class InstrDesc(NamedTuple): @classmethod def parse(cls, desc): desc = desc.split() - if desc[5][-2:] == "+w": - desc[5] = desc[5][:-2] + mnem_comp = desc[5].split("+", 1) + desc[5] = mnem_comp[0] + if len(mnem_comp) > 1 and "w" in mnem_comp[1]: desc.append("INSTR_WIDTH") + if len(mnem_comp) > 1 and "a" in mnem_comp[1]: + desc.append("U67") + if len(mnem_comp) > 1 and "s" in mnem_comp[1]: + desc.append("USEG") operands = tuple(OpKind.parse(op) for op in desc[1:5] if op != "-") return cls(desc[5], desc[0], operands, frozenset(desc[6:]))