encode: Remove descriptor table

All relevant information is now encoded directly in the numeric value of
the mnemonic, significantly shrinking the size of the encoder.
This commit is contained in:
Alexis Engelke
2023-01-13 14:23:07 +01:00
parent 9f0ddeb44a
commit 2f7e8dd0de
4 changed files with 174 additions and 184 deletions

View File

@@ -660,6 +660,8 @@ def encode_mnems(entries):
spec_opcode = spec_opcode._replace(rexw="1")
if vecsize == 256:
spec_opcode = spec_opcode._replace(vexl="1")
if spec_opcode.vexl == "IG":
spec_opcode = spec_opcode._replace(vexl="0")
# Construct mnemonic name
mnem_name = {"MOVABS": "MOV", "XCHG_NOP": "XCHG"}.get(desc.mnemonic, desc.mnemonic)
@@ -690,42 +692,52 @@ def encode_mnems(entries):
def encode_table(entries, args):
mnemonics = encode_mnems(entries)
mnemonics["NOP", 0, ""] = [(Opcode.parse("90"), InstrDesc.parse("NP - - - - NOP"))]
descs = ""
alt_index = 0
mnem_map = {}
alt_table = [0] # first entry is unused
for (mnem, opsize, ots), variants in mnemonics.items():
indices = [f"FE_{mnem}"] + [f"FE_MNEM_MAX+{alt_index+i}" for i in range(len(variants) - 1)]
alt_list = indices[1:] + ["0"]
alt_index += len(alt_list) - 1
for idx, alt, (opcode, desc) in zip(indices, alt_list, variants):
supports_high_regs = []
if variants[0][1].mnemonic in ("MOVSX", "MOVZX") or opsize == 8:
# Should be the same for all variants
desc = variants[0][1]
for i, (ot, op) in enumerate(zip(ots, desc.operands)):
if ot == "r" and op.kind == "GP" and op.abssize(opsize//8) == 1:
supports_high_regs.append(i)
alt_indices = [i + len(alt_table) for i in range(len(variants) - 1)] + [0]
enc_opcs = []
for alt, (opcode, desc) in zip(alt_indices, variants):
opc_i = opcode.opc
if opcode.opcext:
opc_i |= opcode.opcext << 8
if opcode.modreg and opcode.modreg[0] is not None:
opc_i |= opcode.modreg[0] << 8
opc_s = f"{opc_i}"
opc_s += ["","|OPC_0F","|OPC_0F38","|OPC_0F3A"][opcode.escape]
if opcode.prefix in ("66", "F2", "F3", "LOCK"):
opc_s += "|OPC_" + opcode.prefix
if opsize == 16 and opcode.prefix != "66":
opc_s += "|OPC_66"
opc_s += "|OPC_VSIB" if "VSIB" in desc.flags else ""
opc_s += "|OPC_VEX" if opcode.vex else ""
opc_s += "|OPC_VEXL" if opcode.vexl == "1" else ""
opc_s += "|OPC_REXW" if opcode.rexw == "1" else ""
opc_i |= opcode.escape * 0x10000
opc_i |= 0x80000 if opcode.prefix == "66" or opsize == 16 else 0
opc_i |= 0x100000 if opcode.prefix == "F2" else 0
opc_i |= 0x200000 if opcode.prefix == "F3" else 0
opc_i |= 0x400000 if opcode.rexw == "1" else 0
if opcode.prefix == "LOCK":
opc_i |= 0x800000
elif opcode.vex:
opc_i |= 0x1000000 + 0x800000 * int(opcode.vexl or 0)
opc_i |= 0x8000000 if "VSIB" in desc.flags else 0
if alt >= 0x100:
raise Exception("encode alternate bits exhausted")
opc_i |= sum(1 << i for i in supports_high_regs) << 45
opc_i |= desc.imm_size(opsize//8) << 47
opc_i |= ["INVALID",
"NP", "M", "M1", "MI", "MC", "MR", "RM", "RMA", "MRI", "RMI", "MRC",
"AM", "MA", "I", "IA", "O", "OI", "OA", "S", "A", "D", "FD", "TD",
"RVM", "RVMI", "RVMR", "RMV", "VM", "VMI", "MVR", "MRV",
].index(desc.encoding) << 51
opc_i |= alt << 56
enc_opcs.append(opc_i)
mnem_map[f"FE_{mnem}"] = enc_opcs[0]
alt_table += enc_opcs[1:]
desc = {
"enc": f"ENC_{desc.encoding}",
"immsz": desc.imm_size(opsize//8),
"tys": desc.encode_regtys(ots, opsize//8),
"opc": opc_s,
"alt": alt,
}
desc_str = ", ".join(f".{k} = {v}" for k, v in desc.items())
descs += f"[{idx}] = {{ {desc_str} }},\n"
mnem_list = sorted(f"FE_{mnem}" for mnem, _, _ in mnemonics.keys())
mnem_tab = "".join(f"FE_MNEMONIC({m},{i})\n" for i, m in enumerate(mnem_list))
return mnem_tab, descs
mnem_tab = "".join(f"#define {m} {v:#x}\n" for m, v in mnem_map.items())
alt_tab = "".join(f"[{i}] = {v:#x},\n" for i, v in enumerate(alt_table))
return mnem_tab, alt_tab
def encode2_table(entries, args):
mnemonics = encode_mnems(entries)