The encoding tables are keyed by the controlling type variable only. We need to distinguish different encodings for instructions with multiple type variables. Add a TypePredicate instruction predicate which can check the type of an instruction value operand. Combine type checks into the instruction predicate for instructions with more than one type variable. Add Intel encodings for fcvt_from_sint.f32.i64 which can now be distinguished from fcvt_from_sint.f32.i32.
269 lines
8.8 KiB
Python
269 lines
8.8 KiB
Python
"""
Intel Encodings.
"""
from __future__ import absolute_import
from cdsl.predicates import IsUnsignedInt
from base import instructions as base
from base.formats import UnaryImm
from .defs import I32, I64
from . import recipes as r
from . import settings as cfg
from . import instructions as x86
from base.legalize import narrow, expand
|
|
|
|
# Register the legalization actions for each concrete type on each target:
# types the ISA cannot handle natively are narrowed; the rest are expanded
# into simpler instructions.
I32.legalize_type(
        default=narrow,
        i32=expand,
        f32=expand,
        f64=expand)

I64.legalize_type(
        default=narrow,
        i32=expand,
        i64=expand,
        f32=expand,
        f64=expand)
|
|
|
|
# Integer binary arithmetic/bitwise ops: one-byte reg/reg opcodes.
for inst, opc in [
        (base.iadd, 0x01),
        (base.isub, 0x29),
        (base.band, 0x21),
        (base.bor, 0x09),
        (base.bxor, 0x31)]:
    I32.enc(inst.i32, *r.rr(opc))

    I64.enc(inst.i64, *r.rr.rex(opc, w=1))
    I64.enc(inst.i32, *r.rr.rex(opc))
    # REX-less encoding must come after REX encoding so we don't use it by
    # default. Otherwise reg-alloc would never use r8 and up.
    I64.enc(inst.i32, *r.rr(opc))
|
|
|
|
# Integer multiply: two-byte 0x0f 0xaf opcode.
I32.enc(base.imul.i32, *r.rrx(0x0f, 0xaf))
I64.enc(base.imul.i64, *r.rrx.rex(0x0f, 0xaf, w=1))
I64.enc(base.imul.i32, *r.rrx.rex(0x0f, 0xaf))
I64.enc(base.imul.i32, *r.rrx(0x0f, 0xaf))
|
|
|
|
# Extended division/remainder: 0xf7 with the operation selected by the
# ModRM reg (rrr) field.
for inst, rrr in [
        (x86.sdivmodx, 7),
        (x86.udivmodx, 6)]:
    I32.enc(inst.i32, *r.div(0xf7, rrr=rrr))
    I64.enc(inst.i64, *r.div.rex(0xf7, rrr=rrr, w=1))
    I64.enc(inst.i32, *r.div.rex(0xf7, rrr=rrr))
    I64.enc(inst.i32, *r.div(0xf7, rrr=rrr))
|
|
|
|
# Register-to-register copies and regalloc-directed moves use the plain
# mov opcode 0x89.
I32.enc(base.copy.i32, *r.umr(0x89))
I64.enc(base.copy.i64, *r.umr.rex(0x89, w=1))
I64.enc(base.copy.i32, *r.umr.rex(0x89))
I64.enc(base.copy.i32, *r.umr(0x89))

I32.enc(base.regmove.i32, *r.rmov(0x89))
I64.enc(base.regmove.i64, *r.rmov.rex(0x89, w=1))
I64.enc(base.regmove.i32, *r.rmov.rex(0x89))
I64.enc(base.regmove.i32, *r.rmov(0x89))
|
|
|
|
# Immediate instructions with sign-extended 8-bit and 32-bit immediate.
# 0x83 takes the short imm8 form, 0x81 the imm32 form; the operation is
# selected by the ModRM reg (rrr) field.
for inst, rrr in [
        (base.iadd_imm, 0),
        (base.band_imm, 4),
        (base.bor_imm, 1),
        (base.bxor_imm, 6)]:
    I32.enc(inst.i32, *r.rib(0x83, rrr=rrr))
    I32.enc(inst.i32, *r.rid(0x81, rrr=rrr))

    I64.enc(inst.i64, *r.rib.rex(0x83, rrr=rrr, w=1))
    I64.enc(inst.i64, *r.rid.rex(0x81, rrr=rrr, w=1))
    I64.enc(inst.i32, *r.rib.rex(0x83, rrr=rrr))
    I64.enc(inst.i32, *r.rid.rex(0x81, rrr=rrr))
    I64.enc(inst.i32, *r.rib(0x83, rrr=rrr))
    I64.enc(inst.i32, *r.rid(0x81, rrr=rrr))

# TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as
# band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks.
|
|
|
|
# Immediate constants.
I32.enc(base.iconst.i32, *r.puid(0xb8))

I64.enc(base.iconst.i32, *r.puid.rex(0xb8))
I64.enc(base.iconst.i32, *r.puid(0xb8))
# The 32-bit immediate movl also zero-extends to 64 bits, so it covers
# iconst.i64 whenever the constant fits in an unsigned 32-bit immediate.
I64.enc(base.iconst.i64, *r.puid.rex(0xb8),
        instp=IsUnsignedInt(UnaryImm.imm, 32))
I64.enc(base.iconst.i64, *r.puid(0xb8),
        instp=IsUnsignedInt(UnaryImm.imm, 32))
# Sign-extended 32-bit immediate.
I64.enc(base.iconst.i64, *r.uid.rex(0xc7, rrr=0, w=1))
# Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
I64.enc(base.iconst.i64, *r.puiq.rex(0xb8, w=1))
|
|
|
|
# Shifts and rotates.
# Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
# and 16-bit shifts would need explicit masking.
for inst, rrr in [
        (base.rotl, 0),
        (base.rotr, 1),
        (base.ishl, 4),
        (base.ushr, 5),
        (base.sshr, 7)]:
    I32.enc(inst.i32.i32, *r.rc(0xd3, rrr=rrr))
    I64.enc(inst.i64.i64, *r.rc.rex(0xd3, rrr=rrr, w=1))
    I64.enc(inst.i32.i32, *r.rc.rex(0xd3, rrr=rrr))
    I64.enc(inst.i32.i32, *r.rc(0xd3, rrr=rrr))
|
|
|
|
# Population count (gated on the POPCNT ISA extension).
I32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
I64.enc(base.popcnt.i64, *r.urm.rex(0xf3, 0x0f, 0xb8, w=1),
        isap=cfg.use_popcnt)
I64.enc(base.popcnt.i32, *r.urm.rex(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
I64.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)

# Count leading zero bits (LZCNT extension).
I32.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
I64.enc(base.clz.i64, *r.urm.rex(0xf3, 0x0f, 0xbd, w=1),
        isap=cfg.use_lzcnt)
I64.enc(base.clz.i32, *r.urm.rex(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
I64.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)

# Count trailing zero bits (TZCNT, part of BMI1).
I32.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
I64.enc(base.ctz.i64, *r.urm.rex(0xf3, 0x0f, 0xbc, w=1),
        isap=cfg.use_bmi1)
I64.enc(base.ctz.i32, *r.urm.rex(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
I64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
|
|
|
|
# Loads and stores. Each instruction gets three encodings: no displacement,
# 8-bit displacement, and 32-bit displacement.
I32.enc(base.store.i32.i32, *r.st(0x89))
I32.enc(base.store.i32.i32, *r.stDisp8(0x89))
I32.enc(base.store.i32.i32, *r.stDisp32(0x89))

I32.enc(base.istore16.i32.i32, *r.st(0x66, 0x89))
I32.enc(base.istore16.i32.i32, *r.stDisp8(0x66, 0x89))
I32.enc(base.istore16.i32.i32, *r.stDisp32(0x66, 0x89))

# Byte stores can only use the ABCD registers without a REX prefix.
I32.enc(base.istore8.i32.i32, *r.st_abcd(0x88))
I32.enc(base.istore8.i32.i32, *r.stDisp8_abcd(0x88))
I32.enc(base.istore8.i32.i32, *r.stDisp32_abcd(0x88))

I32.enc(base.load.i32.i32, *r.ld(0x8b))
I32.enc(base.load.i32.i32, *r.ldDisp8(0x8b))
I32.enc(base.load.i32.i32, *r.ldDisp32(0x8b))

I32.enc(base.uload16.i32.i32, *r.ld(0x0f, 0xb7))
I32.enc(base.uload16.i32.i32, *r.ldDisp8(0x0f, 0xb7))
I32.enc(base.uload16.i32.i32, *r.ldDisp32(0x0f, 0xb7))

I32.enc(base.sload16.i32.i32, *r.ld(0x0f, 0xbf))
I32.enc(base.sload16.i32.i32, *r.ldDisp8(0x0f, 0xbf))
I32.enc(base.sload16.i32.i32, *r.ldDisp32(0x0f, 0xbf))

I32.enc(base.uload8.i32.i32, *r.ld(0x0f, 0xb6))
I32.enc(base.uload8.i32.i32, *r.ldDisp8(0x0f, 0xb6))
I32.enc(base.uload8.i32.i32, *r.ldDisp32(0x0f, 0xb6))

I32.enc(base.sload8.i32.i32, *r.ld(0x0f, 0xbe))
I32.enc(base.sload8.i32.i32, *r.ldDisp8(0x0f, 0xbe))
I32.enc(base.sload8.i32.i32, *r.ldDisp32(0x0f, 0xbe))
|
|
|
|
#
# Call/return
#
I32.enc(base.call, *r.call_id(0xe8))
I32.enc(base.call_indirect.i32, *r.call_r(0xff, rrr=2))
I32.enc(base.x_return, *r.ret(0xc3))
I64.enc(base.x_return, *r.ret(0xc3))
|
|
|
|
#
# Branches
#
# Unconditional jumps: short (8-bit) and near (32-bit) relative forms.
I32.enc(base.jump, *r.jmpb(0xeb))
I32.enc(base.jump, *r.jmpd(0xe9))
I64.enc(base.jump, *r.jmpb(0xeb))
I64.enc(base.jump, *r.jmpd(0xe9))

# Branch-if-zero / branch-if-nonzero: test + jcc (je / jne).
I32.enc(base.brz.i32, *r.tjccb(0x74))
I64.enc(base.brz.i64, *r.tjccb.rex(0x74, w=1))
I64.enc(base.brz.i32, *r.tjccb.rex(0x74))
I64.enc(base.brz.i32, *r.tjccb(0x74))

I32.enc(base.brnz.i32, *r.tjccb(0x75))
I64.enc(base.brnz.i64, *r.tjccb.rex(0x75, w=1))
I64.enc(base.brnz.i32, *r.tjccb.rex(0x75))
I64.enc(base.brnz.i32, *r.tjccb(0x75))
|
|
|
|
#
# Trap as ud2
#
I32.enc(base.trap, *r.noop(0x0f, 0x0b))
I64.enc(base.trap, *r.noop(0x0f, 0x0b))
|
|
|
|
#
# Comparisons
#
I32.enc(base.icmp.i32, *r.icscc(0x39))
I64.enc(base.icmp.i64, *r.icscc.rex(0x39, w=1))
I64.enc(base.icmp.i32, *r.icscc.rex(0x39))
I64.enc(base.icmp.i32, *r.icscc(0x39))
|
|
|
|
#
# Convert bool to int.
#
# This assumes that b1 is represented as an 8-bit low register with the value 0
# or 1.
I32.enc(base.bint.i32.b1, *r.urm_abcd(0x0f, 0xb6))
I64.enc(base.bint.i64.b1, *r.urm.rex(0x0f, 0xb6, w=1))
I64.enc(base.bint.i64.b1, *r.urm_abcd(0x0f, 0xb6))  # zext to i64 implicit.
I64.enc(base.bint.i32.b1, *r.urm.rex(0x0f, 0xb6))
I64.enc(base.bint.i32.b1, *r.urm_abcd(0x0f, 0xb6))
|
|
|
|
# Numerical conversions.

# Converting i64 to i32 is a no-op in 64-bit mode.
I64.enc(base.ireduce.i32.i64, r.null, 0)
I64.enc(base.sextend.i64.i32, *r.urm.rex(0x63, w=1))
# A 32-bit register copy clears the high 32 bits.
I64.enc(base.uextend.i64.i32, *r.umr.rex(0x89))
I64.enc(base.uextend.i64.i32, *r.umr(0x89))
|
|
|
|
#
# Floating point
#

# cvtsi2ss: signed int to f32. The fcvt_from_sint.f32.i64 encodings are
# distinguishable from the .i32 ones now that the instruction predicate can
# check the type of the value operand.
I32.enc(base.fcvt_from_sint.f32.i32, *r.furm(0xf3, 0x0f, 0x2A))
I64.enc(base.fcvt_from_sint.f32.i64, *r.furm.rex(0xf3, 0x0f, 0x2A, w=1))
I64.enc(base.fcvt_from_sint.f32.i32, *r.furm.rex(0xf3, 0x0f, 0x2A))
I64.enc(base.fcvt_from_sint.f32.i32, *r.furm(0xf3, 0x0f, 0x2A))

# cvtsi2sd: signed int to f64.
I32.enc(base.fcvt_from_sint.f64.i32, *r.furm(0xf2, 0x0f, 0x2A))
I64.enc(base.fcvt_from_sint.f64.i64, *r.furm.rex(0xf2, 0x0f, 0x2A, w=1))
I64.enc(base.fcvt_from_sint.f64.i32, *r.furm.rex(0xf2, 0x0f, 0x2A))
I64.enc(base.fcvt_from_sint.f64.i32, *r.furm(0xf2, 0x0f, 0x2A))
|
|
|
|
# Binary arithmetic ops (scalar SSE: 0xf3 prefix for f32, 0xf2 for f64).
for inst, opc in [
        (base.fadd, 0x58),
        (base.fsub, 0x5c),
        (base.fmul, 0x59),
        (base.fdiv, 0x5e)]:
    I32.enc(inst.f32, *r.frm(0xf3, 0x0f, opc))
    I64.enc(inst.f32, *r.frm.rex(0xf3, 0x0f, opc))
    I64.enc(inst.f32, *r.frm(0xf3, 0x0f, opc))

    I32.enc(inst.f64, *r.frm(0xf2, 0x0f, opc))
    I64.enc(inst.f64, *r.frm.rex(0xf2, 0x0f, opc))
    I64.enc(inst.f64, *r.frm(0xf2, 0x0f, opc))
|
|
|
|
# Binary bitwise ops (andps/andnps/orps/xorps family; same opcodes serve
# both f32 and f64 here).
for inst, opc in [
        (base.band, 0x54),
        (base.band_not, 0x55),
        (base.bor, 0x56),
        (base.bxor, 0x57)]:
    I32.enc(inst.f32, *r.frm(0x0f, opc))
    I64.enc(inst.f32, *r.frm.rex(0x0f, opc))
    I64.enc(inst.f32, *r.frm(0x0f, opc))

    I32.enc(inst.f64, *r.frm(0x0f, opc))
    I64.enc(inst.f64, *r.frm.rex(0x0f, opc))
    I64.enc(inst.f64, *r.frm(0x0f, opc))