moved crates in lib/ to src/, renamed crates, modified some files' text (#660)
Authored by lazypassion on 2019-01-28 18:56:54 -05:00; committed by Dan Gohman
parent 54959cf5bb
commit 747ad3c4c5
508 changed files with 94 additions and 92 deletions


@@ -0,0 +1,22 @@
"""
x86 Target Architecture
-----------------------
This target ISA generates code for x86 CPUs with two separate CPU modes:
`I32`
32-bit x86 architecture, also known as `IA-32` and sometimes referred
to as `i386`. Note, however, that Cranelift depends on instructions not
in the original i386, such as SSE2, CMOVcc, and UD2.
`I64`
x86-64 architecture, also known as `AMD64`, `Intel 64`, and `x64`.
"""
from __future__ import absolute_import
from . import defs
from . import encodings, settings, registers # noqa
from cdsl.isa import TargetISA # noqa
# Re-export the primary target ISA definition.
ISA = defs.ISA.finish() # type: TargetISA


@@ -0,0 +1,28 @@
"""
x86 definitions.
Commonly used definitions.
"""
from __future__ import absolute_import
from cdsl.isa import TargetISA, CPUMode
import base.instructions
from . import instructions as x86
from base.immediates import floatcc
ISA = TargetISA('x86', [base.instructions.GROUP, x86.GROUP]) # type: TargetISA
# CPU modes for 32-bit and 64-bit operation.
X86_64 = CPUMode('I64', ISA)
X86_32 = CPUMode('I32', ISA)
# The set of floating point condition codes that are directly supported.
# Other condition codes need to be reversed or expressed as two tests.
supported_floatccs = [
floatcc.ord,
floatcc.uno,
floatcc.one,
floatcc.ueq,
floatcc.gt,
floatcc.ge,
floatcc.ult,
floatcc.ule]
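# The condition codes missing from this list are recovered in legalize.py
# (later in this commit) by swapping operands or combining two tests. An
# illustrative plain-Python sketch of that mapping (for exposition only,
# not part of the committed file):
REVERSED_FLOATCC = {'lt': 'gt', 'le': 'ge', 'ugt': 'ult', 'uge': 'ule'}
COMBINED_FLOATCC = {'eq': ('band', 'ord', 'ueq'),  # eq = ord & ueq
                    'ne': ('bor', 'uno', 'one')}   # ne = uno | one
def lower_fcmp(cc, x, y):
    if cc in REVERSED_FLOATCC:
        return ('fcmp', REVERSED_FLOATCC[cc], y, x)  # swap the operands
    if cc in COMBINED_FLOATCC:
        op, cc1, cc2 = COMBINED_FLOATCC[cc]
        return (op, ('fcmp', cc1, x, y), ('fcmp', cc2, x, y))
    return ('fcmp', cc, x, y)  # directly supported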


@@ -0,0 +1,748 @@
"""
x86 Encodings.
"""
from __future__ import absolute_import
from cdsl.predicates import IsZero32BitFloat, IsZero64BitFloat
from cdsl.predicates import IsUnsignedInt, Not, And
from base.predicates import IsColocatedFunc, IsColocatedData, LengthEquals
from base import instructions as base
from base import types
from base.formats import UnaryIeee32, UnaryIeee64, UnaryImm
from base.formats import FuncAddr, Call, LoadComplex, StoreComplex
from .defs import X86_64, X86_32
from . import recipes as r
from . import settings as cfg
from . import instructions as x86
from .legalize import x86_expand
from base.legalize import narrow, widen, expand_flags
from base.settings import allones_funcaddrs, is_pic
from .settings import use_sse41
try:
from typing import TYPE_CHECKING, Any # noqa
if TYPE_CHECKING:
from cdsl.instructions import MaybeBoundInst # noqa
from cdsl.predicates import FieldPredicate # noqa
except ImportError:
pass
X86_32.legalize_monomorphic(expand_flags)
X86_32.legalize_type(
default=narrow,
b1=expand_flags,
i8=widen,
i16=widen,
i32=x86_expand,
f32=x86_expand,
f64=x86_expand)
X86_64.legalize_monomorphic(expand_flags)
X86_64.legalize_type(
default=narrow,
b1=expand_flags,
i8=widen,
i16=widen,
i32=x86_expand,
i64=x86_expand,
f32=x86_expand,
f64=x86_expand)
#
# Helper functions for generating encodings.
#
def enc_x86_64(inst, recipe, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
"""
Add encodings for `inst` to X86_64 with and without a REX prefix.
"""
X86_64.enc(inst, *recipe.rex(*args, **kwargs))
X86_64.enc(inst, *recipe(*args, **kwargs))
def enc_x86_64_instp(inst, recipe, instp, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None
"""
Add encodings for `inst` to X86_64 with and without a REX prefix.
"""
X86_64.enc(inst, *recipe.rex(*args, **kwargs), instp=instp)
X86_64.enc(inst, *recipe(*args, **kwargs), instp=instp)
def enc_both(inst, recipe, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, *int, **Any) -> None
"""
Add encodings for `inst` to both X86_32 and X86_64.
"""
X86_32.enc(inst, *recipe(*args, **kwargs))
enc_x86_64(inst, recipe, *args, **kwargs)
def enc_both_instp(inst, recipe, instp, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **Any) -> None
"""
Add encodings for `inst` to both X86_32 and X86_64.
"""
X86_32.enc(inst, *recipe(*args, **kwargs), instp=instp)
enc_x86_64_instp(inst, recipe, instp, *args, **kwargs)
def enc_i32_i64(inst, recipe, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
"""
Add encodings for `inst.i32` to X86_32.
Add encodings for `inst.i32` to X86_64 with and without REX.
Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
"""
X86_32.enc(inst.i32, *recipe(*args, **kwargs))
# REX-less encoding must come after REX encoding so we don't use it by
# default. Otherwise reg-alloc would never use r8 and up.
X86_64.enc(inst.i32, *recipe.rex(*args, **kwargs))
X86_64.enc(inst.i32, *recipe(*args, **kwargs))
X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs))
def enc_i32_i64_instp(inst, recipe, instp, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None
"""
Add encodings for `inst.i32` to X86_32.
Add encodings for `inst.i32` to X86_64 with and without REX.
Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
Similar to `enc_i32_i64` but applies `instp` to each encoding.
"""
X86_32.enc(inst.i32, *recipe(*args, **kwargs), instp=instp)
# REX-less encoding must come after REX encoding so we don't use it by
# default. Otherwise reg-alloc would never use r8 and up.
X86_64.enc(inst.i32, *recipe.rex(*args, **kwargs), instp=instp)
X86_64.enc(inst.i32, *recipe(*args, **kwargs), instp=instp)
X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs), instp=instp)
def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs):
# type: (MaybeBoundInst, bool, r.TailRecipe, *int, **int) -> None
"""
Add encodings for `inst.i32` to X86_32.
Add encodings for `inst.i32` to X86_64 with and without REX.
Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit`
argument to determine whether or not to set the REX.W bit.
"""
X86_32.enc(inst.i32.any, *recipe(*args, **kwargs))
# REX-less encoding must come after REX encoding so we don't use it by
# default. Otherwise reg-alloc would never use r8 and up.
X86_64.enc(inst.i32.any, *recipe.rex(*args, **kwargs))
X86_64.enc(inst.i32.any, *recipe(*args, **kwargs))
if w_bit:
X86_64.enc(inst.i64.any, *recipe.rex(*args, w=1, **kwargs))
else:
X86_64.enc(inst.i64.any, *recipe.rex(*args, **kwargs))
X86_64.enc(inst.i64.any, *recipe(*args, **kwargs))
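# For concreteness, a single call to the first helper above, e.g.
# enc_i32_i64(base.iadd, r.rr, 0x01) from the loop just below, registers
# these four encodings (a restatement of the function body, shown here
# for exposition only, not part of the committed file):
#
#   X86_32.enc(base.iadd.i32, *r.rr(0x01))
#   X86_64.enc(base.iadd.i32, *r.rr.rex(0x01))       # REX form listed first
#   X86_64.enc(base.iadd.i32, *r.rr(0x01))           # then REX-less fallback
#   X86_64.enc(base.iadd.i64, *r.rr.rex(0x01, w=1))  # REX.W for i64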
for inst, opc in [
(base.iadd, 0x01),
(base.isub, 0x29),
(base.band, 0x21),
(base.bor, 0x09),
(base.bxor, 0x31)]:
enc_i32_i64(inst, r.rr, opc)
# x86 has a bitwise not instruction NOT.
enc_i32_i64(base.bnot, r.ur, 0xf7, rrr=2)
# Also add `b1` encodings for the logic instructions.
# TODO: Should this be done with 8-bit instructions? It would improve
# partial register dependencies.
enc_both(base.band.b1, r.rr, 0x21)
enc_both(base.bor.b1, r.rr, 0x09)
enc_both(base.bxor.b1, r.rr, 0x31)
enc_i32_i64(base.imul, r.rrx, 0x0f, 0xaf)
enc_i32_i64(x86.sdivmodx, r.div, 0xf7, rrr=7)
enc_i32_i64(x86.udivmodx, r.div, 0xf7, rrr=6)
enc_i32_i64(x86.smulx, r.mulx, 0xf7, rrr=5)
enc_i32_i64(x86.umulx, r.mulx, 0xf7, rrr=4)
enc_i32_i64(base.copy, r.umr, 0x89)
for ty in [types.b1, types.i8, types.i16]:
enc_both(base.copy.bind(ty), r.umr, 0x89)
# For x86-64, only define REX forms for now, since we can't describe the
# special regunit immediate operands with the current constraint language.
for ty in [types.i8, types.i16, types.i32]:
X86_32.enc(base.regmove.bind(ty), *r.rmov(0x89))
X86_64.enc(base.regmove.bind(ty), *r.rmov.rex(0x89))
X86_64.enc(base.regmove.i64, *r.rmov.rex(0x89, w=1))
enc_both(base.regmove.b1, r.rmov, 0x89)
enc_both(base.regmove.i8, r.rmov, 0x89)
# Immediate instructions with sign-extended 8-bit and 32-bit immediate.
for inst, rrr in [
(base.iadd_imm, 0),
(base.band_imm, 4),
(base.bor_imm, 1),
(base.bxor_imm, 6)]:
enc_i32_i64(inst, r.r_ib, 0x83, rrr=rrr)
enc_i32_i64(inst, r.r_id, 0x81, rrr=rrr)
# TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as
# band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks.
# Immediate constants.
X86_32.enc(base.iconst.i32, *r.pu_id(0xb8))
X86_64.enc(base.iconst.i32, *r.pu_id.rex(0xb8))
X86_64.enc(base.iconst.i32, *r.pu_id(0xb8))
# The 32-bit immediate movl also zero-extends to 64 bits.
X86_64.enc(base.iconst.i64, *r.pu_id.rex(0xb8),
instp=IsUnsignedInt(UnaryImm.imm, 32))
X86_64.enc(base.iconst.i64, *r.pu_id(0xb8),
instp=IsUnsignedInt(UnaryImm.imm, 32))
# Sign-extended 32-bit immediate.
X86_64.enc(base.iconst.i64, *r.u_id.rex(0xc7, rrr=0, w=1))
# Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
X86_64.enc(base.iconst.i64, *r.pu_iq.rex(0xb8, w=1))
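# Taken together, the four iconst.i64 rules above pick the smallest usable
# form. An illustrative sketch of that choice in plain Python, assuming the
# shortest applicable encoding wins (not part of the committed file):
def iconst_i64_form(imm):
    if 0 <= imm < 2**32:
        return 'movl imm32'    # 0xb8: writes 32 bits, zero-extends
    if -2**31 <= imm < 2**31:
        return 'movq imm32'    # 0xc7/0 + REX.W: sign-extends 32 bits
    return 'movabsq imm64'     # 0xb8 + REX.W: full 8-byte immediate
assert iconst_i64_form(0xffffffff) == 'movl imm32'
assert iconst_i64_form(-1) == 'movq imm32'
assert iconst_i64_form(1 << 40) == 'movabsq imm64'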
# bool constants.
enc_both(base.bconst.b1, r.pu_id_bool, 0xb8)
# Shifts and rotates.
# Note that the dynamic shift amount is only masked to 5 or 6 bits; the 8-bit
# and 16-bit shifts would need explicit masking.
for inst, rrr in [
(base.rotl, 0),
(base.rotr, 1),
(base.ishl, 4),
(base.ushr, 5),
(base.sshr, 7)]:
# Cannot use enc_i32_i64 for this pattern because instructions require
# .any suffix.
X86_32.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr))
X86_64.enc(inst.i64.any, *r.rc.rex(0xd3, rrr=rrr, w=1))
X86_64.enc(inst.i32.any, *r.rc.rex(0xd3, rrr=rrr))
X86_64.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr))
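# The masking note above is why only the i32/i64 shifts are encoded here:
# the hardware masks a CL shift count to 5 bits (32-bit forms) or 6 bits
# (REX.W forms). A quick model of that behavior (illustrative sketch, not
# part of the committed file):
def x86_shl(x, count, bits):
    assert bits in (32, 64)
    return (x << (count & (bits - 1))) & ((1 << bits) - 1)
assert x86_shl(1, 33, 32) == 2        # count masked: 33 & 31 == 1
assert x86_shl(1, 33, 64) == 1 << 33  # the 64-bit form keeps 6 bits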
for inst, rrr in [
(base.ishl_imm, 4),
(base.ushr_imm, 5),
(base.sshr_imm, 7)]:
enc_i32_i64(inst, r.r_ib, 0xc1, rrr=rrr)
# Population count.
X86_32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
X86_64.enc(base.popcnt.i64, *r.urm.rex(0xf3, 0x0f, 0xb8, w=1),
isap=cfg.use_popcnt)
X86_64.enc(base.popcnt.i32, *r.urm.rex(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
X86_64.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
# Count leading zero bits.
X86_32.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
X86_64.enc(base.clz.i64, *r.urm.rex(0xf3, 0x0f, 0xbd, w=1),
isap=cfg.use_lzcnt)
X86_64.enc(base.clz.i32, *r.urm.rex(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
X86_64.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
# Count trailing zero bits.
X86_32.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
X86_64.enc(base.ctz.i64, *r.urm.rex(0xf3, 0x0f, 0xbc, w=1),
isap=cfg.use_bmi1)
X86_64.enc(base.ctz.i32, *r.urm.rex(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
X86_64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
#
# Loads and stores.
#
ldcomplexp = LengthEquals(LoadComplex, 2)
for recipe in [r.ldWithIndex, r.ldWithIndexDisp8, r.ldWithIndexDisp32]:
enc_i32_i64_instp(base.load_complex, recipe, ldcomplexp, 0x8b)
enc_x86_64_instp(base.uload32_complex, recipe, ldcomplexp, 0x8b)
X86_64.enc(base.sload32_complex, *recipe.rex(0x63, w=1),
instp=ldcomplexp)
enc_i32_i64_instp(base.uload16_complex, recipe, ldcomplexp, 0x0f, 0xb7)
enc_i32_i64_instp(base.sload16_complex, recipe, ldcomplexp, 0x0f, 0xbf)
enc_i32_i64_instp(base.uload8_complex, recipe, ldcomplexp, 0x0f, 0xb6)
enc_i32_i64_instp(base.sload8_complex, recipe, ldcomplexp, 0x0f, 0xbe)
stcomplexp = LengthEquals(StoreComplex, 3)
for recipe in [r.stWithIndex, r.stWithIndexDisp8, r.stWithIndexDisp32]:
enc_i32_i64_instp(base.store_complex, recipe, stcomplexp, 0x89)
enc_x86_64_instp(base.istore32_complex, recipe, stcomplexp, 0x89)
enc_both_instp(base.istore16_complex.i32, recipe, stcomplexp, 0x66, 0x89)
enc_x86_64_instp(base.istore16_complex.i64, recipe, stcomplexp, 0x66, 0x89)
for recipe in [r.stWithIndex_abcd,
r.stWithIndexDisp8_abcd,
r.stWithIndexDisp32_abcd]:
enc_both_instp(base.istore8_complex.i32, recipe, stcomplexp, 0x88)
enc_x86_64_instp(base.istore8_complex.i64, recipe, stcomplexp, 0x88)
for recipe in [r.st, r.stDisp8, r.stDisp32]:
enc_i32_i64_ld_st(base.store, True, recipe, 0x89)
enc_x86_64(base.istore32.i64.any, recipe, 0x89)
enc_i32_i64_ld_st(base.istore16, False, recipe, 0x66, 0x89)
# Byte stores are more complicated because the registers they can address
# depend on the presence of a REX prefix. The st*_abcd recipes fall back to
# the corresponding st* recipes when a REX prefix is applied.
for recipe in [r.st_abcd, r.stDisp8_abcd, r.stDisp32_abcd]:
enc_both(base.istore8.i32.any, recipe, 0x88)
enc_x86_64(base.istore8.i64.any, recipe, 0x88)
enc_i32_i64(base.spill, r.spillSib32, 0x89)
enc_i32_i64(base.regspill, r.regspill32, 0x89)
# Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid
# constraining the permitted registers.
# See MIN_SPILL_SLOT_SIZE which makes this safe.
for ty in [types.b1, types.i8, types.i16]:
enc_both(base.spill.bind(ty), r.spillSib32, 0x89)
enc_both(base.regspill.bind(ty), r.regspill32, 0x89)
for recipe in [r.ld, r.ldDisp8, r.ldDisp32]:
enc_i32_i64_ld_st(base.load, True, recipe, 0x8b)
enc_x86_64(base.uload32.i64, recipe, 0x8b)
X86_64.enc(base.sload32.i64, *recipe.rex(0x63, w=1))
enc_i32_i64_ld_st(base.uload16, True, recipe, 0x0f, 0xb7)
enc_i32_i64_ld_st(base.sload16, True, recipe, 0x0f, 0xbf)
enc_i32_i64_ld_st(base.uload8, True, recipe, 0x0f, 0xb6)
enc_i32_i64_ld_st(base.sload8, True, recipe, 0x0f, 0xbe)
enc_i32_i64(base.fill, r.fillSib32, 0x8b)
enc_i32_i64(base.regfill, r.regfill32, 0x8b)
# Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above.
for ty in [types.b1, types.i8, types.i16]:
enc_both(base.fill.bind(ty), r.fillSib32, 0x8b)
enc_both(base.regfill.bind(ty), r.regfill32, 0x8b)
# Push and Pop
X86_32.enc(x86.push.i32, *r.pushq(0x50))
enc_x86_64(x86.push.i64, r.pushq, 0x50)
X86_32.enc(x86.pop.i32, *r.popq(0x58))
enc_x86_64(x86.pop.i64, r.popq, 0x58)
# Copy Special
# For x86-64, only define REX forms for now, since we can't describe the
# special regunit immediate operands with the current constraint language.
X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1))
X86_32.enc(base.copy_special, *r.copysp(0x89))
# Adjust SP down by a dynamic value (or up, with a negative operand).
X86_32.enc(base.adjust_sp_down.i32, *r.adjustsp(0x29))
X86_64.enc(base.adjust_sp_down.i64, *r.adjustsp.rex(0x29, w=1))
# Adjust SP up by an immediate (or down, with a negative immediate)
X86_32.enc(base.adjust_sp_up_imm, *r.adjustsp_ib(0x83))
X86_32.enc(base.adjust_sp_up_imm, *r.adjustsp_id(0x81))
X86_64.enc(base.adjust_sp_up_imm, *r.adjustsp_ib.rex(0x83, w=1))
X86_64.enc(base.adjust_sp_up_imm, *r.adjustsp_id.rex(0x81, w=1))
# Adjust SP down by an immediate (or up, with a negative immediate)
X86_32.enc(base.adjust_sp_down_imm, *r.adjustsp_ib(0x83, rrr=5))
X86_32.enc(base.adjust_sp_down_imm, *r.adjustsp_id(0x81, rrr=5))
X86_64.enc(base.adjust_sp_down_imm, *r.adjustsp_ib.rex(0x83, rrr=5, w=1))
X86_64.enc(base.adjust_sp_down_imm, *r.adjustsp_id.rex(0x81, rrr=5, w=1))
#
# Float loads and stores.
#
enc_both(base.load.f32.any, r.fld, 0xf3, 0x0f, 0x10)
enc_both(base.load.f32.any, r.fldDisp8, 0xf3, 0x0f, 0x10)
enc_both(base.load.f32.any, r.fldDisp32, 0xf3, 0x0f, 0x10)
enc_both(base.load_complex.f32, r.fldWithIndex, 0xf3, 0x0f, 0x10)
enc_both(base.load_complex.f32, r.fldWithIndexDisp8, 0xf3, 0x0f, 0x10)
enc_both(base.load_complex.f32, r.fldWithIndexDisp32, 0xf3, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fld, 0xf2, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fldDisp8, 0xf2, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fldDisp32, 0xf2, 0x0f, 0x10)
enc_both(base.load_complex.f64, r.fldWithIndex, 0xf2, 0x0f, 0x10)
enc_both(base.load_complex.f64, r.fldWithIndexDisp8, 0xf2, 0x0f, 0x10)
enc_both(base.load_complex.f64, r.fldWithIndexDisp32, 0xf2, 0x0f, 0x10)
enc_both(base.store.f32.any, r.fst, 0xf3, 0x0f, 0x11)
enc_both(base.store.f32.any, r.fstDisp8, 0xf3, 0x0f, 0x11)
enc_both(base.store.f32.any, r.fstDisp32, 0xf3, 0x0f, 0x11)
enc_both(base.store_complex.f32, r.fstWithIndex, 0xf3, 0x0f, 0x11)
enc_both(base.store_complex.f32, r.fstWithIndexDisp8, 0xf3, 0x0f, 0x11)
enc_both(base.store_complex.f32, r.fstWithIndexDisp32, 0xf3, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fst, 0xf2, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fstDisp8, 0xf2, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fstDisp32, 0xf2, 0x0f, 0x11)
enc_both(base.store_complex.f64, r.fstWithIndex, 0xf2, 0x0f, 0x11)
enc_both(base.store_complex.f64, r.fstWithIndexDisp8, 0xf2, 0x0f, 0x11)
enc_both(base.store_complex.f64, r.fstWithIndexDisp32, 0xf2, 0x0f, 0x11)
enc_both(base.fill.f32, r.ffillSib32, 0xf3, 0x0f, 0x10)
enc_both(base.regfill.f32, r.fregfill32, 0xf3, 0x0f, 0x10)
enc_both(base.fill.f64, r.ffillSib32, 0xf2, 0x0f, 0x10)
enc_both(base.regfill.f64, r.fregfill32, 0xf2, 0x0f, 0x10)
enc_both(base.spill.f32, r.fspillSib32, 0xf3, 0x0f, 0x11)
enc_both(base.regspill.f32, r.fregspill32, 0xf3, 0x0f, 0x11)
enc_both(base.spill.f64, r.fspillSib32, 0xf2, 0x0f, 0x11)
enc_both(base.regspill.f64, r.fregspill32, 0xf2, 0x0f, 0x11)
#
# Function addresses.
#
# Non-PIC, all-zeros function addresses.
X86_32.enc(base.func_addr.i32, *r.fnaddr4(0xb8),
isap=And(Not(allones_funcaddrs), Not(is_pic)))
X86_64.enc(base.func_addr.i64, *r.fnaddr8.rex(0xb8, w=1),
isap=And(Not(allones_funcaddrs), Not(is_pic)))
# Non-PIC, all-ones function addresses.
X86_32.enc(base.func_addr.i32, *r.allones_fnaddr4(0xb8),
isap=And(allones_funcaddrs, Not(is_pic)))
X86_64.enc(base.func_addr.i64, *r.allones_fnaddr8.rex(0xb8, w=1),
isap=And(allones_funcaddrs, Not(is_pic)))
# 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's
# pc-relative field.
X86_64.enc(base.func_addr.i64, *r.pcrel_fnaddr8.rex(0x8d, w=1),
instp=IsColocatedFunc(FuncAddr.func_ref))
# 64-bit, non-colocated, PIC.
X86_64.enc(base.func_addr.i64, *r.got_fnaddr8.rex(0x8b, w=1),
isap=is_pic)
#
# Global addresses.
#
# Non-PIC
X86_32.enc(base.symbol_value.i32, *r.gvaddr4(0xb8),
isap=Not(is_pic))
X86_64.enc(base.symbol_value.i64, *r.gvaddr8.rex(0xb8, w=1),
isap=Not(is_pic))
# PIC, colocated
X86_64.enc(base.symbol_value.i64, *r.pcrel_gvaddr8.rex(0x8d, w=1),
isap=is_pic,
instp=IsColocatedData())
# PIC, non-colocated
X86_64.enc(base.symbol_value.i64, *r.got_gvaddr8.rex(0x8b, w=1),
isap=is_pic)
#
# Stack addresses.
#
# TODO: Add encoding rules for stack_load and stack_store, so that they
# don't get legalized to stack_addr + load/store.
#
X86_32.enc(base.stack_addr.i32, *r.spaddr4_id(0x8d))
X86_64.enc(base.stack_addr.i64, *r.spaddr8_id.rex(0x8d, w=1))
#
# Call/return
#
# 32-bit, both PIC and non-PIC.
X86_32.enc(base.call, *r.call_id(0xe8))
# 64-bit, colocated, both PIC and non-PIC. Use the call instruction's
# pc-relative field.
X86_64.enc(base.call, *r.call_id(0xe8),
instp=IsColocatedFunc(Call.func_ref))
# 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC
# version, since non-PIC currently uses the large code model, which requires
# calls to be lowered to func_addr + call_indirect.
X86_64.enc(base.call, *r.call_plt_id(0xe8), isap=is_pic)
X86_32.enc(base.call_indirect.i32, *r.call_r(0xff, rrr=2))
X86_64.enc(base.call_indirect.i64, *r.call_r.rex(0xff, rrr=2))
X86_64.enc(base.call_indirect.i64, *r.call_r(0xff, rrr=2))
X86_32.enc(base.x_return, *r.ret(0xc3))
X86_64.enc(base.x_return, *r.ret(0xc3))
#
# Branches
#
enc_both(base.jump, r.jmpb, 0xeb)
enc_both(base.jump, r.jmpd, 0xe9)
enc_both(base.brif, r.brib, 0x70)
enc_both(base.brif, r.brid, 0x0f, 0x80)
# Not all float condition codes are legal; see `supported_floatccs`.
enc_both(base.brff, r.brfb, 0x70)
enc_both(base.brff, r.brfd, 0x0f, 0x80)
# Note that the tjccd opcode will be prefixed with 0x0f.
enc_i32_i64(base.brz, r.tjccb, 0x74)
enc_i32_i64(base.brz, r.tjccd, 0x84)
enc_i32_i64(base.brnz, r.tjccb, 0x75)
enc_i32_i64(base.brnz, r.tjccd, 0x85)
# Branch on a b1 value in a register only looks at the low 8 bits. See also
# bint encodings below.
#
# Start with the worst-case encoding for X86_32 only. The register allocator
# can't handle a branch with an ABCD-constrained operand.
X86_32.enc(base.brz.b1, *r.t8jccd_long(0x84))
X86_32.enc(base.brnz.b1, *r.t8jccd_long(0x85))
enc_both(base.brz.b1, r.t8jccb_abcd, 0x74)
enc_both(base.brz.b1, r.t8jccd_abcd, 0x84)
enc_both(base.brnz.b1, r.t8jccb_abcd, 0x75)
enc_both(base.brnz.b1, r.t8jccd_abcd, 0x85)
#
# Jump tables
#
X86_64.enc(base.jump_table_entry.i64.any.any, *r.jt_entry.rex(0x63, w=1))
X86_32.enc(base.jump_table_entry.i32.any.any, *r.jt_entry(0x8b))
X86_64.enc(base.jump_table_base.i64, *r.jt_base.rex(0x8d, w=1))
X86_32.enc(base.jump_table_base.i32, *r.jt_base(0x8d))
enc_x86_64(base.indirect_jump_table_br.i64, r.indirect_jmp, 0xff, rrr=4)
X86_32.enc(base.indirect_jump_table_br.i32, *r.indirect_jmp(0xff, rrr=4))
#
# Trap as ud2
#
X86_32.enc(base.trap, *r.trap(0x0f, 0x0b))
X86_64.enc(base.trap, *r.trap(0x0f, 0x0b))
# Debug trap as int3
X86_32.enc(base.debugtrap, r.debugtrap, 0)
X86_64.enc(base.debugtrap, r.debugtrap, 0)
# Using a standard EncRecipe, not the TailRecipe.
X86_32.enc(base.trapif, r.trapif, 0)
X86_64.enc(base.trapif, r.trapif, 0)
X86_32.enc(base.trapff, r.trapff, 0)
X86_64.enc(base.trapff, r.trapff, 0)
#
# Comparisons
#
enc_i32_i64(base.icmp, r.icscc, 0x39)
enc_i32_i64(base.icmp_imm, r.icscc_ib, 0x83, rrr=7)
enc_i32_i64(base.icmp_imm, r.icscc_id, 0x81, rrr=7)
enc_i32_i64(base.ifcmp, r.rcmp, 0x39)
enc_i32_i64(base.ifcmp_imm, r.rcmp_ib, 0x83, rrr=7)
enc_i32_i64(base.ifcmp_imm, r.rcmp_id, 0x81, rrr=7)
# TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x).
X86_32.enc(base.ifcmp_sp.i32, *r.rcmp_sp(0x39))
X86_64.enc(base.ifcmp_sp.i64, *r.rcmp_sp.rex(0x39, w=1))
#
# Convert flags to bool.
#
# This encodes `b1` as an 8-bit low register with the value 0 or 1.
enc_both(base.trueif, r.seti_abcd, 0x0f, 0x90)
enc_both(base.trueff, r.setf_abcd, 0x0f, 0x90)
#
# Conditional move (a.k.a. integer select)
#
enc_i32_i64(base.selectif, r.cmov, 0x0F, 0x40)
#
# Bit scan forwards and reverse
#
enc_i32_i64(x86.bsf, r.bsf_and_bsr, 0x0F, 0xBC)
enc_i32_i64(x86.bsr, r.bsf_and_bsr, 0x0F, 0xBD)
#
# Convert bool to int.
#
# This assumes that b1 is represented as an 8-bit low register with the value 0
# or 1.
#
# Encode movzbq as movzbl, because it's equivalent and shorter.
X86_32.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
X86_64.enc(base.bint.i64.b1, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.bint.i64.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
X86_64.enc(base.bint.i32.b1, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
# Numerical conversions.
# Reducing an integer is a no-op.
X86_32.enc(base.ireduce.i8.i16, r.null, 0)
X86_32.enc(base.ireduce.i8.i32, r.null, 0)
X86_32.enc(base.ireduce.i16.i32, r.null, 0)
X86_64.enc(base.ireduce.i8.i16, r.null, 0)
X86_64.enc(base.ireduce.i8.i32, r.null, 0)
X86_64.enc(base.ireduce.i16.i32, r.null, 0)
X86_64.enc(base.ireduce.i8.i64, r.null, 0)
X86_64.enc(base.ireduce.i16.i64, r.null, 0)
X86_64.enc(base.ireduce.i32.i64, r.null, 0)
# TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending
# instructions for %al/%ax/%eax to %ax/%eax/%rax.
# movsbl
X86_32.enc(base.sextend.i32.i8, *r.urm_noflags_abcd(0x0f, 0xbe))
X86_64.enc(base.sextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xbe))
X86_64.enc(base.sextend.i32.i8, *r.urm_noflags_abcd(0x0f, 0xbe))
# movswl
X86_32.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
X86_64.enc(base.sextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xbf))
X86_64.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
# movsbq
X86_64.enc(base.sextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xbe, w=1))
# movswq
X86_64.enc(base.sextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xbf, w=1))
# movslq
X86_64.enc(base.sextend.i64.i32, *r.urm_noflags.rex(0x63, w=1))
# movzbl
X86_32.enc(base.uextend.i32.i8, *r.urm_noflags_abcd(0x0f, 0xb6))
X86_64.enc(base.uextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.uextend.i32.i8, *r.urm_noflags_abcd(0x0f, 0xb6))
# movzwl
X86_32.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
X86_64.enc(base.uextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xb7))
X86_64.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
# movzbq, encoded as movzbl because it's equivalent and shorter
X86_64.enc(base.uextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.uextend.i64.i8, *r.urm_noflags(0x0f, 0xb6))
# movzwq, encoded as movzwl because it's equivalent and shorter
X86_64.enc(base.uextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xb7))
X86_64.enc(base.uextend.i64.i16, *r.urm_noflags(0x0f, 0xb7))
# A 32-bit register copy clears the high 32 bits.
X86_64.enc(base.uextend.i64.i32, *r.umr.rex(0x89))
X86_64.enc(base.uextend.i64.i32, *r.umr(0x89))
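# The two rules above lean on an x86-64 architectural guarantee: any write
# to a 32-bit register clears bits 63:32, so a plain 32-bit mov (0x89)
# doubles as uextend.i64.i32. Modeled in Python (illustrative sketch, not
# part of the committed file):
def mov32_zero_extends(value64):
    return value64 & 0xFFFFFFFF  # upper half of the register is cleared
assert mov32_zero_extends(0xDEADBEEFCAFEBABE) == 0xCAFEBABE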
#
# Floating point
#
# Floating-point constants equal to 0.0 can be encoded using either
# `xorps` or `xorpd`, for 32-bit and 64-bit floats respectively.
X86_32.enc(base.f32const, *r.f32imm_z(0x0f, 0x57),
instp=IsZero32BitFloat(UnaryIeee32.imm))
X86_32.enc(base.f64const, *r.f64imm_z(0x66, 0x0f, 0x57),
instp=IsZero64BitFloat(UnaryIeee64.imm))
enc_x86_64_instp(base.f32const, r.f32imm_z,
IsZero32BitFloat(UnaryIeee32.imm), 0x0f, 0x57)
enc_x86_64_instp(base.f64const, r.f64imm_z,
IsZero64BitFloat(UnaryIeee64.imm), 0x66, 0x0f, 0x57)
# movd
enc_both(base.bitcast.f32.i32, r.frurm, 0x66, 0x0f, 0x6e)
enc_both(base.bitcast.i32.f32, r.rfumr, 0x66, 0x0f, 0x7e)
# movq
X86_64.enc(base.bitcast.f64.i64, *r.frurm.rex(0x66, 0x0f, 0x6e, w=1))
X86_64.enc(base.bitcast.i64.f64, *r.rfumr.rex(0x66, 0x0f, 0x7e, w=1))
# movaps
enc_both(base.copy.f32, r.furm, 0x0f, 0x28)
enc_both(base.copy.f64, r.furm, 0x0f, 0x28)
# For x86-64, only define REX forms for now, since we can't describe the
# special regunit immediate operands with the current constraint language.
X86_32.enc(base.regmove.f32, *r.frmov(0x0f, 0x28))
X86_64.enc(base.regmove.f32, *r.frmov.rex(0x0f, 0x28))
# For x86-64, only define REX forms for now, since we can't describe the
# special regunit immediate operands with the current constraint language.
X86_32.enc(base.regmove.f64, *r.frmov(0x0f, 0x28))
X86_64.enc(base.regmove.f64, *r.frmov.rex(0x0f, 0x28))
# cvtsi2ss
enc_i32_i64(base.fcvt_from_sint.f32, r.frurm, 0xf3, 0x0f, 0x2a)
# cvtsi2sd
enc_i32_i64(base.fcvt_from_sint.f64, r.frurm, 0xf2, 0x0f, 0x2a)
# cvtss2sd
enc_both(base.fpromote.f64.f32, r.furm, 0xf3, 0x0f, 0x5a)
# cvtsd2ss
enc_both(base.fdemote.f32.f64, r.furm, 0xf2, 0x0f, 0x5a)
# cvttss2si
enc_both(x86.cvtt2si.i32.f32, r.rfurm, 0xf3, 0x0f, 0x2c)
X86_64.enc(x86.cvtt2si.i64.f32, *r.rfurm.rex(0xf3, 0x0f, 0x2c, w=1))
# cvttsd2si
enc_both(x86.cvtt2si.i32.f64, r.rfurm, 0xf2, 0x0f, 0x2c)
X86_64.enc(x86.cvtt2si.i64.f64, *r.rfurm.rex(0xf2, 0x0f, 0x2c, w=1))
# Exact square roots.
enc_both(base.sqrt.f32, r.furm, 0xf3, 0x0f, 0x51)
enc_both(base.sqrt.f64, r.furm, 0xf2, 0x0f, 0x51)
# Rounding. The recipe looks at the opcode to pick an immediate.
for inst in [
base.nearest,
base.floor,
base.ceil,
base.trunc]:
enc_both(inst.f32, r.furmi_rnd, 0x66, 0x0f, 0x3a, 0x0a, isap=use_sse41)
enc_both(inst.f64, r.furmi_rnd, 0x66, 0x0f, 0x3a, 0x0b, isap=use_sse41)
# Binary arithmetic ops.
for inst, opc in [
(base.fadd, 0x58),
(base.fsub, 0x5c),
(base.fmul, 0x59),
(base.fdiv, 0x5e),
(x86.fmin, 0x5d),
(x86.fmax, 0x5f)]:
enc_both(inst.f32, r.fa, 0xf3, 0x0f, opc)
enc_both(inst.f64, r.fa, 0xf2, 0x0f, opc)
# Binary bitwise ops.
for inst, opc in [
(base.band, 0x54),
(base.bor, 0x56),
(base.bxor, 0x57)]:
enc_both(inst.f32, r.fa, 0x0f, opc)
enc_both(inst.f64, r.fa, 0x0f, opc)
# The `andnps(x,y)` instruction computes `~x&y`, while `band_not(x,y)` is `x&~y`.
enc_both(base.band_not.f32, r.fax, 0x0f, 0x55)
enc_both(base.band_not.f64, r.fax, 0x0f, 0x55)
# Comparisons.
#
# This only covers the condition codes in `supported_floatccs`, the rest are
# handled by legalization patterns.
enc_both(base.fcmp.f32, r.fcscc, 0x0f, 0x2e)
enc_both(base.fcmp.f64, r.fcscc, 0x66, 0x0f, 0x2e)
enc_both(base.ffcmp.f32, r.fcmp, 0x0f, 0x2e)
enc_both(base.ffcmp.f64, r.fcmp, 0x66, 0x0f, 0x2e)


@@ -0,0 +1,173 @@
"""
Supplementary instruction definitions for x86.
This module defines additional instructions that are useful only to the x86
target ISA.
"""
from base.types import iflags
from cdsl.operands import Operand
from cdsl.typevar import TypeVar
from cdsl.instructions import Instruction, InstructionGroup
GROUP = InstructionGroup("x86", "x86-specific instruction set")
iWord = TypeVar('iWord', 'A scalar integer machine word', ints=(32, 64))
nlo = Operand('nlo', iWord, doc='Low part of numerator')
nhi = Operand('nhi', iWord, doc='High part of numerator')
d = Operand('d', iWord, doc='Denominator')
q = Operand('q', iWord, doc='Quotient')
r = Operand('r', iWord, doc='Remainder')
udivmodx = Instruction(
'x86_udivmodx', r"""
Extended unsigned division.
Concatenate the bits in `nhi` and `nlo` to form the numerator.
Interpret the bits as an unsigned number and divide by the unsigned
denominator `d`. Trap when `d` is zero or if the quotient is larger
than the range of the output.
Return both quotient and remainder.
""",
ins=(nlo, nhi, d), outs=(q, r), can_trap=True)
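# A plain-Python model of the documented x86_udivmodx semantics (sdivmodx
# below is analogous with a signed interpretation). Illustrative sketch
# only, not part of the committed file:
def model_udivmodx(nlo, nhi, d, bits=64):
    n = (nhi << bits) | nlo       # concatenate nhi:nlo to form the numerator
    if d == 0:
        raise ZeroDivisionError   # the instruction traps
    q, r = divmod(n, d)
    if q >= 1 << bits:
        raise OverflowError       # an out-of-range quotient also traps
    return q, r
assert model_udivmodx(7, 0, 3) == (2, 1)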
sdivmodx = Instruction(
'x86_sdivmodx', r"""
Extended signed division.
Concatenate the bits in `nhi` and `nlo` to form the numerator.
Interpret the bits as a signed number and divide by the signed
denominator `d`. Trap when `d` is zero or if the quotient is outside
the range of the output.
Return both quotient and remainder.
""",
ins=(nlo, nhi, d), outs=(q, r), can_trap=True)
argL = Operand('argL', iWord)
argR = Operand('argR', iWord)
resLo = Operand('resLo', iWord)
resHi = Operand('resHi', iWord)
umulx = Instruction(
'x86_umulx', r"""
Unsigned integer multiplication, producing a double-length result.
Polymorphic over all scalar integer types, but does not support vector
types.
""",
ins=(argL, argR), outs=(resLo, resHi))
smulx = Instruction(
'x86_smulx', r"""
Signed integer multiplication, producing a double-length result.
Polymorphic over all scalar integer types, but does not support vector
types.
""",
ins=(argL, argR), outs=(resLo, resHi))
Float = TypeVar(
'Float', 'A scalar or vector floating point number',
floats=True, simd=True)
IntTo = TypeVar(
'IntTo', 'An integer type with the same number of lanes',
ints=(32, 64), simd=True)
x = Operand('x', Float)
a = Operand('a', IntTo)
cvtt2si = Instruction(
'x86_cvtt2si', r"""
Convert with truncation floating point to signed integer.
The source floating point operand is converted to a signed integer by
rounding towards zero. If the result can't be represented in the output
type, returns the smallest signed value the output type can represent.
This instruction does not trap.
""",
ins=x, outs=a)
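# The saturating, non-trapping behavior described above can be modeled
# directly (illustrative sketch for the scalar i32 case, not part of the
# committed file):
import math
def model_cvtt2si32(x):
    INT32_MIN, INT32_MAX = -2**31, 2**31 - 1
    if math.isnan(x) or math.isinf(x):
        return INT32_MIN          # NaN/infinity yield the minimum value
    t = math.trunc(x)             # round toward zero
    return t if INT32_MIN <= t <= INT32_MAX else INT32_MIN
assert model_cvtt2si32(-1.7) == -1
assert model_cvtt2si32(float('nan')) == -2**31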
x = Operand('x', Float)
a = Operand('a', Float)
y = Operand('y', Float)
fmin = Instruction(
'x86_fmin', r"""
Floating point minimum with x86 semantics.
This is equivalent to the C ternary operator `x < y ? x : y` which
differs from :inst:`fmin` when either operand is NaN or when comparing
+0.0 to -0.0.
When the two operands don't compare as LT, `y` is returned unchanged,
even if it is a signalling NaN.
""",
ins=(x, y), outs=a)
fmax = Instruction(
'x86_fmax', r"""
Floating point maximum with x86 semantics.
This is equivalent to the C ternary operator `x > y ? x : y` which
differs from :inst:`fmax` when either operand is NaN or when comparing
+0.0 to -0.0.
When the two operands don't compare as GT, `y` is returned unchanged,
even if it is a signalling NaN.
""",
ins=(x, y), outs=a)
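# The docstrings above pin down the subtle part of minss/minsd and
# maxss/maxsd: an unordered compare falls through to the second operand.
# A one-line model of each (illustrative sketch, not part of the
# committed file):
import math
def model_x86_fmin(x, y):
    return x if x < y else y  # NaN or unordered input returns y unchanged
def model_x86_fmax(x, y):
    return x if x > y else y
assert model_x86_fmin(1.0, 2.0) == 1.0
assert math.isnan(model_x86_fmin(1.0, float('nan')))  # y is returned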
x = Operand('x', iWord)
push = Instruction(
'x86_push', r"""
Pushes a value onto the stack.
Decrements the stack pointer and stores the specified value at the new top.
This is polymorphic in i32 and i64. However, it is only implemented for i64
in 64-bit mode, and only for i32 in 32-bit mode.
""",
ins=x, can_store=True, other_side_effects=True)
pop = Instruction(
'x86_pop', r"""
Pops a value from the stack.
Loads a value from the top of the stack and then increments the stack
pointer.
This is polymorphic in i32 and i64. However, it is only implemented for i64
in 64-bit mode, and only for i32 in 32-bit mode.
""",
outs=x, can_load=True, other_side_effects=True)
y = Operand('y', iWord)
rflags = Operand('rflags', iflags)
bsr = Instruction(
'x86_bsr', r"""
Bit Scan Reverse -- returns the bit-index of the most significant 1
in the word. Result is undefined if the argument is zero. However, it
sets the Z flag depending on the argument, so it is at least easy to
detect and handle that case.
This is polymorphic in i32 and i64. It is implemented for both i64 and
i32 in 64-bit mode, and only for i32 in 32-bit mode.
""",
ins=x, outs=(y, rflags))
bsf = Instruction(
'x86_bsf', r"""
Bit Scan Forward -- returns the bit-index of the least significant 1
in the word. It is otherwise identical to `bsr`, just above.
""",
ins=x, outs=(y, rflags))
GROUP.close()
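# The flag output of bsr/bsf is what the clz/ctz legalizations in
# legalize.py (next file) key on. A small model, using None for the
# architecturally undefined result (illustrative sketch, not part of the
# committed file):
def model_bsr(x):
    if x == 0:
        return None, True              # result undefined, Z flag set
    return x.bit_length() - 1, False   # index of the most significant 1
def model_bsf(x):
    if x == 0:
        return None, True
    return (x & -x).bit_length() - 1, False  # least significant 1
assert model_bsr(0b100100) == (5, False)
assert model_bsf(0b100100) == (2, False)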


@@ -0,0 +1,229 @@
"""
Custom legalization patterns for x86.
"""
from __future__ import absolute_import
from cdsl.ast import Var
from cdsl.xform import Rtl, XFormGroup
from base.immediates import imm64, intcc, floatcc
from base import legalize as shared
from base import instructions as insts
from . import instructions as x86
from .defs import ISA
x86_expand = XFormGroup(
'x86_expand',
"""
Legalize instructions by expansion.
Use x86-specific instructions if needed.
""",
isa=ISA, chain=shared.expand_flags)
a = Var('a')
dead = Var('dead')
x = Var('x')
xhi = Var('xhi')
y = Var('y')
a1 = Var('a1')
a2 = Var('a2')
#
# Division and remainder.
#
# The srem expansion requires custom code because `srem INT_MIN, -1` is not
# allowed to trap. The other ops need to check `avoid_div_traps`.
x86_expand.custom_legalize(insts.sdiv, 'expand_sdivrem')
x86_expand.custom_legalize(insts.srem, 'expand_sdivrem')
x86_expand.custom_legalize(insts.udiv, 'expand_udivrem')
x86_expand.custom_legalize(insts.urem, 'expand_udivrem')
#
# Double length (widening) multiplication
#
resLo = Var('resLo')
resHi = Var('resHi')
x86_expand.legalize(
resHi << insts.umulhi(x, y),
Rtl(
(resLo, resHi) << x86.umulx(x, y)
))
x86_expand.legalize(
resHi << insts.smulhi(x, y),
Rtl(
(resLo, resHi) << x86.smulx(x, y)
))
# Floating point condition codes.
#
# The 8 condition codes in `supported_floatccs` are directly supported by a
# `ucomiss` or `ucomisd` instruction. The remaining codes need legalization
# patterns.
# Equality needs an explicit `ord` test which checks the parity bit.
x86_expand.legalize(
a << insts.fcmp(floatcc.eq, x, y),
Rtl(
a1 << insts.fcmp(floatcc.ord, x, y),
a2 << insts.fcmp(floatcc.ueq, x, y),
a << insts.band(a1, a2)
))
x86_expand.legalize(
a << insts.fcmp(floatcc.ne, x, y),
Rtl(
a1 << insts.fcmp(floatcc.uno, x, y),
a2 << insts.fcmp(floatcc.one, x, y),
a << insts.bor(a1, a2)
))
# Inequalities that need to be reversed.
for cc, rev_cc in [
(floatcc.lt, floatcc.gt),
(floatcc.le, floatcc.ge),
(floatcc.ugt, floatcc.ult),
(floatcc.uge, floatcc.ule)]:
x86_expand.legalize(
a << insts.fcmp(cc, x, y),
Rtl(
a << insts.fcmp(rev_cc, y, x)
))
# We need to modify the CFG for min/max legalization.
x86_expand.custom_legalize(insts.fmin, 'expand_minmax')
x86_expand.custom_legalize(insts.fmax, 'expand_minmax')
# Conversions from unsigned need special handling.
x86_expand.custom_legalize(insts.fcvt_from_uint, 'expand_fcvt_from_uint')
# Conversions from float to int can trap and modify the control flow graph.
x86_expand.custom_legalize(insts.fcvt_to_sint, 'expand_fcvt_to_sint')
x86_expand.custom_legalize(insts.fcvt_to_uint, 'expand_fcvt_to_uint')
x86_expand.custom_legalize(insts.fcvt_to_sint_sat, 'expand_fcvt_to_sint_sat')
x86_expand.custom_legalize(insts.fcvt_to_uint_sat, 'expand_fcvt_to_uint_sat')
# Count leading and trailing zeroes, for baseline x86_64
c_minus_one = Var('c_minus_one')
c_thirty_one = Var('c_thirty_one')
c_thirty_two = Var('c_thirty_two')
c_sixty_three = Var('c_sixty_three')
c_sixty_four = Var('c_sixty_four')
index1 = Var('index1')
r2flags = Var('r2flags')
index2 = Var('index2')
x86_expand.legalize(
a << insts.clz.i64(x),
Rtl(
c_minus_one << insts.iconst(imm64(-1)),
c_sixty_three << insts.iconst(imm64(63)),
(index1, r2flags) << x86.bsr(x),
index2 << insts.selectif(intcc.eq, r2flags, c_minus_one, index1),
a << insts.isub(c_sixty_three, index2),
))
x86_expand.legalize(
a << insts.clz.i32(x),
Rtl(
c_minus_one << insts.iconst(imm64(-1)),
c_thirty_one << insts.iconst(imm64(31)),
(index1, r2flags) << x86.bsr(x),
index2 << insts.selectif(intcc.eq, r2flags, c_minus_one, index1),
a << insts.isub(c_thirty_one, index2),
))
x86_expand.legalize(
a << insts.ctz.i64(x),
Rtl(
c_sixty_four << insts.iconst(imm64(64)),
(index1, r2flags) << x86.bsf(x),
a << insts.selectif(intcc.eq, r2flags, c_sixty_four, index1),
))
x86_expand.legalize(
a << insts.ctz.i32(x),
Rtl(
c_thirty_two << insts.iconst(imm64(32)),
(index1, r2flags) << x86.bsf(x),
a << insts.selectif(intcc.eq, r2flags, c_thirty_two, index1),
))
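# The four expansions above can be checked in plain Python against a
# simple bsr/bsf model (illustrative sketch of the i32 cases, not part of
# the committed file):
def model_clz32(x):
    index, zf = (None, True) if x == 0 else (x.bit_length() - 1, False)
    index2 = -1 if zf else index      # selectif(eq, rflags, -1, index1)
    return 31 - index2                # isub(31, index2); clz(0) == 32
def model_ctz32(x):
    index, zf = (None, True) if x == 0 else ((x & -x).bit_length() - 1, False)
    return 32 if zf else index        # selectif(eq, rflags, 32, index1)
assert model_clz32(1) == 31 and model_clz32(0) == 32
assert model_ctz32(8) == 3 and model_ctz32(0) == 32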
# Population count for baseline x86_64
qv1 = Var('qv1')
qv3 = Var('qv3')
qv4 = Var('qv4')
qv5 = Var('qv5')
qv6 = Var('qv6')
qv7 = Var('qv7')
qv8 = Var('qv8')
qv9 = Var('qv9')
qv10 = Var('qv10')
qv11 = Var('qv11')
qv12 = Var('qv12')
qv13 = Var('qv13')
qv14 = Var('qv14')
qv15 = Var('qv15')
qv16 = Var('qv16')
qc77 = Var('qc77')
qc0F = Var('qc0F')
qc01 = Var('qc01')
x86_expand.legalize(
qv16 << insts.popcnt.i64(qv1),
Rtl(
qv3 << insts.ushr_imm(qv1, imm64(1)),
qc77 << insts.iconst(imm64(0x7777777777777777)),
qv4 << insts.band(qv3, qc77),
qv5 << insts.isub(qv1, qv4),
qv6 << insts.ushr_imm(qv4, imm64(1)),
qv7 << insts.band(qv6, qc77),
qv8 << insts.isub(qv5, qv7),
qv9 << insts.ushr_imm(qv7, imm64(1)),
qv10 << insts.band(qv9, qc77),
qv11 << insts.isub(qv8, qv10),
qv12 << insts.ushr_imm(qv11, imm64(4)),
qv13 << insts.iadd(qv11, qv12),
qc0F << insts.iconst(imm64(0x0F0F0F0F0F0F0F0F)),
qv14 << insts.band(qv13, qc0F),
qc01 << insts.iconst(imm64(0x0101010101010101)),
qv15 << insts.imul(qv14, qc01),
qv16 << insts.ushr_imm(qv15, imm64(56))
))
lv1 = Var('lv1')
lv3 = Var('lv3')
lv4 = Var('lv4')
lv5 = Var('lv5')
lv6 = Var('lv6')
lv7 = Var('lv7')
lv8 = Var('lv8')
lv9 = Var('lv9')
lv10 = Var('lv10')
lv11 = Var('lv11')
lv12 = Var('lv12')
lv13 = Var('lv13')
lv14 = Var('lv14')
lv15 = Var('lv15')
lv16 = Var('lv16')
lc77 = Var('lc77')
lc0F = Var('lc0F')
lc01 = Var('lc01')
x86_expand.legalize(
lv16 << insts.popcnt.i32(lv1),
Rtl(
lv3 << insts.ushr_imm(lv1, imm64(1)),
lc77 << insts.iconst(imm64(0x77777777)),
lv4 << insts.band(lv3, lc77),
lv5 << insts.isub(lv1, lv4),
lv6 << insts.ushr_imm(lv4, imm64(1)),
lv7 << insts.band(lv6, lc77),
lv8 << insts.isub(lv5, lv7),
lv9 << insts.ushr_imm(lv7, imm64(1)),
lv10 << insts.band(lv9, lc77),
lv11 << insts.isub(lv8, lv10),
lv12 << insts.ushr_imm(lv11, imm64(4)),
lv13 << insts.iadd(lv11, lv12),
lc0F << insts.iconst(imm64(0x0F0F0F0F)),
lv14 << insts.band(lv13, lc0F),
lc01 << insts.iconst(imm64(0x01010101)),
lv15 << insts.imul(lv14, lc01),
lv16 << insts.ushr_imm(lv15, imm64(24))
))
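# Both Rtl bodies above are the classic nibble-wise SWAR popcount. A
# direct Python transliteration of the 64-bit version, with explicit
# masking since Python integers don't wrap (illustrative sketch, not part
# of the committed file):
def model_popcnt64(qv1):
    m = (1 << 64) - 1
    c77 = 0x7777777777777777
    qv4 = (qv1 >> 1) & c77
    qv5 = (qv1 - qv4) & m             # n - floor(n/2), per nibble
    qv7 = (qv4 >> 1) & c77
    qv8 = (qv5 - qv7) & m             # ... - floor(n/4)
    qv10 = (qv7 >> 1) & c77
    qv11 = (qv8 - qv10) & m           # ... - floor(n/8): nibble popcounts
    qv13 = (qv11 + (qv11 >> 4)) & m
    qv14 = qv13 & 0x0F0F0F0F0F0F0F0F  # byte-wise popcounts
    qv15 = (qv14 * 0x0101010101010101) & m
    return qv15 >> 56                 # horizontal sum of the bytes
assert all(model_popcnt64(v) == bin(v).count('1')
           for v in (0, 1, 2**64 - 1, 0x8000000000000001))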

File diff suppressed because it is too large.


@@ -0,0 +1,61 @@
"""
x86 register banks.
While the floating-point registers are straightforward, the general-purpose
register bank has a few quirks on x86. We have these encodings of the 8-bit
registers:
bits  I32  I64  |  16b  32b  64b
000   AL   AL   |  AX   EAX  RAX
001   CL   CL   |  CX   ECX  RCX
010   DL   DL   |  DX   EDX  RDX
011   BL   BL   |  BX   EBX  RBX
100   AH   SPL  |  SP   ESP  RSP
101   CH   BPL  |  BP   EBP  RBP
110   DH   SIL  |  SI   ESI  RSI
111   BH   DIL  |  DI   EDI  RDI
Here, the I64 column refers to the registers you get with a REX prefix. Without
the REX prefix, you get the I32 registers.
The 8-bit registers are not that useful since WebAssembly only has i32 and i64
data types, and the H-registers even less so. Rather than trying to model the
H-registers accurately, we'll avoid using them in both I32 and I64 modes.
"""
from __future__ import absolute_import
from cdsl.registers import RegBank, RegClass, Stack
from .defs import ISA
IntRegs = RegBank(
'IntRegs', ISA,
'General purpose registers',
units=16, prefix='r',
names='rax rcx rdx rbx rsp rbp rsi rdi'.split())
FloatRegs = RegBank(
'FloatRegs', ISA,
'SSE floating point registers',
units=16, prefix='xmm')
FlagRegs = RegBank(
'FlagRegs', ISA,
'Flag registers',
units=1,
pressure_tracking=False,
names=['rflags'])
GPR = RegClass(IntRegs)
GPR8 = GPR[0:8]
ABCD = GPR[0:4]
FPR = RegClass(FloatRegs)
FPR8 = FPR[0:8]
FLAG = RegClass(FlagRegs)
# Constraints for stack operands.
# Stack operand with a 32-bit signed displacement from either RBP or RSP.
StackGPR32 = Stack(GPR)
StackFPR32 = Stack(FPR)
RegClass.extract_names(globals())


@@ -0,0 +1,54 @@
"""
x86 settings.
"""
from __future__ import absolute_import
from cdsl.settings import SettingGroup, BoolSetting, Preset
from cdsl.predicates import And
import base.settings as shared
from .defs import ISA
ISA.settings = SettingGroup('x86', parent=shared.group)
# The has_* settings here correspond to CPUID bits.
# CPUID.01H:ECX
has_sse3 = BoolSetting("SSE3: CPUID.01H:ECX.SSE3[bit 0]")
has_ssse3 = BoolSetting("SSSE3: CPUID.01H:ECX.SSSE3[bit 9]")
has_sse41 = BoolSetting("SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]")
has_sse42 = BoolSetting("SSE4.2: CPUID.01H:ECX.SSE4_2[bit 20]")
has_popcnt = BoolSetting("POPCNT: CPUID.01H:ECX.POPCNT[bit 23]")
has_avx = BoolSetting("AVX: CPUID.01H:ECX.AVX[bit 28]")
# CPUID.(EAX=07H, ECX=0H):EBX
has_bmi1 = BoolSetting("BMI1: CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]")
has_bmi2 = BoolSetting("BMI2: CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]")
# CPUID.EAX=80000001H:ECX
has_lzcnt = BoolSetting("LZCNT: CPUID.EAX=80000001H:ECX.LZCNT[bit 5]")
# The use_* settings here determine whether a feature can be used.
use_sse41 = And(has_sse41)
use_sse42 = And(has_sse42, use_sse41)
use_popcnt = And(has_popcnt, has_sse42)
use_bmi1 = And(has_bmi1)
use_lzcnt = And(has_lzcnt)
# Presets corresponding to x86 CPUs.
baseline = Preset()
nehalem = Preset(
has_sse3, has_ssse3, has_sse41, has_sse42, has_popcnt)
haswell = Preset(nehalem, has_bmi1, has_bmi2, has_lzcnt)
broadwell = Preset(haswell)
skylake = Preset(broadwell)
cannonlake = Preset(skylake)
icelake = Preset(cannonlake)
znver1 = Preset(
has_sse3, has_ssse3, has_sse41, has_sse42, has_popcnt,
has_bmi1, has_bmi2, has_lzcnt)
ISA.settings.close(globals())
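# A sketch of how the predicates and presets above compose, with plain
# sets standing in for the BoolSetting/Preset machinery (illustrative
# only, not part of the committed file):
nehalem_flags = {'has_sse3', 'has_ssse3', 'has_sse41', 'has_sse42',
                 'has_popcnt'}
haswell_flags = nehalem_flags | {'has_bmi1', 'has_bmi2', 'has_lzcnt'}
baseline_flags = set()
def can_use_popcnt(flags):
    return {'has_popcnt', 'has_sse42'} <= flags  # And(has_popcnt, has_sse42)
assert can_use_popcnt(haswell_flags)
assert not can_use_popcnt(baseline_flags)  # falls back to the SWAR expansion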