moved crates in lib/ to src/, renamed crates, modified some files' text (#660)
moved crates in lib/ to src/, renamed crates, modified some files' text (#660)
This commit is contained in:
24
cranelift/codegen/meta-python/isa/__init__.py
Normal file
24
cranelift/codegen/meta-python/isa/__init__.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""
|
||||
Cranelift target ISA definitions
|
||||
--------------------------------
|
||||
|
||||
The :py:mod:`isa` package contains sub-packages for each target instruction set
|
||||
architecture supported by Cranelift.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.isa import TargetISA # noqa
|
||||
from . import riscv, x86, arm32, arm64
|
||||
|
||||
try:
|
||||
from typing import List # noqa
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
def all_isas():
    # type: () -> List[TargetISA]
    """
    Return every supported target ISA, in the order riscv, x86, arm32, arm64.

    Each entry is the `ISA` attribute re-exported by the corresponding
    sub-package, a :py:class:`cdsl.isa.TargetISA` instance.
    """
    return [target.ISA for target in (riscv, x86, arm32, arm64)]
|
||||
15
cranelift/codegen/meta-python/isa/arm32/__init__.py
Normal file
15
cranelift/codegen/meta-python/isa/arm32/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
||||
"""
|
||||
ARM 32-bit Architecture
|
||||
-----------------------
|
||||
|
||||
This target ISA generates code for ARMv7 and ARMv8 CPUs in 32-bit mode
|
||||
(AArch32). We support both ARM and Thumb2 instruction encodings.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from . import defs
|
||||
from . import settings, registers # noqa
|
||||
from cdsl.isa import TargetISA # noqa
|
||||
|
||||
# Re-export the primary target ISA definition.
|
||||
ISA = defs.ISA.finish() # type: TargetISA
|
||||
19
cranelift/codegen/meta-python/isa/arm32/defs.py
Normal file
19
cranelift/codegen/meta-python/isa/arm32/defs.py
Normal file
@@ -0,0 +1,19 @@
|
||||
"""
|
||||
ARM 32-bit definitions.
|
||||
|
||||
Commonly used definitions.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.isa import TargetISA, CPUMode
|
||||
import base.instructions
|
||||
from base.legalize import narrow
|
||||
|
||||
ISA = TargetISA('arm32', [base.instructions.GROUP]) # type: TargetISA
|
||||
|
||||
# CPU modes for 32-bit ARM and Thumb2.
|
||||
A32 = CPUMode('A32', ISA)
|
||||
T32 = CPUMode('T32', ISA)
|
||||
|
||||
# TODO: Refine these.
|
||||
A32.legalize_type(narrow)
|
||||
T32.legalize_type(narrow)
|
||||
45
cranelift/codegen/meta-python/isa/arm32/registers.py
Normal file
45
cranelift/codegen/meta-python/isa/arm32/registers.py
Normal file
@@ -0,0 +1,45 @@
|
||||
"""
|
||||
ARM32 register banks.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.registers import RegBank, RegClass
|
||||
from .defs import ISA
|
||||
|
||||
|
||||
# Define the larger float bank first to avoid the alignment gap.
|
||||
FloatRegs = RegBank(
|
||||
'FloatRegs', ISA, r"""
|
||||
Floating point registers.
|
||||
|
||||
The floating point register units correspond to the S-registers, but
|
||||
extended as if there were 64 registers.
|
||||
|
||||
- S registers are one unit each.
|
||||
- D registers are two units each, even D16 and above.
|
||||
- Q registers are 4 units each.
|
||||
""",
|
||||
units=64, prefix='s')
|
||||
|
||||
# Special register units:
|
||||
# - r15 is the program counter.
|
||||
# - r14 is the link register.
|
||||
# - r13 is usually the stack pointer.
|
||||
IntRegs = RegBank(
|
||||
'IntRegs', ISA,
|
||||
'General purpose registers',
|
||||
units=16, prefix='r')
|
||||
|
||||
FlagRegs = RegBank(
|
||||
'FlagRegs', ISA,
|
||||
'Flag registers',
|
||||
units=1,
|
||||
pressure_tracking=False,
|
||||
names=['nzcv'])
|
||||
|
||||
GPR = RegClass(IntRegs)
|
||||
S = RegClass(FloatRegs, count=32)
|
||||
D = RegClass(FloatRegs, width=2)
|
||||
Q = RegClass(FloatRegs, width=4)
|
||||
FLAG = RegClass(FlagRegs)
|
||||
|
||||
RegClass.extract_names(globals())
|
||||
11
cranelift/codegen/meta-python/isa/arm32/settings.py
Normal file
11
cranelift/codegen/meta-python/isa/arm32/settings.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""
|
||||
ARM32 settings.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.settings import SettingGroup
|
||||
import base.settings as shared
|
||||
from .defs import ISA
|
||||
|
||||
ISA.settings = SettingGroup('arm32', parent=shared.group)
|
||||
|
||||
ISA.settings.close(globals())
|
||||
14
cranelift/codegen/meta-python/isa/arm64/__init__.py
Normal file
14
cranelift/codegen/meta-python/isa/arm64/__init__.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
ARM 64-bit Architecture
|
||||
-----------------------
|
||||
|
||||
ARMv8 CPUs running the AArch64 architecture.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from . import defs
|
||||
from . import settings, registers # noqa
|
||||
from cdsl.isa import TargetISA # noqa
|
||||
|
||||
# Re-export the primary target ISA definition.
|
||||
ISA = defs.ISA.finish() # type: TargetISA
|
||||
15
cranelift/codegen/meta-python/isa/arm64/defs.py
Normal file
15
cranelift/codegen/meta-python/isa/arm64/defs.py
Normal file
@@ -0,0 +1,15 @@
|
||||
"""
|
||||
ARM64 definitions.
|
||||
|
||||
Commonly used definitions.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.isa import TargetISA, CPUMode
|
||||
import base.instructions
|
||||
from base.legalize import narrow
|
||||
|
||||
ISA = TargetISA('arm64', [base.instructions.GROUP]) # type: TargetISA
|
||||
A64 = CPUMode('A64', ISA)
|
||||
|
||||
# TODO: Refine these
|
||||
A64.legalize_type(narrow)
|
||||
32
cranelift/codegen/meta-python/isa/arm64/registers.py
Normal file
32
cranelift/codegen/meta-python/isa/arm64/registers.py
Normal file
@@ -0,0 +1,32 @@
|
||||
"""
|
||||
AArch64 register banks.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.registers import RegBank, RegClass
|
||||
from .defs import ISA
|
||||
|
||||
|
||||
# The `x31` regunit serves as the stack pointer / zero register depending on
|
||||
# context. We reserve it and don't model the difference.
|
||||
IntRegs = RegBank(
|
||||
'IntRegs', ISA,
|
||||
'General purpose registers',
|
||||
units=32, prefix='x')
|
||||
|
||||
FloatRegs = RegBank(
|
||||
'FloatRegs', ISA,
|
||||
'Floating point registers',
|
||||
units=32, prefix='v')
|
||||
|
||||
FlagRegs = RegBank(
|
||||
'FlagRegs', ISA,
|
||||
'Flag registers',
|
||||
units=1,
|
||||
pressure_tracking=False,
|
||||
names=['nzcv'])
|
||||
|
||||
GPR = RegClass(IntRegs)
|
||||
FPR = RegClass(FloatRegs)
|
||||
FLAG = RegClass(FlagRegs)
|
||||
|
||||
RegClass.extract_names(globals())
|
||||
11
cranelift/codegen/meta-python/isa/arm64/settings.py
Normal file
11
cranelift/codegen/meta-python/isa/arm64/settings.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""
|
||||
ARM64 settings.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.settings import SettingGroup
|
||||
import base.settings as shared
|
||||
from .defs import ISA
|
||||
|
||||
ISA.settings = SettingGroup('arm64', parent=shared.group)
|
||||
|
||||
ISA.settings.close(globals())
|
||||
33
cranelift/codegen/meta-python/isa/riscv/__init__.py
Normal file
33
cranelift/codegen/meta-python/isa/riscv/__init__.py
Normal file
@@ -0,0 +1,33 @@
|
||||
"""
|
||||
RISC-V Target
|
||||
-------------
|
||||
|
||||
`RISC-V <https://riscv.org/>`_ is an open instruction set architecture
|
||||
originally developed at UC Berkeley. It is a RISC-style ISA with either a
|
||||
32-bit (RV32I) or 64-bit (RV64I) base instruction set and a number of optional
|
||||
extensions:
|
||||
|
||||
RV32M / RV64M
|
||||
Integer multiplication and division.
|
||||
|
||||
RV32A / RV64A
|
||||
Atomics.
|
||||
|
||||
RV32F / RV64F
|
||||
Single-precision IEEE floating point.
|
||||
|
||||
RV32D / RV64D
|
||||
Double-precision IEEE floating point.
|
||||
|
||||
RV32G / RV64G
|
||||
General purpose instruction sets. This represents the union of the I, M, A,
|
||||
F, and D instruction sets listed above.
|
||||
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from . import defs
|
||||
from . import encodings, settings, registers # noqa
|
||||
from cdsl.isa import TargetISA # noqa
|
||||
|
||||
# Re-export the primary target ISA definition.
|
||||
ISA = defs.ISA.finish() # type: TargetISA
|
||||
14
cranelift/codegen/meta-python/isa/riscv/defs.py
Normal file
14
cranelift/codegen/meta-python/isa/riscv/defs.py
Normal file
@@ -0,0 +1,14 @@
|
||||
"""
|
||||
RISC-V definitions.
|
||||
|
||||
Commonly used definitions.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.isa import TargetISA, CPUMode
|
||||
import base.instructions
|
||||
|
||||
ISA = TargetISA('riscv', [base.instructions.GROUP]) # type: TargetISA
|
||||
|
||||
# CPU modes for 32-bit and 64-bit operation.
|
||||
RV32 = CPUMode('RV32', ISA)
|
||||
RV64 = CPUMode('RV64', ISA)
|
||||
162
cranelift/codegen/meta-python/isa/riscv/encodings.py
Normal file
162
cranelift/codegen/meta-python/isa/riscv/encodings.py
Normal file
@@ -0,0 +1,162 @@
|
||||
"""
|
||||
RISC-V Encodings.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from base import instructions as base
|
||||
from base.immediates import intcc
|
||||
from .defs import RV32, RV64
|
||||
from .recipes import OPIMM, OPIMM32, OP, OP32, LUI, BRANCH, JALR, JAL
|
||||
from .recipes import LOAD, STORE
|
||||
from .recipes import R, Rshamt, Ricmp, Ii, Iz, Iicmp, Iret, Icall, Icopy
|
||||
from .recipes import U, UJ, UJcall, SB, SBzero, GPsp, GPfi, Irmov
|
||||
from .settings import use_m
|
||||
from cdsl.ast import Var
|
||||
from base.legalize import narrow, expand
|
||||
|
||||
RV32.legalize_monomorphic(expand)
|
||||
RV32.legalize_type(
|
||||
default=narrow,
|
||||
i32=expand,
|
||||
f32=expand,
|
||||
f64=expand)
|
||||
|
||||
RV64.legalize_monomorphic(expand)
|
||||
RV64.legalize_type(
|
||||
default=narrow,
|
||||
i32=expand,
|
||||
i64=expand,
|
||||
f32=expand,
|
||||
f64=expand)
|
||||
|
||||
# Dummies for instruction predicates.
|
||||
x = Var('x')
|
||||
y = Var('y')
|
||||
dest = Var('dest')
|
||||
args = Var('args')
|
||||
|
||||
# Basic arithmetic binary instructions are encoded in an R-type instruction.
|
||||
for inst, inst_imm, f3, f7 in [
|
||||
(base.iadd, base.iadd_imm, 0b000, 0b0000000),
|
||||
(base.isub, None, 0b000, 0b0100000),
|
||||
(base.bxor, base.bxor_imm, 0b100, 0b0000000),
|
||||
(base.bor, base.bor_imm, 0b110, 0b0000000),
|
||||
(base.band, base.band_imm, 0b111, 0b0000000)
|
||||
]:
|
||||
RV32.enc(inst.i32, R, OP(f3, f7))
|
||||
RV64.enc(inst.i64, R, OP(f3, f7))
|
||||
|
||||
# Immediate versions for add/xor/or/and.
|
||||
if inst_imm:
|
||||
RV32.enc(inst_imm.i32, Ii, OPIMM(f3))
|
||||
RV64.enc(inst_imm.i64, Ii, OPIMM(f3))
|
||||
|
||||
# 32-bit ops in RV64.
|
||||
RV64.enc(base.iadd.i32, R, OP32(0b000, 0b0000000))
|
||||
RV64.enc(base.isub.i32, R, OP32(0b000, 0b0100000))
|
||||
# There are no andiw/oriw/xoriw variations.
|
||||
RV64.enc(base.iadd_imm.i32, Ii, OPIMM32(0b000))
|
||||
|
||||
# Use iadd_imm with %x0 to materialize constants.
|
||||
RV32.enc(base.iconst.i32, Iz, OPIMM(0b000))
|
||||
RV64.enc(base.iconst.i32, Iz, OPIMM(0b000))
|
||||
RV64.enc(base.iconst.i64, Iz, OPIMM(0b000))
|
||||
|
||||
# Dynamic shifts have the same masking semantics as the clif base instructions.
|
||||
for inst, inst_imm, f3, f7 in [
|
||||
(base.ishl, base.ishl_imm, 0b001, 0b0000000),
|
||||
(base.ushr, base.ushr_imm, 0b101, 0b0000000),
|
||||
(base.sshr, base.sshr_imm, 0b101, 0b0100000),
|
||||
]:
|
||||
RV32.enc(inst.i32.i32, R, OP(f3, f7))
|
||||
RV64.enc(inst.i64.i64, R, OP(f3, f7))
|
||||
RV64.enc(inst.i32.i32, R, OP32(f3, f7))
|
||||
# Allow i32 shift amounts in 64-bit shifts.
|
||||
RV64.enc(inst.i64.i32, R, OP(f3, f7))
|
||||
RV64.enc(inst.i32.i64, R, OP32(f3, f7))
|
||||
|
||||
# Immediate shifts.
|
||||
RV32.enc(inst_imm.i32, Rshamt, OPIMM(f3, f7))
|
||||
RV64.enc(inst_imm.i64, Rshamt, OPIMM(f3, f7))
|
||||
RV64.enc(inst_imm.i32, Rshamt, OPIMM32(f3, f7))
|
||||
|
||||
# Signed and unsigned integer 'less than'. There are no 'w' variants for
|
||||
# comparing 32-bit numbers in RV64.
|
||||
RV32.enc(base.icmp.i32(intcc.slt, x, y), Ricmp, OP(0b010, 0b0000000))
|
||||
RV64.enc(base.icmp.i64(intcc.slt, x, y), Ricmp, OP(0b010, 0b0000000))
|
||||
RV32.enc(base.icmp.i32(intcc.ult, x, y), Ricmp, OP(0b011, 0b0000000))
|
||||
RV64.enc(base.icmp.i64(intcc.ult, x, y), Ricmp, OP(0b011, 0b0000000))
|
||||
|
||||
RV32.enc(base.icmp_imm.i32(intcc.slt, x, y), Iicmp, OPIMM(0b010))
|
||||
RV64.enc(base.icmp_imm.i64(intcc.slt, x, y), Iicmp, OPIMM(0b010))
|
||||
RV32.enc(base.icmp_imm.i32(intcc.ult, x, y), Iicmp, OPIMM(0b011))
|
||||
RV64.enc(base.icmp_imm.i64(intcc.ult, x, y), Iicmp, OPIMM(0b011))
|
||||
|
||||
# Integer constants with the low 12 bits clear are materialized by lui.
|
||||
RV32.enc(base.iconst.i32, U, LUI())
|
||||
RV64.enc(base.iconst.i32, U, LUI())
|
||||
RV64.enc(base.iconst.i64, U, LUI())
|
||||
|
||||
# "M" Standard Extension for Integer Multiplication and Division.
|
||||
# Gated by the `use_m` flag.
|
||||
RV32.enc(base.imul.i32, R, OP(0b000, 0b0000001), isap=use_m)
|
||||
RV64.enc(base.imul.i64, R, OP(0b000, 0b0000001), isap=use_m)
|
||||
RV64.enc(base.imul.i32, R, OP32(0b000, 0b0000001), isap=use_m)
|
||||
|
||||
# Control flow.
|
||||
|
||||
# Unconditional branches.
|
||||
RV32.enc(base.jump, UJ, JAL())
|
||||
RV64.enc(base.jump, UJ, JAL())
|
||||
RV32.enc(base.call, UJcall, JAL())
|
||||
RV64.enc(base.call, UJcall, JAL())
|
||||
|
||||
# Conditional branches.
|
||||
for cond, f3 in [
|
||||
(intcc.eq, 0b000),
|
||||
(intcc.ne, 0b001),
|
||||
(intcc.slt, 0b100),
|
||||
(intcc.sge, 0b101),
|
||||
(intcc.ult, 0b110),
|
||||
(intcc.uge, 0b111)
|
||||
]:
|
||||
RV32.enc(base.br_icmp.i32(cond, x, y, dest, args), SB, BRANCH(f3))
|
||||
RV64.enc(base.br_icmp.i64(cond, x, y, dest, args), SB, BRANCH(f3))
|
||||
|
||||
for inst, f3 in [
|
||||
(base.brz, 0b000),
|
||||
(base.brnz, 0b001)
|
||||
]:
|
||||
RV32.enc(inst.i32, SBzero, BRANCH(f3))
|
||||
RV64.enc(inst.i64, SBzero, BRANCH(f3))
|
||||
RV32.enc(inst.b1, SBzero, BRANCH(f3))
|
||||
RV64.enc(inst.b1, SBzero, BRANCH(f3))
|
||||
|
||||
# Returns are a special case of JALR using %x1 to hold the return address.
|
||||
# The return address is provided by a special-purpose `link` return value that
|
||||
# is added by legalize_signature().
|
||||
RV32.enc(base.x_return, Iret, JALR())
|
||||
RV64.enc(base.x_return, Iret, JALR())
|
||||
RV32.enc(base.call_indirect.i32, Icall, JALR())
|
||||
RV64.enc(base.call_indirect.i64, Icall, JALR())
|
||||
|
||||
# Spill and fill.
|
||||
RV32.enc(base.spill.i32, GPsp, STORE(0b010))
|
||||
RV64.enc(base.spill.i32, GPsp, STORE(0b010))
|
||||
RV64.enc(base.spill.i64, GPsp, STORE(0b011))
|
||||
RV32.enc(base.fill.i32, GPfi, LOAD(0b010))
|
||||
RV64.enc(base.fill.i32, GPfi, LOAD(0b010))
|
||||
RV64.enc(base.fill.i64, GPfi, LOAD(0b011))
|
||||
|
||||
# Register copies.
|
||||
RV32.enc(base.copy.i32, Icopy, OPIMM(0b000))
|
||||
RV64.enc(base.copy.i64, Icopy, OPIMM(0b000))
|
||||
RV64.enc(base.copy.i32, Icopy, OPIMM32(0b000))
|
||||
|
||||
RV32.enc(base.regmove.i32, Irmov, OPIMM(0b000))
|
||||
RV64.enc(base.regmove.i64, Irmov, OPIMM(0b000))
|
||||
RV64.enc(base.regmove.i32, Irmov, OPIMM32(0b000))
|
||||
|
||||
RV32.enc(base.copy.b1, Icopy, OPIMM(0b000))
|
||||
RV64.enc(base.copy.b1, Icopy, OPIMM(0b000))
|
||||
RV32.enc(base.regmove.b1, Irmov, OPIMM(0b000))
|
||||
RV64.enc(base.regmove.b1, Irmov, OPIMM(0b000))
|
||||
225
cranelift/codegen/meta-python/isa/riscv/recipes.py
Normal file
225
cranelift/codegen/meta-python/isa/riscv/recipes.py
Normal file
@@ -0,0 +1,225 @@
|
||||
"""
|
||||
RISC-V Encoding recipes.
|
||||
|
||||
The encoding recipes defined here more or less correspond to the RISC-V native
|
||||
instruction formats described in the reference:
|
||||
|
||||
The RISC-V Instruction Set Manual
|
||||
Volume I: User-Level ISA
|
||||
Version 2.1
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.isa import EncRecipe
|
||||
from cdsl.predicates import IsSignedInt
|
||||
from cdsl.registers import Stack
|
||||
from base.formats import Binary, BinaryImm, MultiAry, IntCompare, IntCompareImm
|
||||
from base.formats import Unary, UnaryImm, BranchIcmp, Branch, Jump
|
||||
from base.formats import Call, CallIndirect, RegMove
|
||||
from .registers import GPR
|
||||
|
||||
# The low 7 bits of a RISC-V instruction are the base opcode. All 32-bit
|
||||
# instructions have 11 as the two low bits, with bits 6:2 determining the base
|
||||
# opcode.
|
||||
#
|
||||
# Encbits for the 32-bit recipes are opcode[6:2] | (funct3 << 5) | ...
|
||||
# The functions below encode the encbits.
|
||||
|
||||
|
||||
def LOAD(funct3):
    # type: (int) -> int
    """
    Compute the encbits for the LOAD base opcode.

    The result is `opcode[6:2] | (funct3 << 5)` with `opcode[6:2]` fixed to
    `0b00000`.

    :param funct3: Value of the 3-bit `funct3` field, 0 through 7.
    """
    # A negative value would shift into a negative, meaningless bit pattern,
    # so check both ends of the range.
    assert 0 <= funct3 <= 0b111
    return 0b00000 | (funct3 << 5)
|
||||
|
||||
|
||||
def STORE(funct3):
    # type: (int) -> int
    """
    Compute the encbits for the STORE base opcode.

    The result is `opcode[6:2] | (funct3 << 5)` with `opcode[6:2]` fixed to
    `0b01000`.

    :param funct3: Value of the 3-bit `funct3` field, 0 through 7.
    """
    # A negative value would shift into a negative, meaningless bit pattern,
    # so check both ends of the range.
    assert 0 <= funct3 <= 0b111
    return 0b01000 | (funct3 << 5)
|
||||
|
||||
|
||||
def BRANCH(funct3):
    # type: (int) -> int
    """
    Compute the encbits for the BRANCH base opcode.

    The result is `opcode[6:2] | (funct3 << 5)` with `opcode[6:2]` fixed to
    `0b11000`.

    :param funct3: Value of the 3-bit `funct3` field, 0 through 7.
    """
    # A negative value would shift into a negative, meaningless bit pattern,
    # so check both ends of the range.
    assert 0 <= funct3 <= 0b111
    return 0b11000 | (funct3 << 5)
|
||||
|
||||
|
||||
def JALR(funct3=0):
    # type: (int) -> int
    """
    Compute the encbits for the JALR base opcode.

    The result is `opcode[6:2] | (funct3 << 5)` with `opcode[6:2]` fixed to
    `0b11001`.

    :param funct3: Value of the 3-bit `funct3` field, 0 through 7. Defaults
                   to 0.
    """
    # A negative value would shift into a negative, meaningless bit pattern,
    # so check both ends of the range.
    assert 0 <= funct3 <= 0b111
    return 0b11001 | (funct3 << 5)
|
||||
|
||||
|
||||
def JAL():
    # type: () -> int
    """
    Return the encbits for the JAL base opcode.

    JAL takes no `funct3` field here, so the encbits are just `opcode[6:2]`.
    """
    opcode_6_2 = 0b11011
    return opcode_6_2
|
||||
|
||||
|
||||
def OPIMM(funct3, funct7=0):
    # type: (int, int) -> int
    """
    Compute the encbits for the OP-IMM base opcode.

    The result is `opcode[6:2] | (funct3 << 5) | (funct7 << 8)` with
    `opcode[6:2]` fixed to `0b00100`.

    :param funct3: Value of the 3-bit `funct3` field, 0 through 7.
    :param funct7: Value of the 7-bit `funct7` field, 0 through 127. Defaults
                   to 0.
    """
    assert 0 <= funct3 <= 0b111
    # Range-check funct7 the same way OP() and OP32() do, so an oversized
    # value cannot silently spill past the encbits.
    assert 0 <= funct7 <= 0b1111111
    return 0b00100 | (funct3 << 5) | (funct7 << 8)
|
||||
|
||||
|
||||
def OPIMM32(funct3, funct7=0):
    # type: (int, int) -> int
    """
    Compute the encbits for the OP-IMM-32 base opcode.

    The result is `opcode[6:2] | (funct3 << 5) | (funct7 << 8)` with
    `opcode[6:2]` fixed to `0b00110`.

    :param funct3: Value of the 3-bit `funct3` field, 0 through 7.
    :param funct7: Value of the 7-bit `funct7` field, 0 through 127. Defaults
                   to 0.
    """
    assert 0 <= funct3 <= 0b111
    # Range-check funct7 the same way OP() and OP32() do, so an oversized
    # value cannot silently spill past the encbits.
    assert 0 <= funct7 <= 0b1111111
    return 0b00110 | (funct3 << 5) | (funct7 << 8)
|
||||
|
||||
|
||||
def OP(funct3, funct7):
    # type: (int, int) -> int
    """
    Compute the encbits for the OP base opcode.

    The result is `opcode[6:2] | (funct3 << 5) | (funct7 << 8)` with
    `opcode[6:2]` fixed to `0b01100`.

    :param funct3: Value of the 3-bit `funct3` field, 0 through 7.
    :param funct7: Value of the 7-bit `funct7` field, 0 through 127.
    """
    # Negative values would shift into negative, meaningless bit patterns,
    # so check both ends of each range.
    assert 0 <= funct3 <= 0b111
    assert 0 <= funct7 <= 0b1111111
    return 0b01100 | (funct3 << 5) | (funct7 << 8)
|
||||
|
||||
|
||||
def OP32(funct3, funct7):
    # type: (int, int) -> int
    """
    Compute the encbits for the OP-32 base opcode.

    The result is `opcode[6:2] | (funct3 << 5) | (funct7 << 8)` with
    `opcode[6:2]` fixed to `0b01110`.

    :param funct3: Value of the 3-bit `funct3` field, 0 through 7.
    :param funct7: Value of the 7-bit `funct7` field, 0 through 127.
    """
    # Negative values would shift into negative, meaningless bit patterns,
    # so check both ends of each range.
    assert 0 <= funct3 <= 0b111
    assert 0 <= funct7 <= 0b1111111
    return 0b01110 | (funct3 << 5) | (funct7 << 8)
|
||||
|
||||
|
||||
def AUIPC():
    # type: () -> int
    """
    Return the encbits for the AUIPC base opcode.

    There is no `funct3` field here, so the encbits are just `opcode[6:2]`.
    """
    return 0b00101


# The function was originally published under the misspelled name `AIUPC`.
# Keep that name as an alias so existing importers continue to work.
AIUPC = AUIPC
|
||||
|
||||
|
||||
def LUI():
    # type: () -> int
    """
    Return the encbits for the LUI base opcode.

    There is no `funct3` field here, so the encbits are just `opcode[6:2]`.
    """
    opcode_6_2 = 0b01101
    return opcode_6_2
|
||||
|
||||
|
||||
# R-type 32-bit instructions: These are mostly binary arithmetic instructions.
|
||||
# The encbits are `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`.
|
||||
R = EncRecipe(
|
||||
'R', Binary, base_size=4, ins=(GPR, GPR), outs=GPR,
|
||||
emit='put_r(bits, in_reg0, in_reg1, out_reg0, sink);')
|
||||
|
||||
# R-type with an immediate shift amount instead of rs2.
|
||||
Rshamt = EncRecipe(
|
||||
'Rshamt', BinaryImm, base_size=4, ins=GPR, outs=GPR,
|
||||
emit='put_rshamt(bits, in_reg0, imm.into(), out_reg0, sink);')
|
||||
|
||||
# R-type encoding of an integer comparison.
|
||||
Ricmp = EncRecipe(
|
||||
'Ricmp', IntCompare, base_size=4, ins=(GPR, GPR), outs=GPR,
|
||||
emit='put_r(bits, in_reg0, in_reg1, out_reg0, sink);')
|
||||
|
||||
Ii = EncRecipe(
|
||||
'Ii', BinaryImm, base_size=4, ins=GPR, outs=GPR,
|
||||
instp=IsSignedInt(BinaryImm.imm, 12),
|
||||
emit='put_i(bits, in_reg0, imm.into(), out_reg0, sink);')
|
||||
|
||||
# I-type instruction with a hardcoded %x0 rs1.
|
||||
Iz = EncRecipe(
|
||||
'Iz', UnaryImm, base_size=4, ins=(), outs=GPR,
|
||||
instp=IsSignedInt(UnaryImm.imm, 12),
|
||||
emit='put_i(bits, 0, imm.into(), out_reg0, sink);')
|
||||
|
||||
# I-type encoding of an integer comparison.
|
||||
Iicmp = EncRecipe(
|
||||
'Iicmp', IntCompareImm, base_size=4, ins=GPR, outs=GPR,
|
||||
instp=IsSignedInt(IntCompareImm.imm, 12),
|
||||
emit='put_i(bits, in_reg0, imm.into(), out_reg0, sink);')
|
||||
|
||||
# I-type encoding for `jalr` as a return instruction. We won't use the
|
||||
# immediate offset.
|
||||
# The variable return values are not encoded.
|
||||
Iret = EncRecipe(
|
||||
'Iret', MultiAry, base_size=4, ins=(), outs=(),
|
||||
emit='''
|
||||
// Return instructions are always a jalr to %x1.
|
||||
// The return address is provided as a special-purpose link argument.
|
||||
put_i(
|
||||
bits,
|
||||
1, // rs1 = %x1
|
||||
0, // no offset.
|
||||
0, // rd = %x0: no address written.
|
||||
sink,
|
||||
);
|
||||
''')
|
||||
|
||||
# I-type encoding for `jalr` as a call_indirect.
|
||||
Icall = EncRecipe(
|
||||
'Icall', CallIndirect, base_size=4, ins=GPR, outs=(),
|
||||
emit='''
|
||||
// call_indirect instructions are jalr with rd=%x1.
|
||||
put_i(
|
||||
bits,
|
||||
in_reg0,
|
||||
0, // no offset.
|
||||
1, // rd = %x1: link register.
|
||||
sink,
|
||||
);
|
||||
''')
|
||||
|
||||
|
||||
# Copy of a GPR is implemented as addi x, 0.
|
||||
Icopy = EncRecipe(
|
||||
'Icopy', Unary, base_size=4, ins=GPR, outs=GPR,
|
||||
emit='put_i(bits, in_reg0, 0, out_reg0, sink);')
|
||||
|
||||
# Same for a GPR regmove.
|
||||
Irmov = EncRecipe(
|
||||
'Irmov', RegMove, base_size=4, ins=GPR, outs=(),
|
||||
emit='put_i(bits, src, 0, dst, sink);')
|
||||
|
||||
# U-type instructions have a 20-bit immediate that targets bits 12-31.
|
||||
U = EncRecipe(
|
||||
'U', UnaryImm, base_size=4, ins=(), outs=GPR,
|
||||
instp=IsSignedInt(UnaryImm.imm, 32, 12),
|
||||
emit='put_u(bits, imm.into(), out_reg0, sink);')
|
||||
|
||||
# UJ-type unconditional branch instructions.
|
||||
UJ = EncRecipe(
|
||||
'UJ', Jump, base_size=4, ins=(), outs=(), branch_range=(0, 21),
|
||||
emit='''
|
||||
let dest = i64::from(func.offsets[destination]);
|
||||
let disp = dest - i64::from(sink.offset());
|
||||
put_uj(bits, disp, 0, sink);
|
||||
''')
|
||||
|
||||
UJcall = EncRecipe(
|
||||
'UJcall', Call, base_size=4, ins=(), outs=(),
|
||||
emit='''
|
||||
sink.reloc_external(Reloc::RiscvCall,
|
||||
&func.dfg.ext_funcs[func_ref].name,
|
||||
0);
|
||||
// rd=%x1 is the standard link register.
|
||||
put_uj(bits, 0, 1, sink);
|
||||
''')
|
||||
|
||||
# SB-type branch instructions.
|
||||
SB = EncRecipe(
|
||||
'SB', BranchIcmp, base_size=4,
|
||||
ins=(GPR, GPR), outs=(),
|
||||
branch_range=(0, 13),
|
||||
emit='''
|
||||
let dest = i64::from(func.offsets[destination]);
|
||||
let disp = dest - i64::from(sink.offset());
|
||||
put_sb(bits, disp, in_reg0, in_reg1, sink);
|
||||
''')
|
||||
|
||||
# SB-type branch instruction with rs2 fixed to zero.
|
||||
SBzero = EncRecipe(
|
||||
'SBzero', Branch, base_size=4,
|
||||
ins=(GPR), outs=(),
|
||||
branch_range=(0, 13),
|
||||
emit='''
|
||||
let dest = i64::from(func.offsets[destination]);
|
||||
let disp = dest - i64::from(sink.offset());
|
||||
put_sb(bits, disp, in_reg0, 0, sink);
|
||||
''')
|
||||
|
||||
# Spill of a GPR.
|
||||
GPsp = EncRecipe(
|
||||
'GPsp', Unary, base_size=4,
|
||||
ins=GPR, outs=Stack(GPR),
|
||||
emit='unimplemented!();')
|
||||
|
||||
# Fill of a GPR.
|
||||
GPfi = EncRecipe(
|
||||
'GPfi', Unary, base_size=4,
|
||||
ins=Stack(GPR), outs=GPR,
|
||||
emit='unimplemented!();')
|
||||
23
cranelift/codegen/meta-python/isa/riscv/registers.py
Normal file
23
cranelift/codegen/meta-python/isa/riscv/registers.py
Normal file
@@ -0,0 +1,23 @@
|
||||
"""
|
||||
RISC-V register banks.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.registers import RegBank, RegClass
|
||||
from .defs import ISA
|
||||
|
||||
|
||||
# We include `x0`, a.k.a `zero` in the register bank. It will be reserved.
|
||||
IntRegs = RegBank(
|
||||
'IntRegs', ISA,
|
||||
'General purpose registers',
|
||||
units=32, prefix='x')
|
||||
|
||||
FloatRegs = RegBank(
|
||||
'FloatRegs', ISA,
|
||||
'Floating point registers',
|
||||
units=32, prefix='f')
|
||||
|
||||
GPR = RegClass(IntRegs)
|
||||
FPR = RegClass(FloatRegs)
|
||||
|
||||
RegClass.extract_names(globals())
|
||||
31
cranelift/codegen/meta-python/isa/riscv/settings.py
Normal file
31
cranelift/codegen/meta-python/isa/riscv/settings.py
Normal file
@@ -0,0 +1,31 @@
|
||||
"""
|
||||
RISC-V settings.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.settings import SettingGroup, BoolSetting
|
||||
from cdsl.predicates import And
|
||||
import base.settings as shared
|
||||
from .defs import ISA
|
||||
|
||||
ISA.settings = SettingGroup('riscv', parent=shared.group)
|
||||
|
||||
supports_m = BoolSetting("CPU supports the 'M' extension (mul/div)")
|
||||
supports_a = BoolSetting("CPU supports the 'A' extension (atomics)")
|
||||
supports_f = BoolSetting("CPU supports the 'F' extension (float)")
|
||||
supports_d = BoolSetting("CPU supports the 'D' extension (double)")
|
||||
|
||||
enable_m = BoolSetting(
|
||||
"Enable the use of 'M' instructions if available",
|
||||
default=True)
|
||||
|
||||
enable_e = BoolSetting(
|
||||
"Enable the 'RV32E' instruction set with only 16 registers")
|
||||
|
||||
use_m = And(supports_m, enable_m)
|
||||
use_a = And(supports_a, shared.enable_atomics)
|
||||
use_f = And(supports_f, shared.enable_float)
|
||||
use_d = And(supports_d, shared.enable_float)
|
||||
|
||||
full_float = And(shared.enable_simd, supports_f, supports_d)
|
||||
|
||||
ISA.settings.close(globals())
|
||||
22
cranelift/codegen/meta-python/isa/x86/__init__.py
Normal file
22
cranelift/codegen/meta-python/isa/x86/__init__.py
Normal file
@@ -0,0 +1,22 @@
|
||||
"""
|
||||
x86 Target Architecture
|
||||
-----------------------
|
||||
|
||||
This target ISA generates code for x86 CPUs with two separate CPU modes:
|
||||
|
||||
`I32`
|
||||
32-bit x86 architecture, also known as 'IA-32', also sometimes referred
|
||||
to as 'i386', however note that Cranelift depends on instructions not
|
||||
in the original `i386`, such as SSE2, CMOVcc, and UD2.
|
||||
|
||||
`I64`
|
||||
x86-64 architecture, also known as 'AMD64', 'Intel 64', and 'x64'.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from . import defs
|
||||
from . import encodings, settings, registers # noqa
|
||||
from cdsl.isa import TargetISA # noqa
|
||||
|
||||
# Re-export the primary target ISA definition.
|
||||
ISA = defs.ISA.finish() # type: TargetISA
|
||||
28
cranelift/codegen/meta-python/isa/x86/defs.py
Normal file
28
cranelift/codegen/meta-python/isa/x86/defs.py
Normal file
@@ -0,0 +1,28 @@
|
||||
"""
|
||||
x86 definitions.
|
||||
|
||||
Commonly used definitions.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.isa import TargetISA, CPUMode
|
||||
import base.instructions
|
||||
from . import instructions as x86
|
||||
from base.immediates import floatcc
|
||||
|
||||
ISA = TargetISA('x86', [base.instructions.GROUP, x86.GROUP]) # type: TargetISA
|
||||
|
||||
# CPU modes for 32-bit and 64-bit operation.
|
||||
X86_64 = CPUMode('I64', ISA)
|
||||
X86_32 = CPUMode('I32', ISA)
|
||||
|
||||
# The set of floating point condition codes that are directly supported.
|
||||
# Other condition codes need to be reversed or expressed as two tests.
|
||||
supported_floatccs = [
|
||||
floatcc.ord,
|
||||
floatcc.uno,
|
||||
floatcc.one,
|
||||
floatcc.ueq,
|
||||
floatcc.gt,
|
||||
floatcc.ge,
|
||||
floatcc.ult,
|
||||
floatcc.ule]
|
||||
748
cranelift/codegen/meta-python/isa/x86/encodings.py
Normal file
748
cranelift/codegen/meta-python/isa/x86/encodings.py
Normal file
@@ -0,0 +1,748 @@
|
||||
"""
|
||||
x86 Encodings.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.predicates import IsZero32BitFloat, IsZero64BitFloat
|
||||
from cdsl.predicates import IsUnsignedInt, Not, And
|
||||
from base.predicates import IsColocatedFunc, IsColocatedData, LengthEquals
|
||||
from base import instructions as base
|
||||
from base import types
|
||||
from base.formats import UnaryIeee32, UnaryIeee64, UnaryImm
|
||||
from base.formats import FuncAddr, Call, LoadComplex, StoreComplex
|
||||
from .defs import X86_64, X86_32
|
||||
from . import recipes as r
|
||||
from . import settings as cfg
|
||||
from . import instructions as x86
|
||||
from .legalize import x86_expand
|
||||
from base.legalize import narrow, widen, expand_flags
|
||||
from base.settings import allones_funcaddrs, is_pic
|
||||
from .settings import use_sse41
|
||||
|
||||
try:
|
||||
from typing import TYPE_CHECKING, Any # noqa
|
||||
if TYPE_CHECKING:
|
||||
from cdsl.instructions import MaybeBoundInst # noqa
|
||||
from cdsl.predicates import FieldPredicate # noqa
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
X86_32.legalize_monomorphic(expand_flags)
|
||||
X86_32.legalize_type(
|
||||
default=narrow,
|
||||
b1=expand_flags,
|
||||
i8=widen,
|
||||
i16=widen,
|
||||
i32=x86_expand,
|
||||
f32=x86_expand,
|
||||
f64=x86_expand)
|
||||
|
||||
X86_64.legalize_monomorphic(expand_flags)
|
||||
X86_64.legalize_type(
|
||||
default=narrow,
|
||||
b1=expand_flags,
|
||||
i8=widen,
|
||||
i16=widen,
|
||||
i32=x86_expand,
|
||||
i64=x86_expand,
|
||||
f32=x86_expand,
|
||||
f64=x86_expand)
|
||||
|
||||
|
||||
#
|
||||
# Helper functions for generating encodings.
|
||||
#
|
||||
|
||||
def enc_x86_64(inst, recipe, *args, **kwargs):
    # type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
    """
    Register `inst` in the X86_64 CPU mode twice: first using `recipe.rex`,
    then using the plain `recipe` (no REX prefix).

    `args` and `kwargs` are forwarded unchanged to both recipe calls.
    """
    # Order matters to callers: the REX variant is registered first.
    for make_recipe in (recipe.rex, recipe):
        X86_64.enc(inst, *make_recipe(*args, **kwargs))
|
||||
|
||||
|
||||
def enc_x86_64_instp(inst, recipe, instp, *args, **kwargs):
    # type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None
    """
    Register `inst` in the X86_64 CPU mode with and without a REX prefix,
    attaching the instruction predicate `instp` to both encodings.

    `args` and `kwargs` are forwarded unchanged to both recipe calls.
    """
    # REX variant first, then the REX-less one.
    with_rex = recipe.rex(*args, **kwargs)
    X86_64.enc(inst, *with_rex, instp=instp)

    without_rex = recipe(*args, **kwargs)
    X86_64.enc(inst, *without_rex, instp=instp)
|
||||
|
||||
|
||||
def enc_both(inst, recipe, *args, **kwargs):
    # type: (MaybeBoundInst, r.TailRecipe, *int, **Any) -> None
    """
    Register `inst` in both x86 CPU modes.

    X86_32 receives one encoding built from the plain `recipe`; the X86_64
    encodings are delegated to `enc_x86_64` with the same arguments.
    """
    encoding_32 = recipe(*args, **kwargs)
    X86_32.enc(inst, *encoding_32)

    enc_x86_64(inst, recipe, *args, **kwargs)
|
||||
|
||||
|
||||
def enc_both_instp(inst, recipe, instp, *args, **kwargs):
    # type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **Any) -> None
    """
    Register `inst` in both x86 CPU modes, attaching the instruction
    predicate `instp` to every encoding.

    X86_32 receives one encoding built from the plain `recipe`; the X86_64
    encodings are delegated to `enc_x86_64_instp` with the same arguments.
    """
    encoding_32 = recipe(*args, **kwargs)
    X86_32.enc(inst, *encoding_32, instp=instp)

    enc_x86_64_instp(inst, recipe, instp, *args, **kwargs)
|
||||
|
||||
|
||||
def enc_i32_i64(inst, recipe, *args, **kwargs):
    # type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
    """
    Register the i32 and i64 forms of `inst`.

    - `inst.i32` goes to X86_32 using the plain recipe.
    - `inst.i32` goes to X86_64 with and without REX.
    - `inst.i64` goes to X86_64 using `recipe.rex` with `w=1` (REX.W).
    """
    # REX-less encoding must come after REX encoding so we don't use it by
    # default. Otherwise reg-alloc would never use r8 and up.
    i32_targets = (
        (X86_32, recipe),
        (X86_64, recipe.rex),
        (X86_64, recipe),
    )
    for mode, make_recipe in i32_targets:
        mode.enc(inst.i32, *make_recipe(*args, **kwargs))

    X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs))
|
||||
|
||||
|
||||
def enc_i32_i64_instp(inst, recipe, instp, *args, **kwargs):
    # type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None
    """
    Register the i32 and i64 forms of `inst`, each guarded by `instp`.

    - `inst.i32` goes to X86_32 using the plain recipe.
    - `inst.i32` goes to X86_64 with and without REX.
    - `inst.i64` goes to X86_64 using `recipe.rex` with `w=1` (REX.W).

    This mirrors `enc_i32_i64`, except that the instruction predicate
    `instp` is passed along with every encoding.
    """
    # REX-less encoding must come after REX encoding so we don't use it by
    # default. Otherwise reg-alloc would never use r8 and up.
    i32_targets = (
        (X86_32, recipe),
        (X86_64, recipe.rex),
        (X86_64, recipe),
    )
    for mode, make_recipe in i32_targets:
        mode.enc(inst.i32, *make_recipe(*args, **kwargs), instp=instp)

    X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs), instp=instp)
|
||||
|
||||
|
||||
def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs):
    # type: (MaybeBoundInst, bool, r.TailRecipe, *int, **int) -> None
    """
    Register load/store encodings for `inst` with i32 and i64 addresses.

    - X86_32: `inst.i32.any` with the plain recipe.
    - X86_64: `inst.i32.any` with the REX recipe, then with the plain recipe.
    - X86_64: `inst.i64.any`; when `w_bit` is true only the REX form with
      `w=1` (REX.W) is added, otherwise the REX form without REX.W is added
      followed by the REX-less form.
    """
    plain_operands = recipe(*args, **kwargs)
    X86_32.enc(inst.i32.any, *plain_operands)

    # Keep the REX form ahead of the REX-less one: the REX-less encoding must
    # not be the default, otherwise reg-alloc would never use r8 and up.
    rex_operands = recipe.rex(*args, **kwargs)
    X86_64.enc(inst.i32.any, *rex_operands)
    plain_operands = recipe(*args, **kwargs)
    X86_64.enc(inst.i32.any, *plain_operands)

    if not w_bit:
        rex_operands = recipe.rex(*args, **kwargs)
        X86_64.enc(inst.i64.any, *rex_operands)
        plain_operands = recipe(*args, **kwargs)
        X86_64.enc(inst.i64.any, *plain_operands)
    else:
        rex_w_operands = recipe.rex(*args, w=1, **kwargs)
        X86_64.enc(inst.i64.any, *rex_w_operands)
|
||||
|
||||
|
||||
for inst, opc in [
|
||||
(base.iadd, 0x01),
|
||||
(base.isub, 0x29),
|
||||
(base.band, 0x21),
|
||||
(base.bor, 0x09),
|
||||
(base.bxor, 0x31)]:
|
||||
enc_i32_i64(inst, r.rr, opc)
|
||||
|
||||
# x86 has a bitwise not instruction NOT.
|
||||
enc_i32_i64(base.bnot, r.ur, 0xf7, rrr=2)
|
||||
|
||||
# Also add `b1` encodings for the logic instructions.
|
||||
# TODO: Should this be done with 8-bit instructions? It would improve
|
||||
# partial register dependencies.
|
||||
enc_both(base.band.b1, r.rr, 0x21)
|
||||
enc_both(base.bor.b1, r.rr, 0x09)
|
||||
enc_both(base.bxor.b1, r.rr, 0x31)
|
||||
|
||||
enc_i32_i64(base.imul, r.rrx, 0x0f, 0xaf)
|
||||
enc_i32_i64(x86.sdivmodx, r.div, 0xf7, rrr=7)
|
||||
enc_i32_i64(x86.udivmodx, r.div, 0xf7, rrr=6)
|
||||
|
||||
enc_i32_i64(x86.smulx, r.mulx, 0xf7, rrr=5)
|
||||
enc_i32_i64(x86.umulx, r.mulx, 0xf7, rrr=4)
|
||||
|
||||
enc_i32_i64(base.copy, r.umr, 0x89)
|
||||
for ty in [types.b1, types.i8, types.i16]:
|
||||
enc_both(base.copy.bind(ty), r.umr, 0x89)
|
||||
|
||||
# For x86-64, only define REX forms for now, since we can't describe the
|
||||
# special regunit immediate operands with the current constraint language.
|
||||
for ty in [types.i8, types.i16, types.i32]:
|
||||
X86_32.enc(base.regmove.bind(ty), *r.rmov(0x89))
|
||||
X86_64.enc(base.regmove.bind(ty), *r.rmov.rex(0x89))
|
||||
X86_64.enc(base.regmove.i64, *r.rmov.rex(0x89, w=1))
|
||||
|
||||
enc_both(base.regmove.b1, r.rmov, 0x89)
|
||||
enc_both(base.regmove.i8, r.rmov, 0x89)
|
||||
|
||||
# Immediate instructions with sign-extended 8-bit and 32-bit immediate.
|
||||
for inst, rrr in [
|
||||
(base.iadd_imm, 0),
|
||||
(base.band_imm, 4),
|
||||
(base.bor_imm, 1),
|
||||
(base.bxor_imm, 6)]:
|
||||
enc_i32_i64(inst, r.r_ib, 0x83, rrr=rrr)
|
||||
enc_i32_i64(inst, r.r_id, 0x81, rrr=rrr)
|
||||
|
||||
# TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as
|
||||
# band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks.
|
||||
|
||||
# Immediate constants.
|
||||
X86_32.enc(base.iconst.i32, *r.pu_id(0xb8))
|
||||
|
||||
X86_64.enc(base.iconst.i32, *r.pu_id.rex(0xb8))
|
||||
X86_64.enc(base.iconst.i32, *r.pu_id(0xb8))
|
||||
# The 32-bit immediate movl also zero-extends to 64 bits.
|
||||
X86_64.enc(base.iconst.i64, *r.pu_id.rex(0xb8),
|
||||
instp=IsUnsignedInt(UnaryImm.imm, 32))
|
||||
X86_64.enc(base.iconst.i64, *r.pu_id(0xb8),
|
||||
instp=IsUnsignedInt(UnaryImm.imm, 32))
|
||||
# Sign-extended 32-bit immediate.
|
||||
X86_64.enc(base.iconst.i64, *r.u_id.rex(0xc7, rrr=0, w=1))
|
||||
# Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
|
||||
X86_64.enc(base.iconst.i64, *r.pu_iq.rex(0xb8, w=1))
|
||||
|
||||
# bool constants.
|
||||
enc_both(base.bconst.b1, r.pu_id_bool, 0xb8)
|
||||
|
||||
# Shifts and rotates.
|
||||
# Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
|
||||
# and 16-bit shifts would need explicit masking.
|
||||
for inst, rrr in [
|
||||
(base.rotl, 0),
|
||||
(base.rotr, 1),
|
||||
(base.ishl, 4),
|
||||
(base.ushr, 5),
|
||||
(base.sshr, 7)]:
|
||||
# Cannot use enc_i32_i64 for this pattern because instructions require
|
||||
# .any suffix.
|
||||
X86_32.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr))
|
||||
X86_64.enc(inst.i64.any, *r.rc.rex(0xd3, rrr=rrr, w=1))
|
||||
X86_64.enc(inst.i32.any, *r.rc.rex(0xd3, rrr=rrr))
|
||||
X86_64.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr))
|
||||
|
||||
for inst, rrr in [
|
||||
(base.ishl_imm, 4),
|
||||
(base.ushr_imm, 5),
|
||||
(base.sshr_imm, 7)]:
|
||||
enc_i32_i64(inst, r.r_ib, 0xc1, rrr=rrr)
|
||||
|
||||
# Population count.
|
||||
X86_32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
|
||||
X86_64.enc(base.popcnt.i64, *r.urm.rex(0xf3, 0x0f, 0xb8, w=1),
|
||||
isap=cfg.use_popcnt)
|
||||
X86_64.enc(base.popcnt.i32, *r.urm.rex(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
|
||||
X86_64.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
|
||||
|
||||
# Count leading zero bits.
|
||||
X86_32.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
|
||||
X86_64.enc(base.clz.i64, *r.urm.rex(0xf3, 0x0f, 0xbd, w=1),
|
||||
isap=cfg.use_lzcnt)
|
||||
X86_64.enc(base.clz.i32, *r.urm.rex(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
|
||||
X86_64.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
|
||||
|
||||
# Count trailing zero bits.
|
||||
X86_32.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
|
||||
X86_64.enc(base.ctz.i64, *r.urm.rex(0xf3, 0x0f, 0xbc, w=1),
|
||||
isap=cfg.use_bmi1)
|
||||
X86_64.enc(base.ctz.i32, *r.urm.rex(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
|
||||
X86_64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
|
||||
|
||||
#
|
||||
# Loads and stores.
|
||||
#
|
||||
|
||||
ldcomplexp = LengthEquals(LoadComplex, 2)
|
||||
for recipe in [r.ldWithIndex, r.ldWithIndexDisp8, r.ldWithIndexDisp32]:
|
||||
enc_i32_i64_instp(base.load_complex, recipe, ldcomplexp, 0x8b)
|
||||
enc_x86_64_instp(base.uload32_complex, recipe, ldcomplexp, 0x8b)
|
||||
X86_64.enc(base.sload32_complex, *recipe.rex(0x63, w=1),
|
||||
instp=ldcomplexp)
|
||||
enc_i32_i64_instp(base.uload16_complex, recipe, ldcomplexp, 0x0f, 0xb7)
|
||||
enc_i32_i64_instp(base.sload16_complex, recipe, ldcomplexp, 0x0f, 0xbf)
|
||||
enc_i32_i64_instp(base.uload8_complex, recipe, ldcomplexp, 0x0f, 0xb6)
|
||||
enc_i32_i64_instp(base.sload8_complex, recipe, ldcomplexp, 0x0f, 0xbe)
|
||||
|
||||
stcomplexp = LengthEquals(StoreComplex, 3)
|
||||
for recipe in [r.stWithIndex, r.stWithIndexDisp8, r.stWithIndexDisp32]:
|
||||
enc_i32_i64_instp(base.store_complex, recipe, stcomplexp, 0x89)
|
||||
enc_x86_64_instp(base.istore32_complex, recipe, stcomplexp, 0x89)
|
||||
enc_both_instp(base.istore16_complex.i32, recipe, stcomplexp, 0x66, 0x89)
|
||||
enc_x86_64_instp(base.istore16_complex.i64, recipe, stcomplexp, 0x66, 0x89)
|
||||
|
||||
for recipe in [r.stWithIndex_abcd,
|
||||
r.stWithIndexDisp8_abcd,
|
||||
r.stWithIndexDisp32_abcd]:
|
||||
enc_both_instp(base.istore8_complex.i32, recipe, stcomplexp, 0x88)
|
||||
enc_x86_64_instp(base.istore8_complex.i64, recipe, stcomplexp, 0x88)
|
||||
|
||||
for recipe in [r.st, r.stDisp8, r.stDisp32]:
|
||||
enc_i32_i64_ld_st(base.store, True, recipe, 0x89)
|
||||
enc_x86_64(base.istore32.i64.any, recipe, 0x89)
|
||||
enc_i32_i64_ld_st(base.istore16, False, recipe, 0x66, 0x89)
|
||||
|
||||
# Byte stores are more complicated because the registers they can address
|
||||
# depend on the presence of a REX prefix. The st*_abcd recipes fall back to
|
||||
# the corresponding st* recipes when a REX prefix is applied.
|
||||
for recipe in [r.st_abcd, r.stDisp8_abcd, r.stDisp32_abcd]:
|
||||
enc_both(base.istore8.i32.any, recipe, 0x88)
|
||||
enc_x86_64(base.istore8.i64.any, recipe, 0x88)
|
||||
|
||||
enc_i32_i64(base.spill, r.spillSib32, 0x89)
|
||||
enc_i32_i64(base.regspill, r.regspill32, 0x89)
|
||||
|
||||
# Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid
|
||||
# constraining the permitted registers.
|
||||
# See MIN_SPILL_SLOT_SIZE which makes this safe.
|
||||
for ty in [types.b1, types.i8, types.i16]:
|
||||
enc_both(base.spill.bind(ty), r.spillSib32, 0x89)
|
||||
enc_both(base.regspill.bind(ty), r.regspill32, 0x89)
|
||||
|
||||
for recipe in [r.ld, r.ldDisp8, r.ldDisp32]:
|
||||
enc_i32_i64_ld_st(base.load, True, recipe, 0x8b)
|
||||
enc_x86_64(base.uload32.i64, recipe, 0x8b)
|
||||
X86_64.enc(base.sload32.i64, *recipe.rex(0x63, w=1))
|
||||
enc_i32_i64_ld_st(base.uload16, True, recipe, 0x0f, 0xb7)
|
||||
enc_i32_i64_ld_st(base.sload16, True, recipe, 0x0f, 0xbf)
|
||||
enc_i32_i64_ld_st(base.uload8, True, recipe, 0x0f, 0xb6)
|
||||
enc_i32_i64_ld_st(base.sload8, True, recipe, 0x0f, 0xbe)
|
||||
|
||||
enc_i32_i64(base.fill, r.fillSib32, 0x8b)
|
||||
enc_i32_i64(base.regfill, r.regfill32, 0x8b)
|
||||
|
||||
# Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above.
|
||||
for ty in [types.b1, types.i8, types.i16]:
|
||||
enc_both(base.fill.bind(ty), r.fillSib32, 0x8b)
|
||||
enc_both(base.regfill.bind(ty), r.regfill32, 0x8b)
|
||||
|
||||
# Push and Pop
|
||||
X86_32.enc(x86.push.i32, *r.pushq(0x50))
|
||||
enc_x86_64(x86.push.i64, r.pushq, 0x50)
|
||||
|
||||
X86_32.enc(x86.pop.i32, *r.popq(0x58))
|
||||
enc_x86_64(x86.pop.i64, r.popq, 0x58)
|
||||
|
||||
# Copy Special
|
||||
# For x86-64, only define REX forms for now, since we can't describe the
|
||||
# special regunit immediate operands with the current constraint language.
|
||||
X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1))
|
||||
X86_32.enc(base.copy_special, *r.copysp(0x89))
|
||||
|
||||
# Adjust SP down by a dynamic value (or up, with a negative operand).
|
||||
X86_32.enc(base.adjust_sp_down.i32, *r.adjustsp(0x29))
|
||||
X86_64.enc(base.adjust_sp_down.i64, *r.adjustsp.rex(0x29, w=1))
|
||||
|
||||
# Adjust SP up by an immediate (or down, with a negative immediate)
|
||||
X86_32.enc(base.adjust_sp_up_imm, *r.adjustsp_ib(0x83))
|
||||
X86_32.enc(base.adjust_sp_up_imm, *r.adjustsp_id(0x81))
|
||||
X86_64.enc(base.adjust_sp_up_imm, *r.adjustsp_ib.rex(0x83, w=1))
|
||||
X86_64.enc(base.adjust_sp_up_imm, *r.adjustsp_id.rex(0x81, w=1))
|
||||
|
||||
# Adjust SP down by an immediate (or up, with a negative immediate)
|
||||
X86_32.enc(base.adjust_sp_down_imm, *r.adjustsp_ib(0x83, rrr=5))
|
||||
X86_32.enc(base.adjust_sp_down_imm, *r.adjustsp_id(0x81, rrr=5))
|
||||
X86_64.enc(base.adjust_sp_down_imm, *r.adjustsp_ib.rex(0x83, rrr=5, w=1))
|
||||
X86_64.enc(base.adjust_sp_down_imm, *r.adjustsp_id.rex(0x81, rrr=5, w=1))
|
||||
|
||||
#
|
||||
# Float loads and stores.
|
||||
#
|
||||
|
||||
enc_both(base.load.f32.any, r.fld, 0xf3, 0x0f, 0x10)
|
||||
enc_both(base.load.f32.any, r.fldDisp8, 0xf3, 0x0f, 0x10)
|
||||
enc_both(base.load.f32.any, r.fldDisp32, 0xf3, 0x0f, 0x10)
|
||||
|
||||
enc_both(base.load_complex.f32, r.fldWithIndex, 0xf3, 0x0f, 0x10)
|
||||
enc_both(base.load_complex.f32, r.fldWithIndexDisp8, 0xf3, 0x0f, 0x10)
|
||||
enc_both(base.load_complex.f32, r.fldWithIndexDisp32, 0xf3, 0x0f, 0x10)
|
||||
|
||||
enc_both(base.load.f64.any, r.fld, 0xf2, 0x0f, 0x10)
|
||||
enc_both(base.load.f64.any, r.fldDisp8, 0xf2, 0x0f, 0x10)
|
||||
enc_both(base.load.f64.any, r.fldDisp32, 0xf2, 0x0f, 0x10)
|
||||
|
||||
enc_both(base.load_complex.f64, r.fldWithIndex, 0xf2, 0x0f, 0x10)
|
||||
enc_both(base.load_complex.f64, r.fldWithIndexDisp8, 0xf2, 0x0f, 0x10)
|
||||
enc_both(base.load_complex.f64, r.fldWithIndexDisp32, 0xf2, 0x0f, 0x10)
|
||||
|
||||
enc_both(base.store.f32.any, r.fst, 0xf3, 0x0f, 0x11)
|
||||
enc_both(base.store.f32.any, r.fstDisp8, 0xf3, 0x0f, 0x11)
|
||||
enc_both(base.store.f32.any, r.fstDisp32, 0xf3, 0x0f, 0x11)
|
||||
|
||||
enc_both(base.store_complex.f32, r.fstWithIndex, 0xf3, 0x0f, 0x11)
|
||||
enc_both(base.store_complex.f32, r.fstWithIndexDisp8, 0xf3, 0x0f, 0x11)
|
||||
enc_both(base.store_complex.f32, r.fstWithIndexDisp32, 0xf3, 0x0f, 0x11)
|
||||
|
||||
enc_both(base.store.f64.any, r.fst, 0xf2, 0x0f, 0x11)
|
||||
enc_both(base.store.f64.any, r.fstDisp8, 0xf2, 0x0f, 0x11)
|
||||
enc_both(base.store.f64.any, r.fstDisp32, 0xf2, 0x0f, 0x11)
|
||||
|
||||
enc_both(base.store_complex.f64, r.fstWithIndex, 0xf2, 0x0f, 0x11)
|
||||
enc_both(base.store_complex.f64, r.fstWithIndexDisp8, 0xf2, 0x0f, 0x11)
|
||||
enc_both(base.store_complex.f64, r.fstWithIndexDisp32, 0xf2, 0x0f, 0x11)
|
||||
|
||||
enc_both(base.fill.f32, r.ffillSib32, 0xf3, 0x0f, 0x10)
|
||||
enc_both(base.regfill.f32, r.fregfill32, 0xf3, 0x0f, 0x10)
|
||||
enc_both(base.fill.f64, r.ffillSib32, 0xf2, 0x0f, 0x10)
|
||||
enc_both(base.regfill.f64, r.fregfill32, 0xf2, 0x0f, 0x10)
|
||||
|
||||
enc_both(base.spill.f32, r.fspillSib32, 0xf3, 0x0f, 0x11)
|
||||
enc_both(base.regspill.f32, r.fregspill32, 0xf3, 0x0f, 0x11)
|
||||
enc_both(base.spill.f64, r.fspillSib32, 0xf2, 0x0f, 0x11)
|
||||
enc_both(base.regspill.f64, r.fregspill32, 0xf2, 0x0f, 0x11)
|
||||
|
||||
#
|
||||
# Function addresses.
|
||||
#
|
||||
|
||||
# Non-PIC, `allones_funcaddrs` disabled.
|
||||
X86_32.enc(base.func_addr.i32, *r.fnaddr4(0xb8),
|
||||
isap=And(Not(allones_funcaddrs), Not(is_pic)))
|
||||
X86_64.enc(base.func_addr.i64, *r.fnaddr8.rex(0xb8, w=1),
|
||||
isap=And(Not(allones_funcaddrs), Not(is_pic)))
|
||||
|
||||
# Non-PIC, all-ones funcaddresses (`allones_funcaddrs` enabled).
|
||||
X86_32.enc(base.func_addr.i32, *r.allones_fnaddr4(0xb8),
|
||||
isap=And(allones_funcaddrs, Not(is_pic)))
|
||||
X86_64.enc(base.func_addr.i64, *r.allones_fnaddr8.rex(0xb8, w=1),
|
||||
isap=And(allones_funcaddrs, Not(is_pic)))
|
||||
|
||||
# 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's
|
||||
# pc-relative field.
|
||||
X86_64.enc(base.func_addr.i64, *r.pcrel_fnaddr8.rex(0x8d, w=1),
|
||||
instp=IsColocatedFunc(FuncAddr.func_ref))
|
||||
|
||||
# 64-bit, non-colocated, PIC.
|
||||
X86_64.enc(base.func_addr.i64, *r.got_fnaddr8.rex(0x8b, w=1),
|
||||
isap=is_pic)
|
||||
|
||||
#
|
||||
# Global addresses.
|
||||
#
|
||||
|
||||
# Non-PIC
|
||||
X86_32.enc(base.symbol_value.i32, *r.gvaddr4(0xb8),
|
||||
isap=Not(is_pic))
|
||||
X86_64.enc(base.symbol_value.i64, *r.gvaddr8.rex(0xb8, w=1),
|
||||
isap=Not(is_pic))
|
||||
|
||||
# PIC, colocated
|
||||
X86_64.enc(base.symbol_value.i64, *r.pcrel_gvaddr8.rex(0x8d, w=1),
|
||||
isap=is_pic,
|
||||
instp=IsColocatedData())
|
||||
|
||||
# PIC, non-colocated
|
||||
X86_64.enc(base.symbol_value.i64, *r.got_gvaddr8.rex(0x8b, w=1),
|
||||
isap=is_pic)
|
||||
|
||||
#
|
||||
# Stack addresses.
|
||||
#
|
||||
# TODO: Add encoding rules for stack_load and stack_store, so that they
|
||||
# don't get legalized to stack_addr + load/store.
|
||||
#
|
||||
X86_32.enc(base.stack_addr.i32, *r.spaddr4_id(0x8d))
|
||||
X86_64.enc(base.stack_addr.i64, *r.spaddr8_id.rex(0x8d, w=1))
|
||||
|
||||
#
|
||||
# Call/return
|
||||
#
|
||||
|
||||
# 32-bit, both PIC and non-PIC.
|
||||
X86_32.enc(base.call, *r.call_id(0xe8))
|
||||
|
||||
# 64-bit, colocated, both PIC and non-PIC. Use the call instruction's
|
||||
# pc-relative field.
|
||||
X86_64.enc(base.call, *r.call_id(0xe8),
|
||||
instp=IsColocatedFunc(Call.func_ref))
|
||||
|
||||
# 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version,
|
||||
# since non-PIC is currently using the large model, which requires calls be
|
||||
# lowered to func_addr+call_indirect.
|
||||
X86_64.enc(base.call, *r.call_plt_id(0xe8), isap=is_pic)
|
||||
|
||||
X86_32.enc(base.call_indirect.i32, *r.call_r(0xff, rrr=2))
|
||||
X86_64.enc(base.call_indirect.i64, *r.call_r.rex(0xff, rrr=2))
|
||||
X86_64.enc(base.call_indirect.i64, *r.call_r(0xff, rrr=2))
|
||||
|
||||
X86_32.enc(base.x_return, *r.ret(0xc3))
|
||||
X86_64.enc(base.x_return, *r.ret(0xc3))
|
||||
|
||||
#
|
||||
# Branches
|
||||
#
|
||||
enc_both(base.jump, r.jmpb, 0xeb)
|
||||
enc_both(base.jump, r.jmpd, 0xe9)
|
||||
|
||||
enc_both(base.brif, r.brib, 0x70)
|
||||
enc_both(base.brif, r.brid, 0x0f, 0x80)
|
||||
|
||||
# Not all float condition codes are legal, see `supported_floatccs`.
|
||||
enc_both(base.brff, r.brfb, 0x70)
|
||||
enc_both(base.brff, r.brfd, 0x0f, 0x80)
|
||||
|
||||
# Note that the tjccd opcode will be prefixed with 0x0f.
|
||||
enc_i32_i64(base.brz, r.tjccb, 0x74)
|
||||
enc_i32_i64(base.brz, r.tjccd, 0x84)
|
||||
enc_i32_i64(base.brnz, r.tjccb, 0x75)
|
||||
enc_i32_i64(base.brnz, r.tjccd, 0x85)
|
||||
|
||||
# Branch on a b1 value in a register only looks at the low 8 bits. See also
|
||||
# bint encodings below.
|
||||
#
|
||||
# Start with the worst-case encoding for X86_32 only. The register allocator
|
||||
# can't handle a branch with an ABCD-constrained operand.
|
||||
X86_32.enc(base.brz.b1, *r.t8jccd_long(0x84))
|
||||
X86_32.enc(base.brnz.b1, *r.t8jccd_long(0x85))
|
||||
|
||||
enc_both(base.brz.b1, r.t8jccb_abcd, 0x74)
|
||||
enc_both(base.brz.b1, r.t8jccd_abcd, 0x84)
|
||||
enc_both(base.brnz.b1, r.t8jccb_abcd, 0x75)
|
||||
enc_both(base.brnz.b1, r.t8jccd_abcd, 0x85)
|
||||
|
||||
#
|
||||
# Jump tables
|
||||
#
|
||||
X86_64.enc(base.jump_table_entry.i64.any.any, *r.jt_entry.rex(0x63, w=1))
|
||||
X86_32.enc(base.jump_table_entry.i32.any.any, *r.jt_entry(0x8b))
|
||||
|
||||
X86_64.enc(base.jump_table_base.i64, *r.jt_base.rex(0x8d, w=1))
|
||||
X86_32.enc(base.jump_table_base.i32, *r.jt_base(0x8d))
|
||||
|
||||
enc_x86_64(base.indirect_jump_table_br.i64, r.indirect_jmp, 0xff, rrr=4)
|
||||
X86_32.enc(base.indirect_jump_table_br.i32, *r.indirect_jmp(0xff, rrr=4))
|
||||
|
||||
#
|
||||
# Trap as ud2
|
||||
#
|
||||
X86_32.enc(base.trap, *r.trap(0x0f, 0x0b))
|
||||
X86_64.enc(base.trap, *r.trap(0x0f, 0x0b))
|
||||
|
||||
# Debug trap as int3
|
||||
X86_32.enc(base.debugtrap, r.debugtrap, 0)
|
||||
X86_64.enc(base.debugtrap, r.debugtrap, 0)
|
||||
|
||||
# Using a standard EncRecipe, not the TailRecipe.
|
||||
X86_32.enc(base.trapif, r.trapif, 0)
|
||||
X86_64.enc(base.trapif, r.trapif, 0)
|
||||
X86_32.enc(base.trapff, r.trapff, 0)
|
||||
X86_64.enc(base.trapff, r.trapff, 0)
|
||||
|
||||
#
|
||||
# Comparisons
|
||||
#
|
||||
enc_i32_i64(base.icmp, r.icscc, 0x39)
|
||||
enc_i32_i64(base.icmp_imm, r.icscc_ib, 0x83, rrr=7)
|
||||
enc_i32_i64(base.icmp_imm, r.icscc_id, 0x81, rrr=7)
|
||||
enc_i32_i64(base.ifcmp, r.rcmp, 0x39)
|
||||
enc_i32_i64(base.ifcmp_imm, r.rcmp_ib, 0x83, rrr=7)
|
||||
enc_i32_i64(base.ifcmp_imm, r.rcmp_id, 0x81, rrr=7)
|
||||
# TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x).
|
||||
|
||||
X86_32.enc(base.ifcmp_sp.i32, *r.rcmp_sp(0x39))
|
||||
X86_64.enc(base.ifcmp_sp.i64, *r.rcmp_sp.rex(0x39, w=1))
|
||||
|
||||
#
|
||||
# Convert flags to bool.
|
||||
#
|
||||
# This encodes `b1` as an 8-bit low register with the value 0 or 1.
|
||||
enc_both(base.trueif, r.seti_abcd, 0x0f, 0x90)
|
||||
enc_both(base.trueff, r.setf_abcd, 0x0f, 0x90)
|
||||
|
||||
#
|
||||
# Conditional move (a.k.a integer select)
|
||||
#
|
||||
enc_i32_i64(base.selectif, r.cmov, 0x0F, 0x40)
|
||||
|
||||
#
|
||||
# Bit scan forwards and reverse
|
||||
#
|
||||
enc_i32_i64(x86.bsf, r.bsf_and_bsr, 0x0F, 0xBC)
|
||||
enc_i32_i64(x86.bsr, r.bsf_and_bsr, 0x0F, 0xBD)
|
||||
|
||||
#
|
||||
# Convert bool to int.
|
||||
#
|
||||
# This assumes that b1 is represented as an 8-bit low register with the value 0
|
||||
# or 1.
|
||||
#
|
||||
# Encode movzbq as movzbl, because it's equivalent and shorter.
|
||||
X86_32.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
|
||||
X86_64.enc(base.bint.i64.b1, *r.urm_noflags.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.bint.i64.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
|
||||
X86_64.enc(base.bint.i32.b1, *r.urm_noflags.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
|
||||
|
||||
# Numerical conversions.
|
||||
|
||||
# Reducing an integer is a no-op.
|
||||
X86_32.enc(base.ireduce.i8.i16, r.null, 0)
|
||||
X86_32.enc(base.ireduce.i8.i32, r.null, 0)
|
||||
X86_32.enc(base.ireduce.i16.i32, r.null, 0)
|
||||
|
||||
X86_64.enc(base.ireduce.i8.i16, r.null, 0)
|
||||
X86_64.enc(base.ireduce.i8.i32, r.null, 0)
|
||||
X86_64.enc(base.ireduce.i16.i32, r.null, 0)
|
||||
X86_64.enc(base.ireduce.i8.i64, r.null, 0)
|
||||
X86_64.enc(base.ireduce.i16.i64, r.null, 0)
|
||||
X86_64.enc(base.ireduce.i32.i64, r.null, 0)
|
||||
|
||||
# TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending
|
||||
# instructions for %al/%ax/%eax to %ax/%eax/%rax.
|
||||
|
||||
# movsbl
|
||||
X86_32.enc(base.sextend.i32.i8, *r.urm_noflags_abcd(0x0f, 0xbe))
|
||||
X86_64.enc(base.sextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xbe))
|
||||
X86_64.enc(base.sextend.i32.i8, *r.urm_noflags_abcd(0x0f, 0xbe))
|
||||
|
||||
# movswl
|
||||
X86_32.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
|
||||
X86_64.enc(base.sextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xbf))
|
||||
X86_64.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
|
||||
|
||||
# movsbq
|
||||
X86_64.enc(base.sextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xbe, w=1))
|
||||
|
||||
# movswq
|
||||
X86_64.enc(base.sextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xbf, w=1))
|
||||
|
||||
# movslq
|
||||
X86_64.enc(base.sextend.i64.i32, *r.urm_noflags.rex(0x63, w=1))
|
||||
|
||||
# movzbl
|
||||
X86_32.enc(base.uextend.i32.i8, *r.urm_noflags_abcd(0x0f, 0xb6))
|
||||
X86_64.enc(base.uextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xb6))
|
||||
X86_64.enc(base.uextend.i32.i8, *r.urm_noflags_abcd(0x0f, 0xb6))
|
||||
|
||||
# movzwl
|
||||
X86_32.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
|
||||
|
||||
# movzbq, encoded as movzbl because it's equivalent and shorter.
#
# Without a REX prefix only the low bytes of the ABCD registers can be
# addressed as 8-bit operands, so the REX-less form must use the `_abcd`
# recipe, exactly like the REX-less `uextend.i32.i8`, `sextend.i32.i8` and
# `bint` encodings in this file.
X86_64.enc(base.uextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.uextend.i64.i8, *r.urm_noflags_abcd(0x0f, 0xb6))
|
||||
|
||||
# movzwq, encoded as movzwl because it's equivalent and shorter
|
||||
X86_64.enc(base.uextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xb7))
|
||||
X86_64.enc(base.uextend.i64.i16, *r.urm_noflags(0x0f, 0xb7))
|
||||
|
||||
# A 32-bit register copy clears the high 32 bits.
|
||||
X86_64.enc(base.uextend.i64.i32, *r.umr.rex(0x89))
|
||||
X86_64.enc(base.uextend.i64.i32, *r.umr(0x89))
|
||||
|
||||
|
||||
#
|
||||
# Floating point
|
||||
#
|
||||
|
||||
# floating-point constants equal to 0.0 can be encoded using either
|
||||
# `xorps` or `xorpd`, for 32-bit and 64-bit floats respectively.
|
||||
X86_32.enc(base.f32const, *r.f32imm_z(0x0f, 0x57),
|
||||
instp=IsZero32BitFloat(UnaryIeee32.imm))
|
||||
X86_32.enc(base.f64const, *r.f64imm_z(0x66, 0x0f, 0x57),
|
||||
instp=IsZero64BitFloat(UnaryIeee64.imm))
|
||||
|
||||
enc_x86_64_instp(base.f32const, r.f32imm_z,
|
||||
IsZero32BitFloat(UnaryIeee32.imm), 0x0f, 0x57)
|
||||
enc_x86_64_instp(base.f64const, r.f64imm_z,
|
||||
IsZero64BitFloat(UnaryIeee64.imm), 0x66, 0x0f, 0x57)
|
||||
|
||||
# movd
|
||||
enc_both(base.bitcast.f32.i32, r.frurm, 0x66, 0x0f, 0x6e)
|
||||
enc_both(base.bitcast.i32.f32, r.rfumr, 0x66, 0x0f, 0x7e)
|
||||
|
||||
# movq
|
||||
X86_64.enc(base.bitcast.f64.i64, *r.frurm.rex(0x66, 0x0f, 0x6e, w=1))
|
||||
X86_64.enc(base.bitcast.i64.f64, *r.rfumr.rex(0x66, 0x0f, 0x7e, w=1))
|
||||
|
||||
# movaps
|
||||
enc_both(base.copy.f32, r.furm, 0x0f, 0x28)
|
||||
enc_both(base.copy.f64, r.furm, 0x0f, 0x28)
|
||||
|
||||
# For x86-64, only define REX forms for now, since we can't describe the
|
||||
# special regunit immediate operands with the current constraint language.
|
||||
X86_32.enc(base.regmove.f32, *r.frmov(0x0f, 0x28))
|
||||
X86_64.enc(base.regmove.f32, *r.frmov.rex(0x0f, 0x28))
|
||||
|
||||
# For x86-64, only define REX forms for now, since we can't describe the
|
||||
# special regunit immediate operands with the current constraint language.
|
||||
X86_32.enc(base.regmove.f64, *r.frmov(0x0f, 0x28))
|
||||
X86_64.enc(base.regmove.f64, *r.frmov.rex(0x0f, 0x28))
|
||||
|
||||
# cvtsi2ss
|
||||
enc_i32_i64(base.fcvt_from_sint.f32, r.frurm, 0xf3, 0x0f, 0x2a)
|
||||
|
||||
# cvtsi2sd
|
||||
enc_i32_i64(base.fcvt_from_sint.f64, r.frurm, 0xf2, 0x0f, 0x2a)
|
||||
|
||||
# cvtss2sd
|
||||
enc_both(base.fpromote.f64.f32, r.furm, 0xf3, 0x0f, 0x5a)
|
||||
|
||||
# cvtsd2ss
|
||||
enc_both(base.fdemote.f32.f64, r.furm, 0xf2, 0x0f, 0x5a)
|
||||
|
||||
# cvttss2si
|
||||
enc_both(x86.cvtt2si.i32.f32, r.rfurm, 0xf3, 0x0f, 0x2c)
|
||||
X86_64.enc(x86.cvtt2si.i64.f32, *r.rfurm.rex(0xf3, 0x0f, 0x2c, w=1))
|
||||
|
||||
# cvttsd2si
|
||||
enc_both(x86.cvtt2si.i32.f64, r.rfurm, 0xf2, 0x0f, 0x2c)
|
||||
X86_64.enc(x86.cvtt2si.i64.f64, *r.rfurm.rex(0xf2, 0x0f, 0x2c, w=1))
|
||||
|
||||
# Exact square roots.
|
||||
enc_both(base.sqrt.f32, r.furm, 0xf3, 0x0f, 0x51)
|
||||
enc_both(base.sqrt.f64, r.furm, 0xf2, 0x0f, 0x51)
|
||||
|
||||
# Rounding. The recipe looks at the opcode to pick an immediate.
|
||||
for inst in [
|
||||
base.nearest,
|
||||
base.floor,
|
||||
base.ceil,
|
||||
base.trunc]:
|
||||
enc_both(inst.f32, r.furmi_rnd, 0x66, 0x0f, 0x3a, 0x0a, isap=use_sse41)
|
||||
enc_both(inst.f64, r.furmi_rnd, 0x66, 0x0f, 0x3a, 0x0b, isap=use_sse41)
|
||||
|
||||
|
||||
# Binary arithmetic ops.
|
||||
for inst, opc in [
|
||||
(base.fadd, 0x58),
|
||||
(base.fsub, 0x5c),
|
||||
(base.fmul, 0x59),
|
||||
(base.fdiv, 0x5e),
|
||||
(x86.fmin, 0x5d),
|
||||
(x86.fmax, 0x5f)]:
|
||||
enc_both(inst.f32, r.fa, 0xf3, 0x0f, opc)
|
||||
enc_both(inst.f64, r.fa, 0xf2, 0x0f, opc)
|
||||
|
||||
# Binary bitwise ops.
|
||||
for inst, opc in [
|
||||
(base.band, 0x54),
|
||||
(base.bor, 0x56),
|
||||
(base.bxor, 0x57)]:
|
||||
enc_both(inst.f32, r.fa, 0x0f, opc)
|
||||
enc_both(inst.f64, r.fa, 0x0f, opc)
|
||||
|
||||
# The `andnps(x,y)` instruction computes `~x&y`, while `band_not(x,y)` is `x&~y`.
|
||||
enc_both(base.band_not.f32, r.fax, 0x0f, 0x55)
|
||||
enc_both(base.band_not.f64, r.fax, 0x0f, 0x55)
|
||||
|
||||
# Comparisons.
|
||||
#
|
||||
# This only covers the condition codes in `supported_floatccs`, the rest are
|
||||
# handled by legalization patterns.
|
||||
enc_both(base.fcmp.f32, r.fcscc, 0x0f, 0x2e)
|
||||
enc_both(base.fcmp.f64, r.fcscc, 0x66, 0x0f, 0x2e)
|
||||
|
||||
enc_both(base.ffcmp.f32, r.fcmp, 0x0f, 0x2e)
|
||||
enc_both(base.ffcmp.f64, r.fcmp, 0x66, 0x0f, 0x2e)
|
||||
173
cranelift/codegen/meta-python/isa/x86/instructions.py
Normal file
173
cranelift/codegen/meta-python/isa/x86/instructions.py
Normal file
@@ -0,0 +1,173 @@
|
||||
"""
|
||||
Supplementary instruction definitions for x86.
|
||||
|
||||
This module defines additional instructions that are useful only to the x86
|
||||
target ISA.
|
||||
"""
|
||||
|
||||
from base.types import iflags
|
||||
from cdsl.operands import Operand
|
||||
from cdsl.typevar import TypeVar
|
||||
from cdsl.instructions import Instruction, InstructionGroup
|
||||
|
||||
|
||||
GROUP = InstructionGroup("x86", "x86-specific instruction set")
|
||||
|
||||
iWord = TypeVar('iWord', 'A scalar integer machine word', ints=(32, 64))
|
||||
|
||||
nlo = Operand('nlo', iWord, doc='Low part of numerator')
|
||||
nhi = Operand('nhi', iWord, doc='High part of numerator')
|
||||
d = Operand('d', iWord, doc='Denominator')
|
||||
q = Operand('q', iWord, doc='Quotient')
|
||||
r = Operand('r', iWord, doc='Remainder')
|
||||
|
||||
udivmodx = Instruction(
|
||||
'x86_udivmodx', r"""
|
||||
Extended unsigned division.
|
||||
|
||||
Concatenate the bits in `nhi` and `nlo` to form the numerator.
|
||||
Interpret the bits as an unsigned number and divide by the unsigned
|
||||
denominator `d`. Trap when `d` is zero or if the quotient is larger
|
||||
than the range of the output.
|
||||
|
||||
Return both quotient and remainder.
|
||||
""",
|
||||
ins=(nlo, nhi, d), outs=(q, r), can_trap=True)
|
||||
|
||||
sdivmodx = Instruction(
|
||||
'x86_sdivmodx', r"""
|
||||
Extended signed division.
|
||||
|
||||
Concatenate the bits in `nhi` and `nlo` to form the numerator.
|
||||
Interpret the bits as a signed number and divide by the signed
|
||||
denominator `d`. Trap when `d` is zero or if the quotient is outside
|
||||
the range of the output.
|
||||
|
||||
Return both quotient and remainder.
|
||||
""",
|
||||
ins=(nlo, nhi, d), outs=(q, r), can_trap=True)
|
||||
|
||||
argL = Operand('argL', iWord)
|
||||
argR = Operand('argR', iWord)
|
||||
resLo = Operand('resLo', iWord)
|
||||
resHi = Operand('resHi', iWord)
|
||||
|
||||
umulx = Instruction(
|
||||
'x86_umulx', r"""
|
||||
Unsigned integer multiplication, producing a double-length result.
|
||||
|
||||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
""",
|
||||
ins=(argL, argR), outs=(resLo, resHi))
|
||||
|
||||
smulx = Instruction(
|
||||
'x86_smulx', r"""
|
||||
Signed integer multiplication, producing a double-length result.
|
||||
|
||||
Polymorphic over all scalar integer types, but does not support vector
|
||||
types.
|
||||
""",
|
||||
ins=(argL, argR), outs=(resLo, resHi))
|
||||
|
||||
Float = TypeVar(
|
||||
'Float', 'A scalar or vector floating point number',
|
||||
floats=True, simd=True)
|
||||
IntTo = TypeVar(
|
||||
'IntTo', 'An integer type with the same number of lanes',
|
||||
ints=(32, 64), simd=True)
|
||||
|
||||
x = Operand('x', Float)
|
||||
a = Operand('a', IntTo)
|
||||
|
||||
cvtt2si = Instruction(
|
||||
'x86_cvtt2si', r"""
|
||||
Convert with truncation floating point to signed integer.
|
||||
|
||||
The source floating point operand is converted to a signed integer by
|
||||
rounding towards zero. If the result can't be represented in the output
|
||||
type, returns the smallest signed value the output type can represent.
|
||||
|
||||
This instruction does not trap.
|
||||
""",
|
||||
ins=x, outs=a)
|
||||
|
||||
x = Operand('x', Float)
|
||||
a = Operand('a', Float)
|
||||
y = Operand('y', Float)
|
||||
|
||||
fmin = Instruction(
|
||||
'x86_fmin', r"""
|
||||
Floating point minimum with x86 semantics.
|
||||
|
||||
This is equivalent to the C ternary operator `x < y ? x : y` which
|
||||
differs from :inst:`fmin` when either operand is NaN or when comparing
|
||||
+0.0 to -0.0.
|
||||
|
||||
When the two operands don't compare as LT, `y` is returned unchanged,
|
||||
even if it is a signalling NaN.
|
||||
""",
|
||||
ins=(x, y), outs=a)
|
||||
|
||||
fmax = Instruction(
|
||||
'x86_fmax', r"""
|
||||
Floating point maximum with x86 semantics.
|
||||
|
||||
This is equivalent to the C ternary operator `x > y ? x : y` which
|
||||
differs from :inst:`fmax` when either operand is NaN or when comparing
|
||||
+0.0 to -0.0.
|
||||
|
||||
When the two operands don't compare as GT, `y` is returned unchanged,
|
||||
even if it is a signalling NaN.
|
||||
""",
|
||||
ins=(x, y), outs=a)
|
||||
|
||||
|
||||
x = Operand('x', iWord)
|
||||
|
||||
push = Instruction(
|
||||
'x86_push', r"""
|
||||
Pushes a value onto the stack.
|
||||
|
||||
Decrements the stack pointer and stores the specified value on to the top.
|
||||
|
||||
This is polymorphic in i32 and i64. However, it is only implemented for i64
|
||||
in 64-bit mode, and only for i32 in 32-bit mode.
|
||||
""",
|
||||
ins=x, can_store=True, other_side_effects=True)
|
||||
|
||||
pop = Instruction(
|
||||
'x86_pop', r"""
|
||||
Pops a value from the stack.
|
||||
|
||||
Loads a value from the top of the stack and then increments the stack
|
||||
pointer.
|
||||
|
||||
This is polymorphic in i32 and i64. However, it is only implemented for i64
|
||||
in 64-bit mode, and only for i32 in 32-bit mode.
|
||||
""",
|
||||
outs=x, can_load=True, other_side_effects=True)
|
||||
|
||||
y = Operand('y', iWord)
|
||||
rflags = Operand('rflags', iflags)
|
||||
|
||||
bsr = Instruction(
|
||||
'x86_bsr', r"""
|
||||
Bit Scan Reverse -- returns the bit-index of the most significant 1
|
||||
in the word. Result is undefined if the argument is zero. However, it
|
||||
sets the Z flag depending on the argument, so it is at least easy to
|
||||
detect and handle that case.
|
||||
|
||||
This is polymorphic in i32 and i64. It is implemented for both i64 and
|
||||
i32 in 64-bit mode, and only for i32 in 32-bit mode.
|
||||
""",
|
||||
ins=x, outs=(y, rflags))
|
||||
|
||||
bsf = Instruction(
|
||||
'x86_bsf', r"""
|
||||
Bit Scan Forwards -- returns the bit-index of the least significant 1
|
||||
in the word. Is otherwise identical to 'bsr', just above.
|
||||
""",
|
||||
ins=x, outs=(y, rflags))
|
||||
|
||||
GROUP.close()
|
||||
229
cranelift/codegen/meta-python/isa/x86/legalize.py
Normal file
229
cranelift/codegen/meta-python/isa/x86/legalize.py
Normal file
@@ -0,0 +1,229 @@
|
||||
"""
|
||||
Custom legalization patterns for x86.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.ast import Var
|
||||
from cdsl.xform import Rtl, XFormGroup
|
||||
from base.immediates import imm64, intcc, floatcc
|
||||
from base import legalize as shared
|
||||
from base import instructions as insts
|
||||
from . import instructions as x86
|
||||
from .defs import ISA
|
||||
|
||||
# The transform group that every `legalize` / `custom_legalize` call in this
# file registers into. It is bound to the x86 ISA and chained to the shared
# `expand_flags` group.
# NOTE(review): presumably the chained group is consulted when no pattern in
# this group applies -- confirm in cdsl.xform.
x86_expand = XFormGroup(
|
||||
'x86_expand',
|
||||
"""
|
||||
Legalize instructions by expansion.
|
||||
|
||||
Use x86-specific instructions if needed.
|
||||
""",
|
||||
isa=ISA, chain=shared.expand_flags)
|
||||
|
||||
# Symbolic pattern variables reused by several of the legalization patterns
# in this file (source pattern on the left of each `legalize` call, replacement
# Rtl on the right).
# NOTE(review): `dead` and `xhi` are not referenced by any pattern in this
# file; they look like leftovers -- confirm before removing.
a = Var('a')
|
||||
dead = Var('dead')
|
||||
x = Var('x')
|
||||
xhi = Var('xhi')
|
||||
y = Var('y')
|
||||
a1 = Var('a1')
|
||||
a2 = Var('a2')
|
||||
|
||||
#
|
||||
# Division and remainder.
|
||||
#
|
||||
# The srem expansion requires custom code because srem INT_MIN, -1 is not
|
||||
# allowed to trap. The other ops need to check avoid_div_traps.
|
||||
# The string argument names the custom legalization routine; it is not
# defined in this file. The signed pair (sdiv/srem) shares one routine and the
# unsigned pair (udiv/urem) shares another.
x86_expand.custom_legalize(insts.sdiv, 'expand_sdivrem')
|
||||
x86_expand.custom_legalize(insts.srem, 'expand_sdivrem')
|
||||
x86_expand.custom_legalize(insts.udiv, 'expand_udivrem')
|
||||
x86_expand.custom_legalize(insts.urem, 'expand_udivrem')
|
||||
|
||||
#
|
||||
# Double length (widening) multiplication
|
||||
#
|
||||
# umulhi / smulhi produce only the high half of the product. Each is replaced
# by the corresponding x86 double-length multiply, which defines both halves;
# the low half (`resLo`) is defined by the replacement but not used further.
resLo = Var('resLo')
|
||||
resHi = Var('resHi')
|
||||
# Unsigned: umulhi -> x86.umulx.
x86_expand.legalize(
|
||||
resHi << insts.umulhi(x, y),
|
||||
Rtl(
|
||||
(resLo, resHi) << x86.umulx(x, y)
|
||||
))
|
||||
|
||||
# Signed: smulhi -> x86.smulx.
x86_expand.legalize(
|
||||
resHi << insts.smulhi(x, y),
|
||||
Rtl(
|
||||
(resLo, resHi) << x86.smulx(x, y)
|
||||
))
|
||||
|
||||
# Floating point condition codes.
|
||||
#
|
||||
# The 8 condition codes in `supported_floatccs` are directly supported by a
|
||||
# `ucomiss` or `ucomisd` instruction. The remaining codes need legalization
|
||||
# patterns.
|
||||
|
||||
# Equality needs an explicit `ord` test which checks the parity bit.
|
||||
# eq(x, y)  ==>  ord(x, y) AND ueq(x, y)
x86_expand.legalize(
|
||||
a << insts.fcmp(floatcc.eq, x, y),
|
||||
Rtl(
|
||||
a1 << insts.fcmp(floatcc.ord, x, y),
|
||||
a2 << insts.fcmp(floatcc.ueq, x, y),
|
||||
a << insts.band(a1, a2)
|
||||
))
|
||||
# ne(x, y)  ==>  uno(x, y) OR one(x, y)   (the logical complement of the
# eq expansion above: unordered, or ordered-and-not-equal).
x86_expand.legalize(
|
||||
a << insts.fcmp(floatcc.ne, x, y),
|
||||
Rtl(
|
||||
a1 << insts.fcmp(floatcc.uno, x, y),
|
||||
a2 << insts.fcmp(floatcc.one, x, y),
|
||||
a << insts.bor(a1, a2)
|
||||
))
|
||||
|
||||
# Inequalities that need to be reversed.
|
||||
# Each (cc, rev_cc) pair registers one pattern: cc(x, y) is rewritten as
# rev_cc(y, x), i.e. the condition is mirrored AND the operands are swapped,
# so the comparison result is unchanged.
for cc, rev_cc in [
|
||||
(floatcc.lt, floatcc.gt),
|
||||
(floatcc.le, floatcc.ge),
|
||||
(floatcc.ugt, floatcc.ult),
|
||||
(floatcc.uge, floatcc.ule)]:
|
||||
x86_expand.legalize(
|
||||
a << insts.fcmp(cc, x, y),
|
||||
Rtl(
|
||||
# Note the swapped operand order (y, x).
a << insts.fcmp(rev_cc, y, x)
|
||||
))
|
||||
|
||||
# We need to modify the CFG for min/max legalization.
|
||||
# fmin and fmax share a single custom routine.
x86_expand.custom_legalize(insts.fmin, 'expand_minmax')
|
||||
x86_expand.custom_legalize(insts.fmax, 'expand_minmax')
|
||||
|
||||
# Conversions from unsigned need special handling.
|
||||
x86_expand.custom_legalize(insts.fcvt_from_uint, 'expand_fcvt_from_uint')
|
||||
# Conversions from float to int can trap and modify the control flow graph.
|
||||
# Unlike the min/max pair, each of the four float-to-int conversions
# (signed/unsigned, trapping/saturating) names its own custom routine.
x86_expand.custom_legalize(insts.fcvt_to_sint, 'expand_fcvt_to_sint')
|
||||
x86_expand.custom_legalize(insts.fcvt_to_uint, 'expand_fcvt_to_uint')
|
||||
x86_expand.custom_legalize(insts.fcvt_to_sint_sat, 'expand_fcvt_to_sint_sat')
|
||||
x86_expand.custom_legalize(insts.fcvt_to_uint_sat, 'expand_fcvt_to_uint_sat')
|
||||
|
||||
# Count leading and trailing zeroes, for baseline x86_64
|
||||
# Pattern variables for the clz/ctz expansions. The `c_*` variables are bound
# to iconst results inside the patterns (the name spells out the constant);
# `index1` / `r2flags` receive the two results of a bit-scan instruction and
# `index2` the zero-corrected index.
c_minus_one = Var('c_minus_one')
|
||||
c_thirty_one = Var('c_thirty_one')
|
||||
c_thirty_two = Var('c_thirty_two')
|
||||
c_sixty_three = Var('c_sixty_three')
|
||||
c_sixty_four = Var('c_sixty_four')
|
||||
index1 = Var('index1')
|
||||
r2flags = Var('r2flags')
|
||||
index2 = Var('index2')
|
||||
|
||||
# clz.i64(x) = 63 - bsr(x). The x86_bsr index is undefined for a zero input,
# so the selectif replaces it with -1 when the flags compare `eq`, giving
# 63 - (-1) = 64 for x == 0.
# NOTE(review): this reads selectif as "first value if the condition holds on
# the flags, else second", and `eq` as "bsr saw a zero argument" -- confirm
# against the selectif and x86_bsr definitions.
x86_expand.legalize(
|
||||
a << insts.clz.i64(x),
|
||||
Rtl(
|
||||
c_minus_one << insts.iconst(imm64(-1)),
|
||||
c_sixty_three << insts.iconst(imm64(63)),
|
||||
(index1, r2flags) << x86.bsr(x),
|
||||
index2 << insts.selectif(intcc.eq, r2flags, c_minus_one, index1),
|
||||
a << insts.isub(c_sixty_three, index2),
|
||||
))
|
||||
|
||||
# clz.i32(x): identical shape with 31 in place of 63, so x == 0 yields 32.
x86_expand.legalize(
|
||||
a << insts.clz.i32(x),
|
||||
Rtl(
|
||||
c_minus_one << insts.iconst(imm64(-1)),
|
||||
c_thirty_one << insts.iconst(imm64(31)),
|
||||
(index1, r2flags) << x86.bsr(x),
|
||||
index2 << insts.selectif(intcc.eq, r2flags, c_minus_one, index1),
|
||||
a << insts.isub(c_thirty_one, index2),
|
||||
))
|
||||
|
||||
# ctz.i64(x): the x86_bsf index is already the trailing-zero count for a
# non-zero input, so no subtraction is needed. The selectif substitutes the
# bit width (64) when the flags compare `eq`.
# NOTE(review): `eq` is taken to mean "bsf saw a zero argument" -- confirm
# against the x86_bsf / selectif definitions.
x86_expand.legalize(
|
||||
a << insts.ctz.i64(x),
|
||||
Rtl(
|
||||
c_sixty_four << insts.iconst(imm64(64)),
|
||||
(index1, r2flags) << x86.bsf(x),
|
||||
a << insts.selectif(intcc.eq, r2flags, c_sixty_four, index1),
|
||||
))
|
||||
|
||||
# ctz.i32(x): identical shape with 32 as the substituted constant.
x86_expand.legalize(
|
||||
a << insts.ctz.i32(x),
|
||||
Rtl(
|
||||
c_thirty_two << insts.iconst(imm64(32)),
|
||||
(index1, r2flags) << x86.bsf(x),
|
||||
a << insts.selectif(intcc.eq, r2flags, c_thirty_two, index1),
|
||||
))
|
||||
|
||||
|
||||
# Population count for baseline x86_64
|
||||
# Pattern variables for the 64-bit expansion: qv1 is the input, qv16 the
# result, qv3..qv15 are intermediates (there is no qv2), and qc77 / qc0F /
# qc01 hold the three mask/multiplier constants.
qv1 = Var('qv1')
|
||||
qv3 = Var('qv3')
|
||||
qv4 = Var('qv4')
|
||||
qv5 = Var('qv5')
|
||||
qv6 = Var('qv6')
|
||||
qv7 = Var('qv7')
|
||||
qv8 = Var('qv8')
|
||||
qv9 = Var('qv9')
|
||||
qv10 = Var('qv10')
|
||||
qv11 = Var('qv11')
|
||||
qv12 = Var('qv12')
|
||||
qv13 = Var('qv13')
|
||||
qv14 = Var('qv14')
|
||||
qv15 = Var('qv15')
|
||||
qv16 = Var('qv16')
|
||||
qc77 = Var('qc77')
|
||||
qc0F = Var('qc0F')
|
||||
qc01 = Var('qc01')
|
||||
# Branch-free bit count using only shifts, masks, add/sub and one multiply.
x86_expand.legalize(
|
||||
qv16 << insts.popcnt.i64(qv1),
|
||||
Rtl(
|
||||
# Stage 1 (qv3..qv11): for every 4-bit nibble n compute
# n - (n >> 1) - (n >> 2) - (n >> 3), which equals the number of set bits
# in n. Each 1-bit shift is followed by a mask with 0x77.. to clear the
# bit that crossed over from the neighbouring nibble.
qv3 << insts.ushr_imm(qv1, imm64(1)),
|
||||
qc77 << insts.iconst(imm64(0x7777777777777777)),
|
||||
qv4 << insts.band(qv3, qc77),
|
||||
qv5 << insts.isub(qv1, qv4),
|
||||
qv6 << insts.ushr_imm(qv4, imm64(1)),
|
||||
qv7 << insts.band(qv6, qc77),
|
||||
qv8 << insts.isub(qv5, qv7),
|
||||
qv9 << insts.ushr_imm(qv7, imm64(1)),
|
||||
qv10 << insts.band(qv9, qc77),
|
||||
qv11 << insts.isub(qv8, qv10),
|
||||
# Stage 2 (qv12..qv14): add each nibble count to its neighbour and keep
# the low nibble of every byte, leaving one per-byte bit count per byte.
qv12 << insts.ushr_imm(qv11, imm64(4)),
|
||||
qv13 << insts.iadd(qv11, qv12),
|
||||
qc0F << insts.iconst(imm64(0x0F0F0F0F0F0F0F0F)),
|
||||
qv14 << insts.band(qv13, qc0F),
|
||||
# Stage 3 (qv15..qv16): multiplying by 0x0101.. accumulates the sum of
# all byte counts in the most significant byte; shifting right by 56
# moves that byte down to form the result.
qc01 << insts.iconst(imm64(0x0101010101010101)),
|
||||
qv15 << insts.imul(qv14, qc01),
|
||||
qv16 << insts.ushr_imm(qv15, imm64(56))
|
||||
))
|
||||
|
||||
# Pattern variables for the 32-bit population count expansion: lv1 is the
# input, lv16 the result, lv3..lv15 are intermediates (there is no lv2), and
# lc77 / lc0F / lc01 hold the three mask/multiplier constants.
lv1 = Var('lv1')
|
||||
lv3 = Var('lv3')
|
||||
lv4 = Var('lv4')
|
||||
lv5 = Var('lv5')
|
||||
lv6 = Var('lv6')
|
||||
lv7 = Var('lv7')
|
||||
lv8 = Var('lv8')
|
||||
lv9 = Var('lv9')
|
||||
lv10 = Var('lv10')
|
||||
lv11 = Var('lv11')
|
||||
lv12 = Var('lv12')
|
||||
lv13 = Var('lv13')
|
||||
lv14 = Var('lv14')
|
||||
lv15 = Var('lv15')
|
||||
lv16 = Var('lv16')
|
||||
lc77 = Var('lc77')
|
||||
lc0F = Var('lc0F')
|
||||
lc01 = Var('lc01')
|
||||
# Same step sequence as the popcnt.i64 pattern, with 32-bit wide constants
# and a final shift of 24 (instead of 56) to extract the top byte.
x86_expand.legalize(
|
||||
lv16 << insts.popcnt.i32(lv1),
|
||||
Rtl(
|
||||
# Stage 1 (lv3..lv11): per 4-bit nibble n, compute
# n - (n >> 1) - (n >> 2) - (n >> 3) = number of set bits in n; the
# 0x77.. mask removes the bit shifted in from the neighbouring nibble.
lv3 << insts.ushr_imm(lv1, imm64(1)),
|
||||
lc77 << insts.iconst(imm64(0x77777777)),
|
||||
lv4 << insts.band(lv3, lc77),
|
||||
lv5 << insts.isub(lv1, lv4),
|
||||
lv6 << insts.ushr_imm(lv4, imm64(1)),
|
||||
lv7 << insts.band(lv6, lc77),
|
||||
lv8 << insts.isub(lv5, lv7),
|
||||
lv9 << insts.ushr_imm(lv7, imm64(1)),
|
||||
lv10 << insts.band(lv9, lc77),
|
||||
lv11 << insts.isub(lv8, lv10),
|
||||
# Stage 2 (lv12..lv14): fold adjacent nibble counts into per-byte counts.
lv12 << insts.ushr_imm(lv11, imm64(4)),
|
||||
lv13 << insts.iadd(lv11, lv12),
|
||||
lc0F << insts.iconst(imm64(0x0F0F0F0F)),
|
||||
lv14 << insts.band(lv13, lc0F),
|
||||
# Stage 3 (lv15..lv16): the multiply sums the four byte counts into the
# most significant byte; the shift by 24 brings it down as the result.
lc01 << insts.iconst(imm64(0x01010101)),
|
||||
lv15 << insts.imul(lv14, lc01),
|
||||
lv16 << insts.ushr_imm(lv15, imm64(24))
|
||||
))
|
||||
2056
cranelift/codegen/meta-python/isa/x86/recipes.py
Normal file
2056
cranelift/codegen/meta-python/isa/x86/recipes.py
Normal file
File diff suppressed because it is too large
Load Diff
61
cranelift/codegen/meta-python/isa/x86/registers.py
Normal file
61
cranelift/codegen/meta-python/isa/x86/registers.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""
|
||||
x86 register banks.
|
||||
|
||||
While the floating-point registers are straightforward, the general purpose
|
||||
register bank has a few quirks on x86. We have these encodings of the 8-bit
|
||||
registers:
|
||||
|
||||
I32 I64 | 16b 32b 64b
|
||||
000 AL AL | AX EAX RAX
|
||||
001 CL CL | CX ECX RCX
|
||||
010 DL DL | DX EDX RDX
|
||||
011 BL BL | BX EBX RBX
|
||||
100 AH SPL | SP ESP RSP
|
||||
101 CH BPL | BP EBP RBP
|
||||
110 DH SIL | SI ESI RSI
|
||||
111 BH DIL | DI EDI RDI
|
||||
|
||||
Here, the I64 column refers to the registers you get with a REX prefix. Without
|
||||
the REX prefix, you get the I32 registers.
|
||||
|
||||
The 8-bit registers are not that useful since WebAssembly only has i32 and i64
|
||||
data types, and the H-registers even less so. Rather than trying to model the
|
||||
H-registers accurately, we'll avoid using them in both I32 and I64 modes.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.registers import RegBank, RegClass, Stack
|
||||
from .defs import ISA
|
||||
|
||||
|
||||
# General purpose bank: 16 units, but only the first 8 are given explicit
# names (in hardware encoding order rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi).
# NOTE(review): the remaining 8 units presumably get names derived from
# prefix 'r' plus the unit number -- confirm in cdsl.registers.RegBank.
IntRegs = RegBank(
|
||||
'IntRegs', ISA,
|
||||
'General purpose registers',
|
||||
units=16, prefix='r',
|
||||
names='rax rcx rdx rbx rsp rbp rsi rdi'.split())
|
||||
|
||||
# SSE bank: 16 units, no explicit names, only the 'xmm' prefix.
FloatRegs = RegBank(
|
||||
'FloatRegs', ISA,
|
||||
'SSE floating point registers',
|
||||
units=16, prefix='xmm')
|
||||
|
||||
# Flags bank: a single unit named 'rflags', with pressure tracking switched
# off for this bank.
FlagRegs = RegBank(
|
||||
'FlagRegs', ISA,
|
||||
'Flag registers',
|
||||
units=1,
|
||||
pressure_tracking=False,
|
||||
names=['rflags'])
|
||||
|
||||
# Top-level register classes cover a whole bank; the sliced classes are
# sub-ranges of units of their parent class:
#   GPR8 / FPR8 -- units 0..7 of the integer / float bank
#   ABCD        -- units 0..3 of the integer bank, i.e. rax, rcx, rdx, rbx
#                  given the name order declared for IntRegs.
GPR = RegClass(IntRegs)
|
||||
GPR8 = GPR[0:8]
|
||||
ABCD = GPR[0:4]
|
||||
FPR = RegClass(FloatRegs)
|
||||
FPR8 = FPR[0:8]
|
||||
FLAG = RegClass(FlagRegs)
|
||||
|
||||
# Constraints for stack operands.
|
||||
|
||||
# Stack operand with a 32-bit signed displacement from either RBP or RSP.
|
||||
StackGPR32 = Stack(GPR)
|
||||
StackFPR32 = Stack(FPR)
|
||||
|
||||
# The classes above are created without explicit names.
# NOTE(review): presumably this call assigns each RegClass its name from the
# module-level variable it is bound to -- confirm in cdsl.registers.
RegClass.extract_names(globals())
|
||||
54
cranelift/codegen/meta-python/isa/x86/settings.py
Normal file
54
cranelift/codegen/meta-python/isa/x86/settings.py
Normal file
@@ -0,0 +1,54 @@
|
||||
"""
|
||||
x86 settings.
|
||||
"""
|
||||
from __future__ import absolute_import
|
||||
from cdsl.settings import SettingGroup, BoolSetting, Preset
|
||||
from cdsl.predicates import And
|
||||
import base.settings as shared
|
||||
from .defs import ISA
|
||||
|
||||
# Open the x86 settings group as a child of the shared settings group. The
# settings, predicates and presets defined below belong to it until the
# group is closed at the end of this file.
ISA.settings = SettingGroup('x86', parent=shared.group)
|
||||
|
||||
# The has_* settings here correspond to CPUID bits.
|
||||
# The string passed to each BoolSetting names the CPUID leaf, register and
# bit position of the feature flag it mirrors.
|
||||
# CPUID.01H:ECX
|
||||
has_sse3 = BoolSetting("SSE3: CPUID.01H:ECX.SSE3[bit 0]")
|
||||
has_ssse3 = BoolSetting("SSSE3: CPUID.01H:ECX.SSSE3[bit 9]")
|
||||
has_sse41 = BoolSetting("SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]")
|
||||
has_sse42 = BoolSetting("SSE4.2: CPUID.01H:ECX.SSE4_2[bit 20]")
|
||||
has_popcnt = BoolSetting("POPCNT: CPUID.01H:ECX.POPCNT[bit 23]")
|
||||
# NOTE(review): has_avx is declared but is not referenced by any use_*
# predicate or preset in this file.
has_avx = BoolSetting("AVX: CPUID.01H:ECX.AVX[bit 28]")
|
||||
|
||||
# CPUID.(EAX=07H, ECX=0H):EBX
|
||||
has_bmi1 = BoolSetting("BMI1: CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]")
|
||||
has_bmi2 = BoolSetting("BMI2: CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]")
|
||||
|
||||
# CPUID.EAX=80000001H:ECX
|
||||
has_lzcnt = BoolSetting("LZCNT: CPUID.EAX=80000001H:ECX.LZCNT[bit 5]")
|
||||
|
||||
|
||||
# The use_* settings here are used to determine if a feature can be used.
|
||||
|
||||
# Each use_* predicate is a conjunction of has_* settings and/or other use_*
# predicates:
#   use_sse42  requires has_sse42 and use_sse41 (so SSE4.2 implies SSE4.1);
#   use_popcnt requires has_popcnt and, additionally, has_sse42;
#   the single-argument And(...) forms just wrap one setting.
use_sse41 = And(has_sse41)
|
||||
use_sse42 = And(has_sse42, use_sse41)
|
||||
use_popcnt = And(has_popcnt, has_sse42)
|
||||
use_bmi1 = And(has_bmi1)
|
||||
use_lzcnt = And(has_lzcnt)
|
||||
|
||||
# Presets corresponding to x86 CPUs.
|
||||
|
||||
# `baseline` lists no settings at all.
baseline = Preset()
|
||||
|
||||
# Intel chain: each preset is built from the previous one.
#   nehalem  -- SSE3, SSSE3, SSE4.1, SSE4.2, POPCNT
#   haswell  -- nehalem + BMI1, BMI2, LZCNT
#   broadwell, skylake, cannonlake, icelake -- each wraps its predecessor
#   without adding any setting.
nehalem = Preset(
|
||||
has_sse3, has_ssse3, has_sse41, has_sse42, has_popcnt)
|
||||
haswell = Preset(nehalem, has_bmi1, has_bmi2, has_lzcnt)
|
||||
broadwell = Preset(haswell)
|
||||
skylake = Preset(broadwell)
|
||||
cannonlake = Preset(skylake)
|
||||
icelake = Preset(cannonlake)
|
||||
|
||||
# AMD znver1 is spelled out explicitly rather than derived; the settings
# listed are the same eight that haswell ends up with.
znver1 = Preset(
|
||||
has_sse3, has_ssse3, has_sse41, has_sse42, has_popcnt,
|
||||
has_bmi1, has_bmi2, has_lzcnt)
|
||||
|
||||
# The settings, predicates and presets above are created without names.
# NOTE(review): presumably close() takes them from the module globals passed
# here and finalizes the group -- confirm in cdsl.settings.SettingGroup.
ISA.settings.close(globals())
|
||||
Reference in New Issue
Block a user