moved crates in lib/ to src/, renamed crates, modified some files' text (#660)

This commit is contained in:
lazypassion
2019-01-28 18:56:54 -05:00
committed by Dan Gohman
parent 54959cf5bb
commit 747ad3c4c5
508 changed files with 94 additions and 92 deletions

View File

@@ -0,0 +1,24 @@
"""
Cranelift target ISA definitions
--------------------------------
The :py:mod:`isa` package contains sub-packages for each target instruction set
architecture supported by Cranelift.
"""
from __future__ import absolute_import
from cdsl.isa import TargetISA # noqa
from . import riscv, x86, arm32, arm64
try:
from typing import List # noqa
except ImportError:
pass
def all_isas():
# type: () -> List[TargetISA]
"""
Get a list of all the supported target ISAs. Each target ISA is represented
as a :py:class:`cranelift.TargetISA` instance.
"""
return [riscv.ISA, x86.ISA, arm32.ISA, arm64.ISA]
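# A minimal usage sketch (an assumption for illustration: this package is
# importable as `isa`, and `TargetISA.name` holds the name passed to the
# constructor):
#
#     from isa import all_isas
#     for target in all_isas():
#         print(target.name)   # 'riscv', 'x86', 'arm32', 'arm64'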

View File

@@ -0,0 +1,15 @@
"""
ARM 32-bit Architecture
-----------------------
This target ISA generates code for ARMv7 and ARMv8 CPUs in 32-bit mode
(AArch32). We support both ARM and Thumb2 instruction encodings.
"""
from __future__ import absolute_import
from . import defs
from . import settings, registers # noqa
from cdsl.isa import TargetISA # noqa
# Re-export the primary target ISA definition.
ISA = defs.ISA.finish() # type: TargetISA

View File

@@ -0,0 +1,19 @@
"""
ARM 32-bit definitions.
Commonly used definitions.
"""
from __future__ import absolute_import
from cdsl.isa import TargetISA, CPUMode
import base.instructions
from base.legalize import narrow
ISA = TargetISA('arm32', [base.instructions.GROUP]) # type: TargetISA
# CPU modes for 32-bit ARM and Thumb2.
A32 = CPUMode('A32', ISA)
T32 = CPUMode('T32', ISA)
# TODO: Refine these.
A32.legalize_type(narrow)
T32.legalize_type(narrow)

View File

@@ -0,0 +1,45 @@
"""
ARM32 register banks.
"""
from __future__ import absolute_import
from cdsl.registers import RegBank, RegClass
from .defs import ISA
# Define the larger float bank first to avoid the alignment gap.
FloatRegs = RegBank(
'FloatRegs', ISA, r"""
Floating point registers.
The floating point register units correspond to the S-registers, but
extended as if there were 64 registers.
- S registers are one unit each.
- D registers are two units each, even D16 and above.
- Q registers are 4 units each.
""",
units=64, prefix='s')
# Special register units:
# - r15 is the program counter.
# - r14 is the link register.
# - r13 is usually the stack pointer.
IntRegs = RegBank(
'IntRegs', ISA,
'General purpose registers',
units=16, prefix='r')
FlagRegs = RegBank(
'FlagRegs', ISA,
'Flag registers',
units=1,
pressure_tracking=False,
names=['nzcv'])
GPR = RegClass(IntRegs)
S = RegClass(FloatRegs, count=32)
D = RegClass(FloatRegs, width=2)
Q = RegClass(FloatRegs, width=4)
FLAG = RegClass(FlagRegs)
RegClass.extract_names(globals())

View File

@@ -0,0 +1,11 @@
"""
ARM32 settings.
"""
from __future__ import absolute_import
from cdsl.settings import SettingGroup
import base.settings as shared
from .defs import ISA
ISA.settings = SettingGroup('arm32', parent=shared.group)
ISA.settings.close(globals())

View File

@@ -0,0 +1,14 @@
"""
ARM 64-bit Architecture
-----------------------
ARMv8 CPUs running the AArch64 architecture.
"""
from __future__ import absolute_import
from . import defs
from . import settings, registers # noqa
from cdsl.isa import TargetISA # noqa
# Re-export the primary target ISA definition.
ISA = defs.ISA.finish() # type: TargetISA

View File

@@ -0,0 +1,15 @@
"""
ARM64 definitions.
Commonly used definitions.
"""
from __future__ import absolute_import
from cdsl.isa import TargetISA, CPUMode
import base.instructions
from base.legalize import narrow
ISA = TargetISA('arm64', [base.instructions.GROUP]) # type: TargetISA
A64 = CPUMode('A64', ISA)
# TODO: Refine these
A64.legalize_type(narrow)

View File

@@ -0,0 +1,32 @@
"""
AArch64 register banks.
"""
from __future__ import absolute_import
from cdsl.registers import RegBank, RegClass
from .defs import ISA
# The `x31` regunit serves as the stack pointer / zero register depending on
# context. We reserve it and don't model the difference.
IntRegs = RegBank(
'IntRegs', ISA,
'General purpose registers',
units=32, prefix='x')
FloatRegs = RegBank(
'FloatRegs', ISA,
'Floating point registers',
units=32, prefix='v')
FlagRegs = RegBank(
'FlagRegs', ISA,
'Flag registers',
units=1,
pressure_tracking=False,
names=['nzcv'])
GPR = RegClass(IntRegs)
FPR = RegClass(FloatRegs)
FLAG = RegClass(FlagRegs)
RegClass.extract_names(globals())

View File

@@ -0,0 +1,11 @@
"""
ARM64 settings.
"""
from __future__ import absolute_import
from cdsl.settings import SettingGroup
import base.settings as shared
from .defs import ISA
ISA.settings = SettingGroup('arm64', parent=shared.group)
ISA.settings.close(globals())

View File

@@ -0,0 +1,33 @@
"""
RISC-V Target
-------------
`RISC-V <https://riscv.org/>`_ is an open instruction set architecture
originally developed at UC Berkeley. It is a RISC-style ISA with either a
32-bit (RV32I) or 64-bit (RV64I) base instruction set and a number of optional
extensions:
RV32M / RV64M
Integer multiplication and division.
RV32A / RV64A
Atomics.
RV32F / RV64F
Single-precision IEEE floating point.
RV32D / RV64D
Double-precision IEEE floating point.
RV32G / RV64G
General purpose instruction sets. This represents the union of the I, M, A,
F, and D instruction sets listed above.
"""
from __future__ import absolute_import
from . import defs
from . import encodings, settings, registers # noqa
from cdsl.isa import TargetISA # noqa
# Re-export the primary target ISA definition.
ISA = defs.ISA.finish() # type: TargetISA

View File

@@ -0,0 +1,14 @@
"""
RISC-V definitions.
Commonly used definitions.
"""
from __future__ import absolute_import
from cdsl.isa import TargetISA, CPUMode
import base.instructions
ISA = TargetISA('riscv', [base.instructions.GROUP]) # type: TargetISA
# CPU modes for 32-bit and 64-bit operation.
RV32 = CPUMode('RV32', ISA)
RV64 = CPUMode('RV64', ISA)

View File

@@ -0,0 +1,162 @@
"""
RISC-V Encodings.
"""
from __future__ import absolute_import
from base import instructions as base
from base.immediates import intcc
from .defs import RV32, RV64
from .recipes import OPIMM, OPIMM32, OP, OP32, LUI, BRANCH, JALR, JAL
from .recipes import LOAD, STORE
from .recipes import R, Rshamt, Ricmp, Ii, Iz, Iicmp, Iret, Icall, Icopy
from .recipes import U, UJ, UJcall, SB, SBzero, GPsp, GPfi, Irmov
from .settings import use_m
from cdsl.ast import Var
from base.legalize import narrow, expand
RV32.legalize_monomorphic(expand)
RV32.legalize_type(
default=narrow,
i32=expand,
f32=expand,
f64=expand)
RV64.legalize_monomorphic(expand)
RV64.legalize_type(
default=narrow,
i32=expand,
i64=expand,
f32=expand,
f64=expand)
# Dummies for instruction predicates.
x = Var('x')
y = Var('y')
dest = Var('dest')
args = Var('args')
# Basic arithmetic binary instructions are encoded in an R-type instruction.
for inst, inst_imm, f3, f7 in [
(base.iadd, base.iadd_imm, 0b000, 0b0000000),
(base.isub, None, 0b000, 0b0100000),
(base.bxor, base.bxor_imm, 0b100, 0b0000000),
(base.bor, base.bor_imm, 0b110, 0b0000000),
(base.band, base.band_imm, 0b111, 0b0000000)
]:
RV32.enc(inst.i32, R, OP(f3, f7))
RV64.enc(inst.i64, R, OP(f3, f7))
# Immediate versions for add/xor/or/and.
if inst_imm:
RV32.enc(inst_imm.i32, Ii, OPIMM(f3))
RV64.enc(inst_imm.i64, Ii, OPIMM(f3))
# 32-bit ops in RV64.
RV64.enc(base.iadd.i32, R, OP32(0b000, 0b0000000))
RV64.enc(base.isub.i32, R, OP32(0b000, 0b0100000))
# There are no andiw/oriw/xoriw variations.
RV64.enc(base.iadd_imm.i32, Ii, OPIMM32(0b000))
# Use iadd_imm with %x0 to materialize constants.
RV32.enc(base.iconst.i32, Iz, OPIMM(0b000))
RV64.enc(base.iconst.i32, Iz, OPIMM(0b000))
RV64.enc(base.iconst.i64, Iz, OPIMM(0b000))
# Dynamic shifts have the same masking semantics as the clif base instructions.
for inst, inst_imm, f3, f7 in [
(base.ishl, base.ishl_imm, 0b001, 0b0000000),
(base.ushr, base.ushr_imm, 0b101, 0b0000000),
(base.sshr, base.sshr_imm, 0b101, 0b0100000),
]:
RV32.enc(inst.i32.i32, R, OP(f3, f7))
RV64.enc(inst.i64.i64, R, OP(f3, f7))
RV64.enc(inst.i32.i32, R, OP32(f3, f7))
# Allow i32 shift amounts in 64-bit shifts.
RV64.enc(inst.i64.i32, R, OP(f3, f7))
RV64.enc(inst.i32.i64, R, OP32(f3, f7))
# Immediate shifts.
RV32.enc(inst_imm.i32, Rshamt, OPIMM(f3, f7))
RV64.enc(inst_imm.i64, Rshamt, OPIMM(f3, f7))
RV64.enc(inst_imm.i32, Rshamt, OPIMM32(f3, f7))
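# Illustrative sketch of the masking semantics referenced above: both the
# clif shift instructions and RISC-V SLL/SRL/SRA reduce a dynamic shift
# amount modulo the operand width, so no extra masking code is needed here.
def _sll32(x, amt):
    # Hypothetical helper for illustration only.
    return (x << (amt & 31)) & 0xffffffff
assert _sll32(1, 33) == _sll32(1, 1) == 2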
# Signed and unsigned integer 'less than'. There are no 'w' variants for
# comparing 32-bit numbers in RV64.
RV32.enc(base.icmp.i32(intcc.slt, x, y), Ricmp, OP(0b010, 0b0000000))
RV64.enc(base.icmp.i64(intcc.slt, x, y), Ricmp, OP(0b010, 0b0000000))
RV32.enc(base.icmp.i32(intcc.ult, x, y), Ricmp, OP(0b011, 0b0000000))
RV64.enc(base.icmp.i64(intcc.ult, x, y), Ricmp, OP(0b011, 0b0000000))
RV32.enc(base.icmp_imm.i32(intcc.slt, x, y), Iicmp, OPIMM(0b010))
RV64.enc(base.icmp_imm.i64(intcc.slt, x, y), Iicmp, OPIMM(0b010))
RV32.enc(base.icmp_imm.i32(intcc.ult, x, y), Iicmp, OPIMM(0b011))
RV64.enc(base.icmp_imm.i64(intcc.ult, x, y), Iicmp, OPIMM(0b011))
# Integer constants with the low 12 bits clear are materialized by lui.
RV32.enc(base.iconst.i32, U, LUI())
RV64.enc(base.iconst.i32, U, LUI())
RV64.enc(base.iconst.i64, U, LUI())
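# Illustrative check of when the LUI form applies: the constant must have
# its low 12 bits clear and fit in 32 signed bits (see the `U` recipe's
# `IsSignedInt(imm, 32, 12)` predicate).
_lui_imm = 0x12345000  # hypothetical constant for illustration
assert _lui_imm & 0xfff == 0 and -(1 << 31) <= _lui_imm < (1 << 31)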
# "M" Standard Extension for Integer Multiplication and Division.
# Gated by the `use_m` flag.
RV32.enc(base.imul.i32, R, OP(0b000, 0b0000001), isap=use_m)
RV64.enc(base.imul.i64, R, OP(0b000, 0b0000001), isap=use_m)
RV64.enc(base.imul.i32, R, OP32(0b000, 0b0000001), isap=use_m)
# Control flow.
# Unconditional branches.
RV32.enc(base.jump, UJ, JAL())
RV64.enc(base.jump, UJ, JAL())
RV32.enc(base.call, UJcall, JAL())
RV64.enc(base.call, UJcall, JAL())
# Conditional branches.
for cond, f3 in [
(intcc.eq, 0b000),
(intcc.ne, 0b001),
(intcc.slt, 0b100),
(intcc.sge, 0b101),
(intcc.ult, 0b110),
(intcc.uge, 0b111)
]:
RV32.enc(base.br_icmp.i32(cond, x, y, dest, args), SB, BRANCH(f3))
RV64.enc(base.br_icmp.i64(cond, x, y, dest, args), SB, BRANCH(f3))
for inst, f3 in [
(base.brz, 0b000),
(base.brnz, 0b001)
]:
RV32.enc(inst.i32, SBzero, BRANCH(f3))
RV64.enc(inst.i64, SBzero, BRANCH(f3))
RV32.enc(inst.b1, SBzero, BRANCH(f3))
RV64.enc(inst.b1, SBzero, BRANCH(f3))
# Returns are a special case of JALR using %x1 to hold the return address.
# The return address is provided by a special-purpose `link` return value that
# is added by legalize_signature().
RV32.enc(base.x_return, Iret, JALR())
RV64.enc(base.x_return, Iret, JALR())
RV32.enc(base.call_indirect.i32, Icall, JALR())
RV64.enc(base.call_indirect.i64, Icall, JALR())
# Spill and fill.
RV32.enc(base.spill.i32, GPsp, STORE(0b010))
RV64.enc(base.spill.i32, GPsp, STORE(0b010))
RV64.enc(base.spill.i64, GPsp, STORE(0b011))
RV32.enc(base.fill.i32, GPfi, LOAD(0b010))
RV64.enc(base.fill.i32, GPfi, LOAD(0b010))
RV64.enc(base.fill.i64, GPfi, LOAD(0b011))
# Register copies.
RV32.enc(base.copy.i32, Icopy, OPIMM(0b000))
RV64.enc(base.copy.i64, Icopy, OPIMM(0b000))
RV64.enc(base.copy.i32, Icopy, OPIMM32(0b000))
RV32.enc(base.regmove.i32, Irmov, OPIMM(0b000))
RV64.enc(base.regmove.i64, Irmov, OPIMM(0b000))
RV64.enc(base.regmove.i32, Irmov, OPIMM32(0b000))
RV32.enc(base.copy.b1, Icopy, OPIMM(0b000))
RV64.enc(base.copy.b1, Icopy, OPIMM(0b000))
RV32.enc(base.regmove.b1, Irmov, OPIMM(0b000))
RV64.enc(base.regmove.b1, Irmov, OPIMM(0b000))

View File

@@ -0,0 +1,225 @@
"""
RISC-V Encoding recipes.
The encoding recipes defined here more or less correspond to the RISC-V native
instruction formats described in the reference:
The RISC-V Instruction Set Manual
Volume I: User-Level ISA
Version 2.1
"""
from __future__ import absolute_import
from cdsl.isa import EncRecipe
from cdsl.predicates import IsSignedInt
from cdsl.registers import Stack
from base.formats import Binary, BinaryImm, MultiAry, IntCompare, IntCompareImm
from base.formats import Unary, UnaryImm, BranchIcmp, Branch, Jump
from base.formats import Call, CallIndirect, RegMove
from .registers import GPR
# The low 7 bits of a RISC-V instruction form the base opcode. All 32-bit
# instructions have 0b11 as their two low bits, with bits 6:2 determining the
# base opcode.
#
# Encbits for the 32-bit recipes are opcode[6:2] | (funct3 << 5) | ...
# The functions below encode the encbits.
def LOAD(funct3):
# type: (int) -> int
assert funct3 <= 0b111
return 0b00000 | (funct3 << 5)
def STORE(funct3):
# type: (int) -> int
assert funct3 <= 0b111
return 0b01000 | (funct3 << 5)
def BRANCH(funct3):
# type: (int) -> int
assert funct3 <= 0b111
return 0b11000 | (funct3 << 5)
def JALR(funct3=0):
# type: (int) -> int
assert funct3 <= 0b111
return 0b11001 | (funct3 << 5)
def JAL():
# type: () -> int
return 0b11011
def OPIMM(funct3, funct7=0):
# type: (int, int) -> int
assert funct3 <= 0b111
return 0b00100 | (funct3 << 5) | (funct7 << 8)
def OPIMM32(funct3, funct7=0):
# type: (int, int) -> int
assert funct3 <= 0b111
return 0b00110 | (funct3 << 5) | (funct7 << 8)
def OP(funct3, funct7):
# type: (int, int) -> int
assert funct3 <= 0b111
assert funct7 <= 0b1111111
return 0b01100 | (funct3 << 5) | (funct7 << 8)
def OP32(funct3, funct7):
# type: (int, int) -> int
assert funct3 <= 0b111
assert funct7 <= 0b1111111
return 0b01110 | (funct3 << 5) | (funct7 << 8)
def AUIPC():
# type: () -> int
return 0b00101
def LUI():
# type: () -> int
return 0b01101
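# Worked example of the encbits layout: R-type ADD uses the OP base opcode
# with funct3=0 and funct7=0, so its encbits are just 0b01100. The full
# 7-bit RISC-V opcode field is (encbits[4:0] << 2) | 0b11 = 0b0110011.
assert OP(0b000, 0b0000000) == 0b01100
assert ((OP(0b000, 0b0000000) & 0b11111) << 2) | 0b11 == 0b0110011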
# R-type 32-bit instructions: These are mostly binary arithmetic instructions.
# The encbits are `opcode[6:2] | (funct3 << 5) | (funct7 << 8)`.
R = EncRecipe(
'R', Binary, base_size=4, ins=(GPR, GPR), outs=GPR,
emit='put_r(bits, in_reg0, in_reg1, out_reg0, sink);')
# R-type with an immediate shift amount instead of rs2.
Rshamt = EncRecipe(
'Rshamt', BinaryImm, base_size=4, ins=GPR, outs=GPR,
emit='put_rshamt(bits, in_reg0, imm.into(), out_reg0, sink);')
# R-type encoding of an integer comparison.
Ricmp = EncRecipe(
'Ricmp', IntCompare, base_size=4, ins=(GPR, GPR), outs=GPR,
emit='put_r(bits, in_reg0, in_reg1, out_reg0, sink);')
Ii = EncRecipe(
'Ii', BinaryImm, base_size=4, ins=GPR, outs=GPR,
instp=IsSignedInt(BinaryImm.imm, 12),
emit='put_i(bits, in_reg0, imm.into(), out_reg0, sink);')
# I-type instruction with a hardcoded %x0 rs1.
Iz = EncRecipe(
'Iz', UnaryImm, base_size=4, ins=(), outs=GPR,
instp=IsSignedInt(UnaryImm.imm, 12),
emit='put_i(bits, 0, imm.into(), out_reg0, sink);')
# I-type encoding of an integer comparison.
Iicmp = EncRecipe(
'Iicmp', IntCompareImm, base_size=4, ins=GPR, outs=GPR,
instp=IsSignedInt(IntCompareImm.imm, 12),
emit='put_i(bits, in_reg0, imm.into(), out_reg0, sink);')
# I-type encoding for `jalr` as a return instruction. We won't use the
# immediate offset.
# The variable return values are not encoded.
Iret = EncRecipe(
'Iret', MultiAry, base_size=4, ins=(), outs=(),
emit='''
// Return instructions are always a jalr to %x1.
// The return address is provided as a special-purpose link argument.
put_i(
bits,
1, // rs1 = %x1
0, // no offset.
0, // rd = %x0: no address written.
sink,
);
''')
# I-type encoding for `jalr` as a call_indirect.
Icall = EncRecipe(
'Icall', CallIndirect, base_size=4, ins=GPR, outs=(),
emit='''
// call_indirect instructions are jalr with rd=%x1.
put_i(
bits,
in_reg0,
0, // no offset.
1, // rd = %x1: link register.
sink,
);
''')
# A GPR-to-GPR copy is implemented as `addi rd, rs, 0`.
Icopy = EncRecipe(
'Icopy', Unary, base_size=4, ins=GPR, outs=GPR,
emit='put_i(bits, in_reg0, 0, out_reg0, sink);')
# Same for a GPR regmove.
Irmov = EncRecipe(
'Irmov', RegMove, base_size=4, ins=GPR, outs=(),
emit='put_i(bits, src, 0, dst, sink);')
# U-type instructions have a 20-bit immediate that targets bits 12-31.
U = EncRecipe(
'U', UnaryImm, base_size=4, ins=(), outs=GPR,
instp=IsSignedInt(UnaryImm.imm, 32, 12),
emit='put_u(bits, imm.into(), out_reg0, sink);')
# UJ-type unconditional branch instructions.
UJ = EncRecipe(
'UJ', Jump, base_size=4, ins=(), outs=(), branch_range=(0, 21),
emit='''
let dest = i64::from(func.offsets[destination]);
let disp = dest - i64::from(sink.offset());
put_uj(bits, disp, 0, sink);
''')
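# Illustrative reading of `branch_range=(0, 21)`, assuming the tuple means
# (origin, bits): the JAL displacement must fit in a 21-bit signed field
# (20 encoded bits, scaled by 2), i.e. roughly +/-1 MiB from the branch.
def _jal_disp_ok(disp):
    # Hypothetical helper for illustration only.
    return -(1 << 20) <= disp < (1 << 20)
assert _jal_disp_ok(4096) and not _jal_disp_ok(1 << 20)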
UJcall = EncRecipe(
'UJcall', Call, base_size=4, ins=(), outs=(),
emit='''
sink.reloc_external(Reloc::RiscvCall,
&func.dfg.ext_funcs[func_ref].name,
0);
// rd=%x1 is the standard link register.
put_uj(bits, 0, 1, sink);
''')
# SB-type branch instructions.
SB = EncRecipe(
'SB', BranchIcmp, base_size=4,
ins=(GPR, GPR), outs=(),
branch_range=(0, 13),
emit='''
let dest = i64::from(func.offsets[destination]);
let disp = dest - i64::from(sink.offset());
put_sb(bits, disp, in_reg0, in_reg1, sink);
''')
# SB-type branch instruction with rs2 fixed to zero.
SBzero = EncRecipe(
'SBzero', Branch, base_size=4,
ins=GPR, outs=(),
branch_range=(0, 13),
emit='''
let dest = i64::from(func.offsets[destination]);
let disp = dest - i64::from(sink.offset());
put_sb(bits, disp, in_reg0, 0, sink);
''')
# Spill of a GPR.
GPsp = EncRecipe(
'GPsp', Unary, base_size=4,
ins=GPR, outs=Stack(GPR),
emit='unimplemented!();')
# Fill of a GPR.
GPfi = EncRecipe(
'GPfi', Unary, base_size=4,
ins=Stack(GPR), outs=GPR,
emit='unimplemented!();')

View File

@@ -0,0 +1,23 @@
"""
RISC-V register banks.
"""
from __future__ import absolute_import
from cdsl.registers import RegBank, RegClass
from .defs import ISA
# We include `x0`, a.k.a. `zero`, in the register bank. It will be reserved.
IntRegs = RegBank(
'IntRegs', ISA,
'General purpose registers',
units=32, prefix='x')
FloatRegs = RegBank(
'FloatRegs', ISA,
'Floating point registers',
units=32, prefix='f')
GPR = RegClass(IntRegs)
FPR = RegClass(FloatRegs)
RegClass.extract_names(globals())

View File

@@ -0,0 +1,31 @@
"""
RISC-V settings.
"""
from __future__ import absolute_import
from cdsl.settings import SettingGroup, BoolSetting
from cdsl.predicates import And
import base.settings as shared
from .defs import ISA
ISA.settings = SettingGroup('riscv', parent=shared.group)
supports_m = BoolSetting("CPU supports the 'M' extension (mul/div)")
supports_a = BoolSetting("CPU supports the 'A' extension (atomics)")
supports_f = BoolSetting("CPU supports the 'F' extension (float)")
supports_d = BoolSetting("CPU supports the 'D' extension (double)")
enable_m = BoolSetting(
"Enable the use of 'M' instructions if available",
default=True)
enable_e = BoolSetting(
"Enable the 'RV32E' instruction set with only 16 registers")
use_m = And(supports_m, enable_m)
use_a = And(supports_a, shared.enable_atomics)
use_f = And(supports_f, shared.enable_float)
use_d = And(supports_d, shared.enable_float)
full_float = And(shared.enable_simd, supports_f, supports_d)
ISA.settings.close(globals())

View File

@@ -0,0 +1,22 @@
"""
x86 Target Architecture
-----------------------
This target ISA generates code for x86 CPUs with two separate CPU modes:
`I32`
32-bit x86 architecture, also known as 'IA-32' and sometimes referred
to as 'i386'. Note, however, that Cranelift depends on instructions not
in the original i386, such as SSE2, CMOVcc, and UD2.
`I64`
x86-64 architecture, also known as 'AMD64', 'Intel 64', and 'x64'.
"""
from __future__ import absolute_import
from . import defs
from . import encodings, settings, registers # noqa
from cdsl.isa import TargetISA # noqa
# Re-export the primary target ISA definition.
ISA = defs.ISA.finish() # type: TargetISA

View File

@@ -0,0 +1,28 @@
"""
x86 definitions.
Commonly used definitions.
"""
from __future__ import absolute_import
from cdsl.isa import TargetISA, CPUMode
import base.instructions
from . import instructions as x86
from base.immediates import floatcc
ISA = TargetISA('x86', [base.instructions.GROUP, x86.GROUP]) # type: TargetISA
# CPU modes for 32-bit and 64-bit operation.
X86_64 = CPUMode('I64', ISA)
X86_32 = CPUMode('I32', ISA)
# The set of floating point condition codes that are directly supported.
# Other condition codes need to be reversed or expressed as two tests.
supported_floatccs = [
floatcc.ord,
floatcc.uno,
floatcc.one,
floatcc.ueq,
floatcc.gt,
floatcc.ge,
floatcc.ult,
floatcc.ule]

View File

@@ -0,0 +1,748 @@
"""
x86 Encodings.
"""
from __future__ import absolute_import
from cdsl.predicates import IsZero32BitFloat, IsZero64BitFloat
from cdsl.predicates import IsUnsignedInt, Not, And
from base.predicates import IsColocatedFunc, IsColocatedData, LengthEquals
from base import instructions as base
from base import types
from base.formats import UnaryIeee32, UnaryIeee64, UnaryImm
from base.formats import FuncAddr, Call, LoadComplex, StoreComplex
from .defs import X86_64, X86_32
from . import recipes as r
from . import settings as cfg
from . import instructions as x86
from .legalize import x86_expand
from base.legalize import narrow, widen, expand_flags
from base.settings import allones_funcaddrs, is_pic
from .settings import use_sse41
try:
from typing import TYPE_CHECKING, Any # noqa
if TYPE_CHECKING:
from cdsl.instructions import MaybeBoundInst # noqa
from cdsl.predicates import FieldPredicate # noqa
except ImportError:
pass
X86_32.legalize_monomorphic(expand_flags)
X86_32.legalize_type(
default=narrow,
b1=expand_flags,
i8=widen,
i16=widen,
i32=x86_expand,
f32=x86_expand,
f64=x86_expand)
X86_64.legalize_monomorphic(expand_flags)
X86_64.legalize_type(
default=narrow,
b1=expand_flags,
i8=widen,
i16=widen,
i32=x86_expand,
i64=x86_expand,
f32=x86_expand,
f64=x86_expand)
#
# Helper functions for generating encodings.
#
def enc_x86_64(inst, recipe, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
"""
Add encodings for `inst` to X86_64 with and without a REX prefix.
"""
X86_64.enc(inst, *recipe.rex(*args, **kwargs))
X86_64.enc(inst, *recipe(*args, **kwargs))
def enc_x86_64_instp(inst, recipe, instp, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None
"""
Add encodings for `inst` to X86_64 with and without a REX prefix.
"""
X86_64.enc(inst, *recipe.rex(*args, **kwargs), instp=instp)
X86_64.enc(inst, *recipe(*args, **kwargs), instp=instp)
def enc_both(inst, recipe, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, *int, **Any) -> None
"""
Add encodings for `inst` to both X86_32 and X86_64.
"""
X86_32.enc(inst, *recipe(*args, **kwargs))
enc_x86_64(inst, recipe, *args, **kwargs)
def enc_both_instp(inst, recipe, instp, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **Any) -> None
"""
Add encodings for `inst` to both X86_32 and X86_64.
"""
X86_32.enc(inst, *recipe(*args, **kwargs), instp=instp)
enc_x86_64_instp(inst, recipe, instp, *args, **kwargs)
def enc_i32_i64(inst, recipe, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, *int, **int) -> None
"""
Add encodings for `inst.i32` to X86_32.
Add encodings for `inst.i32` to X86_64 with and without REX.
Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
"""
X86_32.enc(inst.i32, *recipe(*args, **kwargs))
# REX-less encoding must come after REX encoding so we don't use it by
# default. Otherwise reg-alloc would never use r8 and up.
X86_64.enc(inst.i32, *recipe.rex(*args, **kwargs))
X86_64.enc(inst.i32, *recipe(*args, **kwargs))
X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs))
def enc_i32_i64_instp(inst, recipe, instp, *args, **kwargs):
# type: (MaybeBoundInst, r.TailRecipe, FieldPredicate, *int, **int) -> None
"""
Add encodings for `inst.i32` to X86_32.
Add encodings for `inst.i32` to X86_64 with and without REX.
Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
Similar to `enc_i32_i64` but applies `instp` to each encoding.
"""
X86_32.enc(inst.i32, *recipe(*args, **kwargs), instp=instp)
# REX-less encoding must come after REX encoding so we don't use it by
# default. Otherwise reg-alloc would never use r8 and up.
X86_64.enc(inst.i32, *recipe.rex(*args, **kwargs), instp=instp)
X86_64.enc(inst.i32, *recipe(*args, **kwargs), instp=instp)
X86_64.enc(inst.i64, *recipe.rex(*args, w=1, **kwargs), instp=instp)
def enc_i32_i64_ld_st(inst, w_bit, recipe, *args, **kwargs):
# type: (MaybeBoundInst, bool, r.TailRecipe, *int, **int) -> None
"""
Add encodings for `inst.i32` to X86_32.
Add encodings for `inst.i32` to X86_64 with and without REX.
Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit`
argument to determine whether or not to set the REX.W bit.
"""
X86_32.enc(inst.i32.any, *recipe(*args, **kwargs))
# REX-less encoding must come after REX encoding so we don't use it by
# default. Otherwise reg-alloc would never use r8 and up.
X86_64.enc(inst.i32.any, *recipe.rex(*args, **kwargs))
X86_64.enc(inst.i32.any, *recipe(*args, **kwargs))
if w_bit:
X86_64.enc(inst.i64.any, *recipe.rex(*args, w=1, **kwargs))
else:
X86_64.enc(inst.i64.any, *recipe.rex(*args, **kwargs))
X86_64.enc(inst.i64.any, *recipe(*args, **kwargs))
for inst, opc in [
(base.iadd, 0x01),
(base.isub, 0x29),
(base.band, 0x21),
(base.bor, 0x09),
(base.bxor, 0x31)]:
enc_i32_i64(inst, r.rr, opc)
# x86 has a dedicated bitwise NOT instruction.
enc_i32_i64(base.bnot, r.ur, 0xf7, rrr=2)
# Also add `b1` encodings for the logic instructions.
# TODO: Should this be done with 8-bit instructions? It would improve
# partial register dependencies.
enc_both(base.band.b1, r.rr, 0x21)
enc_both(base.bor.b1, r.rr, 0x09)
enc_both(base.bxor.b1, r.rr, 0x31)
enc_i32_i64(base.imul, r.rrx, 0x0f, 0xaf)
enc_i32_i64(x86.sdivmodx, r.div, 0xf7, rrr=7)
enc_i32_i64(x86.udivmodx, r.div, 0xf7, rrr=6)
enc_i32_i64(x86.smulx, r.mulx, 0xf7, rrr=5)
enc_i32_i64(x86.umulx, r.mulx, 0xf7, rrr=4)
enc_i32_i64(base.copy, r.umr, 0x89)
for ty in [types.b1, types.i8, types.i16]:
enc_both(base.copy.bind(ty), r.umr, 0x89)
# For x86-64, only define REX forms for now, since we can't describe the
# special regunit immediate operands with the current constraint language.
for ty in [types.i8, types.i16, types.i32]:
X86_32.enc(base.regmove.bind(ty), *r.rmov(0x89))
X86_64.enc(base.regmove.bind(ty), *r.rmov.rex(0x89))
X86_64.enc(base.regmove.i64, *r.rmov.rex(0x89, w=1))
enc_both(base.regmove.b1, r.rmov, 0x89)
enc_both(base.regmove.i8, r.rmov, 0x89)
# Immediate instructions with sign-extended 8-bit and 32-bit immediate.
for inst, rrr in [
(base.iadd_imm, 0),
(base.band_imm, 4),
(base.bor_imm, 1),
(base.bxor_imm, 6)]:
enc_i32_i64(inst, r.r_ib, 0x83, rrr=rrr)
enc_i32_i64(inst, r.r_id, 0x81, rrr=rrr)
# TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as
# band_imm.i32. Can even use the single-byte immediate for 0xffff_ffXX masks.
# Immediate constants.
X86_32.enc(base.iconst.i32, *r.pu_id(0xb8))
X86_64.enc(base.iconst.i32, *r.pu_id.rex(0xb8))
X86_64.enc(base.iconst.i32, *r.pu_id(0xb8))
# The 32-bit immediate movl also zero-extends to 64 bits.
X86_64.enc(base.iconst.i64, *r.pu_id.rex(0xb8),
instp=IsUnsignedInt(UnaryImm.imm, 32))
X86_64.enc(base.iconst.i64, *r.pu_id(0xb8),
instp=IsUnsignedInt(UnaryImm.imm, 32))
# Sign-extended 32-bit immediate.
X86_64.enc(base.iconst.i64, *r.u_id.rex(0xc7, rrr=0, w=1))
# Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix.
X86_64.enc(base.iconst.i64, *r.pu_iq.rex(0xb8, w=1))
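# Illustrative summary of the three i64 forms above (hypothetical helper,
# not used by the build):
def _iconst_i64_form(k):
    if 0 <= k < (1 << 32):
        return 'pu_id'   # movl: 32-bit immediate, zero-extended
    if -(1 << 31) <= k < (1 << 31):
        return 'u_id'    # movq: 32-bit immediate, sign-extended
    return 'pu_iq'       # movabsq: full 8-byte immediate
assert _iconst_i64_form(2**32 - 1) == 'pu_id'
assert _iconst_i64_form(-1) == 'u_id'
assert _iconst_i64_form(2**40) == 'pu_iq'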
# bool constants.
enc_both(base.bconst.b1, r.pu_id_bool, 0xb8)
# Shifts and rotates.
# Note that the dynamic shift amount is only masked to 5 or 6 bits; the 8-bit
# and 16-bit shifts would need explicit masking.
for inst, rrr in [
(base.rotl, 0),
(base.rotr, 1),
(base.ishl, 4),
(base.ushr, 5),
(base.sshr, 7)]:
# Cannot use enc_i32_i64 for this pattern because instructions require
# .any suffix.
X86_32.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr))
X86_64.enc(inst.i64.any, *r.rc.rex(0xd3, rrr=rrr, w=1))
X86_64.enc(inst.i32.any, *r.rc.rex(0xd3, rrr=rrr))
X86_64.enc(inst.i32.any, *r.rc(0xd3, rrr=rrr))
for inst, rrr in [
(base.ishl_imm, 4),
(base.ushr_imm, 5),
(base.sshr_imm, 7)]:
enc_i32_i64(inst, r.r_ib, 0xc1, rrr=rrr)
# Population count.
X86_32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
X86_64.enc(base.popcnt.i64, *r.urm.rex(0xf3, 0x0f, 0xb8, w=1),
isap=cfg.use_popcnt)
X86_64.enc(base.popcnt.i32, *r.urm.rex(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
X86_64.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8), isap=cfg.use_popcnt)
# Count leading zero bits.
X86_32.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
X86_64.enc(base.clz.i64, *r.urm.rex(0xf3, 0x0f, 0xbd, w=1),
isap=cfg.use_lzcnt)
X86_64.enc(base.clz.i32, *r.urm.rex(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
X86_64.enc(base.clz.i32, *r.urm(0xf3, 0x0f, 0xbd), isap=cfg.use_lzcnt)
# Count trailing zero bits.
X86_32.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
X86_64.enc(base.ctz.i64, *r.urm.rex(0xf3, 0x0f, 0xbc, w=1),
isap=cfg.use_bmi1)
X86_64.enc(base.ctz.i32, *r.urm.rex(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
X86_64.enc(base.ctz.i32, *r.urm(0xf3, 0x0f, 0xbc), isap=cfg.use_bmi1)
#
# Loads and stores.
#
ldcomplexp = LengthEquals(LoadComplex, 2)
for recipe in [r.ldWithIndex, r.ldWithIndexDisp8, r.ldWithIndexDisp32]:
enc_i32_i64_instp(base.load_complex, recipe, ldcomplexp, 0x8b)
enc_x86_64_instp(base.uload32_complex, recipe, ldcomplexp, 0x8b)
X86_64.enc(base.sload32_complex, *recipe.rex(0x63, w=1),
instp=ldcomplexp)
enc_i32_i64_instp(base.uload16_complex, recipe, ldcomplexp, 0x0f, 0xb7)
enc_i32_i64_instp(base.sload16_complex, recipe, ldcomplexp, 0x0f, 0xbf)
enc_i32_i64_instp(base.uload8_complex, recipe, ldcomplexp, 0x0f, 0xb6)
enc_i32_i64_instp(base.sload8_complex, recipe, ldcomplexp, 0x0f, 0xbe)
stcomplexp = LengthEquals(StoreComplex, 3)
for recipe in [r.stWithIndex, r.stWithIndexDisp8, r.stWithIndexDisp32]:
enc_i32_i64_instp(base.store_complex, recipe, stcomplexp, 0x89)
enc_x86_64_instp(base.istore32_complex, recipe, stcomplexp, 0x89)
enc_both_instp(base.istore16_complex.i32, recipe, stcomplexp, 0x66, 0x89)
enc_x86_64_instp(base.istore16_complex.i64, recipe, stcomplexp, 0x66, 0x89)
for recipe in [r.stWithIndex_abcd,
r.stWithIndexDisp8_abcd,
r.stWithIndexDisp32_abcd]:
enc_both_instp(base.istore8_complex.i32, recipe, stcomplexp, 0x88)
enc_x86_64_instp(base.istore8_complex.i64, recipe, stcomplexp, 0x88)
for recipe in [r.st, r.stDisp8, r.stDisp32]:
enc_i32_i64_ld_st(base.store, True, recipe, 0x89)
enc_x86_64(base.istore32.i64.any, recipe, 0x89)
enc_i32_i64_ld_st(base.istore16, False, recipe, 0x66, 0x89)
# Byte stores are more complicated because the registers they can address
# depend on the presence of a REX prefix. The st*_abcd recipes fall back to
# the corresponding st* recipes when a REX prefix is applied.
for recipe in [r.st_abcd, r.stDisp8_abcd, r.stDisp32_abcd]:
enc_both(base.istore8.i32.any, recipe, 0x88)
enc_x86_64(base.istore8.i64.any, recipe, 0x88)
enc_i32_i64(base.spill, r.spillSib32, 0x89)
enc_i32_i64(base.regspill, r.regspill32, 0x89)
# Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid
# constraining the permitted registers.
# See MIN_SPILL_SLOT_SIZE which makes this safe.
for ty in [types.b1, types.i8, types.i16]:
enc_both(base.spill.bind(ty), r.spillSib32, 0x89)
enc_both(base.regspill.bind(ty), r.regspill32, 0x89)
for recipe in [r.ld, r.ldDisp8, r.ldDisp32]:
enc_i32_i64_ld_st(base.load, True, recipe, 0x8b)
enc_x86_64(base.uload32.i64, recipe, 0x8b)
X86_64.enc(base.sload32.i64, *recipe.rex(0x63, w=1))
enc_i32_i64_ld_st(base.uload16, True, recipe, 0x0f, 0xb7)
enc_i32_i64_ld_st(base.sload16, True, recipe, 0x0f, 0xbf)
enc_i32_i64_ld_st(base.uload8, True, recipe, 0x0f, 0xb6)
enc_i32_i64_ld_st(base.sload8, True, recipe, 0x0f, 0xbe)
enc_i32_i64(base.fill, r.fillSib32, 0x8b)
enc_i32_i64(base.regfill, r.regfill32, 0x8b)
# Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above.
for ty in [types.b1, types.i8, types.i16]:
enc_both(base.fill.bind(ty), r.fillSib32, 0x8b)
enc_both(base.regfill.bind(ty), r.regfill32, 0x8b)
# Push and Pop
X86_32.enc(x86.push.i32, *r.pushq(0x50))
enc_x86_64(x86.push.i64, r.pushq, 0x50)
X86_32.enc(x86.pop.i32, *r.popq(0x58))
enc_x86_64(x86.pop.i64, r.popq, 0x58)
# Copy Special
# For x86-64, only define REX forms for now, since we can't describe the
# special regunit immediate operands with the current constraint language.
X86_64.enc(base.copy_special, *r.copysp.rex(0x89, w=1))
X86_32.enc(base.copy_special, *r.copysp(0x89))
# Adjust SP down by a dynamic value (or up, with a negative operand).
X86_32.enc(base.adjust_sp_down.i32, *r.adjustsp(0x29))
X86_64.enc(base.adjust_sp_down.i64, *r.adjustsp.rex(0x29, w=1))
# Adjust SP up by an immediate (or down, with a negative immediate)
X86_32.enc(base.adjust_sp_up_imm, *r.adjustsp_ib(0x83))
X86_32.enc(base.adjust_sp_up_imm, *r.adjustsp_id(0x81))
X86_64.enc(base.adjust_sp_up_imm, *r.adjustsp_ib.rex(0x83, w=1))
X86_64.enc(base.adjust_sp_up_imm, *r.adjustsp_id.rex(0x81, w=1))
# Adjust SP down by an immediate (or up, with a negative immediate)
X86_32.enc(base.adjust_sp_down_imm, *r.adjustsp_ib(0x83, rrr=5))
X86_32.enc(base.adjust_sp_down_imm, *r.adjustsp_id(0x81, rrr=5))
X86_64.enc(base.adjust_sp_down_imm, *r.adjustsp_ib.rex(0x83, rrr=5, w=1))
X86_64.enc(base.adjust_sp_down_imm, *r.adjustsp_id.rex(0x81, rrr=5, w=1))
#
# Float loads and stores.
#
enc_both(base.load.f32.any, r.fld, 0xf3, 0x0f, 0x10)
enc_both(base.load.f32.any, r.fldDisp8, 0xf3, 0x0f, 0x10)
enc_both(base.load.f32.any, r.fldDisp32, 0xf3, 0x0f, 0x10)
enc_both(base.load_complex.f32, r.fldWithIndex, 0xf3, 0x0f, 0x10)
enc_both(base.load_complex.f32, r.fldWithIndexDisp8, 0xf3, 0x0f, 0x10)
enc_both(base.load_complex.f32, r.fldWithIndexDisp32, 0xf3, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fld, 0xf2, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fldDisp8, 0xf2, 0x0f, 0x10)
enc_both(base.load.f64.any, r.fldDisp32, 0xf2, 0x0f, 0x10)
enc_both(base.load_complex.f64, r.fldWithIndex, 0xf2, 0x0f, 0x10)
enc_both(base.load_complex.f64, r.fldWithIndexDisp8, 0xf2, 0x0f, 0x10)
enc_both(base.load_complex.f64, r.fldWithIndexDisp32, 0xf2, 0x0f, 0x10)
enc_both(base.store.f32.any, r.fst, 0xf3, 0x0f, 0x11)
enc_both(base.store.f32.any, r.fstDisp8, 0xf3, 0x0f, 0x11)
enc_both(base.store.f32.any, r.fstDisp32, 0xf3, 0x0f, 0x11)
enc_both(base.store_complex.f32, r.fstWithIndex, 0xf3, 0x0f, 0x11)
enc_both(base.store_complex.f32, r.fstWithIndexDisp8, 0xf3, 0x0f, 0x11)
enc_both(base.store_complex.f32, r.fstWithIndexDisp32, 0xf3, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fst, 0xf2, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fstDisp8, 0xf2, 0x0f, 0x11)
enc_both(base.store.f64.any, r.fstDisp32, 0xf2, 0x0f, 0x11)
enc_both(base.store_complex.f64, r.fstWithIndex, 0xf2, 0x0f, 0x11)
enc_both(base.store_complex.f64, r.fstWithIndexDisp8, 0xf2, 0x0f, 0x11)
enc_both(base.store_complex.f64, r.fstWithIndexDisp32, 0xf2, 0x0f, 0x11)
enc_both(base.fill.f32, r.ffillSib32, 0xf3, 0x0f, 0x10)
enc_both(base.regfill.f32, r.fregfill32, 0xf3, 0x0f, 0x10)
enc_both(base.fill.f64, r.ffillSib32, 0xf2, 0x0f, 0x10)
enc_both(base.regfill.f64, r.fregfill32, 0xf2, 0x0f, 0x10)
enc_both(base.spill.f32, r.fspillSib32, 0xf3, 0x0f, 0x11)
enc_both(base.regspill.f32, r.fregspill32, 0xf3, 0x0f, 0x11)
enc_both(base.spill.f64, r.fspillSib32, 0xf2, 0x0f, 0x11)
enc_both(base.regspill.f64, r.fregspill32, 0xf2, 0x0f, 0x11)
#
# Function addresses.
#
# Non-PIC, default funcaddresses.
X86_32.enc(base.func_addr.i32, *r.fnaddr4(0xb8),
isap=And(Not(allones_funcaddrs), Not(is_pic)))
X86_64.enc(base.func_addr.i64, *r.fnaddr8.rex(0xb8, w=1),
isap=And(Not(allones_funcaddrs), Not(is_pic)))
# Non-PIC, all-ones funcaddresses.
X86_32.enc(base.func_addr.i32, *r.allones_fnaddr4(0xb8),
isap=And(allones_funcaddrs, Not(is_pic)))
X86_64.enc(base.func_addr.i64, *r.allones_fnaddr8.rex(0xb8, w=1),
isap=And(allones_funcaddrs, Not(is_pic)))
# 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's
# pc-relative field.
X86_64.enc(base.func_addr.i64, *r.pcrel_fnaddr8.rex(0x8d, w=1),
instp=IsColocatedFunc(FuncAddr.func_ref))
# 64-bit, non-colocated, PIC.
X86_64.enc(base.func_addr.i64, *r.got_fnaddr8.rex(0x8b, w=1),
isap=is_pic)
#
# Global addresses.
#
# Non-PIC
X86_32.enc(base.symbol_value.i32, *r.gvaddr4(0xb8),
isap=Not(is_pic))
X86_64.enc(base.symbol_value.i64, *r.gvaddr8.rex(0xb8, w=1),
isap=Not(is_pic))
# PIC, colocated
X86_64.enc(base.symbol_value.i64, *r.pcrel_gvaddr8.rex(0x8d, w=1),
isap=is_pic,
instp=IsColocatedData())
# PIC, non-colocated
X86_64.enc(base.symbol_value.i64, *r.got_gvaddr8.rex(0x8b, w=1),
isap=is_pic)
#
# Stack addresses.
#
# TODO: Add encoding rules for stack_load and stack_store, so that they
# don't get legalized to stack_addr + load/store.
#
X86_32.enc(base.stack_addr.i32, *r.spaddr4_id(0x8d))
X86_64.enc(base.stack_addr.i64, *r.spaddr8_id.rex(0x8d, w=1))
#
# Call/return
#
# 32-bit, both PIC and non-PIC.
X86_32.enc(base.call, *r.call_id(0xe8))
# 64-bit, colocated, both PIC and non-PIC. Use the call instruction's
# pc-relative field.
X86_64.enc(base.call, *r.call_id(0xe8),
instp=IsColocatedFunc(Call.func_ref))
# 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version,
# since non-PIC currently uses the large model, which requires calls to be
# lowered to func_addr+call_indirect.
X86_64.enc(base.call, *r.call_plt_id(0xe8), isap=is_pic)
X86_32.enc(base.call_indirect.i32, *r.call_r(0xff, rrr=2))
X86_64.enc(base.call_indirect.i64, *r.call_r.rex(0xff, rrr=2))
X86_64.enc(base.call_indirect.i64, *r.call_r(0xff, rrr=2))
X86_32.enc(base.x_return, *r.ret(0xc3))
X86_64.enc(base.x_return, *r.ret(0xc3))
#
# Branches
#
enc_both(base.jump, r.jmpb, 0xeb)
enc_both(base.jump, r.jmpd, 0xe9)
enc_both(base.brif, r.brib, 0x70)
enc_both(base.brif, r.brid, 0x0f, 0x80)
# Not all float condition codes are legal, see `supported_floatccs`.
enc_both(base.brff, r.brfb, 0x70)
enc_both(base.brff, r.brfd, 0x0f, 0x80)
# Note that the tjccd opcode will be prefixed with 0x0f.
enc_i32_i64(base.brz, r.tjccb, 0x74)
enc_i32_i64(base.brz, r.tjccd, 0x84)
enc_i32_i64(base.brnz, r.tjccb, 0x75)
enc_i32_i64(base.brnz, r.tjccd, 0x85)
# Branch on a b1 value in a register only looks at the low 8 bits. See also
# bint encodings below.
#
# Start with the worst-case encoding for X86_32 only. The register allocator
# can't handle a branch with an ABCD-constrained operand.
X86_32.enc(base.brz.b1, *r.t8jccd_long(0x84))
X86_32.enc(base.brnz.b1, *r.t8jccd_long(0x85))
enc_both(base.brz.b1, r.t8jccb_abcd, 0x74)
enc_both(base.brz.b1, r.t8jccd_abcd, 0x84)
enc_both(base.brnz.b1, r.t8jccb_abcd, 0x75)
enc_both(base.brnz.b1, r.t8jccd_abcd, 0x85)
#
# Jump tables
#
X86_64.enc(base.jump_table_entry.i64.any.any, *r.jt_entry.rex(0x63, w=1))
X86_32.enc(base.jump_table_entry.i32.any.any, *r.jt_entry(0x8b))
X86_64.enc(base.jump_table_base.i64, *r.jt_base.rex(0x8d, w=1))
X86_32.enc(base.jump_table_base.i32, *r.jt_base(0x8d))
enc_x86_64(base.indirect_jump_table_br.i64, r.indirect_jmp, 0xff, rrr=4)
X86_32.enc(base.indirect_jump_table_br.i32, *r.indirect_jmp(0xff, rrr=4))
#
# Trap as ud2
#
X86_32.enc(base.trap, *r.trap(0x0f, 0x0b))
X86_64.enc(base.trap, *r.trap(0x0f, 0x0b))
# Debug trap as int3
X86_32.enc(base.debugtrap, r.debugtrap, 0)
X86_64.enc(base.debugtrap, r.debugtrap, 0)
# Using a standard EncRecipe, not the TailRecipe.
X86_32.enc(base.trapif, r.trapif, 0)
X86_64.enc(base.trapif, r.trapif, 0)
X86_32.enc(base.trapff, r.trapff, 0)
X86_64.enc(base.trapff, r.trapff, 0)
#
# Comparisons
#
enc_i32_i64(base.icmp, r.icscc, 0x39)
enc_i32_i64(base.icmp_imm, r.icscc_ib, 0x83, rrr=7)
enc_i32_i64(base.icmp_imm, r.icscc_id, 0x81, rrr=7)
enc_i32_i64(base.ifcmp, r.rcmp, 0x39)
enc_i32_i64(base.ifcmp_imm, r.rcmp_ib, 0x83, rrr=7)
enc_i32_i64(base.ifcmp_imm, r.rcmp_id, 0x81, rrr=7)
# TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x).
X86_32.enc(base.ifcmp_sp.i32, *r.rcmp_sp(0x39))
X86_64.enc(base.ifcmp_sp.i64, *r.rcmp_sp.rex(0x39, w=1))
#
# Convert flags to bool.
#
# This encodes `b1` as an 8-bit low register with the value 0 or 1.
enc_both(base.trueif, r.seti_abcd, 0x0f, 0x90)
enc_both(base.trueff, r.setf_abcd, 0x0f, 0x90)
#
# Conditional move (a.k.a. integer select)
#
enc_i32_i64(base.selectif, r.cmov, 0x0F, 0x40)
#
# Bit scan forwards and reverse
#
enc_i32_i64(x86.bsf, r.bsf_and_bsr, 0x0F, 0xBC)
enc_i32_i64(x86.bsr, r.bsf_and_bsr, 0x0F, 0xBD)
#
# Convert bool to int.
#
# This assumes that b1 is represented as an 8-bit low register with the value 0
# or 1.
#
# Encode movzbq as movzbl, because it's equivalent and shorter.
X86_32.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
X86_64.enc(base.bint.i64.b1, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.bint.i64.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
X86_64.enc(base.bint.i32.b1, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.bint.i32.b1, *r.urm_noflags_abcd(0x0f, 0xb6))
# Numerical conversions.
# Reducing an integer is a no-op.
X86_32.enc(base.ireduce.i8.i16, r.null, 0)
X86_32.enc(base.ireduce.i8.i32, r.null, 0)
X86_32.enc(base.ireduce.i16.i32, r.null, 0)
X86_64.enc(base.ireduce.i8.i16, r.null, 0)
X86_64.enc(base.ireduce.i8.i32, r.null, 0)
X86_64.enc(base.ireduce.i16.i32, r.null, 0)
X86_64.enc(base.ireduce.i8.i64, r.null, 0)
X86_64.enc(base.ireduce.i16.i64, r.null, 0)
X86_64.enc(base.ireduce.i32.i64, r.null, 0)
# TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending
# instructions for %al/%ax/%eax to %ax/%eax/%rax.
# movsbl
X86_32.enc(base.sextend.i32.i8, *r.urm_noflags_abcd(0x0f, 0xbe))
X86_64.enc(base.sextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xbe))
X86_64.enc(base.sextend.i32.i8, *r.urm_noflags_abcd(0x0f, 0xbe))
# movswl
X86_32.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
X86_64.enc(base.sextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xbf))
X86_64.enc(base.sextend.i32.i16, *r.urm_noflags(0x0f, 0xbf))
# movsbq
X86_64.enc(base.sextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xbe, w=1))
# movswq
X86_64.enc(base.sextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xbf, w=1))
# movslq
X86_64.enc(base.sextend.i64.i32, *r.urm_noflags.rex(0x63, w=1))
# movzbl
X86_32.enc(base.uextend.i32.i8, *r.urm_noflags_abcd(0x0f, 0xb6))
X86_64.enc(base.uextend.i32.i8, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.uextend.i32.i8, *r.urm_noflags_abcd(0x0f, 0xb6))
# movzwl
X86_32.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
X86_64.enc(base.uextend.i32.i16, *r.urm_noflags.rex(0x0f, 0xb7))
X86_64.enc(base.uextend.i32.i16, *r.urm_noflags(0x0f, 0xb7))
# movzbq, encoded as movzbl because it's equivalent and shorter
X86_64.enc(base.uextend.i64.i8, *r.urm_noflags.rex(0x0f, 0xb6))
X86_64.enc(base.uextend.i64.i8, *r.urm_noflags(0x0f, 0xb6))
# movzwq, encoded as movzwl because it's equivalent and shorter
X86_64.enc(base.uextend.i64.i16, *r.urm_noflags.rex(0x0f, 0xb7))
X86_64.enc(base.uextend.i64.i16, *r.urm_noflags(0x0f, 0xb7))
# A 32-bit register copy clears the high 32 bits.
X86_64.enc(base.uextend.i64.i32, *r.umr.rex(0x89))
X86_64.enc(base.uextend.i64.i32, *r.umr(0x89))
#
# Floating point
#
# Floating-point constants equal to 0.0 can be encoded using either
# `xorps` or `xorpd`, for 32-bit and 64-bit floats respectively.
X86_32.enc(base.f32const, *r.f32imm_z(0x0f, 0x57),
instp=IsZero32BitFloat(UnaryIeee32.imm))
X86_32.enc(base.f64const, *r.f64imm_z(0x66, 0x0f, 0x57),
instp=IsZero64BitFloat(UnaryIeee64.imm))
enc_x86_64_instp(base.f32const, r.f32imm_z,
IsZero32BitFloat(UnaryIeee32.imm), 0x0f, 0x57)
enc_x86_64_instp(base.f64const, r.f64imm_z,
IsZero64BitFloat(UnaryIeee64.imm), 0x66, 0x0f, 0x57)
# movd
enc_both(base.bitcast.f32.i32, r.frurm, 0x66, 0x0f, 0x6e)
enc_both(base.bitcast.i32.f32, r.rfumr, 0x66, 0x0f, 0x7e)
# movq
X86_64.enc(base.bitcast.f64.i64, *r.frurm.rex(0x66, 0x0f, 0x6e, w=1))
X86_64.enc(base.bitcast.i64.f64, *r.rfumr.rex(0x66, 0x0f, 0x7e, w=1))
# movaps
enc_both(base.copy.f32, r.furm, 0x0f, 0x28)
enc_both(base.copy.f64, r.furm, 0x0f, 0x28)
# For x86-64, only define REX forms for now, since we can't describe the
# special regunit immediate operands with the current constraint language.
X86_32.enc(base.regmove.f32, *r.frmov(0x0f, 0x28))
X86_64.enc(base.regmove.f32, *r.frmov.rex(0x0f, 0x28))
# For x86-64, only define REX forms for now, since we can't describe the
# special regunit immediate operands with the current constraint language.
X86_32.enc(base.regmove.f64, *r.frmov(0x0f, 0x28))
X86_64.enc(base.regmove.f64, *r.frmov.rex(0x0f, 0x28))
# cvtsi2ss
enc_i32_i64(base.fcvt_from_sint.f32, r.frurm, 0xf3, 0x0f, 0x2a)
# cvtsi2sd
enc_i32_i64(base.fcvt_from_sint.f64, r.frurm, 0xf2, 0x0f, 0x2a)
# cvtss2sd
enc_both(base.fpromote.f64.f32, r.furm, 0xf3, 0x0f, 0x5a)
# cvtsd2ss
enc_both(base.fdemote.f32.f64, r.furm, 0xf2, 0x0f, 0x5a)
# cvttss2si
enc_both(x86.cvtt2si.i32.f32, r.rfurm, 0xf3, 0x0f, 0x2c)
X86_64.enc(x86.cvtt2si.i64.f32, *r.rfurm.rex(0xf3, 0x0f, 0x2c, w=1))
# cvttsd2si
enc_both(x86.cvtt2si.i32.f64, r.rfurm, 0xf2, 0x0f, 0x2c)
X86_64.enc(x86.cvtt2si.i64.f64, *r.rfurm.rex(0xf2, 0x0f, 0x2c, w=1))
# Exact square roots.
enc_both(base.sqrt.f32, r.furm, 0xf3, 0x0f, 0x51)
enc_both(base.sqrt.f64, r.furm, 0xf2, 0x0f, 0x51)
# Rounding. The recipe looks at the opcode to pick an immediate.
for inst in [
base.nearest,
base.floor,
base.ceil,
base.trunc]:
enc_both(inst.f32, r.furmi_rnd, 0x66, 0x0f, 0x3a, 0x0a, isap=use_sse41)
enc_both(inst.f64, r.furmi_rnd, 0x66, 0x0f, 0x3a, 0x0b, isap=use_sse41)
# Binary arithmetic ops.
for inst, opc in [
(base.fadd, 0x58),
(base.fsub, 0x5c),
(base.fmul, 0x59),
(base.fdiv, 0x5e),
(x86.fmin, 0x5d),
(x86.fmax, 0x5f)]:
enc_both(inst.f32, r.fa, 0xf3, 0x0f, opc)
enc_both(inst.f64, r.fa, 0xf2, 0x0f, opc)
# Binary bitwise ops.
for inst, opc in [
(base.band, 0x54),
(base.bor, 0x56),
(base.bxor, 0x57)]:
enc_both(inst.f32, r.fa, 0x0f, opc)
enc_both(inst.f64, r.fa, 0x0f, opc)
# The `andnps(x, y)` instruction computes `~x & y`, while `band_not(x, y)` is `x & ~y`.
enc_both(base.band_not.f32, r.fax, 0x0f, 0x55)
enc_both(base.band_not.f64, r.fax, 0x0f, 0x55)
# Comparisons.
#
# This only covers the condition codes in `supported_floatccs`, the rest are
# handled by legalization patterns.
enc_both(base.fcmp.f32, r.fcscc, 0x0f, 0x2e)
enc_both(base.fcmp.f64, r.fcscc, 0x66, 0x0f, 0x2e)
enc_both(base.ffcmp.f32, r.fcmp, 0x0f, 0x2e)
enc_both(base.ffcmp.f64, r.fcmp, 0x66, 0x0f, 0x2e)

View File

@@ -0,0 +1,173 @@
"""
Supplementary instruction definitions for x86.
This module defines additional instructions that are useful only to the x86
target ISA.
"""
from base.types import iflags
from cdsl.operands import Operand
from cdsl.typevar import TypeVar
from cdsl.instructions import Instruction, InstructionGroup
GROUP = InstructionGroup("x86", "x86-specific instruction set")
iWord = TypeVar('iWord', 'A scalar integer machine word', ints=(32, 64))
nlo = Operand('nlo', iWord, doc='Low part of numerator')
nhi = Operand('nhi', iWord, doc='High part of numerator')
d = Operand('d', iWord, doc='Denominator')
q = Operand('q', iWord, doc='Quotient')
r = Operand('r', iWord, doc='Remainder')
udivmodx = Instruction(
'x86_udivmodx', r"""
Extended unsigned division.
Concatenate the bits in `nhi` and `nlo` to form the numerator.
Interpret the bits as an unsigned number and divide by the unsigned
denominator `d`. Trap when `d` is zero or if the quotient is larger
than the range of the output.
Return both quotient and remainder.
""",
ins=(nlo, nhi, d), outs=(q, r), can_trap=True)
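# Illustrative semantics for the i64 case: the numerator is the 128-bit
# concatenation nhi:nlo, and the instruction traps if the quotient does
# not fit in 64 bits (values here are hypothetical).
_nlo, _nhi, _d = 5, 1, 16
_q, _r = divmod((_nhi << 64) | _nlo, _d)
assert (_q, _r) == (1 << 60, 5) and _q < (1 << 64)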
sdivmodx = Instruction(
'x86_sdivmodx', r"""
Extended signed division.
Concatenate the bits in `nhi` and `nlo` to form the numerator.
Interpret the bits as a signed number and divide by the signed
denominator `d`. Trap when `d` is zero or if the quotient is outside
the range of the output.
Return both quotient and remainder.
""",
ins=(nlo, nhi, d), outs=(q, r), can_trap=True)
argL = Operand('argL', iWord)
argR = Operand('argR', iWord)
resLo = Operand('resLo', iWord)
resHi = Operand('resHi', iWord)
umulx = Instruction(
'x86_umulx', r"""
Unsigned integer multiplication, producing a double-length result.
Polymorphic over all scalar integer types, but does not support vector
types.
""",
ins=(argL, argR), outs=(resLo, resHi))
smulx = Instruction(
'x86_smulx', r"""
Signed integer multiplication, producing a double-length result.
Polymorphic over all scalar integer types, but does not support vector
types.
""",
ins=(argL, argR), outs=(resLo, resHi))
Float = TypeVar(
'Float', 'A scalar or vector floating point number',
floats=True, simd=True)
IntTo = TypeVar(
'IntTo', 'An integer type with the same number of lanes',
ints=(32, 64), simd=True)
x = Operand('x', Float)
a = Operand('a', IntTo)
cvtt2si = Instruction(
'x86_cvtt2si', r"""
Convert with truncation floating point to signed integer.
The source floating point operand is converted to a signed integer by
rounding towards zero. If the result can't be represented in the output
type, returns the smallest signed value the output type can represent.
This instruction does not trap.
""",
ins=x, outs=a)
x = Operand('x', Float)
a = Operand('a', Float)
y = Operand('y', Float)
fmin = Instruction(
'x86_fmin', r"""
Floating point minimum with x86 semantics.
This is equivalent to the C ternary operator `x < y ? x : y` which
differs from :inst:`fmin` when either operand is NaN or when comparing
+0.0 to -0.0.
When the two operands don't compare as LT, `y` is returned unchanged,
even if it is a signalling NaN.
""",
ins=(x, y), outs=a)
fmax = Instruction(
'x86_fmax', r"""
Floating point maximum with x86 semantics.
This is equivalent to the C ternary operator `x > y ? x : y` which
differs from :inst:`fmax` when either operand is NaN or when comparing
+0.0 to -0.0.
When the two operands don't compare as GT, `y` is returned unchanged,
even if it is a signalling NaN.
""",
ins=(x, y), outs=a)
x = Operand('x', iWord)
push = Instruction(
'x86_push', r"""
Pushes a value onto the stack.
Decrements the stack pointer and stores the specified value on to the top.
This is polymorphic in i32 and i64. However, it is only implemented for i64
in 64-bit mode, and only for i32 in 32-bit mode.
""",
ins=x, can_store=True, other_side_effects=True)
pop = Instruction(
'x86_pop', r"""
Pops a value from the stack.
Loads a value from the top of the stack and then increments the stack
pointer.
This is polymorphic in i32 and i64. However, it is only implemented for i64
in 64-bit mode, and only for i32 in 32-bit mode.
""",
outs=x, can_load=True, other_side_effects=True)
y = Operand('y', iWord)
rflags = Operand('rflags', iflags)
bsr = Instruction(
'x86_bsr', r"""
Bit Scan Reverse -- returns the bit-index of the most significant 1
in the word. Result is undefined if the argument is zero. However, it
sets the Z flag depending on the argument, so it is at least easy to
detect and handle that case.
This is polymorphic in i32 and i64. It is implemented for both i64 and
i32 in 64-bit mode, and only for i32 in 32-bit mode.
""",
ins=x, outs=(y, rflags))
bsf = Instruction(
'x86_bsf', r"""
Bit Scan Forwards -- returns the bit-index of the least significant 1
in the word. Is otherwise identical to 'bsr', just above.
""",
ins=x, outs=(y, rflags))
GROUP.close()

View File

@@ -0,0 +1,229 @@
"""
Custom legalization patterns for x86.
"""
from __future__ import absolute_import
from cdsl.ast import Var
from cdsl.xform import Rtl, XFormGroup
from base.immediates import imm64, intcc, floatcc
from base import legalize as shared
from base import instructions as insts
from . import instructions as x86
from .defs import ISA
x86_expand = XFormGroup(
'x86_expand',
"""
Legalize instructions by expansion.
Use x86-specific instructions if needed.
""",
isa=ISA, chain=shared.expand_flags)
a = Var('a')
dead = Var('dead')
x = Var('x')
xhi = Var('xhi')
y = Var('y')
a1 = Var('a1')
a2 = Var('a2')
#
# Division and remainder.
#
# The srem expansion requires custom code because `srem INT_MIN, -1` is not
# allowed to trap. The other ops need to check `avoid_div_traps`.
x86_expand.custom_legalize(insts.sdiv, 'expand_sdivrem')
x86_expand.custom_legalize(insts.srem, 'expand_sdivrem')
x86_expand.custom_legalize(insts.udiv, 'expand_udivrem')
x86_expand.custom_legalize(insts.urem, 'expand_udivrem')
#
# Double length (widening) multiplication
#
resLo = Var('resLo')
resHi = Var('resHi')
x86_expand.legalize(
resHi << insts.umulhi(x, y),
Rtl(
(resLo, resHi) << x86.umulx(x, y)
))
x86_expand.legalize(
resHi << insts.smulhi(x, y),
Rtl(
(resLo, resHi) << x86.smulx(x, y)
))
# Floating point condition codes.
#
# The 8 condition codes in `supported_floatccs` are directly supported by a
# `ucomiss` or `ucomisd` instruction. The remaining codes need legalization
# patterns.
# Equality needs an explicit `ord` test which checks the parity bit.
x86_expand.legalize(
a << insts.fcmp(floatcc.eq, x, y),
Rtl(
a1 << insts.fcmp(floatcc.ord, x, y),
a2 << insts.fcmp(floatcc.ueq, x, y),
a << insts.band(a1, a2)
))
x86_expand.legalize(
a << insts.fcmp(floatcc.ne, x, y),
Rtl(
a1 << insts.fcmp(floatcc.uno, x, y),
a2 << insts.fcmp(floatcc.one, x, y),
a << insts.bor(a1, a2)
))
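# Illustrative check of the two expansions above: `eq` must be ordered-and-
# equal, hence ord AND ueq; `ne` is unordered-or-unequal, hence uno OR one.
def _fcmp_eq(x, y):
    # Hypothetical model for illustration only.
    import math
    unordered = math.isnan(x) or math.isnan(y)
    return (not unordered) and (unordered or x == y)
assert _fcmp_eq(2.0, 2.0) and not _fcmp_eq(float('nan'), 2.0)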
# Inequalities that need to be reversed.
for cc, rev_cc in [
(floatcc.lt, floatcc.gt),
(floatcc.le, floatcc.ge),
(floatcc.ugt, floatcc.ult),
(floatcc.uge, floatcc.ule)]:
x86_expand.legalize(
a << insts.fcmp(cc, x, y),
Rtl(
a << insts.fcmp(rev_cc, y, x)
))
# We need to modify the CFG for min/max legalization.
x86_expand.custom_legalize(insts.fmin, 'expand_minmax')
x86_expand.custom_legalize(insts.fmax, 'expand_minmax')
# Conversions from unsigned need special handling.
x86_expand.custom_legalize(insts.fcvt_from_uint, 'expand_fcvt_from_uint')
# Conversions from float to int can trap and modify the control flow graph.
x86_expand.custom_legalize(insts.fcvt_to_sint, 'expand_fcvt_to_sint')
x86_expand.custom_legalize(insts.fcvt_to_uint, 'expand_fcvt_to_uint')
x86_expand.custom_legalize(insts.fcvt_to_sint_sat, 'expand_fcvt_to_sint_sat')
x86_expand.custom_legalize(insts.fcvt_to_uint_sat, 'expand_fcvt_to_uint_sat')
# Count leading and trailing zeroes, for baseline x86_64
c_minus_one = Var('c_minus_one')
c_thirty_one = Var('c_thirty_one')
c_thirty_two = Var('c_thirty_two')
c_sixty_three = Var('c_sixty_three')
c_sixty_four = Var('c_sixty_four')
index1 = Var('index1')
r2flags = Var('r2flags')
index2 = Var('index2')
x86_expand.legalize(
a << insts.clz.i64(x),
Rtl(
c_minus_one << insts.iconst(imm64(-1)),
c_sixty_three << insts.iconst(imm64(63)),
(index1, r2flags) << x86.bsr(x),
index2 << insts.selectif(intcc.eq, r2flags, c_minus_one, index1),
a << insts.isub(c_sixty_three, index2),
))
x86_expand.legalize(
a << insts.clz.i32(x),
Rtl(
c_minus_one << insts.iconst(imm64(-1)),
c_thirty_one << insts.iconst(imm64(31)),
(index1, r2flags) << x86.bsr(x),
index2 << insts.selectif(intcc.eq, r2flags, c_minus_one, index1),
a << insts.isub(c_thirty_one, index2),
))
x86_expand.legalize(
a << insts.ctz.i64(x),
Rtl(
c_sixty_four << insts.iconst(imm64(64)),
(index1, r2flags) << x86.bsf(x),
a << insts.selectif(intcc.eq, r2flags, c_sixty_four, index1),
))
x86_expand.legalize(
a << insts.ctz.i32(x),
Rtl(
c_thirty_two << insts.iconst(imm64(32)),
(index1, r2flags) << x86.bsf(x),
a << insts.selectif(intcc.eq, r2flags, c_thirty_two, index1),
))
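# Illustrative mirror of the four expansions above (i64 shown): bsr/bsf
# leave their result undefined for a zero input but set ZF, which the
# selectif consumes to substitute the sentinel constant.
def _clz64(x):
    index = x.bit_length() - 1 if x else -1   # bsr result; -1 chosen on ZF
    return 63 - index
def _ctz64(x):
    return (x & -x).bit_length() - 1 if x else 64   # bsf; 64 chosen on ZF
assert _clz64(0) == 64 and _clz64(1 << 63) == 0
assert _ctz64(0) == 64 and _ctz64(8) == 3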
# Population count for baseline x86_64
qv1 = Var('qv1')
qv3 = Var('qv3')
qv4 = Var('qv4')
qv5 = Var('qv5')
qv6 = Var('qv6')
qv7 = Var('qv7')
qv8 = Var('qv8')
qv9 = Var('qv9')
qv10 = Var('qv10')
qv11 = Var('qv11')
qv12 = Var('qv12')
qv13 = Var('qv13')
qv14 = Var('qv14')
qv15 = Var('qv15')
qv16 = Var('qv16')
qc77 = Var('qc77')
qc0F = Var('qc0F')
qc01 = Var('qc01')
x86_expand.legalize(
qv16 << insts.popcnt.i64(qv1),
Rtl(
qv3 << insts.ushr_imm(qv1, imm64(1)),
qc77 << insts.iconst(imm64(0x7777777777777777)),
qv4 << insts.band(qv3, qc77),
qv5 << insts.isub(qv1, qv4),
qv6 << insts.ushr_imm(qv4, imm64(1)),
qv7 << insts.band(qv6, qc77),
qv8 << insts.isub(qv5, qv7),
qv9 << insts.ushr_imm(qv7, imm64(1)),
qv10 << insts.band(qv9, qc77),
qv11 << insts.isub(qv8, qv10),
qv12 << insts.ushr_imm(qv11, imm64(4)),
qv13 << insts.iadd(qv11, qv12),
qc0F << insts.iconst(imm64(0x0F0F0F0F0F0F0F0F)),
qv14 << insts.band(qv13, qc0F),
qc01 << insts.iconst(imm64(0x0101010101010101)),
qv15 << insts.imul(qv14, qc01),
qv16 << insts.ushr_imm(qv15, imm64(56))
))
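# Illustrative pure-Python mirror of the qv* expansion above: the classic
# three-subtraction nibble popcount, with the multiply masked to 64 bits to
# model the wrapping `imul`.
def _popcnt64(x):
    m = 0x7777777777777777
    t = (x >> 1) & m
    x -= t
    t = (t >> 1) & m
    x -= t
    t = (t >> 1) & m
    x -= t
    x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0f
    return ((x * 0x0101010101010101) & (2**64 - 1)) >> 56
assert _popcnt64(0) == 0 and _popcnt64(2**64 - 1) == 64
assert _popcnt64(0xdeadbeef) == bin(0xdeadbeef).count('1')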
lv1 = Var('lv1')
lv3 = Var('lv3')
lv4 = Var('lv4')
lv5 = Var('lv5')
lv6 = Var('lv6')
lv7 = Var('lv7')
lv8 = Var('lv8')
lv9 = Var('lv9')
lv10 = Var('lv10')
lv11 = Var('lv11')
lv12 = Var('lv12')
lv13 = Var('lv13')
lv14 = Var('lv14')
lv15 = Var('lv15')
lv16 = Var('lv16')
lc77 = Var('lc77')
lc0F = Var('lc0F')
lc01 = Var('lc01')
x86_expand.legalize(
lv16 << insts.popcnt.i32(lv1),
Rtl(
lv3 << insts.ushr_imm(lv1, imm64(1)),
lc77 << insts.iconst(imm64(0x77777777)),
lv4 << insts.band(lv3, lc77),
lv5 << insts.isub(lv1, lv4),
lv6 << insts.ushr_imm(lv4, imm64(1)),
lv7 << insts.band(lv6, lc77),
lv8 << insts.isub(lv5, lv7),
lv9 << insts.ushr_imm(lv7, imm64(1)),
lv10 << insts.band(lv9, lc77),
lv11 << insts.isub(lv8, lv10),
lv12 << insts.ushr_imm(lv11, imm64(4)),
lv13 << insts.iadd(lv11, lv12),
lc0F << insts.iconst(imm64(0x0F0F0F0F)),
lv14 << insts.band(lv13, lc0F),
lc01 << insts.iconst(imm64(0x01010101)),
lv15 << insts.imul(lv14, lc01),
lv16 << insts.ushr_imm(lv15, imm64(24))
))

File diff suppressed because it is too large

View File

@@ -0,0 +1,61 @@
"""
x86 register banks.
While the floating-point registers are straightforward, the general purpose
register bank has a few quirks on x86. We have these encodings of the 8-bit
registers:
     I32  I64  |  16b  32b  64b
000  AL   AL   |  AX   EAX  RAX
001  CL   CL   |  CX   ECX  RCX
010  DL   DL   |  DX   EDX  RDX
011  BL   BL   |  BX   EBX  RBX
100  AH   SPL  |  SP   ESP  RSP
101  CH   BPL  |  BP   EBP  RBP
110  DH   SIL  |  SI   ESI  RSI
111  BH   DIL  |  DI   EDI  RDI
Here, the I64 column refers to the registers you get with a REX prefix. Without
the REX prefix, you get the I32 registers.
The 8-bit registers are not that useful since WebAssembly only has i32 and i64
data types, and the H-registers even less so. Rather than trying to model the
H-registers accurately, we'll avoid using them in both I32 and I64 modes.
"""
from __future__ import absolute_import
from cdsl.registers import RegBank, RegClass, Stack
from .defs import ISA
IntRegs = RegBank(
'IntRegs', ISA,
'General purpose registers',
units=16, prefix='r',
names='rax rcx rdx rbx rsp rbp rsi rdi'.split())
FloatRegs = RegBank(
'FloatRegs', ISA,
'SSE floating point registers',
units=16, prefix='xmm')
FlagRegs = RegBank(
'FlagRegs', ISA,
'Flag registers',
units=1,
pressure_tracking=False,
names=['rflags'])
GPR = RegClass(IntRegs)
GPR8 = GPR[0:8]    # rax-rdi: addressable without a REX prefix.
ABCD = GPR[0:4]    # rax/rcx/rdx/rbx: have 8-bit low halves without REX.
FPR = RegClass(FloatRegs)
FPR8 = FPR[0:8]    # xmm0-xmm7: addressable without a REX prefix.
FLAG = RegClass(FlagRegs)
# Constraints for stack operands.
# Stack operand with a 32-bit signed displacement from either RBP or RSP.
StackGPR32 = Stack(GPR)
StackFPR32 = Stack(FPR)
RegClass.extract_names(globals())

View File

@@ -0,0 +1,54 @@
"""
x86 settings.
"""
from __future__ import absolute_import
from cdsl.settings import SettingGroup, BoolSetting, Preset
from cdsl.predicates import And
import base.settings as shared
from .defs import ISA
ISA.settings = SettingGroup('x86', parent=shared.group)
# The has_* settings here correspond to CPUID bits.
# CPUID.01H:ECX
has_sse3 = BoolSetting("SSE3: CPUID.01H:ECX.SSE3[bit 0]")
has_ssse3 = BoolSetting("SSSE3: CPUID.01H:ECX.SSSE3[bit 9]")
has_sse41 = BoolSetting("SSE4.1: CPUID.01H:ECX.SSE4_1[bit 19]")
has_sse42 = BoolSetting("SSE4.2: CPUID.01H:ECX.SSE4_2[bit 20]")
has_popcnt = BoolSetting("POPCNT: CPUID.01H:ECX.POPCNT[bit 23]")
has_avx = BoolSetting("AVX: CPUID.01H:ECX.AVX[bit 28]")
# CPUID.(EAX=07H, ECX=0H):EBX
has_bmi1 = BoolSetting("BMI1: CPUID.(EAX=07H, ECX=0H):EBX.BMI1[bit 3]")
has_bmi2 = BoolSetting("BMI2: CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]")
# CPUID.EAX=80000001H:ECX
has_lzcnt = BoolSetting("LZCNT: CPUID.EAX=80000001H:ECX.LZCNT[bit 5]")
# The use_* settings determine whether a feature can be used.
use_sse41 = And(has_sse41)
use_sse42 = And(has_sse42, use_sse41)
use_popcnt = And(has_popcnt, has_sse42)
use_bmi1 = And(has_bmi1)
use_lzcnt = And(has_lzcnt)
# Presets corresponding to x86 CPUs.
baseline = Preset()
nehalem = Preset(
has_sse3, has_ssse3, has_sse41, has_sse42, has_popcnt)
haswell = Preset(nehalem, has_bmi1, has_bmi2, has_lzcnt)
broadwell = Preset(haswell)
skylake = Preset(broadwell)
cannonlake = Preset(skylake)
icelake = Preset(cannonlake)
znver1 = Preset(
has_sse3, has_ssse3, has_sse41, has_sse42, has_popcnt,
has_bmi1, has_bmi2, has_lzcnt)
ISA.settings.close(globals())