Check in the wasmstandalone code.

This is based on the code in https://github.com/denismerigoux/cretonne/commits/wasm2cretonne
before wasmstandalone was removed, with minor updates for the new library structure.
It is not yet updated for the latest cretonne API changes.
This commit is contained in:
Dan Gohman
2017-09-05 17:06:51 -07:00
parent 8f6957296e
commit d0fe50a2a8
679 changed files with 31 additions and 57859 deletions

1
docs/.gitignore vendored
View File

@@ -1 +0,0 @@
_build

View File

@@ -1,196 +0,0 @@
# Makefile for Sphinx documentation
#
# You can set these variables from the command line.
SPHINXOPTS =
# `?=` lets a SPHINXBUILD exported in the environment take effect, which is
# what the error message below tells users to do. A plain `=` would override
# the environment value and make that advice ineffective.
SPHINXBUILD ?= sphinx-build
SPHINXABUILD = sphinx-autobuild
PAPER =
BUILDDIR = _build
# User-friendly check for sphinx-build
ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
endif
# Internal variables.
PAPEROPT_a4 = -D latex_paper_size=a4
PAPEROPT_letter = -D latex_paper_size=letter
ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# the i18n builder cannot share the environment and doctrees with the others
I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
# Every target in this file is a command, not a file, so all of them are
# declared phony; otherwise a stray file or directory named e.g. `info` or
# `xml` would make the target look up to date.
.PHONY: help clean html autohtml dirhtml singlehtml pickle json htmlhelp \
        qthelp applehelp devhelp epub latex latexpdf latexpdfja text man \
        texinfo info gettext changes linkcheck doctest coverage xml pseudoxml
# Default target: print the list of targets this Makefile provides.
# Every target defined in this file is listed, including `autohtml` and
# `clean`.
help:
	@echo "Please use \`make <target>' where <target> is one of"
	@echo " html to make standalone HTML files"
	@echo " autohtml to make standalone HTML files, then run sphinx-autobuild on them"
	@echo " dirhtml to make HTML files named index.html in directories"
	@echo " singlehtml to make a single large HTML file"
	@echo " pickle to make pickle files"
	@echo " json to make JSON files"
	@echo " htmlhelp to make HTML files and a HTML help project"
	@echo " qthelp to make HTML files and a qthelp project"
	@echo " applehelp to make an Apple Help Book"
	@echo " devhelp to make HTML files and a Devhelp project"
	@echo " epub to make an epub"
	@echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
	@echo " latexpdf to make LaTeX files and run them through pdflatex"
	@echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
	@echo " text to make text files"
	@echo " man to make manual pages"
	@echo " texinfo to make Texinfo files"
	@echo " info to make Texinfo files and run them through makeinfo"
	@echo " gettext to make PO message catalogs"
	@echo " changes to make an overview of all changed/added/deprecated items"
	@echo " xml to make Docutils-native XML files"
	@echo " pseudoxml to make pseudoxml-XML files for display purposes"
	@echo " linkcheck to check all external links for integrity"
	@echo " doctest to run all doctests embedded in the documentation (if enabled)"
	@echo " coverage to run coverage check of the documentation (if enabled)"
	@echo " clean to remove everything under the build directory"
# Remove everything under the build directory.
# BUILDDIR can be overridden from the command line; if it were empty the
# recipe would expand to `rm -rf /*`, so refuse to run in that case.
clean:
	$(if $(strip $(BUILDDIR)),,$(error BUILDDIR is empty; refusing to run rm -rf on /*))
	rm -rf $(BUILDDIR)/*
# Each builder target below writes into $(BUILDDIR)/<builder>. Where the
# target is named after its builder, $@ supplies that name.
html:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/$@."

# Runs the html target first, then hands the same output tree to
# $(SPHINXABUILD).
autohtml: html
	$(SPHINXABUILD) -z ../lib/cretonne/meta --ignore '.*.sw?' -b html -E $(ALLSPHINXOPTS) $(BUILDDIR)/html

dirhtml:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The HTML pages are in $(BUILDDIR)/$@."

singlehtml:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The HTML page is in $(BUILDDIR)/$@."

pickle:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished; now you can process the pickle files."

json:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished; now you can process the JSON files."

htmlhelp:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished; now you can run HTML Help Workshop with the" \
	      ".hhp project file in $(BUILDDIR)/$@."
# Build the Qt help project and print the follow-up commands.
# The inner double quotes around qcollectiongenerator are escaped so the
# shell prints them instead of treating them as string delimiters.
qthelp:
	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
	@echo
	@echo "Build finished; now you can run \"qcollectiongenerator\" with the" \
	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/cretonne.qhcp"
	@echo "To view the help file:"
	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/cretonne.qhc"
# Builder targets named after their Sphinx builder use $@ for both the
# builder name and the output directory under $(BUILDDIR).
applehelp:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The help book is in $(BUILDDIR)/$@."
	@echo "N.B. You won't be able to view it unless you put it in" \
	      "~/Library/Documentation/Help or install it in your application" \
	      "bundle."

devhelp:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished."
	@echo "To view the help file:"
	@echo "# mkdir -p $$HOME/.local/share/devhelp/cretonne"
	@echo "# ln -s $(BUILDDIR)/$@ $$HOME/.local/share/devhelp/cretonne"
	@echo "# devhelp"

epub:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The epub file is in $(BUILDDIR)/$@."

latex:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/$@."
	@echo "Run \`make' in that directory to run these through (pdf)latex" \
	      "(use \`make latexpdf' here to do that automatically)."

# The two PDF targets reuse the latex builder and its output directory, then
# run the Makefile found in that directory via a sub-make.
latexpdf:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through pdflatex..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

latexpdfja:
	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
	@echo "Running LaTeX files through platex and dvipdfmx..."
	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."

text:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The text files are in $(BUILDDIR)/$@."

man:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The manual pages are in $(BUILDDIR)/$@."

texinfo:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/$@."
	@echo "Run \`make' in that directory to run these through makeinfo" \
	      "(use \`make info' here to do that automatically)."
# Build the Texinfo sources, then run the generated Makefile's `info` target.
# The sub-make is invoked through $(MAKE) rather than a literal `make` so that
# command-line flags such as -j and -n are passed on to it.
info:
	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
	@echo "Running Texinfo files through makeinfo..."
	$(MAKE) -C $(BUILDDIR)/texinfo info
	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
# gettext is the one builder here that uses $(I18NSPHINXOPTS) (no shared
# doctrees) and whose output directory differs from the target name.
gettext:
	$(SPHINXBUILD) -b $@ $(I18NSPHINXOPTS) $(BUILDDIR)/locale
	@echo
	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."

changes:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "The overview file is in $(BUILDDIR)/$@."

linkcheck:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Link check complete; look for any errors in the above output " \
	      "or in $(BUILDDIR)/$@/output.txt."

doctest:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo "Testing of doctests in the sources finished, look at the " \
	      "results in $(BUILDDIR)/$@/output.txt."

coverage:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo "Testing of coverage in the sources finished, look at the " \
	      "results in $(BUILDDIR)/$@/python.txt."

xml:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The XML files are in $(BUILDDIR)/$@."

pseudoxml:
	$(SPHINXBUILD) -b $@ $(ALLSPHINXOPTS) $(BUILDDIR)/$@
	@echo
	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/$@."

View File

@@ -1,199 +0,0 @@
*************************
Cretonne compared to LLVM
*************************
`LLVM <http://llvm.org>`_ is a collection of compiler components implemented as
a set of C++ libraries. It can be used to build both JIT compilers and static
compilers like `Clang <http://clang.llvm.org>`_, and it is deservedly very
popular. `Chris Lattner's chapter about LLVM
<http://www.aosabook.org/en/llvm.html>`_ in the `Architecture of Open Source
Applications <http://aosabook.org/en/index.html>`_ book gives an excellent
overview of the architecture and design of LLVM.
Cretonne and LLVM are superficially similar projects, so it is worth
highlighting some of the differences and similarities. Both projects:
- Use an ISA-agnostic input language in order to mostly abstract away the
differences between target instruction set architectures.
- Depend extensively on SSA form.
- Have both textual and in-memory forms of their primary intermediate language.
(LLVM also has a binary bitcode format; Cretonne doesn't.)
- Can target multiple ISAs.
- Can cross-compile by default without rebuilding the code generator.
Cretonne's scope is much smaller than that of LLVM. The classical three main
parts of a compiler are:
1. The language-dependent front end parses and type-checks the input program.
2. Common optimizations that are independent of both the input language and the
target ISA.
3. The code generator which depends strongly on the target ISA.
LLVM provides both common optimizations *and* a code generator. Cretonne only
provides the last part, the code generator. LLVM additionally provides
infrastructure for building assemblers and disassemblers. Cretonne does not
handle assembly at all---it only generates binary machine code.
Intermediate representations
============================
LLVM uses multiple intermediate representations as it translates a program to
binary machine code:
`LLVM IR <http://llvm.org/docs/LangRef.html>`_
This is the primary intermediate language which has textual, binary, and
in-memory representations. It serves two main purposes:
- An ISA-agnostic, stable(ish) input language that front ends can generate
easily.
- Intermediate representation for common mid-level optimizations. A large
library of code analysis and transformation passes operate on LLVM IR.
`SelectionDAG <http://llvm.org/docs/CodeGenerator.html#instruction-selection-section>`_
A graph-based representation of the code in a single basic block is used by
the instruction selector. It has both ISA-agnostic and ISA-specific
opcodes. These main passes are run on the SelectionDAG representation:
- Type legalization eliminates all value types that don't have a
representation in the target ISA registers.
- Operation legalization eliminates all opcodes that can't be mapped to
target ISA instructions.
- DAG-combine cleans up redundant code after the legalization passes.
- Instruction selection translates ISA-agnostic expressions to ISA-specific
instructions.
The SelectionDAG representation automatically eliminates common
subexpressions and dead code.
`MachineInstr <http://llvm.org/docs/CodeGenerator.html#machine-code-representation>`_
A linear representation of ISA-specific instructions that initially is in
SSA form, but it can also represent non-SSA form during and after register
allocation. Many low-level optimizations run on MI code. The most important
passes are:
- Scheduling.
- Register allocation.
`MC <http://llvm.org/docs/CodeGenerator.html#the-mc-layer>`_
MC serves as the output abstraction layer and is the basis for LLVM's
integrated assembler. It is used for:
- Branch relaxation.
- Emitting assembly or binary object code.
- Assemblers.
- Disassemblers.
There is an ongoing "global instruction selection" project to replace the
SelectionDAG representation with ISA-agnostic opcodes on the MachineInstr
representation. Some target ISAs have a fast instruction selector that can
translate simple code directly to MachineInstrs, bypassing SelectionDAG when
possible.
:doc:`Cretonne <langref>` uses a single intermediate language to cover these
levels of abstraction. This is possible in part because of Cretonne's smaller
scope.
- Cretonne does not provide assemblers and disassemblers, so it is not
necessary to be able to represent every weird instruction in an ISA. Only
those instructions that the code generator emits have a representation.
- Cretonne's opcodes are ISA-agnostic, but after legalization / instruction
selection, each instruction is annotated with an ISA-specific encoding which
represents a native instruction.
- SSA form is preserved throughout. After register allocation, each SSA value
is annotated with an assigned ISA register or stack slot.
The Cretonne intermediate language is similar to LLVM IR, but at a slightly
lower level of abstraction.
Program structure
-----------------
In LLVM IR, the largest representable unit is the *module* which corresponds
more or less to a C translation unit. It is a collection of functions and
global variables that may contain references to external symbols too.
In Cretonne IL, the largest representable unit is the *function*. This is so
that functions can easily be compiled in parallel without worrying about
references to shared data structures. Cretonne does not have any
inter-procedural optimizations like inlining.
An LLVM IR function is a graph of *basic blocks*. A Cretonne IL function is a
graph of *extended basic blocks* that may contain internal branch instructions.
The main difference is that an LLVM conditional branch instruction has two
target basic blocks---a true and a false edge. A Cretonne branch instruction
only has a single target and falls through to the next instruction when its
condition is false. The Cretonne representation is closer to how machine code
works; LLVM's representation is more abstract.
LLVM uses `phi instructions
<http://llvm.org/docs/LangRef.html#phi-instruction>`_ in its SSA
representation. Cretonne passes arguments to EBBs instead. The two
representations are equivalent, but the EBB arguments are better suited to
handle EBBs that may contain multiple branches to the same destination block
with different arguments. Passing arguments to an EBB looks a lot like passing
arguments to a function call, and the register allocator treats them very
similarly. Arguments are assigned to registers or stack locations.
Value types
-----------
:ref:`Cretonne's type system <value-types>` is mostly a subset of LLVM's type
system. It is less abstract and closer to the types that common ISA registers
can hold.
- Integer types are limited to powers of two from :cton:type:`i8` to
:cton:type:`i64`. LLVM can represent integer types of arbitrary bit width.
- Floating point types are limited to :cton:type:`f32` and :cton:type:`f64`
which is what WebAssembly provides. It is possible that 16-bit and 128-bit
types will be added in the future.
- Addresses are represented as integers---there are no Cretonne pointer types.
LLVM currently has rich pointer types that include the pointee type. It may
move to a simpler 'address' type in the future. Cretonne may add a single
address type too.
- SIMD vector types are limited to a power-of-two number of vector lanes up to
256. LLVM allows an arbitrary number of SIMD lanes.
- Cretonne has no aggregate types. LLVM has named and anonymous struct types as
well as array types.
Cretonne has multiple boolean types, whereas LLVM simply uses ``i1``. The sized
Cretonne boolean types are used to represent SIMD vector masks like ``b32x4``
where each lane is either all 0 or all 1 bits.
Cretonne instructions and function calls can return multiple result values. LLVM
instead models this by returning a single value of an aggregate type.
Instruction set
---------------
LLVM has a small well-defined basic instruction set and a large number of
intrinsics, some of which are ISA-specific. Cretonne has a larger instruction
set and no intrinsics. Some Cretonne instructions are ISA-specific.
Since Cretonne instructions are used all the way until the binary machine code
is emitted, there are opcodes for every native instruction that can be
generated. There is a lot of overlap between different ISAs, so for example the
:cton:inst:`iadd_imm` instruction is used by every ISA that can add an
immediate integer to a register. A simple RISC ISA like RISC-V can be defined
with only shared instructions, while an Intel ISA needs a number of specific
instructions to model addressing modes.
Undefined behavior
==================
Cretonne does not generally exploit undefined behavior in its optimizations.
LLVM's mid-level optimizations do, but it should be noted that LLVM's low-level code
generator rarely needs to make use of undefined behavior either.
LLVM provides ``nsw`` and ``nuw`` flags for its arithmetic that invoke
undefined behavior on overflow. Cretonne does not provide this functionality.
Its arithmetic instructions either produce a value or a trap.
LLVM has an ``unreachable`` instruction which is used to indicate impossible
code paths. Cretonne only has an explicit :cton:inst:`trap` instruction.
Cretonne does make assumptions about aliasing. For example, it assumes that it
has full control of the stack objects in a function, and that they can only be
modified by function calls if their addresses have escaped. It is quite likely
that Cretonne will admit more detailed aliasing annotations on load/store
instructions in the future. When these annotations are incorrect, undefined
behavior ensues.

View File

@@ -1,137 +0,0 @@
# -*- coding: utf-8 -*-
#
# cretonne documentation build configuration file, created by
# sphinx-quickstart on Fri Jan 8 10:11:19 2016.
#
# This file is execfile()d with the current directory set to its
# containing dir.
#
# Note that not all possible configuration values are present in this
# autogenerated file.
#
# All configuration values have a default; values that are commented out
# serve to show the default.
from __future__ import absolute_import
import sys
import os
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
# Both entries are inserted at position 0, so the one added last (the meta
# directory) is searched first.
sys.path.insert(0, os.path.abspath('.'))
# Also add the meta directory to sys.path so autodoc can find the Cretonne meta
# language definitions.
sys.path.insert(0, os.path.abspath('../lib/cretonne/meta'))
# -- General configuration ------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
# NOTE(review): 'cton_domain' and 'cton_lexer' are not 'sphinx.ext.*' names;
# presumably they are the local extension modules made importable by the
# sys.path entry for '.' above -- confirm against the docs directory.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.todo',
'sphinx.ext.mathjax',
'sphinx.ext.ifconfig',
'sphinx.ext.graphviz',
'sphinx.ext.inheritance_diagram',
'cton_domain',
'cton_lexer',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
# source_suffix = ['.rst', '.md']
source_suffix = '.rst'
# The master toctree document.
# This name is reused below as the start file for the LaTeX, manual page and
# Texinfo outputs.
master_doc = 'index'
# General information about the project.
project = u'cretonne'
copyright = u'2016, Cretonne Developers'
# `author` is reused below in the LaTeX, manual page and Texinfo entries.
author = u'Cretonne Developers'
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
#
# The short X.Y version.
version = u'0.0'
# The full version, including alpha/beta/rc tags.
release = u'0.0'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
# NOTE(review): newer Sphinx releases may expect a language code string such
# as 'en' here instead of None -- confirm against the Sphinx version in use.
language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# '_build' matches the default BUILDDIR used by the accompanying Makefile.
exclude_patterns = ['_build']
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = 'sphinx'
# If true, `todo` and `todoList` produce output, else they produce nothing.
todo_include_todos = True
# -- Options for HTML output ----------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
# NOTE(review): 'sphinx_rtd_theme' is presumably provided by a separately
# installed package rather than by Sphinx itself -- confirm it is listed in
# the documentation build requirements.
html_theme = 'sphinx_rtd_theme'
# Output file base name for HTML help builder.
htmlhelp_basename = 'cretonnedoc'
# -- Options for LaTeX output ---------------------------------------------
# Left empty: no LaTeX element overrides are configured here.
latex_elements = {
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'cretonne.tex', u'cretonne Documentation',
author, 'manual'),
]
# -- Options for manual page output ---------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'cretonne', u'cretonne Documentation',
[author], 1)
]
# -- Options for Texinfo output -------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
# NOTE(review): the description below still reads like the generated
# placeholder text -- consider replacing it with a real one-line summary.
texinfo_documents = [
(master_doc, 'cretonne', u'cretonne Documentation',
author, 'cretonne', 'One line description of project.',
'Miscellaneous'),
]
# -- Options for Graphviz -------------------------------------------------
# Render graphviz diagrams as SVG; inheritance diagrams are laid out with
# rankdir='TD'.
graphviz_output_format = 'svg'
inheritance_graph_attrs = dict(rankdir='TD')

View File

@@ -1,385 +0,0 @@
# -*- coding: utf-8 -*-
#
# Sphinx domain for documenting compiler intermediate languages.
#
# This defines a 'cton' Sphinx domain with the following directives and roles:
#
# .. cton:type:: type
# Document an IR type.
# .. cton:inst:: v1, v2 = inst op1, op2
# Document an IR instruction.
#
from __future__ import absolute_import
import re
from docutils import nodes
from docutils.parsers.rst import directives
from sphinx import addnodes
from sphinx.directives import ObjectDescription
from sphinx.domains import Domain, ObjType
from sphinx.locale import l_
from sphinx.roles import XRefRole
from sphinx.util.docfields import Field, GroupedField, TypedField
from sphinx.util.nodes import make_refnode
import sphinx.ext.autodoc
class CtonObject(ObjectDescription):
    """
    Common base class for the indexable objects of the Cretonne IL reference.

    Subclasses supply ``handle_signature`` and ``get_index_text``; this class
    takes care of registering the link target and the index entry.
    """

    option_spec = {
        'noindex': directives.flag,
        'module': directives.unchanged,
        'annotation': directives.unchanged,
    }

    def add_target_and_index(self, name, sig, signode):
        """
        Register ``name`` as a link target and add it to the index.

        :param name: The object name returned by :func:`handle_signature`.
        :param sig: The signature text.
        :param signode: The output node.
        """
        target_id = self.objtype + '-' + name
        if target_id not in self.state.document.ids:
            # First time this target is seen in the document: make the
            # signature node an explicit link target.
            signode['names'].append(target_id)
            signode['ids'].append(target_id)
            signode['first'] = (not self.names)
            self.state.document.note_explicit_target(signode)

            # Record the object in the domain inventory, warning when another
            # document already described an object with the same name.
            registry = self.env.domaindata['cton']['objects']
            if name in registry:
                previous_doc = self.env.doc2path(registry[name][0])
                self.state_machine.reporter.warning(
                    'duplicate Cretonne object description of %s, ' % name +
                    'other instance in ' + previous_doc,
                    line=self.lineno)
            registry[name] = (self.env.docname, self.objtype)

        index_text = self.get_index_text(name)
        if index_text:
            entry = ('single', index_text, target_id, '', None)
            self.indexnode['entries'].append(entry)
# Type variables are indicated as %T.
# Written as a raw string without a backslash: '%' needs no escaping in a
# regular expression, and '\%' in a plain string literal is an invalid escape
# sequence that newer Python versions warn about.
typevar = re.compile(r'(%[A-Z])')


def parse_type(name, signode):
    """
    Parse a type with embedded type vars and append to signode.

    Type variables (``%`` followed by one upper-case letter) are appended as
    emphasized text without the ``%``; every other piece is appended as a
    ``desc_name`` node.

    :param name: The type name, possibly containing ``%T``-style variables.
    :param signode: The output node that the parsed pieces are appended to.
    :returns: A string that can be compiled into a regular expression matching
        the type, with each type variable replaced by ``\\w+``.
    """
    re_str = ''
    for part in typevar.split(name):
        # Splitting on a captured group yields empty strings around adjacent
        # or leading/trailing matches; they carry no content.
        if part == '':
            continue
        if len(part) == 2 and part[0] == '%':
            # This is a type parameter. Don't display the %, use emphasis
            # instead.
            part = part[1]
            signode += nodes.emphasis(part, part)
            re_str += r'\w+'
        else:
            signode += addnodes.desc_name(part, part)
            re_str += re.escape(part)
    return re_str
class CtonType(CtonObject):
    """Directive that describes a single Cretonne IL type."""

    def handle_signature(self, sig, signode):
        """
        Render the type signature ``sig`` into ``signode``.

        The signature is stripped of surrounding whitespace and handed to
        ``parse_type``; the stripped text is returned as the global object
        name used by ``add_target_and_index``.
        """
        type_name = sig.strip()
        parse_type(type_name, signode)
        return type_name

    def get_index_text(self, name):
        """Return the index entry text for the type called ``name``."""
        suffix = ' (IL type)'
        return name + suffix
# Separators used in instruction signatures, with surrounding whitespace
# included in the match. Raw strings keep '\s' from being read as an
# (invalid) string escape sequence.
sep_equal = re.compile(r'\s*=\s*')
sep_comma = re.compile(r'\s*,\s*')


def parse_params(s, signode):
    """
    Append a comma-separated parameter list to ``signode``.

    :param s: The parameter list text, e.g. ``'v1, v2'``.
    :param signode: The output node; each parameter is appended as emphasized
        text, with a ``', '`` text node between consecutive parameters.
    """
    for i, p in enumerate(sep_comma.split(s)):
        if i != 0:
            signode += nodes.Text(', ')
        signode += nodes.emphasis(p, p)
class CtonInst(CtonObject):
    """A Cretonne IL instruction."""

    # Field lists recognized in the directive body: typed arguments and
    # results, grouped type variables and flags, and a single result type.
    doc_field_types = [
        TypedField('argument', label=l_('Arguments'),
                   names=('in', 'arg'),
                   typerolename='type', typenames=('type',)),
        TypedField('result', label=l_('Results'),
                   names=('out', 'result'),
                   typerolename='type', typenames=('type',)),
        GroupedField(
            'typevar', names=('typevar',), label=l_('Type Variables')),
        GroupedField('flag', names=('flag',), label=l_('Flags')),
        Field('resulttype', label=l_('Result type'), has_arg=False,
              names=('rtype',)),
    ]

    def handle_signature(self, sig, signode):
        """
        Parse an instruction signature and append its rendering to signode.

        Accepted forms are::

            v1, v2 = foo op1, op2
            v1 = foo
            foo op1

        :param sig: The signature text.
        :param signode: The output node.
        :returns: The instruction name, used as the global object name.
        :raises ValueError: If the signature contains no instruction name
            (for example ``'v1 ='``). The signature is validated before
            anything is appended to ``signode``.
        """
        # Split off the optional result list at the first '='.
        parts = sep_equal.split(sig, maxsplit=1)
        results = parts[0] if len(parts) == 2 else None

        # Parse 'name arg, arg'
        words = parts[-1].split(None, 1)
        if not words:
            raise ValueError(
                'missing instruction name in signature: %r' % (sig,))
        name = words[0]

        if results is not None:
            # Outgoing parameters.
            parse_params(results, signode)
            signode += nodes.Text(' = ')

        signode += addnodes.desc_name(name, name)

        if len(words) == 2:
            # Incoming parameters.
            signode += nodes.Text(' ')
            parse_params(words[1], signode)
        return name

    def get_index_text(self, name):
        """Return the index entry text, which is the bare instruction name."""
        return name
class CtonInstGroup(CtonObject):
    """
    A Cretonne IL instruction group.

    Adds nothing of its own: all behaviour is inherited from ``CtonObject``.
    """
class CretonneDomain(Domain):
    """Sphinx domain for Cretonne intermediate language objects."""

    name = 'cton'
    label = 'Cretonne'

    object_types = {
        'type': ObjType(l_('type'), 'type'),
        'inst': ObjType(l_('instruction'), 'inst')
    }

    directives = {
        'type': CtonType,
        'inst': CtonInst,
        'instgroup': CtonInstGroup,
    }

    roles = {
        'type': XRefRole(),
        'inst': XRefRole(),
        'instgroup': XRefRole(),
    }

    initial_data = {
        'objects': {},  # fullname -> docname, objtype
    }

    def clear_doc(self, docname):
        """Drop every inventory entry that was recorded for ``docname``."""
        registry = self.data['objects']
        # Collect the keys first so the dict is not mutated while iterating.
        stale = [fullname
                 for fullname, (source_doc, _kind) in registry.items()
                 if source_doc == docname]
        for fullname in stale:
            del registry[fullname]

    def merge_domaindata(self, docnames, otherdata):
        """Copy entries from ``otherdata`` whose document is in ``docnames``."""
        registry = self.data['objects']
        for fullname, (source_doc, objtype) in otherdata['objects'].items():
            if source_doc in docnames:
                registry[fullname] = (source_doc, objtype)

    def resolve_xref(self, env, fromdocname, builder, typ, target, node,
                     contnode):
        """Return a reference node for ``target``, or None when unknown."""
        entry = self.data['objects'].get(target)
        if entry is None:
            return None
        source_doc, objtype = entry[0], entry[1]
        return make_refnode(builder, fromdocname, source_doc,
                            objtype + '-' + target, contnode, target)

    def resolve_any_xref(self, env, fromdocname, builder, target,
                         node, contnode):
        """Return a list of ``(role name, reference node)`` for ``target``.

        The list is empty when ``target`` is not in the inventory and holds
        exactly one pair otherwise.
        """
        entry = self.data['objects'].get(target)
        if entry is None:
            return []
        source_doc, objtype = entry[0], entry[1]
        role = 'cton:' + self.role_for_objtype(objtype)
        refnode = make_refnode(builder, fromdocname, source_doc,
                               objtype + '-' + target, contnode, target)
        return [(role, refnode)]
class TypeDocumenter(sphinx.ext.autodoc.Documenter):
    """Autodoc documenter that emits ``cton:type`` directives."""

    # Invoke with .. autoctontype::
    objtype = 'ctontype'
    # Convert into cton:type directives
    domain = 'cton'
    directivetype = 'type'

    @classmethod
    def can_document_member(cls, member, membername, isattr, parent):
        """Never picked automatically as a member documenter."""
        return False

    def resolve_name(self, modname, parents, path, base):
        """Always look the type up in the ``base.types`` module."""
        return 'base.types', [base]

    def add_content(self, more_content, no_docstring=False):
        """Add the inherited content followed by a ``:bytes:`` field."""
        super(TypeDocumenter, self).add_content(more_content, no_docstring)
        sourcename = self.get_sourcename()
        membytes = self.object.membytes
        # A falsy membytes is reported as a type that cannot live in memory.
        if not membytes:
            line = u':bytes: Can\'t be stored in memory'
        else:
            line = u':bytes: {}'.format(membytes)
        self.add_line(line, sourcename)
class InstDocumenter(sphinx.ext.autodoc.Documenter):
    """Autodoc documenter that emits ``cton:inst`` directives."""

    # Invoke with .. autoinst::
    objtype = 'inst'
    # Convert into cton:inst directives
    domain = 'cton'
    directivetype = 'inst'

    @classmethod
    def can_document_member(cls, member, membername, isattr, parent):
        """Never picked automatically as a member documenter."""
        return False

    def resolve_name(self, modname, parents, path, base):
        """
        Resolve the module to look the instruction up in.

        An explicit ``path`` is used with its trailing dots removed;
        otherwise the instruction is looked up in ``base.instructions``.
        """
        if path:
            return path.rstrip('.'), [base]
        else:
            return 'base.instructions', [base]

    def format_signature(self):
        """
        Build the instruction signature text from ``self.object``.

        The result has the form ``out1, out2 = name in1, in2``; the result
        list and the operand list are each omitted when empty.
        """
        inst = self.object
        sig = inst.name
        if len(inst.outs) > 0:
            sig = ', '.join([op.name for op in inst.outs]) + ' = ' + sig
        if len(inst.ins) > 0:
            op = inst.ins[0]
            sig += ' ' + op.name
            # If the first input is variable-args, this is 'return'. No parens.
            if op.kind.name == 'variable_args':
                # Plain literal: the string has no placeholder to format.
                sig += '...'
            for op in inst.ins[1:]:
                # This is a call or branch with args in (...).
                if op.kind.name == 'variable_args':
                    sig += '({}...)'.format(op.name)
                else:
                    sig += ', ' + op.name
        return sig

    def add_directive_header(self, sig):
        """Add the directive header and options to the generated content."""
        domain = getattr(self, 'domain', 'cton')
        directive = getattr(self, 'directivetype', self.objtype)
        sourcename = self.get_sourcename()
        self.add_line(u'.. %s:%s:: %s' % (domain, directive, sig), sourcename)
        if self.options.noindex:
            self.add_line(u'   :noindex:', sourcename)

    def _add_operand_fields(self, field, operands, sourcename):
        """Emit one ``:<field> <type> <name>: <doc>`` line per operand."""
        for op in operands:
            # Value operands are described by their type variable, all other
            # operands by their operand kind.
            if op.is_value():
                typ = op.typevar
            else:
                typ = op.kind
            self.add_line(u':{} {} {}: {}'.format(
                field, typ, op.name, op.get_doc()), sourcename)

    def add_content(self, more_content, no_docstring=False):
        """
        Add the inherited content plus operand and type variable fields.

        Emits ``:in`` lines for the inputs, ``:out`` lines for the outputs
        and, for polymorphic instructions, ``:typevar`` lines.
        """
        super(InstDocumenter, self).add_content(more_content, no_docstring)
        sourcename = self.get_sourcename()
        inst = self.object

        # Add inputs and outputs.
        self._add_operand_fields(u'in', inst.ins, sourcename)
        self._add_operand_fields(u'out', inst.outs, sourcename)

        # Document type inference for polymorphic instructions.
        if inst.is_polymorphic:
            if inst.ctrl_typevar is not None:
                if inst.use_typevar_operand:
                    tvopnum = inst.value_opnums[inst.format.typevar_operand]
                    self.add_line(
                        u':typevar {}: inferred from {}'
                        .format(
                            inst.ctrl_typevar.name,
                            inst.ins[tvopnum]),
                        sourcename)
                else:
                    self.add_line(
                        u':typevar {}: explicitly provided'
                        .format(inst.ctrl_typevar.name),
                        sourcename)
            for tv in inst.other_typevars:
                self.add_line(
                    u':typevar {}: from input operand'.format(tv.name),
                    sourcename)
class InstGroupDocumenter(sphinx.ext.autodoc.ModuleLevelDocumenter):
    """Autodoc documenter that emits ``cton:instgroup`` directives."""

    # Invoke with .. autoinstgroup::
    objtype = 'instgroup'
    # Convert into cton:instgroup directives
    domain = 'cton'
    directivetype = 'instgroup'

    @classmethod
    def can_document_member(cls, member, membername, isattr, parent):
        """Never picked automatically as a member documenter."""
        return False

    def format_name(self):
        """Return the dotted name: module name followed by the object path."""
        dotted_path = ".".join(self.objpath)
        return "{}.{}".format(self.modname, dotted_path)

    def add_content(self, more_content, no_docstring=False):
        """
        Add the inherited content, then one line per instruction in the group.

        Instruction names are listed in sorted order. A name already present
        in the domain inventory becomes a ``:cton:inst:`` cross-reference;
        any other name is emitted as a plain literal.
        """
        super(InstGroupDocumenter, self).add_content(
            more_content, no_docstring)
        sourcename = self.get_sourcename()
        indexed = self.env.domaindata['cton']['objects']
        for name in sorted(inst.name for inst in self.object.instructions):
            template = u':cton:inst:`{}`' if name in indexed else u'``{}``'
            self.add_line(template.format(name), sourcename)
def setup(app):
    """
    Sphinx extension entry point.

    Registers the Cretonne domain and the three autodoc documenters on
    ``app`` and returns the extension metadata dictionary.
    """
    app.add_domain(CretonneDomain)
    documenters = (TypeDocumenter, InstDocumenter, InstGroupDocumenter)
    for documenter in documenters:
        app.add_autodocumenter(documenter)

    metadata = {'version': '0.1'}
    return metadata

View File

@@ -1,72 +0,0 @@
# -*- coding: utf-8 -*-
#
# Pygments lexer for Cretonne.
from __future__ import absolute_import
from pygments.lexer import RegexLexer, bygroups, words
from pygments.token import Comment, String, Keyword, Whitespace, Number, Name
from pygments.token import Operator, Punctuation, Text
def keywords(*args):
    """Return a ``words`` matcher for ``args`` delimited by ``\\b`` on both sides."""
    boundary = r'\b'
    return words(args, prefix=boundary, suffix=boundary)
class CretonneLexer(RegexLexer):
    """Pygments lexer for the Cretonne textual IL (``.cton`` files).

    All rules live in the single ``root`` state. Rules are tried in the
    order they are listed and the first one that matches wins, so more
    specific patterns must come before more general ones.
    """

    name = 'Cretonne'
    aliases = ['cton']
    filenames = ['*.cton']

    tokens = {
        'root': [
            # Test header lines.
            (r'^(test|isa|set)(?:( +)([-\w]+)' +
             r'(?:(=)(?:(\d+)|(yes|no|true|false|on|off)|(\w+)))?)*' +
             r'( *)$',
             bygroups(Keyword.Namespace, Whitespace, Name.Attribute,
                      Operator, Number.Integer, Keyword.Constant,
                      Name.Constant, Whitespace)),
            # Comments with filecheck or other test directive.
            (r'(; *)([a-z]+:)(.*?)$',
             bygroups(Comment.Single, Comment.Special, Comment.Single)),
            # Plain comments.
            (r';.*?$', Comment.Single),
            # Strings are in double quotes, support \xx escapes only.
            (r'"([^"\\]+|\\[0-9a-fA-F]{2})*"', String),
            # A naked function name following 'function' is also a string.
            (r'\b(function)([ \t]+)(\w+)\b',
             bygroups(Keyword, Whitespace, String.Symbol)),
            # Numbers.
            # The hexadecimal float rule must precede the hexadecimal integer
            # rule: otherwise the integer rule consumes the digits before the
            # point (e.g. the '0x0' of '0x0.0') and the float rule never
            # matches. The binary exponent may carry a sign, as in C99.
            (r'[-+]?0[xX][0-9a-fA-F]*\.[0-9a-fA-F]*([pP][-+]?\d+)?',
             Number.Hex),
            (r'[-+]?0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[-+]?(\d+\.\d+([eE]\d+)?|s?NaN|Inf)', Number.Float),
            (r'[-+]?\d+', Number.Integer),
            # Known attributes.
            (keywords('uext', 'sext'), Name.Attribute),
            # Well known value types.
            (r'\b(b\d+|i\d+|f32|f64)(x\d+)?\b', Keyword.Type),
            # v<nn> = value
            # ss<nn> = stack slot
            # jt<nn> = jump table
            (r'(v|ss|jt)\d+', Name.Variable),
            # ebb<nn> = extended basic block
            (r'(ebb)\d+', Name.Label),
            # Match instruction names in context.
            (r'(=)( *)([a-z]\w*)',
             bygroups(Operator, Whitespace, Name.Function)),
            (r'^( *)([a-z]\w*\b)(?! *[,=])',
             bygroups(Whitespace, Name.Function)),
            # Other names: results and arguments
            (r'[a-z]\w*', Name),
            (r'->|=|:', Operator),
            (r'[{}(),.]', Punctuation),
            (r'[ \t]+', Text),
        ],
    }
def setup(app):
    """Set up the Sphinx extension by registering the lexer as 'cton'."""
    lexer = CretonneLexer()
    app.add_lexer('cton', lexer)
    return {'version': '0.1'}

View File

@@ -1,8 +0,0 @@
/*
 * Return the arithmetic mean of the first `count` elements of `array`.
 * The total is accumulated in a double and narrowed to float on return.
 */
float
average(const float *array, size_t count)
{
    double sum = 0.0;
    size_t i;

    for (i = 0; i < count; ++i) {
        sum = sum + array[i];
    }
    return (float)(sum / count);
}

View File

@@ -1,33 +0,0 @@
test verifier
function %average(i32, i32) -> f32 native {
; IL counterpart of the C function average() in example.c.
; v1 is used as the array address and v2 as the element count.
ss1 = local 8 ; Stack slot for ``sum``.
ebb1(v1: i32, v2: i32):
v3 = f64const 0x0.0
stack_store v3, ss1 ; sum = 0
brz v2, ebb3 ; Handle count == 0.
v4 = iconst.i32 0 ; Initial value of the induction variable i.
jump ebb2(v4)
ebb2(v5: i32): ; Loop block, v5 is i for this iteration.
v6 = imul_imm v5, 4 ; Offset of element i, scaled by 4 for f32 elements.
v7 = iadd v1, v6
v8 = heap_load.f32 v7 ; array[i]
v9 = fpromote.f64 v8 ; Widen to f64 before adding to the f64 sum.
v10 = stack_load.f64 ss1
v11 = fadd v9, v10
stack_store v11, ss1 ; sum += array[i]
v12 = iadd_imm v5, 1 ; Value of i for the next iteration.
v13 = icmp ult v12, v2 ; Unsigned i + 1 < count.
brnz v13, ebb2(v12) ; Loop backedge.
v14 = stack_load.f64 ss1 ; Loop finished, reload the final sum.
v15 = fcvt_from_uint.f64 v2 ; count converted to f64.
v16 = fdiv v14, v15 ; sum / count
v17 = fdemote.f32 v16 ; Narrow the quotient to the f32 return type.
return v17
ebb3: ; Reached only through the count == 0 branch.
v100 = f32const +NaN
return v100
}

View File

@@ -1,25 +0,0 @@
Cretonne Code Generator
=======================
Contents:
.. toctree::
:maxdepth: 1
langref
metaref
testing
regalloc
compare-llvm
Indices and tables
==================
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`
Todo list
=========
.. todolist::

View File

@@ -1,924 +0,0 @@
***************************
Cretonne Language Reference
***************************
.. default-domain:: cton
.. highlight:: cton
The Cretonne intermediate language (:term:`IL`) has two equivalent
representations: an *in-memory data structure* that the code generator library
is using, and a *text format* which is used for test cases and debug output.
Files containing Cretonne textual IL have the ``.cton`` filename extension.
This reference uses the text format to describe IL semantics but glosses over
the finer details of the lexical and syntactic structure of the format.
Overall structure
=================
Cretonne compiles functions independently. A ``.cton`` IL file may contain
multiple functions, and the programmatic API can create multiple function
handles at the same time, but the functions don't share any data or reference
each other directly.
This is a simple C function that computes the average of an array of floats:
.. literalinclude:: example.c
:language: c
Here is the same function compiled into Cretonne IL:
.. literalinclude:: example.cton
:language: cton
:lines: 2-
The first line of a function definition provides the function *name* and
the :term:`function signature` which declares the argument and return types.
Then follows the :term:`function preamble` which declares a number of entities
that can be referenced inside the function. In the example above, the preamble
declares a single local variable, ``ss1``.
After the preamble follows the :term:`function body` which consists of
:term:`extended basic block`\s (EBBs), the first of which is the
:term:`entry block`. Every EBB ends with a :term:`terminator instruction`, so
execution can never fall through to the next EBB without an explicit branch.
A ``.cton`` file consists of a sequence of independent function definitions:
.. productionlist::
function_list : { function }
function : function_spec "{" preamble function_body "}"
function_spec : "function" function_name signature
preamble : { preamble_decl }
function_body : { extended_basic_block }
Static single assignment form
-----------------------------
The instructions in the function body use and produce *values* in SSA form. This
means that every value is defined exactly once, and every use of a value must be
dominated by the definition.
Cretonne does not have phi instructions but uses *EBB arguments* instead. An EBB
can be defined with a list of typed arguments. Whenever control is transferred
to the EBB, values for the arguments must be provided. When entering a function,
the incoming function arguments are passed as arguments to the entry EBB.
Instructions define zero, one, or more result values. All SSA values are either
EBB arguments or instruction results.
In the example above, the loop induction variable ``i`` is represented as three
SSA values: In the entry block, ``v4`` is the initial value. In the loop block
``ebb2``, the EBB argument ``v5`` represents the value of the induction
variable during each iteration. Finally, ``v12`` is computed as the induction
variable value for the next iteration.
It can be difficult to generate correct SSA form if the program being converted
into Cretonne :term:`IL` contains multiple assignments to the same variables.
Such variables can be presented to Cretonne as :term:`stack slot`\s instead.
Stack slots are accessed with the :inst:`stack_store` and :inst:`stack_load`
instructions which behave more like variable accesses in a typical programming
language. Cretonne can perform the necessary data-flow analysis to convert stack
slots to SSA form.
.. _value-types:
Value types
===========
All SSA values have a type which determines the size and shape (for SIMD
vectors) of the value. Many instructions are polymorphic -- they can operate on
different types.
Boolean types
-------------
Boolean values are either true or false. While this only requires a single bit
to represent, more bits are often used when holding a boolean value in a
register or in memory. The :type:`b1` type represents an abstract boolean
value. It can only exist as an SSA value, it can't be stored in memory or
converted to another type. The larger boolean types can be stored in memory.
.. todo:: Clarify the representation of larger boolean types.
The multi-bit boolean types can be interpreted in different ways. We could
declare that zero means false and non-zero means true. This may require
unwanted normalization code in some places.
We could specify a fixed encoding like all ones for true. This would then
lead to undefined behavior if untrusted code uses the multibit booleans
incorrectly.
Something like this:
- External code is not allowed to load/store multi-bit booleans or
otherwise expose the representation.
- Each target specifies the exact representation of a multi-bit boolean.
.. autoctontype:: b1
.. autoctontype:: b8
.. autoctontype:: b16
.. autoctontype:: b32
.. autoctontype:: b64
Integer types
-------------
Integer values have a fixed size and can be interpreted as either signed or
unsigned. Some instructions will interpret an operand as a signed or unsigned
number, others don't care.
.. autoctontype:: i8
.. autoctontype:: i16
.. autoctontype:: i32
.. autoctontype:: i64
Floating point types
--------------------
The floating point types have the IEEE semantics that are supported by most
hardware. There is no support for higher-precision types like quads or
double-double formats.
.. autoctontype:: f32
.. autoctontype:: f64
SIMD vector types
-----------------
A SIMD vector type represents a vector of values from one of the scalar types
(boolean, integer, and floating point). Each scalar value in a SIMD type is
called a *lane*. The number of lanes must be a power of two in the range 2-256.
.. type:: i%Bx%N
A SIMD vector of integers. The lane type :type:`iB` is one of the integer
types :type:`i8` ... :type:`i64`.
Some concrete integer vector types are :type:`i32x4`, :type:`i64x8`, and
:type:`i16x4`.
The size of a SIMD integer vector in memory is :math:`N B\over 8` bytes.
.. type:: f32x%N
A SIMD vector of single precision floating point numbers.
Some concrete :type:`f32` vector types are: :type:`f32x2`, :type:`f32x4`,
and :type:`f32x8`.
The size of a :type:`f32` vector in memory is :math:`4N` bytes.
.. type:: f64x%N
A SIMD vector of double precision floating point numbers.
Some concrete :type:`f64` vector types are: :type:`f64x2`, :type:`f64x4`,
and :type:`f64x8`.
The size of a :type:`f64` vector in memory is :math:`8N` bytes.
.. type:: b1x%N
A boolean SIMD vector.
Boolean vectors are used when comparing SIMD vectors. For example,
comparing two :type:`i32x4` values would produce a :type:`b1x4` result.
Like the :type:`b1` type, a boolean vector cannot be stored in memory.
Pseudo-types and type classes
-----------------------------
These are not concrete types, but convenient names used to refer to real types
in this reference.
.. type:: iPtr
A Pointer-sized integer.
This is either :type:`i32`, or :type:`i64`, depending on whether the target
platform has 32-bit or 64-bit pointers.
.. type:: iB
Any of the scalar integer types :type:`i8` -- :type:`i64`.
.. type:: Int
Any scalar *or vector* integer type: :type:`iB` or :type:`iBxN`.
.. type:: fB
Either of the floating point scalar types: :type:`f32` or :type:`f64`.
.. type:: Float
Any scalar *or vector* floating point type: :type:`fB` or :type:`fBxN`.
.. type:: %Tx%N
Any SIMD vector type.
.. type:: Mem
Any type that can be stored in memory: :type:`Int` or :type:`Float`.
.. type:: Logic
Either :type:`b1` or :type:`b1xN`.
.. type:: Testable
Either :type:`b1` or :type:`iB`.
Immediate operand types
-----------------------
These types are not part of the normal SSA type system. They are used to
indicate the different kinds of immediate operands on an instruction.
.. type:: imm64
A 64-bit immediate integer. The value of this operand is interpreted as a
signed two's complement integer. Instruction encodings may limit the valid
range.
In the textual format, :type:`imm64` immediates appear as decimal or
hexadecimal literals using the same syntax as C.
.. type:: offset32
A signed 32-bit immediate address offset.
In the textual format, :type:`offset32` immediates always have an explicit
sign, and a 0 offset may be omitted.
.. type:: ieee32
A 32-bit immediate floating point number in the IEEE 754-2008 binary32
interchange format. All bit patterns are allowed.
.. type:: ieee64
A 64-bit immediate floating point number in the IEEE 754-2008 binary64
interchange format. All bit patterns are allowed.
.. type:: bool
A boolean immediate value, either false or true.
In the textual format, :type:`bool` immediates appear as 'false'
and 'true'.
.. type:: intcc
An integer condition code. See the :inst:`icmp` instruction for details.
.. type:: floatcc
A floating point condition code. See the :inst:`fcmp` instruction for details.
The two IEEE floating point immediate types :type:`ieee32` and :type:`ieee64`
are displayed as hexadecimal floating point literals in the textual :term:`IL`
format. Decimal floating point literals are not allowed because some computer
systems can round differently when converting to binary. The hexadecimal
floating point format is mostly the same as the one used by C99, but extended
to represent all NaN bit patterns:
Normal numbers
Compatible with C99: ``-0x1.Tpe`` where ``T`` are the trailing
significand bits encoded as hexadecimal, and ``e`` is the unbiased exponent
as a decimal number. :type:`ieee32` has 23 trailing significand bits. They
are padded with an extra LSB to produce 6 hexadecimal digits. This is not
necessary for :type:`ieee64` which has 52 trailing significand bits
forming 13 hexadecimal digits with no padding.
Zeros
Positive and negative zero are displayed as ``0.0`` and ``-0.0`` respectively.
Subnormal numbers
Compatible with C99: ``-0x0.Tpemin`` where ``T`` are the trailing
significand bits encoded as hexadecimal, and ``emin`` is the minimum exponent
as a decimal number.
Infinities
Either ``-Inf`` or ``Inf``.
Quiet NaNs
Quiet NaNs have the MSB of the trailing significand set. If the remaining
bits of the trailing significand are all zero, the value is displayed as
``-NaN`` or ``NaN``. Otherwise, ``-NaN:0xT`` where ``T`` are the trailing
significand bits encoded as hexadecimal.
Signaling NaNs
Displayed as ``-sNaN:0xT``.
Control flow
============
Branches transfer control to a new EBB and provide values for the target EBB's
arguments, if it has any. Conditional branches only take the branch if their
condition is satisfied, otherwise execution continues at the following
instruction in the EBB.
.. autoinst:: jump
.. autoinst:: fallthrough
.. autoinst:: brz
.. autoinst:: brnz
.. autoinst:: br_icmp
.. autoinst:: br_table
.. inst:: JT = jump_table EBB0, EBB1, ..., EBBn
Declare a jump table in the :term:`function preamble`.
This declares a jump table for use by the :inst:`br_table` indirect branch
instruction. Entries in the table are either EBB names, or ``0`` which
indicates an absent entry.
The EBBs listed must belong to the current function, and they can't have
any arguments.
:arg EBB0: Target EBB when ``x = 0``.
:arg EBB1: Target EBB when ``x = 1``.
:arg EBBn: Target EBB when ``x = n``.
:result: A jump table identifier. (Not an SSA value).
Traps stop the program because something went wrong. The exact behavior depends
on the target instruction set architecture and operating system. There are
explicit trap instructions defined below, but some instructions may also cause
traps for certain input values. For example, :inst:`udiv` traps when the divisor
is zero.
.. autoinst:: trap
.. autoinst:: trapz
.. autoinst:: trapnz
Function calls
==============
A function call needs a target function and a :term:`function signature`. The
target function may be determined dynamically at runtime, but the signature
must be known when the function call is compiled. The function signature
describes how to call the function, including arguments, return values, and the
calling convention:
.. productionlist::
signature : "(" [arglist] ")" ["->" retlist] [call_conv]
arglist : arg { "," arg }
retlist : arglist
arg : type [argext] [argspecial]
argext : "uext" | "sext"
argspecial: "sret" | "link" | "fp" | "csr"
call_conv : `string`
Arguments and return values have flags whose meaning is mostly target
dependent. They make it possible to call native functions on the target
platform. When calling other Cretonne functions, the flags are not necessary.
Functions that are called directly must be declared in the :term:`function
preamble`:
.. inst:: FN = function NAME signature
Declare a function so it can be called directly.
:arg NAME: Name of the function, passed to the linker for resolution.
:arg signature: Function signature. See below.
:result FN: A function identifier that can be used with :inst:`call`.
.. autoinst:: call
.. autoinst:: x_return
This simple example illustrates direct function calls and signatures::
function %gcd(i32 uext, i32 uext) -> i32 uext "C" {
fn1 = function %divmod(i32 uext, i32 uext) -> i32 uext, i32 uext
ebb1(v1: i32, v2: i32):
brz v2, ebb2
v3, v4 = call fn1(v1, v2)
jump ebb1(v2, v4)
ebb2:
return v1
}
Indirect function calls use a signature declared in the preamble.
.. autoinst:: call_indirect
.. todo:: Define safe indirect function calls.
The :inst:`call_indirect` instruction is dangerous to use in a sandboxed
environment since it is not easy to verify the callee address.
We need a table-driven indirect call instruction, similar to
:inst:`br_table`.
Memory
======
Cretonne provides fully general :inst:`load` and :inst:`store` instructions for
accessing memory. However, it can be very complicated to verify the safety of
general loads and stores when compiling code for a sandboxed environment, so
Cretonne also provides more restricted memory operations that are always safe.
.. autoinst:: load
.. autoinst:: store
Loads and stores are *misaligned* if the resultant address is not a multiple of
the expected alignment. Depending on the target architecture, misaligned memory
accesses may trap, or they may work. Sometimes, operating systems catch
alignment traps and emulate the misaligned memory access.
Extending loads and truncating stores
-------------------------------------
Most ISAs provide instructions that load an integer value smaller than a register
and extend it to the width of the register. Similarly, store instructions that
only write the low bits of an integer register are common.
Cretonne provides extending loads and truncating stores for 8, 16, and 32-bit
memory accesses.
.. autoinst:: uload8
.. autoinst:: sload8
.. autoinst:: istore8
.. autoinst:: uload16
.. autoinst:: sload16
.. autoinst:: istore16
.. autoinst:: uload32
.. autoinst:: sload32
.. autoinst:: istore32
Local variables
---------------
One set of restricted memory operations access the current function's stack
frame. The stack frame is divided into fixed-size stack slots that are
allocated in the :term:`function preamble`. Stack slots are not typed, they
simply represent a contiguous sequence of bytes in the stack frame.
.. inst:: SS = local Bytes, Flags...
Allocate a stack slot for a local variable in the preamble.
If no alignment is specified, Cretonne will pick an appropriate alignment
for the stack slot based on its size and access patterns.
:arg Bytes: Stack slot size in bytes.
:flag align(N): Request at least N bytes alignment.
:result SS: Stack slot index.
.. autoinst:: stack_load
.. autoinst:: stack_store
The dedicated stack access instructions are easy for the compiler to reason
about because stack slots and offsets are fixed at compile time. For example,
the alignment of these stack memory accesses can be inferred from the offsets
and stack slot alignments.
It can be necessary to escape from the safety of the restricted instructions by
taking the address of a stack slot.
.. autoinst:: stack_addr
The :inst:`stack_addr` instruction can be used to macro-expand the stack access
instructions before instruction selection::
v1 = stack_load.f64 ss3, 16
; Expands to:
v9 = stack_addr ss3, 16
v1 = load.f64 v9
Heaps
-----
Code compiled from WebAssembly or asm.js runs in a sandbox where it can't access
all process memory. Instead, it is given a small set of memory areas to work
in, and all accesses are bounds checked. Cretonne models this through the
concept of *heaps*.
A heap is declared in the function preamble and can be accessed with restricted
instructions that trap on out-of-bounds accesses. Heap addresses can be smaller
than the native pointer size, for example unsigned :type:`i32` offsets on a
64-bit architecture.
.. inst:: H = heap Name
Declare a heap in the function preamble.
This doesn't allocate memory, it just retrieves a handle to a sandbox from
the runtime environment.
:arg Name: String identifying the heap in the runtime environment.
:result H: Heap identifier.
.. autoinst:: heap_load
.. autoinst:: heap_store
When optimizing heap accesses, Cretonne may separate the heap bounds checking
and address computations from the memory accesses.
.. autoinst:: heap_addr
A small example using heaps::
function %vdup(i32, i32) {
h1 = heap "main"
ebb1(v1: i32, v2: i32):
v3 = heap_load.i32x4 h1, v1, 0
v4 = heap_addr h1, v2, 32 ; Shared range check for two stores.
store v3, v4, 0
store v3, v4, 16
return
}
The final expansion of the :inst:`heap_addr` range check and address conversion
depends on the runtime environment.
Operations
==========
The remaining instruction set is mostly arithmetic.
A few instructions have variants that take immediate operands (e.g.,
:inst:`band` / :inst:`band_imm`), but in general an instruction is required to
load a constant into an SSA value.
.. autoinst:: select
Constant materialization
------------------------
.. autoinst:: iconst
.. autoinst:: f32const
.. autoinst:: f64const
.. autoinst:: bconst
Live range splitting
--------------------
Cretonne's register allocator assigns each SSA value to a register or a spill
slot on the stack for its entire live range. Since the live range of an SSA
value can be quite large, it is sometimes beneficial to split the live range
into smaller parts.
A live range is split by creating new SSA values that are copies of the
original value or of each other. The copies are created by inserting :inst:`copy`,
:inst:`spill`, or :inst:`fill` instructions, depending on whether the values
are assigned to registers or stack slots.
This approach permits SSA form to be preserved throughout the register
allocation pass and beyond.
.. autoinst:: copy
.. autoinst:: spill
.. autoinst:: fill
Register values can be temporarily diverted to other registers by the
:inst:`regmove` instruction.
.. autoinst:: regmove
Vector operations
-----------------
.. autoinst:: vsplit
.. autoinst:: vconcat
.. autoinst:: vselect
.. autoinst:: splat
.. autoinst:: insertlane
.. autoinst:: extractlane
Integer operations
------------------
.. autoinst:: icmp
.. autoinst:: icmp_imm
.. autoinst:: iadd
.. autoinst:: iadd_imm
.. autoinst:: iadd_cin
.. autoinst:: iadd_cout
.. autoinst:: iadd_carry
.. autoinst:: isub
.. autoinst:: irsub_imm
.. autoinst:: isub_bin
.. autoinst:: isub_bout
.. autoinst:: isub_borrow
.. autoinst:: imul
.. autoinst:: imul_imm
.. todo:: Larger multiplication results.
For example, ``smulx`` which multiplies :type:`i32` operands to produce a
:type:`i64` result. Alternatively, ``smulhi`` and ``smullo`` pairs.
.. autoinst:: udiv
.. autoinst:: udiv_imm
.. autoinst:: sdiv
.. autoinst:: sdiv_imm
.. autoinst:: urem
.. autoinst:: urem_imm
.. autoinst:: srem
.. autoinst:: srem_imm
.. todo:: Minimum / maximum.
NEON has ``smin``, ``smax``, ``umin``, and ``umax`` instructions. We should
replicate those for both scalar and vector integer types. Even if the
target ISA doesn't have scalar operations, these are good pattern matching
targets.
.. todo:: Saturating arithmetic.
Mostly for SIMD use, but again these are good patterns for contraction.
Something like ``usatadd``, ``usatsub``, ``ssatadd``, and ``ssatsub`` is a
good start.
Bitwise operations
------------------
The bitwise operations operate on any value type: Integers, floating point
numbers, and booleans. When operating on integer or floating point types, the
bitwise operations are working on the binary representation of the values. When
operating on boolean values, the bitwise operations work as logical operators.
.. autoinst:: band
.. autoinst:: band_imm
.. autoinst:: bor
.. autoinst:: bor_imm
.. autoinst:: bxor
.. autoinst:: bxor_imm
.. autoinst:: bnot
.. autoinst:: band_not
.. autoinst:: bor_not
.. autoinst:: bxor_not
The shift and rotate operations only work on integer types (scalar and vector).
The shift amount does not have to be the same type as the value being shifted.
Only the low `B` bits of the shift amount are significant.
When operating on an integer vector type, the shift amount is still a scalar
type, and all the lanes are shifted the same amount. The shift amount is masked
to the number of bits in a *lane*, not the full size of the vector type.
.. autoinst:: rotl
.. autoinst:: rotl_imm
.. autoinst:: rotr
.. autoinst:: rotr_imm
.. autoinst:: ishl
.. autoinst:: ishl_imm
.. autoinst:: ushr
.. autoinst:: ushr_imm
.. autoinst:: sshr
.. autoinst:: sshr_imm
The bit-counting instructions below are scalar only.
.. autoinst:: clz
.. autoinst:: cls
.. autoinst:: ctz
.. autoinst:: popcnt
Floating point operations
-------------------------
These operations generally follow IEEE 754-2008 semantics.
.. autoinst:: fcmp
.. autoinst:: fadd
.. autoinst:: fsub
.. autoinst:: fmul
.. autoinst:: fdiv
.. autoinst:: sqrt
.. autoinst:: fma
Sign bit manipulations
~~~~~~~~~~~~~~~~~~~~~~
The sign manipulating instructions work as bitwise operations, so they don't
have special behavior for signaling NaN operands. The exponent and trailing
significand bits are always preserved.
.. autoinst:: fneg
.. autoinst:: fabs
.. autoinst:: fcopysign
Minimum and maximum
~~~~~~~~~~~~~~~~~~~
These instructions return the larger or smaller of their operands. They differ
in their handling of quiet NaN inputs. Note that signaling NaN operands always
cause a NaN result.
When comparing zeroes, these instructions behave as if :math:`-0.0 < 0.0`.
.. autoinst:: fmin
.. autoinst:: fminnum
.. autoinst:: fmax
.. autoinst:: fmaxnum
Rounding
~~~~~~~~
These instructions round their argument to a nearby integral value, still
represented as a floating point number.
.. autoinst:: ceil
.. autoinst:: floor
.. autoinst:: trunc
.. autoinst:: nearest
Conversion operations
---------------------
.. autoinst:: bitcast
.. autoinst:: breduce
.. autoinst:: bextend
.. autoinst:: bint
.. autoinst:: bmask
.. autoinst:: ireduce
.. autoinst:: uextend
.. autoinst:: sextend
.. autoinst:: fpromote
.. autoinst:: fdemote
.. autoinst:: fcvt_to_uint
.. autoinst:: fcvt_to_sint
.. autoinst:: fcvt_from_uint
.. autoinst:: fcvt_from_sint
Legalization operations
-----------------------
These instructions are used as helpers when legalizing types and operations for
the target ISA.
.. autoinst:: isplit
.. autoinst:: iconcat
ISA-specific instructions
=========================
Target ISAs can define supplemental instructions that do not make sense to
support generally.
Intel
-----
Instructions that can only be used by the Intel target ISA.
.. autoinst:: isa.intel.instructions.sdivmodx
.. autoinst:: isa.intel.instructions.udivmodx
Instruction groups
==================
All of the shared instructions are part of the :instgroup:`base` instruction
group.
.. autoinstgroup:: base.instructions.GROUP
Target ISAs may define further instructions in their own instruction groups:
.. autoinstgroup:: isa.intel.instructions.GROUP
Implementation limits
=====================
Cretonne's intermediate representation imposes some limits on the size of
functions and the number of entities allowed. If these limits are exceeded, the
implementation will panic.
Number of instructions in a function
At most :math:`2^{31} - 1`.
Number of EBBs in a function
At most :math:`2^{31} - 1`.
Every EBB needs at least a terminator instruction anyway.
Number of secondary values in a function
At most :math:`2^{31} - 1`.
Secondary values are any SSA values that are not the first result of an
instruction.
Other entities declared in the preamble
At most :math:`2^{32} - 1`.
This covers things like stack slots, jump tables, external functions, and
function signatures, etc.
Number of arguments to an EBB
At most :math:`2^{16}`.
Number of arguments to a function
At most :math:`2^{16}`.
This follows from the limit on arguments to the entry EBB. Note that
Cretonne may add a handful of ABI register arguments as function signatures
are lowered. This is for representing things like the link register, the
incoming frame pointer, and callee-saved registers that are saved in the
prologue.
Size of function call arguments on the stack
At most :math:`2^{32} - 1` bytes.
This is probably not possible to achieve given the limit on the number of
arguments, except by requiring extremely large offsets for stack arguments.
Glossary
========
.. glossary::
intermediate language
IL
The language used to describe functions to Cretonne. This reference
describes the syntax and semantics of the Cretonne IL. The IL has two
forms: Textual and an in-memory intermediate representation
(:term:`IR`).
intermediate representation
IR
The in-memory representation of :term:`IL`. The data structures
Cretonne uses to represent a program internally are called the
intermediate representation. Cretonne's IR can be converted to text
losslessly.
function signature
A function signature describes how to call a function. It consists of:
- The calling convention.
- The number of arguments and return values. (Functions can return
multiple values.)
- Type and flags of each argument.
- Type and flags of each return value.
Not all function attributes are part of the signature. For example, a
function that never returns could be marked as ``noreturn``, but that
is not necessary to know when calling it, so it is just an attribute,
and not part of the signature.
function preamble
A list of declarations of entities that are used by the function body.
Some of the entities that can be declared in the preamble are:
- Local variables.
- Functions that are called directly.
- Function signatures for indirect function calls.
- Function flags and attributes that are not part of the signature.
function body
The extended basic blocks which contain all the executable code in a
function. The function body follows the function preamble.
basic block
A maximal sequence of instructions that can only be entered from the
top, and that contains no branch or terminator instructions except for
the last instruction.
extended basic block
EBB
A maximal sequence of instructions that can only be entered from the
top, and that contains no :term:`terminator instruction`\s except for
the last one. An EBB can contain conditional branches that can fall
through to the following instructions in the block, but only the first
instruction in the EBB can be a branch target.
The last instruction in an EBB must be a :term:`terminator instruction`,
so execution cannot flow through to the next EBB in the function. (But
there may be a branch to the next EBB.)
Note that some textbooks define an EBB as a maximal *subtree* in the
control flow graph where only the root can be a join node. This
definition is not equivalent to Cretonne EBBs.
terminator instruction
A control flow instruction that unconditionally directs the flow of
execution somewhere else. Execution never continues at the instruction
following a terminator instruction.
The basic terminator instructions are :inst:`jump`, :inst:`return`, and
:inst:`trap`. Conditional branches and instructions that trap
conditionally are not terminator instructions.
entry block
The :term:`EBB` that is executed first in a function. Currently, a
Cretonne function must have exactly one entry block which must be the
first block in the function. The types of the entry block arguments must
match the types of arguments in the function signature.
stack slot
A fixed size memory allocation in the current function's activation
frame. Also called a local variable.

View File

@@ -1,263 +0,0 @@
@ECHO OFF
REM Command file for Sphinx documentation
REM Usage: make TARGET, where TARGET is one of the names listed by the help target.
REM SPHINXBUILD, SPHINXOPTS and PAPER are taken from the environment when set.
REM Use sphinx-build as the default command unless the caller provided SPHINXBUILD.
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
REM Every target below writes its output into a subdirectory of BUILDDIR.
set BUILDDIR=_build
REM Options used by most targets: doctree cache under BUILDDIR, user options, source dir.
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
REM Options used by the gettext target, which is run without the doctree cache option.
set I18NSPHINXOPTS=%SPHINXOPTS% .
REM When PAPER is set, prepend it as a latex_paper_size override to both option sets.
if NOT "%PAPER%" == "" (
set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)
REM With no target argument, jump to the usage text.
if "%1" == "" goto help
REM The help label lives inside this block so that the goto above and an explicit
REM help argument both print the same list of targets before jumping to the end.
if "%1" == "help" (
:help
echo.Please use `make ^<target^>` where ^<target^> is one of
echo. html to make standalone HTML files
echo. dirhtml to make HTML files named index.html in directories
echo. singlehtml to make a single large HTML file
echo. pickle to make pickle files
echo. json to make JSON files
echo. htmlhelp to make HTML files and a HTML help project
echo. qthelp to make HTML files and a qthelp project
echo. devhelp to make HTML files and a Devhelp project
echo. epub to make an epub
echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
echo. text to make text files
echo. man to make manual pages
echo. texinfo to make Texinfo files
echo. gettext to make PO message catalogs
echo. changes to make an overview over all changed/added/deprecated items
echo. xml to make Docutils-native XML files
echo. pseudoxml to make pseudoxml-XML files for display purposes
echo. linkcheck to check all external links for integrity
echo. doctest to run all doctests embedded in the documentation if enabled
echo. coverage to run coverage check of the documentation if enabled
goto end
)
REM clean: remove every subdirectory of BUILDDIR, then delete the files left in it.
REM This runs before the Sphinx availability check, so it does not need Sphinx.
if "%1" == "clean" (
for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
del /q /s %BUILDDIR%\*
goto end
)
REM Check if sphinx-build is available and fallback to Python version if any
REM The command is probed by running it with its output discarded. An errorlevel
REM of 9009 or above is treated as the command not having been found.
%SPHINXBUILD% 1>NUL 2>NUL
if errorlevel 9009 goto sphinx_python
goto sphinx_ok
:sphinx_python
REM Fallback: invoke Sphinx as a Python module and probe again the same way.
set SPHINXBUILD=python -m sphinx.__init__
%SPHINXBUILD% 2> nul
REM Neither form could be started: explain how to install or locate Sphinx, then fail.
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.http://sphinx-doc.org/
exit /b 1
)
REM From here on SPHINXBUILD holds a command that could be started.
:sphinx_ok
REM Builder targets. Each block below runs the Sphinx builder named by the first
REM argument into its own subdirectory of BUILDDIR, exits with status 1 when the
REM build reports an error, and otherwise prints a short note before jumping to the end.
REM html: standalone HTML pages.
if "%1" == "html" (
%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/html.
goto end
)
REM dirhtml: HTML pages named index.html in directories.
if "%1" == "dirhtml" (
%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
goto end
)
REM singlehtml: one large HTML file.
if "%1" == "singlehtml" (
%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
goto end
)
REM pickle: pickle files for further processing.
if "%1" == "pickle" (
%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the pickle files.
goto end
)
REM json: JSON files for further processing.
if "%1" == "json" (
%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the JSON files.
goto end
)
REM htmlhelp: HTML files plus an HTML Help project file.
REM The caret at the end of the echo line continues the message on the next line.
if "%1" == "htmlhelp" (
%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
goto end
)
REM qthelp: build the Qt help project and print the follow-up commands to run.
REM qcollectiongenerator takes the .qhcp project file and produces a .qhc
REM collection file, which is the file that assistant -collectionFile expects.
REM The hint printed below therefore names the .qhc file.
if "%1" == "qthelp" (
%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
echo.^> qcollectiongenerator %BUILDDIR%\qthelp\cretonne.qhcp
echo.To view the help file:
echo.^> assistant -collectionFile %BUILDDIR%\qthelp\cretonne.qhc
goto end
)
REM The three targets below follow the same pattern: run the named Sphinx builder
REM into its own subdirectory of BUILDDIR, exit with status 1 on a build error,
REM and otherwise print a short note before jumping to the end.
REM devhelp: HTML files plus a Devhelp project.
if "%1" == "devhelp" (
%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished.
goto end
)
REM epub: an epub file.
if "%1" == "epub" (
%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The epub file is in %BUILDDIR%/epub.
goto end
)
REM latex: LaTeX sources only; the paper size follows PAPER when it is set.
if "%1" == "latex" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
if errorlevel 1 exit /b 1
echo.
echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
goto end
)
REM latexpdf: generate the LaTeX sources, run make all-pdf inside the latex
REM output directory, then change back to the directory holding this script.
REM As with the other targets, stop with status 1 when the Sphinx build fails,
REM so that make is never run on missing or stale LaTeX sources.
if "%1" == "latexpdf" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
if errorlevel 1 exit /b 1
cd %BUILDDIR%/latex
make all-pdf
cd %~dp0
echo.
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
goto end
)
REM latexpdfja: same flow as latexpdf, but runs the all-pdf-ja make target.
if "%1" == "latexpdfja" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
if errorlevel 1 exit /b 1
cd %BUILDDIR%/latex
make all-pdf-ja
cd %~dp0
echo.
echo.Build finished; the PDF files are in %BUILDDIR%/latex.
goto end
)
REM The remaining targets follow one pattern: run the Sphinx builder named by the
REM first argument into its own subdirectory of BUILDDIR, exit with status 1 on a
REM build error, and otherwise print a short note before jumping to the end.
REM text: plain text files.
if "%1" == "text" (
%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The text files are in %BUILDDIR%/text.
goto end
)
REM man: manual pages.
if "%1" == "man" (
%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The manual pages are in %BUILDDIR%/man.
goto end
)
REM texinfo: Texinfo files.
if "%1" == "texinfo" (
%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
goto end
)
REM gettext: message catalogs. This is the only target that uses I18NSPHINXOPTS,
REM and its output goes to the locale subdirectory rather than one named gettext.
if "%1" == "gettext" (
%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
goto end
)
REM changes: overview of changed, added and deprecated items.
if "%1" == "changes" (
%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
if errorlevel 1 exit /b 1
echo.
echo.The overview file is in %BUILDDIR%/changes.
goto end
)
REM linkcheck: check external links. The caret continues the echo on the next line.
if "%1" == "linkcheck" (
%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
if errorlevel 1 exit /b 1
echo.
echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
goto end
)
REM doctest: run the doctests embedded in the documentation.
if "%1" == "doctest" (
%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
if errorlevel 1 exit /b 1
echo.
echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
goto end
)
REM coverage: run the documentation coverage check.
if "%1" == "coverage" (
%SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
if errorlevel 1 exit /b 1
echo.
echo.Testing of coverage in the sources finished, look at the ^
results in %BUILDDIR%/coverage/python.txt.
goto end
)
REM xml: Docutils-native XML files.
if "%1" == "xml" (
%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The XML files are in %BUILDDIR%/xml.
goto end
)
REM pseudoxml: pseudo-XML files for display purposes.
if "%1" == "pseudoxml" (
%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
goto end
)
REM Common exit point. An unrecognized target matches no block and falls through to here.
:end

View File

@@ -1,483 +0,0 @@
********************************
Cretonne Meta Language Reference
********************************
.. default-domain:: py
.. highlight:: python
.. module:: cdsl
The Cretonne meta language is used to define instructions for Cretonne. It is a
domain specific language embedded in Python. This document describes the Python
modules that form the embedded DSL.
The meta language descriptions are Python modules under the
:file:`lib/cretonne/meta` directory. The descriptions are processed in two
steps:
1. The Python modules are imported. This has the effect of building static data
structures in global variables in the modules. These static data structures
in the :mod:`base` and :mod:`isa` packages use the classes in the
:mod:`cdsl` package to describe instruction sets and other properties.
2. The static data structures are processed to produce Rust source code and
constant tables.
The main driver for this source code generation process is the
:file:`lib/cretonne/meta/build.py` script which is invoked as part of the build
process if anything in the :file:`lib/cretonne/meta` directory has changed
since the last build.
.. module:: cdsl.settings
Settings
========
Settings are used by the environment embedding Cretonne to control the details
of code generation. Each setting is defined in the meta language so a compact
and consistent Rust representation can be generated. Shared settings are defined
in the :mod:`base.settings` module. Some settings are specific to a target ISA,
and defined in a :file:`settings.py` module under the appropriate
:file:`lib/cretonne/meta/isa/*` directory.
Settings can take boolean on/off values, small numbers, or explicitly enumerated
symbolic values. Each type is represented by a sub-class of :class:`Setting`:
.. inheritance-diagram:: Setting BoolSetting NumSetting EnumSetting
:parts: 1
.. autoclass:: Setting
.. autoclass:: BoolSetting
.. autoclass:: NumSetting
.. autoclass:: EnumSetting
All settings must belong to a *group*, represented by a :class:`SettingGroup`
object.
.. autoclass:: SettingGroup
Normally, a setting group corresponds to all settings defined in a module. Such
a module looks like this::
group = SettingGroup('example')
foo = BoolSetting('use the foo')
bar = BoolSetting('enable bars', True)
opt = EnumSetting('optimization level', 'Debug', 'Release')
group.close(globals())
.. module:: cdsl.instructions
Instruction descriptions
========================
New instructions are defined as instances of the :class:`Instruction`
class. As instruction instances are created, they are added to the currently
open :class:`InstructionGroup`.
.. autoclass:: InstructionGroup
:members:
The basic Cretonne instruction set described in :doc:`langref` is defined by the
Python module :mod:`base.instructions`. This module has a global variable
:data:`base.instructions.GROUP` which is an :class:`InstructionGroup` instance
containing all the base instructions.
.. autoclass:: Instruction
.. currentmodule:: cdsl.operands
An instruction is defined with a set of distinct input and output operands which
must be instances of the :class:`Operand` class.
.. autoclass:: Operand
Cretonne uses two separate type systems for operand kinds and SSA values.
.. module:: cdsl.typevar
Type variables
--------------
Instruction descriptions can be made polymorphic by using
:class:`cdsl.operands.Operand` instances that refer to a *type variable*
instead of a concrete value type. Polymorphism only works for SSA value
operands. Other operands have a fixed operand kind.
.. autoclass:: TypeVar
:members:
If multiple operands refer to the same type variable they will be required to
have the same concrete type. For example, this defines an integer addition
instruction::
Int = TypeVar('Int', 'A scalar or vector integer type', ints=True, simd=True)
a = Operand('a', Int)
x = Operand('x', Int)
y = Operand('y', Int)
iadd = Instruction('iadd', 'Integer addition', ins=(x, y), outs=a)
The type variable `Int` is allowed to vary over all scalar and vector integer
value types, but in a given instance of the `iadd` instruction, the two
operands must have the same type, and the result will be the same type as the
inputs.
There are some practical restrictions on the use of type variables, see
:ref:`restricted-polymorphism`.
Immediate operands
------------------
.. currentmodule:: cdsl.operands
Immediate instruction operands don't correspond to SSA values, but have values
that are encoded directly in the instruction. Immediate operands don't
have types from the :class:`cdsl.types.ValueType` type system; they often have
enumerated values of a specific type. The type of an immediate operand is
indicated with an instance of :class:`ImmediateKind`.
.. autoclass:: ImmediateKind
.. automodule:: base.immediates
:members:
Entity references
-----------------
.. currentmodule:: cdsl.operands
Instruction operands can also refer to other entities in the same function. This
can be extended basic blocks, or entities declared in the function preamble.
.. autoclass:: EntityRefKind
.. automodule:: base.entities
:members:
Value types
-----------
.. currentmodule:: cdsl.types
Concrete value types are represented as instances of :class:`ValueType`. There
are subclasses to represent scalar and vector types.
.. autoclass:: ValueType
.. inheritance-diagram:: ValueType ScalarType VectorType IntType FloatType BoolType
:parts: 1
.. autoclass:: ScalarType
:members:
.. autoclass:: VectorType
:members:
.. autoclass:: IntType
:members:
.. autoclass:: FloatType
:members:
.. autoclass:: BoolType
:members:
.. automodule:: base.types
:members:
There are no predefined vector types, but they can be created as needed with
the :func:`ScalarType.by` function.
.. module:: cdsl.operands
Instruction representation
==========================
The Rust in-memory representation of instructions is derived from the
instruction descriptions. Part of the representation is generated, and part is
written as Rust code in the ``cretonne.instructions`` module. The instruction
representation depends on the input operand kinds and whether the instruction
can produce multiple results.
.. autoclass:: OperandKind
.. inheritance-diagram:: OperandKind ImmediateKind EntityRefKind
Since all SSA value operands are represented as a `Value` in Rust code, value
types don't affect the representation. Two special operand kinds are used to
represent SSA values:
.. autodata:: VALUE
.. autodata:: VARIABLE_ARGS
.. module:: cdsl.formats
When an instruction description is created, it is automatically assigned a
predefined instruction format which is an instance of
:class:`InstructionFormat`:
.. autoclass:: InstructionFormat
.. _restricted-polymorphism:
Restricted polymorphism
-----------------------
The instruction format strictly controls the kinds of operands on an
instruction, but it does not constrain value types at all. A given instruction
description typically does constrain the allowed value types for its value
operands. The type variables give a lot of freedom in describing the value type
constraints, in practice more freedom than what is needed for normal instruction
set architectures. In order to simplify the Rust representation of value type
constraints, some restrictions are imposed on the use of type variables.
A polymorphic instruction has a single *controlling type variable*. For a given
opcode, this type variable must be the type of the first result or the type of
the input value operand designated by the `typevar_operand` argument to the
:py:class:`InstructionFormat` constructor. By default, this is the first value
operand, which works most of the time.
The value types of instruction results must be one of the following:
1. A concrete value type.
2. The controlling type variable.
3. A type variable derived from the controlling type variable.
This means that all result types can be computed from the controlling type
variable.
Input values to the instruction are allowed a bit more freedom. Input value
types must be one of:
1. A concrete value type.
2. The controlling type variable.
3. A type variable derived from the controlling type variable.
4. A free type variable that is not used by any other operands.
This means that the type of an input operand can either be computed from the
controlling type variable, or it can vary independently of the other operands.
Encodings
=========
.. currentmodule:: cdsl.isa
Encodings describe how Cretonne instructions are mapped to binary machine code
for the target architecture. After the legalization pass, all remaining
instructions are expected to map 1-1 to native instruction encodings. Cretonne
instructions that can't be encoded for the current architecture are called
:term:`illegal instruction`\s.
Some instruction set architectures have different :term:`CPU mode`\s with
incompatible encodings. For example, a modern ARMv8 CPU might support three
different CPU modes: *A64* where instructions are encoded in 32 bits, *A32*
where all instructions are 32 bits, and *T32* which has a mix of 16-bit and
32-bit instruction encodings. These are incompatible encoding spaces, and while
an :cton:inst:`iadd` instruction can be encoded in 32 bits in each of them, it's
not the same 32 bits. It's a judgement call if CPU modes should be modelled as
separate targets, or as sub-modes of the same target. In the ARMv8 case, the
different register banks mean that it makes sense to model A64 as a separate
target architecture, while A32 and T32 are CPU modes of the 32-bit ARM target.
In a given CPU mode, there may be multiple valid encodings of the same
instruction. Both RISC-V and ARMv8's T32 mode have 32-bit encodings of all
instructions with 16-bit encodings available for some opcodes if certain
constraints are satisfied.
.. autoclass:: CPUMode
Encodings are guarded by :term:`sub-target predicate`\s. For example, the RISC-V
"C" extension which specifies the compressed encodings may not be supported, and
a predicate would be used to disable all of the 16-bit encodings in that case.
This can also affect whether an instruction is legal. For example, x86 has a
predicate that controls the SSE 4.1 instruction encodings. When that predicate
is false, the SSE 4.1 instructions are not available.
Encodings also have an :term:`instruction predicate` which depends on the
specific values of the instruction's immediate fields. This is used to ensure
that immediate address offsets are within range, for example. The instructions
in the base Cretonne instruction set can often represent a wider range of
immediates than any specific encoding. The fixed-size RISC-style encodings tend
to have more range limitations than CISC-style variable length encodings like
x86.
The diagram below shows the relationship between the classes involved in
specifying instruction encodings:
.. digraph:: encoding
node [shape=record]
EncRecipe -> SubtargetPred
EncRecipe -> InstrFormat
EncRecipe -> InstrPred
Encoding [label="{Encoding|Opcode+TypeVars}"]
Encoding -> EncRecipe [label="+EncBits"]
Encoding -> CPUMode
Encoding -> SubtargetPred
Encoding -> InstrPred
Encoding -> Opcode
Opcode -> InstrFormat
CPUMode -> Target
An :py:class:`Encoding` instance specifies the encoding of a concrete
instruction. The following properties are used to select instructions to be
encoded:
- An opcode, i.e. :cton:inst:`iadd_imm`, that must match the instruction's
opcode.
- Values for any type variables if the opcode represents a polymorphic
instruction.
- An :term:`instruction predicate` that must be satisfied by the instruction's
immediate operands.
- The CPU mode that must be active.
- A :term:`sub-target predicate` that must be satisfied by the currently active
sub-target.
An encoding specifies an *encoding recipe* along with some *encoding bits* that
the recipe can use for native opcode fields etc. The encoding recipe has
additional constraints that must be satisfied:
- An :py:class:`InstructionFormat` that must match the format required by the
opcodes of any encodings that use this recipe.
- An additional :term:`instruction predicate`.
- An additional :term:`sub-target predicate`.
The additional predicates in the :py:class:`EncRecipe` are merged with the
per-encoding predicates when generating the encoding matcher code. Often
encodings only need the recipe predicates.
.. autoclass:: EncRecipe
Register constraints
====================
After an encoding recipe has been chosen for an instruction, it is the register
allocator's job to make sure that the recipe's :term:`Register constraint`\s
are satisfied. Most ISAs have separate integer and floating point registers,
and instructions can usually only use registers from one of the banks. Some
instruction encodings are even more constrained and can only use a subset of
the registers in a bank. These constraints are expressed in terms of register
classes.
Sometimes the result of an instruction is placed in a register that must be the
same as one of the input registers. Some instructions even use a fixed register
for inputs or results.
Each encoding recipe specifies separate constraints for its value operands and
result. These constraints are separate from the instruction predicate which can
only evaluate the instruction's immediate operands.
.. module:: cdsl.registers
.. autoclass:: RegBank
Register class constraints
--------------------------
The most common type of register constraint is the register class. It specifies
that an operand or result must be allocated one of the registers from the given
register class::
IntRegs = RegBank('IntRegs', ISA, 'General purpose registers', units=16, prefix='r')
GPR = RegClass(IntRegs)
R = EncRecipe('R', Binary, ins=(GPR, GPR), outs=GPR)
This defines an encoding recipe for the ``Binary`` instruction format where
both input operands must be allocated from the ``GPR`` register class.
.. autoclass:: RegClass
Tied register operands
----------------------
In more compact machine code encodings, it is common to require that the result
register is the same as one of the inputs. This is represented with tied
operands::
CR = EncRecipe('CR', Binary, ins=(GPR, GPR), outs=0)
This indicates that the result value must be allocated to the same register as
the first input value. Tied operand constraints can only be used for result
values, so the number always refers to one of the input values.
Fixed register operands
-----------------------
Some instructions use hard-coded input and output registers for some value
operands. An example is the ``pblendvb`` Intel SSE instruction which takes one
of its three value operands in the hard-coded ``%xmm0`` register::
XMM0 = FPR[0]
SSE66_XMM0 = EncRecipe('SSE66_XMM0', Ternary, ins=(FPR, FPR, XMM0), outs=0)
The syntax ``FPR[0]`` selects the first register from the ``FPR`` register
class which consists of all the XMM registers.
Stack operands
--------------
Cretonne's register allocator can assign an SSA value to a stack slot if there
aren't enough registers. It will insert :cton:inst:`spill` and :cton:inst:`fill`
instructions as needed to satisfy instruction operand constraints, but it is
also possible to have instructions that can access stack slots directly::
CSS = EncRecipe('CSS', Unary, ins=GPR, outs=Stack(GPR))
An output stack value implies a store to the stack, an input value implies a
load.
.. module:: cdsl.isa
Targets
=======
Cretonne can be compiled with support for multiple target instruction set
architectures. Each ISA is represented by a :py:class:`cdsl.isa.TargetISA` instance.
.. autoclass:: TargetISA
The definitions for each supported target live in a package under
:file:`lib/cretonne/meta/isa`.
.. automodule:: isa
:members:
.. automodule:: isa.riscv
.. automodule:: isa.intel
.. automodule:: isa.arm32
.. automodule:: isa.arm64
Glossary
========
.. glossary::
Illegal instruction
An instruction is considered illegal if there is no encoding available
for the current CPU mode. The legality of an instruction depends on the
value of :term:`sub-target predicate`\s, so it can't always be
determined ahead of time.
CPU mode
Every target defines one or more CPU modes that determine how the CPU
decodes binary instructions. Some CPUs can switch modes dynamically with
a branch instruction (like ARM/Thumb), while other modes are
process-wide (like x86 32/64-bit).
Sub-target predicate
A predicate that depends on the current sub-target configuration.
Examples are "Use SSE 4.1 instructions", "Use RISC-V compressed
encodings". Sub-target predicates can depend on both detected CPU
features and configuration settings.
Instruction predicate
A predicate that depends on the immediate fields of an instruction. An
example is "the load address offset must be a 10-bit signed integer".
Instruction predicates do not depend on the registers selected for value
operands.
Register constraint
Value operands and results correspond to machine registers. Encodings may
constrain operands to either a fixed register or a register class. There
may also be register constraints between operands, for example some
encodings require that the result register is one of the input
registers.

View File

@@ -1,239 +0,0 @@
*******************************
Register Allocation in Cretonne
*******************************
.. default-domain:: cton
.. highlight:: rust
Cretonne uses a *decoupled, SSA-based* register allocator. Decoupled means that
register allocation is split into two primary phases: *spilling* and
*coloring*. SSA-based means that the code stays in SSA form throughout the
register allocator, and in fact is still in SSA form after register allocation.
Before the register allocator is run, all instructions in the function must be
*legalized*, which means that every instruction has an entry in the
``encodings`` table. The encoding entries also provide register class
constraints on the instruction's operands that the register allocator must
satisfy.
After the register allocator has run, the ``locations`` table provides a
register or stack slot location for all SSA values used by the function. The
register allocator may have inserted :inst:`spill`, :inst:`fill`, and
:inst:`copy` instructions to make that possible.
SSA-based register allocation
=============================
The phases of the SSA-based register allocator are:
Liveness analysis
For each SSA value, determine exactly where it is live.
Spilling
The process of deciding which SSA values go in a stack slot and which
values go in a register. The spilling phase can also split live ranges by
inserting :inst:`copy` instructions, or transform the code in other ways to
reduce the number of values kept in registers.
After spilling, the number of live register values never exceeds the number
of available registers.
Coloring
The process of assigning specific registers to the live values. It's a
property of SSA form that this can be done in a linear scan of the
dominator tree without causing any additional spills.
EBB argument fixup
The coloring phase does not guarantee that EBB arguments are placed in the
correct registers and/or stack slots before jumping to the EBB. It will
try its best, but not making this guarantee is essential to the speed of
the coloring phase. (EBB arguments correspond to PHI nodes in traditional
SSA form).
The argument fixup phase inserts 'shuffle code' before jumps and branches
to place the argument values in their expected locations.
The contract between the spilling and coloring phases is that the number of
values in registers never exceeds the number of available registers. This
sounds simple enough in theory, but in practice there are some complications.
Real-world complications to SSA coloring
----------------------------------------
In practice, instruction set architectures don't have "K interchangeable
registers", and register pressure can't be measured with a single number. There
are complications:
Different register banks
Most ISAs separate integer registers from floating point registers, and
instructions require their operands to come from a specific bank. This is a
fairly simple problem to deal with since the register banks are completely
disjoint. We simply count the number of integer and floating-point values
that are live independently, and make sure that each number does not exceed
the size of their respective register banks.
Instructions with fixed operands
Some instructions use a fixed register for an operand. This happens on the
Intel ISAs:
- Dynamic shift and rotate instructions take the shift amount in CL.
- Division instructions use RAX and RDX for both input and output operands.
- Wide multiply instructions use fixed RAX and RDX registers for input and
output operands.
- A few SSE variable blend instructions use a hardwired XMM0 input operand.
Operands constrained to register subclasses
Some instructions can only use a subset of the registers for some operands.
For example, the ARM NEON vmla (scalar) instruction requires the scalar
operand to be located in D0-15 or even D0-7, depending on the data type.
The other operands can be from the full D0-31 register set.
ABI boundaries
Before making a function call, arguments must be placed in specific
registers and stack locations determined by the ABI, and return values
appear in fixed registers.
Some registers can be clobbered by the call and some are saved by the
callee. In some cases, only the low bits of a register are saved by the
callee. For example, ARM64 callees save only the low 64 bits of v8-15, and
Win64 callees only save the low 128 bits of AVX registers.
ABI boundaries also affect the location of arguments to the entry block and
return values passed to the :inst:`return` instruction.
Aliasing registers
Different registers sometimes share the same bits in the register bank.
This can make it difficult to measure register pressure. For example, the
Intel registers RAX, EAX, AX, AL, and AH overlap.
If only one of the aliasing registers can be used at a time, the aliasing
doesn't cause problems since the registers can simply be counted as one
unit.
Early clobbers
Sometimes an instruction requires that the register used for an output
operand does not alias any of the input operands. This happens for inline
assembly and in some other special cases.
Liveness Analysis
=================
Both spilling and coloring need to know exactly where SSA values are live. The
liveness analysis computes this information.
The data structure representing the live range of a value uses the linear
layout of the function. All instructions and EBB headers are assigned a
*program position*. A starting point for a live range can be one of the
following:
- The instruction where the value is defined.
- The EBB header where the value is an EBB argument.
- An EBB header where the value is live-in because it was defined in a
dominating block.
The ending point of a live range can be:
- The last instruction to use the value.
- A branch or jump to an EBB where the value is live-in.
When all the EBBs in a function are laid out linearly, the live range of a
value doesn't have to be a contiguous interval, although it will be in a
majority of cases. There can be holes in the linear live range.
The part of a value's live range that falls inside a single EBB will always be
an interval without any holes. This follows from the dominance requirements of
SSA. A live range is represented as:
- The interval inside the EBB where the value is defined.
- A set of intervals for EBBs where the value is live-in.
Any value that is only used inside a single EBB will have an empty set of
live-in intervals. Some values are live across large parts of the function, and
this can often be represented with coalesced live-in intervals covering many
EBBs. It is important that the live range data structure doesn't have to grow
linearly with the number of EBBs covered by a live range.
This representation is very similar to LLVM's ``LiveInterval`` data structure
with a few important differences:
- The Cretonne ``LiveRange`` only covers a single SSA value, while LLVM's
``LiveInterval`` represents the union of multiple related SSA values in a
virtual register. This makes Cretonne's representation smaller because
individual segments don't have to be annotated with a value number.
- Cretonne stores the def-interval separately from a list of coalesced live-in
intervals, while LLVM stores an array of segments. The two representations
are equivalent, but Cretonne optimizes for the common case of a value that is
only used locally.
- It is simpler to check if two live ranges are overlapping. The dominance
properties of SSA form mean that it is only necessary to check the
def-interval of each live range against the intervals of the other range. It
is not necessary to check for overlap between the two sets of live-in
intervals. This makes the overlap check logarithmic in the number of live-in
intervals instead of linear.
- LLVM represents a program point as ``SlotIndex`` which holds a pointer to a
32-byte ``IndexListEntry`` struct. The entries are organized in a doubly
linked list that mirrors the ordering of instructions in a basic block. This
allows 'tombstone' program points corresponding to instructions that have
been deleted.
Cretonne uses a 32-bit program point representation that encodes an
instruction or EBB number directly. There are no 'tombstones' for deleted
instructions, and no mirrored linked list of instructions. Live ranges must
be updated when instructions are deleted.
A consequence of Cretonne's more compact representation is that two program
points can't be compared without the context of a function layout.
Spilling algorithm
==================
There is no one way of implementing spilling, and different tradeoffs between
compilation time and code quality are possible. Any spilling algorithm will
need a way of tracking the register pressure so the colorability condition can
be satisfied.
Coloring algorithm
==================
The SSA coloring algorithm is based on a single observation: If two SSA values
interfere, one of the values must be live where the other value is defined.
We visit the EBBs in a topological order such that all dominating EBBs are
visited before the current EBB. The instructions in an EBB are visited in a
top-down order, and each value defined by the instruction is assigned an
available register. With this iteration order, every value that is live at an
instruction has already been assigned to a register.
This coloring algorithm works if the following condition holds:
At every instruction, consider the values live through the instruction. No
matter how the live values have been assigned to registers, there must be
registers of the right register classes available for the values
defined by the instruction.
We'll need to modify this condition in order to deal with the real-world
complications.
The coloring algorithm needs to keep track of the set of live values at each
instruction. At the top of an EBB, this set can be computed as the union of:
- The set of live values before the immediately dominating branch or jump
instruction. The topological iteration order guarantees that this set is
available. Values whose live range indicates that they are not live-in to the
current EBB should be filtered out.
- The set of arguments to the EBB. These values should all be live-in, although
it is possible that some are dead and never used anywhere.
For each live value, we also track its kill point in the current EBB. This is
the last instruction to use the value in the EBB. Values that are live-out
through the EBB terminator don't have a kill point. Note that the kill point
can be a branch to another EBB that uses the value, so the kill instruction
doesn't have to be a use of the value.
When advancing past an instruction, the live set is updated:
- Any values whose kill point is the current instruction are removed.
- Any values defined by the instruction are added, unless their kill point is
the current instruction. This corresponds to a dead def which has no uses.

View File

@@ -1,354 +0,0 @@
****************
Testing Cretonne
****************
Cretonne is tested at multiple levels of abstraction and integration. When
possible, Rust unit tests are used to verify single functions and types. When
testing the interaction between compiler passes, file-level tests are
appropriate.
The top-level shell script :file:`test-all.sh` runs all of the tests in the
Cretonne repository.
Rust tests
==========
.. highlight:: rust
Rust and Cargo have good support for testing. Cretonne uses unit tests, doc
tests, and integration tests where appropriate.
Unit tests
----------
Unit tests live in a ``tests`` sub-module of the code they are testing::
pub fn add(x: u32, y: u32) -> u32 {
x + y
}
#[cfg(test)]
mod tests {
use super::add;
#[test]
fn check_add() {
assert_eq!(add(2, 2), 4);
}
}
Since sub-modules have access to non-public items in a Rust module, unit tests
can be used to test module-internal functions and types too.
Doc tests
---------
Documentation comments can contain code snippets which are also compiled and
tested::
//! The `Flags` struct is immutable once it has been created. A `Builder` instance is used to
//! create it.
//!
//! # Example
//! ```
//! use cretonne::settings::{self, Configurable};
//!
//! let mut b = settings::builder();
//! b.set("opt_level", "fastest");
//!
//! let f = settings::Flags::new(&b);
//! assert_eq!(f.opt_level(), settings::OptLevel::Fastest);
//! ```
These tests are useful for demonstrating how to use an API, and running them
regularly makes sure that they stay up to date. Documentation tests are not
appropriate for lots of assertions; use unit tests for that.
Integration tests
-----------------
Integration tests are Rust source files that are compiled and linked
individually. They are used to exercise the external API of the crates under
test.
These tests are usually found in the :file:`tests` top-level directory where
they have access to all the crates in the Cretonne repository. The
:file:`lib/cretonne` and :file:`lib/reader` crates have no external
dependencies, which can make testing tedious. Integration tests that don't need
to depend on other crates can be placed in :file:`lib/cretonne/tests` and
:file:`lib/reader/tests`.
File tests
==========
.. highlight:: cton
Compilers work with large data structures representing programs, and it quickly
gets unwieldy to generate test data programmatically. File-level tests make it
easier to provide substantial input functions for the compiler tests.
File tests are :file:`*.cton` files in the :file:`filetests/` directory
hierarchy. Each file has a header describing what to test followed by a number
of input functions in the :doc:`Cretonne textual intermediate language
<langref>`:
.. productionlist::
test_file : test_header `function_list`
test_header : test_commands (`isa_specs` | `settings`)
test_commands : test_command { test_command }
test_command : "test" test_name { option } "\n"
The available test commands are described below.
Many test commands only make sense in the context of a target instruction set
architecture. These tests require one or more ISA specifications in the test
header:
.. productionlist::
isa_specs : { [`settings`] isa_spec }
isa_spec : "isa" isa_name { `option` } "\n"
The options given on the ``isa`` line modify the ISA-specific settings defined in
:file:`lib/cretonne/meta/isa/*/settings.py`.
All types of tests allow shared Cretonne settings to be modified:
.. productionlist::
settings : { setting }
setting : "set" { option } "\n"
option : flag | setting "=" value
The shared settings available for all target ISAs are defined in
:file:`lib/cretonne/meta/cretonne/settings.py`.
The ``set`` lines apply settings cumulatively::
test legalizer
set opt_level=best
set is_64bit=1
isa riscv
set is_64bit=0
isa riscv supports_m=false
function %foo() {}
This example will run the legalizer test twice. Both runs will have
``opt_level=best``, but they will have different ``is_64bit`` settings. The 32-bit
run will also have the RISC-V specific flag ``supports_m`` disabled.
Filecheck
---------
Many of the test commands described below use *filecheck* to verify their
output. Filecheck is a Rust implementation of the LLVM tool of the same name.
See the :file:`lib/filecheck` `documentation <https://docs.rs/filecheck/>`_ for
details of its syntax.
Comments in :file:`.cton` files are associated with the entity they follow.
This typically means an instruction or the whole function. Those tests that
use filecheck will extract comments associated with each function (or its
entities) and scan them for filecheck directives. The test output for each
function is then matched against the filecheck directives for that function.
Comments appearing before the first function in a file apply to every function.
This is useful for defining common regular expression variables with the
``regex:`` directive, for example.
Note that LLVM's file tests don't separate filecheck directives by their
associated function. They verify the concatenated output against all filecheck
directives in the test file. LLVM's :command:`FileCheck` command has a
``CHECK-LABEL:`` directive to help separate the output from different functions.
Cretonne's tests don't need this.
Filecheck variables
~~~~~~~~~~~~~~~~~~~
Cretonne's IL parser causes entities like values and EBBs to be renumbered. It
maintains a source mapping to resolve references in the text, but when a
function is written out as text as part of a test, all of the entities have the
new numbers. This can complicate the filecheck directives since they need to
refer to the new entity numbers, not the ones in the adjacent source text.
To help with this, the parser's source-to-entity mapping is made available as
predefined filecheck variables. A value by the source name ``v10`` can be
referenced as the filecheck variable ``$v10``. The variable expands to the
renumbered entity name.
`test cat`
----------
This is one of the simplest file tests, used for testing the conversion to and
from textual IL. The ``test cat`` command simply parses each function and
converts it back to text again. The text of each function is then matched
against the associated filecheck directives.
Example::
function %r1() -> i32, f32 {
ebb1:
v10 = iconst.i32 3
v20 = f32const 0.0
return v10, v20
}
; sameln: function %r1() -> i32, f32 {
; nextln: ebb0:
; nextln: v0 = iconst.i32 3
; nextln: v1 = f32const 0.0
; nextln: return v0, v1
; nextln: }
Notice that the values ``v10`` and ``v20`` in the source were renumbered to
``v0`` and ``v1`` respectively during parsing. The equivalent test using
filecheck variables would be::
function %r1() -> i32, f32 {
ebb1:
v10 = iconst.i32 3
v20 = f32const 0.0
return v10, v20
}
; sameln: function %r1() -> i32, f32 {
; nextln: ebb0:
; nextln: $v10 = iconst.i32 3
; nextln: $v20 = f32const 0.0
; nextln: return $v10, $v20
; nextln: }
`test verifier`
---------------
Run each function through the IL verifier and check that it produces the
expected error messages.
Expected error messages are indicated with an ``error:`` directive *on the
instruction that produces the verifier error*. Both the error message and
reported location of the error are verified::
test verifier
function %test(i32) {
ebb0(v0: i32):
jump ebb1 ; error: terminator
return
}
This example test passes if the verifier fails with an error message containing
the sub-string ``"terminator"`` *and* the error is reported for the ``jump``
instruction.
If a function contains no ``error:`` annotations, the test passes if the
function verifies correctly.
`test print-cfg`
----------------
Print the control flow graph of each function as a Graphviz graph, and run
filecheck over the result. See also the :command:`cton-util print-cfg`
command::
; For testing cfg generation. This code is nonsense.
test print-cfg
test verifier
function %nonsense(i32, i32) -> f32 {
; check: digraph %nonsense {
; regex: I=\binst\d+\b
; check: label="{ebb0 | <$(BRZ=$I)>brz ebb2 | <$(JUMP=$I)>jump ebb1}"]
ebb0(v1: i32, v2: i32):
brz v2, ebb2 ; unordered: ebb0:$BRZ -> ebb2
v4 = iconst.i32 0
jump ebb1(v4) ; unordered: ebb0:$JUMP -> ebb1
ebb1(v5: i32):
return v1
ebb2:
v100 = f32const 0.0
return v100
}
`test domtree`
--------------
Compute the dominator tree of each function and validate it against the
``dominates:`` annotations::
test domtree
function %test(i32) {
ebb0(v0: i32):
jump ebb1 ; dominates: ebb1
ebb1:
brz v0, ebb3 ; dominates: ebb3
jump ebb2 ; dominates: ebb2
ebb2:
jump ebb3
ebb3:
return
}
Every reachable extended basic block except for the entry block has an
*immediate dominator* which is a jump or branch instruction. This test passes
if the ``dominates:`` annotations on the immediate dominator instructions are
both correct and complete.
`test legalizer`
----------------
Legalize each function for the specified target ISA and run the resulting
function through filecheck. This test command can be used to validate the
encodings selected for legal instructions as well as the instruction
transformations performed by the legalizer.
`test regalloc`
---------------
Test the register allocator.
First, each function is legalized for the specified target ISA. This is
required for register allocation since the instruction encodings provide
register class constraints to the register allocator.
Second, the register allocator is run on the function, inserting spill code and
assigning registers and stack slots to all values.
The resulting function is then run through filecheck.
`test binemit`
--------------
Test the emission of binary machine code.
The functions must contain instructions that are annotated with both encodings
and value locations (registers or stack slots). For instructions that are
annotated with a ``bin:`` directive, the emitted hexadecimal machine code for
that instruction is compared to the directive::
test binemit
isa riscv
function %int32() {
ebb0:
[-,%x5] v1 = iconst.i32 1
[-,%x6] v2 = iconst.i32 2
[R#0c,%x7] v10 = iadd v1, v2 ; bin: 006283b3
[R#200c,%x8] v11 = isub v1, v2 ; bin: 40628433
return
}
If any instructions are unencoded (indicated with a ``[-]`` encoding field), they
will be encoded using the same mechanism as the legalizer uses. However,
illegal instructions for the ISA won't be expanded into other instruction
sequences. Instead the test will fail.
Value locations must be present if they are required to compute the binary
bits. Missing value locations will cause the test to crash.
`test simple-gvn`
-----------------
Test the simple GVN pass.
The simple GVN pass is run on each function, and then the results are run
through filecheck.