Merge commit '051bc08d23df0930be5e959645c50dd0cdf411d4'

2017-08-29 07:12:47 -07:00
parent 344fbed77a 051bc08d23
commit 8f6957296e
685 changed files with 59063 additions and 9 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,11 @@
-# Generated by Cargo
+*.pyc
 # will have compiled files and executables
 /target/
 # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
 # More information here http://doc.crates.io/guide.html#cargotoml-vs-cargolock
 Cargo.lock
 # These are backup files generated by rustfmt
 **/*.rs.bk
 *.swp
 *.swo
 tags
 /target/
 Cargo.lock
 .*.rustfmt
 cretonne.dbg*
 .mypy_cache
 rusty-tags.*
--- a/.travis.yml
+++ b/.travis.yml
@@ -0,0 +1,18 @@
 language: rust
 rust:
    - stable
    - beta
 dist: trusty
 sudo: false
 addons:
    apt:
        packages:
            - python3-pip
 install:
    - pip3 install --user --upgrade mypy flake8
    - travis_wait ./check-rustfmt.sh --install
 script: ./test-all.sh
 cache:
    cargo: true
    directories:
        - $HOME/.cache/pip
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -0,0 +1,26 @@
 [package]
 name = "cretonne-tools"
 authors = ["The Cretonne Project Developers"]
 version = "0.0.0"
 description = "Binaries for testing the Cretonne library"
 license = "Apache-2.0"
 documentation = "https://cretonne.readthedocs.io/"
 repository = "https://github.com/stoklund/cretonne"
 publish = false
 [[bin]]
 name = "cton-util"
 path = "src/cton-util.rs"
 [dependencies]
 cretonne = { path = "lib/cretonne" }
 cretonne-reader = { path = "lib/reader" }
 cretonne-frontend = { path = "lib/frontend" }
 wasm2cretonne-util = { path = "lib/wasm2cretonne-util" }
 filecheck = { path = "lib/filecheck" }
 docopt = "0.8.0"
 serde = "1.0.8"
 serde_derive = "1.0.8"
 num_cpus = "1.5.1"
 [workspace]
--- a/1
+++ b/1
@@ -1,3 +1,4 @@
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/
--- a/README.rst
+++ b/README.rst
@@ -0,0 +1,74 @@
 =======================
 Cretonne Code Generator
 =======================
 Cretonne is a low-level retargetable code generator. It translates a
 target-independent intermediate language into executable machine code.
 *This is a work in progress that is not yet functional.*
 .. image:: https://readthedocs.org/projects/cretonne/badge/?version=latest
    :target: https://cretonne.readthedocs.io/en/latest/?badge=latest
    :alt: Documentation Status
 .. image:: https://travis-ci.org/stoklund/cretonne.svg?branch=master
    :target: https://travis-ci.org/stoklund/cretonne
    :alt: Build Status
 Cretonne is designed to be a code generator for WebAssembly with these design
 goals:
 No undefined behavior
    Cretonne does not have a `nasal demons clause <http://www.catb.org/jargon/html/N/nasal-demons.html>`_, and it won't generate code
    with unexpected behavior if invariants are broken.
 Portable semantics
    As far as possible, Cretonne's input language has well-defined semantics
    that are the same on all target architectures. The semantics are usually
    the same as WebAssembly's.
 Fast sandbox verification
    Cretonne's input language has a safe subset for sandboxed code. No advanced
    analysis is required to verify memory safety as long as only the safe
    instructions are used. The safe instruction set is expressive enough to
    implement WebAssembly.
 Scalable performance
    Cretonne can be configured to generate code as quickly as possible, or it
    can generate very good code at the cost of slower compile times.
 Predictable performance
    When optimizing, Cretonne focuses on adapting the target-independent IL to
    the quirks of the target architecture. There are no advanced optimizations
    that sometimes work, sometimes fail.
 Building Cretonne
 -----------------
 Cretonne is using the Cargo package manager format. First, ensure you have
 installed a current stable Rust (stable, beta, and nightly should all work, but
 only stable and beta are tested consistently). Then, change the working
 directory to your clone of cretonne and run::
    cargo build
 This will create a *target/debug* directory where you can find the generated
 binary.
 To build the optimized binary for release::
    cargo build --release
 You can then run tests with::
    ./test-all.sh
 Building the documentation
 --------------------------
 To build the Cretonne documentation, you need the `Sphinx documentation
 generator <http://www.sphinx-doc.org/>`_::
    $ pip install sphinx sphinx-autobuild sphinx_rtd_theme
    $ cd cretonne/docs
    $ make html
    $ open _build/html/index.html
 We don't support Sphinx versions before 1.4 since the format of index tuples
 has changed.
--- a/check-rustfmt.sh
+++ b/check-rustfmt.sh
@@ -0,0 +1,35 @@
 #!/bin/bash
 #
 # Usage: check-rustfmt.sh [--install]
 #
 # Check that the desired version of rustfmt is installed.
 #
 # Rustfmt is still immature enough that its formatting decisions can change
 # between versions. This makes it difficult to enforce a certain style in a
 # test script since not all developers will upgrade rustfmt at the same time.
 # To work around this, we only verify formatting when a specific version of
 # rustfmt is installed.
 #
 # Exits 0 if the right version of rustfmt is installed, 1 otherwise.
 #
 # With the --install option, also tries to install the right version.
 # This version should always be bumped to the newest version available.
 VERS="0.8.4"
 if cargo install --list | grep -q "^rustfmt v$VERS"; then
    exit 0
 fi
 if [ "$1" != "--install" ]; then
    echo "********************************************************************"
    echo "*  Please install rustfmt v$VERS to verify formatting.             *"
    echo "*  If a newer version of rustfmt is available, update this script. *"
    echo "********************************************************************"
    echo "$0 --install"
    sleep 1
    exit 1
 fi
 echo "Installing rustfmt v$VERS."
 cargo install --force --vers="$VERS" rustfmt
--- a/docs/.gitignore
+++ b/docs/.gitignore
@@ -0,0 +1 @@
 _build
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -0,0 +1,196 @@
 # Makefile for Sphinx documentation
 #
 # You can set these variables from the command line.
 SPHINXOPTS    =
 SPHINXBUILD   = sphinx-build
 SPHINXABUILD  = sphinx-autobuild
 PAPER         =
 BUILDDIR      = _build
 # User-friendly check for sphinx-build
 ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
 $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
 endif
 # Internal variables.
 PAPEROPT_a4     = -D latex_paper_size=a4
 PAPEROPT_letter = -D latex_paper_size=letter
 ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 # the i18n builder cannot share the environment and doctrees with the others
 I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 # Every target in this Makefile is a command, not a file to be produced, so
 # all of them are declared phony. That keeps a stray file or directory with
 # the same name (e.g. "html" or "info") from making a target look up to date.
 .PHONY: help clean html autohtml dirhtml singlehtml pickle json htmlhelp qthelp applehelp devhelp epub latex latexpdf latexpdfja text man texinfo info changes linkcheck doctest coverage gettext xml pseudoxml
 help:
 	@echo "Please use \`make <target>' where <target> is one of"
 	@echo "  html       to make standalone HTML files"
 	@echo "  dirhtml    to make HTML files named index.html in directories"
 	@echo "  singlehtml to make a single large HTML file"
 	@echo "  pickle     to make pickle files"
 	@echo "  json       to make JSON files"
 	@echo "  htmlhelp   to make HTML files and a HTML help project"
 	@echo "  qthelp     to make HTML files and a qthelp project"
 	@echo "  applehelp  to make an Apple Help Book"
 	@echo "  devhelp    to make HTML files and a Devhelp project"
 	@echo "  epub       to make an epub"
 	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
 	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
 	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
 	@echo "  text       to make text files"
 	@echo "  man        to make manual pages"
 	@echo "  texinfo    to make Texinfo files"
 	@echo "  info       to make Texinfo files and run them through makeinfo"
 	@echo "  gettext    to make PO message catalogs"
 	@echo "  changes    to make an overview of all changed/added/deprecated items"
 	@echo "  xml        to make Docutils-native XML files"
 	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
 	@echo "  linkcheck  to check all external links for integrity"
 	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
 	@echo "  coverage   to run coverage check of the documentation (if enabled)"
 clean:
 	rm -rf $(BUILDDIR)/*
 html:
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 autohtml: html
 	$(SPHINXABUILD) -z ../lib/cretonne/meta --ignore '.*.sw?' -b html -E $(ALLSPHINXOPTS) $(BUILDDIR)/html
 dirhtml:
 	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
 singlehtml:
 	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
 	@echo
 	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
 pickle:
 	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
 	@echo
 	@echo "Build finished; now you can process the pickle files."
 json:
 	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
 	@echo
 	@echo "Build finished; now you can process the JSON files."
 htmlhelp:
 	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
 	@echo
 	@echo "Build finished; now you can run HTML Help Workshop with the" \
 	      ".hhp project file in $(BUILDDIR)/htmlhelp."
 qthelp:
 	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
 	@echo
 	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
 	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
 	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/cretonne.qhcp"
 	@echo "To view the help file:"
 	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/cretonne.qhc"
 applehelp:
 	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
 	@echo
 	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
 	@echo "N.B. You won't be able to view it unless you put it in" \
 	      "~/Library/Documentation/Help or install it in your application" \
 	      "bundle."
 devhelp:
 	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
 	@echo
 	@echo "Build finished."
 	@echo "To view the help file:"
 	@echo "# mkdir -p $$HOME/.local/share/devhelp/cretonne"
 	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/cretonne"
 	@echo "# devhelp"
 epub:
 	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
 	@echo
 	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
 latex:
 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
 	@echo
 	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
 	@echo "Run \`make' in that directory to run these through (pdf)latex" \
 	      "(use \`make latexpdf' here to do that automatically)."
 latexpdf:
 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
 	@echo "Running LaTeX files through pdflatex..."
 	$(MAKE) -C $(BUILDDIR)/latex all-pdf
 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
 latexpdfja:
 	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
 	@echo "Running LaTeX files through platex and dvipdfmx..."
 	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
 	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
 text:
 	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
 	@echo
 	@echo "Build finished. The text files are in $(BUILDDIR)/text."
 man:
 	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
 	@echo
 	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
 texinfo:
 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
 	@echo
 	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
 	@echo "Run \`make' in that directory to run these through makeinfo" \
 	      "(use \`make info' here to do that automatically)."
 # Build the Texinfo sources with Sphinx, then run the generated Makefile in
 # $(BUILDDIR)/texinfo to produce the Info files.
 # The sub-make is invoked through $(MAKE) rather than a literal "make" so that
 # the same make program and its command-line flags (-j, -n, -k, ...) are
 # propagated, matching the latexpdf and latexpdfja targets.
 info:
 	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
 	@echo "Running Texinfo files through makeinfo..."
 	$(MAKE) -C $(BUILDDIR)/texinfo info
 	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
 gettext:
 	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
 	@echo
 	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
 changes:
 	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
 	@echo
 	@echo "The overview file is in $(BUILDDIR)/changes."
 linkcheck:
 	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
 	@echo
 	@echo "Link check complete; look for any errors in the above output " \
 	      "or in $(BUILDDIR)/linkcheck/output.txt."
 doctest:
 	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
 	@echo "Testing of doctests in the sources finished, look at the " \
 	      "results in $(BUILDDIR)/doctest/output.txt."
 coverage:
 	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
 	@echo "Testing of coverage in the sources finished, look at the " \
 	      "results in $(BUILDDIR)/coverage/python.txt."
 xml:
 	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
 	@echo
 	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
 pseudoxml:
 	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
 	@echo
 	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
--- a/docs/compare-llvm.rst
+++ b/docs/compare-llvm.rst
@@ -0,0 +1,199 @@
 *************************
 Cretonne compared to LLVM
 *************************
 `LLVM <http://llvm.org>`_ is a collection of compiler components implemented as
 a set of C++ libraries. It can be used to build both JIT compilers and static
 compilers like `Clang <http://clang.llvm.org>`_, and it is deservedly very
 popular. `Chris Lattner's chapter about LLVM
 <http://www.aosabook.org/en/llvm.html>`_ in the `Architecture of Open Source
 Applications <http://aosabook.org/en/index.html>`_ book gives an excellent
 overview of the architecture and design of LLVM.
 Cretonne and LLVM are superficially similar projects, so it is worth
 highlighting some of the differences and similarities. Both projects:
 - Use an ISA-agnostic input language in order to mostly abstract away the
  differences between target instruction set architectures.
 - Depend extensively on SSA form.
 - Have both textual and in-memory forms of their primary intermediate language.
  (LLVM also has a binary bitcode format; Cretonne doesn't.)
 - Can target multiple ISAs.
 - Can cross-compile by default without rebuilding the code generator.
 Cretonne's scope is much smaller than that of LLVM. The classical three main
 parts of a compiler are:
 1. The language-dependent front end parses and type-checks the input program.
 2. Common optimizations that are independent of both the input language and the
   target ISA.
 3. The code generator which depends strongly on the target ISA.
 LLVM provides both common optimizations *and* a code generator. Cretonne only
 provides the last part, the code generator. LLVM additionally provides
 infrastructure for building assemblers and disassemblers. Cretonne does not
 handle assembly at all---it only generates binary machine code.
 Intermediate representations
 ============================
 LLVM uses multiple intermediate representations as it translates a program to
 binary machine code:
 `LLVM IR <http://llvm.org/docs/LangRef.html>`_
    This is the primary intermediate language which has textual, binary, and
    in-memory representations. It serves two main purposes:
    - An ISA-agnostic, stable(ish) input language that front ends can generate
      easily.
    - Intermediate representation for common mid-level optimizations. A large
      library of code analysis and transformation passes operate on LLVM IR.
 `SelectionDAG <http://llvm.org/docs/CodeGenerator.html#instruction-selection-section>`_
    A graph-based representation of the code in a single basic block is used by
    the instruction selector. It has both ISA-agnostic and ISA-specific
    opcodes. These main passes are run on the SelectionDAG representation:
    - Type legalization eliminates all value types that don't have a
      representation in the target ISA registers.
    - Operation legalization eliminates all opcodes that can't be mapped to
      target ISA instructions.
    - DAG-combine cleans up redundant code after the legalization passes.
    - Instruction selection translates ISA-agnostic expressions to ISA-specific
      instructions.
    The SelectionDAG representation automatically eliminates common
    subexpressions and dead code.
 `MachineInstr <http://llvm.org/docs/CodeGenerator.html#machine-code-representation>`_
    A linear representation of ISA-specific instructions that initially is in
    SSA form, but it can also represent non-SSA form during and after register
    allocation. Many low-level optimizations run on MI code. The most important
    passes are:
    - Scheduling.
    - Register allocation.
 `MC <http://llvm.org/docs/CodeGenerator.html#the-mc-layer>`_
    MC serves as the output abstraction layer and is the basis for LLVM's
    integrated assembler. It is used for:
    - Branch relaxation.
    - Emitting assembly or binary object code.
    - Assemblers.
    - Disassemblers.
 There is an ongoing "global instruction selection" project to replace the
 SelectionDAG representation with ISA-agnostic opcodes on the MachineInstr
 representation. Some target ISAs have a fast instruction selector that can
 translate simple code directly to MachineInstrs, bypassing SelectionDAG when
 possible.
 :doc:`Cretonne <langref>` uses a single intermediate language to cover these
 levels of abstraction. This is possible in part because of Cretonne's smaller
 scope.
 - Cretonne does not provide assemblers and disassemblers, so it is not
  necessary to be able to represent every weird instruction in an ISA. Only
  those instructions that the code generator emits have a representation.
 - Cretonne's opcodes are ISA-agnostic, but after legalization / instruction
  selection, each instruction is annotated with an ISA-specific encoding which
  represents a native instruction.
 - SSA form is preserved throughout. After register allocation, each SSA value
  is annotated with an assigned ISA register or stack slot.
 The Cretonne intermediate language is similar to LLVM IR, but at a slightly
 lower level of abstraction.
 Program structure
 -----------------
 In LLVM IR, the largest representable unit is the *module* which corresponds
 more or less to a C translation unit. It is a collection of functions and
 global variables that may contain references to external symbols too.
 In Cretonne IL, the largest representable unit is the *function*. This is so
 that functions can easily be compiled in parallel without worrying about
 references to shared data structures. Cretonne does not have any
 inter-procedural optimizations like inlining.
 An LLVM IR function is a graph of *basic blocks*. A Cretonne IL function is a
 graph of *extended basic blocks* that may contain internal branch instructions.
 The main difference is that an LLVM conditional branch instruction has two
 target basic blocks---a true and a false edge. A Cretonne branch instruction
 only has a single target and falls through to the next instruction when its
 condition is false. The Cretonne representation is closer to how machine code
 works; LLVM's representation is more abstract.
 LLVM uses `phi instructions
 <http://llvm.org/docs/LangRef.html#phi-instruction>`_ in its SSA
 representation. Cretonne passes arguments to EBBs instead. The two
 representations are equivalent, but the EBB arguments are better suited to
 handle EBBs that may contain multiple branches to the same destination block
 with different arguments. Passing arguments to an EBB looks a lot like passing
 arguments to a function call, and the register allocator treats them very
 similarly. Arguments are assigned to registers or stack locations.
 Value types
 -----------
 :ref:`Cretonne's type system <value-types>` is mostly a subset of LLVM's type
 system. It is less abstract and closer to the types that common ISA registers
 can hold.
 - Integer types are limited to powers of two from :cton:type:`i8` to
  :cton:type:`i64`. LLVM can represent integer types of arbitrary bit width.
 - Floating point types are limited to :cton:type:`f32` and :cton:type:`f64`
  which is what WebAssembly provides. It is possible that 16-bit and 128-bit
  types will be added in the future.
 - Addresses are represented as integers---there are no Cretonne pointer types.
  LLVM currently has rich pointer types that include the pointee type. It may
  move to a simpler 'address' type in the future. Cretonne may add a single
  address type too.
 - SIMD vector types are limited to a power-of-two number of vector lanes up to
  256. LLVM allows an arbitrary number of SIMD lanes.
 - Cretonne has no aggregate types. LLVM has named and anonymous struct types as
  well as array types.
 Cretonne has multiple boolean types, whereas LLVM simply uses ``i1``. The sized
 Cretonne boolean types are used to represent SIMD vector masks like ``b32x4``
 where each lane is either all 0 or all 1 bits.
 Cretonne instructions and function calls can return multiple result values. LLVM
 instead models this by returning a single value of an aggregate type.
 Instruction set
 ---------------
 LLVM has a small well-defined basic instruction set and a large number of
 intrinsics, some of which are ISA-specific. Cretonne has a larger instruction
 set and no intrinsics. Some Cretonne instructions are ISA-specific.
 Since Cretonne instructions are used all the way until the binary machine code
 is emitted, there are opcodes for every native instruction that can be
 generated. There is a lot of overlap between different ISAs, so for example the
 :cton:inst:`iadd_imm` instruction is used by every ISA that can add an
 immediate integer to a register. A simple RISC ISA like RISC-V can be defined
 with only shared instructions, while an Intel ISA needs a number of specific
 instructions to model addressing modes.
 Undefined behavior
 ==================
 Cretonne does not generally exploit undefined behavior in its optimizations.
 LLVM's mid-level optimizations do, but it should be noted that LLVM's low-level code
 generator rarely needs to make use of undefined behavior either.
 LLVM provides ``nsw`` and ``nuw`` flags for its arithmetic that invoke
 undefined behavior on overflow. Cretonne does not provide this functionality.
 Its arithmetic instructions either produce a value or a trap.
 LLVM has an ``unreachable`` instruction which is used to indicate impossible
 code paths. Cretonne only has an explicit :cton:inst:`trap` instruction.
 Cretonne does make assumptions about aliasing. For example, it assumes that it
 has full control of the stack objects in a function, and that they can only be
 modified by function calls if their addresses have escaped. It is quite likely
 that Cretonne will admit more detailed aliasing annotations on load/store
 instructions in the future. When these annotations are incorrect, undefined
 behavior ensues.
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -0,0 +1,137 @@
 # -*- coding: utf-8 -*-
 #
 # cretonne documentation build configuration file, created by
 # sphinx-quickstart on Fri Jan  8 10:11:19 2016.
 #
 # This file is execfile()d with the current directory set to its
 # containing dir.
 #
 # Note that not all possible configuration values are present in this
 # autogenerated file.
 #
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 from __future__ import absolute_import
 import sys
 import os
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 sys.path.insert(0, os.path.abspath('.'))
 # Also add the meta directory to sys.path so autodoc can find the Cretonne meta
 # language definitions.
 sys.path.insert(0, os.path.abspath('../lib/cretonne/meta'))
 # -- General configuration ------------------------------------------------
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.todo',
    'sphinx.ext.mathjax',
    'sphinx.ext.ifconfig',
    'sphinx.ext.graphviz',
    'sphinx.ext.inheritance_diagram',
    'cton_domain',
    'cton_lexer',
 ]
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 # source_suffix = ['.rst', '.md']
 source_suffix = '.rst'
 # The master toctree document.
 master_doc = 'index'
 # General information about the project.
 project = u'cretonne'
 copyright = u'2016, Cretonne Developers'
 author = u'Cretonne Developers'
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 #
 # The short X.Y version.
 version = u'0.0'
 # The full version, including alpha/beta/rc tags.
 release = u'0.0'
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
 #
 # This is also used if you do content translation via gettext catalogs.
 # Usually you set "language" from the command line for these cases.
 language = None
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
 exclude_patterns = ['_build']
 # The name of the Pygments (syntax highlighting) style to use.
 pygments_style = 'sphinx'
 # If true, `todo` and `todoList` produce output, else they produce nothing.
 todo_include_todos = True
 # -- Options for HTML output ----------------------------------------------
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 html_theme = 'sphinx_rtd_theme'
 # Output file base name for HTML help builder.
 htmlhelp_basename = 'cretonnedoc'
 # -- Options for LaTeX output ---------------------------------------------
 latex_elements = {
 }
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
  (master_doc, 'cretonne.tex', u'cretonne Documentation',
   author, 'manual'),
 ]
 # -- Options for manual page output ---------------------------------------
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
 man_pages = [
    (master_doc, 'cretonne', u'cretonne Documentation',
     [author], 1)
 ]
 # -- Options for Texinfo output -------------------------------------------
 # Grouping the document tree into Texinfo files. List of tuples
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
  (master_doc, 'cretonne', u'cretonne Documentation',
   author, 'cretonne', 'One line description of project.',
   'Miscellaneous'),
 ]
 # -- Options for Graphviz -------------------------------------------------
 graphviz_output_format = 'svg'
 inheritance_graph_attrs = dict(rankdir='TD')
--- a/docs/cton_domain.py
+++ b/docs/cton_domain.py
@@ -0,0 +1,385 @@
 # -*- coding: utf-8 -*-
 #
 # Sphinx domain for documenting compiler intermediate languages.
 #
 # This defines a 'cton' Sphinx domain with the following directives and roles:
 #
 # .. cton:type:: type
 #     Document an IR type.
 # .. cton:inst:: v1, v2 = inst op1, op2
 #     Document an IR instruction.
 #
 from __future__ import absolute_import
 import re
 from docutils import nodes
 from docutils.parsers.rst import directives
 from sphinx import addnodes
 from sphinx.directives import ObjectDescription
 from sphinx.domains import Domain, ObjType
 from sphinx.locale import l_
 from sphinx.roles import XRefRole
 from sphinx.util.docfields import Field, GroupedField, TypedField
 from sphinx.util.nodes import make_refnode
 import sphinx.ext.autodoc
 class CtonObject(ObjectDescription):
    """
    Common base for every indexable Cretonne IL object.
    The directives of the Cretonne IL reference derive from this class.
    ``add_target_and_index`` calls ``self.get_index_text``, which is not
    defined here and is expected to come from the subclass.
    """
    option_spec = {
        'noindex': directives.flag,
        'module': directives.unchanged,
        'annotation': directives.unchanged,
    }
    def add_target_and_index(self, name, sig, signode):
        """
        Register ``name`` as a link target and add it to the index.
        :param name: The object name returned by :func:`handle_signature`.
        :param sig: The signature text.
        :param signode: The output node.
        """
        anchor = '-'.join((self.objtype, name))
        document = self.state.document
        if anchor not in document.ids:
            # First time this anchor is seen in the document: make the
            # signature node an explicit target.
            signode['names'].append(anchor)
            signode['ids'].append(anchor)
            signode['first'] = (not self.names)
            document.note_explicit_target(signode)
            inventory = self.env.domaindata['cton']['objects']
            if name in inventory:
                # Warn about the clash; the newer entry still wins below.
                previous = self.env.doc2path(inventory[name][0])
                self.state_machine.reporter.warning(
                    'duplicate Cretonne object description of %s, ' % name +
                    'other instance in ' + previous,
                    line=self.lineno)
            inventory[name] = (self.env.docname, self.objtype)
        entry = self.get_index_text(name)
        if entry:
            self.indexnode['entries'].append(
                ('single', entry, anchor, '', None))
 # Type variables are indicated as %T.
 # The pattern is a raw string: '\%' in a plain literal is an invalid string
 # escape, and '%' needs no escaping in a regular expression anyway.
 typevar = re.compile(r'(%[A-Z])')
 def parse_type(name, signode):
    """
    Parse a type with embedded type vars and append to signode.
    Literal parts of the name are appended as ``desc_name`` nodes; type
    variables (``%T``) are appended as emphasis nodes without the ``%``.
    :param name: The type name, e.g. ``i%Bx%N``.
    :param signode: The output node that receives the rendered parts.
    :returns: A string that can be compiled into a regular expression
        matching the type; each type variable becomes ``\\w+``.
    """
    re_str = ''
    for part in typevar.split(name):
        if part == '':
            # split() yields empty strings around adjacent/edge matches.
            continue
        if len(part) == 2 and part[0] == '%':
            # This is a type parameter. Don't display the %, use emphasis
            # instead.
            part = part[1]
            signode += nodes.emphasis(part, part)
            re_str += r'\w+'
        else:
            signode += addnodes.desc_name(part, part)
            re_str += re.escape(part)
    return re_str
 class CtonType(CtonObject):
    """Directive describing a single Cretonne IL type."""
    def handle_signature(self, sig, signode):
        """
        Render the type signature ``sig`` into ``signode``.
        The whitespace-stripped signature is returned and serves as the
        global object name passed on to ``add_target_and_index``.
        """
        type_name = sig.strip()
        # The regex string returned by parse_type is not needed here.
        parse_type(type_name, signode)
        return type_name
    def get_index_text(self, name):
        """Return the index entry text for the type called ``name``."""
        suffix = ' (IL type)'
        return name + suffix
 # Separators used when splitting instruction signatures. Raw strings are
 # required: '\s' in a plain literal is an invalid string escape.
 sep_equal = re.compile(r'\s*=\s*')
 sep_comma = re.compile(r'\s*,\s*')
 def parse_params(s, signode):
    """
    Append the comma-separated parameter list ``s`` to ``signode``.
    Each parameter becomes an emphasis node; consecutive parameters are
    separated by a ``', '`` text node.
    :param s: Parameter list text such as ``v1, v2``.
    :param signode: The output node that receives the rendered parameters.
    """
    for i, p in enumerate(sep_comma.split(s)):
        if i != 0:
            signode += nodes.Text(', ')
        signode += nodes.emphasis(p, p)
 class CtonInst(CtonObject):
    """A Cretonne IL instruction."""
    doc_field_types = [
        TypedField('argument', label=l_('Arguments'),
                   names=('in', 'arg'),
                   typerolename='type', typenames=('type',)),
        TypedField('result', label=l_('Results'),
                   names=('out', 'result'),
                   typerolename='type', typenames=('type',)),
        GroupedField(
            'typevar', names=('typevar',), label=l_('Type Variables')),
        GroupedField('flag', names=('flag',), label=l_('Flags')),
        Field('resulttype', label=l_('Result type'), has_arg=False,
              names=('rtype',)),
    ]
    def handle_signature(self, sig, signode):
        """
        Parse an instruction signature and render it into ``signode``.
        Accepted forms::
            v1, v2 = foo op1, op2
            v1 = foo
            foo op1
        :param sig: The signature text.
        :param signode: The output node that receives the rendered parts.
        :returns: The instruction name (``foo`` above), used as the object
            name for ``add_target_and_index``.
        :raises ValueError: If the signature contains no instruction name
            (for example ``v1 =``). ``ValueError`` is the exception the
            Sphinx ``ObjectDescription.handle_signature`` contract uses to
            report an unparseable signature.
        """
        # Split off the optional 'results =' prefix at the first '='.
        halves = sep_equal.split(sig, maxsplit=1)
        if len(halves) == 2:
            outs, rest = halves
        else:
            outs, rest = None, halves[0]
        # Parse 'name arg, arg'
        tokens = rest.split(None, 1)
        if not tokens:
            # Validate before touching signode so nothing half-rendered is
            # left behind.
            raise ValueError(
                'missing instruction name in signature: %r' % (sig,))
        name = tokens[0]
        if outs is not None:
            # Outgoing parameters.
            parse_params(outs, signode)
            signode += nodes.Text(' = ')
        signode += addnodes.desc_name(name, name)
        if len(tokens) == 2:
            # Incoming parameters.
            signode += nodes.Text(' ')
            parse_params(tokens[1], signode)
        return name
    def get_index_text(self, name):
        """Return the index entry text: the bare instruction name."""
        return name
 class CtonInstGroup(CtonObject):
    """A Cretonne IL instruction group."""
    # No overrides: everything is inherited from CtonObject.
 class CretonneDomain(Domain):
    """Sphinx domain collecting Cretonne intermediate language objects."""
    name = 'cton'
    label = 'Cretonne'
    object_types = {
        'type': ObjType(l_('type'), 'type'),
        'inst': ObjType(l_('instruction'), 'inst')
    }
    directives = {
        'type': CtonType,
        'inst': CtonInst,
        'instgroup': CtonInstGroup,
    }
    roles = {
        'type': XRefRole(),
        'inst': XRefRole(),
        'instgroup': XRefRole(),
    }
    initial_data = {
        'objects': {},  # fullname -> docname, objtype
    }
    def clear_doc(self, docname):
        """Drop every recorded object that was defined in ``docname``."""
        registry = self.data['objects']
        # Collect first: the dict must not change size while iterating.
        stale = [fullname
                 for fullname, (owner, _kind) in registry.items()
                 if owner == docname]
        for fullname in stale:
            del registry[fullname]
    def merge_domaindata(self, docnames, otherdata):
        """Copy objects defined in ``docnames`` from ``otherdata``."""
        registry = self.data['objects']
        for fullname, (owner, objtype) in otherdata['objects'].items():
            if owner not in docnames:
                continue
            registry[fullname] = (owner, objtype)
    def resolve_xref(self, env, fromdocname, builder, typ, target, node,
                     contnode):
        """Return a reference node for ``target``, or None if unknown."""
        entry = self.data['objects'].get(target)
        if entry is None:
            return None
        owner, objtype = entry
        anchor = objtype + '-' + target
        return make_refnode(builder, fromdocname, owner,
                            anchor, contnode, target)
    def resolve_any_xref(self, env, fromdocname, builder, target,
                         node, contnode):
        """Return ``[(role, refnode)]`` for ``target``, or ``[]``."""
        entry = self.data['objects'].get(target)
        if entry is None:
            return []
        owner, objtype = entry
        role = 'cton:' + self.role_for_objtype(objtype)
        refnode = make_refnode(builder, fromdocname, owner,
                               objtype + '-' + target, contnode, target)
        return [(role, refnode)]
 class TypeDocumenter(sphinx.ext.autodoc.Documenter):
    """Autodoc documenter that emits ``cton:type`` directives."""
    # Invoke with .. autoctontype::
    objtype = 'ctontype'
    # Convert into cton:type directives
    domain = 'cton'
    directivetype = 'type'
    @classmethod
    def can_document_member(cls, member, membername, isattr, parent):
        # Always False: this documenter never claims a member by itself.
        return False
    def resolve_name(self, modname, parents, path, base):
        # Names are always resolved inside the fixed 'base.types' module.
        return 'base.types', [base]
    def add_content(self, more_content, no_docstring=False):
        """Add the regular content followed by a ``:bytes:`` field."""
        super(TypeDocumenter, self).add_content(more_content, no_docstring)
        source = self.get_sourcename()
        size = self.object.membytes
        # A falsy membytes is reported as "not storable".
        if not size:
            line = u':bytes: Can\'t be stored in memory'
        else:
            line = u':bytes: {}'.format(size)
        self.add_line(line, source)
 class InstDocumenter(sphinx.ext.autodoc.Documenter):
    """Autodoc documenter that emits ``cton:inst`` directives."""
    # Invoke with .. autoinst::
    objtype = 'inst'
    # Convert into cton:inst directives
    domain = 'cton'
    directivetype = 'inst'
    @classmethod
    def can_document_member(cls, member, membername, isattr, parent):
        # Always False: this documenter never claims a member by itself.
        return False
    def resolve_name(self, modname, parents, path, base):
        """Resolve to ``path`` if given, else to ``base.instructions``."""
        if path:
            return path.rstrip('.'), [base]
        else:
            return 'base.instructions', [base]
    def format_signature(self):
        """
        Build the textual signature ``outs = name ins`` of the instruction.
        Reads ``name``, ``outs`` and ``ins`` from ``self.object``.
        """
        inst = self.object
        sig = inst.name
        if len(inst.outs) > 0:
            sig = ', '.join([op.name for op in inst.outs]) + ' = ' + sig
        if len(inst.ins) > 0:
            op = inst.ins[0]
            sig += ' ' + op.name
            # If the first input is variable-args, this is 'return'. No parens.
            if op.kind.name == 'variable_args':
                sig += '...'
            for op in inst.ins[1:]:
                # This is a call or branch with args in (...).
                if op.kind.name == 'variable_args':
                    sig += '({}...)'.format(op.name)
                else:
                    sig += ', ' + op.name
        return sig
    def add_directive_header(self, sig):
        """Add the directive header and options to the generated content."""
        domain = getattr(self, 'domain', 'cton')
        directive = getattr(self, 'directivetype', self.objtype)
        sourcename = self.get_sourcename()
        self.add_line(u'.. %s:%s:: %s' % (domain, directive, sig), sourcename)
        if self.options.noindex:
            self.add_line(u'   :noindex:', sourcename)
    def _add_operand_fields(self, field, operands, sourcename):
        """Emit one ``:<field> <type> <name>: <doc>`` line per operand."""
        for op in operands:
            # Value operands are described by their type variable, all
            # other operands by their operand kind.
            typ = op.typevar if op.is_value() else op.kind
            self.add_line(u':{} {} {}: {}'.format(
                field, typ, op.name, op.get_doc()), sourcename)
    def add_content(self, more_content, no_docstring=False):
        """
        Add the regular content, then operand and type variable fields.
        Emits ``:in`` fields for inputs, ``:out`` fields for outputs and,
        for polymorphic instructions, ``:typevar`` fields.
        """
        super(InstDocumenter, self).add_content(more_content, no_docstring)
        sourcename = self.get_sourcename()
        inst = self.object
        # Add inputs and outputs.
        self._add_operand_fields(u'in', inst.ins, sourcename)
        self._add_operand_fields(u'out', inst.outs, sourcename)
        # Document type inference for polymorphic instructions.
        if inst.is_polymorphic:
            if inst.ctrl_typevar is not None:
                if inst.use_typevar_operand:
                    tvopnum = inst.value_opnums[inst.format.typevar_operand]
                    self.add_line(
                            u':typevar {}: inferred from {}'
                            .format(
                                inst.ctrl_typevar.name,
                                inst.ins[tvopnum]),
                            sourcename)
                else:
                    self.add_line(
                            u':typevar {}: explicitly provided'
                            .format(inst.ctrl_typevar.name),
                            sourcename)
            for tv in inst.other_typevars:
                self.add_line(
                        u':typevar {}: from input operand'.format(tv.name),
                        sourcename)
 class InstGroupDocumenter(sphinx.ext.autodoc.ModuleLevelDocumenter):
    """Autodoc documenter that emits ``cton:instgroup`` directives."""
    # Invoke with .. autoinstgroup::
    objtype = 'instgroup'
    # Convert into cton:instgroup directives
    domain = 'cton'
    directivetype = 'instgroup'
    @classmethod
    def can_document_member(cls, member, membername, isattr, parent):
        # Always False: this documenter never claims a member by itself.
        return False
    def format_name(self):
        """Return the dotted name ``<modname>.<objpath...>``."""
        dotted_path = ".".join(self.objpath)
        return "{}.{}".format(self.modname, dotted_path)
    def add_content(self, more_content, no_docstring=False):
        """Add the regular content, then one line per instruction name."""
        super(InstGroupDocumenter, self).add_content(
                more_content, no_docstring)
        source = self.get_sourcename()
        known = self.env.domaindata['cton']['objects']
        for inst_name in sorted(
                inst.name for inst in self.object.instructions):
            # Link instructions that are in the domain inventory; show the
            # others as plain literals.
            if inst_name not in known:
                line = u'``{}``'.format(inst_name)
            else:
                line = u':cton:inst:`{}`'.format(inst_name)
            self.add_line(line, source)
 def setup(app):
    """Register the Cretonne domain and its autodocumenters with ``app``."""
    app.add_domain(CretonneDomain)
    # Registration order is kept: types, instructions, instruction groups.
    for documenter in (TypeDocumenter, InstDocumenter, InstGroupDocumenter):
        app.add_autodocumenter(documenter)
    return {'version': '0.1'}
--- a/docs/cton_lexer.py
+++ b/docs/cton_lexer.py
@@ -0,0 +1,72 @@
 # -*- coding: utf-8 -*-
 #
 # Pygments lexer for Cretonne.
 from __future__ import absolute_import
 from pygments.lexer import RegexLexer, bygroups, words
 from pygments.token import Comment, String, Keyword, Whitespace, Number, Name
 from pygments.token import Operator, Punctuation, Text
 def keywords(*args):
    """Return a ``words`` matcher for ``args`` with ``\\b`` on both sides."""
    boundary = r'\b'
    return words(args, prefix=boundary, suffix=boundary)
 class CretonneLexer(RegexLexer):
    """
    Pygments lexer for the Cretonne textual IL (``.cton`` files).
    Rules in ``tokens['root']`` are tried in order and the first match
    wins, so more specific patterns must precede more general ones.
    """
    name = 'Cretonne'
    aliases = ['cton']
    filenames = ['*.cton']
    tokens = {
        'root': [
            # Test header lines.
            (r'^(test|isa|set)(?:( +)([-\w]+)' +
             r'(?:(=)(?:(\d+)|(yes|no|true|false|on|off)|(\w+)))?)*' +
             r'( *)$',
                bygroups(Keyword.Namespace, Whitespace, Name.Attribute,
                         Operator, Number.Integer, Keyword.Constant,
                         Name.Constant, Whitespace)),
            # Comments with filecheck or other test directive.
            (r'(; *)([a-z]+:)(.*?)$',
                bygroups(Comment.Single, Comment.Special, Comment.Single)),
            # Plain comments.
            (r';.*?$', Comment.Single),
            # Strings are in double quotes, support \xx escapes only.
            (r'"([^"\\]+|\\[0-9a-fA-F]{2})*"', String),
            # A naked function name following 'function' is also a string.
            (r'\b(function)([ \t]+)(\w+)\b',
                bygroups(Keyword, Whitespace, String.Symbol)),
            # Numbers.
            # Hexadecimal floats must be tried before hexadecimal integers,
            # otherwise '0x1.8p3' is consumed as the integer '0x1' followed
            # by unrelated tokens. The exponent may carry a sign ('p-3').
            (r'[-+]?0[xX][0-9a-fA-F]*\.[0-9a-fA-F]*([pP][-+]?\d+)?',
                Number.Hex),
            (r'[-+]?0[xX][0-9a-fA-F]+', Number.Hex),
            (r'[-+]?(\d+\.\d+([eE]\d+)?|s?NaN|Inf)', Number.Float),
            (r'[-+]?\d+', Number.Integer),
            # Known attributes.
            (keywords('uext', 'sext'), Name.Attribute),
            # Well known value types.
            (r'\b(b\d+|i\d+|f32|f64)(x\d+)?\b', Keyword.Type),
            # v<nn> = value
            # ss<nn> = stack slot
            # jt<nn> = jump table
            (r'(v|ss|jt)\d+', Name.Variable),
            # ebb<nn> = extended basic block
            (r'(ebb)\d+', Name.Label),
            # Match instruction names in context.
            (r'(=)( *)([a-z]\w*)',
                bygroups(Operator, Whitespace, Name.Function)),
            (r'^( *)([a-z]\w*\b)(?! *[,=])',
                bygroups(Whitespace, Name.Function)),
            # Other names: results and arguments
            (r'[a-z]\w*', Name),
            (r'->|=|:', Operator),
            (r'[{}(),.]', Punctuation),
            (r'[ \t]+', Text),
        ],
    }
 def setup(app):
    """Sphinx extension entry point: register the lexer as ``cton``."""
    lexer = CretonneLexer()
    app.add_lexer('cton', lexer)
    return {'version': '0.1'}
--- a/docs/example.c
+++ b/docs/example.c
@@ -0,0 +1,8 @@
 /*
  * Return the arithmetic mean of the first `count` elements of `array`.
  *
  * The sum is accumulated in a double and the quotient is narrowed to
  * float by the return type.
  * NOTE(review): for count == 0 this divides 0.0 by 0, presumably yielding
  * NaN on IEEE 754 targets -- confirm.
  */
 float
 average(const float *array, size_t count)
 {
    double sum = 0;
    for (size_t i = 0; i < count; i++)
        sum += array[i];
    return sum / count;
 }
--- a/docs/example.cton
+++ b/docs/example.cton
@@ -0,0 +1,33 @@
 test verifier
 function %average(i32, i32) -> f32 native {
    ss1 = local 8            ; Stack slot for ``sum``.
 ebb1(v1: i32, v2: i32):
    v3 = f64const 0x0.0
    stack_store v3, ss1           ; sum = 0
    brz v2, ebb3                  ; Handle count == 0.
    v4 = iconst.i32 0             ; i = 0
    jump ebb2(v4)
 ebb2(v5: i32):
    v6 = imul_imm v5, 4           ; Byte offset of array[i].
    v7 = iadd v1, v6
    v8 = heap_load.f32 v7         ; array[i]
    v9 = fpromote.f64 v8
    v10 = stack_load.f64 ss1
    v11 = fadd v9, v10
    stack_store v11, ss1          ; sum += array[i]
    v12 = iadd_imm v5, 1          ; i + 1
    v13 = icmp ult v12, v2
    brnz v13, ebb2(v12)           ; Loop backedge.
    v14 = stack_load.f64 ss1
    v15 = fcvt_from_uint.f64 v2
    v16 = fdiv v14, v15           ; sum / count
    v17 = fdemote.f32 v16
    return v17
 ebb3:
    v100 = f32const +NaN
    return v100
 }
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -0,0 +1,25 @@
 Cretonne Code Generator
 =======================
 Contents:
 .. toctree::
   :maxdepth: 1
   langref
   metaref
   testing
   regalloc
   compare-llvm
 Indices and tables
 ==================
 * :ref:`genindex`
 * :ref:`modindex`
 * :ref:`search`
 Todo list
 =========
 .. todolist::
--- a/docs/langref.rst
+++ b/docs/langref.rst
@@ -0,0 +1,924 @@
 ***************************
 Cretonne Language Reference
 ***************************
 .. default-domain:: cton
 .. highlight:: cton
 The Cretonne intermediate language (:term:`IL`) has two equivalent
 representations: an *in-memory data structure* that the code generator library
 is using, and a *text format* which is used for test cases and debug output.
 Files containing Cretonne textual IL have the ``.cton`` filename extension.
 This reference uses the text format to describe IL semantics but glosses over
 the finer details of the lexical and syntactic structure of the format.
 Overall structure
 =================
 Cretonne compiles functions independently. A ``.cton`` IL file may contain
 multiple functions, and the programmatic API can create multiple function
 handles at the same time, but the functions don't share any data or reference
 each other directly.
 This is a simple C function that computes the average of an array of floats:
 .. literalinclude:: example.c
    :language: c
 Here is the same function compiled into Cretonne IL:
 .. literalinclude:: example.cton
    :language: cton
    :lines: 2-
 The first line of a function definition provides the function *name* and
 the :term:`function signature` which declares the argument and return types.
 Then follows the :term:`function preamble` which declares a number of entities
 that can be referenced inside the function. In the example above, the preamble
 declares a single local variable, ``ss1``.
 After the preamble follows the :term:`function body` which consists of
 :term:`extended basic block`\s (EBBs), the first of which is the
 :term:`entry block`. Every EBB ends with a :term:`terminator instruction`, so
 execution can never fall through to the next EBB without an explicit branch.
 A ``.cton`` file consists of a sequence of independent function definitions:
 .. productionlist::
    function_list : { function }
    function      : function_spec "{" preamble function_body "}"
    function_spec : "function" function_name signature
    preamble      : { preamble_decl }
    function_body : { extended_basic_block }
 Static single assignment form
 -----------------------------
 The instructions in the function body use and produce *values* in SSA form. This
 means that every value is defined exactly once, and every use of a value must be
 dominated by the definition.
 Cretonne does not have phi instructions but uses *EBB arguments* instead. An EBB
 can be defined with a list of typed arguments. Whenever control is transferred
 to the EBB, values for the arguments must be provided. When entering a function,
 the incoming function arguments are passed as arguments to the entry EBB.
 Instructions define zero, one, or more result values. All SSA values are either
 EBB arguments or instruction results.
 In the example above, the loop induction variable ``i`` is represented as three
 SSA values: In the entry block, ``v4`` is the initial value. In the loop block
 ``ebb2``, the EBB argument ``v5`` represents the value of the induction
 variable during each iteration. Finally, ``v12`` is computed as the induction
 variable value for the next iteration.
 It can be difficult to generate correct SSA form if the program being converted
 into Cretonne :term:`IL` contains multiple assignments to the same variables.
 Such variables can be presented to Cretonne as :term:`stack slot`\s instead.
 Stack slots are accessed with the :inst:`stack_store` and :inst:`stack_load`
 instructions which behave more like variable accesses in a typical programming
 language. Cretonne can perform the necessary data-flow analysis to convert stack
 slots to SSA form.
 .. _value-types:
 Value types
 ===========
 All SSA values have a type which determines the size and shape (for SIMD
 vectors) of the value. Many instructions are polymorphic -- they can operate on
 different types.
 Boolean types
 -------------
 Boolean values are either true or false. While this only requires a single bit
 to represent, more bits are often used when holding a boolean value in a
 register or in memory. The :type:`b1` type represents an abstract boolean
 value. It can only exist as an SSA value, it can't be stored in memory or
 converted to another type. The larger boolean types can be stored in memory.
 .. todo:: Clarify the representation of larger boolean types.
    The multi-bit boolean types can be interpreted in different ways. We could
    declare that zero means false and non-zero means true. This may require
    unwanted normalization code in some places.
    We could specify a fixed encoding like all ones for true. This would then
    lead to undefined behavior if untrusted code uses the multibit booleans
    incorrectly.
    Something like this:
    - External code is not allowed to load/store multi-bit booleans or
      otherwise expose the representation.
    - Each target specifies the exact representation of a multi-bit boolean.
 .. autoctontype:: b1
 .. autoctontype:: b8
 .. autoctontype:: b16
 .. autoctontype:: b32
 .. autoctontype:: b64
 Integer types
 -------------
 Integer values have a fixed size and can be interpreted as either signed or
 unsigned. Some instructions will interpret an operand as a signed or unsigned
 number, others don't care.
 .. autoctontype:: i8
 .. autoctontype:: i16
 .. autoctontype:: i32
 .. autoctontype:: i64
 Floating point types
 --------------------
 The floating point types have the IEEE semantics that are supported by most
 hardware. There is no support for higher-precision types like quads or
 double-double formats.
 .. autoctontype:: f32
 .. autoctontype:: f64
 SIMD vector types
 -----------------
 A SIMD vector type represents a vector of values from one of the scalar types
 (boolean, integer, and floating point). Each scalar value in a SIMD type is
 called a *lane*. The number of lanes must be a power of two in the range 2-256.
 .. type:: i%Bx%N
    A SIMD vector of integers. The lane type :type:`iB` is one of the integer
    types :type:`i8` ... :type:`i64`.
    Some concrete integer vector types are :type:`i32x4`, :type:`i64x8`, and
    :type:`i16x4`.
    The size of a SIMD integer vector in memory is :math:`N B\over 8` bytes.
 .. type:: f32x%N
    A SIMD vector of single precision floating point numbers.
    Some concrete :type:`f32` vector types are: :type:`f32x2`, :type:`f32x4`,
    and :type:`f32x8`.
    The size of a :type:`f32` vector in memory is :math:`4N` bytes.
 .. type:: f64x%N
    A SIMD vector of double precision floating point numbers.
    Some concrete :type:`f64` vector types are: :type:`f64x2`, :type:`f64x4`,
    and :type:`f64x8`.
    The size of a :type:`f64` vector in memory is :math:`8N` bytes.
 .. type:: b1x%N
    A boolean SIMD vector.
    Boolean vectors are used when comparing SIMD vectors. For example,
    comparing two :type:`i32x4` values would produce a :type:`b1x4` result.
    Like the :type:`b1` type, a boolean vector cannot be stored in memory.
 Pseudo-types and type classes
 -----------------------------
 These are not concrete types, but convenient names used to refer to real types
 in this reference.
 .. type:: iPtr
    A Pointer-sized integer.
    This is either :type:`i32`, or :type:`i64`, depending on whether the target
    platform has 32-bit or 64-bit pointers.
 .. type:: iB
    Any of the scalar integer types :type:`i8` -- :type:`i64`.
 .. type:: Int
    Any scalar *or vector* integer type: :type:`iB` or :type:`iBxN`.
 .. type:: fB
    Either of the floating point scalar types: :type:`f32` or :type:`f64`.
 .. type:: Float
    Any scalar *or vector* floating point type: :type:`fB` or :type:`fBxN`.
 .. type:: %Tx%N
    Any SIMD vector type.
 .. type:: Mem
    Any type that can be stored in memory: :type:`Int` or :type:`Float`.
 .. type:: Logic
    Either :type:`b1` or :type:`b1xN`.
 .. type:: Testable
    Either :type:`b1` or :type:`iN`.
 Immediate operand types
 -----------------------
 These types are not part of the normal SSA type system. They are used to
 indicate the different kinds of immediate operands on an instruction.
 .. type:: imm64
    A 64-bit immediate integer. The value of this operand is interpreted as a
    signed two's complement integer. Instruction encodings may limit the valid
    range.
    In the textual format, :type:`imm64` immediates appear as decimal or
    hexadecimal literals using the same syntax as C.
 .. type:: offset32
    A signed 32-bit immediate address offset.
    In the textual format, :type:`offset32` immediates always have an explicit
    sign, and a 0 offset may be omitted.
 .. type:: ieee32
    A 32-bit immediate floating point number in the IEEE 754-2008 binary32
    interchange format. All bit patterns are allowed.
 .. type:: ieee64
    A 64-bit immediate floating point number in the IEEE 754-2008 binary64
    interchange format. All bit patterns are allowed.
 .. type:: bool
    A boolean immediate value, either false or true.
    In the textual format, :type:`bool` immediates appear as 'false'
    and 'true'.
 .. type:: intcc
    An integer condition code. See the :inst:`icmp` instruction for details.
 .. type:: floatcc
    A floating point condition code. See the :inst:`fcmp` instruction for details.
 The two IEEE floating point immediate types :type:`ieee32` and :type:`ieee64`
 are displayed as hexadecimal floating point literals in the textual :term:`IL`
 format. Decimal floating point literals are not allowed because some computer
 systems can round differently when converting to binary. The hexadecimal
 floating point format is mostly the same as the one used by C99, but extended
 to represent all NaN bit patterns:
 Normal numbers
    Compatible with C99: ``-0x1.Tpe`` where ``T`` are the trailing
    significand bits encoded as hexadecimal, and ``e`` is the unbiased exponent
    as a decimal number. :type:`ieee32` has 23 trailing significand bits. They
    are padded with an extra LSB to produce 6 hexadecimal digits. This is not
    necessary for :type:`ieee64` which has 52 trailing significand bits
    forming 13 hexadecimal digits with no padding.
 Zeros
    Positive and negative zero are displayed as ``0.0`` and ``-0.0`` respectively.
 Subnormal numbers
    Compatible with C99: ``-0x0.Tpemin`` where ``T`` are the trailing
    significand bits encoded as hexadecimal, and ``emin`` is the minimum exponent
    as a decimal number.
 Infinities
    Either ``-Inf`` or ``Inf``.
 Quiet NaNs
    Quiet NaNs have the MSB of the trailing significand set. If the remaining
    bits of the trailing significand are all zero, the value is displayed as
    ``-NaN`` or ``NaN``. Otherwise, ``-NaN:0xT`` where ``T`` are the trailing
    significand bits encoded as hexadecimal.
 Signaling NaNs
    Displayed as ``-sNaN:0xT``.
 Control flow
 ============
 Branches transfer control to a new EBB and provide values for the target EBB's
 arguments, if it has any. Conditional branches only take the branch if their
 condition is satisfied, otherwise execution continues at the following
 instruction in the EBB.
 .. autoinst:: jump
 .. autoinst:: fallthrough
 .. autoinst:: brz
 .. autoinst:: brnz
 .. autoinst:: br_icmp
 .. autoinst:: br_table
 .. inst:: JT = jump_table EBB0, EBB1, ..., EBBn
    Declare a jump table in the :term:`function preamble`.
    This declares a jump table for use by the :inst:`br_table` indirect branch
    instruction. Entries in the table are either EBB names, or ``0`` which
    indicates an absent entry.
    The EBBs listed must belong to the current function, and they can't have
    any arguments.
    :arg EBB0: Target EBB when ``x = 0``.
    :arg EBB1: Target EBB when ``x = 1``.
    :arg EBBn: Target EBB when ``x = n``.
    :result: A jump table identifier. (Not an SSA value).
 Traps stop the program because something went wrong. The exact behavior depends
 on the target instruction set architecture and operating system. There are
 explicit trap instructions defined below, but some instructions may also cause
 traps for certain input values. For example, :inst:`udiv` traps when the divisor
 is zero.
 .. autoinst:: trap
 .. autoinst:: trapz
 .. autoinst:: trapnz
 Function calls
 ==============
 A function call needs a target function and a :term:`function signature`. The
 target function may be determined dynamically at runtime, but the signature
 must be known when the function call is compiled. The function signature
 describes how to call the function, including arguments, return values, and the
 calling convention:
 .. productionlist::
    signature : "(" [arglist] ")" ["->" retlist] [call_conv]
    arglist   : arg { "," arg }
    retlist   : arglist
    arg       : type [argext] [argspecial]
    argext    : "uext" | "sext"
    argspecial: "sret" | "link" | "fp" | "csr"
    call_conv : `string`
 Arguments and return values have flags whose meaning is mostly target
 dependent. They make it possible to call native functions on the target
 platform. When calling other Cretonne functions, the flags are not necessary.
 Functions that are called directly must be declared in the :term:`function
 preamble`:
 .. inst:: FN = function NAME signature
    Declare a function so it can be called directly.
    :arg NAME: Name of the function, passed to the linker for resolution.
    :arg signature: Function signature. See above.
    :result FN: A function identifier that can be used with :inst:`call`.
 .. autoinst:: call
 .. autoinst:: x_return
 This simple example illustrates direct function calls and signatures::
    function %gcd(i32 uext, i32 uext) -> i32 uext "C" {
        fn1 = function %divmod(i32 uext, i32 uext) -> i32 uext, i32 uext
    ebb1(v1: i32, v2: i32):
        brz v2, ebb2
        v3, v4 = call fn1(v1, v2)
        jump ebb1(v2, v4)
    ebb2:
        return v1
    }
 Indirect function calls use a signature declared in the preamble.
 .. autoinst:: call_indirect
 .. todo:: Define safe indirect function calls.
    The :inst:`call_indirect` instruction is dangerous to use in a sandboxed
    environment since it is not easy to verify the callee address.
    We need a table-driven indirect call instruction, similar to
    :inst:`br_table`.
 Memory
 ======
 Cretonne provides fully general :inst:`load` and :inst:`store` instructions for
 accessing memory. However, it can be very complicated to verify the safety of
 general loads and stores when compiling code for a sandboxed environment, so
 Cretonne also provides more restricted memory operations that are always safe.
 .. autoinst:: load
 .. autoinst:: store
 Loads and stores are *misaligned* if the resultant address is not a multiple of
 the expected alignment. Depending on the target architecture, misaligned memory
 accesses may trap, or they may work. Sometimes, operating systems catch
 alignment traps and emulate the misaligned memory access.
 Extending loads and truncating stores
 -------------------------------------
 Most ISAs provide instructions that load an integer value smaller than a register
 and extend it to the width of the register. Similarly, store instructions that
 only write the low bits of an integer register are common.
 Cretonne provides extending loads and truncating stores for 8, 16, and 32-bit
 memory accesses.
 .. autoinst:: uload8
 .. autoinst:: sload8
 .. autoinst:: istore8
 .. autoinst:: uload16
 .. autoinst:: sload16
 .. autoinst:: istore16
 .. autoinst:: uload32
 .. autoinst:: sload32
 .. autoinst:: istore32
 Local variables
 ---------------
 One set of restricted memory operations accesses the current function's stack
 frame. The stack frame is divided into fixed-size stack slots that are
 allocated in the :term:`function preamble`. Stack slots are not typed, they
 simply represent a contiguous sequence of bytes in the stack frame.
 .. inst:: SS = local Bytes, Flags...
    Allocate a stack slot for a local variable in the preamble.
    If no alignment is specified, Cretonne will pick an appropriate alignment
    for the stack slot based on its size and access patterns.
    :arg Bytes: Stack slot size in bytes.
    :flag align(N): Request at least N bytes alignment.
    :result SS: Stack slot index.
 .. autoinst:: stack_load
 .. autoinst:: stack_store
 The dedicated stack access instructions are easy for the compiler to reason
 about because stack slots and offsets are fixed at compile time. For example,
 the alignment of these stack memory accesses can be inferred from the offsets
 and stack slot alignments.
 It can be necessary to escape from the safety of the restricted instructions by
 taking the address of a stack slot.
 .. autoinst:: stack_addr
 The :inst:`stack_addr` instruction can be used to macro-expand the stack access
 instructions before instruction selection::
    v1 = stack_load.f64 ss3, 16
    ; Expands to:
    v9 = stack_addr ss3, 16
    v1 = load.f64 v9
 Heaps
 -----
 Code compiled from WebAssembly or asm.js runs in a sandbox where it can't access
 all process memory. Instead, it is given a small set of memory areas to work
 in, and all accesses are bounds checked. Cretonne models this through the
 concept of *heaps*.
 A heap is declared in the function preamble and can be accessed with restricted
 instructions that trap on out-of-bounds accesses. Heap addresses can be smaller
 than the native pointer size, for example unsigned :type:`i32` offsets on a
 64-bit architecture.
 .. inst:: H = heap Name
    Declare a heap in the function preamble.
    This doesn't allocate memory, it just retrieves a handle to a sandbox from
    the runtime environment.
    :arg Name: String identifying the heap in the runtime environment.
    :result H: Heap identifier.
 .. autoinst:: heap_load
 .. autoinst:: heap_store
 When optimizing heap accesses, Cretonne may separate the heap bounds checking
 and address computations from the memory accesses.
 .. autoinst:: heap_addr
 A small example using heaps::
    function %vdup(i32, i32) {
        h1 = heap "main"
    ebb1(v1: i32, v2: i32):
        v3 = heap_load.i32x4 h1, v1, 0
        v4 = heap_addr h1, v2, 32      ; Shared range check for two stores.
        store v3, v4, 0
        store v3, v4, 16
        return
    }
 The final expansion of the :inst:`heap_addr` range check and address conversion
 depends on the runtime environment.
 Operations
 ==========
 The remaining instruction set is mostly arithmetic.
 A few instructions have variants that take immediate operands (e.g.,
 :inst:`band` / :inst:`band_imm`), but in general an instruction is required to
 load a constant into an SSA value.
 .. autoinst:: select
 Constant materialization
 ------------------------
 .. autoinst:: iconst
 .. autoinst:: f32const
 .. autoinst:: f64const
 .. autoinst:: bconst
 Live range splitting
 --------------------
 Cretonne's register allocator assigns each SSA value to a register or a spill
 slot on the stack for its entire live range. Since the live range of an SSA
 value can be quite large, it is sometimes beneficial to split the live range
 into smaller parts.
 A live range is split by creating new SSA values that are copies of the
 original value or each other. The copies are created by inserting :inst:`copy`,
 :inst:`spill`, or :inst:`fill` instructions, depending on whether the values
 are assigned to registers or stack slots.
 This approach permits SSA form to be preserved throughout the register
 allocation pass and beyond.
 .. autoinst:: copy
 .. autoinst:: spill
 .. autoinst:: fill
 Register values can be temporarily diverted to other registers by the
 :inst:`regmove` instruction.
 .. autoinst:: regmove
 Vector operations
 -----------------
 .. autoinst:: vsplit
 .. autoinst:: vconcat
 .. autoinst:: vselect
 .. autoinst:: splat
 .. autoinst:: insertlane
 .. autoinst:: extractlane
 Integer operations
 ------------------
 .. autoinst:: icmp
 .. autoinst:: icmp_imm
 .. autoinst:: iadd
 .. autoinst:: iadd_imm
 .. autoinst:: iadd_cin
 .. autoinst:: iadd_cout
 .. autoinst:: iadd_carry
 .. autoinst:: isub
 .. autoinst:: irsub_imm
 .. autoinst:: isub_bin
 .. autoinst:: isub_bout
 .. autoinst:: isub_borrow
 .. autoinst:: imul
 .. autoinst:: imul_imm
 .. todo:: Larger multiplication results.
    For example, ``smulx`` which multiplies :type:`i32` operands to produce a
    :type:`i64` result. Alternatively, ``smulhi`` and ``smullo`` pairs.
 .. autoinst:: udiv
 .. autoinst:: udiv_imm
 .. autoinst:: sdiv
 .. autoinst:: sdiv_imm
 .. autoinst:: urem
 .. autoinst:: urem_imm
 .. autoinst:: srem
 .. autoinst:: srem_imm
 .. todo:: Minimum / maximum.
    NEON has ``smin``, ``smax``, ``umin``, and ``umax`` instructions. We should
    replicate those for both scalar and vector integer types. Even if the
    target ISA doesn't have scalar operations, these are good pattern matching
    targets.
 .. todo:: Saturating arithmetic.
    Mostly for SIMD use, but again these are good patterns for contraction.
    Something like ``usatadd``, ``usatsub``, ``ssatadd``, and ``ssatsub`` is a
    good start.
 Bitwise operations
 ------------------
 The bitwise operations operate on any value type: Integers, floating point
 numbers, and booleans. When operating on integer or floating point types, the
 bitwise operations are working on the binary representation of the values. When
 operating on boolean values, the bitwise operations work as logical operators.
 .. autoinst:: band
 .. autoinst:: band_imm
 .. autoinst:: bor
 .. autoinst:: bor_imm
 .. autoinst:: bxor
 .. autoinst:: bxor_imm
 .. autoinst:: bnot
 .. autoinst:: band_not
 .. autoinst:: bor_not
 .. autoinst:: bxor_not
 The shift and rotate operations only work on integer types (scalar and vector).
 The shift amount does not have to be the same type as the value being shifted.
 Only the low `B` bits of the shift amount are significant.
 When operating on an integer vector type, the shift amount is still a scalar
 type, and all the lanes are shifted the same amount. The shift amount is masked
 to the number of bits in a *lane*, not the full size of the vector type.
 .. autoinst:: rotl
 .. autoinst:: rotl_imm
 .. autoinst:: rotr
 .. autoinst:: rotr_imm
 .. autoinst:: ishl
 .. autoinst:: ishl_imm
 .. autoinst:: ushr
 .. autoinst:: ushr_imm
 .. autoinst:: sshr
 .. autoinst:: sshr_imm
 The bit-counting instructions below are scalar only.
 .. autoinst:: clz
 .. autoinst:: cls
 .. autoinst:: ctz
 .. autoinst:: popcnt
 Floating point operations
 -------------------------
 These operations generally follow IEEE 754-2008 semantics.
 .. autoinst:: fcmp
 .. autoinst:: fadd
 .. autoinst:: fsub
 .. autoinst:: fmul
 .. autoinst:: fdiv
 .. autoinst:: sqrt
 .. autoinst:: fma
 Sign bit manipulations
 ~~~~~~~~~~~~~~~~~~~~~~
 The sign manipulating instructions work as bitwise operations, so they don't
 have special behavior for signaling NaN operands. The exponent and trailing
 significand bits are always preserved.
 .. autoinst:: fneg
 .. autoinst:: fabs
 .. autoinst:: fcopysign
 Minimum and maximum
 ~~~~~~~~~~~~~~~~~~~
 These instructions return the larger or smaller of their operands. They differ
 in their handling of quiet NaN inputs. Note that signaling NaN operands always
 cause a NaN result.
 When comparing zeroes, these instructions behave as if :math:`-0.0 < 0.0`.
 .. autoinst:: fmin
 .. autoinst:: fminnum
 .. autoinst:: fmax
 .. autoinst:: fmaxnum
 Rounding
 ~~~~~~~~
 These instructions round their argument to a nearby integral value, still
 represented as a floating point number.
 .. autoinst:: ceil
 .. autoinst:: floor
 .. autoinst:: trunc
 .. autoinst:: nearest
 Conversion operations
 ---------------------
 .. autoinst:: bitcast
 .. autoinst:: breduce
 .. autoinst:: bextend
 .. autoinst:: bint
 .. autoinst:: bmask
 .. autoinst:: ireduce
 .. autoinst:: uextend
 .. autoinst:: sextend
 .. autoinst:: fpromote
 .. autoinst:: fdemote
 .. autoinst:: fcvt_to_uint
 .. autoinst:: fcvt_to_sint
 .. autoinst:: fcvt_from_uint
 .. autoinst:: fcvt_from_sint
 Legalization operations
 -----------------------
 These instructions are used as helpers when legalizing types and operations for
 the target ISA.
 .. autoinst:: isplit
 .. autoinst:: iconcat
 ISA-specific instructions
 =========================
 Target ISAs can define supplemental instructions that do not make sense to
 support generally.
 Intel
 -----
 Instructions that can only be used by the Intel target ISA.
 .. autoinst:: isa.intel.instructions.sdivmodx
 .. autoinst:: isa.intel.instructions.udivmodx
 Instruction groups
 ==================
 All of the shared instructions are part of the :instgroup:`base` instruction
 group.
 .. autoinstgroup:: base.instructions.GROUP
 Target ISAs may define further instructions in their own instruction groups:
 .. autoinstgroup:: isa.intel.instructions.GROUP
 Implementation limits
 =====================
 Cretonne's intermediate representation imposes some limits on the size of
 functions and the number of entities allowed. If these limits are exceeded, the
 implementation will panic.
 Number of instructions in a function
    At most :math:`2^{31} - 1`.
 Number of EBBs in a function
    At most :math:`2^{31} - 1`.
    Every EBB needs at least a terminator instruction anyway.
 Number of secondary values in a function
    At most :math:`2^{31} - 1`.
    Secondary values are any SSA values that are not the first result of an
    instruction.
 Other entities declared in the preamble
    At most :math:`2^{32} - 1`.
    This covers things like stack slots, jump tables, external functions, and
    function signatures, etc.
 Number of arguments to an EBB
    At most :math:`2^{16}`.
 Number of arguments to a function
    At most :math:`2^{16}`.
    This follows from the limit on arguments to the entry EBB. Note that
    Cretonne may add a handful of ABI register arguments as function signatures
    are lowered. This is for representing things like the link register, the
    incoming frame pointer, and callee-saved registers that are saved in the
    prologue.
 Size of function call arguments on the stack
    At most :math:`2^{32} - 1` bytes.
    This is probably not possible to achieve given the limit on the number of
    arguments, except by requiring extremely large offsets for stack arguments.
 Glossary
 ========
 .. glossary::
    intermediate language
    IL
        The language used to describe functions to Cretonne. This reference
        describes the syntax and semantics of the Cretonne IL. The IL has two
        forms: Textual and an in-memory intermediate representation
        (:term:`IR`).
    intermediate representation
    IR
        The in-memory representation of :term:`IL`. The data structures
        Cretonne uses to represent a program internally are called the
        intermediate representation. Cretonne's IR can be converted to text
        losslessly.
    function signature
        A function signature describes how to call a function. It consists of:
        - The calling convention.
        - The number of arguments and return values. (Functions can return
          multiple values.)
        - Type and flags of each argument.
        - Type and flags of each return value.
        Not all function attributes are part of the signature. For example, a
        function that never returns could be marked as ``noreturn``, but that
        is not necessary to know when calling it, so it is just an attribute,
        and not part of the signature.
    function preamble
        A list of declarations of entities that are used by the function body.
        Some of the entities that can be declared in the preamble are:
        - Local variables.
        - Functions that are called directly.
        - Function signatures for indirect function calls.
        - Function flags and attributes that are not part of the signature.
    function body
        The extended basic blocks which contain all the executable code in a
        function. The function body follows the function preamble.
    basic block
        A maximal sequence of instructions that can only be entered from the
        top, and that contains no branch or terminator instructions except for
        the last instruction.
    extended basic block
    EBB
        A maximal sequence of instructions that can only be entered from the
        top, and that contains no :term:`terminator instruction`\s except for
        the last one. An EBB can contain conditional branches that can fall
        through to the following instructions in the block, but only the first
        instruction in the EBB can be a branch target.
        The last instruction in an EBB must be a :term:`terminator instruction`,
        so execution cannot flow through to the next EBB in the function. (But
        there may be a branch to the next EBB.)
        Note that some textbooks define an EBB as a maximal *subtree* in the
        control flow graph where only the root can be a join node. This
        definition is not equivalent to Cretonne EBBs.
    terminator instruction
        A control flow instruction that unconditionally directs the flow of
        execution somewhere else. Execution never continues at the instruction
        following a terminator instruction.
        The basic terminator instructions are :inst:`br`, :inst:`return`, and
        :inst:`trap`. Conditional branches and instructions that trap
        conditionally are not terminator instructions.
    entry block
        The :term:`EBB` that is executed first in a function. Currently, a
        Cretonne function must have exactly one entry block which must be the
        first block in the function. The types of the entry block arguments must
        match the types of arguments in the function signature.
    stack slot
        A fixed size memory allocation in the current function's activation
        frame. Also called a local variable.
--- a/docs/make.bat
+++ b/docs/make.bat
@@ -0,0 +1,263 @@
@ECHO OFF
 REM Command file for Sphinx documentation
 REM Use "sphinx-build" unless the caller already set SPHINXBUILD.
 if "%SPHINXBUILD%" == "" (
 	set SPHINXBUILD=sphinx-build
 )
 REM All build output goes under BUILDDIR; "." is the source directory argument.
 set BUILDDIR=_build
 set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
 set I18NSPHINXOPTS=%SPHINXOPTS% .
 REM When PAPER is set, prepend it as the latex_paper_size config override.
 if NOT "%PAPER%" == "" (
 	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
 	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
 )
 REM With no target, or the explicit "help" target, list the supported targets.
 if "%1" == "" goto help
 if "%1" == "help" (
 	:help
 	echo.Please use `make ^<target^>` where ^<target^> is one of
 	echo.  html       to make standalone HTML files
 	echo.  dirhtml    to make HTML files named index.html in directories
 	echo.  singlehtml to make a single large HTML file
 	echo.  pickle     to make pickle files
 	echo.  json       to make JSON files
 	echo.  htmlhelp   to make HTML files and a HTML help project
 	echo.  qthelp     to make HTML files and a qthelp project
 	echo.  devhelp    to make HTML files and a Devhelp project
 	echo.  epub       to make an epub
 	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
 	echo.  latexpdf   to make LaTeX files and run "make all-pdf" on them
 	echo.  latexpdfja to make LaTeX files and run "make all-pdf-ja" on them
 	echo.  text       to make text files
 	echo.  man        to make manual pages
 	echo.  texinfo    to make Texinfo files
 	echo.  gettext    to make PO message catalogs
 	echo.  changes    to make an overview over all changed/added/deprecated items
 	echo.  xml        to make Docutils-native XML files
 	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
 	echo.  linkcheck  to check all external links for integrity
 	echo.  doctest    to run all doctests embedded in the documentation if enabled
 	echo.  coverage   to run coverage check of the documentation if enabled
 	goto end
 )
 REM "clean": remove every subdirectory of BUILDDIR, then delete the files left in it.
 if "%1" == "clean" (
 	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
 	del /q /s %BUILDDIR%\*
 	goto end
 )
 REM Check if sphinx-build is available and fallback to Python version if any
 REM The probe run below discards all output; only its errorlevel is inspected.
 REM An errorlevel of 9009 or higher is taken to mean the command was not found.
 %SPHINXBUILD% 1>NUL 2>NUL
 if errorlevel 9009 goto sphinx_python
 goto sphinx_ok
 :sphinx_python
 REM Fallback: invoke Sphinx through the Python interpreter and probe again.
 set SPHINXBUILD=python -m sphinx.__init__
 %SPHINXBUILD% 2> nul
 if errorlevel 9009 (
 	echo.
 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
 	echo.installed, then set the SPHINXBUILD environment variable to point
 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
 	echo.may add the Sphinx directory to PATH.
 	echo.
 	echo.If you don't have Sphinx installed, grab it from
 	echo.http://sphinx-doc.org/
 	exit /b 1
 )
 :sphinx_ok
 REM Builder targets. Each one runs sphinx-build with the matching -b builder,
 REM exits with code 1 if the build reports an error, and otherwise prints where
 REM the output was written before jumping to the common exit label.
 if "%1" == "html" (
 	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
 	goto end
 )
 if "%1" == "dirhtml" (
 	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
 	goto end
 )
 if "%1" == "singlehtml" (
 	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
 	goto end
 )
 if "%1" == "pickle" (
 	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished; now you can process the pickle files.
 	goto end
 )
 if "%1" == "json" (
 	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished; now you can process the JSON files.
 	goto end
 )
 REM The trailing ^ continues the echo text onto the following line.
 if "%1" == "htmlhelp" (
 	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished; now you can run HTML Help Workshop with the ^
 .hhp project file in %BUILDDIR%/htmlhelp.
 	goto end
 )
 REM "qthelp": build the Qt help sources, then print the follow-up commands the
 REM user has to run by hand. qcollectiongenerator consumes the .qhcp project
 REM file; the assistant hint must name the resulting .qhc collection file.
 if "%1" == "qthelp" (
 	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished; now you can run "qcollectiongenerator" with the ^
 .qhcp project file in %BUILDDIR%/qthelp, like this:
 	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\cretonne.qhcp
 	echo.To view the help file:
 	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\cretonne.qhc
 	goto end
 )
 REM devhelp, epub and latex targets: run the matching builder, exit with code 1
 REM on a build error, otherwise report completion and jump to the exit label.
 if "%1" == "devhelp" (
 	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished.
 	goto end
 )
 if "%1" == "epub" (
 	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The epub file is in %BUILDDIR%/epub.
 	goto end
 )
 if "%1" == "latex" (
 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
 	goto end
 )
 REM "latexpdf": generate the LaTeX sources, then run "make all-pdf" inside the
 REM output directory and return to the directory containing this script.
 if "%1" == "latexpdf" (
 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
 	REM Stop before changing directory if the LaTeX sources could not be built.
 	if errorlevel 1 exit /b 1
 	cd %BUILDDIR%/latex
 	make all-pdf
 	cd %~dp0
 	echo.
 	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
 	goto end
 )
 REM "latexpdfja": generate the LaTeX sources, then run "make all-pdf-ja" inside
 REM the output directory and return to the directory containing this script.
 if "%1" == "latexpdfja" (
 	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
 	REM Stop before changing directory if the LaTeX sources could not be built.
 	if errorlevel 1 exit /b 1
 	cd %BUILDDIR%/latex
 	make all-pdf-ja
 	cd %~dp0
 	echo.
 	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
 	goto end
 )
 REM text, man, texinfo, gettext and changes targets: run the matching builder,
 REM exit with code 1 on a build error, otherwise report the output location.
 if "%1" == "text" (
 	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The text files are in %BUILDDIR%/text.
 	goto end
 )
 if "%1" == "man" (
 	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The manual pages are in %BUILDDIR%/man.
 	goto end
 )
 if "%1" == "texinfo" (
 	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
 	goto end
 )
 REM gettext is the only target that uses I18NSPHINXOPTS (no doctree cache
 REM option) and writes to the "locale" directory instead of one named after it.
 if "%1" == "gettext" (
 	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
 	goto end
 )
 if "%1" == "changes" (
 	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.The overview file is in %BUILDDIR%/changes.
 	goto end
 )
 REM linkcheck, doctest, coverage, xml and pseudoxml targets: run the matching
 REM builder, exit with code 1 on a build error, otherwise report where to look.
 REM The trailing ^ continues an echo text onto the following line.
 if "%1" == "linkcheck" (
 	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Link check complete; look for any errors in the above output ^
 or in %BUILDDIR%/linkcheck/output.txt.
 	goto end
 )
 if "%1" == "doctest" (
 	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Testing of doctests in the sources finished, look at the ^
 results in %BUILDDIR%/doctest/output.txt.
 	goto end
 )
 if "%1" == "coverage" (
 	%SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Testing of coverage in the sources finished, look at the ^
 results in %BUILDDIR%/coverage/python.txt.
 	goto end
 )
 if "%1" == "xml" (
 	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The XML files are in %BUILDDIR%/xml.
 	goto end
 )
 if "%1" == "pseudoxml" (
 	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
 	if errorlevel 1 exit /b 1
 	echo.
 	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
 	goto end
 )
 REM Common exit point. A target name that matched none of the checks above
 REM falls through to here without running anything or printing a message.
 :end
--- a/docs/metaref.rst
+++ b/docs/metaref.rst
@@ -0,0 +1,483 @@
 ********************************
 Cretonne Meta Language Reference
 ********************************
 .. default-domain:: py
 .. highlight:: python
 .. module:: cdsl
 The Cretonne meta language is used to define instructions for Cretonne. It is a
 domain specific language embedded in Python. This document describes the Python
 modules that form the embedded DSL.
 The meta language descriptions are Python modules under the
 :file:`lib/cretonne/meta` directory. The descriptions are processed in two
 steps:
 1. The Python modules are imported. This has the effect of building static data
   structures in global variables in the modules. These static data structures
   in the :mod:`base` and :mod:`isa` packages use the classes in the
   :mod:`cdsl` package to describe instruction sets and other properties.
 2. The static data structures are processed to produce Rust source code and
   constant tables.
 The main driver for this source code generation process is the
 :file:`lib/cretonne/meta/build.py` script which is invoked as part of the build
 process if anything in the :file:`lib/cretonne/meta` directory has changed
 since the last build.
 .. module:: cdsl.settings
 Settings
 ========
 Settings are used by the environment embedding Cretonne to control the details
 of code generation. Each setting is defined in the meta language so a compact
 and consistent Rust representation can be generated. Shared settings are defined
 in the :mod:`base.settings` module. Some settings are specific to a target ISA,
 and defined in a :file:`settings.py` module under the appropriate
 :file:`lib/cretonne/meta/isa/*` directory.
 Settings can take boolean on/off values, small numbers, or explicitly enumerated
 symbolic values. Each type is represented by a sub-class of :class:`Setting`:
 .. inheritance-diagram:: Setting BoolSetting NumSetting EnumSetting
    :parts: 1
 .. autoclass:: Setting
 .. autoclass:: BoolSetting
 .. autoclass:: NumSetting
 .. autoclass:: EnumSetting
 All settings must belong to a *group*, represented by a :class:`SettingGroup`
 object.
 .. autoclass:: SettingGroup
 Normally, a setting group corresponds to all settings defined in a module. Such
 a module looks like this::
    group = SettingGroup('example')
    foo = BoolSetting('use the foo')
    bar = BoolSetting('enable bars', True)
    opt = EnumSetting('optimization level', 'Debug', 'Release')
    group.close(globals())
 .. module:: cdsl.instructions
 Instruction descriptions
 ========================
 New instructions are defined as instances of the :class:`Instruction`
 class. As instruction instances are created, they are added to the currently
 open :class:`InstructionGroup`.
 .. autoclass:: InstructionGroup
    :members:
 The basic Cretonne instruction set described in :doc:`langref` is defined by the
 Python module :mod:`base.instructions`. This module has a global variable
 :data:`base.instructions.GROUP` which is an :class:`InstructionGroup` instance
 containing all the base instructions.
 .. autoclass:: Instruction
 .. currentmodule:: cdsl.operands
 An instruction is defined with a set of distinct input and output operands which
 must be instances of the :class:`Operand` class.
 .. autoclass:: Operand
 Cretonne uses two separate type systems for operand kinds and SSA values.
 .. module:: cdsl.typevar
 Type variables
 --------------
 Instruction descriptions can be made polymorphic by using
 :class:`cdsl.operands.Operand` instances that refer to a *type variable*
 instead of a concrete value type. Polymorphism only works for SSA value
 operands. Other operands have a fixed operand kind.
 .. autoclass:: TypeVar
    :members:
 If multiple operands refer to the same type variable they will be required to
 have the same concrete type. For example, this defines an integer addition
 instruction::
    Int = TypeVar('Int', 'A scalar or vector integer type', ints=True, simd=True)
    a = Operand('a', Int)
    x = Operand('x', Int)
    y = Operand('y', Int)
    iadd = Instruction('iadd', 'Integer addition', ins=(x, y), outs=a)
 The type variable `Int` is allowed to vary over all scalar and vector integer
 value types, but in a given instance of the `iadd` instruction, the two
 operands must have the same type, and the result will be the same type as the
 inputs.
 There are some practical restrictions on the use of type variables, see
 :ref:`restricted-polymorphism`.
 Immediate operands
 ------------------
 .. currentmodule:: cdsl.operands
 Immediate instruction operands don't correspond to SSA values, but have values
 that are encoded directly in the instruction. Immediate operands don't
 have types from the :class:`cdsl.types.ValueType` type system; they often have
 enumerated values of a specific type. The type of an immediate operand is
 indicated with an instance of :class:`ImmediateKind`.
 .. autoclass:: ImmediateKind
 .. automodule:: base.immediates
    :members:
 Entity references
 -----------------
 .. currentmodule:: cdsl.operands
 Instruction operands can also refer to other entities in the same function. These
 can be extended basic blocks, or entities declared in the function preamble.
 .. autoclass:: EntityRefKind
 .. automodule:: base.entities
    :members:
 Value types
 -----------
 .. currentmodule:: cdsl.types
 Concrete value types are represented as instances of :class:`ValueType`. There
 are subclasses to represent scalar and vector types.
 .. autoclass:: ValueType
 .. inheritance-diagram:: ValueType ScalarType VectorType IntType FloatType BoolType
    :parts: 1
 .. autoclass:: ScalarType
    :members:
 .. autoclass:: VectorType
    :members:
 .. autoclass:: IntType
    :members:
 .. autoclass:: FloatType
    :members:
 .. autoclass:: BoolType
    :members:
 .. automodule:: base.types
    :members:
 There are no predefined vector types, but they can be created as needed with
 the :func:`ScalarType.by` function.
 .. module:: cdsl.operands
 Instruction representation
 ==========================
 The Rust in-memory representation of instructions is derived from the
 instruction descriptions. Part of the representation is generated, and part is
 written as Rust code in the ``cretonne.instructions`` module. The instruction
 representation depends on the input operand kinds and whether the instruction
 can produce multiple results.
 .. autoclass:: OperandKind
 .. inheritance-diagram:: OperandKind ImmediateKind EntityRefKind
 Since all SSA value operands are represented as a `Value` in Rust code, value
 types don't affect the representation. Two special operand kinds are used to
 represent SSA values:
 .. autodata:: VALUE
 .. autodata:: VARIABLE_ARGS
 .. module:: cdsl.formats
 When an instruction description is created, it is automatically assigned a
 predefined instruction format which is an instance of
 :class:`InstructionFormat`:
 .. autoclass:: InstructionFormat
 .. _restricted-polymorphism:
 Restricted polymorphism
 -----------------------
 The instruction format strictly controls the kinds of operands on an
 instruction, but it does not constrain value types at all. A given instruction
 description typically does constrain the allowed value types for its value
 operands. The type variables give a lot of freedom in describing the value type
 constraints, in practice more freedom than what is needed for normal instruction
 set architectures. In order to simplify the Rust representation of value type
 constraints, some restrictions are imposed on the use of type variables.
 A polymorphic instruction has a single *controlling type variable*. For a given
 opcode, this type variable must be the type of the first result or the type of
 the input value operand designated by the `typevar_operand` argument to the
 :py:class:`InstructionFormat` constructor. By default, this is the first value
 operand, which works most of the time.
 The value types of instruction results must be one of the following:
 1. A concrete value type.
 2. The controlling type variable.
 3. A type variable derived from the controlling type variable.
 This means that all result types can be computed from the controlling type
 variable.
 Input values to the instruction are allowed a bit more freedom. Input value
 types must be one of:
 1. A concrete value type.
 2. The controlling type variable.
 3. A type variable derived from the controlling type variable.
 4. A free type variable that is not used by any other operands.
 This means that the type of an input operand can either be computed from the
 controlling type variable, or it can vary independently of the other operands.
 Encodings
 =========
 .. currentmodule:: cdsl.isa
 Encodings describe how Cretonne instructions are mapped to binary machine code
 for the target architecture. After the legalization pass, all remaining
 instructions are expected to map 1-1 to native instruction encodings. Cretonne
 instructions that can't be encoded for the current architecture are called
 :term:`illegal instruction`\s.
 Some instruction set architectures have different :term:`CPU mode`\s with
 incompatible encodings. For example, a modern ARMv8 CPU might support three
 different CPU modes: *A64* where instructions are encoded in 32 bits, *A32*
 where all instructions are 32 bits, and *T32* which has a mix of 16-bit and
 32-bit instruction encodings. These are incompatible encoding spaces, and while
 an :cton:inst:`iadd` instruction can be encoded in 32 bits in each of them, it's
 not the same 32 bits. It's a judgement call if CPU modes should be modelled as
 separate targets, or as sub-modes of the same target. In the ARMv8 case, the
 different register banks mean that it makes sense to model A64 as a separate
 target architecture, while A32 and T32 are CPU modes of the 32-bit ARM target.
 In a given CPU mode, there may be multiple valid encodings of the same
 instruction. Both RISC-V and ARMv8's T32 mode have 32-bit encodings of all
 instructions with 16-bit encodings available for some opcodes if certain
 constraints are satisfied.
 .. autoclass:: CPUMode
 Encodings are guarded by :term:`sub-target predicate`\s. For example, the RISC-V
 "C" extension which specifies the compressed encodings may not be supported, and
 a predicate would be used to disable all of the 16-bit encodings in that case.
 This can also affect whether an instruction is legal. For example, x86 has a
 predicate that controls the SSE 4.1 instruction encodings. When that predicate
 is false, the SSE 4.1 instructions are not available.
 Encodings also have an :term:`instruction predicate` which depends on the
 specific values of the instruction's immediate fields. This is used to ensure
 that immediate address offsets are within range, for example. The instructions
 in the base Cretonne instruction set can often represent a wider range of
 immediates than any specific encoding. The fixed-size RISC-style encodings tend
 to have more range limitations than CISC-style variable length encodings like
 x86.
 The diagram below shows the relationship between the classes involved in
 specifying instruction encodings:
 .. digraph:: encoding
    node [shape=record]
    EncRecipe -> SubtargetPred
    EncRecipe -> InstrFormat
    EncRecipe -> InstrPred
    Encoding [label="{Encoding|Opcode+TypeVars}"]
    Encoding -> EncRecipe [label="+EncBits"]
    Encoding -> CPUMode
    Encoding -> SubtargetPred
    Encoding -> InstrPred
    Encoding -> Opcode
    Opcode -> InstrFormat
    CPUMode -> Target
 An :py:class:`Encoding` instance specifies the encoding of a concrete
 instruction. The following properties are used to select instructions to be
 encoded:
 - An opcode, e.g. :cton:inst:`iadd_imm`, that must match the instruction's
  opcode.
 - Values for any type variables if the opcode represents a polymorphic
  instruction.
 - An :term:`instruction predicate` that must be satisfied by the instruction's
  immediate operands.
 - The CPU mode that must be active.
 - A :term:`sub-target predicate` that must be satisfied by the currently active
  sub-target.
 An encoding specifies an *encoding recipe* along with some *encoding bits* that
 the recipe can use for native opcode fields etc. The encoding recipe has
 additional constraints that must be satisfied:
 - An :py:class:`InstructionFormat` that must match the format required by the
  opcodes of any encodings that use this recipe.
 - An additional :term:`instruction predicate`.
 - An additional :term:`sub-target predicate`.
 The additional predicates in the :py:class:`EncRecipe` are merged with the
 per-encoding predicates when generating the encoding matcher code. Often
 encodings only need the recipe predicates.
 .. autoclass:: EncRecipe
 Register constraints
 ====================
 After an encoding recipe has been chosen for an instruction, it is the register
 allocator's job to make sure that the recipe's :term:`Register constraint`\s
 are satisfied. Most ISAs have separate integer and floating point registers,
 and instructions can usually only use registers from one of the banks. Some
 instruction encodings are even more constrained and can only use a subset of
 the registers in a bank. These constraints are expressed in terms of register
 classes.
 Sometimes the result of an instruction is placed in a register that must be the
 same as one of the input registers. Some instructions even use a fixed register
 for inputs or results.
 Each encoding recipe specifies separate constraints for its value operands and
 result. These constraints are separate from the instruction predicate which can
 only evaluate the instruction's immediate operands.
 .. module:: cdsl.registers
 .. autoclass:: RegBank
 Register class constraints
 --------------------------
 The most common type of register constraint is the register class. It specifies
 that an operand or result must be allocated one of the registers from the given
 register class::
    IntRegs = RegBank('IntRegs', ISA, 'General purpose registers', units=16, prefix='r')
    GPR = RegClass(IntRegs)
    R = EncRecipe('R', Binary, ins=(GPR, GPR), outs=GPR)
 This defines an encoding recipe for the ``Binary`` instruction format where
 both input operands must be allocated from the ``GPR`` register class.
 .. autoclass:: RegClass
 Tied register operands
 ----------------------
 In more compact machine code encodings, it is common to require that the result
 register is the same as one of the inputs. This is represented with tied
 operands::
    CR = EncRecipe('CR', Binary, ins=(GPR, GPR), outs=0)
 This indicates that the result value must be allocated to the same register as
 the first input value. Tied operand constraints can only be used for result
 values, so the number always refers to one of the input values.
 Fixed register operands
 -----------------------
 Some instructions use hard-coded input and output registers for some value
 operands. An example is the ``pblendvb`` Intel SSE instruction which takes one
 of its three value operands in the hard-coded ``%xmm0`` register::
    XMM0 = FPR[0]
    SSE66_XMM0 = EncRecipe('SSE66_XMM0', Ternary, ins=(FPR, FPR, XMM0), outs=0)
 The syntax ``FPR[0]`` selects the first register from the ``FPR`` register
 class which consists of all the XMM registers.
 Stack operands
 --------------
 Cretonne's register allocator can assign an SSA value to a stack slot if there
 aren't enough registers. It will insert :cton:inst:`spill` and :cton:inst:`fill`
 instructions as needed to satisfy instruction operand constraints, but it is
 also possible to have instructions that can access stack slots directly::
    CSS = EncRecipe('CSS', Unary, ins=GPR, outs=Stack(GPR))
 An output stack value implies a store to the stack, an input value implies a
 load.
 .. module:: cdsl.isa
 Targets
 =======
 Cretonne can be compiled with support for multiple target instruction set
 architectures. Each ISA is represented by a :py:class:`cdsl.isa.TargetISA` instance.
 .. autoclass:: TargetISA
 The definitions for each supported target live in a package under
 :file:`lib/cretonne/meta/isa`.
 .. automodule:: isa
    :members:
 .. automodule:: isa.riscv
 .. automodule:: isa.intel
 .. automodule:: isa.arm32
 .. automodule:: isa.arm64
 Glossary
 ========
 .. glossary::
    Illegal instruction
        An instruction is considered illegal if there is no encoding available
        for the current CPU mode. The legality of an instruction depends on the
        value of :term:`sub-target predicate`\s, so it can't always be
        determined ahead of time.
    CPU mode
        Every target defines one or more CPU modes that determine how the CPU
        decodes binary instructions. Some CPUs can switch modes dynamically with
        a branch instruction (like ARM/Thumb), while other modes are
        process-wide (like x86 32/64-bit).
    Sub-target predicate
        A predicate that depends on the current sub-target configuration.
        Examples are "Use SSE 4.1 instructions", "Use RISC-V compressed
        encodings". Sub-target predicates can depend on both detected CPU
        features and configuration settings.
    Instruction predicate
        A predicate that depends on the immediate fields of an instruction. An
        example is "the load address offset must be a 10-bit signed integer".
        Instruction predicates do not depend on the registers selected for value
        operands.
    Register constraint
        Value operands and results correspond to machine registers. Encodings may
        constrain operands to either a fixed register or a register class. There
        may also be register constraints between operands, for example some
        encodings require that the result register is one of the input
        registers.
--- a/docs/regalloc.rst
+++ b/docs/regalloc.rst
@@ -0,0 +1,239 @@
 *******************************
 Register Allocation in Cretonne
 *******************************
 .. default-domain:: cton
 .. highlight:: rust
 Cretonne uses a *decoupled, SSA-based* register allocator. Decoupled means that
 register allocation is split into two primary phases: *spilling* and
 *coloring*. SSA-based means that the code stays in SSA form throughout the
 register allocator, and in fact is still in SSA form after register allocation.
 Before the register allocator is run, all instructions in the function must be
 *legalized*, which means that every instruction has an entry in the
 ``encodings`` table. The encoding entries also provide register class
 constraints on the instruction's operands that the register allocator must
 satisfy.
 After the register allocator has run, the ``locations`` table provides a
 register or stack slot location for all SSA values used by the function. The
 register allocator may have inserted :inst:`spill`, :inst:`fill`, and
 :inst:`copy` instructions to make that possible.
 SSA-based register allocation
 =============================
 The phases of the SSA-based register allocator are:
 Liveness analysis
    For each SSA value, determine exactly where it is live.
 Spilling
    The process of deciding which SSA values go in a stack slot and which
    values go in a register. The spilling phase can also split live ranges by
    inserting :inst:`copy` instructions, or transform the code in other ways to
    reduce the number of values kept in registers.
    After spilling, the number of live register values never exceeds the number
    of available registers.
 Coloring
    The process of assigning specific registers to the live values. It's a
    property of SSA form that this can be done in a linear scan of the
    dominator tree without causing any additional spills.
 EBB argument fixup
    The coloring phase does not guarantee that EBB arguments are placed in the
    correct registers and/or stack slots before jumping to the EBB. It will
    try its best, but not making this guarantee is essential to the speed of
    the coloring phase. (EBB arguments correspond to PHI nodes in traditional
    SSA form).
    The argument fixup phase inserts 'shuffle code' before jumps and branches
    to place the argument values in their expected locations.
 The contract between the spilling and coloring phases is that the number of
 values in registers never exceeds the number of available registers. This
 sounds simple enough in theory, but in practice there are some complications.
 Real-world complications to SSA coloring
 ----------------------------------------
 In practice, instruction set architectures don't have "K interchangeable
 registers", and register pressure can't be measured with a single number. There
 are complications:
 Different register banks
    Most ISAs separate integer registers from floating point registers, and
    instructions require their operands to come from a specific bank. This is a
    fairly simple problem to deal with since the register banks are completely
    disjoint. We simply count the number of integer and floating-point values
    that are live independently, and make sure that each number does not exceed
    the size of its respective register bank.
 Instructions with fixed operands
    Some instructions use a fixed register for an operand. This happens on the
    Intel ISAs:
    - Dynamic shift and rotate instructions take the shift amount in CL.
    - Division instructions use RAX and RDX for both input and output operands.
    - Wide multiply instructions use fixed RAX and RDX registers for input and
      output operands.
    - A few SSE variable blend instructions use a hardwired XMM0 input operand.
 Operands constrained to register subclasses
    Some instructions can only use a subset of the registers for some operands.
    For example, the ARM NEON vmla (scalar) instruction requires the scalar
    operand to be located in D0-15 or even D0-7, depending on the data type.
    The other operands can be from the full D0-31 register set.
 ABI boundaries
    Before making a function call, arguments must be placed in specific
    registers and stack locations determined by the ABI, and return values
    appear in fixed registers.
    Some registers can be clobbered by the call and some are saved by the
    callee. In some cases, only the low bits of a register are saved by the
    callee. For example, ARM64 callees save only the low 64 bits of v8-15, and
    Win64 callees only save the low 128 bits of AVX registers.
    ABI boundaries also affect the location of arguments to the entry block and
    return values passed to the :inst:`return` instruction.
 Aliasing registers
    Different registers sometimes share the same bits in the register bank.
    This can make it difficult to measure register pressure. For example, the
    Intel registers RAX, EAX, AX, AL, and AH overlap.
    If only one of the aliasing registers can be used at a time, the aliasing
    doesn't cause problems since the registers can simply be counted as one
    unit.
 Early clobbers
    Sometimes an instruction requires that the register used for an output
    operand does not alias any of the input operands. This happens for inline
    assembly and in some other special cases.
 Liveness Analysis
 =================
 Both spilling and coloring need to know exactly where SSA values are live. The
 liveness analysis computes this information.
 The data structure representing the live range of a value uses the linear
 layout of the function. All instructions and EBB headers are assigned a
 *program position*. A starting point for a live range can be one of the
 following:
 - The instruction where the value is defined.
 - The EBB header where the value is an EBB argument.
 - An EBB header where the value is live-in because it was defined in a
  dominating block.
 The ending point of a live range can be:
 - The last instruction to use the value.
 - A branch or jump to an EBB where the value is live-in.
 When all the EBBs in a function are laid out linearly, the live range of a
 value doesn't have to be a contiguous interval, although it will be in a
 majority of cases. There can be holes in the linear live range.
 The part of a value's live range that falls inside a single EBB will always be
 an interval without any holes. This follows from the dominance requirements of
 SSA. A live range is represented as:
 - The interval inside the EBB where the value is defined.
 - A set of intervals for EBBs where the value is live-in.
 Any value that is only used inside a single EBB will have an empty set of
 live-in intervals. Some values are live across large parts of the function, and
 this can often be represented with coalesced live-in intervals covering many
 EBBs. It is important that the live range data structure doesn't have to grow
 linearly with the number of EBBs covered by a live range.
 This representation is very similar to LLVM's ``LiveInterval`` data structure
 with a few important differences:
 - The Cretonne ``LiveRange`` only covers a single SSA value, while LLVM's
  ``LiveInterval`` represents the union of multiple related SSA values in a
  virtual register. This makes Cretonne's representation smaller because
  individual segments don't have to be annotated with a value number.
 - Cretonne stores the def-interval separately from a list of coalesced live-in
  intervals, while LLVM stores an array of segments. The two representations
  are equivalent, but Cretonne optimizes for the common case of a value that is
  only used locally.
 - It is simpler to check if two live ranges are overlapping. The dominance
  properties of SSA form mean that it is only necessary to check the
  def-interval of each live range against the intervals of the other range. It
  is not necessary to check for overlap between the two sets of live-in
  intervals. This makes the overlap check logarithmic in the number of live-in
  intervals instead of linear.
 - LLVM represents a program point as ``SlotIndex`` which holds a pointer to a
  32-byte ``IndexListEntry`` struct. The entries are organized in a doubly
  linked list that mirrors the ordering of instructions in a basic block. This
  allows 'tombstone' program points corresponding to instructions that have
  been deleted.
  Cretonne uses a 32-bit program point representation that encodes an
  instruction or EBB number directly. There are no 'tombstones' for deleted
  instructions, and no mirrored linked list of instructions. Live ranges must
  be updated when instructions are deleted.
 A consequence of Cretonne's more compact representation is that two program
 points can't be compared without the context of a function layout.
 Spilling algorithm
 ==================
 There is no one way of implementing spilling, and different tradeoffs between
 compilation time and code quality are possible. Any spilling algorithm will
 need a way of tracking the register pressure so the colorability condition can
 be satisfied.
 Coloring algorithm
 ==================
 The SSA coloring algorithm is based on a single observation: If two SSA values
 interfere, one of the values must be live where the other value is defined.
 We visit the EBBs in a topological order such that all dominating EBBs are
 visited before the current EBB. The instructions in an EBB are visited in a
 top-down order, and each value defined by the instruction is assigned an
 available register. With this iteration order, every value that is live at an
 instruction has already been assigned to a register.
 This coloring algorithm works if the following condition holds:
    At every instruction, consider the values live through the instruction. No
    matter how the live values have been assigned to registers, there must be
    registers of the right register classes available for the values
    defined by the instruction.
 We'll need to modify this condition in order to deal with the real-world
 complications.
 The coloring algorithm needs to keep track of the set of live values at each
 instruction. At the top of an EBB, this set can be computed as the union of:
 - The set of live values before the immediately dominating branch or jump
  instruction. The topological iteration order guarantees that this set is
  available. Values whose live range indicates that they are not live-in to the
  current EBB should be filtered out.
 - The set of arguments to the EBB. These values should all be live-in, although
  it is possible that some are dead and never used anywhere.
 For each live value, we also track its kill point in the current EBB. This is
 the last instruction to use the value in the EBB. Values that are live-out
 through the EBB terminator don't have a kill point. Note that the kill point
 can be a branch to another EBB that uses the value, so the kill instruction
 doesn't have to be a use of the value.
 When advancing past an instruction, the live set is updated:
 - Any values whose kill point is the current instruction are removed.
 - Any values defined by the instruction are added, unless their kill point is
  the current instruction. This corresponds to a dead def which has no uses.
--- a/docs/testing.rst
+++ b/docs/testing.rst
@@ -0,0 +1,354 @@
 ****************
 Testing Cretonne
 ****************
 Cretonne is tested at multiple levels of abstraction and integration. When
 possible, Rust unit tests are used to verify single functions and types. When
 testing the interaction between compiler passes, file-level tests are
 appropriate.
 The top-level shell script :file:`test-all.sh` runs all of the tests in the
 Cretonne repository.
 Rust tests
 ==========
 .. highlight:: rust
 Rust and Cargo have good support for testing. Cretonne uses unit tests, doc
 tests, and integration tests where appropriate.
 Unit tests
 ----------
 Unit tests live in a ``tests`` sub-module of the code they are testing::
    pub fn add(x: u32, y: u32) -> u32 {
        x + y
    }
    #[cfg(test)]
    mod tests {
        use super::add;
        #[test]
        fn check_add() {
            assert_eq!(add(2, 2), 4);
        }
    }
 Since sub-modules have access to non-public items in a Rust module, unit tests
 can be used to test module-internal functions and types too.
 Doc tests
 ---------
 Documentation comments can contain code snippets which are also compiled and
 tested::
    //! The `Flags` struct is immutable once it has been created. A `Builder` instance is used to
    //! create it.
    //!
    //! # Example
    //! ```
    //! use cretonne::settings::{self, Configurable};
    //!
    //! let mut b = settings::builder();
    //! b.set("opt_level", "fastest");
    //!
    //! let f = settings::Flags::new(&b);
    //! assert_eq!(f.opt_level(), settings::OptLevel::Fastest);
    //! ```
 These tests are useful for demonstrating how to use an API, and running them
 regularly makes sure that they stay up to date. Documentation tests are not
 appropriate for lots of assertions; use unit tests for that.
 Integration tests
 -----------------
 Integration tests are Rust source files that are compiled and linked
 individually. They are used to exercise the external API of the crates under
 test.
 These tests are usually found in the :file:`tests` top-level directory where
 they have access to all the crates in the Cretonne repository. The
 :file:`lib/cretonne` and :file:`lib/reader` crates have no external
 dependencies, which can make testing tedious. Integration tests that don't need
 to depend on other crates can be placed in :file:`lib/cretonne/tests` and
 :file:`lib/reader/tests`.
 File tests
 ==========
 .. highlight:: cton
 Compilers work with large data structures representing programs, and it quickly
 gets unwieldy to generate test data programmatically. File-level tests make it
 easier to provide substantial input functions for the compiler tests.
 File tests are :file:`*.cton` files in the :file:`filetests/` directory
 hierarchy. Each file has a header describing what to test followed by a number
 of input functions in the :doc:`Cretonne textual intermediate language
 <langref>`:
 .. productionlist::
    test_file     : test_header `function_list`
    test_header   : test_commands (`isa_specs` | `settings`)
    test_commands : test_command { test_command }
    test_command  : "test" test_name { option } "\n"
 The available test commands are described below.
 Many test commands only make sense in the context of a target instruction set
 architecture. These tests require one or more ISA specifications in the test
 header:
 .. productionlist::
    isa_specs     : { [`settings`] isa_spec }
    isa_spec      : "isa" isa_name { `option` } "\n"
 The options given on the ``isa`` line modify the ISA-specific settings defined in
 :file:`lib/cretonne/meta/isa/*/settings.py`.
 All types of tests allow shared Cretonne settings to be modified:
 .. productionlist::
    settings      : { setting }
    setting       : "set" { option } "\n"
    option        : flag | setting "=" value
 The shared settings available for all target ISAs are defined in
 :file:`lib/cretonne/meta/cretonne/settings.py`.
 The ``set`` lines apply settings cumulatively::
    test legalizer
    set opt_level=best
    set is_64bit=1
    isa riscv
    set is_64bit=0
    isa riscv supports_m=false
    function %foo() {}
 This example will run the legalizer test twice. Both runs will have
 ``opt_level=best``, but they will have different ``is_64bit`` settings. The 32-bit
 run will also have the RISC-V specific flag ``supports_m`` disabled.
 Filecheck
 ---------
 Many of the test commands described below use *filecheck* to verify their
 output. Filecheck is a Rust implementation of the LLVM tool of the same name.
 See the :file:`lib/filecheck` `documentation <https://docs.rs/filecheck/>`_ for
 details of its syntax.
 Comments in :file:`.cton` files are associated with the entity they follow.
 This typically means an instruction or the whole function. Those tests that
 use filecheck will extract comments associated with each function (or its
 entities) and scan them for filecheck directives. The test output for each
 function is then matched against the filecheck directives for that function.
 Comments appearing before the first function in a file apply to every function.
 This is useful for defining common regular expression variables with the
 ``regex:`` directive, for example.
 Note that LLVM's file tests don't separate filecheck directives by their
 associated function. It verifies the concatenated output against all filecheck
 directives in the test file. LLVM's :command:`FileCheck` command has a
 ``CHECK-LABEL:`` directive to help separate the output from different functions.
 Cretonne's tests don't need this.
 Filecheck variables
 ~~~~~~~~~~~~~~~~~~~
 Cretonne's IL parser causes entities like values and EBBs to be renumbered. It
 maintains a source mapping to resolve references in the text, but when a
 function is written out as text as part of a test, all of the entities have the
 new numbers. This can complicate the filecheck directives since they need to
 refer to the new entity numbers, not the ones in the adjacent source text.
 To help with this, the parser's source-to-entity mapping is made available as
 predefined filecheck variables. A value by the source name ``v10`` can be
 referenced as the filecheck variable ``$v10``. The variable expands to the
 renumbered entity name.
 `test cat`
 ----------
 This is one of the simplest file tests, used for testing the conversion to and
 from textual IL. The ``test cat`` command simply parses each function and
 converts it back to text again. The text of each function is then matched
 against the associated filecheck directives.
 Example::
    function %r1() -> i32, f32 {
    ebb1:
        v10 = iconst.i32 3
        v20 = f32const 0.0
        return v10, v20
    }
    ; sameln: function %r1() -> i32, f32 {
    ; nextln: ebb0:
    ; nextln:     v0 = iconst.i32 3
    ; nextln:     v1 = f32const 0.0
    ; nextln:     return v0, v1
    ; nextln: }
 Notice that the values ``v10`` and ``v20`` in the source were renumbered to
 ``v0`` and ``v1`` respectively during parsing. The equivalent test using
 filecheck variables would be::
    function %r1() -> i32, f32 {
    ebb1:
        v10 = iconst.i32 3
        v20 = f32const 0.0
        return v10, v20
    }
    ; sameln: function %r1() -> i32, f32 {
    ; nextln: ebb0:
    ; nextln:     $v10 = iconst.i32 3
    ; nextln:     $v20 = f32const 0.0
    ; nextln:     return $v10, $v20
    ; nextln: }
 `test verifier`
 ---------------
 Run each function through the IL verifier and check that it produces the
 expected error messages.
 Expected error messages are indicated with an ``error:`` directive *on the
 instruction that produces the verifier error*. Both the error message and
 reported location of the error are verified::
    test verifier
    function %test(i32) {
        ebb0(v0: i32):
            jump ebb1       ; error: terminator
            return
    }
 This example test passes if the verifier fails with an error message containing
 the sub-string ``"terminator"`` *and* the error is reported for the ``jump``
 instruction.
 If a function contains no ``error:`` annotations, the test passes if the
 function verifies correctly.
 `test print-cfg`
 ----------------
 Print the control flow graph of each function as a Graphviz graph, and run
 filecheck over the result. See also the :command:`cton-util print-cfg`
 command::
    ; For testing cfg generation. This code is nonsense.
    test print-cfg
    test verifier
    function %nonsense(i32, i32) -> f32 {
    ; check: digraph %nonsense {
    ; regex: I=\binst\d+\b
    ; check: label="{ebb0 | <$(BRZ=$I)>brz ebb2 | <$(JUMP=$I)>jump ebb1}"]
    ebb0(v1: i32, v2: i32):
        brz v2, ebb2            ; unordered: ebb0:$BRZ -> ebb2
        v4 = iconst.i32 0
        jump ebb1(v4)           ; unordered: ebb0:$JUMP -> ebb1
    ebb1(v5: i32):
        return v1
    ebb2:
        v100 = f32const 0.0
        return v100
    }
 `test domtree`
 --------------
 Compute the dominator tree of each function and validate it against the
 ``dominates:`` annotations::
    test domtree
    function %test(i32) {
        ebb0(v0: i32):
            jump ebb1     ; dominates: ebb1
        ebb1:
            brz v0, ebb3  ; dominates: ebb3
            jump ebb2     ; dominates: ebb2
        ebb2:
            jump ebb3
        ebb3:
            return
    }
 Every reachable extended basic block except for the entry block has an
 *immediate dominator* which is a jump or branch instruction. This test passes
 if the ``dominates:`` annotations on the immediate dominator instructions are
 both correct and complete.
 `test legalizer`
 ----------------
 Legalize each function for the specified target ISA and run the resulting
 function through filecheck. This test command can be used to validate the
 encodings selected for legal instructions as well as the instruction
 transformations performed by the legalizer.
 `test regalloc`
 ---------------
 Test the register allocator.
 First, each function is legalized for the specified target ISA. This is
 required for register allocation since the instruction encodings provide
 register class constraints to the register allocator.
 Second, the register allocator is run on the function, inserting spill code and
 assigning registers and stack slots to all values.
 The resulting function is then run through filecheck.
 `test binemit`
 --------------
 Test the emission of binary machine code.
 The functions must contain instructions that are annotated with both encodings
 and value locations (registers or stack slots). For instructions that are
 annotated with a ``bin:`` directive, the emitted hexadecimal machine code for
 that instruction is compared to the directive::
    test binemit
    isa riscv
    function %int32() {
    ebb0:
        [-,%x5]             v1 = iconst.i32 1
        [-,%x6]             v2 = iconst.i32 2
        [R#0c,%x7]          v10 = iadd v1, v2       ; bin: 006283b3
        [R#200c,%x8]        v11 = isub v1, v2       ; bin: 40628433
        return
    }
 If any instructions are unencoded (indicated with a ``[-]`` encoding field), they
 will be encoded using the same mechanism as the legalizer uses. However,
 illegal instructions for the ISA won't be expanded into other instruction
 sequences. Instead the test will fail.
 Value locations must be present if they are required to compute the binary
 bits. Missing value locations will cause the test to crash.
 `test simple-gvn`
 -----------------
 Test the simple GVN pass.
 The simple GVN pass is run on each function, and then results are run
 through filecheck.
--- a/filetests/cfg/loop.cton
+++ b/filetests/cfg/loop.cton
@@ -0,0 +1,35 @@
 ; For testing cfg generation. This code is nonsense.
 test print-cfg
 test verifier
 function %nonsense(i32, i32) -> f32 {
 ; check: digraph %nonsense {
 ; regex: I=\binst\d+\b
 ; check: label="{ebb0 | <$(BRZ=$I)>brz ebb2 | <$(JUMP=$I)>jump ebb1}"]
 ebb0(v1: i32, v2: i32):
    v3 = f64const 0x0.0
    brz v2, ebb2            ; unordered: ebb0:$BRZ -> ebb2
    v4 = iconst.i32 0
    jump ebb1(v4)           ; unordered: ebb0:$JUMP -> ebb1
 ebb1(v5: i32):
    v6 = imul_imm v5, 4
    v7 = iadd v1, v6
    v8 = f32const 0.0
    v9 = f32const 0.0
    v10 = f32const 0.0
    v11 = fadd v9, v10
    v12 = iadd_imm v5, 1
    v13 = icmp ult v12, v2
    brnz v13, ebb1(v12)     ; unordered: ebb1:inst12 -> ebb1
    v14 = f64const 0.0
    v15 = f64const 0.0
    v16 = fdiv v14, v15
    v17 = f32const 0.0
    return v17
 ebb2:
    v100 = f32const 0.0
    return v100
 }
--- a/filetests/cfg/traps_early.cton
+++ b/filetests/cfg/traps_early.cton
@@ -0,0 +1,21 @@
 ; For testing cfg generation. This code explores the implications of encountering
 ; a terminating instruction before any connections have been made.
 test print-cfg
 test verifier
 function %nonsense(i32) {
 ; check: digraph %nonsense {
 ebb0(v1: i32):
    trap            ; error: terminator instruction was encountered before the end
    brnz v1, ebb2   ; unordered: ebb0:inst1 -> ebb2
    jump ebb1       ; unordered: ebb0:inst2 -> ebb1
 ebb1:
    v2 = iconst.i32 0
    v3 = iadd v1, v3
    jump ebb0(v3)   ; unordered: ebb1:inst5 -> ebb0
 ebb2:
    return v1
 }
--- a/filetests/cfg/unused_node.cton
+++ b/filetests/cfg/unused_node.cton
@@ -0,0 +1,21 @@
 ; For testing cfg generation where some block is never reached.
 test print-cfg
 ; No instruction targets ebb1: ebb0 either branches to ebb2 or traps. The
 ; expected graph still contains an ebb1 node and its edge back to ebb0.
 function %not_reached(i32) -> i32 {
 ; check: digraph %not_reached {
 ; check:     ebb0 [shape=record, label="{ebb0 | <inst0>brnz ebb2}"]
 ; check:     ebb1 [shape=record, label="{ebb1 | <inst4>jump ebb0}"]
 ; check:     ebb2 [shape=record, label="{ebb2}"]
 ebb0(v0: i32):
    brnz v0, ebb2       ; unordered: ebb0:inst0 -> ebb2
    trap
 ebb1:
    v1 = iconst.i32 1
    v2 = iadd v0, v1
    jump ebb0(v2)       ; unordered: ebb1:inst4 -> ebb0
 ebb2:
    return v0
 }
--- a/filetests/domtree/basic.cton
+++ b/filetests/domtree/basic.cton
@@ -0,0 +1,13 @@
 test domtree
 ; Small acyclic CFG: ebb0 jumps to ebb1, and ebb1 reaches ebb3 either directly
 ; through its brz or indirectly through ebb2. The annotations name the EBBs
 ; expected under each branch or jump instruction.
 function %test(i32) {
    ebb0(v0: i32):
        jump ebb1     ; dominates: ebb1
    ebb1:
        brz v0, ebb3  ; dominates: ebb3
        jump ebb2     ; dominates: ebb2
    ebb2:
        jump ebb3
    ebb3:
        return
 }
--- a/filetests/domtree/loops.cton
+++ b/filetests/domtree/loops.cton
@@ -0,0 +1,20 @@
 test domtree
 ; CFG with cycles: ebb3 and ebb4 branch to each other, and so do ebb4 and
 ; ebb5. ebb4 is entered from ebb2, ebb3 and ebb5, and ebb5 from ebb2 and ebb4.
 ; Only the two instructions in ebb0 carry annotations.
 function %test(i32) {
    ebb0(v0: i32):
        brz v0, ebb1  ; dominates: ebb1 ebb3 ebb4 ebb5
        jump ebb2     ; dominates: ebb2
    ebb1:
        jump ebb3
    ebb2:
        brz v0, ebb4
        jump ebb5
    ebb3:
        jump ebb4
    ebb4:
        brz v0, ebb3
        jump ebb5
    ebb5:
        brz v0, ebb4
        return
 }
--- a/filetests/domtree/loops2.cton
+++ b/filetests/domtree/loops2.cton
@@ -0,0 +1,31 @@
 test domtree
 ; Ten-block CFG whose only cycle is the self-branch in ebb4. ebb6, ebb7, ebb8
 ; and ebb9 are each entered from more than one block, so their annotations sit
 ; on instructions in ebb0 or ebb2 rather than in a direct predecessor.
 function %test(i32) {
    ebb0(v0: i32):
        brz v0, ebb1    ; dominates: ebb1 ebb6
        brnz v0, ebb2   ; dominates: ebb2 ebb9
        jump ebb3       ; dominates: ebb3
    ebb1:
        jump ebb6
    ebb2:
        brz v0, ebb4    ; dominates: ebb4 ebb7 ebb8
        jump ebb5       ; dominates: ebb5
    ebb3:
        jump ebb9
    ebb4:
        brz v0, ebb4
        brnz v0, ebb6
        jump ebb7
    ebb5:
        brz v0, ebb7
        brnz v0, ebb8
        jump ebb9
    ebb6:
        return
    ebb7:
        jump ebb8
    ebb8:
        return
    ebb9:
        return
 }
--- a/filetests/domtree/tall-tree.cton
+++ b/filetests/domtree/tall-tree.cton
@@ -0,0 +1,33 @@
 test domtree
 ; Acyclic CFG with a long chain ebb0 -> ebb1 -> ebb4 -> ebb6 ahead of the
 ; deepest blocks (ebb8, ebb9, ebb11). ebb5, ebb10 and ebb11 are join points
 ; entered from two or more blocks.
 function %test(i32) {
    ebb0(v0: i32):
        brz v0, ebb1    ; dominates: ebb1
        brnz v0, ebb2   ; dominates: ebb2 ebb5
        jump ebb3       ; dominates: ebb3
    ebb1:
        jump ebb4       ; dominates: ebb4
    ebb2:
        jump ebb5
    ebb3:
        jump ebb5
    ebb4:
        brz v0, ebb6    ; dominates: ebb6 ebb10
        jump ebb7       ; dominates: ebb7
    ebb5:
        return
    ebb6:
        brz v0, ebb8    ; dominates: ebb11 ebb8
        brnz v0, ebb9   ; dominates: ebb9
        jump ebb10
    ebb7:
        jump ebb10
    ebb8:
        jump ebb11
    ebb9:
        jump ebb11
    ebb10:
        return
    ebb11:
        return
 }
--- a/filetests/domtree/wide-tree.cton
+++ b/filetests/domtree/wide-tree.cton
@@ -0,0 +1,41 @@
 test domtree
 ; Acyclic CFG with two wide fan-outs: ebb1 branches to ebb2..ebb6, which all
 ; rejoin at ebb7, and ebb7 branches to ebb8..ebb11. ebb12 is entered from
 ; ebb8, ebb9 and ebb10; ebb13 is entered from ebb0, ebb10 and ebb11.
 function %test(i32) {
    ebb0(v0: i32):
        brz v0, ebb13   ; dominates: ebb13
        jump ebb1       ; dominates: ebb1
    ebb1:
        brz v0, ebb2    ; dominates: ebb2 ebb7
        brnz v0, ebb3   ; dominates: ebb3
        brz v0, ebb4    ; dominates: ebb4
        brnz v0, ebb5   ; dominates: ebb5
        jump ebb6       ; dominates: ebb6
    ebb2:
        jump ebb7
    ebb3:
        jump ebb7
    ebb4:
        jump ebb7
    ebb5:
        jump ebb7
    ebb6:
        jump ebb7
    ebb7:
        brnz v0, ebb8   ; dominates: ebb8 ebb12
        brz v0, ebb9    ; dominates: ebb9
        brnz v0, ebb10  ; dominates: ebb10
        jump ebb11      ; dominates: ebb11
    ebb8:
        jump ebb12
    ebb9:
        jump ebb12
    ebb10:
        brz v0, ebb13
        jump ebb12
    ebb11:
        jump ebb13
    ebb12:
        return
    ebb13:
        return
 }
--- a/filetests/isa/intel/abi64.cton
+++ b/filetests/isa/intel/abi64.cton
@@ -0,0 +1,20 @@
 ; Test the legalization of function signatures.
 test legalizer
 set is_64bit
 isa intel
 ; regex: V=v\d+
 ; Declares three `native` signatures and expects each argument and return
 ; value to be printed with a register after legalization: integer arguments
 ; in %rdi, integer and boolean results in %rax, float values in %xmm0. In
 ; sig2 the i64 is the second argument but still expects %rdi.
 function %f() {
    sig0 = (i32) -> i32 native
    ; check: sig0 = (i32 [%rdi]) -> i32 [%rax] native
    sig1 = (i64) -> b1 native
    ; check: sig1 = (i64 [%rdi]) -> b1 [%rax] native
    sig2 = (f32, i64) -> f64 native
    ; check: sig2 = (f32 [%xmm0], i64 [%rdi]) -> f64 [%xmm0] native
 ebb0:
    return
 }
--- a/filetests/isa/intel/binary32-float.cton
+++ b/filetests/isa/intel/binary32-float.cton
@@ -0,0 +1,146 @@
 ; Binary emission of 32-bit floating point code.
 test binemit
 isa intel has_sse2
 ; The binary encodings can be verified with the command:
 ;
 ;   sed -ne 's/^ *; asm: *//p' filetests/isa/intel/binary32-float.cton | llvm-mc -show-encoding -triple=i386
 ;
 ; Single-precision encodings in 32-bit mode. The integer inputs are pinned to
 ; %rcx (v0) and %rsi (v1), and the f32 operands to %xmm5 (v10) and %xmm2
 ; (v11). Each binary operation appears twice with the operands swapped so
 ; that both register-field orders of the encoding are covered.
 function %F32() {
 ebb0:
    [-,%rcx]            v0 = iconst.i32 1
    [-,%rsi]            v1 = iconst.i32 2
    ; asm: cvtsi2ss %ecx, %xmm5
    [-,%xmm5]           v10 = fcvt_from_sint.f32 v0             ; bin: f3 0f 2a e9
    ; asm: cvtsi2ss %esi, %xmm2
    [-,%xmm2]           v11 = fcvt_from_sint.f32 v1             ; bin: f3 0f 2a d6
    ; asm: cvtss2sd %xmm2, %xmm5
    [-,%xmm5]           v12 = fpromote.f64 v11                  ; bin: f3 0f 5a ea
    ; asm: cvtss2sd %xmm5, %xmm2
    [-,%xmm2]           v13 = fpromote.f64 v10                  ; bin: f3 0f 5a d5
    ; asm: movd %ecx, %xmm5
    [-,%xmm5]           v14 = bitcast.f32 v0                    ; bin: 66 0f 6e e9
    ; asm: movd %esi, %xmm2
    [-,%xmm2]           v15 = bitcast.f32 v1                    ; bin: 66 0f 6e d6
    ; asm: movd %xmm5, %ecx
    [-,%rcx]            v16 = bitcast.i32 v10                   ; bin: 66 0f 7e e9
    ; asm: movd %xmm2, %esi
    [-,%rsi]            v17 = bitcast.i32 v11                   ; bin: 66 0f 7e d6
    ; Binary arithmetic.
    ; asm: addss %xmm2, %xmm5
    [-,%xmm5]           v20 = fadd v10, v11                     ; bin: f3 0f 58 ea
    ; asm: addss %xmm5, %xmm2
    [-,%xmm2]           v21 = fadd v11, v10                     ; bin: f3 0f 58 d5 
    ; asm: subss %xmm2, %xmm5
    [-,%xmm5]           v22 = fsub v10, v11                     ; bin: f3 0f 5c ea
    ; asm: subss %xmm5, %xmm2
    [-,%xmm2]           v23 = fsub v11, v10                     ; bin: f3 0f 5c d5
    ; asm: mulss %xmm2, %xmm5
    [-,%xmm5]           v24 = fmul v10, v11                     ; bin: f3 0f 59 ea
    ; asm: mulss %xmm5, %xmm2
    [-,%xmm2]           v25 = fmul v11, v10                     ; bin: f3 0f 59 d5
    ; asm: divss %xmm2, %xmm5
    [-,%xmm5]           v26 = fdiv v10, v11                     ; bin: f3 0f 5e ea
    ; asm: divss %xmm5, %xmm2
    [-,%xmm2]           v27 = fdiv v11, v10                     ; bin: f3 0f 5e d5
    ; Bitwise ops.
    ; We use the *ps SSE instructions for everything because they are smaller.
    ; asm: andps %xmm2, %xmm5
    [-,%xmm5]           v30 = band v10, v11                     ; bin: 0f 54 ea
    ; asm: andps %xmm5, %xmm2
    [-,%xmm2]           v31 = band v11, v10                     ; bin: 0f 54 d5
    ; asm: andnps %xmm2, %xmm5
    [-,%xmm5]           v32 = band_not v10, v11                 ; bin: 0f 55 ea
    ; asm: andnps %xmm5, %xmm2
    [-,%xmm2]           v33 = band_not v11, v10                 ; bin: 0f 55 d5
    ; asm: orps %xmm2, %xmm5
    [-,%xmm5]           v34 = bor v10, v11                      ; bin: 0f 56 ea
    ; asm: orps %xmm5, %xmm2
    [-,%xmm2]           v35 = bor v11, v10                      ; bin: 0f 56 d5
    ; asm: xorps %xmm2, %xmm5
    [-,%xmm5]           v36 = bxor v10, v11                     ; bin: 0f 57 ea
    ; asm: xorps %xmm5, %xmm2
    [-,%xmm2]           v37 = bxor v11, v10                     ; bin: 0f 57 d5
    return
 }
 ; Double-precision encodings in 32-bit mode, laid out like the f32 function
 ; in this file: f64 operands in %xmm5 (v10) and %xmm2 (v11), every binary
 ; operation in both operand orders. The scalar arithmetic expectations start
 ; with f2; the bitwise ones have no prefix because they use the *ps forms.
 function %F64() {
 ebb0:
    [-,%rcx]            v0 = iconst.i32 1
    [-,%rsi]            v1 = iconst.i32 2
    ; asm: cvtsi2sd %ecx, %xmm5
    [-,%xmm5]           v10 = fcvt_from_sint.f64 v0             ; bin: f2 0f 2a e9
    ; asm: cvtsi2sd %esi, %xmm2
    [-,%xmm2]           v11 = fcvt_from_sint.f64 v1             ; bin: f2 0f 2a d6
    ; asm: cvtsd2ss %xmm2, %xmm5
    [-,%xmm5]           v12 = fdemote.f32 v11                   ; bin: f2 0f 5a ea
    ; asm: cvtsd2ss %xmm5, %xmm2
    [-,%xmm2]           v13 = fdemote.f32 v10                   ; bin: f2 0f 5a d5
    ; No i64 <-> f64 bitcasts in 32-bit mode.
    ; Binary arithmetic.
    ; asm: addsd %xmm2, %xmm5
    [-,%xmm5]           v20 = fadd v10, v11                     ; bin: f2 0f 58 ea
    ; asm: addsd %xmm5, %xmm2
    [-,%xmm2]           v21 = fadd v11, v10                     ; bin: f2 0f 58 d5 
    ; asm: subsd %xmm2, %xmm5
    [-,%xmm5]           v22 = fsub v10, v11                     ; bin: f2 0f 5c ea
    ; asm: subsd %xmm5, %xmm2
    [-,%xmm2]           v23 = fsub v11, v10                     ; bin: f2 0f 5c d5
    ; asm: mulsd %xmm2, %xmm5
    [-,%xmm5]           v24 = fmul v10, v11                     ; bin: f2 0f 59 ea
    ; asm: mulsd %xmm5, %xmm2
    [-,%xmm2]           v25 = fmul v11, v10                     ; bin: f2 0f 59 d5
    ; asm: divsd %xmm2, %xmm5
    [-,%xmm5]           v26 = fdiv v10, v11                     ; bin: f2 0f 5e ea
    ; asm: divsd %xmm5, %xmm2
    [-,%xmm2]           v27 = fdiv v11, v10                     ; bin: f2 0f 5e d5
    ; Bitwise ops.
    ; We use the *ps SSE instructions for everything because they are smaller.
    ; asm: andps %xmm2, %xmm5
    [-,%xmm5]           v30 = band v10, v11                     ; bin: 0f 54 ea
    ; asm: andps %xmm5, %xmm2
    [-,%xmm2]           v31 = band v11, v10                     ; bin: 0f 54 d5
    ; asm: andnps %xmm2, %xmm5
    [-,%xmm5]           v32 = band_not v10, v11                 ; bin: 0f 55 ea
    ; asm: andnps %xmm5, %xmm2
    [-,%xmm2]           v33 = band_not v11, v10                 ; bin: 0f 55 d5
    ; asm: orps %xmm2, %xmm5
    [-,%xmm5]           v34 = bor v10, v11                      ; bin: 0f 56 ea
    ; asm: orps %xmm5, %xmm2
    [-,%xmm2]           v35 = bor v11, v10                      ; bin: 0f 56 d5
    ; asm: xorps %xmm2, %xmm5
    [-,%xmm5]           v36 = bxor v10, v11                     ; bin: 0f 57 ea
    ; asm: xorps %xmm5, %xmm2
    [-,%xmm2]           v37 = bxor v11, v10                     ; bin: 0f 57 d5
    return
 }
--- a/filetests/isa/intel/binary32.cton
+++ b/filetests/isa/intel/binary32.cton
@@ -0,0 +1,368 @@
 ; Binary emission of 32-bit code.
 test binemit
 isa intel haswell
 ; The binary encodings can be verified with the command:
 ;
 ;   sed -ne 's/^ *; asm: *//p' filetests/isa/intel/binary32.cton | llvm-mc -show-encoding -triple=i386
 ;
 ; Expected encodings for 32-bit integer code. The two main operands are
 ; pinned to %rcx (v1) and %rsi (v2), and most operations appear once per
 ; operand order or destination register. The instruction order in the last
 ; part of ebb0 is significant: the branch displacements are byte counts that
 ; depend on the sizes of the instructions that follow them.
 function %I32() {
    fn0 = function %foo()
    sig0 = ()
 ebb0:
    ; asm: movl $1, %ecx
    [-,%rcx]            v1 = iconst.i32 1        ; bin: b9 00000001
    ; asm: movl $2, %esi
    [-,%rsi]            v2 = iconst.i32 2        ; bin: be 00000002
    ; Integer Register-Register Operations.
    ; asm: addl %esi, %ecx
    [-,%rcx]             v10 = iadd v1, v2       ; bin: 01 f1
    ; asm: addl %ecx, %esi
    [-,%rsi]             v11 = iadd v2, v1       ; bin: 01 ce
    ; asm: subl %esi, %ecx
    [-,%rcx]             v12 = isub v1, v2       ; bin: 29 f1
    ; asm: subl %ecx, %esi
    [-,%rsi]             v13 = isub v2, v1       ; bin: 29 ce
    ; asm: andl %esi, %ecx
    [-,%rcx]             v14 = band v1, v2       ; bin: 21 f1
    ; asm: andl %ecx, %esi
    [-,%rsi]             v15 = band v2, v1       ; bin: 21 ce
    ; asm: orl %esi, %ecx
    [-,%rcx]             v16 = bor v1, v2        ; bin: 09 f1
    ; asm: orl %ecx, %esi
    [-,%rsi]             v17 = bor v2, v1        ; bin: 09 ce
    ; asm: xorl %esi, %ecx
    [-,%rcx]             v18 = bxor v1, v2       ; bin: 31 f1
    ; asm: xorl %ecx, %esi
    [-,%rsi]             v19 = bxor v2, v1       ; bin: 31 ce
    ; Dynamic shifts take the shift amount in %rcx.
    ; asm: shll %cl, %esi
    [-,%rsi]             v20 = ishl v2, v1       ; bin: d3 e6
    ; asm: shll %cl, %ecx
    [-,%rcx]             v21 = ishl v1, v1       ; bin: d3 e1
    ; asm: shrl %cl, %esi
    [-,%rsi]             v22 = ushr v2, v1       ; bin: d3 ee
    ; asm: shrl %cl, %ecx
    [-,%rcx]             v23 = ushr v1, v1       ; bin: d3 e9
    ; asm: sarl %cl, %esi
    [-,%rsi]             v24 = sshr v2, v1       ; bin: d3 fe
    ; asm: sarl %cl, %ecx
    [-,%rcx]             v25 = sshr v1, v1       ; bin: d3 f9
    ; asm: roll %cl, %esi
    [-,%rsi]             v26 = rotl v2, v1       ; bin: d3 c6
    ; asm: roll %cl, %ecx
    [-,%rcx]             v27 = rotl v1, v1       ; bin: d3 c1
    ; asm: rorl %cl, %esi
    [-,%rsi]             v28 = rotr v2, v1       ; bin: d3 ce
    ; asm: rorl %cl, %ecx
    [-,%rcx]             v29 = rotr v1, v1       ; bin: d3 c9
    ; Integer Register - Immediate 8-bit operations.
    ; The 8-bit immediate is sign-extended.
    ; asm: addl $-128, %ecx
    [-,%rcx]             v30 = iadd_imm v1, -128 ; bin: 83 c1 80
    ; asm: addl $10, %esi
    [-,%rsi]             v31 = iadd_imm v2, 10   ; bin: 83 c6 0a
    ; asm: andl $-128, %ecx
    [-,%rcx]             v32 = band_imm v1, -128 ; bin: 83 e1 80
    ; asm: andl $10, %esi
    [-,%rsi]             v33 = band_imm v2, 10   ; bin: 83 e6 0a
    ; asm: orl $-128, %ecx
    [-,%rcx]             v34 = bor_imm v1, -128  ; bin: 83 c9 80
    ; asm: orl $10, %esi
    [-,%rsi]             v35 = bor_imm v2, 10    ; bin: 83 ce 0a
    ; asm: xorl $-128, %ecx
    [-,%rcx]             v36 = bxor_imm v1, -128 ; bin: 83 f1 80
    ; asm: xorl $10, %esi
    [-,%rsi]             v37 = bxor_imm v2, 10   ; bin: 83 f6 0a
    ; Integer Register - Immediate 32-bit operations.
    ; asm: addl $-128000, %ecx
    [-,%rcx]             v40 = iadd_imm v1, -128000 ; bin: 81 c1 fffe0c00
    ; asm: addl $1000000, %esi
    [-,%rsi]             v41 = iadd_imm v2, 1000000 ; bin: 81 c6 000f4240
    ; asm: andl $-128000, %ecx
    [-,%rcx]             v42 = band_imm v1, -128000 ; bin: 81 e1 fffe0c00
    ; asm: andl $1000000, %esi
    [-,%rsi]             v43 = band_imm v2, 1000000 ; bin: 81 e6 000f4240
    ; asm: orl $-128000, %ecx
    [-,%rcx]             v44 = bor_imm v1, -128000  ; bin: 81 c9 fffe0c00
    ; asm: orl $1000000, %esi
    [-,%rsi]             v45 = bor_imm v2, 1000000  ; bin: 81 ce 000f4240
    ; asm: xorl $-128000, %ecx
    [-,%rcx]             v46 = bxor_imm v1, -128000 ; bin: 81 f1 fffe0c00
    ; asm: xorl $1000000, %esi
    [-,%rsi]             v47 = bxor_imm v2, 1000000 ; bin: 81 f6 000f4240
    ; More arithmetic.
    ; asm: imull %esi, %ecx
    [-,%rcx]             v50 = imul v1, v2       ; bin: 0f af ce
    ; asm: imull %ecx, %esi
    [-,%rsi]             v51 = imul v2, v1       ; bin: 0f af f1
    ; asm: movl $1, %eax
    [-,%rax]      v52 = iconst.i32 1                    ; bin: b8 00000001
    ; asm: movl $2, %edx
    [-,%rdx]      v53 = iconst.i32 2                    ; bin: ba 00000002
    ; asm: idivl %ecx
    [-,%rax,%rdx] v54, v55 = x86_sdivmodx v52, v53, v1  ; bin: f7 f9
    ; asm: idivl %esi
    [-,%rax,%rdx] v56, v57 = x86_sdivmodx v52, v53, v2  ; bin: f7 fe
    ; asm: divl %ecx
    [-,%rax,%rdx] v58, v59 = x86_udivmodx v52, v53, v1  ; bin: f7 f1
    ; asm: divl %esi
    [-,%rax,%rdx] v60, v61 = x86_udivmodx v52, v53, v2  ; bin: f7 f6
    ; Register copies.
    ; asm: movl %esi, %ecx
    [-,%rcx]             v80 = copy v2           ; bin: 89 f1
    ; asm: movl %ecx, %esi
    [-,%rsi]             v81 = copy v1           ; bin: 89 ce
    ; Load/Store instructions.
    ; Register indirect addressing with no displacement.
    ; asm: movl %ecx, (%esi)
    store v1, v2                                ; bin: 89 0e
    ; asm: movl %esi, (%ecx)
    store v2, v1                                ; bin: 89 31
    ; asm: movw %cx, (%esi)
    istore16 v1, v2                             ; bin: 66 89 0e
    ; asm: movw %si, (%ecx)
    istore16 v2, v1                             ; bin: 66 89 31
    ; asm: movb %cl, (%esi)
    istore8 v1, v2                              ; bin: 88 0e
    ; Can't store %sil in 32-bit mode (needs REX prefix).
    ; asm: movl (%ecx), %edi
    [-,%rdi]            v100 = load.i32 v1      ; bin: 8b 39
    ; asm: movl (%esi), %edx
    [-,%rdx]            v101 = load.i32 v2      ; bin: 8b 16
    ; asm: movzwl (%ecx), %edi
    [-,%rdi]            v102 = uload16.i32 v1   ; bin: 0f b7 39
    ; asm: movzwl (%esi), %edx
    [-,%rdx]            v103 = uload16.i32 v2   ; bin: 0f b7 16
    ; asm: movswl (%ecx), %edi
    [-,%rdi]            v104 = sload16.i32 v1   ; bin: 0f bf 39
    ; asm: movswl (%esi), %edx
    [-,%rdx]            v105 = sload16.i32 v2   ; bin: 0f bf 16
    ; asm: movzbl (%ecx), %edi
    [-,%rdi]            v106 = uload8.i32 v1    ; bin: 0f b6 39
    ; asm: movzbl (%esi), %edx
    [-,%rdx]            v107 = uload8.i32 v2    ; bin: 0f b6 16
    ; asm: movsbl (%ecx), %edi
    [-,%rdi]            v108 = sload8.i32 v1    ; bin: 0f be 39
    ; asm: movsbl (%esi), %edx
    [-,%rdx]            v109 = sload8.i32 v2    ; bin: 0f be 16
    ; Register-indirect with 8-bit signed displacement.
    ; asm: movl %ecx, 100(%esi)
    store v1, v2+100                            ; bin: 89 4e 64
    ; asm: movl %esi, -100(%ecx)
    store v2, v1-100                            ; bin: 89 71 9c
    ; asm: movw %cx, 100(%esi)
    istore16 v1, v2+100                         ; bin: 66 89 4e 64
    ; asm: movw %si, -100(%ecx)
    istore16 v2, v1-100                         ; bin: 66 89 71 9c
    ; asm: movb %cl, 100(%esi)
    istore8 v1, v2+100                          ; bin: 88 4e 64
    ; asm: movl 50(%ecx), %edi
    [-,%rdi]            v110 = load.i32 v1+50           ; bin: 8b 79 32
    ; asm: movl -50(%esi), %edx
    [-,%rdx]            v111 = load.i32 v2-50           ; bin: 8b 56 ce
    ; asm: movzwl 50(%ecx), %edi
    [-,%rdi]            v112 = uload16.i32 v1+50        ; bin: 0f b7 79 32
    ; asm: movzwl -50(%esi), %edx
    [-,%rdx]            v113 = uload16.i32 v2-50        ; bin: 0f b7 56 ce
    ; asm: movswl 50(%ecx), %edi
    [-,%rdi]            v114 = sload16.i32 v1+50        ; bin: 0f bf 79 32
    ; asm: movswl -50(%esi), %edx
    [-,%rdx]            v115 = sload16.i32 v2-50        ; bin: 0f bf 56 ce
    ; asm: movzbl 50(%ecx), %edi
    [-,%rdi]            v116 = uload8.i32 v1+50         ; bin: 0f b6 79 32
    ; asm: movzbl -50(%esi), %edx
    [-,%rdx]            v117 = uload8.i32 v2-50         ; bin: 0f b6 56 ce
    ; asm: movsbl 50(%ecx), %edi
    [-,%rdi]            v118 = sload8.i32 v1+50         ; bin: 0f be 79 32
    ; asm: movsbl -50(%esi), %edx
    [-,%rdx]            v119 = sload8.i32 v2-50         ; bin: 0f be 56 ce
    ; Register-indirect with 32-bit signed displacement.
    ; asm: movl %ecx, 10000(%esi)
    store v1, v2+10000                          ; bin: 89 8e 00002710
    ; asm: movl %esi, -10000(%ecx)
    store v2, v1-10000                          ; bin: 89 b1 ffffd8f0
    ; asm: movw %cx, 10000(%esi)
    istore16 v1, v2+10000                       ; bin: 66 89 8e 00002710
    ; asm: movw %si, -10000(%ecx)
    istore16 v2, v1-10000                       ; bin: 66 89 b1 ffffd8f0
    ; asm: movb %cl, 10000(%esi)
    istore8 v1, v2+10000                        ; bin: 88 8e 00002710
    ; asm: movl 50000(%ecx), %edi
    [-,%rdi]            v120 = load.i32 v1+50000           ; bin: 8b b9 0000c350
    ; asm: movl -50000(%esi), %edx
    [-,%rdx]            v121 = load.i32 v2-50000           ; bin: 8b 96 ffff3cb0
    ; asm: movzwl 50000(%ecx), %edi
    [-,%rdi]            v122 = uload16.i32 v1+50000        ; bin: 0f b7 b9 0000c350
    ; asm: movzwl -50000(%esi), %edx
    [-,%rdx]            v123 = uload16.i32 v2-50000        ; bin: 0f b7 96 ffff3cb0
    ; asm: movswl 50000(%ecx), %edi
    [-,%rdi]            v124 = sload16.i32 v1+50000        ; bin: 0f bf b9 0000c350
    ; asm: movswl -50000(%esi), %edx
    [-,%rdx]            v125 = sload16.i32 v2-50000        ; bin: 0f bf 96 ffff3cb0
    ; asm: movzbl 50000(%ecx), %edi
    [-,%rdi]            v126 = uload8.i32 v1+50000         ; bin: 0f b6 b9 0000c350
    ; asm: movzbl -50000(%esi), %edx
    [-,%rdx]            v127 = uload8.i32 v2-50000         ; bin: 0f b6 96 ffff3cb0
    ; asm: movsbl 50000(%ecx), %edi
    [-,%rdi]            v128 = sload8.i32 v1+50000         ; bin: 0f be b9 0000c350
    ; asm: movsbl -50000(%esi), %edx
    [-,%rdx]            v129 = sload8.i32 v2-50000         ; bin: 0f be 96 ffff3cb0
    ; Bit-counting instructions.
    ; asm: popcntl %esi, %ecx
    [-,%rcx]            v200 = popcnt v2        ; bin: f3 0f b8 ce
    ; asm: popcntl %ecx, %esi
    [-,%rsi]            v201 = popcnt v1        ; bin: f3 0f b8 f1
    ; asm: lzcntl %esi, %ecx
    [-,%rcx]            v202 = clz v2           ; bin: f3 0f bd ce
    ; asm: lzcntl %ecx, %esi
    [-,%rsi]            v203 = clz v1           ; bin: f3 0f bd f1
    ; asm: tzcntl %esi, %ecx
    [-,%rcx]            v204 = ctz v2           ; bin: f3 0f bc ce
    ; asm: tzcntl %ecx, %esi
    [-,%rsi]            v205 = ctz v1           ; bin: f3 0f bc f1
    ; Integer comparisons.
    ; asm: cmpl %esi, %ecx
    ; asm: sete %bl
    [-,%rbx]            v300 = icmp eq v1, v2   ; bin: 39 f1 0f 94 c3
    ; asm: cmpl %ecx, %esi
    ; asm: sete %dl
    [-,%rdx]            v301 = icmp eq v2, v1   ; bin: 39 ce 0f 94 c2
    ; asm: cmpl %esi, %ecx
    ; asm: setne %bl
    [-,%rbx]            v302 = icmp ne v1, v2   ; bin: 39 f1 0f 95 c3
    ; asm: cmpl %ecx, %esi
    ; asm: setne %dl
    [-,%rdx]            v303 = icmp ne v2, v1   ; bin: 39 ce 0f 95 c2
    ; asm: cmpl %esi, %ecx
    ; asm: setl %bl
    [-,%rbx]            v304 = icmp slt v1, v2  ; bin: 39 f1 0f 9c c3
    ; asm: cmpl %ecx, %esi
    ; asm: setl %dl
    [-,%rdx]            v305 = icmp slt v2, v1  ; bin: 39 ce 0f 9c c2
    ; asm: cmpl %esi, %ecx
    ; asm: setge %bl
    [-,%rbx]            v306 = icmp sge v1, v2  ; bin: 39 f1 0f 9d c3
    ; asm: cmpl %ecx, %esi
    ; asm: setge %dl
    [-,%rdx]            v307 = icmp sge v2, v1  ; bin: 39 ce 0f 9d c2
    ; asm: cmpl %esi, %ecx
    ; asm: setg %bl
    [-,%rbx]            v308 = icmp sgt v1, v2  ; bin: 39 f1 0f 9f c3
    ; asm: cmpl %ecx, %esi
    ; asm: setg %dl
    [-,%rdx]            v309 = icmp sgt v2, v1  ; bin: 39 ce 0f 9f c2
    ; asm: cmpl %esi, %ecx
    ; asm: setle %bl
    [-,%rbx]            v310 = icmp sle v1, v2  ; bin: 39 f1 0f 9e c3
    ; asm: cmpl %ecx, %esi
    ; asm: setle %dl
    [-,%rdx]            v311 = icmp sle v2, v1  ; bin: 39 ce 0f 9e c2
    ; asm: cmpl %esi, %ecx
    ; asm: setb %bl
    [-,%rbx]            v312 = icmp ult v1, v2  ; bin: 39 f1 0f 92 c3
    ; asm: cmpl %ecx, %esi
    ; asm: setb %dl
    [-,%rdx]            v313 = icmp ult v2, v1  ; bin: 39 ce 0f 92 c2
    ; asm: cmpl %esi, %ecx
    ; asm: setae %bl
    [-,%rbx]            v314 = icmp uge v1, v2  ; bin: 39 f1 0f 93 c3
    ; asm: cmpl %ecx, %esi
    ; asm: setae %dl
    [-,%rdx]            v315 = icmp uge v2, v1  ; bin: 39 ce 0f 93 c2
    ; asm: cmpl %esi, %ecx
    ; asm: seta %bl
    [-,%rbx]            v316 = icmp ugt v1, v2  ; bin: 39 f1 0f 97 c3
    ; asm: cmpl %ecx, %esi
    ; asm: seta %dl
    [-,%rdx]            v317 = icmp ugt v2, v1  ; bin: 39 ce 0f 97 c2
    ; asm: cmpl %esi, %ecx
    ; asm: setbe %bl
    [-,%rbx]            v318 = icmp ule v1, v2  ; bin: 39 f1 0f 96 c3
    ; asm: cmpl %ecx, %esi
    ; asm: setbe %dl
    [-,%rdx]            v319 = icmp ule v2, v1  ; bin: 39 ce 0f 96 c2
    ; Bool-to-int conversions.
    ; asm: movzbl %bl, %ecx
    [-,%rcx]             v350 = bint.i32 v300   ; bin: 0f b6 cb
    ; asm: movzbl %dl, %esi
    [-,%rsi]             v351 = bint.i32 v301   ; bin: 0f b6 f2
    ; Direct and indirect calls.
    ; asm: call foo
    call fn0()                                  ; bin: e8 PCRel4(fn0) 00000000
    ; asm: call *%ecx
    call_indirect sig0, v1()                    ; bin: ff d1
    ; asm: call *%esi
    call_indirect sig0, v2()                    ; bin: ff d6
    ; Conditional branches and jumps.
    ; Each 8-bit displacement is the number of bytes between the end of the
    ; branch and its target. The first brz skips the three 4-byte branches and
    ; the 2-byte jump below it (0x0e = 14); the final jump skips the 1-byte
    ; return in ebb1. Adding or removing instructions here changes these bytes.
    ; asm: testl %ecx, %ecx
    ; asm: je ebb1
    brz v1, ebb1                                ; bin: 85 c9 74 0e
    ; asm: testl %esi, %esi
    ; asm: je ebb1
    brz v2, ebb1                                ; bin: 85 f6 74 0a
    ; asm: testl %ecx, %ecx
    ; asm: jne ebb1
    brnz v1, ebb1                               ; bin: 85 c9 75 06
    ; asm: testl %esi, %esi
    ; asm: jne ebb1
    brnz v2, ebb1                               ; bin: 85 f6 75 02
    ; asm: jmp ebb2
    jump ebb2                                   ; bin: eb 01
    ; asm: ebb1:
 ebb1:
    ; asm: ret
    return                                      ; bin: c3
    ; asm: ebb2:
 ebb2:
    trap                                        ; bin: 0f 0b
 }
--- a/filetests/isa/intel/binary64-float.cton
+++ b/filetests/isa/intel/binary64-float.cton
@@ -0,0 +1,169 @@
 ; Binary emission of 64-bit floating point code.
 test binemit
 set is_64bit
 isa intel has_sse2
 ; The binary encodings can be verified with the command:
 ;
 ;   sed -ne 's/^ *; asm: *//p' filetests/isa/intel/binary64-float.cton | llvm-mc -show-encoding -triple=x86_64
 ;
 ; Single-precision encodings in 64-bit mode. Operands mix low registers
 ; (%rsi, %rax, %xmm5) with extended ones (%r11, %r14, %xmm10), so every
 ; expected byte sequence in this function carries a REX prefix byte (4x),
 ; placed after any f3/66 prefix. i32 sources use the "l" conversion forms and
 ; i64 sources the "q" forms.
 function %F32() {
 ebb0:
    [-,%r11]            v0 = iconst.i32 1
    [-,%rsi]            v1 = iconst.i32 2
    [-,%rax]            v2 = iconst.i64 11
    [-,%r14]            v3 = iconst.i64 12
    ; asm: cvtsi2ssl %r11d, %xmm5
    [-,%xmm5]           v10 = fcvt_from_sint.f32 v0             ; bin: f3 41 0f 2a eb
    ; asm: cvtsi2ssl %esi, %xmm10
    [-,%xmm10]          v11 = fcvt_from_sint.f32 v1             ; bin: f3 44 0f 2a d6
    ; asm: cvtsi2ssq %rax, %xmm5
    [-,%xmm5]           v12 = fcvt_from_sint.f32 v2             ; bin: f3 48 0f 2a e8
    ; asm: cvtsi2ssq %r14, %xmm10
    [-,%xmm10]          v13 = fcvt_from_sint.f32 v3             ; bin: f3 4d 0f 2a d6
    ; asm: cvtss2sd %xmm10, %xmm5
    [-,%xmm5]           v14 = fpromote.f64 v11                  ; bin: f3 41 0f 5a ea
    ; asm: cvtss2sd %xmm5, %xmm10
    [-,%xmm10]          v15 = fpromote.f64 v10                  ; bin: f3 44 0f 5a d5
    ; asm: movd %r11d, %xmm5
    [-,%xmm5]           v16 = bitcast.f32 v0                    ; bin: 66 41 0f 6e eb
    ; asm: movd %esi, %xmm10
    [-,%xmm10]          v17 = bitcast.f32 v1                    ; bin: 66 44 0f 6e d6
    ; NOTE(review): the next expectation contains a 0x40 REX byte although
    ; neither register is extended; an assembler would presumably omit it, so
    ; the llvm-mc cross-check may print one byte fewer here -- confirm.
    ; asm: movd %xmm5, %ecx
    [-,%rcx]            v18 = bitcast.i32 v10                   ; bin: 66 40 0f 7e e9
    ; asm: movd %xmm10, %esi
    [-,%rsi]            v19 = bitcast.i32 v11                   ; bin: 66 44 0f 7e d6
    ; Binary arithmetic.
    ; asm: addss %xmm10, %xmm5
    [-,%xmm5]           v20 = fadd v10, v11                     ; bin: f3 41 0f 58 ea
    ; asm: addss %xmm5, %xmm10
    [-,%xmm10]          v21 = fadd v11, v10                     ; bin: f3 44 0f 58 d5
    ; asm: subss %xmm10, %xmm5
    [-,%xmm5]           v22 = fsub v10, v11                     ; bin: f3 41 0f 5c ea
    ; asm: subss %xmm5, %xmm10
    [-,%xmm10]          v23 = fsub v11, v10                     ; bin: f3 44 0f 5c d5
    ; asm: mulss %xmm10, %xmm5
    [-,%xmm5]           v24 = fmul v10, v11                     ; bin: f3 41 0f 59 ea
    ; asm: mulss %xmm5, %xmm10
    [-,%xmm10]          v25 = fmul v11, v10                     ; bin: f3 44 0f 59 d5
    ; asm: divss %xmm10, %xmm5
    [-,%xmm5]           v26 = fdiv v10, v11                     ; bin: f3 41 0f 5e ea
    ; asm: divss %xmm5, %xmm10
    [-,%xmm10]          v27 = fdiv v11, v10                     ; bin: f3 44 0f 5e d5
    ; Bitwise ops.
    ; We use the *ps SSE instructions for everything because they are smaller.
    ; asm: andps %xmm10, %xmm5
    [-,%xmm5]           v30 = band v10, v11                     ; bin: 41 0f 54 ea
    ; asm: andps %xmm5, %xmm10
    [-,%xmm10]          v31 = band v11, v10                     ; bin: 44 0f 54 d5
    ; asm: andnps %xmm10, %xmm5
    [-,%xmm5]           v32 = band_not v10, v11                 ; bin: 41 0f 55 ea
    ; asm: andnps %xmm5, %xmm10
    [-,%xmm10]          v33 = band_not v11, v10                 ; bin: 44 0f 55 d5
    ; asm: orps %xmm10, %xmm5
    [-,%xmm5]           v34 = bor v10, v11                      ; bin: 41 0f 56 ea
    ; asm: orps %xmm5, %xmm10
    [-,%xmm10]          v35 = bor v11, v10                      ; bin: 44 0f 56 d5
    ; asm: xorps %xmm10, %xmm5
    [-,%xmm5]           v36 = bxor v10, v11                     ; bin: 41 0f 57 ea
    ; asm: xorps %xmm5, %xmm10
    [-,%xmm10]          v37 = bxor v11, v10                     ; bin: 44 0f 57 d5
    return
 }
 ; Double-precision encodings in 64-bit mode, with the same register choice as
 ; the f32 function in this file: low registers (%rsi, %rax, %xmm5) mixed with
 ; extended ones (%r11, %r14, %xmm10). Unlike the 32-bit test, the i64 <-> f64
 ; bitcasts are covered here through the movq forms.
 function %F64() {
 ebb0:
    [-,%r11]            v0 = iconst.i32 1
    [-,%rsi]            v1 = iconst.i32 2
    [-,%rax]            v2 = iconst.i64 11
    [-,%r14]            v3 = iconst.i64 12
    ; asm: cvtsi2sdl %r11d, %xmm5
    [-,%xmm5]           v10 = fcvt_from_sint.f64 v0             ; bin: f2 41 0f 2a eb
    ; asm: cvtsi2sdl %esi, %xmm10
    [-,%xmm10]          v11 = fcvt_from_sint.f64 v1             ; bin: f2 44 0f 2a d6
    ; asm: cvtsi2sdq %rax, %xmm5
    [-,%xmm5]           v12 = fcvt_from_sint.f64 v2             ; bin: f2 48 0f 2a e8
    ; asm: cvtsi2sdq %r14, %xmm10
    [-,%xmm10]          v13 = fcvt_from_sint.f64 v3             ; bin: f2 4d 0f 2a d6
    ; asm: cvtsd2ss %xmm10, %xmm5
    [-,%xmm5]           v14 = fdemote.f32 v11                   ; bin: f2 41 0f 5a ea
    ; asm: cvtsd2ss %xmm5, %xmm10
    [-,%xmm10]          v15 = fdemote.f32 v10                   ; bin: f2 44 0f 5a d5
    ; asm: movq %rax, %xmm5
    [-,%xmm5]           v16 = bitcast.f64 v2                    ; bin: 66 48 0f 6e e8
    ; asm: movq %r14, %xmm10
    [-,%xmm10]          v17 = bitcast.f64 v3                    ; bin: 66 4d 0f 6e d6
    ; asm: movq %xmm5, %rcx
    [-,%rcx]            v18 = bitcast.i64 v10                   ; bin: 66 48 0f 7e e9
    ; asm: movq %xmm10, %rsi
    [-,%rsi]            v19 = bitcast.i64 v11                   ; bin: 66 4c 0f 7e d6
    ; Binary arithmetic.
    ; asm: addsd %xmm10, %xmm5
    [-,%xmm5]           v20 = fadd v10, v11                     ; bin: f2 41 0f 58 ea
    ; asm: addsd %xmm5, %xmm10
    [-,%xmm10]          v21 = fadd v11, v10                     ; bin: f2 44 0f 58 d5
    ; asm: subsd %xmm10, %xmm5
    [-,%xmm5]           v22 = fsub v10, v11                     ; bin: f2 41 0f 5c ea
    ; asm: subsd %xmm5, %xmm10
    [-,%xmm10]          v23 = fsub v11, v10                     ; bin: f2 44 0f 5c d5
    ; asm: mulsd %xmm10, %xmm5
    [-,%xmm5]           v24 = fmul v10, v11                     ; bin: f2 41 0f 59 ea
    ; asm: mulsd %xmm5, %xmm10
    [-,%xmm10]          v25 = fmul v11, v10                     ; bin: f2 44 0f 59 d5
    ; asm: divsd %xmm10, %xmm5
    [-,%xmm5]           v26 = fdiv v10, v11                     ; bin: f2 41 0f 5e ea
    ; asm: divsd %xmm5, %xmm10
    [-,%xmm10]          v27 = fdiv v11, v10                     ; bin: f2 44 0f 5e d5
    ; Bitwise ops.
    ; We use the *ps SSE instructions for everything because they are smaller.
    ; asm: andps %xmm10, %xmm5
    [-,%xmm5]           v30 = band v10, v11                     ; bin: 41 0f 54 ea
    ; asm: andps %xmm5, %xmm10
    [-,%xmm10]          v31 = band v11, v10                     ; bin: 44 0f 54 d5
    ; asm: andnps %xmm10, %xmm5
    [-,%xmm5]           v32 = band_not v10, v11                 ; bin: 41 0f 55 ea
    ; asm: andnps %xmm5, %xmm10
    [-,%xmm10]          v33 = band_not v11, v10                 ; bin: 44 0f 55 d5
    ; asm: orps %xmm10, %xmm5
    [-,%xmm5]           v34 = bor v10, v11                      ; bin: 41 0f 56 ea
    ; asm: orps %xmm5, %xmm10
    [-,%xmm10]          v35 = bor v11, v10                      ; bin: 44 0f 56 d5
    ; asm: xorps %xmm10, %xmm5
    [-,%xmm5]           v36 = bxor v10, v11                     ; bin: 41 0f 57 ea
    ; asm: xorps %xmm5, %xmm10
    [-,%xmm10]          v37 = bxor v11, v10                     ; bin: 44 0f 57 d5
    return
 }
--- a/filetests/isa/intel/binary64.cton
+++ b/filetests/isa/intel/binary64.cton
@@ -0,0 +1,848 @@
 ; binary emission of 64-bit code.
 test binemit
 set is_64bit
 isa intel haswell
 ; The binary encodings can be verified with the command:
 ;
 ;   sed -ne 's/^ *; asm: *//p' filetests/isa/intel/binary64.cton | llvm-mc -show-encoding -triple=x86_64
 ;
 ; Tests for i64 instructions.
 function %I64() {
    ; Binary-emission test for i64 instructions in 64-bit mode.
    ;
    ; Each tested instruction is pinned to fixed registers, preceded by the
    ; AT&T-syntax assembly it corresponds to, and followed by the exact bytes
    ; the emitter is expected to produce. v1, v2 and v3 live in %rcx, %rsi
    ; and %r10; %r10 needs a REX extension bit while %rcx and %rsi do not.
    fn0 = function %foo()
    sig0 = ()
 ebb0:
    ; Integer Constants.
    ; asm: movq $0x01020304f1f2f3f4, %rcx
    [-,%rcx]            v1 = iconst.i64 0x0102_0304_f1f2_f3f4 ; bin: 48 b9 01020304f1f2f3f4
    ; asm: movq $0x11020304f1f2f3f4, %rsi
    [-,%rsi]            v2 = iconst.i64 0x1102_0304_f1f2_f3f4 ; bin: 48 be 11020304f1f2f3f4
    ; asm: movq $0x21020304f1f2f3f4, %r10
    [-,%r10]            v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4
    ; asm: movl $0xff001122, %r8d                             # 32-bit zero-extended constant.
    [-,%r8]             v4 = iconst.i64 0xff00_1122           ; bin: 41 b8 ff001122
    ; asm: movq $0xffffffff88001122, %r14                     # 32-bit sign-extended constant.
    [-,%r14]            v5 = iconst.i64 0xffff_ffff_8800_1122 ; bin: 49 c7 c6 88001122
    ; Integer Register-Register Operations.
    ; asm: addq %rsi, %rcx
    [-,%rcx]             v10 = iadd v1, v2       ; bin: 48 01 f1
    ; asm: addq %r10, %rsi
    [-,%rsi]             v11 = iadd v2, v3       ; bin: 4c 01 d6
    ; asm: addq %rcx, %r10
    [-,%r10]             v12 = iadd v3, v1       ; bin: 49 01 ca
    ; asm: subq %rsi, %rcx
    [-,%rcx]             v20 = isub v1, v2       ; bin: 48 29 f1
    ; asm: subq %r10, %rsi
    [-,%rsi]             v21 = isub v2, v3       ; bin: 4c 29 d6
    ; asm: subq %rcx, %r10
    [-,%r10]             v22 = isub v3, v1       ; bin: 49 29 ca
    ; asm: andq %rsi, %rcx
    [-,%rcx]             v30 = band v1, v2       ; bin: 48 21 f1
    ; asm: andq %r10, %rsi
    [-,%rsi]             v31 = band v2, v3       ; bin: 4c 21 d6
    ; asm: andq %rcx, %r10
    [-,%r10]             v32 = band v3, v1       ; bin: 49 21 ca
    ; asm: orq %rsi, %rcx
    [-,%rcx]             v40 = bor v1, v2       ; bin: 48 09 f1
    ; asm: orq %r10, %rsi
    [-,%rsi]             v41 = bor v2, v3       ; bin: 4c 09 d6
    ; asm: orq %rcx, %r10
    [-,%r10]             v42 = bor v3, v1       ; bin: 49 09 ca
    ; asm: xorq %rsi, %rcx
    [-,%rcx]             v50 = bxor v1, v2       ; bin: 48 31 f1
    ; asm: xorq %r10, %rsi
    [-,%rsi]             v51 = bxor v2, v3       ; bin: 4c 31 d6
    ; asm: xorq %rcx, %r10
    [-,%r10]             v52 = bxor v3, v1       ; bin: 49 31 ca
    ; asm: shlq %cl, %rsi
    [-,%rsi]             v60 = ishl v2, v1       ; bin: 48 d3 e6
    ; asm: shlq %cl, %r10
    [-,%r10]             v61 = ishl v3, v1       ; bin: 49 d3 e2
    ; asm: sarq %cl, %rsi
    [-,%rsi]             v62 = sshr v2, v1       ; bin: 48 d3 fe
    ; asm: sarq %cl, %r10
    [-,%r10]             v63 = sshr v3, v1       ; bin: 49 d3 fa
    ; asm: shrq %cl, %rsi
    [-,%rsi]             v64 = ushr v2, v1       ; bin: 48 d3 ee
    ; asm: shrq %cl, %r10
    [-,%r10]             v65 = ushr v3, v1       ; bin: 49 d3 ea
    ; asm: rolq %cl, %rsi
    [-,%rsi]             v66 = rotl v2, v1       ; bin: 48 d3 c6
    ; asm: rolq %cl, %r10
    [-,%r10]             v67 = rotl v3, v1       ; bin: 49 d3 c2
    ; asm: rorq %cl, %rsi
    [-,%rsi]             v68 = rotr v2, v1       ; bin: 48 d3 ce
    ; asm: rorq %cl, %r10
    [-,%r10]             v69 = rotr v3, v1       ; bin: 49 d3 ca
    ; Integer Register-Immediate Operations.
    ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits.
    ; Some take 8-bit immediates that are sign-extended to 64 bits.
    ; asm: addq $-100000, %rcx
    [-,%rcx]     v70 = iadd_imm v1, -100000     ; bin: 48 81 c1 fffe7960
    ; asm: addq $100000, %rsi
    [-,%rsi]     v71 = iadd_imm v2, 100000      ; bin: 48 81 c6 000186a0
    ; asm: addq $0x7fffffff, %r10
    [-,%r10]     v72 = iadd_imm v3, 0x7fff_ffff ; bin: 49 81 c2 7fffffff
    ; asm: addq $100, %r8
    [-,%r8]      v73 = iadd_imm v4, 100         ; bin: 49 83 c0 64
    ; asm: addq $-100, %r14
    [-,%r14]     v74 = iadd_imm v5, -100        ; bin: 49 83 c6 9c
    ; asm: andq $-100000, %rcx
    [-,%rcx]     v80 = band_imm v1, -100000     ; bin: 48 81 e1 fffe7960
    ; asm: andq $100000, %rsi
    [-,%rsi]     v81 = band_imm v2, 100000      ; bin: 48 81 e6 000186a0
    ; asm: andq $0x7fffffff, %r10
    [-,%r10]     v82 = band_imm v3, 0x7fff_ffff ; bin: 49 81 e2 7fffffff
    ; asm: andq $100, %r8
    [-,%r8]      v83 = band_imm v4, 100         ; bin: 49 83 e0 64
    ; asm: andq $-100, %r14
    [-,%r14]     v84 = band_imm v5, -100        ; bin: 49 83 e6 9c
    ; asm: orq $-100000, %rcx
    [-,%rcx]     v90 = bor_imm v1, -100000      ; bin: 48 81 c9 fffe7960
    ; asm: orq $100000, %rsi
    [-,%rsi]     v91 = bor_imm v2, 100000       ; bin: 48 81 ce 000186a0
    ; asm: orq $0x7fffffff, %r10
    [-,%r10]     v92 = bor_imm v3, 0x7fff_ffff  ; bin: 49 81 ca 7fffffff
    ; asm: orq $100, %r8
    [-,%r8]      v93 = bor_imm v4, 100          ; bin: 49 83 c8 64
    ; asm: orq $-100, %r14
    [-,%r14]     v94 = bor_imm v5, -100         ; bin: 49 83 ce 9c
    ; asm: xorq $-100000, %rcx
    [-,%rcx]     v100 = bxor_imm v1, -100000     ; bin: 48 81 f1 fffe7960
    ; asm: xorq $100000, %rsi
    [-,%rsi]     v101 = bxor_imm v2, 100000      ; bin: 48 81 f6 000186a0
    ; asm: xorq $0x7fffffff, %r10
    [-,%r10]     v102 = bxor_imm v3, 0x7fff_ffff ; bin: 49 81 f2 7fffffff
    ; asm: xorq $100, %r8
    [-,%r8]      v103 = bxor_imm v4, 100         ; bin: 49 83 f0 64
    ; asm: xorq $-100, %r14
    [-,%r14]     v104 = bxor_imm v5, -100        ; bin: 49 83 f6 9c
    ; Register copies.
    ; asm: movq %rsi, %rcx
    [-,%rcx]             v110 = copy v2          ; bin: 48 89 f1
    ; asm: movq %r10, %rsi
    [-,%rsi]             v111 = copy v3          ; bin: 4c 89 d6
    ; asm: movq %rcx, %r10
    [-,%r10]             v112 = copy v1          ; bin: 49 89 ca
    ; Load/Store instructions.
    ; Register indirect addressing with no displacement.
    ; asm: movq %rcx, (%rsi)
    store v1, v2                                ; bin: 48 89 0e
    ; asm: movq %rsi, (%rcx)
    store v2, v1                                ; bin: 48 89 31
    ; asm: movl %ecx, (%rsi)
    istore32 v1, v2                             ; bin: 40 89 0e
    ; asm: movl %esi, (%rcx)
    istore32 v2, v1                             ; bin: 40 89 31
    ; asm: movw %cx, (%rsi)
    istore16 v1, v2                             ; bin: 66 40 89 0e
    ; asm: movw %si, (%rcx)
    istore16 v2, v1                             ; bin: 66 40 89 31
    ; asm: movb %cl, (%rsi)
    istore8 v1, v2                              ; bin: 40 88 0e
    ; asm: movb %sil, (%rcx)
    istore8 v2, v1                              ; bin: 40 88 31
    ; asm: movq (%rcx), %rdi
    [-,%rdi]            v120 = load.i64 v1      ; bin: 48 8b 39
    ; asm: movq (%rsi), %rdx
    [-,%rdx]            v121 = load.i64 v2      ; bin: 48 8b 16
    ; asm: movl (%rcx), %edi
    [-,%rdi]            v122 = uload32.i64 v1   ; bin: 40 8b 39
    ; asm: movl (%rsi), %edx
    [-,%rdx]            v123 = uload32.i64 v2   ; bin: 40 8b 16
    ; asm: movslq (%rcx), %rdi
    [-,%rdi]            v124 = sload32.i64 v1   ; bin: 48 63 39
    ; asm: movslq (%rsi), %rdx
    [-,%rdx]            v125 = sload32.i64 v2   ; bin: 48 63 16
    ; asm: movzwq (%rcx), %rdi
    [-,%rdi]            v126 = uload16.i64 v1   ; bin: 48 0f b7 39
    ; asm: movzwq (%rsi), %rdx
    [-,%rdx]            v127 = uload16.i64 v2   ; bin: 48 0f b7 16
    ; asm: movswq (%rcx), %rdi
    [-,%rdi]            v128 = sload16.i64 v1   ; bin: 48 0f bf 39
    ; asm: movswq (%rsi), %rdx
    [-,%rdx]            v129 = sload16.i64 v2   ; bin: 48 0f bf 16
    ; asm: movzbq (%rcx), %rdi
    [-,%rdi]            v130 = uload8.i64 v1    ; bin: 48 0f b6 39
    ; asm: movzbq (%rsi), %rdx
    [-,%rdx]            v131 = uload8.i64 v2    ; bin: 48 0f b6 16
    ; asm: movsbq (%rcx), %rdi
    [-,%rdi]            v132 = sload8.i64 v1    ; bin: 48 0f be 39
    ; asm: movsbq (%rsi), %rdx
    [-,%rdx]            v133 = sload8.i64 v2    ; bin: 48 0f be 16
    ; Register-indirect with 8-bit signed displacement.
    ; asm: movq %rcx, 100(%rsi)
    store v1, v2+100                            ; bin: 48 89 4e 64
    ; asm: movq %rsi, -100(%rcx)
    store v2, v1-100                            ; bin: 48 89 71 9c
    ; asm: movl %ecx, 100(%rsi)
    istore32 v1, v2+100                         ; bin: 40 89 4e 64
    ; asm: movl %esi, -100(%rcx)
    istore32 v2, v1-100                         ; bin: 40 89 71 9c
    ; asm: movw %cx, 100(%rsi)
    istore16 v1, v2+100                         ; bin: 66 40 89 4e 64
    ; asm: movw %si, -100(%rcx)
    istore16 v2, v1-100                         ; bin: 66 40 89 71 9c
    ; asm: movb %cl, 100(%rsi)
    istore8 v1, v2+100                          ; bin: 40 88 4e 64
    ; asm: movb %sil, 100(%rcx)
    istore8 v2, v1+100                          ; bin: 40 88 71 64
    ; asm: movq 50(%rcx), %rdi
    [-,%rdi]            v140 = load.i64 v1+50           ; bin: 48 8b 79 32
    ; asm: movq -50(%rsi), %rdx
    [-,%rdx]            v141 = load.i64 v2-50           ; bin: 48 8b 56 ce
    ; asm: movl 50(%rcx), %edi
    [-,%rdi]            v142 = uload32.i64 v1+50        ; bin: 40 8b 79 32
    ; asm: movl -50(%rsi), %edx
    [-,%rdx]            v143 = uload32.i64 v2-50        ; bin: 40 8b 56 ce
    ; asm: movslq 50(%rcx), %rdi
    [-,%rdi]            v144 = sload32.i64 v1+50        ; bin: 48 63 79 32
    ; asm: movslq -50(%rsi), %rdx
    [-,%rdx]            v145 = sload32.i64 v2-50        ; bin: 48 63 56 ce
    ; asm: movzwq 50(%rcx), %rdi
    [-,%rdi]            v146 = uload16.i64 v1+50        ; bin: 48 0f b7 79 32
    ; asm: movzwq -50(%rsi), %rdx
    [-,%rdx]            v147 = uload16.i64 v2-50        ; bin: 48 0f b7 56 ce
    ; asm: movswq 50(%rcx), %rdi
    [-,%rdi]            v148 = sload16.i64 v1+50        ; bin: 48 0f bf 79 32
    ; asm: movswq -50(%rsi), %rdx
    [-,%rdx]            v149 = sload16.i64 v2-50        ; bin: 48 0f bf 56 ce
    ; asm: movzbq 50(%rcx), %rdi
    [-,%rdi]            v150 = uload8.i64 v1+50         ; bin: 48 0f b6 79 32
    ; asm: movzbq -50(%rsi), %rdx
    [-,%rdx]            v151 = uload8.i64 v2-50         ; bin: 48 0f b6 56 ce
    ; asm: movsbq 50(%rcx), %rdi
    [-,%rdi]            v152 = sload8.i64 v1+50         ; bin: 48 0f be 79 32
    ; asm: movsbq -50(%rsi), %rdx
    [-,%rdx]            v153 = sload8.i64 v2-50         ; bin: 48 0f be 56 ce
    ; Register-indirect with 32-bit signed displacement.
    ; asm: movq %rcx, 10000(%rsi)
    store v1, v2+10000                          ; bin: 48 89 8e 00002710
    ; asm: movq %rsi, -10000(%rcx)
    store v2, v1-10000                          ; bin: 48 89 b1 ffffd8f0
    ; asm: movl %ecx, 10000(%rsi)
    istore32 v1, v2+10000                       ; bin: 40 89 8e 00002710
    ; asm: movl %esi, -10000(%rcx)
    istore32 v2, v1-10000                       ; bin: 40 89 b1 ffffd8f0
    ; asm: movw %cx, 10000(%rsi)
    istore16 v1, v2+10000                       ; bin: 66 40 89 8e 00002710
    ; asm: movw %si, -10000(%rcx)
    istore16 v2, v1-10000                       ; bin: 66 40 89 b1 ffffd8f0
    ; asm: movb %cl, 10000(%rsi)
    istore8 v1, v2+10000                        ; bin: 40 88 8e 00002710
    ; asm: movb %sil, 10000(%rcx)
    istore8 v2, v1+10000                        ; bin: 40 88 b1 00002710
    ; asm: movq 50000(%rcx), %rdi
    [-,%rdi]            v160 = load.i64 v1+50000           ; bin: 48 8b b9 0000c350
    ; asm: movq -50000(%rsi), %rdx
    [-,%rdx]            v161 = load.i64 v2-50000           ; bin: 48 8b 96 ffff3cb0
    ; asm: movl 50000(%rcx), %edi
    [-,%rdi]            v162 = uload32.i64 v1+50000        ; bin: 40 8b b9 0000c350
    ; asm: movl -50000(%rsi), %edx
    [-,%rdx]            v163 = uload32.i64 v2-50000        ; bin: 40 8b 96 ffff3cb0
    ; asm: movslq 50000(%rcx), %rdi
    [-,%rdi]            v164 = sload32.i64 v1+50000        ; bin: 48 63 b9 0000c350
    ; asm: movslq -50000(%rsi), %rdx
    [-,%rdx]            v165 = sload32.i64 v2-50000        ; bin: 48 63 96 ffff3cb0
    ; asm: movzwq 50000(%rcx), %rdi
    [-,%rdi]            v166 = uload16.i64 v1+50000        ; bin: 48 0f b7 b9 0000c350
    ; asm: movzwq -50000(%rsi), %rdx
    [-,%rdx]            v167 = uload16.i64 v2-50000        ; bin: 48 0f b7 96 ffff3cb0
    ; asm: movswq 50000(%rcx), %rdi
    [-,%rdi]            v168 = sload16.i64 v1+50000        ; bin: 48 0f bf b9 0000c350
    ; asm: movswq -50000(%rsi), %rdx
    [-,%rdx]            v169 = sload16.i64 v2-50000        ; bin: 48 0f bf 96 ffff3cb0
    ; asm: movzbq 50000(%rcx), %rdi
    [-,%rdi]            v170 = uload8.i64 v1+50000         ; bin: 48 0f b6 b9 0000c350
    ; asm: movzbq -50000(%rsi), %rdx
    [-,%rdx]            v171 = uload8.i64 v2-50000         ; bin: 48 0f b6 96 ffff3cb0
    ; asm: movsbq 50000(%rcx), %rdi
    [-,%rdi]            v172 = sload8.i64 v1+50000         ; bin: 48 0f be b9 0000c350
    ; asm: movsbq -50000(%rsi), %rdx
    [-,%rdx]            v173 = sload8.i64 v2-50000         ; bin: 48 0f be 96 ffff3cb0
    ; More arithmetic.
    ; asm: imulq %rsi, %rcx
    [-,%rcx]             v180 = imul v1, v2       ; bin: 48 0f af ce
    ; asm: imulq %r10, %rsi
    [-,%rsi]             v181 = imul v2, v3       ; bin: 49 0f af f2
    ; asm: imulq %rcx, %r10
    [-,%r10]             v182 = imul v3, v1       ; bin: 4c 0f af d1
    ; The x86 divide instructions read their dividend from %rdx:%rax, so the
    ; low and high halves are materialized in exactly those two registers and
    ; passed as the first two operands of every division below.
    [-,%rax]      v190 = iconst.i64 1
    [-,%rdx]      v191 = iconst.i64 2
    ; asm: idivq %rcx
    [-,%rax,%rdx] v192, v193 = x86_sdivmodx v190, v191, v1  ; bin: 48 f7 f9
    ; asm: idivq %rsi
    [-,%rax,%rdx] v194, v195 = x86_sdivmodx v190, v191, v2  ; bin: 48 f7 fe
    ; asm: idivq %r10
    [-,%rax,%rdx] v196, v197 = x86_sdivmodx v190, v191, v3  ; bin: 49 f7 fa
    ; asm: divq %rcx
    [-,%rax,%rdx] v198, v199 = x86_udivmodx v190, v191, v1  ; bin: 48 f7 f1
    ; asm: divq %rsi
    [-,%rax,%rdx] v200, v201 = x86_udivmodx v190, v191, v2  ; bin: 48 f7 f6
    ; asm: divq %r10
    [-,%rax,%rdx] v202, v203 = x86_udivmodx v190, v191, v3  ; bin: 49 f7 f2
    ; Bit-counting instructions.
    ; asm: popcntq %rsi, %rcx
    [-,%rcx]            v210 = popcnt v2        ; bin: f3 48 0f b8 ce
    ; asm: popcntq %r10, %rsi
    [-,%rsi]            v211 = popcnt v3        ; bin: f3 49 0f b8 f2
    ; asm: popcntq %rcx, %r10
    [-,%r10]            v212 = popcnt v1        ; bin: f3 4c 0f b8 d1
    ; asm: lzcntq %rsi, %rcx
    [-,%rcx]            v213 = clz v2           ; bin: f3 48 0f bd ce
    ; asm: lzcntq %r10, %rsi
    [-,%rsi]            v214 = clz v3           ; bin: f3 49 0f bd f2
    ; asm: lzcntq %rcx, %r10
    [-,%r10]            v215 = clz v1           ; bin: f3 4c 0f bd d1
    ; asm: tzcntq %rsi, %rcx
    [-,%rcx]            v216 = ctz v2           ; bin: f3 48 0f bc ce
    ; asm: tzcntq %r10, %rsi
    [-,%rsi]            v217 = ctz v3           ; bin: f3 49 0f bc f2
    ; asm: tzcntq %rcx, %r10
    [-,%r10]            v218 = ctz v1           ; bin: f3 4c 0f bc d1
    ; Integer comparisons.
    ; asm: cmpq %rsi, %rcx
    ; asm: sete %bl
    [-,%rbx]            v300 = icmp eq v1, v2   ; bin: 48 39 f1 0f 94 c3
    ; asm: cmpq %r10, %rsi
    ; asm: sete %dl
    [-,%rdx]            v301 = icmp eq v2, v3   ; bin: 4c 39 d6 0f 94 c2
    ; asm: cmpq %rsi, %rcx
    ; asm: setne %bl
    [-,%rbx]            v302 = icmp ne v1, v2   ; bin: 48 39 f1 0f 95 c3
    ; asm: cmpq %r10, %rsi
    ; asm: setne %dl
    [-,%rdx]            v303 = icmp ne v2, v3   ; bin: 4c 39 d6 0f 95 c2
    ; asm: cmpq %rsi, %rcx
    ; asm: setl %bl
    [-,%rbx]            v304 = icmp slt v1, v2  ; bin: 48 39 f1 0f 9c c3
    ; asm: cmpq %r10, %rsi
    ; asm: setl %dl
    [-,%rdx]            v305 = icmp slt v2, v3  ; bin: 4c 39 d6 0f 9c c2
    ; asm: cmpq %rsi, %rcx
    ; asm: setge %bl
    [-,%rbx]            v306 = icmp sge v1, v2  ; bin: 48 39 f1 0f 9d c3
    ; asm: cmpq %r10, %rsi
    ; asm: setge %dl
    [-,%rdx]            v307 = icmp sge v2, v3  ; bin: 4c 39 d6 0f 9d c2
    ; asm: cmpq %rsi, %rcx
    ; asm: setg %bl
    [-,%rbx]            v308 = icmp sgt v1, v2  ; bin: 48 39 f1 0f 9f c3
    ; asm: cmpq %r10, %rsi
    ; asm: setg %dl
    [-,%rdx]            v309 = icmp sgt v2, v3  ; bin: 4c 39 d6 0f 9f c2
    ; asm: cmpq %rsi, %rcx
    ; asm: setle %bl
    [-,%rbx]            v310 = icmp sle v1, v2  ; bin: 48 39 f1 0f 9e c3
    ; asm: cmpq %r10, %rsi
    ; asm: setle %dl
    [-,%rdx]            v311 = icmp sle v2, v3  ; bin: 4c 39 d6 0f 9e c2
    ; asm: cmpq %rsi, %rcx
    ; asm: setb %bl
    [-,%rbx]            v312 = icmp ult v1, v2  ; bin: 48 39 f1 0f 92 c3
    ; asm: cmpq %r10, %rsi
    ; asm: setb %dl
    [-,%rdx]            v313 = icmp ult v2, v3  ; bin: 4c 39 d6 0f 92 c2
    ; asm: cmpq %rsi, %rcx
    ; asm: setae %bl
    [-,%rbx]            v314 = icmp uge v1, v2  ; bin: 48 39 f1 0f 93 c3
    ; asm: cmpq %r10, %rsi
    ; asm: setae %dl
    [-,%rdx]            v315 = icmp uge v2, v3  ; bin: 4c 39 d6 0f 93 c2
    ; asm: cmpq %rsi, %rcx
    ; asm: seta %bl
    [-,%rbx]            v316 = icmp ugt v1, v2  ; bin: 48 39 f1 0f 97 c3
    ; asm: cmpq %r10, %rsi
    ; asm: seta %dl
    [-,%rdx]            v317 = icmp ugt v2, v3  ; bin: 4c 39 d6 0f 97 c2
    ; asm: cmpq %rsi, %rcx
    ; asm: setbe %bl
    [-,%rbx]            v318 = icmp ule v1, v2  ; bin: 48 39 f1 0f 96 c3
    ; asm: cmpq %r10, %rsi
    ; asm: setbe %dl
    [-,%rdx]            v319 = icmp ule v2, v3  ; bin: 4c 39 d6 0f 96 c2
    ; Bool-to-int conversions.
    ; asm: movzbq %bl, %rcx
    [-,%rcx]             v350 = bint.i64 v300   ; bin: 48 0f b6 cb
    ; asm: movzbq %dl, %rsi
    [-,%rsi]             v351 = bint.i64 v301   ; bin: 48 0f b6 f2
    ; Conditional branches. Each test+jcc pair is 5 bytes, so the 8-bit
    ; displacement to ebb1 shrinks by 5 from one branch to the next.
    ; asm: testq %rcx, %rcx
    ; asm: je ebb1
    brz v1, ebb1                                ; bin: 48 85 c9 74 1b
    ; asm: testq %rsi, %rsi
    ; asm: je ebb1
    brz v2, ebb1                                ; bin: 48 85 f6 74 16
    ; asm: testq %r10, %r10
    ; asm: je ebb1
    brz v3, ebb1                                ; bin: 4d 85 d2 74 11
    ; asm: testq %rcx, %rcx
    ; asm: jne ebb1
    brnz v1, ebb1                               ; bin: 48 85 c9 75 0c
    ; asm: testq %rsi, %rsi
    ; asm: jne ebb1
    brnz v2, ebb1                               ; bin: 48 85 f6 75 07
    ; asm: testq %r10, %r10
    ; asm: jne ebb1
    brnz v3, ebb1                               ; bin: 4d 85 d2 75 02
    ; asm: jmp ebb2
    jump ebb2                                   ; bin: eb 01
    ; asm: ebb1:
 ebb1:
    ; asm: ret
    return                                       ; bin: c3
    ; asm: ebb2:
 ebb2:
    jump ebb1                                   ; bin: eb fd
 }
 ; Tests for i32 instructions in 64-bit mode.
 ;
 ; Note that many i32 instructions can be encoded both with and without a REX
 ; prefix if they only use the low 8 registers. Here, we are testing the REX
 ; encodings which are chosen by default. Switching to non-REX encodings should
 ; be done by an instruction shrinking pass.
 function %I32() {
    ; Binary-emission test for i32 instructions in 64-bit mode.
    ;
    ; Each tested instruction is pinned to fixed registers, preceded by the
    ; AT&T-syntax assembly it corresponds to, and followed by the exact bytes
    ; the emitter is expected to produce. Encodings that need no extension
    ; bits still carry an empty REX prefix (0x40).
    fn0 = function %foo()
    sig0 = ()
 ebb0:
    ; Integer Constants.
    ; asm: movl $0x01020304, %ecx
    [-,%rcx]            v1 = iconst.i32 0x0102_0304           ; bin: 40 b9 01020304
    ; asm: movl $0x11020304, %esi
    [-,%rsi]            v2 = iconst.i32 0x1102_0304           ; bin: 40 be 11020304
    ; asm: movl $0x21020304, %r10d
    [-,%r10]            v3 = iconst.i32 0x2102_0304           ; bin: 41 ba 21020304
    ; asm: movl $0xff001122, %r8d
    [-,%r8]             v4 = iconst.i32 0xff00_1122           ; bin: 41 b8 ff001122
    ; asm: movl $0x88001122, %r14d
    [-,%r14]            v5 = iconst.i32 0xffff_ffff_8800_1122 ; bin: 41 be 88001122
    ; Load/Store instructions.
    ; Register indirect addressing with no displacement.
    ; asm: movl (%rcx), %edi
    [-,%rdi]            v10 = load.i32 v1      ; bin: 40 8b 39
    ; asm: movl (%rsi), %edx
    [-,%rdx]            v11 = load.i32 v2      ; bin: 40 8b 16
    ; asm: movzwl (%rcx), %edi
    [-,%rdi]            v12 = uload16.i32 v1   ; bin: 40 0f b7 39
    ; asm: movzwl (%rsi), %edx
    [-,%rdx]            v13 = uload16.i32 v2   ; bin: 40 0f b7 16
    ; asm: movswl (%rcx), %edi
    [-,%rdi]            v14 = sload16.i32 v1   ; bin: 40 0f bf 39
    ; asm: movswl (%rsi), %edx
    [-,%rdx]            v15 = sload16.i32 v2   ; bin: 40 0f bf 16
    ; asm: movzbl (%rcx), %edi
    [-,%rdi]            v16 = uload8.i32 v1    ; bin: 40 0f b6 39
    ; asm: movzbl (%rsi), %edx
    [-,%rdx]            v17 = uload8.i32 v2    ; bin: 40 0f b6 16
    ; asm: movsbl (%rcx), %edi
    [-,%rdi]            v18 = sload8.i32 v1    ; bin: 40 0f be 39
    ; asm: movsbl (%rsi), %edx
    [-,%rdx]            v19 = sload8.i32 v2    ; bin: 40 0f be 16
    ; Register-indirect with 8-bit signed displacement.
    ; asm: movl 50(%rcx), %edi
    [-,%rdi]            v20 = load.i32 v1+50           ; bin: 40 8b 79 32
    ; asm: movl -50(%rsi), %edx
    [-,%rdx]            v21 = load.i32 v2-50           ; bin: 40 8b 56 ce
    ; asm: movzwl 50(%rcx), %edi
    [-,%rdi]            v22 = uload16.i32 v1+50        ; bin: 40 0f b7 79 32
    ; asm: movzwl -50(%rsi), %edx
    [-,%rdx]            v23 = uload16.i32 v2-50        ; bin: 40 0f b7 56 ce
    ; asm: movswl 50(%rcx), %edi
    [-,%rdi]            v24 = sload16.i32 v1+50        ; bin: 40 0f bf 79 32
    ; asm: movswl -50(%rsi), %edx
    [-,%rdx]            v25 = sload16.i32 v2-50        ; bin: 40 0f bf 56 ce
    ; asm: movzbl 50(%rcx), %edi
    [-,%rdi]            v26 = uload8.i32 v1+50         ; bin: 40 0f b6 79 32
    ; asm: movzbl -50(%rsi), %edx
    [-,%rdx]            v27 = uload8.i32 v2-50         ; bin: 40 0f b6 56 ce
    ; asm: movsbl 50(%rcx), %edi
    [-,%rdi]            v28 = sload8.i32 v1+50         ; bin: 40 0f be 79 32
    ; asm: movsbl -50(%rsi), %edx
    [-,%rdx]            v29 = sload8.i32 v2-50         ; bin: 40 0f be 56 ce
    ; Register-indirect with 32-bit signed displacement.
    ; asm: movl 50000(%rcx), %edi
    [-,%rdi]            v30 = load.i32 v1+50000           ; bin: 40 8b b9 0000c350
    ; asm: movl -50000(%rsi), %edx
    [-,%rdx]            v31 = load.i32 v2-50000           ; bin: 40 8b 96 ffff3cb0
    ; asm: movzwl 50000(%rcx), %edi
    [-,%rdi]            v32 = uload16.i32 v1+50000        ; bin: 40 0f b7 b9 0000c350
    ; asm: movzwl -50000(%rsi), %edx
    [-,%rdx]            v33 = uload16.i32 v2-50000        ; bin: 40 0f b7 96 ffff3cb0
    ; asm: movswl 50000(%rcx), %edi
    [-,%rdi]            v34 = sload16.i32 v1+50000        ; bin: 40 0f bf b9 0000c350
    ; asm: movswl -50000(%rsi), %edx
    [-,%rdx]            v35 = sload16.i32 v2-50000        ; bin: 40 0f bf 96 ffff3cb0
    ; asm: movzbl 50000(%rcx), %edi
    [-,%rdi]            v36 = uload8.i32 v1+50000         ; bin: 40 0f b6 b9 0000c350
    ; asm: movzbl -50000(%rsi), %edx
    [-,%rdx]            v37 = uload8.i32 v2-50000         ; bin: 40 0f b6 96 ffff3cb0
    ; asm: movsbl 50000(%rcx), %edi
    [-,%rdi]            v38 = sload8.i32 v1+50000         ; bin: 40 0f be b9 0000c350
    ; asm: movsbl -50000(%rsi), %edx
    [-,%rdx]            v39 = sload8.i32 v2-50000         ; bin: 40 0f be 96 ffff3cb0
    ; Integer Register-Register Operations.
    ; asm: addl %esi, %ecx
    [-,%rcx]             v40 = iadd v1, v2       ; bin: 40 01 f1
    ; asm: addl %r10d, %esi
    [-,%rsi]             v41 = iadd v2, v3       ; bin: 44 01 d6
    ; asm: addl %ecx, %r10d
    [-,%r10]             v42 = iadd v3, v1       ; bin: 41 01 ca
    ; asm: subl %esi, %ecx
    [-,%rcx]             v50 = isub v1, v2       ; bin: 40 29 f1
    ; asm: subl %r10d, %esi
    [-,%rsi]             v51 = isub v2, v3       ; bin: 44 29 d6
    ; asm: subl %ecx, %r10d
    [-,%r10]             v52 = isub v3, v1       ; bin: 41 29 ca
    ; asm: andl %esi, %ecx
    [-,%rcx]             v60 = band v1, v2       ; bin: 40 21 f1
    ; asm: andl %r10d, %esi
    [-,%rsi]             v61 = band v2, v3       ; bin: 44 21 d6
    ; asm: andl %ecx, %r10d
    [-,%r10]             v62 = band v3, v1       ; bin: 41 21 ca
    ; asm: orl %esi, %ecx
    [-,%rcx]             v70 = bor v1, v2       ; bin: 40 09 f1
    ; asm: orl %r10d, %esi
    [-,%rsi]             v71 = bor v2, v3       ; bin: 44 09 d6
    ; asm: orl %ecx, %r10d
    [-,%r10]             v72 = bor v3, v1       ; bin: 41 09 ca
    ; asm: xorl %esi, %ecx
    [-,%rcx]             v80 = bxor v1, v2       ; bin: 40 31 f1
    ; asm: xorl %r10d, %esi
    [-,%rsi]             v81 = bxor v2, v3       ; bin: 44 31 d6
    ; asm: xorl %ecx, %r10d
    [-,%r10]             v82 = bxor v3, v1       ; bin: 41 31 ca
    ; asm: shll %cl, %esi
    [-,%rsi]             v90 = ishl v2, v1       ; bin: 40 d3 e6
    ; asm: shll %cl, %r10d
    [-,%r10]             v91 = ishl v3, v1       ; bin: 41 d3 e2
    ; asm: sarl %cl, %esi
    [-,%rsi]             v92 = sshr v2, v1       ; bin: 40 d3 fe
    ; asm: sarl %cl, %r10d
    [-,%r10]             v93 = sshr v3, v1       ; bin: 41 d3 fa
    ; asm: shrl %cl, %esi
    [-,%rsi]             v94 = ushr v2, v1       ; bin: 40 d3 ee
    ; asm: shrl %cl, %r10d
    [-,%r10]             v95 = ushr v3, v1       ; bin: 41 d3 ea
    ; asm: roll %cl, %esi
    [-,%rsi]             v96 = rotl v2, v1       ; bin: 40 d3 c6
    ; asm: roll %cl, %r10d
    [-,%r10]             v97 = rotl v3, v1       ; bin: 41 d3 c2
    ; asm: rorl %cl, %esi
    [-,%rsi]             v98 = rotr v2, v1       ; bin: 40 d3 ce
    ; asm: rorl %cl, %r10d
    [-,%r10]             v99 = rotr v3, v1       ; bin: 41 d3 ca
    ; Integer Register-Immediate Operations.
    ; These 32-bit ops all use a 32-bit immediate.
    ; Some take 8-bit immediates that are sign-extended to 32 bits.
    ; asm: addl $-100000, %ecx
    [-,%rcx]     v100 = iadd_imm v1, -100000     ; bin: 40 81 c1 fffe7960
    ; asm: addl $100000, %esi
    [-,%rsi]     v101 = iadd_imm v2, 100000      ; bin: 40 81 c6 000186a0
    ; asm: addl $0x7fffffff, %r10d
    [-,%r10]     v102 = iadd_imm v3, 0x7fff_ffff ; bin: 41 81 c2 7fffffff
    ; asm: addl $100, %r8d
    [-,%r8]      v103 = iadd_imm v4, 100         ; bin: 41 83 c0 64
    ; asm: addl $-100, %r14d
    [-,%r14]     v104 = iadd_imm v5, -100        ; bin: 41 83 c6 9c
    ; asm: andl $-100000, %ecx
    [-,%rcx]     v110 = band_imm v1, -100000     ; bin: 40 81 e1 fffe7960
    ; asm: andl $100000, %esi
    [-,%rsi]     v111 = band_imm v2, 100000      ; bin: 40 81 e6 000186a0
    ; asm: andl $0x7fffffff, %r10d
    [-,%r10]     v112 = band_imm v3, 0x7fff_ffff ; bin: 41 81 e2 7fffffff
    ; asm: andl $100, %r8d
    [-,%r8]      v113 = band_imm v4, 100         ; bin: 41 83 e0 64
    ; asm: andl $-100, %r14d
    [-,%r14]     v114 = band_imm v5, -100        ; bin: 41 83 e6 9c
    ; asm: orl $-100000, %ecx
    [-,%rcx]     v120 = bor_imm v1, -100000      ; bin: 40 81 c9 fffe7960
    ; asm: orl $100000, %esi
    [-,%rsi]     v121 = bor_imm v2, 100000       ; bin: 40 81 ce 000186a0
    ; asm: orl $0x7fffffff, %r10d
    [-,%r10]     v122 = bor_imm v3, 0x7fff_ffff  ; bin: 41 81 ca 7fffffff
    ; asm: orl $100, %r8d
    [-,%r8]      v123 = bor_imm v4, 100          ; bin: 41 83 c8 64
    ; asm: orl $-100, %r14d
    [-,%r14]     v124 = bor_imm v5, -100         ; bin: 41 83 ce 9c
    ; asm: xorl $-100000, %ecx
    [-,%rcx]     v130 = bxor_imm v1, -100000     ; bin: 40 81 f1 fffe7960
    ; asm: xorl $100000, %esi
    [-,%rsi]     v131 = bxor_imm v2, 100000      ; bin: 40 81 f6 000186a0
    ; asm: xorl $0x7fffffff, %r10d
    [-,%r10]     v132 = bxor_imm v3, 0x7fff_ffff ; bin: 41 81 f2 7fffffff
    ; asm: xorl $100, %r8d
    [-,%r8]      v133 = bxor_imm v4, 100         ; bin: 41 83 f0 64
    ; asm: xorl $-100, %r14d
    [-,%r14]     v134 = bxor_imm v5, -100        ; bin: 41 83 f6 9c
    ; Register copies.
    ; asm: movl %esi, %ecx
    [-,%rcx]             v140 = copy v2          ; bin: 40 89 f1
    ; asm: movl %r10d, %esi
    [-,%rsi]             v141 = copy v3          ; bin: 44 89 d6
    ; asm: movl %ecx, %r10d
    [-,%r10]             v142 = copy v1          ; bin: 41 89 ca
    ; More arithmetic.
    ; asm: imull %esi, %ecx
    [-,%rcx]             v150 = imul v1, v2       ; bin: 40 0f af ce
    ; asm: imull %r10d, %esi
    [-,%rsi]             v151 = imul v2, v3       ; bin: 41 0f af f2
    ; asm: imull %ecx, %r10d
    [-,%r10]             v152 = imul v3, v1       ; bin: 44 0f af d1
    ; The x86 divide instructions read their dividend from %edx:%eax, so the
    ; low and high halves are materialized in exactly those two registers and
    ; passed as the first two operands of every division below.
    [-,%rax]      v160 = iconst.i32 1
    [-,%rdx]      v161 = iconst.i32 2
    ; asm: idivl %ecx
    [-,%rax,%rdx] v162, v163 = x86_sdivmodx v160, v161, v1  ; bin: 40 f7 f9
    ; asm: idivl %esi
    [-,%rax,%rdx] v164, v165 = x86_sdivmodx v160, v161, v2  ; bin: 40 f7 fe
    ; asm: idivl %r10d
    [-,%rax,%rdx] v166, v167 = x86_sdivmodx v160, v161, v3  ; bin: 41 f7 fa
    ; asm: divl %ecx
    [-,%rax,%rdx] v168, v169 = x86_udivmodx v160, v161, v1  ; bin: 40 f7 f1
    ; asm: divl %esi
    [-,%rax,%rdx] v170, v171 = x86_udivmodx v160, v161, v2  ; bin: 40 f7 f6
    ; asm: divl %r10d
    [-,%rax,%rdx] v172, v173 = x86_udivmodx v160, v161, v3  ; bin: 41 f7 f2
    ; Bit-counting instructions.
    ; asm: popcntl %esi, %ecx
    [-,%rcx]            v200 = popcnt v2         ; bin: f3 40 0f b8 ce
    ; asm: popcntl %r10d, %esi
    [-,%rsi]            v201 = popcnt v3         ; bin: f3 41 0f b8 f2
    ; asm: popcntl %ecx, %r10d
    [-,%r10]            v202 = popcnt v1         ; bin: f3 44 0f b8 d1
    ; asm: lzcntl %esi, %ecx
    [-,%rcx]            v203 = clz v2            ; bin: f3 40 0f bd ce
    ; asm: lzcntl %r10d, %esi
    [-,%rsi]            v204 = clz v3            ; bin: f3 41 0f bd f2
    ; asm: lzcntl %ecx, %r10d
    [-,%r10]            v205 = clz v1            ; bin: f3 44 0f bd d1
    ; asm: tzcntl %esi, %ecx
    [-,%rcx]            v206 = ctz v2            ; bin: f3 40 0f bc ce
    ; asm: tzcntl %r10d, %esi
    [-,%rsi]            v207 = ctz v3            ; bin: f3 41 0f bc f2
    ; asm: tzcntl %ecx, %r10d
    [-,%r10]            v208 = ctz v1            ; bin: f3 44 0f bc d1
    ; Integer comparisons.
    ; asm: cmpl %esi, %ecx
    ; asm: sete %bl
    [-,%rbx]            v300 = icmp eq v1, v2   ; bin: 40 39 f1 0f 94 c3
    ; asm: cmpl %r10d, %esi
    ; asm: sete %dl
    [-,%rdx]            v301 = icmp eq v2, v3   ; bin: 44 39 d6 0f 94 c2
    ; asm: cmpl %esi, %ecx
    ; asm: setne %bl
    [-,%rbx]            v302 = icmp ne v1, v2   ; bin: 40 39 f1 0f 95 c3
    ; asm: cmpl %r10d, %esi
    ; asm: setne %dl
    [-,%rdx]            v303 = icmp ne v2, v3   ; bin: 44 39 d6 0f 95 c2
    ; asm: cmpl %esi, %ecx
    ; asm: setl %bl
    [-,%rbx]            v304 = icmp slt v1, v2  ; bin: 40 39 f1 0f 9c c3
    ; asm: cmpl %r10d, %esi
    ; asm: setl %dl
    [-,%rdx]            v305 = icmp slt v2, v3  ; bin: 44 39 d6 0f 9c c2
    ; asm: cmpl %esi, %ecx
    ; asm: setge %bl
    [-,%rbx]            v306 = icmp sge v1, v2  ; bin: 40 39 f1 0f 9d c3
    ; asm: cmpl %r10d, %esi
    ; asm: setge %dl
    [-,%rdx]            v307 = icmp sge v2, v3  ; bin: 44 39 d6 0f 9d c2
    ; asm: cmpl %esi, %ecx
    ; asm: setg %bl
    [-,%rbx]            v308 = icmp sgt v1, v2  ; bin: 40 39 f1 0f 9f c3
    ; asm: cmpl %r10d, %esi
    ; asm: setg %dl
    [-,%rdx]            v309 = icmp sgt v2, v3  ; bin: 44 39 d6 0f 9f c2
    ; asm: cmpl %esi, %ecx
    ; asm: setle %bl
    [-,%rbx]            v310 = icmp sle v1, v2  ; bin: 40 39 f1 0f 9e c3
    ; asm: cmpl %r10d, %esi
    ; asm: setle %dl
    [-,%rdx]            v311 = icmp sle v2, v3  ; bin: 44 39 d6 0f 9e c2
    ; asm: cmpl %esi, %ecx
    ; asm: setb %bl
    [-,%rbx]            v312 = icmp ult v1, v2  ; bin: 40 39 f1 0f 92 c3
    ; asm: cmpl %r10d, %esi
    ; asm: setb %dl
    [-,%rdx]            v313 = icmp ult v2, v3  ; bin: 44 39 d6 0f 92 c2
    ; asm: cmpl %esi, %ecx
    ; asm: setae %bl
    [-,%rbx]            v314 = icmp uge v1, v2  ; bin: 40 39 f1 0f 93 c3
    ; asm: cmpl %r10d, %esi
    ; asm: setae %dl
    [-,%rdx]            v315 = icmp uge v2, v3  ; bin: 44 39 d6 0f 93 c2
    ; asm: cmpl %esi, %ecx
    ; asm: seta %bl
    [-,%rbx]            v316 = icmp ugt v1, v2  ; bin: 40 39 f1 0f 97 c3
    ; asm: cmpl %r10d, %esi
    ; asm: seta %dl
    [-,%rdx]            v317 = icmp ugt v2, v3  ; bin: 44 39 d6 0f 97 c2
    ; asm: cmpl %esi, %ecx
    ; asm: setbe %bl
    [-,%rbx]            v318 = icmp ule v1, v2  ; bin: 40 39 f1 0f 96 c3
    ; asm: cmpl %r10d, %esi
    ; asm: setbe %dl
    [-,%rdx]            v319 = icmp ule v2, v3  ; bin: 44 39 d6 0f 96 c2
    ; Bool-to-int conversions.
    ; asm: movzbl %bl, %ecx
    [-,%rcx]             v350 = bint.i32 v300   ; bin: 40 0f b6 cb
    ; asm: movzbl %dl, %esi
    [-,%rsi]             v351 = bint.i32 v301   ; bin: 40 0f b6 f2
    ; Conditional branches. Each test+jcc pair is 5 bytes, so the 8-bit
    ; displacement to ebb1 shrinks by 5 from one branch to the next.
    ; The assembly labels carry an "x" suffix, unlike the IR block names.
    ; asm: testl %ecx, %ecx
    ; asm: je ebb1x
    brz v1, ebb1                                ; bin: 40 85 c9 74 1b
    ; asm: testl %esi, %esi
    ; asm: je ebb1x
    brz v2, ebb1                                ; bin: 40 85 f6 74 16
    ; asm: testl %r10d, %r10d
    ; asm: je ebb1x
    brz v3, ebb1                                ; bin: 45 85 d2 74 11
    ; asm: testl %ecx, %ecx
    ; asm: jne ebb1x
    brnz v1, ebb1                               ; bin: 40 85 c9 75 0c
    ; asm: testl %esi, %esi
    ; asm: jne ebb1x
    brnz v2, ebb1                               ; bin: 40 85 f6 75 07
    ; asm: testl %r10d, %r10d
    ; asm: jne ebb1x
    brnz v3, ebb1                               ; bin: 45 85 d2 75 02
    ; asm: jmp ebb2x
    jump ebb2                                   ; bin: eb 01
    ; asm: ebb1x:
 ebb1:
    ; asm: ret
    return                                       ; bin: c3
    ; asm: ebb2x:
 ebb2:
    jump ebb1                                   ; bin: eb fd
 }
 ; Tests for i64/i32 conversion instructions.
 function %I64_I32() {
    ; Covers ireduce, sextend and uextend between i64 and i32, using values
    ; held in %rcx, %rsi and %r10.
 ebb0:
    [-,%rcx]            v1 = iconst.i64 1
    [-,%rsi]            v2 = iconst.i64 2
    [-,%r10]            v3 = iconst.i64 3
    ; Each ireduce keeps its result in the register that already holds its
    ; operand, and the expected byte sequence is empty: no code is emitted.
    [-,%rcx]            v11 = ireduce.i32 v1            ; bin:
    [-,%rsi]            v12 = ireduce.i32 v2            ; bin:
    [-,%r10]            v13 = ireduce.i32 v3            ; bin:
    ; Sign extension uses movslq with a REX.W prefix.
    ; asm: movslq %ecx, %rsi
    [-,%rsi]            v20 = sextend.i64 v11           ; bin: 48 63 f1
    ; asm: movslq %esi, %r10
    [-,%r10]            v21 = sextend.i64 v12           ; bin: 4c 63 d6
    ; asm: movslq %r10d, %rcx
    [-,%rcx]            v22 = sextend.i64 v13           ; bin: 49 63 ca
    ; Zero extension is a plain 32-bit movl: on x86-64, writing a 32-bit
    ; register clears the upper half of the full 64-bit register.
    ; asm: movl %ecx, %esi
    [-,%rsi]            v30 = uextend.i64 v11           ; bin: 40 89 ce
    ; asm: movl %esi, %r10d
    [-,%r10]            v31 = uextend.i64 v12           ; bin: 41 89 f2
    ; asm: movl %r10d, %ecx
    [-,%rcx]            v32 = uextend.i64 v13           ; bin: 44 89 d1
    ; The function ends in a trap, expected as the two-byte ud2 opcode.
    trap                                                ; bin: 0f 0b
 }
--- a/filetests/isa/riscv/abi-e.cton
+++ b/filetests/isa/riscv/abi-e.cton
@@ -0,0 +1,14 @@
 ; Test the legalization of function signatures for RV32E.
 test legalizer
 isa riscv enable_e
 ; regex: V=v\d+
 function %f() {
    ; Spilling into the stack args after %x15 since %x16 and up are not
    ; available in RV32E.
    sig0 = (i64, i64, i64, i64) -> i64 native
    ; check: sig0 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13], i32 [%x14], i32 [%x15], i32 [0], i32 [4]) -> i32 [%x10], i32 [%x11] native
 ebb0:
    return
 }
--- a/filetests/isa/riscv/abi.cton
+++ b/filetests/isa/riscv/abi.cton
@@ -0,0 +1,32 @@
 ; Test the legalization of function signatures.
 test legalizer
 isa riscv
 ; regex: V=v\d+
 function %f() {
    sig0 = (i32) -> i32 native
    ; check: sig0 = (i32 [%x10]) -> i32 [%x10] native
    sig1 = (i64) -> b1 native
    ; check: sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] native
    ; The i64 argument must go in an even-odd register pair.
    sig2 = (f32, i64) -> f64 native
    ; check: sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] native
    ; Spilling into the stack args.
    sig3 = (f64, f64, f64, f64, f64, f64, f64, i64) -> f64 native
    ; check: sig3 = (f64 [%f10], f64 [%f11], f64 [%f12], f64 [%f13], f64 [%f14], f64 [%f15], f64 [%f16], i32 [0], i32 [4]) -> f64 [%f10] native
    ; Splitting vectors.
    sig4 = (i32x4) native
    ; check: sig4 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13]) native
    ; Splitting vectors, then splitting ints.
    sig5 = (i64x4) native
    ; check: sig5 = (i32 [%x10], i32 [%x11], i32 [%x12], i32 [%x13], i32 [%x14], i32 [%x15], i32 [%x16], i32 [%x17]) native
 ebb0:
    return
 }
--- a/filetests/isa/riscv/binary32.cton
+++ b/filetests/isa/riscv/binary32.cton
@@ -0,0 +1,145 @@
 ; Binary emission of 32-bit code.
 test binemit
 isa riscv
 function %RV32I(i32 link [%x1]) -> i32 link [%x1] {
    fn0 = function %foo()
    sig0 = ()
 ebb0(v9999: i32):
    [-,%x10]            v1 = iconst.i32 1
    [-,%x21]            v2 = iconst.i32 2
    ; Integer Register-Register Operations.
    ; add
    [-,%x7]             v10 = iadd v1, v2       ; bin: 015503b3
    [-,%x16]            v11 = iadd v2, v1       ; bin: 00aa8833
    ; sub
    [-,%x7]             v12 = isub v1, v2       ; bin: 415503b3
    [-,%x16]            v13 = isub v2, v1       ; bin: 40aa8833
    ; and
    [-,%x7]             v20 = band v1, v2       ; bin: 015573b3
    [-,%x16]            v21 = band v2, v1       ; bin: 00aaf833
    ; or
    [-,%x7]             v22 = bor v1, v2        ; bin: 015563b3
    [-,%x16]            v23 = bor v2, v1        ; bin: 00aae833
    ; xor
    [-,%x7]             v24 = bxor v1, v2       ; bin: 015543b3
    [-,%x16]            v25 = bxor v2, v1       ; bin: 00aac833
    ; sll
    [-,%x7]             v30 = ishl v1, v2       ; bin: 015513b3
    [-,%x16]            v31 = ishl v2, v1       ; bin: 00aa9833
    ; srl
    [-,%x7]             v32 = ushr v1, v2       ; bin: 015553b3
    [-,%x16]            v33 = ushr v2, v1       ; bin: 00aad833
    ; sra
    [-,%x7]             v34 = sshr v1, v2       ; bin: 415553b3
    [-,%x16]            v35 = sshr v2, v1       ; bin: 40aad833
    ; slt
    [-,%x7]             v42 = icmp slt v1, v2   ; bin: 015523b3
    [-,%x16]            v43 = icmp slt v2, v1   ; bin: 00aaa833
    ; sltu
    [-,%x7]             v44 = icmp ult v1, v2   ; bin: 015533b3
    [-,%x16]            v45 = icmp ult v2, v1   ; bin: 00aab833
    ; Integer Register-Immediate Instructions
    ; addi
    [-,%x7]     v100 = iadd_imm v1, 1000       ; bin: 3e850393
    [-,%x16]    v101 = iadd_imm v2, -905       ; bin: c77a8813
    ; andi
    [-,%x7]     v110 = band_imm v1, 1000       ; bin: 3e857393
    [-,%x16]    v111 = band_imm v2, -905       ; bin: c77af813
    ; ori
    [-,%x7]     v112 = bor_imm v1, 1000        ; bin: 3e856393
    [-,%x16]    v113 = bor_imm v2, -905        ; bin: c77ae813
    ; xori
    [-,%x7]     v114 = bxor_imm v1, 1000       ; bin: 3e854393
    [-,%x16]    v115 = bxor_imm v2, -905       ; bin: c77ac813
    ; slli
    [-,%x7]     v120 = ishl_imm v1, 31         ; bin: 01f51393
    [-,%x16]    v121 = ishl_imm v2, 8          ; bin: 008a9813
    ; srli
    [-,%x7]     v122 = ushr_imm v1, 31         ; bin: 01f55393
    [-,%x16]    v123 = ushr_imm v2, 8          ; bin: 008ad813
    ; srai
    [-,%x7]     v124 = sshr_imm v1, 31         ; bin: 41f55393
    [-,%x16]    v125 = sshr_imm v2, 8          ; bin: 408ad813
    ; slti
    [-,%x7]     v130 = icmp_imm slt v1, 1000   ; bin: 3e852393
    [-,%x16]    v131 = icmp_imm slt v2, -905   ; bin: c77aa813
    ; sltiu
    [-,%x7]     v132 = icmp_imm ult v1, 1000   ; bin: 3e853393
    [-,%x16]    v133 = icmp_imm ult v2, -905   ; bin: c77ab813
    ; lui
    [-,%x7]     v140 = iconst.i32 0x12345000            ; bin: 123453b7
    [-,%x16]    v141 = iconst.i32 0xffffffff_fedcb000   ; bin: fedcb837
    ; addi
    [-,%x7]     v142 = iconst.i32 1000                  ; bin: 3e800393
    [-,%x16]    v143 = iconst.i32 -905                  ; bin: c7700813
    ; Copies are encoded as iadd_imm with a zero immediate (addi rd, rs, 0).
    [-,%x7]     v150 = copy v1                          ; bin: 00050393
    [-,%x16]    v151 = copy v2                          ; bin: 000a8813
    ; Control Transfer Instructions
    ; jal %x1, fn0
    call fn0()                                  ; bin: Call(fn0) 000000ef
    ; jalr %x1, %x10
    call_indirect sig0, v1()                    ; bin: 000500e7
    call_indirect sig0, v2()                    ; bin: 000a80e7
    brz v1, ebb3
    brnz v1, ebb1
    ; jalr %x0, %x1, 0
    return v9999                        ; bin: 00008067
 ebb1:
    ; beq 0x000
    br_icmp eq v1, v2, ebb1             ; bin: 01550063
    ; bne 0xffc
    br_icmp ne v1, v2, ebb1             ; bin: ff551ee3
    ; blt 0xff8
    br_icmp slt v1, v2, ebb1            ; bin: ff554ce3
    ; bge 0xff4
    br_icmp sge v1, v2, ebb1            ; bin: ff555ae3
    ; bltu 0xff0
    br_icmp ult v1, v2, ebb1            ; bin: ff5568e3
    ; bgeu 0xfec
    br_icmp uge v1, v2, ebb1            ; bin: ff5576e3
    ; Forward branches.
    ; beq 0x018
    br_icmp eq v2, v1, ebb2             ; bin: 00aa8c63
    ; bne 0x014
    br_icmp ne v2, v1, ebb2             ; bin: 00aa9a63
    ; blt 0x010
    br_icmp slt v2, v1, ebb2            ; bin: 00aac863
    ; bge 0x00c
    br_icmp sge v2, v1, ebb2            ; bin: 00aad663
    ; bltu 0x008
    br_icmp ult v2, v1, ebb2            ; bin: 00aae463
    ; bgeu 0x004
    br_icmp uge v2, v1, ebb2            ; bin: 00aaf263
    fallthrough ebb2
 ebb2:
    ; jal %x0, 0x00000
    jump ebb2                           ; bin: 0000006f
 ebb3:
    ; beq x, %x0
    brz v1, ebb3                        ; bin: 00050063
    ; bne x, %x0
    brnz v1, ebb3                       ; bin: fe051ee3
    ; jal %x0, 0x1ffff4
    jump ebb2                           ; bin: ff5ff06f
 }
--- a/filetests/isa/riscv/encoding.cton
+++ b/filetests/isa/riscv/encoding.cton
@@ -0,0 +1,21 @@
 test legalizer
 isa riscv supports_m=1
 function %int32(i32, i32) {
 ebb0(v1: i32, v2: i32):
    v10 = iadd v1, v2
    ; check: [R#0c]
    ; sameln: $v10 = iadd
    v11 = isub v1, v2
    ; check: [R#200c]
    ; sameln: $v11 = isub
    v12 = imul v1, v2
    ; check: [R#10c]
    ; sameln: $v12 = imul
    return
    ; check: [Iret#19]
    ; sameln: return
 }
--- a/filetests/isa/riscv/expand-i32.cton
+++ b/filetests/isa/riscv/expand-i32.cton
@@ -0,0 +1,38 @@
 ; Test the legalization of i32 instructions that don't have RISC-V versions.
 test legalizer
 set is_64bit=0
 isa riscv supports_m=1
 set is_64bit=1
 isa riscv supports_m=1
 ; regex: V=v\d+
 function %carry_out(i32, i32) -> i32, b1 {
 ebb0(v1: i32, v2: i32):
    v3, v4 = iadd_cout v1, v2
    return v3, v4
 }
 ; check: $v3 = iadd $v1, $v2
 ; check: $v4 = icmp ult $v3, $v1
 ; check: return $v3, $v4
 ; Expanding illegal immediate constants.
 ; Note that at some point we'll probably expand the iconst as well.
 function %large_imm(i32) -> i32 {
 ebb0(v0: i32):
    v1 = iadd_imm v0, 1000000000
    return v1
 }
 ; check: $(cst=$V) = iconst.i32 0x3b9a_ca00
 ; check: $v1 = iadd $v0, $cst
 ; check: return $v1
 function %bitclear(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = band_not v0, v1
    ; check: bnot
    ; check: band
    return v2
 }
--- a/filetests/isa/riscv/legalize-abi.cton
+++ b/filetests/isa/riscv/legalize-abi.cton
@@ -0,0 +1,134 @@
 ; Test legalizer's handling of ABI boundaries.
 test legalizer
 isa riscv
 ; regex: V=v\d+
 ; regex: SS=ss\d+
 ; regex: WS=\s+
 function %int_split_args(i64) -> i64 {
 ebb0(v0: i64):
    ; check: $ebb0($(v0l=$V): i32, $(v0h=$V): i32, $(link=$V): i32):
    ; check: $v0 = iconcat $v0l, $v0h
    v1 = iadd_imm v0, 1
    ; check: $(v1l=$V), $(v1h=$V) = isplit $v1
    ; check: return $v1l, $v1h, $link
    return v1
 }
 function %split_call_arg(i32) {
    fn1 = function %foo(i64)
    fn2 = function %foo(i32, i64)
 ebb0(v0: i32):
    v1 = uextend.i64 v0
    call fn1(v1)
    ; check: $(v1l=$V), $(v1h=$V) = isplit $v1
    ; check: call $fn1($v1l, $v1h)
    call fn2(v0, v1)
    ; check: call $fn2($v0, $V, $V)
    return
 }
 function %split_ret_val() {
    fn1 = function %foo() -> i64
 ebb0:
    v1 = call fn1()
    ; check: $ebb0($(link=$V): i32):
    ; nextln: $(v1l=$V), $(v1h=$V) = call $fn1()
    ; check: $v1 = iconcat $v1l, $v1h
    jump ebb1(v1)
    ; check: jump $ebb1($v1)
 ebb1(v10: i64):
    jump ebb1(v10)
 }
 ; First return value is fine, second one is expanded.
 function %split_ret_val2() {
    fn1 = function %foo() -> i32, i64
 ebb0:
    v1, v2 = call fn1()
    ; check: $ebb0($(link=$V): i32):
    ; nextln: $v1, $(v2l=$V), $(v2h=$V) = call $fn1()
    ; check: $v2 = iconcat $v2l, $v2h
    jump ebb1(v1, v2)
    ; check: jump $ebb1($v1, $v2)
 ebb1(v9: i32, v10: i64):
    jump ebb1(v9, v10)
 }
 function %int_ext(i8, i8 sext, i8 uext) -> i8 uext {
 ebb0(v1: i8, v2: i8, v3: i8):
    ; check: $ebb0($v1: i8, $(v2x=$V): i32, $(v3x=$V): i32, $(link=$V): i32):
    ; check: $v2 = ireduce.i8 $v2x
    ; check: $v3 = ireduce.i8 $v3x
    ; check: $(v1x=$V) = uextend.i32 $v1
    ; check: return $v1x, $link
    return v1
 }
 ; Function produces single return value, still need to copy.
 function %ext_ret_val() {
    fn1 = function %foo() -> i8 sext
 ebb0:
    v1 = call fn1()
    ; check: $ebb0($V: i32):
    ; nextln: $(rv=$V) = call $fn1()
    ; check: $v1 = ireduce.i8 $rv
    jump ebb1(v1)
    ; check: jump $ebb1($v1)
 ebb1(v10: i8):
    jump ebb1(v10)
 }
 function %vector_split_args(i64x4) -> i64x4 {
 ebb0(v0: i64x4):
    ; check: $ebb0($(v0al=$V): i32, $(v0ah=$V): i32, $(v0bl=$V): i32, $(v0bh=$V): i32, $(v0cl=$V): i32, $(v0ch=$V): i32, $(v0dl=$V): i32, $(v0dh=$V): i32, $(link=$V): i32):
    ; check: $(v0a=$V) = iconcat $v0al, $v0ah
    ; check: $(v0b=$V) = iconcat $v0bl, $v0bh
    ; check: $(v0ab=$V) = vconcat $v0a, $v0b
    ; check: $(v0c=$V) = iconcat $v0cl, $v0ch
    ; check: $(v0d=$V) = iconcat $v0dl, $v0dh
    ; check: $(v0cd=$V) = vconcat $v0c, $v0d
    ; check: $v0 = vconcat $v0ab, $v0cd
    v1 = bxor v0, v0
    ; check: $(v1ab=$V), $(v1cd=$V) = vsplit $v1
    ; check: $(v1a=$V), $(v1b=$V) = vsplit $v1ab
    ; check: $(v1al=$V), $(v1ah=$V) = isplit $v1a
    ; check: $(v1bl=$V), $(v1bh=$V) = isplit $v1b
    ; check: $(v1c=$V), $(v1d=$V) = vsplit $v1cd
    ; check: $(v1cl=$V), $(v1ch=$V) = isplit $v1c
    ; check: $(v1dl=$V), $(v1dh=$V) = isplit $v1d
    ; check: return $v1al, $v1ah, $v1bl, $v1bh, $v1cl, $v1ch, $v1dl, $v1dh, $link
    return v1
 }
 ; Indirect call through a signature with no arguments or return values.
 ; There are no filecheck directives for this function, so it presumably only
 ; verifies that legalization completes without failing -- TODO confirm.
 function %indirect(i32) {
    sig1 = () native
 ebb0(v0: i32):
    call_indirect sig1, v0()
    return
 }
 ; The first argument to call_indirect doesn't get altered.
 function %indirect_arg(i32, f32x2) {
    sig1 = (f32x2) native
 ebb0(v0: i32, v1: f32x2):
    call_indirect sig1, v0(v1)
    ; check: call_indirect $sig1, $v0($V, $V)
    return
 }
 ; Call a function that takes arguments on the stack.
 function %stack_args(i32) {
    ; check: $(ss0=$SS) = outgoing_arg 4
    fn1 = function %foo(i64, i64, i64, i64, i32)
 ebb0(v0: i32):
    v1 = iconst.i64 1
    call fn1(v1, v1, v1, v1, v0)
    ; check: [GPsp#48,$ss0]$WS $(v0s=$V) = spill $v0
    ; check: call $fn1($(=.*), $v0s)
    return
 }
--- a/filetests/isa/riscv/legalize-i64.cton
+++ b/filetests/isa/riscv/legalize-i64.cton
@@ -0,0 +1,64 @@
 ; Test the legalization of i64 arithmetic instructions.
 test legalizer
 isa riscv supports_m=1
 ; regex: V=v\d+
 function %bitwise_and(i64, i64) -> i64 {
 ebb0(v1: i64, v2: i64):
    v3 = band v1, v2
    return v3
 }
 ; check: $ebb0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32):
 ; check: [R#ec
 ; sameln: $(v3l=$V) = band $v1l, $v2l
 ; check: [R#ec
 ; sameln: $(v3h=$V) = band $v1h, $v2h
 ; check: $v3 = iconcat $v3l, $v3h
 ; check: return $v3l, $v3h, $link
 function %bitwise_or(i64, i64) -> i64 {
 ebb0(v1: i64, v2: i64):
    v3 = bor v1, v2
    return v3
 }
 ; check: $ebb0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32):
 ; check: [R#cc
 ; sameln: $(v3l=$V) = bor $v1l, $v2l
 ; check: [R#cc
 ; sameln: $(v3h=$V) = bor $v1h, $v2h
 ; check: $v3 = iconcat $v3l, $v3h
 ; check: return $v3l, $v3h, $link
 function %bitwise_xor(i64, i64) -> i64 {
 ebb0(v1: i64, v2: i64):
    v3 = bxor v1, v2
    return v3
 }
 ; check: $ebb0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32):
 ; check: [R#8c
 ; sameln: $(v3l=$V) = bxor $v1l, $v2l
 ; check: [R#8c
 ; sameln: $(v3h=$V) = bxor $v1h, $v2h
 ; check: $v3 = iconcat $v3l, $v3h
 ; check: return $v3l, $v3h, $link
 function %arith_add(i64, i64) -> i64 {
 ; Legalizing iadd.i64 requires two steps:
 ; 1. Narrow to iadd_cout.i32, then
 ; 2. Expand iadd_cout.i32 since RISC-V has no carry flag.
 ebb0(v1: i64, v2: i64):
    v3 = iadd v1, v2
    return v3
 }
 ; check: $ebb0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32):
 ; check: [R#0c
 ; sameln: $(v3l=$V) = iadd $v1l, $v2l
 ; check: $(c=$V) = icmp ult $v3l, $v1l
 ; check: [R#0c
 ; sameln: $(v3h1=$V) = iadd $v1h, $v2h
 ; check: $(c_int=$V) = bint.i32 $c
 ; check: [R#0c
 ; sameln: $(v3h=$V) = iadd $v3h1, $c_int
 ; check: $v3 = iconcat $v3l, $v3h
 ; check: return $v3l, $v3h, $link
--- a/filetests/isa/riscv/parse-encoding.cton
+++ b/filetests/isa/riscv/parse-encoding.cton
@@ -0,0 +1,36 @@
 ; Test the parser's support for encoding annotations.
 test legalizer
 isa riscv
 function %parse_encoding(i32 [%x5]) -> i32 [%x10] {
    ; check: function %parse_encoding(i32 [%x5], i32 link [%x1]) -> i32 [%x10], i32 link [%x1] native {
    sig0 = (i32 [%x10]) -> i32 [%x10] native
    ; check: sig0 = (i32 [%x10]) -> i32 [%x10] native
    sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] native
    ; check: sig1 = (i32 [%x10], i32 [%x11]) -> b1 [%x10] native
    sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] native
    ; check: sig2 = (f32 [%f10], i32 [%x12], i32 [%x13]) -> f64 [%f10] native
    ; Arguments on stack where not necessary
    sig3 = (f64 [%f10], i32 [0], i32 [4]) -> f64 [%f10] native
    ; check: sig3 = (f64 [%f10], i32 [0], i32 [4]) -> f64 [%f10] native
    ; Stack argument before register argument
    sig4 = (f32 [72], i32 [%x10]) native
    ; check: sig4 = (f32 [72], i32 [%x10]) native
    ; Return value on stack
    sig5 = () -> f32 [0] native
    ; check: sig5 = () -> f32 [0] native
    ; function + signature
    fn15 = function %bar(i32 [%x10]) -> b1 [%x10] native
    ; check: sig6 = (i32 [%x10]) -> b1 [%x10] native
    ; nextln: fn0 = sig6 %bar
 ebb0(v0: i32):
    return v0
 }
--- a/filetests/isa/riscv/regmove.cton
+++ b/filetests/isa/riscv/regmove.cton
@@ -0,0 +1,15 @@
 ; Test tracking of register moves.
 test binemit
 isa riscv
 function %regmoves(i32 link [%x1]) -> i32 link [%x1] {
 ebb0(v9999: i32):
    [-,%x10]            v1 = iconst.i32 1
    ; v1 starts out in %x10, so the first use encodes x10 as its source.
    [-,%x7]             v2 = iadd_imm v1, 1000          ; bin: 3e850393
    ; Move v1 to %x11; the identical instruction below must now encode x11
    ; as its source register, which changes the expected bytes.
                        regmove v1, %x10 -> %x11        ; bin: 00050593
    [-,%x7]             v3 = iadd_imm v1, 1000          ; bin: 3e858393
    ; Move v1 back to %x10; the expected bytes return to the original value.
                        regmove v1, %x11 -> %x10        ; bin: 00058513
    [-,%x7]             v4 = iadd_imm v1, 1000          ; bin: 3e850393
    return v9999
 }
--- a/filetests/isa/riscv/split-args.cton
+++ b/filetests/isa/riscv/split-args.cton
@@ -0,0 +1,55 @@
 ; Test the legalization of EBB arguments that are split.
 test legalizer
 isa riscv
 ; regex: V=v\d+
 function %simple(i64, i64) -> i64 {
 ebb0(v1: i64, v2: i64):
 ; check: $ebb0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32):
    jump ebb1(v1)
    ; check: jump $ebb1($v1l, $v1h)
 ebb1(v3: i64):
 ; check: $ebb1($(v3l=$V): i32, $(v3h=$V): i32):
    v4 = band v3, v2
    ; check: $(v4l=$V) = band $v3l, $v2l
    ; check: $(v4h=$V) = band $v3h, $v2h
    return v4
    ; check: return $v4l, $v4h, $link
 }
 function %multi(i64) -> i64 {
 ebb1(v1: i64):
 ; check: $ebb1($(v1l=$V): i32, $(v1h=$V): i32, $(link=$V): i32):
    jump ebb2(v1, v1)
    ; check: jump $ebb2($v1l, $v1l, $v1h, $v1h)
 ebb2(v2: i64, v3: i64):
 ; check: $ebb2($(v2l=$V): i32, $(v3l=$V): i32, $(v2h=$V): i32, $(v3h=$V): i32):
    jump ebb3(v2)
    ; check: jump $ebb3($v2l, $v2h)
 ebb3(v4: i64):
 ; check: $ebb3($(v4l=$V): i32, $(v4h=$V): i32):
    v5 = band v4, v3
    ; check: $(v5l=$V) = band $v4l, $v3l
    ; check: $(v5h=$V) = band $v4h, $v3h
    return v5
    ; check: return $v5l, $v5h, $link
 }
 function %loop(i64, i64) -> i64 {
 ebb0(v1: i64, v2: i64):
 ; check: $ebb0($(v1l=$V): i32, $(v1h=$V): i32, $(v2l=$V): i32, $(v2h=$V): i32, $(link=$V): i32):
    jump ebb1(v1)
    ; check: jump $ebb1($v1l, $v1h)
 ebb1(v3: i64):
 ; check: $ebb1($(v3l=$V): i32, $(v3h=$V): i32):
    v4 = band v3, v2
    ; check: $(v4l=$V) = band $v3l, $v2l
    ; check: $(v4h=$V) = band $v3h, $v2h
    jump ebb1(v4)
    ; check: jump $ebb1($v4l, $v4h)
 }
--- a/filetests/isa/riscv/verify-encoding.cton
+++ b/filetests/isa/riscv/verify-encoding.cton
@@ -0,0 +1,21 @@
 test verifier
 isa riscv
 function %RV32I(i32 link [%x1]) -> i32 link [%x1] {
    fn0 = function %foo()
 ebb0(v9999: i32):
    ; This iconst.i32 needs legalizing, so its hand-written encoding should be rejected.
    [R#0,-]         v1 = iconst.i32 0xf0f0f0f0f0 ; error: Instruction failed to re-encode
    return v9999
 }
 function %RV32I(i32 link [%x1]) -> i32 link [%x1] {
    fn0 = function %foo()
 ebb0(v9999: i32):
    v1 = iconst.i32 1
    v2 = iconst.i32 2
    ; The iadd below carries a hand-written R#0 encoding. The riscv legalizer
    ; test expects R#0c for an i32 iadd, so the verifier should flag a mismatch.
    [R#0,-]         v3 = iadd v1, v2 ; error: Instruction re-encoding
    return v9999
 }
--- a/filetests/licm/basic.cton
+++ b/filetests/licm/basic.cton
@@ -0,0 +1,31 @@
 test licm
 function %simple_loop(i32) -> i32 {
 ebb1(v0: i32):
    v1 = iconst.i32 1
    v2 = iconst.i32 2
    v3 = iadd v1, v2
    brz v0, ebb2(v0)
    v4 = isub v0, v1
    jump ebb1(v4)
 ebb2(v5: i32):
    return v5
 }
 ; sameln: function %simple_loop
 ; nextln: ebb2(v6: i32):
 ; nextln:     v1 = iconst.i32 1
 ; nextln:     v2 = iconst.i32 2
 ; nextln:     v3 = iadd v1, v2
 ; nextln:     jump ebb0(v6)
 ; nextln: 
 ; nextln: ebb0(v0: i32):
 ; nextln:     brz v0, ebb1(v0)
 ; nextln:     v4 = isub v0, v1
 ; nextln:     jump ebb0(v4)
 ; nextln: 
 ; nextln: ebb1(v5: i32):
 ; nextln:     return v5
 ; nextln: }
--- a/filetests/licm/complex.cton
+++ b/filetests/licm/complex.cton
@@ -0,0 +1,81 @@
 test licm
 function %complex(i32) -> i32 {
 ebb0(v0: i32):
    v1 = iconst.i32 1
    v19 = iconst.i32 4
    v2 = iadd v1, v0
    brz v0, ebb1(v1)
    jump ebb3(v2)
 ebb1(v3: i32):
    v4 = iconst.i32 2
    v5 = iadd v3, v2
    v6 = iadd v4, v0
    jump ebb2(v6)
 ebb2(v7: i32):
    v8 = iadd v7, v3
    v9 = iadd v0, v2
    brz v0, ebb1(v7)
    jump ebb5(v8)
 ebb3(v10: i32):
    v11 = iconst.i32 3
    v12 = iadd v10, v11
    v13 = iadd v2, v11
    jump ebb4(v11)
 ebb4(v14: i32):
    v15 = iadd v12, v2
    brz v0, ebb3(v14)
    jump ebb5(v14)
 ebb5(v16: i32):
    v17 = iadd v16, v1
    v18 = iadd v1, v19
    brz v0, ebb0(v18)
    return v17
 }
 ; sameln: function %complex
 ; nextln: ebb6(v20: i32):
 ; nextln:     v1 = iconst.i32 1
 ; nextln:     v2 = iconst.i32 4
 ; nextln:     v5 = iconst.i32 2
 ; nextln:     v12 = iconst.i32 3
 ; nextln:     v19 = iadd v1, v2
 ; nextln:     jump ebb0(v20)
 ; nextln: 
 ; nextln: ebb0(v0: i32):
 ; nextln:     v3 = iadd.i32 v1, v0
 ; nextln:     v7 = iadd.i32 v5, v0
 ; nextln:     v10 = iadd v0, v3
 ; nextln:     brz v0, ebb1(v1)
 ; nextln:     v14 = iadd v3, v12
 ; nextln:     jump ebb3(v3)
 ; nextln: 
 ; nextln: ebb1(v4: i32):
 ; nextln:     v6 = iadd v4, v3
 ; nextln:     jump ebb2(v7)
 ; nextln: 
 ; nextln: ebb2(v8: i32):
 ; nextln:     v9 = iadd v8, v4
 ; nextln:     brz.i32 v0, ebb1(v8)
 ; nextln:     jump ebb5(v9)
 ; nextln: 
 ; nextln: ebb3(v11: i32):
 ; nextln:     v13 = iadd v11, v12
 ; nextln:     jump ebb4(v12)
 ; nextln: 
 ; nextln: ebb4(v15: i32):
 ; nextln:     v16 = iadd.i32 v13, v3
 ; nextln:     brz.i32 v0, ebb3(v15)
 ; nextln:     jump ebb5(v15)
 ; nextln: 
 ; nextln: ebb5(v17: i32):
 ; nextln:     v18 = iadd v17, v1
 ; nextln:     brz.i32 v0, ebb0(v19)
 ; nextln:     return v18
 ; nextln: }
--- a/filetests/licm/multiple-blocks.cton
+++ b/filetests/licm/multiple-blocks.cton
@@ -0,0 +1,46 @@
 test licm
 function %multiple_blocks(i32) -> i32 {
 ebb0(v0: i32):
    jump ebb1(v0)
 ebb1(v10: i32):
    v11 = iconst.i32 1
    v12 = iconst.i32 2
    v13 = iadd v11, v12
    brz v10, ebb2(v10)
    v15 = isub v10, v11
    brz v15, ebb3(v15)
    v14 = isub v10, v11
    jump ebb1(v14)
 ebb2(v20: i32):
    return v20
 ebb3(v30: i32):
    v31 = iadd v11, v13
    jump ebb1(v30)
 }
 ; sameln: function %multiple_blocks
 ; nextln: ebb0(v0: i32):
 ; nextln:     v2 = iconst.i32 1
 ; nextln:     v3 = iconst.i32 2
 ; nextln:     v4 = iadd v2, v3
 ; nextln:     v9 = iadd v2, v4
 ; nextln:     jump ebb1(v0)
 ; nextln: 
 ; nextln: ebb1(v1: i32):
 ; nextln:     brz v1, ebb2(v1)
 ; nextln:     v5 = isub v1, v2
 ; nextln:     brz v5, ebb3(v5)
 ; nextln:     v6 = isub v1, v2
 ; nextln:     jump ebb1(v6)
 ; nextln: 
 ; nextln: ebb2(v7: i32):
 ; nextln:     return v7
 ; nextln: 
 ; nextln: ebb3(v8: i32):
 ; nextln:     jump ebb1(v8)
 ; nextln: }
--- a/filetests/licm/nested_loops.cton
+++ b/filetests/licm/nested_loops.cton
@@ -0,0 +1,52 @@
 test licm
 function %nested_loops(i32) -> i32 {
 ebb0(v0: i32):
    v1 = iconst.i32 1
    v2 = iconst.i32 2
    v3 = iadd v1, v2
    v4 = isub v0, v1
    jump ebb1(v4,v4)
 ebb1(v10: i32,v11: i32):
    brz v11, ebb2(v10)
    v12 = iconst.i32 1
    v15 = iadd v12, v4
    v13 = isub v11, v12
    jump ebb1(v10,v13)
 ebb2(v20: i32):
    brz v20, ebb3(v20)
    jump ebb0(v20)
 ebb3(v30: i32):
    return v30
 }
 ; sameln: function %nested_loops
 ; nextln: ebb4(v12: i32):
 ; nextln:     v1 = iconst.i32 1
 ; nextln:     v2 = iconst.i32 2
 ; nextln:     v3 = iadd v1, v2
 ; nextln:     v7 = iconst.i32 1
 ; nextln:     jump ebb0(v12)
 ; nextln: 
 ; nextln: ebb0(v0: i32):
 ; nextln:     v4 = isub v0, v1
 ; nextln:     v8 = iadd.i32 v7, v4
 ; nextln:     jump ebb1(v4, v4)
 ; nextln: 
 ; nextln: ebb1(v5: i32, v6: i32):
 ; nextln:     brz v6, ebb2(v5)
 ; nextln:     v9 = isub v6, v7
 ; nextln:     jump ebb1(v5, v9)
 ; nextln: 
 ; nextln: ebb2(v10: i32):
 ; nextln:     brz v10, ebb3(v10)
 ; nextln:     jump ebb0(v10)
 ; nextln: 
 ; nextln: ebb3(v11: i32):
 ; nextln:     return v11
 ; nextln: }
--- a/filetests/parser/branch.cton
+++ b/filetests/parser/branch.cton
@@ -0,0 +1,113 @@
 ; Parsing branches and jumps.
 test cat
 ; Jumps with no arguments. The '()' empty argument list is optional.
 function %minimal() {
 ebb0:
    jump ebb1
 ebb1:
    jump ebb0()
 }
 ; sameln: function %minimal() native {
 ; nextln: ebb0:
 ; nextln:     jump ebb1
 ; nextln: 
 ; nextln: ebb1:
 ; nextln:     jump ebb0
 ; nextln: }
 ; Jumps with 1 arg.
 function %onearg(i32) {
 ebb0(v90: i32):
    jump ebb1(v90)
 ebb1(v91: i32):
    jump ebb0(v91)
 }
 ; sameln: function %onearg(i32) native {
 ; nextln: ebb0($v90: i32):
 ; nextln:     jump ebb1($v90)
 ; nextln: 
 ; nextln: ebb1($v91: i32):
 ; nextln:     jump ebb0($v91)
 ; nextln: }
 ; Jumps with 2 args.
 function %twoargs(i32, f32) {
 ebb0(v90: i32, v91: f32):
    jump ebb1(v90, v91)
 ebb1(v92: i32, v93: f32):
    jump ebb0(v92, v93)
 }
 ; sameln: function %twoargs(i32, f32) native {
 ; nextln: ebb0($v90: i32, $v91: f32):
 ; nextln:     jump ebb1($v90, $v91)
 ; nextln: 
 ; nextln: ebb1($v92: i32, $v93: f32):
 ; nextln:     jump ebb0($v92, $v93)
 ; nextln: }
 ; Branches with no arguments. The '()' empty argument list is optional.
 function %minimal(i32) {
 ebb0(v90: i32):
    brz v90, ebb1
 ebb1:
    brnz v90, ebb1()
 }
 ; sameln: function %minimal(i32) native {
 ; nextln: ebb0($v90: i32):
 ; nextln:     brz $v90, ebb1
 ; nextln: 
 ; nextln: ebb1:
 ; nextln:     brnz.i32 $v90, ebb1
 ; nextln: }
 ; Branches with 2 args.
 function %twoargs(i32, f32) {
 ebb0(v90: i32, v91: f32):
    brz v90, ebb1(v90, v91)
 ebb1(v92: i32, v93: f32):
    brnz v90, ebb0(v92, v93)
 }
 ; sameln: function %twoargs(i32, f32) native {
 ; nextln: ebb0($v90: i32, $v91: f32):
 ; nextln:     brz $v90, ebb1($v90, $v91)
 ; nextln: 
 ; nextln: ebb1($v92: i32, $v93: f32):
 ; nextln:     brnz.i32 $v90, ebb0($v92, $v93)
 ; nextln: }
 ; Jump tables. The expected output below shows entities renumbered in
 ; definition order (jt200 becomes jt0, jt2 becomes jt1, and ebb10, ebb20,
 ; ebb30, ebb40 become ebb0, ebb1, ebb2, ebb3), and the all-zero table jt200
 ; printed with a single 0 entry.
 function %jumptable(i32) {
    jt200 = jump_table 0, 0
    jt2 = jump_table 0, 0, ebb10, ebb40, ebb20, ebb30
 ebb10(v3: i32):
    br_table v3, jt2
    trap
 ebb20:
    trap
 ebb30:
    trap
 ebb40:
    trap
 }
 ; sameln: function %jumptable(i32) native {
 ; nextln:     jt0 = jump_table 0
 ; nextln:     jt1 = jump_table 0, 0, ebb0, ebb3, ebb1, ebb2
 ; nextln: 
 ; nextln: ebb0($v3: i32):
 ; nextln:     br_table $v3, jt1
 ; nextln:     trap
 ; nextln: 
 ; nextln: ebb1:
 ; nextln:     trap
 ; nextln: 
 ; nextln: ebb2:
 ; nextln:     trap
 ; nextln: 
 ; nextln: ebb3:
 ; nextln:     trap
 ; nextln: }
--- a/filetests/parser/call.cton
+++ b/filetests/parser/call.cton
@@ -0,0 +1,80 @@
 ; Parser tests for call and return syntax.
 test cat
 function %mini() {
 ebb1:
    return
 }
 ; sameln: function %mini() native {
 ; nextln: ebb0:
 ; nextln:     return
 ; nextln: }
 function %r1() -> i32, f32 spiderwasm {
 ebb1:
    v1 = iconst.i32 3
    v2 = f32const 0.0
    return v1, v2
 }
 ; sameln: function %r1() -> i32, f32 spiderwasm {
 ; nextln: ebb0:
 ; nextln:     $v1 = iconst.i32 3
 ; nextln:     $v2 = f32const 0.0
 ; nextln:     return $v1, $v2
 ; nextln: }
 function %signatures() {
    sig10 = ()
    sig11 = (i32, f64) -> i32, b1 spiderwasm
    fn5 = sig11 %foo
    fn8 = function %bar(i32) -> b1
 }
 ; sameln: function %signatures() native {
 ; nextln:     $sig10 = () native
 ; nextln:     $sig11 = (i32, f64) -> i32, b1 spiderwasm
 ; nextln:     sig2 = (i32) -> b1 native
 ; nextln:     $fn5 = $sig11 %foo
 ; nextln:     $fn8 = sig2 %bar
 ; nextln: }
 function %direct() {
    fn0 = function %none()
    fn1 = function %one() -> i32
    fn2 = function %two() -> i32, f32
 ebb0:
    call fn0()
    v1 = call fn1()
    v2, v3 = call fn2()
    return
 }
 ; check: call $fn0()
 ; check: $v1 = call $fn1()
 ; check: $v2, $v3 = call $fn2()
 ; check: return
 function %indirect(i64) {
    sig0 = (i64)
    sig1 = () -> i32
    sig2 = () -> i32, f32
 ebb0(v0: i64):
    v1 = call_indirect sig1, v0()
    call_indirect sig0, v1(v0)
    v3, v4 = call_indirect sig2, v1()
    return
 }
 ; check: $v1 = call_indirect $sig1, $v0()
 ; check: call_indirect $sig0, $v1($v0)
 ; check: $v3, $v4 = call_indirect $sig2, $v1()
 ; check: return
 ; Special purpose function arguments
 function %special1(i32 sret, i32 fp, i32 csr, i32 link) -> i32 link, i32 fp, i32 csr, i32 sret {
 ebb0(v1: i32, v2: i32, v3: i32, v4: i32):
    return v4, v2, v3, v1
 }
 ; check: function %special1(i32 sret, i32 fp, i32 csr, i32 link) -> i32 link, i32 fp, i32 csr, i32 sret native {
 ; check: ebb0($v1: i32, $v2: i32, $v3: i32, $v4: i32):
 ; check:     return $v4, $v2, $v3, $v1
 ; check: }
--- a/filetests/parser/instruction_encoding.cton
+++ b/filetests/parser/instruction_encoding.cton
@@ -0,0 +1,24 @@
 test cat
 isa riscv
 ; regex: WS=[ \t]*
 function %foo(i32, i32) {
 ebb1(v0: i32, v1: i32):
    [-,-] v2 = iadd v0, v1
    [-] trap
    [R#1234, %x5, %x11] v6, v7 = iadd_cout v2, v0
    [Rshamt#beef, %x25] v8 = ishl_imm v6, 2
    v9 = iadd v8, v7
    [Iret#5] return v0, v8
 }
 ; sameln: function %foo(i32, i32) native {
 ; nextln: $ebb1($v0: i32, $v1: i32):
 ; nextln:     [-,-]$WS $v2 = iadd $v0, $v1
 ; nextln:     [-]$WS trap
 ; nextln:     [R#1234,%x5,%x11]$WS $v6, $v7 = iadd_cout $v2, $v0
 ; nextln:     [Rshamt#beef,%x25]$WS $v8 = ishl_imm $v6, 2
 ; nextln:     [-,-]$WS $v9 = iadd $v8, $v7
 ; nextln:     [Iret#05]$WS return $v0, $v8
 ; nextln: }
--- a/filetests/parser/keywords.cton
+++ b/filetests/parser/keywords.cton
@@ -0,0 +1,5 @@
 test cat
 ; 'function' is not a keyword, and can be used as the name of a function too.
 function %function() {}
 ; check: function %function() native
--- a/filetests/parser/rewrite.cton
+++ b/filetests/parser/rewrite.cton
@@ -0,0 +1,37 @@
 ; The .cton parser can't preserve the actual entity numbers in the input file
 ; since entities are numbered as they are created. For entities declared in the
 ; preamble, this is no problem, but for EBB and value references, mapping
 ; source numbers to real numbers can be a problem.
 ;
 ; It is possible to refer to instructions and EBBs that have not yet been
 ; defined in the lexical order, so the parser needs to rewrite these references
 ; after the fact.
 test cat
 ; Check that defining numbers are rewritten.
 function %defs() {
 ebb100(v20: i32):
    v1000 = iconst.i32x8 5
    v9200 = f64const 0x4.0p0
    trap
 }
 ; sameln: function %defs() native {
 ; nextln: $ebb100($v20: i32):
 ; nextln:     $v1000 = iconst.i32x8 5
 ; nextln:     $v9200 = f64const 0x1.0000000000000p2
 ; nextln:     trap
 ; nextln: }
 ; Using values.
 function %use_value() {
 ebb100(v20: i32):
    v1000 = iadd_imm v20, 5
    v200 = iadd v20, v1000
    jump ebb100(v1000)
 }
 ; sameln: function %use_value() native {
 ; nextln: ebb0($v20: i32):
 ; nextln:     $v1000 = iadd_imm $v20, 5
 ; nextln:     $v200 = iadd $v20, $v1000
 ; nextln:     jump ebb0($v1000)
 ; nextln: }
--- a/filetests/parser/ternary.cton
+++ b/filetests/parser/ternary.cton
@@ -0,0 +1,24 @@
 test cat
 test verifier
 function %add_i96(i32, i32, i32, i32, i32, i32) -> i32, i32, i32 {
 ebb1(v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32):
    v10, v11 = iadd_cout v1, v4
    ; check: $v10, $v11 = iadd_cout $v1, $v4
    v20, v21 = iadd_carry v2, v5, v11
    ; check: $v20, $v21 = iadd_carry $v2, $v5, $v11
    v30 = iadd_cin v3, v6, v21
    ; check: $v30 = iadd_cin $v3, $v6, $v21
    return v10, v20, v30
 }
 function %sub_i96(i32, i32, i32, i32, i32, i32) -> i32, i32, i32 {
 ebb1(v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32):
    v10, v11 = isub_bout v1, v4
    ; check: $v10, $v11 = isub_bout $v1, $v4
    v20, v21 = isub_borrow v2, v5, v11
    ; check: $v20, $v21 = isub_borrow $v2, $v5, $v11
    v30 = isub_bin v3, v6, v21
    ; check: $v30 = isub_bin $v3, $v6, $v21
    return v10, v20, v30
 }
--- a/filetests/parser/tiny.cton
+++ b/filetests/parser/tiny.cton
@@ -0,0 +1,193 @@
 test cat
 ; The smallest possible function.
 function %minimal() {
 ebb0:
    trap
 }
 ; sameln: function %minimal() native {
 ; nextln: ebb0:
 ; nextln:     trap
 ; nextln: }
 ; Create and use values.
 ; Polymorphic instructions with type suffix.
 function %ivalues() {
 ebb0:
    v0 = iconst.i32 2
    v1 = iconst.i8 6
    v2 = ishl v0, v1
 }
 ; sameln: function %ivalues() native {
 ; nextln: ebb0:
 ; nextln:     $v0 = iconst.i32 2
 ; nextln:     $v1 = iconst.i8 6
 ; nextln:     $v2 = ishl $v0, $v1
 ; nextln: }
 ; Create and use values.
 ; Polymorphic instructions with type suffix.
 function %bvalues() {
 ebb0:
    v0 = bconst.b32 true
    v1 = bconst.b8 false
    v2 = bextend.b32 v1
    v3 = bxor v0, v2
 }
 ; sameln: function %bvalues() native {
 ; nextln: ebb0:
 ; nextln:     $v0 = bconst.b32 true
 ; nextln:     $v1 = bconst.b8 false
 ; nextln:     $v2 = bextend.b32 $v1
 ; nextln:     $v3 = bxor $v0, $v2
 ; nextln: }
 ; Polymorphic instruction controlled by second operand.
 function %select() {
 ebb0(v90: i32, v91: i32, v92: b1):
    v0 = select v92, v90, v91
 }
 ; sameln: function %select() native {
 ; nextln: ebb0($v90: i32, $v91: i32, $v92: b1):
 ; nextln:     $v0 = select $v92, $v90, $v91
 ; nextln: }
 ; Lane indexes.
 function %lanes() {
 ebb0:
    v0 = iconst.i32x4 2
    v1 = extractlane v0, 3
    v2 = insertlane v0, 1, v1
 }
 ; sameln: function %lanes() native {
 ; nextln: ebb0:
 ; nextln:     $v0 = iconst.i32x4 2
 ; nextln:     $v1 = extractlane $v0, 3
 ; nextln:     $v2 = insertlane $v0, 1, $v1
 ; nextln: }
 ; Integer condition codes.
 function %icmp(i32, i32) {
 ebb0(v90: i32, v91: i32):
    v0 = icmp eq v90, v91
    v1 = icmp ult v90, v91
    v2 = icmp_imm sge v90, -12
    v3 = irsub_imm v91, 45
    br_icmp eq v90, v91, ebb0(v91, v90)
 }
 ; sameln: function %icmp(i32, i32) native {
 ; nextln: ebb0($v90: i32, $v91: i32):
 ; nextln:     $v0 = icmp eq $v90, $v91
 ; nextln:     $v1 = icmp ult $v90, $v91
 ; nextln:     $v2 = icmp_imm sge $v90, -12
 ; nextln:     $v3 = irsub_imm $v91, 45
 ; nextln:     br_icmp eq $v90, $v91, ebb0($v91, $v90)
 ; nextln: }
 ; Floating condition codes.
 function %fcmp(f32, f32) {
 ebb0(v90: f32, v91: f32):
    v0 = fcmp eq v90, v91
    v1 = fcmp uno v90, v91
    v2 = fcmp lt v90, v91
 }
 ; sameln: function %fcmp(f32, f32) native {
 ; nextln: ebb0($v90: f32, $v91: f32):
 ; nextln:     $v0 = fcmp eq $v90, $v91
 ; nextln:     $v1 = fcmp uno $v90, $v91
 ; nextln:     $v2 = fcmp lt $v90, $v91
 ; nextln: }
 ; The bitcast instruction has two type variables: The controlling type variable
 ; controls the output type, and the input type is a free variable.
 function %bitcast(i32, f32) {
 ebb0(v90: i32, v91: f32):
    v0 = bitcast.i8x4 v90
    v1 = bitcast.i32 v91
 }
 ; sameln: function %bitcast(i32, f32) native {
 ; nextln: ebb0($v90: i32, $v91: f32):
 ; nextln:     $v0 = bitcast.i8x4 $v90
 ; nextln:     $v1 = bitcast.i32 $v91
 ; nextln: }
 ; Stack slot references
 function %stack() {
    ss10 = spill_slot 8
    ss2 = local 4
    ss3 = incoming_arg 4, offset 8
    ss4 = outgoing_arg 4
 ebb0:
    v1 = stack_load.i32 ss10
    v2 = stack_load.i32 ss10+4
    stack_store v1, ss10+2
    stack_store v2, ss2
 }
 ; sameln: function %stack() native {
 ; nextln:    $ss10 = spill_slot 8
 ; nextln:    $ss2 = local 4
 ; nextln:    $ss3 = incoming_arg 4, offset 8
 ; nextln:    $ss4 = outgoing_arg 4
 ; check: ebb0:
 ; nextln: $v1 = stack_load.i32 $ss10
 ; nextln: $v2 = stack_load.i32 $ss10+4
 ; nextln: stack_store $v1, $ss10+2
 ; nextln: stack_store $v2, $ss2
 ; Heap access instructions.
 function %heap(i32) {
    ; TODO: heap0 = heap %foo
 ebb0(v1: i32):
    v2 = heap_load.f32 v1
    v3 = heap_load.f32 v1+12
    heap_store v3, v1
 }
 ; sameln: function %heap(i32) native {
 ; nextln: ebb0($v1: i32):
 ; nextln:     $v2 = heap_load.f32 $v1
 ; nextln:     $v3 = heap_load.f32 $v1+12
 ; nextln:     heap_store $v3, $v1
 ; Memory access instructions.
 function %memory(i32) {
 ebb0(v1: i32):
    v2 = load.i64 v1
    v3 = load.i64 aligned v1
    v4 = load.i64 notrap v1
    v5 = load.i64 notrap aligned v1
    v6 = load.i64 aligned notrap v1
    v7 = load.i64 v1-12
    v8 = load.i64 notrap v1+0x1_0000
    store v2, v1
    store aligned v3, v1+12
    store notrap aligned v3, v1-12
 }
 ; sameln: function %memory(i32) native {
 ; nextln: ebb0($v1: i32):
 ; nextln:     $v2 = load.i64 $v1
 ; nextln:     $v3 = load.i64 aligned $v1
 ; nextln:     $v4 = load.i64 notrap $v1
 ; nextln:     $v5 = load.i64 notrap aligned $v1
 ; nextln:     $v6 = load.i64 notrap aligned $v1
 ; nextln:     $v7 = load.i64 $v1-12
 ; nextln:     $v8 = load.i64 notrap $v1+0x0001_0000
 ; nextln:     store $v2, $v1
 ; nextln:     store aligned $v3, $v1+12
 ; nextln:     store notrap aligned $v3, $v1-12
 ; Register diversions.
 ; This test file has no ISA, so we can only use register unit numbers.
 function %diversion(i32) {
 ebb0(v1: i32):
    regmove v1, %10 -> %20
    regmove v1, %20 -> %10
    return
 }
 ; sameln: function %diversion(i32) native {
 ; nextln: ebb0($v1: i32):
 ; nextln:     regmove $v1, %10 -> %20
 ; nextln:     regmove $v1, %20 -> %10
 ; nextln:     return
 ; nextln: }
--- a/filetests/regalloc/basic.cton
+++ b/filetests/regalloc/basic.cton
@@ -0,0 +1,80 @@
 test regalloc
 ; We can add more ISAs once they have defined encodings.
 isa riscv
 ; regex: RX=%x\d+
 function %add(i32, i32) {
 ebb0(v1: i32, v2: i32):
    v3 = iadd v1, v2
 ; check: [R#0c,%x5]
 ; sameln: iadd
    return
 }
 ; Function with a dead argument.
 function %dead_arg(i32, i32) -> i32{
 ebb0(v1: i32, v2: i32):
 ; not: regmove
 ; check: return $v1
    return v1
 }
 ; Return a value from a different register.
 function %move1(i32, i32) -> i32 {
 ebb0(v1: i32, v2: i32):
 ; not: regmove
 ; check: regmove $v2, %x11 -> %x10
 ; nextln: return $v2
        return v2
 }
 ; Swap two registers.
 function %swap(i32, i32) -> i32, i32 {
 ebb0(v1: i32, v2: i32):
 ; not: regmove
 ; check: regmove $v2, %x11 -> $(tmp=$RX)
 ; nextln: regmove $v1, %x10 -> %x11
 ; nextln: regmove $v2, $tmp -> %x10
 ; nextln: return $v2, $v1
        return v2, v1
 }
 ; Return an EBB argument.
 function %retebb(i32, i32) -> i32 {
 ebb0(v1: i32, v2: i32):
    brnz v1, ebb1(v1)
    jump ebb1(v2)
 ebb1(v10: i32):
    return v10
 }
 ; Pass an EBB argument as a function argument.
 function %callebb(i32, i32) -> i32 {
    fn0 = function %foo(i32) -> i32
 ebb0(v1: i32, v2: i32):
    brnz v1, ebb1(v1)
    jump ebb1(v2)
 ebb1(v10: i32):
    v11 = call fn0(v10)
    return v11
 }
 ; Pass an EBB argument as a jump argument.
 function %jumpebb(i32, i32) -> i32 {
    fn0 = function %foo(i32) -> i32
 ebb0(v1: i32, v2: i32):
    brnz v1, ebb1(v1, v2)
    jump ebb1(v2, v1)
 ebb1(v10: i32, v11: i32):
    jump ebb2(v10, v11)
 ebb2(v20: i32, v21: i32):
    return v21
 }
--- a/filetests/regalloc/coalesce.cton
+++ b/filetests/regalloc/coalesce.cton
@@ -0,0 +1,111 @@
 test regalloc
 isa riscv
 ; Test the coalescer.
 ; regex: V=v\d+
 ; regex: WS=\s+
 ; This function is already CSSA, so no copies should be inserted.
 function %cssa(i32) -> i32 {
 ebb0(v0: i32):
    ; not: copy
    ; v0 is used by the branch and passed as an arg - that's no conflict.
    brnz v0, ebb1(v0)
    ; v0 is live across the branch above. That's no conflict.
    v1 = iadd_imm v0, 7
    jump ebb1(v1)
 ebb1(v10: i32):
    v11 = iadd_imm v10, 7
    return v11
 }
 function %trivial(i32) -> i32 {
 ebb0(v0: i32):
    ; check: $(cp1=$V) = copy $v0
    ; nextln: brnz $v0, $ebb1($cp1)
    brnz v0, ebb1(v0)
    ; not: copy
    v1 = iadd_imm v0, 7
    jump ebb1(v1)
 ebb1(v10: i32):
    ; Using v0 in the destination EBB causes a conflict.
    v11 = iadd v10, v0
    return v11
 }
 ; A value is used as an SSA argument twice in the same branch.
 function %dualuse(i32) -> i32 {
 ebb0(v0: i32):
    ; check: $(cp1=$V) = copy $v0
    ; nextln: brnz $v0, $ebb1($v0, $cp1)
    brnz v0, ebb1(v0, v0)
    ; not: copy
    v1 = iadd_imm v0, 7
    v2 = iadd_imm v1, 56
    jump ebb1(v1, v2)
 ebb1(v10: i32, v11: i32):
    v12 = iadd v10, v11
    return v12
 }
 ; Interference away from the branch
 ; The interference can be broken with a copy at either branch.
 function %interference(i32) -> i32 {
 ebb0(v0: i32):
    ; not: copy
    brnz v0, ebb1(v0)
    v1 = iadd_imm v0, 7
    ; v1 and v0 interfere here:
    v2 = iadd_imm v0, 8
    ; check: $(cp1=$V) = copy $v1
    ; not: copy
    ; check: jump $ebb1($cp1)
    jump ebb1(v1)
 ebb1(v10: i32):
    ; not: copy
    v11 = iadd_imm v10, 7
    return v11
 }
 ; A loop where one induction variable is used as a backedge argument.
 function %fibonacci(i32) -> i32 {
 ebb0(v0: i32):
    ; not: copy
    v1 = iconst.i32 1
    v2 = iconst.i32 2
    jump ebb1(v1, v2)
 ebb1(v10: i32, v11: i32):
    ; v11 needs to be isolated because it interferes with v10.
    ; check: $ebb1($v10: i32, $(nv11a=$V): i32)
    ; check: $v11 = copy $nv11a
    v12 = iadd v10, v11
    v13 = icmp ult v12, v0
    ; check: $(nv11b=$V) = copy $v11
    ; not: copy
    ; check: brnz $v13, $ebb1($nv11b, $v12)
    brnz v13, ebb1(v11, v12)
    return v12
 }
 ; Function arguments passed on the stack aren't allowed to be part of a virtual
 ; register, at least for now. This is because the other values in the virtual
 ; register would need to be spilled to the incoming_arg stack slot which we treat
 ; as belonging to the caller.
 function %stackarg(i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i32 {
 ; check: ss0 = incoming_arg 4
 ; not: incoming_arg
 ebb0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32):
    ; check: fill v8
    ; not: v8
    brnz v0, ebb1(v8)
    jump ebb1(v7)
 ebb1(v10: i32):
    v11 = iadd_imm v10, 1
    return v11
 }
--- a/filetests/regalloc/constraints.cton
+++ b/filetests/regalloc/constraints.cton
@@ -0,0 +1,82 @@
 test regalloc
 isa intel
 ; regex: V=v\d+
 ; regex: REG=%r([abcd]x|[sd]i)
 ; Tied operands, both are killed at instruction.
 function %tied_easy() -> i32 {
 ebb0:
    v0 = iconst.i32 12
    v1 = iconst.i32 13
    ; not: copy
    ; check: isub
    v2 = isub v0, v1
    return v2
 }
 ; Tied operand is live after instruction.
 function %tied_alive() -> i32 {
 ebb0:
    v0 = iconst.i32 12
    v1 = iconst.i32 13
    ; check: $(v0c=$V) = copy $v0
    ; check: $v2 = isub $v0c, $v1
    v2 = isub v0, v1
    ; check: $v3 = iadd $v2, $v0
    v3 = iadd v2, v0
    return v3
 }
 ; Fixed register constraint.
 function %fixed_op() -> i32 {
 ebb0:
    ; check: ,%rax]
    ; sameln: $v0 = iconst.i32 12
    v0 = iconst.i32 12
    v1 = iconst.i32 13
    ; The dynamic shift amount must be in %rcx
    ; check: regmove $v0, %rax -> %rcx
    v2 = ishl v1, v0
    return v2
 }
 ; Fixed register constraint twice.
 function %fixed_op_twice() -> i32 {
 ebb0:
    ; check: ,%rax]
    ; sameln: $v0 = iconst.i32 12
    v0 = iconst.i32 12
    v1 = iconst.i32 13
    ; The dynamic shift amount must be in %rcx
    ; check: regmove $v0, %rax -> %rcx
    v2 = ishl v1, v0
    ; check: regmove $v0, %rcx -> $REG
    ; check: regmove $v2, $REG -> %rcx
    v3 = ishl v0, v2
    return v3
 }
 ; Tied use of a diverted register.
 function %fixed_op_twice() -> i32 {
 ebb0:
    ; check: ,%rax]
    ; sameln: $v0 = iconst.i32 12
    v0 = iconst.i32 12
    v1 = iconst.i32 13
    ; The dynamic shift amount must be in %rcx
    ; check: regmove $v0, %rax -> %rcx
    ; check: $v2 = ishl $v1, $v0
    v2 = ishl v1, v0
    ; Now v0 is globally allocated to %rax, but diverted to %rcx.
    ; Check that the tied def gets the diverted register.
    v3 = isub v0, v2
    ; not: regmove
    ; check: ,%rcx]
    ; sameln: isub
    ; Move it into place for the return value.
    ; check: regmove $v3, %rcx -> %rax
    return v3
 }
--- a/filetests/regalloc/spill.cton
+++ b/filetests/regalloc/spill.cton
@@ -0,0 +1,196 @@
 test regalloc
 ; Test the spiller on an ISA with few registers.
 ; RV32E has 16 registers, where:
 ; - %x0 is hardwired to zero.
 ; - %x1 is the return address.
 ; - %x2 is the stack pointer.
 ; - %x3 is the global pointer.
 ; - %x4 is the thread pointer.
 ; - %x10-%x15 are function arguments.
 ;
 ; regex: V=v\d+
 ; regex: WS=\s+
 isa riscv enable_e
 ; In straight-line code, the first value defined is spilled.
 ; That is in order:
 ; 1. The argument v1.
 ; 2. The link register.
 ; 3. The first computed value, v2
 function %pyramid(i32) -> i32 {
 ; check: ss0 = spill_slot 4
 ; check: ss1 = spill_slot 4
 ; check: ss2 = spill_slot 4
 ; not: spill_slot
 ebb0(v1: i32):
 ; check: $ebb0($(rv1=$V): i32, $(rlink=$V): i32)
    ; check: ,ss0]$WS $v1 = spill $rv1
    ; nextln: ,ss1]$WS $(link=$V) = spill $rlink
    ; not: spill
    v2 = iadd_imm v1, 12
    ; check: $(r1v2=$V) = iadd_imm
    ; nextln: ,ss2]$WS $v2 = spill $r1v2
    ; not: spill
    v3 = iadd_imm v2, 12
    v4 = iadd_imm v3, 12
    v5 = iadd_imm v4, 12
    v6 = iadd_imm v5, 12
    v7 = iadd_imm v6, 12
    v8 = iadd_imm v7, 12
    v9 = iadd_imm v8, 12
    v10 = iadd_imm v9, 12
    v11 = iadd_imm v10, 12
    v12 = iadd_imm v11, 12
    v13 = iadd_imm v12, 12
    v14 = iadd_imm v13, 12
    v33 = iadd v13, v14
    ; check: iadd $v13
    v32 = iadd v33, v12
    v31 = iadd v32, v11
    v30 = iadd v31, v10
    v29 = iadd v30, v9
    v28 = iadd v29, v8
    v27 = iadd v28, v7
    v26 = iadd v27, v6
    v25 = iadd v26, v5
    v24 = iadd v25, v4
    v23 = iadd v24, v3
    v22 = iadd v23, v2
    ; check: $(r2v2=$V) = fill $v2
    ; check: $v22 = iadd $v23, $r2v2
    v21 = iadd v22, v1
    ; check: $(r2v1=$V) = fill $v1
    ; check: $v21 = iadd $v22, $r2v1
    ; check: $(rlink2=$V) = fill $link
    return v21
    ; check: return $v21, $rlink2
 }
 ; All values live across a call must be spilled
 function %across_call(i32) {
    fn0 = function %foo(i32)
 ebb0(v1: i32):
    ; check: $v1 = spill
    call fn0(v1)
    ; check: call $fn0
    call fn0(v1)
    ; check: fill $v1
    ; check: call $fn0
    return
 }
 ; The same value used for two function arguments.
 function %doubleuse(i32) {
    fn0 = function %xx(i32, i32)
 ebb0(v0: i32):
    ; check: $(c=$V) = copy $v0
    call fn0(v0, v0)
    ; check: call $fn0($v0, $c)
    return
 }
 ; The same value used as indirect callee and argument.
 function %doubleuse_icall1(i32) {
    sig0 = (i32) native
 ebb0(v0: i32):
    ; not: copy
    call_indirect sig0, v0(v0)
    return
 }
 ; The same value used as indirect callee and two arguments.
 function %doubleuse_icall2(i32) {
    sig0 = (i32, i32) native
 ebb0(v0: i32):
    ; check: $(c=$V) = copy $v0
    call_indirect sig0, v0(v0, v0)
    ; check: call_indirect $sig0, $v0($v0, $c)
    return
 }
 ; Two arguments on the stack.
 function %stackargs(i32, i32, i32, i32, i32, i32, i32, i32) -> i32 {
 ; check: ss0 = incoming_arg 4
 ; check: ss1 = incoming_arg 4, offset 4
 ; not: incoming_arg
 ebb0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32):
    ; unordered: fill $v6
    ; unordered: fill $v7
    v10 = iadd v6, v7
    return v10
 }
 ; More EBB arguments than registers.
 function %ebbargs(i32) -> i32 {
 ebb0(v1: i32):
    ; check: $v1 = spill
    v2 = iconst.i32 1
    jump ebb1(v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2)
 ebb1(v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32, v20: i32, v21: i32):
    v22 = iadd v10, v11
    v23 = iadd v22, v12
    v24 = iadd v23, v13
    v25 = iadd v24, v14
    v26 = iadd v25, v15
    v27 = iadd v26, v16
    v28 = iadd v27, v17
    v29 = iadd v28, v18
    v30 = iadd v29, v19
    v31 = iadd v30, v20
    v32 = iadd v31, v21
    v33 = iadd v32, v1
    return v33
 }
 ; In straight-line code, the first value defined is spilled.
 ; That is in order:
 ; 1. The argument v1.
 ; 2. The link register.
 ; 3. The first computed value, v2
 function %use_spilled_value(i32) -> i32 {
 ; check: ss0 = spill_slot 4
 ; check: ss1 = spill_slot 4
 ; check: ss2 = spill_slot 4
 ebb0(v1: i32):
 ; check: $ebb0($(rv1=$V): i32, $(rlink=$V): i32)
    ; check: ,ss0]$WS $v1 = spill $rv1
    ; nextln: ,ss1]$WS $(link=$V) = spill $rlink
    ; not: spill
    v2 = iadd_imm v1, 12
    ; check: $(r1v2=$V) = iadd_imm
    ; nextln: ,ss2]$WS $v2 = spill $r1v2
    v3 = iadd_imm v2, 12
    v4 = iadd_imm v3, 12
    v5 = iadd_imm v4, 12
    v6 = iadd_imm v5, 12
    v7 = iadd_imm v6, 12
    v8 = iadd_imm v7, 12
    v9 = iadd_imm v8, 12
    v10 = iadd_imm v9, 12
    v11 = iadd_imm v10, 12
    v12 = iadd_imm v11, 12
    v13 = iadd_imm v12, 12
    v14 = iadd_imm v13, 12
    ; Here we have maximum register pressure, and v2 has been spilled.
    ; What happens if we use it?
    v33 = iadd v2, v14
    v32 = iadd v33, v12
    v31 = iadd v32, v11
    v30 = iadd v31, v10
    v29 = iadd v30, v9
    v28 = iadd v29, v8
    v27 = iadd v28, v7
    v26 = iadd v27, v6
    v25 = iadd v26, v5
    v24 = iadd v25, v4
    v23 = iadd v24, v3
    v22 = iadd v23, v2
    v21 = iadd v22, v1
    v20 = iadd v21, v13
    v19 = iadd v20, v2
    return v21
 }
--- a/filetests/simple_gvn/basic.cton
+++ b/filetests/simple_gvn/basic.cton
@@ -0,0 +1,41 @@
 test simple-gvn
 function %simple_redundancy(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = iadd v0, v1
    v3 = iadd v0, v1
    v4 = imul v2, v3
 ; check: v4 = imul $v2, $v2
    return v4
 }
 function %cascading_redundancy(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = iadd v0, v1
    v3 = iadd v0, v1
    v4 = imul v2, v3
    v5 = imul v2, v2
    v6 = iadd v4, v5
 ; check: v6 = iadd $v4, $v4
    return v6
 }
 function %redundancies_on_some_paths(i32, i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32, v2: i32):
    v3 = iadd v0, v1
    brz v3, ebb1
    v4 = iadd v0, v1
    jump ebb2(v4)
 ; check: jump ebb2(v3)
 ebb1:
    v5 = iadd v0, v1
    jump ebb2(v5)
 ; check: jump ebb2(v3)
 ebb2(v6: i32):
    v7 = iadd v0, v1
    v8 = iadd v6, v7
 ; check: v8 = iadd v6, v3
    return v8
 }
--- a/filetests/verifier/bad_layout.cton
+++ b/filetests/verifier/bad_layout.cton
@@ -0,0 +1,19 @@
 test verifier
 function %test(i32) {
    ebb0(v0: i32):
        jump ebb1       ; error: terminator
        return
    ebb1:
        jump ebb2
        brz v0, ebb3
    ebb2:
        jump ebb3
    ebb3:
        return
 }
 function %test(i32) {    ; Ok
    ebb0(v0: i32):
        return
 }
--- a/filetests/verifier/unreachable_code.cton
+++ b/filetests/verifier/unreachable_code.cton
@@ -0,0 +1,23 @@
 test verifier
 function %test() -> i32 {   ; Ok
 ebb0:
    v0 = iconst.i32 0
    v1 = iconst.i32 0
    jump ebb2
 ebb2:
    jump ebb4
 ebb4:
    jump ebb2
 ebb3(v2: i32):
    v4 = iadd.i32 v1, v2
    jump ebb9(v4)
 ebb9(v7: i32):
    v9 = iadd.i32 v2, v7
    return v9
 }
--- a/filetests/wasm/control.cton
+++ b/filetests/wasm/control.cton
@@ -0,0 +1,50 @@
 ; Test basic code generation for control flow WebAssembly instructions.
 test compile
 set is_64bit=0
 isa intel haswell
 set is_64bit=1
 isa intel haswell
 function %br_if(i32) -> i32 {
 ebb0(v0: i32):
    v1 = iconst.i32 1
    brz v0, ebb1(v1)
    jump ebb2
 ebb1(v2: i32):
    return v2
 ebb2:
    jump ebb1(v0)
 }
 function %br_if_not(i32) -> i32 {
 ebb0(v0: i32):
    v1 = iconst.i32 1
    brnz v0, ebb1(v0)
    jump ebb2
 ebb1(v2: i32):
    return v2
 ebb2:
    jump ebb1(v0)
 }
 function %br_if_fallthrough(i32) -> i32 {
 ebb0(v0: i32):
    v1 = iconst.i32 1
    brz v0, ebb1(v1)
    ; This jump gets converted to a fallthrough.
    jump ebb1(v0)
 ebb1(v2: i32):
    return v2
 }
 function %undefined() {
 ebb0:
    trap
 }
--- a/filetests/wasm/conversions.cton
+++ b/filetests/wasm/conversions.cton
@@ -0,0 +1,94 @@
 ; Test code generation for WebAssembly type conversion operators.
 test compile
 set is_64bit=1
 isa intel haswell
 function %i32_wrap_i64(i64) -> i32 {
 ebb0(v0: i64):
    v1 = ireduce.i32 v0
    return v1
 }
 function %i64_extend_s_i32(i32) -> i64 {
 ebb0(v0: i32):
    v1 = sextend.i64 v0
    return v1
 }
 function %i64_extend_u_i32(i32) -> i64 {
 ebb0(v0: i32):
    v1 = uextend.i64 v0
    return v1
 }
 ; function %i32_trunc_s_f32(f32) -> i32
 ; function %i32_trunc_u_f32(f32) -> i32
 ; function %i32_trunc_s_f64(f64) -> i32
 ; function %i32_trunc_u_f64(f64) -> i32
 ; function %i64_trunc_s_f32(f32) -> i64
 ; function %i64_trunc_u_f32(f32) -> i64
 ; function %i64_trunc_s_f64(f64) -> i64
 ; function %i64_trunc_u_f64(f64) -> i64
 function %f32_trunc_f64(f64) -> f32 {
 ebb0(v0: f64):
    v1 = fdemote.f32 v0
    return v1
 }
 function %f64_promote_f32(f32) -> f64 {
 ebb0(v0: f32):
    v1 = fpromote.f64 v0
    return v1
 }
 function %f32_convert_s_i32(i32) -> f32 {
 ebb0(v0: i32):
    v1 = fcvt_from_sint.f32 v0
    return v1
 }
 function %f64_convert_s_i32(i32) -> f64 {
 ebb0(v0: i32):
    v1 = fcvt_from_sint.f64 v0
    return v1
 }
 function %f32_convert_s_i64(i64) -> f32 {
 ebb0(v0: i64):
    v1 = fcvt_from_sint.f32 v0
    return v1
 }
 function %f64_convert_s_i64(i64) -> f64 {
 ebb0(v0: i64):
    v1 = fcvt_from_sint.f64 v0
    return v1
 }
 ; TODO: f*_convert_u_i* (Don't exist on Intel).
 function %i32_reinterpret_f32(f32) -> i32 {
 ebb0(v0: f32):
    v1 = bitcast.i32 v0
    return v1
 }
 function %f32_reinterpret_i32(i32) -> f32 {
 ebb0(v0: i32):
    v1 = bitcast.f32 v0
    return v1
 }
 function %i64_reinterpret_f64(f64) -> i64 {
 ebb0(v0: f64):
    v1 = bitcast.i64 v0
    return v1
 }
 function %f64_reinterpret_i64(i64) -> f64 {
 ebb0(v0: i64):
    v1 = bitcast.f64 v0
    return v1
 }
--- a/filetests/wasm/f32-arith.cton
+++ b/filetests/wasm/f32-arith.cton
@@ -0,0 +1,52 @@
 ; Test basic code generation for f32 arithmetic WebAssembly instructions.
 test compile
 set is_64bit=0
 isa intel haswell
 set is_64bit=1
 isa intel haswell
 ; Constants.
 ; function %f32_const() -> f32
 ; Unary operations
 ; function %f32_abs(f32) -> f32
 ; function %f32_neg(f32) -> f32
 ; function %f32_sqrt(f32) -> f32
 ; function %f32_ceil(f32) -> f32
 ; function %f32_floor(f32) -> f32
 ; function %f32_trunc(f32) -> f32
 ; function %f32_nearest (f32) -> f32
 ; Binary Operations
 function %f32_add(f32, f32) -> f32 {
 ebb0(v0: f32, v1: f32):
    v2 = fadd v0, v1
    return v2
 }
 function %f32_sub(f32, f32) -> f32 {
 ebb0(v0: f32, v1: f32):
    v2 = fsub v0, v1
    return v2
 }
 function %f32_mul(f32, f32) -> f32 {
 ebb0(v0: f32, v1: f32):
    v2 = fmul v0, v1
    return v2
 }
 function %f32_div(f32, f32) -> f32 {
 ebb0(v0: f32, v1: f32):
    v2 = fdiv v0, v1
    return v2
 }
 ; function %f32_min(f32, f32) -> f32
 ; function %f32_max(f32, f32) -> f32
 ; function %f32_copysign(f32, f32) -> f32
--- a/filetests/wasm/f64-arith.cton
+++ b/filetests/wasm/f64-arith.cton
@@ -0,0 +1,52 @@
 ; Test basic code generation for f64 arithmetic WebAssembly instructions.
 test compile
 set is_64bit=0
 isa intel haswell
 set is_64bit=1
 isa intel haswell
 ; Constants.
 ; function %f64_const() -> f64
 ; Unary operations
 ; function %f64_abs(f64) -> f64
 ; function %f64_neg(f64) -> f64
 ; function %f64_sqrt(f64) -> f64
 ; function %f64_ceil(f64) -> f64
 ; function %f64_floor(f64) -> f64
 ; function %f64_trunc(f64) -> f64
 ; function %f64_nearest (f64) -> f64
 ; Binary Operations
 function %f64_add(f64, f64) -> f64 {
 ebb0(v0: f64, v1: f64):
    v2 = fadd v0, v1
    return v2
 }
 function %f64_sub(f64, f64) -> f64 {
 ebb0(v0: f64, v1: f64):
    v2 = fsub v0, v1
    return v2
 }
 function %f64_mul(f64, f64) -> f64 {
 ebb0(v0: f64, v1: f64):
    v2 = fmul v0, v1
    return v2
 }
 function %f64_div(f64, f64) -> f64 {
 ebb0(v0: f64, v1: f64):
    v2 = fdiv v0, v1
    return v2
 }
 ; function %f64_min(f64, f64) -> f64
 ; function %f64_max(f64, f64) -> f64
 ; function %f64_copysign(f64, f64) -> f64
--- a/filetests/wasm/i32-arith.cton
+++ b/filetests/wasm/i32-arith.cton
@@ -0,0 +1,128 @@
 ; Test basic code generation for i32 arithmetic WebAssembly instructions.
 test compile
 set is_64bit=0
 isa intel haswell
 set is_64bit=1
 isa intel haswell
 ; Constants.
 function %i32_const() -> i32 {
 ebb0:
    v0 = iconst.i32 0x8765_4321
    return v0
 }
 ; Unary operations.
 function %i32_clz(i32) -> i32 {
 ebb0(v0: i32):
    v1 = clz v0
    return v1
 }
 function %i32_ctz(i32) -> i32 {
 ebb0(v0: i32):
    v1 = ctz v0
    return v1
 }
 function %i32_popcnt(i32) -> i32 {
 ebb0(v0: i32):
    v1 = popcnt v0
    return v1
 }
 ; Binary operations.
 function %i32_add(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = iadd v0, v1
    return v2
 }
 function %i32_sub(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = isub v0, v1
    return v2
 }
 function %i32_mul(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = imul v0, v1
    return v2
 }
 function %i32_div_s(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = sdiv v0, v1
    return v2
 }
 function %i32_div_u(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = udiv v0, v1
    return v2
 }
 function %i32_rem_s(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = srem v0, v1
    return v2
 }
 function %i32_rem_u(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = urem v0, v1
    return v2
 }
 function %i32_and(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = band v0, v1
    return v2
 }
 function %i32_or(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = bor v0, v1
    return v2
 }
 function %i32_xor(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = bxor v0, v1
    return v2
 }
 function %i32_shl(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = ishl v0, v1
    return v2
 }
 function %i32_shr_s(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = sshr v0, v1
    return v2
 }
 function %i32_shr_u(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = ushr v0, v1
    return v2
 }
 function %i32_rotl(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = rotl v0, v1
    return v2
 }
 function %i32_rotr(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = rotr v0, v1
    return v2
 }
--- a/filetests/wasm/i32-compares.cton
+++ b/filetests/wasm/i32-compares.cton
@@ -0,0 +1,85 @@
 ; Test code generation for WebAssembly i32 comparison operators.
 test compile
 set is_64bit=0
 isa intel haswell
 set is_64bit=1
 isa intel haswell
 function %i32_eqz(i32) -> i32 {
 ebb0(v0: i32):
    v1 = icmp_imm eq v0, 0
    v2 = bint.i32 v1
    return v2
 }
 function %i32_eq(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = icmp eq v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i32_ne(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = icmp ne v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i32_lt_s(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = icmp slt v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i32_lt_u(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = icmp ult v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i32_gt_s(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = icmp sgt v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i32_gt_u(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = icmp ugt v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i32_le_s(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = icmp sle v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i32_le_u(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = icmp ule v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i32_ge_s(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = icmp sge v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i32_ge_u(i32, i32) -> i32 {
 ebb0(v0: i32, v1: i32):
    v2 = icmp uge v0, v1
    v3 = bint.i32 v2
    return v3
 }
--- a/filetests/wasm/i64-arith.cton
+++ b/filetests/wasm/i64-arith.cton
@@ -0,0 +1,125 @@
 ; Test basic code generation for i64 arithmetic WebAssembly instructions.
 test compile
 set is_64bit=1
 isa intel haswell
 ; Constants.
 function %i64_const() -> i64 {
 ebb0:
    v0 = iconst.i64 0x8765_4321
    return v0
 }
 ; Unary operations.
 function %i64_clz(i64) -> i64 {
 ebb0(v0: i64):
    v1 = clz v0
    return v1
 }
 function %i64_ctz(i64) -> i64 {
 ebb0(v0: i64):
    v1 = ctz v0
    return v1
 }
 function %i64_popcnt(i64) -> i64 {
 ebb0(v0: i64):
    v1 = popcnt v0
    return v1
 }
 ; Binary operations.
 function %i64_add(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    v2 = iadd v0, v1
    return v2
 }
 function %i64_sub(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    v2 = isub v0, v1
    return v2
 }
 function %i64_mul(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    v2 = imul v0, v1
    return v2
 }
 ; Division and remainder on i64 operands.
 function %i64_div_s(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    v2 = sdiv v0, v1
    return v2
 }
 function %i64_div_u(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    v2 = udiv v0, v1
    return v2
 }
 function %i64_rem_s(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    v2 = srem v0, v1
    return v2
 }
 function %i64_rem_u(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    v2 = urem v0, v1
    return v2
 }
 function %i64_and(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    v2 = band v0, v1
    return v2
 }
 function %i64_or(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    v2 = bor v0, v1
    return v2
 }
 function %i64_xor(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    v2 = bxor v0, v1
    return v2
 }
 function %i64_shl(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    v2 = ishl v0, v1
    return v2
 }
 function %i64_shr_s(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    v2 = sshr v0, v1
    return v2
 }
 function %i64_shr_u(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    v2 = ushr v0, v1
    return v2
 }
 function %i64_rotl(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    v2 = rotl v0, v1
    return v2
 }
 function %i64_rotr(i64, i64) -> i64 {
 ebb0(v0: i64, v1: i64):
    v2 = rotr v0, v1
    return v2
 }
--- a/filetests/wasm/i64-compares.cton
+++ b/filetests/wasm/i64-compares.cton
@@ -0,0 +1,82 @@
 ; Test code generation for WebAssembly i64 comparison operators.
 test compile
 set is_64bit=1
 isa intel haswell
 function %i64_eqz(i64) -> i32 {
 ebb0(v0: i64):
    v1 = icmp_imm eq v0, 0
    v2 = bint.i32 v1
    return v2
 }
 function %i64_eq(i64, i64) -> i32 {
 ebb0(v0: i64, v1: i64):
    v2 = icmp eq v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i64_ne(i64, i64) -> i32 {
 ebb0(v0: i64, v1: i64):
    v2 = icmp ne v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i64_lt_s(i64, i64) -> i32 {
 ebb0(v0: i64, v1: i64):
    v2 = icmp slt v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i64_lt_u(i64, i64) -> i32 {
 ebb0(v0: i64, v1: i64):
    v2 = icmp ult v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i64_gt_s(i64, i64) -> i32 {
 ebb0(v0: i64, v1: i64):
    v2 = icmp sgt v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i64_gt_u(i64, i64) -> i32 {
 ebb0(v0: i64, v1: i64):
    v2 = icmp ugt v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i64_le_s(i64, i64) -> i32 {
 ebb0(v0: i64, v1: i64):
    v2 = icmp sle v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i64_le_u(i64, i64) -> i32 {
 ebb0(v0: i64, v1: i64):
    v2 = icmp ule v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i64_ge_s(i64, i64) -> i32 {
 ebb0(v0: i64, v1: i64):
    v2 = icmp sge v0, v1
    v3 = bint.i32 v2
    return v3
 }
 function %i64_ge_u(i64, i64) -> i32 {
 ebb0(v0: i64, v1: i64):
    v2 = icmp uge v0, v1
    v3 = bint.i32 v2
    return v3
 }
--- a/format-all.sh
+++ b/format-all.sh
@@ -0,0 +1,17 @@
 #!/bin/bash
 # Format all sources using rustfmt.
 #
 # Runs `cargo fmt` in the directory of every Cargo.toml found below the
 # directory containing this script. Any command line arguments are forwarded
 # after `--` to `cargo fmt`.
 # Exit immediately on errors.
 set -e
 cd "$(dirname "$0")"
 src=$(pwd)
 # Make sure we can find rustfmt.
 export PATH="$PATH:$HOME/.cargo/bin"
 # Read NUL-delimited paths on fd 3 so that paths containing whitespace are not
 # split into several words, and so that stdin is left untouched for cargo.
 while IFS= read -r -d '' crate <&3; do
    cd "$(dirname "$crate")"
    cargo fmt -- "$@"
 done 3< <(find "$src" -name Cargo.toml -print0)
--- a/lib/cretonne/Cargo.toml
+++ b/lib/cretonne/Cargo.toml
@@ -0,0 +1,19 @@
 [package]
 authors = ["The Cretonne Project Developers"]
 name = "cretonne"
 version = "0.0.0"
 description = "Low-level code generator library"
 license = "Apache-2.0"
 documentation = "https://cretonne.readthedocs.io/"
 repository = "https://github.com/stoklund/cretonne"
 publish = false
 build = "build.rs"
 [lib]
 name = "cretonne"
 [dependencies]
 # It is a goal of the cretonne crate to have minimal external dependencies.
 # Please don't add any unless they are essential to the task of creating binary
 # machine code. Integration tests that need external dependencies can be
 # accommodated in `tests`.
--- a/lib/cretonne/build.rs
+++ b/lib/cretonne/build.rs
@@ -0,0 +1,151 @@
 // Build script.
 //
 // This program is run by Cargo when building lib/cretonne. It is used to generate Rust code from
 // the language definitions in the lib/cretonne/meta directory.
 //
 // Environment:
 //
 // OUT_DIR
 //     Directory where generated files should be placed.
 //
 // TARGET
 //     Target triple provided by Cargo.
 //
 // CRETONNE_TARGETS (Optional)
 //     A setting for conditional compilation of isa targets. Possible values can be "native" or
 //     known isa targets separated by ','.
 //
 // The build script expects to be run from the directory where this build.rs file lives. The
 // current directory is used to find the sources.
 use std::env;
 use std::process;
 /// Build script entry point.
 ///
 /// Emits one `build_<isa>` cfg per selected ISA target, then runs `meta/build.py` with Python
 /// to generate sources into `OUT_DIR`. Exits with a non-zero status if the ISA selection is
 /// invalid or the Python script fails.
 fn main() {
    let out_dir = env::var("OUT_DIR").expect("The OUT_DIR environment variable must be set");
    let target_triple = env::var("TARGET").expect("The TARGET environment variable must be set");
    // CRETONNE_TARGETS is optional; borrow it as `Option<&str>` for `isa_targets`.
    let cretonne_targets = env::var("CRETONNE_TARGETS").ok();
    let cretonne_targets = cretonne_targets.as_ref().map(|s| s.as_ref());
    // Configure isa targets cfg.
    match isa_targets(cretonne_targets, &target_triple) {
        Ok(isa_targets) => {
            for isa in &isa_targets {
                println!("cargo:rustc-cfg=build_{}", isa.name());
            }
        }
        Err(err) => {
            eprintln!("Error: {}", err);
            process::exit(1);
        }
    }
    println!("Build script generating files in {}", out_dir);
    let cur_dir = env::current_dir().expect("Can't access current working directory");
    let crate_dir = cur_dir.as_path();
    // Make sure we rebuild if this build script changes.
    // I guess that won't happen if you have non-UTF8 bytes in your path names.
    // The `build.py` script prints out its own dependencies.
    println!("cargo:rerun-if-changed={}",
             crate_dir.join("build.rs").to_string_lossy());
    // Scripts are in `$crate_dir/meta`.
    let meta_dir = crate_dir.join("meta");
    let build_script = meta_dir.join("build.py");
    // Launch build script with Python. We'll just find python in the path.
    let status = process::Command::new("python")
        .current_dir(crate_dir)
        .arg(build_script)
        .arg("--out-dir")
        .arg(out_dir)
        .status()
        .expect("Failed to launch second-level build script");
    if !status.success() {
        // `code()` is `None` when the child was terminated by a signal. Report a generic
        // failure in that case instead of panicking on `unwrap()`.
        process::exit(status.code().unwrap_or(1));
    }
 }
 /// Represents known ISA target.
 ///
 /// Each variant has a short name (see `Isa::name`) that is used both for parsing
 /// `CRETONNE_TARGETS` and for the emitted `build_<name>` cfg.
 #[derive(Copy, Clone)]
 enum Isa {
    /// Named "riscv"; selected for the "riscv" architecture.
    Riscv,
    /// Named "intel"; selected for "x86_64", "i386", "i586" and "i686".
    Intel,
    /// Named "arm32"; selected for architectures starting with "arm" or "thumb".
    Arm32,
    /// Named "arm64"; selected for "aarch64".
    Arm64,
 }
 impl Isa {
    /// Creates isa target using name.
    ///
    /// Returns `None` when `name` is not the name of any target in `Isa::all()`.
    fn new(name: &str) -> Option<Self> {
        // `find` stops at the first match, exactly like the former `filter(..).next()`.
        Isa::all().iter().cloned().find(|isa| isa.name() == name)
    }
    /// Creates isa target from arch.
    ///
    /// `arch` is the architecture component of a target triple. Returns the first target in
    /// `Isa::all()` order that accepts it, or `None` if no target does.
    fn from_arch(arch: &str) -> Option<Isa> {
        Isa::all()
            .iter()
            .cloned()
            .find(|isa| isa.is_arch_applicable(arch))
    }
    /// Returns all supported isa targets.
    fn all() -> [Isa; 4] {
        [Isa::Riscv, Isa::Intel, Isa::Arm32, Isa::Arm64]
    }
    /// Returns name of the isa target.
    fn name(&self) -> &'static str {
        match *self {
            Isa::Riscv => "riscv",
            Isa::Intel => "intel",
            Isa::Arm32 => "arm32",
            Isa::Arm64 => "arm64",
        }
    }
    /// Checks if arch is applicable for the isa target.
    fn is_arch_applicable(&self, arch: &str) -> bool {
        match *self {
            Isa::Riscv => arch == "riscv",
            Isa::Intel => ["x86_64", "i386", "i586", "i686"].contains(&arch),
            Isa::Arm32 => arch.starts_with("arm") || arch.starts_with("thumb"),
            Isa::Arm64 => arch == "aarch64",
        }
    }
 }
 /// Returns isa targets to configure conditional compilation.
 ///
 /// * `None` selects every supported target.
 /// * `Some("native")` selects the target matching the architecture component of
 ///   `target_triple`, or fails if there is none.
 /// * Any other value is a ','-separated list of target names. Empty entries are skipped, so
 ///   an empty list selects every supported target. Any unknown name is an error that lists
 ///   all the unknown names.
 fn isa_targets(cretonne_targets: Option<&str>, target_triple: &str) -> Result<Vec<Isa>, String> {
    match cretonne_targets {
        Some("native") => {
            Isa::from_arch(target_triple.split('-').next().unwrap())
                .map(|isa| vec![isa])
                .ok_or_else(|| {
                                format!("no supported isa found for target triple `{}`",
                                        target_triple)
                            })
        }
        Some(targets) => {
            let mut known = Vec::new();
            let mut unknown = Vec::new();
            // `split` yields one empty item for an empty string. Skipping empty entries is
            // what makes the "no targets listed" fallback below reachable.
            for name in targets.split(',').filter(|name| !name.is_empty()) {
                match Isa::new(name) {
                    Some(isa) => known.push(isa),
                    None => unknown.push(name),
                }
            }
            if !unknown.is_empty() {
                Err(format!("unknown isa targets: `{}`", unknown.join(", ")))
            } else if known.is_empty() {
                Ok(Isa::all().to_vec())
            } else {
                Ok(known)
            }
        }
        None => Ok(Isa::all().to_vec()),
    }
 }
--- a/lib/cretonne/meta/base/init.py
+++ b/lib/cretonne/meta/base/init.py
@@ -0,0 +1 @@
 """Definitions for the base Cretonne language."""
--- a/lib/cretonne/meta/base/entities.py
+++ b/lib/cretonne/meta/base/entities.py
@@ -0,0 +1,29 @@
 """
 The `cretonne.entities` module predefines all the Cretonne entity reference
 operand types. There are corresponding definitions in the `cretonne.entities`
 Rust module.
 """
 from __future__ import absolute_import
 from cdsl.operands import EntityRefKind
 #: A reference to an extended basic block in the same function.
 #: This is primarily used in control flow instructions.
 ebb = EntityRefKind(
        'ebb', 'An extended basic block in the same function.',
        default_member='destination')
 #: A reference to a stack slot declared in the function preamble.
 stack_slot = EntityRefKind('stack_slot', 'A stack slot.')
 #: A reference to a function signature declared in the function preamble.
 #: This is used to provide the call signature in an indirect call instruction.
 sig_ref = EntityRefKind('sig_ref', 'A function signature.')
 #: A reference to an external function declared in the function preamble.
 #: This is used to provide the callee and signature in a call instruction.
 func_ref = EntityRefKind('func_ref', 'An external function.')
 #: A reference to a jump table declared in the function preamble.
 jump_table = EntityRefKind(
        'jump_table', 'A jump table.', default_member='table')
--- a/lib/cretonne/meta/base/formats.py
+++ b/lib/cretonne/meta/base/formats.py
@@ -0,0 +1,64 @@
 """
 The `cretonne.formats` module defines all instruction formats.
 Every instruction format has a corresponding `InstructionData` variant in the
 Rust representation of cretonne IL, so all instruction formats must be defined
 in this module.
 """
 from __future__ import absolute_import
 from cdsl.formats import InstructionFormat
 from cdsl.operands import VALUE, VARIABLE_ARGS
 from .immediates import imm64, uimm8, ieee32, ieee64, offset32, uoffset32
 from .immediates import boolean, intcc, floatcc, memflags, regunit
 from .entities import ebb, sig_ref, func_ref, jump_table, stack_slot
 # Each format lists the kinds of its operands in order. An operand given as a
 # `(name, kind)` tuple presumably gets an explicit member name -- confirm
 # against `InstructionFormat`.
 Nullary = InstructionFormat()
 Unary = InstructionFormat(VALUE)
 UnaryImm = InstructionFormat(imm64)
 UnaryIeee32 = InstructionFormat(ieee32)
 UnaryIeee64 = InstructionFormat(ieee64)
 UnaryBool = InstructionFormat(boolean)
 Binary = InstructionFormat(VALUE, VALUE)
 BinaryImm = InstructionFormat(VALUE, imm64)
 # The select instructions are controlled by the second VALUE operand.
 # The first VALUE operand is the controlling flag which has a derived type.
 # The fma instruction has the same constraint on all inputs.
 Ternary = InstructionFormat(VALUE, VALUE, VALUE, typevar_operand=1)
 # Catch-all for instructions with many outputs and inputs and no immediate
 # operands.
 MultiAry = InstructionFormat(VARIABLE_ARGS)
 # SIMD lane access: the lane index is a `uimm8` immediate named 'lane'.
 InsertLane = InstructionFormat(VALUE, ('lane', uimm8), VALUE)
 ExtractLane = InstructionFormat(VALUE, ('lane', uimm8))
 # Comparisons carry their condition code as the first operand.
 IntCompare = InstructionFormat(intcc, VALUE, VALUE)
 IntCompareImm = InstructionFormat(intcc, VALUE, imm64)
 FloatCompare = InstructionFormat(floatcc, VALUE, VALUE)
 # Control flow: an `ebb` destination followed by variable arguments, or a
 # jump table.
 Jump = InstructionFormat(ebb, VARIABLE_ARGS)
 Branch = InstructionFormat(VALUE, ebb, VARIABLE_ARGS)
 BranchIcmp = InstructionFormat(intcc, VALUE, VALUE, ebb, VARIABLE_ARGS)
 BranchTable = InstructionFormat(VALUE, jump_table)
 # Calls: direct calls name a `func_ref`; indirect calls name a `sig_ref` and
 # take an extra VALUE operand (presumably the callee address -- confirm).
 Call = InstructionFormat(func_ref, VARIABLE_ARGS)
 IndirectCall = InstructionFormat(sig_ref, VALUE, VARIABLE_ARGS)
 # Memory access: general loads/stores carry `memflags` and a signed
 # `offset32`; stack accesses name a `stack_slot` instead of carrying flags.
 Load = InstructionFormat(memflags, VALUE, offset32)
 Store = InstructionFormat(memflags, VALUE, VALUE, offset32)
 StackLoad = InstructionFormat(stack_slot, offset32)
 StackStore = InstructionFormat(VALUE, stack_slot, offset32)
 # Accessing a WebAssembly heap.
 # TODO: Add a reference to a `heap` declared in the preamble.
 HeapLoad = InstructionFormat(VALUE, uoffset32)
 HeapStore = InstructionFormat(VALUE, VALUE, uoffset32)
 # Register move: a value plus source and destination register units.
 RegMove = InstructionFormat(VALUE, ('src', regunit), ('dst', regunit))
 # Finally extract the names of global variables in this module.
 InstructionFormat.extract_names(globals())
--- a/lib/cretonne/meta/base/immediates.py
+++ b/lib/cretonne/meta/base/immediates.py
@@ -0,0 +1,111 @@
 """
 The `cretonne.immediates` module predefines all the Cretonne immediate operand
 types.
 """
 from __future__ import absolute_import
 from cdsl.operands import ImmediateKind
 # Each kind is created with its name and a one-line description.
 # `default_member` and `rust_type` are optional overrides; presumably they
 # select the format member name and the Rust type used for the operand --
 # confirm against `ImmediateKind`.

 #: A 64-bit immediate integer operand.
 #:
 #: This type of immediate integer can interact with SSA values with any
 #: :py:class:`cretonne.IntType` type.
 imm64 = ImmediateKind('imm64', 'A 64-bit immediate integer.')
 #: An unsigned 8-bit immediate integer operand.
 #:
 #: This small operand is used to indicate lane indexes in SIMD vectors and
 #: immediate bit counts on shift instructions.
 uimm8 = ImmediateKind('uimm8', 'An 8-bit immediate unsigned integer.')
 #: A 32-bit immediate signed offset.
 #:
 #: This is used to represent an immediate address offset in load/store
 #: instructions.
 offset32 = ImmediateKind(
        'offset32',
        'A 32-bit immediate signed offset.',
        default_member='offset')
 #: A 32-bit immediate unsigned offset.
 #:
 #: This is used to represent an immediate address offset in WebAssembly memory
 #: instructions.
 uoffset32 = ImmediateKind(
        'uoffset32',
        'A 32-bit immediate unsigned offset.',
        default_member='offset')
 #: A 32-bit immediate floating point operand.
 #:
 #: IEEE 754-2008 binary32 interchange format.
 ieee32 = ImmediateKind('ieee32', 'A 32-bit immediate floating point number.')
 #: A 64-bit immediate floating point operand.
 #:
 #: IEEE 754-2008 binary64 interchange format.
 ieee64 = ImmediateKind('ieee64', 'A 64-bit immediate floating point number.')
 # NOTE: the kind is named 'bool' while the Python binding is `boolean`.
 #: An immediate boolean operand.
 #:
 #: This type of immediate boolean can interact with SSA values with any
 #: :py:class:`cretonne.BoolType` type.
 boolean = ImmediateKind('bool', 'An immediate boolean.',
                        rust_type='bool')
 # For the enumerated kinds, `values` maps the textual condition code to a
 # descriptive variant name. The keys are also used as attributes in the DSL,
 # e.g. `intcc.ult` in base/legalize.py.

 #: A condition code for comparing integer values.
 #:
 #: This enumerated operand kind is used for the :cton:inst:`icmp` instruction
 #: and corresponds to the `condcodes::IntCC` Rust type.
 intcc = ImmediateKind(
        'intcc',
        'An integer comparison condition code.',
        default_member='cond', rust_type='IntCC',
        values={
            'eq':  'Equal',
            'ne':  'NotEqual',
            'sge': 'SignedGreaterThanOrEqual',
            'sgt': 'SignedGreaterThan',
            'sle': 'SignedLessThanOrEqual',
            'slt': 'SignedLessThan',
            'uge': 'UnsignedGreaterThanOrEqual',
            'ugt': 'UnsignedGreaterThan',
            'ule': 'UnsignedLessThanOrEqual',
            'ult': 'UnsignedLessThan',
        })
 # Note that the `u` prefix means "unordered or" here, whereas it means
 # "unsigned" for `intcc`.
 #: A condition code for comparing floating point values.
 #:
 #: This enumerated operand kind is used for the :cton:inst:`fcmp` instruction
 #: and corresponds to the `condcodes::FloatCC` Rust type.
 floatcc = ImmediateKind(
        'floatcc',
        'A floating point comparison condition code.',
        default_member='cond', rust_type='FloatCC',
        values={
            'ord': 'Ordered',
            'uno': 'Unordered',
            'eq':  'Equal',
            'ne':  'NotEqual',
            'one': 'OrderedNotEqual',
            'ueq': 'UnorderedOrEqual',
            'lt':  'LessThan',
            'le':  'LessThanOrEqual',
            'gt':  'GreaterThan',
            'ge':  'GreaterThanOrEqual',
            'ult': 'UnorderedOrLessThan',
            'ule': 'UnorderedOrLessThanOrEqual',
            'ugt': 'UnorderedOrGreaterThan',
            'uge': 'UnorderedOrGreaterThanOrEqual',
        })
 #: Flags for memory operations like :cton:inst:`load` and :cton:inst:`store`.
 memflags = ImmediateKind(
        'memflags',
        'Memory operation flags',
        default_member='flags', rust_type='MemFlags')
 #: A register unit in the current target ISA.
 regunit = ImmediateKind(
        'regunit',
        'A register unit in the target ISA',
        rust_type='RegUnit')
--- a/lib/cretonne/meta/base/instructions.py
+++ b/lib/cretonne/meta/base/instructions.py
--- a/lib/cretonne/meta/base/legalize.py
+++ b/lib/cretonne/meta/base/legalize.py
@@ -0,0 +1,189 @@
 """
 Patterns for legalizing the `base` instruction set.
 The base Cretonne instruction set is 'fat', and many instructions don't have
 legal representations in a given target ISA. This module defines legalization
 patterns that describe how base instructions can be transformed to other base
 instructions that are legal.
 """
 from __future__ import absolute_import
 from .immediates import intcc
 from .instructions import iadd, iadd_cout, iadd_cin, iadd_carry, iadd_imm
 from .instructions import isub, isub_bin, isub_bout, isub_borrow
 from .instructions import band, bor, bxor, isplit, iconcat
 from .instructions import bnot, band_not, bor_not, bxor_not
 from .instructions import icmp, icmp_imm
 from .instructions import iconst, bint
 from .instructions import ishl, ishl_imm, sshr, sshr_imm, ushr, ushr_imm
 from .instructions import rotl, rotl_imm, rotr, rotr_imm
 from cdsl.ast import Var
 from cdsl.xform import Rtl, XFormGroup
 # Transformation groups. Every legalization pattern further down is registered
 # with one of these through `<group>.legalize(source, Rtl(...))`.
 narrow = XFormGroup('narrow', """
        Legalize instructions by narrowing.
        The transformations in the 'narrow' group work by expressing
        instructions in terms of smaller types. Operations on vector types are
        expressed in terms of vector types with fewer lanes, and integer
        operations are expressed in terms of smaller integer types.
        """)
 widen = XFormGroup('widen', """
        Legalize instructions by widening.
        The transformations in the 'widen' group work by expressing
        instructions in terms of larger types.
        """)
 expand = XFormGroup('expand', """
        Legalize instructions by expansion.
        Rewrite instructions in terms of other instructions, generally
        operating on the same types as the original instructions.
        """)
 # Pattern variables used by the transformations in this module. The name given
 # to `Var` repeats the Python name.
 x = Var('x')
 y = Var('y')
 a = Var('a')
 a1 = Var('a1')
 a2 = Var('a2')
 b = Var('b')
 b1 = Var('b1')
 b2 = Var('b2')
 b_in = Var('b_in')
 b_int = Var('b_int')
 c = Var('c')
 c1 = Var('c1')
 c2 = Var('c2')
 c_in = Var('c_in')
 c_int = Var('c_int')
 # Low/high halves produced by `isplit` and consumed by `iconcat`.
 xl = Var('xl')
 xh = Var('xh')
 yl = Var('yl')
 yh = Var('yh')
 al = Var('al')
 ah = Var('ah')
 # Condition code operand of `icmp_imm` / `icmp`.
 cc = Var('cc')
 # Narrow an add: split both operands into low/high halves, add the low halves
 # producing a carry, add the high halves consuming that carry, and concatenate
 # the two partial results.
 narrow.legalize(
        a << iadd(x, y),
        Rtl(
            (xl, xh) << isplit(x),
            (yl, yh) << isplit(y),
            (al, c) << iadd_cout(xl, yl),
            ah << iadd_cin(xh, yh, c),
            a << iconcat(al, ah)
        ))
 # Narrow a subtract the same way, propagating a borrow instead of a carry.
 narrow.legalize(
        a << isub(x, y),
        Rtl(
            (xl, xh) << isplit(x),
            (yl, yh) << isplit(y),
            (al, b) << isub_bout(xl, yl),
            ah << isub_bin(xh, yh, b),
            a << iconcat(al, ah)
        ))
 # Bitwise operations need no carry: apply the same operation to each half
 # independently.
 for bitop in [band, bor, bxor]:
    narrow.legalize(
            a << bitop(x, y),
            Rtl(
                (xl, xh) << isplit(x),
                (yl, yh) << isplit(y),
                al << bitop(xl, yl),
                ah << bitop(xh, yh),
                a << iconcat(al, ah)
            ))
 # Expand integer operations with carry for RISC architectures that don't have
 # the flags.

 # Carry out: the sum wrapped around iff it is unsigned-less-than an operand.
 expand.legalize(
        (a, c) << iadd_cout(x, y),
        Rtl(
            a << iadd(x, y),
            c << icmp(intcc.ult, a, x)
        ))
 # Borrow out: the difference is unsigned-greater-than the minuend.
 expand.legalize(
        (a, b) << isub_bout(x, y),
        Rtl(
            a << isub(x, y),
            b << icmp(intcc.ugt, a, x)
        ))
 # Carry in: convert the carry to an integer with `bint` and add it on top.
 expand.legalize(
        a << iadd_cin(x, y, c),
        Rtl(
            a1 << iadd(x, y),
            c_int << bint(c),
            a << iadd(a1, c_int)
        ))
 # Borrow in: convert the borrow to an integer with `bint` and subtract it.
 expand.legalize(
        a << isub_bin(x, y, b),
        Rtl(
            a1 << isub(x, y),
            b_int << bint(b),
            a << isub(a1, b_int)
        ))
 # Carry in and out: two carry-producing adds; the final carry is the OR of
 # the two partial carries.
 expand.legalize(
        (a, c) << iadd_carry(x, y, c_in),
        Rtl(
            (a1, c1) << iadd_cout(x, y),
            c_int << bint(c_in),
            (a, c2) << iadd_cout(a1, c_int),
            c << bor(c1, c2)
        ))
 # Borrow in and out: two borrow-producing subtracts; the final borrow is the
 # OR of the two partial borrows.
 expand.legalize(
        (a, b) << isub_borrow(x, y, b_in),
        Rtl(
            (a1, b1) << isub_bout(x, y),
            b_int << bint(b_in),
            (a, b2) << isub_bout(a1, b_int),
            b << bor(b1, b2)
        ))
 # Expansions for immediate operands that are out of range.
 # Each one materializes the immediate with `iconst` and falls back to the
 # register form of the instruction.
 expand.legalize(
        a << iadd_imm(x, y),
        Rtl(
            a1 << iconst(y),
            a << iadd(x, a1)
        ))
 # Rotates and shifts.
 # The amount is always materialized as an i32 constant (`iconst.i32`),
 # independent of the type of `x`.
 for inst_imm,      inst in [
        (rotl_imm, rotl),
        (rotr_imm, rotr),
        (ishl_imm, ishl),
        (sshr_imm, sshr),
        (ushr_imm, ushr)]:
    expand.legalize(
            a << inst_imm(x, y),
            Rtl(
                a1 << iconst.i32(y),
                a << inst(x, a1)
            ))
 # Immediate compare: the condition code `cc` is passed through unchanged.
 expand.legalize(
        a << icmp_imm(cc, x, y),
        Rtl(
            a1 << iconst(y),
            a << icmp(cc, x, a1)
        ))
 # Expansions for *_not variants of bitwise ops.
 # The second operand is inverted with `bnot` before the plain operation.
 for inst_not,      inst in [
        (band_not, band),
        (bor_not,  bor),
        (bxor_not, bxor)]:
    expand.legalize(
            a << inst_not(x, y),
            Rtl(
                a1 << bnot(y),
                a << inst(x, a1)
            ))
--- a/lib/cretonne/meta/base/semantics.py
+++ b/lib/cretonne/meta/base/semantics.py
@@ -0,0 +1,181 @@
 from __future__ import absolute_import
 from semantics.primitives import prim_to_bv, prim_from_bv, bvsplit, bvconcat,\
    bvadd, bvult, bvzeroext, bvsignext
 from .instructions import vsplit, vconcat, iadd, iadd_cout, icmp, bextend, \
    isplit, iconcat, iadd_cin, iadd_carry
 from .immediates import intcc
 from cdsl.xform import Rtl
 from cdsl.ast import Var
 from cdsl.typevar import TypeSet
 from cdsl.ti import InTypeset
 # Pattern variables used by the semantics definitions in this module.
 x = Var('x')
 y = Var('y')
 a = Var('a')
 b = Var('b')
 c_out = Var('c_out')
 c_in = Var('c_in')
 # Variables prefixed with `bv` hold the results of `prim_to_bv` and of the
 # `bv*` primitives.
 bvc_out = Var('bvc_out')
 bvc_in = Var('bvc_in')
 # Low/high halves produced by the split instructions.
 xhi = Var('xhi')
 yhi = Var('yhi')
 ahi = Var('ahi')
 bhi = Var('bhi')
 xlo = Var('xlo')
 ylo = Var('ylo')
 alo = Var('alo')
 blo = Var('blo')
 lo = Var('lo')
 hi = Var('hi')
 bvx = Var('bvx')
 bvy = Var('bvy')
 bva = Var('bva')
 bvt = Var('bvt')
 bvs = Var('bvs')
 bva_wide = Var('bva_wide')
 bvlo = Var('bvlo')
 bvhi = Var('bvhi')
 # Type set used to constrain the scalar variants below. `lanes=(1, 1)` is
 # presumably a (min, max) lane count, i.e. scalars only -- confirm against
 # `TypeSet`.
 ScalarTS = TypeSet(lanes=(1, 1), ints=True, floats=True, bools=True)
 # vsplit: convert to a bitvector, split it in two, convert both halves back.
 vsplit.set_semantics(
    (lo, hi) << vsplit(x),
    Rtl(
        bvx << prim_to_bv(x),
        (bvlo, bvhi) << bvsplit(bvx),
        lo << prim_from_bv(bvlo),
        hi << prim_from_bv(bvhi)
    ))
 # vconcat: the inverse -- concatenate the two bitvectors and convert back.
 vconcat.set_semantics(
    x << vconcat(lo, hi),
    Rtl(
        bvlo << prim_to_bv(lo),
        bvhi << prim_to_bv(hi),
        bvx << bvconcat(bvlo, bvhi),
        x << prim_from_bv(bvx)
    ))
 # iadd has two variants. The first is paired with a constraint that the type
 # of `x` is in ScalarTS and is expressed directly with `bvadd`. The second
 # (presumably the vector fallback -- confirm how `set_semantics` picks a
 # variant) recurses on the two halves of the vector.
 iadd.set_semantics(
    a << iadd(x, y),
    (Rtl(
        bvx << prim_to_bv(x),
        bvy << prim_to_bv(y),
        bva << bvadd(bvx, bvy),
        a << prim_from_bv(bva)
    ), [InTypeset(x.get_typevar(), ScalarTS)]),
    Rtl(
        (xlo, xhi) << vsplit(x),
        (ylo, yhi) << vsplit(y),
        alo << iadd(xlo, ylo),
        ahi << iadd(xhi, yhi),
        a << vconcat(alo, ahi)
    ))
 #
 # Integer arithmetic with carry and/or borrow.
 #
 # Only the add variants are given semantics here.

 # iadd_cin: zero-extend the carry-in bitvector and add it to x + y.
 iadd_cin.set_semantics(
    a << iadd_cin(x, y, c_in),
    Rtl(
        bvx << prim_to_bv(x),
        bvy << prim_to_bv(y),
        bvc_in << prim_to_bv(c_in),
        bvs << bvzeroext(bvc_in),
        bvt << bvadd(bvx, bvy),
        bva << bvadd(bvt, bvs),
        a << prim_from_bv(bva)
    ))
 # iadd_cout: the carry out is set when the sum is unsigned-less-than `x`.
 iadd_cout.set_semantics(
    (a, c_out) << iadd_cout(x, y),
    Rtl(
        bvx << prim_to_bv(x),
        bvy << prim_to_bv(y),
        bva << bvadd(bvx, bvy),
        bvc_out << bvult(bva, bvx),
        a << prim_from_bv(bva),
        c_out << prim_from_bv(bvc_out)
    ))
 # iadd_carry: combines the two definitions above.
 # NOTE(review): the carry out is computed as `bva < bvx` on the final sum.
 # That looks like it misses the carry when `y` is all ones and `c_in` is set
 # (then bva == bvx). The expansion in base/legalize.py ORs two partial
 # carries instead -- confirm which one is intended.
 iadd_carry.set_semantics(
    (a, c_out) << iadd_carry(x, y, c_in),
    Rtl(
        bvx << prim_to_bv(x),
        bvy << prim_to_bv(y),
        bvc_in << prim_to_bv(c_in),
        bvs << bvzeroext(bvc_in),
        bvt << bvadd(bvx, bvy),
        bva << bvadd(bvt, bvs),
        bvc_out << bvult(bva, bvx),
        a << prim_from_bv(bva),
        c_out << prim_from_bv(bvc_out)
    ))
 # bextend: the variant constrained to ScalarTS sign-extends the bitvector;
 # the other variant recurses on the two halves of the vector.
 bextend.set_semantics(
    a << bextend(x),
    (Rtl(
        bvx << prim_to_bv(x),
        bvy << bvsignext(bvx),
        a << prim_from_bv(bvy)
    ), [InTypeset(x.get_typevar(), ScalarTS)]),
    Rtl(
        (xlo, xhi) << vsplit(x),
        alo << bextend(xlo),
        ahi << bextend(xhi),
        a << vconcat(alo, ahi)
    ))
 # icmp: semantics are only given for the `ult` condition code here.
 # The variant constrained to ScalarTS zero-extends the `bvult` result before
 # converting it back; the other variant compares the halves separately and
 # extends the concatenated result with `bextend`.
 icmp.set_semantics(
    a << icmp(intcc.ult, x, y),
    (Rtl(
        bvx << prim_to_bv(x),
        bvy << prim_to_bv(y),
        bva << bvult(bvx, bvy),
        bva_wide << bvzeroext(bva),
        a << prim_from_bv(bva_wide),
    ), [InTypeset(x.get_typevar(), ScalarTS)]),
    Rtl(
        (xlo, xhi) << vsplit(x),
        (ylo, yhi) << vsplit(y),
        alo << icmp(intcc.ult, xlo, ylo),
        ahi << icmp(intcc.ult, xhi, yhi),
        b << vconcat(alo, ahi),
        a << bextend(b)
    ))
 #
 # Legalization helper instructions.
 #
 # isplit: the variant constrained to ScalarTS splits the bitvector of `x`
 # into a low and a high half. The other variant splits the vector into two
 # sub-vectors `a` and `b`, applies isplit to each, and then gathers all the
 # low halves into `xlo` and all the high halves into `xhi`.
 isplit.set_semantics(
    (xlo, xhi) << isplit(x),
    (Rtl(
        bvx << prim_to_bv(x),
        (bvlo, bvhi) << bvsplit(bvx),
        xlo << prim_from_bv(bvlo),
        xhi << prim_from_bv(bvhi)
    ), [InTypeset(x.get_typevar(), ScalarTS)]),
    Rtl(
        (a, b) << vsplit(x),
        (alo, ahi) << isplit(a),
        (blo, bhi) << isplit(b),
        xlo << vconcat(alo, blo),
        # The high result pairs `ahi` with `bhi`, mirroring `xlo` above.
        xhi << vconcat(ahi, bhi)
    ))
 # iconcat: the variant constrained to ScalarTS concatenates the two
 # bitvectors. The other variant splits both input vectors in two, applies
 # iconcat to the matching halves of `xlo` and `xhi`, and joins the two
 # results with vconcat.
 iconcat.set_semantics(
    x << iconcat(xlo, xhi),
    (Rtl(
        bvlo << prim_to_bv(xlo),
        bvhi << prim_to_bv(xhi),
        bvx << bvconcat(bvlo, bvhi),
        x << prim_from_bv(bvx)
    ), [InTypeset(x.get_typevar(), ScalarTS)]),
    Rtl(
        (alo, ahi) << vsplit(xlo),
        (blo, bhi) << vsplit(xhi),
        a << iconcat(alo, blo),
        b << iconcat(ahi, bhi),
        x << vconcat(a, b),
    ))
--- a/lib/cretonne/meta/base/settings.py
+++ b/lib/cretonne/meta/base/settings.py
@@ -0,0 +1,45 @@
 """
 Cretonne shared settings.
 This module defines settings that are relevant for all code generators.
 """
 from __future__ import absolute_import
 from cdsl.settings import SettingGroup, BoolSetting, EnumSetting
 # All settings defined between here and `group.close()` belong to the
 # 'shared' group (presumably by being created while the group is open --
 # confirm against `SettingGroup`).
 group = SettingGroup('shared')
 # Enumerated setting: a description followed by the allowed values.
 opt_level = EnumSetting(
        """
        Optimization level:
        - default: Very profitable optimizations enabled, none slow.
        - best: Enable all optimizations
        - fastest: Optimize for compile time by disabling most optimizations.
        """,
        'default', 'best', 'fastest')
 enable_verifier = BoolSetting(
        """
        Run the Cretonne IL verifier at strategic times during compilation.
        This makes compilation slower but catches many bugs. The verifier is
        disabled by default, except when reading Cretonne IL from a text file.
        """)
 # No explicit default is given for the next two settings.
 is_64bit = BoolSetting("Enable 64-bit code generation")
 is_compressed = BoolSetting("Enable compressed instructions")
 # The remaining feature switches are explicitly enabled by default.
 enable_float = BoolSetting(
        """Enable the use of floating-point instructions""",
        default=True)
 enable_simd = BoolSetting(
        """Enable the use of SIMD instructions.""",
        default=True)
 enable_atomics = BoolSetting(
        """Enable the use of atomic instructions""",
        default=True)
 # Close the group, passing the module globals (presumably so the settings
 # can pick up their Python names -- confirm against `SettingGroup.close`).
 group.close(globals())
--- a/lib/cretonne/meta/base/types.py
+++ b/lib/cretonne/meta/base/types.py
@@ -0,0 +1,33 @@
 """
 The base.types module predefines all the Cretonne scalar types.
 """
 from __future__ import absolute_import
 from cdsl.types import IntType, FloatType, BoolType
 # Boolean types, by width in bits.
 #: Boolean.
 b1 = BoolType(1)    #: 1-bit bool. Type is abstract (can't be stored in mem)
 b8 = BoolType(8)    #: 8-bit bool.
 b16 = BoolType(16)  #: 16-bit bool.
 b32 = BoolType(32)  #: 32-bit bool.
 b64 = BoolType(64)  #: 64-bit bool.
 # Integer types, by width in bits.
 i8 = IntType(8)     #: 8-bit int.
 i16 = IntType(16)   #: 16-bit int.
 i32 = IntType(32)   #: 32-bit int.
 i64 = IntType(64)   #: 64-bit int.
 # Floating point types: the width in bits followed by a documentation string.
 #: IEEE single precision.
 f32 = FloatType(
        32, """
        A 32-bit floating point type represented in the IEEE 754-2008
        *binary32* interchange format. This corresponds to the :c:type:`float`
        type in most C implementations.
        """)
 #: IEEE double precision.
 f64 = FloatType(
        64, """
        A 64-bit floating point type represented in the IEEE 754-2008
        *binary64* interchange format. This corresponds to the :c:type:`double`
        type in most C implementations.
        """)
--- a/lib/cretonne/meta/build.py
+++ b/lib/cretonne/meta/build.py
@@ -0,0 +1,32 @@
 # Second-level build script.
 #
 # This script is run from lib/cretonne/build.rs to generate Rust files.
 from __future__ import absolute_import
 import argparse
 import isa
 import gen_types
 import gen_instr
 import gen_settings
 import gen_build_deps
 import gen_encoding
 import gen_legalizer
 import gen_registers
 import gen_binemit
 # Parse the command line. `--out-dir` is not marked as required, so `out_dir`
 # is None when the option is omitted and is passed to the generators as-is.
 parser = argparse.ArgumentParser(description='Generate sources for Cretonne.')
 parser.add_argument('--out-dir', help='set output directory')
 args = parser.parse_args()
 out_dir = args.out_dir
 # Collect the ISA definitions once and hand them to every generator that
 # needs them.
 isas = isa.all_isas()
 gen_types.generate(out_dir)
 gen_instr.generate(isas, out_dir)
 gen_settings.generate(isas, out_dir)
 gen_encoding.generate(isas, out_dir)
 gen_legalizer.generate(isas, out_dir)
 gen_registers.generate(isas, out_dir)
 gen_binemit.generate(isas, out_dir)
 # Takes no output directory. Presumably this is what prints the dependency
 # list mentioned in lib/cretonne/build.rs -- confirm in gen_build_deps.
 gen_build_deps.generate()
--- a/lib/cretonne/meta/cdsl/init.py
+++ b/lib/cretonne/meta/cdsl/init.py
@@ -0,0 +1,59 @@
 """
 Cretonne DSL classes.
 This module defines the classes that are used to define Cretonne instructions
 and other entities.
 """
 from __future__ import absolute_import
 import re
 # Matches a lowercase letter at the start of the string or right after an
 # underscore; group 2 is the letter itself.
 camel_re = re.compile('(^|_)([a-z])')
 def camel_case(s):
    # type: (str) -> str
    """
    Return the snake_case string `s` rewritten in CamelCase:
        >>> camel_case('x')
        'X'
        >>> camel_case('camel_case')
        'CamelCase'
    """
    def upcase(match):
        # Keep only the letter, dropping any underscore that preceded it.
        return match.group(2).upper()
    return camel_re.sub(upcase, s)
 def is_power_of_two(x):
    # type: (int) -> bool
    """
    Return True when `x` is a positive integer with exactly one bit set:
        >>> is_power_of_two(0)
        False
        >>> is_power_of_two(1)
        True
        >>> is_power_of_two(2)
        True
        >>> is_power_of_two(3)
        False
    """
    if x <= 0:
        return False
    # Clearing the lowest set bit leaves zero only for a power of two.
    return (x & (x - 1)) == 0
 def next_power_of_two(x):
    # type: (int) -> int
    """
    Return the smallest power of two that is strictly greater than `x`:
        >>> next_power_of_two(0)
        1
        >>> next_power_of_two(1)
        2
        >>> next_power_of_two(2)
        4
        >>> next_power_of_two(3)
        4
        >>> next_power_of_two(4)
        8
    """
    shift = 1
    while True:
        # Once every bit below the top bit is set, `x` is one less than the
        # answer.
        if (x & (x + 1)) == 0:
            return x + 1
        # Smear the set bits downwards, doubling the distance each round.
        x |= x >> shift
        shift *= 2
--- a/lib/cretonne/meta/cdsl/ast.py
+++ b/lib/cretonne/meta/cdsl/ast.py
@@ -0,0 +1,501 @@
 """
 Abstract syntax trees.
 This module defines classes that can be used to create abstract syntax trees
 for pattern matching and rewriting of Cretonne instructions.
 """
 from __future__ import absolute_import
 from . import instructions
 from .typevar import TypeVar
 from .predicates import IsEqual, And, TypePredicate
 try:
    from typing import Union, Tuple, Sequence, TYPE_CHECKING, Dict, List  # noqa
    from typing import Optional, Set # noqa
    if TYPE_CHECKING:
        from .operands import ImmediateKind  # noqa
        from .predicates import PredNode  # noqa
        VarMap = Dict["Var", "Var"]
 except ImportError:
    pass
 def replace_var(arg, m):
    # type: (Expr, VarMap) -> Expr
    """
    Map a `Var` through `m`, creating a fresh replacement on first sight.

    If `arg` is a `Var`, return `m[arg]`; when `arg` is not yet a key of `m`,
    a new `Var` with the same name is created and recorded in `m` first.
    Any other kind of argument is returned unchanged and `m` is not touched.
    """
    if not isinstance(arg, Var):
        return arg
    if arg not in m:
        m[arg] = Var(arg.name)
    return m[arg]
 class Def(object):
    """
    Binds one or more variables to the values produced by an expression.

    A `Def` is normally built with the `<<` operator rather than by calling
    the constructor directly:

    >>> from base.instructions import iadd_cout, iconst
    >>> x = Var('x')
    >>> y = Var('y')
    >>> x << iconst(4)
    (Var(x),) << Apply(iconst, (4,))
    >>> (x, y) << iadd_cout(4, 5)
    (Var(x), Var(y)) << Apply(iadd_cout, (4, 5))

    :param defs: Single variable or tuple of variables to be defined.
    :param expr: Expression generating the values; must be an `Apply`.
    """
    def __init__(self, defs, expr):
        # type: (Union[Var, Tuple[Var, ...]], Apply) -> None
        # A lone variable is normalized to a 1-tuple so that `self.defs` is
        # always a tuple.
        if isinstance(defs, tuple):
            self.defs = defs  # type: Tuple[Var, ...]
        else:
            self.defs = (defs,)
        assert isinstance(expr, Apply)
        self.expr = expr
    def __repr__(self):
        # type: () -> str
        return "{0} << {1!r}".format(self.defs, self.expr)
    def __str__(self):
        # type: () -> str
        # A single definition is printed bare, anything else as a
        # parenthesized, comma-separated list.
        if len(self.defs) == 1:
            lhs = str(self.defs[0])
        else:
            lhs = "({})".format(', '.join(str(v) for v in self.defs))
        return "{} << {!s}".format(lhs, self.expr)
    def copy(self, m):
        # type: (VarMap) -> Def
        """
        Return a copy of this Def in which every variable is replaced
        according to the map `m`.

        Variables missing from `m` get fresh replacements, which are added to
        `m` as a side effect.
        """
        # The expression is copied before the defined variables, so its
        # operands are entered into `m` first.
        expr_copy = self.expr.copy(m)
        def_copies = []  # type: List[Var]
        for old in self.defs:
            fresh = replace_var(old, m)
            assert isinstance(fresh, Var)
            def_copies.append(fresh)
        return Def(tuple(def_copies), expr_copy)
    def definitions(self):
        # type: () -> Set[Var]
        """Return the set of all Vars that are defined (written) by self."""
        defined = set()  # type: Set[Var]
        defined.update(self.defs)
        return defined
    def uses(self):
        # type: () -> Set[Var]
        """Return the set of all Vars that are used (read) by self."""
        used = self.expr.vars()
        return set(used)
    def vars(self):
        # type: () -> Set[Var]
        """Return the set of all Vars in self that correspond to SSA values."""
        return self.definitions() | self.uses()
    def substitution(self, other, s):
        # type: (Def, VarMap) -> Optional[VarMap]
        """
        Extend the substitution `s` so that it transforms self into `other`.

        Returns None when the two Defs do not agree structurally, i.e. when
        no Var substitution can turn one into the other. The expressions are
        compared by `Apply.substitution()`; see there for the exact rules.
        On success each defined variable of self is mapped to the variable in
        the same position of `other`.
        """
        subst = self.expr.substitution(other.expr, s)
        if subst is None:
            return None
        assert len(self.defs) == len(other.defs)
        for mine, theirs in zip(self.defs, other.defs):
            assert mine not in subst  # Guaranteed by SSA form
            subst[mine] = theirs
        return subst
 class Expr(object):
    """
    An AST expression.

    Common base class of the expression node types in this module (`Var`,
    `Apply`, `ConstantInt` and `Enumerator`). It defines no attributes or
    methods of its own.
    """
 class Var(Expr):
    """
    A free variable in an AST pattern.

    When variables are used in `XForms` with source and destination patterns,
    each one falls into exactly one of these classes:

    Input values
        Uses in the source pattern with no preceding def. These may appear as
        inputs in the destination pattern too, but no new inputs can be
        introduced.
    Output values
        Variables that are defined in both the source and destination pattern.
        These values may have uses outside the source pattern, and the
        destination pattern must compute the same value.
    Intermediate values
        Values that are defined in the source pattern, but not in the
        destination pattern. These may have uses outside the source pattern, so
        the defining instruction can't be deleted immediately.
    Temporary values
        Values that are defined only in the destination pattern.

    :param name: Name of the variable, also used as its `str()` form.
    :param typevar: Optional `TypeVar` giving the type of the variable. When
                    omitted, one is created lazily by `get_typevar()`.
    """
    def __init__(self, name, typevar=None):
        # type: (str, TypeVar) -> None
        self.name = name
        # Defining `Def` in the source pattern, or None if there is none.
        self.src_def = None  # type: Def
        # Defining `Def` in the destination pattern, or None if there is none.
        self.dst_def = None  # type: Def
        # Current type variable for this Var. Other code may replace it with
        # a different typevar through `set_typevar()`.
        self.typevar = typevar  # type: TypeVar
        # The type variable this Var started out with. Unlike `self.typevar`
        # it is only ever assigned here and in `get_typevar()`.
        self.original_typevar = typevar  # type: TypeVar
    def __str__(self):
        # type: () -> str
        return self.name
    def __repr__(self):
        # type: () -> str
        # The name is followed by a marker for each pattern that defines it.
        flags = ""
        if self.src_def:
            flags += ", src"
        if self.dst_def:
            flags += ", dst"
        return "Var({})".format(self.name + flags)
    # Context values accepted by `set_def` / `get_def`, naming the pattern
    # (source or destination) whose definition is meant.
    SRCCTX = 1
    DSTCTX = 2
    def set_def(self, context, d):
        # type: (int, Def) -> None
        """
        Record `d` as the `Def` defining this variable in `context`.

        The `context` must be one of `SRCCTX` or `DSTCTX`. Any value other
        than `SRCCTX` is treated as `DSTCTX`.
        """
        slot = 'src_def' if context == self.SRCCTX else 'dst_def'
        setattr(self, slot, d)
    def get_def(self, context):
        # type: (int) -> Def
        """
        Return the `Def` defining this variable in `context`.

        The `context` must be one of `SRCCTX` or `DSTCTX`. Any value other
        than `SRCCTX` is treated as `DSTCTX`.
        """
        return self.src_def if context == self.SRCCTX else self.dst_def
    def is_input(self):
        # type: () -> bool
        """Is this an input value to the src pattern?"""
        in_src = self.src_def is not None
        in_dst = self.dst_def is not None
        return not in_src and not in_dst
    def is_output(self):
        # type: () -> bool
        """Is this an output value, defined in both src and dst patterns?"""
        in_src = self.src_def is not None
        in_dst = self.dst_def is not None
        return in_src and in_dst
    def is_intermediate(self):
        # type: () -> bool
        """Is this an intermediate value, defined only in the src pattern?"""
        in_src = self.src_def is not None
        in_dst = self.dst_def is not None
        return in_src and not in_dst
    def is_temp(self):
        # type: () -> bool
        """Is this a temp value, defined only in the dst pattern?"""
        in_src = self.src_def is not None
        in_dst = self.dst_def is not None
        return not in_src and in_dst
    def get_typevar(self):
        # type: () -> TypeVar
        """
        Get the type variable representing the type of this variable.

        If none has been set yet, a `TypeVar` allowing all types is created
        and stored as both the current and the original type variable.
        """
        if self.typevar:
            return self.typevar
        tv = TypeVar(
                'typeof_{}'.format(self),
                'Type of the pattern variable `{}`'.format(self),
                ints=True, floats=True, bools=True,
                scalars=True, simd=True, bitvecs=True)
        self.original_typevar = tv
        self.typevar = tv
        return tv
    def set_typevar(self, tv):
        # type: (TypeVar) -> None
        """Replace the current type variable; the original one is kept."""
        self.typevar = tv
    def has_free_typevar(self):
        # type: () -> bool
        """
        Check if this variable has a free type variable.

        If not, the type of this variable is computed from the type of another
        variable.
        """
        tv = self.typevar
        if not tv:
            return False
        if tv.is_derived:
            return False
        # Free only while the type variable is still the original one.
        return tv is self.original_typevar
    def rust_type(self):
        # type: () -> str
        """
        Get a Rust expression that computes the type of this variable.

        It is assumed that local variables exist corresponding to the free type
        variables.
        """
        tv = self.typevar
        return tv.rust_expr()
 class Apply(Expr):
    """
    An instruction applied to a tuple of arguments.

    An `Apply` AST expression is created by using function call syntax on
    instructions. This applies to both bound and unbound polymorphic
    instructions:

    >>> from base.instructions import jump, iadd
    >>> jump('next', ())
    Apply(jump, ('next', ()))
    >>> iadd.i32('x', 'y')
    Apply(iadd.i32, ('x', 'y'))

    :param inst: The instruction being applied, an `Instruction` or
                 `BoundInstruction` instance.
    :param args: Tuple of arguments.
    """
    def __init__(self, inst, args):
        # type: (instructions.MaybeBoundInst, Tuple[Expr, ...]) -> None  # noqa
        # A bound instruction is split into the underlying instruction and
        # the type variables it was bound with.
        if isinstance(inst, instructions.BoundInstruction):
            self.inst = inst.inst
            self.typevars = inst.typevars
        else:
            assert isinstance(inst, instructions.Instruction)
            self.inst = inst
            self.typevars = ()
        self.args = args
        # One argument is required per declared input operand.
        assert len(self.inst.ins) == len(args)
    def __rlshift__(self, other):
        # type: (Union[Var, Tuple[Var, ...]]) -> Def
        """
        Define variables using `var << expr` or `(v1, v2) << expr`.
        """
        return Def(other, self)
    def instname(self):
        # type: () -> str
        """
        Return the instruction name followed by `.type` for each bound type
        variable.
        """
        suffix = ''.join('.{}'.format(t) for t in self.typevars)
        return self.inst.name + suffix
    def __repr__(self):
        # type: () -> str
        return "Apply({0}, {1})".format(self.instname(), self.args)
    def __str__(self):
        # type: () -> str
        arg_list = ', '.join(str(a) for a in self.args)
        return '{}({})'.format(self.instname(), arg_list)
    def rust_builder(self, defs=None):
        # type: (Sequence[Var]) -> str
        """
        Return a Rust Builder method call for instantiating this instruction
        application.

        The `defs` argument should be a list of variables defined by this
        instruction. Its first element supplies the explicit result type,
        which is only needed when the instruction is polymorphic and does not
        use a typevar operand.
        """
        arg_list = ', '.join(str(a) for a in self.args)
        inst = self.inst
        # Do we need to pass an explicit type argument?
        if inst.is_polymorphic and not inst.use_typevar_operand:
            arg_list = defs[0].rust_type() + ', ' + arg_list
        return '{}({})'.format(inst.snake_name(), arg_list)
    def inst_predicate(self):
        # type: () -> PredNode
        """
        Construct an instruction predicate that verifies the immediate operands
        on this instruction.

        Immediate operands in a source pattern can be either free variables or
        constants like `ConstantInt` and `Enumerator`. We don't currently
        support constraints on free variables, but we may in the future.

        The result starts out as None and is built up with `And.combine`.
        """
        pred = None  # type: PredNode
        imm_fields = self.inst.format.imm_fields
        # One equality check per immediate operand that is not a free
        # variable; variable predicates may be added later.
        for ffield, opnum in zip(imm_fields, self.inst.imm_opnums):
            imm = self.args[opnum]
            if not isinstance(imm, Var):
                pred = And.combine(pred, IsEqual(ffield, imm))
        # One type check per type variable that was actually bound.
        for bound_ty, tv in zip(self.typevars, self.inst.all_typevars()):
            if bound_ty is not None:
                pred = And.combine(
                        pred,
                        TypePredicate.typevar_check(self.inst, tv, bound_ty))
        return pred
    def copy(self, m):
        # type: (VarMap) -> Apply
        """
        Return a copy of this Expr in which every variable argument is
        replaced according to the map `m`.

        Variables missing from `m` get fresh replacements, which are added to
        `m` as a side effect. The copy is built from `self.inst`.
        """
        new_args = tuple(replace_var(a, m) for a in self.args)
        return Apply(self.inst, new_args)
    def vars(self):
        # type: () -> Set[Var]
        """Return the set of all Vars in self that correspond to SSA values."""
        found = set()
        for opnum in self.inst.value_opnums:
            value = self.args[opnum]
            assert isinstance(value, Var)
            found.add(value)
        return found
    def substitution(self, other, s):
        # type: (Apply, VarMap) -> Optional[VarMap]
        """
        Extend the substitution `s` so that it transforms self into `other`.

        Returns None when the two applications do not agree structurally.
        They agree structurally if:

            1) They are over the same instruction
            2) Every Var v in self, maps to a single Var w in other. I.e for
               each use of v in self, w is used in the corresponding place in
               other.

        Constant arguments (`ConstantInt`, `Enumerator`) must be matched by a
        constant of the same class and value in `other`. Note that `s` is
        updated in place and may already have been extended when None is
        returned.
        """
        if self.inst != other.inst:
            return None
        # Guaranteed by self.inst == other.inst
        assert len(self.args) == len(other.args)
        for mine, theirs in zip(self.args, other.args):
            if isinstance(mine, Var):
                if not isinstance(theirs, Var):
                    return None
                # Record the mapping on first sight, otherwise insist that
                # it agrees with the one recorded earlier.
                if s.setdefault(mine, theirs) != theirs:
                    return None
                continue
            if isinstance(mine, ConstantInt):
                if not isinstance(theirs, ConstantInt):
                    return None
                assert mine.kind == theirs.kind
                if mine.value != theirs.value:
                    return None
                continue
            assert isinstance(mine, Enumerator)
            if not isinstance(theirs, Enumerator):
                # Currently don't support substitutions Var->Enumerator
                return None
            # Guaranteed by self.inst == other.inst
            assert mine.kind == theirs.kind
            if mine.value != theirs.value:
                return None
        return s
 class ConstantInt(Expr):
    """
    A value of an integer immediate operand.

    Immediate operands like `imm64` or `offset32` can be specified in AST
    expressions using the call syntax: `imm64(5)` which creates a `ConstantInt`
    node.

    :param kind: The `ImmediateKind` of the operand.
    :param value: The integer value.
    """
    def __init__(self, kind, value):
        # type: (ImmediateKind, int) -> None
        self.value = value
        self.kind = kind
    def __str__(self):
        # type: () -> str
        """
        Get the Rust expression form of this constant.
        """
        return '{!s}'.format(self.value)
    def __repr__(self):
        # type: () -> str
        return '{0}({1})'.format(self.kind, self.value)
 class Enumerator(Expr):
    """
    A value of an enumerated immediate operand.

    Some immediate operand kinds like `intcc` and `floatcc` have an enumerated
    range of values corresponding to a Rust enum type. An `Enumerator` object
    is an AST leaf node representing one of the values.

    `Enumerator` nodes are not usually created directly. They are created by
    using the dot syntax on immediate kinds: `intcc.ult`.

    :param kind: The enumerated `ImmediateKind` containing the value.
    :param value: The textual IL representation of the value.
    """
    def __init__(self, kind, value):
        # type: (ImmediateKind, str) -> None
        self.value = value
        self.kind = kind
    def __str__(self):
        # type: () -> str
        """
        Get the Rust expression form of this enumerator.
        """
        kind = self.kind
        return kind.rust_enumerator(self.value)
    def __repr__(self):
        # type: () -> str
        return '{0}.{1}'.format(self.kind, self.value)
--- a/lib/cretonne/meta/cdsl/formats.py
+++ b/lib/cretonne/meta/cdsl/formats.py
@@ -0,0 +1,232 @@
 """Classes for describing instruction formats."""
 from __future__ import absolute_import
 from .operands import OperandKind, VALUE, VARIABLE_ARGS
 from .operands import Operand  # noqa
 # The typing module is only required by mypy, and we don't use these imports
 # outside type comments.
 try:
    from typing import Dict, List, Tuple, Union, Any, Sequence, Iterable  # noqa
 except ImportError:
    pass
 class InstructionContext(object):
    """
    Predicate context for predicates that are independent of the instruction
    format.

    Most instruction predicates refer to immediate fields of a specific
    instruction format, so their `predicate_context()` method returns the
    specific instruction format.

    Predicates that only care about the types of SSA values are independent of
    the instruction format. They can be evaluated in the context of any
    instruction.

    The singleton `InstructionContext` class serves as the predicate context
    for these predicates.
    """
    def __init__(self):
        # type: () -> None
        # Fixed name of this context; every instance gets the same value.
        self.name = 'inst'
 # The single shared instance. `InstructionFormat` objects store it as their
 # `parent`.
 instruction_context = InstructionContext()
 class InstructionFormat(object):
    """
    Every instruction opcode has a corresponding instruction format which
    determines the number of operands and their kinds. Instruction formats are
    identified structurally, i.e., the format of an instruction is derived from
    the kinds of operands used in its declaration.

    The instruction format stores two separate lists of operands: Immediates
    and values. Immediate operands (including entity references) are
    represented as explicit members in the `InstructionData` variants. The
    value operands are stored differently, depending on how many there are.
    Beyond a certain point, instruction formats switch to an external value
    list for storing value arguments. Value lists can hold an arbitrary number
    of values.

    All instruction formats must be predefined in the
    :py:mod:`cretonne.formats` module.

    Creating a format registers it globally; a second format with the same
    signature raises `RuntimeError`.

    :param kinds: List of `OperandKind` objects describing the operands.
    :param name: Instruction format name in CamelCase. This is used as a Rust
        variant name in both the `InstructionData` and `InstructionFormat`
        enums.
    :param typevar_operand: Index of the value input operand that is used to
        infer the controlling type variable. By default, this is `0`, the first
        `value` operand. The index is relative to the values only, ignoring
        immediate operands.
    """
    # Map (imm_kinds, num_value_operands, has_value_list) -> format
    _registry = dict()  # type: Dict[Tuple[Tuple[OperandKind, ...], int, bool], InstructionFormat]  # noqa
    # All existing formats, in creation order.
    all_formats = list()  # type: List[InstructionFormat]
    def __init__(self, *kinds, **kwargs):
        # type: (*Union[OperandKind, Tuple[str, OperandKind]], **Any) -> None # noqa
        self.name = kwargs.get('name', None)  # type: str
        self.parent = instruction_context
        # The number of `VALUE` operands in `kinds`. Counted by
        # `_process_member_names()` below.
        self.num_value_operands = 0
        # Does this format use a value list for storing value operands?
        # Set by `_process_member_names()` when `VARIABLE_ARGS` is seen.
        self.has_value_list = False
        # Operand fields for the immediate operands. All other instruction
        # operands are values or variable argument lists. They are all handled
        # specially.
        # The generator must be drained here, before the two counters above
        # are read, because it updates them as it runs.
        self.imm_fields = tuple(self._process_member_names(kinds))
        # The typevar_operand argument must point to a 'value' operand.
        self.typevar_operand = kwargs.get('typevar_operand', None)  # type: int
        if self.typevar_operand is not None:
            # The index can only be range-checked when the values are not
            # kept in a value list.
            if not self.has_value_list:
                assert self.typevar_operand < self.num_value_operands, \
                        "typevar_operand must indicate a 'value' operand"
        elif self.has_value_list or self.num_value_operands > 0:
            # Default to the first 'value' operand, if there is one.
            self.typevar_operand = 0
        # Compute a signature for the global registry.
        imm_kinds = tuple(f.kind for f in self.imm_fields)
        sig = (imm_kinds, self.num_value_operands, self.has_value_list)
        if sig in InstructionFormat._registry:
            raise RuntimeError(
                "Format '{}' has the same signature as existing format '{}'"
                .format(self.name, InstructionFormat._registry[sig]))
        InstructionFormat._registry[sig] = self
        InstructionFormat.all_formats.append(self)
    def _process_member_names(self, kinds):
        # type: (Sequence[Union[OperandKind, Tuple[str, OperandKind]]]) -> Iterable[FormatField]  # noqa
        """
        Extract names of all the immediate operands in the kinds tuple.

        Each entry is either an `OperandKind` instance, or a `(member, kind)`
        pair. The member names correspond to members in the Rust
        `InstructionData` data structure.

        Updates the fields `self.num_value_operands` and `self.has_value_list`.
        This is a generator, so those updates only happen as it is consumed.

        Yields the immediate operand fields.
        """
        # Running index of the next immediate field.
        inum = 0
        for arg in kinds:
            if isinstance(arg, OperandKind):
                member = arg.default_member
                k = arg
            else:
                member, k = arg
            # We define 'immediate' as not a value or variable arguments.
            if k is VALUE:
                self.num_value_operands += 1
            elif k is VARIABLE_ARGS:
                self.has_value_list = True
            else:
                yield FormatField(self, inum, k, member)
                inum += 1
    def __str__(self):
        # type: () -> str
        args = ', '.join(
                '{}: {}'.format(f.member, f.kind) for f in self.imm_fields)
        return '{}(imms=({}), vals={})'.format(
                self.name, args, self.num_value_operands)
    def __getattr__(self, attr):
        # type: (str) -> FormatField
        """
        Make immediate instruction format members available as attributes.

        Each non-value format member becomes a corresponding `FormatField`
        attribute.

        :raises AttributeError: if `attr` is not the member name of one of
            the immediate fields.
        """
        # `__getattr__` only runs after normal attribute lookup has failed.
        # Instance state is therefore read straight from `__dict__`: going
        # through `self.imm_fields` / `self.name` on an object that does not
        # have them yet (created with `__new__`, or probed for special
        # methods by `copy`/`pickle`) would re-enter this method and end in
        # a RecursionError instead of the AttributeError callers expect.
        state = self.__dict__
        for f in state.get('imm_fields', ()):
            if f.member == attr:
                # Cache this field attribute so we won't have to search again.
                setattr(self, attr, f)
                return f
        raise AttributeError(
                '{} is neither a {} member or a '
                .format(attr, state.get('name')) +
                'normal InstructionFormat attribute')
    @staticmethod
    def lookup(ins, outs):
        # type: (Sequence[Operand], Sequence[Operand]) -> InstructionFormat
        """
        Find an existing instruction format that matches the given lists of
        instruction inputs and outputs.

        The `ins` and `outs` arguments correspond to the
        :py:class:`Instruction` arguments of the same name, except they must be
        tuples of :py:class:`Operand` objects. Only `ins` takes part in the
        match; `outs` is currently not consulted.

        :raises RuntimeError: if no registered format matches.
        """
        # Construct a signature.
        imm_kinds = tuple(op.kind for op in ins if op.is_immediate())
        num_values = sum(1 for op in ins if op.is_value())
        has_varargs = (VARIABLE_ARGS in tuple(op.kind for op in ins))
        sig = (imm_kinds, num_values, has_varargs)
        if sig in InstructionFormat._registry:
            return InstructionFormat._registry[sig]
        # Try another value list format as an alternative.
        sig = (imm_kinds, 0, True)
        if sig in InstructionFormat._registry:
            return InstructionFormat._registry[sig]
        raise RuntimeError(
                'No instruction format matches '
                'imms={}, vals={}, varargs={}'.format(
                    imm_kinds, num_values, has_varargs))
    @staticmethod
    def extract_names(globs):
        # type: (Dict[str, Any]) -> None
        """
        Given a dict mapping name -> object as returned by `globals()`, find
        all the InstructionFormat objects and set their name from the dict key.

        This is used to name a bunch of global variables in a module. Every
        format found must still be unnamed.
        """
        for name, obj in globs.items():
            if isinstance(obj, InstructionFormat):
                assert obj.name is None
                obj.name = name
 class FormatField(object):
    """
    One immediate field of an instruction format.

    This corresponds to a single member of a variant of the `InstructionData`
    data type.

    :param iform: Parent `InstructionFormat`, stored as `self.format`.
    :param immnum: Immediate operand number in parent.
    :param kind: Immediate Operand kind.
    :param member: Member name in `InstructionData` variant.
    """
    def __init__(self, iform, immnum, kind, member):
        # type: (InstructionFormat, int, OperandKind, str) -> None
        self.member = member
        self.kind = kind
        self.immnum = immnum
        self.format = iform
    def __str__(self):
        # type: () -> str
        # Printed as `<format name>.<member>`.
        parent_name = self.format.name
        return '{0}.{1}'.format(parent_name, self.member)
    def rust_name(self):
        # type: () -> str
        """Return the member name, which doubles as the Rust field name."""
        return self.member
--- a/lib/cretonne/meta/cdsl/instructions.py
+++ b/lib/cretonne/meta/cdsl/instructions.py
@@ -0,0 +1,427 @@
 """Classes for defining instructions."""
 from __future__ import absolute_import
 from . import camel_case
 from .types import ValueType
 from .operands import Operand
 from .formats import InstructionFormat
 try:
    from typing import Union, Sequence, List, Tuple, Any, TYPE_CHECKING  # noqa
    from typing import Dict # noqa
    if TYPE_CHECKING:
        from .ast import Expr, Apply, Var, Def  # noqa
        from .typevar import TypeVar  # noqa
        from .ti import TypeConstraint  # noqa
        from .xform import XForm, Rtl
        # List of operands for ins/outs:
        OpList = Union[Sequence[Operand], Operand]
        ConstrList = Union[Sequence[TypeConstraint], TypeConstraint]
        MaybeBoundInst = Union['Instruction', 'BoundInstruction']
        InstructionSemantics = Sequence[XForm]
        RtlCase = Union[Rtl, Tuple[Rtl, Sequence[TypeConstraint]]]
 except ImportError:
    pass
 class InstructionGroup(object):
    """
    A named collection of instructions.
    Every instruction must belong to exactly one instruction group. A given
    target architecture can support instructions from multiple groups, and it
    does not necessarily support all instructions in a group.
    At most one group is open at a time; `append()` adds new instructions to
    that group. Creating a group opens it.
    """
    # The currently open instruction group, shared by all instances.
    _current = None  # type: InstructionGroup
    def __init__(self, name, doc):
        # type: (str, str) -> None
        self.name = name
        self.__doc__ = doc
        self.instructions = []  # type: List[Instruction]
        # A freshly created group becomes the open one right away.
        self.open()
    def open(self):
        # type: () -> None
        """
        Make this the open group, so that instructions appended from now on
        land here. Asserts that no group is currently open.
        """
        current = InstructionGroup._current
        assert current is None, (
                "Can't open {} since {} is already open"
                .format(self, current))
        InstructionGroup._current = self
    def close(self):
        # type: () -> None
        """
        Close this group. Asserts that this group is the open one; call this
        before opening another group.
        """
        current = InstructionGroup._current
        assert current is self, (
                "Can't close {}, the open instuction group is {}"
                .format(self, current))
        InstructionGroup._current = None
    @staticmethod
    def append(inst):
        # type: (Instruction) -> None
        """Add `inst` to the open group; asserts that a group is open."""
        group = InstructionGroup._current
        assert group, \
                "Open an instruction group before defining instructions."
        group.instructions.append(inst)
 class Instruction(object):
    """
    An instruction definition.
    The operands to the instruction are specified as two tuples: ``ins`` and
    ``outs``. Since the Python singleton tuple syntax is a bit awkward, it is
    allowed to specify a singleton as just the operand itself, i.e., `ins=x`
    and `ins=(x,)` are both allowed and mean the same thing.
    Constructing an instruction appends it to the currently open
    `InstructionGroup`.
    :param name: Instruction mnemonic, also becomes opcode name.
    :param doc: Documentation string.
    :param ins: Tuple of input operands. This can be a mix of SSA value
                operands and other operand kinds.
    :param outs: Tuple of output operands. The output operands must be SSA
                values or `variable_args`.
    :param constraints: Tuple of instruction-specific TypeConstraints.
    :param is_terminator: This is a terminator instruction.
    :param is_branch: This is a branch instruction.
    :param is_call: This is a call instruction.
    :param is_return: This is a return instruction.
    :param can_trap: This instruction can trap.
    :param can_load: This instruction can load from memory.
    :param can_store: This instruction can store to memory.
    :param other_side_effects: Instruction has other side effects.
    """
    # Boolean instruction attributes that can be passed as keyword arguments to
    # the constructor. Map attribute name to doc comment for generated Rust
    # code.
    ATTRIBS = {
            'is_terminator': 'True for instructions that terminate the EBB.',
            'is_branch': 'True for all branch or jump instructions.',
            'is_call': 'Is this a call instruction?',
            'is_return': 'Is this a return instruction?',
            'can_load': 'Can this instruction read from memory?',
            'can_store': 'Can this instruction write to memory?',
            'can_trap': 'Can this instruction cause a trap?',
            'other_side_effects':
            'Does this instruction have other side effects besides can_*',
            }
    def __init__(self, name, doc, ins=(), outs=(), constraints=(), **kwargs):
        # type: (str, str, OpList, OpList, ConstrList, **Any) -> None
        self.name = name
        self.camel_name = camel_case(name)
        self.__doc__ = doc
        self.ins = self._to_operand_tuple(ins)
        self.outs = self._to_operand_tuple(outs)
        self.constraints = self._to_constraint_tuple(constraints)
        self.format = InstructionFormat.lookup(self.ins, self.outs)
        self.semantics = None  # type: InstructionSemantics
        # Opcode number, assigned by gen_instr.py.
        self.number = None  # type: int
        # Indexes into `self.outs` for value results.
        # Other results are `variable_args`.
        self.value_results = tuple(
                i for i, o in enumerate(self.outs) if o.is_value())
        # Indexes into `self.ins` for value operands.
        self.value_opnums = tuple(
                i for i, o in enumerate(self.ins) if o.is_value())
        # Indexes into `self.ins` for non-value operands.
        self.imm_opnums = tuple(
                i for i, o in enumerate(self.ins) if o.is_immediate())
        self._verify_polymorphic()
        # Only the flags listed in ATTRIBS are accepted as keyword arguments.
        for attr in kwargs:
            if attr not in Instruction.ATTRIBS:
                raise AssertionError(
                        "unknown instruction attribute '" + attr + "'")
        # Every flag becomes a plain bool attribute; omitted flags are False.
        for attr in Instruction.ATTRIBS:
            setattr(self, attr, bool(kwargs.get(attr, False)))
        InstructionGroup.append(self)
    def __str__(self):
        # type: () -> str
        # Rendered as `outs = name ins`; the `outs = ` part is dropped when
        # there are no outputs.
        prefix = ', '.join(o.name for o in self.outs)
        if prefix:
            prefix = prefix + ' = '
        suffix = ', '.join(o.name for o in self.ins)
        return '{}{} {}'.format(prefix, self.name, suffix)
    def snake_name(self):
        # type: () -> str
        """
        Get the snake_case name of this instruction.
        Keywords in Rust and Python are altered by appending a '_'. Only
        `return` is special-cased here.
        """
        if self.name == 'return':
            return 'return_'
        return self.name
    def blurb(self):
        # type: () -> str
        """
        Get the first non-blank line of the doc comment, stripped of
        surrounding whitespace, or the empty string if every line is blank.
        """
        stripped = (line.strip() for line in self.__doc__.split('\n'))
        return next((line for line in stripped if line), "")
    def _verify_polymorphic(self):
        # type: () -> None
        """
        Check if this instruction is polymorphic, and verify its use of type
        variables.
        Sets `is_polymorphic`. For polymorphic instructions it also sets
        `use_typevar_operand`, `ctrl_typevar` and `other_typevars`, or raises
        `RuntimeError` when no controlling type variable can be determined.
        """
        poly_ins = [
                i for i in self.value_opnums
                if self.ins[i].typevar.free_typevar()]
        poly_outs = [
                i for i, o in enumerate(self.outs)
                if o.is_value() and o.typevar.free_typevar()]
        self.is_polymorphic = bool(poly_ins or poly_outs)
        if not self.is_polymorphic:
            return
        # Prefer to use the typevar_operand to infer the controlling typevar.
        self.use_typevar_operand = False
        typevar_error = None
        if self.format.typevar_operand is not None:
            try:
                opnum = self.value_opnums[self.format.typevar_operand]
                tv = self.ins[opnum].typevar
                if tv is tv.free_typevar() or tv.singleton_type() is not None:
                    self.other_typevars = self._verify_ctrl_typevar(tv)
                    self.ctrl_typevar = tv
                    self.use_typevar_operand = True
            except RuntimeError as e:
                # Remember the failure; it is re-raised below only if the
                # first result cannot be used either.
                typevar_error = e
        if not self.use_typevar_operand:
            # The typevar_operand argument doesn't work. Can we infer from the
            # first result instead?
            if len(self.outs) == 0:
                if typevar_error:
                    raise typevar_error
                else:
                    raise RuntimeError(
                            "typevar_operand must be a free type variable")
            tv = self.outs[0].typevar
            if tv is not tv.free_typevar():
                raise RuntimeError("first result must be a free type variable")
            self.other_typevars = self._verify_ctrl_typevar(tv)
            self.ctrl_typevar = tv
    def _verify_ctrl_typevar(self, ctrl_typevar):
        # type: (TypeVar) -> List[TypeVar]
        """
        Verify that the use of TypeVars is consistent with `ctrl_typevar` as
        the controlling type variable.
        All polymorphic inputs must either be derived from `ctrl_typevar` or be
        independent free type variables only used once.
        All polymorphic results must be derived from `ctrl_typevar`.
        Return list of other type variables used, or raise `RuntimeError`.
        """
        other_tvs = []  # type: List[TypeVar]
        # Check value inputs.
        for opnum in self.value_opnums:
            typ = self.ins[opnum].typevar
            tv = typ.free_typevar()
            # Non-polymorphic or derived from ctrl_typevar is OK.
            if tv is None or tv is ctrl_typevar:
                continue
            # No other derived typevars allowed.
            if typ is not tv:
                raise RuntimeError(
                        "{}: type variable {} must be derived from {}"
                        .format(self.ins[opnum], typ.name, ctrl_typevar))
            # Other free type variables can only be used once each.
            if tv in other_tvs:
                raise RuntimeError(
                        "type variable {} can't be used more than once"
                        .format(tv.name))
            other_tvs.append(tv)
        # Check outputs.
        for result in self.outs:
            if not result.is_value():
                continue
            typ = result.typevar
            tv = typ.free_typevar()
            # Non-polymorphic or derived from ctrl_typevar is OK.
            if tv is None or tv is ctrl_typevar:
                continue
            raise RuntimeError(
                    "type variable in output not derived from ctrl_typevar")
        return other_tvs
    def all_typevars(self):
        # type: () -> List[TypeVar]
        """
        Get a list of all type variables in the instruction: the controlling
        type variable first, then the others. Empty if not polymorphic.
        """
        if self.is_polymorphic:
            return [self.ctrl_typevar] + self.other_typevars
        return []
    @staticmethod
    def _to_operand_tuple(x):
        # type: (Union[Sequence[Operand], Operand]) -> Tuple[Operand, ...]
        """
        Normalize `x` to a tuple of `Operand`s, allowing a single Operand
        instance instead of the awkward singleton tuple syntax.
        """
        x = (x,) if isinstance(x, Operand) else tuple(x)
        for op in x:
            assert isinstance(op, Operand)
        return x
    @staticmethod
    def _to_constraint_tuple(x):
        # type: (ConstrList) -> Tuple[TypeConstraint, ...]
        """
        Normalize `x` to a tuple of `TypeConstraint`s, allowing a single
        TypeConstraint instance instead of the awkward singleton tuple syntax.
        """
        # import placed here to avoid circular dependency
        from .ti import TypeConstraint  # noqa
        x = (x,) if isinstance(x, TypeConstraint) else tuple(x)
        for op in x:
            assert isinstance(op, TypeConstraint)
        return x
    def bind(self, *args):
        # type: (*ValueType) -> BoundInstruction
        """
        Bind a polymorphic instruction to a concrete list of type variable
        values.
        """
        assert self.is_polymorphic
        return BoundInstruction(self, args)
    def __getattr__(self, name):
        # type: (str) -> BoundInstruction
        """
        Bind a polymorphic instruction to a single type variable with dot
        syntax:
        >>> iadd.i32
        Double-underscore names are reported as missing attributes instead of
        being looked up as value types, so `hasattr()` and protocol probes
        behave normally (assumes no value type name starts with `__`).
        """
        if name.startswith('__'):
            raise AttributeError(name)
        assert name != 'any', 'Wildcard not allowed for ctrl_typevar'
        return self.bind(ValueType.by_name(name))
    def fully_bound(self):
        # type: () -> Tuple[Instruction, Tuple[ValueType, ...]]
        """
        Verify that all typevars have been bound, and return a
        `(inst, typevars)` pair.
        This version in `Instruction` itself allows a non-polymorphic
        instruction to duck-type as a `BoundInstruction`.
        """
        assert not self.is_polymorphic, self
        return (self, ())
    def __call__(self, *args):
        # type: (*Expr) -> Apply
        """
        Create an `ast.Apply` AST node representing the application of this
        instruction to the arguments.
        """
        from .ast import Apply  # noqa
        return Apply(self, args)
    def set_semantics(self, src, *dsts):
        # type: (Union[Def, Apply], *RtlCase) -> None
        """
        Set our semantics.
        Each entry of `dsts` is either an `Rtl` or an `(Rtl, constraints)`
        tuple. One `XForm` from a fresh copy of `Rtl(src)` is built per entry,
        and the list is checked with `verify_semantics` before being stored in
        `self.semantics`.
        """
        # NOTE(review): absolute import, unlike the relative one below;
        # presumably resolved through sys.path -- confirm.
        from semantics import verify_semantics
        from .xform import XForm, Rtl
        sem = []  # type: List[XForm]
        for dst in dsts:
            if isinstance(dst, Rtl):
                sem.append(XForm(Rtl(src).copy({}), dst))
            else:
                assert isinstance(dst, tuple)
                sem.append(XForm(Rtl(src).copy({}), dst[0],
                                 constraints=dst[1]))
        verify_semantics(self, Rtl(src), sem)
        self.semantics = sem
 class BoundInstruction(object):
    """
    A polymorphic `Instruction` bound to concrete type variables.
    `typevars` holds the bound value types in order: the controlling type
    variable first, then the instruction's other type variables. A `None`
    entry is the `any` wildcard.
    """
    def __init__(self, inst, typevars):
        # type: (Instruction, Tuple[ValueType, ...]) -> None
        self.inst = inst
        self.typevars = typevars
        # Can't bind more than the controlling typevar plus the others.
        assert len(typevars) <= 1 + len(inst.other_typevars)
    def __str__(self):
        # type: () -> str
        # Rendered with dot syntax, e.g. `uext.i32.i8`.
        return '.'.join([self.inst.name] + [str(tv) for tv in self.typevars])
    def bind(self, *args):
        # type: (*ValueType) -> BoundInstruction
        """
        Bind additional typevars, returning a new `BoundInstruction`.
        """
        return BoundInstruction(self.inst, self.typevars + args)
    def __getattr__(self, name):
        # type: (str) -> BoundInstruction
        """
        Bind an additional typevar dot syntax:
        >>> uext.i32.i8
        Double-underscore names are reported as missing attributes instead of
        being looked up as value types, so `hasattr()` and protocol probes
        behave normally (assumes no value type name starts with `__`).
        """
        if name.startswith('__'):
            raise AttributeError(name)
        if name == 'any':
            # This is a wild card bind represented as a None type variable.
            return self.bind(None)
        return self.bind(ValueType.by_name(name))
    def fully_bound(self):
        # type: () -> Tuple[Instruction, Tuple[ValueType, ...]]
        """
        Verify that all typevars have been bound, and return a
        `(inst, typevars)` pair.
        Raises `AssertionError` naming the unbound type variables otherwise.
        """
        expected = 1 + len(self.inst.other_typevars)
        bound = len(self.typevars)
        if bound < expected:
            # Slice the full typevar list (controlling typevar first) so the
            # message is also right when nothing has been bound yet; slicing
            # `other_typevars` from `bound - 1` would wrap around to -1.
            unb = ', '.join(
                    str(tv) for tv in self.inst.all_typevars()[bound:])
            raise AssertionError("Unbound typevar {} in {}".format(unb, self))
        assert bound == expected
        return (self.inst, self.typevars)
    def __call__(self, *args):
        # type: (*Expr) -> Apply
        """
        Create an `ast.Apply` AST node representing the application of this
        instruction to the arguments.
        """
        from .ast import Apply  # noqa
        return Apply(self, args)
--- a/lib/cretonne/meta/cdsl/isa.py
+++ b/lib/cretonne/meta/cdsl/isa.py
@@ -0,0 +1,462 @@
 """Defining instruction set architectures."""
 from __future__ import absolute_import
 from collections import OrderedDict
 from .predicates import And, TypePredicate
 from .registers import RegClass, Register, Stack
 from .ast import Apply
 from .types import ValueType
 from .instructions import InstructionGroup
 # The typing module is only required by mypy, and we don't use these imports
 # outside type comments.
 try:
    from typing import Tuple, Union, Any, Iterable, Sequence, List, Set, Dict, TYPE_CHECKING  # noqa
    if TYPE_CHECKING:
        from .instructions import MaybeBoundInst, InstructionGroup, InstructionFormat  # noqa
        from .predicates import PredNode, PredKey  # noqa
        from .settings import SettingGroup  # noqa
        from .registers import RegBank  # noqa
        from .xform import XFormGroup  # noqa
        OperandConstraint = Union[RegClass, Register, int, Stack]
        ConstraintSeq = Union[OperandConstraint, Tuple[OperandConstraint, ...]]
        # Instruction specification for encodings. Allows for predicated
        # instructions.
        InstSpec = Union[MaybeBoundInst, Apply]
        BranchRange = Sequence[int]
        # A recipe predicate consisting of an ISA predicate and an instruction
        # predicate.
        RecipePred = Tuple[PredNode, PredNode]
 except ImportError:
    pass
 class TargetISA(object):
    """
    A target instruction set architecture.
    The `TargetISA` class collects everything known about a target ISA: its
    settings, instruction groups, CPU modes, register banks and classes, and
    the legalization codes assigned to transform groups.
    :param name: Short mnemonic name for the ISA.
    :param instruction_groups: List of `InstructionGroup` instances that are
        relevant for this ISA.
    """
    def __init__(self, name, instruction_groups):
        # type: (str, Sequence[InstructionGroup]) -> None
        self.name = name
        self.settings = None  # type: SettingGroup
        self.instruction_groups = instruction_groups
        self.cpumodes = []  # type: List[CPUMode]
        self.regbanks = []  # type: List[RegBank]
        self.regclasses = []  # type: List[RegClass]
        self.legalize_codes = OrderedDict()  # type: OrderedDict[XFormGroup, int]  # noqa
        # Unique copies of all predicates, indexed by predicate key.
        self._predicates = {}  # type: Dict[PredKey, PredNode]
        # No instruction group may still be open when an ISA is created.
        assert InstructionGroup._current is None,\
            "InstructionGroup {} is still open!"\
            .format(InstructionGroup._current.name)
    def __str__(self):
        # type: () -> str
        return self.name
    def finish(self):
        # type: () -> TargetISA
        """
        Finish the definition of a target ISA after adding all CPU modes and
        settings.
        Runs the four `_collect_*` passes in order to compute derived
        properties that are used in multiple places.
        :returns self:
        """
        for collect in (self._collect_encoding_recipes,
                        self._collect_predicates,
                        self._collect_regclasses,
                        self._collect_legalize_codes):
            collect()
        return self
    def _collect_encoding_recipes(self):
        # type: () -> None
        """
        Collect and number all encoding recipes in use.
        Recipes are numbered in the order they are first seen while walking
        the encodings of each CPU mode, and their predicates are replaced by
        the ISA's unique copies.
        """
        self.all_recipes = []  # type: List[EncRecipe]
        seen = set()  # type: Set[EncRecipe]
        for mode in self.cpumodes:
            for encoding in mode.encodings:
                rcp = encoding.recipe
                if rcp in seen:
                    continue
                assert rcp.number is None
                rcp.number = len(seen)
                seen.add(rcp)
                self.all_recipes.append(rcp)
                # Make sure ISA predicates are registered.
                if rcp.isap:
                    rcp.isap = self.unique_pred(rcp.isap)
                    self.settings.number_predicate(rcp.isap)
                rcp.instp = self.unique_pred(rcp.instp)
    def _collect_predicates(self):
        # type: () -> None
        """
        Collect and number all predicates in use.
        Instruction predicates are numbered from 0 in `self.instp_number`.
        Ensures that all ISA predicates have an assigned bit number in
        `self.settings`.
        """
        self.instp_number = OrderedDict()  # type: OrderedDict[PredNode, int]
        for mode in self.cpumodes:
            for encoding in mode.encodings:
                pred = encoding.instp
                if pred and pred not in self.instp_number:
                    # The next free number is the current table size.
                    self.instp_number[pred] = len(self.instp_number)
                # All referenced ISA predicates must have a number in
                # `self.settings`. This may cause some parent predicates to be
                # replicated here, which is OK.
                if encoding.isap:
                    self.settings.number_predicate(encoding.isap)
    def _collect_regclasses(self):
        # type: () -> None
        """
        Collect and number register classes.
        Every register class needs a unique index, and the classes need to be
        topologically ordered.
        We also want all the top-level register classes to be first.
        """
        # Compute subclasses and top-level classes in each bank.
        # Collect the top-level classes so they get numbered consecutively.
        for bank in self.regbanks:
            bank.finish_regclasses()
            self.regclasses += bank.toprcs
        # The limit on the number of top-level register classes can be raised.
        # This should be coordinated with the `MAX_TOPRCS` constant in
        # `isa/registers.rs`.
        assert len(self.regclasses) <= 4, "Too many top-level register classes"
        # Collect all of the non-top-level register classes.
        # They are numbered strictly after the top-level classes.
        for bank in self.regbanks:
            subclasses = [rc for rc in bank.classes if not rc.is_toprc()]
            self.regclasses += subclasses
        for position, regclass in enumerate(self.regclasses):
            regclass.index = position
        # The limit on the number of register classes can be changed. It should
        # be coordinated with the `RegClassMask` and `RegClassIndex` types in
        # `isa/registers.rs`.
        assert len(self.regclasses) <= 32, "Too many register classes"
    def _collect_legalize_codes(self):
        # type: () -> None
        """
        Make sure all legalization transforms have been assigned a code.
        For each CPU mode the default action is registered first, then the
        per-type actions ordered by name.
        """
        for mode in self.cpumodes:
            self.legalize_code(mode.default_legalize)
            groups = list(mode.type_legalize.values())
            groups.sort(key=lambda grp: grp.name)
            for grp in groups:
                self.legalize_code(grp)
    def legalize_code(self, xgrp):
        # type: (XFormGroup) -> int
        """
        Get the legalization code for the transform group `xgrp`. Assign one if
        necessary.
        Each target ISA has its own list of legalization actions with
        associated legalize codes that appear in the encoding tables.
        This method is used to maintain the registry of legalization actions
        and their table codes.
        """
        # A new group gets the next free code, i.e. the current table size.
        return self.legalize_codes.setdefault(xgrp, len(self.legalize_codes))
    def unique_pred(self, pred):
        # type: (PredNode) -> PredNode
        """
        Get a unique predicate that is equivalent to `pred`.
        The first predicate seen for a given `predicate_key()` is the one
        returned for all later predicates with that key. `None` is passed
        through.
        """
        if pred is None:
            return pred
        # TODO: We could actually perform some algebraic simplifications. It's
        # not clear if it is worthwhile.
        return self._predicates.setdefault(pred.predicate_key(), pred)
 class CPUMode(object):
    """
    A CPU mode determines which instruction encodings are active.
    All instruction encodings are associated with exactly one `CPUMode`, and
    all CPU modes are associated with exactly one `TargetISA`. A new mode
    registers itself in `isa.cpumodes`.
    :param name: Short mnemonic name for the CPU mode.
    :param isa: Associated `TargetISA`.
    """
    def __init__(self, name, isa):
        # type: (str, TargetISA) -> None
        self.name = name
        self.isa = isa
        self.encodings = []  # type: List[Encoding]
        # Tables for configuring legalization actions when no valid encoding
        # exists for an instruction.
        self.default_legalize = None  # type: XFormGroup
        self.type_legalize = {}  # type: Dict[ValueType, XFormGroup]
        isa.cpumodes.append(self)
    def __str__(self):
        # type: () -> str
        return self.name
    def enc(self, *args, **kwargs):
        # type: (*Any, **Any) -> None
        """
        Add a new encoding to this CPU mode.
        Arguments are the `Encoding` constructor arguments, except for the
        first `CPUMode` argument which is implied.
        """
        encoding = Encoding(self, *args, **kwargs)
        self.encodings.append(encoding)
    def legalize_type(self, default=None, **kwargs):
        # type: (XFormGroup, **XFormGroup) -> None
        """
        Configure the legalization action per controlling type variable.
        Instructions that have a controlling type variable mentioned in one of
        the arguments will be legalized according to the action specified here
        instead of using the `default_legalize` action.
        The keyword arguments are value type names:
            mode.legalize_type(i8=widen, i16=widen, i32=expand)
        The `default` argument specifies the action to take for controlling
        type variables that don't have an explicitly configured action. A
        `default` of `None` leaves the current default unchanged.
        """
        if default is not None:
            self.default_legalize = default
        for type_name, action in kwargs.items():
            self.type_legalize[ValueType.by_name(type_name)] = action
    def get_legalize_action(self, ty):
        # type: (ValueType) -> XFormGroup
        """
        Get the legalization action to use for `ty`: the action configured for
        that type, or else the default action.
        """
        fallback = self.default_legalize
        return self.type_legalize.get(ty, fallback)
 class EncRecipe(object):
    """
    A recipe for encoding instructions with a given format.
    Many different instructions can be encoded by the same recipe, but they
    must all have the same instruction format.
    The `ins` and `outs` arguments are tuples specifying the register
    allocation constraints for the value operands and results respectively. The
    possible constraints for an operand are:
    - A `RegClass` specifying the set of allowed registers.
    - A `Register` specifying a fixed-register operand.
    - An integer indicating that this result is tied to a value operand, so
      they must use the same register.
    - A `Stack` specifying a value in a stack slot.
    The `branch_range` argument must be provided for recipes that can encode
    branch instructions. It is an `(origin, bits)` tuple describing the exact
    range that can be encoded in a branch instruction.
    :param name: Short mnemonic name for this recipe.
    :param format: All encoded instructions must have this
            :py:class:`InstructionFormat`.
    :param size: Number of bytes in the binary encoded instruction.
    :param ins: Tuple of register constraints for value operands.
    :param outs: Tuple of register constraints for results.
    :param branch_range: `(origin, bits)` range for branches.
    :param instp: Instruction predicate.
    :param isap: ISA predicate.
    :param emit: Rust code for binary emission.
    """
    def __init__(
            self,
            name,               # type: str
            format,             # type: InstructionFormat
            size,               # type: int
            ins,                # type: ConstraintSeq
            outs,               # type: ConstraintSeq
            branch_range=None,  # type: BranchRange
            instp=None,         # type: PredNode
            isap=None,          # type: PredNode
            emit=None           # type: str
            ):
        # type: (...) -> None
        self.name = name
        self.format = format
        assert size >= 0
        self.size = size
        self.branch_range = branch_range
        self.instp = instp
        self.isap = isap
        self.emit = emit
        if instp:
            # An instruction predicate must be written against this format.
            assert instp.predicate_context() == format
        # Recipe number; starts out unassigned.
        self.number = None  # type: int
        self.ins = self._verify_constraints(ins)
        if not format.has_value_list:
            assert len(self.ins) == format.num_value_operands
        self.outs = self._verify_constraints(outs)
    def __str__(self):
        # type: () -> str
        return self.name
    def _verify_constraints(self, seq):
        # type: (ConstraintSeq) -> Sequence[OperandConstraint]
        """
        Check a constraint sequence and return it as a tuple.
        A single constraint is wrapped in a one-element tuple. Integer
        constraints must index an existing entry of `self.ins`; every other
        constraint must be a `RegClass`, `Register` or `Stack`.
        """
        if not isinstance(seq, tuple):
            seq = (seq,)
        # While the inputs themselves are being verified `self.ins` does not
        # exist yet. Treat that as "no inputs" so a tied index among the
        # inputs fails the range assertion instead of raising AttributeError.
        num_ins = len(getattr(self, 'ins', ()))
        for c in seq:
            if isinstance(c, int):
                # An integer constraint is bound to a value operand.
                # Check that it is in range.
                assert 0 <= c < num_ins, (
                        "Tied operand {} out of range for {} value operands"
                        .format(c, num_ins))
            else:
                assert isinstance(c, (RegClass, Register, Stack))
        return seq
    def ties(self):
        # type: () -> Tuple[Dict[int, int], Dict[int, int]]
        """
        Return two dictionaries representing the tied operands.
        The first maps input number to tied output number, the second maps
        output number to tied input number.
        """
        i2o = dict()  # type: Dict[int, int]
        o2i = dict()  # type: Dict[int, int]
        for o, i in enumerate(self.outs):
            # Only integer constraints denote a tie to an input.
            if isinstance(i, int):
                i2o[i] = o
                o2i[o] = i
        return (i2o, o2i)
    def recipe_pred(self):
        # type: () -> RecipePred
        """
        Get the combined recipe predicate which includes both the ISA predicate
        and the instruction predicate, as an `(isap, instp)` pair.
        Return `None` if this recipe has neither predicate.
        """
        if self.isap is None and self.instp is None:
            return None
        return (self.isap, self.instp)
 class Encoding(object):
    """
    Encoding for a concrete instruction.
    An `Encoding` object ties an instruction opcode with concrete type
    variables together with an encoding recipe and encoding bits.
    The concrete instruction can be in three different forms:
    1. A naked opcode: `trap` for non-polymorphic instructions.
    2. With bound type variables: `iadd.i32` for polymorphic instructions.
    3. With operands providing constraints: `icmp.i32(intcc.eq, x, y)`.
    If the instruction is polymorphic, all type variables must be provided.
    :param cpumode: The CPU mode where the encoding is active.
    :param inst: The :py:class:`Instruction` or :py:class:`BoundInstruction`
                 being encoded.
    :param recipe: The :py:class:`EncRecipe` to use.
    :param encbits: Additional encoding bits to be interpreted by `recipe`.
    :param instp: Instruction predicate, or `None`.
    :param isap: ISA predicate, or `None`.
    """
    def __init__(self, cpumode, inst, recipe, encbits, instp=None, isap=None):
        # type: (CPUMode, InstSpec, EncRecipe, int, PredNode, PredNode) -> None # noqa
        assert isinstance(cpumode, CPUMode)
        assert isinstance(recipe, EncRecipe)
        # Check for possible instruction predicates in `inst`.
        if isinstance(inst, Apply):
            instp = And.combine(instp, inst.inst_predicate())
            self.inst = inst.inst
            self.typevars = inst.typevars
        else:
            self.inst, self.typevars = inst.fully_bound()
            # Add secondary type variables to the instruction predicate.
            # This is already included by Apply.inst_predicate() above.
            if len(self.typevars) > 1:
                secondary = zip(self.inst.other_typevars, self.typevars[1:])
                for tv, vt in secondary:
                    # A None value type is an 'any' wild card: `ishl.i32.any`.
                    if vt is not None:
                        check = TypePredicate.typevar_check(self.inst, tv, vt)
                        instp = And.combine(instp, check)
        self.cpumode = cpumode
        inst_format = self.inst.format
        assert inst_format == recipe.format, (
                "Format {} must match recipe: {}".format(
                    inst_format, recipe.format))
        if self.inst.is_branch:
            assert recipe.branch_range, (
                    'Recipe {} for {} must have a branch_range'
                    .format(recipe, self.inst.name))
        self.recipe = recipe
        self.encbits = encbits
        # Record specific predicates. Note that the recipe also has predicates.
        isa = self.cpumode.isa
        self.instp = isa.unique_pred(instp)
        self.isap = isa.unique_pred(isap)
    def __str__(self):
        # type: () -> str
        # Rendered as `[recipe#bits]` with the bits in at least two hex digits.
        return '[{0}#{1:02x}]'.format(self.recipe, self.encbits)
    def ctrl_typevar(self):
        # type: () -> ValueType
        """
        Get the controlling type variable for this encoding or `None`.
        """
        return self.typevars[0] if self.typevars else None
--- a/lib/cretonne/meta/cdsl/operands.py
+++ b/lib/cretonne/meta/cdsl/operands.py
@@ -0,0 +1,222 @@
 """Classes for describing instruction operands."""
 from __future__ import absolute_import
 from . import camel_case
 from .types import ValueType
 from .typevar import TypeVar
 try:
    from typing import Union, Dict, TYPE_CHECKING  # noqa
    OperandSpec = Union['OperandKind', ValueType, TypeVar]
    if TYPE_CHECKING:
        from .ast import Enumerator, ConstantInt  # noqa
 except ImportError:
    pass
 # Kinds of operands.
 #
 # Each instruction has an opcode and a number of operands. The opcode
 # determines the instruction format, and the format determines the number of
 # operands and the kind of each operand.
 class OperandKind(object):
    """
    Describes one kind of instruction operand.
    Every operand kind is backed by a type in the Rust representation of an
    instruction; the name of that type is kept in `rust_type`.
    :param name: Name of the operand kind.
    :param doc: Documentation text, stored as the instance `__doc__`.
    :param default_member: Stored unchanged in `default_member`.
    :param rust_type: Name of the Rust type. When omitted (or empty), the
                      camel-cased `name` is used instead.
    """
    def __init__(self, name, doc, default_member=None, rust_type=None):
        # type: (str, str, str, str) -> None
        self.name = name
        self.__doc__ = doc
        self.default_member = default_member
        # Only derive the Rust type from the kind name when the caller did not
        # supply one explicitly.
        if rust_type:
            self.rust_type = rust_type
        else:
            self.rust_type = camel_case(name)
    def __str__(self):
        # type: () -> str
        # An operand kind prints as its bare name.
        return self.name
    def __repr__(self):
        # type: () -> str
        kind_name = self.name
        return 'OperandKind({})'.format(kind_name)
 #: An SSA value operand. This is a value defined by another instruction.
 #: `Operand` assigns this kind to every operand that is declared with a
 #: `ValueType` or a `TypeVar`. No `rust_type` is passed here, so
 #: `OperandKind` derives the Rust type name from the kind name.
 VALUE = OperandKind(
        'value', """
        An SSA value defined by another instruction.
        This kind of operand can represent any SSA value type, but the
        instruction format may restrict the valid value types for a given
        operand.
        """)
 #: A variable-sized list of value operands. Use for Ebb and function call
 #: arguments.
 #: The Rust type is spelled out explicitly as a slice of values instead of
 #: being derived from the kind name.
 VARIABLE_ARGS = OperandKind(
        'variable_args', """
        A variable size list of `value` operands.
        Use this to represent arguments passed to a function call, arguments
        passed to an extended basic block, or a variable number of results
        returned from an instruction.
        """,
        rust_type='&[Value]')
 # Instances of immediate operand types are provided in the
 # `cretonne.immediates` module.
 class ImmediateKind(OperandKind):
    """
    The kind of an immediate instruction operand.
    :param name: Name of the operand kind.
    :param doc: Documentation text for the kind.
    :param default_member: The default member name of this kind in the
                           `InstructionData` data structure.
    :param rust_type: Name of the Rust type; forwarded to `OperandKind`.
    :param values: For enumerated kinds, a dict mapping each enumerator name
                   to the Rust enumerator name used by `rust_enumerator()`.
                   Leave as `None` for numeric immediates.
    """
    def __init__(
            self, name, doc,
            default_member='imm',
            rust_type=None,
            values=None):
        # type: (str, str, str, str, Dict[str, str]) -> None
        super(ImmediateKind, self).__init__(
                name, doc, default_member, rust_type)
        self.values = values
    def __repr__(self):
        # type: () -> str
        return 'ImmediateKind({})'.format(self.name)
    def __getattr__(self, value):
        # type: (str) -> Enumerator
        """
        Enumerated immediate kinds allow the use of dot syntax to produce
        `Enumerator` AST nodes: `icmp.i32(intcc.ult, a, b)`.
        Raises `AssertionError` when this kind is not enumerated or when
        `value` is not one of its enumerators. Special `__dunder__` names and
        the `values` attribute itself raise `AttributeError` instead.
        """
        # `__getattr__` is also reached by protocol probes such as
        # `getattr(x, '__deepcopy__', None)` and `hasattr()`. Those only
        # understand `AttributeError`, so special names must not turn into an
        # `AssertionError`. A lookup of `values` only gets here when the
        # attribute has not been set yet; reading `self.values` below would
        # then recurse forever.
        if value == 'values' or (
                value.startswith('__') and value.endswith('__')):
            raise AttributeError(value)
        from .ast import Enumerator  # noqa
        if not self.values:
            raise AssertionError(
                    '{n} is not an enumerated operand kind: {n}.{a}'.format(
                        n=self.name, a=value))
        if value not in self.values:
            raise AssertionError(
                    'No such {n} enumerator: {n}.{a}'.format(
                        n=self.name, a=value))
        return Enumerator(self, value)
    def __call__(self, value):
        # type: (int) -> ConstantInt
        """
        Create an AST node representing a constant integer:
            iconst(imm64(0))
        Raises `AssertionError` for enumerated kinds, which have no numeric
        constants.
        """
        from .ast import ConstantInt  # noqa
        if self.values:
            raise AssertionError(
                    "{}({}): Can't make a constant numeric value for an enum"
                    .format(self.name, value))
        return ConstantInt(self, value)
    def rust_enumerator(self, value):
        # type: (str) -> str
        """
        Get the qualified Rust name of the enumerator value `value`.
        The result is `<rust_type>::<values[value]>`, so this is only usable
        on enumerated kinds.
        """
        return '{}::{}'.format(self.rust_type, self.values[value])
 # Instances of entity reference operand types are provided in the
 # `cretonne.entities` module.
 class EntityRefKind(OperandKind):
    """
    The kind of an instruction operand that refers to another entity.
    :param default_member: Member name for this kind. When omitted, the kind
                           name itself is used.
    """
    def __init__(self, name, doc, default_member=None, rust_type=None):
        # type: (str, str, str, str) -> None
        # Fall back to the kind name when no member name was supplied.
        member = default_member if default_member else name
        super(EntityRefKind, self).__init__(name, doc, member, rust_type)
    def __repr__(self):
        # type: () -> str
        kind_name = self.name
        return 'EntityRefKind({})'.format(kind_name)
 class Operand(object):
    """
    A named operand of an instruction: an *immediate*, an *SSA value*, or an
    *entity reference*. The `typ` argument selects which one:
    1. A :py:class:`ValueType` instance declares an SSA value operand with a
       concrete type.
    2. A :py:class:`TypeVar` instance declares an SSA value operand of an
       instruction that is polymorphic over the types the variable can
       assume.
    3. An :py:class:`ImmediateKind` instance declares an immediate operand
       whose value is encoded in the instruction itself rather than being
       passed as an SSA value.
    4. An :py:class:`EntityRefKind` instance declares an operand that
       references another entity in the function, typically something
       declared in the function preamble.
    """
    def __init__(self, name, typ, doc=''):
        # type: (str, OperandSpec, str) -> None
        self.name = name
        self.__doc__ = doc
        # Translate the operand spec into `self.kind`. The `typevar` member
        # only exists on VALUE operands.
        if isinstance(typ, (ValueType, TypeVar)):
            self.kind = VALUE
            # A concrete type is wrapped in a singleton type variable.
            self.typevar = (
                    TypeVar.singleton(typ) if isinstance(typ, ValueType)
                    else typ)
        else:
            assert isinstance(typ, OperandKind)
            self.kind = typ
    def get_doc(self):
        # type: () -> str
        """
        Return the operand's own documentation, or else the documentation of
        its type variable (value operands) or of its kind (all others).
        """
        own_doc = self.__doc__
        if own_doc:
            return own_doc
        source = self.typevar if self.kind is VALUE else self.kind
        return source.__doc__
    def __str__(self):
        # type: () -> str
        # The operand name wrapped in backquotes.
        quoted = "`{}`"
        return quoted.format(self.name)
    def is_value(self):
        # type: () -> bool
        """
        Return true if this is an SSA value operand.
        """
        return self.kind is VALUE
    def is_varargs(self):
        # type: () -> bool
        """
        Return true if this is a VARIABLE_ARGS operand.
        """
        return self.kind is VARIABLE_ARGS
    def is_immediate(self):
        # type: () -> bool
        """
        Return true if this is an immediate operand.
        Both `ImmediateKind` operands *and* entity references count as
        immediate here: it is any operand that does not represent a value
        dependency.
        """
        return not (self.kind is VALUE or self.kind is VARIABLE_ARGS)
--- a/lib/cretonne/meta/cdsl/predicates.py
+++ b/lib/cretonne/meta/cdsl/predicates.py
@@ -0,0 +1,375 @@
 """
 Cretonne predicates.
 A *predicate* is a function that computes a boolean result. The inputs to the
 function determine the kind of predicate:
 - An *ISA predicate* is evaluated on the current ISA settings together with the
  shared settings defined in the :py:mod:`settings` module. Once a target ISA
  has been configured, the value of all ISA predicates is known.
 - An *Instruction predicate* is evaluated on an instruction instance, so it can
  inspect all the immediate fields and type variables of the instruction.
  Instruction predicates can be evaluated before register allocation, so they
  can not depend on specific register assignments to the value operands or
  outputs.
 Predicates can also be computed from other predicates using the `And`, `Or`,
 and `Not` combinators defined in this module.
 All predicates have a *context* which determines where they can be evaluated.
 For an ISA predicate, the context is the ISA settings group. For an instruction
 predicate, the context is the instruction format.
 """
 from __future__ import absolute_import
 from functools import reduce
 from .formats import instruction_context
 try:
    from typing import Sequence, Tuple, Set, Any, Union, TYPE_CHECKING  # noqa
    if TYPE_CHECKING:
        from .formats import InstructionFormat, InstructionContext, FormatField  # noqa
        from .instructions import Instruction  # noqa
        from .settings import BoolSetting, SettingGroup  # noqa
        from .types import ValueType  # noqa
        from .typevar import TypeVar  # noqa
        PredContext = Union[SettingGroup, InstructionFormat,
                            InstructionContext]
        PredLeaf = Union[BoolSetting, 'FieldPredicate', 'TypePredicate']
        PredNode = Union[PredLeaf, 'Predicate']
        # A predicate key is a (recursive) tuple of primitive types that
        # uniquely describes a predicate. It is used for interning.
        PredKey = Tuple[Any, ...]
 except ImportError:
    pass
 def _is_parent(a, b):
    # type: (PredContext, PredContext) -> bool
    """
    Return true if `a` is `b` itself or is reachable from `b` by following
    `parent` attributes.
    """
    node = b
    while node:
        if node is a:
            return True
        # Objects without a `parent` attribute terminate the chain.
        node = getattr(node, 'parent', None)
    # The chain ended on a false value; only an identical `a` matches it.
    return a is node
 def _descendant(a, b):
    # type: (PredContext, PredContext) -> PredContext
    """
    Pick the descendant out of two related contexts.
    If one of `a` and `b` is a parent of the other (or they are the same),
    the descendant is returned. Unrelated contexts give `None`.
    """
    # Try `a` as the ancestor first, then `b`.
    for ancestor, candidate in ((a, b), (b, a)):
        if _is_parent(ancestor, candidate):
            return candidate
    return None
 class Predicate(object):
    """
    Base class of the computed predicates.
    Leaf predicates need not derive from this class; they can have other
    types, such as `Setting`.
    :param parts: Sequence of components in the predicate expression.
    """
    def __init__(self, parts):
        # type: (Sequence[PredNode]) -> None
        self.parts = parts
        # Fold the contexts of all parts into their common descendant.
        contexts = (part.predicate_context() for part in parts)
        self.context = reduce(_descendant, contexts)
        assert self.context, "Incompatible predicate parts"
        # Computed lazily by `predicate_key()`.
        self.predkey = None  # type: PredKey
    def __str__(self):
        # type: () -> str
        rendered = ', '.join(str(part) for part in self.parts)
        return '{}({})'.format(type(self).__name__, rendered)
    def predicate_context(self):
        # type: () -> PredContext
        return self.context
    def predicate_leafs(self, leafs):
        # type: (Set[PredLeaf]) -> None
        """
        Add every leaf predicate below this node to the `leafs` set.
        """
        for child in self.parts:
            child.predicate_leafs(leafs)
    def rust_predicate(self, prec):
        # type: (int) -> str
        raise NotImplementedError("rust_predicate is an abstract method")
    def predicate_key(self):
        # type: () -> PredKey
        """Tuple uniquely identifying a predicate."""
        if self.predkey:
            return self.predkey
        # The key is the class name followed by the keys of all the parts.
        subkeys = tuple(
                part.predicate_key() for part in self.parts)  # type: PredKey
        self.predkey = (type(self).__name__,) + subkeys
        return self.predkey
 class And(Predicate):
    """
    Computed predicate that holds only when every part holds.
    """
    precedence = 2
    def __init__(self, *args):
        # type: (*PredNode) -> None
        super(And, self).__init__(args)
    def rust_predicate(self, prec):
        # type: (int) -> str
        """
        Return a Rust expression computing the value of this predicate.
        `prec` is the precedence of the surrounding expression and decides
        whether the result must be parenthesized:
        0. An `if` statement.
        1. An `||` expression.
        2. An `&&` expression.
        3. A `!` expression.
        """
        terms = [p.rust_predicate(And.precedence) for p in self.parts]
        expr = ' && '.join(terms)
        # Only a tighter-binding context requires parentheses.
        return '({})'.format(expr) if prec > And.precedence else expr
    @staticmethod
    def combine(*args):
        # type: (*PredNode) -> PredNode
        """
        Combine a sequence of predicates, some of which may be `None`.
        The result is true when all the non-`None` arguments are true. A
        single remaining argument is returned as it is, and `None` is
        returned when nothing remains.
        """
        present = [p for p in args if p]
        if len(present) > 1:
            # Several real predicates: join them with `And`.
            return And(*present)
        return present[0] if present else None
 class Or(Predicate):
    """
    Computed predicate that holds when at least one part holds.
    """
    precedence = 1
    def __init__(self, *args):
        # type: (*PredNode) -> None
        super(Or, self).__init__(args)
    def rust_predicate(self, prec):
        # type: (int) -> str
        # Join the parts with `||`, and parenthesize the result when the
        # surrounding expression binds tighter than `||`.
        terms = [p.rust_predicate(Or.precedence) for p in self.parts]
        expr = ' || '.join(terms)
        return '({})'.format(expr) if prec > Or.precedence else expr
 class Not(Predicate):
    """
    Computed predicate that holds when its single part does not.
    """
    precedence = 3
    def __init__(self, part):
        # type: (PredNode) -> None
        super(Not, self).__init__((part,))
    def rust_predicate(self, prec):
        # type: (int) -> str
        # The operand is rendered at `!` precedence, so it parenthesizes
        # itself when needed.
        operand = self.parts[0]
        return '!' + operand.rust_predicate(Not.precedence)
 class FieldPredicate(object):
    """
    An instruction predicate that tests one single `FormatField`.
    :param field: The `FormatField` to be tested.
    :param function: Name of the boolean predicate function to call.
    :param args: Additional arguments for the predicate function.
    """
    def __init__(self, field, function, args):
        # type: (FormatField, str, Sequence[Any]) -> None
        self.field = field
        self.function = function
        self.args = args
    def _call_args(self):
        # type: () -> str
        # Comma-separated argument list: the field's Rust name first, followed
        # by the extra arguments converted to strings.
        pieces = [self.field.rust_name()]
        pieces.extend(str(arg) for arg in self.args)
        return ', '.join(pieces)
    def __str__(self):
        # type: () -> str
        return '{}({})'.format(self.function, self._call_args())
    def predicate_context(self):
        # type: () -> PredContext
        """
        The context of this predicate is the instruction format that owns the
        tested field.
        """
        iform = self.field.format  # type: InstructionFormat
        return iform
    def predicate_key(self):
        # type: () -> PredKey
        extra = tuple(str(arg) for arg in self.args)
        return (self.function, str(self.field)) + extra
    def predicate_leafs(self, leafs):
        # type: (Set[PredLeaf]) -> None
        leafs.add(self)
    def rust_predicate(self, prec):
        # type: (int) -> str
        """
        Return a string of Rust code that evaluates this predicate: a call to
        the function in `predicates::` with the field as first argument.
        """
        return 'predicates::{}({})'.format(self.function, self._call_args())
 class IsEqual(FieldPredicate):
    """
    Instruction predicate that tests whether an immediate instruction format
    field equals a constant value.
    :param field: `FormatField` to be checked.
    :param value: The constant value to compare against.
    """
    def __init__(self, field, value):
        # type: (FormatField, Any) -> None
        # The constant is the only extra argument of `is_equal`.
        extra_args = (value,)
        super(IsEqual, self).__init__(field, 'is_equal', extra_args)
        self.value = value
 class IsSignedInt(FieldPredicate):
    """
    Instruction predicate that tests whether an immediate instruction format
    field fits in an n-bit two's complement integer.
    :param field: `FormatField` to be checked.
    :param width: Number of bits in the allowed range.
    :param scale: Number of low bits that must be 0.
    The predicate is true if the field is in the range:
    `-2^(width-1) -- 2^(width-1)-1`
    and a multiple of `2^scale`.
    """
    def __init__(self, field, width, scale=0):
        # type: (FormatField, int, int) -> None
        extra_args = (width, scale)
        super(IsSignedInt, self).__init__(
                field, 'is_signed_int', extra_args)
        self.width = width
        self.scale = scale
        # At most 64 bits, and the scale must leave at least one bit.
        assert 0 <= width <= 64
        assert 0 <= scale < width
 class IsUnsignedInt(FieldPredicate):
    """
    Instruction predicate that tests whether an immediate instruction format
    field fits in an n-bit unsigned integer.
    :param field: `FormatField` to be checked.
    :param width: Number of bits in the allowed range.
    :param scale: Number of low bits that must be 0.
    The predicate is true if the field is in the range:
    `0 -- 2^width - 1` and a multiple of `2^scale`.
    """
    def __init__(self, field, width, scale=0):
        # type: (FormatField, int, int) -> None
        extra_args = (width, scale)
        super(IsUnsignedInt, self).__init__(
                field, 'is_unsigned_int', extra_args)
        self.width = width
        self.scale = scale
        # At most 64 bits, and the scale must leave at least one bit.
        assert 0 <= width <= 64
        assert 0 <= scale < width
 class TypePredicate(object):
    """
    An instruction predicate that checks the type of an SSA argument value.
    Type predicates are used to implement encodings for instructions with
    multiple type variables. The encoding tables are keyed by the controlling
    type variable, type predicates check any secondary type variables.
    A type predicate is not bound to any specific instruction format.
    :param value_arg: Index of the value argument to type check.
    :param value_type: The required value type.
    """
    def __init__(self, value_arg, value_type):
        # type: (int, ValueType) -> None
        assert value_arg >= 0
        assert value_type is not None
        self.value_arg = value_arg
        self.value_type = value_type
    def __str__(self):
        # type: () -> str
        return 'args[{}]:{}'.format(self.value_arg, self.value_type)
    def predicate_context(self):
        # type: () -> PredContext
        # Type checks are not tied to one instruction format, so they use the
        # shared `instruction_context`.
        return instruction_context
    def predicate_key(self):
        # type: () -> PredKey
        return ('typecheck', self.value_arg, self.value_type.name)
    def predicate_leafs(self, leafs):
        # type: (Set[PredLeaf]) -> None
        leafs.add(self)
    @staticmethod
    def typevar_check(inst, typevar, value_type):
        # type: (Instruction, TypeVar, ValueType) -> TypePredicate
        """
        Return a type check predicate for the given type variable in `inst`.
        The type variable must appear directly as the type of one of the
        operands to `inst`, so this is only guaranteed to work for secondary
        type variables.
        Find an `inst` value operand whose type is determined by `typevar` and
        create a `TypePredicate` that checks that the type variable has the
        value `value_type`.
        Raises `AssertionError` if no value operand of `inst` has `typevar`
        as its type.
        """
        # Find the first value operand whose type is `typevar`. The index is
        # the position among the value operands, not the operand number.
        for value_arg, opnum in enumerate(inst.value_opnums):
            if inst.ins[opnum].typevar == typevar:
                return TypePredicate(value_arg, value_type)
        # Report the problem explicitly. A bare `StopIteration` escaping from
        # here would be silently swallowed by any enclosing generator or
        # `for` loop machinery.
        raise AssertionError(
                'No value operand of {} has the type variable {}'
                .format(inst, typevar))
    def rust_predicate(self, prec):
        # type: (int) -> str
        """
        Return Rust code for evaluating this predicate.
        It is assumed that the context has `dfg` and `args` variables.
        """
        return 'dfg.value_type(args[{}]) == {}'.format(
                self.value_arg, self.value_type.rust_name())
--- a/lib/cretonne/meta/cdsl/registers.py
+++ b/lib/cretonne/meta/cdsl/registers.py
@@ -0,0 +1,354 @@
 """
 Register set definitions
 ------------------------
 Each ISA defines a separate register set that is used by the register allocator
 and the final binary encoding of machine code.
 The CPU registers are first divided into disjoint register banks, represented
 by a `RegBank` instance. Registers in different register banks never interfere
 with each other. A typical CPU will have a general purpose and a floating point
 register bank.
 A register bank consists of a number of *register units* which are the smallest
 indivisible units of allocation and interference. A register unit doesn't
 necessarily correspond to a particular number of bits in a register, it is more
 like a placeholder that can be used to determine if a register is taken or not.
 The register allocator works with *register classes* which can allocate one or
 more register units at a time. A register class allocates more than one
 register unit at a time when its registers are composed of smaller allocatable
 units. For example, the ARM double precision floating point registers are
 composed of two single precision registers.
 """
 from __future__ import absolute_import
 from . import is_power_of_two, next_power_of_two
 try:
    from typing import Sequence, Tuple, List, Dict, Any, TYPE_CHECKING  # noqa
    if TYPE_CHECKING:
        from .isa import TargetISA  # noqa
        # A tuple uniquely identifying a register class inside a register bank.
        # (count, width, start)
        RCTup = Tuple[int, int, int]
 except ImportError:
    pass
 # The number of 32-bit elements in a register unit mask
 # `RegClass.mask()` returns a list with this many integers.
 MASK_LEN = 3
 # The maximum total number of register units allowed.
 # Each mask element holds the bits of 32 units, hence the factor below.
 # This limit can be raised by also adjusting the RegUnitMask type in
 # src/isa/registers.rs.
 MAX_UNITS = MASK_LEN * 32
 class RegBank(object):
    """
    A register bank belonging to an ISA.
    A register bank controls a set of *register units* disjoint from all the
    other register banks in the ISA. The register units are numbered uniquely
    within the target ISA, and the units in a register bank form a contiguous
    sequence starting from a sufficiently aligned point that their low bits can
    be used directly when encoding machine code instructions.
    Register units can be given generated names like `r0`, `r1`, ..., or a
    tuple of special register unit names can be provided.
    :param name: Name of this register bank.
    :param isa: The target ISA. The new bank appends itself to
                `isa.regbanks`.
    :param doc: Documentation string. It is accepted but not stored by this
                constructor.
    :param units: Number of register units.
    :param prefix: Prefix for generated unit names.
    :param names: Special names for the first units. May be shorter than
                  `units`, the remaining units are named using `prefix`.
    """
    def __init__(self, name, isa, doc, units, prefix='r', names=()):
        # type: (str, TargetISA, str, int, str, Sequence[str]) -> None
        self.name = name
        self.isa = isa
        self.first_unit = 0
        self.units = units
        self.prefix = prefix
        self.names = names
        self.classes = list()  # type: List[RegClass]
        self.toprcs = list()  # type: List[RegClass]
        self.first_toprc_index = None  # type: int
        assert len(names) <= units
        if isa.regbanks:
            # Get the next free unit number.
            last = isa.regbanks[-1]
            u = last.first_unit + last.units
            # Round the first unit up to a multiple of the bank size rounded
            # up to a power of two.
            align = units
            if not is_power_of_two(align):
                align = next_power_of_two(align)
            self.first_unit = (u + align - 1) & -align
        self.index = len(isa.regbanks)
        isa.regbanks.append(self)
    def __repr__(self):
        # type: () -> str
        return ('RegBank({}, units={}, first_unit={})'
                .format(self.name, self.units, self.first_unit))
    def finish_regclasses(self):
        # type: () -> None
        """
        Compute subclasses and the top-level register class.
        Verify that the set of register classes satisfies:
        1. Closed under intersection: The intersection of any two register
           classes in the set is either empty or identical to a member of the
           set.
        2. There are no identical classes under different names.
        3. Classes are sorted topologically such that all subclasses have a
           higher index than the superclass.
        We could reorder classes topologically here instead of just enforcing
        the order, but the ordering tends to fall out naturally anyway.
        Raises `AssertionError` when any of these conditions is violated.
        """
        cmap = dict()  # type: Dict[RCTup, RegClass]
        for rc in self.classes:
            # All register classes must be given a name.
            assert rc.name, "Anonymous register class found"
            # Check for duplicates.
            tup = rc.rctup()
            if tup in cmap:
                raise AssertionError(
                        '{} and {} are identical register classes'
                        .format(rc, cmap[tup]))
            cmap[tup] = rc
        # Check intersections and topological order.
        for idx, rc1 in enumerate(self.classes):
            # Start out as its own top-level class; this is overwritten below
            # if rc1 turns out to be a sub-class of an earlier class.
            rc1.toprc = rc1
            for rc2 in self.classes[0:idx]:
                itup = rc1.intersect(rc2)
                if itup is None:
                    continue
                if itup not in cmap:
                    raise AssertionError(
                        'intersection of {} and {} missing'
                        .format(rc1, rc2))
                irc = cmap[itup]
                # rc1 > rc2, so rc2 can't be the sub-class.
                if irc is rc2:
                    raise AssertionError(
                            'Bad topological order: {}/{}'
                            .format(rc1, rc2))
                if irc is rc1:
                    # The intersection of rc1 and rc2 is rc1, so it must be a
                    # sub-class.
                    rc2.subclasses.append(rc1)
                    rc1.toprc = rc2.toprc
            if rc1.is_toprc():
                self.toprcs.append(rc1)
    def unit_by_name(self, name):
        # type: (str) -> int
        """
        Get a register unit in this bank by name.
        `name` is either one of the special names in `self.names`, or the
        bank prefix followed by a decimal unit index. The returned unit
        number is offset by `self.first_unit`.
        Raises `AssertionError` if `name` does not identify a unit in this
        bank.
        """
        r = None  # type: int
        if name in self.names:
            r = self.names.index(name)
        elif name.startswith(self.prefix):
            try:
                r = int(name[len(self.prefix):])
            except ValueError:
                # The suffix is not a number; reported as invalid below.
                r = None
        # Reject names that matched nothing as well as indices outside the
        # bank, including negative ones such as `r-1`.
        if r is None or not 0 <= r < self.units:
            raise AssertionError('Invalid register name: ' + name)
        return self.first_unit + r
 class RegClass(object):
    """
    A register class is a subset of register units in a RegBank along with a
    strategy for allocating registers.
    The *width* parameter determines how many register units are allocated at a
    time. Usually that is one, but for example the ARM D registers are
    allocated two units at a time. When multiple units are allocated, it is
    always a contiguous set of unit numbers.
    :param bank: The register bank we're allocating from. The new class
                 appends itself to `bank.classes`.
    :param count: The maximum number of allocations in this register class. By
                  default, the whole register bank can be allocated.
    :param width: How many units to allocate at a time.
    :param start: The first unit to allocate, relative to `bank.first_unit`.
    """
    def __init__(self, bank, count=None, width=1, start=0):
        # type: (RegBank, int, int, int) -> None
        # `name` is assigned later by `extract_names()`.
        self.name = None  # type: str
        self.index = None  # type: int
        self.bank = bank
        self.start = start
        self.width = width
        # This is computed later in `finish_regclasses()`.
        self.subclasses = list()  # type: List[RegClass]
        self.toprc = None  # type: RegClass
        assert width > 0
        assert start >= 0 and start < bank.units
        if count is None:
            count = bank.units // width
        self.count = count
        bank.classes.append(self)
    def __str__(self):
        # type: () -> str
        return self.name
    def is_toprc(self):
        # type: () -> bool
        """
        Is this a top-level register class?
        A top-level register class is not a sub-class of any other register
        class, so it is its own `toprc`. This can only be answered after
        running `finish_regclasses()`.
        """
        return self.toprc is self
    def rctup(self):
        # type: () -> RCTup
        """
        Get a tuple that uniquely identifies the registers in this class.
        The tuple is `(count, width, start)`. It can be used as a dictionary
        key to ensure that there are no duplicate register classes.
        """
        return (self.count, self.width, self.start)
    def intersect(self, other):
        # type: (RegClass) -> RCTup
        """
        Get a tuple representing the intersection of two register classes.
        Returns `None` if the two classes are disjoint. Classes of different
        widths are always treated as disjoint.
        """
        if self.width != other.width:
            return None
        # Half-open unit ranges [start, end) covered by each class.
        s_end = self.start + self.count * self.width
        o_end = other.start + other.count * other.width
        if self.start >= o_end or other.start >= s_end:
            return None
        # We have an overlap.
        start = max(self.start, other.start)
        end = min(s_end, o_end)
        count = (end - start) // self.width
        assert count > 0
        return (count, self.width, start)
    def __getitem__(self, sliced):
        # type: (slice) -> RegClass
        """
        Create a sub-class of a register class using slice notation. The slice
        indexes refer to allocations in the parent register class, not register
        units.
        An omitted slice start means the first allocation, and an omitted
        stop means `self.count`: `GPR[:8]`, `GPR[8:]`.
        """
        assert isinstance(sliced, slice), "RegClass slicing can't be 1 reg"
        # We could add strided sub-classes if needed.
        assert sliced.step is None, 'Subclass striding not supported'
        # Open-ended slices default to the bounds of this class.
        first = 0 if sliced.start is None else sliced.start
        last = self.count if sliced.stop is None else sliced.stop
        w = self.width
        s = self.start + first * w
        c = last - first
        assert c > 1, "Can't have single-register classes"
        return RegClass(self.bank, count=c, width=w, start=s)
    def __getattr__(self, attr):
        # type: (str) -> Register
        """
        Get a specific register in the class by name.
        For example: `GPR.r5`.
        Raises `AttributeError` if the bank has no register unit of that
        name.
        """
        # This hook only runs when normal attribute lookup fails. Special
        # `__dunder__` names are protocol probes and never register names.
        # A lookup of `bank` itself means the instance is not initialized
        # yet, and reading `self.bank` below would recurse forever.
        if attr == 'bank' or (
                attr.startswith('__') and attr.endswith('__')):
            raise AttributeError(attr)
        try:
            unit = self.bank.unit_by_name(attr)
        except (AssertionError, ValueError, UnboundLocalError):
            # `hasattr()` and `getattr()` with a default only understand
            # `AttributeError`, so report unknown names that way.
            raise AttributeError('Invalid register name: ' + attr)
        return Register(self, unit)
    def mask(self):
        # type: () -> List[int]
        """
        Compute a bit-mask of the register units allocated by this register
        class.
        Return as a list of `MASK_LEN` 32-bit integers. Only the first unit
        of each allocation has its bit set.
        """
        mask = [0] * MASK_LEN
        start = self.bank.first_unit + self.start
        for a in range(self.count):
            u = start + a * self.width
            b = u % 32
            # We need fancier masking code if a register can straddle mask
            # words. This will only happen with widths that are not powers of
            # two.
            assert b + self.width <= 32, 'Register straddles words'
            mask[u // 32] |= 1 << b
        return mask
    def subclass_mask(self):
        # type: () -> int
        """
        Compute a bit-mask of subclasses, including self.
        Bit `i` is set for the register class whose `index` is `i`.
        """
        m = 1 << self.index
        for rc in self.subclasses:
            m |= 1 << rc.index
        return m
    @staticmethod
    def extract_names(globs):
        # type: (Dict[str, Any]) -> None
        """
        Given a dict mapping name -> object as returned by `globals()`, find
        all the RegClass objects and set their name from the dict key.
        This is used to name a bunch of global variables in a module.
        Each register class must still be unnamed when this runs.
        """
        for name, obj in globs.items():
            if isinstance(obj, RegClass):
                assert obj.name is None
                obj.name = name
 class Register(object):
    """
    One specific register of a register class.
    A register is described by a register class together with the number of
    its first register unit.
    Specific registers are used to describe constraints on instructions where
    some operands must use a fixed register.
    Instances can be created with the constructor, or obtained as attributes
    on the register class: `GPR.rcx`.
    :param rc: The register class.
    :param unit: The first register unit of the register.
    """
    def __init__(self, rc, unit):
        # type: (RegClass, int) -> None
        self.regclass, self.unit = rc, unit
 class Stack(object):
    """
    Operand constraint for a value that must live in a stack slot.
    A `Stack` object marks a value operand as having to be in a stack slot
    rather than in a register.
    :param rc: The register class, stored as `regclass`.
    """
    def __init__(self, rc):
        # type: (RegClass) -> None
        regclass = rc
        self.regclass = regclass
--- a/lib/cretonne/meta/cdsl/settings.py
+++ b/lib/cretonne/meta/cdsl/settings.py
@@ -0,0 +1,407 @@
 """Classes for describing settings and groups of settings."""
 from __future__ import absolute_import
 from collections import OrderedDict
 from .predicates import Predicate
 try:
    from typing import Tuple, Set, List, Dict, Any, Union, TYPE_CHECKING  # noqa
    BoolOrPresetOrDict = Union['BoolSetting', 'Preset', Dict['Setting', Any]]
    if TYPE_CHECKING:
        from .predicates import PredLeaf, PredNode, PredKey  # noqa
 except ImportError:
    pass
 class Setting(object):
    """
    Base class for a named setting variable that is configured from outside
    Cretonne.
    A setting has no name when it is constructed; `name` is filled in
    afterwards (`SettingGroup.close()` does this when given `globals()`).
    Constructing a setting registers it with the currently open setting group.
    """
    def __init__(self, doc):
        # type: (str) -> None
        self.__doc__ = doc
        # Filled in after construction, once the setting is bound to a name.
        self.name = None  # type: str
        # Position of the byte holding this setting in the settings vector.
        self.byte_offset = None  # type: int
        # Position of this setting in the generated DESCRIPTORS table.
        self.descriptor_index = None  # type: int
        # Adds this setting to the open group and remembers that group.
        self.group = SettingGroup.append(self)
    def __str__(self):
        # type: () -> str
        group_name = self.group.name
        return '{}.{}'.format(group_name, self.name)
    def default_byte(self):
        # type: () -> int
        """Abstract: subclasses return the byte encoding of the default."""
        raise NotImplementedError("default_byte is an abstract method")
    def byte_for_value(self, value):
        # type: (Any) -> int
        """Abstract: subclasses return the byte encoding of `value`."""
        raise NotImplementedError("byte_for_value is an abstract method")
    def byte_mask(self):
        # type: () -> int
        """
        Return the bits of our byte that belong to this setting.
        The whole byte is used here; BoolSetting overrides this with a
        single-bit mask.
        """
        return 0xff
 class BoolSetting(Setting):
    """
    A named on/off setting that occupies a single bit of a settings byte.
    :param doc: Documentation string.
    :param default: The default value of this setting.
    """
    def __init__(self, doc, default=False):
        # type: (str, bool) -> None
        super(BoolSetting, self).__init__(doc)
        # Bit position inside the byte at `byte_offset`; not known yet.
        self.bit_offset = None  # type: int
        self.default = default
    def default_byte(self):
        # type: () -> int
        """
        Return the default value encoded as a byte: this setting's bit when
        the default is on, 0 otherwise. The result can be bitwise or'ed with
        the bytes of the other booleans stored in the same byte.
        """
        return (1 << self.bit_offset) if self.default else 0
    def byte_for_value(self, value):
        # type: (Any) -> int
        """Return this setting's bit when `value` is truthy, 0 otherwise."""
        return (1 << self.bit_offset) if value else 0
    def byte_mask(self):
        # type: () -> int
        """Return a mask that selects only this setting's bit."""
        return 1 << self.bit_offset
    def predicate_context(self):
        # type: () -> SettingGroup
        """
        Return the context in which this setting can be evaluated as a (leaf)
        predicate: the setting group it belongs to.
        """
        return self.group
    def predicate_key(self):
        # type: () -> PredKey
        """Return a ('setting', group name, setting name) tuple."""
        assert self.name, "Can't compute key before setting is named"
        key = ('setting', self.group.name, self.name)
        return key
    def predicate_leafs(self, leafs):
        # type: (Set[PredLeaf]) -> None
        """Add this setting to the `leafs` set."""
        leafs.add(self)
    def rust_predicate(self, prec):
        # type: (int) -> str
        """
        Return Rust code that reads this setting: a call to the accessor named
        after the setting on a variable named after the group.
        The emitted code assumes that the setting group exists as a local
        variable. `prec` is not used here.
        """
        group_var = self.group.name
        return '{}.{}()'.format(group_var, self.name)
 class NumSetting(Setting):
    """
    A named setting holding an integral value in the range 0--255, i.e. one
    that fits in a single byte.
    :param doc: Documentation string.
    :param default: The default value of this setting.
    """
    def __init__(self, doc, default=0):
        # type: (str, int) -> None
        super(NumSetting, self).__init__(doc)
        # The default must be integral and fit in one byte.
        assert default == int(default)
        assert 0 <= default <= 255
        self.default = default
    def default_byte(self):
        # type: () -> int
        """Return the default value; it is its own byte encoding."""
        return self.default
    def byte_for_value(self, value):
        # type: (Any) -> int
        """Check that `value` is an int in 0--255 and return it unchanged."""
        assert isinstance(value, int), "NumSetting must be set to an int"
        assert 0 <= value <= 255
        return value
 class EnumSetting(Setting):
    """
    A named setting that takes one value out of an enumerated set of strings.
    The first enumerator is always the default value.
    :param doc: Documentation string.
    :param args: Tuple of unique strings representing the possible values.
    """
    def __init__(self, doc, *args):
        # type: (str, *str) -> None
        super(EnumSetting, self).__init__(doc)
        assert len(args) > 0, "EnumSetting must have at least one value"
        # Enumerators are stored as strings, in the order they were given.
        self.values = tuple(map(str, args))
        self.default = self.values[0]
    def default_byte(self):
        # type: () -> int
        """Return 0, the position of the default (first) enumerator."""
        return 0
    def byte_for_value(self, value):
        # type: (Any) -> int
        """Return the position of `value` among the enumerators."""
        position = self.values.index(value)
        return position
 class SettingGroup(object):
    """
    A named collection of settings.
    Creating a :class:`Setting` adds it to whichever group is currently open.
    Only one group can be open at a time, so a group has to be closed
    explicitly before the next one can be opened. A new group opens itself as
    part of its construction.
    :param name: Short mnemonic name for setting group.
    :param parent: Parent settings group.
    """
    # The group that newly created settings and presets are added to.
    _current = None  # type: SettingGroup
    def __init__(self, name, parent=None):
        # type: (str, SettingGroup) -> None
        self.name = name
        self.parent = parent
        self.settings = []  # type: List[Setting]
        self.presets = []  # type: List[Preset]
        # Predicates with a name, computed from settings in this group or
        # its parents.
        self.named_predicates = OrderedDict()  # type: OrderedDict[str, Predicate]  # noqa
        # Maps predicate -> number for every boolean predicate that can be
        # accessed by number:
        # - All boolean settings in this group.
        # - All named predicates.
        # - Added anonymous predicates, see `number_predicate()`.
        # - Added parent predicates that are replicated in this group.
        self.predicate_number = OrderedDict()  # type: OrderedDict[PredNode, int]  # noqa
        # Fully qualified Rust module name. See gen_settings.py.
        self.qual_mod = None  # type: str
        self.open()
    def open(self):
        # type: () -> None
        """
        Make this the open group, so that settings created from now on are
        added to it. No other group may be open at this point.
        """
        current = SettingGroup._current
        assert current is None, (
                "Can't open {} since {} is already open"
                .format(self, current))
        SettingGroup._current = self
    def close(self, globs=None):
        # type: (Dict[str, Any]) -> None
        """
        Close this setting group and compute its layout. A group must be
        closed before another setting group can be opened.
        :param globs: Pass in `globals()` to name all settings and presets
            defined in the module after their variables, and to record the
            module's predicates as named predicates of this group.
        """
        current = SettingGroup._current
        assert current is self, (
                "Can't close {}, the open setting group is {}"
                .format(self, current))
        SettingGroup._current = None
        for key, value in (globs or {}).items():
            if isinstance(value, Setting):
                assert value.name is None, value.name
                value.name = key
            if isinstance(value, Predicate):
                self.named_predicates[key] = value
            if isinstance(value, Preset):
                assert value.name is None, value.name
                value.name = key
        self.layout()
    @staticmethod
    def append(setting):
        # type: (Setting) -> SettingGroup
        """Add `setting` to the open group and return that group."""
        group = SettingGroup._current
        assert group, "Open a setting group before defining settings."
        group.settings.append(setting)
        return group
    @staticmethod
    def append_preset(preset):
        # type: (Preset) -> SettingGroup
        """Add `preset` to the open group and return that group."""
        group = SettingGroup._current
        assert group, "Open a setting group before defining presets."
        group.presets.append(preset)
        return group
    def number_predicate(self, pred):
        # type: (PredNode) -> int
        """
        Make sure that `pred` has an assigned number, and will be included in
        this group's bit vector. A predicate seen for the first time gets the
        next unused number.
        The numbered predicates include:
        - `BoolSetting` settings that belong to this group.
        - `Predicate` instances in `named_predicates`.
        - `Predicate` instances without a name.
        - Settings or computed predicates that belong to the parent group, but
          need to be accessible by number in this group.
        The numbered predicates are referenced by the encoding tables as ISA
        predicates. See the `isap` field on `Encoding`.
        :returns: The assigned predicate number in this group.
        """
        numbers = self.predicate_number
        if pred not in numbers:
            numbers[pred] = len(numbers)
        return numbers[pred]
    def layout(self):
        # type: () -> None
        """
        Compute the layout of the byte vector used to represent this settings
        group.
        The byte vector contains the following entries in order:
        1. Byte-sized settings like `NumSetting` and `EnumSetting`.
        2. `BoolSetting` settings.
        3. Precomputed named predicates.
        4. Other numbered predicates, including anonymous predicates and parent
           predicates that need to be accessible by number.
        Set `self.settings_size` to the length of the byte vector prefix that
        contains the settings. All bytes after that are computed, not
        configured.
        Set `self.boolean_offset` to the byte where the numbered predicates
        begin, which is entry 2. in the list above.
        Set `self.boolean_settings` to the number of boolean settings.
        Assign `byte_offset` and `bit_offset` fields in all settings.
        After calling this method, no more settings can be added, but
        additional predicates can be made accessible with `number_predicate()`.
        """
        assert len(self.predicate_number) == 0, "Too late for layout"
        # Every non-boolean setting gets a byte of its own, in definition
        # order.
        byte_sized = [s for s in self.settings
                      if not isinstance(s, BoolSetting)]
        for offset, setting in enumerate(byte_sized):
            setting.byte_offset = offset
        # The boolean settings are packed as bits right after those bytes.
        self.boolean_offset = len(byte_sized)
        for setting in self.settings:
            if not isinstance(setting, BoolSetting):
                continue
            byte, bit = divmod(self.number_predicate(setting), 8)
            setting.byte_offset = self.boolean_offset + byte
            setting.bit_offset = bit
        # Only boolean settings are numbered so far, so `byte_size()` is the
        # size of the settings prefix, rounded up to whole bytes.
        self.boolean_settings = len(self.predicate_number)
        self.settings_size = self.byte_size()
        # The named predicates are numbered after the boolean settings.
        for pred in self.named_predicates.values():
            self.number_predicate(pred)
    def byte_size(self):
        # type: () -> int
        """
        Compute the number of bytes required to hold all settings and
        precomputed predicates.
        This is the size of the byte-sized settings plus all the numbered
        predicate bits rounded up to a whole number of bytes.
        """
        predicate_bytes = (len(self.predicate_number) + 7) // 8
        return self.boolean_offset + predicate_bytes
 class Preset(object):
    """
    A collection of setting values that are applied at once.
    A `Preset` is shorthand for applying several settings together. Each
    positional argument is one of:
    - a `BoolSetting`, which the preset turns on,
    - a dict mapping settings to the values they should get,
    - another `Preset`, whose values are copied into this one.
    Example:
        nehalem = Preset(has_sse41, has_cmov, {has_avx: False})
    Enabling the `nehalem` preset is equivalent to enabling `has_sse41` and
    `has_cmov` while disabling the `has_avx` setting.
    Constructing a preset registers it with the currently open setting group.
    """
    def __init__(self, *args):
        # type: (*BoolOrPresetOrDict) -> None
        self.name = None  # type: str  # Assigned later by `SettingGroup`.
        # (setting, value) pairs in the order they should be applied.
        self.values = list()  # type: List[Tuple[Setting, Any]]
        for arg in args:
            if isinstance(arg, Preset):
                # Presets are expanded right away into their own pairs.
                self.values.extend(arg.values)
                continue
            if isinstance(arg, dict):
                # Explicit setting: value pairs.
                self.values.extend(arg.items())
                continue
            # Anything else has to be a BoolSetting that gets enabled.
            assert isinstance(arg, BoolSetting)
            self.values.append((arg, True))
        self.group = SettingGroup.append_preset(self)
        # Index into the generated DESCRIPTORS table.
        self.descriptor_index = None  # type: int
    def layout(self):
        # type: () -> List[Tuple[int, int]]
        """
        Compute a list of (mask, byte) pairs that incorporate all values in
        this preset.
        The list has one entry per setting byte in the settings group. `mask`
        has the bits the preset modifies in that byte and `byte` has the
        values of those bits. When a bit is set more than once, the last value
        wins.
        """
        entries = [(0, 0)] * self.group.settings_size
        # Values are applied in order, so later ones override earlier ones.
        for setting, value in self.values:
            index = setting.byte_offset
            mask = setting.byte_mask()
            bits = setting.byte_for_value(value)
            # A setting may only touch bits inside its own mask.
            assert (bits & ~mask) == 0
            seen_mask, seen_bits = entries[index]
            entries[index] = (seen_mask | mask, (seen_bits & ~mask) | bits)
        return entries
--- a/lib/cretonne/meta/cdsl/test_ast.py
+++ b/lib/cretonne/meta/cdsl/test_ast.py
@@ -0,0 +1,28 @@
 from __future__ import absolute_import
 from unittest import TestCase
 from doctest import DocTestSuite
 from . import ast
 from base.instructions import jump, iadd
 def load_tests(loader, tests, ignore):
    """
    unittest `load_tests` hook: add the doctests found in the `ast` module to
    `tests` and return it.
    """
    doctests = DocTestSuite(ast)
    tests.addTests(doctests)
    return tests
 # Plain strings passed as operands and results in the tests below; the
 # expected repr strings show them quoted, e.g. ('x', 'y').
 x = 'x'
 y = 'y'
 a = 'a'
 class TestPatterns(TestCase):
    """Check the `repr` of instruction applications and of a definition."""
    def test_apply(self):
        # A plain instruction applied to two operands.
        plain = jump(x, y)
        self.assertEqual(repr(plain), "Apply(jump, ('x', 'y'))")
        # The same, going through the `.i32` attribute of the instruction.
        typed = iadd.i32(x, y)
        self.assertEqual(repr(typed), "Apply(iadd.i32, ('x', 'y'))")
    def test_single_ins(self):
        # `<<` with a result on the left and an application on the right.
        definition = a << iadd.i32(x, y)
        expected = "('a',) << Apply(iadd.i32, ('x', 'y'))"
        self.assertEqual(repr(definition), expected)
--- a/lib/cretonne/meta/cdsl/test_package.py
+++ b/lib/cretonne/meta/cdsl/test_package.py
@@ -0,0 +1,8 @@
 from __future__ import absolute_import
 import doctest
 import cdsl
 def load_tests(loader, tests, ignore):
    """
    unittest `load_tests` hook: add the doctests found in the `cdsl` package
    module to `tests` and return it.
    """
    package_doctests = doctest.DocTestSuite(cdsl)
    tests.addTests(package_doctests)
    return tests
--- a/lib/cretonne/meta/cdsl/test_ti.py
+++ b/lib/cretonne/meta/cdsl/test_ti.py
@@ -0,0 +1,605 @@
 from __future__ import absolute_import
 from base.instructions import vselect, vsplit, vconcat, iconst, iadd, bint,\
    b1, icmp, iadd_cout, iadd_cin, uextend, sextend, ireduce, fpromote, \
    fdemote
 from base.legalize import narrow, expand
 from base.immediates import intcc
 from base.types import i32, i8
 from .typevar import TypeVar
 from .ast import Var, Def
 from .xform import Rtl, XForm
 from .ti import ti_rtl, subst, TypeEnv, get_type_env, TypesEqual, WiderOrEq
 from unittest import TestCase
 from functools import reduce
 try:
    from .ti import TypeMap, ConstraintList, VarTyping, TypingOrError # noqa
    from typing import List, Dict, Tuple, TYPE_CHECKING, cast # noqa
 except ImportError:
    TYPE_CHECKING = False
 def agree(me, other):
    # type: (TypeEnv, TypeEnv) -> bool
    """
    Decide whether the TypeEnvs me and other agree, building along the way a
    substitution m from the TVs of me to the matching TVs of other.
    For every tv that is a key of me.type_map:
        1. If me[tv] has no entry in m yet, record m[me[tv]] = other[tv] and
           require me[tv].get_typeset() == other[tv].get_typeset()
        2. Otherwise me[tv] was already reached through another key, and
           other[tv] has to be the TV recorded in m for it
    Finally me and other must have the same constraints: me's constraints
    translated with m are compared against other's constraints translated
    with other, after sorting both lists by repr.
    """
    m = {}  # type: TypeMap
    # Walk our type map, growing m and checking it against other.
    for tv in me.type_map:
        mine, theirs = me[tv], other[tv]
        if mine in m:
            # Already mapped: other has to map tv to the same TV.
            if m[mine] != theirs:
                return False
            continue
        m[mine] = theirs
        if mine.get_typeset() != theirs.get_typeset():
            return False
    # Bring both constraint lists into a comparable, ordered form.
    my_constraints = [constr.translate(m) for constr in me.constraints]
    my_constraints.sort(key=repr)
    their_constraints = [constr.translate(other)
                         for constr in other.constraints]
    their_constraints.sort(key=repr)
    return my_constraints == their_constraints
 def check_typing(got_or_err, expected, symtab=None):
    # type: (TypingOrError, Tuple[VarTyping, ConstraintList], Dict[str, Var]) -> None # noqa
    """
    Assert that the typing we received (got_or_err) agrees with the expected
    typing (expected), a pair of a Var->TypeVar map and a constraint list.
    If symtab is given, the Vars in expected are first replaced by the Vars
    that symtab holds under the same name (used when checking type inference
    on XForms).
    """
    (m, c) = expected
    got = get_type_env(got_or_err)
    if symtab is None:
        # Without a symtab the Var->TypeVar map only needs to be re-keyed
        # by each Var's own TypeVar.
        tv_m = {k.get_typevar(): v for (k, v) in m.items()}
    else:
        # For xforms, map the TV of every expected Var to the TV of the Var
        # of the same name stored in the symtab passed in.
        subst_m = {k.get_typevar(): symtab[str(k)].get_typevar()
                   for k in m.keys()}
        # Re-key m by TypeVar, with each key rewritten through subst_m.
        tv_m = {subst(k.get_typevar(), subst_m): v for (k, v) in m.items()}
        # The expected constraints get the same rewriting.
        c = [constr.translate(subst_m) for constr in c]
    expected_typ = TypeEnv((tv_m, c))
    assert agree(expected_typ, got), \
        "typings disagree:\n {} \n {}".format(got.dot(),
                                              expected_typ.dot())
 def check_concrete_typing_rtl(var_types, rtl):
    # type: (VarTyping, Rtl) -> None
    """
    Check that a concrete type assignment var_types (Dict[Var, TypeVar]) is
    valid for an Rtl rtl.  Specifically check that:
    1) For each Var v in rtl, v is defined in var_types
    2) For all v, var_types[v] is a singleton type
    3) For each v, and each location u, where v is used with expected type
       tv_u, var_types[v].get_typeset() is a subset of
       subst(tv_u, m).get_typeset() where m is the substitution of
       formals->actuals we are building so far.
    4) If tv_u is non-derived and not in m, set m[tv_u]= var_types[v]
    The substitution m starts out empty for every definition in rtl. A
    violation shows up as a failed assertion, or as a KeyError when a Var is
    missing from var_types.
    """
    for d in rtl.rtl:
        assert isinstance(d, Def)
        inst = d.expr.inst
        # Accumulate all actual TVs for value defs/opnums in actual_tvs:
        # first the value results, then the value operands.
        actual_tvs = [var_types[d.defs[i]] for i in inst.value_results]
        for v in [d.expr.args[i] for i in inst.value_opnums]:
            assert isinstance(v, Var)
            actual_tvs.append(var_types[v])
        # Accumulate all formal TVs for value defs/opnums in formal_tvs, in
        # the same order as actual_tvs so the two lists pair up.
        formal_tvs = [inst.outs[i].typevar for i in inst.value_results] +\
                     [inst.ins[i].typevar for i in inst.value_opnums]
        m = {}  # type: TypeMap
        # For each actual/formal pair check that they agree
        for (actual_tv, formal_tv) in zip(actual_tvs, formal_tvs):
            # actual should be a singleton
            assert actual_tv.singleton_type() is not None
            # Concretize the formal with what has been bound so far.
            formal_tv = subst(formal_tv, m)
            # actual should agree with the concretized formal
            assert actual_tv.get_typeset().issubset(formal_tv.get_typeset())
            # Bind a still-free, non-derived formal to this actual.
            if formal_tv not in m and not formal_tv.is_derived:
                m[formal_tv] = actual_tv
 def check_concrete_typing_xform(var_types, xform):
    # type: (VarTyping, XForm) -> None
    """
    Check the concrete type assignment var_types against both sides of the
    XForm xform: first its source Rtl, then its destination Rtl.
    """
    for side in (xform.src, xform.dst):
        check_concrete_typing_rtl(var_types, side)
 class TypeCheckingBaseTest(TestCase):
    """
    Common fixture for the type inference tests: a set of fresh Vars and a
    few TypeVars, all created anew in `setUp`.
    """
    def setUp(self):
        # type: () -> None
        # Ten value variables named "v0" through "v9".
        (self.v0, self.v1, self.v2, self.v3, self.v4,
         self.v5, self.v6, self.v7, self.v8, self.v9) = [
            Var("v{}".format(n)) for n in range(10)]
        self.imm0 = Var("imm0")
        # Integer type variable with scalars switched off.
        self.IxN_nonscalar = TypeVar(
            "IxN", "", ints=True, scalars=False, simd=True)
        # Int/bool/float type variable with scalars switched off.
        self.TxN = TypeVar(
            "TxN", "", ints=True, bools=True, floats=True, scalars=False,
            simd=True)
        self.b1 = TypeVar.singleton(b1)
 class TestRTL(TypeCheckingBaseTest):
    """Type inference checks on plain Rtl instruction sequences."""
    def test_bad_rtl1(self):
        # type: () -> None
        # v2 is split and then concatenated with one of its own halves;
        # inference is expected to report an error string.
        rtl = Rtl(
            (self.v0, self.v1) << vsplit(self.v2),
            self.v3 << vconcat(self.v0, self.v2),
        )
        expected = ("On line 1: fail ti on `typeof_v2` <: `1`: "
                    "Error: empty type created when unifying "
                    "`typeof_v2` and `half_vector(typeof_v2)`")
        self.assertEqual(ti_rtl(rtl, TypeEnv()), expected)
    def test_vselect(self):
        # type: () -> None
        rtl = Rtl(self.v0 << vselect(self.v1, self.v2, self.v3))
        inferred = ti_rtl(rtl, TypeEnv())
        txn = self.TxN.get_fresh_copy("TxN1")
        # The selector is the boolean version of the common value type.
        expected_types = {
            self.v0: txn,
            self.v1: txn.as_bool(),
            self.v2: txn,
            self.v3: txn,
        }
        check_typing(inferred, (expected_types, []))
    def test_vselect_icmpimm(self):
        # type: () -> None
        rtl = Rtl(
            self.v0 << iconst(self.imm0),
            self.v1 << icmp(intcc.eq, self.v2, self.v0),
            self.v5 << vselect(self.v1, self.v3, self.v4),
        )
        inferred = ti_rtl(rtl, TypeEnv())
        ixn = self.IxN_nonscalar.get_fresh_copy("IxN1")
        txn = self.TxN.get_fresh_copy("TxN1")
        expected_types = {
            self.v0: ixn,
            self.v1: ixn.as_bool(),
            self.v2: ixn,
            self.v3: txn,
            self.v4: txn,
            self.v5: txn,
        }
        # The comparison result is also the selector, so the boolean
        # versions of both type variables are expected to be tied together.
        check_typing(inferred, (
            expected_types,
            [TypesEqual(ixn.as_bool(), txn.as_bool())]))
    def test_vselect_vsplits(self):
        # type: () -> None
        rtl = Rtl(
            self.v3 << vselect(self.v0, self.v1, self.v2),
            (self.v4, self.v5) << vsplit(self.v3),
            (self.v6, self.v7) << vsplit(self.v4),
        )
        inferred = ti_rtl(rtl, TypeEnv())
        # Expected lane range after two successive splits: 4 to 256.
        t = TypeVar("t", "", ints=True, bools=True, floats=True,
                    simd=(4, 256))
        half = t.half_vector()
        expected_types = {
            self.v0: t.as_bool(),
            self.v1: t,
            self.v2: t,
            self.v3: t,
            self.v4: half,
            self.v5: t.half_vector(),
            self.v6: t.half_vector().half_vector(),
            self.v7: t.half_vector().half_vector(),
        }
        check_typing(inferred, (expected_types, []))
    def test_vselect_vconcats(self):
        # type: () -> None
        rtl = Rtl(
            self.v3 << vselect(self.v0, self.v1, self.v2),
            self.v8 << vconcat(self.v3, self.v3),
            self.v9 << vconcat(self.v8, self.v8),
        )
        inferred = ti_rtl(rtl, TypeEnv())
        # Expected lane range before two successive doublings: 2 to 64.
        t = TypeVar("t", "", ints=True, bools=True, floats=True,
                    simd=(2, 64))
        expected_types = {
            self.v0: t.as_bool(),
            self.v1: t,
            self.v2: t,
            self.v3: t,
            self.v8: t.double_vector(),
            self.v9: t.double_vector().double_vector(),
        }
        check_typing(inferred, (expected_types, []))
    def test_vselect_vsplits_vconcats(self):
        # type: () -> None
        rtl = Rtl(
            self.v3 << vselect(self.v0, self.v1, self.v2),
            (self.v4, self.v5) << vsplit(self.v3),
            (self.v6, self.v7) << vsplit(self.v4),
            self.v8 << vconcat(self.v3, self.v3),
            self.v9 << vconcat(self.v8, self.v8),
        )
        inferred = ti_rtl(rtl, TypeEnv())
        # Two splits and two doublings: expected lane range is 4 to 64.
        t = TypeVar("t", "", ints=True, bools=True, floats=True,
                    simd=(4, 64))
        expected_types = {
            self.v0: t.as_bool(),
            self.v1: t,
            self.v2: t,
            self.v3: t,
            self.v4: t.half_vector(),
            self.v5: t.half_vector(),
            self.v6: t.half_vector().half_vector(),
            self.v7: t.half_vector().half_vector(),
            self.v8: t.double_vector(),
            self.v9: t.double_vector().double_vector(),
        }
        check_typing(inferred, (expected_types, []))
    def test_bint(self):
        # type: () -> None
        rtl = Rtl(
            self.v4 << iadd(self.v1, self.v2),
            self.v5 << bint(self.v3),
            self.v0 << iadd(self.v4, self.v5)
        )
        inferred = ti_rtl(rtl, TypeEnv())
        itype = TypeVar("t", "", ints=True, simd=(1, 256))
        btype = TypeVar("b", "", bools=True, simd=True)
        # self.v5 must end up with the same integer type as the other
        # integer values.
        # TODO: Add constraint nlanes(v3) == nlanes(v1) when we
        # add that type constraint to bint
        expected_types = {
            self.v1: itype,
            self.v2: itype,
            self.v4: itype,
            self.v5: itype,
            self.v3: btype,
            self.v0: itype,
        }
        check_typing(inferred, (expected_types, []))
    def test_fully_bound_inst_inference_bad(self):
        # Incompatible bound instructions fail accordingly
        rtl = Rtl(
            self.v3 << uextend.i32(self.v1),
            self.v4 << uextend.i16(self.v2),
            self.v5 << iadd(self.v3, self.v4),
        )
        outcome = ti_rtl(rtl, TypeEnv())
        expected = ("On line 2: fail ti on `typeof_v4` <: `4`: "
                    "Error: empty type created when unifying "
                    "`i16` and `i32`")
        self.assertEqual(outcome, expected)
    def test_extend_reduce(self):
        # type: () -> None
        rtl = Rtl(
            self.v1 << uextend(self.v0),
            self.v2 << ireduce(self.v1),
            self.v3 << sextend(self.v2),
        )
        inferred = ti_rtl(rtl, TypeEnv())
        inferred = inferred.extract()
        itype0 = TypeVar("t", "", ints=True, simd=(1, 256))
        itype1 = TypeVar("t1", "", ints=True, simd=(1, 256))
        itype2 = TypeVar("t2", "", ints=True, simd=(1, 256))
        itype3 = TypeVar("t3", "", ints=True, simd=(1, 256))
        expected_types = {
            self.v0: itype0,
            self.v1: itype1,
            self.v2: itype2,
            self.v3: itype3,
        }
        # One width constraint is expected per instruction.
        check_typing(inferred, (
            expected_types,
            [WiderOrEq(itype1, itype0),
             WiderOrEq(itype1, itype2),
             WiderOrEq(itype3, itype2)]))
    def test_extend_reduce_enumeration(self):
        # type: () -> None
        for op in (uextend, sextend, ireduce):
            rtl = Rtl(self.v1 << op(self.v0))
            inferred = ti_rtl(rtl, TypeEnv()).extract()
            # The number of possible typings is 9 * (3+ 2*2 + 3) = 90
            pairs = [(t[self.v0], t[self.v1])
                     for t in inferred.concrete_typings()]
            assert len(pairs) == len(set(pairs)) and len(pairs) == 90
            for (tv0, tv1) in pairs:
                typ0, typ1 = tv0.singleton_type(), tv1.singleton_type()
                # ireduce narrows its argument, the extends widen it.
                if op == ireduce:
                    wider, narrower = typ0, typ1
                else:
                    wider, narrower = typ1, typ0
                assert wider.wider_or_equal(narrower)
    def test_fpromote_fdemote(self):
        # type: () -> None
        rtl = Rtl(
            self.v1 << fpromote(self.v0),
            self.v2 << fdemote(self.v1),
        )
        inferred = ti_rtl(rtl, TypeEnv())
        inferred = inferred.extract()
        ftype0 = TypeVar("t", "", floats=True, simd=(1, 256))
        ftype1 = TypeVar("t1", "", floats=True, simd=(1, 256))
        ftype2 = TypeVar("t2", "", floats=True, simd=(1, 256))
        expected_types = {
            self.v0: ftype0,
            self.v1: ftype1,
            self.v2: ftype2,
        }
        check_typing(inferred, (
            expected_types,
            [WiderOrEq(ftype1, ftype0),
             WiderOrEq(ftype1, ftype2)]))
    def test_fpromote_fdemote_enumeration(self):
        # type: () -> None
        for op in (fpromote, fdemote):
            rtl = Rtl(self.v1 << op(self.v0))
            inferred = ti_rtl(rtl, TypeEnv()).extract()
            # The number of possible typings is 9*(2 + 1) = 27
            pairs = [(t[self.v0], t[self.v1])
                     for t in inferred.concrete_typings()]
            assert len(pairs) == len(set(pairs)) and len(pairs) == 27
            for (tv0, tv1) in pairs:
                typ0, typ1 = tv0.singleton_type(), tv1.singleton_type()
                # fdemote narrows its argument, fpromote widens it.
                if op == fdemote:
                    wider, narrower = typ0, typ1
                else:
                    wider, narrower = typ1, typ0
                assert wider.wider_or_equal(narrower)
 class TestXForm(TypeCheckingBaseTest):
    def test_iadd_cout(self):
        # type: () -> None
        # Source pattern: one add producing a sum and a carry out.
        src = Rtl((self.v0, self.v1) << iadd_cout(self.v2, self.v3))
        # Destination pattern: a plain add followed by an unsigned compare.
        dst = Rtl(
            self.v0 << iadd(self.v2, self.v3),
            self.v1 << icmp(intcc.ult, self.v0, self.v2)
        )
        xform = XForm(src, dst)
        itype = TypeVar("t", "", ints=True, simd=(1, 1))
        expected_types = {
            self.v0: itype,
            self.v2: itype,
            self.v3: itype,
            self.v1: itype.as_bool(),
        }
        check_typing(xform.ti, (expected_types, []), xform.symtab)
    def test_iadd_cin(self):
        # type: () -> None
        # Source pattern: one add taking a carry in.
        src = Rtl(self.v0 << iadd_cin(self.v1, self.v2, self.v3))
        # Destination pattern: add the operands, convert the carry with
        # bint, then add the two partial results.
        dst = Rtl(
            self.v4 << iadd(self.v1, self.v2),
            self.v5 << bint(self.v3),
            self.v0 << iadd(self.v4, self.v5)
        )
        xform = XForm(src, dst)
        itype = TypeVar("t", "", ints=True, simd=(1, 1))
        expected_types = {
            self.v0: itype,
            self.v1: itype,
            self.v2: itype,
            self.v3: self.b1,
            self.v4: itype,
            self.v5: itype,
        }
        check_typing(xform.ti, (expected_types, []), xform.symtab)
    def test_enumeration_with_constraints(self):
        # type: () -> None
        def pattern():
            # type: () -> Rtl
            # Both sides of the XForm use this same instruction sequence,
            # each built from freshly created definitions.
            return Rtl(
                self.v0 << iconst(self.imm0),
                self.v1 << icmp(intcc.eq, self.v2, self.v0),
                self.v5 << vselect(self.v1, self.v3, self.v4)
            )
        xform = XForm(pattern(), pattern())
        # Check all var assigns are correct
        assert len(xform.ti.constraints) > 0
        concrete_var_assigns = list(xform.ti.concrete_typings())
        # Look up the XForm's own Var for each of our Vars, by name.
        v0, v1, v2, v3, v4, v5 = [
            xform.symtab[str(var)]
            for var in (self.v0, self.v1, self.v2,
                        self.v3, self.v4, self.v5)]
        for var_m in concrete_var_assigns:
            assert var_m[v0] == var_m[v2]
            assert var_m[v3] == var_m[v4]
            assert var_m[v5] == var_m[v3]
            assert var_m[v1] == var_m[v2].as_bool()
            selector_typeset = var_m[v1].get_typeset()
            assert selector_typeset == var_m[v3].as_bool().get_typeset()
            check_concrete_typing_xform(var_m, xform)
        # The number of possible typings here is:
        # 8 cases for v0 = i8xN times 2 options for v3 - i8, b8 = 16
        # 8 cases for v0 = i16xN times 2 options for v3 - i16, b16 = 16
        # 8 cases for v0 = i32xN times 3 options for v3 - i32, b32, f32 = 24
        # 8 cases for v0 = i64xN times 3 options for v3 - i64, b64, f64 = 24
        #
        # (Note we have 8 cases for lanes since vselect prevents scalars)
        # Total: 2*16 + 2*24 = 80
        assert len(concrete_var_assigns) == 80
    def test_base_legalizations_enumeration(self):
        # type: () -> None
        # Walk every xform of the narrow and expand legalization groups and
        # sanity-check the set of concrete typings each one permits.
        for xform in narrow.xforms + expand.xforms:
            type_env = xform.ti

            # Any legalization patterns we defined should have at least 1
            # concrete typing
            typings = list(type_env.concrete_typings())
            assert len(typings) > 0

            # Without free typevars the pattern is non-polymorphic, so exactly
            # one concrete typing is possible.
            if not type_env.free_typevars():
                assert len(typings) == 1
                continue

            # For any patterns where the type env includes constraints, at
            # least one of the "theoretically possible" concrete typings must
            # be prevented by the constraints (i.e. we are not emitting
            # unnecessary constraints).
            # We check that by asserting that the number of concrete typings
            # is less than the number of all possible free typevar
            # assignments, which is the product of the typeset sizes.
            if len(type_env.constraints) > 0:
                upper_bound = 1
                for free_tv in type_env.free_typevars():
                    upper_bound = upper_bound * free_tv.get_typeset().size()
                assert len(typings) < upper_bound

            # Check the validity of each individual concrete typing against
            # the xform
            for typing in typings:
                check_concrete_typing_xform(typing, xform)
    def test_bound_inst_inference(self):
        # First example from issue #26: both uextend instructions are bound
        # to i32, the iadd/ireduce are left polymorphic.
        src = Rtl(
            self.v0 << iadd(self.v1, self.v2),
        )
        dst = Rtl(
            self.v3 << uextend.i32(self.v1),
            self.v4 << uextend.i32(self.v2),
            self.v5 << iadd(self.v3, self.v4),
            self.v0 << ireduce(self.v5)
        )
        xform = XForm(src, dst)
        int_tv = TypeVar("t", "", ints=True, simd=True)
        i32_tv = TypeVar.singleton(i32)
        expected_typing = {
            self.v0:    int_tv,
            self.v1:    int_tv,
            self.v2:    int_tv,
            self.v3:    i32_tv,
            self.v4:    i32_tv,
            self.v5:    i32_tv,
        }
        # The only expected residual constraint: i32 is at least as wide as
        # the source type.
        expected_constraints = [WiderOrEq(i32_tv, int_tv)]
        check_typing(xform.ti,
                     (expected_typing, expected_constraints),
                     xform.symtab)
    def test_bound_inst_inference1(self):
        # Second example taken from issue #26: here only the iadd in the
        # destination is bound to i32; the uextends are left polymorphic.
        src = Rtl(
            self.v0 << iadd(self.v1, self.v2),
        )
        dst = Rtl(
            self.v3 << uextend(self.v1),
            self.v4 << uextend(self.v2),
            self.v5 << iadd.i32(self.v3, self.v4),
            self.v0 << ireduce(self.v5)
        )
        xform = XForm(src, dst)
        int_tv = TypeVar("t", "", ints=True, simd=True)
        i32_tv = TypeVar.singleton(i32)
        expected_typing = {
            self.v0:    int_tv,
            self.v1:    int_tv,
            self.v2:    int_tv,
            self.v3:    i32_tv,
            self.v4:    i32_tv,
            self.v5:    i32_tv,
        }
        # Same expected outcome as when the uextends are bound instead.
        expected_constraints = [WiderOrEq(i32_tv, int_tv)]
        check_typing(xform.ti,
                     (expected_typing, expected_constraints),
                     xform.symtab)
    def test_fully_bound_inst_inference(self):
        # Second example taken from issue #26 with complete bounds: both
        # uextends are bound as .i32.i8.
        src = Rtl(
            self.v0 << iadd(self.v1, self.v2),
        )
        dst = Rtl(
            self.v3 << uextend.i32.i8(self.v1),
            self.v4 << uextend.i32.i8(self.v2),
            self.v5 << iadd(self.v3, self.v4),
            self.v0 << ireduce(self.v5)
        )
        xform = XForm(src, dst)
        i8_tv = TypeVar.singleton(i8)
        i32_tv = TypeVar.singleton(i32)
        expected_typing = {
            self.v0:    i8_tv,
            self.v1:    i8_tv,
            self.v2:    i8_tv,
            self.v3:    i32_tv,
            self.v4:    i32_tv,
            self.v5:    i32_tv,
        }
        # Note no constraints here since they are all trivial
        check_typing(xform.ti, (expected_typing, []), xform.symtab)
    def test_fully_bound_inst_inference_bad(self):
        # Can't force a mistyped XForm using bound instructions: v1 is bound
        # as i8 and v2 as i16 although the source iadd takes both operands.
        # Everything is built inside the context manager so an AssertionError
        # from any construction step satisfies the test.
        with self.assertRaises(AssertionError):
            src = Rtl(
                self.v0 << iadd(self.v1, self.v2),
            )
            dst = Rtl(
                self.v3 << uextend.i32.i8(self.v1),
                self.v4 << uextend.i32.i16(self.v2),
                self.v5 << iadd(self.v3, self.v4),
                self.v0 << ireduce(self.v5)
            )
            XForm(src, dst)
--- a/lib/cretonne/meta/cdsl/test_typevar.py
+++ b/lib/cretonne/meta/cdsl/test_typevar.py
@@ -0,0 +1,266 @@
 from __future__ import absolute_import
 from unittest import TestCase
 from doctest import DocTestSuite
 from . import typevar
 from .typevar import TypeSet, TypeVar
 from base.types import i32, i16, b1, f64
 from itertools import product
 from functools import reduce
 def load_tests(loader, tests, ignore):
    # unittest load_tests hook: add the doctests of the typevar module to the
    # suite that was discovered for this file, and hand the suite back.
    typevar_doctests = DocTestSuite(typevar)
    tests.addTests(typevar_doctests)
    return tests
 class TestTypeSet(TestCase):
    # Tests for TypeSet: construction checks, hashing, forward images
    # (lane_of, as_bool, half/double vector and width), get_singleton() and
    # preimage().
    def test_invalid(self):
        # Each of these keyword sets must be rejected with an AssertionError.
        rejected = (
            {'lanes': (2, 1)},
            {'ints': (32, 16)},
            {'floats': (32, 16)},
            {'bools': (32, 16)},
            {'ints': (32, 33)},
        )
        for kwargs in rejected:
            with self.assertRaises(AssertionError):
                TypeSet(**kwargs)

    def test_hash(self):
        first = TypeSet(lanes=True, ints=True, floats=True)
        twin = TypeSet(lanes=True, ints=True, floats=True)
        narrower = TypeSet(lanes=True, ints=(8, 16), floats=True)
        self.assertEqual(first, twin)
        self.assertNotEqual(first, narrower)
        # Equal typesets must also be interchangeable as set members.
        seen = {first}
        self.assertIn(first, seen)
        self.assertIn(twin, seen)
        self.assertNotIn(narrower, seen)

    def test_hash_modified(self):
        ts = TypeSet(lanes=True, ints=True, floats=True)
        seen = {ts}
        ts.ints.remove(64)
        # Can't rehash after modification.
        with self.assertRaises(AssertionError):
            ts in seen

    def test_forward_images(self):
        eq = self.assertEqual
        vec_ts = TypeSet(lanes=(2, 8), ints=(8, 8), floats=(32, 32))
        any_ts = TypeSet(lanes=(1, 8), ints=(8, 8), floats=(32, 32))
        eq(vec_ts.lane_of(), TypeSet(ints=(8, 8), floats=(32, 32)))

        # Test case with disjoint intervals
        expected = TypeSet(lanes=(2, 8))
        expected.bools = {8, 32}
        eq(vec_ts.as_bool(), expected)

        # For as_bool check b1 is present when 1 \in lanes
        expected = TypeSet(lanes=(1, 8))
        expected.bools = {1, 8, 32}
        eq(any_ts.as_bool(), expected)

        # Lane-count images
        eq(TypeSet(lanes=(1, 32)).half_vector(),
           TypeSet(lanes=(1, 16)))
        eq(TypeSet(lanes=(1, 32)).double_vector(),
           TypeSet(lanes=(2, 64)))
        eq(TypeSet(lanes=(128, 256)).double_vector(),
           TypeSet(lanes=(256, 256)))

        # Integer width images
        eq(TypeSet(ints=(8, 32)).half_width(),
           TypeSet(ints=(8, 16)))
        eq(TypeSet(ints=(8, 32)).double_width(),
           TypeSet(ints=(16, 64)))
        eq(TypeSet(ints=(32, 64)).double_width(),
           TypeSet(ints=(64, 64)))

        # Float width images; the first one should produce an empty ts
        eq(TypeSet(floats=(32, 32)).half_width(),
           TypeSet())
        eq(TypeSet(floats=(32, 64)).half_width(),
           TypeSet(floats=(32, 32)))
        eq(TypeSet(floats=(32, 32)).double_width(),
           TypeSet(floats=(64, 64)))
        eq(TypeSet(floats=(32, 64)).double_width(),
           TypeSet(floats=(64, 64)))

        # Bools have trickier behavior around b1 (since b2, b4 don't exist)
        eq(TypeSet(bools=(1, 8)).half_width(),
           TypeSet())
        bool_ts = TypeSet()
        bool_ts.bools = {8, 16}
        eq(TypeSet(bools=(1, 32)).half_width(), bool_ts)

        # double_width() of bools={1, 8, 16} must not include 2 or 8
        bool_ts.bools = {16, 32}
        eq(TypeSet(bools=(1, 16)).double_width(), bool_ts)
        eq(TypeSet(bools=(32, 64)).double_width(),
           TypeSet(bools=(64, 64)))

    def test_get_singleton(self):
        # Raise error when calling get_singleton() on non-singleton TS
        two_kinds = TypeSet(lanes=(1, 1), ints=(8, 8), floats=(32, 32))
        with self.assertRaises(AssertionError):
            two_kinds.get_singleton()
        two_lanes = TypeSet(lanes=(1, 2), floats=(32, 32))
        with self.assertRaises(AssertionError):
            two_lanes.get_singleton()

        # Proper singletons yield the corresponding concrete type.
        self.assertEqual(TypeSet(ints=(16, 16)).get_singleton(), i16)
        self.assertEqual(TypeSet(floats=(64, 64)).get_singleton(), f64)
        self.assertEqual(TypeSet(bools=(1, 1)).get_singleton(), b1)
        vector_singleton = TypeSet(lanes=(4, 4), ints=(32, 32))
        self.assertEqual(vector_singleton.get_singleton(), i32.by(4))

    def test_preimage(self):
        eq = self.assertEqual

        # LANEOF
        scalar_ts = TypeSet(lanes=(1, 1), ints=(8, 8), floats=(32, 32))
        eq(TypeSet(lanes=True, ints=(8, 8), floats=(32, 32)),
           scalar_ts.preimage(TypeVar.LANEOF))
        # Inverse of empty set is still empty across LANEOF
        eq(TypeSet(),
           TypeSet().preimage(TypeVar.LANEOF))

        # ASBOOL
        bool_ts = TypeSet(lanes=(1, 4), bools=(1, 64))
        eq(bool_ts.preimage(TypeVar.ASBOOL),
           TypeSet(lanes=(1, 4), ints=True, bools=True, floats=True))

        # Half/Double Vector: first the cases with an empty preimage
        lo = TypeSet(lanes=(1, 1), ints=(8, 8))
        hi = TypeSet(lanes=(256, 256), ints=(8, 8))
        eq(lo.preimage(TypeVar.DOUBLEVECTOR).size(), 0)
        eq(hi.preimage(TypeVar.HALFVECTOR).size(), 0)

        lo = TypeSet(lanes=(1, 16), ints=(8, 16), floats=(32, 32))
        hi = TypeSet(lanes=(64, 256), bools=(1, 32))
        eq(lo.preimage(TypeVar.DOUBLEVECTOR),
           TypeSet(lanes=(1, 8), ints=(8, 16), floats=(32, 32)))
        eq(hi.preimage(TypeVar.HALFVECTOR),
           TypeSet(lanes=(128, 256), bools=(1, 32)))

        # Half/Double Width: again the empty preimages first
        lo = TypeSet(ints=(8, 8), floats=(32, 32), bools=(1, 8))
        hi = TypeSet(ints=(64, 64), floats=(64, 64), bools=(64, 64))
        eq(lo.preimage(TypeVar.DOUBLEWIDTH).size(), 0)
        eq(hi.preimage(TypeVar.HALFWIDTH).size(), 0)

        lo = TypeSet(lanes=(1, 16), ints=(8, 16), floats=(32, 64))
        hi = TypeSet(lanes=(64, 256), bools=(1, 64))
        eq(lo.preimage(TypeVar.DOUBLEWIDTH),
           TypeSet(lanes=(1, 16), ints=(8, 8), floats=(32, 32)))
        eq(hi.preimage(TypeVar.HALFWIDTH),
           TypeSet(lanes=(64, 256), bools=(16, 64)))
 def has_non_bijective_derived_f(iterable):
    # True as soon as one derived function in `iterable` is reported as
    # non-bijective by TypeVar.is_bijection(); False for an empty iterable.
    return not all(TypeVar.is_bijection(func) for func in iterable)
 class TestTypeVar(TestCase):
    # Tests for TypeVar: derived-variable helpers, singleton construction and
    # a stress test of image/preimage propagation over derived functions.
    def test_functions(self):
        all_ints = TypeVar('x', 'all ints', ints=True)
        self.assertRaises(AssertionError, all_ints.double_width)
        self.assertRaises(AssertionError, all_ints.half_width)

        wide_ints = TypeVar('x2', 'i16 and up', ints=(16, 64))
        self.assertRaises(AssertionError, wide_ints.double_width)
        self.assertEqual(str(wide_ints.half_width()), '`half_width(x2)`')
        self.assertEqual(wide_ints.half_width().rust_expr(),
                         'x2.half_width()')
        self.assertEqual(
                wide_ints.half_width().double_width().rust_expr(),
                'x2.half_width().double_width()')

        narrow_ints = TypeVar('x3', 'up to i32', ints=(8, 32))
        self.assertEqual(str(narrow_ints.double_width()),
                         '`double_width(x3)`')
        self.assertRaises(AssertionError, narrow_ints.half_width)

    def test_singleton(self):
        def check_singleton(tv, text, lane_count):
            # A singleton of a 32-bit int type: exactly one int width, one
            # lane count, and no floats or bools.
            self.assertEqual(str(tv), text)
            type_set = tv.type_set
            self.assertEqual(min(type_set.ints), 32)
            self.assertEqual(max(type_set.ints), 32)
            self.assertEqual(min(type_set.lanes), lane_count)
            self.assertEqual(max(type_set.lanes), lane_count)
            self.assertEqual(len(type_set.floats), 0)
            self.assertEqual(len(type_set.bools), 0)

        check_singleton(TypeVar.singleton(i32), '`i32`', 1)
        check_singleton(TypeVar.singleton(i32.by(4)), '`i32x4`', 4)

    def test_stress_constrain_types(self):
        # Get all 43 possible derived vars of length up to 2
        # (1 empty path + 6 single functions + 36 ordered pairs)
        funcs = [TypeVar.LANEOF,
                 TypeVar.ASBOOL, TypeVar.HALFVECTOR, TypeVar.DOUBLEVECTOR,
                 TypeVar.HALFWIDTH, TypeVar.DOUBLEWIDTH]
        paths = [()] + [(f,) for f in funcs] + list(product(funcs, funcs))

        # For each pair of derived variables
        for (path1, path2) in product(paths, paths):
            # Compute the derived sets for each starting with a full typeset
            full_ts = TypeSet(lanes=True, floats=True, ints=True, bools=True)
            ts1 = full_ts
            for func in path1:
                ts1 = ts1.image(func)
            ts2 = full_ts
            for func in path2:
                ts2 = ts2.image(func)

            # Compute intersection
            intersect = ts1.copy()
            intersect &= ts2

            # Propagate intersections backward
            ts1_src = intersect
            for func in reversed(path1):
                ts1_src = ts1_src.preimage(func)
            ts2_src = intersect
            for func in reversed(path2):
                ts2_src = ts2_src.preimage(func)

            # If the intersection or its propagated forms are empty, then
            # these two variables can never overlap. For example
            # x.double_vector and x.lane_of.
            sizes = (intersect.size(), ts1_src.size(), ts2_src.size())
            if 0 in sizes:
                continue

            # Should be safe to create derived tvs from ts1_src and ts2_src
            tv1 = TypeVar.from_typeset(ts1_src)
            for func in path1:
                tv1 = TypeVar.derived(tv1, func)
            tv2 = TypeVar.from_typeset(ts2_src)
            for func in path2:
                tv2 = TypeVar.derived(tv2, func)

            # In the absence of AS_BOOL image(preimage(f)) == f so the
            # typesets of tv1 and tv2 should be exactly intersection
            assert (tv1.get_typeset() == intersect or
                    has_non_bijective_derived_f(path1))
            assert (tv2.get_typeset() == intersect or
                    has_non_bijective_derived_f(path2))
--- a/lib/cretonne/meta/cdsl/test_xform.py
+++ b/lib/cretonne/meta/cdsl/test_xform.py
@@ -0,0 +1,94 @@
 from __future__ import absolute_import
 from unittest import TestCase
 from doctest import DocTestSuite
 from base.instructions import iadd, iadd_imm, iconst, icmp
 from base.immediates import intcc
 from . import xform
 from .ast import Var
 from .xform import Rtl, XForm
 def load_tests(loader, tests, ignore):
    # unittest load_tests hook: add the doctests of the xform module to the
    # suite that was discovered for this file, and hand the suite back.
    xform_doctests = DocTestSuite(xform)
    tests.addTests(xform_doctests)
    return tests
 # Pattern variables used to build the Rtl patterns in this file.
 x, y, z, u = Var('x'), Var('y'), Var('z'), Var('u')
 a, b, c = Var('a'), Var('b'), Var('c')
 CC1, CC2 = Var('CC1'), Var('CC2')
 class TestXForm(TestCase):
    # Tests for XForm construction checks and for Rtl.substitution().
    def _assert_raises_regex(self, exc_type, pattern):
        # unittest spells this assertRaisesRegexp on Python 2.7 and
        # assertRaisesRegex on Python 3; the old spelling was removed in
        # Python 3.12. Use whichever one this interpreter provides.
        check = getattr(self, 'assertRaisesRegex', None)
        if check is None:
            check = self.assertRaisesRegexp
        return check(exc_type, pattern)

    def test_macro_pattern(self):
        # A well-formed expansion must construct without raising.
        src = Rtl(a << iadd_imm(x, y))
        dst = Rtl(
                c << iconst(y),
                a << iadd(x, c))
        XForm(src, dst)

    def test_def_input(self):
        # Src pattern has a def which is an input in dst.
        src = Rtl(a << iadd_imm(x, 1))
        dst = Rtl(y << iadd_imm(a, 1))
        with self._assert_raises_regex(
                AssertionError,
                "'a' used as both input and def"):
            XForm(src, dst)

    def test_input_def(self):
        # Converse of the above.
        src = Rtl(y << iadd_imm(a, 1))
        dst = Rtl(a << iadd_imm(x, 1))
        with self._assert_raises_regex(
                AssertionError,
                "'a' used as both input and def"):
            XForm(src, dst)

    def test_extra_input(self):
        # dst reads y, which src never mentions.
        src = Rtl(a << iadd_imm(x, 1))
        dst = Rtl(a << iadd(x, y))
        with self._assert_raises_regex(AssertionError,
                                       "extra inputs in dst"):
            XForm(src, dst)

    def test_double_def(self):
        # src defines a twice.
        src = Rtl(
                a << iadd_imm(x, 1),
                a << iadd(x, y))
        dst = Rtl(a << iadd(x, y))
        with self._assert_raises_regex(AssertionError,
                                       "'a' multiply defined"):
            XForm(src, dst)

    def test_subst_imm(self):
        src = Rtl(a << iconst(x))
        dst = Rtl(c << iconst(y))
        assert src.substitution(dst, {}) == {a: c, x: y}

    def test_subst_enum_var(self):
        # Enum operands given as Vars are substituted like any other Var.
        src = Rtl(a << icmp(CC1, x, y))
        dst = Rtl(b << icmp(CC2, z, u))
        assert src.substitution(dst, {}) == {a: b, CC1: CC2, x: z, y: u}

    def test_subst_enum_const(self):
        # Identical enum constants match and add nothing to the mapping.
        src = Rtl(a << icmp(intcc.eq, x, y))
        dst = Rtl(b << icmp(intcc.eq, z, u))
        assert src.substitution(dst, {}) == {a: b, x: z, y: u}

    def test_subst_enum_bad(self):
        # Var vs. constant, constant vs. Var, and two different constants
        # must all fail to substitute (substitution() returns None).
        src = Rtl(a << icmp(CC1, x, y))
        dst = Rtl(b << icmp(intcc.eq, z, u))
        assert src.substitution(dst, {}) is None
        src = Rtl(a << icmp(intcc.eq, x, y))
        dst = Rtl(b << icmp(CC1, z, u))
        assert src.substitution(dst, {}) is None
        src = Rtl(a << icmp(intcc.eq, x, y))
        dst = Rtl(b << icmp(intcc.sge, z, u))
        assert src.substitution(dst, {}) is None
--- a/lib/cretonne/meta/cdsl/ti.py
+++ b/lib/cretonne/meta/cdsl/ti.py
@@ -0,0 +1,886 @@
 """
 Type Inference
 """
 from .typevar import TypeVar
 from .ast import Def, Var
 from copy import copy
 from itertools import product
 try:
    from typing import Dict, TYPE_CHECKING, Union, Tuple, Optional, Set # noqa
    from typing import Iterable, List, Any, TypeVar as MTypeVar # noqa
    from typing import cast
    from .xform import Rtl, XForm # noqa
    from .ast import Expr # noqa
    from .typevar import TypeSet # noqa
    if TYPE_CHECKING:
        T = MTypeVar('T')
        TypeMap = Dict[TypeVar, TypeVar]
        VarTyping = Dict[Var, TypeVar]
 except ImportError:
    TYPE_CHECKING = False
    pass
 class TypeConstraint(object):
    """
    Base class for all runtime-emittable type constraints.

    Subclasses provide _args(), is_trivial() and eval(). Equality, hashing,
    translation and repr() are all derived from _args().
    """
    def translate(self, m):
        # type: (Union[TypeEnv, TypeMap]) -> TypeConstraint
        """
        Translate any TypeVars in the constraint according to the map or
        TypeEnv m. Returns a new constraint of the same class; arguments that
        are not TypeVars are passed through unchanged.
        """
        def translate_one(a):
            # type: (Any) -> Any
            if (isinstance(a, TypeVar)):
                return m[a] if isinstance(m, TypeEnv) else subst(a, m)
            return a
        res = None  # type: TypeConstraint
        res = self.__class__(*tuple(map(translate_one, self._args())))
        return res
    def __eq__(self, other):
        # type: (object) -> bool
        if (not isinstance(other, self.__class__)):
            return False
        assert isinstance(other, TypeConstraint)  # help MyPy figure out other
        return self._args() == other._args()
    def __ne__(self, other):
        # type: (object) -> bool
        # Python 2 does not derive != from __eq__, so spell it out to keep
        # the two operators consistent on every supported interpreter.
        return not self.__eq__(other)
    def is_concrete(self):
        # type: () -> bool
        """
        Return true iff all typevars in the constraint are singletons.
        """
        return all(tv.singleton_type() is not None for tv in self.tvs())
    def __hash__(self):
        # type: () -> int
        return hash(self._args())
    def _args(self):
        # type: () -> Tuple[Any,...]
        """
        Return a tuple with the exact arguments passed to __init__ to create
        this object.
        """
        assert False, "Abstract"
    def tvs(self):
        # type: () -> Iterable[TypeVar]
        """
        Return the typevars contained in this constraint.
        """
        return filter(lambda x:  isinstance(x, TypeVar), self._args())
    def is_trivial(self):
        # type: () -> bool
        """
        Return true if this constraint is statically decidable.
        """
        assert False, "Abstract"
    def eval(self):
        # type: () -> bool
        """
        Evaluate this constraint. Should only be called when the constraint has
        been translated to concrete types.
        """
        assert False, "Abstract"
    def __repr__(self):
        # type: () -> str
        return (self.__class__.__name__ + '(' +
                ', '.join(map(str, self._args())) + ')')
 class TypesEqual(TypeConstraint):
    """
    Constraint requiring two (possibly derived) type vars to have the same
    type at runtime.
    """
    def __init__(self, tv1, tv2):
        # type: (TypeVar, TypeVar) -> None
        # Store the pair ordered by repr() so that the stored arguments do
        # not depend on the order in which the two tvs were passed.
        ordered = sorted((tv1, tv2), key=repr)
        self.tv1 = ordered[0]
        self.tv2 = ordered[1]
    def _args(self):
        # type: () -> Tuple[Any,...]
        """ See TypeConstraint._args() """
        return (self.tv1, self.tv2)
    def is_trivial(self):
        # type: () -> bool
        """ See TypeConstraint.is_trivial() """
        # Identical tvs are trivially equal; otherwise the constraint is only
        # decidable once both sides are concrete.
        if self.tv1 == self.tv2:
            return True
        return self.is_concrete()
    def eval(self):
        # type: () -> bool
        """ See TypeConstraint.eval() """
        assert self.is_concrete()
        left = self.tv1.singleton_type()
        right = self.tv2.singleton_type()
        return left == right
 class InTypeset(TypeConstraint):
    """
    Constraint requiring a type var to belong to a given typeset.
    """
    def __init__(self, tv, ts):
        # type: (TypeVar, TypeSet) -> None
        # Only non-derived tvs whose name starts with "typeof_" are accepted.
        assert not tv.is_derived and tv.name.startswith("typeof_")
        self.tv = tv
        self.ts = ts
    def _args(self):
        # type: () -> Tuple[Any,...]
        """ See TypeConstraint._args() """
        return (self.tv, self.ts)
    def is_trivial(self):
        # type: () -> bool
        """ See TypeConstraint.is_trivial() """
        # Work on a copy: the in-place intersection below must not modify
        # the typeset of self.tv.
        candidate = self.tv.get_typeset().copy()
        # Trivially True: every possible type is already in self.ts
        if candidate.issubset(self.ts):
            return True
        # Trivially False: no possible type is in self.ts
        candidate &= self.ts
        if candidate.size() == 0:
            return True
        return self.is_concrete()
    def eval(self):
        # type: () -> bool
        """ See TypeConstraint.eval() """
        assert self.is_concrete()
        concrete_ts = self.tv.get_typeset()
        return concrete_ts.issubset(self.ts)
 class WiderOrEq(TypeConstraint):
    """
    Constraint specifying that a type var tv1 must be wider than or equal to
    type var tv2 at runtime. This requires that:
        1) They have the same number of lanes
        2) In a lane tv1 has at least as many bits as tv2.
    """
    def __init__(self, tv1, tv2):
        # type: (TypeVar, TypeVar) -> None
        self.tv1 = tv1
        self.tv2 = tv2
    def _args(self):
        # type: () -> Tuple[Any,...]
        """ See TypeConstraint._args() """
        return (self.tv1, self.tv2)
    def is_trivial(self):
        # type: () -> bool
        """ See TypeConstraint.is_trivial() """
        # Trivially true
        if self.tv1 == self.tv2:
            return True
        wide_ts = self.tv1.get_typeset()
        narrow_ts = self.tv2.get_typeset()
        # The width sets of both sides, paired up per kind of type.
        width_pairs = [
            (wide_ts.ints, narrow_ts.ints),
            (wide_ts.floats, narrow_ts.floats),
            (wide_ts.bools, narrow_ts.bools),
        ]
        def both_nonempty(s1, s2):
            # type: (Set[int], Set[int]) -> bool
            return len(s1) > 0 and len(s2) > 0
        # Trivially True: for every kind, both sides are non-empty and the
        # smallest tv1 width is at least the largest tv2 width.
        if all(both_nonempty(s1, s2) and min(s1) >= max(s2)
               for (s1, s2) in width_pairs):
            return True
        # Treated as trivially False: for every kind, both sides are
        # non-empty and min(tv1 widths) < max(tv2 widths).
        # NOTE(review): this condition does not by itself rule out a
        # satisfying assignment - confirm it is the intended test.
        if all(both_nonempty(s1, s2) and min(s1) < max(s2)
               for (s1, s2) in width_pairs):
            return True
        # Trivially False: the two sides share no lane count
        if len(wide_ts.lanes.intersection(narrow_ts.lanes)) == 0:
            return True
        return self.is_concrete()
    def eval(self):
        # type: () -> bool
        """ See TypeConstraint.eval() """
        assert self.is_concrete()
        wide = self.tv1.singleton_type()
        narrow = self.tv2.singleton_type()
        return wide.wider_or_equal(narrow)
 class SameWidth(TypeConstraint):
    """
    Constraint specifying that two types have the same width. E.g. i32x2 has
    the same width as i64x1, i16x4, f32x2, f64, b1x64 etc.
    """
    def __init__(self, tv1, tv2):
        # type: (TypeVar, TypeVar) -> None
        self.tv1 = tv1
        self.tv2 = tv2
    def _args(self):
        # type: () -> Tuple[Any,...]
        """ See TypeConstraint._args() """
        return (self.tv1, self.tv2)
    def is_trivial(self):
        # type: () -> bool
        """ See TypeConstraint.is_trivial() """
        # Trivially true
        if self.tv1 == self.tv2:
            return True
        first_ts = self.tv1.get_typeset()
        second_ts = self.tv2.get_typeset()
        # Trivially False: the two typesets have no width in common
        shared_widths = first_ts.widths().intersection(second_ts.widths())
        if len(shared_widths) == 0:
            return True
        return self.is_concrete()
    def eval(self):
        # type: () -> bool
        """ See TypeConstraint.eval() """
        assert self.is_concrete()
        first = self.tv1.singleton_type()
        second = self.tv2.singleton_type()
        return first.width() == second.width()
 class TypeEnv(object):
    """
    Class encapsulating the necessary bookkeeping for type inference.
        :attribute type_map: dict holding the equivalence relations between tvs
        :attribute constraints: a list of accumulated constraints
                            (TypeConstraint objects)
        :attribute ranks: dictionary recording the (optional) ranks for tvs.
                          'rank' is a partial ordering on TVs based on their
                          origin. See comments in rank() and register().
        :attribute vars: a set containing all known Vars
        :attribute idx: counter used to get fresh ids
    """
    RANK_SINGLETON = 5
    RANK_INPUT = 4
    RANK_INTERMEDIATE = 3
    RANK_OUTPUT = 2
    RANK_TEMP = 1
    RANK_INTERNAL = 0
    def __init__(self, arg=None):
        # type: (Optional[Tuple[TypeMap, List[TypeConstraint]]]) -> None
        self.ranks = {}  # type: Dict[TypeVar, int]
        self.vars = set()  # type: Set[Var]
        if arg is None:
            self.type_map = {}  # type: TypeMap
            self.constraints = []  # type: List[TypeConstraint]
        else:
            self.type_map, self.constraints = arg
        self.idx = 0
    def __getitem__(self, arg):
        # type: (Union[TypeVar, Var]) -> TypeVar
        """
        Lookup the canonical representative for a Var/TypeVar.
        """
        if isinstance(arg, Var):
            # Only registered Vars may be looked up.
            assert arg in self.vars
            rep = arg.get_typevar()
        else:
            assert isinstance(arg, TypeVar)
            rep = arg
        # Follow the equivalence links until reaching a tv with no mapping.
        mapping = self.type_map
        while rep in mapping:
            rep = mapping[rep]
        if not rep.is_derived:
            return rep
        # For a derived tv, canonicalize its base and re-derive from it.
        return TypeVar.derived(self[rep.base], rep.derived_func)
    def equivalent(self, tv1, tv2):
        # type: (TypeVar, TypeVar) -> None
        """
        Record that the free tv1 is part of the same equivalence class as
        tv2. The canonical representative of the merged class is tv2's
        canonical representative.
        """
        # tv1 must be a non-derived tv that is currently its own
        # representative.
        assert not tv1.is_derived
        assert self[tv1] == tv1
        # Make sure we don't create cycles: a derived tv2 must not be based
        # on tv1 itself.
        assert not tv2.is_derived or self[tv2.base] != tv1
        self.type_map[tv1] = tv2
    def add_constraint(self, constr):
        # type: (TypeConstraint) -> None
        """
        Add a new constraint
        """
        if (constr in self.constraints):
            return
        # InTypeset constraints can be expressed by constraining the typeset of
        # a variable. No need to add them to self.constraints
        if (isinstance(constr, InTypeset)):
            self[constr.tv].constrain_types_by_ts(constr.ts)
            return
        self.constraints.append(constr)
    def get_uid(self):
        # type: () -> str
        r = str(self.idx)
        self.idx += 1
        return r
    def __repr__(self):
        # type: () -> str
        return self.dot()
    def rank(self, tv):
        # type: (TypeVar) -> int
        """
        Get the rank of tv in the partial order. TVs directly associated with a
        Var get their rank from the Var (see register()).  Internally generated
        non-derived TVs implicitly get the lowest rank (0). Derived variables
        get their rank from their free typevar.  Singletons have the highest
        rank. TVs associated with vars in a source pattern have a higher rank
        than TVs associted with temporary vars.
        """
        default_rank = TypeEnv.RANK_INTERNAL if tv.singleton_type() is None \
            else TypeEnv.RANK_SINGLETON
        if tv.is_derived:
            tv = tv.free_typevar()
        return self.ranks.get(tv, default_rank)
    def register(self, v):
        # type: (Var) -> None
        """
        Register a new Var v.  This computes a rank for the associated TypeVar
        for v, which is used to impose a partial order on type variables.
        """
        self.vars.add(v)
        if v.is_input():
            r = TypeEnv.RANK_INPUT
        elif v.is_intermediate():
            r = TypeEnv.RANK_INTERMEDIATE
        elif v.is_output():
            r = TypeEnv.RANK_OUTPUT
        else:
            assert(v.is_temp())
            r = TypeEnv.RANK_TEMP
        self.ranks[v.get_typevar()] = r
    def free_typevars(self):
        # type: () -> List[TypeVar]
        """
        Get the free typevars in the current type env.
        """
        tvs = set([self[tv].free_typevar() for tv in self.type_map.keys()])
        tvs = tvs.union(set([self[v].free_typevar() for v in self.vars]))
        # Filter out None here due to singleton type vars
        return sorted(filter(lambda x: x is not None, tvs),
                      key=lambda x:   x.name)
    def normalize(self):
        # type: () -> None
        r"""
        Normalize by:
            - collapsing any roots that don't correspond to a concrete TV AND
              have a single TV derived from them or equivalent to them
        E.g. if we have a root of the tree that looks like:
          typeof_a   typeof_b
                 \  /
              typeof_x
                  |
                half_width(1)
                  |
                  1
        we want to collapse the linear path between 1 and typeof_x. The
        resulting graph is:
          typeof_a   typeof_b
                 \  /
              typeof_x

        The docstring is a raw string because the diagram contains
        backslashes that must not be read as escape sequences.
        """
        # TVs that belong to real variables are never collapsed.
        source_tvs = set([v.get_typevar() for v in self.vars])
        # Map each TV to the TVs derived from it or equivalent to it.
        children = {}  # type: Dict[TypeVar, Set[TypeVar]]
        for v in self.type_map.values():
            if not v.is_derived:
                continue
            children.setdefault(v.free_typevar(), set()).add(v)
        for (a, b) in self.type_map.items():
            children.setdefault(b, set()).add(a)
        # Walk down from every root while the path stays linear, dropping the
        # type map entries that made up the path.
        for r in self.free_typevars():
            while (r not in source_tvs and r in children and
                   len(children[r]) == 1):
                child = list(children[r])[0]
                if child in self.type_map:
                    assert self.type_map[child] == r
                    del self.type_map[child]
                r = child
    def extract(self):
        # type: () -> TypeEnv
        """
        Build a fresh TypeEnv that mentions only TVs of real variables.

        The type map keeps one entry per variable TV that is not its own
        representative. Constraints are translated through self; trivial
        and duplicate ones are dropped. Ranks and vars are shallow-copied.
        """
        real_tvs = set([var.get_typevar() for var in self.vars])
        pruned_map = {}
        for tv in real_tvs:
            if tv != self[tv]:
                pruned_map[tv] = self[tv]
        kept = []  # type: List[TypeConstraint]
        for original in self.constraints:
            translated = original.translate(self)
            if translated.is_trivial():
                continue
            if translated in kept:
                continue
            # Sanity: translated constraints should refer to only real vars
            for arg in translated._args():
                if isinstance(arg, TypeVar):
                    root = arg.free_typevar()
                    assert root is None or root in real_tvs
            kept.append(translated)
        # Sanity: translated typemap should refer to only real vars
        for (key, target) in pruned_map.items():
            assert key in real_tvs
            assert (target.free_typevar() is None or
                    target.free_typevar() in real_tvs)
        clean = TypeEnv()
        clean.type_map = pruned_map
        clean.constraints = kept
        # ranks and vars contain only TVs associated with real vars
        clean.ranks = copy(self.ranks)
        clean.vars = copy(self.vars)
        return clean
    def concrete_typings(self):
        # type: () -> Iterable[VarTyping]
        """
        Yield every concrete typing of the variables allowed by this TypeEnv.

        Each combination of concrete types for the free typevars is tried;
        a combination is yielded only if all constraints evaluate to true
        under it.
        """
        roots = self.free_typevars()
        choices = [root.get_typeset().concrete_types() for root in roots]
        for picked in product(*choices):
            # Substitution binding each free typevar to a singleton TV
            m = {tv: TypeVar.singleton(typ)
                 for (tv, typ) in zip(roots, picked)}
            typing = {v: subst(self[v.get_typevar()], m)
                      for v in self.vars}
            # Stops at the first constraint that fails for this typing
            satisfied = all(constr.translate(m).eval()
                            for constr in self.constraints)
            if satisfied:
                yield typing
    def permits(self, concrete_typing):
        # type: (VarTyping) -> bool
        """
        Tell whether the (possibly partial) concrete variable typing
        concrete_typing is allowed by this TypeEnv.
        """
        # Every given type must be a singleton inside the inferred typeset
        # of its variable.
        for (var, typ) in concrete_typing.items():
            assert typ.singleton_type() is not None
            inferred = self[var].get_typeset()
            if not typ.get_typeset().issubset(inferred):
                return False
        m = {self[var]: typ for (var, typ) in concrete_typing.items()}
        # Constraints involving vars in concrete_typing must hold
        for constr in self.constraints:
            try:
                # A constraint that mentions a var outside concrete_typing
                # raises KeyError while being translated with m; such
                # constraints are ignored.
                holds = constr.translate(m).eval()
            except KeyError:
                continue
            if not holds:
                return False
        return True
    def dot(self):
        # type: () -> str
        """
        Render this type environment as a graph in dot format.
            Nodes correspond to TypeVariables.
            Dotted edges correspond to equivalences between TVS
            Solid edges correspond to derivation relations between TVs.
            Dashed edges correspond to equivalence constraints.
        """
        # All registered TVs are added as nodes, since some of them may be
        # singleton nodes that no edge mentions.
        nodes = set()  # type: Set[TypeVar]
        edges = set()  # type: Set[Tuple[TypeVar, TypeVar, str, str, Optional[str]]] # noqa
        def quoted(tv):
            # type: (TypeVar) -> str
            return "\"" + str(tv) + "\""
        def add_nodes(*tvs):
            # type: (*TypeVar) -> None
            # Register each TV plus its whole chain of bases.
            for cur in tvs:
                nodes.add(cur)
                while (cur.is_derived):
                    parent = cur.base
                    nodes.add(parent)
                    edges.add((cur, parent, "solid", "forward",
                               cur.derived_func))
                    cur = parent
        def add_constraint_edge(constr, direction, text):
            # type: (Any, str, str) -> None
            add_nodes(constr.tv1, constr.tv2)
            edges.add((constr.tv1, constr.tv2, "dashed", direction, text))
        for var in self.vars:
            add_nodes(var.get_typevar())
        for (src_tv, dst_tv) in self.type_map.items():
            # Add all intermediate TVs appearing in edges
            add_nodes(src_tv, dst_tv)
            edges.add((src_tv, dst_tv, "dotted", "forward", None))
        for constr in self.constraints:
            if isinstance(constr, TypesEqual):
                add_constraint_edge(constr, "none", "equal")
            elif isinstance(constr, WiderOrEq):
                add_constraint_edge(constr, "forward", ">=")
            elif isinstance(constr, SameWidth):
                add_constraint_edge(constr, "none", "same_width")
            else:
                assert False, "Can't display constraint {}".format(constr)
        root_nodes = set([tv for tv in nodes
                          if tv not in self.type_map and not tv.is_derived])
        chunks = ["digraph {\n"]
        for node in nodes:
            chunks.append(quoted(node))
            if node in root_nodes:
                # Roots are annotated with their typeset
                typeset = self[node].get_typeset()
                chunks.append("[xlabel=\"{}\"]".format(typeset))
            chunks.append(";\n")
        for (src, dst, style, direction, text) in edges:
            chunks.append(quoted(src) + "->" + quoted(dst))
            chunks.append("[style={},dir={}".format(style, direction))
            if text is not None:
                chunks.append(",label=\"{}\"".format(text))
            chunks.append("];\n")
        chunks.append("}")
        return "".join(chunks)
 # Aliases referenced by the type comments below; they are only defined when
 # TYPE_CHECKING is true.
 if TYPE_CHECKING:
    # A typing error is carried as a plain message string.
    TypingError = str
    # Result of an inference step: a type environment or an error message.
    TypingOrError = Union[TypeEnv, TypingError]
 def get_error(typing_or_err):
    # type: (TypingOrError) -> Optional[TypingError]
    """
    Return the error message if typing_or_err is one (a str), else None.

    Exists to appease mypy when checking the result of typing.
    """
    if not isinstance(typing_or_err, str):
        return None
    if (TYPE_CHECKING):
        return cast(TypingError, typing_or_err)
    return typing_or_err
 def get_type_env(typing_or_err):
    # type: (TypingOrError) -> TypeEnv
    """
    Return typing_or_err as a TypeEnv, asserting that it is not an error.

    Exists to appease mypy when checking the result of typing.
    """
    is_env = isinstance(typing_or_err, TypeEnv)
    assert is_env, "Unexpected error: {}".format(typing_or_err)
    return cast(TypeEnv, typing_or_err) if TYPE_CHECKING else typing_or_err
 def subst(tv, tv_map):
    # type: (TypeVar, TypeMap) -> TypeVar
    """
    Apply the substitution tv_map to tv.

    A tv that is a key of tv_map is replaced by its mapping. A derived tv
    that is not a key is rebuilt on top of its substituted base. Any other
    tv is returned unchanged.
    """
    if tv in tv_map:
        result = tv_map[tv]
    elif tv.is_derived:
        new_base = subst(tv.base, tv_map)
        result = TypeVar.derived(new_base, tv.derived_func)
    else:
        result = tv
    return result
 def normalize_tv(tv):
    # type: (TypeVar) -> TypeVar
    """
    Normalize a (potentially derived) TV using the following rules:
        - vector and width derived functions commute
        {HALF,DOUBLE}VECTOR({HALF,DOUBLE}WIDTH(base)) ->
            {HALF,DOUBLE}WIDTH({HALF,DOUBLE}VECTOR(base))
        - half/double pairs collapse
        {HALF,DOUBLE}WIDTH({DOUBLE,HALF}WIDTH(base)) -> base
        {HALF,DOUBLE}VECTOR({DOUBLE,HALF}VECTOR(base)) -> base
    """
    vector_funcs = (TypeVar.HALFVECTOR, TypeVar.DOUBLEVECTOR)
    width_funcs = (TypeVar.HALFWIDTH, TypeVar.DOUBLEWIDTH)
    if not tv.is_derived:
        return tv
    outer = tv.derived_func
    base = tv.base
    if not base.is_derived:
        return TypeVar.derived(normalize_tv(base), outer)
    inner = base.derived_func
    # Reordering: the width functions commute with the vector functions.
    # Arbitrarily pick WIDTH < VECTOR.
    if outer in vector_funcs and inner in width_funcs:
        swapped = TypeVar.derived(TypeVar.derived(base.base, outer), inner)
        return normalize_tv(swapped)
    # Cancelling: a half/double pair of the same kind is the identity. This
    # doesn't hide any over/underflows, since 1) the safety of each TV in
    # the chain is asserted upon its creation, and 2) the base typeset is
    # only allowed to shrink.
    inverse_pairs = ((TypeVar.HALFVECTOR, TypeVar.DOUBLEVECTOR),
                     (TypeVar.DOUBLEVECTOR, TypeVar.HALFVECTOR),
                     (TypeVar.HALFWIDTH, TypeVar.DOUBLEWIDTH),
                     (TypeVar.DOUBLEWIDTH, TypeVar.HALFWIDTH))
    if (outer, inner) in inverse_pairs:
        return normalize_tv(base.base)
    return TypeVar.derived(normalize_tv(base), outer)
 def constrain_fixpoint(tv1, tv2):
    # type: (TypeVar, TypeVar) -> None
    """
    Constrain the typesets of tv1 and tv2 to be the same.

    tv1 and tv2 may be derived from one another, in which case constraining
    one can change the other; the process is repeated until a fixpoint.
    """
    # Keep constraining tv2 for as long as doing so changes tv1's typeset.
    tv1_changed = True
    while tv1_changed:
        before = tv1.get_typeset().copy()
        tv2.constrain_types(tv1)
        tv1_changed = not (tv1.get_typeset() == before)
    # Constraining tv1 by tv2 afterwards must leave tv2 untouched.
    tv2_snapshot = tv2.get_typeset().copy()
    tv1.constrain_types(tv2)
    assert tv2_snapshot == tv2.get_typeset()
 def unify(tv1, tv2, typ):
    # type: (TypeVar, TypeVar, TypeEnv) -> TypingOrError
    """
    Unify tv1 and tv2 in the type environment typ.

    Returns the updated type environment, or an error message when the
    unification leaves one of the two typesets empty.
    """
    lhs = normalize_tv(typ[tv1])
    rhs = normalize_tv(typ[tv2])
    # Already unified
    if lhs == rhs:
        return typ
    # Always work with the lower-ranked TV on the left-hand side.
    if typ.rank(rhs) < typ.rank(lhs):
        return unify(rhs, lhs, typ)
    constrain_fixpoint(lhs, rhs)
    lhs_empty = lhs.get_typeset().size() == 0
    if lhs_empty or rhs.get_typeset().size() == 0:
        return "Error: empty type created when unifying {} and {}"\
               .format(lhs, rhs)
    # Free -> Derived(Free)
    if not lhs.is_derived:
        typ.equivalent(lhs, rhs)
        return typ
    # lhs is derived: when its function is a bijection, peel it off by
    # applying the inverse to rhs and unify the bases.
    if TypeVar.is_bijection(lhs.derived_func):
        inv_f = TypeVar.inverse_func(lhs.derived_func)
        peeled = normalize_tv(TypeVar.derived(rhs, inv_f))
        return unify(lhs.base, peeled, typ)
    # Otherwise the equality is recorded as an explicit constraint.
    typ.add_constraint(TypesEqual(lhs, rhs))
    return typ
 def move_first(l, i):
    # type: (List[T], int) -> List[T]
    """
    Return a new list with element i of l moved to the front.

    The relative order of the other elements is kept and l itself is not
    modified. Negative indices count from the end, as in list indexing.
    Raises IndexError when i is out of range.
    """
    rest = list(l)
    # pop() resolves negative indices itself; slicing around i would wrap
    # around for them and duplicate elements.
    head = rest.pop(i)
    return [head] + rest
 def ti_def(definition, typ):
    # type: (Def, TypeEnv) -> TypingOrError
    """
    Run type inference on a single Def in the type environment typ.

    Every formal type var in the signature of the Def's instruction gets a
    fresh copy, which is then unified with the corresponding actual tv.
    Returns the updated type environment, or an error message.
    """
    expr = definition.expr
    inst = expr.inst
    # m maps each free typevar of the instruction signature to a fresh copy
    # of itself.
    free_formal_tvs = inst.all_typevars()
    m = {tv: tv.get_fresh_copy(str(typ.get_uid())) for tv in free_formal_tvs}
    # Explicitly bound type vars override the fresh copies.
    for (pos, bound_typ) in enumerate(expr.typevars):
        m[free_formal_tvs[pos]] = TypeVar.singleton(bound_typ)
    # Fresh formal tvs (free and derived): results first, then operands.
    result_formals = [subst(inst.outs[i].typevar, m)
                      for i in inst.value_results]
    operand_formals = [subst(inst.ins[i].typevar, m)
                       for i in inst.value_opnums]
    fresh_formal_tvs = result_formals + operand_formals
    # The actual Vars, in the same order.
    actual_vars = []  # type: List[Expr]
    actual_vars.extend([definition.defs[i] for i in inst.value_results])
    actual_vars.extend([expr.args[i] for i in inst.value_opnums])
    # The actual TypeVars.
    actual_tvs = []
    for var in actual_vars:
        assert(isinstance(var, Var))
        # Register with TypeEnv that this typevar corresponds to variable
        # var, and thus has a given rank.
        typ.register(var)
        actual_tvs.append(var.get_typevar())
    # The control typevar has to be unified first.
    if inst.is_polymorphic:
        ctrl_pos = fresh_formal_tvs.index(m[inst.ctrl_typevar])
        fresh_formal_tvs = move_first(fresh_formal_tvs, ctrl_pos)
        actual_tvs = move_first(actual_tvs, ctrl_pos)
    # Unify each actual typevar with the corresponding fresh formal tv.
    for (actual_tv, formal_tv) in zip(actual_tvs, fresh_formal_tvs):
        outcome = unify(actual_tv, formal_tv, typ)
        err = get_error(outcome)
        if (err):
            return "fail ti on {} <: {}: ".format(actual_tv, formal_tv) + err
        typ = get_type_env(outcome)
    # Add any instruction specific constraints
    for constr in inst.constraints:
        typ.add_constraint(constr.translate(m))
    return typ
 def ti_rtl(rtl, typ):
    # type: (Rtl, TypeEnv) -> TypingOrError
    """
    Run type inference on each Def of an Rtl, starting from type env typ.

    Returns the updated type environment, or an error message prefixed
    with the index of the Def that failed.
    """
    env = typ
    for (line_no, definition) in enumerate(rtl.rtl):
        assert (isinstance(definition, Def))
        outcome = ti_def(definition, env)
        err = get_error(outcome)  # type: Optional[TypingError]
        if (err):
            return "On line {}: ".format(line_no) + err
        env = get_type_env(outcome)
    return env
 def ti_xform(xform, typ):
    # type: (XForm, TypeEnv) -> TypingOrError
    """
    Run type inference on an XForm, starting from type env typ.

    The src pattern is typed first and the dst pattern is typed in the
    resulting environment. Returns the final type environment, or an error
    message naming the pattern that failed.
    """
    src_outcome = ti_rtl(xform.src, typ)
    src_err = get_error(src_outcome)  # type: Optional[TypingError]
    if (src_err):
        return "In src pattern: " + src_err
    src_env = get_type_env(src_outcome)
    dst_outcome = ti_rtl(xform.dst, src_env)
    dst_err = get_error(dst_outcome)
    if (dst_err):
        return "In dst pattern: " + dst_err
    return get_type_env(dst_outcome)
--- a/lib/cretonne/meta/cdsl/types.py
+++ b/lib/cretonne/meta/cdsl/types.py
@@ -0,0 +1,286 @@
 """Cretonne ValueType hierarchy"""
 from __future__ import absolute_import
 import math
 try:
    from typing import Dict, List, cast, TYPE_CHECKING # noqa
 except ImportError:
    TYPE_CHECKING = False
    pass
 # ValueType instances (i8, i32, ...) are provided in the cretonne.types module.
 class ValueType(object):
    """
    A concrete SSA value type.
    All SSA values have a type that is described by an instance of `ValueType`
    or one of its subclasses.
    Every instance registers itself by name in a class-wide registry, so a
    name can only be used once.
    """
    # Map name -> ValueType.
    _registry = dict()  # type: Dict[str, ValueType]
    # List of all the scalar types.
    all_scalars = list()  # type: List[ScalarType]
    def __init__(self, name, membytes, doc):
        # type: (str, int, str) -> None
        """
        Create the type and register it under `name`.
        :param name: Unique name of the type.
        :param membytes: Stored as the `membytes` attribute.
        :param doc: Documentation string, installed as `__doc__`.
        """
        self.name = name
        # Left unset here; the subclasses in this module assign it.
        self.number = None  # type: int
        self.membytes = membytes
        self.__doc__ = doc
        assert name not in ValueType._registry
        ValueType._registry[name] = self
    def __str__(self):
        # type: () -> str
        return self.name
    def rust_name(self):
        # type: () -> str
        """Return the upper-cased name prefixed with `ir::types::`."""
        return 'ir::types::' + self.name.upper()
    @staticmethod
    def by_name(name):
        # type: (str) -> ValueType
        """
        Look up a registered type by name.
        Raises AttributeError when no type with that name exists.
        """
        if name in ValueType._registry:
            return ValueType._registry[name]
        else:
            raise AttributeError("No type named '{}'".format(name))
    def lane_bits(self):
        # type: () -> int
        """Return the number of bits in a lane."""
        # Raised explicitly: an `assert False` would be stripped under -O
        # and let the method return None.
        raise NotImplementedError("Abstract")
    def lane_count(self):
        # type: () -> int
        """Return the number of lanes."""
        raise NotImplementedError("Abstract")
    def width(self):
        # type: () -> int
        """Return the total number of bits of an instance of this type."""
        return self.lane_count() * self.lane_bits()
    def wider_or_equal(self, other):
        # type: (ValueType) -> bool
        """
        Return true iff:
            1. self and other have equal number of lanes
            2. each lane in self has at least as many bits as a lane in other
        """
        return (self.lane_count() == other.lane_count() and
                self.lane_bits() >= other.lane_bits())
 class ScalarType(ValueType):
    """
    A concrete scalar (not vector) type.
    Also tracks a unique set of :py:class:`VectorType` instances with this type
    as the lane type.
    """
    def __init__(self, name, membytes, doc):
        # type: (str, int, str) -> None
        super(ScalarType, self).__init__(name, membytes, doc)
        # Vector types built on this lane type, keyed by lane count.
        self._vectors = dict()  # type: Dict[int, VectorType]
        # Assign numbers starting from 1. (0 is VOID).
        scalars = ValueType.all_scalars
        scalars.append(self)
        self.number = len(scalars)
        assert self.number < 16, 'Too many scalar types'
    def __repr__(self):
        # type: () -> str
        return 'ScalarType({})'.format(self.name)
    def by(self, lanes):
        # type: (int) -> VectorType
        """
        Get a vector type with this type as the lane type.
        For example, ``i32.by(4)`` returns the :obj:`i32x4` type.
        The vector type is created on first request and cached afterwards.
        """
        cached = self._vectors.get(lanes)
        if cached is None:
            cached = VectorType(self, lanes)
            self._vectors[lanes] = cached
        return cached
    def lane_count(self):
        # type: () -> int
        """Return the number of lanes, which is 1 for a scalar."""
        return 1
 class VectorType(ValueType):
    """
    A concrete SIMD vector type.
    A vector type has a lane type which is an instance of :class:`ScalarType`,
    and a positive number of lanes.
    """
    def __init__(self, base, lanes):
        # type: (ScalarType, int) -> None
        assert isinstance(base, ScalarType), 'SIMD lanes must be scalar types'
        vector_name = '{}x{}'.format(base.name, lanes)
        super(VectorType, self).__init__(
                name=vector_name,
                membytes=lanes*base.membytes,
                doc="""
                A SIMD vector with {} lanes containing a `{}` each.
                """.format(lanes, base.name))
        self.base = base
        self.lanes = lanes
        # The type number combines log2 of the lane count with the lane
        # type's own number.
        log2_lanes = int(math.log(lanes, 2))
        self.number = 16*log2_lanes + base.number
    def __repr__(self):
        # type: () -> str
        template = 'VectorType(base={}, lanes={})'
        return template.format(self.base.name, self.lanes)
    def lane_count(self):
        # type: () -> int
        """Return the number of lanes."""
        return self.lanes
    def lane_bits(self):
        # type: () -> int
        """Return the number of bits in a lane, taken from the lane type."""
        return self.base.lane_bits()
 class IntType(ScalarType):
    """A concrete scalar integer type, named `i<bits>`."""
    def __init__(self, bits):
        # type: (int) -> None
        assert bits > 0, 'IntType must have positive number of bits'
        type_name = 'i{:d}'.format(bits)
        type_doc = "An integer type with {} bits.".format(bits)
        super(IntType, self).__init__(
                name=type_name,
                membytes=bits // 8,
                doc=type_doc)
        self.bits = bits
    def __repr__(self):
        # type: () -> str
        return 'IntType(bits={})'.format(self.bits)
    @staticmethod
    def with_bits(bits):
        # type: (int) -> IntType
        """Look up the registered integer type with `bits` bits."""
        found = ValueType.by_name('i{:d}'.format(bits))
        # The cast only exists for the type checker.
        return cast(IntType, found) if TYPE_CHECKING else found
    def lane_bits(self):
        # type: () -> int
        """Return the number of bits in a lane."""
        return self.bits
 class FloatType(ScalarType):
    """A concrete scalar floating point type, named `f<bits>`."""
    def __init__(self, bits, doc):
        # type: (int, str) -> None
        assert bits > 0, 'FloatType must have positive number of bits'
        type_name = 'f{:d}'.format(bits)
        super(FloatType, self).__init__(
                name=type_name,
                membytes=bits // 8,
                doc=doc)
        self.bits = bits
    def __repr__(self):
        # type: () -> str
        return 'FloatType(bits={})'.format(self.bits)
    @staticmethod
    def with_bits(bits):
        # type: (int) -> FloatType
        """Look up the registered floating point type with `bits` bits."""
        found = ValueType.by_name('f{:d}'.format(bits))
        # The cast only exists for the type checker.
        return cast(FloatType, found) if TYPE_CHECKING else found
    def lane_bits(self):
        # type: () -> int
        """Return the number of bits in a lane."""
        return self.bits
 class BoolType(ScalarType):
    """A concrete scalar boolean type, named `b<bits>`."""
    def __init__(self, bits):
        # type: (int) -> None
        assert bits > 0, 'BoolType must have positive number of bits'
        type_name = 'b{:d}'.format(bits)
        type_doc = "A boolean type with {} bits.".format(bits)
        super(BoolType, self).__init__(
                name=type_name,
                membytes=bits // 8,
                doc=type_doc)
        self.bits = bits
    def __repr__(self):
        # type: () -> str
        return 'BoolType(bits={})'.format(self.bits)
    @staticmethod
    def with_bits(bits):
        # type: (int) -> BoolType
        """Look up the registered boolean type with `bits` bits."""
        found = ValueType.by_name('b{:d}'.format(bits))
        # The cast only exists for the type checker.
        return cast(BoolType, found) if TYPE_CHECKING else found
    def lane_bits(self):
        # type: () -> int
        """Return the number of bits in a lane."""
        return self.bits
 class BVType(ValueType):
    """A flat bitvector type. Used for semantics description only."""
    def __init__(self, bits):
        # type: (int) -> None
        assert bits > 0, 'Must have positive number of bits'
        type_name = 'bv{:d}'.format(bits)
        type_doc = "A bitvector type with {} bits.".format(bits)
        super(BVType, self).__init__(
                name=type_name,
                membytes=bits // 8,
                doc=type_doc)
        self.bits = bits
    def __repr__(self):
        # type: () -> str
        return 'BVType(bits={})'.format(self.bits)
    @staticmethod
    def with_bits(bits):
        # type: (int) -> BVType
        """
        Return the bitvector type with `bits` bits, creating and registering
        it if no type of that name exists yet.
        """
        type_name = 'bv{:d}'.format(bits)
        if type_name in ValueType._registry:
            found = ValueType.by_name(type_name)
            # The cast only exists for the type checker.
            return cast(BVType, found) if TYPE_CHECKING else found
        return BVType(bits)
    def lane_bits(self):
        # type: () -> int
        """Return the number of bits in a lane."""
        return self.bits
    def lane_count(self):
        # type: () -> int
        """Return the number of lanes. For BVTypes always 1."""
        return 1
--- a/lib/cretonne/meta/cdsl/typevar.py
+++ b/lib/cretonne/meta/cdsl/typevar.py
@@ -0,0 +1,853 @@
 """
 Type variables for Parametric polymorphism.
 Cretonne instructions and instruction transformations can be specified to be
 polymorphic by using type variables.
 """
 from __future__ import absolute_import
 import math
 from . import types, is_power_of_two
 from copy import deepcopy
 try:
    from typing import Tuple, Union, Iterable, Any, Set, TYPE_CHECKING # noqa
    if TYPE_CHECKING:
        from srcgen import Formatter  # noqa
        from .types import ValueType  # noqa
        Interval = Tuple[int, int]
        # An Interval where `True` means 'everything'
        BoolInterval = Union[bool, Interval]
 except ImportError:
    pass
 # Upper end of the lane-count range that TypeSet uses for `lanes=True`.
 MAX_LANES = 256
 # Upper end of the bit-width range that TypeSet uses for ints and bools.
 MAX_BITS = 64
 # Upper end of the bitvector width range that TypeSet uses for
 # `bitvecs=True`.
 MAX_BITVEC = MAX_BITS * MAX_LANES
 def int_log2(x):
    # type: (int) -> int
    """
    Return the base-2 logarithm of the positive integer x, rounded down.

    Computed from the bit length of x, so the result is exact for every
    positive int; truncating a floating point logarithm can be off by one.
    Raises ValueError when x is not positive.
    """
    if x <= 0:
        raise ValueError("math domain error")
    return x.bit_length() - 1
 def intersect(a, b):
    # type: (Interval, Interval) -> Interval
    """
    Compute the intersection of two inclusive `(min, max)` intervals.

    The empty interval is written `(None, None)`, both as an argument and
    as the result.
    """
    if a[0] is None or b[0] is None:
        return (None, None)
    lower = max(a[0], b[0])
    assert lower is not None
    upper = min(a[1], b[1])
    assert upper is not None
    return (lower, upper) if lower <= upper else (None, None)
 def is_empty(intv):
    # type: (Interval) -> bool
    """
    Tell whether intv denotes the empty interval, which can be written as
    `None`, `False` or `(None, None)`.
    """
    if intv is None or intv is False:
        return True
    return intv == (None, None)
 def encode_bitset(vals, size):
    # type: (Iterable[int], int) -> int
    """
    Encode a set of values as a bitset of width size: bit `v` of the result
    is set for every `v` in vals.

    size must be a power of two no larger than 64, and every value must
    satisfy `0 <= v < size`.
    """
    assert is_power_of_two(size) and size <= 64
    bits = 0
    for val in vals:
        assert 0 <= val < size
        bits = bits | (1 << val)
    return bits
 def pp_set(s):
    # type: (Iterable[Any]) -> str
    """
    Format a set as `{a, b, ...}` with its elements in sorted order, so the
    same set always gives the same string.
    """
    members = map(repr, sorted(s))
    return '{' + ', '.join(members) + '}'
 def decode_interval(intv, full_range, default=None):
    # type: (BoolInterval, Interval, int) -> Interval
    """
    Decode an interval specification which can take the following values:
    True
        Use the `full_range`.
    `False` or `None`
        An empty interval, returned as `(default, default)`.
    (lo, hi)
        An explicit interval. Both bounds must be powers of two and lie
        inside `full_range`.
    """
    if not isinstance(intv, tuple):
        return full_range if intv else (default, default)
    # Explicit interval: validate it and hand it back unchanged.
    lo, hi = intv
    assert is_power_of_two(lo)
    assert is_power_of_two(hi)
    assert lo <= hi
    assert lo >= full_range[0]
    assert hi <= full_range[1]
    return intv
 def interval_to_set(intv):
    # type: (Interval) -> Set
    """
    Expand an inclusive `(lo, hi)` interval into the set of powers of two it
    contains. An empty interval gives an empty set.
    """
    if is_empty(intv):
        return set()
    lo, hi = intv
    assert is_power_of_two(lo)
    assert is_power_of_two(hi)
    assert lo <= hi
    exponents = range(int_log2(lo), int_log2(hi) + 1)
    return set([2 ** exponent for exponent in exponents])
 def legal_bool(bits):
    # type: (int) -> bool
    """
    True iff bits is a legal bit width for a bool type:
    bits == 1, or bits is one of {8, 16, ..., MAX_BITS}.
    """
    return bits == 1 or \
        (bits >= 8 and bits <= MAX_BITS and is_power_of_two(bits))
 class TypeSet(object):
    """
    A set of types.
    We don't allow arbitrary subsets of types, but use a parametrized approach
    instead.
    Objects of this class can be used as dictionary keys.
    Parametrized type sets are specified in terms of ranges:
    - The permitted range of vector lanes, where 1 indicates a scalar type.
    - The permitted range of integer types.
    - The permitted range of floating point types, and
    - The permitted range of boolean types.
    The ranges are inclusive from smallest bit-width to largest bit-width.
    A typeset representing scalar integer types `i8` through `i32`:
    >>> TypeSet(ints=(8, 32))
    TypeSet(lanes={1}, ints={8, 16, 32})
    Passing `True` instead of a range selects all available scalar types:
    >>> TypeSet(ints=True)
    TypeSet(lanes={1}, ints={8, 16, 32, 64})
    >>> TypeSet(floats=True)
    TypeSet(lanes={1}, floats={32, 64})
    >>> TypeSet(bools=True)
    TypeSet(lanes={1}, bools={1, 8, 16, 32, 64})
    Similarly, passing `True` for the lanes selects all possible scalar and
    vector types:
    >>> TypeSet(lanes=True, ints=True)
    TypeSet(lanes={1, 2, 4, 8, 16, 32, 64, 128, 256}, ints={8, 16, 32, 64})
    :param lanes: `(min, max)` inclusive range of permitted vector lane counts.
    :param ints: `(min, max)` inclusive range of permitted scalar integer
                 widths.
    :param floats: `(min, max)` inclusive range of permitted scalar floating
                   point widths.
    :param bools: `(min, max)` inclusive range of permitted scalar boolean
                  widths.
    :param bitvecs : `(min, max)` inclusive range of permitted bitvector
                  widths.
    """
    def __init__(self, lanes=None, ints=None, floats=None, bools=None,
                 bitvecs=None):
        # type: (BoolInterval, BoolInterval, BoolInterval, BoolInterval, BoolInterval) -> None # noqa
        # Each argument is an interval specification as understood by
        # decode_interval; it is stored as the set of values it covers.
        lane_range = decode_interval(lanes, (1, MAX_LANES), 1)
        self.lanes = interval_to_set(lane_range)
        int_range = decode_interval(ints, (8, MAX_BITS))
        self.ints = interval_to_set(int_range)
        float_range = decode_interval(floats, (32, 64))
        self.floats = interval_to_set(float_range)
        # Of the requested bool widths only the legal ones are kept.
        bool_range = decode_interval(bools, (1, MAX_BITS))
        self.bools = interval_to_set(bool_range)
        self.bools = set([w for w in self.bools if legal_bool(w)])
        bitvec_range = decode_interval(bitvecs, (1, MAX_BITVEC))
        self.bitvecs = interval_to_set(bitvec_range)
    def copy(self):
        # type: (TypeSet) -> TypeSet
        """
        Return a copy of our self. deepcopy is sufficient and safe here, since
        TypeSet contains only sets of numbers.
        """
        return deepcopy(self)
    def typeset_key(self):
        # type: () -> Tuple[Tuple, Tuple, Tuple, Tuple, Tuple]
        """Key tuple used for hashing and equality."""
        return (tuple(sorted(list(self.lanes))),
                tuple(sorted(list(self.ints))),
                tuple(sorted(list(self.floats))),
                tuple(sorted(list(self.bools))),
                tuple(sorted(list(self.bitvecs))))
    def __hash__(self):
        # type: () -> int
        h = hash(self.typeset_key())
        assert h == getattr(self, 'prev_hash', h), "TypeSet changed!"
        self.prev_hash = h
        return h
    def __eq__(self, other):
        # type: (object) -> bool
        if isinstance(other, TypeSet):
            return self.typeset_key() == other.typeset_key()
        else:
            return False
    def __ne__(self, other):
        # type: (object) -> bool
        return not self.__eq__(other)
    def __repr__(self):
        # type: () -> str
        # lanes is always shown; the other groups only when non-empty.
        pieces = ['TypeSet(lanes={}'.format(pp_set(self.lanes))]
        optional = ((self.ints, ', ints={}'),
                    (self.floats, ', floats={}'),
                    (self.bools, ', bools={}'),
                    (self.bitvecs, ', bitvecs={}'))
        for (members, template) in optional:
            if len(members) > 0:
                pieces.append(template.format(pp_set(members)))
        pieces.append(')')
        return ''.join(pieces)
    def emit_fields(self, fmt):
        # type: (Formatter) -> None
        """
        Emit field initializers for this typeset through fmt.

        A comment with the repr of the typeset comes first, then one
        `BitSet` line per field, encoding the log2 of each member.
        """
        assert len(self.bitvecs) == 0, "Bitvector types are not emitable."
        fmt.comment(repr(self))
        # (field name, width in bits of the emitted bitset)
        layout = (('lanes', 16),
                  ('ints', 8),
                  ('floats', 8),
                  ('bools', 8))
        for (field, bits) in layout:
            exponents = [int_log2(member) for member in getattr(self, field)]
            encoded = encode_bitset(exponents, bits)
            fmt.line('{}: BitSet::<u{}>({}),'.format(field, bits, encoded))
    def __iand__(self, other):
        # type: (TypeSet) -> TypeSet
        """
        Intersect self with other type set.
        >>> a = TypeSet(lanes=True, ints=(16, 32))
        >>> a
        TypeSet(lanes={1, 2, 4, 8, 16, 32, 64, 128, 256}, ints={16, 32})
        >>> b = TypeSet(lanes=(4, 16), ints=True)
        >>> a &= b
        >>> a
        TypeSet(lanes={4, 8, 16}, ints={16, 32})
        >>> a = TypeSet(lanes=True, bools=(1, 8))
        >>> b = TypeSet(lanes=True, bools=(16, 32))
        >>> a &= b
        >>> a
        TypeSet(lanes={1, 2, 4, 8, 16, 32, 64, 128, 256})
        """
        self.lanes.intersection_update(other.lanes)
        self.ints.intersection_update(other.ints)
        self.floats.intersection_update(other.floats)
        self.bools.intersection_update(other.bools)
        self.bitvecs.intersection_update(other.bitvecs)
        return self
    def issubset(self, other):
        # type: (TypeSet) -> bool
        """
        Return true iff self is a subset of other
        """
        return self.lanes.issubset(other.lanes) and \
            self.ints.issubset(other.ints) and \
            self.floats.issubset(other.floats) and \
            self.bools.issubset(other.bools) and \
            self.bitvecs.issubset(other.bitvecs)
    def lane_of(self):
        # type: () -> TypeSet
        """
        Return the image of self across lane_of: a copy of self whose lane
        set is exactly {1} and whose bitvector set is empty.
        """
        image = self.copy()
        image.bitvecs = set()
        image.lanes = {1}
        return image
    def as_bool(self):
        # type: () -> TypeSet
        """
        Return the image of self across as_bool.

        The result has no ints, floats or bitvecs. If self contains any lane
        count other than 1, the bool widths become the union of self's int,
        float and bool widths. If self contains the lane count 1, width 1 is
        added to the bool widths as well.
        """
        image = self.copy()
        image.ints, image.floats, image.bitvecs = set(), set(), set()
        has_vector_lanes = bool(self.lanes - {1})
        if has_vector_lanes:
            image.bools = self.ints | self.floats | self.bools
        if 1 in self.lanes:
            image.bools.add(1)
        return image
    def half_width(self):
        # type: () -> TypeSet
        """
        Return the image of self across halfwidth.

        Lanes are unchanged. Int and bool widths above 8, float widths above
        32 and bitvector widths above 1 are halved; smaller widths have no
        image and are dropped.
        """
        image = self.copy()
        image.ints = {w // 2 for w in self.ints if w > 8}
        image.floats = {w // 2 for w in self.floats if w > 32}
        image.bools = {w // 2 for w in self.bools if w > 8}
        image.bitvecs = {w // 2 for w in self.bitvecs if w > 1}
        return image
    def double_width(self):
        # type: () -> TypeSet
        """
        Return the image of self across doublewidth.

        Lanes are unchanged. Int, float and bool widths below MAX_BITS and
        bitvector widths below MAX_BITVEC are doubled; the rest are dropped.
        Doubled bool widths are additionally filtered through `legal_bool`.
        """
        image = self.copy()
        image.ints = {2 * w for w in self.ints if w < MAX_BITS}
        image.floats = {2 * w for w in self.floats if w < MAX_BITS}
        image.bools = {2 * w for w in self.bools
                       if w < MAX_BITS and legal_bool(2 * w)}
        image.bitvecs = {2 * w for w in self.bitvecs if w < MAX_BITVEC}
        return image
    def half_vector(self):
        # type: () -> TypeSet
        """
        Return the image of self across halfvector: every lane count above 1
        is halved, a lane count of 1 is dropped, and the bitvector set is
        emptied. Scalar widths are unchanged.
        """
        image = self.copy()
        image.lanes = {n // 2 for n in self.lanes if n > 1}
        image.bitvecs = set()
        return image
    def double_vector(self):
        # type: () -> TypeSet
        """
        Return the image of self across doublevector: every lane count below
        MAX_LANES is doubled, other lane counts are dropped, and the
        bitvector set is emptied. Scalar widths are unchanged.
        """
        image = self.copy()
        image.lanes = {2 * n for n in self.lanes if n < MAX_LANES}
        image.bitvecs = set()
        return image
    def to_bitvec(self):
        # type: () -> TypeSet
        """
        Return the image of self across to_bitvec.

        Self must not contain bitvectors already. The result has the single
        lane count 1, no ints, floats or bools, and one bitvector width for
        every product of a lane count and a scalar width found in self.
        """
        assert not self.bitvecs
        lane_widths = self.ints | self.floats | self.bools
        image = self.copy()
        image.lanes = {1}
        image.ints, image.bools, image.floats = set(), set(), set()
        image.bitvecs = {width * nlanes
                         for nlanes in self.lanes
                         for width in lane_widths}
        return image
    def image(self, func):
        # type: (str) -> TypeSet
        """
        Return the image of self across the derived function func.

        `func` is compared against the derived-function names declared on
        `TypeVar` and the matching method of this typeset is called. An
        unrecognised name trips an assertion.
        """
        handlers = (
            (TypeVar.LANEOF, self.lane_of),
            (TypeVar.ASBOOL, self.as_bool),
            (TypeVar.HALFWIDTH, self.half_width),
            (TypeVar.DOUBLEWIDTH, self.double_width),
            (TypeVar.HALFVECTOR, self.half_vector),
            (TypeVar.DOUBLEVECTOR, self.double_vector),
            (TypeVar.TOBITVEC, self.to_bitvec),
        )
        for name, handler in handlers:
            if func == name:
                return handler()
        assert False, "Unknown derived function: " + func
    def preimage(self, func):
        # type: (str) -> TypeSet
        """
        Return the inverse image of self across the derived function func.

        An empty typeset is returned unchanged (the same object). The
        width and vector doubling/halving functions are inverted by applying
        the opposite function. `lane_of`, `as_bool` and `to_bitvec` each
        build their pre-image explicitly. An unrecognised name trips an
        assertion.
        """
        # Nothing can map into an empty typeset.
        if self.size() == 0:
            return self

        if func == TypeVar.LANEOF:
            result = self.copy()
            result.bitvecs = set()
            # Every power-of-two lane count from 1 up to MAX_LANES.
            result.lanes = {1 << k for k in range(int_log2(MAX_LANES) + 1)}
            return result

        if func == TypeVar.ASBOOL:
            result = self.copy()
            result.bitvecs = set()
            if 1 in self.bools:
                result.ints = {8, 16, 32, 64}
                result.floats = {32, 64}
            else:
                result.ints = self.bools - {1}
                result.floats = self.bools & {32, 64}
                # If b1 is not in our typeset, then lanes=1 cannot be in the
                # pre-image, as as_bool() of scalars is always b1.
                result.lanes = self.lanes - {1}
            return result

        # Halving and doubling undo each other.
        if func == TypeVar.HALFWIDTH:
            return self.double_width()
        if func == TypeVar.DOUBLEWIDTH:
            return self.half_width()
        if func == TypeVar.HALFVECTOR:
            return self.double_vector()
        if func == TypeVar.DOUBLEVECTOR:
            return self.half_vector()

        if func == TypeVar.TOBITVEC:
            result = TypeSet()
            # Start with all possible lanes/ints/floats/bools
            all_lanes = interval_to_set(
                decode_interval(True, (1, MAX_LANES), 1))
            all_ints = interval_to_set(decode_interval(True, (8, MAX_BITS)))
            all_floats = interval_to_set(decode_interval(True, (32, 64)))
            all_bools = interval_to_set(decode_interval(True, (1, MAX_BITS)))
            candidates = (
                (result.ints, all_ints),
                (result.bools, all_bools),
                (result.floats, all_floats),
            )
            # Keep each (lane count, width) pair whose total size appears in
            # self.bitvecs.
            for nlanes in all_lanes:
                for target, widths in candidates:
                    for width in widths:
                        if width * nlanes in self.bitvecs:
                            result.lanes.add(nlanes)
                            target.add(width)
            return result

        assert False, "Unknown derived function: " + func
    def size(self):
        # type: () -> int
        """
        Return the number of concrete types represented by this typeset: the
        number of lane counts times the total number of int, float, bool and
        bitvector widths.
        """
        scalar_kinds = (self.ints, self.floats, self.bools, self.bitvecs)
        return len(self.lanes) * sum(len(kind) for kind in scalar_kinds)
    def concrete_types(self):
        # type: () -> Iterable[types.ValueType]
        """
        Generate the concrete types in this typeset.

        For each lane count the int, float and bool types are yielded in that
        order, followed by the bitvector types. A lane count of 1 yields the
        scalar type itself; any other count yields `scalar.by(lanes)`.
        Bitvector members are only permitted alongside a lane count of 1.
        """
        for nlanes in self.lanes:
            lane_kinds = (
                (types.IntType, self.ints),
                (types.FloatType, self.floats),
                (types.BoolType, self.bools),
            )
            for kind, widths in lane_kinds:
                for bits in widths:
                    scalar = kind.with_bits(bits)
                    yield scalar if nlanes == 1 else scalar.by(nlanes)
            for bits in self.bitvecs:
                assert nlanes == 1
                yield types.BVType.with_bits(bits)
    def get_singleton(self):
        # type: () -> types.ValueType
        """
        Return the only concrete type in this typeset. Asserts that
        `concrete_types()` produces exactly one type.
        """
        members = list(self.concrete_types())
        assert len(members) == 1
        return members[0]
    def widths(self):
        # type: () -> Set[int]
        """
        Return the set of total widths of all possible types in self: every
        product of a lane count with an int, float, bool or bitvector width.
        """
        per_lane = self.ints | self.floats | self.bools | self.bitvecs
        return {nlanes * width
                for nlanes in self.lanes
                for width in per_lane}
 class TypeVar(object):
    """
    Type variables can be used in place of concrete types when defining
    instructions. This makes the instructions *polymorphic*.
    A type variable is restricted to vary over a subset of the value types.
    This subset is specified by a set of flags that control the permitted base
    types and whether the type variable can assume scalar or vector types, or
    both.
    A type variable is either *free*, in which case it owns a `TypeSet` built
    from the flags below, or *derived*, in which case it is defined as
    `derived_func` applied to another type variable `base` and takes its name
    from those two.
    :param name: Short name of type variable used in instruction descriptions.
    :param doc: Documentation string.
    :param ints: Allow all integer base types, or `(min, max)` bit-range.
    :param floats: Allow all floating point base types, or `(min, max)`
                   bit-range.
    :param bools: Allow all boolean base types, or `(min, max)` bit-range.
    :param scalars: Allow type variable to assume scalar types.
    :param simd: Allow type variable to assume vector types, or `(min, max)`
                 lane count range.
    :param bitvecs: Allow all BitVec base types, or `(min, max)` bit-range.
    :param base: Type variable this one is derived from, or `None` for a free
                 type variable.
    :param derived_func: Name of the derived function applied to `base`.
                         Required whenever `base` is given.
    """
    def __init__(
            self, name, doc,
            ints=False, floats=False, bools=False,
            scalars=True, simd=False, bitvecs=False,
            base=None, derived_func=None):
        # type: (str, str, BoolInterval, BoolInterval, BoolInterval, bool, BoolInterval, BoolInterval, TypeVar, str) -> None # noqa
        self.name = name
        self.__doc__ = doc
        self.is_derived = isinstance(base, TypeVar)
        if base:
            assert self.is_derived
            assert derived_func
            self.base = base
            self.derived_func = derived_func
            # A derived variable is always named after its definition,
            # overriding whatever `name` was passed in.
            self.name = '{}({})'.format(derived_func, base.name)
        else:
            # A lane count of 1 stands for the scalar types.
            min_lanes = 1 if scalars else 2
            lanes = decode_interval(simd, (min_lanes, MAX_LANES), 1)
            self.type_set = TypeSet(
                    lanes=lanes,
                    ints=ints,
                    floats=floats,
                    bools=bools,
                    bitvecs=bitvecs)
    @staticmethod
    def singleton(typ):
        # type: (types.ValueType) -> TypeVar
        """
        Create a type variable that can only assume a single type.

        The variable takes its name and documentation from `typ`. A vector
        type contributes its base type and lane count; any other type is
        treated as having a single lane.
        """
        scalar = None  # type: ValueType
        if isinstance(typ, types.VectorType):
            scalar = typ.base
            lanes = (typ.lanes, typ.lanes)
        elif isinstance(typ, types.ScalarType):
            scalar = typ
            lanes = (1, 1)
        else:
            assert isinstance(typ, types.BVType)
            scalar = typ
            lanes = (1, 1)
        # Exactly one of these becomes a degenerate (bits, bits) interval,
        # depending on the kind of `scalar`.
        ints = None
        floats = None
        bools = None
        bitvecs = None
        if isinstance(scalar, types.IntType):
            ints = (scalar.bits, scalar.bits)
        elif isinstance(scalar, types.FloatType):
            floats = (scalar.bits, scalar.bits)
        elif isinstance(scalar, types.BoolType):
            bools = (scalar.bits, scalar.bits)
        elif isinstance(scalar, types.BVType):
            bitvecs = (scalar.bits, scalar.bits)
        tv = TypeVar(
                typ.name, typ.__doc__,
                ints=ints, floats=floats, bools=bools,
                bitvecs=bitvecs, simd=lanes)
        return tv
    def __str__(self):
        # type: () -> str
        """Return the name of this type variable wrapped in backquotes."""
        return "`{}`".format(self.name)
    def __repr__(self):
        # type: () -> str
        """
        Return a debugging representation: name, base and derived function
        for a derived variable, name and typeset for a free one.
        """
        if self.is_derived:
            return (
                    'TypeVar({}, base={}, derived_func={})'
                    .format(self.name, self.base, self.derived_func))
        else:
            return (
                    'TypeVar({}, {})'
                    .format(self.name, self.type_set))
    def __hash__(self):
        # type: () -> int
        """
        Hash consistently with `__eq__`: free variables hash by identity,
        derived variables by their (derived function, base) pair.
        """
        if (not self.is_derived):
            return object.__hash__(self)
        return hash((self.derived_func, self.base))
    def __eq__(self, other):
        # type: (object) -> bool
        """
        Two derived variables are equal when they apply the same derived
        function to equal bases. In every other case only the very same
        object compares equal.
        """
        if not isinstance(other, TypeVar):
            return False
        if self.is_derived and other.is_derived:
            return (
                    self.derived_func == other.derived_func and
                    self.base == other.base)
        else:
            return self is other
    def __ne__(self, other):
        # type: (object) -> bool
        """Return the negation of `__eq__`."""
        return not self.__eq__(other)
    # Supported functions for derived type variables.
    # The names here must match the method names on `ir::types::Type`.
    # The camel_case of the names must match `enum OperandConstraint` in
    # `instructions.rs`.
    LANEOF = 'lane_of'
    ASBOOL = 'as_bool'
    HALFWIDTH = 'half_width'
    DOUBLEWIDTH = 'double_width'
    HALFVECTOR = 'half_vector'
    DOUBLEVECTOR = 'double_vector'
    TOBITVEC = 'to_bitvec'
    @staticmethod
    def is_bijection(func):
        # type: (str) -> bool
        """
        Return true iff `func` is one of the width or vector halving and
        doubling functions, i.e. one that `inverse_func` can invert.
        """
        return func in [
            TypeVar.HALFWIDTH,
            TypeVar.DOUBLEWIDTH,
            TypeVar.HALFVECTOR,
            TypeVar.DOUBLEVECTOR]
    @staticmethod
    def inverse_func(func):
        # type: (str) -> str
        """
        Return the name of the derived function that undoes `func`. Raises
        `KeyError` for any function that is not a halving or doubling one.
        """
        return {
            TypeVar.HALFWIDTH: TypeVar.DOUBLEWIDTH,
            TypeVar.DOUBLEWIDTH: TypeVar.HALFWIDTH,
            TypeVar.HALFVECTOR: TypeVar.DOUBLEVECTOR,
            TypeVar.DOUBLEVECTOR: TypeVar.HALFVECTOR
        }[func]
    @staticmethod
    def derived(base, derived_func):
        # type: (TypeVar, str) -> TypeVar
        """
        Create a type variable that is a function of another.

        Before the derived variable is built, the current typeset of `base`
        is checked so that halving or doubling cannot step outside the
        supported widths and lane counts. A dimension without any members has
        nothing to over- or underflow, so it is not checked.
        """
        # Safety checks to avoid over/underflows.
        ts = base.get_typeset()
        if derived_func == TypeVar.HALFWIDTH:
            if len(ts.ints) > 0:
                assert min(ts.ints) > 8, "Can't halve all integer types"
            if len(ts.floats) > 0:
                assert min(ts.floats) > 32, "Can't halve all float types"
            if len(ts.bools) > 0:
                assert min(ts.bools) > 8, "Can't halve all boolean types"
        elif derived_func == TypeVar.DOUBLEWIDTH:
            if len(ts.ints) > 0:
                assert max(ts.ints) < MAX_BITS,\
                    "Can't double all integer types."
            if len(ts.floats) > 0:
                assert max(ts.floats) < MAX_BITS,\
                    "Can't double all float types."
            if len(ts.bools) > 0:
                assert max(ts.bools) < MAX_BITS, "Can't double all bool types."
        elif derived_func == TypeVar.HALFVECTOR:
            # min() raises ValueError on an empty set, so only check when
            # there is a lane count to halve.
            if len(ts.lanes) > 0:
                assert min(ts.lanes) > 1, "Can't halve a scalar type"
        elif derived_func == TypeVar.DOUBLEVECTOR:
            # Same guard as above, for max().
            if len(ts.lanes) > 0:
                assert max(ts.lanes) < MAX_LANES, "Can't double 256 lanes."
        return TypeVar(None, None, base=base, derived_func=derived_func)
    @staticmethod
    def from_typeset(ts):
        # type: (TypeSet) -> TypeVar
        """
        Create an unnamed free type variable from a type set. The variable
        uses `ts` itself, not a copy.
        """
        tv = TypeVar(None, None)
        tv.type_set = ts
        return tv
    def lane_of(self):
        # type: () -> TypeVar
        """
        Return a derived type variable that is the scalar lane type of this
        type variable.
        When this type variable assumes a scalar type, the derived type will be
        the same scalar type.
        """
        return TypeVar.derived(self, self.LANEOF)
    def as_bool(self):
        # type: () -> TypeVar
        """
        Return a derived type variable that has the same vector geometry as
        this type variable, but with boolean lanes. Scalar types map to `b1`.
        """
        return TypeVar.derived(self, self.ASBOOL)
    def half_width(self):
        # type: () -> TypeVar
        """
        Return a derived type variable that has the same number of vector lanes
        as this one, but the lanes are half the width.
        """
        return TypeVar.derived(self, self.HALFWIDTH)
    def double_width(self):
        # type: () -> TypeVar
        """
        Return a derived type variable that has the same number of vector lanes
        as this one, but the lanes are double the width.
        """
        return TypeVar.derived(self, self.DOUBLEWIDTH)
    def half_vector(self):
        # type: () -> TypeVar
        """
        Return a derived type variable that has half the number of vector lanes
        as this one, with the same lane type.
        """
        return TypeVar.derived(self, self.HALFVECTOR)
    def double_vector(self):
        # type: () -> TypeVar
        """
        Return a derived type variable that has twice the number of vector
        lanes as this one, with the same lane type.
        """
        return TypeVar.derived(self, self.DOUBLEVECTOR)
    def to_bitvec(self):
        # type: () -> TypeVar
        """
        Return a derived type variable that represents a flat bitvector with
        the same size as self.
        """
        return TypeVar.derived(self, self.TOBITVEC)
    def singleton_type(self):
        # type: () -> ValueType
        """
        If the associated typeset has a single type return it. Otherwise return
        None.
        """
        ts = self.get_typeset()
        if ts.size() != 1:
            return None
        return ts.get_singleton()
    def free_typevar(self):
        # type: () -> TypeVar
        """
        Get the free type variable controlling this one.

        A derived variable defers to its base. Returns None when the chain
        ends in a variable that can only assume a single type.
        """
        if self.is_derived:
            return self.base.free_typevar()
        elif self.singleton_type() is not None:
            # A singleton type variable is not a proper free variable.
            return None
        else:
            return self
    def rust_expr(self):
        # type: () -> str
        """
        Get a Rust expression that computes the type of this type variable.

        A derived variable becomes a method call on its base's expression, a
        singleton variable becomes the Rust name of its type, and any other
        variable is referred to by name.
        """
        if self.is_derived:
            return '{}.{}()'.format(
                    self.base.rust_expr(), self.derived_func)
        # Look the singleton up once instead of enumerating the typeset
        # twice.
        single = self.singleton_type()
        if single:
            return single.rust_name()
        return self.name
    def constrain_types_by_ts(self, ts):
        # type: (TypeSet) -> None
        """
        Constrain the range of types this variable can assume to a subset of
        those in the typeset ts.

        A free variable intersects its own typeset with `ts` in place. A
        derived variable instead constrains its base by the pre-image of `ts`
        under the derived function.
        """
        if not self.is_derived:
            self.type_set &= ts
        else:
            self.base.constrain_types_by_ts(ts.preimage(self.derived_func))
    def constrain_types(self, other):
        # type: (TypeVar) -> None
        """
        Constrain the range of types this variable can assume to a subset of
        those `other` can assume. Constraining a variable by itself is a
        no-op.
        """
        if self is other:
            return
        self.constrain_types_by_ts(other.get_typeset())
    def get_typeset(self):
        # type: () -> TypeSet
        """
        Returns the typeset for this TV. If the TV is derived, computes it
        recursively from the derived function and the base's typeset.

        For a free variable the returned typeset is the variable's own
        object, not a copy.
        """
        if not self.is_derived:
            return self.type_set
        else:
            return self.base.get_typeset().image(self.derived_func)
    def get_fresh_copy(self, name):
        # type: (str) -> TypeVar
        """
        Get a fresh copy of self under a new name. Can only be called on free
        typevars. The copy gets its own copy of the typeset.
        """
        assert not self.is_derived
        tv = TypeVar.from_typeset(self.type_set.copy())
        tv.name = name
        return tv
--- a/Show More
+++ b/Show More
		`@@ -0,0 +1 @@`
							`"""Definitions for the base Cretonne language."""`