diff --git a/.gitmodules b/.gitmodules index 3a079448b1..65787f6699 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,10 +1,6 @@ [submodule "spec_testsuite"] path = spec_testsuite url = https://github.com/WebAssembly/testsuite -[submodule "lightbeam"] - path = lightbeam - url = https://github.com/CraneStation/lightbeam.git - branch = master [submodule "wasmtime-api/c-examples/wasm-c-api"] path = wasmtime-api/c-examples/wasm-c-api url = https://github.com/WebAssembly/wasm-c-api diff --git a/Cargo.toml b/Cargo.toml index 6f5a04f27d..80232c6617 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,5 +52,5 @@ members = [ [features] # Enable all supported architectures by default. default = ["cranelift-codegen/all-arch"] -lightbeam = ["wasmtime-environ/lightbeam", "wasmtime-jit/lightbeam"] +lightbeam = ["wasmtime-environ/lightbeam", "wasmtime-jit/lightbeam", "wasmtime-wast/lightbeam"] wasi-c = ["wasmtime-wasi-c"] diff --git a/build.rs b/build.rs index 56cd2fb8a2..9697d9903a 100644 --- a/build.rs +++ b/build.rs @@ -14,12 +14,28 @@ fn main() { let mut out = File::create(out_dir.join("wast_testsuite_tests.rs")) .expect("error generating test source file"); - test_directory(&mut out, "misc_testsuite").expect("generating tests"); - test_directory(&mut out, "spec_testsuite").expect("generating tests"); - test_file(&mut out, "spec_testsuite/proposals/simd/simd_const.wast").expect("generating tests"); + for strategy in &[ + "Cranelift", + #[cfg(feature = "lightbeam")] + "Lightbeam", + ] { + writeln!(out, "#[allow(non_snake_case)]").expect("generating tests"); + writeln!(out, "mod {} {{", strategy).expect("generating tests"); + + test_directory(&mut out, "misc_testsuite", strategy).expect("generating tests"); + test_directory(&mut out, "spec_testsuite", strategy).expect("generating tests"); + test_file( + &mut out, + "spec_testsuite/proposals/simd/simd_const.wast", + strategy, + ) + .expect("generating tests"); + + writeln!(out, "}}").expect("generating tests"); + } } -fn test_directory(out: &mut File, testsuite: &str) -> io::Result<()> { +fn test_directory(out: &mut File, testsuite: &str, strategy: &str) -> io::Result<()> { let mut dir_entries: Vec<_> = read_dir(testsuite) .expect("reading testsuite directory") .map(|r| r.expect("reading testsuite directory entry")) @@ -44,76 +60,105 @@ fn test_directory(out: &mut File, testsuite: &str) -> io::Result<()> { dir_entries.sort_by_key(|dir| dir.path()); + start_test_module(out, testsuite)?; + for dir_entry in dir_entries { + write_testsuite_tests(out, &dir_entry.path(), testsuite, strategy)?; + } + finish_test_module(out) +} + +fn test_file(out: &mut File, testfile: &str, strategy: &str) -> io::Result<()> { + let testsuite = "single_file_spec_test"; + let path = Path::new(testfile); + start_test_module(out, testsuite)?; + write_testsuite_tests(out, path, testsuite, strategy)?; + finish_test_module(out) +} + +fn start_test_module(out: &mut File, testsuite: &str) -> io::Result<()> { writeln!( out, - "mod {} {{", + " mod {} {{", Path::new(testsuite) .file_stem() .expect("testsuite filename should have a stem") .to_str() .expect("testsuite filename should be representable as a string") - .replace("-", "_") + .replace("-", "_"), )?; writeln!( out, - " use super::{{native_isa, Path, WastContext, Compiler, Features}};" - )?; - for dir_entry in dir_entries { - write_testsuite_tests(out, &dir_entry.path(), testsuite)?; - } - writeln!(out, "}}")?; - Ok(()) + " use super::super::{{native_isa, Path, WastContext, Compiler, Features, CompilationStrategy}};" + ) } -fn 
test_file(out: &mut File, testfile: &str) -> io::Result<()> { - let path = Path::new(testfile); - write_testsuite_tests(out, path, "single_file_spec_test") +fn finish_test_module(out: &mut File) -> io::Result<()> { + writeln!(out, " }}") } -fn write_testsuite_tests(out: &mut File, path: &Path, testsuite: &str) -> io::Result<()> { +fn write_testsuite_tests( + out: &mut File, + path: &Path, + testsuite: &str, + strategy: &str, +) -> io::Result<()> { let stemstr = path .file_stem() .expect("file_stem") .to_str() .expect("to_str"); - writeln!(out, " #[test]")?; - if ignore(testsuite, stemstr) { - writeln!(out, " #[ignore]")?; + writeln!(out, " #[test]")?; + if ignore(testsuite, stemstr, strategy) { + writeln!(out, " #[ignore]")?; } - writeln!(out, " fn r#{}() {{", &stemstr.replace("-", "_"))?; - writeln!(out, " let isa = native_isa();")?; - writeln!(out, " let compiler = Compiler::new(isa);")?; + writeln!(out, " fn r#{}() {{", &stemstr.replace("-", "_"))?; + writeln!(out, " let isa = native_isa();")?; writeln!( out, - " let features = Features {{ simd: true, ..Default::default() }};" + " let compiler = Compiler::new(isa, CompilationStrategy::{});", + strategy )?; writeln!( out, - " let mut wast_context = WastContext::new(Box::new(compiler)).with_features(features);" + " let features = Features {{ simd: true, ..Default::default() }};" )?; - writeln!(out, " wast_context")?; - writeln!(out, " .register_spectest()")?; writeln!( out, - " .expect(\"instantiating \\\"spectest\\\"\");" + " let mut wast_context = WastContext::new(Box::new(compiler)).with_features(features);" )?; - writeln!(out, " wast_context")?; - write!(out, " .run_file(Path::new(\"")?; + writeln!(out, " wast_context")?; + writeln!(out, " .register_spectest()")?; + writeln!( + out, + " .expect(\"instantiating \\\"spectest\\\"\");" + )?; + writeln!(out, " wast_context")?; + write!(out, " .run_file(Path::new(\"")?; // Write out the string with escape_debug to prevent special characters such // as backslash from being reinterpreted. for c in path.display().to_string().chars() { write!(out, "{}", c.escape_debug())?; } writeln!(out, "\"))")?; - writeln!(out, " .expect(\"error running wast file\");",)?; - writeln!(out, " }}")?; + writeln!(out, " .expect(\"error running wast file\");",)?; + writeln!(out, " }}")?; writeln!(out)?; Ok(()) } /// Ignore tests that aren't supported yet. 
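+/// The ignore list now also depends on the chosen compilation strategy, since Lightbeam cannot yet run every test that Cranelift can.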
-fn ignore(testsuite: &str, name: &str) -> bool { +fn ignore(testsuite: &str, name: &str, strategy: &str) -> bool { + match strategy { + #[cfg(feature = "lightbeam")] + "Lightbeam" => match (testsuite, name) { + ("single_file_spec_test", "simd_const") => return true, + _ => (), + }, + "Cranelift" => {} + _ => panic!("unrecognized strategy"), + } + if cfg!(windows) { return match (testsuite, name) { ("spec_testsuite", "address") => true, diff --git a/fuzz/fuzz_targets/compile.rs b/fuzz/fuzz_targets/compile.rs index c0483f7c50..1749fdffd8 100644 --- a/fuzz/fuzz_targets/compile.rs +++ b/fuzz/fuzz_targets/compile.rs @@ -13,10 +13,10 @@ use std::cell::RefCell; use std::collections::HashMap; use std::rc::Rc; use wasmparser::validate; -use wasmtime_jit::{CompiledModule, Compiler, NullResolver}; +use wasmtime_jit::{CompilationStrategy, CompiledModule, Compiler, NullResolver}; fuzz_target!(|data: &[u8]| { - if !validate(data, None) { + if validate(data, None).is_err() { return; } let flag_builder = settings::builder(); @@ -24,11 +24,32 @@ fuzz_target!(|data: &[u8]| { panic!("host machine is not a supported target"); }); let isa = isa_builder.finish(settings::Flags::new(flag_builder)); - let mut compiler = Compiler::new(isa); + let mut compiler = Compiler::new(isa, CompilationStrategy::Cranelift); let mut resolver = NullResolver {}; let global_exports = Rc::new(RefCell::new(HashMap::new())); - let _compiled = match CompiledModule::new(&mut compiler, data, &mut resolver, global_exports, false) { - Ok(x) => x, - Err(_) => return, - }; + let _compiled = + match CompiledModule::new(&mut compiler, data, &mut resolver, global_exports, false) { + Ok(x) => x, + Err(_) => return, + }; +}); + +#[cfg(feature = "lightbeam")] +fuzz_target!(|data: &[u8]| { + if validate(data, None).is_err() { + return; + } + let flag_builder = settings::builder(); + let isa_builder = cranelift_native::builder().unwrap_or_else(|_| { + panic!("host machine is not a supported target"); + }); + let isa = isa_builder.finish(settings::Flags::new(flag_builder)); + let mut compiler = Compiler::new(isa, CompilationStrategy::Lightbeam); + let mut resolver = NullResolver {}; + let global_exports = Rc::new(RefCell::new(HashMap::new())); + let _compiled = + match CompiledModule::new(&mut compiler, data, &mut resolver, global_exports, false) { + Ok(x) => x, + Err(_) => return, + }; }); diff --git a/lightbeam b/lightbeam deleted file mode 160000 index bbd7e91d1c..0000000000 --- a/lightbeam +++ /dev/null @@ -1 +0,0 @@ -Subproject commit bbd7e91d1c524b7d7f5b88c4e89cffe03c70b3ef diff --git a/lightbeam/Cargo.toml b/lightbeam/Cargo.toml new file mode 100644 index 0000000000..1113ee61d7 --- /dev/null +++ b/lightbeam/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "lightbeam" +version = "0.0.0" +authors = ["The Lightbeam Project Developers"] +license = "Apache-2.0 WITH LLVM-exception" +readme = "README.md" +categories = ["wasm"] +keywords = ["webassembly", "wasm", "compile", "compiler", "jit"] +publish = false +edition = "2018" + +[dependencies] +smallvec = "0.6" +dynasm = "0.5.1" +dynasmrt = "0.5.1" +wasmparser = "0.39.1" +memoffset = "0.5.1" +itertools = "0.8" +capstone = "0.6.0" +failure = "0.1.3" +failure_derive = "0.1.3" +cranelift-codegen = "0.44" +multi_mut = "0.1" +either = "1.5" +typemap = "0.3" + +[dev-dependencies] +lazy_static = "1.2" +wabt = "0.9.2" +quickcheck = "0.9.0" + +[badges] +maintenance = { status = "experimental" } + +[features] +bench = [] diff --git a/lightbeam/LICENSE b/lightbeam/LICENSE new file mode 100644 index 
0000000000..f9d81955f4 --- /dev/null +++ b/lightbeam/LICENSE @@ -0,0 +1,220 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +--- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + diff --git a/lightbeam/README.md b/lightbeam/README.md new file mode 100644 index 0000000000..b344ac5919 --- /dev/null +++ b/lightbeam/README.md @@ -0,0 +1,168 @@ +# Lightbeam + +Lightbeam is an optimising one-pass streaming compiler for WebAssembly, intended for use in [Wasmtime][wasmtime]. + +[wasmtime]: https://github.com/CraneStation/wasmtime + +## Quality of output + +Already - with a very small number of relatively simple optimisation rules - Lightbeam produces surprisingly high-quality output considering how restricted it is. It even produces better code than Cranelift, Firefox or both for some workloads. 
Here's a very simple example: this recursive Fibonacci function in Rust:
+
+```rust
+fn fib(n: i32) -> i32 {
+    if n == 0 || n == 1 {
+        1
+    } else {
+        fib(n - 1) + fib(n - 2)
+    }
+}
+```
+
+When compiled with optimisations enabled, rustc will produce the following WebAssembly:
+
+```wat
+(module
+  (func $fib (param $p0 i32) (result i32)
+    (local $l1 i32)
+    (set_local $l1
+      (i32.const 1))
+    (block $B0
+      (br_if $B0
+        (i32.lt_u
+          (get_local $p0)
+          (i32.const 2)))
+      (set_local $l1
+        (i32.const 1))
+      (loop $L1
+        (set_local $l1
+          (i32.add
+            (call $fib
+              (i32.add
+                (get_local $p0)
+                (i32.const -1)))
+            (get_local $l1)))
+        (br_if $L1
+          (i32.gt_u
+            (tee_local $p0
+              (i32.add
+                (get_local $p0)
+                (i32.const -2)))
+            (i32.const 1)))))
+    (get_local $l1)))
+```
+
+Firefox's optimising compiler produces the following assembly (labels cleaned up somewhat):
+
+```asm
+fib:
+  sub rsp, 0x18
+  cmp qword ptr [r14 + 0x28], rsp
+  jae stack_overflow
+  mov dword ptr [rsp + 0xc], edi
+  cmp edi, 2
+  jae .Lelse
+  mov eax, 1
+  mov dword ptr [rsp + 8], eax
+  jmp .Lreturn
+.Lelse:
+  mov dword ptr [rsp + 0xc], edi
+  mov eax, 1
+  mov dword ptr [rsp + 8], eax
+.Lloop:
+  mov edi, dword ptr [rsp + 0xc]
+  add edi, -1
+  call 0
+  mov ecx, dword ptr [rsp + 8]
+  add ecx, eax
+  mov dword ptr [rsp + 8], ecx
+  mov ecx, dword ptr [rsp + 0xc]
+  add ecx, -2
+  mov dword ptr [rsp + 0xc], ecx
+  cmp ecx, 1
+  ja .Lloop
+.Lreturn:
+  mov eax, dword ptr [rsp + 8]
+  nop
+  add rsp, 0x18
+  ret
+```
+
+Cranelift with optimisations enabled produces similar code:
+
+```asm
+fib:
+  push rbp
+  mov rbp, rsp
+  sub rsp, 0x20
+  mov qword ptr [rsp + 0x10], rdi
+  mov dword ptr [rsp + 0x1c], esi
+  mov eax, 1
+  mov dword ptr [rsp + 0x18], eax
+  mov eax, dword ptr [rsp + 0x1c]
+  cmp eax, 2
+  jb .Lreturn
+  movabs rax, 0
+  mov qword ptr [rsp + 8], rax
+.Lloop:
+  mov eax, dword ptr [rsp + 0x1c]
+  add eax, -1
+  mov rcx, qword ptr [rsp + 8]
+  mov rdx, qword ptr [rsp + 0x10]
+  mov rdi, rdx
+  mov esi, eax
+  call rcx
+  mov ecx, dword ptr [rsp + 0x18]
+  add eax, ecx
+  mov dword ptr [rsp + 0x18], eax
+  mov eax, dword ptr [rsp + 0x1c]
+  add eax, -2
+  mov dword ptr [rsp + 0x1c], eax
+  mov eax, dword ptr [rsp + 0x1c]
+  cmp eax, 1
+  ja .Lloop
+.Lreturn:
+  mov eax, dword ptr [rsp + 0x18]
+  add rsp, 0x20
+  pop rbp
+  ret
+```
+
+Whereas Lightbeam produces smaller code with far fewer memory accesses than both (and fewer blocks than Firefox's output):
+
+```asm
+fib:
+  cmp esi, 2
+  mov eax, 1
+  jb .Lreturn
+  mov eax, 1
+.Lloop:
+  mov rcx, rsi
+  add ecx, 0xffffffff
+  push rsi
+  push rax
+  push rax
+  mov rsi, rcx
+  call fib
+  add eax, [rsp + 8]
+  mov rcx, [rsp + 0x10]
+  add ecx, 0xfffffffe
+  cmp ecx, 1
+  mov rsi, rcx
+  lea rsp, [rsp + 0x18]
+  ja .Lloop
+.Lreturn:
+  ret
+```
+
+Now obviously I'm not advocating for replacing Firefox's optimising compiler with Lightbeam, since the latter can only really produce better code when receiving optimised WebAssembly (and so debug-mode or hand-written WebAssembly may produce much worse output). However, this shows that even with the restrictions of a streaming compiler it's absolutely possible to produce high-quality assembly output. For the assembly above, the Lightbeam output runs within 15% of native speed. This is paramount for one of Lightbeam's intended use cases: real-time systems that want good runtime performance but cannot tolerate compiler bombs.
+
+## Specification compliance
+
+Lightbeam passes 100% of the specification test suite, but that doesn't necessarily mean that it's 100% specification-compliant.
Hopefully as we run a fuzzer against it we can find any issues and get Lightbeam to a state where it can be used in production.
+
+## Getting involved
+
+Our [issue tracker][issue tracker] is pretty barren right now since this is currently more-or-less a one-person project, but if you want to get involved, jump into the [CraneStation Gitter room][cranestation-gitter] and someone can direct you to the right place. I wish I could say "the most useful thing you can do is play with it and open issues where you find problems", but until it has seen more real-world testing that won't be very helpful.
+
+[cranestation-gitter]: https://gitter.im/CraneStation/Lobby
+[issue tracker]: https://github.com/CraneStation/lightbeam/issues
diff --git a/lightbeam/examples/test.rs b/lightbeam/examples/test.rs
new file mode 100644
index 0000000000..3fcbf7c573
--- /dev/null
+++ b/lightbeam/examples/test.rs
@@ -0,0 +1,36 @@
+extern crate lightbeam;
+
+use lightbeam::translate;
+use std::fs::File;
+use std::io;
+use std::io::Read;
+use std::path::Path;
+
+fn read_to_end<P: AsRef<Path>>(path: P) -> io::Result<Vec<u8>> {
+    let mut buffer = Vec::new();
+    if path.as_ref() == Path::new("-") {
+        let stdin = io::stdin();
+        let mut stdin = stdin.lock();
+        stdin.read_to_end(&mut buffer)?;
+    } else {
+        let mut file = File::open(path)?;
+        file.read_to_end(&mut buffer)?;
+    }
+    Ok(buffer)
+}
+
+fn maybe_main() -> Result<(), String> {
+    let data = read_to_end("test.wasm").map_err(|e| e.to_string())?;
+    let translated = translate(&data).map_err(|e| e.to_string())?;
+    let result: u32 = translated.execute_func(0, (5u32, 3u32)).unwrap();
+    println!("f(5, 3) = {}", result);
+
+    Ok(())
+}
+
+fn main() {
+    match maybe_main() {
+        Ok(()) => (),
+        Err(e) => eprintln!("error: {}", e),
+    }
+}
diff --git a/lightbeam/src/backend.rs b/lightbeam/src/backend.rs
new file mode 100644
index 0000000000..77d351ea2c
--- /dev/null
+++ b/lightbeam/src/backend.rs
@@ -0,0 +1,5553 @@
+use crate::error::Error;
+use crate::microwasm::{BrTarget, Ieee32, Ieee64, SignlessType, Type, Value, F32, F64, I32, I64};
+use crate::module::ModuleContext;
+use cranelift_codegen::{binemit, ir};
+use dynasm::dynasm;
+use dynasmrt::x64::Assembler;
+use dynasmrt::{AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi, ExecutableBuffer};
+use either::Either;
+use std::{
+    any::{Any, TypeId},
+    collections::HashMap,
+    convert::TryFrom,
+    fmt::Display,
+    iter::{self, FromIterator},
+    mem,
+    ops::RangeInclusive,
+};
+
+use self::registers::*;
+
+// TODO: Get rid of this! It's a total hack.
+mod magic {
+    use cranelift_codegen::ir;
+
+    /// Compute an `ir::ExternalName` for the `memory.grow` libcall for
+    /// 32-bit locally-defined memories.
+    pub fn get_memory32_grow_name() -> ir::ExternalName {
+        ir::ExternalName::user(1, 0)
+    }
+
+    /// Compute an `ir::ExternalName` for the `memory.grow` libcall for
+    /// 32-bit imported memories.
+    pub fn get_imported_memory32_grow_name() -> ir::ExternalName {
+        ir::ExternalName::user(1, 1)
+    }
+
+    /// Compute an `ir::ExternalName` for the `memory.size` libcall for
+    /// 32-bit locally-defined memories.
+    pub fn get_memory32_size_name() -> ir::ExternalName {
+        ir::ExternalName::user(1, 2)
+    }
+
+    /// Compute an `ir::ExternalName` for the `memory.size` libcall for
+    /// 32-bit imported memories.
+    pub fn get_imported_memory32_size_name() -> ir::ExternalName {
+        ir::ExternalName::user(1, 3)
+    }
+}
+
+/// Size of a pointer on the target in bytes.
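+/// Lightbeam currently only emits x86-64 code (the backend is built on dynasm's x64 assembler), so this is always 8.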
+const WORD_SIZE: u32 = 8; + +type RegId = u8; + +#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] +pub enum GPR { + Rq(RegId), + Rx(RegId), +} + +#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)] +pub enum GPRType { + Rq, + Rx, +} + +impl From for GPRType { + fn from(other: SignlessType) -> GPRType { + match other { + I32 | I64 => GPRType::Rq, + F32 | F64 => GPRType::Rx, + } + } +} + +impl From for Option { + fn from(other: SignlessType) -> Self { + Some(other.into()) + } +} + +impl GPR { + fn type_(self) -> GPRType { + match self { + GPR::Rq(_) => GPRType::Rq, + GPR::Rx(_) => GPRType::Rx, + } + } + + fn rq(self) -> Option { + match self { + GPR::Rq(r) => Some(r), + GPR::Rx(_) => None, + } + } + + fn rx(self) -> Option { + match self { + GPR::Rx(r) => Some(r), + GPR::Rq(_) => None, + } + } +} + +pub fn arg_locs(types: impl IntoIterator) -> Vec { + let types = types.into_iter(); + let mut out = Vec::with_capacity(types.size_hint().0); + // TODO: VmCtx is in the first register + let mut int_gpr_iter = INTEGER_ARGS_IN_GPRS.iter(); + let mut float_gpr_iter = FLOAT_ARGS_IN_GPRS.iter(); + let mut stack_idx = 0; + + for ty in types { + match ty { + I32 | I64 => out.push(int_gpr_iter.next().map(|&r| CCLoc::Reg(r)).unwrap_or_else( + || { + let out = CCLoc::Stack(stack_idx); + stack_idx += 1; + out + }, + )), + F32 | F64 => out.push( + float_gpr_iter + .next() + .map(|&r| CCLoc::Reg(r)) + .expect("Float args on stack not yet supported"), + ), + } + } + + out +} + +pub fn ret_locs(types: impl IntoIterator) -> Vec { + let types = types.into_iter(); + let mut out = Vec::with_capacity(types.size_hint().0); + // TODO: VmCtx is in the first register + let mut int_gpr_iter = INTEGER_RETURN_GPRS.iter(); + let mut float_gpr_iter = FLOAT_RETURN_GPRS.iter(); + + for ty in types { + match ty { + I32 | I64 => out.push(CCLoc::Reg( + *int_gpr_iter + .next() + .expect("We don't support stack returns yet"), + )), + F32 | F64 => out.push(CCLoc::Reg( + *float_gpr_iter + .next() + .expect("We don't support stack returns yet"), + )), + } + } + + out +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +struct GPRs { + bits: u16, +} + +impl GPRs { + fn new() -> Self { + Self { bits: 0 } + } +} + +#[allow(dead_code)] +pub mod registers { + use super::{RegId, GPR}; + + pub mod rq { + use super::RegId; + + pub const RAX: RegId = 0; + pub const RCX: RegId = 1; + pub const RDX: RegId = 2; + pub const RBX: RegId = 3; + pub const RSP: RegId = 4; + pub const RBP: RegId = 5; + pub const RSI: RegId = 6; + pub const RDI: RegId = 7; + pub const R8: RegId = 8; + pub const R9: RegId = 9; + pub const R10: RegId = 10; + pub const R11: RegId = 11; + pub const R12: RegId = 12; + pub const R13: RegId = 13; + pub const R14: RegId = 14; + pub const R15: RegId = 15; + } + + pub const RAX: GPR = GPR::Rq(self::rq::RAX); + pub const RCX: GPR = GPR::Rq(self::rq::RCX); + pub const RDX: GPR = GPR::Rq(self::rq::RDX); + pub const RBX: GPR = GPR::Rq(self::rq::RBX); + pub const RSP: GPR = GPR::Rq(self::rq::RSP); + pub const RBP: GPR = GPR::Rq(self::rq::RBP); + pub const RSI: GPR = GPR::Rq(self::rq::RSI); + pub const RDI: GPR = GPR::Rq(self::rq::RDI); + pub const R8: GPR = GPR::Rq(self::rq::R8); + pub const R9: GPR = GPR::Rq(self::rq::R9); + pub const R10: GPR = GPR::Rq(self::rq::R10); + pub const R11: GPR = GPR::Rq(self::rq::R11); + pub const R12: GPR = GPR::Rq(self::rq::R12); + pub const R13: GPR = GPR::Rq(self::rq::R13); + pub const R14: GPR = GPR::Rq(self::rq::R14); + pub const R15: GPR = GPR::Rq(self::rq::R15); + + pub const XMM0: GPR = 
GPR::Rx(0); + pub const XMM1: GPR = GPR::Rx(1); + pub const XMM2: GPR = GPR::Rx(2); + pub const XMM3: GPR = GPR::Rx(3); + pub const XMM4: GPR = GPR::Rx(4); + pub const XMM5: GPR = GPR::Rx(5); + pub const XMM6: GPR = GPR::Rx(6); + pub const XMM7: GPR = GPR::Rx(7); + pub const XMM8: GPR = GPR::Rx(8); + pub const XMM9: GPR = GPR::Rx(9); + pub const XMM10: GPR = GPR::Rx(10); + pub const XMM11: GPR = GPR::Rx(11); + pub const XMM12: GPR = GPR::Rx(12); + pub const XMM13: GPR = GPR::Rx(13); + pub const XMM14: GPR = GPR::Rx(14); + pub const XMM15: GPR = GPR::Rx(15); + + pub const NUM_GPRS: u8 = 16; +} + +const SIGN_MASK_F64: u64 = 0b1000000000000000000000000000000000000000000000000000000000000000; +const REST_MASK_F64: u64 = !SIGN_MASK_F64; +const SIGN_MASK_F32: u32 = 0b10000000000000000000000000000000; +const REST_MASK_F32: u32 = !SIGN_MASK_F32; + +impl GPRs { + fn take(&mut self) -> Option { + let lz = self.bits.trailing_zeros(); + if lz < 16 { + let gpr = lz as RegId; + self.mark_used(gpr); + Some(gpr) + } else { + None + } + } + + fn mark_used(&mut self, gpr: RegId) { + self.bits &= !(1 << gpr as u16); + } + + fn release(&mut self, gpr: RegId) { + debug_assert!( + !self.is_free(gpr), + "released register {} was already free", + gpr + ); + self.bits |= 1 << gpr; + } + + fn is_free(&self, gpr: RegId) -> bool { + (self.bits & (1 << gpr)) != 0 + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct Registers { + /// Registers at 64 bits and below (al/ah/ax/eax/rax, for example) + scratch_64: (GPRs, [u8; NUM_GPRS as usize]), + /// Registers at 128 bits (xmm0, for example) + scratch_128: (GPRs, [u8; NUM_GPRS as usize]), +} + +impl Default for Registers { + fn default() -> Self { + Self::new() + } +} + +impl Registers { + pub fn new() -> Self { + let mut result = Self { + scratch_64: (GPRs::new(), [1; NUM_GPRS as _]), + scratch_128: (GPRs::new(), [1; NUM_GPRS as _]), + }; + + // Give ourselves a few scratch registers to work with, for now. 
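+ // Only the caller-saved System V registers in `SCRATCH_REGS` are released; RDI stays reserved for the VMCTX pointer and the callee-saved registers remain marked as in use.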
+ for &scratch in SCRATCH_REGS { + result.release(scratch); + } + + result + } + + fn scratch_counts_mut(&mut self, gpr: GPR) -> (u8, &mut (GPRs, [u8; NUM_GPRS as usize])) { + match gpr { + GPR::Rq(r) => (r, &mut self.scratch_64), + GPR::Rx(r) => (r, &mut self.scratch_128), + } + } + + fn scratch_counts(&self, gpr: GPR) -> (u8, &(GPRs, [u8; NUM_GPRS as usize])) { + match gpr { + GPR::Rq(r) => (r, &self.scratch_64), + GPR::Rx(r) => (r, &self.scratch_128), + } + } + + pub fn mark_used(&mut self, gpr: GPR) { + let (gpr, scratch_counts) = self.scratch_counts_mut(gpr); + scratch_counts.0.mark_used(gpr); + scratch_counts.1[gpr as usize] += 1; + } + + pub fn num_usages(&self, gpr: GPR) -> u8 { + let (gpr, scratch_counts) = self.scratch_counts(gpr); + scratch_counts.1[gpr as usize] + } + + pub fn take(&mut self, ty: impl Into) -> Option { + let (mk_gpr, scratch_counts) = match ty.into() { + GPRType::Rq => (GPR::Rq as fn(_) -> _, &mut self.scratch_64), + GPRType::Rx => (GPR::Rx as fn(_) -> _, &mut self.scratch_128), + }; + + let out = scratch_counts.0.take()?; + scratch_counts.1[out as usize] += 1; + Some(mk_gpr(out)) + } + + pub fn release(&mut self, gpr: GPR) { + let (gpr, scratch_counts) = self.scratch_counts_mut(gpr); + let c = &mut scratch_counts.1[gpr as usize]; + *c = c + .checked_sub(1) + .unwrap_or_else(|| panic!("Double-freed register: {}", gpr)); + if *c == 0 { + scratch_counts.0.release(gpr); + } + } + + pub fn is_free(&self, gpr: GPR) -> bool { + let (gpr, scratch_counts) = self.scratch_counts(gpr); + scratch_counts.0.is_free(gpr) + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BlockCallingConvention { + pub stack_depth: StackDepth, + pub arguments: Vec, +} + +impl BlockCallingConvention { + pub fn function_start(args: impl IntoIterator) -> Self { + BlockCallingConvention { + // We start and return the function with stack depth 1 since we must + // allow space for the saved return address. + stack_depth: StackDepth(1), + arguments: Vec::from_iter(args), + } + } +} + +// TODO: Combine this with `ValueLocation`? +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum CCLoc { + /// Value exists in a register. + Reg(GPR), + /// Value exists on the stack. + Stack(i32), +} + +impl CCLoc { + fn try_from(other: ValueLocation) -> Option { + match other { + ValueLocation::Reg(reg) => Some(CCLoc::Reg(reg)), + ValueLocation::Stack(offset) => Some(CCLoc::Stack(offset)), + _ => None, + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum CondCode { + CF0, + CF1, + ZF0, + ZF1, + CF0AndZF0, + CF1OrZF1, + ZF0AndSFEqOF, + ZF1OrSFNeOF, + SFEqOF, + SFNeOF, +} + +mod cc { + use super::CondCode; + + pub const EQUAL: CondCode = CondCode::ZF0; + pub const NOT_EQUAL: CondCode = CondCode::ZF1; + pub const GE_U: CondCode = CondCode::CF0; + pub const LT_U: CondCode = CondCode::CF1; + pub const GT_U: CondCode = CondCode::CF0AndZF0; + pub const LE_U: CondCode = CondCode::CF1OrZF1; + pub const GE_S: CondCode = CondCode::SFEqOF; + pub const LT_S: CondCode = CondCode::SFNeOF; + pub const GT_S: CondCode = CondCode::ZF0AndSFEqOF; + pub const LE_S: CondCode = CondCode::ZF1OrSFNeOF; +} + +impl std::ops::Not for CondCode { + type Output = Self; + + fn not(self) -> Self { + use CondCode::*; + + match self { + CF0 => CF1, + CF1 => CF0, + ZF0 => ZF1, + ZF1 => ZF0, + CF0AndZF0 => CF1OrZF1, + CF1OrZF1 => CF0AndZF0, + ZF0AndSFEqOF => ZF1OrSFNeOF, + ZF1OrSFNeOF => ZF0AndSFEqOF, + SFEqOF => SFNeOF, + SFNeOF => SFEqOF, + } + } +} + +/// Describes location of a value. 
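+/// Unlike `CCLoc`, a value here may also be an immediate constant or a condition code that has not yet been materialised into a register.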
+#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum ValueLocation { + /// Value exists in a register. + Reg(GPR), + /// Value exists on the stack. Note that this offset is from the rsp as it + /// was when we entered the function. + Stack(i32), + /// Value is a literal + Immediate(Value), + /// Value is a set condition code + Cond(CondCode), +} + +impl From for ValueLocation { + fn from(other: CCLoc) -> Self { + match other { + CCLoc::Reg(r) => ValueLocation::Reg(r), + CCLoc::Stack(o) => ValueLocation::Stack(o), + } + } +} + +impl ValueLocation { + fn stack(self) -> Option { + match self { + ValueLocation::Stack(o) => Some(o), + _ => None, + } + } + + fn reg(self) -> Option { + match self { + ValueLocation::Reg(r) => Some(r), + _ => None, + } + } + + fn immediate(self) -> Option { + match self { + ValueLocation::Immediate(i) => Some(i), + _ => None, + } + } + + fn imm_i32(self) -> Option { + self.immediate().and_then(Value::as_i32) + } + + fn imm_i64(self) -> Option { + self.immediate().and_then(Value::as_i64) + } + + fn imm_f32(self) -> Option { + self.immediate().and_then(Value::as_f32) + } + + fn imm_f64(self) -> Option { + self.immediate().and_then(Value::as_f64) + } +} + +// TODO: This assumes only system-v calling convention. +// In system-v calling convention the first 6 arguments are passed via registers. +// All rest arguments are passed on the stack. +const INTEGER_ARGS_IN_GPRS: &[GPR] = &[RSI, RDX, RCX, R8, R9]; +const INTEGER_RETURN_GPRS: &[GPR] = &[RAX, RDX]; +const FLOAT_ARGS_IN_GPRS: &[GPR] = &[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]; +const FLOAT_RETURN_GPRS: &[GPR] = &[XMM0, XMM1]; +// List of scratch registers taken from https://wiki.osdev.org/System_V_ABI +const SCRATCH_REGS: &[GPR] = &[ + RSI, RDX, RCX, R8, R9, RAX, R10, R11, XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, + XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, +]; +const VMCTX: RegId = rq::RDI; + +#[must_use] +#[derive(Debug, Clone)] +pub struct FunctionEnd { + should_generate_epilogue: bool, +} + +pub struct CodeGenSession<'module, M> { + assembler: Assembler, + pub module_context: &'module M, + pub op_offset_map: Vec<(AssemblyOffset, Box)>, + labels: Labels, + func_starts: Vec<(Option, DynamicLabel)>, +} + +impl<'module, M> CodeGenSession<'module, M> { + pub fn new(func_count: u32, module_context: &'module M) -> Self { + let mut assembler = Assembler::new().unwrap(); + let func_starts = iter::repeat_with(|| (None, assembler.new_dynamic_label())) + .take(func_count as usize) + .collect::>(); + + CodeGenSession { + assembler, + op_offset_map: Default::default(), + labels: Default::default(), + func_starts, + module_context, + } + } + + pub fn new_context<'this>( + &'this mut self, + func_idx: u32, + reloc_sink: &'this mut dyn binemit::RelocSink, + ) -> Context<'this, M> { + { + let func_start = &mut self.func_starts[func_idx as usize]; + + // At this point we know the exact start address of this function. Save it + // and define dynamic label at this location. 
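+ // Calls to this function from other functions can then be emitted against the label even before its body has been generated.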
+ func_start.0 = Some(self.assembler.offset()); + self.assembler.dynamic_label(func_start.1); + } + + Context { + asm: &mut self.assembler, + current_function: func_idx, + reloc_sink, + func_starts: &self.func_starts, + labels: &mut self.labels, + block_state: Default::default(), + module_context: self.module_context, + } + } + + fn finalize(&mut self) { + let mut values = self.labels.values_mut().collect::>(); + values.sort_unstable_by_key(|(_, align, _)| *align); + for (label, align, func) in values { + if let Some(mut func) = func.take() { + dynasm!(self.assembler + ; .align *align as usize + ); + self.assembler.dynamic_label(label.0); + func(&mut self.assembler); + } + } + } + + pub fn into_translated_code_section(mut self) -> Result { + self.finalize(); + let exec_buf = self + .assembler + .finalize() + .map_err(|_asm| Error::Assembler("assembler error".to_owned()))?; + let func_starts = self + .func_starts + .iter() + .map(|(offset, _)| offset.unwrap()) + .collect::>(); + Ok(TranslatedCodeSection { + exec_buf, + func_starts, + op_offset_map: self.op_offset_map, + // TODO + relocatable_accesses: vec![], + }) + } +} + +#[derive(Debug)] +struct RelocateAddress { + reg: Option, + imm: usize, +} + +#[derive(Debug)] +struct RelocateAccess { + position: AssemblyOffset, + dst_reg: GPR, + address: RelocateAddress, +} + +pub struct TranslatedCodeSection { + exec_buf: ExecutableBuffer, + func_starts: Vec, + relocatable_accesses: Vec, + op_offset_map: Vec<(AssemblyOffset, Box)>, +} + +impl TranslatedCodeSection { + pub fn func_start(&self, idx: usize) -> *const u8 { + let offset = self.func_starts[idx]; + self.exec_buf.ptr(offset) + } + + pub fn func_range(&self, idx: usize) -> std::ops::Range { + let end = self + .func_starts + .get(idx + 1) + .map(|i| i.0) + .unwrap_or(self.exec_buf.len()); + + self.func_starts[idx].0..end + } + + pub fn funcs<'a>(&'a self) -> impl Iterator> + 'a { + (0..self.func_starts.len()).map(move |i| self.func_range(i)) + } + + pub fn buffer(&self) -> &[u8] { + &*self.exec_buf + } + + pub fn disassemble(&self) { + crate::disassemble::disassemble(&*self.exec_buf, &self.op_offset_map).unwrap(); + } +} + +#[derive(Debug, Default, Clone)] +pub struct BlockState { + pub stack: Stack, + pub depth: StackDepth, + pub regs: Registers, +} + +type Stack = Vec; + +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq)] +enum LabelValue { + I32(i32), + I64(i64), +} + +impl From for LabelValue { + fn from(other: Value) -> LabelValue { + match other { + Value::I32(v) => LabelValue::I32(v), + Value::I64(v) => LabelValue::I64(v), + Value::F32(v) => LabelValue::I32(v.to_bits() as _), + Value::F64(v) => LabelValue::I64(v.to_bits() as _), + } + } +} + +type Labels = HashMap< + (u32, Either)>), + (Label, u32, Option>), +>; + +pub struct Context<'this, M> { + pub asm: &'this mut Assembler, + reloc_sink: &'this mut dyn binemit::RelocSink, + module_context: &'this M, + current_function: u32, + func_starts: &'this Vec<(Option, DynamicLabel)>, + /// Each push and pop on the value stack increments or decrements this value by 1 respectively. + pub block_state: BlockState, + labels: &'this mut Labels, +} + +/// Label in code. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct Label(DynamicLabel); + +/// Offset from starting value of SP counted in words. 
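+/// A function starts at a depth of 1 to account for the return address pushed by `call` (see `BlockCallingConvention::function_start`).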
+#[derive(Default, Debug, Copy, Clone, PartialEq, Eq)] +pub struct StackDepth(u32); + +impl StackDepth { + pub fn reserve(&mut self, slots: u32) { + self.0 = self.0.checked_add(slots).unwrap(); + } + + pub fn free(&mut self, slots: u32) { + self.0 = self.0.checked_sub(slots).unwrap(); + } +} + +macro_rules! int_div { + ($full_div_s:ident, $full_div_u:ident, $div_u:ident, $div_s:ident, $rem_u:ident, $rem_s:ident, $imm_fn:ident, $signed_ty:ty, $unsigned_ty:ty, $reg_ty:tt, $pointer_ty:tt) => { + // TODO: Fast div using mul for constant divisor? It looks like LLVM doesn't do that for us when + // emitting Wasm. + pub fn $div_u(&mut self) { + let divisor = self.pop(); + let dividend = self.pop(); + + if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) { + if divisor == 0 { + self.trap(); + self.push(ValueLocation::Immediate((0 as $unsigned_ty).into())); + } else { + self.push(ValueLocation::Immediate( + <$unsigned_ty>::wrapping_div(dividend as _, divisor as _).into(), + )); + } + + return; + } + + let (div, rem, saved) = self.$full_div_u(divisor, dividend); + + self.free_value(rem); + + let div = match div { + ValueLocation::Reg(div) => { + if saved.clone().any(|dst| dst == div) { + let new = self.take_reg(I32).unwrap(); + dynasm!(self.asm + ; mov Rq(new.rq().unwrap()), Rq(div.rq().unwrap()) + ); + self.block_state.regs.release(div); + ValueLocation::Reg(new) + } else { + ValueLocation::Reg(div) + } + } + _ => div, + }; + + self.cleanup_gprs(saved); + + self.push(div); + } + + // TODO: Fast div using mul for constant divisor? It looks like LLVM doesn't do that for us when + // emitting Wasm. + pub fn $div_s(&mut self) { + let divisor = self.pop(); + let dividend = self.pop(); + + if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) { + if divisor == 0 { + self.trap(); + self.push(ValueLocation::Immediate((0 as $signed_ty).into())); + } else { + self.push(ValueLocation::Immediate( + <$signed_ty>::wrapping_div(dividend, divisor).into(), + )); + } + + return; + } + + let (div, rem, saved) = self.$full_div_s(divisor, dividend); + + self.free_value(rem); + + let div = match div { + ValueLocation::Reg(div) => { + if saved.clone().any(|dst| dst == div) { + let new = self.take_reg(I32).unwrap(); + dynasm!(self.asm + ; mov Rq(new.rq().unwrap()), Rq(div.rq().unwrap()) + ); + self.block_state.regs.release(div); + ValueLocation::Reg(new) + } else { + ValueLocation::Reg(div) + } + } + _ => div, + }; + + self.cleanup_gprs(saved); + + self.push(div); + } + + pub fn $rem_u(&mut self) { + let divisor = self.pop(); + let dividend = self.pop(); + + if let (Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) { + if divisor == 0 { + self.trap(); + self.push(ValueLocation::Immediate((0 as $unsigned_ty).into())); + } else { + self.push(ValueLocation::Immediate( + (dividend as $unsigned_ty % divisor as $unsigned_ty).into(), + )); + } + return; + } + + let (div, rem, saved) = self.$full_div_u(divisor, dividend); + + self.free_value(div); + + let rem = match rem { + ValueLocation::Reg(rem) => { + if saved.clone().any(|dst| dst == rem) { + let new = self.take_reg(I32).unwrap(); + dynasm!(self.asm + ; mov Rq(new.rq().unwrap()), Rq(rem.rq().unwrap()) + ); + self.block_state.regs.release(rem); + ValueLocation::Reg(new) + } else { + ValueLocation::Reg(rem) + } + } + _ => rem, + }; + + self.cleanup_gprs(saved); + + self.push(rem); + } + + pub fn $rem_s(&mut self) { + let mut divisor = self.pop(); + let dividend = self.pop(); + + if let 
(Some(dividend), Some(divisor)) = (dividend.$imm_fn(), divisor.$imm_fn()) { + if divisor == 0 { + self.trap(); + self.push(ValueLocation::Immediate((0 as $signed_ty).into())); + } else { + self.push(ValueLocation::Immediate((dividend % divisor).into())); + } + return; + } + + let is_neg1 = self.create_label(); + + let current_depth = self.block_state.depth.clone(); + + // TODO: This could cause segfaults because of implicit push/pop + let gen_neg1_case = match divisor { + ValueLocation::Immediate(_) => { + if divisor.$imm_fn().unwrap() == -1 { + self.push(ValueLocation::Immediate((-1 as $signed_ty).into())); + self.free_value(dividend); + return; + } + + false + } + ValueLocation::Reg(_) => { + let reg = self.into_reg(GPRType::Rq, &mut divisor).unwrap(); + dynasm!(self.asm + ; cmp $reg_ty(reg.rq().unwrap()), -1 + ); + // TODO: We could choose `current_depth` as the depth here instead but we currently + // don't for simplicity + self.set_stack_depth(current_depth.clone()); + dynasm!(self.asm + ; je =>is_neg1.0 + ); + + true + } + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + dynasm!(self.asm + ; cmp $pointer_ty [rsp + offset], -1 + ); + self.set_stack_depth(current_depth.clone()); + dynasm!(self.asm + ; je =>is_neg1.0 + ); + + true + } + ValueLocation::Cond(_) => { + // `cc` can never be `-1`, only `0` and `1` + false + } + }; + + let (div, rem, saved) = self.$full_div_s(divisor, dividend); + + self.free_value(div); + + let rem = match rem { + ValueLocation::Reg(rem) => { + if saved.clone().any(|dst| dst == rem) { + let new = self.take_reg(I32).unwrap(); + dynasm!(self.asm + ; mov Rq(new.rq().unwrap()), Rq(rem.rq().unwrap()) + ); + self.block_state.regs.release(rem); + ValueLocation::Reg(new) + } else { + ValueLocation::Reg(rem) + } + } + _ => rem, + }; + + self.cleanup_gprs(saved); + + if gen_neg1_case { + let ret = self.create_label(); + self.set_stack_depth(current_depth.clone()); + dynasm!(self.asm + ; jmp =>ret.0 + ); + self.define_label(is_neg1); + + self.copy_value( + ValueLocation::Immediate((0 as $signed_ty).into()), + CCLoc::try_from(rem).expect("Programmer error") + ); + + self.set_stack_depth(current_depth.clone()); + self.define_label(ret); + } + + self.push(rem); + } + } +} + +macro_rules! unop { + ($name:ident, $instr:ident, $reg_ty:tt, $typ:ty, $const_fallback:expr) => { + pub fn $name(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => + ValueLocation::Immediate( + ($const_fallback(imm.as_int().unwrap() as $typ) as $typ).into() + ), + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + let temp = self.take_reg(Type::for_::<$typ>()).unwrap(); + dynasm!(self.asm + ; $instr $reg_ty(temp.rq().unwrap()), [rsp + offset] + ); + ValueLocation::Reg(temp) + } + ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let reg = self.into_reg(GPRType::Rq, &mut val).unwrap(); + let temp = self.take_reg(Type::for_::<$typ>()).unwrap(); + dynasm!(self.asm + ; $instr $reg_ty(temp.rq().unwrap()), $reg_ty(reg.rq().unwrap()) + ); + ValueLocation::Reg(temp) + } + }; + + self.free_value(val); + self.push(out_val); + } + } +} + +macro_rules! 
conversion { + ( + $name:ident, + $instr:ident, + $in_reg_ty:tt, + $in_reg_fn:ident, + $out_reg_ty:tt, + $out_reg_fn:ident, + $in_typ:ty, + $out_typ:ty, + $const_ty_fn:ident, + $const_fallback:expr + ) => { + pub fn $name(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => + ValueLocation::Immediate( + $const_fallback(imm.$const_ty_fn().unwrap()).into() + ), + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + let temp = self.take_reg(Type::for_::<$out_typ>()).unwrap(); + dynasm!(self.asm + ; $instr $out_reg_ty(temp.$out_reg_fn().unwrap()), [rsp + offset] + ); + + ValueLocation::Reg(temp) + } + ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let reg = self.into_reg(Type::for_::<$in_typ>(), &mut val).unwrap(); + let temp = self.take_reg(Type::for_::<$out_typ>()).unwrap(); + + dynasm!(self.asm + ; $instr $out_reg_ty(temp.$out_reg_fn().unwrap()), $in_reg_ty(reg.$in_reg_fn().unwrap()) + ); + + ValueLocation::Reg(temp) + } + }; + + self.free_value(val); + + self.push(out_val); + } + } +} + +// TODO: Support immediate `count` parameters +macro_rules! shift { + ($name:ident, $reg_ty:tt, $instr:ident, $const_fallback:expr, $ty:expr) => { + pub fn $name(&mut self) { + let mut count = self.pop(); + let mut val = self.pop(); + + if let Some(imm) = count.immediate() { + if let Some(imm) = imm.as_int() { + if let Ok(imm) = i8::try_from(imm) { + let reg = self.into_temp_reg($ty, &mut val).unwrap(); + + dynasm!(self.asm + ; $instr $reg_ty(reg.rq().unwrap()), imm + ); + self.push(ValueLocation::Reg(reg)); + return; + } + } + } + + if val == ValueLocation::Reg(RCX) { + let new = self.take_reg($ty).unwrap(); + self.copy_value(val, CCLoc::Reg(new)); + self.free_value(val); + val = ValueLocation::Reg(new); + } + + // TODO: Maybe allocate `RCX`, write `count` to it and then free `count`. + // Once we've implemented refcounting this will do the right thing + // for free. + let temp_rcx = match count { + ValueLocation::Reg(RCX) => {None} + other => { + let out = if self.block_state.regs.is_free(RCX) { + None + } else { + let new_reg = self.take_reg(I32).unwrap(); + dynasm!(self.asm + ; mov Rq(new_reg.rq().unwrap()), rcx + ); + Some(new_reg) + }; + + match other { + ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let gpr = self.into_reg(I32, &mut count).unwrap(); + dynasm!(self.asm + ; mov cl, Rb(gpr.rq().unwrap()) + ); + } + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + dynasm!(self.asm + ; mov cl, [rsp + offset] + ); + } + ValueLocation::Immediate(imm) => { + dynasm!(self.asm + ; mov cl, imm.as_int().unwrap() as i8 + ); + } + } + + out + } + }; + + self.free_value(count); + self.block_state.regs.mark_used(RCX); + count = ValueLocation::Reg(RCX); + + let reg = self.into_temp_reg($ty, &mut val).unwrap(); + + dynasm!(self.asm + ; $instr $reg_ty(reg.rq().unwrap()), cl + ); + + self.free_value(count); + + if let Some(gpr) = temp_rcx { + dynasm!(self.asm + ; mov rcx, Rq(gpr.rq().unwrap()) + ); + self.block_state.regs.release(gpr); + } + + self.push(val); + } + } +} + +macro_rules! 
cmp_i32 { + ($name:ident, $flags:expr, $reverse_flags:expr, $const_fallback:expr) => { + pub fn $name(&mut self) { + let mut right = self.pop(); + let mut left = self.pop(); + + let out = if let Some(i) = left.imm_i32() { + match right { + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + + dynasm!(self.asm + ; cmp DWORD [rsp + offset], i + ); + ValueLocation::Cond($reverse_flags) + } + ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let rreg = self.into_reg(I32, &mut right).unwrap(); + dynasm!(self.asm + ; cmp Rd(rreg.rq().unwrap()), i + ); + ValueLocation::Cond($reverse_flags) + } + ValueLocation::Immediate(right) => { + ValueLocation::Immediate( + (if $const_fallback(i, right.as_i32().unwrap()) { + 1i32 + } else { + 0i32 + }).into() + ) + } + } + } else { + let lreg = self.into_reg(I32, &mut left).unwrap(); + + match right { + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + dynasm!(self.asm + ; cmp Rd(lreg.rq().unwrap()), [rsp + offset] + ); + } + ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let rreg = self.into_reg(I32, &mut right).unwrap(); + dynasm!(self.asm + ; cmp Rd(lreg.rq().unwrap()), Rd(rreg.rq().unwrap()) + ); + } + ValueLocation::Immediate(i) => { + dynasm!(self.asm + ; cmp Rd(lreg.rq().unwrap()), i.as_i32().unwrap() + ); + } + } + + ValueLocation::Cond($flags) + }; + + self.free_value(left); + self.free_value(right); + + self.push(out); + } + } +} + +macro_rules! cmp_i64 { + ($name:ident, $flags:expr, $reverse_flags:expr, $const_fallback:expr) => { + pub fn $name(&mut self) { + let mut right = self.pop(); + let mut left = self.pop(); + + let out = if let Some(i) = left.imm_i64() { + match right { + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + if let Some(i) = i.try_into() { + dynasm!(self.asm + ; cmp QWORD [rsp + offset], i + ); + } else { + let lreg = self.into_reg(I32, &mut left).unwrap(); + dynasm!(self.asm + ; cmp QWORD [rsp + offset], Rq(lreg.rq().unwrap()) + ); + } + ValueLocation::Cond($reverse_flags) + } + ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let rreg = self.into_reg(I32, &mut right).unwrap(); + if let Some(i) = i.try_into() { + dynasm!(self.asm + ; cmp Rq(rreg.rq().unwrap()), i + ); + } else { + let lreg = self.into_reg(I32, &mut left).unwrap(); + dynasm!(self.asm + ; cmp Rq(rreg.rq().unwrap()), Rq(lreg.rq().unwrap()) + ); + } + ValueLocation::Cond($reverse_flags) + } + ValueLocation::Immediate(right) => { + ValueLocation::Immediate( + (if $const_fallback(i, right.as_i64().unwrap()) { + 1i32 + } else { + 0i32 + }).into() + ) + } + } + } else { + let lreg = self.into_reg(I64, &mut left).unwrap(); + + match right { + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + dynasm!(self.asm + ; cmp Rq(lreg.rq().unwrap()), [rsp + offset] + ); + } + ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let rreg = self.into_reg(I32, &mut right).unwrap(); + dynasm!(self.asm + ; cmp Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap()) + ); + } + ValueLocation::Immediate(i) => { + let i = i.as_i64().unwrap(); + if let Some(i) = i.try_into() { + dynasm!(self.asm + ; cmp Rq(lreg.rq().unwrap()), i + ); + } else { + let rreg = self.into_reg(I32, &mut right).unwrap(); + dynasm!(self.asm + ; cmp Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap()) + ); + } + } + } + + ValueLocation::Cond($flags) + }; + + self.free_value(left); + self.free_value(right); + self.push(out); + } + } +} + +macro_rules! 
cmp_f32 { + ($name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => { + cmp_float!( + comiss, + f32, + imm_f32, + $name, + $reverse_name, + $instr, + $const_fallback + ); + }; +} + +macro_rules! eq_float { + ($name:ident, $instr:ident, $imm_fn:ident, $const_fallback:expr) => { + pub fn $name(&mut self) { + let right = self.pop(); + let left = self.pop(); + + if let Some(right) = right.immediate() { + if let Some(left) = left.immediate() { + self.push(ValueLocation::Immediate( + if $const_fallback(left.$imm_fn().unwrap(), right.$imm_fn().unwrap()) { + 1u32 + } else { + 0 + }.into() + )); + return; + } + } + + let (mut left, mut right) = match left { + ValueLocation::Reg(r) if self.block_state.regs.num_usages(r) <= 1 => (left, right), + _ => (right, left) + }; + + let lreg = self.into_temp_reg(GPRType::Rx, &mut left).unwrap(); + let rreg = self.into_reg(GPRType::Rx, &mut right).unwrap(); + let out = self.take_reg(I32).unwrap(); + + dynasm!(self.asm + ; $instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) + ; movd Rd(out.rq().unwrap()), Rx(lreg.rx().unwrap()) + ; and Rd(out.rq().unwrap()), 1 + ); + + self.push(ValueLocation::Reg(out)); + self.free_value(left); + self.free_value(right); + } + + } +} + +macro_rules! minmax_float { + ( + $name:ident, + $instr:ident, + $cmpinstr:ident, + $addinstr:ident, + $combineinstr:ident, + $imm_fn:ident, + $const_fallback:expr + ) => { + pub fn $name(&mut self) { + let right = self.pop(); + let left = self.pop(); + + if let Some(right) = right.immediate() { + if let Some(left) = left.immediate() { + self.push(ValueLocation::Immediate( + $const_fallback(left.$imm_fn().unwrap(), right.$imm_fn().unwrap()).into() + )); + return; + } + } + + let (mut left, mut right) = match left { + ValueLocation::Reg(r) if self.block_state.regs.num_usages(r) <= 1 => (left, right), + _ => (right, left) + }; + + let lreg = self.into_temp_reg(GPRType::Rx, &mut left).unwrap(); + let rreg = self.into_reg(GPRType::Rx, &mut right).unwrap(); + + dynasm!(self.asm + ; $cmpinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) + ; je >equal + ; $instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) + ; jmp >ret + ; equal: + ; jnp >equal_but_not_parity + ; $addinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) + ; jmp >ret + ; equal_but_not_parity: + ; $combineinstr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) + ; ret: + ); + + self.push(left); + self.free_value(right); + } + + } +} + +macro_rules! cmp_f64 { + ($name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => { + cmp_float!( + comisd, + f64, + imm_f64, + $name, + $reverse_name, + $instr, + $const_fallback + ); + }; +} + +macro_rules! 
cmp_float { + (@helper $cmp_instr:ident, $ty:ty, $imm_fn:ident, $self:expr, $left:expr, $right:expr, $instr:ident, $const_fallback:expr) => {{ + let (left, right, this) = ($left, $right, $self); + if let (Some(left), Some(right)) = (left.$imm_fn(), right.$imm_fn()) { + if $const_fallback(<$ty>::from_bits(left.to_bits()), <$ty>::from_bits(right.to_bits())) { + ValueLocation::Immediate(1i32.into()) + } else { + ValueLocation::Immediate(0i32.into()) + } + } else { + let lreg = this.into_reg(GPRType::Rx, left).unwrap(); + let result = this.take_reg(I32).unwrap(); + + match right { + ValueLocation::Stack(offset) => { + let offset = this.adjusted_offset(*offset); + + dynasm!(this.asm + ; xor Rq(result.rq().unwrap()), Rq(result.rq().unwrap()) + ; $cmp_instr Rx(lreg.rx().unwrap()), [rsp + offset] + ; $instr Rb(result.rq().unwrap()) + ); + } + right => { + let rreg = this.into_reg(GPRType::Rx, right).unwrap(); + + dynasm!(this.asm + ; xor Rq(result.rq().unwrap()), Rq(result.rq().unwrap()) + ; $cmp_instr Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) + ; $instr Rb(result.rq().unwrap()) + ); + } + } + + ValueLocation::Reg(result) + } + }}; + ($cmp_instr:ident, $ty:ty, $imm_fn:ident, $name:ident, $reverse_name:ident, $instr:ident, $const_fallback:expr) => { + pub fn $name(&mut self) { + let mut right = self.pop(); + let mut left = self.pop(); + + let out = cmp_float!(@helper + $cmp_instr, + $ty, + $imm_fn, + &mut *self, + &mut left, + &mut right, + $instr, + $const_fallback + ); + + self.free_value(left); + self.free_value(right); + + self.push(out); + } + + pub fn $reverse_name(&mut self) { + let mut right = self.pop(); + let mut left = self.pop(); + + let out = cmp_float!(@helper + $cmp_instr, + $ty, + $imm_fn, + &mut *self, + &mut right, + &mut left, + $instr, + $const_fallback + ); + + self.free_value(left); + self.free_value(right); + + self.push(out); + } + }; +} + +macro_rules! binop_i32 { + ($name:ident, $instr:ident, $const_fallback:expr) => { + binop!( + $name, + $instr, + $const_fallback, + Rd, + rq, + I32, + imm_i32, + |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm + ; $instr Rd(op1.rq().unwrap()), i + ) + ); + }; +} + +macro_rules! commutative_binop_i32 { + ($name:ident, $instr:ident, $const_fallback:expr) => { + commutative_binop!( + $name, + $instr, + $const_fallback, + Rd, + rq, + I32, + imm_i32, + |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm + ; $instr Rd(op1.rq().unwrap()), i + ) + ); + }; +} + +macro_rules! binop_i64 { + ($name:ident, $instr:ident, $const_fallback:expr) => { + binop!( + $name, + $instr, + $const_fallback, + Rq, + rq, + I64, + imm_i64, + |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm + ; $instr Rq(op1.rq().unwrap()), i + ) + ); + }; +} + +macro_rules! commutative_binop_i64 { + ($name:ident, $instr:ident, $const_fallback:expr) => { + commutative_binop!( + $name, + $instr, + $const_fallback, + Rq, + rq, + I64, + imm_i64, + |this: &mut Context<_>, op1: GPR, i| dynasm!(this.asm + ; $instr Rq(op1.rq().unwrap()), i + ) + ); + }; +} + +macro_rules! binop_f32 { + ($name:ident, $instr:ident, $const_fallback:expr) => { + binop!( + $name, + $instr, + |a: Ieee32, b: Ieee32| Ieee32::from_bits( + $const_fallback(f32::from_bits(a.to_bits()), f32::from_bits(b.to_bits())).to_bits() + ), + Rx, + rx, + F32, + imm_f32, + |_, _, _| unreachable!() + ); + }; +} + +macro_rules! 
commutative_binop_f32 { + ($name:ident, $instr:ident, $const_fallback:expr) => { + commutative_binop!( + $name, + $instr, + |a: Ieee32, b: Ieee32| Ieee32::from_bits( + $const_fallback(f32::from_bits(a.to_bits()), f32::from_bits(b.to_bits())).to_bits() + ), + Rx, + rx, + F32, + imm_f32, + |_, _, _| unreachable!() + ); + }; +} + +macro_rules! binop_f64 { + ($name:ident, $instr:ident, $const_fallback:expr) => { + binop!( + $name, + $instr, + |a: Ieee64, b: Ieee64| Ieee64::from_bits( + $const_fallback(f64::from_bits(a.to_bits()), f64::from_bits(b.to_bits())).to_bits() + ), + Rx, + rx, + F64, + imm_f64, + |_, _, _| unreachable!() + ); + }; +} + +macro_rules! commutative_binop_f64 { + ($name:ident, $instr:ident, $const_fallback:expr) => { + commutative_binop!( + $name, + $instr, + |a: Ieee64, b: Ieee64| Ieee64::from_bits( + $const_fallback(f64::from_bits(a.to_bits()), f64::from_bits(b.to_bits())).to_bits() + ), + Rx, + rx, + F64, + imm_f64, + |_, _, _| unreachable!() + ); + }; +} +macro_rules! commutative_binop { + ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr) => { + binop!( + $name, + $instr, + $const_fallback, + $reg_ty, + $reg_fn, + $ty, + $imm_fn, + $direct_imm, + |op1: ValueLocation, op0: ValueLocation| match op1 { + ValueLocation::Reg(_) => (op1, op0), + _ => { + if op0.immediate().is_some() { + (op1, op0) + } else { + (op0, op1) + } + } + } + ); + }; +} + +macro_rules! binop { + ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr) => { + binop!($name, $instr, $const_fallback, $reg_ty, $reg_fn, $ty, $imm_fn, $direct_imm, |a, b| (a, b)); + }; + ($name:ident, $instr:ident, $const_fallback:expr, $reg_ty:tt, $reg_fn:ident, $ty:expr, $imm_fn:ident, $direct_imm:expr, $map_op:expr) => { + pub fn $name(&mut self) { + let right = self.pop(); + let left = self.pop(); + + if let Some(i1) = left.$imm_fn() { + if let Some(i0) = right.$imm_fn() { + self.block_state.stack.push(ValueLocation::Immediate($const_fallback(i1, i0).into())); + return; + } + } + + let (mut left, mut right) = $map_op(left, right); + let lreg = self.into_temp_reg($ty, &mut left).unwrap(); + + match right { + ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + // This handles the case where we (for example) have a float in an `Rq` reg + let right_reg = self.into_reg($ty, &mut right).unwrap(); + dynasm!(self.asm + ; $instr $reg_ty(lreg.$reg_fn().unwrap()), $reg_ty(right_reg.$reg_fn().unwrap()) + ); + } + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + dynasm!(self.asm + ; $instr $reg_ty(lreg.$reg_fn().unwrap()), [rsp + offset] + ); + } + ValueLocation::Immediate(i) => { + if let Some(i) = i.as_int().and_then(|i| i.try_into()) { + $direct_imm(&mut *self, lreg, i); + } else { + let scratch = self.take_reg($ty).unwrap(); + self.immediate_to_reg(scratch, i); + + dynasm!(self.asm + ; $instr $reg_ty(lreg.$reg_fn().unwrap()), $reg_ty(scratch.$reg_fn().unwrap()) + ); + + self.block_state.regs.release(scratch); + } + } + } + + self.free_value(right); + self.push(left); + } + } +} + +macro_rules! 
load { + (@inner $name:ident, $rtype:expr, $reg_ty:tt, $emit_fn:expr) => { + pub fn $name(&mut self, offset: u32) { + fn load_to_reg<_M: ModuleContext>( + ctx: &mut Context<_M>, + dst: GPR, + (offset, runtime_offset): (i32, Result) + ) { + let mem_index = 0; + let reg_offset = ctx.module_context + .defined_memory_index(mem_index) + .map(|index| ( + None, + ctx.module_context.vmctx_vmmemory_definition(index) as i32 + )); + let (reg, mem_offset) = reg_offset.unwrap_or_else(|| { + let reg = ctx.take_reg(I64).unwrap(); + + dynasm!(ctx.asm + ; mov Rq(reg.rq().unwrap()), [ + Rq(VMCTX) + ctx.module_context.vmctx_vmmemory_import_from(mem_index) as i32 + ] + ); + + (Some(reg), 0) + }); + + let vmctx = GPR::Rq(VMCTX); + + if ctx.module_context.emit_memory_bounds_check() { + let trap_label = ctx.trap_label(); + let addr_reg = match runtime_offset { + Ok(imm) => { + let addr_reg = ctx.take_reg(I64).unwrap(); + dynasm!(ctx.asm + ; mov Rq(addr_reg.rq().unwrap()), QWORD imm as i64 + offset as i64 + ); + addr_reg + } + Err(gpr) => { + if offset == 0 { + ctx.to_reg(I32, ValueLocation::Reg(gpr)).unwrap() + } else if offset > 0 { + let addr_reg = ctx.take_reg(I64).unwrap(); + dynasm!(ctx.asm + ; lea Rq(addr_reg.rq().unwrap()), [Rq(gpr.rq().unwrap()) + offset] + ); + addr_reg + } else { + let addr_reg = ctx.take_reg(I64).unwrap(); + let offset_reg = ctx.take_reg(I64).unwrap(); + dynasm!(ctx.asm + ; mov Rd(offset_reg.rq().unwrap()), offset + ; mov Rq(addr_reg.rq().unwrap()), Rq(gpr.rq().unwrap()) + ; add Rq(addr_reg.rq().unwrap()), Rq(offset_reg.rq().unwrap()) + ); + ctx.block_state.regs.release(offset_reg); + addr_reg + } + } + }; + dynasm!(ctx.asm + ; cmp [ + Rq(reg.unwrap_or(vmctx).rq().unwrap()) + + mem_offset + + ctx.module_context.vmmemory_definition_current_length() as i32 + ], Rq(addr_reg.rq().unwrap()) + ; jna =>trap_label.0 + ); + ctx.block_state.regs.release(addr_reg); + } + + let mem_ptr_reg = ctx.take_reg(I64).unwrap(); + dynasm!(ctx.asm + ; mov Rq(mem_ptr_reg.rq().unwrap()), [ + Rq(reg.unwrap_or(vmctx).rq().unwrap()) + + mem_offset + + ctx.module_context.vmmemory_definition_base() as i32 + ] + ); + if let Some(reg) = reg { + ctx.block_state.regs.release(reg); + } + $emit_fn(ctx, dst, mem_ptr_reg, runtime_offset, offset); + ctx.block_state.regs.release(mem_ptr_reg); + } + + let base = self.pop(); + + let temp = self.take_reg($rtype).unwrap(); + + match base { + ValueLocation::Immediate(i) => { + load_to_reg(self, temp, (offset as _, Ok(i.as_i32().unwrap()))); + } + mut base => { + let gpr = self.into_reg(I32, &mut base).unwrap(); + load_to_reg(self, temp, (offset as _, Err(gpr))); + self.free_value(base); + } + } + + self.push(ValueLocation::Reg(temp)); + } + }; + ($name:ident, $rtype:expr, $reg_ty:tt, NONE, $rq_instr:ident, $ty:ident) => { + load!(@inner + $name, + $rtype, + $reg_ty, + |ctx: &mut Context<_>, dst: GPR, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32| { + match runtime_offset { + Ok(imm) => { + dynasm!(ctx.asm + ; $rq_instr $reg_ty(dst.rq().unwrap()), $ty [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm] + ); + } + Err(offset_reg) => { + dynasm!(ctx.asm + ; $rq_instr $reg_ty(dst.rq().unwrap()), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] + ); + } + } + } + ); + }; + ($name:ident, $rtype:expr, $reg_ty:tt, $xmm_instr:ident, $rq_instr:ident, $ty:ident) => { + load!(@inner + $name, + $rtype, + $reg_ty, + |ctx: &mut Context<_>, dst: GPR, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32| { + match (dst, runtime_offset) { + (GPR::Rq(r), Ok(imm)) => 
{ + dynasm!(ctx.asm + ; $rq_instr $reg_ty(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm] + ); + } + (GPR::Rx(r), Ok(imm)) => { + if let Some(combined) = offset.checked_add(imm) { + dynasm!(ctx.asm + ; $xmm_instr Rx(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + combined] + ); + } else { + let offset_reg = ctx.take_reg(GPRType::Rq).unwrap(); + dynasm!(ctx.asm + ; mov Rq(offset_reg.rq().unwrap()), offset + ; $xmm_instr Rx(r), $ty [ + Rq(mem_ptr_reg.rq().unwrap()) + + Rq(offset_reg.rq().unwrap()) + + imm + ] + ); + ctx.block_state.regs.release(offset_reg); + } + } + (GPR::Rq(r), Err(offset_reg)) => { + dynasm!(ctx.asm + ; $rq_instr $reg_ty(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] + ); + } + (GPR::Rx(r), Err(offset_reg)) => { + dynasm!(ctx.asm + ; $xmm_instr Rx(r), $ty [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset] + ); + } + } + } + ); + }; +} + +macro_rules! store { + (@inner $name:ident, $int_reg_ty:tt, $match_offset:expr, $size:ident) => { + pub fn $name(&mut self, offset: u32) { + fn store_from_reg<_M: ModuleContext>( + ctx: &mut Context<_M>, + src: GPR, + (offset, runtime_offset): (i32, Result) + ) { + let mem_index = 0; + let reg_offset = ctx.module_context + .defined_memory_index(mem_index) + .map(|index| ( + None, + ctx.module_context.vmctx_vmmemory_definition(index) as i32 + )); + let (reg, mem_offset) = reg_offset.unwrap_or_else(|| { + let reg = ctx.take_reg(I64).unwrap(); + + dynasm!(ctx.asm + ; mov Rq(reg.rq().unwrap()), [ + Rq(VMCTX) + ctx.module_context.vmctx_vmmemory_import_from(mem_index) as i32 + ] + ); + + (Some(reg), 0) + }); + + let vmctx = GPR::Rq(VMCTX); + + if ctx.module_context.emit_memory_bounds_check() { + let trap_label = ctx.trap_label(); + let addr_reg = match runtime_offset { + Ok(imm) => { + let addr_reg = ctx.take_reg(I64).unwrap(); + dynasm!(ctx.asm + ; mov Rq(addr_reg.rq().unwrap()), QWORD imm as i64 + offset as i64 + ); + addr_reg + } + Err(gpr) => { + if offset == 0 { + ctx.to_reg(I32, ValueLocation::Reg(gpr)).unwrap() + } else if offset > 0 { + let addr_reg = ctx.take_reg(I64).unwrap(); + dynasm!(ctx.asm + ; lea Rq(addr_reg.rq().unwrap()), [Rq(gpr.rq().unwrap()) + offset] + ); + addr_reg + } else { + let addr_reg = ctx.take_reg(I64).unwrap(); + let offset_reg = ctx.take_reg(I64).unwrap(); + dynasm!(ctx.asm + ; mov Rd(offset_reg.rq().unwrap()), offset + ; mov Rq(addr_reg.rq().unwrap()), Rq(gpr.rq().unwrap()) + ; add Rq(addr_reg.rq().unwrap()), Rq(offset_reg.rq().unwrap()) + ); + ctx.block_state.regs.release(offset_reg); + addr_reg + } + } + }; + dynasm!(ctx.asm + ; cmp Rq(addr_reg.rq().unwrap()), [ + Rq(reg.unwrap_or(vmctx).rq().unwrap()) + + mem_offset + + ctx.module_context.vmmemory_definition_current_length() as i32 + ] + ; jae =>trap_label.0 + ); + ctx.block_state.regs.release(addr_reg); + } + + let mem_ptr_reg = ctx.take_reg(I64).unwrap(); + dynasm!(ctx.asm + ; mov Rq(mem_ptr_reg.rq().unwrap()), [ + Rq(reg.unwrap_or(vmctx).rq().unwrap()) + + mem_offset + + ctx.module_context.vmmemory_definition_base() as i32 + ] + ); + if let Some(reg) = reg { + ctx.block_state.regs.release(reg); + } + let src = $match_offset(ctx, mem_ptr_reg, runtime_offset, offset, src); + ctx.block_state.regs.release(mem_ptr_reg); + ctx.block_state.regs.release(src); + } + + assert!(offset <= i32::max_value() as u32); + + let mut src = self.pop(); + let base = self.pop(); + + // `store_from_reg` frees `src` + // TODO: Would it be better to free it outside `store_from_reg`? 
+ let src_reg = self.into_reg(None, &mut src).unwrap(); + + match base { + ValueLocation::Immediate(i) => { + store_from_reg(self, src_reg, (offset as i32, Ok(i.as_i32().unwrap()))); + } + mut base => { + let gpr = self.into_reg(I32, &mut base).unwrap(); + store_from_reg(self, src_reg, (offset as i32, Err(gpr))); + self.free_value(base); + } + } + } + }; + ($name:ident, $int_reg_ty:tt, NONE, $size:ident) => { + store!(@inner + $name, + $int_reg_ty, + |ctx: &mut Context<_>, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32, src| { + let src_reg = ctx.into_temp_reg(GPRType::Rq, &mut ValueLocation::Reg(src)).unwrap(); + + match runtime_offset { + Ok(imm) => { + dynasm!(ctx.asm + ; mov [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], $int_reg_ty(src_reg.rq().unwrap()) + ); + } + Err(offset_reg) => { + dynasm!(ctx.asm + ; mov [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], $int_reg_ty(src_reg.rq().unwrap()) + ); + } + } + + src_reg + }, + $size + ); + }; + ($name:ident, $int_reg_ty:tt, $xmm_instr:ident, $size:ident) => { + store!(@inner + $name, + $int_reg_ty, + |ctx: &mut Context<_>, mem_ptr_reg: GPR, runtime_offset: Result, offset: i32, src| { + match (runtime_offset, src) { + (Ok(imm), GPR::Rq(r)) => { + dynasm!(ctx.asm + ; mov [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], $int_reg_ty(r) + ); + } + (Ok(imm), GPR::Rx(r)) => { + dynasm!(ctx.asm + ; $xmm_instr [Rq(mem_ptr_reg.rq().unwrap()) + offset + imm], Rx(r) + ); + } + (Err(offset_reg), GPR::Rq(r)) => { + dynasm!(ctx.asm + ; mov [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], $int_reg_ty(r) + ); + } + (Err(offset_reg), GPR::Rx(r)) => { + dynasm!(ctx.asm + ; $xmm_instr [Rq(mem_ptr_reg.rq().unwrap()) + Rq(offset_reg.rq().unwrap()) + offset], Rx(r) + ); + } + } + + src + }, + $size + ); + }; +} + +trait TryInto { + fn try_into(self) -> Option; +} + +impl TryInto for u64 { + fn try_into(self) -> Option { + let max = i64::max_value() as u64; + + if self <= max { + Some(self as i64) + } else { + None + } + } +} + +impl TryInto for i64 { + fn try_into(self) -> Option { + let min = i32::min_value() as i64; + let max = i32::max_value() as i64; + + if self >= min && self <= max { + Some(self as i32) + } else { + None + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VirtualCallingConvention { + pub stack: Stack, + pub depth: StackDepth, +} + +impl<'this, M: ModuleContext> Context<'this, M> { + fn free_reg(&mut self, type_: GPRType) -> bool { + let pos = if let Some(pos) = self + .block_state + .stack + .iter() + .position(|r| r.reg().map(|reg| reg.type_() == type_).unwrap_or(false)) + { + pos + } else { + return false; + }; + + let old_loc = self.block_state.stack[pos]; + let new_loc = self.push_physical(old_loc); + self.block_state.stack[pos] = new_loc; + + let reg = old_loc.reg().unwrap(); + + for elem in &mut self.block_state.stack[pos + 1..] { + if *elem == old_loc { + *elem = new_loc; + self.block_state.regs.release(reg); + } + } + + true + } + + fn take_reg(&mut self, r: impl Into) -> Option { + let r = r.into(); + loop { + if let Some(gpr) = self.block_state.regs.take(r) { + break Some(gpr); + } + + if !self.free_reg(r) { + break None; + } + } + } + + pub fn virtual_calling_convention(&self) -> VirtualCallingConvention { + VirtualCallingConvention { + stack: self.block_state.stack.clone(), + depth: self.block_state.depth, + } + } + + /// Create a new undefined label. 
+ pub fn create_label(&mut self) -> Label { + Label(self.asm.new_dynamic_label()) + } + + pub fn define_host_fn(&mut self, host_fn: *const u8) { + dynasm!(self.asm + ; mov rax, QWORD host_fn as i64 + ; call rax + ; ret + ); + } + + fn adjusted_offset(&self, offset: i32) -> i32 { + (self.block_state.depth.0 as i32 + offset) * WORD_SIZE as i32 + } + + cmp_i32!(i32_eq, cc::EQUAL, cc::EQUAL, |a, b| a == b); + cmp_i32!(i32_neq, cc::NOT_EQUAL, cc::NOT_EQUAL, |a, b| a != b); + // `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous + cmp_i32!(i32_lt_u, cc::LT_U, cc::GT_U, |a, b| (a as u32) < (b as u32)); + cmp_i32!(i32_le_u, cc::LE_U, cc::GE_U, |a, b| (a as u32) + <= (b as u32)); + cmp_i32!(i32_gt_u, cc::GT_U, cc::LT_U, |a, b| (a as u32) > (b as u32)); + cmp_i32!(i32_ge_u, cc::GE_U, cc::LE_U, |a, b| (a as u32) + >= (b as u32)); + cmp_i32!(i32_lt_s, cc::LT_S, cc::GT_S, |a, b| a < b); + cmp_i32!(i32_le_s, cc::LE_S, cc::GE_S, |a, b| a <= b); + cmp_i32!(i32_gt_s, cc::GT_S, cc::LT_S, |a, b| a > b); + cmp_i32!(i32_ge_s, cc::GE_S, cc::LE_S, |a, b| a >= b); + + cmp_i64!(i64_eq, cc::EQUAL, cc::EQUAL, |a, b| a == b); + cmp_i64!(i64_neq, cc::NOT_EQUAL, cc::NOT_EQUAL, |a, b| a != b); + // `dynasm-rs` inexplicably doesn't support setb but `setnae` (and `setc`) are synonymous + cmp_i64!(i64_lt_u, cc::LT_U, cc::GT_U, |a, b| (a as u64) < (b as u64)); + cmp_i64!(i64_le_u, cc::LE_U, cc::GE_U, |a, b| (a as u64) + <= (b as u64)); + cmp_i64!(i64_gt_u, cc::GT_U, cc::LT_U, |a, b| (a as u64) > (b as u64)); + cmp_i64!(i64_ge_u, cc::GE_U, cc::LE_U, |a, b| (a as u64) + >= (b as u64)); + cmp_i64!(i64_lt_s, cc::LT_S, cc::GT_S, |a, b| a < b); + cmp_i64!(i64_le_s, cc::LE_S, cc::GE_S, |a, b| a <= b); + cmp_i64!(i64_gt_s, cc::GT_S, cc::LT_S, |a, b| a > b); + cmp_i64!(i64_ge_s, cc::GE_S, cc::LE_S, |a, b| a >= b); + + cmp_f32!(f32_gt, f32_lt, seta, |a, b| a > b); + cmp_f32!(f32_ge, f32_le, setnc, |a, b| a >= b); + eq_float!( + f32_eq, + cmpeqss, + as_f32, + |a: Ieee32, b: Ieee32| f32::from_bits(a.to_bits()) == f32::from_bits(b.to_bits()) + ); + eq_float!( + f32_ne, + cmpneqss, + as_f32, + |a: Ieee32, b: Ieee32| f32::from_bits(a.to_bits()) != f32::from_bits(b.to_bits()) + ); + + cmp_f64!(f64_gt, f64_lt, seta, |a, b| a > b); + cmp_f64!(f64_ge, f64_le, setnc, |a, b| a >= b); + eq_float!( + f64_eq, + cmpeqsd, + as_f64, + |a: Ieee64, b: Ieee64| f64::from_bits(a.to_bits()) == f64::from_bits(b.to_bits()) + ); + eq_float!( + f64_ne, + cmpneqsd, + as_f64, + |a: Ieee64, b: Ieee64| f64::from_bits(a.to_bits()) != f64::from_bits(b.to_bits()) + ); + + // TODO: Should we do this logic in `eq` and just have this delegate to `eq`? + // That would mean that `eqz` and `eq` with a const 0 argument don't + // result in different code. 
It would also allow us to generate better + // code for `neq` and `gt_u` with const 0 operand + pub fn i32_eqz(&mut self) { + let mut val = self.pop(); + + if let ValueLocation::Immediate(Value::I32(i)) = val { + self.push(ValueLocation::Immediate( + (if i == 0 { 1i32 } else { 0 }).into(), + )); + return; + } + + if let ValueLocation::Cond(loc) = val { + self.push(ValueLocation::Cond(!loc)); + return; + } + + let reg = self.into_reg(I32, &mut val).unwrap(); + let out = self.take_reg(I32).unwrap(); + + dynasm!(self.asm + ; xor Rd(out.rq().unwrap()), Rd(out.rq().unwrap()) + ; test Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap()) + ; setz Rb(out.rq().unwrap()) + ); + + self.free_value(val); + + self.push(ValueLocation::Reg(out)); + } + + pub fn i64_eqz(&mut self) { + let mut val = self.pop(); + + if let ValueLocation::Immediate(Value::I64(i)) = val { + self.push(ValueLocation::Immediate( + (if i == 0 { 1i32 } else { 0 }).into(), + )); + return; + } + + if let ValueLocation::Cond(loc) = val { + self.push(ValueLocation::Cond(!loc)); + return; + } + + let reg = self.into_reg(I64, &mut val).unwrap(); + let out = self.take_reg(I64).unwrap(); + + dynasm!(self.asm + ; xor Rd(out.rq().unwrap()), Rd(out.rq().unwrap()) + ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap()) + ; setz Rb(out.rq().unwrap()) + ); + + self.free_value(val); + + self.push(ValueLocation::Reg(out)); + } + + fn br_on_cond_code(&mut self, label: Label, cond: CondCode) { + match cond { + cc::EQUAL => dynasm!(self.asm + ; je =>label.0 + ), + cc::NOT_EQUAL => dynasm!(self.asm + ; jne =>label.0 + ), + cc::GT_U => dynasm!(self.asm + ; ja =>label.0 + ), + cc::GE_U => dynasm!(self.asm + ; jae =>label.0 + ), + cc::LT_U => dynasm!(self.asm + ; jb =>label.0 + ), + cc::LE_U => dynasm!(self.asm + ; jbe =>label.0 + ), + cc::GT_S => dynasm!(self.asm + ; jg =>label.0 + ), + cc::GE_S => dynasm!(self.asm + ; jge =>label.0 + ), + cc::LT_S => dynasm!(self.asm + ; jl =>label.0 + ), + cc::LE_S => dynasm!(self.asm + ; jle =>label.0 + ), + } + } + + /// Pops i32 predicate and branches to the specified label + /// if the predicate is equal to zero. + pub fn br_if_false( + &mut self, + target: impl Into>, + pass_args: impl FnOnce(&mut Self), + ) { + let mut val = self.pop(); + let label = target + .into() + .label() + .map(|c| *c) + .unwrap_or_else(|| self.ret_label()); + + let cond = match val { + ValueLocation::Cond(cc) => !cc, + _ => { + let predicate = self.into_reg(I32, &mut val).unwrap(); + dynasm!(self.asm + ; test Rd(predicate.rq().unwrap()), Rd(predicate.rq().unwrap()) + ); + + CondCode::ZF0 + } + }; + + self.free_value(val); + + pass_args(self); + + self.br_on_cond_code(label, cond); + } + + /// Pops i32 predicate and branches to the specified label + /// if the predicate is not equal to zero. + pub fn br_if_true( + &mut self, + target: impl Into>, + pass_args: impl FnOnce(&mut Self), + ) { + let mut val = self.pop(); + let label = target + .into() + .label() + .map(|c| *c) + .unwrap_or_else(|| self.ret_label()); + + let cond = match val { + ValueLocation::Cond(cc) => cc, + _ => { + let predicate = self.into_reg(I32, &mut val).unwrap(); + dynasm!(self.asm + ; test Rd(predicate.rq().unwrap()), Rd(predicate.rq().unwrap()) + ); + + CondCode::ZF1 + } + }; + + self.free_value(val); + + pass_args(self); + + self.br_on_cond_code(label, cond); + } + + /// Branch unconditionally to the specified label. 
+ pub fn br(&mut self, label: impl Into>) { + match label.into() { + BrTarget::Return => self.ret(), + BrTarget::Label(label) => dynasm!(self.asm + ; jmp =>label.0 + ), + } + } + + /// If `default` is `None` then the default is just continuing execution + pub fn br_table( + &mut self, + targets: I, + default: Option>, + pass_args: impl FnOnce(&mut Self), + ) where + I: IntoIterator>>, + I::IntoIter: ExactSizeIterator, + { + let mut targets = targets.into_iter(); + let count = targets.len(); + + let mut selector = self.pop(); + + pass_args(self); + + if let Some(imm) = selector.imm_i32() { + if let Some(target) = targets.nth(imm as _).or(Some(default)).and_then(|a| a) { + match target { + BrTarget::Label(label) => self.br(label), + BrTarget::Return => { + dynasm!(self.asm + ; ret + ); + } + } + } + } else { + let end_label = self.create_label(); + + if count > 0 { + let (selector_reg, pop_selector) = self + .into_temp_reg(GPRType::Rq, &mut selector) + .map(|r| (r, false)) + .unwrap_or_else(|| { + self.push_physical(ValueLocation::Reg(RAX)); + self.block_state.regs.mark_used(RAX); + (RAX, true) + }); + + let (tmp, pop_tmp) = if let Some(reg) = self.take_reg(I64) { + (reg, false) + } else { + let out_reg = if selector_reg == RAX { RCX } else { RAX }; + + self.push_physical(ValueLocation::Reg(out_reg)); + self.block_state.regs.mark_used(out_reg); + + (out_reg, true) + }; + + self.immediate_to_reg(tmp, (count as u32).into()); + dynasm!(self.asm + ; cmp Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap()) + ; cmova Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap()) + ; lea Rq(tmp.rq().unwrap()), [>start_label] + ; lea Rq(selector_reg.rq().unwrap()), [ + Rq(selector_reg.rq().unwrap()) * 5 + ] + ; add Rq(selector_reg.rq().unwrap()), Rq(tmp.rq().unwrap()) + ); + + if pop_tmp { + dynasm!(self.asm + ; pop Rq(tmp.rq().unwrap()) + ); + } else { + self.block_state.regs.release(tmp); + } + + if pop_selector { + dynasm!(self.asm + ; pop Rq(selector_reg.rq().unwrap()) + ); + } + + dynasm!(self.asm + ; jmp Rq(selector_reg.rq().unwrap()) + ; start_label: + ); + + for target in targets { + let label = target + .map(|target| self.target_to_label(target)) + .unwrap_or(end_label); + dynasm!(self.asm + ; jmp =>label.0 + ); + } + } + + if let Some(def) = default { + match def { + BrTarget::Label(label) => dynasm!(self.asm + ; jmp =>label.0 + ), + BrTarget::Return => dynasm!(self.asm + ; ret + ), + } + } + + self.define_label(end_label); + } + + self.free_value(selector); + } + + fn set_stack_depth(&mut self, depth: StackDepth) { + if self.block_state.depth.0 != depth.0 { + let diff = depth.0 as i32 - self.block_state.depth.0 as i32; + let emit_lea = if diff.abs() == 1 { + if self.block_state.depth.0 < depth.0 { + for _ in 0..diff { + dynasm!(self.asm + ; push rax + ); + } + + false + } else if self.block_state.depth.0 > depth.0 { + if let Some(trash) = self.take_reg(I64) { + for _ in 0..self.block_state.depth.0 - depth.0 { + dynasm!(self.asm + ; pop Rq(trash.rq().unwrap()) + ); + } + self.block_state.regs.release(trash); + + false + } else { + true + } + } else { + false + } + } else { + true + }; + + if emit_lea { + dynasm!(self.asm + ; lea rsp, [rsp + (self.block_state.depth.0 as i32 - depth.0 as i32) * WORD_SIZE as i32] + ); + } + + self.block_state.depth = depth; + } + } + + fn do_pass_block_args(&mut self, cc: &BlockCallingConvention) { + let args = &cc.arguments; + for &dst in args.iter().rev().take(self.block_state.stack.len()) { + if let CCLoc::Reg(r) = dst { + if !self.block_state.regs.is_free(r) + && 
*self.block_state.stack.last().unwrap() != ValueLocation::Reg(r) + { + // TODO: This would be made simpler and more efficient with a proper SSE + // representation. + self.save_regs(&[r], ..); + } + + self.block_state.regs.mark_used(r); + } + self.pop_into(dst); + } + } + + pub fn pass_block_args(&mut self, cc: &BlockCallingConvention) { + self.do_pass_block_args(cc); + self.set_stack_depth(cc.stack_depth); + } + + pub fn serialize_block_args( + &mut self, + cc: &BlockCallingConvention, + params: u32, + ) -> BlockCallingConvention { + self.do_pass_block_args(cc); + + let mut out_args = cc.arguments.clone(); + + out_args.reverse(); + + while out_args.len() < params as usize { + let mut val = self.pop(); + + // TODO: We can use stack slots for values already on the stack but we + // don't refcount stack slots right now + out_args.push(self.into_temp_loc(None, &mut val)); + } + + out_args.reverse(); + + self.set_stack_depth(cc.stack_depth); + + BlockCallingConvention { + stack_depth: cc.stack_depth, + arguments: out_args, + } + } + + /// Puts all stack values into "real" locations so that they can i.e. be set to different + /// values on different iterations of a loop + pub fn serialize_args(&mut self, count: u32) -> BlockCallingConvention { + let mut out = Vec::with_capacity(count as _); + + // TODO: We can make this more efficient now that `pop` isn't so complicated + for _ in 0..count { + let mut val = self.pop(); + // TODO: We can use stack slots for values already on the stack but we + // don't refcount stack slots right now + let loc = self.into_temp_loc(None, &mut val); + + out.push(loc); + } + + out.reverse(); + + BlockCallingConvention { + stack_depth: self.block_state.depth, + arguments: out, + } + } + + pub fn get_global(&mut self, global_idx: u32) { + let (reg, offset) = self + .module_context + .defined_global_index(global_idx) + .map(|defined_global_index| { + ( + None, + self.module_context + .vmctx_vmglobal_definition(defined_global_index), + ) + }) + .unwrap_or_else(|| { + let reg = self.take_reg(I64).unwrap(); + + dynasm!(self.asm + ; mov Rq(reg.rq().unwrap()), [ + Rq(VMCTX) + + self.module_context.vmctx_vmglobal_import_from(global_idx) as i32 + ] + ); + + (Some(reg), 0) + }); + + let out = self.take_reg(GPRType::Rq).unwrap(); + let vmctx = GPR::Rq(VMCTX); + + // TODO: Are globals necessarily aligned to 128 bits? 
We can load directly to an XMM reg if so + dynasm!(self.asm + ; mov Rq(out.rq().unwrap()), [Rq(reg.unwrap_or(vmctx).rq().unwrap()) + offset as i32] + ); + + if let Some(reg) = reg { + self.block_state.regs.release(reg); + } + + self.push(ValueLocation::Reg(out)); + } + + pub fn set_global(&mut self, global_idx: u32) { + let mut val = self.pop(); + let (reg, offset) = self + .module_context + .defined_global_index(global_idx) + .map(|defined_global_index| { + ( + None, + self.module_context + .vmctx_vmglobal_definition(defined_global_index), + ) + }) + .unwrap_or_else(|| { + let reg = self.take_reg(I64).unwrap(); + + dynasm!(self.asm + ; mov Rq(reg.rq().unwrap()), [ + Rq(VMCTX) + + self.module_context.vmctx_vmglobal_import_from(global_idx) as i32 + ] + ); + + (Some(reg), 0) + }); + + let val_reg = self.into_reg(GPRType::Rq, &mut val).unwrap(); + let vmctx = GPR::Rq(VMCTX); + + // We always use `Rq` (even for floats) since the globals are not necessarily aligned to 128 bits + dynasm!(self.asm + ; mov [ + Rq(reg.unwrap_or(vmctx).rq().unwrap()) + offset as i32 + ], Rq(val_reg.rq().unwrap()) + ); + + if let Some(reg) = reg { + self.block_state.regs.release(reg); + } + + self.free_value(val); + } + + fn immediate_to_reg(&mut self, reg: GPR, val: Value) { + match reg { + GPR::Rq(r) => { + let val = val.as_bytes(); + if (val as u64) <= u32::max_value() as u64 { + dynasm!(self.asm + ; mov Rd(r), val as i32 + ); + } else { + dynasm!(self.asm + ; mov Rq(r), QWORD val + ); + } + } + GPR::Rx(r) => { + let label = self.aligned_label(16, LabelValue::from(val)); + dynasm!(self.asm + ; movq Rx(r), [=>label.0] + ); + } + } + } + + // The `&` and `&mut` aren't necessary (`ValueLocation` is copy) but it ensures that we don't get + // the arguments the wrong way around. In the future we want to have a `ReadLocation` and `WriteLocation` + // so we statically can't write to a literal so this will become a non-issue. 
+ fn copy_value(&mut self, src: ValueLocation, dst: CCLoc) { + match (src, dst) { + (ValueLocation::Cond(cond), CCLoc::Stack(o)) => { + let offset = self.adjusted_offset(o); + + dynasm!(self.asm + ; mov QWORD [rsp + offset], DWORD 0 + ); + + match cond { + cc::EQUAL => dynasm!(self.asm + ; sete [rsp + offset] + ), + cc::NOT_EQUAL => dynasm!(self.asm + ; setne [rsp + offset] + ), + cc::GT_U => dynasm!(self.asm + ; seta [rsp + offset] + ), + cc::GE_U => dynasm!(self.asm + ; setae [rsp + offset] + ), + cc::LT_U => dynasm!(self.asm + ; setb [rsp + offset] + ), + cc::LE_U => dynasm!(self.asm + ; setbe [rsp + offset] + ), + cc::GT_S => dynasm!(self.asm + ; setg [rsp + offset] + ), + cc::GE_S => dynasm!(self.asm + ; setge [rsp + offset] + ), + cc::LT_S => dynasm!(self.asm + ; setl [rsp + offset] + ), + cc::LE_S => dynasm!(self.asm + ; setle [rsp + offset] + ), + } + } + (ValueLocation::Cond(cond), CCLoc::Reg(reg)) => match reg { + GPR::Rq(r) => { + dynasm!(self.asm + ; mov Rq(r), 0 + ); + + match cond { + cc::EQUAL => dynasm!(self.asm + ; sete Rb(r) + ), + cc::NOT_EQUAL => dynasm!(self.asm + ; setne Rb(r) + ), + cc::GT_U => dynasm!(self.asm + ; seta Rb(r) + ), + cc::GE_U => dynasm!(self.asm + ; setae Rb(r) + ), + cc::LT_U => dynasm!(self.asm + ; setb Rb(r) + ), + cc::LE_U => dynasm!(self.asm + ; setbe Rb(r) + ), + cc::GT_S => dynasm!(self.asm + ; setg Rb(r) + ), + cc::GE_S => dynasm!(self.asm + ; setge Rb(r) + ), + cc::LT_S => dynasm!(self.asm + ; setl Rb(r) + ), + cc::LE_S => dynasm!(self.asm + ; setle Rb(r) + ), + } + } + GPR::Rx(_) => { + let temp = CCLoc::Reg(self.take_reg(I32).unwrap()); + self.copy_value(src, temp); + let temp = temp.into(); + self.copy_value(temp, dst); + self.free_value(temp); + } + }, + (ValueLocation::Stack(in_offset), CCLoc::Stack(out_offset)) => { + let in_offset = self.adjusted_offset(in_offset); + let out_offset = self.adjusted_offset(out_offset); + if in_offset != out_offset { + if let Some(gpr) = self.take_reg(I64) { + dynasm!(self.asm + ; mov Rq(gpr.rq().unwrap()), [rsp + in_offset] + ; mov [rsp + out_offset], Rq(gpr.rq().unwrap()) + ); + self.block_state.regs.release(gpr); + } else { + dynasm!(self.asm + ; push rax + ; mov rax, [rsp + in_offset + WORD_SIZE as i32] + ; mov [rsp + out_offset + WORD_SIZE as i32], rax + ; pop rax + ); + } + } + } + // TODO: XMM registers + (ValueLocation::Reg(in_reg), CCLoc::Stack(out_offset)) => { + let out_offset = self.adjusted_offset(out_offset); + match in_reg { + GPR::Rq(in_reg) => { + // We can always use `Rq` here for now because stack slots are in multiples of + // 8 bytes + dynasm!(self.asm + ; mov [rsp + out_offset], Rq(in_reg) + ); + } + GPR::Rx(in_reg) => { + // We can always use `movq` here for now because stack slots are in multiples of + // 8 bytes + dynasm!(self.asm + ; movq [rsp + out_offset], Rx(in_reg) + ); + } + } + } + (ValueLocation::Immediate(i), CCLoc::Stack(out_offset)) => { + // TODO: Floats + let i = i.as_bytes(); + let out_offset = self.adjusted_offset(out_offset); + if (i as u64) <= u32::max_value() as u64 { + dynasm!(self.asm + ; mov DWORD [rsp + out_offset], i as i32 + ); + } else { + if let Some(scratch) = self.take_reg(I64) { + dynasm!(self.asm + ; mov Rq(scratch.rq().unwrap()), QWORD i + ; mov [rsp + out_offset], Rq(scratch.rq().unwrap()) + ); + + self.block_state.regs.release(scratch); + } else { + dynasm!(self.asm + ; push rax + ; mov rax, QWORD i + ; mov [rsp + out_offset + WORD_SIZE as i32], rax + ; pop rax + ); + } + } + } + (ValueLocation::Stack(in_offset), CCLoc::Reg(out_reg)) => { + let 
in_offset = self.adjusted_offset(in_offset); + match out_reg { + GPR::Rq(out_reg) => { + // We can always use `Rq` here for now because stack slots are in multiples of + // 8 bytes + dynasm!(self.asm + ; mov Rq(out_reg), [rsp + in_offset] + ); + } + GPR::Rx(out_reg) => { + // We can always use `movq` here for now because stack slots are in multiples of + // 8 bytes + dynasm!(self.asm + ; movq Rx(out_reg), [rsp + in_offset] + ); + } + } + } + (ValueLocation::Reg(in_reg), CCLoc::Reg(out_reg)) => { + if in_reg != out_reg { + match (in_reg, out_reg) { + (GPR::Rq(in_reg), GPR::Rq(out_reg)) => { + dynasm!(self.asm + ; mov Rq(out_reg), Rq(in_reg) + ); + } + (GPR::Rx(in_reg), GPR::Rq(out_reg)) => { + dynasm!(self.asm + ; movq Rq(out_reg), Rx(in_reg) + ); + } + (GPR::Rq(in_reg), GPR::Rx(out_reg)) => { + dynasm!(self.asm + ; movq Rx(out_reg), Rq(in_reg) + ); + } + (GPR::Rx(in_reg), GPR::Rx(out_reg)) => { + dynasm!(self.asm + ; movapd Rx(out_reg), Rx(in_reg) + ); + } + } + } + } + (ValueLocation::Immediate(i), CCLoc::Reg(out_reg)) => { + // TODO: Floats + self.immediate_to_reg(out_reg, i); + } + } + } + + /// Define the given label at the current position. + /// + /// Multiple labels can be defined at the same position. However, a label + /// can be defined only once. + pub fn define_label(&mut self, label: Label) { + self.asm.dynamic_label(label.0); + } + + pub fn set_state(&mut self, state: VirtualCallingConvention) { + self.block_state.regs = Registers::new(); + for elem in &state.stack { + if let ValueLocation::Reg(r) = elem { + self.block_state.regs.mark_used(*r); + } + } + self.block_state.stack = state.stack; + self.block_state.depth = state.depth; + } + + pub fn apply_cc(&mut self, cc: &BlockCallingConvention) { + let stack = cc.arguments.iter(); + + self.block_state.stack = Vec::with_capacity(stack.size_hint().0); + self.block_state.regs = Registers::new(); + + for &elem in stack { + if let CCLoc::Reg(r) = elem { + self.block_state.regs.mark_used(r); + } + + self.block_state.stack.push(elem.into()); + } + + self.block_state.depth = cc.stack_depth; + } + + load!(i32_load, GPRType::Rq, Rd, movd, mov, DWORD); + load!(i64_load, GPRType::Rq, Rq, movq, mov, QWORD); + load!(f32_load, GPRType::Rx, Rd, movd, mov, DWORD); + load!(f64_load, GPRType::Rx, Rq, movq, mov, QWORD); + + load!(i32_load8_u, GPRType::Rq, Rd, NONE, movzx, BYTE); + load!(i32_load8_s, GPRType::Rq, Rd, NONE, movsx, BYTE); + load!(i32_load16_u, GPRType::Rq, Rd, NONE, movzx, WORD); + load!(i32_load16_s, GPRType::Rq, Rd, NONE, movsx, WORD); + + load!(i64_load8_u, GPRType::Rq, Rq, NONE, movzx, BYTE); + load!(i64_load8_s, GPRType::Rq, Rq, NONE, movsx, BYTE); + load!(i64_load16_u, GPRType::Rq, Rq, NONE, movzx, WORD); + load!(i64_load16_s, GPRType::Rq, Rq, NONE, movsx, WORD); + load!(i64_load32_u, GPRType::Rq, Rd, movd, mov, DWORD); + load!(i64_load32_s, GPRType::Rq, Rq, NONE, movsxd, DWORD); + + store!(store8, Rb, NONE, DWORD); + store!(store16, Rw, NONE, QWORD); + store!(store32, Rd, movd, DWORD); + store!(store64, Rq, movq, QWORD); + + fn push_physical(&mut self, mut value: ValueLocation) -> ValueLocation { + let out_offset = -(self.block_state.depth.0 as i32 + 1); + match value { + ValueLocation::Reg(_) | ValueLocation::Immediate(_) | ValueLocation::Cond(_) => { + if let Some(gpr) = self.into_reg(GPRType::Rq, &mut value) { + dynasm!(self.asm + ; push Rq(gpr.rq().unwrap()) + ); + } else { + dynasm!(self.asm + ; push rax + ); + + self.copy_value(value, CCLoc::Stack(out_offset)); + } + + self.free_value(value); + } + 
ValueLocation::Stack(o) => { + let offset = self.adjusted_offset(o); + dynasm!(self.asm + ; push QWORD [rsp + offset] + ); + } + } + + self.block_state.depth.reserve(1); + + ValueLocation::Stack(out_offset) + } + + fn push(&mut self, value: ValueLocation) { + if let Some(mut top) = self.block_state.stack.pop() { + if let ValueLocation::Cond(_) = top { + self.into_reg(I32, &mut top).unwrap(); + } + + self.block_state.stack.push(top); + } + + self.block_state.stack.push(value); + } + + fn pop(&mut self) -> ValueLocation { + self.block_state.stack.pop().expect("Stack is empty") + } + + pub fn drop(&mut self, range: RangeInclusive) { + let mut repush = Vec::with_capacity(*range.start() as _); + + for _ in 0..*range.start() { + repush.push(self.pop()); + } + + for _ in range { + let val = self.pop(); + self.free_value(val); + } + + for v in repush.into_iter().rev() { + self.push(v); + } + } + + fn pop_into(&mut self, dst: CCLoc) { + let val = self.pop(); + self.copy_value(val, dst); + self.free_value(val); + } + + fn free_value(&mut self, val: ValueLocation) { + match val { + ValueLocation::Reg(r) => { + self.block_state.regs.release(r); + } + // TODO: Refcounted stack slots + _ => {} + } + } + + /// Puts this value into a register so that it can be efficiently read + fn into_reg(&mut self, ty: impl Into>, val: &mut ValueLocation) -> Option { + let out = self.to_reg(ty, *val)?; + self.free_value(*val); + *val = ValueLocation::Reg(out); + Some(out) + } + + /// Clones this value into a register so that it can be efficiently read + fn to_reg(&mut self, ty: impl Into>, val: ValueLocation) -> Option { + let ty = ty.into(); + match val { + ValueLocation::Reg(r) if ty.map(|t| t == r.type_()).unwrap_or(true) => { + self.block_state.regs.mark_used(r); + Some(r) + } + val => { + let scratch = self.take_reg(ty.unwrap_or(GPRType::Rq))?; + + self.copy_value(val, CCLoc::Reg(scratch)); + + Some(scratch) + } + } + } + + /// Puts this value into a temporary register so that operations + /// on that register don't write to a local. + fn into_temp_reg( + &mut self, + ty: impl Into>, + val: &mut ValueLocation, + ) -> Option { + let out = self.to_temp_reg(ty, *val)?; + self.free_value(*val); + *val = ValueLocation::Reg(out); + Some(out) + } + + fn into_temp_loc(&mut self, ty: impl Into>, val: &mut ValueLocation) -> CCLoc { + match val { + _ => { + if let Some(gpr) = self.into_temp_reg(ty, val) { + CCLoc::Reg(gpr) + } else { + let out = CCLoc::Stack(self.push_physical(*val).stack().unwrap()); + *val = out.into(); + out + } + } + } + } + + /// Clones this value into a temporary register so that operations + /// on that register don't write to a local. 
+ fn to_temp_reg(&mut self, ty: impl Into>, val: ValueLocation) -> Option { + // If we have `None` as the type then it always matches (`.unwrap_or(true)`) + match val { + ValueLocation::Reg(r) => { + let ty = ty.into(); + let type_matches = ty.map(|t| t == r.type_()).unwrap_or(true); + + if self.block_state.regs.num_usages(r) <= 1 && type_matches { + self.block_state.regs.mark_used(r); + Some(r) + } else { + let scratch = self.take_reg(ty.unwrap_or(GPRType::Rq))?; + + self.copy_value(val, CCLoc::Reg(scratch)); + + Some(scratch) + } + } + val => self.to_reg(ty, val), + } + } + + pub fn f32_neg(&mut self) { + let mut val = self.pop(); + + let out = if let Some(i) = val.imm_f32() { + ValueLocation::Immediate( + Ieee32::from_bits((-f32::from_bits(i.to_bits())).to_bits()).into(), + ) + } else { + let reg = self.into_temp_reg(GPRType::Rx, &mut val).unwrap(); + let const_label = self.aligned_label(16, LabelValue::I32(SIGN_MASK_F32 as i32)); + + dynasm!(self.asm + ; xorps Rx(reg.rx().unwrap()), [=>const_label.0] + ); + + val + }; + + self.push(out); + } + + pub fn f64_neg(&mut self) { + let mut val = self.pop(); + + let out = if let Some(i) = val.imm_f64() { + ValueLocation::Immediate( + Ieee64::from_bits((-f64::from_bits(i.to_bits())).to_bits()).into(), + ) + } else { + let reg = self.into_temp_reg(GPRType::Rx, &mut val).unwrap(); + let const_label = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); + + dynasm!(self.asm + ; xorpd Rx(reg.rx().unwrap()), [=>const_label.0] + ); + + val + }; + + self.push(out); + } + + pub fn f32_abs(&mut self) { + let mut val = self.pop(); + + let out = if let Some(i) = val.imm_f32() { + ValueLocation::Immediate( + Ieee32::from_bits(f32::from_bits(i.to_bits()).abs().to_bits()).into(), + ) + } else { + let reg = self.into_temp_reg(GPRType::Rx, &mut val).unwrap(); + let const_label = self.aligned_label(16, LabelValue::I32(REST_MASK_F32 as i32)); + + dynasm!(self.asm + ; andps Rx(reg.rx().unwrap()), [=>const_label.0] + ); + + val + }; + + self.push(out); + } + + pub fn f64_abs(&mut self) { + let mut val = self.pop(); + + let out = if let Some(i) = val.imm_f64() { + ValueLocation::Immediate( + Ieee64::from_bits(f64::from_bits(i.to_bits()).abs().to_bits()).into(), + ) + } else { + let reg = self.into_temp_reg(GPRType::Rx, &mut val).unwrap(); + let const_label = self.aligned_label(16, LabelValue::I64(REST_MASK_F64 as i64)); + + dynasm!(self.asm + ; andps Rx(reg.rx().unwrap()), [=>const_label.0] + ); + + val + }; + + self.push(out); + } + + pub fn f32_sqrt(&mut self) { + let mut val = self.pop(); + + let out = if let Some(i) = val.imm_f32() { + ValueLocation::Immediate( + Ieee32::from_bits(f32::from_bits(i.to_bits()).sqrt().to_bits()).into(), + ) + } else { + let reg = self.into_temp_reg(GPRType::Rx, &mut val).unwrap(); + + dynasm!(self.asm + ; sqrtss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) + ); + + val + }; + + self.push(out); + } + + pub fn f64_sqrt(&mut self) { + let mut val = self.pop(); + + let out = if let Some(i) = val.imm_f64() { + ValueLocation::Immediate( + Ieee64::from_bits(f64::from_bits(i.to_bits()).sqrt().to_bits()).into(), + ) + } else { + let reg = self.into_temp_reg(GPRType::Rx, &mut val).unwrap(); + + dynasm!(self.asm + ; sqrtsd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) + ); + + ValueLocation::Reg(reg) + }; + + self.push(out); + } + + pub fn f32_copysign(&mut self) { + let mut right = self.pop(); + let mut left = self.pop(); + + let out = if let (Some(left), Some(right)) = (left.imm_f32(), right.imm_f32()) { + 
ValueLocation::Immediate( + Ieee32::from_bits( + (left.to_bits() & REST_MASK_F32) | (right.to_bits() & SIGN_MASK_F32), + ) + .into(), + ) + } else { + let lreg = self.into_temp_reg(GPRType::Rx, &mut left).unwrap(); + let rreg = self.into_reg(GPRType::Rx, &mut right).unwrap(); + let sign_mask = self.aligned_label(16, LabelValue::I32(SIGN_MASK_F32 as i32)); + let rest_mask = self.aligned_label(16, LabelValue::I32(REST_MASK_F32 as i32)); + + dynasm!(self.asm + ; andps Rx(rreg.rx().unwrap()), [=>sign_mask.0] + ; andps Rx(lreg.rx().unwrap()), [=>rest_mask.0] + ; orps Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) + ); + + self.free_value(right); + + left + }; + + self.push(out); + } + + pub fn f64_copysign(&mut self) { + let mut right = self.pop(); + let mut left = self.pop(); + + let out = if let (Some(left), Some(right)) = (left.imm_f64(), right.imm_f64()) { + ValueLocation::Immediate( + Ieee64::from_bits( + (left.to_bits() & REST_MASK_F64) | (right.to_bits() & SIGN_MASK_F64), + ) + .into(), + ) + } else { + let lreg = self.into_temp_reg(GPRType::Rx, &mut left).unwrap(); + let rreg = self.into_reg(GPRType::Rx, &mut right).unwrap(); + let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); + let rest_mask = self.aligned_label(16, LabelValue::I64(REST_MASK_F64 as i64)); + + dynasm!(self.asm + ; andpd Rx(rreg.rx().unwrap()), [=>sign_mask.0] + ; andpd Rx(lreg.rx().unwrap()), [=>rest_mask.0] + ; orpd Rx(lreg.rx().unwrap()), Rx(rreg.rx().unwrap()) + ); + + self.free_value(right); + + left + }; + + self.push(out); + } + + pub fn i32_clz(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => { + ValueLocation::Immediate(imm.as_i32().unwrap().leading_zeros().into()) + } + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + let temp = self.take_reg(I32).unwrap(); + + if is_x86_feature_detected!("lzcnt") { + dynasm!(self.asm + ; lzcnt Rd(temp.rq().unwrap()), [rsp + offset] + ); + ValueLocation::Reg(temp) + } else { + let temp_2 = self.take_reg(I32).unwrap(); + + dynasm!(self.asm + ; bsr Rd(temp.rq().unwrap()), [rsp + offset] + ; mov Rd(temp_2.rq().unwrap()), DWORD 0x3fu64 as _ + ; cmove Rd(temp.rq().unwrap()), Rd(temp_2.rq().unwrap()) + ; mov Rd(temp_2.rq().unwrap()), DWORD 0x1fu64 as _ + ; xor Rd(temp.rq().unwrap()), Rd(temp_2.rq().unwrap()) + ); + self.free_value(ValueLocation::Reg(temp_2)); + ValueLocation::Reg(temp) + } + } + ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let reg = self.into_reg(GPRType::Rq, &mut val).unwrap(); + let temp = self.take_reg(I32).unwrap(); + + if is_x86_feature_detected!("lzcnt") { + dynasm!(self.asm + ; lzcnt Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) + ); + ValueLocation::Reg(temp) + } else { + dynasm!(self.asm + ; bsr Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) + ; mov Rd(reg.rq().unwrap()), DWORD 0x3fu64 as _ + ; cmove Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) + ; mov Rd(reg.rq().unwrap()), DWORD 0x1fu64 as _ + ; xor Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) + ); + ValueLocation::Reg(temp) + } + } + }; + + self.free_value(val); + self.push(out_val); + } + + pub fn i64_clz(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => { + ValueLocation::Immediate((imm.as_i64().unwrap().leading_zeros() as u64).into()) + } + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + let temp = self.take_reg(I64).unwrap(); + + if is_x86_feature_detected!("lzcnt") { + dynasm!(self.asm + 
; lzcnt Rq(temp.rq().unwrap()), [rsp + offset] + ); + ValueLocation::Reg(temp) + } else { + let temp_2 = self.take_reg(I64).unwrap(); + + dynasm!(self.asm + ; bsr Rq(temp.rq().unwrap()), [rsp + offset] + ; mov Rq(temp_2.rq().unwrap()), QWORD 0x7fu64 as _ + ; cmove Rq(temp.rq().unwrap()), Rq(temp_2.rq().unwrap()) + ; mov Rq(temp_2.rq().unwrap()), QWORD 0x3fu64 as _ + ; xor Rq(temp.rq().unwrap()), Rq(temp_2.rq().unwrap()) + ); + self.free_value(ValueLocation::Reg(temp_2)); + ValueLocation::Reg(temp) + } + } + ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let reg = self.into_reg(GPRType::Rq, &mut val).unwrap(); + let temp = self.take_reg(I64).unwrap(); + + if is_x86_feature_detected!("lzcnt") { + dynasm!(self.asm + ; lzcnt Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) + ); + ValueLocation::Reg(temp) + } else { + dynasm!(self.asm + ; bsr Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) + ; mov Rq(reg.rq().unwrap()), QWORD 0x7fu64 as _ + ; cmove Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) + ; mov Rq(reg.rq().unwrap()), QWORD 0x3fu64 as _ + ; xor Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) + ); + ValueLocation::Reg(temp) + } + } + }; + + self.free_value(val); + self.push(out_val); + } + + pub fn i32_ctz(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => { + ValueLocation::Immediate(imm.as_i32().unwrap().trailing_zeros().into()) + } + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + let temp = self.take_reg(I32).unwrap(); + + if is_x86_feature_detected!("lzcnt") { + dynasm!(self.asm + ; tzcnt Rd(temp.rq().unwrap()), [rsp + offset] + ); + ValueLocation::Reg(temp) + } else { + let temp_zero_val = self.take_reg(I32).unwrap(); + + dynasm!(self.asm + ; bsf Rd(temp.rq().unwrap()), [rsp + offset] + ; mov Rd(temp_zero_val.rq().unwrap()), DWORD 0x20u32 as _ + ; cmove Rd(temp.rq().unwrap()), Rd(temp_zero_val.rq().unwrap()) + ); + self.free_value(ValueLocation::Reg(temp_zero_val)); + ValueLocation::Reg(temp) + } + } + ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let reg = self.into_reg(GPRType::Rq, &mut val).unwrap(); + let temp = self.take_reg(I32).unwrap(); + + if is_x86_feature_detected!("lzcnt") { + dynasm!(self.asm + ; tzcnt Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) + ); + ValueLocation::Reg(temp) + } else { + dynasm!(self.asm + ; bsf Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) + ; mov Rd(reg.rq().unwrap()), DWORD 0x20u32 as _ + ; cmove Rd(temp.rq().unwrap()), Rd(reg.rq().unwrap()) + ); + ValueLocation::Reg(temp) + } + } + }; + + self.free_value(val); + self.push(out_val); + } + + pub fn i64_ctz(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => { + ValueLocation::Immediate((imm.as_i64().unwrap().trailing_zeros() as u64).into()) + } + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + let temp = self.take_reg(I64).unwrap(); + + if is_x86_feature_detected!("lzcnt") { + dynasm!(self.asm + ; tzcnt Rq(temp.rq().unwrap()), [rsp + offset] + ); + ValueLocation::Reg(temp) + } else { + let temp_zero_val = self.take_reg(I64).unwrap(); + + dynasm!(self.asm + ; bsf Rq(temp.rq().unwrap()), [rsp + offset] + ; mov Rq(temp_zero_val.rq().unwrap()), QWORD 0x40u64 as _ + ; cmove Rq(temp.rq().unwrap()), Rq(temp_zero_val.rq().unwrap()) + ); + self.free_value(ValueLocation::Reg(temp_zero_val)); + ValueLocation::Reg(temp) + } + } + ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let reg = self.into_reg(GPRType::Rq, &mut 
val).unwrap(); + let temp = self.take_reg(I64).unwrap(); + + dynasm!(self.asm + ; bsf Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) + ; mov Rq(reg.rq().unwrap()), QWORD 0x40u64 as _ + ; cmove Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) + ); + ValueLocation::Reg(temp) + } + }; + + self.free_value(val); + self.push(out_val); + } + + pub fn i32_extend_u(&mut self) { + let val = self.pop(); + + let out = if let ValueLocation::Immediate(imm) = val { + ValueLocation::Immediate((imm.as_i32().unwrap() as u32 as u64).into()) + } else { + let new_reg = self.take_reg(I64).unwrap(); + + // TODO: Track set-ness of bits - we can make this a no-op in most cases + // but we have to make this unconditional just in case this value + // came from a truncate. + match val { + ValueLocation::Reg(GPR::Rx(rxreg)) => { + dynasm!(self.asm + ; movd Rd(new_reg.rq().unwrap()), Rx(rxreg) + ); + } + ValueLocation::Reg(GPR::Rq(rqreg)) => { + dynasm!(self.asm + ; mov Rd(new_reg.rq().unwrap()), Rd(rqreg) + ); + } + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + + dynasm!(self.asm + ; mov Rd(new_reg.rq().unwrap()), [rsp + offset] + ); + } + ValueLocation::Cond(_) => self.copy_value(val, CCLoc::Reg(new_reg)), + ValueLocation::Immediate(_) => unreachable!(), + } + + ValueLocation::Reg(new_reg) + }; + + self.free_value(val); + + self.push(out); + } + + pub fn i32_extend_s(&mut self) { + let val = self.pop(); + + self.free_value(val); + let new_reg = self.take_reg(I64).unwrap(); + + let out = if let ValueLocation::Immediate(imm) = val { + self.block_state.regs.release(new_reg); + ValueLocation::Immediate((imm.as_i32().unwrap() as i64).into()) + } else { + match val { + ValueLocation::Reg(GPR::Rx(rxreg)) => { + dynasm!(self.asm + ; movd Rd(new_reg.rq().unwrap()), Rx(rxreg) + ; movsxd Rq(new_reg.rq().unwrap()), Rd(new_reg.rq().unwrap()) + ); + } + ValueLocation::Reg(GPR::Rq(rqreg)) => { + dynasm!(self.asm + ; movsxd Rq(new_reg.rq().unwrap()), Rd(rqreg) + ); + } + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + + dynasm!(self.asm + ; movsxd Rq(new_reg.rq().unwrap()), DWORD [rsp + offset] + ); + } + _ => unreachable!(), + } + + ValueLocation::Reg(new_reg) + }; + + self.push(out); + } + + unop!(i32_popcnt, popcnt, Rd, u32, u32::count_ones); + conversion!( + f64_from_f32, + cvtss2sd, + Rx, + rx, + Rx, + rx, + f32, + f64, + as_f32, + |a: Ieee32| Ieee64::from_bits((f32::from_bits(a.to_bits()) as f64).to_bits()) + ); + conversion!( + f32_from_f64, + cvtsd2ss, + Rx, + rx, + Rx, + rx, + f64, + f32, + as_f64, + |a: Ieee64| Ieee32::from_bits((f64::from_bits(a.to_bits()) as f32).to_bits()) + ); + pub fn i32_truncate_f32_s(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => ValueLocation::Immediate( + (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i32).into(), + ), + _ => { + let reg = self.into_reg(F32, &mut val).unwrap(); + let temp = self.take_reg(I32).unwrap(); + + let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32)); + let float_cmp_mask = self.aligned_label(16, LabelValue::I32(0xcf000000u32 as i32)); + let zero = self.aligned_label(16, LabelValue::I32(0)); + let trap_label = self.trap_label(); + + dynasm!(self.asm + ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) + ; cmp Rd(temp.rq().unwrap()), [=>sign_mask.0] + ; jne >ret + ; ucomiss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) + ; jp =>trap_label.0 + ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] + ; jnae 
=>trap_label.0 + ; ucomiss Rx(reg.rx().unwrap()), [=>zero.0] + ; jnb =>trap_label.0 + ; ret: + ); + + ValueLocation::Reg(temp) + } + }; + + self.free_value(val); + + self.push(out_val); + } + + pub fn i32_truncate_f32_u(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => ValueLocation::Immediate( + (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i32).into(), + ), + _ => { + let reg = self.into_temp_reg(F32, &mut val).unwrap(); + let temp = self.take_reg(I32).unwrap(); + + let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32)); + let float_cmp_mask = self.aligned_label(16, LabelValue::I32(0x4f000000u32 as i32)); + let trap_label = self.trap_label(); + + dynasm!(self.asm + ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] + ; jae >else_ + ; jp =>trap_label.0 + ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) + ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap()) + ; js =>trap_label.0 + ; jmp >ret + ; else_: + ; subss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] + ; cvttss2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) + ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap()) + ; js =>trap_label.0 + ; add Rq(temp.rq().unwrap()), [=>sign_mask.0] + ; ret: + ); + + ValueLocation::Reg(temp) + } + }; + + self.free_value(val); + + self.push(out_val); + } + + pub fn i32_truncate_f64_s(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => ValueLocation::Immediate( + (f64::from_bits(imm.as_f64().unwrap().to_bits()) as i32).into(), + ), + _ => { + let reg = self.into_reg(F32, &mut val).unwrap(); + let temp = self.take_reg(I32).unwrap(); + + let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32)); + let float_cmp_mask = + self.aligned_label(16, LabelValue::I64(0xc1e0000000200000u64 as i64)); + let zero = self.aligned_label(16, LabelValue::I64(0)); + let trap_label = self.trap_label(); + + dynasm!(self.asm + ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) + ; cmp Rd(temp.rq().unwrap()), [=>sign_mask.0] + ; jne >ret + ; ucomisd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) + ; jp =>trap_label.0 + ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] + ; jna =>trap_label.0 + ; ucomisd Rx(reg.rx().unwrap()), [=>zero.0] + ; jnb =>trap_label.0 + ; ret: + ); + + ValueLocation::Reg(temp) + } + }; + + self.free_value(val); + + self.push(out_val); + } + + pub fn i32_truncate_f64_u(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => ValueLocation::Immediate( + (f64::from_bits(imm.as_f64().unwrap().to_bits()) as u32).into(), + ), + _ => { + let reg = self.into_temp_reg(F32, &mut val).unwrap(); + let temp = self.take_reg(I32).unwrap(); + + let sign_mask = self.aligned_label(4, LabelValue::I32(SIGN_MASK_F32 as i32)); + let float_cmp_mask = + self.aligned_label(16, LabelValue::I64(0x41e0000000000000u64 as i64)); + let trap_label = self.trap_label(); + + dynasm!(self.asm + ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] + ; jae >else_ + ; jp =>trap_label.0 + ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) + ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap()) + ; js =>trap_label.0 + ; jmp >ret + ; else_: + ; subsd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] + ; cvttsd2si Rd(temp.rq().unwrap()), Rx(reg.rx().unwrap()) + ; test Rd(temp.rq().unwrap()), Rd(temp.rq().unwrap()) + ; js =>trap_label.0 + ; add Rq(temp.rq().unwrap()), [=>sign_mask.0] + ; ret: + ); + + 
ValueLocation::Reg(temp) + } + }; + + self.free_value(val); + + self.push(out_val); + } + + conversion!( + f32_convert_from_i32_s, + cvtsi2ss, + Rd, + rq, + Rx, + rx, + i32, + f32, + as_i32, + |a| Ieee32::from_bits((a as f32).to_bits()) + ); + conversion!( + f64_convert_from_i32_s, + cvtsi2sd, + Rd, + rq, + Rx, + rx, + i32, + f64, + as_i32, + |a| Ieee64::from_bits((a as f64).to_bits()) + ); + conversion!( + f32_convert_from_i64_s, + cvtsi2ss, + Rq, + rq, + Rx, + rx, + i64, + f32, + as_i64, + |a| Ieee32::from_bits((a as f32).to_bits()) + ); + conversion!( + f64_convert_from_i64_s, + cvtsi2sd, + Rq, + rq, + Rx, + rx, + i64, + f64, + as_i64, + |a| Ieee64::from_bits((a as f64).to_bits()) + ); + + pub fn i64_truncate_f32_s(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => ValueLocation::Immediate( + (f32::from_bits(imm.as_f32().unwrap().to_bits()) as i64).into(), + ), + _ => { + let reg = self.into_temp_reg(F32, &mut val).unwrap(); + let temp = self.take_reg(I32).unwrap(); + + let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); + let float_cmp_mask = self.aligned_label(16, LabelValue::I32(0xdf000000u32 as i32)); + let zero = self.aligned_label(16, LabelValue::I64(0)); + let trap_label = self.trap_label(); + + dynasm!(self.asm + ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) + ; cmp Rq(temp.rq().unwrap()), [=>sign_mask.0] + ; jne >ret + ; ucomiss Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) + ; jp =>trap_label.0 + ; ucomiss Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] + ; jnae =>trap_label.0 + ; ucomiss Rx(reg.rx().unwrap()), [=>zero.0] + ; jnb =>trap_label.0 + ; ret: + ); + + ValueLocation::Reg(temp) + } + }; + + self.free_value(val); + + self.push(out_val); + } + + pub fn i64_truncate_f64_s(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => ValueLocation::Immediate( + (f64::from_bits(imm.as_f64().unwrap().to_bits()) as i64).into(), + ), + _ => { + let reg = self.into_reg(F32, &mut val).unwrap(); + let temp = self.take_reg(I32).unwrap(); + + let sign_mask = self.aligned_label(8, LabelValue::I64(SIGN_MASK_F64 as i64)); + let float_cmp_mask = + self.aligned_label(16, LabelValue::I64(0xc3e0000000000000u64 as i64)); + let zero = self.aligned_label(16, LabelValue::I64(0)); + let trap_label = self.trap_label(); + + dynasm!(self.asm + ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) + ; cmp Rq(temp.rq().unwrap()), [=>sign_mask.0] + ; jne >ret + ; ucomisd Rx(reg.rx().unwrap()), Rx(reg.rx().unwrap()) + ; jp =>trap_label.0 + ; ucomisd Rx(reg.rx().unwrap()), [=>float_cmp_mask.0] + ; jnae =>trap_label.0 + ; ucomisd Rx(reg.rx().unwrap()), [=>zero.0] + ; jnb =>trap_label.0 + ; ret: + ); + + ValueLocation::Reg(temp) + } + }; + + self.free_value(val); + + self.push(out_val); + } + + pub fn i64_truncate_f32_u(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => ValueLocation::Immediate( + (f32::from_bits(imm.as_f32().unwrap().to_bits()) as u64).into(), + ), + _ => { + let reg = self.into_reg(F32, &mut val).unwrap(); + + let temp = self.take_reg(I64).unwrap(); + let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); + let u64_trunc_f32_const = self.aligned_label(16, LabelValue::I32(0x5F000000)); + let trap_label = self.trap_label(); + + dynasm!(self.asm + ; comiss Rx(reg.rx().unwrap()), [=>u64_trunc_f32_const.0] + ; jae >large + ; jp =>trap_label.0 + ; cvttss2si 
Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) + ; test Rq(temp.rq().unwrap()), Rq(temp.rq().unwrap()) + ; js =>trap_label.0 + ; jmp >cont + ; large: + ; subss Rx(reg.rx().unwrap()), [=>u64_trunc_f32_const.0] + ; cvttss2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) + ; test Rq(temp.rq().unwrap()), Rq(temp.rq().unwrap()) + ; js =>trap_label.0 + ; add Rq(temp.rq().unwrap()), [=>sign_mask.0] + ; cont: + ); + + ValueLocation::Reg(temp) + } + }; + + self.free_value(val); + + self.push(out_val); + } + + pub fn i64_truncate_f64_u(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => ValueLocation::Immediate( + (f64::from_bits(imm.as_f64().unwrap().to_bits()) as u64).into(), + ), + _ => { + let reg = self.into_reg(F64, &mut val).unwrap(); + let temp = self.take_reg(I64).unwrap(); + + let sign_mask = self.aligned_label(16, LabelValue::I64(SIGN_MASK_F64 as i64)); + let u64_trunc_f64_const = + self.aligned_label(16, LabelValue::I64(0x43e0000000000000)); + let trap_label = self.trap_label(); + + dynasm!(self.asm + ; comisd Rx(reg.rx().unwrap()), [=>u64_trunc_f64_const.0] + ; jnb >large + ; jp =>trap_label.0 + ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) + ; cmp Rq(temp.rq().unwrap()), 0 + ; jge >cont + ; jmp =>trap_label.0 + ; large: + ; subsd Rx(reg.rx().unwrap()), [=>u64_trunc_f64_const.0] + ; cvttsd2si Rq(temp.rq().unwrap()), Rx(reg.rx().unwrap()) + ; cmp Rq(temp.rq().unwrap()), 0 + ; jnge =>trap_label.0 + ; add Rq(temp.rq().unwrap()), [=>sign_mask.0] + ; cont: + ); + + ValueLocation::Reg(temp) + } + }; + + self.free_value(val); + + self.push(out_val); + } + + pub fn f32_convert_from_i32_u(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => ValueLocation::Immediate( + Ieee32::from_bits((imm.as_i32().unwrap() as u32 as f32).to_bits()).into(), + ), + _ => { + let reg = self.into_reg(I32, &mut val).unwrap(); + + let temp = self.take_reg(F32).unwrap(); + + dynasm!(self.asm + ; mov Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap()) + ; cvtsi2ss Rx(temp.rx().unwrap()), Rq(reg.rq().unwrap()) + ); + + ValueLocation::Reg(temp) + } + }; + + self.free_value(val); + + self.push(out_val); + } + + pub fn f64_convert_from_i32_u(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => ValueLocation::Immediate( + Ieee64::from_bits((imm.as_i32().unwrap() as u32 as f64).to_bits()).into(), + ), + _ => { + let reg = self.into_reg(I32, &mut val).unwrap(); + let temp = self.take_reg(F64).unwrap(); + + dynasm!(self.asm + ; mov Rd(reg.rq().unwrap()), Rd(reg.rq().unwrap()) + ; cvtsi2sd Rx(temp.rx().unwrap()), Rq(reg.rq().unwrap()) + ); + + ValueLocation::Reg(temp) + } + }; + + self.free_value(val); + + self.push(out_val); + } + + pub fn f32_convert_from_i64_u(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => ValueLocation::Immediate( + Ieee32::from_bits((imm.as_i64().unwrap() as u64 as f32).to_bits()).into(), + ), + _ => { + let reg = self.into_reg(I64, &mut val).unwrap(); + let out = self.take_reg(F32).unwrap(); + let temp = self.take_reg(I64).unwrap(); + + dynasm!(self.asm + ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap()) + ; js >negative + ; cvtsi2ss Rx(out.rx().unwrap()), Rq(reg.rq().unwrap()) + ; jmp >ret + ; negative: + ; mov Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) + ; shr Rq(temp.rq().unwrap()), 1 + ; and Rq(reg.rq().unwrap()), 1 + ; or Rq(reg.rq().unwrap()), Rq(temp.rq().unwrap()) + ; 
cvtsi2ss Rx(out.rx().unwrap()), Rq(reg.rq().unwrap()) + ; addss Rx(out.rx().unwrap()), Rx(out.rx().unwrap()) + ; ret: + ); + + self.free_value(ValueLocation::Reg(temp)); + + ValueLocation::Reg(out) + } + }; + + self.free_value(val); + + self.push(out_val); + } + + pub fn f64_convert_from_i64_u(&mut self) { + let mut val = self.pop(); + + let out_val = match val { + ValueLocation::Immediate(imm) => ValueLocation::Immediate( + Ieee64::from_bits((imm.as_i64().unwrap() as u64 as f64).to_bits()).into(), + ), + _ => { + let reg = self.into_reg(I64, &mut val).unwrap(); + + let out = self.take_reg(F32).unwrap(); + let temp = self.take_reg(I64).unwrap(); + + dynasm!(self.asm + ; test Rq(reg.rq().unwrap()), Rq(reg.rq().unwrap()) + ; js >negative + ; cvtsi2sd Rx(out.rx().unwrap()), Rq(reg.rq().unwrap()) + ; jmp >ret + ; negative: + ; mov Rq(temp.rq().unwrap()), Rq(reg.rq().unwrap()) + ; shr Rq(temp.rq().unwrap()), 1 + ; and Rq(reg.rq().unwrap()), 1 + ; or Rq(reg.rq().unwrap()), Rq(temp.rq().unwrap()) + ; cvtsi2sd Rx(out.rx().unwrap()), Rq(reg.rq().unwrap()) + ; addsd Rx(out.rx().unwrap()), Rx(out.rx().unwrap()) + ; ret: + ); + + self.free_value(ValueLocation::Reg(temp)); + + ValueLocation::Reg(out) + } + }; + + self.free_value(val); + + self.push(out_val); + } + + pub fn i32_wrap_from_i64(&mut self) { + let val = self.pop(); + + let out = match val { + ValueLocation::Immediate(imm) => { + ValueLocation::Immediate((imm.as_i64().unwrap() as u64 as u32).into()) + } + val => val, + }; + + self.push(out); + } + + pub fn i32_reinterpret_from_f32(&mut self) { + let val = self.pop(); + + let out = match val { + ValueLocation::Immediate(imm) => { + ValueLocation::Immediate(imm.as_f32().unwrap().to_bits().into()) + } + val => val, + }; + + self.push(out); + } + + pub fn i64_reinterpret_from_f64(&mut self) { + let val = self.pop(); + + let out = match val { + ValueLocation::Immediate(imm) => { + ValueLocation::Immediate(imm.as_f64().unwrap().to_bits().into()) + } + val => val, + }; + + self.push(out); + } + + pub fn f32_reinterpret_from_i32(&mut self) { + let val = self.pop(); + + let out = match val { + ValueLocation::Immediate(imm) => { + ValueLocation::Immediate(Ieee32::from_bits(imm.as_i32().unwrap() as _).into()) + } + val => val, + }; + + self.push(out); + } + + pub fn f64_reinterpret_from_i64(&mut self) { + let val = self.pop(); + + let out = match val { + ValueLocation::Immediate(imm) => { + ValueLocation::Immediate(Ieee64::from_bits(imm.as_i64().unwrap() as _).into()) + } + val => val, + }; + + self.push(out); + } + + unop!(i64_popcnt, popcnt, Rq, u64, |a: u64| a.count_ones() as u64); + + // TODO: Use `lea` when the LHS operand isn't a temporary but both of the operands + // are in registers. 
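+     // The simple integer and floating-point binary operations below are
+     // generated by the `commutative_binop_*!`/`binop_*!`, `minmax_float!` and
+     // `shift!` macros, which handle the register/stack/immediate operand
+     // combinations and use the supplied closures to constant-fold when both
+     // operands are immediates.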
+ commutative_binop_i32!(i32_add, add, i32::wrapping_add); + commutative_binop_i32!(i32_and, and, |a, b| a & b); + commutative_binop_i32!(i32_or, or, |a, b| a | b); + commutative_binop_i32!(i32_xor, xor, |a, b| a ^ b); + binop_i32!(i32_sub, sub, i32::wrapping_sub); + + commutative_binop_i64!(i64_add, add, i64::wrapping_add); + commutative_binop_i64!(i64_and, and, |a, b| a & b); + commutative_binop_i64!(i64_or, or, |a, b| a | b); + commutative_binop_i64!(i64_xor, xor, |a, b| a ^ b); + binop_i64!(i64_sub, sub, i64::wrapping_sub); + + commutative_binop_f32!(f32_add, addss, |a, b| a + b); + commutative_binop_f32!(f32_mul, mulss, |a, b| a * b); + minmax_float!( + f32_min, + minss, + ucomiss, + addss, + orps, + as_f32, + |a: Ieee32, b: Ieee32| Ieee32::from_bits( + f32::from_bits(a.to_bits()) + .min(f32::from_bits(b.to_bits())) + .to_bits() + ) + ); + minmax_float!( + f32_max, + maxss, + ucomiss, + addss, + andps, + as_f32, + |a: Ieee32, b: Ieee32| Ieee32::from_bits( + f32::from_bits(a.to_bits()) + .max(f32::from_bits(b.to_bits())) + .to_bits() + ) + ); + binop_f32!(f32_sub, subss, |a, b| a - b); + binop_f32!(f32_div, divss, |a, b| a / b); + + pub fn f32_ceil(&mut self) { + self.relocated_function_call( + &ir::ExternalName::LibCall(ir::LibCall::CeilF32), + iter::once(F32), + iter::once(F32), + true, + ); + } + + pub fn f32_floor(&mut self) { + self.relocated_function_call( + &ir::ExternalName::LibCall(ir::LibCall::FloorF32), + iter::once(F32), + iter::once(F32), + true, + ); + } + + pub fn f32_nearest(&mut self) { + self.relocated_function_call( + &ir::ExternalName::LibCall(ir::LibCall::NearestF32), + iter::once(F32), + iter::once(F32), + true, + ); + } + + pub fn f32_trunc(&mut self) { + self.relocated_function_call( + &ir::ExternalName::LibCall(ir::LibCall::TruncF32), + iter::once(F32), + iter::once(F32), + true, + ); + } + + commutative_binop_f64!(f64_add, addsd, |a, b| a + b); + commutative_binop_f64!(f64_mul, mulsd, |a, b| a * b); + minmax_float!( + f64_min, + minsd, + ucomisd, + addsd, + orpd, + as_f64, + |a: Ieee64, b: Ieee64| Ieee64::from_bits( + f64::from_bits(a.to_bits()) + .min(f64::from_bits(b.to_bits())) + .to_bits() + ) + ); + minmax_float!( + f64_max, + maxsd, + ucomisd, + addsd, + andpd, + as_f64, + |a: Ieee64, b: Ieee64| Ieee64::from_bits( + f64::from_bits(a.to_bits()) + .max(f64::from_bits(b.to_bits())) + .to_bits() + ) + ); + binop_f64!(f64_sub, subsd, |a, b| a - b); + binop_f64!(f64_div, divsd, |a, b| a / b); + + pub fn f64_ceil(&mut self) { + self.relocated_function_call( + &ir::ExternalName::LibCall(ir::LibCall::CeilF64), + iter::once(F64), + iter::once(F64), + true, + ); + } + + pub fn f64_floor(&mut self) { + self.relocated_function_call( + &ir::ExternalName::LibCall(ir::LibCall::FloorF64), + iter::once(F64), + iter::once(F64), + true, + ); + } + + pub fn f64_nearest(&mut self) { + self.relocated_function_call( + &ir::ExternalName::LibCall(ir::LibCall::NearestF64), + iter::once(F64), + iter::once(F64), + true, + ); + } + + pub fn f64_trunc(&mut self) { + self.relocated_function_call( + &ir::ExternalName::LibCall(ir::LibCall::TruncF64), + iter::once(F64), + iter::once(F64), + true, + ); + } + + shift!( + i32_shl, + Rd, + shl, + |a, b| (a as i32).wrapping_shl(b as _), + I32 + ); + shift!( + i32_shr_s, + Rd, + sar, + |a, b| (a as i32).wrapping_shr(b as _), + I32 + ); + shift!( + i32_shr_u, + Rd, + shr, + |a, b| (a as u32).wrapping_shr(b as _), + I32 + ); + shift!( + i32_rotl, + Rd, + rol, + |a, b| (a as u32).rotate_left(b as _), + I32 + ); + shift!( + i32_rotr, + Rd, + ror, + 
|a, b| (a as u32).rotate_right(b as _), + I32 + ); + + shift!( + i64_shl, + Rq, + shl, + |a, b| (a as i64).wrapping_shl(b as _), + I64 + ); + shift!( + i64_shr_s, + Rq, + sar, + |a, b| (a as i64).wrapping_shr(b as _), + I64 + ); + shift!( + i64_shr_u, + Rq, + shr, + |a, b| (a as u64).wrapping_shr(b as _), + I64 + ); + shift!( + i64_rotl, + Rq, + rol, + |a, b| (a as u64).rotate_left(b as _), + I64 + ); + shift!( + i64_rotr, + Rq, + ror, + |a, b| (a as u64).rotate_right(b as _), + I64 + ); + + // TODO: Do this without emitting `mov` + fn cleanup_gprs(&mut self, gprs: impl Iterator) { + for gpr in gprs { + dynasm!(self.asm + ; pop Rq(gpr.rq().unwrap()) + ); + self.block_state.depth.free(1); + // DON'T MARK IT USED HERE! See comment in `full_div` + } + } + + int_div!( + i32_full_div_s, + i32_full_div_u, + i32_div_u, + i32_div_s, + i32_rem_u, + i32_rem_s, + imm_i32, + i32, + u32, + Rd, + DWORD + ); + int_div!( + i64_full_div_s, + i64_full_div_u, + i64_div_u, + i64_div_s, + i64_rem_u, + i64_rem_s, + imm_i64, + i64, + u64, + Rq, + QWORD + ); + + // TODO: With a proper SSE-like "Value" system we could do this way better (we wouldn't have + // to move `RAX`/`RDX` back afterwards). + fn full_div( + &mut self, + mut divisor: ValueLocation, + dividend: ValueLocation, + do_div: impl FnOnce(&mut Self, &mut ValueLocation), + ) -> ( + ValueLocation, + ValueLocation, + impl Iterator + Clone + 'this, + ) { + // To stop `take_reg` from allocating either of these necessary registers + self.block_state.regs.mark_used(RAX); + self.block_state.regs.mark_used(RDX); + if divisor == ValueLocation::Reg(RAX) || divisor == ValueLocation::Reg(RDX) { + let new_reg = self.take_reg(GPRType::Rq).unwrap(); + self.copy_value(divisor, CCLoc::Reg(new_reg)); + self.free_value(divisor); + + divisor = ValueLocation::Reg(new_reg); + } + self.block_state.regs.release(RAX); + self.block_state.regs.release(RDX); + + let saved_rax = if self.block_state.regs.is_free(RAX) { + None + } else { + dynasm!(self.asm + ; push rax + ); + self.block_state.depth.reserve(1); + // DON'T FREE THIS REGISTER HERE - since we don't + // remove it from the stack freeing the register + // here will cause `take_reg` to allocate it. + Some(()) + }; + + let saved_rdx = if self.block_state.regs.is_free(RDX) { + None + } else { + dynasm!(self.asm + ; push rdx + ); + self.block_state.depth.reserve(1); + // DON'T FREE THIS REGISTER HERE - since we don't + // remove it from the stack freeing the register + // here will cause `take_reg` to allocate it. 
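+         // x86 `div`/`idiv` implicitly produce the quotient in RAX and the
+         // remainder in RDX, so a live RDX has to be preserved across the
+         // division; the `Some(())` sentinel records that a matching `pop`
+         // (see `cleanup_gprs`) is still required.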
+ Some(()) + }; + + let saved = saved_rdx + .map(|_| RDX) + .into_iter() + .chain(saved_rax.map(|_| RAX)); + + self.copy_value(dividend, CCLoc::Reg(RAX)); + self.block_state.regs.mark_used(RAX); + + self.free_value(dividend); + // To stop `take_reg` from allocating either of these necessary registers + self.block_state.regs.mark_used(RDX); + + do_div(self, &mut divisor); + self.free_value(divisor); + + assert!(!self.block_state.regs.is_free(RAX)); + assert!(!self.block_state.regs.is_free(RDX)); + + (ValueLocation::Reg(RAX), ValueLocation::Reg(RDX), saved) + } + + fn i32_full_div_u( + &mut self, + divisor: ValueLocation, + dividend: ValueLocation, + ) -> ( + ValueLocation, + ValueLocation, + impl Iterator + Clone + 'this, + ) { + self.full_div(divisor, dividend, |this, divisor| match divisor { + ValueLocation::Stack(offset) => { + let offset = this.adjusted_offset(*offset); + dynasm!(this.asm + ; xor edx, edx + ; div DWORD [rsp + offset] + ); + } + ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let r = this.into_reg(I32, divisor).unwrap(); + dynasm!(this.asm + ; xor edx, edx + ; div Rd(r.rq().unwrap()) + ); + } + }) + } + + fn i32_full_div_s( + &mut self, + divisor: ValueLocation, + dividend: ValueLocation, + ) -> ( + ValueLocation, + ValueLocation, + impl Iterator + Clone + 'this, + ) { + self.full_div(divisor, dividend, |this, divisor| match divisor { + ValueLocation::Stack(offset) => { + let offset = this.adjusted_offset(*offset); + dynasm!(this.asm + ; cdq + ; idiv DWORD [rsp + offset] + ); + } + ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let r = this.into_reg(I32, divisor).unwrap(); + dynasm!(this.asm + ; cdq + ; idiv Rd(r.rq().unwrap()) + ); + } + }) + } + + fn i64_full_div_u( + &mut self, + divisor: ValueLocation, + dividend: ValueLocation, + ) -> ( + ValueLocation, + ValueLocation, + impl Iterator + Clone + 'this, + ) { + self.full_div(divisor, dividend, |this, divisor| match divisor { + ValueLocation::Stack(offset) => { + let offset = this.adjusted_offset(*offset); + dynasm!(this.asm + ; xor rdx, rdx + ; div QWORD [rsp + offset] + ); + } + ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let r = this.into_reg(I64, divisor).unwrap(); + dynasm!(this.asm + ; xor rdx, rdx + ; div Rq(r.rq().unwrap()) + ); + } + }) + } + + fn i64_full_div_s( + &mut self, + divisor: ValueLocation, + dividend: ValueLocation, + ) -> ( + ValueLocation, + ValueLocation, + impl Iterator + Clone + 'this, + ) { + self.full_div(divisor, dividend, |this, divisor| match divisor { + ValueLocation::Stack(offset) => { + let offset = this.adjusted_offset(*offset); + dynasm!(this.asm + ; cqo + ; idiv QWORD [rsp + offset] + ); + } + ValueLocation::Immediate(_) | ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let r = this.into_reg(I64, divisor).unwrap(); + dynasm!(this.asm + ; cqo + ; idiv Rq(r.rq().unwrap()) + ); + } + }) + } + + // `i32_mul` needs to be separate because the immediate form of the instruction + // has a different syntax to the immediate form of the other instructions. 
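+     // (The immediate form of `imul` is the three-operand `imul reg, reg/mem, imm`,
+     // while `add`, `sub`, `and`, etc. use the two-operand `op reg/mem, imm` form,
+     // so the generic binop macros can't be reused here.)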
+ pub fn i32_mul(&mut self) { + let right = self.pop(); + let left = self.pop(); + + if let Some(right) = right.immediate() { + if let Some(left) = left.immediate() { + self.push(ValueLocation::Immediate( + i32::wrapping_mul(right.as_i32().unwrap(), left.as_i32().unwrap()).into(), + )); + return; + } + } + + let (mut left, mut right) = match left { + ValueLocation::Reg(_) => (left, right), + _ => { + if right.immediate().is_some() { + (left, right) + } else { + (right, left) + } + } + }; + + let out = match right { + ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let rreg = self.into_reg(I32, &mut right).unwrap(); + let lreg = self.into_temp_reg(I32, &mut left).unwrap(); + dynasm!(self.asm + ; imul Rd(lreg.rq().unwrap()), Rd(rreg.rq().unwrap()) + ); + left + } + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + + let lreg = self.into_temp_reg(I32, &mut left).unwrap(); + dynasm!(self.asm + ; imul Rd(lreg.rq().unwrap()), [rsp + offset] + ); + left + } + ValueLocation::Immediate(i) => { + let lreg = self.into_reg(I32, &mut left).unwrap(); + let new_reg = self.take_reg(I32).unwrap(); + dynasm!(self.asm + ; imul Rd(new_reg.rq().unwrap()), Rd(lreg.rq().unwrap()), i.as_i32().unwrap() + ); + self.free_value(left); + ValueLocation::Reg(new_reg) + } + }; + + self.push(out); + self.free_value(right); + } + + // `i64_mul` needs to be separate because the immediate form of the instruction + // has a different syntax to the immediate form of the other instructions. + pub fn i64_mul(&mut self) { + let right = self.pop(); + let left = self.pop(); + + if let Some(right) = right.immediate() { + if let Some(left) = left.immediate() { + self.push(ValueLocation::Immediate( + i64::wrapping_mul(right.as_i64().unwrap(), left.as_i64().unwrap()).into(), + )); + return; + } + } + + let (mut left, mut right) = match left { + ValueLocation::Reg(_) => (left, right), + _ => { + if right.immediate().is_some() { + (left, right) + } else { + (right, left) + } + } + }; + + let out = match right { + ValueLocation::Reg(_) | ValueLocation::Cond(_) => { + let rreg = self.into_reg(I64, &mut right).unwrap(); + let lreg = self.into_temp_reg(I64, &mut left).unwrap(); + dynasm!(self.asm + ; imul Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap()) + ); + left + } + ValueLocation::Stack(offset) => { + let offset = self.adjusted_offset(offset); + + let lreg = self.into_temp_reg(I64, &mut left).unwrap(); + dynasm!(self.asm + ; imul Rq(lreg.rq().unwrap()), [rsp + offset] + ); + left + } + ValueLocation::Immediate(i) => { + let i = i.as_i64().unwrap(); + if let Some(i) = i.try_into() { + let new_reg = self.take_reg(I64).unwrap(); + let lreg = self.into_reg(I64, &mut left).unwrap(); + + dynasm!(self.asm + ; imul Rq(new_reg.rq().unwrap()), Rq(lreg.rq().unwrap()), i + ); + + self.free_value(left); + + ValueLocation::Reg(new_reg) + } else { + let rreg = self.into_reg(I64, &mut right).unwrap(); + let lreg = self.into_temp_reg(I64, &mut left).unwrap(); + dynasm!(self.asm + ; imul Rq(lreg.rq().unwrap()), Rq(rreg.rq().unwrap()) + ); + left + } + } + }; + + self.push(out); + self.free_value(right); + } + + fn cmov(&mut self, cond_code: CondCode, dst: GPR, src: CCLoc) { + match src { + CCLoc::Reg(reg) => match cond_code { + cc::EQUAL => { + dynasm!(self.asm + ; cmove Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) + ); + } + cc::NOT_EQUAL => { + dynasm!(self.asm + ; cmovne Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) + ); + } + cc::GE_U => { + dynasm!(self.asm + ; cmovae Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) + 
); + } + cc::LT_U => { + dynasm!(self.asm + ; cmovb Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) + ); + } + cc::GT_U => { + dynasm!(self.asm + ; cmova Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) + ); + } + cc::LE_U => { + dynasm!(self.asm + ; cmovbe Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) + ); + } + cc::GE_S => { + dynasm!(self.asm + ; cmovge Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) + ); + } + cc::LT_S => { + dynasm!(self.asm + ; cmovl Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) + ); + } + cc::GT_S => { + dynasm!(self.asm + ; cmovg Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) + ); + } + cc::LE_S => { + dynasm!(self.asm + ; cmovle Rq(dst.rq().unwrap()), Rq(reg.rq().unwrap()) + ); + } + }, + CCLoc::Stack(offset) => { + let offset = self.adjusted_offset(offset); + + match cond_code { + cc::EQUAL => { + dynasm!(self.asm + ; cmove Rq(dst.rq().unwrap()), [rsp + offset] + ); + } + cc::NOT_EQUAL => { + dynasm!(self.asm + ; cmovne Rq(dst.rq().unwrap()), [rsp + offset] + ); + } + cc::GE_U => { + dynasm!(self.asm + ; cmovae Rq(dst.rq().unwrap()), [rsp + offset] + ); + } + cc::LT_U => { + dynasm!(self.asm + ; cmovb Rq(dst.rq().unwrap()), [rsp + offset] + ); + } + cc::GT_U => { + dynasm!(self.asm + ; cmova Rq(dst.rq().unwrap()), [rsp + offset] + ); + } + cc::LE_U => { + dynasm!(self.asm + ; cmovbe Rq(dst.rq().unwrap()), [rsp + offset] + ); + } + cc::GE_S => { + dynasm!(self.asm + ; cmovge Rq(dst.rq().unwrap()), [rsp + offset] + ); + } + cc::LT_S => { + dynasm!(self.asm + ; cmovl Rq(dst.rq().unwrap()), [rsp + offset] + ); + } + cc::GT_S => { + dynasm!(self.asm + ; cmovg Rq(dst.rq().unwrap()), [rsp + offset] + ); + } + cc::LE_S => { + dynasm!(self.asm + ; cmovle Rq(dst.rq().unwrap()), [rsp + offset] + ); + } + } + } + } + } + + pub fn select(&mut self) { + let mut cond = self.pop(); + let mut else_ = self.pop(); + let mut then = self.pop(); + + if let ValueLocation::Immediate(i) = cond { + if i.as_i32().unwrap() == 0 { + self.free_value(then); + self.push(else_); + } else { + self.free_value(else_); + self.push(then); + } + + return; + } + + let cond_code = match cond { + ValueLocation::Cond(cc) => cc, + _ => { + let cond_reg = self.into_reg(I32, &mut cond).unwrap(); + dynasm!(self.asm + ; test Rd(cond_reg.rq().unwrap()), Rd(cond_reg.rq().unwrap()) + ); + self.free_value(cond); + + cc::NOT_EQUAL + } + }; + + let else_ = if let ValueLocation::Stack(offset) = else_ { + CCLoc::Stack(offset) + } else { + CCLoc::Reg(self.into_reg(I32, &mut else_).unwrap()) + }; + + let then = if let ValueLocation::Stack(offset) = then { + CCLoc::Stack(offset) + } else { + CCLoc::Reg(self.into_reg(I32, &mut then).unwrap()) + }; + + let out_gpr = match (then, else_) { + (CCLoc::Reg(then_reg), else_) if self.block_state.regs.num_usages(then_reg) <= 1 => { + self.cmov(!cond_code, then_reg, else_); + self.free_value(else_.into()); + + then_reg + } + (then, CCLoc::Reg(else_reg)) if self.block_state.regs.num_usages(else_reg) <= 1 => { + self.cmov(cond_code, else_reg, then); + self.free_value(then.into()); + + else_reg + } + (then, else_) => { + let out = self.take_reg(GPRType::Rq).unwrap(); + self.copy_value(else_.into(), CCLoc::Reg(out)); + self.cmov(cond_code, out, then); + + self.free_value(then.into()); + self.free_value(else_.into()); + + out + } + }; + + self.push(ValueLocation::Reg(out_gpr)); + } + + pub fn pick(&mut self, depth: u32) { + let idx = self.block_state.stack.len() - 1 - depth as usize; + let v = self.block_state.stack[idx]; + + match v { + ValueLocation::Reg(r) => { + 
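+                 // Duplicating a register-resident value gives that register
+                 // one more user on the value stack, so bump its usage count
+                 // to keep the later `free_value` calls balanced.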
self.block_state.regs.mark_used(r); + } + _ => {} + } + + self.block_state.stack.push(v); + } + + pub fn const_(&mut self, imm: Value) { + self.push(ValueLocation::Immediate(imm)); + } + + fn relocated_function_call( + &mut self, + name: &cranelift_codegen::ir::ExternalName, + args: impl IntoIterator, + rets: impl IntoIterator, + preserve_vmctx: bool, + ) { + let locs = arg_locs(args); + + self.save_volatile(..locs.len()); + + if preserve_vmctx { + dynasm!(self.asm + ; push Rq(VMCTX) + ); + self.block_state.depth.reserve(1); + } + + let depth = self.block_state.depth.clone(); + + self.pass_outgoing_args(&locs); + // 2 bytes for the 64-bit `mov` opcode + register ident, the rest is the immediate + self.reloc_sink.reloc_external( + (self.asm.offset().0 + - self.func_starts[self.current_function as usize] + .0 + .unwrap() + .0) as u32 + + 2, + binemit::Reloc::Abs8, + name, + 0, + ); + let temp = self.take_reg(I64).unwrap(); + dynasm!(self.asm + ; mov Rq(temp.rq().unwrap()), QWORD 0xdeadbeefdeadbeefu64 as i64 + ; call Rq(temp.rq().unwrap()) + ); + self.block_state.regs.release(temp); + + for i in locs { + self.free_value(i.into()); + } + + self.push_function_returns(rets); + + if preserve_vmctx { + self.set_stack_depth(depth); + + dynasm!(self.asm + ; pop Rq(VMCTX) + ); + self.block_state.depth.free(1); + } + } + + // TODO: Other memory indices + pub fn memory_size(&mut self) { + let memory_index = 0; + if let Some(defined_memory_index) = self.module_context.defined_memory_index(memory_index) { + self.push(ValueLocation::Immediate(defined_memory_index.into())); + self.relocated_function_call( + &magic::get_memory32_size_name(), + iter::once(I32), + iter::once(I32), + true, + ); + } else { + self.push(ValueLocation::Immediate(memory_index.into())); + self.relocated_function_call( + &magic::get_imported_memory32_size_name(), + iter::once(I32), + iter::once(I32), + true, + ); + } + } + + // TODO: Other memory indices + pub fn memory_grow(&mut self) { + let memory_index = 0; + if let Some(defined_memory_index) = self.module_context.defined_memory_index(memory_index) { + self.push(ValueLocation::Immediate(defined_memory_index.into())); + self.relocated_function_call( + &magic::get_memory32_grow_name(), + iter::once(I32).chain(iter::once(I32)), + iter::once(I32), + true, + ); + } else { + self.push(ValueLocation::Immediate(memory_index.into())); + self.relocated_function_call( + &magic::get_imported_memory32_grow_name(), + iter::once(I32).chain(iter::once(I32)), + iter::once(I32), + true, + ); + } + } + + // TODO: Use `ArrayVec`? + // TODO: This inefficiently duplicates registers but it's not really possible + // to double up stack space right now. + /// Saves volatile (i.e. caller-saved) registers before a function call, if they are used. 
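+     // Concretely, any value-stack entry currently held in one of the scratch
+     // registers is rewritten to a physical stack slot via `push_physical`,
+     // leaving the callee free to clobber those registers.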
+ fn save_volatile(&mut self, bounds: impl std::ops::RangeBounds) { + self.save_regs(SCRATCH_REGS, ..); + } + + fn save_regs(&mut self, regs: &I, bounds: impl std::ops::RangeBounds) + where + for<'a> &'a I: IntoIterator, + I: ?Sized, + { + use std::ops::Bound::*; + + let mut stack = mem::replace(&mut self.block_state.stack, vec![]); + let (start, end) = ( + match bounds.end_bound() { + Unbounded => 0, + Included(v) => stack.len().saturating_sub(1 + v), + Excluded(v) => stack.len().saturating_sub(*v), + }, + match bounds.start_bound() { + Unbounded => stack.len(), + Included(v) => stack.len().saturating_sub(*v), + Excluded(v) => stack.len().saturating_sub(1 + v), + }, + ); + + let mut slice = &mut stack[start..end]; + + loop { + if let Some((first, rest)) = slice.split_first_mut() { + if let ValueLocation::Reg(vreg) = *first { + if regs.into_iter().any(|r| *r == vreg) { + let old = *first; + *first = self.push_physical(old); + for val in &mut *rest { + if *val == old { + self.free_value(*val); + *val = *first; + } + } + } + } + + slice = rest; + } else { + break; + } + } + + mem::replace(&mut self.block_state.stack, stack); + } + + /// Write the arguments to the callee to the registers and the stack using the SystemV + /// calling convention. + fn pass_outgoing_args(&mut self, out_locs: &[CCLoc]) { + // TODO: Do alignment here + let total_stack_space = out_locs + .iter() + .flat_map(|&l| { + if let CCLoc::Stack(offset) = l { + if offset > 0 { + Some(offset as u32) + } else { + None + } + } else { + None + } + }) + .max() + .unwrap_or(0); + let mut depth = self.block_state.depth.0 + total_stack_space; + + if depth & 1 != 0 { + self.set_stack_depth(StackDepth(self.block_state.depth.0 + 1)); + depth += 1; + } + + let mut pending = Vec::<(ValueLocation, CCLoc)>::with_capacity(out_locs.len()); + + for &loc in out_locs.iter().rev() { + let val = self.pop(); + + pending.push((val, loc)); + } + + while !pending.is_empty() { + let start_len = pending.len(); + + for (src, dst) in mem::replace(&mut pending, vec![]) { + if src != ValueLocation::from(dst) { + if let CCLoc::Reg(r) = dst { + if !self.block_state.regs.is_free(r) { + pending.push((src, dst)); + continue; + } + + self.block_state.regs.mark_used(r); + } + + self.copy_value(src, dst); + self.free_value(src); + } + } + + if pending.len() == start_len { + let src = *pending + .iter() + .filter_map(|(src, _)| { + if let ValueLocation::Reg(reg) = src { + Some(reg) + } else { + None + } + }) + .next() + .expect( + "Programmer error: We shouldn't need to push \ + intermediate args if we don't have any argument sources in registers", + ); + let new_src = self.push_physical(ValueLocation::Reg(src)); + for (old_src, _) in pending.iter_mut() { + if *old_src == ValueLocation::Reg(src) { + *old_src = new_src; + } + } + } + } + + self.set_stack_depth(StackDepth(depth)); + } + + fn push_function_returns(&mut self, returns: impl IntoIterator) { + for loc in ret_locs(returns) { + if let CCLoc::Reg(reg) = loc { + self.block_state.regs.mark_used(reg); + } + + self.push(loc.into()); + } + } + + pub fn call_indirect( + &mut self, + type_id: u32, + arg_types: impl IntoIterator, + return_types: impl IntoIterator, + ) { + let locs = arg_locs(arg_types); + + for &loc in &locs { + if let CCLoc::Reg(r) = loc { + self.block_state.regs.mark_used(r); + } + } + + let mut callee = self.pop(); + let callee_reg = self.into_temp_reg(I32, &mut callee).unwrap(); + + for &loc in &locs { + if let CCLoc::Reg(r) = loc { + self.block_state.regs.release(r); + } + } + + 
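+         // The callee index is now pinned in its own temporary register, so
+         // spill any remaining live caller-saved registers before moving the
+         // outgoing arguments into place.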
self.save_volatile(..locs.len()); + + dynasm!(self.asm + ; push Rq(VMCTX) + ); + self.block_state.depth.reserve(1); + let depth = self.block_state.depth.clone(); + + self.pass_outgoing_args(&locs); + + let fail = self.trap_label().0; + let table_index = 0; + let reg_offset = self + .module_context + .defined_table_index(table_index) + .map(|index| { + ( + None, + self.module_context.vmctx_vmtable_definition(index) as i32, + ) + }); + + let vmctx = GPR::Rq(VMCTX); + let (reg, offset) = reg_offset.unwrap_or_else(|| { + let reg = self.take_reg(I64).unwrap(); + + dynasm!(self.asm + ; mov Rq(reg.rq().unwrap()), [ + Rq(VMCTX) + self.module_context.vmctx_vmtable_import_from(table_index) as i32 + ] + ); + + (Some(reg), 0) + }); + + let temp0 = self.take_reg(I64).unwrap(); + + dynasm!(self.asm + ; cmp Rd(callee_reg.rq().unwrap()), [ + Rq(reg.unwrap_or(vmctx).rq().unwrap()) + + offset + + self.module_context.vmtable_definition_current_elements() as i32 + ] + ; jae =>fail + ; imul + Rd(callee_reg.rq().unwrap()), + Rd(callee_reg.rq().unwrap()), + self.module_context.size_of_vmcaller_checked_anyfunc() as i32 + ; mov Rq(temp0.rq().unwrap()), [ + Rq(reg.unwrap_or(vmctx).rq().unwrap()) + + offset + + self.module_context.vmtable_definition_base() as i32 + ] + ); + + if let Some(reg) = reg { + self.block_state.regs.release(reg); + } + + let temp1 = self.take_reg(I64).unwrap(); + + dynasm!(self.asm + ; mov Rd(temp1.rq().unwrap()), [ + Rq(VMCTX) + + self.module_context + .vmctx_vmshared_signature_id(type_id) as i32 + ] + ; cmp DWORD [ + Rq(temp0.rq().unwrap()) + + Rq(callee_reg.rq().unwrap()) + + self.module_context.vmcaller_checked_anyfunc_type_index() as i32 + ], Rd(temp1.rq().unwrap()) + ; jne =>fail + ; mov Rq(VMCTX), [ + Rq(temp0.rq().unwrap()) + + Rq(callee_reg.rq().unwrap()) + + self.module_context.vmcaller_checked_anyfunc_vmctx() as i32 + ] + ; call QWORD [ + Rq(temp0.rq().unwrap()) + + Rq(callee_reg.rq().unwrap()) + + self.module_context.vmcaller_checked_anyfunc_func_ptr() as i32 + ] + ); + + self.block_state.regs.release(temp0); + self.block_state.regs.release(temp1); + self.free_value(callee); + + for i in locs { + self.free_value(i.into()); + } + + self.push_function_returns(return_types); + + self.set_stack_depth(depth); + dynasm!(self.asm + ; pop Rq(VMCTX) + ); + self.block_state.depth.free(1); + } + + pub fn swap(&mut self, depth: u32) { + let last = self.block_state.stack.len() - 1; + self.block_state.stack.swap(last, last - depth as usize); + } + + /// Call a function with the given index + pub fn call_direct( + &mut self, + index: u32, + arg_types: impl IntoIterator, + return_types: impl IntoIterator, + ) { + self.relocated_function_call( + &ir::ExternalName::user(0, index), + arg_types, + return_types, + false, + ); + } + + /// Call a function with the given index + pub fn call_direct_self( + &mut self, + defined_index: u32, + arg_types: impl IntoIterator, + return_types: impl IntoIterator, + ) { + let locs = arg_locs(arg_types); + + self.save_volatile(..locs.len()); + + let (_, label) = self.func_starts[defined_index as usize]; + + self.pass_outgoing_args(&locs); + dynasm!(self.asm + ; call =>label + ); + + for i in locs { + self.free_value(i.into()); + } + + self.push_function_returns(return_types); + } + + /// Call a function with the given index + pub fn call_direct_imported( + &mut self, + index: u32, + arg_types: impl IntoIterator, + return_types: impl IntoIterator, + ) { + let locs = arg_locs(arg_types); + + dynasm!(self.asm + ; push Rq(VMCTX) + ); + 
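+         // The imported function runs with its own `VMContext`, so the current
+         // vmctx pointer is saved on the machine stack here and restored by
+         // the matching `pop` after the call returns.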
self.block_state.depth.reserve(1); + let depth = self.block_state.depth.clone(); + + self.save_volatile(..locs.len()); + self.pass_outgoing_args(&locs); + + let callee = self.take_reg(I64).unwrap(); + + dynasm!(self.asm + ; mov Rq(callee.rq().unwrap()), [ + Rq(VMCTX) + self.module_context.vmctx_vmfunction_import_body(index) as i32 + ] + ; mov Rq(VMCTX), [ + Rq(VMCTX) + self.module_context.vmctx_vmfunction_import_vmctx(index) as i32 + ] + ; call Rq(callee.rq().unwrap()) + ); + + self.block_state.regs.release(callee); + + for i in locs { + self.free_value(i.into()); + } + + self.push_function_returns(return_types); + + self.set_stack_depth(depth); + dynasm!(self.asm + ; pop Rq(VMCTX) + ); + self.block_state.depth.free(1); + } + + // TODO: Reserve space to store RBX, RBP, and R12..R15 so we can use them + // as scratch registers + /// Writes the function prologue and stores the arguments as locals + pub fn start_function(&mut self, params: impl IntoIterator) { + let locs = Vec::from_iter(arg_locs(params)); + + self.apply_cc(&BlockCallingConvention::function_start(locs)); + } + + pub fn ret(&mut self) { + dynasm!(self.asm + ; ret + ); + } + + pub fn epilogue(&mut self) {} + + pub fn trap(&mut self) { + let trap_label = self.trap_label(); + dynasm!(self.asm + ; jmp =>trap_label.0 + ); + } + + pub fn trap_label(&mut self) -> Label { + self.label(|asm: &mut Assembler| { + dynasm!(asm + ; ud2 + ); + }) + } + + pub fn ret_label(&mut self) -> Label { + self.label(|asm: &mut Assembler| { + dynasm!(asm + ; ret + ); + }) + } + + fn label(&mut self, fun: F) -> Label + where + F: IntoLabel, + { + self.aligned_label(1, fun) + } + + fn aligned_label(&mut self, align: u32, fun: F) -> Label + where + F: IntoLabel, + { + let key = fun.key(); + if let Some((label, _, _)) = self.labels.get(&(align, key)) { + return *label; + } + + let label = self.create_label(); + self.labels + .insert((align, key), (label, align, Some(fun.callback()))); + + label + } + + fn target_to_label(&mut self, target: BrTarget