diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 500c743f97..c38b7cc1a3 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -328,28 +328,6 @@ jobs:
       env:
         RUST_BACKTRACE: 1
 
-  # Perform all tests (debug mode) for `wasmtime` with the old x86 backend.
-  test_x86:
-    name: Test old x86 backend
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v2
-      with:
-        submodules: true
-    - uses: ./.github/actions/install-rust
-      with:
-        toolchain: stable
-
-    # Install wasm32 targets in order to build various tests throughout the
-    # repo.
-    - run: rustup target add wasm32-wasi
-    - run: rustup target add wasm32-unknown-unknown
-
-    # Run the old x86 backend CI (we will eventually remove this).
-    - run: ./ci/run-tests.sh --features old-x86-backend --locked
-      env:
-        RUST_BACKTRACE: 1
-
   # Build and test the wasi-nn module.
   test_wasi_nn:
     name: Test wasi-nn module
diff --git a/Cargo.toml b/Cargo.toml
index 08e0c0eb72..d69e3ac062 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -100,9 +100,6 @@ posix-signals-on-macos = ["wasmtime/posix-signals-on-macos"]
 # backend is the default now.
 experimental_x64 = []
 
-# Use the old x86 backend.
-old-x86-backend = ["wasmtime/old-x86-backend"]
-
 [badges]
 maintenance = { status = "actively-developed" }
 
diff --git a/build.rs b/build.rs
index 06f0669cdf..cc6d3e5047 100644
--- a/build.rs
+++ b/build.rs
@@ -182,11 +182,6 @@ fn write_testsuite_tests(
 fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
     match strategy {
         "Cranelift" => match (testsuite, testname) {
-            // Skip all reference types tests on the old backend. The modern
-            // implementation of reference types uses atomic instructions
-            // for reference counts on `externref`, but the old backend does not
-            // implement atomic instructions.
-            ("reference_types", _) if cfg!(feature = "old-x86-backend") => return true,
             // No simd support yet for s390x.
             ("simd", _) if platform_is_s390x() => return true,
             // No memory64 support yet for s390x.
diff --git a/cranelift/codegen/Cargo.toml b/cranelift/codegen/Cargo.toml
index 5bc6b2c9ca..4397568e5a 100644
--- a/cranelift/codegen/Cargo.toml
+++ b/cranelift/codegen/Cargo.toml
@@ -71,9 +71,6 @@ arm32 = [] # Work-in-progress codegen backend for ARM.
 # backend is the default now.
 experimental_x64 = []
 
-# Make the old x86 backend the default.
-old-x86-backend = []
-
 # Option to enable all architectures.
 all-arch = [
     "x86",
diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs
deleted file mode 100644
index 2f222defb5..0000000000
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ /dev/null
@@ -1,2731 +0,0 @@
-#![allow(non_snake_case)]
-
-use cranelift_codegen_shared::condcodes::IntCC;
-use std::collections::HashMap;
-
-use crate::cdsl::encodings::{Encoding, EncodingBuilder};
-use crate::cdsl::instructions::{
-    vector, Bindable, Immediate, InstSpec, Instruction, InstructionGroup, InstructionPredicate,
-    InstructionPredicateNode, InstructionPredicateRegistry,
-};
-use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
-use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};
-use crate::cdsl::types::{LaneType, ValueType};
-use crate::shared::types::Bool::{B1, B16, B32, B64, B8};
-use crate::shared::types::Float::{F32, F64};
-use crate::shared::types::Int::{I16, I32, I64, I8};
-use crate::shared::types::Reference::{R32, R64};
-use crate::shared::Definitions as SharedDefinitions;
-
-use crate::isa::x86::opcodes::*;
-
-use super::recipes::{RecipeGroup, Template};
-use crate::cdsl::instructions::BindParameter::Any;
-
-pub(crate) struct PerCpuModeEncodings {
-    pub enc32: Vec<Encoding>,
-    pub enc64: Vec<Encoding>,
-    pub recipes: Recipes,
-    recipes_by_name: HashMap<String, EncodingRecipeNumber>,
-    pub inst_pred_reg: InstructionPredicateRegistry,
-}
-
-impl PerCpuModeEncodings {
-    fn new() -> Self {
-        Self {
-            enc32: Vec::new(),
-            enc64: Vec::new(),
-            recipes: Recipes::new(),
-            recipes_by_name: HashMap::new(),
-            inst_pred_reg: InstructionPredicateRegistry::new(),
-        }
-    }
-
-    fn add_recipe(&mut self, recipe: EncodingRecipe) -> EncodingRecipeNumber {
-        if let Some(found_index) = self.recipes_by_name.get(&recipe.name) {
-            assert!(
-                self.recipes[*found_index] == recipe,
-                "trying to insert different recipes with a same name ({})",
-                recipe.name
-            );
-            *found_index
-        } else {
-            let recipe_name = recipe.name.clone();
-            let index = self.recipes.push(recipe);
-            self.recipes_by_name.insert(recipe_name, index);
-            index
-        }
-    }
-
-    fn make_encoding<T>(
-        &mut self,
-        inst: InstSpec,
-        template: Template,
-        builder_closure: T,
-    ) -> Encoding
-    where
-        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
-    {
-        let (recipe, bits) = template.build();
-        let recipe_number = self.add_recipe(recipe);
-        let builder = EncodingBuilder::new(inst, recipe_number, bits);
-        builder_closure(builder).build(&self.recipes, &mut self.inst_pred_reg)
-    }
-
-    fn enc32_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T)
-    where
-        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
-    {
-        let encoding = self.make_encoding(inst.into(), template, builder_closure);
-        self.enc32.push(encoding);
-    }
-    fn enc32(&mut self, inst: impl Into<InstSpec>, template: Template) {
-        self.enc32_func(inst, template, |x| x);
-    }
-    fn enc32_isap(
-        &mut self,
-        inst: impl Into<InstSpec>,
-        template: Template,
-        isap: SettingPredicateNumber,
-    ) {
-        self.enc32_func(inst, template, |encoding| encoding.isa_predicate(isap));
-    }
-    fn enc32_instp(
-        &mut self,
-        inst: impl Into<InstSpec>,
-        template: Template,
-        instp: InstructionPredicateNode,
-    ) {
-        self.enc32_func(inst, template, |encoding| encoding.inst_predicate(instp));
-    }
-    fn enc32_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
-        let recipe_number = self.add_recipe(recipe.clone());
-        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
-        let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
-        self.enc32.push(encoding);
-    }
-
-    fn enc64_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T)
-    where
-        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
-    {
-        let encoding = self.make_encoding(inst.into(), template, builder_closure);
-        self.enc64.push(encoding);
-    }
-    fn enc64(&mut self, inst: impl Into<InstSpec>, template: Template) {
-        self.enc64_func(inst, template, |x| x);
-    }
-    fn enc64_isap(
-        &mut self,
-        inst: impl Into<InstSpec>,
-        template: Template,
-        isap: SettingPredicateNumber,
-    ) {
-        self.enc64_func(inst, template, |encoding| encoding.isa_predicate(isap));
-    }
-    fn enc64_instp(
-        &mut self,
-        inst: impl Into<InstSpec>,
-        template: Template,
-        instp: InstructionPredicateNode,
-    ) {
-        self.enc64_func(inst, template, |encoding| encoding.inst_predicate(instp));
-    }
-    fn enc64_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
-        let recipe_number = self.add_recipe(recipe.clone());
-        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
-        let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
-        self.enc64.push(encoding);
-    }
-
-    /// Adds I32/I64 encodings as appropriate for a typed instruction.
-    /// The REX prefix is always inferred at runtime.
-    ///
-    /// Add encodings for `inst.i32` to X86_32.
-    /// Add encodings for `inst.i32` to X86_64 with optional, inferred REX.
-    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
-    fn enc_i32_i64(&mut self, inst: impl Into<InstSpec>, template: Template) {
-        let inst: InstSpec = inst.into();
-
-        // I32 on x86: no REX prefix.
-        self.enc32(inst.bind(I32), template.infer_rex());
-
-        // I32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers.
-        self.enc64(inst.bind(I32), template.infer_rex());
-
-        // I64 on x86_64: REX.W set; REX.RXB determined at runtime from registers.
-        self.enc64(inst.bind(I64), template.rex().w());
-    }
-
-    /// Adds I32/I64 encodings as appropriate for a typed instruction.
-    /// All variants of REX prefix are explicitly emitted, not inferred.
-    ///
-    /// Add encodings for `inst.i32` to X86_32.
-    /// Add encodings for `inst.i32` to X86_64 with and without REX.
-    /// Add encodings for `inst.i64` to X86_64 with and without REX.
-    fn enc_i32_i64_explicit_rex(&mut self, inst: impl Into<InstSpec>, template: Template) {
-        let inst: InstSpec = inst.into();
-        self.enc32(inst.bind(I32), template.nonrex());
-
-        // REX-less encoding must come after REX encoding so we don't use it by default.
-        // Otherwise reg-alloc would never use r8 and up.
-        self.enc64(inst.bind(I32), template.rex());
-        self.enc64(inst.bind(I32), template.nonrex());
-        self.enc64(inst.bind(I64), template.rex().w());
-    }
-
-    /// Adds B32/B64 encodings as appropriate for a typed instruction.
-    /// The REX prefix is always inferred at runtime.
-    ///
-    /// Adds encoding for `inst.b32` to X86_32.
-    /// Adds encoding for `inst.b32` to X86_64 with optional, inferred REX.
-    /// Adds encoding for `inst.b64` to X86_64 with a REX.W prefix.
-    fn enc_b32_b64(&mut self, inst: impl Into<InstSpec>, template: Template) {
-        let inst: InstSpec = inst.into();
-
-        // B32 on x86: no REX prefix.
-        self.enc32(inst.bind(B32), template.infer_rex());
-
-        // B32 on x86_64: REX.W unset; REX.RXB determined at runtime from registers.
-        self.enc64(inst.bind(B32), template.infer_rex());
-
-        // B64 on x86_64: REX.W set; REX.RXB determined at runtime from registers.
-        self.enc64(inst.bind(B64), template.rex().w());
-    }
-
-    /// Add encodings for `inst.i32` to X86_32.
-    /// Add encodings for `inst.i32` to X86_64 with a REX prefix.
-    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
-    fn enc_i32_i64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
-        let inst: InstSpec = inst.into();
-        self.enc32(inst.bind(I32), template.nonrex());
-        self.enc64(inst.bind(I32), template.rex());
-        self.enc64(inst.bind(I64), template.rex().w());
-    }
-
-    /// Add encodings for `inst.i32` to X86_32.
-    /// Add encodings for `inst.i32` to X86_64 with and without REX.
-    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
-    fn enc_i32_i64_instp(
-        &mut self,
-        inst: &Instruction,
-        template: Template,
-        instp: InstructionPredicateNode,
-    ) {
-        self.enc32_func(inst.bind(I32), template.nonrex(), |builder| {
-            builder.inst_predicate(instp.clone())
-        });
-
-        // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
-        // reg-alloc would never use r8 and up.
-        self.enc64_func(inst.bind(I32), template.rex(), |builder| {
-            builder.inst_predicate(instp.clone())
-        });
-        self.enc64_func(inst.bind(I32), template.nonrex(), |builder| {
-            builder.inst_predicate(instp.clone())
-        });
-        self.enc64_func(inst.bind(I64), template.rex().w(), |builder| {
-            builder.inst_predicate(instp)
-        });
-    }
-
-    /// Add encodings for `inst.r32` to X86_32.
-    /// Add encodings for `inst.r32` to X86_64 with and without REX.
-    /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix.
-    fn enc_r32_r64_instp(
-        &mut self,
-        inst: &Instruction,
-        template: Template,
-        instp: InstructionPredicateNode,
-    ) {
-        self.enc32_func(inst.bind(R32), template.nonrex(), |builder| {
-            builder.inst_predicate(instp.clone())
-        });
-
-        // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
-        // reg-alloc would never use r8 and up.
-        self.enc64_func(inst.bind(R32), template.rex(), |builder| {
-            builder.inst_predicate(instp.clone())
-        });
-        self.enc64_func(inst.bind(R32), template.nonrex(), |builder| {
-            builder.inst_predicate(instp.clone())
-        });
-        self.enc64_func(inst.bind(R64), template.rex().w(), |builder| {
-            builder.inst_predicate(instp)
-        });
-    }
-
-    /// Add encodings for `inst.r32` to X86_32.
-    /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix.
-    fn enc_r32_r64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
-        let inst: InstSpec = inst.into();
-        self.enc32(inst.bind(R32), template.nonrex());
-        self.enc64(inst.bind(R64), template.rex().w());
-    }
-
-    fn enc_r32_r64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) {
-        self.enc32(inst.clone().bind(R32).bind(Any), template.clone());
-
-        // REX-less encoding must come after REX encoding so we don't use it by
-        // default. Otherwise reg-alloc would never use r8 and up.
-        self.enc64(inst.clone().bind(R32).bind(Any), template.clone().rex());
-        self.enc64(inst.clone().bind(R32).bind(Any), template.clone());
-
-        if w_bit {
-            self.enc64(inst.clone().bind(R64).bind(Any), template.rex().w());
-        } else {
-            self.enc64(inst.clone().bind(R64).bind(Any), template.clone().rex());
-            self.enc64(inst.clone().bind(R64).bind(Any), template);
-        }
-    }
-
-    /// Add encodings for `inst` to X86_64 with and without a REX prefix.
-    fn enc_x86_64(&mut self, inst: impl Into<InstSpec> + Clone, template: Template) {
-        // See above comment about the ordering of rex vs non-rex encodings.
-        self.enc64(inst.clone(), template.rex());
-        self.enc64(inst, template);
-    }
-
-    /// Add encodings for `inst` to X86_64 with and without a REX prefix.
-    fn enc_x86_64_instp(
-        &mut self,
-        inst: impl Clone + Into<InstSpec>,
-        template: Template,
-        instp: InstructionPredicateNode,
-    ) {
-        // See above comment about the ordering of rex vs non-rex encodings.
-        self.enc64_func(inst.clone(), template.rex(), |builder| {
-            builder.inst_predicate(instp.clone())
-        });
-        self.enc64_func(inst, template, |builder| builder.inst_predicate(instp));
-    }
-    fn enc_x86_64_isap(
-        &mut self,
-        inst: impl Clone + Into<InstSpec>,
-        template: Template,
-        isap: SettingPredicateNumber,
-    ) {
-        // See above comment about the ordering of rex vs non-rex encodings.
-        self.enc64_isap(inst.clone(), template.rex(), isap);
-        self.enc64_isap(inst, template, isap);
-    }
-
-    /// Add all three encodings for `inst`:
-    /// - X86_32
-    /// - X86_64 with and without the REX prefix.
-    fn enc_both(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
-        self.enc32(inst.clone(), template.clone());
-        self.enc_x86_64(inst, template);
-    }
-    fn enc_both_isap(
-        &mut self,
-        inst: impl Clone + Into<InstSpec>,
-        template: Template,
-        isap: SettingPredicateNumber,
-    ) {
-        self.enc32_isap(inst.clone(), template.clone(), isap);
-        self.enc_x86_64_isap(inst, template, isap);
-    }
-    fn enc_both_instp(
-        &mut self,
-        inst: impl Clone + Into<InstSpec>,
-        template: Template,
-        instp: InstructionPredicateNode,
-    ) {
-        self.enc32_instp(inst.clone(), template.clone(), instp.clone());
-        self.enc_x86_64_instp(inst, template, instp);
-    }
-
-    /// Add two encodings for `inst`:
-    /// - X86_32, no REX prefix, since this is not valid in 32-bit mode.
-    /// - X86_64, dynamically infer the REX prefix.
-    fn enc_both_inferred(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
-        self.enc32(inst.clone(), template.clone());
-        self.enc64(inst, template.infer_rex());
-    }
-    fn enc_both_inferred_maybe_isap(
-        &mut self,
-        inst: impl Clone + Into<InstSpec>,
-        template: Template,
-        isap: Option<SettingPredicateNumber>,
-    ) {
-        self.enc32_maybe_isap(inst.clone(), template.clone(), isap);
-        self.enc64_maybe_isap(inst, template.infer_rex(), isap);
-    }
-
-    /// Add two encodings for `inst`:
-    /// - X86_32
-    /// - X86_64 with the REX prefix.
-    fn enc_both_rex_only(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
-        self.enc32(inst.clone(), template.clone());
-        self.enc64(inst, template.rex());
-    }
-
-    /// Add encodings for `inst.i32` to X86_32.
-    /// Add encodings for `inst.i32` to X86_64 with and without REX.
-    /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit`
-    /// argument to determine whether or not to set the REX.W bit.
-    fn enc_i32_i64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) {
-        self.enc32(inst.clone().bind(I32).bind(Any), template.clone());
-
-        // REX-less encoding must come after REX encoding so we don't use it by
-        // default. Otherwise reg-alloc would never use r8 and up.
-        self.enc64(inst.clone().bind(I32).bind(Any), template.clone().rex());
-        self.enc64(inst.clone().bind(I32).bind(Any), template.clone());
-
-        if w_bit {
-            self.enc64(inst.clone().bind(I64).bind(Any), template.rex().w());
-        } else {
-            self.enc64(inst.clone().bind(I64).bind(Any), template.clone().rex());
-            self.enc64(inst.clone().bind(I64).bind(Any), template);
-        }
-    }
-
-    /// Add the same encoding/recipe pairing to both X86_32 and X86_64
-    fn enc_32_64_rec(
-        &mut self,
-        inst: impl Clone + Into<InstSpec>,
-        recipe: &EncodingRecipe,
-        bits: u16,
-    ) {
-        self.enc32_rec(inst.clone(), recipe, bits);
-        self.enc64_rec(inst, recipe, bits);
-    }
-
-    /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand binding) has already happened
-    fn enc_32_64_func<T>(
-        &mut self,
-        inst: impl Clone + Into<InstSpec>,
-        template: Template,
-        builder_closure: T,
-    ) where
-        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
-    {
-        let encoding = self.make_encoding(inst.into(), template, builder_closure);
-        self.enc32.push(encoding.clone());
-        self.enc64.push(encoding);
-    }
-
-    /// Add the same encoding to both X86_32 and X86_64; assumes configuration (e.g. REX, operand
-    /// binding) has already happened.
-    fn enc_32_64_maybe_isap(
-        &mut self,
-        inst: impl Clone + Into<InstSpec>,
-        template: Template,
-        isap: Option<SettingPredicateNumber>,
-    ) {
-        self.enc32_maybe_isap(inst.clone(), template.clone(), isap);
-        self.enc64_maybe_isap(inst, template, isap);
-    }
-
-    fn enc32_maybe_isap(
-        &mut self,
-        inst: impl Into<InstSpec>,
-        template: Template,
-        isap: Option<SettingPredicateNumber>,
-    ) {
-        match isap {
-            None => self.enc32(inst, template),
-            Some(isap) => self.enc32_isap(inst, template, isap),
-        }
-    }
-
-    fn enc64_maybe_isap(
-        &mut self,
-        inst: impl Into<InstSpec>,
-        template: Template,
-        isap: Option<SettingPredicateNumber>,
-    ) {
-        match isap {
-            None => self.enc64(inst, template),
-            Some(isap) => self.enc64_isap(inst, template, isap),
-        }
-    }
-}
-
-// Definitions.
-
-#[inline(never)]
-fn define_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) {
-    let shared = &shared_defs.instructions;
-    let formats = &shared_defs.formats;
-
-    // Shorthands for instructions.
-    let bconst = shared.by_name("bconst");
-    let bint = shared.by_name("bint");
-    let copy = shared.by_name("copy");
-    let copy_special = shared.by_name("copy_special");
-    let copy_to_ssa = shared.by_name("copy_to_ssa");
-    let get_pinned_reg = shared.by_name("get_pinned_reg");
-    let iconst = shared.by_name("iconst");
-    let ireduce = shared.by_name("ireduce");
-    let regmove = shared.by_name("regmove");
-    let sextend = shared.by_name("sextend");
-    let set_pinned_reg = shared.by_name("set_pinned_reg");
-    let uextend = shared.by_name("uextend");
-    let dummy_sarg_t = shared.by_name("dummy_sarg_t");
-
-    // Shorthands for recipes.
-    let rec_copysp = r.template("copysp");
-    let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa");
-    let rec_get_pinned_reg = r.recipe("get_pinned_reg");
-    let rec_null = r.recipe("null");
-    let rec_pu_id = r.template("pu_id");
-    let rec_pu_id_bool = r.template("pu_id_bool");
-    let rec_pu_iq = r.template("pu_iq");
-    let rec_rmov = r.template("rmov");
-    let rec_set_pinned_reg = r.template("set_pinned_reg");
-    let rec_u_id = r.template("u_id");
-    let rec_u_id_z = r.template("u_id_z");
-    let rec_umr = r.template("umr");
-    let rec_umr_reg_to_ssa = r.template("umr_reg_to_ssa");
-    let rec_urm_noflags = r.template("urm_noflags");
-    let rec_urm_noflags_abcd = r.template("urm_noflags_abcd");
-    let rec_dummy_sarg_t = r.recipe("dummy_sarg_t");
-
-    // The pinned reg is fixed to a certain value entirely user-controlled, so it generates nothing!
-    e.enc64_rec(get_pinned_reg.bind(I64), rec_get_pinned_reg, 0);
-    e.enc_x86_64(
-        set_pinned_reg.bind(I64),
-        rec_set_pinned_reg.opcodes(&MOV_STORE).rex().w(),
-    );
-
-    e.enc_i32_i64(copy, rec_umr.opcodes(&MOV_STORE));
-    e.enc_r32_r64_rex_only(copy, rec_umr.opcodes(&MOV_STORE));
-    e.enc_both(copy.bind(B1), rec_umr.opcodes(&MOV_STORE));
-    e.enc_both(copy.bind(I8), rec_umr.opcodes(&MOV_STORE));
-    e.enc_both(copy.bind(I16), rec_umr.opcodes(&MOV_STORE));
-
-    // TODO For x86-64, only define REX forms for now, since we can't describe the
-    // special regunit immediate operands with the current constraint language.
-    for &ty in &[I8, I16, I32] {
-        e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE));
-        e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex());
-    }
-    for &ty in &[B8, B16, B32] {
-        e.enc32(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE));
-        e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex());
-    }
-    e.enc64(regmove.bind(I64), rec_rmov.opcodes(&MOV_STORE).rex().w());
-    e.enc_both(regmove.bind(B1), rec_rmov.opcodes(&MOV_STORE));
-    e.enc_both(regmove.bind(I8), rec_rmov.opcodes(&MOV_STORE));
-    e.enc32(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE));
-    e.enc64(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE).rex());
-    e.enc64(regmove.bind(R64), rec_rmov.opcodes(&MOV_STORE).rex().w());
-
-    // Immediate constants.
-    e.enc32(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM));
-
-    e.enc64(iconst.bind(I32), rec_pu_id.rex().opcodes(&MOV_IMM));
-    e.enc64(iconst.bind(I32), rec_pu_id.opcodes(&MOV_IMM));
-
-    // The 32-bit immediate movl also zero-extends to 64 bits.
-    let is_unsigned_int32 =
-        InstructionPredicate::new_is_unsigned_int(&*formats.unary_imm, "imm", 32, 0);
-
-    e.enc64_func(
-        iconst.bind(I64),
-        rec_pu_id.opcodes(&MOV_IMM).rex(),
-        |encoding| encoding.inst_predicate(is_unsigned_int32.clone()),
-    );
-    e.enc64_func(iconst.bind(I64), rec_pu_id.opcodes(&MOV_IMM), |encoding| {
-        encoding.inst_predicate(is_unsigned_int32)
-    });
-
-    // Sign-extended 32-bit immediate.
-    e.enc64(
-        iconst.bind(I64),
-        rec_u_id.rex().opcodes(&MOV_IMM_SIGNEXTEND).rrr(0).w(),
-    );
-
-    // Finally, the MOV_IMM opcode takes an 8-byte immediate with a REX.W prefix.
-    e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(&MOV_IMM).rex().w());
-
-    // Bool constants (uses MOV)
-    for &ty in &[B1, B8, B16, B32] {
-        e.enc_both(bconst.bind(ty), rec_pu_id_bool.opcodes(&MOV_IMM));
-    }
-    e.enc64(bconst.bind(B64), rec_pu_id_bool.opcodes(&MOV_IMM).rex());
-
-    // You may expect that i8 encodings would use 0x30 (XORB) to indicate that encodings should be
-    // on 8-bit operands (f.ex "xor %al, %al"). Cranelift currently does not know when it can
-    // safely drop the 0x66 prefix, so we explicitly select a wider but permissible opcode.
-    let is_zero_int = InstructionPredicate::new_is_zero_int(&formats.unary_imm, "imm");
-    e.enc_both_instp(
-        iconst.bind(I8),
-        rec_u_id_z.opcodes(&XOR),
-        is_zero_int.clone(),
-    );
-
-    // You may expect that i16 encodings would have an 0x66 prefix on the opcode to indicate that
-    // encodings should be on 16-bit operands (f.ex, "xor %ax, %ax"). Cranelift currently does not
-    // know that it can drop the 0x66 prefix and clear the upper half of a 32-bit register in these
-    // scenarios, so we explicitly select a wider but permissible opcode.
-    //
-    // This effectively formalizes the i16->i32 widening that Cranelift performs when there isn't
-    // an appropriate i16 encoding available.
-    e.enc_both_instp(
-        iconst.bind(I16),
-        rec_u_id_z.opcodes(&XOR),
-        is_zero_int.clone(),
-    );
-    e.enc_both_instp(
-        iconst.bind(I32),
-        rec_u_id_z.opcodes(&XOR),
-        is_zero_int.clone(),
-    );
-    e.enc_x86_64_instp(iconst.bind(I64), rec_u_id_z.opcodes(&XOR), is_zero_int);
-
-    // Numerical conversions.
-
-    // Reducing an integer is a no-op.
-    e.enc32_rec(ireduce.bind(I8).bind(I16), rec_null, 0);
-    e.enc32_rec(ireduce.bind(I8).bind(I32), rec_null, 0);
-    e.enc32_rec(ireduce.bind(I16).bind(I32), rec_null, 0);
-
-    e.enc64_rec(ireduce.bind(I8).bind(I16), rec_null, 0);
-    e.enc64_rec(ireduce.bind(I8).bind(I32), rec_null, 0);
-    e.enc64_rec(ireduce.bind(I16).bind(I32), rec_null, 0);
-    e.enc64_rec(ireduce.bind(I8).bind(I64), rec_null, 0);
-    e.enc64_rec(ireduce.bind(I16).bind(I64), rec_null, 0);
-    e.enc64_rec(ireduce.bind(I32).bind(I64), rec_null, 0);
-
-    // TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending
-    // instructions for %al/%ax/%eax to %ax/%eax/%rax.
-
-    // movsbl
-    e.enc32(
-        sextend.bind(I32).bind(I8),
-        rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE),
-    );
-    e.enc64(
-        sextend.bind(I32).bind(I8),
-        rec_urm_noflags.opcodes(&MOVSX_BYTE).rex(),
-    );
-    e.enc64(
-        sextend.bind(I32).bind(I8),
-        rec_urm_noflags_abcd.opcodes(&MOVSX_BYTE),
-    );
-
-    // movswl
-    e.enc32(
-        sextend.bind(I32).bind(I16),
-        rec_urm_noflags.opcodes(&MOVSX_WORD),
-    );
-    e.enc64(
-        sextend.bind(I32).bind(I16),
-        rec_urm_noflags.opcodes(&MOVSX_WORD).rex(),
-    );
-    e.enc64(
-        sextend.bind(I32).bind(I16),
-        rec_urm_noflags.opcodes(&MOVSX_WORD),
-    );
-
-    // movsbq
-    e.enc64(
-        sextend.bind(I64).bind(I8),
-        rec_urm_noflags.opcodes(&MOVSX_BYTE).rex().w(),
-    );
-
-    // movswq
-    e.enc64(
-        sextend.bind(I64).bind(I16),
-        rec_urm_noflags.opcodes(&MOVSX_WORD).rex().w(),
-    );
-
-    // movslq
-    e.enc64(
-        sextend.bind(I64).bind(I32),
-        rec_urm_noflags.opcodes(&MOVSXD).rex().w(),
-    );
-
-    // movzbl
-    e.enc32(
-        uextend.bind(I32).bind(I8),
-        rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
-    );
-    e.enc64(
-        uextend.bind(I32).bind(I8),
-        rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(),
-    );
-    e.enc64(
-        uextend.bind(I32).bind(I8),
-        rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
-    );
-
-    // movzwl
-    e.enc32(
-        uextend.bind(I32).bind(I16),
-        rec_urm_noflags.opcodes(&MOVZX_WORD),
-    );
-    e.enc64(
-        uextend.bind(I32).bind(I16),
-        rec_urm_noflags.opcodes(&MOVZX_WORD).rex(),
-    );
-    e.enc64(
-        uextend.bind(I32).bind(I16),
-        rec_urm_noflags.opcodes(&MOVZX_WORD),
-    );
-
-    // movzbq, encoded as movzbl because it's equivalent and shorter.
-    e.enc64(
-        uextend.bind(I64).bind(I8),
-        rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(),
-    );
-    e.enc64(
-        uextend.bind(I64).bind(I8),
-        rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
-    );
-
-    // movzwq, encoded as movzwl because it's equivalent and shorter
-    e.enc64(
-        uextend.bind(I64).bind(I16),
-        rec_urm_noflags.opcodes(&MOVZX_WORD).rex(),
-    );
-    e.enc64(
-        uextend.bind(I64).bind(I16),
-        rec_urm_noflags.opcodes(&MOVZX_WORD),
-    );
-
-    // A 32-bit register copy clears the high 32 bits.
-    e.enc64(
-        uextend.bind(I64).bind(I32),
-        rec_umr.opcodes(&MOV_STORE).rex(),
-    );
-    e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(&MOV_STORE));
-
-    // Convert bool to int.
-    //
-    // This assumes that b1 is represented as an 8-bit low register with the value 0
-    // or 1.
-    //
-    // Encode movzbq as movzbl, because it's equivalent and shorter.
-    for &to in &[I8, I16, I32, I64] {
-        for &from in &[B1, B8] {
-            e.enc64(
-                bint.bind(to).bind(from),
-                rec_urm_noflags.opcodes(&MOVZX_BYTE).rex(),
-            );
-            e.enc64(
-                bint.bind(to).bind(from),
-                rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
-            );
-            if to != I64 {
-                e.enc32(
-                    bint.bind(to).bind(from),
-                    rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
-                );
-            }
-        }
-    }
-    for (to, from) in &[(I16, B16), (I32, B32), (I64, B64)] {
-        e.enc_both(
-            bint.bind(*to).bind(*from),
-            rec_urm_noflags_abcd.opcodes(&MOVZX_BYTE),
-        );
-    }
-
-    // Copy Special
-    // For x86-64, only define REX forms for now, since we can't describe the
-    // special regunit immediate operands with the current constraint language.
-    e.enc64(copy_special, rec_copysp.opcodes(&MOV_STORE).rex().w());
-    e.enc32(copy_special, rec_copysp.opcodes(&MOV_STORE));
-
-    // Copy to SSA.  These have to be done with special _rex_only encoders, because the standard
-    // machinery for deciding whether a REX.{RXB} prefix is needed doesn't take into account
-    // the source register, which is specified directly in the instruction.
-    e.enc_i32_i64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
-    e.enc_r32_r64_rex_only(copy_to_ssa, rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
-    e.enc_both_rex_only(copy_to_ssa.bind(B1), rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
-    e.enc_both_rex_only(copy_to_ssa.bind(I8), rec_umr_reg_to_ssa.opcodes(&MOV_STORE));
-    e.enc_both_rex_only(
-        copy_to_ssa.bind(I16),
-        rec_umr_reg_to_ssa.opcodes(&MOV_STORE),
-    );
-    e.enc_both_rex_only(
-        copy_to_ssa.bind(F64),
-        rec_furm_reg_to_ssa.opcodes(&MOVSD_LOAD),
-    );
-    e.enc_both_rex_only(
-        copy_to_ssa.bind(F32),
-        rec_furm_reg_to_ssa.opcodes(&MOVSS_LOAD),
-    );
-
-    e.enc_32_64_rec(dummy_sarg_t, rec_dummy_sarg_t, 0);
-}
-
-#[inline(never)]
-fn define_memory(
-    e: &mut PerCpuModeEncodings,
-    shared_defs: &SharedDefinitions,
-    x86: &InstructionGroup,
-    r: &RecipeGroup,
-) {
-    let shared = &shared_defs.instructions;
-    let formats = &shared_defs.formats;
-
-    // Shorthands for instructions.
-    let adjust_sp_down = shared.by_name("adjust_sp_down");
-    let adjust_sp_down_imm = shared.by_name("adjust_sp_down_imm");
-    let adjust_sp_up_imm = shared.by_name("adjust_sp_up_imm");
-    let copy_nop = shared.by_name("copy_nop");
-    let fill = shared.by_name("fill");
-    let fill_nop = shared.by_name("fill_nop");
-    let istore16 = shared.by_name("istore16");
-    let istore16_complex = shared.by_name("istore16_complex");
-    let istore32 = shared.by_name("istore32");
-    let istore32_complex = shared.by_name("istore32_complex");
-    let istore8 = shared.by_name("istore8");
-    let istore8_complex = shared.by_name("istore8_complex");
-    let load = shared.by_name("load");
-    let load_complex = shared.by_name("load_complex");
-    let regfill = shared.by_name("regfill");
-    let regspill = shared.by_name("regspill");
-    let sload16 = shared.by_name("sload16");
-    let sload16_complex = shared.by_name("sload16_complex");
-    let sload32 = shared.by_name("sload32");
-    let sload32_complex = shared.by_name("sload32_complex");
-    let sload8 = shared.by_name("sload8");
-    let sload8_complex = shared.by_name("sload8_complex");
-    let spill = shared.by_name("spill");
-    let store = shared.by_name("store");
-    let store_complex = shared.by_name("store_complex");
-    let uload16 = shared.by_name("uload16");
-    let uload16_complex = shared.by_name("uload16_complex");
-    let uload32 = shared.by_name("uload32");
-    let uload32_complex = shared.by_name("uload32_complex");
-    let uload8 = shared.by_name("uload8");
-    let uload8_complex = shared.by_name("uload8_complex");
-    let x86_pop = x86.by_name("x86_pop");
-    let x86_push = x86.by_name("x86_push");
-
-    // Shorthands for recipes.
-    let rec_adjustsp = r.template("adjustsp");
-    let rec_adjustsp_ib = r.template("adjustsp_ib");
-    let rec_adjustsp_id = r.template("adjustsp_id");
-    let rec_ffillnull = r.recipe("ffillnull");
-    let rec_fillnull = r.recipe("fillnull");
-    let rec_fillSib32 = r.template("fillSib32");
-    let rec_ld = r.template("ld");
-    let rec_ldDisp32 = r.template("ldDisp32");
-    let rec_ldDisp8 = r.template("ldDisp8");
-    let rec_ldWithIndex = r.template("ldWithIndex");
-    let rec_ldWithIndexDisp32 = r.template("ldWithIndexDisp32");
-    let rec_ldWithIndexDisp8 = r.template("ldWithIndexDisp8");
-    let rec_popq = r.template("popq");
-    let rec_pushq = r.template("pushq");
-    let rec_regfill32 = r.template("regfill32");
-    let rec_regspill32 = r.template("regspill32");
-    let rec_spillSib32 = r.template("spillSib32");
-    let rec_st = r.template("st");
-    let rec_stacknull = r.recipe("stacknull");
-    let rec_stDisp32 = r.template("stDisp32");
-    let rec_stDisp32_abcd = r.template("stDisp32_abcd");
-    let rec_stDisp8 = r.template("stDisp8");
-    let rec_stDisp8_abcd = r.template("stDisp8_abcd");
-    let rec_stWithIndex = r.template("stWithIndex");
-    let rec_stWithIndexDisp32 = r.template("stWithIndexDisp32");
-    let rec_stWithIndexDisp32_abcd = r.template("stWithIndexDisp32_abcd");
-    let rec_stWithIndexDisp8 = r.template("stWithIndexDisp8");
-    let rec_stWithIndexDisp8_abcd = r.template("stWithIndexDisp8_abcd");
-    let rec_stWithIndex_abcd = r.template("stWithIndex_abcd");
-    let rec_st_abcd = r.template("st_abcd");
-
-    // Loads and stores.
-    let is_load_complex_length_two =
-        InstructionPredicate::new_length_equals(&*formats.load_complex, 2);
-
-    for recipe in &[rec_ldWithIndex, rec_ldWithIndexDisp8, rec_ldWithIndexDisp32] {
-        e.enc_i32_i64_instp(
-            load_complex,
-            recipe.opcodes(&MOV_LOAD),
-            is_load_complex_length_two.clone(),
-        );
-        e.enc_r32_r64_instp(
-            load_complex,
-            recipe.opcodes(&MOV_LOAD),
-            is_load_complex_length_two.clone(),
-        );
-        e.enc_x86_64_instp(
-            uload32_complex,
-            recipe.opcodes(&MOV_LOAD),
-            is_load_complex_length_two.clone(),
-        );
-
-        e.enc64_instp(
-            sload32_complex,
-            recipe.opcodes(&MOVSXD).rex().w(),
-            is_load_complex_length_two.clone(),
-        );
-
-        e.enc_i32_i64_instp(
-            uload16_complex,
-            recipe.opcodes(&MOVZX_WORD),
-            is_load_complex_length_two.clone(),
-        );
-        e.enc_i32_i64_instp(
-            sload16_complex,
-            recipe.opcodes(&MOVSX_WORD),
-            is_load_complex_length_two.clone(),
-        );
-
-        e.enc_i32_i64_instp(
-            uload8_complex,
-            recipe.opcodes(&MOVZX_BYTE),
-            is_load_complex_length_two.clone(),
-        );
-
-        e.enc_i32_i64_instp(
-            sload8_complex,
-            recipe.opcodes(&MOVSX_BYTE),
-            is_load_complex_length_two.clone(),
-        );
-    }
-
-    let is_store_complex_length_three =
-        InstructionPredicate::new_length_equals(&*formats.store_complex, 3);
-
-    for recipe in &[rec_stWithIndex, rec_stWithIndexDisp8, rec_stWithIndexDisp32] {
-        e.enc_i32_i64_instp(
-            store_complex,
-            recipe.opcodes(&MOV_STORE),
-            is_store_complex_length_three.clone(),
-        );
-        e.enc_r32_r64_instp(
-            store_complex,
-            recipe.opcodes(&MOV_STORE),
-            is_store_complex_length_three.clone(),
-        );
-        e.enc_x86_64_instp(
-            istore32_complex,
-            recipe.opcodes(&MOV_STORE),
-            is_store_complex_length_three.clone(),
-        );
-        e.enc_both_instp(
-            istore16_complex.bind(I32),
-            recipe.opcodes(&MOV_STORE_16),
-            is_store_complex_length_three.clone(),
-        );
-        e.enc_x86_64_instp(
-            istore16_complex.bind(I64),
-            recipe.opcodes(&MOV_STORE_16),
-            is_store_complex_length_three.clone(),
-        );
-    }
-
-    for recipe in &[
-        rec_stWithIndex_abcd,
-        rec_stWithIndexDisp8_abcd,
-        rec_stWithIndexDisp32_abcd,
-    ] {
-        e.enc_both_instp(
-            istore8_complex.bind(I32),
-            recipe.opcodes(&MOV_BYTE_STORE),
-            is_store_complex_length_three.clone(),
-        );
-        e.enc_x86_64_instp(
-            istore8_complex.bind(I64),
-            recipe.opcodes(&MOV_BYTE_STORE),
-            is_store_complex_length_three.clone(),
-        );
-    }
-
-    for recipe in &[rec_st, rec_stDisp8, rec_stDisp32] {
-        e.enc_i32_i64_ld_st(store, true, recipe.opcodes(&MOV_STORE));
-        e.enc_r32_r64_ld_st(store, true, recipe.opcodes(&MOV_STORE));
-        e.enc_x86_64(istore32.bind(I64).bind(Any), recipe.opcodes(&MOV_STORE));
-        e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(&MOV_STORE_16));
-    }
-
-    // Byte stores are more complicated because the registers they can address
-    // depends of the presence of a REX prefix. The st*_abcd recipes fall back to
-    // the corresponding st* recipes when a REX prefix is applied.
-
-    for recipe in &[rec_st_abcd, rec_stDisp8_abcd, rec_stDisp32_abcd] {
-        e.enc_both(istore8.bind(I32).bind(Any), recipe.opcodes(&MOV_BYTE_STORE));
-        e.enc_x86_64(istore8.bind(I64).bind(Any), recipe.opcodes(&MOV_BYTE_STORE));
-    }
-
-    e.enc_i32_i64_explicit_rex(spill, rec_spillSib32.opcodes(&MOV_STORE));
-    e.enc_i32_i64_explicit_rex(regspill, rec_regspill32.opcodes(&MOV_STORE));
-    e.enc_r32_r64_rex_only(spill, rec_spillSib32.opcodes(&MOV_STORE));
-    e.enc_r32_r64_rex_only(regspill, rec_regspill32.opcodes(&MOV_STORE));
-
-    // Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid
-    // constraining the permitted registers.
-    // See MIN_SPILL_SLOT_SIZE which makes this safe.
-
-    e.enc_both(spill.bind(B1), rec_spillSib32.opcodes(&MOV_STORE));
-    e.enc_both(regspill.bind(B1), rec_regspill32.opcodes(&MOV_STORE));
-    for &ty in &[I8, I16] {
-        e.enc_both(spill.bind(ty), rec_spillSib32.opcodes(&MOV_STORE));
-        e.enc_both(regspill.bind(ty), rec_regspill32.opcodes(&MOV_STORE));
-    }
-
-    for recipe in &[rec_ld, rec_ldDisp8, rec_ldDisp32] {
-        e.enc_i32_i64_ld_st(load, true, recipe.opcodes(&MOV_LOAD));
-        e.enc_r32_r64_ld_st(load, true, recipe.opcodes(&MOV_LOAD));
-        e.enc_x86_64(uload32.bind(I64), recipe.opcodes(&MOV_LOAD));
-        e.enc64(sload32.bind(I64), recipe.opcodes(&MOVSXD).rex().w());
-        e.enc_i32_i64_ld_st(uload16, true, recipe.opcodes(&MOVZX_WORD));
-        e.enc_i32_i64_ld_st(sload16, true, recipe.opcodes(&MOVSX_WORD));
-        e.enc_i32_i64_ld_st(uload8, true, recipe.opcodes(&MOVZX_BYTE));
-        e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(&MOVSX_BYTE));
-    }
-
-    e.enc_i32_i64_explicit_rex(fill, rec_fillSib32.opcodes(&MOV_LOAD));
-    e.enc_i32_i64_explicit_rex(regfill, rec_regfill32.opcodes(&MOV_LOAD));
-    e.enc_r32_r64_rex_only(fill, rec_fillSib32.opcodes(&MOV_LOAD));
-    e.enc_r32_r64_rex_only(regfill, rec_regfill32.opcodes(&MOV_LOAD));
-
-    // No-op fills, created by late-stage redundant-fill removal.
-    for &ty in &[I64, I32, I16, I8] {
-        e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0);
-        e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0);
-    }
-    e.enc64_rec(fill_nop.bind(B1), rec_fillnull, 0);
-    e.enc32_rec(fill_nop.bind(B1), rec_fillnull, 0);
-    for &ty in &[F64, F32] {
-        e.enc64_rec(fill_nop.bind(ty), rec_ffillnull, 0);
-        e.enc32_rec(fill_nop.bind(ty), rec_ffillnull, 0);
-    }
-    for &ty in &[R64, R32] {
-        e.enc64_rec(fill_nop.bind(ty), rec_fillnull, 0);
-        e.enc32_rec(fill_nop.bind(ty), rec_fillnull, 0);
-    }
-
-    // Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above.
-
-    e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(&MOV_LOAD));
-    e.enc_both(regfill.bind(B1), rec_regfill32.opcodes(&MOV_LOAD));
-    for &ty in &[I8, I16] {
-        e.enc_both(fill.bind(ty), rec_fillSib32.opcodes(&MOV_LOAD));
-        e.enc_both(regfill.bind(ty), rec_regfill32.opcodes(&MOV_LOAD));
-    }
-
-    // Push and Pop.
-    e.enc32(x86_push.bind(I32), rec_pushq.opcodes(&PUSH_REG));
-    e.enc_x86_64(x86_push.bind(I64), rec_pushq.opcodes(&PUSH_REG));
-
-    e.enc32(x86_pop.bind(I32), rec_popq.opcodes(&POP_REG));
-    e.enc_x86_64(x86_pop.bind(I64), rec_popq.opcodes(&POP_REG));
-
-    // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
-    // into a no-op.
-    // The same encoding is generated for both the 64- and 32-bit architectures.
-    for &ty in &[I64, I32, I16, I8] {
-        e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0);
-        e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0);
-    }
-    for &ty in &[F64, F32] {
-        e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0);
-        e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0);
-    }
-
-    // Adjust SP down by a dynamic value (or up, with a negative operand).
-    e.enc32(adjust_sp_down.bind(I32), rec_adjustsp.opcodes(&SUB));
-    e.enc64(
-        adjust_sp_down.bind(I64),
-        rec_adjustsp.opcodes(&SUB).rex().w(),
-    );
-
-    // Adjust SP up by an immediate (or down, with a negative immediate).
-    e.enc32(adjust_sp_up_imm, rec_adjustsp_ib.opcodes(&CMP_IMM8));
-    e.enc32(adjust_sp_up_imm, rec_adjustsp_id.opcodes(&CMP_IMM));
-    e.enc64(
-        adjust_sp_up_imm,
-        rec_adjustsp_ib.opcodes(&CMP_IMM8).rex().w(),
-    );
-    e.enc64(
-        adjust_sp_up_imm,
-        rec_adjustsp_id.opcodes(&CMP_IMM).rex().w(),
-    );
-
-    // Adjust SP down by an immediate (or up, with a negative immediate).
-    e.enc32(
-        adjust_sp_down_imm,
-        rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5),
-    );
-    e.enc32(adjust_sp_down_imm, rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5));
-    e.enc64(
-        adjust_sp_down_imm,
-        rec_adjustsp_ib.opcodes(&CMP_IMM8).rrr(5).rex().w(),
-    );
-    e.enc64(
-        adjust_sp_down_imm,
-        rec_adjustsp_id.opcodes(&CMP_IMM).rrr(5).rex().w(),
-    );
-}
-
-#[inline(never)]
-fn define_fpu_moves(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) {
-    let shared = &shared_defs.instructions;
-
-    // Shorthands for instructions.
-    let bitcast = shared.by_name("bitcast");
-    let copy = shared.by_name("copy");
-    let regmove = shared.by_name("regmove");
-
-    // Shorthands for recipes.
-    let rec_frmov = r.template("frmov");
-    let rec_frurm = r.template("frurm");
-    let rec_furm = r.template("furm");
-    let rec_rfumr = r.template("rfumr");
-
-    // Floating-point moves.
-    // movd
-    e.enc_both(
-        bitcast.bind(F32).bind(I32),
-        rec_frurm.opcodes(&MOVD_LOAD_XMM),
-    );
-    e.enc_both(
-        bitcast.bind(I32).bind(F32),
-        rec_rfumr.opcodes(&MOVD_STORE_XMM),
-    );
-
-    // movq
-    e.enc64(
-        bitcast.bind(F64).bind(I64),
-        rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(),
-    );
-    e.enc64(
-        bitcast.bind(I64).bind(F64),
-        rec_rfumr.opcodes(&MOVD_STORE_XMM).rex().w(),
-    );
-
-    // movaps
-    e.enc_both(copy.bind(F32), rec_furm.opcodes(&MOVAPS_LOAD));
-    e.enc_both(copy.bind(F64), rec_furm.opcodes(&MOVAPS_LOAD));
-
-    // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit
-    // immediate operands with the current constraint language.
-    e.enc32(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD));
-    e.enc64(regmove.bind(F32), rec_frmov.opcodes(&MOVAPS_LOAD).rex());
-
-    // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit
-    // immediate operands with the current constraint language.
-    e.enc32(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD));
-    e.enc64(regmove.bind(F64), rec_frmov.opcodes(&MOVAPS_LOAD).rex());
-}
-
-#[inline(never)]
-fn define_fpu_memory(
-    e: &mut PerCpuModeEncodings,
-    shared_defs: &SharedDefinitions,
-    r: &RecipeGroup,
-) {
-    let shared = &shared_defs.instructions;
-
-    // Shorthands for instructions.
-    let fill = shared.by_name("fill");
-    let load = shared.by_name("load");
-    let load_complex = shared.by_name("load_complex");
-    let regfill = shared.by_name("regfill");
-    let regspill = shared.by_name("regspill");
-    let spill = shared.by_name("spill");
-    let store = shared.by_name("store");
-    let store_complex = shared.by_name("store_complex");
-
-    // Shorthands for recipes.
-    let rec_ffillSib32 = r.template("ffillSib32");
-    let rec_fld = r.template("fld");
-    let rec_fldDisp32 = r.template("fldDisp32");
-    let rec_fldDisp8 = r.template("fldDisp8");
-    let rec_fldWithIndex = r.template("fldWithIndex");
-    let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32");
-    let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8");
-    let rec_fregfill32 = r.template("fregfill32");
-    let rec_fregspill32 = r.template("fregspill32");
-    let rec_fspillSib32 = r.template("fspillSib32");
-    let rec_fst = r.template("fst");
-    let rec_fstDisp32 = r.template("fstDisp32");
-    let rec_fstDisp8 = r.template("fstDisp8");
-    let rec_fstWithIndex = r.template("fstWithIndex");
-    let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32");
-    let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8");
-
-    // Float loads and stores.
-    e.enc_both(load.bind(F32).bind(Any), rec_fld.opcodes(&MOVSS_LOAD));
-    e.enc_both(load.bind(F32).bind(Any), rec_fldDisp8.opcodes(&MOVSS_LOAD));
-    e.enc_both(load.bind(F32).bind(Any), rec_fldDisp32.opcodes(&MOVSS_LOAD));
-
-    e.enc_both(
-        load_complex.bind(F32),
-        rec_fldWithIndex.opcodes(&MOVSS_LOAD),
-    );
-    e.enc_both(
-        load_complex.bind(F32),
-        rec_fldWithIndexDisp8.opcodes(&MOVSS_LOAD),
-    );
-    e.enc_both(
-        load_complex.bind(F32),
-        rec_fldWithIndexDisp32.opcodes(&MOVSS_LOAD),
-    );
-
-    e.enc_both(load.bind(F64).bind(Any), rec_fld.opcodes(&MOVSD_LOAD));
-    e.enc_both(load.bind(F64).bind(Any), rec_fldDisp8.opcodes(&MOVSD_LOAD));
-    e.enc_both(load.bind(F64).bind(Any), rec_fldDisp32.opcodes(&MOVSD_LOAD));
-
-    e.enc_both(
-        load_complex.bind(F64),
-        rec_fldWithIndex.opcodes(&MOVSD_LOAD),
-    );
-    e.enc_both(
-        load_complex.bind(F64),
-        rec_fldWithIndexDisp8.opcodes(&MOVSD_LOAD),
-    );
-    e.enc_both(
-        load_complex.bind(F64),
-        rec_fldWithIndexDisp32.opcodes(&MOVSD_LOAD),
-    );
-
-    e.enc_both(store.bind(F32).bind(Any), rec_fst.opcodes(&MOVSS_STORE));
-    e.enc_both(
-        store.bind(F32).bind(Any),
-        rec_fstDisp8.opcodes(&MOVSS_STORE),
-    );
-    e.enc_both(
-        store.bind(F32).bind(Any),
-        rec_fstDisp32.opcodes(&MOVSS_STORE),
-    );
-
-    e.enc_both(
-        store_complex.bind(F32),
-        rec_fstWithIndex.opcodes(&MOVSS_STORE),
-    );
-    e.enc_both(
-        store_complex.bind(F32),
-        rec_fstWithIndexDisp8.opcodes(&MOVSS_STORE),
-    );
-    e.enc_both(
-        store_complex.bind(F32),
-        rec_fstWithIndexDisp32.opcodes(&MOVSS_STORE),
-    );
-
-    e.enc_both(store.bind(F64).bind(Any), rec_fst.opcodes(&MOVSD_STORE));
-    e.enc_both(
-        store.bind(F64).bind(Any),
-        rec_fstDisp8.opcodes(&MOVSD_STORE),
-    );
-    e.enc_both(
-        store.bind(F64).bind(Any),
-        rec_fstDisp32.opcodes(&MOVSD_STORE),
-    );
-
-    e.enc_both(
-        store_complex.bind(F64),
-        rec_fstWithIndex.opcodes(&MOVSD_STORE),
-    );
-    e.enc_both(
-        store_complex.bind(F64),
-        rec_fstWithIndexDisp8.opcodes(&MOVSD_STORE),
-    );
-    e.enc_both(
-        store_complex.bind(F64),
-        rec_fstWithIndexDisp32.opcodes(&MOVSD_STORE),
-    );
-
-    e.enc_both(fill.bind(F32), rec_ffillSib32.opcodes(&MOVSS_LOAD));
-    e.enc_both(regfill.bind(F32), rec_fregfill32.opcodes(&MOVSS_LOAD));
-    e.enc_both(fill.bind(F64), rec_ffillSib32.opcodes(&MOVSD_LOAD));
-    e.enc_both(regfill.bind(F64), rec_fregfill32.opcodes(&MOVSD_LOAD));
-
-    e.enc_both(spill.bind(F32), rec_fspillSib32.opcodes(&MOVSS_STORE));
-    e.enc_both(regspill.bind(F32), rec_fregspill32.opcodes(&MOVSS_STORE));
-    e.enc_both(spill.bind(F64), rec_fspillSib32.opcodes(&MOVSD_STORE));
-    e.enc_both(regspill.bind(F64), rec_fregspill32.opcodes(&MOVSD_STORE));
-}
-
-#[inline(never)]
-fn define_fpu_ops(
-    e: &mut PerCpuModeEncodings,
-    shared_defs: &SharedDefinitions,
-    settings: &SettingGroup,
-    x86: &InstructionGroup,
-    r: &RecipeGroup,
-) {
-    let shared = &shared_defs.instructions;
-    let formats = &shared_defs.formats;
-
-    // Shorthands for instructions.
-    let ceil = shared.by_name("ceil");
-    let f32const = shared.by_name("f32const");
-    let f64const = shared.by_name("f64const");
-    let fadd = shared.by_name("fadd");
-    let fcmp = shared.by_name("fcmp");
-    let fcvt_from_sint = shared.by_name("fcvt_from_sint");
-    let fdemote = shared.by_name("fdemote");
-    let fdiv = shared.by_name("fdiv");
-    let ffcmp = shared.by_name("ffcmp");
-    let floor = shared.by_name("floor");
-    let fmul = shared.by_name("fmul");
-    let fpromote = shared.by_name("fpromote");
-    let fsub = shared.by_name("fsub");
-    let nearest = shared.by_name("nearest");
-    let sqrt = shared.by_name("sqrt");
-    let trunc = shared.by_name("trunc");
-    let x86_cvtt2si = x86.by_name("x86_cvtt2si");
-    let x86_fmax = x86.by_name("x86_fmax");
-    let x86_fmin = x86.by_name("x86_fmin");
-
-    // Shorthands for recipes.
-    let rec_f32imm_z = r.template("f32imm_z");
-    let rec_f64imm_z = r.template("f64imm_z");
-    let rec_fa = r.template("fa");
-    let rec_fcmp = r.template("fcmp");
-    let rec_fcscc = r.template("fcscc");
-    let rec_frurm = r.template("frurm");
-    let rec_furm = r.template("furm");
-    let rec_furmi_rnd = r.template("furmi_rnd");
-    let rec_rfurm = r.template("rfurm");
-
-    // Predicates shorthands.
-    let use_sse41 = settings.predicate_by_name("use_sse41");
-
-    // Floating-point constants equal to 0.0 can be encoded using either `xorps` or `xorpd`, for
-    // 32-bit and 64-bit floats respectively.
-    let is_zero_32_bit_float =
-        InstructionPredicate::new_is_zero_32bit_float(&*formats.unary_ieee32, "imm");
-    e.enc32_instp(
-        f32const,
-        rec_f32imm_z.opcodes(&XORPS),
-        is_zero_32_bit_float.clone(),
-    );
-
-    let is_zero_64_bit_float =
-        InstructionPredicate::new_is_zero_64bit_float(&*formats.unary_ieee64, "imm");
-    e.enc32_instp(
-        f64const,
-        rec_f64imm_z.opcodes(&XORPD),
-        is_zero_64_bit_float.clone(),
-    );
-
-    e.enc_x86_64_instp(f32const, rec_f32imm_z.opcodes(&XORPS), is_zero_32_bit_float);
-    e.enc_x86_64_instp(f64const, rec_f64imm_z.opcodes(&XORPD), is_zero_64_bit_float);
-
-    // cvtsi2ss
-    e.enc_i32_i64(fcvt_from_sint.bind(F32), rec_frurm.opcodes(&CVTSI2SS));
-
-    // cvtsi2sd
-    e.enc_i32_i64(fcvt_from_sint.bind(F64), rec_frurm.opcodes(&CVTSI2SD));
-
-    // cvtss2sd
-    e.enc_both(fpromote.bind(F64).bind(F32), rec_furm.opcodes(&CVTSS2SD));
-
-    // cvtsd2ss
-    e.enc_both(fdemote.bind(F32).bind(F64), rec_furm.opcodes(&CVTSD2SS));
-
-    // cvttss2si
-    e.enc_both(
-        x86_cvtt2si.bind(I32).bind(F32),
-        rec_rfurm.opcodes(&CVTTSS2SI),
-    );
-    e.enc64(
-        x86_cvtt2si.bind(I64).bind(F32),
-        rec_rfurm.opcodes(&CVTTSS2SI).rex().w(),
-    );
-
-    // cvttsd2si
-    e.enc_both(
-        x86_cvtt2si.bind(I32).bind(F64),
-        rec_rfurm.opcodes(&CVTTSD2SI),
-    );
-    e.enc64(
-        x86_cvtt2si.bind(I64).bind(F64),
-        rec_rfurm.opcodes(&CVTTSD2SI).rex().w(),
-    );
-
-    // Exact square roots.
-    e.enc_both(sqrt.bind(F32), rec_furm.opcodes(&SQRTSS));
-    e.enc_both(sqrt.bind(F64), rec_furm.opcodes(&SQRTSD));
-
-    // Rounding. The recipe looks at the opcode to pick an immediate.
-    for inst in &[nearest, floor, ceil, trunc] {
-        e.enc_both_isap(inst.bind(F32), rec_furmi_rnd.opcodes(&ROUNDSS), use_sse41);
-        e.enc_both_isap(inst.bind(F64), rec_furmi_rnd.opcodes(&ROUNDSD), use_sse41);
-    }
-
-    // Binary arithmetic ops.
-    e.enc_both(fadd.bind(F32), rec_fa.opcodes(&ADDSS));
-    e.enc_both(fadd.bind(F64), rec_fa.opcodes(&ADDSD));
-
-    e.enc_both(fsub.bind(F32), rec_fa.opcodes(&SUBSS));
-    e.enc_both(fsub.bind(F64), rec_fa.opcodes(&SUBSD));
-
-    e.enc_both(fmul.bind(F32), rec_fa.opcodes(&MULSS));
-    e.enc_both(fmul.bind(F64), rec_fa.opcodes(&MULSD));
-
-    e.enc_both(fdiv.bind(F32), rec_fa.opcodes(&DIVSS));
-    e.enc_both(fdiv.bind(F64), rec_fa.opcodes(&DIVSD));
-
-    e.enc_both(x86_fmin.bind(F32), rec_fa.opcodes(&MINSS));
-    e.enc_both(x86_fmin.bind(F64), rec_fa.opcodes(&MINSD));
-
-    e.enc_both(x86_fmax.bind(F32), rec_fa.opcodes(&MAXSS));
-    e.enc_both(x86_fmax.bind(F64), rec_fa.opcodes(&MAXSD));
-
-    // Comparisons.
-    //
-    // This only covers the condition codes in `supported_floatccs`, the rest are
-    // handled by legalization patterns.
-    e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(&UCOMISS));
-    e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(&UCOMISD));
-    e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(&UCOMISS));
-    e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(&UCOMISD));
-}
-
-#[inline(never)]
-fn define_alu(
-    e: &mut PerCpuModeEncodings,
-    shared_defs: &SharedDefinitions,
-    settings: &SettingGroup,
-    x86: &InstructionGroup,
-    r: &RecipeGroup,
-) {
-    let shared = &shared_defs.instructions;
-
-    // Shorthands for instructions.
-    let clz = shared.by_name("clz");
-    let ctz = shared.by_name("ctz");
-    let icmp = shared.by_name("icmp");
-    let icmp_imm = shared.by_name("icmp_imm");
-    let ifcmp = shared.by_name("ifcmp");
-    let ifcmp_imm = shared.by_name("ifcmp_imm");
-    let ifcmp_sp = shared.by_name("ifcmp_sp");
-    let ishl = shared.by_name("ishl");
-    let ishl_imm = shared.by_name("ishl_imm");
-    let popcnt = shared.by_name("popcnt");
-    let rotl = shared.by_name("rotl");
-    let rotl_imm = shared.by_name("rotl_imm");
-    let rotr = shared.by_name("rotr");
-    let rotr_imm = shared.by_name("rotr_imm");
-    let selectif = shared.by_name("selectif");
-    let selectif_spectre_guard = shared.by_name("selectif_spectre_guard");
-    let sshr = shared.by_name("sshr");
-    let sshr_imm = shared.by_name("sshr_imm");
-    let trueff = shared.by_name("trueff");
-    let trueif = shared.by_name("trueif");
-    let ushr = shared.by_name("ushr");
-    let ushr_imm = shared.by_name("ushr_imm");
-    let x86_bsf = x86.by_name("x86_bsf");
-    let x86_bsr = x86.by_name("x86_bsr");
-
-    // Shorthands for recipes.
-    let rec_bsf_and_bsr = r.template("bsf_and_bsr");
-    let rec_cmov = r.template("cmov");
-    let rec_icscc = r.template("icscc");
-    let rec_icscc_ib = r.template("icscc_ib");
-    let rec_icscc_id = r.template("icscc_id");
-    let rec_rcmp = r.template("rcmp");
-    let rec_rcmp_ib = r.template("rcmp_ib");
-    let rec_rcmp_id = r.template("rcmp_id");
-    let rec_rcmp_sp = r.template("rcmp_sp");
-    let rec_rc = r.template("rc");
-    let rec_setf_abcd = r.template("setf_abcd");
-    let rec_seti_abcd = r.template("seti_abcd");
-    let rec_urm = r.template("urm");
-
-    // Predicates shorthands.
-    let use_popcnt = settings.predicate_by_name("use_popcnt");
-    let use_lzcnt = settings.predicate_by_name("use_lzcnt");
-    let use_bmi1 = settings.predicate_by_name("use_bmi1");
-
-    let band = shared.by_name("band");
-    let band_imm = shared.by_name("band_imm");
-    let band_not = shared.by_name("band_not");
-    let bnot = shared.by_name("bnot");
-    let bor = shared.by_name("bor");
-    let bor_imm = shared.by_name("bor_imm");
-    let bxor = shared.by_name("bxor");
-    let bxor_imm = shared.by_name("bxor_imm");
-    let iadd = shared.by_name("iadd");
-    let iadd_ifcarry = shared.by_name("iadd_ifcarry");
-    let iadd_ifcin = shared.by_name("iadd_ifcin");
-    let iadd_ifcout = shared.by_name("iadd_ifcout");
-    let iadd_imm = shared.by_name("iadd_imm");
-    let imul = shared.by_name("imul");
-    let isub = shared.by_name("isub");
-    let isub_ifbin = shared.by_name("isub_ifbin");
-    let isub_ifborrow = shared.by_name("isub_ifborrow");
-    let isub_ifbout = shared.by_name("isub_ifbout");
-    let x86_sdivmodx = x86.by_name("x86_sdivmodx");
-    let x86_smulx = x86.by_name("x86_smulx");
-    let x86_udivmodx = x86.by_name("x86_udivmodx");
-    let x86_umulx = x86.by_name("x86_umulx");
-
-    let rec_div = r.template("div");
-    let rec_fa = r.template("fa");
-    let rec_fax = r.template("fax");
-    let rec_mulx = r.template("mulx");
-    let rec_r_ib = r.template("r_ib");
-    let rec_r_id = r.template("r_id");
-    let rec_rin = r.template("rin");
-    let rec_rio = r.template("rio");
-    let rec_rout = r.template("rout");
-    let rec_rr = r.template("rr");
-    let rec_rrx = r.template("rrx");
-    let rec_ur = r.template("ur");
-
-    e.enc_i32_i64(iadd, rec_rr.opcodes(&ADD));
-    e.enc_i32_i64(iadd_ifcout, rec_rout.opcodes(&ADD));
-    e.enc_i32_i64(iadd_ifcin, rec_rin.opcodes(&ADC));
-    e.enc_i32_i64(iadd_ifcarry, rec_rio.opcodes(&ADC));
-    e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(&ADD_IMM8_SIGN_EXTEND).rrr(0));
-    e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(&ADD_IMM).rrr(0));
-
-    e.enc_i32_i64(isub, rec_rr.opcodes(&SUB));
-    e.enc_i32_i64(isub_ifbout, rec_rout.opcodes(&SUB));
-    e.enc_i32_i64(isub_ifbin, rec_rin.opcodes(&SBB));
-    e.enc_i32_i64(isub_ifborrow, rec_rio.opcodes(&SBB));
-
-    e.enc_i32_i64(band, rec_rr.opcodes(&AND));
-    e.enc_b32_b64(band, rec_rr.opcodes(&AND));
-
-    // TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as band_imm.i32. Can
-    // even use the single-byte immediate for 0xffff_ffXX masks.
-
-    e.enc_i32_i64(band_imm, rec_r_ib.opcodes(&AND_IMM8_SIGN_EXTEND).rrr(4));
-    e.enc_i32_i64(band_imm, rec_r_id.opcodes(&AND_IMM).rrr(4));
-
-    e.enc_i32_i64(bor, rec_rr.opcodes(&OR));
-    e.enc_b32_b64(bor, rec_rr.opcodes(&OR));
-    e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(&OR_IMM8_SIGN_EXTEND).rrr(1));
-    e.enc_i32_i64(bor_imm, rec_r_id.opcodes(&OR_IMM).rrr(1));
-
-    e.enc_i32_i64(bxor, rec_rr.opcodes(&XOR));
-    e.enc_b32_b64(bxor, rec_rr.opcodes(&XOR));
-    e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(&XOR_IMM8_SIGN_EXTEND).rrr(6));
-    e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(&XOR_IMM).rrr(6));
-
-    // x86 has a bitwise not instruction NOT.
-    e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2));
-    e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2));
-    e.enc_both(bnot.bind(B1), rec_ur.opcodes(&NOT).rrr(2));
-
-    // Also add a `b1` encodings for the logic instructions.
-    // TODO: Should this be done with 8-bit instructions? It would improve partial register
-    // dependencies.
-    e.enc_both(band.bind(B1), rec_rr.opcodes(&AND));
-    e.enc_both(bor.bind(B1), rec_rr.opcodes(&OR));
-    e.enc_both(bxor.bind(B1), rec_rr.opcodes(&XOR));
-
-    e.enc_i32_i64(imul, rec_rrx.opcodes(&IMUL));
-    e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(&IDIV).rrr(7));
-    e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(&DIV).rrr(6));
-
-    e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(&IMUL_RDX_RAX).rrr(5));
-    e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(&MUL).rrr(4));
-
-    // Binary bitwise ops.
-    //
-    // The F64 version is intentionally encoded using the single-precision opcode:
-    // the operation is identical and the encoding is one byte shorter.
-    e.enc_both(band.bind(F32), rec_fa.opcodes(&ANDPS));
-    e.enc_both(band.bind(F64), rec_fa.opcodes(&ANDPS));
-
-    e.enc_both(bor.bind(F32), rec_fa.opcodes(&ORPS));
-    e.enc_both(bor.bind(F64), rec_fa.opcodes(&ORPS));
-
-    e.enc_both(bxor.bind(F32), rec_fa.opcodes(&XORPS));
-    e.enc_both(bxor.bind(F64), rec_fa.opcodes(&XORPS));
-
-    // The `andnps(x,y)` instruction computes `~x&y`, while band_not(x,y)` is `x&~y.
-    e.enc_both(band_not.bind(F32), rec_fax.opcodes(&ANDNPS));
-    e.enc_both(band_not.bind(F64), rec_fax.opcodes(&ANDNPS));
-
-    // Shifts and rotates.
-    // Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit
-    // and 16-bit shifts would need explicit masking.
-
-    for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] {
-        // Cannot use enc_i32_i64 for this pattern because instructions require
-        // to bind any.
-        e.enc32(inst.bind(I32).bind(I8), rec_rc.opcodes(&ROTATE_CL).rrr(rrr));
-        e.enc32(
-            inst.bind(I32).bind(I16),
-            rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
-        );
-        e.enc32(
-            inst.bind(I32).bind(I32),
-            rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
-        );
-        e.enc64(
-            inst.bind(I64).bind(Any),
-            rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex().w(),
-        );
-        e.enc64(
-            inst.bind(I32).bind(Any),
-            rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex(),
-        );
-        e.enc64(
-            inst.bind(I32).bind(Any),
-            rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
-        );
-    }
-
-    e.enc_i32_i64(rotl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(0));
-    e.enc_i32_i64(rotr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(1));
-    e.enc_i32_i64(ishl_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(4));
-    e.enc_i32_i64(ushr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(5));
-    e.enc_i32_i64(sshr_imm, rec_r_ib.opcodes(&ROTATE_IMM8).rrr(7));
-
-    // Population count.
-    e.enc32_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt);
-    e.enc64_isap(
-        popcnt.bind(I64),
-        rec_urm.opcodes(&POPCNT).rex().w(),
-        use_popcnt,
-    );
-    e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT).rex(), use_popcnt);
-    e.enc64_isap(popcnt.bind(I32), rec_urm.opcodes(&POPCNT), use_popcnt);
-
-    // Count leading zero bits.
-    e.enc32_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt);
-    e.enc64_isap(clz.bind(I64), rec_urm.opcodes(&LZCNT).rex().w(), use_lzcnt);
-    e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT).rex(), use_lzcnt);
-    e.enc64_isap(clz.bind(I32), rec_urm.opcodes(&LZCNT), use_lzcnt);
-
-    // Count trailing zero bits.
-    e.enc32_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1);
-    e.enc64_isap(ctz.bind(I64), rec_urm.opcodes(&TZCNT).rex().w(), use_bmi1);
-    e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT).rex(), use_bmi1);
-    e.enc64_isap(ctz.bind(I32), rec_urm.opcodes(&TZCNT), use_bmi1);
-
-    // Bit scan forwards and reverse
-    e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(&BIT_SCAN_FORWARD));
-    e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(&BIT_SCAN_REVERSE));
-
-    // Comparisons
-    e.enc_i32_i64(icmp, rec_icscc.opcodes(&CMP_REG));
-    e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(&CMP_IMM8).rrr(7));
-    e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(&CMP_IMM).rrr(7));
-    e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(&CMP_REG));
-    e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(&CMP_IMM8).rrr(7));
-    e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(&CMP_IMM).rrr(7));
-    // TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x).
-
-    e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(&CMP_REG));
-    e.enc64(ifcmp_sp.bind(I64), rec_rcmp_sp.opcodes(&CMP_REG).rex().w());
-
-    // Convert flags to bool.
-    // This encodes `b1` as an 8-bit low register with the value 0 or 1.
-    e.enc_both(trueif, rec_seti_abcd.opcodes(&SET_BYTE_IF_OVERFLOW));
-    e.enc_both(trueff, rec_setf_abcd.opcodes(&SET_BYTE_IF_OVERFLOW));
-
-    // Conditional move (a.k.a integer select).
-    e.enc_i32_i64(selectif, rec_cmov.opcodes(&CMOV_OVERFLOW));
-    // A Spectre-guard integer select is exactly the same as a selectif, but
-    // is not associated with any other legalization rules and is not
-    // recognized by any optimizations, so it must arrive here unmodified
-    // and in its original place.
-    e.enc_i32_i64(selectif_spectre_guard, rec_cmov.opcodes(&CMOV_OVERFLOW));
-}
-
-#[inline(never)]
-#[allow(clippy::cognitive_complexity)]
-fn define_simd(
-    e: &mut PerCpuModeEncodings,
-    shared_defs: &SharedDefinitions,
-    settings: &SettingGroup,
-    x86: &InstructionGroup,
-    r: &RecipeGroup,
-) {
-    let shared = &shared_defs.instructions;
-    let formats = &shared_defs.formats;
-
-    // Shorthands for instructions.
-    let avg_round = shared.by_name("avg_round");
-    let bitcast = shared.by_name("bitcast");
-    let bor = shared.by_name("bor");
-    let bxor = shared.by_name("bxor");
-    let copy = shared.by_name("copy");
-    let copy_nop = shared.by_name("copy_nop");
-    let copy_to_ssa = shared.by_name("copy_to_ssa");
-    let fadd = shared.by_name("fadd");
-    let fcmp = shared.by_name("fcmp");
-    let fcvt_from_sint = shared.by_name("fcvt_from_sint");
-    let fdiv = shared.by_name("fdiv");
-    let fill = shared.by_name("fill");
-    let fill_nop = shared.by_name("fill_nop");
-    let fmul = shared.by_name("fmul");
-    let fsub = shared.by_name("fsub");
-    let iabs = shared.by_name("iabs");
-    let iadd = shared.by_name("iadd");
-    let icmp = shared.by_name("icmp");
-    let imul = shared.by_name("imul");
-    let ishl_imm = shared.by_name("ishl_imm");
-    let load = shared.by_name("load");
-    let load_complex = shared.by_name("load_complex");
-    let raw_bitcast = shared.by_name("raw_bitcast");
-    let regfill = shared.by_name("regfill");
-    let regmove = shared.by_name("regmove");
-    let regspill = shared.by_name("regspill");
-    let sadd_sat = shared.by_name("sadd_sat");
-    let scalar_to_vector = shared.by_name("scalar_to_vector");
-    let sload8x8 = shared.by_name("sload8x8");
-    let sload8x8_complex = shared.by_name("sload8x8_complex");
-    let sload16x4 = shared.by_name("sload16x4");
-    let sload16x4_complex = shared.by_name("sload16x4_complex");
-    let sload32x2 = shared.by_name("sload32x2");
-    let sload32x2_complex = shared.by_name("sload32x2_complex");
-    let spill = shared.by_name("spill");
-    let sqrt = shared.by_name("sqrt");
-    let sshr_imm = shared.by_name("sshr_imm");
-    let ssub_sat = shared.by_name("ssub_sat");
-    let store = shared.by_name("store");
-    let store_complex = shared.by_name("store_complex");
-    let swiden_low = shared.by_name("swiden_low");
-    let uadd_sat = shared.by_name("uadd_sat");
-    let uload8x8 = shared.by_name("uload8x8");
-    let uload8x8_complex = shared.by_name("uload8x8_complex");
-    let uload16x4 = shared.by_name("uload16x4");
-    let uload16x4_complex = shared.by_name("uload16x4_complex");
-    let uload32x2 = shared.by_name("uload32x2");
-    let uload32x2_complex = shared.by_name("uload32x2_complex");
-    let snarrow = shared.by_name("snarrow");
-    let unarrow = shared.by_name("unarrow");
-    let uwiden_low = shared.by_name("uwiden_low");
-    let ushr_imm = shared.by_name("ushr_imm");
-    let usub_sat = shared.by_name("usub_sat");
-    let vconst = shared.by_name("vconst");
-    let vselect = shared.by_name("vselect");
-    let widening_pairwise_dot_product_s = shared.by_name("widening_pairwise_dot_product_s");
-    let x86_cvtt2si = x86.by_name("x86_cvtt2si");
-    let x86_insertps = x86.by_name("x86_insertps");
-    let x86_fmax = x86.by_name("x86_fmax");
-    let x86_fmin = x86.by_name("x86_fmin");
-    let x86_movlhps = x86.by_name("x86_movlhps");
-    let x86_movsd = x86.by_name("x86_movsd");
-    let x86_pblendw = x86.by_name("x86_pblendw");
-    let x86_pextr = x86.by_name("x86_pextr");
-    let x86_pinsr = x86.by_name("x86_pinsr");
-    let x86_pmaxs = x86.by_name("x86_pmaxs");
-    let x86_pmaxu = x86.by_name("x86_pmaxu");
-    let x86_pmins = x86.by_name("x86_pmins");
-    let x86_pminu = x86.by_name("x86_pminu");
-    let x86_pmullq = x86.by_name("x86_pmullq");
-    let x86_pmuludq = x86.by_name("x86_pmuludq");
-    let x86_palignr = x86.by_name("x86_palignr");
-    let x86_pshufb = x86.by_name("x86_pshufb");
-    let x86_pshufd = x86.by_name("x86_pshufd");
-    let x86_psll = x86.by_name("x86_psll");
-    let x86_psra = x86.by_name("x86_psra");
-    let x86_psrl = x86.by_name("x86_psrl");
-    let x86_ptest = x86.by_name("x86_ptest");
-    let x86_punpckh = x86.by_name("x86_punpckh");
-    let x86_punpckl = x86.by_name("x86_punpckl");
-    let x86_vcvtudq2ps = x86.by_name("x86_vcvtudq2ps");
-
-    // Shorthands for recipes.
-    let rec_blend = r.template("blend");
-    let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
-    let rec_evex_reg_rm_128 = r.template("evex_reg_rm_128");
-    let rec_f_ib = r.template("f_ib");
-    let rec_fa = r.template("fa");
-    let rec_fa_ib = r.template("fa_ib");
-    let rec_fax = r.template("fax");
-    let rec_fcmp = r.template("fcmp");
-    let rec_ffillSib32 = r.template("ffillSib32");
-    let rec_ffillnull = r.recipe("ffillnull");
-    let rec_fld = r.template("fld");
-    let rec_fldDisp32 = r.template("fldDisp32");
-    let rec_fldDisp8 = r.template("fldDisp8");
-    let rec_fldWithIndex = r.template("fldWithIndex");
-    let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32");
-    let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8");
-    let rec_fregfill32 = r.template("fregfill32");
-    let rec_fregspill32 = r.template("fregspill32");
-    let rec_frmov = r.template("frmov");
-    let rec_frurm = r.template("frurm");
-    let rec_fspillSib32 = r.template("fspillSib32");
-    let rec_fst = r.template("fst");
-    let rec_fstDisp32 = r.template("fstDisp32");
-    let rec_fstDisp8 = r.template("fstDisp8");
-    let rec_fstWithIndex = r.template("fstWithIndex");
-    let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32");
-    let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8");
-    let rec_furm = r.template("furm");
-    let rec_furm_reg_to_ssa = r.template("furm_reg_to_ssa");
-    let rec_icscc_fpr = r.template("icscc_fpr");
-    let rec_null_fpr = r.recipe("null_fpr");
-    let rec_pfcmp = r.template("pfcmp");
-    let rec_r_ib_unsigned_fpr = r.template("r_ib_unsigned_fpr");
-    let rec_r_ib_unsigned_gpr = r.template("r_ib_unsigned_gpr");
-    let rec_r_ib_unsigned_r = r.template("r_ib_unsigned_r");
-    let rec_stacknull = r.recipe("stacknull");
-    let rec_vconst = r.template("vconst");
-    let rec_vconst_optimized = r.template("vconst_optimized");
-
-    // Predicates shorthands.
-    settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic");
-    settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic");
-    let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd");
-    let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
-    let use_sse42_simd = settings.predicate_by_name("use_sse42_simd");
-    let use_avx512dq_simd = settings.predicate_by_name("use_avx512dq_simd");
-    let use_avx512vl_simd = settings.predicate_by_name("use_avx512vl_simd");
-
-    // SIMD vector size: eventually multiple vector sizes may be supported but for now only
-    // SSE-sized vectors are available.
-    let sse_vector_size: u64 = 128;
-
-    // SIMD splat: before x86 can use vector data, it must be moved to XMM registers; see
-    // legalize.rs for how this is done; once there, x86_pshuf* (below) is used for broadcasting the
-    // value across the register.
-
-    let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128;
-
-    // PSHUFB, 8-bit shuffle using two XMM registers.
-    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let instruction = x86_pshufb.bind(vector(ty, sse_vector_size));
-        let template = rec_fa.opcodes(&PSHUFB);
-        e.enc_both_inferred_maybe_isap(instruction.clone(), template.clone(), Some(use_ssse3_simd));
-    }
-
-    // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate.
-    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
-        let instruction = x86_pshufd.bind(vector(ty, sse_vector_size));
-        let template = rec_r_ib_unsigned_fpr.opcodes(&PSHUFD);
-        e.enc_both_inferred(instruction, template);
-    }
-
-    // SIMD vselect; controlling value of vselect is a boolean vector, so each lane should be
-    // either all ones or all zeroes - it makes it possible to always use 8-bit PBLENDVB;
-    // for 32/64-bit lanes we can also use BLENDVPS and BLENDVPD
-    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let opcode = match ty.lane_bits() {
-            32 => &BLENDVPS,
-            64 => &BLENDVPD,
-            _ => &PBLENDVB,
-        };
-        let instruction = vselect.bind(vector(ty, sse_vector_size));
-        let template = rec_blend.opcodes(opcode);
-        e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
-    }
-
-    // PBLENDW, select lanes using a u8 immediate.
-    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) {
-        let instruction = x86_pblendw.bind(vector(ty, sse_vector_size));
-        let template = rec_fa_ib.opcodes(&PBLENDW);
-        e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
-    }
-
-    // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
-    // to the Intel manual: "When the destination operand is an XMM register, the source operand is
-    // written to the low doubleword of the register and the register is zero-extended to 128 bits."
-    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let instruction = scalar_to_vector.bind(vector(ty, sse_vector_size));
-        if ty.is_float() {
-            // No need to move floats--they already live in XMM registers.
-            e.enc_32_64_rec(instruction, rec_null_fpr, 0);
-        } else {
-            let template = rec_frurm.opcodes(&MOVD_LOAD_XMM);
-            if ty.lane_bits() < 64 {
-                e.enc_both_inferred(instruction, template);
-            } else {
-                // No 32-bit encodings for 64-bit widths.
-                assert_eq!(ty.lane_bits(), 64);
-                e.enc64(instruction, template.rex().w());
-            }
-        }
-    }
-
-    // SIMD insertlane
-    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let (opcode, isap): (&[_], _) = match ty.lane_bits() {
-            8 => (&PINSRB, Some(use_sse41_simd)),
-            16 => (&PINSRW, None),
-            32 | 64 => (&PINSR, Some(use_sse41_simd)),
-            _ => panic!("invalid size for SIMD insertlane"),
-        };
-
-        let instruction = x86_pinsr.bind(vector(ty, sse_vector_size));
-        let template = rec_r_ib_unsigned_r.opcodes(opcode);
-        if ty.lane_bits() < 64 {
-            e.enc_both_inferred_maybe_isap(instruction, template, isap);
-        } else {
-            // It turns out the 64-bit widths have REX/W encodings and only are available on
-            // x86_64.
-            e.enc64_maybe_isap(instruction, template.rex().w(), isap);
-        }
-    }
-
-    // For legalizing insertlane with floats, INSERTPS from SSE4.1.
-    {
-        let instruction = x86_insertps.bind(vector(F32, sse_vector_size));
-        let template = rec_fa_ib.opcodes(&INSERTPS);
-        e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
-    }
-
-    // For legalizing insertlane with floats,  MOVSD from SSE2.
-    {
-        let instruction = x86_movsd.bind(vector(F64, sse_vector_size));
-        let template = rec_fa.opcodes(&MOVSD_LOAD);
-        e.enc_both_inferred(instruction, template); // from SSE2
-    }
-
-    // For legalizing insertlane with floats, MOVLHPS from SSE.
-    {
-        let instruction = x86_movlhps.bind(vector(F64, sse_vector_size));
-        let template = rec_fa.opcodes(&MOVLHPS);
-        e.enc_both_inferred(instruction, template); // from SSE
-    }
-
-    // SIMD extractlane
-    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let opcode = match ty.lane_bits() {
-            8 => &PEXTRB,
-            16 => &PEXTRW,
-            32 | 64 => &PEXTR,
-            _ => panic!("invalid size for SIMD extractlane"),
-        };
-
-        let instruction = x86_pextr.bind(vector(ty, sse_vector_size));
-        let template = rec_r_ib_unsigned_gpr.opcodes(opcode);
-        if ty.lane_bits() < 64 {
-            e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
-        } else {
-            // It turns out the 64-bit widths have REX/W encodings and only are available on
-            // x86_64.
-            e.enc64_maybe_isap(instruction, template.rex().w(), Some(use_sse41_simd));
-        }
-    }
-
-    // SIMD packing/unpacking
-    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let (high, low) = match ty.lane_bits() {
-            8 => (&PUNPCKHBW, &PUNPCKLBW),
-            16 => (&PUNPCKHWD, &PUNPCKLWD),
-            32 => (&PUNPCKHDQ, &PUNPCKLDQ),
-            64 => (&PUNPCKHQDQ, &PUNPCKLQDQ),
-            _ => panic!("invalid size for SIMD packing/unpacking"),
-        };
-
-        e.enc_both_inferred(
-            x86_punpckh.bind(vector(ty, sse_vector_size)),
-            rec_fa.opcodes(high),
-        );
-        e.enc_both_inferred(
-            x86_punpckl.bind(vector(ty, sse_vector_size)),
-            rec_fa.opcodes(low),
-        );
-    }
-
-    // SIMD narrow/widen
-    for (ty, opcodes) in &[(I16, &PACKSSWB), (I32, &PACKSSDW)] {
-        let snarrow = snarrow.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred(snarrow, rec_fa.opcodes(*opcodes));
-    }
-    for (ty, opcodes, isap) in &[
-        (I16, &PACKUSWB[..], None),
-        (I32, &PACKUSDW[..], Some(use_sse41_simd)),
-    ] {
-        let unarrow = unarrow.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred_maybe_isap(unarrow, rec_fa.opcodes(*opcodes), *isap);
-    }
-    for (ty, swiden_opcode, uwiden_opcode) in &[
-        (I8, &PMOVSXBW[..], &PMOVZXBW[..]),
-        (I16, &PMOVSXWD[..], &PMOVZXWD[..]),
-    ] {
-        let isap = Some(use_sse41_simd);
-        let swiden_low = swiden_low.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred_maybe_isap(swiden_low, rec_furm.opcodes(*swiden_opcode), isap);
-        let uwiden_low = uwiden_low.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred_maybe_isap(uwiden_low, rec_furm.opcodes(*uwiden_opcode), isap);
-    }
-    for ty in &[I8, I16, I32, I64] {
-        e.enc_both_inferred_maybe_isap(
-            x86_palignr.bind(vector(*ty, sse_vector_size)),
-            rec_fa_ib.opcodes(&PALIGNR[..]),
-            Some(use_ssse3_simd),
-        );
-    }
-
-    // SIMD bitcast all 128-bit vectors to each other (for legalizing splat.x16x8).
-    for from_type in ValueType::all_lane_types().filter(allowed_simd_type) {
-        for to_type in
-            ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type)
-        {
-            let instruction = raw_bitcast
-                .bind(vector(to_type, sse_vector_size))
-                .bind(vector(from_type, sse_vector_size));
-            e.enc_32_64_rec(instruction, rec_null_fpr, 0);
-        }
-    }
-
-    // SIMD raw bitcast floats to vector (and back); assumes that floats are already stored in an
-    // XMM register.
-    for float_type in &[F32, F64] {
-        for lane_type in ValueType::all_lane_types().filter(allowed_simd_type) {
-            e.enc_32_64_rec(
-                raw_bitcast
-                    .bind(vector(lane_type, sse_vector_size))
-                    .bind(*float_type),
-                rec_null_fpr,
-                0,
-            );
-            e.enc_32_64_rec(
-                raw_bitcast
-                    .bind(*float_type)
-                    .bind(vector(lane_type, sse_vector_size)),
-                rec_null_fpr,
-                0,
-            );
-        }
-    }
-
-    // SIMD conversions
-    {
-        let fcvt_from_sint_32 = fcvt_from_sint
-            .bind(vector(F32, sse_vector_size))
-            .bind(vector(I32, sse_vector_size));
-        e.enc_both(fcvt_from_sint_32, rec_furm.opcodes(&CVTDQ2PS));
-
-        e.enc_32_64_maybe_isap(
-            x86_vcvtudq2ps,
-            rec_evex_reg_rm_128.opcodes(&VCVTUDQ2PS),
-            Some(use_avx512vl_simd), // TODO need an OR predicate to join with AVX512F
-        );
-
-        e.enc_both_inferred(
-            x86_cvtt2si
-                .bind(vector(I32, sse_vector_size))
-                .bind(vector(F32, sse_vector_size)),
-            rec_furm.opcodes(&CVTTPS2DQ),
-        );
-    }
-
-    // SIMD vconst for special cases (all zeroes, all ones)
-    // this must be encoded prior to the MOVUPS implementation (below) so the compiler sees this
-    // encoding first
-    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let instruction = vconst.bind(vector(ty, sse_vector_size));
-
-        let is_zero_128bit =
-            InstructionPredicate::new_is_all_zeroes(&*formats.unary_const, "constant_handle");
-        let template = rec_vconst_optimized.opcodes(&PXOR).infer_rex();
-        e.enc_32_64_func(instruction.clone(), template, |builder| {
-            builder.inst_predicate(is_zero_128bit)
-        });
-
-        let is_ones_128bit =
-            InstructionPredicate::new_is_all_ones(&*formats.unary_const, "constant_handle");
-        let template = rec_vconst_optimized.opcodes(&PCMPEQB).infer_rex();
-        e.enc_32_64_func(instruction, template, |builder| {
-            builder.inst_predicate(is_ones_128bit)
-        });
-    }
-
-    // SIMD vconst using MOVUPS
-    // TODO it would be ideal if eventually this became the more efficient MOVAPS but we would have
-    // to guarantee that the constants are aligned when emitted and there is currently no mechanism
-    // for that; alternately, constants could be loaded into XMM registers using a sequence like:
-    // MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored
-    // in memory) but some performance measurements are needed.
-    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let instruction = vconst.bind(vector(ty, sse_vector_size));
-        let template = rec_vconst.opcodes(&MOVUPS_LOAD);
-        e.enc_both_inferred(instruction, template); // from SSE
-    }
-
-    // SIMD register movement: store, load, spill, fill, regmove, etc. All of these use encodings of
-    // MOVUPS and MOVAPS from SSE (TODO ideally all of these would either use MOVAPS when we have
-    // alignment or type-specific encodings, see https://github.com/bytecodealliance/wasmtime/issues/1124).
-    // Also, it would be ideal to infer REX prefixes for all of these instructions but for the
-    // time being only instructions with common recipes have `infer_rex()` support.
-    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        // Store
-        let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any);
-        e.enc_both_inferred(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE));
-        e.enc_both_inferred(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE));
-        e.enc_both_inferred(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE));
-
-        // Store complex
-        let bound_store_complex = store_complex.bind(vector(ty, sse_vector_size));
-        e.enc_both(
-            bound_store_complex.clone(),
-            rec_fstWithIndex.opcodes(&MOVUPS_STORE),
-        );
-        e.enc_both(
-            bound_store_complex.clone(),
-            rec_fstWithIndexDisp8.opcodes(&MOVUPS_STORE),
-        );
-        e.enc_both(
-            bound_store_complex,
-            rec_fstWithIndexDisp32.opcodes(&MOVUPS_STORE),
-        );
-
-        // Load
-        let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any);
-        e.enc_both_inferred(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD));
-        e.enc_both_inferred(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD));
-        e.enc_both_inferred(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD));
-
-        // Load complex
-        let bound_load_complex = load_complex.bind(vector(ty, sse_vector_size));
-        e.enc_both(
-            bound_load_complex.clone(),
-            rec_fldWithIndex.opcodes(&MOVUPS_LOAD),
-        );
-        e.enc_both(
-            bound_load_complex.clone(),
-            rec_fldWithIndexDisp8.opcodes(&MOVUPS_LOAD),
-        );
-        e.enc_both(
-            bound_load_complex,
-            rec_fldWithIndexDisp32.opcodes(&MOVUPS_LOAD),
-        );
-
-        // Spill
-        let bound_spill = spill.bind(vector(ty, sse_vector_size));
-        e.enc_both(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE));
-        let bound_regspill = regspill.bind(vector(ty, sse_vector_size));
-        e.enc_both(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE));
-
-        // Fill
-        let bound_fill = fill.bind(vector(ty, sse_vector_size));
-        e.enc_both(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD));
-        let bound_regfill = regfill.bind(vector(ty, sse_vector_size));
-        e.enc_both(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD));
-        let bound_fill_nop = fill_nop.bind(vector(ty, sse_vector_size));
-        e.enc_32_64_rec(bound_fill_nop, rec_ffillnull, 0);
-
-        // Regmove
-        let bound_regmove = regmove.bind(vector(ty, sse_vector_size));
-        e.enc_both(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD));
-
-        // Copy
-        let bound_copy = copy.bind(vector(ty, sse_vector_size));
-        e.enc_both(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD));
-        let bound_copy_to_ssa = copy_to_ssa.bind(vector(ty, sse_vector_size));
-        e.enc_both(bound_copy_to_ssa, rec_furm_reg_to_ssa.opcodes(&MOVAPS_LOAD));
-        let bound_copy_nop = copy_nop.bind(vector(ty, sse_vector_size));
-        e.enc_32_64_rec(bound_copy_nop, rec_stacknull, 0);
-    }
-
-    // SIMD load extend
-    for (inst, opcodes) in &[
-        (uload8x8, &PMOVZXBW),
-        (uload16x4, &PMOVZXWD),
-        (uload32x2, &PMOVZXDQ),
-        (sload8x8, &PMOVSXBW),
-        (sload16x4, &PMOVSXWD),
-        (sload32x2, &PMOVSXDQ),
-    ] {
-        let isap = Some(use_sse41_simd);
-        for recipe in &[rec_fld, rec_fldDisp8, rec_fldDisp32] {
-            let inst = *inst;
-            let template = recipe.opcodes(*opcodes);
-            e.enc_both_inferred_maybe_isap(inst.clone().bind(I32), template.clone(), isap);
-            e.enc64_maybe_isap(inst.bind(I64), template.infer_rex(), isap);
-        }
-    }
-
-    // SIMD load extend (complex addressing)
-    let is_load_complex_length_two =
-        InstructionPredicate::new_length_equals(&*formats.load_complex, 2);
-    for (inst, opcodes) in &[
-        (uload8x8_complex, &PMOVZXBW),
-        (uload16x4_complex, &PMOVZXWD),
-        (uload32x2_complex, &PMOVZXDQ),
-        (sload8x8_complex, &PMOVSXBW),
-        (sload16x4_complex, &PMOVSXWD),
-        (sload32x2_complex, &PMOVSXDQ),
-    ] {
-        for recipe in &[
-            rec_fldWithIndex,
-            rec_fldWithIndexDisp8,
-            rec_fldWithIndexDisp32,
-        ] {
-            let template = recipe.opcodes(*opcodes);
-            let predicate = |encoding: EncodingBuilder| {
-                encoding
-                    .isa_predicate(use_sse41_simd)
-                    .inst_predicate(is_load_complex_length_two.clone())
-            };
-            e.enc32_func(inst.clone(), template.clone(), predicate);
-            // No infer_rex calculator for these recipes; place REX version first as in enc_x86_64.
-            e.enc64_func(inst.clone(), template.rex(), predicate);
-            e.enc64_func(inst.clone(), template, predicate);
-        }
-    }
-
-    // SIMD integer addition
-    for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
-        let iadd = iadd.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred(iadd, rec_fa.opcodes(*opcodes));
-    }
-
-    // SIMD integer saturating addition
-    e.enc_both_inferred(
-        sadd_sat.bind(vector(I8, sse_vector_size)),
-        rec_fa.opcodes(&PADDSB),
-    );
-    e.enc_both_inferred(
-        sadd_sat.bind(vector(I16, sse_vector_size)),
-        rec_fa.opcodes(&PADDSW),
-    );
-    e.enc_both_inferred(
-        uadd_sat.bind(vector(I8, sse_vector_size)),
-        rec_fa.opcodes(&PADDUSB),
-    );
-    e.enc_both_inferred(
-        uadd_sat.bind(vector(I16, sse_vector_size)),
-        rec_fa.opcodes(&PADDUSW),
-    );
-
-    // SIMD integer subtraction
-    let isub = shared.by_name("isub");
-    for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] {
-        let isub = isub.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred(isub, rec_fa.opcodes(*opcodes));
-    }
-
-    // SIMD integer saturating subtraction
-    e.enc_both_inferred(
-        ssub_sat.bind(vector(I8, sse_vector_size)),
-        rec_fa.opcodes(&PSUBSB),
-    );
-    e.enc_both_inferred(
-        ssub_sat.bind(vector(I16, sse_vector_size)),
-        rec_fa.opcodes(&PSUBSW),
-    );
-    e.enc_both_inferred(
-        usub_sat.bind(vector(I8, sse_vector_size)),
-        rec_fa.opcodes(&PSUBUSB),
-    );
-    e.enc_both_inferred(
-        usub_sat.bind(vector(I16, sse_vector_size)),
-        rec_fa.opcodes(&PSUBUSW),
-    );
-
-    // SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16
-    // and I64x2 and these are (at the time of writing) not necessary for WASM SIMD.
-    for (ty, opcodes, isap) in &[
-        (I16, &PMULLW[..], None),
-        (I32, &PMULLD[..], Some(use_sse41_simd)),
-    ] {
-        let imul = imul.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
-    }
-
-    // SIMD multiplication with lane expansion.
-    e.enc_both_inferred(x86_pmuludq, rec_fa.opcodes(&PMULUDQ));
-
-    // SIMD multiplication and add adjacent pairs, from SSE2.
-    e.enc_both_inferred(widening_pairwise_dot_product_s, rec_fa.opcodes(&PMADDWD));
-
-    // SIMD integer multiplication for I64x2 using a AVX512.
-    {
-        e.enc_32_64_maybe_isap(
-            x86_pmullq,
-            rec_evex_reg_vvvv_rm_128.opcodes(&VPMULLQ).w(),
-            Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL
-        );
-    }
-
-    // SIMD integer average with rounding.
-    for (ty, opcodes) in &[(I8, &PAVGB[..]), (I16, &PAVGW[..])] {
-        let avgr = avg_round.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred(avgr, rec_fa.opcodes(opcodes));
-    }
-
-    // SIMD integer absolute value.
-    for (ty, opcodes) in &[(I8, &PABSB[..]), (I16, &PABSW[..]), (I32, &PABSD)] {
-        let iabs = iabs.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred_maybe_isap(iabs, rec_furm.opcodes(opcodes), Some(use_ssse3_simd));
-    }
-
-    // SIMD logical operations
-    let band = shared.by_name("band");
-    let band_not = shared.by_name("band_not");
-    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        // and
-        let band = band.bind(vector(ty, sse_vector_size));
-        e.enc_both_inferred(band, rec_fa.opcodes(&PAND));
-
-        // and not (note flipped recipe operands to match band_not order)
-        let band_not = band_not.bind(vector(ty, sse_vector_size));
-        e.enc_both_inferred(band_not, rec_fax.opcodes(&PANDN));
-
-        // or
-        let bor = bor.bind(vector(ty, sse_vector_size));
-        e.enc_both_inferred(bor, rec_fa.opcodes(&POR));
-
-        // xor
-        let bxor = bxor.bind(vector(ty, sse_vector_size));
-        e.enc_both_inferred(bxor, rec_fa.opcodes(&PXOR));
-
-        // ptest
-        let x86_ptest = x86_ptest.bind(vector(ty, sse_vector_size));
-        e.enc_both_inferred_maybe_isap(x86_ptest, rec_fcmp.opcodes(&PTEST), Some(use_sse41_simd));
-    }
-
-    // SIMD bitcast from I32/I64 to the low bits of a vector (e.g. I64x2); this register movement
-    // allows SIMD shifts to be legalized more easily. TODO ideally this would be typed as an
-    // I128x1 but restrictions on the type builder prevent this; the general idea here is that
-    // the upper bits are all zeroed and do not form parts of any separate lane. See
-    // https://github.com/bytecodealliance/wasmtime/issues/1140.
-    e.enc_both_inferred(
-        bitcast.bind(vector(I64, sse_vector_size)).bind(I32),
-        rec_frurm.opcodes(&MOVD_LOAD_XMM),
-    );
-    e.enc64(
-        bitcast.bind(vector(I64, sse_vector_size)).bind(I64),
-        rec_frurm.opcodes(&MOVD_LOAD_XMM).rex().w(),
-    );
-
-    // SIMD shift left
-    for (ty, opcodes) in &[(I16, &PSLLW), (I32, &PSLLD), (I64, &PSLLQ)] {
-        let x86_psll = x86_psll.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred(x86_psll, rec_fa.opcodes(*opcodes));
-    }
-
-    // SIMD shift right (logical)
-    for (ty, opcodes) in &[(I16, &PSRLW), (I32, &PSRLD), (I64, &PSRLQ)] {
-        let x86_psrl = x86_psrl.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred(x86_psrl, rec_fa.opcodes(*opcodes));
-    }
-
-    // SIMD shift right (arithmetic)
-    for (ty, opcodes) in &[(I16, &PSRAW), (I32, &PSRAD)] {
-        let x86_psra = x86_psra.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred(x86_psra, rec_fa.opcodes(*opcodes));
-    }
-
-    // SIMD immediate shift
-    for (ty, opcodes) in &[(I16, &PS_W_IMM), (I32, &PS_D_IMM), (I64, &PS_Q_IMM)] {
-        let ishl_imm = ishl_imm.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred(ishl_imm, rec_f_ib.opcodes(*opcodes).rrr(6));
-
-        let ushr_imm = ushr_imm.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred(ushr_imm, rec_f_ib.opcodes(*opcodes).rrr(2));
-
-        // One exception: PSRAQ does not exist in for 64x2 in SSE2, it requires a higher CPU feature set.
-        if *ty != I64 {
-            let sshr_imm = sshr_imm.bind(vector(*ty, sse_vector_size));
-            e.enc_both_inferred(sshr_imm, rec_f_ib.opcodes(*opcodes).rrr(4));
-        }
-    }
-
-    // SIMD integer comparisons
-    {
-        use IntCC::*;
-        for (ty, cc, opcodes, isa_predicate) in &[
-            (I8, Equal, &PCMPEQB[..], None),
-            (I16, Equal, &PCMPEQW[..], None),
-            (I32, Equal, &PCMPEQD[..], None),
-            (I64, Equal, &PCMPEQQ[..], Some(use_sse41_simd)),
-            (I8, SignedGreaterThan, &PCMPGTB[..], None),
-            (I16, SignedGreaterThan, &PCMPGTW[..], None),
-            (I32, SignedGreaterThan, &PCMPGTD[..], None),
-            (I64, SignedGreaterThan, &PCMPGTQ, Some(use_sse42_simd)),
-        ] {
-            let instruction = icmp
-                .bind(Immediate::IntCC(*cc))
-                .bind(vector(*ty, sse_vector_size));
-            let template = rec_icscc_fpr.opcodes(opcodes);
-            e.enc_both_inferred_maybe_isap(instruction, template, *isa_predicate);
-        }
-    }
-
-    // SIMD min/max
-    for (ty, inst, opcodes, isa_predicate) in &[
-        (I8, x86_pmaxs, &PMAXSB[..], Some(use_sse41_simd)),
-        (I16, x86_pmaxs, &PMAXSW[..], None),
-        (I32, x86_pmaxs, &PMAXSD[..], Some(use_sse41_simd)),
-        (I8, x86_pmaxu, &PMAXUB[..], None),
-        (I16, x86_pmaxu, &PMAXUW[..], Some(use_sse41_simd)),
-        (I32, x86_pmaxu, &PMAXUD[..], Some(use_sse41_simd)),
-        (I8, x86_pmins, &PMINSB[..], Some(use_sse41_simd)),
-        (I16, x86_pmins, &PMINSW[..], None),
-        (I32, x86_pmins, &PMINSD[..], Some(use_sse41_simd)),
-        (I8, x86_pminu, &PMINUB[..], None),
-        (I16, x86_pminu, &PMINUW[..], Some(use_sse41_simd)),
-        (I32, x86_pminu, &PMINUD[..], Some(use_sse41_simd)),
-    ] {
-        let inst = inst.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred_maybe_isap(inst, rec_fa.opcodes(opcodes), *isa_predicate);
-    }
-
-    // SIMD float comparisons
-    e.enc_both_inferred(
-        fcmp.bind(vector(F32, sse_vector_size)),
-        rec_pfcmp.opcodes(&CMPPS),
-    );
-    e.enc_both_inferred(
-        fcmp.bind(vector(F64, sse_vector_size)),
-        rec_pfcmp.opcodes(&CMPPD),
-    );
-
-    // SIMD float arithmetic
-    for (ty, inst, opcodes) in &[
-        (F32, fadd, &ADDPS[..]),
-        (F64, fadd, &ADDPD[..]),
-        (F32, fsub, &SUBPS[..]),
-        (F64, fsub, &SUBPD[..]),
-        (F32, fmul, &MULPS[..]),
-        (F64, fmul, &MULPD[..]),
-        (F32, fdiv, &DIVPS[..]),
-        (F64, fdiv, &DIVPD[..]),
-        (F32, x86_fmin, &MINPS[..]),
-        (F64, x86_fmin, &MINPD[..]),
-        (F32, x86_fmax, &MAXPS[..]),
-        (F64, x86_fmax, &MAXPD[..]),
-    ] {
-        let inst = inst.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred(inst, rec_fa.opcodes(opcodes));
-    }
-    for (ty, inst, opcodes) in &[(F32, sqrt, &SQRTPS[..]), (F64, sqrt, &SQRTPD[..])] {
-        let inst = inst.bind(vector(*ty, sse_vector_size));
-        e.enc_both_inferred(inst, rec_furm.opcodes(opcodes));
-    }
-}
-
-#[inline(never)]
-fn define_entity_ref(
-    e: &mut PerCpuModeEncodings,
-    shared_defs: &SharedDefinitions,
-    settings: &SettingGroup,
-    r: &RecipeGroup,
-) {
-    let shared = &shared_defs.instructions;
-    let formats = &shared_defs.formats;
-
-    // Shorthands for instructions.
-    let const_addr = shared.by_name("const_addr");
-    let func_addr = shared.by_name("func_addr");
-    let stack_addr = shared.by_name("stack_addr");
-    let symbol_value = shared.by_name("symbol_value");
-
-    // Shorthands for recipes.
-    let rec_allones_fnaddr4 = r.template("allones_fnaddr4");
-    let rec_allones_fnaddr8 = r.template("allones_fnaddr8");
-    let rec_fnaddr4 = r.template("fnaddr4");
-    let rec_fnaddr8 = r.template("fnaddr8");
-    let rec_const_addr = r.template("const_addr");
-    let rec_got_fnaddr8 = r.template("got_fnaddr8");
-    let rec_got_gvaddr8 = r.template("got_gvaddr8");
-    let rec_gvaddr4 = r.template("gvaddr4");
-    let rec_gvaddr8 = r.template("gvaddr8");
-    let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8");
-    let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8");
-    let rec_spaddr_id = r.template("spaddr_id");
-
-    // Predicates shorthands.
-    let all_ones_funcaddrs_and_not_is_pic =
-        settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic");
-    let is_pic = settings.predicate_by_name("is_pic");
-    let not_all_ones_funcaddrs_and_not_is_pic =
-        settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic");
-    let not_is_pic = settings.predicate_by_name("not_is_pic");
-
-    // Function addresses.
-
-    // Non-PIC, all-ones funcaddresses.
-    e.enc32_isap(
-        func_addr.bind(I32),
-        rec_fnaddr4.opcodes(&MOV_IMM),
-        not_all_ones_funcaddrs_and_not_is_pic,
-    );
-    e.enc64_isap(
-        func_addr.bind(I64),
-        rec_fnaddr8.opcodes(&MOV_IMM).rex().w(),
-        not_all_ones_funcaddrs_and_not_is_pic,
-    );
-
-    // Non-PIC, all-zeros funcaddresses.
-    e.enc32_isap(
-        func_addr.bind(I32),
-        rec_allones_fnaddr4.opcodes(&MOV_IMM),
-        all_ones_funcaddrs_and_not_is_pic,
-    );
-    e.enc64_isap(
-        func_addr.bind(I64),
-        rec_allones_fnaddr8.opcodes(&MOV_IMM).rex().w(),
-        all_ones_funcaddrs_and_not_is_pic,
-    );
-
-    // 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's pc-relative field.
-    let is_colocated_func =
-        InstructionPredicate::new_is_colocated_func(&*formats.func_addr, "func_ref");
-    e.enc64_instp(
-        func_addr.bind(I64),
-        rec_pcrel_fnaddr8.opcodes(&LEA).rex().w(),
-        is_colocated_func,
-    );
-
-    // 64-bit, non-colocated, PIC.
-    e.enc64_isap(
-        func_addr.bind(I64),
-        rec_got_fnaddr8.opcodes(&MOV_LOAD).rex().w(),
-        is_pic,
-    );
-
-    // Global addresses.
-
-    // Non-PIC.
-    e.enc32_isap(
-        symbol_value.bind(I32),
-        rec_gvaddr4.opcodes(&MOV_IMM),
-        not_is_pic,
-    );
-    e.enc64_isap(
-        symbol_value.bind(I64),
-        rec_gvaddr8.opcodes(&MOV_IMM).rex().w(),
-        not_is_pic,
-    );
-
-    // PIC, colocated.
-    e.enc64_func(
-        symbol_value.bind(I64),
-        rec_pcrel_gvaddr8.opcodes(&LEA).rex().w(),
-        |encoding| {
-            encoding
-                .isa_predicate(is_pic)
-                .inst_predicate(InstructionPredicate::new_is_colocated_data(formats))
-        },
-    );
-
-    // PIC, non-colocated.
-    e.enc64_isap(
-        symbol_value.bind(I64),
-        rec_got_gvaddr8.opcodes(&MOV_LOAD).rex().w(),
-        is_pic,
-    );
-
-    // Stack addresses.
-    //
-    // TODO: Add encoding rules for stack_load and stack_store, so that they
-    // don't get legalized to stack_addr + load/store.
-    e.enc64(stack_addr.bind(I64), rec_spaddr_id.opcodes(&LEA).rex().w());
-    e.enc32(stack_addr.bind(I32), rec_spaddr_id.opcodes(&LEA));
-
-    // Constant addresses (PIC).
-    e.enc64(const_addr.bind(I64), rec_const_addr.opcodes(&LEA).rex().w());
-    e.enc32(const_addr.bind(I32), rec_const_addr.opcodes(&LEA));
-}
-
-/// Control flow opcodes.
-#[inline(never)]
-fn define_control_flow(
-    e: &mut PerCpuModeEncodings,
-    shared_defs: &SharedDefinitions,
-    settings: &SettingGroup,
-    r: &RecipeGroup,
-) {
-    let shared = &shared_defs.instructions;
-    let formats = &shared_defs.formats;
-
-    // Shorthands for instructions.
-    let brff = shared.by_name("brff");
-    let brif = shared.by_name("brif");
-    let brnz = shared.by_name("brnz");
-    let brz = shared.by_name("brz");
-    let call = shared.by_name("call");
-    let call_indirect = shared.by_name("call_indirect");
-    let debugtrap = shared.by_name("debugtrap");
-    let indirect_jump_table_br = shared.by_name("indirect_jump_table_br");
-    let jump = shared.by_name("jump");
-    let jump_table_base = shared.by_name("jump_table_base");
-    let jump_table_entry = shared.by_name("jump_table_entry");
-    let return_ = shared.by_name("return");
-    let trap = shared.by_name("trap");
-    let trapff = shared.by_name("trapff");
-    let trapif = shared.by_name("trapif");
-    let resumable_trap = shared.by_name("resumable_trap");
-
-    // Shorthands for recipes.
-    let rec_brfb = r.template("brfb");
-    let rec_brfd = r.template("brfd");
-    let rec_brib = r.template("brib");
-    let rec_brid = r.template("brid");
-    let rec_call_id = r.template("call_id");
-    let rec_call_plt_id = r.template("call_plt_id");
-    let rec_call_r = r.template("call_r");
-    let rec_debugtrap = r.recipe("debugtrap");
-    let rec_indirect_jmp = r.template("indirect_jmp");
-    let rec_jmpb = r.template("jmpb");
-    let rec_jmpd = r.template("jmpd");
-    let rec_jt_base = r.template("jt_base");
-    let rec_jt_entry = r.template("jt_entry");
-    let rec_ret = r.template("ret");
-    let rec_t8jccb_abcd = r.template("t8jccb_abcd");
-    let rec_t8jccd_abcd = r.template("t8jccd_abcd");
-    let rec_t8jccd_long = r.template("t8jccd_long");
-    let rec_tjccb = r.template("tjccb");
-    let rec_tjccd = r.template("tjccd");
-    let rec_trap = r.template("trap");
-    let rec_trapif = r.recipe("trapif");
-    let rec_trapff = r.recipe("trapff");
-
-    // Predicates shorthands.
-    let is_pic = settings.predicate_by_name("is_pic");
-
-    // Call/return
-
-    // 32-bit, both PIC and non-PIC.
-    e.enc32(call, rec_call_id.opcodes(&CALL_RELATIVE));
-
-    // 64-bit, colocated, both PIC and non-PIC. Use the call instruction's pc-relative field.
-    let is_colocated_func = InstructionPredicate::new_is_colocated_func(&*formats.call, "func_ref");
-    e.enc64_instp(call, rec_call_id.opcodes(&CALL_RELATIVE), is_colocated_func);
-
-    // 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, since non-PIC
-    // is currently using the large model, which requires calls be lowered to
-    // func_addr+call_indirect.
-    e.enc64_isap(call, rec_call_plt_id.opcodes(&CALL_RELATIVE), is_pic);
-
-    e.enc32(
-        call_indirect.bind(I32),
-        rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2),
-    );
-    e.enc64(
-        call_indirect.bind(I64),
-        rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2).rex(),
-    );
-    e.enc64(
-        call_indirect.bind(I64),
-        rec_call_r.opcodes(&JUMP_ABSOLUTE).rrr(2),
-    );
-
-    e.enc32(return_, rec_ret.opcodes(&RET_NEAR));
-    e.enc64(return_, rec_ret.opcodes(&RET_NEAR));
-
-    // Branches.
-    e.enc32(jump, rec_jmpb.opcodes(&JUMP_SHORT));
-    e.enc64(jump, rec_jmpb.opcodes(&JUMP_SHORT));
-    e.enc32(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE));
-    e.enc64(jump, rec_jmpd.opcodes(&JUMP_NEAR_RELATIVE));
-
-    e.enc_both(brif, rec_brib.opcodes(&JUMP_SHORT_IF_OVERFLOW));
-    e.enc_both(brif, rec_brid.opcodes(&JUMP_NEAR_IF_OVERFLOW));
-
-    // Not all float condition codes are legal, see `supported_floatccs`.
-    e.enc_both(brff, rec_brfb.opcodes(&JUMP_SHORT_IF_OVERFLOW));
-    e.enc_both(brff, rec_brfd.opcodes(&JUMP_NEAR_IF_OVERFLOW));
-
-    // Note that the tjccd opcode will be prefixed with 0x0f.
-    e.enc_i32_i64_explicit_rex(brz, rec_tjccb.opcodes(&JUMP_SHORT_IF_EQUAL));
-    e.enc_i32_i64_explicit_rex(brz, rec_tjccd.opcodes(&TEST_BYTE_REG));
-    e.enc_i32_i64_explicit_rex(brnz, rec_tjccb.opcodes(&JUMP_SHORT_IF_NOT_EQUAL));
-    e.enc_i32_i64_explicit_rex(brnz, rec_tjccd.opcodes(&TEST_REG));
-
-    // Branch on a b1 value in a register only looks at the low 8 bits. See also
-    // bint encodings below.
-    //
-    // Start with the worst-case encoding for X86_32 only. The register allocator
-    // can't handle a branch with an ABCD-constrained operand.
-    e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(&TEST_BYTE_REG));
-    e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(&TEST_REG));
-
-    e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_EQUAL));
-    e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_BYTE_REG));
-    e.enc_both(
-        brnz.bind(B1),
-        rec_t8jccb_abcd.opcodes(&JUMP_SHORT_IF_NOT_EQUAL),
-    );
-    e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(&TEST_REG));
-
-    // Jump tables.
-    e.enc64(
-        jump_table_entry.bind(I64),
-        rec_jt_entry.opcodes(&MOVSXD).rex().w(),
-    );
-    e.enc32(jump_table_entry.bind(I32), rec_jt_entry.opcodes(&MOV_LOAD));
-
-    e.enc64(
-        jump_table_base.bind(I64),
-        rec_jt_base.opcodes(&LEA).rex().w(),
-    );
-    e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(&LEA));
-
-    e.enc_x86_64(
-        indirect_jump_table_br.bind(I64),
-        rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4),
-    );
-    e.enc32(
-        indirect_jump_table_br.bind(I32),
-        rec_indirect_jmp.opcodes(&JUMP_ABSOLUTE).rrr(4),
-    );
-
-    // Trap as ud2
-    e.enc32(trap, rec_trap.opcodes(&UNDEFINED2));
-    e.enc64(trap, rec_trap.opcodes(&UNDEFINED2));
-    e.enc32(resumable_trap, rec_trap.opcodes(&UNDEFINED2));
-    e.enc64(resumable_trap, rec_trap.opcodes(&UNDEFINED2));
-
-    // Debug trap as int3
-    e.enc32_rec(debugtrap, rec_debugtrap, 0);
-    e.enc64_rec(debugtrap, rec_debugtrap, 0);
-
-    e.enc32_rec(trapif, rec_trapif, 0);
-    e.enc64_rec(trapif, rec_trapif, 0);
-    e.enc32_rec(trapff, rec_trapff, 0);
-    e.enc64_rec(trapff, rec_trapff, 0);
-}
-
-/// Reference type instructions.
-#[inline(never)]
-fn define_reftypes(e: &mut PerCpuModeEncodings, shared_defs: &SharedDefinitions, r: &RecipeGroup) {
-    let shared = &shared_defs.instructions;
-
-    let is_null = shared.by_name("is_null");
-    let is_invalid = shared.by_name("is_invalid");
-    let null = shared.by_name("null");
-    let safepoint = shared.by_name("safepoint");
-
-    let rec_is_zero = r.template("is_zero");
-    let rec_is_invalid = r.template("is_invalid");
-    let rec_pu_id_ref = r.template("pu_id_ref");
-    let rec_safepoint = r.recipe("safepoint");
-
-    // Null references implemented as iconst 0.
-    e.enc32(null.bind(R32), rec_pu_id_ref.opcodes(&MOV_IMM));
-
-    e.enc64(null.bind(R64), rec_pu_id_ref.rex().opcodes(&MOV_IMM));
-    e.enc64(null.bind(R64), rec_pu_id_ref.opcodes(&MOV_IMM));
-
-    // is_null, implemented by testing whether the value is 0.
-    e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(&TEST_REG));
-
-    // is_invalid, implemented by testing whether the value is -1.
-    e.enc_r32_r64_rex_only(is_invalid, rec_is_invalid.opcodes(&CMP_IMM8).rrr(7));
-
-    // safepoint instruction calls sink, no actual encoding.
-    e.enc32_rec(safepoint, rec_safepoint, 0);
-    e.enc64_rec(safepoint, rec_safepoint, 0);
-}
-
-#[allow(clippy::cognitive_complexity)]
-pub(crate) fn define(
-    shared_defs: &SharedDefinitions,
-    settings: &SettingGroup,
-    x86: &InstructionGroup,
-    r: &RecipeGroup,
-) -> PerCpuModeEncodings {
-    // Definitions.
-    let mut e = PerCpuModeEncodings::new();
-
-    define_moves(&mut e, shared_defs, r);
-    define_memory(&mut e, shared_defs, x86, r);
-    define_fpu_moves(&mut e, shared_defs, r);
-    define_fpu_memory(&mut e, shared_defs, r);
-    define_fpu_ops(&mut e, shared_defs, settings, x86, r);
-    define_alu(&mut e, shared_defs, settings, x86, r);
-    define_simd(&mut e, shared_defs, settings, x86, r);
-    define_entity_ref(&mut e, shared_defs, settings, r);
-    define_control_flow(&mut e, shared_defs, settings, r);
-    define_reftypes(&mut e, shared_defs, r);
-
-    let x86_elf_tls_get_addr = x86.by_name("x86_elf_tls_get_addr");
-    let x86_macho_tls_get_addr = x86.by_name("x86_macho_tls_get_addr");
-
-    let rec_elf_tls_get_addr = r.recipe("elf_tls_get_addr");
-    let rec_macho_tls_get_addr = r.recipe("macho_tls_get_addr");
-
-    e.enc64_rec(x86_elf_tls_get_addr, rec_elf_tls_get_addr, 0);
-    e.enc64_rec(x86_macho_tls_get_addr, rec_macho_tls_get_addr, 0);
-
-    e
-}
diff --git a/cranelift/codegen/meta/src/isa/x86/instructions.rs b/cranelift/codegen/meta/src/isa/x86/instructions.rs
deleted file mode 100644
index 7acd2e2c50..0000000000
--- a/cranelift/codegen/meta/src/isa/x86/instructions.rs
+++ /dev/null
@@ -1,723 +0,0 @@
-#![allow(non_snake_case)]
-
-use crate::cdsl::instructions::{
-    AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder,
-};
-use crate::cdsl::operands::Operand;
-use crate::cdsl::types::ValueType;
-use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar};
-use crate::shared::entities::EntityRefs;
-use crate::shared::formats::Formats;
-use crate::shared::immediates::Immediates;
-use crate::shared::types;
-
-#[allow(clippy::many_single_char_names)]
-pub(crate) fn define(
-    mut all_instructions: &mut AllInstructions,
-    formats: &Formats,
-    immediates: &Immediates,
-    entities: &EntityRefs,
-) -> InstructionGroup {
-    let mut ig = InstructionGroupBuilder::new(&mut all_instructions);
-
-    let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into();
-
-    let iWord = &TypeVar::new(
-        "iWord",
-        "A scalar integer machine word",
-        TypeSetBuilder::new().ints(32..64).build(),
-    );
-    let nlo = &Operand::new("nlo", iWord).with_doc("Low part of numerator");
-    let nhi = &Operand::new("nhi", iWord).with_doc("High part of numerator");
-    let d = &Operand::new("d", iWord).with_doc("Denominator");
-    let q = &Operand::new("q", iWord).with_doc("Quotient");
-    let r = &Operand::new("r", iWord).with_doc("Remainder");
-
-    ig.push(
-        Inst::new(
-            "x86_udivmodx",
-            r#"
-        Extended unsigned division.
-
-        Concatenate the bits in `nhi` and `nlo` to form the numerator.
-        Interpret the bits as an unsigned number and divide by the unsigned
-        denominator `d`. Trap when `d` is zero or if the quotient is larger
-        than the range of the output.
-
-        Return both quotient and remainder.
-        "#,
-            &formats.ternary,
-        )
-        .operands_in(vec![nlo, nhi, d])
-        .operands_out(vec![q, r])
-        .can_trap(true),
-    );
-
-    ig.push(
-        Inst::new(
-            "x86_sdivmodx",
-            r#"
-        Extended signed division.
-
-        Concatenate the bits in `nhi` and `nlo` to form the numerator.
-        Interpret the bits as a signed number and divide by the signed
-        denominator `d`. Trap when `d` is zero or if the quotient is outside
-        the range of the output.
-
-        Return both quotient and remainder.
-        "#,
-            &formats.ternary,
-        )
-        .operands_in(vec![nlo, nhi, d])
-        .operands_out(vec![q, r])
-        .can_trap(true),
-    );
-
-    let argL = &Operand::new("argL", iWord);
-    let argR = &Operand::new("argR", iWord);
-    let resLo = &Operand::new("resLo", iWord);
-    let resHi = &Operand::new("resHi", iWord);
-
-    ig.push(
-        Inst::new(
-            "x86_umulx",
-            r#"
-        Unsigned integer multiplication, producing a double-length result.
-
-        Polymorphic over all scalar integer types, but does not support vector
-        types.
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![argL, argR])
-        .operands_out(vec![resLo, resHi]),
-    );
-
-    ig.push(
-        Inst::new(
-            "x86_smulx",
-            r#"
-        Signed integer multiplication, producing a double-length result.
-
-        Polymorphic over all scalar integer types, but does not support vector
-        types.
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![argL, argR])
-        .operands_out(vec![resLo, resHi]),
-    );
-
-    let Float = &TypeVar::new(
-        "Float",
-        "A scalar or vector floating point number",
-        TypeSetBuilder::new()
-            .floats(Interval::All)
-            .simd_lanes(Interval::All)
-            .build(),
-    );
-    let IntTo = &TypeVar::new(
-        "IntTo",
-        "An integer type with the same number of lanes",
-        TypeSetBuilder::new()
-            .ints(32..64)
-            .simd_lanes(Interval::All)
-            .build(),
-    );
-    let x = &Operand::new("x", Float);
-    let a = &Operand::new("a", IntTo);
-
-    ig.push(
-        Inst::new(
-            "x86_cvtt2si",
-            r#"
-        Convert with truncation floating point to signed integer.
-
-        The source floating point operand is converted to a signed integer by
-        rounding towards zero. If the result can't be represented in the output
-        type, returns the smallest signed value the output type can represent.
-
-        This instruction does not trap.
-        "#,
-            &formats.unary,
-        )
-        .operands_in(vec![x])
-        .operands_out(vec![a]),
-    );
-
-    let f32x4 = &TypeVar::new(
-        "f32x4",
-        "A floating point number",
-        TypeSetBuilder::new()
-            .floats(32..32)
-            .simd_lanes(4..4)
-            .build(),
-    );
-    let i32x4 = &TypeVar::new(
-        "i32x4",
-        "An integer type with the same number of lanes",
-        TypeSetBuilder::new().ints(32..32).simd_lanes(4..4).build(),
-    );
-    let x = &Operand::new("x", i32x4);
-    let a = &Operand::new("a", f32x4);
-
-    ig.push(
-        Inst::new(
-            "x86_vcvtudq2ps",
-            r#"
-        Convert unsigned integer to floating point.
-
-        Convert packed doubleword unsigned integers to packed single-precision floating-point 
-        values. This instruction does not trap.
-        "#,
-            &formats.unary,
-        )
-        .operands_in(vec![x])
-        .operands_out(vec![a]),
-    );
-
-    let x = &Operand::new("x", Float);
-    let a = &Operand::new("a", Float);
-    let y = &Operand::new("y", Float);
-
-    ig.push(
-        Inst::new(
-            "x86_fmin",
-            r#"
-        Floating point minimum with x86 semantics.
-
-        This is equivalent to the C ternary operator `x < y ? x : y` which
-        differs from `fmin` when either operand is NaN or when comparing
-        +0.0 to -0.0.
-
-        When the two operands don't compare as LT, `y` is returned unchanged,
-        even if it is a signalling NaN.
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
-    ig.push(
-        Inst::new(
-            "x86_fmax",
-            r#"
-        Floating point maximum with x86 semantics.
-
-        This is equivalent to the C ternary operator `x > y ? x : y` which
-        differs from `fmax` when either operand is NaN or when comparing
-        +0.0 to -0.0.
-
-        When the two operands don't compare as GT, `y` is returned unchanged,
-        even if it is a signalling NaN.
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
-    let x = &Operand::new("x", iWord);
-
-    ig.push(
-        Inst::new(
-            "x86_push",
-            r#"
-    Pushes a value onto the stack.
-
-    Decrements the stack pointer and stores the specified value on to the top.
-
-    This is polymorphic in i32 and i64. However, it is only implemented for i64
-    in 64-bit mode, and only for i32 in 32-bit mode.
-    "#,
-            &formats.unary,
-        )
-        .operands_in(vec![x])
-        .other_side_effects(true)
-        .can_store(true),
-    );
-
-    ig.push(
-        Inst::new(
-            "x86_pop",
-            r#"
-    Pops a value from the stack.
-
-    Loads a value from the top of the stack and then increments the stack
-    pointer.
-
-    This is polymorphic in i32 and i64. However, it is only implemented for i64
-    in 64-bit mode, and only for i32 in 32-bit mode.
-    "#,
-            &formats.nullary,
-        )
-        .operands_out(vec![x])
-        .other_side_effects(true)
-        .can_load(true),
-    );
-
-    let y = &Operand::new("y", iWord);
-    let rflags = &Operand::new("rflags", iflags);
-
-    ig.push(
-        Inst::new(
-            "x86_bsr",
-            r#"
-    Bit Scan Reverse -- returns the bit-index of the most significant 1
-    in the word. Result is undefined if the argument is zero. However, it
-    sets the Z flag depending on the argument, so it is at least easy to
-    detect and handle that case.
-
-    This is polymorphic in i32 and i64. It is implemented for both i64 and
-    i32 in 64-bit mode, and only for i32 in 32-bit mode.
-    "#,
-            &formats.unary,
-        )
-        .operands_in(vec![x])
-        .operands_out(vec![y, rflags]),
-    );
-
-    ig.push(
-        Inst::new(
-            "x86_bsf",
-            r#"
-    Bit Scan Forwards -- returns the bit-index of the least significant 1
-    in the word. Is otherwise identical to 'bsr', just above.
-    "#,
-            &formats.unary,
-        )
-        .operands_in(vec![x])
-        .operands_out(vec![y, rflags]),
-    );
-
-    let uimm8 = &immediates.uimm8;
-    let TxN = &TypeVar::new(
-        "TxN",
-        "A SIMD vector type",
-        TypeSetBuilder::new()
-            .ints(Interval::All)
-            .floats(Interval::All)
-            .bools(Interval::All)
-            .simd_lanes(Interval::All)
-            .includes_scalars(false)
-            .build(),
-    );
-    let a = &Operand::new("a", TxN).with_doc("A vector value (i.e. held in an XMM register)");
-    let b = &Operand::new("b", TxN).with_doc("A vector value (i.e. held in an XMM register)");
-    let i = &Operand::new("i", uimm8).with_doc("An ordering operand controlling the copying of data from the source to the destination; see PSHUFD in Intel manual for details");
-
-    ig.push(
-        Inst::new(
-            "x86_pshufd",
-            r#"
-    Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended
-    register and re-orders the data according to the passed immediate byte.
-    "#,
-            &formats.binary_imm8,
-        )
-        .operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN)
-        .operands_out(vec![a]),
-    );
-
-    ig.push(
-        Inst::new(
-            "x86_pshufb",
-            r#"
-    Packed Shuffle Bytes -- re-orders data in an extended register using a shuffle
-    mask from either memory or another extended register
-    "#,
-            &formats.binary,
-        )
-        .operands_in(vec![a, b]) // TODO allow re-ordering from memory here (need more permissive type than TxN)
-        .operands_out(vec![a]),
-    );
-
-    let mask = &Operand::new("mask", uimm8).with_doc("mask to select lanes from b");
-    ig.push(
-        Inst::new(
-            "x86_pblendw",
-            r#"
-    Blend packed words using an immediate mask. Each bit of the 8-bit immediate corresponds to a 
-    lane in ``b``: if the bit is set, the lane is copied into ``a``.
-    "#,
-            &formats.ternary_imm8,
-        )
-        .operands_in(vec![a, b, mask])
-        .operands_out(vec![a]),
-    );
-
-    let Idx = &Operand::new("Idx", uimm8).with_doc("Lane index");
-    let x = &Operand::new("x", TxN);
-    let a = &Operand::new("a", &TxN.lane_of());
-
-    ig.push(
-        Inst::new(
-            "x86_pextr",
-            r#"
-        Extract lane ``Idx`` from ``x``.
-        The lane index, ``Idx``, is an immediate value, not an SSA value. It
-        must indicate a valid lane index for the type of ``x``.
-        "#,
-            &formats.binary_imm8,
-        )
-        .operands_in(vec![x, Idx])
-        .operands_out(vec![a]),
-    );
-
-    let IBxN = &TypeVar::new(
-        "IBxN",
-        "A SIMD vector type containing only booleans and integers",
-        TypeSetBuilder::new()
-            .ints(Interval::All)
-            .bools(Interval::All)
-            .simd_lanes(Interval::All)
-            .includes_scalars(false)
-            .build(),
-    );
-    let x = &Operand::new("x", IBxN);
-    let y = &Operand::new("y", &IBxN.lane_of()).with_doc("New lane value");
-    let a = &Operand::new("a", IBxN);
-
-    ig.push(
-        Inst::new(
-            "x86_pinsr",
-            r#"
-        Insert ``y`` into ``x`` at lane ``Idx``.
-        The lane index, ``Idx``, is an immediate value, not an SSA value. It
-        must indicate a valid lane index for the type of ``x``.
-        "#,
-            &formats.ternary_imm8,
-        )
-        .operands_in(vec![x, y, Idx])
-        .operands_out(vec![a]),
-    );
-
-    let FxN = &TypeVar::new(
-        "FxN",
-        "A SIMD vector type containing floats",
-        TypeSetBuilder::new()
-            .floats(Interval::All)
-            .simd_lanes(Interval::All)
-            .includes_scalars(false)
-            .build(),
-    );
-    let x = &Operand::new("x", FxN);
-    let y = &Operand::new("y", &FxN.lane_of()).with_doc("New lane value");
-    let a = &Operand::new("a", FxN);
-
-    ig.push(
-        Inst::new(
-            "x86_insertps",
-            r#"
-        Insert a lane of ``y`` into ``x`` at using ``Idx`` to encode both which lane the value is
-        extracted from and which it is inserted to. This is similar to x86_pinsr but inserts
-        floats, which are already stored in an XMM register.
-        "#,
-            &formats.ternary_imm8,
-        )
-        .operands_in(vec![x, y, Idx])
-        .operands_out(vec![a]),
-    );
-
-    let x = &Operand::new("x", TxN);
-    let y = &Operand::new("y", TxN);
-    let a = &Operand::new("a", TxN);
-
-    ig.push(
-        Inst::new(
-            "x86_punpckh",
-            r#"
-        Unpack the high-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional
-        i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation
-        would result in ``a = [y3, x3, y2, x2]`` (using the Intel manual's right-to-left lane
-        ordering). 
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
-    ig.push(
-        Inst::new(
-            "x86_punpckl",
-            r#"
-        Unpack the low-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional
-        i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation
-        would result in ``a = [y1, x1, y0, x0]`` (using the Intel manual's right-to-left lane
-        ordering).
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
-    let x = &Operand::new("x", FxN);
-    let y = &Operand::new("y", FxN);
-    let a = &Operand::new("a", FxN);
-
-    ig.push(
-        Inst::new(
-            "x86_movsd",
-            r#"
-        Move the low 64 bits of the float vector ``y`` to the low 64 bits of float vector ``x``
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
-    ig.push(
-        Inst::new(
-            "x86_movlhps",
-            r#"
-        Move the low 64 bits of the float vector ``y`` to the high 64 bits of float vector ``x``
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
-    let IxN = &TypeVar::new(
-        "IxN",
-        "A SIMD vector type containing integers",
-        TypeSetBuilder::new()
-            .ints(Interval::All)
-            .simd_lanes(Interval::All)
-            .includes_scalars(false)
-            .build(),
-    );
-    let I128 = &TypeVar::new(
-        "I128",
-        "A SIMD vector type containing one large integer (due to Cranelift type constraints, \
-        this uses the Cranelift I64X2 type but should be understood as one large value, i.e., the \
-        upper lane is concatenated with the lower lane to form the integer)",
-        TypeSetBuilder::new()
-            .ints(64..64)
-            .simd_lanes(2..2)
-            .includes_scalars(false)
-            .build(),
-    );
-
-    let x = &Operand::new("x", IxN).with_doc("Vector value to shift");
-    let y = &Operand::new("y", I128).with_doc("Number of bits to shift");
-    let a = &Operand::new("a", IxN);
-
-    ig.push(
-        Inst::new(
-            "x86_psll",
-            r#"
-        Shift Packed Data Left Logical -- This implements the behavior of the shared instruction
-        ``ishl`` but alters the shift operand to live in an XMM register as expected by the PSLL*
-        family of instructions.
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
-    ig.push(
-        Inst::new(
-            "x86_psrl",
-            r#"
-        Shift Packed Data Right Logical -- This implements the behavior of the shared instruction
-        ``ushr`` but alters the shift operand to live in an XMM register as expected by the PSRL*
-        family of instructions.
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
-    ig.push(
-        Inst::new(
-            "x86_psra",
-            r#"
-        Shift Packed Data Right Arithmetic -- This implements the behavior of the shared
-        instruction ``sshr`` but alters the shift operand to live in an XMM register as expected by
-        the PSRA* family of instructions.
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
-    let I64x2 = &TypeVar::new(
-        "I64x2",
-        "A SIMD vector type containing two 64-bit integers",
-        TypeSetBuilder::new()
-            .ints(64..64)
-            .simd_lanes(2..2)
-            .includes_scalars(false)
-            .build(),
-    );
-
-    let x = &Operand::new("x", I64x2);
-    let y = &Operand::new("y", I64x2);
-    let a = &Operand::new("a", I64x2);
-    ig.push(
-        Inst::new(
-            "x86_pmullq",
-            r#"
-        Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with
-        lane-wise wrapping if the result overflows. This instruction is necessary to add distinct
-        encodings for CPUs with newer vector features.
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
-    ig.push(
-        Inst::new(
-            "x86_pmuludq",
-            r#"
-        Multiply Packed Integers -- Using only the bottom 32 bits in each lane, multiply two 64x2
-        unsigned integers and receive a 64x2 result. This instruction avoids the need for handling
-        overflow as in `x86_pmullq`.
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
-    let x = &Operand::new("x", TxN);
-    let y = &Operand::new("y", TxN);
-    let f = &Operand::new("f", iflags);
-    ig.push(
-        Inst::new(
-            "x86_ptest",
-            r#"
-        Logical Compare -- PTEST will set the ZF flag if all bits in the result are 0 of the
-        bitwise AND of the first source operand (first operand) and the second source operand
-        (second operand). PTEST sets the CF flag if all bits in the result are 0 of the bitwise
-        AND of the second source operand (second operand) and the logical NOT of the destination
-        operand (first operand).
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![f]),
-    );
-
-    let x = &Operand::new("x", IxN);
-    let y = &Operand::new("y", IxN);
-    let a = &Operand::new("a", IxN);
-    ig.push(
-        Inst::new(
-            "x86_pmaxs",
-            r#"
-        Maximum of Packed Signed Integers -- Compare signed integers in the first and second
-        operand and return the maximum values.
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
-    ig.push(
-        Inst::new(
-            "x86_pmaxu",
-            r#"
-        Maximum of Packed Unsigned Integers -- Compare unsigned integers in the first and second
-        operand and return the maximum values.
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
-    ig.push(
-        Inst::new(
-            "x86_pmins",
-            r#"
-        Minimum of Packed Signed Integers -- Compare signed integers in the first and second
-        operand and return the minimum values.
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
-    ig.push(
-        Inst::new(
-            "x86_pminu",
-            r#"
-        Minimum of Packed Unsigned Integers -- Compare unsigned integers in the first and second
-        operand and return the minimum values.
-        "#,
-            &formats.binary,
-        )
-        .operands_in(vec![x, y])
-        .operands_out(vec![a]),
-    );
-
-    let c = &Operand::new("c", uimm8)
-        .with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details");
-    ig.push(
-        Inst::new(
-            "x86_palignr",
-            r#"
-        Concatenate destination and source operands, extracting a byte-aligned result shifted to 
-        the right by `c`.
-        "#,
-            &formats.ternary_imm8,
-        )
-        .operands_in(vec![x, y, c])
-        .operands_out(vec![a]),
-    );
-
-    let i64_t = &TypeVar::new(
-        "i64_t",
-        "A scalar 64bit integer",
-        TypeSetBuilder::new().ints(64..64).build(),
-    );
-
-    let GV = &Operand::new("GV", &entities.global_value);
-    let addr = &Operand::new("addr", i64_t);
-
-    ig.push(
-        Inst::new(
-            "x86_elf_tls_get_addr",
-            r#"
-        Elf tls get addr -- This implements the GD TLS model for ELF. The clobber output should
-        not be used.
-            "#,
-            &formats.unary_global_value,
-        )
-        // This is a bit overly broad to mark as clobbering *all* the registers, because it should
-        // only preserve caller-saved registers. There's no way to indicate this to register
-        // allocation yet, though, so mark as clobbering all registers instead.
-        .clobbers_all_regs(true)
-        .operands_in(vec![GV])
-        .operands_out(vec![addr]),
-    );
-    ig.push(
-        Inst::new(
-            "x86_macho_tls_get_addr",
-            r#"
-        Mach-O tls get addr -- This implements TLS access for Mach-O. The clobber output should
-        not be used.
-            "#,
-            &formats.unary_global_value,
-        )
-        // See above comment for x86_elf_tls_get_addr.
-        .clobbers_all_regs(true)
-        .operands_in(vec![GV])
-        .operands_out(vec![addr]),
-    );
-
-    ig.build()
-}
diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs
deleted file mode 100644
index de78c3b3b7..0000000000
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ /dev/null
@@ -1,827 +0,0 @@
-use crate::cdsl::ast::{constant, var, ExprBuilder, Literal};
-use crate::cdsl::instructions::{vector, Bindable, InstructionGroup};
-use crate::cdsl::types::{LaneType, ValueType};
-use crate::cdsl::xform::TransformGroupBuilder;
-use crate::shared::types::Float::{F32, F64};
-use crate::shared::types::Int::{I16, I32, I64, I8};
-use crate::shared::Definitions as SharedDefinitions;
-
-#[allow(clippy::many_single_char_names)]
-pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) {
-    let mut expand = TransformGroupBuilder::new(
-        "x86_expand",
-        r#"
-    Legalize instructions by expansion.
-
-    Use x86-specific instructions if needed."#,
-    )
-    .isa("x86")
-    .chain_with(shared.transform_groups.by_name("expand_flags").id);
-
-    let mut narrow = TransformGroupBuilder::new(
-        "x86_narrow",
-        r#"
-    Legalize instructions by narrowing.
-
-    Use x86-specific instructions if needed."#,
-    )
-    .isa("x86")
-    .chain_with(shared.transform_groups.by_name("narrow_flags").id);
-
-    let mut narrow_avx = TransformGroupBuilder::new(
-        "x86_narrow_avx",
-        r#"
-    Legalize instructions by narrowing with CPU feature checks.
-
-    This special case converts using x86 AVX instructions where available."#,
-    )
-    .isa("x86");
-    // We cannot chain with the x86_narrow group until this group is built, see bottom of this
-    // function for where this is chained.
-
-    let mut widen = TransformGroupBuilder::new(
-        "x86_widen",
-        r#"
-    Legalize instructions by widening.
-
-    Use x86-specific instructions if needed."#,
-    )
-    .isa("x86")
-    .chain_with(shared.transform_groups.by_name("widen").id);
-
-    // List of instructions.
-    let insts = &shared.instructions;
-    let band = insts.by_name("band");
-    let bor = insts.by_name("bor");
-    let clz = insts.by_name("clz");
-    let ctz = insts.by_name("ctz");
-    let fcmp = insts.by_name("fcmp");
-    let fcvt_from_uint = insts.by_name("fcvt_from_uint");
-    let fcvt_to_sint = insts.by_name("fcvt_to_sint");
-    let fcvt_to_uint = insts.by_name("fcvt_to_uint");
-    let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
-    let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat");
-    let fmax = insts.by_name("fmax");
-    let fmin = insts.by_name("fmin");
-    let iadd = insts.by_name("iadd");
-    let iconst = insts.by_name("iconst");
-    let imul = insts.by_name("imul");
-    let ineg = insts.by_name("ineg");
-    let isub = insts.by_name("isub");
-    let ishl = insts.by_name("ishl");
-    let ireduce = insts.by_name("ireduce");
-    let popcnt = insts.by_name("popcnt");
-    let sdiv = insts.by_name("sdiv");
-    let selectif = insts.by_name("selectif");
-    let smulhi = insts.by_name("smulhi");
-    let srem = insts.by_name("srem");
-    let tls_value = insts.by_name("tls_value");
-    let udiv = insts.by_name("udiv");
-    let umulhi = insts.by_name("umulhi");
-    let ushr = insts.by_name("ushr");
-    let ushr_imm = insts.by_name("ushr_imm");
-    let urem = insts.by_name("urem");
-
-    let x86_bsf = x86_instructions.by_name("x86_bsf");
-    let x86_bsr = x86_instructions.by_name("x86_bsr");
-    let x86_umulx = x86_instructions.by_name("x86_umulx");
-    let x86_smulx = x86_instructions.by_name("x86_smulx");
-
-    let imm = &shared.imm;
-
-    // Shift by a 64-bit amount is equivalent to a shift by that amount mod 32, so we can reduce
-    // the size of the shift amount. This is useful for x86_32, where an I64 shift amount is
-    // not encodable.
-    let a = var("a");
-    let x = var("x");
-    let y = var("y");
-    let z = var("z");
-
-    for &ty in &[I8, I16, I32] {
-        let ishl_by_i64 = ishl.bind(ty).bind(I64);
-        let ireduce = ireduce.bind(I32);
-        expand.legalize(
-            def!(a = ishl_by_i64(x, y)),
-            vec![def!(z = ireduce(y)), def!(a = ishl(x, z))],
-        );
-    }
-
-    for &ty in &[I8, I16, I32] {
-        let ushr_by_i64 = ushr.bind(ty).bind(I64);
-        let ireduce = ireduce.bind(I32);
-        expand.legalize(
-            def!(a = ushr_by_i64(x, y)),
-            vec![def!(z = ireduce(y)), def!(a = ishl(x, z))],
-        );
-    }
-
-    // Division and remainder.
-    //
-    // The srem expansion requires custom code because srem INT_MIN, -1 is not
-    // allowed to trap. The other ops need to check avoid_div_traps.
-    expand.custom_legalize(sdiv, "expand_sdivrem");
-    expand.custom_legalize(srem, "expand_sdivrem");
-    expand.custom_legalize(udiv, "expand_udivrem");
-    expand.custom_legalize(urem, "expand_udivrem");
-
-    // Double length (widening) multiplication.
-    let a = var("a");
-    let x = var("x");
-    let y = var("y");
-    let a1 = var("a1");
-    let a2 = var("a2");
-    let res_lo = var("res_lo");
-    let res_hi = var("res_hi");
-
-    expand.legalize(
-        def!(res_hi = umulhi(x, y)),
-        vec![def!((res_lo, res_hi) = x86_umulx(x, y))],
-    );
-
-    expand.legalize(
-        def!(res_hi = smulhi(x, y)),
-        vec![def!((res_lo, res_hi) = x86_smulx(x, y))],
-    );
-
-    // Floating point condition codes.
-    //
-    // The 8 condition codes in `supported_floatccs` are directly supported by a
-    // `ucomiss` or `ucomisd` instruction. The remaining codes need legalization
-    // patterns.
-
-    let floatcc_eq = Literal::enumerator_for(&imm.floatcc, "eq");
-    let floatcc_ord = Literal::enumerator_for(&imm.floatcc, "ord");
-    let floatcc_ueq = Literal::enumerator_for(&imm.floatcc, "ueq");
-    let floatcc_ne = Literal::enumerator_for(&imm.floatcc, "ne");
-    let floatcc_uno = Literal::enumerator_for(&imm.floatcc, "uno");
-    let floatcc_one = Literal::enumerator_for(&imm.floatcc, "one");
-
-    // Equality needs an explicit `ord` test which checks the parity bit.
-    expand.legalize(
-        def!(a = fcmp(floatcc_eq, x, y)),
-        vec![
-            def!(a1 = fcmp(floatcc_ord, x, y)),
-            def!(a2 = fcmp(floatcc_ueq, x, y)),
-            def!(a = band(a1, a2)),
-        ],
-    );
-    expand.legalize(
-        def!(a = fcmp(floatcc_ne, x, y)),
-        vec![
-            def!(a1 = fcmp(floatcc_uno, x, y)),
-            def!(a2 = fcmp(floatcc_one, x, y)),
-            def!(a = bor(a1, a2)),
-        ],
-    );
-
-    let floatcc_lt = &Literal::enumerator_for(&imm.floatcc, "lt");
-    let floatcc_gt = &Literal::enumerator_for(&imm.floatcc, "gt");
-    let floatcc_le = &Literal::enumerator_for(&imm.floatcc, "le");
-    let floatcc_ge = &Literal::enumerator_for(&imm.floatcc, "ge");
-    let floatcc_ugt = &Literal::enumerator_for(&imm.floatcc, "ugt");
-    let floatcc_ult = &Literal::enumerator_for(&imm.floatcc, "ult");
-    let floatcc_uge = &Literal::enumerator_for(&imm.floatcc, "uge");
-    let floatcc_ule = &Literal::enumerator_for(&imm.floatcc, "ule");
-
-    // Inequalities that need to be reversed.
-    for &(cc, rev_cc) in &[
-        (floatcc_lt, floatcc_gt),
-        (floatcc_le, floatcc_ge),
-        (floatcc_ugt, floatcc_ult),
-        (floatcc_uge, floatcc_ule),
-    ] {
-        expand.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]);
-    }
-
-    // We need to modify the CFG for min/max legalization.
-    expand.custom_legalize(fmin, "expand_minmax");
-    expand.custom_legalize(fmax, "expand_minmax");
-
-    // Conversions from unsigned need special handling.
-    expand.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint");
-    // Conversions from float to int can trap and modify the control flow graph.
-    expand.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint");
-    expand.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint");
-    expand.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat");
-    expand.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat");
-
-    // Count leading and trailing zeroes, for baseline x86_64
-    let c_minus_one = var("c_minus_one");
-    let c_thirty_one = var("c_thirty_one");
-    let c_thirty_two = var("c_thirty_two");
-    let c_sixty_three = var("c_sixty_three");
-    let c_sixty_four = var("c_sixty_four");
-    let index1 = var("index1");
-    let r2flags = var("r2flags");
-    let index2 = var("index2");
-
-    let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq");
-    let imm64_minus_one = Literal::constant(&imm.imm64, -1);
-    let imm64_63 = Literal::constant(&imm.imm64, 63);
-    expand.legalize(
-        def!(a = clz.I64(x)),
-        vec![
-            def!(c_minus_one = iconst(imm64_minus_one)),
-            def!(c_sixty_three = iconst(imm64_63)),
-            def!((index1, r2flags) = x86_bsr(x)),
-            def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)),
-            def!(a = isub(c_sixty_three, index2)),
-        ],
-    );
-
-    let imm64_31 = Literal::constant(&imm.imm64, 31);
-    expand.legalize(
-        def!(a = clz.I32(x)),
-        vec![
-            def!(c_minus_one = iconst(imm64_minus_one)),
-            def!(c_thirty_one = iconst(imm64_31)),
-            def!((index1, r2flags) = x86_bsr(x)),
-            def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)),
-            def!(a = isub(c_thirty_one, index2)),
-        ],
-    );
-
-    let imm64_64 = Literal::constant(&imm.imm64, 64);
-    expand.legalize(
-        def!(a = ctz.I64(x)),
-        vec![
-            def!(c_sixty_four = iconst(imm64_64)),
-            def!((index1, r2flags) = x86_bsf(x)),
-            def!(a = selectif(intcc_eq, r2flags, c_sixty_four, index1)),
-        ],
-    );
-
-    let imm64_32 = Literal::constant(&imm.imm64, 32);
-    expand.legalize(
-        def!(a = ctz.I32(x)),
-        vec![
-            def!(c_thirty_two = iconst(imm64_32)),
-            def!((index1, r2flags) = x86_bsf(x)),
-            def!(a = selectif(intcc_eq, r2flags, c_thirty_two, index1)),
-        ],
-    );
-
-    // Population count for baseline x86_64
-    let x = var("x");
-    let r = var("r");
-
-    let qv3 = var("qv3");
-    let qv4 = var("qv4");
-    let qv5 = var("qv5");
-    let qv6 = var("qv6");
-    let qv7 = var("qv7");
-    let qv8 = var("qv8");
-    let qv9 = var("qv9");
-    let qv10 = var("qv10");
-    let qv11 = var("qv11");
-    let qv12 = var("qv12");
-    let qv13 = var("qv13");
-    let qv14 = var("qv14");
-    let qv15 = var("qv15");
-    let qc77 = var("qc77");
-    #[allow(non_snake_case)]
-    let qc0F = var("qc0F");
-    let qc01 = var("qc01");
-
-    let imm64_1 = Literal::constant(&imm.imm64, 1);
-    let imm64_4 = Literal::constant(&imm.imm64, 4);
-    expand.legalize(
-        def!(r = popcnt.I64(x)),
-        vec![
-            def!(qv3 = ushr_imm(x, imm64_1)),
-            def!(qc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777_7777_7777))),
-            def!(qv4 = band(qv3, qc77)),
-            def!(qv5 = isub(x, qv4)),
-            def!(qv6 = ushr_imm(qv4, imm64_1)),
-            def!(qv7 = band(qv6, qc77)),
-            def!(qv8 = isub(qv5, qv7)),
-            def!(qv9 = ushr_imm(qv7, imm64_1)),
-            def!(qv10 = band(qv9, qc77)),
-            def!(qv11 = isub(qv8, qv10)),
-            def!(qv12 = ushr_imm(qv11, imm64_4)),
-            def!(qv13 = iadd(qv11, qv12)),
-            def!(qc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F_0F0F_0F0F))),
-            def!(qv14 = band(qv13, qc0F)),
-            def!(qc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101_0101_0101))),
-            def!(qv15 = imul(qv14, qc01)),
-            def!(r = ushr_imm(qv15, Literal::constant(&imm.imm64, 56))),
-        ],
-    );
-
-    let lv3 = var("lv3");
-    let lv4 = var("lv4");
-    let lv5 = var("lv5");
-    let lv6 = var("lv6");
-    let lv7 = var("lv7");
-    let lv8 = var("lv8");
-    let lv9 = var("lv9");
-    let lv10 = var("lv10");
-    let lv11 = var("lv11");
-    let lv12 = var("lv12");
-    let lv13 = var("lv13");
-    let lv14 = var("lv14");
-    let lv15 = var("lv15");
-    let lc77 = var("lc77");
-    #[allow(non_snake_case)]
-    let lc0F = var("lc0F");
-    let lc01 = var("lc01");
-
-    expand.legalize(
-        def!(r = popcnt.I32(x)),
-        vec![
-            def!(lv3 = ushr_imm(x, imm64_1)),
-            def!(lc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777))),
-            def!(lv4 = band(lv3, lc77)),
-            def!(lv5 = isub(x, lv4)),
-            def!(lv6 = ushr_imm(lv4, imm64_1)),
-            def!(lv7 = band(lv6, lc77)),
-            def!(lv8 = isub(lv5, lv7)),
-            def!(lv9 = ushr_imm(lv7, imm64_1)),
-            def!(lv10 = band(lv9, lc77)),
-            def!(lv11 = isub(lv8, lv10)),
-            def!(lv12 = ushr_imm(lv11, imm64_4)),
-            def!(lv13 = iadd(lv11, lv12)),
-            def!(lc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F))),
-            def!(lv14 = band(lv13, lc0F)),
-            def!(lc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101))),
-            def!(lv15 = imul(lv14, lc01)),
-            def!(r = ushr_imm(lv15, Literal::constant(&imm.imm64, 24))),
-        ],
-    );
-
-    expand.custom_legalize(ineg, "convert_ineg");
-    expand.custom_legalize(tls_value, "expand_tls_value");
-    widen.custom_legalize(ineg, "convert_ineg");
-
-    // To reduce compilation times, separate out large blocks of legalizations by theme.
-    define_simd(shared, x86_instructions, &mut narrow, &mut narrow_avx);
-
-    expand.build_and_add_to(&mut shared.transform_groups);
-    let narrow_id = narrow.build_and_add_to(&mut shared.transform_groups);
-    narrow_avx
-        .chain_with(narrow_id)
-        .build_and_add_to(&mut shared.transform_groups);
-    widen.build_and_add_to(&mut shared.transform_groups);
-}
-
-fn define_simd(
-    shared: &mut SharedDefinitions,
-    x86_instructions: &InstructionGroup,
-    narrow: &mut TransformGroupBuilder,
-    narrow_avx: &mut TransformGroupBuilder,
-) {
-    let insts = &shared.instructions;
-    let band = insts.by_name("band");
-    let band_not = insts.by_name("band_not");
-    let bitcast = insts.by_name("bitcast");
-    let bitselect = insts.by_name("bitselect");
-    let bor = insts.by_name("bor");
-    let bnot = insts.by_name("bnot");
-    let bxor = insts.by_name("bxor");
-    let extractlane = insts.by_name("extractlane");
-    let fabs = insts.by_name("fabs");
-    let fcmp = insts.by_name("fcmp");
-    let fcvt_from_uint = insts.by_name("fcvt_from_uint");
-    let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
-    let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat");
-    let fmax = insts.by_name("fmax");
-    let fmin = insts.by_name("fmin");
-    let fneg = insts.by_name("fneg");
-    let iadd_imm = insts.by_name("iadd_imm");
-    let icmp = insts.by_name("icmp");
-    let imax = insts.by_name("imax");
-    let imin = insts.by_name("imin");
-    let imul = insts.by_name("imul");
-    let ineg = insts.by_name("ineg");
-    let insertlane = insts.by_name("insertlane");
-    let ishl = insts.by_name("ishl");
-    let ishl_imm = insts.by_name("ishl_imm");
-    let raw_bitcast = insts.by_name("raw_bitcast");
-    let scalar_to_vector = insts.by_name("scalar_to_vector");
-    let splat = insts.by_name("splat");
-    let shuffle = insts.by_name("shuffle");
-    let sshr = insts.by_name("sshr");
-    let swizzle = insts.by_name("swizzle");
-    let trueif = insts.by_name("trueif");
-    let uadd_sat = insts.by_name("uadd_sat");
-    let umax = insts.by_name("umax");
-    let umin = insts.by_name("umin");
-    let snarrow = insts.by_name("snarrow");
-    let swiden_high = insts.by_name("swiden_high");
-    let swiden_low = insts.by_name("swiden_low");
-    let ushr_imm = insts.by_name("ushr_imm");
-    let ushr = insts.by_name("ushr");
-    let uwiden_high = insts.by_name("uwiden_high");
-    let uwiden_low = insts.by_name("uwiden_low");
-    let vconst = insts.by_name("vconst");
-    let vall_true = insts.by_name("vall_true");
-    let vany_true = insts.by_name("vany_true");
-    let vselect = insts.by_name("vselect");
-
-    let x86_palignr = x86_instructions.by_name("x86_palignr");
-    let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
-    let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
-    let x86_pmins = x86_instructions.by_name("x86_pmins");
-    let x86_pminu = x86_instructions.by_name("x86_pminu");
-    let x86_pshufb = x86_instructions.by_name("x86_pshufb");
-    let x86_pshufd = x86_instructions.by_name("x86_pshufd");
-    let x86_psra = x86_instructions.by_name("x86_psra");
-    let x86_ptest = x86_instructions.by_name("x86_ptest");
-    let x86_punpckh = x86_instructions.by_name("x86_punpckh");
-    let x86_punpckl = x86_instructions.by_name("x86_punpckl");
-
-    let imm = &shared.imm;
-
-    // Set up variables and immediates.
-    let uimm8_zero = Literal::constant(&imm.uimm8, 0x00);
-    let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
-    let uimm8_eight = Literal::constant(&imm.uimm8, 8);
-    let u128_zeroes = constant(vec![0x00; 16]);
-    let u128_ones = constant(vec![0xff; 16]);
-    let u128_seventies = constant(vec![0x70; 16]);
-    let a = var("a");
-    let b = var("b");
-    let c = var("c");
-    let d = var("d");
-    let e = var("e");
-    let f = var("f");
-    let g = var("g");
-    let h = var("h");
-    let x = var("x");
-    let y = var("y");
-    let z = var("z");
-
-    // Limit the SIMD vector size: eventually multiple vector sizes may be supported
-    // but for now only SSE-sized vectors are available.
-    let sse_vector_size: u64 = 128;
-    let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128;
-
-    // SIMD splat: 8-bits
-    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
-        let splat_any8x16 = splat.bind(vector(ty, sse_vector_size));
-        narrow.legalize(
-            def!(y = splat_any8x16(x)),
-            vec![
-                // Move into the lowest 8 bits of an XMM register.
-                def!(a = scalar_to_vector(x)),
-                // Zero out a different XMM register; the shuffle mask for moving the lowest byte
-                // to all other byte lanes is 0x0.
-                def!(b = vconst(u128_zeroes)),
-                // PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b).
-                def!(y = x86_pshufb(a, b)),
-            ],
-        );
-    }
-
-    // SIMD splat: 16-bits
-    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) {
-        let splat_x16x8 = splat.bind(vector(ty, sse_vector_size));
-        let raw_bitcast_any16x8_to_i32x4 = raw_bitcast
-            .bind(vector(I32, sse_vector_size))
-            .bind(vector(ty, sse_vector_size));
-        let raw_bitcast_i32x4_to_any16x8 = raw_bitcast
-            .bind(vector(ty, sse_vector_size))
-            .bind(vector(I32, sse_vector_size));
-        narrow.legalize(
-            def!(y = splat_x16x8(x)),
-            vec![
-                // Move into the lowest 16 bits of an XMM register.
-                def!(a = scalar_to_vector(x)),
-                // Insert the value again but in the next lowest 16 bits.
-                def!(b = insertlane(a, x, uimm8_one)),
-                // No instruction emitted; pretend this is an I32x4 so we can use PSHUFD.
-                def!(c = raw_bitcast_any16x8_to_i32x4(b)),
-                // Broadcast the bytes in the XMM register with PSHUFD.
-                def!(d = x86_pshufd(c, uimm8_zero)),
-                // No instruction emitted; pretend this is an X16x8 again.
-                def!(y = raw_bitcast_i32x4_to_any16x8(d)),
-            ],
-        );
-    }
-
-    // SIMD splat: 32-bits
-    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
-        let splat_any32x4 = splat.bind(vector(ty, sse_vector_size));
-        narrow.legalize(
-            def!(y = splat_any32x4(x)),
-            vec![
-                // Translate to an x86 MOV to get the value in an XMM register.
-                def!(a = scalar_to_vector(x)),
-                // Broadcast the bytes in the XMM register with PSHUFD.
-                def!(y = x86_pshufd(a, uimm8_zero)),
-            ],
-        );
-    }
-
-    // SIMD splat: 64-bits
-    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 64) {
-        let splat_any64x2 = splat.bind(vector(ty, sse_vector_size));
-        narrow.legalize(
-            def!(y = splat_any64x2(x)),
-            vec![
-                // Move into the lowest 64 bits of an XMM register.
-                def!(a = scalar_to_vector(x)),
-                // Move into the highest 64 bits of the same XMM register.
-                def!(y = insertlane(a, x, uimm8_one)),
-            ],
-        );
-    }
-
-    // SIMD swizzle; the following inefficient implementation is due to the Wasm SIMD spec requiring
-    // mask indexes greater than 15 to have the same semantics as a 0 index. For the spec discussion,
-    // see https://github.com/WebAssembly/simd/issues/93.
-    {
-        let swizzle = swizzle.bind(vector(I8, sse_vector_size));
-        narrow.legalize(
-            def!(a = swizzle(x, y)),
-            vec![
-                def!(b = vconst(u128_seventies)),
-                def!(c = uadd_sat(y, b)),
-                def!(a = x86_pshufb(x, c)),
-            ],
-        );
-    }
-
-    // SIMD bnot
-    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let bnot = bnot.bind(vector(ty, sse_vector_size));
-        narrow.legalize(
-            def!(y = bnot(x)),
-            vec![def!(a = vconst(u128_ones)), def!(y = bxor(a, x))],
-        );
-    }
-
-    // SIMD shift right (arithmetic, i16x8 and i32x4)
-    for ty in &[I16, I32] {
-        let sshr = sshr.bind(vector(*ty, sse_vector_size));
-        let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
-        narrow.legalize(
-            def!(a = sshr(x, y)),
-            vec![def!(b = bitcast_i64x2(y)), def!(a = x86_psra(x, b))],
-        );
-    }
-    // SIMD shift right (arithmetic, i8x16)
-    {
-        let sshr = sshr.bind(vector(I8, sse_vector_size));
-        let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
-        let raw_bitcast_i16x8 = raw_bitcast.bind(vector(I16, sse_vector_size));
-        let raw_bitcast_i16x8_again = raw_bitcast.bind(vector(I16, sse_vector_size));
-        narrow.legalize(
-            def!(z = sshr(x, y)),
-            vec![
-                // Since we will use the high byte of each 16x8 lane, shift an extra 8 bits.
-                def!(a = iadd_imm(y, uimm8_eight)),
-                def!(b = bitcast_i64x2(a)),
-                // Take the low 8 bytes of x, duplicate them in 16x8 lanes, then shift right.
-                def!(c = x86_punpckl(x, x)),
-                def!(d = raw_bitcast_i16x8(c)),
-                def!(e = x86_psra(d, b)),
-                // Take the high 8 bytes of x, duplicate them in 16x8 lanes, then shift right.
-                def!(f = x86_punpckh(x, x)),
-                def!(g = raw_bitcast_i16x8_again(f)),
-                def!(h = x86_psra(g, b)),
-                // Re-pack the vector.
-                def!(z = snarrow(e, h)),
-            ],
-        );
-    }
-    // SIMD shift right (arithmetic, i64x2)
-    {
-        let sshr_vector = sshr.bind(vector(I64, sse_vector_size));
-        let sshr_scalar_lane0 = sshr.bind(I64);
-        let sshr_scalar_lane1 = sshr.bind(I64);
-        narrow.legalize(
-            def!(z = sshr_vector(x, y)),
-            vec![
-                // Use scalar operations to shift the first lane.
-                def!(a = extractlane(x, uimm8_zero)),
-                def!(b = sshr_scalar_lane0(a, y)),
-                def!(c = insertlane(x, b, uimm8_zero)),
-                // Do the same for the second lane.
-                def!(d = extractlane(x, uimm8_one)),
-                def!(e = sshr_scalar_lane1(d, y)),
-                def!(z = insertlane(c, e, uimm8_one)),
-            ],
-        );
-    }
-
-    // SIMD select
-    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let bitselect = bitselect.bind(vector(ty, sse_vector_size)); // must bind both x/y and c
-        narrow.legalize(
-            def!(d = bitselect(c, x, y)),
-            vec![
-                def!(a = band(x, c)),
-                def!(b = band_not(y, c)),
-                def!(d = bor(a, b)),
-            ],
-        );
-    }
-
-    // SIMD vselect; replace with bitselect if BLEND* instructions are not available.
-    // This works, because each lane of boolean vector is filled with zeroes or ones.
-    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let vselect = vselect.bind(vector(ty, sse_vector_size));
-        let raw_bitcast = raw_bitcast.bind(vector(ty, sse_vector_size));
-        narrow.legalize(
-            def!(d = vselect(c, x, y)),
-            vec![def!(a = raw_bitcast(c)), def!(d = bitselect(a, x, y))],
-        );
-    }
-
-    // SIMD vany_true
-    let ne = Literal::enumerator_for(&imm.intcc, "ne");
-    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let vany_true = vany_true.bind(vector(ty, sse_vector_size));
-        narrow.legalize(
-            def!(y = vany_true(x)),
-            vec![def!(a = x86_ptest(x, x)), def!(y = trueif(ne, a))],
-        );
-    }
-
-    // SIMD vall_true
-    let eq = Literal::enumerator_for(&imm.intcc, "eq");
-    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let vall_true = vall_true.bind(vector(ty, sse_vector_size));
-        if ty.is_int() {
-            // In the common case (Wasm's integer-only all_true), we do not require a
-            // bitcast.
-            narrow.legalize(
-                def!(y = vall_true(x)),
-                vec![
-                    def!(a = vconst(u128_zeroes)),
-                    def!(c = icmp(eq, x, a)),
-                    def!(d = x86_ptest(c, c)),
-                    def!(y = trueif(eq, d)),
-                ],
-            );
-        } else {
-            // However, to support other types we must bitcast them to an integer vector to
-            // use icmp.
-            let lane_type_as_int = LaneType::int_from_bits(ty.lane_bits() as u16);
-            let raw_bitcast_to_int = raw_bitcast.bind(vector(lane_type_as_int, sse_vector_size));
-            narrow.legalize(
-                def!(y = vall_true(x)),
-                vec![
-                    def!(a = vconst(u128_zeroes)),
-                    def!(b = raw_bitcast_to_int(x)),
-                    def!(c = icmp(eq, b, a)),
-                    def!(d = x86_ptest(c, c)),
-                    def!(y = trueif(eq, d)),
-                ],
-            );
-        }
-    }
-
-    // SIMD icmp ne
-    let ne = Literal::enumerator_for(&imm.intcc, "ne");
-    for ty in ValueType::all_lane_types().filter(|ty| allowed_simd_type(ty) && ty.is_int()) {
-        let icmp_ = icmp.bind(vector(ty, sse_vector_size));
-        narrow.legalize(
-            def!(c = icmp_(ne, a, b)),
-            vec![def!(x = icmp(eq, a, b)), def!(c = bnot(x))],
-        );
-    }
-
-    // SIMD icmp greater-/less-than
-    let sgt = Literal::enumerator_for(&imm.intcc, "sgt");
-    let ugt = Literal::enumerator_for(&imm.intcc, "ugt");
-    let sge = Literal::enumerator_for(&imm.intcc, "sge");
-    let uge = Literal::enumerator_for(&imm.intcc, "uge");
-    let slt = Literal::enumerator_for(&imm.intcc, "slt");
-    let ult = Literal::enumerator_for(&imm.intcc, "ult");
-    let sle = Literal::enumerator_for(&imm.intcc, "sle");
-    let ule = Literal::enumerator_for(&imm.intcc, "ule");
-    for ty in &[I8, I16, I32] {
-        // greater-than
-        let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(
-            def!(c = icmp_(ugt, a, b)),
-            vec![
-                def!(x = x86_pmaxu(a, b)),
-                def!(y = icmp(eq, x, b)),
-                def!(c = bnot(y)),
-            ],
-        );
-        let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(
-            def!(c = icmp_(sge, a, b)),
-            vec![def!(x = x86_pmins(a, b)), def!(c = icmp(eq, x, b))],
-        );
-        let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(
-            def!(c = icmp_(uge, a, b)),
-            vec![def!(x = x86_pminu(a, b)), def!(c = icmp(eq, x, b))],
-        );
-
-        // less-than
-        let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(def!(c = icmp_(slt, a, b)), vec![def!(c = icmp(sgt, b, a))]);
-        let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(def!(c = icmp_(ult, a, b)), vec![def!(c = icmp(ugt, b, a))]);
-        let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(def!(c = icmp_(sle, a, b)), vec![def!(c = icmp(sge, b, a))]);
-        let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(def!(c = icmp_(ule, a, b)), vec![def!(c = icmp(uge, b, a))]);
-    }
-
-    // SIMD integer min/max
-    for ty in &[I8, I16, I32] {
-        let imin = imin.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(def!(c = imin(a, b)), vec![def!(c = x86_pmins(a, b))]);
-        let umin = umin.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(def!(c = umin(a, b)), vec![def!(c = x86_pminu(a, b))]);
-        let imax = imax.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(def!(c = imax(a, b)), vec![def!(c = x86_pmaxs(a, b))]);
-        let umax = umax.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(def!(c = umax(a, b)), vec![def!(c = x86_pmaxu(a, b))]);
-    }
-
-    // SIMD fcmp greater-/less-than
-    let gt = Literal::enumerator_for(&imm.floatcc, "gt");
-    let lt = Literal::enumerator_for(&imm.floatcc, "lt");
-    let ge = Literal::enumerator_for(&imm.floatcc, "ge");
-    let le = Literal::enumerator_for(&imm.floatcc, "le");
-    let ugt = Literal::enumerator_for(&imm.floatcc, "ugt");
-    let ult = Literal::enumerator_for(&imm.floatcc, "ult");
-    let uge = Literal::enumerator_for(&imm.floatcc, "uge");
-    let ule = Literal::enumerator_for(&imm.floatcc, "ule");
-    for ty in &[F32, F64] {
-        let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(def!(c = fcmp_(gt, a, b)), vec![def!(c = fcmp(lt, b, a))]);
-        let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(def!(c = fcmp_(ge, a, b)), vec![def!(c = fcmp(le, b, a))]);
-        let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(def!(c = fcmp_(ult, a, b)), vec![def!(c = fcmp(ugt, b, a))]);
-        let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(def!(c = fcmp_(ule, a, b)), vec![def!(c = fcmp(uge, b, a))]);
-    }
-
-    for ty in &[F32, F64] {
-        let fneg = fneg.bind(vector(*ty, sse_vector_size));
-        let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16);
-        let uimm8_shift = Literal::constant(&imm.uimm8, lane_type_as_int.lane_bits() as i64 - 1);
-        let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size));
-        let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(
-            def!(b = fneg(a)),
-            vec![
-                def!(c = vconst(u128_ones)),
-                def!(d = ishl_imm(c, uimm8_shift)), // Create a mask of all 0s except the MSB.
-                def!(e = bitcast_to_float(d)),      // Cast mask to the floating-point type.
-                def!(b = bxor(a, e)),               // Flip the MSB.
-            ],
-        );
-    }
-
-    // SIMD fabs
-    for ty in &[F32, F64] {
-        let fabs = fabs.bind(vector(*ty, sse_vector_size));
-        let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16);
-        let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size));
-        let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(
-            def!(b = fabs(a)),
-            vec![
-                def!(c = vconst(u128_ones)),
-                def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB.
-                def!(e = bitcast_to_float(d)),    // Cast mask to the floating-point type.
-                def!(b = band(a, e)),             // Unset the MSB.
-            ],
-        );
-    }
-
-    // SIMD widen
-    for ty in &[I8, I16] {
-        let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(
-            def!(b = swiden_high(a)),
-            vec![
-                def!(c = x86_palignr(a, a, uimm8_eight)),
-                def!(b = swiden_low(c)),
-            ],
-        );
-        let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size));
-        narrow.legalize(
-            def!(b = uwiden_high(a)),
-            vec![
-                def!(c = x86_palignr(a, a, uimm8_eight)),
-                def!(b = uwiden_low(c)),
-            ],
-        );
-    }
-
-    narrow.custom_legalize(shuffle, "convert_shuffle");
-    narrow.custom_legalize(extractlane, "convert_extractlane");
-    narrow.custom_legalize(insertlane, "convert_insertlane");
-    narrow.custom_legalize(ineg, "convert_ineg");
-    narrow.custom_legalize(ushr, "convert_ushr");
-    narrow.custom_legalize(ishl, "convert_ishl");
-    narrow.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat_vector");
-    narrow.custom_legalize(fmin, "expand_minmax_vector");
-    narrow.custom_legalize(fmax, "expand_minmax_vector");
-
-    narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
-    narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector");
-    narrow_avx.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat_vector");
-}
diff --git a/cranelift/codegen/meta/src/isa/x86/mod.rs b/cranelift/codegen/meta/src/isa/x86/mod.rs
index 26c833a77f..7c3e4c6877 100644
--- a/cranelift/codegen/meta/src/isa/x86/mod.rs
+++ b/cranelift/codegen/meta/src/isa/x86/mod.rs
@@ -1,87 +1,25 @@
-use crate::cdsl::cpu_modes::CpuMode;
+use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap};
 use crate::cdsl::isa::TargetIsa;
-use crate::cdsl::types::{ReferenceType, VectorType};
+use crate::cdsl::recipes::Recipes;
+use crate::cdsl::regs::IsaRegsBuilder;
 
-use crate::shared::types::Bool::B1;
-use crate::shared::types::Float::{F32, F64};
-use crate::shared::types::Int::{I16, I32, I64, I8};
-use crate::shared::types::Reference::{R32, R64};
 use crate::shared::Definitions as SharedDefinitions;
 
-mod encodings;
-mod instructions;
-mod legalize;
-mod opcodes;
-mod recipes;
-mod registers;
 pub(crate) mod settings;
 
 pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
     let settings = settings::define(&shared_defs.settings);
-    let regs = registers::define();
 
-    let inst_group = instructions::define(
-        &mut shared_defs.all_instructions,
-        &shared_defs.formats,
-        &shared_defs.imm,
-        &shared_defs.entities,
-    );
-    legalize::define(shared_defs, &inst_group);
+    let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build();
 
-    // CPU modes for 32-bit and 64-bit operations.
-    let mut x86_64 = CpuMode::new("I64");
-    let mut x86_32 = CpuMode::new("I32");
-
-    let expand_flags = shared_defs.transform_groups.by_name("expand_flags");
-    let x86_widen = shared_defs.transform_groups.by_name("x86_widen");
-    let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow");
-    let x86_narrow_avx = shared_defs.transform_groups.by_name("x86_narrow_avx");
-    let x86_expand = shared_defs.transform_groups.by_name("x86_expand");
-
-    x86_32.legalize_monomorphic(expand_flags);
-    x86_32.legalize_default(x86_narrow);
-    x86_32.legalize_type(B1, expand_flags);
-    x86_32.legalize_type(I8, x86_widen);
-    x86_32.legalize_type(I16, x86_widen);
-    x86_32.legalize_type(I32, x86_expand);
-    x86_32.legalize_value_type(ReferenceType(R32), x86_expand);
-    x86_32.legalize_type(F32, x86_expand);
-    x86_32.legalize_type(F64, x86_expand);
-    x86_32.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx);
-    x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
-    x86_32.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);
-
-    x86_64.legalize_monomorphic(expand_flags);
-    x86_64.legalize_default(x86_narrow);
-    x86_64.legalize_type(B1, expand_flags);
-    x86_64.legalize_type(I8, x86_widen);
-    x86_64.legalize_type(I16, x86_widen);
-    x86_64.legalize_type(I32, x86_expand);
-    x86_64.legalize_type(I64, x86_expand);
-    x86_64.legalize_value_type(ReferenceType(R64), x86_expand);
-    x86_64.legalize_type(F32, x86_expand);
-    x86_64.legalize_type(F64, x86_expand);
-    x86_64.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx);
-    x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
-    x86_64.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);
-
-    let recipes = recipes::define(shared_defs, &settings, &regs);
-
-    let encodings = encodings::define(shared_defs, &settings, &inst_group, &recipes);
-    x86_32.set_encodings(encodings.enc32);
-    x86_64.set_encodings(encodings.enc64);
-    let encodings_predicates = encodings.inst_pred_reg.extract();
-
-    let recipes = encodings.recipes;
-
-    let cpu_modes = vec![x86_64, x86_32];
+    let cpu_modes = vec![];
 
     TargetIsa::new(
         "x86",
         settings,
-        regs,
-        recipes,
+        IsaRegsBuilder::new().build(),
+        Recipes::new(),
         cpu_modes,
-        encodings_predicates,
+        InstructionPredicateMap::new(),
     )
 }
diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
deleted file mode 100644
index 2e72a1744d..0000000000
--- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs
+++ /dev/null
@@ -1,721 +0,0 @@
-//! Static, named definitions of instruction opcodes.
-
-/// Empty opcode for use as a default.
-pub static EMPTY: [u8; 0] = [];
-
-/// Add with carry flag r{16,32,64} to r/m of the same size.
-pub static ADC: [u8; 1] = [0x11];
-
-/// Add r{16,32,64} to r/m of the same size.
-pub static ADD: [u8; 1] = [0x01];
-
-/// Add imm{16,32} to r/m{16,32,64}, possibly sign-extended.
-pub static ADD_IMM: [u8; 1] = [0x81];
-
-/// Add sign-extended imm8 to r/m{16,32,64}.
-pub static ADD_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
-
-/// Add packed double-precision floating-point values from xmm2/mem to xmm1 and store result in  
-/// xmm1 (SSE2).
-pub static ADDPD: [u8; 3] = [0x66, 0x0f, 0x58];
-
-/// Add packed single-precision floating-point values from xmm2/mem to xmm1 and store result in  
-/// xmm1 (SSE).
-pub static ADDPS: [u8; 2] = [0x0f, 0x58];
-
-/// Add the low double-precision floating-point value from xmm2/mem to xmm1
-/// and store the result in xmm1.
-pub static ADDSD: [u8; 3] = [0xf2, 0x0f, 0x58];
-
-/// Add the low single-precision floating-point value from xmm2/mem to xmm1
-/// and store the result in xmm1.
-pub static ADDSS: [u8; 3] = [0xf3, 0x0f, 0x58];
-
-/// r/m{16,32,64} AND register of the same size (Intel docs have a typo).
-pub static AND: [u8; 1] = [0x21];
-
-/// imm{16,32} AND r/m{16,32,64}, possibly sign-extended.
-pub static AND_IMM: [u8; 1] = [0x81];
-
-/// r/m{16,32,64} AND sign-extended imm8.
-pub static AND_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
-
-/// Return the bitwise logical AND NOT of packed single-precision floating-point
-/// values in xmm1 and xmm2/mem.
-pub static ANDNPS: [u8; 2] = [0x0f, 0x55];
-
-/// Return the bitwise logical AND of packed single-precision floating-point values
-/// in xmm1 and xmm2/mem.
-pub static ANDPS: [u8; 2] = [0x0f, 0x54];
-
-/// Bit scan forward (stores index of first encountered 1 from the front).
-pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc];
-
-/// Bit scan reverse (stores index of first encountered 1 from the back).
-pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd];
-
-/// Select packed single-precision floating-point values from xmm1 and xmm2/m128
-/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
-pub static BLENDVPS: [u8; 4] = [0x66, 0x0f, 0x38, 0x14];
-
-/// Select packed double-precision floating-point values from xmm1 and xmm2/m128
-/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
-pub static BLENDVPD: [u8; 4] = [0x66, 0x0f, 0x38, 0x15];
-
-/// Call near, relative, displacement relative to next instruction (sign-extended).
-pub static CALL_RELATIVE: [u8; 1] = [0xe8];
-
-/// Move r/m{16,32,64} if overflow (OF=1).
-pub static CMOV_OVERFLOW: [u8; 2] = [0x0f, 0x40];
-
-/// Compare imm{16,32} with r/m{16,32,64} (sign-extended if 64).
-pub static CMP_IMM: [u8; 1] = [0x81];
-
-/// Compare imm8 with r/m{16,32,64}.
-pub static CMP_IMM8: [u8; 1] = [0x83];
-
-/// Compare r{16,32,64} with r/m of the same size.
-pub static CMP_REG: [u8; 1] = [0x39];
-
-/// Compare packed double-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of
-/// imm8 as comparison predicate (SSE2).
-pub static CMPPD: [u8; 3] = [0x66, 0x0f, 0xc2];
-
-/// Compare packed single-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of
-/// imm8 as comparison predicate (SSE).
-pub static CMPPS: [u8; 2] = [0x0f, 0xc2];
-
-/// Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision
-/// floating-point values in xmm1 (SSE2).
-pub static CVTDQ2PS: [u8; 2] = [0x0f, 0x5b];
-
-/// Convert scalar double-precision floating-point value to scalar single-precision
-/// floating-point value.
-pub static CVTSD2SS: [u8; 3] = [0xf2, 0x0f, 0x5a];
-
-/// Convert doubleword integer to scalar double-precision floating-point value.
-pub static CVTSI2SD: [u8; 3] = [0xf2, 0x0f, 0x2a];
-
-/// Convert doubleword integer to scalar single-precision floating-point value.
-pub static CVTSI2SS: [u8; 3] = [0xf3, 0x0f, 0x2a];
-
-/// Convert scalar single-precision floating-point value to scalar double-precision
-/// float-point value.
-pub static CVTSS2SD: [u8; 3] = [0xf3, 0x0f, 0x5a];
-
-/// Convert four packed single-precision floating-point values from xmm2/mem to four packed signed
-/// doubleword values in xmm1 using truncation (SSE2).
-pub static CVTTPS2DQ: [u8; 3] = [0xf3, 0x0f, 0x5b];
-
-/// Convert with truncation scalar double-precision floating-point value to signed
-/// integer.
-pub static CVTTSD2SI: [u8; 3] = [0xf2, 0x0f, 0x2c];
-
-/// Convert with truncation scalar single-precision floating-point value to integer.
-pub static CVTTSS2SI: [u8; 3] = [0xf3, 0x0f, 0x2c];
-
-/// Unsigned divide for {16,32,64}-bit.
-pub static DIV: [u8; 1] = [0xf7];
-
-/// Divide packed double-precision floating-point values in xmm1 by packed double-precision
-/// floating-point values in xmm2/mem (SSE2).
-pub static DIVPD: [u8; 3] = [0x66, 0x0f, 0x5e];
-
-/// Divide packed single-precision floating-point values in xmm1 by packed single-precision
-/// floating-point values in xmm2/mem (SSE).
-pub static DIVPS: [u8; 2] = [0x0f, 0x5e];
-
-/// Divide low double-precision floating-point value in xmm1 by low double-precision
-/// floating-point value in xmm2/m64.
-pub static DIVSD: [u8; 3] = [0xf2, 0x0f, 0x5e];
-
-/// Divide low single-precision floating-point value in xmm1 by low single-precision
-/// floating-point value in xmm2/m32.
-pub static DIVSS: [u8; 3] = [0xf3, 0x0f, 0x5e];
-
-/// Signed divide for {16,32,64}-bit.
-pub static IDIV: [u8; 1] = [0xf7];
-
-/// Signed multiply for {16,32,64}-bit, generic registers.
-pub static IMUL: [u8; 2] = [0x0f, 0xaf];
-
-/// Signed multiply for {16,32,64}-bit, storing into RDX:RAX.
-pub static IMUL_RDX_RAX: [u8; 1] = [0xf7];
-
-/// Insert scalar single-precision floating-point value.
-pub static INSERTPS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x21];
-
-/// Either:
-///  1. Jump near, absolute indirect, RIP = 64-bit offset from register or memory.
-///  2. Jump far, absolute indirect, address given in m16:64.
-pub static JUMP_ABSOLUTE: [u8; 1] = [0xff];
-
-/// Jump near, relative, RIP = RIP + 32-bit displacement sign extended to 64 bits.
-pub static JUMP_NEAR_RELATIVE: [u8; 1] = [0xe9];
-
-/// Jump near (rel32) if overflow (OF=1).
-pub static JUMP_NEAR_IF_OVERFLOW: [u8; 2] = [0x0f, 0x80];
-
-/// Jump short, relative, RIP = RIP + 8-bit displacement sign extended to 64 bits.
-pub static JUMP_SHORT: [u8; 1] = [0xeb];
-
-/// Jump short (rel8) if equal (ZF=1).
-pub static JUMP_SHORT_IF_EQUAL: [u8; 1] = [0x74];
-
-/// Jump short (rel8) if not equal (ZF=0).
-pub static JUMP_SHORT_IF_NOT_EQUAL: [u8; 1] = [0x75];
-
-/// Jump short (rel8) if overflow (OF=1).
-pub static JUMP_SHORT_IF_OVERFLOW: [u8; 1] = [0x70];
-
-/// Store effective address for m in register r{16,32,64}.
-pub static LEA: [u8; 1] = [0x8d];
-
-/// Count the number of leading zero bits.
-pub static LZCNT: [u8; 3] = [0xf3, 0x0f, 0xbd];
-
-/// Return the maximum packed double-precision floating-point values between xmm1 and xmm2/m128
-/// (SSE2).
-pub static MAXPD: [u8; 3] = [0x66, 0x0f, 0x5f];
-
-/// Return the maximum packed single-precision floating-point values between  xmm1 and xmm2/m128
-/// (SSE).
-pub static MAXPS: [u8; 2] = [0x0f, 0x5f];
-
-/// Return the maximum scalar double-precision floating-point value between
-/// xmm2/m64 and xmm1.
-pub static MAXSD: [u8; 3] = [0xf2, 0x0f, 0x5f];
-
-/// Return the maximum scalar single-precision floating-point value between
-/// xmm2/m32 and xmm1.
-pub static MAXSS: [u8; 3] = [0xf3, 0x0f, 0x5f];
-
-/// Return the minimum packed double-precision floating-point values between xmm1 and xmm2/m128
-/// (SSE2).
-pub static MINPD: [u8; 3] = [0x66, 0x0f, 0x5d];
-
-/// Return the minimum packed single-precision floating-point values between xmm1 and xmm2/m128
-/// (SSE).
-pub static MINPS: [u8; 2] = [0x0f, 0x5d];
-
-/// Return the minimum scalar double-precision floating-point value between
-/// xmm2/m64 and xmm1.
-pub static MINSD: [u8; 3] = [0xf2, 0x0f, 0x5d];
-
-/// Return the minimum scalar single-precision floating-point value between
-/// xmm2/m32 and xmm1.
-pub static MINSS: [u8; 3] = [0xf3, 0x0f, 0x5d];
-
-/// Move r8 to r/m8.
-pub static MOV_BYTE_STORE: [u8; 1] = [0x88];
-
-/// Move imm{16,32,64} to same-sized register.
-pub static MOV_IMM: [u8; 1] = [0xb8];
-
-/// Move imm{16,32} to r{16,32,64}, sign-extended if 64-bit target.
-pub static MOV_IMM_SIGNEXTEND: [u8; 1] = [0xc7];
-
-/// Move {r/m16, r/m32, r/m64} to same-sized register.
-pub static MOV_LOAD: [u8; 1] = [0x8b];
-
-/// Move r16 to r/m16.
-pub static MOV_STORE_16: [u8; 2] = [0x66, 0x89];
-
-/// Move {r16, r32, r64} to same-sized register or memory.
-pub static MOV_STORE: [u8; 1] = [0x89];
-
-/// Move aligned packed single-precision floating-point values from x/m to xmm (SSE).
-pub static MOVAPS_LOAD: [u8; 2] = [0x0f, 0x28];
-
-/// Move doubleword from r/m32 to xmm (SSE2). Quadword with REX prefix.
-pub static MOVD_LOAD_XMM: [u8; 3] = [0x66, 0x0f, 0x6e];
-
-/// Move doubleword from xmm to r/m32 (SSE2). Quadword with REX prefix.
-pub static MOVD_STORE_XMM: [u8; 3] = [0x66, 0x0f, 0x7e];
-
-/// Move packed single-precision floating-point values low to high (SSE).
-pub static MOVLHPS: [u8; 2] = [0x0f, 0x16];
-
-/// Move scalar double-precision floating-point value (from reg/mem to reg).
-pub static MOVSD_LOAD: [u8; 3] = [0xf2, 0x0f, 0x10];
-
-/// Move scalar double-precision floating-point value (from reg to reg/mem).
-pub static MOVSD_STORE: [u8; 3] = [0xf2, 0x0f, 0x11];
-
-/// Move scalar single-precision floating-point value (from reg to reg/mem).
-pub static MOVSS_STORE: [u8; 3] = [0xf3, 0x0f, 0x11];
-
-/// Move scalar single-precision floating-point-value (from reg/mem to reg).
-pub static MOVSS_LOAD: [u8; 3] = [0xf3, 0x0f, 0x10];
-
-/// Move byte to register with sign-extension.
-pub static MOVSX_BYTE: [u8; 2] = [0x0f, 0xbe];
-
-/// Move word to register with sign-extension.
-pub static MOVSX_WORD: [u8; 2] = [0x0f, 0xbf];
-
-/// Move doubleword to register with sign-extension.
-pub static MOVSXD: [u8; 1] = [0x63];
-
-/// Move unaligned packed single-precision floating-point from x/m to xmm (SSE).
-pub static MOVUPS_LOAD: [u8; 2] = [0x0f, 0x10];
-
-/// Move unaligned packed single-precision floating-point value from xmm to x/m (SSE).
-pub static MOVUPS_STORE: [u8; 2] = [0x0f, 0x11];
-
-/// Move byte to register with zero-extension.
-pub static MOVZX_BYTE: [u8; 2] = [0x0f, 0xb6];
-
-/// Move word to register with zero-extension.
-pub static MOVZX_WORD: [u8; 2] = [0x0f, 0xb7];
-
-/// Unsigned multiply for {16,32,64}-bit.
-pub static MUL: [u8; 1] = [0xf7];
-
-/// Multiply packed double-precision floating-point values from xmm2/mem to xmm1 and store result
-/// in xmm1 (SSE2).
-pub static MULPD: [u8; 3] = [0x66, 0x0f, 0x59];
-
-/// Multiply packed single-precision floating-point values from xmm2/mem to xmm1 and store result
-/// in xmm1 (SSE).
-pub static MULPS: [u8; 2] = [0x0f, 0x59];
-
-/// Multiply the low double-precision floating-point value in xmm2/m64 by the
-/// low double-precision floating-point value in xmm1.
-pub static MULSD: [u8; 3] = [0xf2, 0x0f, 0x59];
-
-/// Multiply the low single-precision floating-point value in xmm2/m32 by the
-/// low single-precision floating-point value in xmm1.
-pub static MULSS: [u8; 3] = [0xf3, 0x0f, 0x59];
-
-/// Reverse each bit of r/m{16,32,64}.
-pub static NOT: [u8; 1] = [0xf7];
-
-/// r{16,32,64} OR register of same size.
-pub static OR: [u8; 1] = [0x09];
-
-/// imm{16,32} OR r/m{16,32,64}, possibly sign-extended.
-pub static OR_IMM: [u8; 1] = [0x81];
-
-/// r/m{16,32,64} OR sign-extended imm8.
-pub static OR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
-
-/// Return the bitwise logical OR of packed single-precision values in xmm and x/m (SSE).
-pub static ORPS: [u8; 2] = [0x0f, 0x56];
-
-/// Compute the absolute value of bytes in xmm2/m128 and store the unsigned result in xmm1 (SSSE3).
-pub static PABSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x1c];
-
-/// Compute the absolute value of 32-bit integers in xmm2/m128 and store the unsigned result in
-/// xmm1 (SSSE3).
-pub static PABSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x1e];
-
-/// Compute the absolute value of 16-bit integers in xmm2/m128 and store the unsigned result in
-/// xmm1 (SSSE3).
-pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d];
-
-/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed signed byte
-/// integers in xmm1 using signed saturation (SSE2).
-pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63];
-
-/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 packed signed
-/// word integers in xmm1 using signed saturation (SSE2).
-pub static PACKSSDW: [u8; 3] = [0x66, 0x0f, 0x6b];
-
-/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed unsigned byte
-/// integers in xmm1 using unsigned saturation (SSE2).
-pub static PACKUSWB: [u8; 3] = [0x66, 0x0f, 0x67];
-
-/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 unpacked signed
-/// word integers in xmm1 using unsigned saturation (SSE4.1).
-pub static PACKUSDW: [u8; 4] = [0x66, 0x0f, 0x38, 0x2b];
-
-/// Add packed byte integers from xmm2/m128 and xmm1 (SSE2).
-pub static PADDB: [u8; 3] = [0x66, 0x0f, 0xfc];
-
-/// Add packed doubleword integers from xmm2/m128 and xmm1 (SSE2).
-pub static PADDD: [u8; 3] = [0x66, 0x0f, 0xfe];
-
-/// Add packed quadword integers from xmm2/m128 and xmm1 (SSE2).
-pub static PADDQ: [u8; 3] = [0x66, 0x0f, 0xd4];
-
-/// Add packed word integers from xmm2/m128 and xmm1 (SSE2).
-pub static PADDW: [u8; 3] = [0x66, 0x0f, 0xfd];
-
-/// Add packed signed byte integers from xmm2/m128 and xmm1 saturate the results (SSE).
-pub static PADDSB: [u8; 3] = [0x66, 0x0f, 0xec];
-
-/// Add packed signed word integers from xmm2/m128 and xmm1 saturate the results (SSE).
-pub static PADDSW: [u8; 3] = [0x66, 0x0f, 0xed];
-
-/// Add packed unsigned byte integers from xmm2/m128 and xmm1 saturate the results (SSE).
-pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc];
-
-/// Add packed unsigned word integers from xmm2/m128 and xmm1 saturate the results (SSE).
-pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd];
-
-/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is
-/// shifted to the right by the constant number of bytes in imm8 (SSSE3).
-pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f];
-
-/// Bitwise AND of xmm2/m128 and xmm1 (SSE2).
-pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb];
-
-/// Bitwise AND NOT of xmm2/m128 and xmm1 (SSE2).
-pub static PANDN: [u8; 3] = [0x66, 0x0f, 0xdf];
-
-/// Average packed unsigned byte integers from xmm2/m128 and xmm1 with rounding (SSE2).
-pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xE0];
-
-/// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2).
-pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3];
-
-/// Select byte values from xmm1 and xmm2/m128 from mask specified in the high bit of each byte
-/// in XMM0 and store the values into xmm1 (SSE4.1).
-pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10];
-
-/// Select words from xmm1 and xmm2/m128 from mask specified in imm8 and store the values into xmm1
-/// (SSE4.1).
-pub static PBLENDW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0e];
-
-/// Compare packed data for equal (SSE2).
-pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74];
-
-/// Compare packed data for equal (SSE2).
-pub static PCMPEQD: [u8; 3] = [0x66, 0x0f, 0x76];
-
-/// Compare packed data for equal (SSE4.1).
-pub static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29];
-
-/// Compare packed data for equal (SSE2).
-pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75];
-
-/// Compare packed signed byte integers for greater than (SSE2).
-pub static PCMPGTB: [u8; 3] = [0x66, 0x0f, 0x64];
-
-/// Compare packed signed doubleword integers for greater than (SSE2).
-pub static PCMPGTD: [u8; 3] = [0x66, 0x0f, 0x66];
-
-/// Compare packed signed quadword integers for greater than (SSE4.2).
-pub static PCMPGTQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x37];
-
-/// Compare packed signed word integers for greater than (SSE2).
-pub static PCMPGTW: [u8; 3] = [0x66, 0x0f, 0x65];
-
-/// Extract doubleword or quadword, depending on REX.W (SSE4.1).
-pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];
-
-/// Extract byte (SSE4.1).
-pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14];
-
-/// Extract word (SSE4.1). There is a 3-byte SSE2 variant that can also move to m/16.
-pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15];
-
-/// Insert doubleword or quadword, depending on REX.W (SSE4.1).
-pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22];
-
-/// Insert byte (SSE4.1).
-pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20];
-
-/// Insert word (SSE2).
-pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4];
-
-/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in
-/// xmm1 (SSE4.1).
-pub static PMAXSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x3c];
-
-/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed maximum
-/// values in xmm1 (SSE4.1).
-pub static PMAXSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3d];
-
-/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed maximum values in
-/// xmm1 (SSE2).
-pub static PMAXSW: [u8; 3] = [0x66, 0x0f, 0xee];
-
-/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed maximum values in
-/// xmm1 (SSE2).
-pub static PMAXUB: [u8; 3] = [0x66, 0x0f, 0xde];
-
-/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed maximum
-/// values in xmm1 (SSE4.1).
-pub static PMAXUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3f];
-
-/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed maximum values in
-/// xmm1 (SSE4.1).
-pub static PMAXUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3e];
-
-/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in
-/// xmm1 (SSE4.1).
-pub static PMINSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x38];
-
-/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed minimum
-/// values in xmm1 (SSE4.1).
-pub static PMINSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x39];
-
-/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed minimum values in
-/// xmm1 (SSE2).
-pub static PMINSW: [u8; 3] = [0x66, 0x0f, 0xea];
-
-/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed minimum values in
-/// xmm1 (SSE2).
-pub static PMINUB: [u8; 3] = [0x66, 0x0f, 0xda];
-
-/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed minimum
-/// values in xmm1 (SSE4.1).
-pub static PMINUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3b];
-
-/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed minimum values in
-/// xmm1 (SSE4.1).
-pub static PMINUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3a];
-
-/// Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
-/// integers in xmm1 (SSE4.1).
-pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20];
-
-/// Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit
-/// integers in xmm1 (SSE4.1).
-pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23];
-
-/// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
-/// integers in xmm1 (SSE4.1).
-pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25];
-
-/// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
-/// integers in xmm1 (SSE4.1).
-pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30];
-
-/// Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit
-/// integers in xmm1 (SSE4.1).
-pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33];
-
-/// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
-/// integers in xmm1 (SSE4.1).
-pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35];
-
-/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
-/// the results in xmm1 (SSE2).
-pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5];
-
-/// Multiply the packed doubleword signed integers in xmm1 and xmm2/m128 and store the low 32
-/// bits of each product in xmm1 (SSE4.1).
-pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
-
-/// Multiply the packed quadword signed integers in xmm2 and xmm3/m128 and store the low 64
-/// bits of each product in xmm1 (AVX512VL/DQ). Requires an EVEX encoding.
-pub static VPMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
-
-/// Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers
-/// in xmm2/m128, and store the quadword results in xmm1 (SSE2).
-pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4];
-
-/// Multiply the packed word integers, add adjacent doubleword results.
-pub static PMADDWD: [u8; 3] = [0x66, 0x0f, 0xf5];
-
-/// Pop top of stack into r{16,32,64}; increment stack pointer.
-pub static POP_REG: [u8; 1] = [0x58];
-
-/// Returns the count of number of bits set to 1.
-pub static POPCNT: [u8; 3] = [0xf3, 0x0f, 0xb8];
-
-/// Bitwise OR of xmm2/m128 and xmm1 (SSE2).
-pub static POR: [u8; 3] = [0x66, 0x0f, 0xeb];
-
-/// Shuffle bytes in xmm1 according to contents of xmm2/m128 (SSE3).
-pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00];
-
-/// Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and
-/// store the result in xmm1 (SSE2).
-pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70];
-
-/// Shift words in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
-/// digit used in the ModR/M byte (SSE2).
-pub static PS_W_IMM: [u8; 3] = [0x66, 0x0f, 0x71];
-
-/// Shift doublewords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
-/// digit used in the ModR/M byte (SSE2).
-pub static PS_D_IMM: [u8; 3] = [0x66, 0x0f, 0x72];
-
-/// Shift quadwords in xmm1 by imm8; the direction and sign-bit behavior is controlled by the RRR
-/// digit used in the ModR/M byte (SSE2).
-pub static PS_Q_IMM: [u8; 3] = [0x66, 0x0f, 0x73];
-
-/// Shift words in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
-pub static PSLLW: [u8; 3] = [0x66, 0x0f, 0xf1];
-
-/// Shift doublewords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
-pub static PSLLD: [u8; 3] = [0x66, 0x0f, 0xf2];
-
-/// Shift quadwords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
-pub static PSLLQ: [u8; 3] = [0x66, 0x0f, 0xf3];
-
-/// Shift words in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
-pub static PSRLW: [u8; 3] = [0x66, 0x0f, 0xd1];
-
-/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
-pub static PSRLD: [u8; 3] = [0x66, 0x0f, 0xd2];
-
-/// Shift quadwords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
-pub static PSRLQ: [u8; 3] = [0x66, 0x0f, 0xd3];
-
-/// Shift words in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2).
-pub static PSRAW: [u8; 3] = [0x66, 0x0f, 0xe1];
-
-/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2).
-pub static PSRAD: [u8; 3] = [0x66, 0x0f, 0xe2];
-
-/// Subtract packed byte integers in xmm2/m128 from packed byte integers in xmm1 (SSE2).
-pub static PSUBB: [u8; 3] = [0x66, 0x0f, 0xf8];
-
-/// Subtract packed word integers in xmm2/m128 from packed word integers in xmm1 (SSE2).
-pub static PSUBW: [u8; 3] = [0x66, 0x0f, 0xf9];
-
-/// Subtract packed doubleword integers in xmm2/m128 from doubleword byte integers in xmm1 (SSE2).
-pub static PSUBD: [u8; 3] = [0x66, 0x0f, 0xfa];
-
-/// Subtract packed quadword integers in xmm2/m128 from xmm1 (SSE2).
-pub static PSUBQ: [u8; 3] = [0x66, 0x0f, 0xfb];
-
-/// Subtract packed signed byte integers in xmm2/m128 from packed signed byte integers in xmm1
-/// and saturate results (SSE2).
-pub static PSUBSB: [u8; 3] = [0x66, 0x0f, 0xe8];
-
-/// Subtract packed signed word integers in xmm2/m128 from packed signed word integers in xmm1
-/// and saturate results (SSE2).
-pub static PSUBSW: [u8; 3] = [0x66, 0x0f, 0xe9];
-
-/// Subtract packed unsigned byte integers in xmm2/m128 from packed unsigned byte integers in xmm1
-/// and saturate results (SSE2).
-pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8];
-
-/// Subtract packed unsigned word integers in xmm2/m128 from packed unsigned word integers in xmm1
-/// and saturate results (SSE2).
-pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9];
-
-/// Set ZF if xmm2/m128 AND xmm1 result is all 0s; set CF if xmm2/m128 AND NOT xmm1 result is all
-/// 0s (SSE4.1).
-pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17];
-
-/// Unpack and interleave high-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2).
-pub static PUNPCKHBW: [u8; 3] = [0x66, 0x0f, 0x68];
-
-/// Unpack and interleave high-order words from xmm1 and xmm2/m128 into xmm1 (SSE2).
-pub static PUNPCKHWD: [u8; 3] = [0x66, 0x0f, 0x69];
-
-/// Unpack and interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2).
-pub static PUNPCKHDQ: [u8; 3] = [0x66, 0x0f, 0x6A];
-
-/// Unpack and interleave high-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2).
-pub static PUNPCKHQDQ: [u8; 3] = [0x66, 0x0f, 0x6D];
-
-/// Unpack and interleave low-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2).
-pub static PUNPCKLBW: [u8; 3] = [0x66, 0x0f, 0x60];
-
-/// Unpack and interleave low-order words from xmm1 and xmm2/m128 into xmm1 (SSE2).
-pub static PUNPCKLWD: [u8; 3] = [0x66, 0x0f, 0x61];
-
-/// Unpack and interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2).
-pub static PUNPCKLDQ: [u8; 3] = [0x66, 0x0f, 0x62];
-
-/// Unpack and interleave low-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2).
-pub static PUNPCKLQDQ: [u8; 3] = [0x66, 0x0f, 0x6C];
-
-/// Push r{16,32,64}.
-pub static PUSH_REG: [u8; 1] = [0x50];
-
-/// Logical exclusive OR (SSE2).
-pub static PXOR: [u8; 3] = [0x66, 0x0f, 0xef];
-
-/// Near return to calling procedure.
-pub static RET_NEAR: [u8; 1] = [0xc3];
-
-/// General rotation opcode. Kind of rotation depends on encoding.
-pub static ROTATE_CL: [u8; 1] = [0xd3];
-
-/// General rotation opcode. Kind of rotation depends on encoding.
-pub static ROTATE_IMM8: [u8; 1] = [0xc1];
-
-/// Round scalar doubl-precision floating-point values.
-pub static ROUNDSD: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0b];
-
-/// Round scalar single-precision floating-point values.
-pub static ROUNDSS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0a];
-
-/// Subtract with borrow r{16,32,64} from r/m of the same size.
-pub static SBB: [u8; 1] = [0x19];
-
-/// Set byte if overflow (OF=1).
-pub static SET_BYTE_IF_OVERFLOW: [u8; 2] = [0x0f, 0x90];
-
-/// Compute the square root of the packed double-precision floating-point values and store the
-/// result in xmm1 (SSE2).
-pub static SQRTPD: [u8; 3] = [0x66, 0x0f, 0x51];
-
-/// Compute the square root of the packed double-precision floating-point values and store the
-/// result in xmm1 (SSE).
-pub static SQRTPS: [u8; 2] = [0x0f, 0x51];
-
-/// Compute square root of scalar double-precision floating-point value.
-pub static SQRTSD: [u8; 3] = [0xf2, 0x0f, 0x51];
-
-/// Compute square root of scalar single-precision value.
-pub static SQRTSS: [u8; 3] = [0xf3, 0x0f, 0x51];
-
-/// Subtract r{16,32,64} from r/m of same size.
-pub static SUB: [u8; 1] = [0x29];
-
-/// Subtract packed double-precision floating-point values in xmm2/mem from xmm1 and store result
-/// in xmm1 (SSE2).
-pub static SUBPD: [u8; 3] = [0x66, 0x0f, 0x5c];
-
-/// Subtract packed single-precision floating-point values in xmm2/mem from xmm1 and store result
-/// in xmm1 (SSE).
-pub static SUBPS: [u8; 2] = [0x0f, 0x5c];
-
-/// Subtract the low double-precision floating-point value in xmm2/m64 from xmm1
-/// and store the result in xmm1.
-pub static SUBSD: [u8; 3] = [0xf2, 0x0f, 0x5c];
-
-/// Subtract the low single-precision floating-point value in xmm2/m32 from xmm1
-/// and store the result in xmm1.
-pub static SUBSS: [u8; 3] = [0xf3, 0x0f, 0x5c];
-
-/// AND r8 with r/m8; set SF, ZF, PF according to result.
-pub static TEST_BYTE_REG: [u8; 1] = [0x84];
-
-/// AND {r16, r32, r64} with r/m of the same size; set SF, ZF, PF according to result.
-pub static TEST_REG: [u8; 1] = [0x85];
-
-/// Count the number of trailing zero bits.
-pub static TZCNT: [u8; 3] = [0xf3, 0x0f, 0xbc];
-
-/// Compare low double-precision floating-point values in xmm1 and xmm2/mem64
-/// and set the EFLAGS flags accordingly.
-pub static UCOMISD: [u8; 3] = [0x66, 0x0f, 0x2e];
-
-/// Compare low single-precision floating-point values in xmm1 and xmm2/mem32
-/// and set the EFLAGS flags accordingly.
-pub static UCOMISS: [u8; 2] = [0x0f, 0x2e];
-
-/// Raise invalid opcode instruction.
-pub static UNDEFINED2: [u8; 2] = [0x0f, 0x0b];
-
-/// Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed
-/// single-precision floating-point values in xmm1 with writemask k1. Rounding behavior
-/// is controlled by MXCSR but can be overriden by EVEX.L'L in static rounding mode
-/// (AVX512VL, AVX512F).
-pub static VCVTUDQ2PS: [u8; 3] = [0xf2, 0x0f, 0x7a];
-
-/// imm{16,32} XOR r/m{16,32,64}, possibly sign-extended.
-pub static XOR_IMM: [u8; 1] = [0x81];
-
-/// r/m{16,32,64} XOR sign-extended imm8.
-pub static XOR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
-
-/// r/m{16,32,64} XOR register of the same size.
-pub static XOR: [u8; 1] = [0x31];
-
-/// Bitwise logical XOR of packed double-precision floating-point values.
-pub static XORPD: [u8; 3] = [0x66, 0x0f, 0x57];
-
-/// Bitwise logical XOR of packed single-precision floating-point values.
-pub static XORPS: [u8; 2] = [0x0f, 0x57];
diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs
deleted file mode 100644
index f45f8dc673..0000000000
--- a/cranelift/codegen/meta/src/isa/x86/recipes.rs
+++ /dev/null
@@ -1,3445 +0,0 @@
-//! Encoding recipes for x86/x86_64.
-use std::rc::Rc;
-
-use cranelift_codegen_shared::isa::x86::EncodingBits;
-
-use crate::cdsl::ast::Literal;
-use crate::cdsl::formats::InstructionFormat;
-use crate::cdsl::instructions::InstructionPredicate;
-use crate::cdsl::recipes::{
-    EncodingRecipe, EncodingRecipeBuilder, OperandConstraint, Register, Stack,
-};
-use crate::cdsl::regs::IsaRegs;
-use crate::cdsl::settings::SettingGroup;
-use crate::shared::Definitions as SharedDefinitions;
-
-use crate::isa::x86::opcodes;
-
-/// Helper data structure to create recipes and template recipes.
-/// It contains all the recipes and recipe templates that might be used in the encodings crate of
-/// this same directory.
-pub(crate) struct RecipeGroup<'builder> {
-    /// Memoized registers description, to pass it to builders later.
-    regs: &'builder IsaRegs,
-
-    /// All the recipes explicitly created in this file. This is different from the final set of
-    /// recipes, which is definitive only once encodings have generated new recipes on the fly.
-    recipes: Vec<EncodingRecipe>,
-
-    /// All the recipe templates created in this file.
-    templates: Vec<Rc<Template<'builder>>>,
-}
-
-impl<'builder> RecipeGroup<'builder> {
-    fn new(regs: &'builder IsaRegs) -> Self {
-        Self {
-            regs,
-            recipes: Vec::new(),
-            templates: Vec::new(),
-        }
-    }
-    fn add_recipe(&mut self, recipe: EncodingRecipeBuilder) {
-        self.recipes.push(recipe.build());
-    }
-    fn add_template_recipe(&mut self, recipe: EncodingRecipeBuilder) -> Rc<Template<'builder>> {
-        let template = Rc::new(Template::new(recipe, self.regs));
-        self.templates.push(template.clone());
-        template
-    }
-    fn add_template_inferred(
-        &mut self,
-        recipe: EncodingRecipeBuilder,
-        infer_function: &'static str,
-    ) -> Rc<Template<'builder>> {
-        let template =
-            Rc::new(Template::new(recipe, self.regs).inferred_rex_compute_size(infer_function));
-        self.templates.push(template.clone());
-        template
-    }
-    fn add_template(&mut self, template: Template<'builder>) -> Rc<Template<'builder>> {
-        let template = Rc::new(template);
-        self.templates.push(template.clone());
-        template
-    }
-    pub fn recipe(&self, name: &str) -> &EncodingRecipe {
-        self.recipes
-            .iter()
-            .find(|recipe| recipe.name == name)
-            .unwrap_or_else(|| panic!("unknown recipe name: {}. Try template?", name))
-    }
-    pub fn template(&self, name: &str) -> &Template {
-        self.templates
-            .iter()
-            .find(|recipe| recipe.name() == name)
-            .unwrap_or_else(|| panic!("unknown template name: {}. Try recipe?", name))
-    }
-}
-
-// Opcode representation.
-//
-// Cranelift requires each recipe to have a single encoding size in bytes, and x86 opcodes are
-// variable length, so we use separate recipes for different styles of opcodes and prefixes. The
-// opcode format is indicated by the recipe name prefix.
-//
-// The match case below does not include the REX prefix which goes after the mandatory prefix.
-// VEX/XOP and EVEX prefixes are not yet supported. Encodings using any of these prefixes are
-// represented by separate recipes.
-//
-// The encoding bits are:
-//
-// 0-7:   The opcode byte <op>.
-// 8-9:   pp, mandatory prefix:
-//        00 none (Op*)
-//        01 66   (Mp*)
-//        10 F3   (Mp*)
-//        11 F2   (Mp*)
-// 10-11: mm, opcode map:
-//        00 <op>        (Op1/Mp1)
-//        01 0F <op>     (Op2/Mp2)
-//        10 0F 38 <op>  (Op3/Mp3)
-//        11 0F 3A <op>  (Op3/Mp3)
-// 12-14  rrr, opcode bits for the ModR/M byte for certain opcodes.
-// 15:    REX.W bit (or VEX.W/E)
-//
-// There is some redundancy between bits 8-11 and the recipe names, but we have enough bits, and
-// the pp+mm format is ready for supporting VEX prefixes.
-//
-// TODO Cranelift doesn't actually require recipe to have different encoding sizes anymore, so this
-// could be simplified.
-
-/// Given a sequence of opcode bytes, compute the recipe name prefix and encoding bits.
-fn decode_opcodes(op_bytes: &[u8], rrr: u16, w: u16) -> (&'static str, u16) {
-    let enc = EncodingBits::new(op_bytes, rrr, w);
-    (enc.prefix().recipe_name_prefix(), enc.bits())
-}
-
-/// Given a snippet of Rust code (or None), replace the `PUT_OP` macro with the
-/// corresponding `put_*` function from the `binemit.rs` module.
-fn replace_put_op(code: Option<String>, prefix: &str) -> Option<String> {
-    code.map(|code| code.replace("{{PUT_OP}}", &format!("put_{}", prefix.to_lowercase())))
-}
-
-/// Replaces constraints to a REX-prefixed register class by the equivalent non-REX register class.
-fn replace_nonrex_constraints(
-    regs: &IsaRegs,
-    constraints: Vec<OperandConstraint>,
-) -> Vec<OperandConstraint> {
-    constraints
-        .into_iter()
-        .map(|constraint| match constraint {
-            OperandConstraint::RegClass(rc_index) => {
-                let new_rc_index = if rc_index == regs.class_by_name("GPR") {
-                    regs.class_by_name("GPR8")
-                } else if rc_index == regs.class_by_name("FPR") {
-                    regs.class_by_name("FPR8")
-                } else {
-                    rc_index
-                };
-                OperandConstraint::RegClass(new_rc_index)
-            }
-            _ => constraint,
-        })
-        .collect()
-}
-
-fn replace_evex_constraints(
-    _: &IsaRegs,
-    constraints: Vec<OperandConstraint>,
-) -> Vec<OperandConstraint> {
-    constraints
-        .into_iter()
-        .map(|constraint| match constraint {
-            OperandConstraint::RegClass(rc_index) => {
-                // FIXME(#1306) this should be able to upgrade the register class to FPR32 as in
-                // `replace_nonrex_constraints` above, e.g. When FPR32 is re-added, add back in the
-                // rc_index conversion to FPR32. In the meantime, this is effectively a no-op
-                // conversion--the register class stays the same.
-                OperandConstraint::RegClass(rc_index)
-            }
-            _ => constraint,
-        })
-        .collect()
-}
-
-/// Specifies how the prefix (e.g. REX) is emitted by a Recipe.
-#[derive(Copy, Clone, PartialEq)]
-pub enum RecipePrefixKind {
-    /// The REX emission behavior is not hardcoded for the Recipe
-    /// and may be overridden when using the Template.
-    Unspecified,
-
-    /// The Recipe must hardcode the non-emission of the REX prefix.
-    NeverEmitRex,
-
-    /// The Recipe must hardcode the emission of the REX prefix.
-    AlwaysEmitRex,
-
-    /// The Recipe should infer the emission of the REX.RXB bits from registers,
-    /// and the REX.W bit from the EncodingBits.
-    ///
-    /// Because such a Recipe has a non-constant instruction size, it must have
-    /// a special `compute_size` handler for the inferrable-REX case.
-    InferRex,
-
-    /// The Recipe must hardcode the emission of an EVEX prefix.
-    Evex,
-}
-
-impl Default for RecipePrefixKind {
-    fn default() -> Self {
-        Self::Unspecified
-    }
-}
-
-/// Previously called a TailRecipe in the Python meta language, this allows to create multiple
-/// variants of a single base EncodingRecipe (rex prefix, specialized w/rrr bits, different
-/// opcodes). It serves as a prototype of an EncodingRecipe, which is then used when actually creating
-/// Encodings, in encodings.rs. This is an idiosyncrasy of the x86 meta-language, and could be
-/// reconsidered later.
-#[derive(Clone)]
-pub(crate) struct Template<'builder> {
-    /// Description of registers, used in the build() method.
-    regs: &'builder IsaRegs,
-
-    /// The recipe template, which is to be specialized (by copy).
-    recipe: EncodingRecipeBuilder,
-
-    /// How is the REX prefix emitted?
-    rex_kind: RecipePrefixKind,
-
-    /// Function for `compute_size()` when REX is inferrable.
-    inferred_rex_compute_size: Option<&'static str>,
-
-    /// Other recipe to use when REX-prefixed.
-    when_prefixed: Option<Rc<Template<'builder>>>,
-
-    // Parameters passed in the EncodingBits.
-    /// Value of the W bit (0 or 1), stored in the EncodingBits.
-    w_bit: u16,
-    /// Value of the RRR bits (between 0 and 0b111).
-    rrr_bits: u16,
-    /// Opcode bytes.
-    op_bytes: &'static [u8],
-}
-
-impl<'builder> Template<'builder> {
-    fn new(recipe: EncodingRecipeBuilder, regs: &'builder IsaRegs) -> Self {
-        Self {
-            regs,
-            recipe,
-            rex_kind: RecipePrefixKind::default(),
-            inferred_rex_compute_size: None,
-            when_prefixed: None,
-            w_bit: 0,
-            rrr_bits: 0,
-            op_bytes: &opcodes::EMPTY,
-        }
-    }
-
-    fn name(&self) -> &str {
-        &self.recipe.name
-    }
-    fn rex_kind(self, kind: RecipePrefixKind) -> Self {
-        Self {
-            rex_kind: kind,
-            ..self
-        }
-    }
-    fn inferred_rex_compute_size(self, function: &'static str) -> Self {
-        Self {
-            inferred_rex_compute_size: Some(function),
-            ..self
-        }
-    }
-    fn when_prefixed(self, template: Rc<Template<'builder>>) -> Self {
-        assert!(self.when_prefixed.is_none());
-        Self {
-            when_prefixed: Some(template),
-            ..self
-        }
-    }
-
-    // Copy setters.
-    pub fn opcodes(&self, op_bytes: &'static [u8]) -> Self {
-        assert!(!op_bytes.is_empty());
-        let mut copy = self.clone();
-        copy.op_bytes = op_bytes;
-        copy
-    }
-    pub fn w(&self) -> Self {
-        let mut copy = self.clone();
-        copy.w_bit = 1;
-        copy
-    }
-    pub fn rrr(&self, value: u16) -> Self {
-        assert!(value <= 0b111);
-        let mut copy = self.clone();
-        copy.rrr_bits = value;
-        copy
-    }
-    pub fn nonrex(&self) -> Self {
-        assert!(
-            self.rex_kind != RecipePrefixKind::AlwaysEmitRex,
-            "Template requires REX prefix."
-        );
-        let mut copy = self.clone();
-        copy.rex_kind = RecipePrefixKind::NeverEmitRex;
-        copy
-    }
-    pub fn rex(&self) -> Self {
-        assert!(
-            self.rex_kind != RecipePrefixKind::NeverEmitRex,
-            "Template requires no REX prefix."
-        );
-        if let Some(prefixed) = &self.when_prefixed {
-            let mut ret = prefixed.rex();
-            // Forward specialized parameters.
-            ret.op_bytes = self.op_bytes;
-            ret.w_bit = self.w_bit;
-            ret.rrr_bits = self.rrr_bits;
-            return ret;
-        }
-        let mut copy = self.clone();
-        copy.rex_kind = RecipePrefixKind::AlwaysEmitRex;
-        copy
-    }
-    pub fn infer_rex(&self) -> Self {
-        assert!(
-            self.rex_kind != RecipePrefixKind::NeverEmitRex,
-            "Template requires no REX prefix."
-        );
-        assert!(
-            self.when_prefixed.is_none(),
-            "infer_rex used with when_prefixed()."
-        );
-        let mut copy = self.clone();
-        copy.rex_kind = RecipePrefixKind::InferRex;
-        copy
-    }
-
-    pub fn build(mut self) -> (EncodingRecipe, u16) {
-        let (opcode, bits) = decode_opcodes(&self.op_bytes, self.rrr_bits, self.w_bit);
-
-        let (recipe_name, size_addendum) = match self.rex_kind {
-            RecipePrefixKind::Unspecified | RecipePrefixKind::NeverEmitRex => {
-                // Ensure the operands are limited to non-REX constraints.
-                let operands_in = self.recipe.operands_in.unwrap_or_default();
-                self.recipe.operands_in = Some(replace_nonrex_constraints(self.regs, operands_in));
-                let operands_out = self.recipe.operands_out.unwrap_or_default();
-                self.recipe.operands_out =
-                    Some(replace_nonrex_constraints(self.regs, operands_out));
-
-                (opcode.into(), self.op_bytes.len() as u64)
-            }
-            RecipePrefixKind::AlwaysEmitRex => {
-                ("Rex".to_string() + opcode, self.op_bytes.len() as u64 + 1)
-            }
-            RecipePrefixKind::InferRex => {
-                assert_eq!(self.w_bit, 0, "A REX.W bit always requires a REX prefix; avoid using `infer_rex().w()` and use `rex().w()` instead.");
-                // Hook up the right function for inferred compute_size().
-                assert!(
-                    self.inferred_rex_compute_size.is_some(),
-                    "InferRex recipe '{}' needs an inferred_rex_compute_size function.",
-                    &self.recipe.name
-                );
-                self.recipe.compute_size = self.inferred_rex_compute_size;
-
-                ("DynRex".to_string() + opcode, self.op_bytes.len() as u64)
-            }
-            RecipePrefixKind::Evex => {
-                // Allow the operands to expand limits to EVEX constraints.
-                let operands_in = self.recipe.operands_in.unwrap_or_default();
-                self.recipe.operands_in = Some(replace_evex_constraints(self.regs, operands_in));
-                let operands_out = self.recipe.operands_out.unwrap_or_default();
-                self.recipe.operands_out = Some(replace_evex_constraints(self.regs, operands_out));
-
-                ("Evex".to_string() + opcode, 4 + 1)
-            }
-        };
-
-        self.recipe.base_size += size_addendum;
-
-        // Branch ranges are relative to the end of the instruction.
-        // For InferRex, the range should be the minimum, assuming no REX.
-        if let Some(range) = self.recipe.branch_range.as_mut() {
-            range.inst_size += size_addendum;
-        }
-
-        self.recipe.emit = replace_put_op(self.recipe.emit, &recipe_name);
-        self.recipe.name = recipe_name + &self.recipe.name;
-
-        (self.recipe.build(), bits)
-    }
-}
-
-/// Returns a predicate checking that the "cond" field of the instruction contains one of the
-/// directly supported floating point condition codes.
-fn supported_floatccs_predicate(
-    supported_cc: &[Literal],
-    format: &InstructionFormat,
-) -> InstructionPredicate {
-    supported_cc
-        .iter()
-        .fold(InstructionPredicate::new(), |pred, literal| {
-            pred.or(InstructionPredicate::new_is_field_equal(
-                format,
-                "cond",
-                literal.to_rust_code(),
-            ))
-        })
-}
-
-/// Return an instruction predicate that checks if `iform.imm` is a valid `scale` for a SIB byte.
-fn valid_scale(format: &InstructionFormat) -> InstructionPredicate {
-    ["1", "2", "4", "8"]
-        .iter()
-        .fold(InstructionPredicate::new(), |pred, &literal| {
-            pred.or(InstructionPredicate::new_is_field_equal(
-                format,
-                "imm",
-                literal.into(),
-            ))
-        })
-}
-
-pub(crate) fn define<'shared>(
-    shared_defs: &'shared SharedDefinitions,
-    settings: &'shared SettingGroup,
-    regs: &'shared IsaRegs,
-) -> RecipeGroup<'shared> {
-    // The set of floating point condition codes that are directly supported.
-    // Other condition codes need to be reversed or expressed as two tests.
-    let floatcc = &shared_defs.imm.floatcc;
-    let supported_floatccs: Vec<Literal> = ["ord", "uno", "one", "ueq", "gt", "ge", "ult", "ule"]
-        .iter()
-        .map(|name| Literal::enumerator_for(floatcc, name))
-        .collect();
-
-    // Register classes shorthands.
-    let abcd = regs.class_by_name("ABCD");
-    let gpr = regs.class_by_name("GPR");
-    let fpr = regs.class_by_name("FPR");
-    let flag = regs.class_by_name("FLAG");
-
-    // Operand constraints shorthands.
-    let reg_rflags = Register::new(flag, regs.regunit_by_name(flag, "rflags"));
-    let reg_rax = Register::new(gpr, regs.regunit_by_name(gpr, "rax"));
-    let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx"));
-    let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx"));
-    let reg_r15 = Register::new(gpr, regs.regunit_by_name(gpr, "r15"));
-    let reg_xmm0 = Register::new(fpr, regs.regunit_by_name(fpr, "xmm0"));
-
-    // Stack operand with a 32-bit signed displacement from either RBP or RSP.
-    let stack_gpr32 = Stack::new(gpr);
-    let stack_fpr32 = Stack::new(fpr);
-
-    let formats = &shared_defs.formats;
-
-    // Predicates shorthands.
-    let use_sse41 = settings.predicate_by_name("use_sse41");
-
-    // Definitions.
-    let mut recipes = RecipeGroup::new(regs);
-
-    // A null unary instruction that takes a GPR register. Can be used for identity copies and
-    // no-op conversions.
-    recipes.add_recipe(
-        EncodingRecipeBuilder::new("null", &formats.unary, 0)
-            .operands_in(vec![gpr])
-            .operands_out(vec![0])
-            .emit(""),
-    );
-    recipes.add_recipe(
-        EncodingRecipeBuilder::new("null_fpr", &formats.unary, 0)
-            .operands_in(vec![fpr])
-            .operands_out(vec![0])
-            .emit(""),
-    );
-    recipes.add_recipe(
-        EncodingRecipeBuilder::new("stacknull", &formats.unary, 0)
-            .operands_in(vec![stack_gpr32])
-            .operands_out(vec![stack_gpr32])
-            .emit(""),
-    );
-
-    recipes.add_recipe(
-        EncodingRecipeBuilder::new("get_pinned_reg", &formats.nullary, 0)
-            .operands_out(vec![reg_r15])
-            .emit(""),
-    );
-    // umr with a fixed register output that's r15.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("set_pinned_reg", &formats.unary, 1)
-            .operands_in(vec![gpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    let r15 = RU::r15.into();
-                    {{PUT_OP}}(bits, rex2(r15, in_reg0), sink);
-                    modrm_rr(r15, in_reg0, sink);
-                "#,
-            ),
-    );
-
-    // No-op fills, created by late-stage redundant-fill removal.
-    recipes.add_recipe(
-        EncodingRecipeBuilder::new("fillnull", &formats.unary, 0)
-            .operands_in(vec![stack_gpr32])
-            .operands_out(vec![gpr])
-            .clobbers_flags(false)
-            .emit(""),
-    );
-    recipes.add_recipe(
-        EncodingRecipeBuilder::new("ffillnull", &formats.unary, 0)
-            .operands_in(vec![stack_gpr32])
-            .operands_out(vec![fpr])
-            .clobbers_flags(false)
-            .emit(""),
-    );
-
-    recipes.add_recipe(
-        EncodingRecipeBuilder::new("debugtrap", &formats.nullary, 1).emit("sink.put1(0xcc);"),
-    );
-
-    // XX opcode, no ModR/M.
-    recipes.add_template_recipe(EncodingRecipeBuilder::new("trap", &formats.trap, 0).emit(
-        r#"
-            sink.trap(code, func.srclocs[inst]);
-            {{PUT_OP}}(bits, BASE_REX, sink);
-        "#,
-    ));
-
-    // Macro: conditional jump over a ud2.
-    recipes.add_recipe(
-        EncodingRecipeBuilder::new("trapif", &formats.int_cond_trap, 4)
-            .operands_in(vec![reg_rflags])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    // Jump over a 2-byte ud2.
-                    sink.put1(0x70 | (icc2opc(cond.inverse()) as u8));
-                    sink.put1(2);
-                    // ud2.
-                    sink.trap(code, func.srclocs[inst]);
-                    sink.put1(0x0f);
-                    sink.put1(0x0b);
-                "#,
-            ),
-    );
-
-    recipes.add_recipe(
-        EncodingRecipeBuilder::new("trapff", &formats.float_cond_trap, 4)
-            .operands_in(vec![reg_rflags])
-            .clobbers_flags(false)
-            .inst_predicate(supported_floatccs_predicate(
-                &supported_floatccs,
-                &*formats.float_cond_trap,
-            ))
-            .emit(
-                r#"
-                    // Jump over a 2-byte ud2.
-                    sink.put1(0x70 | (fcc2opc(cond.inverse()) as u8));
-                    sink.put1(2);
-                    // ud2.
-                    sink.trap(code, func.srclocs[inst]);
-                    sink.put1(0x0f);
-                    sink.put1(0x0b);
-                "#,
-            ),
-    );
-
-    // XX /r
-    recipes.add_template_inferred(
-        EncodingRecipeBuilder::new("rr", &formats.binary, 1)
-            .operands_in(vec![gpr, gpr])
-            .operands_out(vec![0])
-            .emit(
-                r#"
-                        {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
-                        modrm_rr(in_reg0, in_reg1, sink);
-                    "#,
-            ),
-        "size_with_inferred_rex_for_inreg0_inreg1",
-    );
-
-    // XX /r with operands swapped. (RM form).
-    recipes.add_template_inferred(
-        EncodingRecipeBuilder::new("rrx", &formats.binary, 1)
-            .operands_in(vec![gpr, gpr])
-            .operands_out(vec![0])
-            .emit(
-                r#"
-                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                        modrm_rr(in_reg1, in_reg0, sink);
-                    "#,
-            ),
-        "size_with_inferred_rex_for_inreg0_inreg1",
-    );
-
-    // XX /r with FPR ins and outs. A form.
-    recipes.add_template_inferred(
-        EncodingRecipeBuilder::new("fa", &formats.binary, 1)
-            .operands_in(vec![fpr, fpr])
-            .operands_out(vec![0])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                    modrm_rr(in_reg1, in_reg0, sink);
-                "#,
-            ),
-        "size_with_inferred_rex_for_inreg0_inreg1",
-    );
-
-    // XX /r with FPR ins and outs. A form with input operands swapped.
-    recipes.add_template_inferred(
-        EncodingRecipeBuilder::new("fax", &formats.binary, 1)
-            .operands_in(vec![fpr, fpr])
-            .operands_out(vec![1])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
-                    modrm_rr(in_reg0, in_reg1, sink);
-                "#,
-            ),
-        // The operand order does not matter for calculating whether a REX prefix is needed.
-        "size_with_inferred_rex_for_inreg0_inreg1",
-    );
-
-    // XX /r with FPR ins and outs. A form with a byte immediate.
-    {
-        recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("fa_ib", &formats.ternary_imm8, 2)
-                .operands_in(vec![fpr, fpr])
-                .operands_out(vec![0])
-                .inst_predicate(InstructionPredicate::new_is_unsigned_int(
-                    &*formats.ternary_imm8,
-                    "imm",
-                    8,
-                    0,
-                ))
-                .emit(
-                    r#"
-                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                    modrm_rr(in_reg1, in_reg0, sink);
-                    let imm: i64 = imm.into();
-                    sink.put1(imm as u8);
-                "#,
-                ),
-            "size_with_inferred_rex_for_inreg0_inreg1",
-        );
-    }
-
-    // XX /n for a unary operation with extension bits.
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("ur", &formats.unary, 1)
-                .operands_in(vec![gpr])
-                .operands_out(vec![0])
-                .emit(
-                    r#"
-                        {{PUT_OP}}(bits, rex1(in_reg0), sink);
-                        modrm_r_bits(in_reg0, bits, sink);
-                    "#,
-                ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
-    );
-
-    // XX /r, but for a unary operator with separate input/output register, like
-    // copies. MR form, preserving flags.
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("umr", &formats.unary, 1)
-                .operands_in(vec![gpr])
-                .operands_out(vec![gpr])
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                        {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink);
-                        modrm_rr(out_reg0, in_reg0, sink);
-                    "#,
-                ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"),
-    );
-
-    // Same as umr, but with FPR -> GPR registers.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("rfumr", &formats.unary, 1)
-            .operands_in(vec![fpr])
-            .operands_out(vec![gpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink);
-                    modrm_rr(out_reg0, in_reg0, sink);
-                "#,
-            ),
-    );
-
-    // Same as umr, but with the source register specified directly.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("umr_reg_to_ssa", &formats.copy_to_ssa, 1)
-            // No operands_in to mention, because a source register is specified directly.
-            .operands_out(vec![gpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(out_reg0, src), sink);
-                    modrm_rr(out_reg0, src, sink);
-                "#,
-            ),
-    );
-
-    // XX /r, but for a unary operator with separate input/output register.
-    // RM form. Clobbers FLAGS.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("urm", &formats.unary, 1)
-            .operands_in(vec![gpr])
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
-                    modrm_rr(in_reg0, out_reg0, sink);
-                "#,
-            ),
-    );
-
-    // XX /r. Same as urm, but doesn't clobber FLAGS.
-    let urm_noflags = recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("urm_noflags", &formats.unary, 1)
-            .operands_in(vec![gpr])
-            .operands_out(vec![gpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
-                    modrm_rr(in_reg0, out_reg0, sink);
-                "#,
-            ),
-    );
-
-    // XX /r. Same as urm_noflags, but input limited to ABCD.
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("urm_noflags_abcd", &formats.unary, 1)
-                .operands_in(vec![abcd])
-                .operands_out(vec![gpr])
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
-                    modrm_rr(in_reg0, out_reg0, sink);
-                "#,
-                ),
-            regs,
-        )
-        .when_prefixed(urm_noflags),
-    );
-
-    // XX /r, RM form, FPR -> FPR.
-    recipes.add_template_inferred(
-        EncodingRecipeBuilder::new("furm", &formats.unary, 1)
-            .operands_in(vec![fpr])
-            .operands_out(vec![fpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
-                    modrm_rr(in_reg0, out_reg0, sink);
-                "#,
-            ),
-        "size_with_inferred_rex_for_inreg0_outreg0",
-    );
-
-    // Same as furm, but with the source register specified directly.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("furm_reg_to_ssa", &formats.copy_to_ssa, 1)
-            // No operands_in to mention, because a source register is specified directly.
-            .operands_out(vec![fpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(src, out_reg0), sink);
-                    modrm_rr(src, out_reg0, sink);
-                "#,
-            ),
-    );
-
-    // XX /r, RM form, GPR -> FPR.
-    recipes.add_template_inferred(
-        EncodingRecipeBuilder::new("frurm", &formats.unary, 1)
-            .operands_in(vec![gpr])
-            .operands_out(vec![fpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
-                        modrm_rr(in_reg0, out_reg0, sink);
-                    "#,
-            ),
-        "size_with_inferred_rex_for_inreg0_outreg0",
-    );
-
-    // XX /r, RM form, FPR -> GPR.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("rfurm", &formats.unary, 1)
-            .operands_in(vec![fpr])
-            .operands_out(vec![gpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
-                    modrm_rr(in_reg0, out_reg0, sink);
-                "#,
-            ),
-    );
-
-    // XX /r, RMI form for one of the roundXX SSE 4.1 instructions.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("furmi_rnd", &formats.unary, 2)
-            .operands_in(vec![fpr])
-            .operands_out(vec![fpr])
-            .isa_predicate(use_sse41)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
-                    modrm_rr(in_reg0, out_reg0, sink);
-                    sink.put1(match opcode {
-                        Opcode::Nearest => 0b00,
-                        Opcode::Floor => 0b01,
-                        Opcode::Ceil => 0b10,
-                        Opcode::Trunc => 0b11,
-                        x => panic!("{} unexpected for furmi_rnd", opcode),
-                    });
-                "#,
-            ),
-    );
-
-    // XX /r, for regmove instructions.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("rmov", &formats.reg_move, 1)
-            .operands_in(vec![gpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(dst, src), sink);
-                    modrm_rr(dst, src, sink);
-                "#,
-            ),
-    );
-
-    // XX /r, for regmove instructions (FPR version, RM encoded).
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("frmov", &formats.reg_move, 1)
-            .operands_in(vec![fpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(src, dst), sink);
-                    modrm_rr(src, dst, sink);
-                "#,
-            ),
-    );
-
-    // XX /n with one arg in %rcx, for shifts.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("rc", &formats.binary, 1)
-            .operands_in(vec![
-                OperandConstraint::RegClass(gpr),
-                OperandConstraint::FixedReg(reg_rcx),
-            ])
-            .operands_out(vec![0])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
-                    modrm_r_bits(in_reg0, bits, sink);
-                "#,
-            ),
-    );
-
-    // XX /n for division: inputs in %rax, %rdx, r. Outputs in %rax, %rdx.
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("div", &formats.ternary, 1)
-                .operands_in(vec![
-                    OperandConstraint::FixedReg(reg_rax),
-                    OperandConstraint::FixedReg(reg_rdx),
-                    OperandConstraint::RegClass(gpr),
-                ])
-                .operands_out(vec![reg_rax, reg_rdx])
-                .emit(
-                    r#"
-                        sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]);
-                        {{PUT_OP}}(bits, rex1(in_reg2), sink);
-                        modrm_r_bits(in_reg2, bits, sink);
-                    "#,
-                ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg2"),
-    );
-
-    // XX /n for {s,u}mulx: inputs in %rax, r. Outputs in %rdx(hi):%rax(lo)
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("mulx", &formats.binary, 1)
-                .operands_in(vec![
-                    OperandConstraint::FixedReg(reg_rax),
-                    OperandConstraint::RegClass(gpr),
-                ])
-                .operands_out(vec![
-                    OperandConstraint::FixedReg(reg_rax),
-                    OperandConstraint::FixedReg(reg_rdx),
-                ])
-                .emit(
-                    r#"
-                        {{PUT_OP}}(bits, rex1(in_reg1), sink);
-                        modrm_r_bits(in_reg1, bits, sink);
-                    "#,
-                ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg1"),
-    );
-
-    // XX /r for BLEND* instructions
-    recipes.add_template_inferred(
-        EncodingRecipeBuilder::new("blend", &formats.ternary, 1)
-            .operands_in(vec![
-                OperandConstraint::FixedReg(reg_xmm0),
-                OperandConstraint::RegClass(fpr),
-                OperandConstraint::RegClass(fpr),
-            ])
-            .operands_out(vec![2])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg2), sink);
-                    modrm_rr(in_reg1, in_reg2, sink);
-                "#,
-            ),
-        "size_with_inferred_rex_for_inreg1_inreg2",
-    );
-
-    // XX /n ib with 8-bit immediate sign-extended.
-    {
-        recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("r_ib", &formats.binary_imm64, 2)
-                .operands_in(vec![gpr])
-                .operands_out(vec![0])
-                .inst_predicate(InstructionPredicate::new_is_signed_int(
-                    &*formats.binary_imm64,
-                    "imm",
-                    8,
-                    0,
-                ))
-                .emit(
-                    r#"
-                            {{PUT_OP}}(bits, rex1(in_reg0), sink);
-                            modrm_r_bits(in_reg0, bits, sink);
-                            let imm: i64 = imm.into();
-                            sink.put1(imm as u8);
-                        "#,
-                ),
-            "size_with_inferred_rex_for_inreg0",
-        );
-
-        recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("f_ib", &formats.binary_imm64, 2)
-                .operands_in(vec![fpr])
-                .operands_out(vec![0])
-                .inst_predicate(InstructionPredicate::new_is_signed_int(
-                    &*formats.binary_imm64,
-                    "imm",
-                    8,
-                    0,
-                ))
-                .emit(
-                    r#"
-                        {{PUT_OP}}(bits, rex1(in_reg0), sink);
-                        modrm_r_bits(in_reg0, bits, sink);
-                        let imm: i64 = imm.into();
-                        sink.put1(imm as u8);
-                    "#,
-                ),
-            "size_with_inferred_rex_for_inreg0",
-        );
-
-        // XX /n id with 32-bit immediate sign-extended.
-        recipes.add_template(
-            Template::new(
-                EncodingRecipeBuilder::new("r_id", &formats.binary_imm64, 5)
-                    .operands_in(vec![gpr])
-                    .operands_out(vec![0])
-                    .inst_predicate(InstructionPredicate::new_is_signed_int(
-                        &*formats.binary_imm64,
-                        "imm",
-                        32,
-                        0,
-                    ))
-                    .emit(
-                        r#"
-                            {{PUT_OP}}(bits, rex1(in_reg0), sink);
-                            modrm_r_bits(in_reg0, bits, sink);
-                            let imm: i64 = imm.into();
-                            sink.put4(imm as u32);
-                        "#,
-                    ),
-                regs,
-            )
-            .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
-        );
-    }
-
-    // XX /r ib with 8-bit unsigned immediate (e.g. for pshufd)
-    {
-        recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.binary_imm8, 2)
-                .operands_in(vec![fpr])
-                .operands_out(vec![fpr])
-                .inst_predicate(InstructionPredicate::new_is_unsigned_int(
-                    &*formats.binary_imm8,
-                    "imm",
-                    8,
-                    0,
-                ))
-                .emit(
-                    r#"
-                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
-                    modrm_rr(in_reg0, out_reg0, sink);
-                    let imm: i64 = imm.into();
-                    sink.put1(imm as u8);
-                "#,
-                ),
-            "size_with_inferred_rex_for_inreg0_outreg0",
-        );
-    }
-
-    // XX /r ib with 8-bit unsigned immediate (e.g. for extractlane)
-    {
-        recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.binary_imm8, 2)
-                .operands_in(vec![fpr])
-                .operands_out(vec![gpr])
-                .inst_predicate(InstructionPredicate::new_is_unsigned_int(
-                    &*formats.binary_imm8, "imm", 8, 0,
-                ))
-                .emit(
-                    r#"
-                    {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink);
-                    modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte
-                    let imm: i64 = imm.into();
-                    sink.put1(imm as u8);
-                "#,
-                ), "size_with_inferred_rex_for_inreg0_outreg0"
-        );
-    }
-
-    // XX /r ib with 8-bit unsigned immediate (e.g. for insertlane)
-    {
-        recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.ternary_imm8, 2)
-                .operands_in(vec![fpr, gpr])
-                .operands_out(vec![0])
-                .inst_predicate(InstructionPredicate::new_is_unsigned_int(
-                    &*formats.ternary_imm8,
-                    "imm",
-                    8,
-                    0,
-                ))
-                .emit(
-                    r#"
-                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                    modrm_rr(in_reg1, in_reg0, sink);
-                    let imm: i64 = imm.into();
-                    sink.put1(imm as u8);
-                "#,
-                ),
-            "size_with_inferred_rex_for_inreg0_inreg1",
-        );
-    }
-
-    {
-        // XX /n id with 32-bit immediate sign-extended. UnaryImm version.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("u_id", &formats.unary_imm, 5)
-                .operands_out(vec![gpr])
-                .inst_predicate(InstructionPredicate::new_is_signed_int(
-                    &*formats.unary_imm,
-                    "imm",
-                    32,
-                    0,
-                ))
-                .emit(
-                    r#"
-                        {{PUT_OP}}(bits, rex1(out_reg0), sink);
-                        modrm_r_bits(out_reg0, bits, sink);
-                        let imm: i64 = imm.into();
-                        sink.put4(imm as u32);
-                    "#,
-                ),
-        );
-    }
-
-    // XX+rd id unary with 32-bit immediate. Note no recipe predicate.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("pu_id", &formats.unary_imm, 4)
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    // The destination register is encoded in the low bits of the opcode.
-                    // No ModR/M.
-                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
-                    let imm: i64 = imm.into();
-                    sink.put4(imm as u32);
-                "#,
-            ),
-    );
-
-    // XX+rd id unary with bool immediate. Note no recipe predicate.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("pu_id_bool", &formats.unary_bool, 4)
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    // The destination register is encoded in the low bits of the opcode.
-                    // No ModR/M.
-                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
-                    let imm: u32 = if imm { 1 } else { 0 };
-                    sink.put4(imm);
-                "#,
-            ),
-    );
-
-    // XX+rd id nullary with 0 as 32-bit immediate. Note no recipe predicate.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("pu_id_ref", &formats.nullary, 4)
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    // The destination register is encoded in the low bits of the opcode.
-                    // No ModR/M.
-                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
-                    sink.put4(0);
-                "#,
-            ),
-    );
-
-    // XX+rd iq unary with 64-bit immediate.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("pu_iq", &formats.unary_imm, 8)
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
-                    let imm: i64 = imm.into();
-                    sink.put8(imm as u64);
-                "#,
-            ),
-    );
-
-    // XX+rd id unary with zero immediate.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("u_id_z", &formats.unary_imm, 1)
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink);
-                    modrm_rr(out_reg0, out_reg0, sink);
-                "#,
-            ),
-    );
-
-    // XX /n Unary with floating point 32-bit immediate equal to zero.
-    {
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("f32imm_z", &formats.unary_ieee32, 1)
-                .operands_out(vec![fpr])
-                .inst_predicate(InstructionPredicate::new_is_zero_32bit_float(
-                    &*formats.unary_ieee32,
-                    "imm",
-                ))
-                .emit(
-                    r#"
-                        {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink);
-                        modrm_rr(out_reg0, out_reg0, sink);
-                    "#,
-                ),
-        );
-    }
-
-    // XX /n Unary with floating point 64-bit immediate equal to zero.
-    {
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("f64imm_z", &formats.unary_ieee64, 1)
-                .operands_out(vec![fpr])
-                .inst_predicate(InstructionPredicate::new_is_zero_64bit_float(
-                    &*formats.unary_ieee64,
-                    "imm",
-                ))
-                .emit(
-                    r#"
-                        {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink);
-                        modrm_rr(out_reg0, out_reg0, sink);
-                    "#,
-                ),
-        );
-    }
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("pushq", &formats.unary, 0)
-            .operands_in(vec![gpr])
-            .emit(
-                r#"
-                    sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
-                    {{PUT_OP}}(bits | (in_reg0 & 7), rex1(in_reg0), sink);
-                "#,
-            ),
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("popq", &formats.nullary, 0)
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
-                "#,
-            ),
-    );
-
-    // XX /r, for regmove instructions.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("copysp", &formats.copy_special, 1)
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(dst, src), sink);
-                    modrm_rr(dst, src, sink);
-                "#,
-            ),
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("adjustsp", &formats.unary, 1)
-            .operands_in(vec![gpr])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(RU::rsp.into(), in_reg0), sink);
-                    modrm_rr(RU::rsp.into(), in_reg0, sink);
-                "#,
-            ),
-    );
-
-    {
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("adjustsp_ib", &formats.unary_imm, 2)
-                .inst_predicate(InstructionPredicate::new_is_signed_int(
-                    &*formats.unary_imm,
-                    "imm",
-                    8,
-                    0,
-                ))
-                .emit(
-                    r#"
-                        {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink);
-                        modrm_r_bits(RU::rsp.into(), bits, sink);
-                        let imm: i64 = imm.into();
-                        sink.put1(imm as u8);
-                    "#,
-                ),
-        );
-
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("adjustsp_id", &formats.unary_imm, 5)
-                .inst_predicate(InstructionPredicate::new_is_signed_int(
-                    &*formats.unary_imm,
-                    "imm",
-                    32,
-                    0,
-                ))
-                .emit(
-                    r#"
-                        {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink);
-                        modrm_r_bits(RU::rsp.into(), bits, sink);
-                        let imm: i64 = imm.into();
-                        sink.put4(imm as u32);
-                    "#,
-                ),
-        );
-    }
-
-    recipes.add_recipe(
-        EncodingRecipeBuilder::new("dummy_sarg_t", &formats.nullary, 0)
-            .operands_out(vec![Stack::new(gpr)])
-            .emit(""),
-    );
-
-    // XX+rd id with Abs4 function relocation.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("fnaddr4", &formats.func_addr, 4)
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
-                    sink.reloc_external(func.srclocs[inst],
-                                        Reloc::Abs4,
-                                        &func.dfg.ext_funcs[func_ref].name,
-                                        0);
-                    sink.put4(0);
-                "#,
-            ),
-    );
-
-    // XX+rd iq with Abs8 function relocation.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("fnaddr8", &formats.func_addr, 8)
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
-                    sink.reloc_external(func.srclocs[inst],
-                                        Reloc::Abs8,
-                                        &func.dfg.ext_funcs[func_ref].name,
-                                        0);
-                    sink.put8(0);
-                "#,
-            ),
-    );
-
-    // Similar to fnaddr4, but writes !0 (this is used by BaldrMonkey).
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("allones_fnaddr4", &formats.func_addr, 4)
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
-                    sink.reloc_external(func.srclocs[inst],
-                                        Reloc::Abs4,
-                                        &func.dfg.ext_funcs[func_ref].name,
-                                        0);
-                    // Write the immediate as `!0` for the benefit of BaldrMonkey.
-                    sink.put4(!0);
-                "#,
-            ),
-    );
-
-    // Similar to fnaddr8, but writes !0 (this is used by BaldrMonkey).
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("allones_fnaddr8", &formats.func_addr, 8)
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
-                    sink.reloc_external(func.srclocs[inst],
-                                        Reloc::Abs8,
-                                        &func.dfg.ext_funcs[func_ref].name,
-                                        0);
-                    // Write the immediate as `!0` for the benefit of BaldrMonkey.
-                    sink.put8(!0);
-                "#,
-            ),
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("pcrel_fnaddr8", &formats.func_addr, 5)
-            .operands_out(vec![gpr])
-            // rex2 gets passed 0 for r/m register because the upper bit of
-            // r/m doesn't get decoded when in rip-relative addressing mode.
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
-                    modrm_riprel(out_reg0, sink);
-                    // The addend adjusts for the difference between the end of the
-                    // instruction and the beginning of the immediate field.
-                    sink.reloc_external(func.srclocs[inst],
-                                        Reloc::X86PCRel4,
-                                        &func.dfg.ext_funcs[func_ref].name,
-                                        -4);
-                    sink.put4(0);
-                "#,
-            ),
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("got_fnaddr8", &formats.func_addr, 5)
-            .operands_out(vec![gpr])
-            // rex2 gets passed 0 for r/m register because the upper bit of
-            // r/m doesn't get decoded when in rip-relative addressing mode.
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
-                    modrm_riprel(out_reg0, sink);
-                    // The addend adjusts for the difference between the end of the
-                    // instruction and the beginning of the immediate field.
-                    sink.reloc_external(func.srclocs[inst],
-                                        Reloc::X86GOTPCRel4,
-                                        &func.dfg.ext_funcs[func_ref].name,
-                                        -4);
-                    sink.put4(0);
-                "#,
-            ),
-    );
-
-    // XX+rd id with Abs4 globalsym relocation.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("gvaddr4", &formats.unary_global_value, 4)
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
-                    sink.reloc_external(func.srclocs[inst],
-                                        Reloc::Abs4,
-                                        &func.global_values[global_value].symbol_name(),
-                                        0);
-                    sink.put4(0);
-                "#,
-            ),
-    );
-
-    // XX+rd iq with Abs8 globalsym relocation.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("gvaddr8", &formats.unary_global_value, 8)
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink);
-                    sink.reloc_external(func.srclocs[inst],
-                                        Reloc::Abs8,
-                                        &func.global_values[global_value].symbol_name(),
-                                        0);
-                    sink.put8(0);
-                "#,
-            ),
-    );
-
-    // XX+rd iq with PCRel4 globalsym relocation.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("pcrel_gvaddr8", &formats.unary_global_value, 5)
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
-                    modrm_rm(5, out_reg0, sink);
-                    // The addend adjusts for the difference between the end of the
-                    // instruction and the beginning of the immediate field.
-                    sink.reloc_external(func.srclocs[inst],
-                                        Reloc::X86PCRel4,
-                                        &func.global_values[global_value].symbol_name(),
-                                        -4);
-                    sink.put4(0);
-                "#,
-            ),
-    );
-
-    // XX+rd iq with Abs8 globalsym relocation.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("got_gvaddr8", &formats.unary_global_value, 5)
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
-                    modrm_rm(5, out_reg0, sink);
-                    // The addend adjusts for the difference between the end of the
-                    // instruction and the beginning of the immediate field.
-                    sink.reloc_external(func.srclocs[inst],
-                                        Reloc::X86GOTPCRel4,
-                                        &func.global_values[global_value].symbol_name(),
-                                        -4);
-                    sink.put4(0);
-                "#,
-            ),
-    );
-
-    // Stack addresses.
-    //
-    // TODO Alternative forms for 8-bit immediates, when applicable.
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("spaddr_id", &formats.stack_load, 6)
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    let sp = StackRef::sp(stack_slot, &func.stack_slots);
-                    let base = stk_base(sp.base);
-                    {{PUT_OP}}(bits, rex2(base, out_reg0), sink);
-                    modrm_sib_disp32(out_reg0, sink);
-                    sib_noindex(base, sink);
-                    let imm : i32 = offset.into();
-                    sink.put4(sp.offset.checked_add(imm).unwrap() as u32);
-                "#,
-            ),
-    );
-
-    // Constant addresses.
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("const_addr", &formats.unary_const, 5)
-            .operands_out(vec![gpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
-                    modrm_riprel(out_reg0, sink);
-                    const_disp4(constant_handle, func, sink);
-                "#,
-            ),
-    );
-
-    // Store recipes.
-
-    {
-        // Simple stores.
-
-        // A predicate asking if the offset is zero.
-        let has_no_offset =
-            InstructionPredicate::new_is_field_equal(&*formats.store, "offset", "0".into());
-
-        // XX /r register-indirect store with no offset.
-        let st = recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("st", &formats.store, 1)
-                .operands_in(vec![gpr, gpr])
-                .inst_predicate(has_no_offset.clone())
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                        if needs_sib_byte(in_reg1) {
-                            modrm_sib(in_reg0, sink);
-                            sib_noindex(in_reg1, sink);
-                        } else if needs_offset(in_reg1) {
-                            modrm_disp8(in_reg1, in_reg0, sink);
-                            sink.put1(0);
-                        } else {
-                            modrm_rm(in_reg1, in_reg0, sink);
-                        }
-                    "#,
-                ),
-        );
-
-        // XX /r register-indirect store with no offset.
-        // Only ABCD allowed for stored value. This is for byte stores with no REX.
-        recipes.add_template(
-            Template::new(
-                EncodingRecipeBuilder::new("st_abcd", &formats.store, 1)
-                    .operands_in(vec![abcd, gpr])
-                    .inst_predicate(has_no_offset.clone())
-                    .clobbers_flags(false)
-                    .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1")
-                    .emit(
-                        r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                        if needs_sib_byte(in_reg1) {
-                            modrm_sib(in_reg0, sink);
-                            sib_noindex(in_reg1, sink);
-                        } else if needs_offset(in_reg1) {
-                            modrm_disp8(in_reg1, in_reg0, sink);
-                            sink.put1(0);
-                        } else {
-                            modrm_rm(in_reg1, in_reg0, sink);
-                        }
-                    "#,
-                    ),
-                regs,
-            )
-            .when_prefixed(st),
-        );
-
-        // XX /r register-indirect store of FPR with no offset.
-        recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("fst", &formats.store, 1)
-                .operands_in(vec![fpr, gpr])
-                .inst_predicate(has_no_offset)
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_sib_or_offset_for_inreg_1")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                        if needs_sib_byte(in_reg1) {
-                            modrm_sib(in_reg0, sink);
-                            sib_noindex(in_reg1, sink);
-                        } else if needs_offset(in_reg1) {
-                            modrm_disp8(in_reg1, in_reg0, sink);
-                            sink.put1(0);
-                        } else {
-                            modrm_rm(in_reg1, in_reg0, sink);
-                        }
-                    "#,
-                ),
-            "size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1",
-        );
-
-        let has_small_offset =
-            InstructionPredicate::new_is_signed_int(&*formats.store, "offset", 8, 0);
-
-        // XX /r register-indirect store with 8-bit offset.
-        let st_disp8 = recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("stDisp8", &formats.store, 2)
-                .operands_in(vec![gpr, gpr])
-                .inst_predicate(has_small_offset.clone())
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_sib_for_inreg_1")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                        if needs_sib_byte(in_reg1) {
-                            modrm_sib_disp8(in_reg0, sink);
-                            sib_noindex(in_reg1, sink);
-                        } else {
-                            modrm_disp8(in_reg1, in_reg0, sink);
-                        }
-                        let offset: i32 = offset.into();
-                        sink.put1(offset as u8);
-                    "#,
-                ),
-        );
-
-        // XX /r register-indirect store with 8-bit offset.
-        // Only ABCD allowed for stored value. This is for byte stores with no REX.
-        recipes.add_template(
-            Template::new(
-                EncodingRecipeBuilder::new("stDisp8_abcd", &formats.store, 2)
-                    .operands_in(vec![abcd, gpr])
-                    .inst_predicate(has_small_offset.clone())
-                    .clobbers_flags(false)
-                    .compute_size("size_plus_maybe_sib_for_inreg_1")
-                    .emit(
-                        r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                        if needs_sib_byte(in_reg1) {
-                            modrm_sib_disp8(in_reg0, sink);
-                            sib_noindex(in_reg1, sink);
-                        } else {
-                            modrm_disp8(in_reg1, in_reg0, sink);
-                        }
-                        let offset: i32 = offset.into();
-                        sink.put1(offset as u8);
-                    "#,
-                    ),
-                regs,
-            )
-            .when_prefixed(st_disp8),
-        );
-
-        // XX /r register-indirect store with 8-bit offset of FPR.
-        recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("fstDisp8", &formats.store, 2)
-                .operands_in(vec![fpr, gpr])
-                .inst_predicate(has_small_offset)
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_sib_for_inreg_1")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                        if needs_sib_byte(in_reg1) {
-                            modrm_sib_disp8(in_reg0, sink);
-                            sib_noindex(in_reg1, sink);
-                        } else {
-                            modrm_disp8(in_reg1, in_reg0, sink);
-                        }
-                        let offset: i32 = offset.into();
-                        sink.put1(offset as u8);
-                    "#,
-                ),
-            "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1",
-        );
-
-        // XX /r register-indirect store with 32-bit offset.
-        let st_disp32 = recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("stDisp32", &formats.store, 5)
-                .operands_in(vec![gpr, gpr])
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_sib_for_inreg_1")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                        if needs_sib_byte(in_reg1) {
-                            modrm_sib_disp32(in_reg0, sink);
-                            sib_noindex(in_reg1, sink);
-                        } else {
-                            modrm_disp32(in_reg1, in_reg0, sink);
-                        }
-                        let offset: i32 = offset.into();
-                        sink.put4(offset as u32);
-                    "#,
-                ),
-        );
-
-        // XX /r register-indirect store with 32-bit offset.
-        // Only ABCD allowed for stored value. This is for byte stores with no REX.
-        recipes.add_template(
-            Template::new(
-                EncodingRecipeBuilder::new("stDisp32_abcd", &formats.store, 5)
-                    .operands_in(vec![abcd, gpr])
-                    .clobbers_flags(false)
-                    .compute_size("size_plus_maybe_sib_for_inreg_1")
-                    .emit(
-                        r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                        if needs_sib_byte(in_reg1) {
-                            modrm_sib_disp32(in_reg0, sink);
-                            sib_noindex(in_reg1, sink);
-                        } else {
-                            modrm_disp32(in_reg1, in_reg0, sink);
-                        }
-                        let offset: i32 = offset.into();
-                        sink.put4(offset as u32);
-                    "#,
-                    ),
-                regs,
-            )
-            .when_prefixed(st_disp32),
-        );
-
-        // XX /r register-indirect store with 32-bit offset of FPR.
-        recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("fstDisp32", &formats.store, 5)
-                .operands_in(vec![fpr, gpr])
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_sib_for_inreg_1")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                        if needs_sib_byte(in_reg1) {
-                            modrm_sib_disp32(in_reg0, sink);
-                            sib_noindex(in_reg1, sink);
-                        } else {
-                            modrm_disp32(in_reg1, in_reg0, sink);
-                        }
-                        let offset: i32 = offset.into();
-                        sink.put4(offset as u32);
-                    "#,
-                ),
-            "size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1",
-        );
-    }
-
-    {
-        // Complex stores.
-
-        // A predicate asking if the offset is zero.
-        let has_no_offset =
-            InstructionPredicate::new_is_field_equal(&*formats.store_complex, "offset", "0".into());
-
-        // XX /r register-indirect store with index and no offset.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("stWithIndex", &formats.store_complex, 2)
-                .operands_in(vec![gpr, gpr, gpr])
-                .inst_predicate(has_no_offset.clone())
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_offset_for_inreg_1")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
-                        // The else branch always inserts an SIB byte.
-                        if needs_offset(in_reg1) {
-                            modrm_sib_disp8(in_reg0, sink);
-                            sib(0, in_reg2, in_reg1, sink);
-                            sink.put1(0);
-                        } else {
-                            modrm_sib(in_reg0, sink);
-                            sib(0, in_reg2, in_reg1, sink);
-                        }
-                    "#,
-                ),
-        );
-
-        // XX /r register-indirect store with index and no offset.
-        // Only ABCD allowed for stored value. This is for byte stores with no REX.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("stWithIndex_abcd", &formats.store_complex, 2)
-                .operands_in(vec![abcd, gpr, gpr])
-                .inst_predicate(has_no_offset.clone())
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_offset_for_inreg_1")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
-                        // The else branch always inserts an SIB byte.
-                        if needs_offset(in_reg1) {
-                            modrm_sib_disp8(in_reg0, sink);
-                            sib(0, in_reg2, in_reg1, sink);
-                            sink.put1(0);
-                        } else {
-                            modrm_sib(in_reg0, sink);
-                            sib(0, in_reg2, in_reg1, sink);
-                        }
-                    "#,
-                ),
-        );
-
-        // XX /r register-indirect store with index and no offset of FPR.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("fstWithIndex", &formats.store_complex, 2)
-                .operands_in(vec![fpr, gpr, gpr])
-                .inst_predicate(has_no_offset)
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_offset_for_inreg_1")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
-                        // The else branch always inserts an SIB byte.
-                        if needs_offset(in_reg1) {
-                            modrm_sib_disp8(in_reg0, sink);
-                            sib(0, in_reg2, in_reg1, sink);
-                            sink.put1(0);
-                        } else {
-                            modrm_sib(in_reg0, sink);
-                            sib(0, in_reg2, in_reg1, sink);
-                        }
-                    "#,
-                ),
-        );
-
-        let has_small_offset =
-            InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 8, 0);
-
-        // XX /r register-indirect store with index and 8-bit offset.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("stWithIndexDisp8", &formats.store_complex, 3)
-                .operands_in(vec![gpr, gpr, gpr])
-                .inst_predicate(has_small_offset.clone())
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
-                        modrm_sib_disp8(in_reg0, sink);
-                        sib(0, in_reg2, in_reg1, sink);
-                        let offset: i32 = offset.into();
-                        sink.put1(offset as u8);
-                    "#,
-                ),
-        );
-
-        // XX /r register-indirect store with index and 8-bit offset.
-        // Only ABCD allowed for stored value. This is for byte stores with no REX.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("stWithIndexDisp8_abcd", &formats.store_complex, 3)
-                .operands_in(vec![abcd, gpr, gpr])
-                .inst_predicate(has_small_offset.clone())
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
-                        modrm_sib_disp8(in_reg0, sink);
-                        sib(0, in_reg2, in_reg1, sink);
-                        let offset: i32 = offset.into();
-                        sink.put1(offset as u8);
-                    "#,
-                ),
-        );
-
-        // XX /r register-indirect store with index and 8-bit offset of FPR.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("fstWithIndexDisp8", &formats.store_complex, 3)
-                .operands_in(vec![fpr, gpr, gpr])
-                .inst_predicate(has_small_offset)
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
-                        modrm_sib_disp8(in_reg0, sink);
-                        sib(0, in_reg2, in_reg1, sink);
-                        let offset: i32 = offset.into();
-                        sink.put1(offset as u8);
-                    "#,
-                ),
-        );
-
-        let has_big_offset =
-            InstructionPredicate::new_is_signed_int(&*formats.store_complex, "offset", 32, 0);
-
-        // XX /r register-indirect store with index and 32-bit offset.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("stWithIndexDisp32", &formats.store_complex, 6)
-                .operands_in(vec![gpr, gpr, gpr])
-                .inst_predicate(has_big_offset.clone())
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
-                        modrm_sib_disp32(in_reg0, sink);
-                        sib(0, in_reg2, in_reg1, sink);
-                        let offset: i32 = offset.into();
-                        sink.put4(offset as u32);
-                    "#,
-                ),
-        );
-
-        // XX /r register-indirect store with index and 32-bit offset.
-        // Only ABCD allowed for stored value. This is for byte stores with no REX.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("stWithIndexDisp32_abcd", &formats.store_complex, 6)
-                .operands_in(vec![abcd, gpr, gpr])
-                .inst_predicate(has_big_offset.clone())
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
-                        modrm_sib_disp32(in_reg0, sink);
-                        sib(0, in_reg2, in_reg1, sink);
-                        let offset: i32 = offset.into();
-                        sink.put4(offset as u32);
-                    "#,
-                ),
-        );
-
-        // XX /r register-indirect store with index and 32-bit offset of FPR.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("fstWithIndexDisp32", &formats.store_complex, 6)
-                .operands_in(vec![fpr, gpr, gpr])
-                .inst_predicate(has_big_offset)
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink);
-                        modrm_sib_disp32(in_reg0, sink);
-                        sib(0, in_reg2, in_reg1, sink);
-                        let offset: i32 = offset.into();
-                        sink.put4(offset as u32);
-                    "#,
-                ),
-        );
-    }
-
-    // Unary spill with SIB and 32-bit displacement.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("spillSib32", &formats.unary, 6)
-            .operands_in(vec![gpr])
-            .operands_out(vec![stack_gpr32])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
-                    let base = stk_base(out_stk0.base);
-                    {{PUT_OP}}(bits, rex2(base, in_reg0), sink);
-                    modrm_sib_disp32(in_reg0, sink);
-                    sib_noindex(base, sink);
-                    sink.put4(out_stk0.offset as u32);
-                "#,
-            ),
-    );
-
-    // Like spillSib32, but targeting an FPR rather than a GPR.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("fspillSib32", &formats.unary, 6)
-            .operands_in(vec![fpr])
-            .operands_out(vec![stack_fpr32])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
-                    let base = stk_base(out_stk0.base);
-                    {{PUT_OP}}(bits, rex2(base, in_reg0), sink);
-                    modrm_sib_disp32(in_reg0, sink);
-                    sib_noindex(base, sink);
-                    sink.put4(out_stk0.offset as u32);
-                "#,
-            ),
-    );
-
-    // Regspill using RSP-relative addressing.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("regspill32", &formats.reg_spill, 6)
-            .operands_in(vec![gpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
-                    let dst = StackRef::sp(dst, &func.stack_slots);
-                    let base = stk_base(dst.base);
-                    {{PUT_OP}}(bits, rex2(base, src), sink);
-                    modrm_sib_disp32(src, sink);
-                    sib_noindex(base, sink);
-                    sink.put4(dst.offset as u32);
-                "#,
-            ),
-    );
-
-    // Like regspill32, but targeting an FPR rather than a GPR.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("fregspill32", &formats.reg_spill, 6)
-            .operands_in(vec![fpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
-                    let dst = StackRef::sp(dst, &func.stack_slots);
-                    let base = stk_base(dst.base);
-                    {{PUT_OP}}(bits, rex2(base, src), sink);
-                    modrm_sib_disp32(src, sink);
-                    sib_noindex(base, sink);
-                    sink.put4(dst.offset as u32);
-                "#,
-            ),
-    );
-
-    // Load recipes.
-
-    {
-        // Simple loads.
-
-        // A predicate asking if the offset is zero.
-        let has_no_offset =
-            InstructionPredicate::new_is_field_equal(&*formats.load, "offset", "0".into());
-
-        // XX /r load with no offset.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("ld", &formats.load, 1)
-                .operands_in(vec![gpr])
-                .operands_out(vec![gpr])
-                .inst_predicate(has_no_offset.clone())
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
-                        if needs_sib_byte(in_reg0) {
-                            modrm_sib(out_reg0, sink);
-                            sib_noindex(in_reg0, sink);
-                        } else if needs_offset(in_reg0) {
-                            modrm_disp8(in_reg0, out_reg0, sink);
-                            sink.put1(0);
-                        } else {
-                            modrm_rm(in_reg0, out_reg0, sink);
-                        }
-                    "#,
-                ),
-        );
-
-        // XX /r float load with no offset.
-        recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("fld", &formats.load, 1)
-                .operands_in(vec![gpr])
-                .operands_out(vec![fpr])
-                .inst_predicate(has_no_offset)
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_sib_or_offset_for_inreg_0")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
-                        if needs_sib_byte(in_reg0) {
-                            modrm_sib(out_reg0, sink);
-                            sib_noindex(in_reg0, sink);
-                        } else if needs_offset(in_reg0) {
-                            modrm_disp8(in_reg0, out_reg0, sink);
-                            sink.put1(0);
-                        } else {
-                            modrm_rm(in_reg0, out_reg0, sink);
-                        }
-                    "#,
-                ),
-            "size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0",
-        );
-
-        let has_small_offset =
-            InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 8, 0);
-
-        // XX /r load with 8-bit offset.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("ldDisp8", &formats.load, 2)
-                .operands_in(vec![gpr])
-                .operands_out(vec![gpr])
-                .inst_predicate(has_small_offset.clone())
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_sib_for_inreg_0")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
-                        if needs_sib_byte(in_reg0) {
-                            modrm_sib_disp8(out_reg0, sink);
-                            sib_noindex(in_reg0, sink);
-                        } else {
-                            modrm_disp8(in_reg0, out_reg0, sink);
-                        }
-                        let offset: i32 = offset.into();
-                        sink.put1(offset as u8);
-                    "#,
-                ),
-        );
-
-        // XX /r float load with 8-bit offset.
-        recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("fldDisp8", &formats.load, 2)
-                .operands_in(vec![gpr])
-                .operands_out(vec![fpr])
-                .inst_predicate(has_small_offset)
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_sib_for_inreg_0")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
-                        if needs_sib_byte(in_reg0) {
-                            modrm_sib_disp8(out_reg0, sink);
-                            sib_noindex(in_reg0, sink);
-                        } else {
-                            modrm_disp8(in_reg0, out_reg0, sink);
-                        }
-                        let offset: i32 = offset.into();
-                        sink.put1(offset as u8);
-                    "#,
-                ),
-            "size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0",
-        );
-
-        let has_big_offset =
-            InstructionPredicate::new_is_signed_int(&*formats.load, "offset", 32, 0);
-
-        // XX /r load with 32-bit offset.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("ldDisp32", &formats.load, 5)
-                .operands_in(vec![gpr])
-                .operands_out(vec![gpr])
-                .inst_predicate(has_big_offset.clone())
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_sib_for_inreg_0")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
-                        if needs_sib_byte(in_reg0) {
-                            modrm_sib_disp32(out_reg0, sink);
-                            sib_noindex(in_reg0, sink);
-                        } else {
-                            modrm_disp32(in_reg0, out_reg0, sink);
-                        }
-                        let offset: i32 = offset.into();
-                        sink.put4(offset as u32);
-                    "#,
-                ),
-        );
-
-        // XX /r float load with 32-bit offset.
-        recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("fldDisp32", &formats.load, 5)
-                .operands_in(vec![gpr])
-                .operands_out(vec![fpr])
-                .inst_predicate(has_big_offset)
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_sib_for_inreg_0")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
-                        if needs_sib_byte(in_reg0) {
-                            modrm_sib_disp32(out_reg0, sink);
-                            sib_noindex(in_reg0, sink);
-                        } else {
-                            modrm_disp32(in_reg0, out_reg0, sink);
-                        }
-                        let offset: i32 = offset.into();
-                        sink.put4(offset as u32);
-                    "#,
-                ),
-            "size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0",
-        );
-    }
-
-    {
-        // Complex loads.
-
-        // A predicate asking if the offset is zero.
-        let has_no_offset =
-            InstructionPredicate::new_is_field_equal(&*formats.load_complex, "offset", "0".into());
-
-        // XX /r load with index and no offset.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("ldWithIndex", &formats.load_complex, 2)
-                .operands_in(vec![gpr, gpr])
-                .operands_out(vec![gpr])
-                .inst_predicate(has_no_offset.clone())
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_offset_for_inreg_0")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
-                        // The else branch always inserts an SIB byte.
-                        if needs_offset(in_reg0) {
-                            modrm_sib_disp8(out_reg0, sink);
-                            sib(0, in_reg1, in_reg0, sink);
-                            sink.put1(0);
-                        } else {
-                            modrm_sib(out_reg0, sink);
-                            sib(0, in_reg1, in_reg0, sink);
-                        }
-                    "#,
-                ),
-        );
-
-        // XX /r float load with index and no offset.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("fldWithIndex", &formats.load_complex, 2)
-                .operands_in(vec![gpr, gpr])
-                .operands_out(vec![fpr])
-                .inst_predicate(has_no_offset)
-                .clobbers_flags(false)
-                .compute_size("size_plus_maybe_offset_for_inreg_0")
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
-                        // The else branch always inserts an SIB byte.
-                        if needs_offset(in_reg0) {
-                            modrm_sib_disp8(out_reg0, sink);
-                            sib(0, in_reg1, in_reg0, sink);
-                            sink.put1(0);
-                        } else {
-                            modrm_sib(out_reg0, sink);
-                            sib(0, in_reg1, in_reg0, sink);
-                        }
-                    "#,
-                ),
-        );
-
-        let has_small_offset =
-            InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 8, 0);
-
-        // XX /r load with index and 8-bit offset.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("ldWithIndexDisp8", &formats.load_complex, 3)
-                .operands_in(vec![gpr, gpr])
-                .operands_out(vec![gpr])
-                .inst_predicate(has_small_offset.clone())
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
-                        modrm_sib_disp8(out_reg0, sink);
-                        sib(0, in_reg1, in_reg0, sink);
-                        let offset: i32 = offset.into();
-                        sink.put1(offset as u8);
-                    "#,
-                ),
-        );
-
-        // XX /r float load with 8-bit offset.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("fldWithIndexDisp8", &formats.load_complex, 3)
-                .operands_in(vec![gpr, gpr])
-                .operands_out(vec![fpr])
-                .inst_predicate(has_small_offset)
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
-                        modrm_sib_disp8(out_reg0, sink);
-                        sib(0, in_reg1, in_reg0, sink);
-                        let offset: i32 = offset.into();
-                        sink.put1(offset as u8);
-                    "#,
-                ),
-        );
-
-        let has_big_offset =
-            InstructionPredicate::new_is_signed_int(&*formats.load_complex, "offset", 32, 0);
-
-        // XX /r load with index and 32-bit offset.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("ldWithIndexDisp32", &formats.load_complex, 6)
-                .operands_in(vec![gpr, gpr])
-                .operands_out(vec![gpr])
-                .inst_predicate(has_big_offset.clone())
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
-                        modrm_sib_disp32(out_reg0, sink);
-                        sib(0, in_reg1, in_reg0, sink);
-                        let offset: i32 = offset.into();
-                        sink.put4(offset as u32);
-                    "#,
-                ),
-        );
-
-        // XX /r float load with index and 32-bit offset.
-        recipes.add_template_recipe(
-            EncodingRecipeBuilder::new("fldWithIndexDisp32", &formats.load_complex, 6)
-                .operands_in(vec![gpr, gpr])
-                .operands_out(vec![fpr])
-                .inst_predicate(has_big_offset)
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                        if !flags.notrap() {
-                            sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
-                        }
-                        {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
-                        modrm_sib_disp32(out_reg0, sink);
-                        sib(0, in_reg1, in_reg0, sink);
-                        let offset: i32 = offset.into();
-                        sink.put4(offset as u32);
-                    "#,
-                ),
-        );
-    }
-
-    // Unary fill with SIB and 32-bit displacement.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("fillSib32", &formats.unary, 6)
-            .operands_in(vec![stack_gpr32])
-            .operands_out(vec![gpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    let base = stk_base(in_stk0.base);
-                    {{PUT_OP}}(bits, rex2(base, out_reg0), sink);
-                    modrm_sib_disp32(out_reg0, sink);
-                    sib_noindex(base, sink);
-                    sink.put4(in_stk0.offset as u32);
-                "#,
-            ),
-    );
-
-    // Like fillSib32, but targeting an FPR rather than a GPR.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("ffillSib32", &formats.unary, 6)
-            .operands_in(vec![stack_fpr32])
-            .operands_out(vec![fpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    let base = stk_base(in_stk0.base);
-                    {{PUT_OP}}(bits, rex2(base, out_reg0), sink);
-                    modrm_sib_disp32(out_reg0, sink);
-                    sib_noindex(base, sink);
-                    sink.put4(in_stk0.offset as u32);
-                "#,
-            ),
-    );
-
-    // Regfill with RSP-relative 32-bit displacement.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("regfill32", &formats.reg_fill, 6)
-            .operands_in(vec![stack_gpr32])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    let src = StackRef::sp(src, &func.stack_slots);
-                    let base = stk_base(src.base);
-                    {{PUT_OP}}(bits, rex2(base, dst), sink);
-                    modrm_sib_disp32(dst, sink);
-                    sib_noindex(base, sink);
-                    sink.put4(src.offset as u32);
-                "#,
-            ),
-    );
-
-    // Like regfill32, but targeting an FPR rather than a GPR.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("fregfill32", &formats.reg_fill, 6)
-            .operands_in(vec![stack_fpr32])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    let src = StackRef::sp(src, &func.stack_slots);
-                    let base = stk_base(src.base);
-                    {{PUT_OP}}(bits, rex2(base, dst), sink);
-                    modrm_sib_disp32(dst, sink);
-                    sib_noindex(base, sink);
-                    sink.put4(src.offset as u32);
-                "#,
-            ),
-    );
-
-    // Call/return.
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("call_id", &formats.call, 4).emit(
-            r#"
-            sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
-            {{PUT_OP}}(bits, BASE_REX, sink);
-            // The addend adjusts for the difference between the end of the
-            // instruction and the beginning of the immediate field.
-            sink.reloc_external(func.srclocs[inst],
-                                Reloc::X86CallPCRel4,
-                                &func.dfg.ext_funcs[func_ref].name,
-                                -4);
-            sink.put4(0);
-            sink.add_call_site(opcode, func.srclocs[inst]);
-        "#,
-        ),
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("call_plt_id", &formats.call, 4).emit(
-            r#"
-            sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
-            {{PUT_OP}}(bits, BASE_REX, sink);
-            sink.reloc_external(func.srclocs[inst],
-                                Reloc::X86CallPLTRel4,
-                                &func.dfg.ext_funcs[func_ref].name,
-                                -4);
-            sink.put4(0);
-            sink.add_call_site(opcode, func.srclocs[inst]);
-        "#,
-        ),
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("call_r", &formats.call_indirect, 1)
-            .operands_in(vec![gpr])
-            .emit(
-                r#"
-                    sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
-                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
-                    modrm_r_bits(in_reg0, bits, sink);
-                    sink.add_call_site(opcode, func.srclocs[inst]);
-                "#,
-            ),
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("ret", &formats.multiary, 0)
-            .emit("{{PUT_OP}}(bits, BASE_REX, sink);"),
-    );
-
-    // Branches.
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("jmpb", &formats.jump, 1)
-            .branch_range((1, 8))
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, BASE_REX, sink);
-                    disp1(destination, func, sink);
-                "#,
-            ),
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("jmpd", &formats.jump, 4)
-            .branch_range((4, 32))
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, BASE_REX, sink);
-                    disp4(destination, func, sink);
-                "#,
-            ),
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("brib", &formats.branch_int, 1)
-            .operands_in(vec![reg_rflags])
-            .branch_range((1, 8))
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink);
-                    disp1(destination, func, sink);
-                "#,
-            ),
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("brid", &formats.branch_int, 4)
-            .operands_in(vec![reg_rflags])
-            .branch_range((4, 32))
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink);
-                    disp4(destination, func, sink);
-                "#,
-            ),
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("brfb", &formats.branch_float, 1)
-            .operands_in(vec![reg_rflags])
-            .branch_range((1, 8))
-            .clobbers_flags(false)
-            .inst_predicate(supported_floatccs_predicate(
-                &supported_floatccs,
-                &*formats.branch_float,
-            ))
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink);
-                    disp1(destination, func, sink);
-                "#,
-            ),
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("brfd", &formats.branch_float, 4)
-            .operands_in(vec![reg_rflags])
-            .branch_range((4, 32))
-            .clobbers_flags(false)
-            .inst_predicate(supported_floatccs_predicate(
-                &supported_floatccs,
-                &*formats.branch_float,
-            ))
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink);
-                    disp4(destination, func, sink);
-                "#,
-            ),
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("indirect_jmp", &formats.indirect_jump, 1)
-            .operands_in(vec![gpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
-                    modrm_r_bits(in_reg0, bits, sink);
-                "#,
-            ),
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("jt_entry", &formats.branch_table_entry, 2)
-            .operands_in(vec![gpr, gpr])
-            .operands_out(vec![gpr])
-            .clobbers_flags(false)
-            .inst_predicate(valid_scale(&*formats.branch_table_entry))
-            .compute_size("size_plus_maybe_offset_for_inreg_1")
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex3(in_reg1, out_reg0, in_reg0), sink);
-                    if needs_offset(in_reg1) {
-                        modrm_sib_disp8(out_reg0, sink);
-                        sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink);
-                        sink.put1(0);
-                    } else {
-                        modrm_sib(out_reg0, sink);
-                        sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink);
-                    }
-                "#,
-            ),
-    );
-
-    recipes.add_template_inferred(
-        EncodingRecipeBuilder::new("vconst", &formats.unary_const, 5)
-            .operands_out(vec![fpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
-                    modrm_riprel(out_reg0, sink);
-                    const_disp4(constant_handle, func, sink);
-                "#,
-            ),
-        "size_with_inferred_rex_for_outreg0",
-    );
-
-    recipes.add_template_inferred(
-        EncodingRecipeBuilder::new("vconst_optimized", &formats.unary_const, 1)
-            .operands_out(vec![fpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink);
-                    modrm_rr(out_reg0, out_reg0, sink);
-                "#,
-            ),
-        "size_with_inferred_rex_for_outreg0",
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("jt_base", &formats.branch_table_base, 5)
-            .operands_out(vec![gpr])
-            .clobbers_flags(false)
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
-                    modrm_riprel(out_reg0, sink);
-
-                    // No reloc is needed here as the jump table is emitted directly after
-                    // the function body.
-                    jt_disp4(table, func, sink);
-                "#,
-            ),
-    );
-
-    // Test flags and set a register.
-    //
-    // These setCC instructions only set the low 8 bits, and they can only write ABCD registers
-    // without a REX prefix.
-    //
-    // Other instruction encodings accepting `b1` inputs have the same constraints and only look at
-    // the low 8 bits of the input register.
-
-    let seti = recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("seti", &formats.int_cond, 1)
-                .operands_in(vec![reg_rflags])
-                .operands_out(vec![gpr])
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                    {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink);
-                    modrm_r_bits(out_reg0, bits, sink);
-                "#,
-                ),
-            regs,
-        )
-        .rex_kind(RecipePrefixKind::AlwaysEmitRex),
-    );
-
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("seti_abcd", &formats.int_cond, 1)
-                .operands_in(vec![reg_rflags])
-                .operands_out(vec![abcd])
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                    {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink);
-                    modrm_r_bits(out_reg0, bits, sink);
-                "#,
-                ),
-            regs,
-        )
-        .when_prefixed(seti),
-    );
-
-    let setf = recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("setf", &formats.float_cond, 1)
-                .operands_in(vec![reg_rflags])
-                .operands_out(vec![gpr])
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                    {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink);
-                    modrm_r_bits(out_reg0, bits, sink);
-                "#,
-                ),
-            regs,
-        )
-        .rex_kind(RecipePrefixKind::AlwaysEmitRex),
-    );
-
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("setf_abcd", &formats.float_cond, 1)
-                .operands_in(vec![reg_rflags])
-                .operands_out(vec![abcd])
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                    {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink);
-                    modrm_r_bits(out_reg0, bits, sink);
-                "#,
-                ),
-            regs,
-        )
-        .when_prefixed(setf),
-    );
-
-    // Conditional move (a.k.a integer select)
-    // (maybe-REX.W) 0F 4x modrm(r,r)
-    // 1 byte, modrm(r,r), is after the opcode
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("cmov", &formats.int_select, 1)
-                .operands_in(vec![
-                    OperandConstraint::FixedReg(reg_rflags),
-                    OperandConstraint::RegClass(gpr),
-                    OperandConstraint::RegClass(gpr),
-                ])
-                .operands_out(vec![2])
-                .clobbers_flags(false)
-                .emit(
-                    r#"
-                        {{PUT_OP}}(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink);
-                        modrm_rr(in_reg1, in_reg2, sink);
-                    "#,
-                ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_cmov"),
-    );
-
-    // Bit scan forwards and reverse
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("bsf_and_bsr", &formats.unary, 1)
-                .operands_in(vec![gpr])
-                .operands_out(vec![
-                    OperandConstraint::RegClass(gpr),
-                    OperandConstraint::FixedReg(reg_rflags),
-                ])
-                .emit(
-                    r#"
-                        {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
-                        modrm_rr(in_reg0, out_reg0, sink);
-                    "#,
-                ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_outreg0"),
-    );
-
-    // Arithematic with flag I/O.
-
-    // XX /r, MR form. Add two GPR registers and set carry flag.
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("rout", &formats.binary, 1)
-                .operands_in(vec![gpr, gpr])
-                .operands_out(vec![
-                    OperandConstraint::TiedInput(0),
-                    OperandConstraint::FixedReg(reg_rflags),
-                ])
-                .clobbers_flags(true)
-                .emit(
-                    r#"
-                        {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
-                        modrm_rr(in_reg0, in_reg1, sink);
-                    "#,
-                ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
-    );
-
-    // XX /r, MR form. Add two GPR registers and get carry flag.
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("rin", &formats.ternary, 1)
-                .operands_in(vec![
-                    OperandConstraint::RegClass(gpr),
-                    OperandConstraint::RegClass(gpr),
-                    OperandConstraint::FixedReg(reg_rflags),
-                ])
-                .operands_out(vec![0])
-                .clobbers_flags(true)
-                .emit(
-                    r#"
-                        {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
-                        modrm_rr(in_reg0, in_reg1, sink);
-                    "#,
-                ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
-    );
-
-    // XX /r, MR form. Add two GPR registers with carry flag.
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("rio", &formats.ternary, 1)
-                .operands_in(vec![
-                    OperandConstraint::RegClass(gpr),
-                    OperandConstraint::RegClass(gpr),
-                    OperandConstraint::FixedReg(reg_rflags),
-                ])
-                .operands_out(vec![
-                    OperandConstraint::TiedInput(0),
-                    OperandConstraint::FixedReg(reg_rflags),
-                ])
-                .clobbers_flags(true)
-                .emit(
-                    r#"
-                        {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
-                        modrm_rr(in_reg0, in_reg1, sink);
-                    "#,
-                ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
-    );
-
-    // Compare and set flags.
-
-    // XX /r, MR form. Compare two GPR registers and set flags.
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("rcmp", &formats.binary, 1)
-                .operands_in(vec![gpr, gpr])
-                .operands_out(vec![reg_rflags])
-                .emit(
-                    r#"
-                        {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
-                        modrm_rr(in_reg0, in_reg1, sink);
-                    "#,
-                ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
-    );
-
-    // Same as rcmp, but second operand is the stack pointer.
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("rcmp_sp", &formats.unary, 1)
-            .operands_in(vec![gpr])
-            .operands_out(vec![reg_rflags])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(in_reg0, RU::rsp.into()), sink);
-                    modrm_rr(in_reg0, RU::rsp.into(), sink);
-                "#,
-            ),
-    );
-
-    // XX /r, RM form. Compare two FPR registers and set flags.
-    recipes.add_template_inferred(
-        EncodingRecipeBuilder::new("fcmp", &formats.binary, 1)
-            .operands_in(vec![fpr, fpr])
-            .operands_out(vec![reg_rflags])
-            .emit(
-                r#"
-                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                    modrm_rr(in_reg1, in_reg0, sink);
-                "#,
-            ),
-        "size_with_inferred_rex_for_inreg0_inreg1",
-    );
-
-    {
-        let has_small_offset =
-            InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 8, 0);
-
-        // XX /n, MI form with imm8.
-        recipes.add_template(
-            Template::new(
-                EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm64, 2)
-                    .operands_in(vec![gpr])
-                    .operands_out(vec![reg_rflags])
-                    .inst_predicate(has_small_offset)
-                    .emit(
-                        r#"
-                            {{PUT_OP}}(bits, rex1(in_reg0), sink);
-                            modrm_r_bits(in_reg0, bits, sink);
-                            let imm: i64 = imm.into();
-                            sink.put1(imm as u8);
-                        "#,
-                    ),
-                regs,
-            )
-            .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
-        );
-
-        let has_big_offset =
-            InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 32, 0);
-
-        // XX /n, MI form with imm32.
-        recipes.add_template(
-            Template::new(
-                EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm64, 5)
-                    .operands_in(vec![gpr])
-                    .operands_out(vec![reg_rflags])
-                    .inst_predicate(has_big_offset)
-                    .emit(
-                        r#"
-                            {{PUT_OP}}(bits, rex1(in_reg0), sink);
-                            modrm_r_bits(in_reg0, bits, sink);
-                            let imm: i64 = imm.into();
-                            sink.put4(imm as u32);
-                        "#,
-                    ),
-                regs,
-            )
-            .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
-        );
-    }
-
-    // Test-and-branch.
-    //
-    // This recipe represents the macro fusion of a test and a conditional branch.
-    // This serves two purposes:
-    //
-    // 1. Guarantee that the test and branch get scheduled next to each other so
-    //    macro fusion is guaranteed to be possible.
-    // 2. Hide the status flags from Cranelift which doesn't currently model flags.
-    //
-    // The encoding bits affect both the test and the branch instruction:
-    //
-    // Bits 0-7 are the Jcc opcode.
-    // Bits 8-15 control the test instruction which always has opcode byte 0x85.
-
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("tjccb", &formats.branch, 1 + 2)
-                .operands_in(vec![gpr])
-                .branch_range((3, 8))
-                .emit(
-                    r#"
-                        // test r, r.
-                        {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink);
-                        modrm_rr(in_reg0, in_reg0, sink);
-                        // Jcc instruction.
-                        sink.put1(bits as u8);
-                        disp1(destination, func, sink);
-                    "#,
-                ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
-    );
-
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("tjccd", &formats.branch, 1 + 6)
-                .operands_in(vec![gpr])
-                .branch_range((7, 32))
-                .emit(
-                    r#"
-                        // test r, r.
-                        {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink);
-                        modrm_rr(in_reg0, in_reg0, sink);
-                        // Jcc instruction.
-                        sink.put1(0x0f);
-                        sink.put1(bits as u8);
-                        disp4(destination, func, sink);
-                    "#,
-                ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
-    );
-
-    // 8-bit test-and-branch.
-
-    let t8jccb = recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("t8jccb", &formats.branch, 1 + 2)
-                .operands_in(vec![gpr])
-                .branch_range((3, 8))
-                .emit(
-                    r#"
-                    // test8 r, r.
-                    {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
-                    modrm_rr(in_reg0, in_reg0, sink);
-                    // Jcc instruction.
-                    sink.put1(bits as u8);
-                    disp1(destination, func, sink);
-                "#,
-                ),
-            regs,
-        )
-        .rex_kind(RecipePrefixKind::AlwaysEmitRex),
-    );
-
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("t8jccb_abcd", &formats.branch, 1 + 2)
-                .operands_in(vec![abcd])
-                .branch_range((3, 8))
-                .emit(
-                    r#"
-                    // test8 r, r.
-                    {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
-                    modrm_rr(in_reg0, in_reg0, sink);
-                    // Jcc instruction.
-                    sink.put1(bits as u8);
-                    disp1(destination, func, sink);
-                "#,
-                ),
-            regs,
-        )
-        .when_prefixed(t8jccb),
-    );
-
-    let t8jccd = recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("t8jccd", &formats.branch, 1 + 6)
-                .operands_in(vec![gpr])
-                .branch_range((7, 32))
-                .emit(
-                    r#"
-                    // test8 r, r.
-                    {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
-                    modrm_rr(in_reg0, in_reg0, sink);
-                    // Jcc instruction.
-                    sink.put1(0x0f);
-                    sink.put1(bits as u8);
-                    disp4(destination, func, sink);
-                "#,
-                ),
-            regs,
-        )
-        .rex_kind(RecipePrefixKind::AlwaysEmitRex),
-    );
-
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("t8jccd_abcd", &formats.branch, 1 + 6)
-                .operands_in(vec![abcd])
-                .branch_range((7, 32))
-                .emit(
-                    r#"
-                    // test8 r, r.
-                    {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
-                    modrm_rr(in_reg0, in_reg0, sink);
-                    // Jcc instruction.
-                    sink.put1(0x0f);
-                    sink.put1(bits as u8);
-                    disp4(destination, func, sink);
-                "#,
-                ),
-            regs,
-        )
-        .when_prefixed(t8jccd),
-    );
-
-    // Worst case test-and-branch recipe for brz.b1 and brnz.b1 in 32-bit mode.
-    // The register allocator can't handle a branch instruction with constrained
-    // operands like the t8jccd_abcd above. This variant can accept the b1 opernd in
-    // any register, but is is larger because it uses a 32-bit test instruction with
-    // a 0xff immediate.
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("t8jccd_long", &formats.branch, 5 + 6)
-            .operands_in(vec![gpr])
-            .branch_range((11, 32))
-            .emit(
-                r#"
-                    // test32 r, 0xff.
-                    {{PUT_OP}}((bits & 0xff00) | 0xf7, rex1(in_reg0), sink);
-                    modrm_r_bits(in_reg0, bits, sink);
-                    sink.put4(0xff);
-                    // Jcc instruction.
-                    sink.put1(0x0f);
-                    sink.put1(bits as u8);
-                    disp4(destination, func, sink);
-                "#,
-            ),
-    );
-
-    // Comparison that produces a `b1` result in a GPR.
-    //
-    // This is a macro of a `cmp` instruction followed by a `setCC` instruction.
-    //
-    // TODO This is not a great solution because:
-    //
-    // - The cmp+setcc combination is not recognized by CPU's macro fusion.
-    // - The 64-bit encoding has issues with REX prefixes. The `cmp` and `setCC`
-    //   instructions may need a REX independently.
-    // - Modeling CPU flags in the type system would be better.
-    //
-    // Since the `setCC` instructions only write an 8-bit register, we use that as
-    // our `b1` representation: A `b1` value is represented as a GPR where the low 8
-    // bits are known to be 0 or 1. The high bits are undefined.
-    //
-    // This bandaid macro doesn't support a REX prefix for the final `setCC`
-    // instruction, so it is limited to the `ABCD` register class for booleans.
-    // The omission of a `when_prefixed` alternative is deliberate here.
-
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("icscc", &formats.int_compare, 1 + 3)
-                .operands_in(vec![gpr, gpr])
-                .operands_out(vec![abcd])
-                .emit(
-                    r#"
-                        // Comparison instruction.
-                        {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
-                        modrm_rr(in_reg0, in_reg1, sink);
-                        // `setCC` instruction, no REX.
-                        let setcc = 0x90 | icc2opc(cond);
-                        sink.put1(0x0f);
-                        sink.put1(setcc as u8);
-                        modrm_rr(out_reg0, 0, sink);
-                    "#,
-                ),
-            regs,
-        )
-        .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0_inreg1"),
-    );
-
-    recipes.add_template_inferred(
-        EncodingRecipeBuilder::new("icscc_fpr", &formats.int_compare, 1)
-            .operands_in(vec![fpr, fpr])
-            .operands_out(vec![0])
-            .emit(
-                r#"
-                    // Comparison instruction.
-                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                    modrm_rr(in_reg1, in_reg0, sink);
-                "#,
-            ),
-        "size_with_inferred_rex_for_inreg0_inreg1",
-    );
-
-    {
-        let is_small_imm =
-            InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 8, 0);
-
-        recipes.add_template(
-            Template::new(
-                EncodingRecipeBuilder::new("icscc_ib", &formats.int_compare_imm, 2 + 3)
-                    .operands_in(vec![gpr])
-                    .operands_out(vec![abcd])
-                    .inst_predicate(is_small_imm)
-                    .emit(
-                        r#"
-                            // Comparison instruction.
-                            {{PUT_OP}}(bits, rex1(in_reg0), sink);
-                            modrm_r_bits(in_reg0, bits, sink);
-                            let imm: i64 = imm.into();
-                            sink.put1(imm as u8);
-                            // `setCC` instruction, no REX.
-                            let setcc = 0x90 | icc2opc(cond);
-                            sink.put1(0x0f);
-                            sink.put1(setcc as u8);
-                            modrm_rr(out_reg0, 0, sink);
-                        "#,
-                    ),
-                regs,
-            )
-            .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
-        );
-
-        let is_big_imm =
-            InstructionPredicate::new_is_signed_int(&*formats.int_compare_imm, "imm", 32, 0);
-
-        recipes.add_template(
-            Template::new(
-                EncodingRecipeBuilder::new("icscc_id", &formats.int_compare_imm, 5 + 3)
-                    .operands_in(vec![gpr])
-                    .operands_out(vec![abcd])
-                    .inst_predicate(is_big_imm)
-                    .emit(
-                        r#"
-                            // Comparison instruction.
-                            {{PUT_OP}}(bits, rex1(in_reg0), sink);
-                            modrm_r_bits(in_reg0, bits, sink);
-                            let imm: i64 = imm.into();
-                            sink.put4(imm as u32);
-                            // `setCC` instruction, no REX.
-                            let setcc = 0x90 | icc2opc(cond);
-                            sink.put1(0x0f);
-                            sink.put1(setcc as u8);
-                            modrm_rr(out_reg0, 0, sink);
-                        "#,
-                    ),
-                regs,
-            )
-            .inferred_rex_compute_size("size_with_inferred_rex_for_inreg0"),
-        );
-    }
-
-    // Make a FloatCompare instruction predicate with the supported condition codes.
-    //
-    // Same thing for floating point.
-    //
-    // The ucomiss/ucomisd instructions set the FLAGS bits CF/PF/CF like this:
-    //
-    //    ZPC OSA
-    // UN 111 000
-    // GT 000 000
-    // LT 001 000
-    // EQ 100 000
-    //
-    // Not all floating point condition codes are supported.
-    // The omission of a `when_prefixed` alternative is deliberate here.
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("fcscc", &formats.float_compare, 1 + 3)
-            .operands_in(vec![fpr, fpr])
-            .operands_out(vec![abcd])
-            .inst_predicate(supported_floatccs_predicate(
-                &supported_floatccs,
-                &*formats.float_compare,
-            ))
-            .emit(
-                r#"
-                    // Comparison instruction.
-                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                    modrm_rr(in_reg1, in_reg0, sink);
-                    // `setCC` instruction, no REX.
-                    use crate::ir::condcodes::FloatCC::*;
-                    let setcc = match cond {
-                        Ordered                    => 0x9b, // EQ|LT|GT => setnp (P=0)
-                        Unordered                  => 0x9a, // UN       => setp  (P=1)
-                        OrderedNotEqual            => 0x95, // LT|GT    => setne (Z=0),
-                        UnorderedOrEqual           => 0x94, // UN|EQ    => sete  (Z=1)
-                        GreaterThan                => 0x97, // GT       => seta  (C=0&Z=0)
-                        GreaterThanOrEqual         => 0x93, // GT|EQ    => setae (C=0)
-                        UnorderedOrLessThan        => 0x92, // UN|LT    => setb  (C=1)
-                        UnorderedOrLessThanOrEqual => 0x96, // UN|LT|EQ => setbe (Z=1|C=1)
-                        Equal |                       // EQ
-                        NotEqual |                    // UN|LT|GT
-                        LessThan |                    // LT
-                        LessThanOrEqual |             // LT|EQ
-                        UnorderedOrGreaterThan |      // UN|GT
-                        UnorderedOrGreaterThanOrEqual // UN|GT|EQ
-                        => panic!("{} not supported by fcscc", cond),
-                    };
-                    sink.put1(0x0f);
-                    sink.put1(setcc);
-                    modrm_rr(out_reg0, 0, sink);
-                "#,
-            ),
-    );
-
-    {
-        let supported_floatccs: Vec<Literal> = ["eq", "lt", "le", "uno", "ne", "uge", "ugt", "ord"]
-            .iter()
-            .map(|name| Literal::enumerator_for(floatcc, name))
-            .collect();
-        recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("pfcmp", &formats.float_compare, 2)
-                .operands_in(vec![fpr, fpr])
-                .operands_out(vec![0])
-                .inst_predicate(supported_floatccs_predicate(
-                    &supported_floatccs[..],
-                    &*formats.float_compare,
-                ))
-                .emit(
-                    r#"
-                    // Comparison instruction.
-                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
-                    modrm_rr(in_reg1, in_reg0, sink);
-                    // Add immediate byte indicating what type of comparison.
-                    use crate::ir::condcodes::FloatCC::*;
-                    let imm = match cond {
-                        Equal                      => 0x00,
-                        LessThan                   => 0x01,
-                        LessThanOrEqual            => 0x02,
-                        Unordered                  => 0x03,
-                        NotEqual                   => 0x04,
-                        UnorderedOrGreaterThanOrEqual => 0x05,
-                        UnorderedOrGreaterThan => 0x06,
-                        Ordered                    => 0x07,
-                        _ => panic!("{} not supported by pfcmp", cond),
-                    };
-                    sink.put1(imm);
-                "#,
-                ),
-            "size_with_inferred_rex_for_inreg0_inreg1",
-        );
-    }
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("is_zero", &formats.unary, 2 + 2)
-            .operands_in(vec![gpr])
-            .operands_out(vec![abcd])
-            .emit(
-                r#"
-                    // Test instruction.
-                    {{PUT_OP}}(bits, rex2(in_reg0, in_reg0), sink);
-                    modrm_rr(in_reg0, in_reg0, sink);
-                    // Check ZF = 1 flag to see if register holds 0.
-                    sink.put1(0x0f);
-                    sink.put1(0x94);
-                    modrm_rr(out_reg0, 0, sink);
-                "#,
-            ),
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("is_invalid", &formats.unary, 2 + 3)
-            .operands_in(vec![gpr])
-            .operands_out(vec![abcd])
-            .emit(
-                r#"
-                    // Comparison instruction.
-                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
-                    modrm_r_bits(in_reg0, bits, sink);
-                    sink.put1(0xff);
-                    // `setCC` instruction, no REX.
-                    use crate::ir::condcodes::IntCC::*;
-                    let setcc = 0x90 | icc2opc(Equal);
-                    sink.put1(0x0f);
-                    sink.put1(setcc as u8);
-                    modrm_rr(out_reg0, 0, sink);
-                "#,
-            ),
-    );
-
-    recipes.add_recipe(
-        EncodingRecipeBuilder::new("safepoint", &formats.multiary, 0).emit(
-            r#"
-                sink.add_stack_map(args, func, isa);
-            "#,
-        ),
-    );
-
-    // Both `elf_tls_get_addr` and `macho_tls_get_addr` require all caller-saved registers to be spilled.
-    // This is currently special cased in `regalloc/spilling.rs` in the `visit_inst` function.
-
-    recipes.add_recipe(
-        EncodingRecipeBuilder::new("elf_tls_get_addr", &formats.unary_global_value, 16)
-            // FIXME Correct encoding for non rax registers
-            .operands_out(vec![reg_rax])
-            .emit(
-                r#"
-                    // output %rax
-                    // clobbers %rdi
-
-                    // Those data16 prefixes are necessary to pad to 16 bytes.
-
-                    // data16 lea gv@tlsgd(%rip),%rdi
-                    sink.put1(0x66); // data16
-                    sink.put1(0b01001000); // rex.w
-                    const LEA: u8 = 0x8d;
-                    sink.put1(LEA); // lea
-                    modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d
-                    sink.reloc_external(func.srclocs[inst],
-                                        Reloc::ElfX86_64TlsGd,
-                                        &func.global_values[global_value].symbol_name(),
-                                        -4);
-                    sink.put4(0);
-
-                    // data16 data16 callq __tls_get_addr-4
-                    sink.put1(0x66); // data16
-                    sink.put1(0x66); // data16
-                    sink.put1(0b01001000); // rex.w
-                    sink.put1(0xe8); // call
-                    sink.reloc_external(func.srclocs[inst],
-                                        Reloc::X86CallPLTRel4,
-                                        &ExternalName::LibCall(LibCall::ElfTlsGetAddr),
-                                        -4);
-                    sink.put4(0);
-                "#,
-            ),
-    );
-
-    recipes.add_recipe(
-        EncodingRecipeBuilder::new("macho_tls_get_addr", &formats.unary_global_value, 9)
-            // FIXME Correct encoding for non rax registers
-            .operands_out(vec![reg_rax])
-            .emit(
-                r#"
-                    // output %rax
-                    // clobbers %rdi
-
-                    // movq gv@tlv(%rip), %rdi
-                    sink.put1(0x48); // rex
-                    sink.put1(0x8b); // mov
-                    modrm_riprel(0b111/*out_reg0*/, sink); // 0x3d
-                    sink.reloc_external(func.srclocs[inst],
-                                        Reloc::MachOX86_64Tlv,
-                                        &func.global_values[global_value].symbol_name(),
-                                        -4);
-                    sink.put4(0);
-
-                    // callq *(%rdi)
-                    sink.put1(0xff);
-                    sink.put1(0x17);
-                "#,
-            ),
-    );
-
-    recipes.add_template(
-        Template::new(
-        EncodingRecipeBuilder::new("evex_reg_vvvv_rm_128", &formats.binary, 1)
-            .operands_in(vec![fpr, fpr])
-            .operands_out(vec![fpr])
-            .emit(
-                r#"
-                // instruction encoding operands: reg (op1, w), vvvv (op2, r), rm (op3, r)
-                // this maps to:                  out_reg0,     in_reg0,       in_reg1
-                let context = EvexContext::Other { length: EvexVectorLength::V128 };
-                let masking = EvexMasking::None;
-                put_evex(bits, out_reg0, in_reg0, in_reg1, context, masking, sink); // params: reg, vvvv, rm
-                modrm_rr(in_reg1, out_reg0, sink); // params: rm, reg
-                "#,
-            ),
-        regs).rex_kind(RecipePrefixKind::Evex)
-    );
-
-    recipes.add_template(
-        Template::new(
-            EncodingRecipeBuilder::new("evex_reg_rm_128", &formats.unary, 1)
-                .operands_in(vec![fpr])
-                .operands_out(vec![fpr])
-                .emit(
-                    r#"
-                // instruction encoding operands: reg (op1, w), rm (op2, r)
-                // this maps to:                  out_reg0,     in_reg0
-                let context = EvexContext::Other { length: EvexVectorLength::V128 };
-                let masking = EvexMasking::None;
-                put_evex(bits, out_reg0, 0, in_reg0, context, masking, sink); // params: reg, vvvv, rm
-                modrm_rr(in_reg0, out_reg0, sink); // params: rm, reg
-                "#,
-                ),
-            regs).rex_kind(RecipePrefixKind::Evex)
-    );
-
-    recipes
-}
diff --git a/cranelift/codegen/meta/src/isa/x86/registers.rs b/cranelift/codegen/meta/src/isa/x86/registers.rs
deleted file mode 100644
index 85a8965f89..0000000000
--- a/cranelift/codegen/meta/src/isa/x86/registers.rs
+++ /dev/null
@@ -1,43 +0,0 @@
-use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder};
-
-pub(crate) fn define() -> IsaRegs {
-    let mut regs = IsaRegsBuilder::new();
-
-    let builder = RegBankBuilder::new("FloatRegs", "xmm")
-        .units(16)
-        .track_pressure(true);
-    let float_regs = regs.add_bank(builder);
-
-    let builder = RegBankBuilder::new("IntRegs", "r")
-        .units(16)
-        .names(vec!["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"])
-        .track_pressure(true)
-        .pinned_reg(15);
-    let int_regs = regs.add_bank(builder);
-
-    let builder = RegBankBuilder::new("FlagRegs", "")
-        .units(1)
-        .names(vec!["rflags"])
-        .track_pressure(false);
-    let flag_reg = regs.add_bank(builder);
-
-    let builder = RegClassBuilder::new_toplevel("GPR", int_regs);
-    let gpr = regs.add_class(builder);
-
-    let builder = RegClassBuilder::new_toplevel("FPR", float_regs);
-    let fpr = regs.add_class(builder);
-
-    let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg);
-    regs.add_class(builder);
-
-    let builder = RegClassBuilder::subclass_of("GPR8", gpr, 0, 8);
-    let gpr8 = regs.add_class(builder);
-
-    let builder = RegClassBuilder::subclass_of("ABCD", gpr8, 0, 4);
-    regs.add_class(builder);
-
-    let builder = RegClassBuilder::subclass_of("FPR8", fpr, 0, 8);
-    regs.add_class(builder);
-
-    regs.build()
-}
diff --git a/cranelift/codegen/shared/src/isa/mod.rs b/cranelift/codegen/shared/src/isa/mod.rs
deleted file mode 100644
index 4d8e485f6c..0000000000
--- a/cranelift/codegen/shared/src/isa/mod.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-//! Shared ISA-specific definitions.
-
-pub mod x86;
diff --git a/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs b/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs
deleted file mode 100644
index 9edb2a6e6f..0000000000
--- a/cranelift/codegen/shared/src/isa/x86/encoding_bits.rs
+++ /dev/null
@@ -1,419 +0,0 @@
-//! Provides a named interface to the `u16` Encoding bits.
-
-use std::ops::RangeInclusive;
-
-/// Named interface to the `u16` Encoding bits, representing an opcode.
-///
-/// Cranelift requires each recipe to have a single encoding size in bytes.
-/// X86 opcodes are variable length, so we use separate recipes for different
-/// styles of opcodes and prefixes. The opcode format is indicated by the
-/// recipe name prefix.
-///
-/// VEX/XOP and EVEX prefixes are not yet supported.
-/// Encodings using any of these prefixes are represented by separate recipes.
-///
-/// The encoding bits are:
-///
-/// 0-7:   The opcode byte <op>.
-/// 8-9:   pp, mandatory prefix:
-///        00: none (Op*)
-///        01: 66   (Mp*)
-///        10: F3   (Mp*)
-///        11: F2   (Mp*)
-/// 10-11: mm, opcode map:
-///        00: <op>        (Op1/Mp1)
-///        01: 0F <op>     (Op2/Mp2)
-///        10: 0F 38 <op>  (Op3/Mp3)
-///        11: 0F 3A <op>  (Op3/Mp3)
-/// 12-14  rrr, opcode bits for the ModR/M byte for certain opcodes.
-/// 15:    REX.W bit (or VEX.W/E)
-#[derive(Copy, Clone, PartialEq)]
-pub struct EncodingBits(u16);
-const OPCODE: RangeInclusive<u16> = 0..=7;
-const OPCODE_PREFIX: RangeInclusive<u16> = 8..=11; // Includes pp and mm.
-const RRR: RangeInclusive<u16> = 12..=14;
-const REX_W: RangeInclusive<u16> = 15..=15;
-
-impl From<u16> for EncodingBits {
-    fn from(bits: u16) -> Self {
-        Self(bits)
-    }
-}
-
-impl EncodingBits {
-    /// Constructs a new EncodingBits from parts.
-    pub fn new(op_bytes: &[u8], rrr: u16, rex_w: u16) -> Self {
-        assert!(
-            !op_bytes.is_empty(),
-            "op_bytes must include at least one opcode byte"
-        );
-        let mut new = Self::from(0);
-        let last_byte = op_bytes[op_bytes.len() - 1];
-        new.write(OPCODE, last_byte as u16);
-        let prefix: u8 = OpcodePrefix::from_opcode(op_bytes).into();
-        new.write(OPCODE_PREFIX, prefix as u16);
-        new.write(RRR, rrr);
-        new.write(REX_W, rex_w);
-        new
-    }
-
-    /// Returns a copy of the EncodingBits with the RRR bits set.
-    #[inline]
-    pub fn with_rrr(mut self, rrr: u8) -> Self {
-        debug_assert_eq!(self.rrr(), 0);
-        self.write(RRR, rrr.into());
-        self
-    }
-
-    /// Returns a copy of the EncodingBits with the REX.W bit set.
-    #[inline]
-    pub fn with_rex_w(mut self) -> Self {
-        debug_assert_eq!(self.rex_w(), 0);
-        self.write(REX_W, 1);
-        self
-    }
-
-    /// Returns the raw bits.
-    #[inline]
-    pub fn bits(self) -> u16 {
-        self.0
-    }
-
-    /// Convenience method for writing bits to specific range.
-    #[inline]
-    fn write(&mut self, range: RangeInclusive<u16>, value: u16) {
-        assert!(ExactSizeIterator::len(&range) > 0);
-        let size = range.end() - range.start() + 1; // Calculate the number of bits in the range.
-        let mask = (1 << size) - 1; // Generate a bit mask.
-        debug_assert!(
-            value <= mask,
-            "The written value should have fewer than {} bits.",
-            size
-        );
-        let mask_complement = !(mask << *range.start()); // Create the bitwise complement for the clear mask.
-        self.0 &= mask_complement; // Clear the bits in `range`.
-        let value = (value & mask) << *range.start(); // Place the value in the correct location.
-        self.0 |= value; // Modify the bits in `range`.
-    }
-
-    /// Convenience method for reading bits from a specific range.
-    #[inline]
-    fn read(self, range: RangeInclusive<u16>) -> u8 {
-        assert!(ExactSizeIterator::len(&range) > 0);
-        let size = range.end() - range.start() + 1; // Calculate the number of bits in the range.
-        debug_assert!(size <= 8, "This structure expects ranges of at most 8 bits");
-        let mask = (1 << size) - 1; // Generate a bit mask.
-        ((self.0 >> *range.start()) & mask) as u8
-    }
-
-    /// Instruction opcode byte, without the prefix.
-    #[inline]
-    pub fn opcode_byte(self) -> u8 {
-        self.read(OPCODE)
-    }
-
-    /// Prefix kind for the instruction, as an enum.
-    #[inline]
-    pub fn prefix(self) -> OpcodePrefix {
-        OpcodePrefix::from(self.read(OPCODE_PREFIX))
-    }
-
-    /// Extracts the PP bits of the OpcodePrefix.
-    #[inline]
-    pub fn pp(self) -> u8 {
-        self.prefix().to_primitive() & 0x3
-    }
-
-    /// Extracts the MM bits of the OpcodePrefix.
-    #[inline]
-    pub fn mm(self) -> u8 {
-        (self.prefix().to_primitive() >> 2) & 0x3
-    }
-
-    /// Bits for the ModR/M byte for certain opcodes.
-    #[inline]
-    pub fn rrr(self) -> u8 {
-        self.read(RRR)
-    }
-
-    /// REX.W bit (or VEX.W/E).
-    #[inline]
-    pub fn rex_w(self) -> u8 {
-        self.read(REX_W)
-    }
-}
-
-/// Opcode prefix representation.
-///
-/// The prefix type occupies four of the EncodingBits.
-#[allow(non_camel_case_types)]
-#[allow(missing_docs)]
-#[derive(Copy, Clone, Debug, Eq, PartialEq)]
-pub enum OpcodePrefix {
-    Op1,
-    Mp1_66,
-    Mp1_f3,
-    Mp1_f2,
-    Op2_0f,
-    Mp2_66_0f,
-    Mp2_f3_0f,
-    Mp2_f2_0f,
-    Op3_0f_38,
-    Mp3_66_0f_38,
-    Mp3_f3_0f_38,
-    Mp3_f2_0f_38,
-    Op3_0f_3a,
-    Mp3_66_0f_3a,
-    Mp3_f3_0f_3a,
-    Mp3_f2_0f_3a,
-}
-
-impl From<u8> for OpcodePrefix {
-    fn from(n: u8) -> Self {
-        use OpcodePrefix::*;
-        match n {
-            0b0000 => Op1,
-            0b0001 => Mp1_66,
-            0b0010 => Mp1_f3,
-            0b0011 => Mp1_f2,
-            0b0100 => Op2_0f,
-            0b0101 => Mp2_66_0f,
-            0b0110 => Mp2_f3_0f,
-            0b0111 => Mp2_f2_0f,
-            0b1000 => Op3_0f_38,
-            0b1001 => Mp3_66_0f_38,
-            0b1010 => Mp3_f3_0f_38,
-            0b1011 => Mp3_f2_0f_38,
-            0b1100 => Op3_0f_3a,
-            0b1101 => Mp3_66_0f_3a,
-            0b1110 => Mp3_f3_0f_3a,
-            0b1111 => Mp3_f2_0f_3a,
-            _ => panic!("invalid opcode prefix"),
-        }
-    }
-}
-
-impl Into<u8> for OpcodePrefix {
-    fn into(self) -> u8 {
-        use OpcodePrefix::*;
-        match self {
-            Op1 => 0b0000,
-            Mp1_66 => 0b0001,
-            Mp1_f3 => 0b0010,
-            Mp1_f2 => 0b0011,
-            Op2_0f => 0b0100,
-            Mp2_66_0f => 0b0101,
-            Mp2_f3_0f => 0b0110,
-            Mp2_f2_0f => 0b0111,
-            Op3_0f_38 => 0b1000,
-            Mp3_66_0f_38 => 0b1001,
-            Mp3_f3_0f_38 => 0b1010,
-            Mp3_f2_0f_38 => 0b1011,
-            Op3_0f_3a => 0b1100,
-            Mp3_66_0f_3a => 0b1101,
-            Mp3_f3_0f_3a => 0b1110,
-            Mp3_f2_0f_3a => 0b1111,
-        }
-    }
-}
-
-impl OpcodePrefix {
-    /// Convert an opcode prefix to a `u8`; this is a convenience proxy for `Into<u8>`.
-    fn to_primitive(self) -> u8 {
-        self.into()
-    }
-
-    /// Extracts the OpcodePrefix from the opcode.
-    pub fn from_opcode(op_bytes: &[u8]) -> Self {
-        assert!(!op_bytes.is_empty(), "at least one opcode byte");
-
-        let prefix_bytes = &op_bytes[..op_bytes.len() - 1];
-        match prefix_bytes {
-            [] => Self::Op1,
-            [0x66] => Self::Mp1_66,
-            [0xf3] => Self::Mp1_f3,
-            [0xf2] => Self::Mp1_f2,
-            [0x0f] => Self::Op2_0f,
-            [0x66, 0x0f] => Self::Mp2_66_0f,
-            [0xf3, 0x0f] => Self::Mp2_f3_0f,
-            [0xf2, 0x0f] => Self::Mp2_f2_0f,
-            [0x0f, 0x38] => Self::Op3_0f_38,
-            [0x66, 0x0f, 0x38] => Self::Mp3_66_0f_38,
-            [0xf3, 0x0f, 0x38] => Self::Mp3_f3_0f_38,
-            [0xf2, 0x0f, 0x38] => Self::Mp3_f2_0f_38,
-            [0x0f, 0x3a] => Self::Op3_0f_3a,
-            [0x66, 0x0f, 0x3a] => Self::Mp3_66_0f_3a,
-            [0xf3, 0x0f, 0x3a] => Self::Mp3_f3_0f_3a,
-            [0xf2, 0x0f, 0x3a] => Self::Mp3_f2_0f_3a,
-            _ => {
-                panic!("unexpected opcode sequence: {:?}", op_bytes);
-            }
-        }
-    }
-
-    /// Returns the recipe name prefix.
-    ///
-    /// At the moment, each similar OpcodePrefix group is given its own Recipe.
-    /// In order to distinguish them, this string is prefixed.
-    pub fn recipe_name_prefix(self) -> &'static str {
-        use OpcodePrefix::*;
-        match self {
-            Op1 => "Op1",
-            Op2_0f => "Op2",
-            Op3_0f_38 | Op3_0f_3a => "Op3",
-            Mp1_66 | Mp1_f3 | Mp1_f2 => "Mp1",
-            Mp2_66_0f | Mp2_f3_0f | Mp2_f2_0f => "Mp2",
-            Mp3_66_0f_38 | Mp3_f3_0f_38 | Mp3_f2_0f_38 => "Mp3",
-            Mp3_66_0f_3a | Mp3_f3_0f_3a | Mp3_f2_0f_3a => "Mp3",
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    /// Helper function for prefix_roundtrip() to avoid long lines.
-    fn test_roundtrip(p: OpcodePrefix) {
-        assert_eq!(p, OpcodePrefix::from(p.to_primitive()));
-    }
-
-    /// Tests that to/from each opcode matches.
-    #[test]
-    fn prefix_roundtrip() {
-        test_roundtrip(OpcodePrefix::Op1);
-        test_roundtrip(OpcodePrefix::Mp1_66);
-        test_roundtrip(OpcodePrefix::Mp1_f3);
-        test_roundtrip(OpcodePrefix::Mp1_f2);
-        test_roundtrip(OpcodePrefix::Op2_0f);
-        test_roundtrip(OpcodePrefix::Mp2_66_0f);
-        test_roundtrip(OpcodePrefix::Mp2_f3_0f);
-        test_roundtrip(OpcodePrefix::Mp2_f2_0f);
-        test_roundtrip(OpcodePrefix::Op3_0f_38);
-        test_roundtrip(OpcodePrefix::Mp3_66_0f_38);
-        test_roundtrip(OpcodePrefix::Mp3_f3_0f_38);
-        test_roundtrip(OpcodePrefix::Mp3_f2_0f_38);
-        test_roundtrip(OpcodePrefix::Op3_0f_3a);
-        test_roundtrip(OpcodePrefix::Mp3_66_0f_3a);
-        test_roundtrip(OpcodePrefix::Mp3_f3_0f_3a);
-        test_roundtrip(OpcodePrefix::Mp3_f2_0f_3a);
-    }
-
-    #[test]
-    fn prefix_to_name() {
-        assert_eq!(OpcodePrefix::Op1.recipe_name_prefix(), "Op1");
-        assert_eq!(OpcodePrefix::Op2_0f.recipe_name_prefix(), "Op2");
-        assert_eq!(OpcodePrefix::Op3_0f_38.recipe_name_prefix(), "Op3");
-        assert_eq!(OpcodePrefix::Mp1_66.recipe_name_prefix(), "Mp1");
-        assert_eq!(OpcodePrefix::Mp2_66_0f.recipe_name_prefix(), "Mp2");
-        assert_eq!(OpcodePrefix::Mp3_66_0f_3a.recipe_name_prefix(), "Mp3");
-    }
-
-    /// Tests that the opcode_byte is the lower of the EncodingBits.
-    #[test]
-    fn encodingbits_opcode_byte() {
-        let enc = EncodingBits::from(0x00ff);
-        assert_eq!(enc.opcode_byte(), 0xff);
-        assert_eq!(enc.prefix().to_primitive(), 0x0);
-        assert_eq!(enc.rrr(), 0x0);
-        assert_eq!(enc.rex_w(), 0x0);
-
-        let enc = EncodingBits::from(0x00cd);
-        assert_eq!(enc.opcode_byte(), 0xcd);
-    }
-
-    /// Tests that the OpcodePrefix is encoded correctly.
-    #[test]
-    fn encodingbits_prefix() {
-        let enc = EncodingBits::from(0x0c00);
-        assert_eq!(enc.opcode_byte(), 0x00);
-        assert_eq!(enc.prefix().to_primitive(), 0xc);
-        assert_eq!(enc.prefix(), OpcodePrefix::Op3_0f_3a);
-        assert_eq!(enc.rrr(), 0x0);
-        assert_eq!(enc.rex_w(), 0x0);
-    }
-
-    /// Tests that the PP bits are encoded correctly.
-    #[test]
-    fn encodingbits_pp() {
-        let enc = EncodingBits::from(0x0300);
-        assert_eq!(enc.opcode_byte(), 0x0);
-        assert_eq!(enc.pp(), 0x3);
-        assert_eq!(enc.mm(), 0x0);
-        assert_eq!(enc.rrr(), 0x0);
-        assert_eq!(enc.rex_w(), 0x0);
-    }
-
-    /// Tests that the MM bits are encoded correctly.
-    #[test]
-    fn encodingbits_mm() {
-        let enc = EncodingBits::from(0x0c00);
-        assert_eq!(enc.opcode_byte(), 0x0);
-        assert_eq!(enc.pp(), 0x00);
-        assert_eq!(enc.mm(), 0x3);
-        assert_eq!(enc.rrr(), 0x0);
-        assert_eq!(enc.rex_w(), 0x0);
-    }
-
-    /// Tests that the ModR/M bits are encoded correctly.
-    #[test]
-    fn encodingbits_rrr() {
-        let enc = EncodingBits::from(0x5000);
-        assert_eq!(enc.opcode_byte(), 0x0);
-        assert_eq!(enc.prefix().to_primitive(), 0x0);
-        assert_eq!(enc.rrr(), 0x5);
-        assert_eq!(enc.rex_w(), 0x0);
-    }
-
-    /// Tests that the REX.W bit is encoded correctly.
-    #[test]
-    fn encodingbits_rex_w() {
-        let enc = EncodingBits::from(0x8000);
-        assert_eq!(enc.opcode_byte(), 0x00);
-        assert_eq!(enc.prefix().to_primitive(), 0x0);
-        assert_eq!(enc.rrr(), 0x0);
-        assert_eq!(enc.rex_w(), 0x1);
-    }
-
-    /// Tests setting and unsetting a bit using EncodingBits::write.
-    #[test]
-    fn encodingbits_flip() {
-        let mut bits = EncodingBits::from(0);
-        let range = 2..=2;
-
-        bits.write(range.clone(), 1);
-        assert_eq!(bits.bits(), 0b100);
-
-        bits.write(range, 0);
-        assert_eq!(bits.bits(), 0b000);
-    }
-
-    /// Tests a round-trip of EncodingBits from/to a u16 (hardcoded endianness).
-    #[test]
-    fn encodingbits_roundtrip() {
-        let bits: u16 = 0x1234;
-        assert_eq!(EncodingBits::from(bits).bits(), bits);
-    }
-
-    #[test]
-    // I purposely want to divide the bits using the ranges defined above.
-    #[allow(clippy::inconsistent_digit_grouping)]
-    fn encodingbits_construction() {
-        assert_eq!(
-            EncodingBits::new(&[0x66, 0x40], 5, 1).bits(),
-            0b1_101_0001_01000000 // 1 = rex_w, 101 = rrr, 0001 = prefix, 01000000 = opcode
-        );
-    }
-
-    #[test]
-    #[should_panic]
-    fn encodingbits_panics_at_write_to_invalid_range() {
-        EncodingBits::from(0).write(1..=0, 42);
-    }
-
-    #[test]
-    #[should_panic]
-    fn encodingbits_panics_at_read_to_invalid_range() {
-        EncodingBits::from(0).read(1..=0);
-    }
-}
diff --git a/cranelift/codegen/shared/src/isa/x86/mod.rs b/cranelift/codegen/shared/src/isa/x86/mod.rs
deleted file mode 100644
index fb45ae56c3..0000000000
--- a/cranelift/codegen/shared/src/isa/x86/mod.rs
+++ /dev/null
@@ -1,4 +0,0 @@
-//! Shared x86-specific definitions.
-
-mod encoding_bits;
-pub use encoding_bits::*;
diff --git a/cranelift/codegen/shared/src/lib.rs b/cranelift/codegen/shared/src/lib.rs
index 9b4cb941ed..c031ee7440 100644
--- a/cranelift/codegen/shared/src/lib.rs
+++ b/cranelift/codegen/shared/src/lib.rs
@@ -22,7 +22,6 @@
 pub mod condcodes;
 pub mod constant_hash;
 pub mod constants;
-pub mod isa;
 
 /// Version number of this crate.
 pub const VERSION: &str = env!("CARGO_PKG_VERSION");
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index 6a4e18cbe3..18004b5c03 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -3565,45 +3565,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             panic!("ALU+imm and ALU+carry ops should not appear here!");
         }
 
-        #[cfg(feature = "x86")]
-        Opcode::X86Udivmodx
-        | Opcode::X86Sdivmodx
-        | Opcode::X86Umulx
-        | Opcode::X86Smulx
-        | Opcode::X86Cvtt2si
-        | Opcode::X86Fmin
-        | Opcode::X86Fmax
-        | Opcode::X86Push
-        | Opcode::X86Pop
-        | Opcode::X86Bsr
-        | Opcode::X86Bsf
-        | Opcode::X86Pblendw
-        | Opcode::X86Pshufd
-        | Opcode::X86Pshufb
-        | Opcode::X86Pextr
-        | Opcode::X86Pinsr
-        | Opcode::X86Insertps
-        | Opcode::X86Movsd
-        | Opcode::X86Movlhps
-        | Opcode::X86Palignr
-        | Opcode::X86Psll
-        | Opcode::X86Psrl
-        | Opcode::X86Psra
-        | Opcode::X86Ptest
-        | Opcode::X86Pmaxs
-        | Opcode::X86Pmaxu
-        | Opcode::X86Pmins
-        | Opcode::X86Pminu
-        | Opcode::X86Pmullq
-        | Opcode::X86Pmuludq
-        | Opcode::X86Punpckh
-        | Opcode::X86Punpckl
-        | Opcode::X86Vcvtudq2ps
-        | Opcode::X86ElfTlsGetAddr
-        | Opcode::X86MachoTlsGetAddr => {
-            panic!("x86-specific opcode in supposedly arch-neutral IR!");
-        }
-
         Opcode::DummySargT => unreachable!(),
 
         Opcode::Iabs => {
diff --git a/cranelift/codegen/src/isa/legacy/mod.rs b/cranelift/codegen/src/isa/legacy/mod.rs
index a89230f941..15900b9509 100644
--- a/cranelift/codegen/src/isa/legacy/mod.rs
+++ b/cranelift/codegen/src/isa/legacy/mod.rs
@@ -1,12 +1,4 @@
 //! Legacy ("old-style") backends that will be removed in the future.
 
-// N.B.: the old x86-64 backend (`x86`) and the new one (`x64`) are both
-// included whenever building with x86 support. The new backend is the default,
-// but the old can be requested with `BackendVariant::Legacy`. However, if this
-// crate is built with the `old-x86-backend` feature, then the old backend is
-// default instead.
-#[cfg(feature = "x86")]
-pub(crate) mod x86;
-
 #[cfg(feature = "riscv")]
 pub(crate) mod riscv;
diff --git a/cranelift/codegen/src/isa/legacy/x86/abi.rs b/cranelift/codegen/src/isa/legacy/x86/abi.rs
deleted file mode 100644
index 934cfec4dd..0000000000
--- a/cranelift/codegen/src/isa/legacy/x86/abi.rs
+++ /dev/null
@@ -1,1102 +0,0 @@
-//! x86 ABI implementation.
-
-use super::super::super::settings as shared_settings;
-use super::registers::{FPR, GPR, RU};
-use super::settings as isa_settings;
-use crate::abi::{legalize_args, ArgAction, ArgAssigner, ValueConversion};
-use crate::cursor::{Cursor, CursorPosition, EncCursor};
-use crate::ir;
-use crate::ir::immediates::Imm64;
-use crate::ir::stackslot::{StackOffset, StackSize};
-use crate::ir::types;
-use crate::ir::{
-    get_probestack_funcref, AbiParam, ArgumentExtension, ArgumentLoc, ArgumentPurpose, InstBuilder,
-    ValueLoc,
-};
-use crate::isa::{CallConv, RegClass, RegUnit, TargetIsa};
-use crate::regalloc::RegisterSet;
-use crate::result::CodegenResult;
-use crate::stack_layout::layout_stack;
-use alloc::borrow::Cow;
-use core::i32;
-use target_lexicon::{PointerWidth, Triple};
-
-/// Argument registers for x86-64
-static ARG_GPRS: [RU; 6] = [RU::rdi, RU::rsi, RU::rdx, RU::rcx, RU::r8, RU::r9];
-
-/// Return value registers.
-static RET_GPRS: [RU; 3] = [RU::rax, RU::rdx, RU::rcx];
-
-/// Argument registers for x86-64, when using windows fastcall
-static ARG_GPRS_WIN_FASTCALL_X64: [RU; 4] = [RU::rcx, RU::rdx, RU::r8, RU::r9];
-
-/// Return value registers for x86-64, when using windows fastcall
-static RET_GPRS_WIN_FASTCALL_X64: [RU; 1] = [RU::rax];
-
-/// The win64 fastcall ABI uses some shadow stack space, allocated by the caller, that can be used
-/// by the callee for temporary values.
-///
-/// [1] "Space is allocated on the call stack as a shadow store for callees to save" This shadow
-/// store contains the parameters which are passed through registers (ARG_GPRS) and is eventually
-/// used by the callee to save & restore the values of the arguments.
-///
-/// [2] https://blogs.msdn.microsoft.com/oldnewthing/20110302-00/?p=11333 "Although the x64 calling
-/// convention reserves spill space for parameters, you don’t have to use them as such"
-const WIN_SHADOW_STACK_SPACE: StackSize = 32;
-
-/// Stack alignment requirement for functions.
-///
-/// 16 bytes is the perfect stack alignment, because:
-///
-/// - On Win64, "The primary exceptions are the stack pointer and malloc or alloca memory, which
-/// are aligned to 16 bytes in order to aid performance".
-/// - The original 32-bit x86 ELF ABI had a 4-byte aligned stack pointer, but newer versions use a
-/// 16-byte aligned stack pointer.
-/// - This allows using aligned loads and stores on SIMD vectors of 16 bytes that are located
-/// higher up in the stack.
-const STACK_ALIGNMENT: u32 = 16;
-
-#[derive(Clone)]
-struct Args {
-    pointer_bytes: u8,
-    pointer_bits: u8,
-    pointer_type: ir::Type,
-    gpr: &'static [RU],
-    gpr_used: usize,
-    fpr_limit: usize,
-    fpr_used: usize,
-    offset: u32,
-    call_conv: CallConv,
-    shared_flags: shared_settings::Flags,
-    #[allow(dead_code)]
-    isa_flags: isa_settings::Flags,
-    assigning_returns: bool,
-}
-
-impl Args {
-    fn new(
-        bits: u8,
-        gpr: &'static [RU],
-        fpr_limit: usize,
-        call_conv: CallConv,
-        shared_flags: &shared_settings::Flags,
-        isa_flags: &isa_settings::Flags,
-        assigning_returns: bool,
-    ) -> Self {
-        let offset = if call_conv.extends_windows_fastcall() {
-            WIN_SHADOW_STACK_SPACE
-        } else {
-            0
-        };
-
-        Self {
-            pointer_bytes: bits / 8,
-            pointer_bits: bits,
-            pointer_type: ir::Type::int(u16::from(bits)).unwrap(),
-            gpr,
-            gpr_used: 0,
-            fpr_limit,
-            fpr_used: 0,
-            offset,
-            call_conv,
-            shared_flags: shared_flags.clone(),
-            isa_flags: isa_flags.clone(),
-            assigning_returns,
-        }
-    }
-}
-
-impl ArgAssigner for Args {
-    fn assign(&mut self, arg: &AbiParam) -> ArgAction {
-        if let ArgumentPurpose::StructArgument(size) = arg.purpose {
-            if self.call_conv != CallConv::SystemV {
-                panic!(
-                    "The sarg argument purpose is not yet implemented for non-systemv call conv {:?}",
-                    self.call_conv,
-                );
-            }
-            let loc = ArgumentLoc::Stack(self.offset as i32);
-            self.offset += size;
-            debug_assert!(self.offset <= i32::MAX as u32);
-            return ArgAction::AssignAndChangeType(loc, types::SARG_T);
-        }
-
-        let ty = arg.value_type;
-
-        if ty.bits() > u16::from(self.pointer_bits) {
-            if !self.assigning_returns && self.call_conv.extends_windows_fastcall() {
-                // "Any argument that doesn't fit in 8 bytes, or isn't
-                // 1, 2, 4, or 8 bytes, must be passed by reference"
-                return ValueConversion::Pointer(self.pointer_type).into();
-            } else if !ty.is_vector() && !ty.is_float() {
-                // On SystemV large integers and booleans are broken down to fit in a register.
-                return ValueConversion::IntSplit.into();
-            }
-        }
-
-        // Vectors should stay in vector registers unless SIMD is not enabled--then they are split
-        if ty.is_vector() {
-            if self.shared_flags.enable_simd() {
-                let reg = FPR.unit(self.fpr_used);
-                self.fpr_used += 1;
-                return ArgumentLoc::Reg(reg).into();
-            }
-            return ValueConversion::VectorSplit.into();
-        }
-
-        // Small integers are extended to the size of a pointer register, but
-        // only in ABIs that require this. The Baldrdash (SpiderMonkey) ABI
-        // does, but our other supported ABIs on x86 do not.
-        if ty.is_int()
-            && ty.bits() < u16::from(self.pointer_bits)
-            && self.call_conv.extends_baldrdash()
-        {
-            match arg.extension {
-                ArgumentExtension::None => {}
-                ArgumentExtension::Uext => return ValueConversion::Uext(self.pointer_type).into(),
-                ArgumentExtension::Sext => return ValueConversion::Sext(self.pointer_type).into(),
-            }
-        }
-
-        // Handle special-purpose arguments.
-        if ty.is_int() && self.call_conv.extends_baldrdash() {
-            match arg.purpose {
-                // This is SpiderMonkey's `WasmTlsReg`.
-                ArgumentPurpose::VMContext => {
-                    return ArgumentLoc::Reg(if self.pointer_bits == 64 {
-                        RU::r14
-                    } else {
-                        RU::rsi
-                    } as RegUnit)
-                    .into();
-                }
-                // This is SpiderMonkey's `WasmTableCallSigReg`.
-                ArgumentPurpose::SignatureId => {
-                    return ArgumentLoc::Reg(if self.pointer_bits == 64 {
-                        RU::r10
-                    } else {
-                        RU::rcx
-                    } as RegUnit)
-                    .into()
-                }
-                _ => {}
-            }
-        }
-
-        // Try to use a GPR.
-        if !ty.is_float() && self.gpr_used < self.gpr.len() {
-            let reg = self.gpr[self.gpr_used] as RegUnit;
-            self.gpr_used += 1;
-            return ArgumentLoc::Reg(reg).into();
-        }
-
-        // Try to use an FPR.
-        let fpr_offset = if self.call_conv.extends_windows_fastcall() {
-            // Float and general registers on windows share the same parameter index.
-            // The used register depends entirely on the parameter index: Even if XMM0
-            // is not used for the first parameter, it cannot be used for the second parameter.
-            debug_assert_eq!(self.fpr_limit, self.gpr.len());
-            &mut self.gpr_used
-        } else {
-            &mut self.fpr_used
-        };
-
-        if ty.is_float() && *fpr_offset < self.fpr_limit {
-            let reg = FPR.unit(*fpr_offset);
-            *fpr_offset += 1;
-            return ArgumentLoc::Reg(reg).into();
-        }
-
-        // Assign a stack location.
-        let loc = ArgumentLoc::Stack(self.offset as i32);
-        self.offset += u32::from(self.pointer_bytes);
-        debug_assert!(self.offset <= i32::MAX as u32);
-        loc.into()
-    }
-}
-
-/// Legalize `sig`.
-pub fn legalize_signature(
-    sig: &mut Cow<ir::Signature>,
-    triple: &Triple,
-    _current: bool,
-    shared_flags: &shared_settings::Flags,
-    isa_flags: &isa_settings::Flags,
-) {
-    let bits;
-    let mut args;
-
-    match triple.pointer_width().unwrap() {
-        PointerWidth::U16 => panic!(),
-        PointerWidth::U32 => {
-            bits = 32;
-            args = Args::new(bits, &[], 0, sig.call_conv, shared_flags, isa_flags, false);
-        }
-        PointerWidth::U64 => {
-            bits = 64;
-            args = if sig.call_conv.extends_windows_fastcall() {
-                Args::new(
-                    bits,
-                    &ARG_GPRS_WIN_FASTCALL_X64[..],
-                    4,
-                    sig.call_conv,
-                    shared_flags,
-                    isa_flags,
-                    false,
-                )
-            } else {
-                Args::new(
-                    bits,
-                    &ARG_GPRS[..],
-                    8,
-                    sig.call_conv,
-                    shared_flags,
-                    isa_flags,
-                    false,
-                )
-            };
-        }
-    }
-
-    let (ret_regs, ret_fpr_limit) = if sig.call_conv.extends_windows_fastcall() {
-        // windows-x64 calling convention only uses XMM0 or RAX for return values
-        (&RET_GPRS_WIN_FASTCALL_X64[..], 1)
-    } else {
-        (&RET_GPRS[..], 2)
-    };
-
-    let mut rets = Args::new(
-        bits,
-        ret_regs,
-        ret_fpr_limit,
-        sig.call_conv,
-        shared_flags,
-        isa_flags,
-        true,
-    );
-
-    // If we don't have enough available return registers
-    // to fit all of the return values, we need to backtrack and start
-    // assigning locations all over again with a different strategy. In order to
-    // do that, we need a copy of the original assigner for the returns.
-    let mut backup_rets = rets.clone();
-
-    if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) {
-        if new_returns
-            .iter()
-            .filter(|r| r.purpose == ArgumentPurpose::Normal)
-            .any(|r| !r.location.is_reg())
-        {
-            // The return values couldn't all fit into available return
-            // registers. Introduce the use of a struct-return parameter.
-            debug_assert!(!sig.uses_struct_return_param());
-
-            // We're using the first register for the return pointer parameter.
-            let mut ret_ptr_param = AbiParam {
-                value_type: args.pointer_type,
-                purpose: ArgumentPurpose::StructReturn,
-                extension: ArgumentExtension::None,
-                location: ArgumentLoc::Unassigned,
-                legalized_to_pointer: false,
-            };
-            match args.assign(&ret_ptr_param) {
-                ArgAction::Assign(ArgumentLoc::Reg(reg)) => {
-                    ret_ptr_param.location = ArgumentLoc::Reg(reg);
-                    sig.to_mut().params.push(ret_ptr_param);
-                }
-                _ => unreachable!("return pointer should always get a register assignment"),
-            }
-
-            // We're using the first return register for the return pointer (like
-            // sys v does).
-            let mut ret_ptr_return = AbiParam {
-                value_type: args.pointer_type,
-                purpose: ArgumentPurpose::StructReturn,
-                extension: ArgumentExtension::None,
-                location: ArgumentLoc::Unassigned,
-                legalized_to_pointer: false,
-            };
-            match backup_rets.assign(&ret_ptr_return) {
-                ArgAction::Assign(ArgumentLoc::Reg(reg)) => {
-                    ret_ptr_return.location = ArgumentLoc::Reg(reg);
-                    sig.to_mut().returns.push(ret_ptr_return);
-                }
-                _ => unreachable!("return pointer should always get a register assignment"),
-            }
-
-            sig.to_mut().returns.retain(|ret| {
-                // Either this is the return pointer, in which case we want to keep
-                // it, or else assume that it is assigned for a reason and doesn't
-                // conflict with our return pointering legalization.
-                debug_assert_eq!(
-                    ret.location.is_assigned(),
-                    ret.purpose != ArgumentPurpose::Normal
-                );
-                ret.location.is_assigned()
-            });
-
-            if let Some(new_returns) = legalize_args(&sig.returns, &mut backup_rets) {
-                sig.to_mut().returns = new_returns;
-            }
-        } else {
-            sig.to_mut().returns = new_returns;
-        }
-    }
-
-    if let Some(new_params) = legalize_args(&sig.params, &mut args) {
-        sig.to_mut().params = new_params;
-    }
-}
-
-/// Get register class for a type appearing in a legalized signature.
-pub fn regclass_for_abi_type(ty: ir::Type) -> RegClass {
-    if ty.is_int() || ty.is_bool() || ty.is_ref() {
-        GPR
-    } else {
-        FPR
-    }
-}
-
-/// Get the set of allocatable registers for `func`.
-pub fn allocatable_registers(triple: &Triple, flags: &shared_settings::Flags) -> RegisterSet {
-    let mut regs = RegisterSet::new();
-    regs.take(GPR, RU::rsp as RegUnit);
-    regs.take(GPR, RU::rbp as RegUnit);
-
-    // 32-bit arch only has 8 registers.
-    if triple.pointer_width().unwrap() != PointerWidth::U64 {
-        for i in 8..16 {
-            regs.take(GPR, GPR.unit(i));
-            regs.take(FPR, FPR.unit(i));
-        }
-        if flags.enable_pinned_reg() {
-            unimplemented!("Pinned register not implemented on x86-32.");
-        }
-    } else {
-        // Choose r15 as the pinned register on 64-bits: it is non-volatile on native ABIs and
-        // isn't the fixed output register of any instruction.
-        if flags.enable_pinned_reg() {
-            regs.take(GPR, RU::r15 as RegUnit);
-        }
-    }
-
-    regs
-}
-
-/// Get the set of callee-saved general-purpose registers.
-fn callee_saved_gprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] {
-    match isa.triple().pointer_width().unwrap() {
-        PointerWidth::U16 => panic!(),
-        PointerWidth::U32 => &[RU::rbx, RU::rsi, RU::rdi],
-        PointerWidth::U64 => {
-            if call_conv.extends_windows_fastcall() {
-                // "registers RBX, RBP, RDI, RSI, RSP, R12, R13, R14, R15, and XMM6-15 are
-                // considered nonvolatile and must be saved and restored by a function that uses
-                //  them."
-                // as per https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention
-                // RSP & RBP are not listed below, since they are restored automatically during
-                // a function call. If that wasn't the case, function calls (RET) would not work.
-                &[
-                    RU::rbx,
-                    RU::rdi,
-                    RU::rsi,
-                    RU::r12,
-                    RU::r13,
-                    RU::r14,
-                    RU::r15,
-                ]
-            } else {
-                &[RU::rbx, RU::r12, RU::r13, RU::r14, RU::r15]
-            }
-        }
-    }
-}
-
-/// Get the set of callee-saved floating-point (SIMD) registers.
-fn callee_saved_fprs(isa: &dyn TargetIsa, call_conv: CallConv) -> &'static [RU] {
-    match isa.triple().pointer_width().unwrap() {
-        PointerWidth::U16 => panic!(),
-        PointerWidth::U32 => &[],
-        PointerWidth::U64 => {
-            if call_conv.extends_windows_fastcall() {
-                // "registers RBX, ... , and XMM6-15 are considered nonvolatile and must be saved
-                //  and restored by a function that uses them."
-                // as per https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention as of
-                // February 5th, 2020.
-                &[
-                    RU::xmm6,
-                    RU::xmm7,
-                    RU::xmm8,
-                    RU::xmm9,
-                    RU::xmm10,
-                    RU::xmm11,
-                    RU::xmm12,
-                    RU::xmm13,
-                    RU::xmm14,
-                    RU::xmm15,
-                ]
-            } else {
-                &[]
-            }
-        }
-    }
-}
-
-/// Get the set of callee-saved registers that are used.
-fn callee_saved_regs_used(isa: &dyn TargetIsa, func: &ir::Function) -> RegisterSet {
-    let mut all_callee_saved = RegisterSet::empty();
-    for reg in callee_saved_gprs(isa, func.signature.call_conv) {
-        all_callee_saved.free(GPR, *reg as RegUnit);
-    }
-    for reg in callee_saved_fprs(isa, func.signature.call_conv) {
-        all_callee_saved.free(FPR, *reg as RegUnit);
-    }
-
-    let mut used = RegisterSet::empty();
-    for value_loc in func.locations.values() {
-        // Note that `value_loc` here contains only a single unit of a potentially multi-unit
-        // register. We don't use registers that overlap each other in the x86 ISA, but in others
-        // we do. So this should not be blindly reused.
-        if let ValueLoc::Reg(ru) = *value_loc {
-            if GPR.contains(ru) {
-                if !used.is_avail(GPR, ru) {
-                    used.free(GPR, ru);
-                }
-            } else if FPR.contains(ru) {
-                if !used.is_avail(FPR, ru) {
-                    used.free(FPR, ru);
-                }
-            }
-        }
-    }
-
-    // regmove and regfill instructions may temporarily divert values into other registers,
-    // and these are not reflected in `func.locations`. Scan the function for such instructions
-    // and note which callee-saved registers they use.
-    //
-    // TODO: Consider re-evaluating how regmove/regfill/regspill work and whether it's possible
-    // to avoid this step.
-    for block in &func.layout {
-        for inst in func.layout.block_insts(block) {
-            match func.dfg[inst] {
-                ir::instructions::InstructionData::RegMove { dst, .. }
-                | ir::instructions::InstructionData::RegFill { dst, .. } => {
-                    if GPR.contains(dst) {
-                        if !used.is_avail(GPR, dst) {
-                            used.free(GPR, dst);
-                        }
-                    } else if FPR.contains(dst) {
-                        if !used.is_avail(FPR, dst) {
-                            used.free(FPR, dst);
-                        }
-                    }
-                }
-                _ => (),
-            }
-        }
-    }
-
-    used.intersect(&all_callee_saved);
-    used
-}
-
-pub fn prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
-    match func.signature.call_conv {
-        // For now, just translate fast and cold as system_v.
-        CallConv::Fast | CallConv::Cold | CallConv::SystemV | CallConv::WasmtimeSystemV => {
-            system_v_prologue_epilogue(func, isa)
-        }
-        CallConv::WindowsFastcall | CallConv::WasmtimeFastcall => {
-            fastcall_prologue_epilogue(func, isa)
-        }
-        CallConv::BaldrdashSystemV | CallConv::BaldrdashWindows => {
-            baldrdash_prologue_epilogue(func, isa)
-        }
-        CallConv::Probestack => unimplemented!("probestack calling convention"),
-        CallConv::Baldrdash2020 => unimplemented!("Baldrdash ABI 2020"),
-        CallConv::AppleAarch64 | CallConv::WasmtimeAppleAarch64 => unreachable!(),
-    }
-}
-
-fn baldrdash_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
-    debug_assert!(
-        !isa.flags().enable_probestack(),
-        "baldrdash does not expect cranelift to emit stack probes"
-    );
-
-    let word_size = StackSize::from(isa.pointer_bytes());
-    let shadow_store_size = if func.signature.call_conv.extends_windows_fastcall() {
-        WIN_SHADOW_STACK_SPACE
-    } else {
-        0
-    };
-
-    let bytes =
-        StackSize::from(isa.flags().baldrdash_prologue_words()) * word_size + shadow_store_size;
-
-    let mut ss = ir::StackSlotData::new(ir::StackSlotKind::IncomingArg, bytes);
-    ss.offset = Some(-(bytes as StackOffset));
-    func.stack_slots.push(ss);
-
-    let is_leaf = func.is_leaf();
-    layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)?;
-    Ok(())
-}
-
-/// Implementation of the fastcall-based Win64 calling convention described at [1]
-/// [1] https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention
-fn fastcall_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
-    if isa.triple().pointer_width().unwrap() != PointerWidth::U64 {
-        panic!("TODO: windows-fastcall: x86-32 not implemented yet");
-    }
-
-    // The reserved stack area is composed of:
-    //   return address + frame pointer + all callee-saved registers
-    //
-    // Pushing the return address is an implicit function of the `call`
-    // instruction. Each of the others we will then push explicitly. Then we
-    // will adjust the stack pointer to make room for the rest of the required
-    // space for this frame.
-    let csrs = callee_saved_regs_used(isa, func);
-    let gpsr_stack_size = ((csrs.iter(GPR).len() + 2) * isa.pointer_bytes() as usize) as u32;
-    let fpsr_stack_size = (csrs.iter(FPR).len() * types::F64X2.bytes() as usize) as u32;
-    let mut csr_stack_size = gpsr_stack_size + fpsr_stack_size;
-
-    // FPRs must be saved with 16-byte alignment; because they follow the GPRs on the stack, align if needed
-    if fpsr_stack_size > 0 {
-        csr_stack_size = (csr_stack_size + 15) & !15;
-    }
-
-    func.create_stack_slot(ir::StackSlotData {
-        kind: ir::StackSlotKind::IncomingArg,
-        size: csr_stack_size,
-        offset: Some(-(csr_stack_size as StackOffset)),
-    });
-
-    let is_leaf = func.is_leaf();
-
-    // If not a leaf function, allocate an explicit stack slot at the end of the space for the callee's shadow space
-    if !is_leaf {
-        // TODO: eventually use the caller-provided shadow store as spill slot space when laying out the stack
-        func.create_stack_slot(ir::StackSlotData {
-            kind: ir::StackSlotKind::ExplicitSlot,
-            size: WIN_SHADOW_STACK_SPACE,
-            offset: None,
-        });
-    }
-
-    let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32;
-
-    // Subtract the GPR saved register size from the local size because pushes are used for the saves
-    let local_stack_size = i64::from(total_stack_size - gpsr_stack_size as i32);
-
-    // Add CSRs to function signature
-    let reg_type = isa.pointer_type();
-    let sp_arg_index = if fpsr_stack_size > 0 {
-        let sp_arg = ir::AbiParam::special_reg(
-            reg_type,
-            ir::ArgumentPurpose::CalleeSaved,
-            RU::rsp as RegUnit,
-        );
-        let index = func.signature.params.len();
-        func.signature.params.push(sp_arg);
-        Some(index)
-    } else {
-        None
-    };
-    let fp_arg = ir::AbiParam::special_reg(
-        reg_type,
-        ir::ArgumentPurpose::FramePointer,
-        RU::rbp as RegUnit,
-    );
-    func.signature.params.push(fp_arg);
-    func.signature.returns.push(fp_arg);
-
-    for gp_csr in csrs.iter(GPR) {
-        let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, gp_csr);
-        func.signature.params.push(csr_arg);
-        func.signature.returns.push(csr_arg);
-    }
-
-    for fp_csr in csrs.iter(FPR) {
-        // The calling convention described in
-        // https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention only requires
-        // preserving the low 128 bits of XMM6-XMM15.
-        let csr_arg =
-            ir::AbiParam::special_reg(types::F64X2, ir::ArgumentPurpose::CalleeSaved, fp_csr);
-        func.signature.params.push(csr_arg);
-        func.signature.returns.push(csr_arg);
-    }
-
-    // Set up the cursor and insert the prologue
-    let entry_block = func.layout.entry_block().expect("missing entry block");
-    let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block);
-    insert_common_prologue(
-        &mut pos,
-        local_stack_size,
-        reg_type,
-        &csrs,
-        sp_arg_index.is_some(),
-        isa,
-    );
-
-    // Reset the cursor and insert the epilogue
-    let mut pos = pos.at_position(CursorPosition::Nowhere);
-    insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index);
-
-    Ok(())
-}
-
-/// Insert a System V-compatible prologue and epilogue.
-fn system_v_prologue_epilogue(func: &mut ir::Function, isa: &dyn TargetIsa) -> CodegenResult<()> {
-    let pointer_width = isa.triple().pointer_width().unwrap();
-    let word_size = pointer_width.bytes() as usize;
-
-    let csrs = callee_saved_regs_used(isa, func);
-    assert!(
-        csrs.iter(FPR).len() == 0,
-        "SysV ABI does not have callee-save SIMD registers"
-    );
-
-    // The reserved stack area is composed of:
-    //   return address + frame pointer + all callee-saved registers
-    //
-    // Pushing the return address is an implicit function of the `call`
-    // instruction. Each of the others we will then push explicitly. Then we
-    // will adjust the stack pointer to make room for the rest of the required
-    // space for this frame.
-    let csr_stack_size = ((csrs.iter(GPR).len() + 2) * word_size) as i32;
-    func.create_stack_slot(ir::StackSlotData {
-        kind: ir::StackSlotKind::IncomingArg,
-        size: csr_stack_size as u32,
-        offset: Some(-csr_stack_size),
-    });
-
-    let is_leaf = func.is_leaf();
-    let total_stack_size = layout_stack(&mut func.stack_slots, is_leaf, STACK_ALIGNMENT)? as i32;
-    let local_stack_size = i64::from(total_stack_size - csr_stack_size);
-
-    // Add CSRs to function signature
-    let reg_type = ir::Type::int(u16::from(pointer_width.bits())).unwrap();
-    // On X86-32 all parameters, including vmctx, are passed on stack, and we need
-    // to extract vmctx from the stack before we can save the frame pointer.
-    let sp_arg_index = if isa.pointer_bits() == 32 {
-        let sp_arg = ir::AbiParam::special_reg(
-            reg_type,
-            ir::ArgumentPurpose::CalleeSaved,
-            RU::rsp as RegUnit,
-        );
-        let index = func.signature.params.len();
-        func.signature.params.push(sp_arg);
-        Some(index)
-    } else {
-        None
-    };
-    let fp_arg = ir::AbiParam::special_reg(
-        reg_type,
-        ir::ArgumentPurpose::FramePointer,
-        RU::rbp as RegUnit,
-    );
-    func.signature.params.push(fp_arg);
-    func.signature.returns.push(fp_arg);
-
-    for csr in csrs.iter(GPR) {
-        let csr_arg = ir::AbiParam::special_reg(reg_type, ir::ArgumentPurpose::CalleeSaved, csr);
-        func.signature.params.push(csr_arg);
-        func.signature.returns.push(csr_arg);
-    }
-
-    // Set up the cursor and insert the prologue
-    let entry_block = func.layout.entry_block().expect("missing entry block");
-    let mut pos = EncCursor::new(func, isa).at_first_insertion_point(entry_block);
-    insert_common_prologue(
-        &mut pos,
-        local_stack_size,
-        reg_type,
-        &csrs,
-        sp_arg_index.is_some(),
-        isa,
-    );
-
-    // Reset the cursor and insert the epilogue
-    let mut pos = pos.at_position(CursorPosition::Nowhere);
-    insert_common_epilogues(&mut pos, local_stack_size, reg_type, &csrs, sp_arg_index);
-
-    Ok(())
-}
-
-/// Insert the prologue for a given function.
-/// This is used by common calling conventions such as System V.
-fn insert_common_prologue(
-    pos: &mut EncCursor,
-    stack_size: i64,
-    reg_type: ir::types::Type,
-    csrs: &RegisterSet,
-    has_sp_param: bool,
-    isa: &dyn TargetIsa,
-) {
-    let sp = if has_sp_param {
-        let block = pos.current_block().expect("missing block under cursor");
-        let sp = pos.func.dfg.append_block_param(block, reg_type);
-        pos.func.locations[sp] = ir::ValueLoc::Reg(RU::rsp as RegUnit);
-        Some(sp)
-    } else {
-        None
-    };
-
-    // If this is a leaf function with zero stack, then there's no need to
-    // insert a stack check since it can't overflow anything and
-    // forward-progress is guarantee so long as loop are handled anyway.
-    //
-    // If this has a stack size it could stack overflow, or if it isn't a leaf
-    // it could be part of a long call chain which we need to check anyway.
-    //
-    // First we look for the stack limit as a special argument to the function,
-    // and failing that we see if a custom stack limit factory has been provided
-    // which will be used to likely calculate the stack limit from the arguments
-    // or perhaps constants.
-    if stack_size > 0 || !pos.func.is_leaf() {
-        let scratch = ir::ValueLoc::Reg(RU::rax as RegUnit);
-        let stack_limit_arg = match pos.func.special_param(ArgumentPurpose::StackLimit) {
-            Some(arg) => {
-                let copy = pos.ins().copy(arg);
-                pos.func.locations[copy] = scratch;
-                Some(copy)
-            }
-            None => pos
-                .func
-                .stack_limit
-                .map(|gv| interpret_gv(pos, gv, sp, scratch)),
-        };
-        if let Some(stack_limit_arg) = stack_limit_arg {
-            insert_stack_check(pos, stack_size, stack_limit_arg);
-        }
-    }
-
-    // Append param to entry block
-    let block = pos.current_block().expect("missing block under cursor");
-    let fp = pos.func.dfg.append_block_param(block, reg_type);
-    pos.func.locations[fp] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
-
-    pos.ins().x86_push(fp);
-
-    let mov_sp_inst = pos
-        .ins()
-        .copy_special(RU::rsp as RegUnit, RU::rbp as RegUnit);
-
-    let mut last_csr_push = None;
-    for reg in csrs.iter(GPR) {
-        // Append param to entry block
-        let csr_arg = pos.func.dfg.append_block_param(block, reg_type);
-
-        // Assign it a location
-        pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
-        last_csr_push = Some(pos.ins().x86_push(csr_arg));
-    }
-
-    // Allocate stack frame storage.
-    let mut adjust_sp_inst = None;
-    if stack_size > 0 {
-        if isa.flags().enable_probestack() && stack_size > (1 << isa.flags().probestack_size_log2())
-        {
-            // Emit a stack probe.
-            let rax = RU::rax as RegUnit;
-            let rax_val = ir::ValueLoc::Reg(rax);
-
-            // The probestack function expects its input in %rax.
-            let arg = pos.ins().iconst(reg_type, stack_size);
-            pos.func.locations[arg] = rax_val;
-
-            // Call the probestack function.
-            let callee = get_probestack_funcref(pos.func, reg_type, rax, isa);
-
-            // Make the call.
-            let call = if !isa.flags().is_pic()
-                && isa.triple().pointer_width().unwrap() == PointerWidth::U64
-                && !pos.func.dfg.ext_funcs[callee].colocated
-            {
-                // 64-bit non-PIC non-colocated calls need to be legalized to call_indirect.
-                // Use r11 as it may be clobbered under all supported calling conventions.
-                let r11 = RU::r11 as RegUnit;
-                let sig = pos.func.dfg.ext_funcs[callee].signature;
-                let addr = pos.ins().func_addr(reg_type, callee);
-                pos.func.locations[addr] = ir::ValueLoc::Reg(r11);
-                pos.ins().call_indirect(sig, addr, &[arg])
-            } else {
-                // Otherwise just do a normal call.
-                pos.ins().call(callee, &[arg])
-            };
-
-            // If the probestack function doesn't adjust sp, do it ourselves.
-            if !isa.flags().probestack_func_adjusts_sp() {
-                let result = pos.func.dfg.inst_results(call)[0];
-                pos.func.locations[result] = rax_val;
-                adjust_sp_inst = Some(pos.ins().adjust_sp_down(result));
-            }
-        } else {
-            // Simply decrement the stack pointer.
-            adjust_sp_inst = Some(pos.ins().adjust_sp_down_imm(Imm64::new(stack_size)));
-        }
-    }
-
-    // With the stack pointer adjusted, save any callee-saved floating point registers via offset
-    // FPR saves are at the highest addresses of the local frame allocation, immediately following the GPR pushes
-    let mut last_fpr_save = None;
-
-    for (i, reg) in csrs.iter(FPR).enumerate() {
-        // Append param to entry block
-        let csr_arg = pos.func.dfg.append_block_param(block, types::F64X2);
-
-        // Since regalloc has already run, we must assign a location.
-        pos.func.locations[csr_arg] = ir::ValueLoc::Reg(reg);
-
-        // Offset to where the register is saved relative to RSP, accounting for FPR save alignment
-        let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64
-            + (stack_size % types::F64X2.bytes() as i64);
-
-        last_fpr_save = Some(pos.ins().store(
-            ir::MemFlags::trusted(),
-            csr_arg,
-            sp.expect("FPR save requires SP param"),
-            (stack_size - offset) as i32,
-        ));
-    }
-
-    pos.func.prologue_end = Some(
-        last_fpr_save
-            .or(adjust_sp_inst)
-            .or(last_csr_push)
-            .unwrap_or(mov_sp_inst),
-    );
-}
-
-/// Inserts code necessary to calculate `gv`.
-///
-/// Note that this is typically done with `ins().global_value(...)` but that
-/// requires legalization to run to encode it, and we're running super late
-/// here in the backend where legalization isn't possible. To get around this
-/// we manually interpret the `gv` specified and do register allocation for
-/// intermediate values.
-///
-/// This is an incomplete implementation of loading `GlobalValue` values to get
-/// compared to the stack pointer, but currently it serves enough functionality
-/// to get this implemented in `wasmtime` itself. This'll likely get expanded a
-/// bit over time!
-fn interpret_gv(
-    pos: &mut EncCursor,
-    gv: ir::GlobalValue,
-    sp: Option<ir::Value>,
-    scratch: ir::ValueLoc,
-) -> ir::Value {
-    match pos.func.global_values[gv] {
-        ir::GlobalValueData::VMContext => {
-            let vmctx_index = pos
-                .func
-                .signature
-                .special_param_index(ir::ArgumentPurpose::VMContext)
-                .expect("no vmcontext parameter found");
-            match pos.func.signature.params[vmctx_index] {
-                AbiParam {
-                    location: ArgumentLoc::Reg(_),
-                    ..
-                } => {
-                    let entry = pos.func.layout.entry_block().unwrap();
-                    pos.func.dfg.block_params(entry)[vmctx_index]
-                }
-                AbiParam {
-                    location: ArgumentLoc::Stack(offset),
-                    value_type,
-                    ..
-                } => {
-                    let offset =
-                        offset + i32::from(pos.isa.pointer_bytes() * (1 + vmctx_index as u8));
-                    // The following access can be marked `trusted` because it is a load of an argument. We
-                    // know it is safe because it was safe to write it in preparing this function call.
-                    let ret =
-                        pos.ins()
-                            .load(value_type, ir::MemFlags::trusted(), sp.unwrap(), offset);
-                    pos.func.locations[ret] = scratch;
-                    return ret;
-                }
-                AbiParam {
-                    location: ArgumentLoc::Unassigned,
-                    ..
-                } => unreachable!(),
-            }
-        }
-        ir::GlobalValueData::Load {
-            base,
-            offset,
-            global_type,
-            readonly: _,
-        } => {
-            let base = interpret_gv(pos, base, sp, scratch);
-            let ret = pos
-                .ins()
-                .load(global_type, ir::MemFlags::trusted(), base, offset);
-            pos.func.locations[ret] = scratch;
-            return ret;
-        }
-        ref other => panic!("global value for stack limit not supported: {}", other),
-    }
-}
-
-/// Insert a check that generates a trap if the stack pointer goes
-/// below a value in `stack_limit_arg`.
-fn insert_stack_check(pos: &mut EncCursor, stack_size: i64, stack_limit_arg: ir::Value) {
-    use crate::ir::condcodes::IntCC;
-
-    // Our stack pointer, after subtracting `stack_size`, must not be below
-    // `stack_limit_arg`. To do this we're going to add `stack_size` to
-    // `stack_limit_arg` and see if the stack pointer is below that. The
-    // `stack_size + stack_limit_arg` computation might overflow, however, due
-    // to how stack limits may be loaded and set externally to trigger a trap.
-    //
-    // To handle this we'll need an extra comparison to see if the stack
-    // pointer is already below `stack_limit_arg`. Most of the time this
-    // isn't necessary though since the stack limit which triggers a trap is
-    // likely a sentinel somewhere around `usize::max_value()`. In that case
-    // only conditionally emit this pre-flight check. That way most functions
-    // only have the one comparison, but are also guaranteed that if we add
-    // `stack_size` to `stack_limit_arg` is won't overflow.
-    //
-    // This does mean that code generators which use this stack check
-    // functionality need to ensure that values stored into the stack limit
-    // will never overflow if this threshold is added.
-    if stack_size >= 32 * 1024 {
-        let cflags = pos.ins().ifcmp_sp(stack_limit_arg);
-        pos.func.locations[cflags] = ir::ValueLoc::Reg(RU::rflags as RegUnit);
-        pos.ins().trapif(
-            IntCC::UnsignedGreaterThanOrEqual,
-            cflags,
-            ir::TrapCode::StackOverflow,
-        );
-    }
-
-    // Copy `stack_limit_arg` into a %rax and use it for calculating
-    // a SP threshold.
-    let sp_threshold = pos.ins().iadd_imm(stack_limit_arg, stack_size);
-    pos.func.locations[sp_threshold] = ir::ValueLoc::Reg(RU::rax as RegUnit);
-
-    // If the stack pointer currently reaches the SP threshold or below it then after opening
-    // the current stack frame, the current stack pointer will reach the limit.
-    let cflags = pos.ins().ifcmp_sp(sp_threshold);
-    pos.func.locations[cflags] = ir::ValueLoc::Reg(RU::rflags as RegUnit);
-    pos.ins().trapif(
-        IntCC::UnsignedGreaterThanOrEqual,
-        cflags,
-        ir::TrapCode::StackOverflow,
-    );
-}
-
-/// Find all `return` instructions and insert epilogues before them.
-fn insert_common_epilogues(
-    pos: &mut EncCursor,
-    stack_size: i64,
-    reg_type: ir::types::Type,
-    csrs: &RegisterSet,
-    sp_arg_index: Option<usize>,
-) {
-    while let Some(block) = pos.next_block() {
-        pos.goto_last_inst(block);
-        if let Some(inst) = pos.current_inst() {
-            if pos.func.dfg[inst].opcode().is_return() {
-                insert_common_epilogue(inst, block, stack_size, pos, reg_type, csrs, sp_arg_index);
-            }
-        }
-    }
-}
-
-/// Insert an epilogue given a specific `return` instruction.
-/// This is used by common calling conventions such as System V.
-fn insert_common_epilogue(
-    inst: ir::Inst,
-    block: ir::Block,
-    stack_size: i64,
-    pos: &mut EncCursor,
-    reg_type: ir::types::Type,
-    csrs: &RegisterSet,
-    sp_arg_index: Option<usize>,
-) {
-    // Insert the pop of the frame pointer
-    let fp_pop = pos.ins().x86_pop(reg_type);
-    let fp_pop_inst = pos.prev_inst().unwrap();
-    pos.func.locations[fp_pop] = ir::ValueLoc::Reg(RU::rbp as RegUnit);
-    pos.func.dfg.append_inst_arg(inst, fp_pop);
-
-    // Insert the CSR pops
-    let mut first_csr_pop_inst = None;
-    for reg in csrs.iter(GPR) {
-        let csr_pop = pos.ins().x86_pop(reg_type);
-        first_csr_pop_inst = pos.prev_inst();
-        assert!(first_csr_pop_inst.is_some());
-        pos.func.locations[csr_pop] = ir::ValueLoc::Reg(reg);
-        pos.func.dfg.append_inst_arg(inst, csr_pop);
-    }
-
-    // Insert the adjustment of SP
-    let mut sp_adjust_inst = None;
-    if stack_size > 0 {
-        pos.ins().adjust_sp_up_imm(Imm64::new(stack_size));
-        sp_adjust_inst = pos.prev_inst();
-        assert!(sp_adjust_inst.is_some());
-    }
-
-    let mut first_fpr_load = None;
-    if let Some(index) = sp_arg_index {
-        let sp = pos
-            .func
-            .dfg
-            .block_params(pos.func.layout.entry_block().unwrap())[index];
-
-        // Insert the FPR loads (unlike the GPRs, which are stack pops, these are in-order loads)
-        for (i, reg) in csrs.iter(FPR).enumerate() {
-            // Offset to where the register is saved relative to RSP, accounting for FPR save alignment
-            let offset = ((i + 1) * types::F64X2.bytes() as usize) as i64
-                + (stack_size % types::F64X2.bytes() as i64);
-
-            let value = pos.ins().load(
-                types::F64X2,
-                ir::MemFlags::trusted(),
-                sp,
-                (stack_size - offset) as i32,
-            );
-
-            first_fpr_load.get_or_insert(pos.current_inst().expect("current inst"));
-
-            pos.func.locations[value] = ir::ValueLoc::Reg(reg);
-            pos.func.dfg.append_inst_arg(inst, value);
-        }
-    } else {
-        assert!(csrs.iter(FPR).len() == 0);
-    }
-
-    pos.func.epilogues_start.push((
-        first_fpr_load
-            .or(sp_adjust_inst)
-            .or(first_csr_pop_inst)
-            .unwrap_or(fp_pop_inst),
-        block,
-    ));
-}
-
-#[cfg(feature = "unwind")]
-pub fn create_unwind_info(
-    func: &ir::Function,
-    isa: &dyn TargetIsa,
-) -> CodegenResult<Option<crate::isa::unwind::UnwindInfo>> {
-    use crate::isa::unwind::UnwindInfo;
-    use crate::machinst::UnwindInfoKind;
-
-    // Assumption: RBP is being used as the frame pointer for both calling conventions
-    // In the future, we should be omitting frame pointer as an optimization, so this will change
-    Ok(match isa.unwind_info_kind() {
-        UnwindInfoKind::SystemV => {
-            super::unwind::systemv::create_unwind_info(func, isa)?.map(|u| UnwindInfo::SystemV(u))
-        }
-        UnwindInfoKind::Windows => {
-            super::unwind::winx64::create_unwind_info(func, isa)?.map(|u| UnwindInfo::WindowsX64(u))
-        }
-        UnwindInfoKind::None => None,
-    })
-}
diff --git a/cranelift/codegen/src/isa/legacy/x86/binemit.rs b/cranelift/codegen/src/isa/legacy/x86/binemit.rs
deleted file mode 100644
index 0480873672..0000000000
--- a/cranelift/codegen/src/isa/legacy/x86/binemit.rs
+++ /dev/null
@@ -1,578 +0,0 @@
-//! Emitting binary x86 machine code.
-
-use super::enc_tables::{needs_offset, needs_sib_byte};
-use super::registers::RU;
-use crate::binemit::{bad_encoding, CodeSink, Reloc};
-use crate::ir::condcodes::{CondCode, FloatCC, IntCC};
-use crate::ir::{
-    Block, Constant, ExternalName, Function, Inst, InstructionData, JumpTable, LibCall, Opcode,
-    TrapCode,
-};
-use crate::isa::{RegUnit, StackBase, StackBaseMask, StackRef, TargetIsa};
-use crate::regalloc::RegDiversions;
-use cranelift_codegen_shared::isa::x86::EncodingBits;
-
-include!(concat!(env!("OUT_DIR"), "/binemit-x86.rs"));
-
-// Convert a stack base to the corresponding register.
-fn stk_base(base: StackBase) -> RegUnit {
-    let ru = match base {
-        StackBase::SP => RU::rsp,
-        StackBase::FP => RU::rbp,
-        StackBase::Zone => unimplemented!(),
-    };
-    ru as RegUnit
-}
-
-// Mandatory prefix bytes for Mp* opcodes.
-const PREFIX: [u8; 3] = [0x66, 0xf3, 0xf2];
-
-// Second byte for three-byte opcodes for mm=0b10 and mm=0b11.
-const OP3_BYTE2: [u8; 2] = [0x38, 0x3a];
-
-// A REX prefix with no bits set: 0b0100WRXB.
-const BASE_REX: u8 = 0b0100_0000;
-
-// Create a single-register REX prefix, setting the B bit to bit 3 of the register.
-// This is used for instructions that encode a register in the low 3 bits of the opcode and for
-// instructions that use the ModR/M `reg` field for something else.
-fn rex1(reg_b: RegUnit) -> u8 {
-    let b = ((reg_b >> 3) & 1) as u8;
-    BASE_REX | b
-}
-
-// Create a dual-register REX prefix, setting:
-//
-// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
-// REX.R = bit 3 of reg register.
-fn rex2(rm: RegUnit, reg: RegUnit) -> u8 {
-    let b = ((rm >> 3) & 1) as u8;
-    let r = ((reg >> 3) & 1) as u8;
-    BASE_REX | b | (r << 2)
-}
-
-// Create a three-register REX prefix, setting:
-//
-// REX.B = bit 3 of r/m register, or SIB base register when a SIB byte is present.
-// REX.R = bit 3 of reg register.
-// REX.X = bit 3 of SIB index register.
-fn rex3(rm: RegUnit, reg: RegUnit, index: RegUnit) -> u8 {
-    let b = ((rm >> 3) & 1) as u8;
-    let r = ((reg >> 3) & 1) as u8;
-    let x = ((index >> 3) & 1) as u8;
-    BASE_REX | b | (x << 1) | (r << 2)
-}
-
-/// Encode the RXBR' bits of the EVEX P0 byte. For an explanation of these bits, see section 2.6.1
-/// in the Intel Software Development Manual, volume 2A. These bits can be used by different
-/// addressing modes (see section 2.6.2), requiring different `vex*` functions than this one.
-fn evex2(rm: RegUnit, reg: RegUnit) -> u8 {
-    let b = (!(rm >> 3) & 1) as u8;
-    let x = (!(rm >> 4) & 1) as u8;
-    let r = (!(reg >> 3) & 1) as u8;
-    let r_ = (!(reg >> 4) & 1) as u8;
-    0x00 | r_ | (b << 1) | (x << 2) | (r << 3)
-}
-
-/// Determines whether a REX prefix should be emitted. A REX byte always has 0100 in bits 7:4; bits
-/// 3:0 correspond to WRXB. W allows certain instructions to declare a 64-bit operand size; because
-/// [needs_rex] is only used by [infer_rex] and we prevent [infer_rex] from using [w] in
-/// [Template::build], we do not need to check again whether [w] forces an inferred REX prefix--it
-/// always does and should be encoded like `.rex().w()`. The RXB are extension of ModR/M or SIB
-/// fields; see section 2.2.1.2 in the Intel Software Development Manual.
-#[inline]
-fn needs_rex(rex: u8) -> bool {
-    rex != BASE_REX
-}
-
-// Emit a REX prefix.
-//
-// The R, X, and B bits are computed from registers using the functions above. The W bit is
-// extracted from `bits`.
-fn rex_prefix<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(rex & 0xf8, BASE_REX);
-    let w = EncodingBits::from(bits).rex_w();
-    sink.put1(rex | (w << 3));
-}
-
-// Emit a single-byte opcode with no REX prefix.
-fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*");
-    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op1 encoding");
-    sink.put1(bits as u8);
-}
-
-// Emit a single-byte opcode with REX prefix.
-fn put_rexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for RexOp1*");
-    rex_prefix(bits, rex, sink);
-    sink.put1(bits as u8);
-}
-
-/// Emit a single-byte opcode with inferred REX prefix.
-fn put_dynrexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(bits & 0x0f00, 0, "Invalid encoding bits for DynRexOp1*");
-    if needs_rex(rex) {
-        rex_prefix(bits, rex, sink);
-    }
-    sink.put1(bits as u8);
-}
-
-// Emit two-byte opcode: 0F XX
-fn put_op2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*");
-    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op2 encoding");
-    sink.put1(0x0f);
-    sink.put1(bits as u8);
-}
-
-// Emit two-byte opcode: 0F XX with REX prefix.
-fn put_rexop2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(bits & 0x0f00, 0x0400, "Invalid encoding bits for RexOp2*");
-    rex_prefix(bits, rex, sink);
-    sink.put1(0x0f);
-    sink.put1(bits as u8);
-}
-
-/// Emit two-byte opcode: 0F XX with inferred REX prefix.
-fn put_dynrexop2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(
-        bits & 0x0f00,
-        0x0400,
-        "Invalid encoding bits for DynRexOp2*"
-    );
-    if needs_rex(rex) {
-        rex_prefix(bits, rex, sink);
-    }
-    sink.put1(0x0f);
-    sink.put1(bits as u8);
-}
-
-// Emit single-byte opcode with mandatory prefix.
-fn put_mp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*");
-    let enc = EncodingBits::from(bits);
-    sink.put1(PREFIX[(enc.pp() - 1) as usize]);
-    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp1 encoding");
-    sink.put1(bits as u8);
-}
-
-// Emit single-byte opcode with mandatory prefix and REX.
-fn put_rexmp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(bits & 0x0c00, 0, "Invalid encoding bits for RexMp1*");
-    let enc = EncodingBits::from(bits);
-    sink.put1(PREFIX[(enc.pp() - 1) as usize]);
-    rex_prefix(bits, rex, sink);
-    sink.put1(bits as u8);
-}
-
-// Emit two-byte opcode (0F XX) with mandatory prefix.
-fn put_mp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(bits & 0x8c00, 0x0400, "Invalid encoding bits for Mp2*");
-    let enc = EncodingBits::from(bits);
-    sink.put1(PREFIX[(enc.pp() - 1) as usize]);
-    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp2 encoding");
-    sink.put1(0x0f);
-    sink.put1(bits as u8);
-}
-
-// Emit two-byte opcode (0F XX) with mandatory prefix and REX.
-fn put_rexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for RexMp2*");
-    let enc = EncodingBits::from(bits);
-    sink.put1(PREFIX[(enc.pp() - 1) as usize]);
-    rex_prefix(bits, rex, sink);
-    sink.put1(0x0f);
-    sink.put1(bits as u8);
-}
-
-/// Emit two-byte opcode (0F XX) with mandatory prefix and inferred REX.
-fn put_dynrexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(
-        bits & 0x0c00,
-        0x0400,
-        "Invalid encoding bits for DynRexMp2*"
-    );
-    let enc = EncodingBits::from(bits);
-    sink.put1(PREFIX[(enc.pp() - 1) as usize]);
-    if needs_rex(rex) {
-        rex_prefix(bits, rex, sink);
-    }
-    sink.put1(0x0f);
-    sink.put1(bits as u8);
-}
-
-/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix.
-fn put_mp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(bits & 0x8800, 0x0800, "Invalid encoding bits for Mp3*");
-    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp3 encoding");
-    let enc = EncodingBits::from(bits);
-    sink.put1(PREFIX[(enc.pp() - 1) as usize]);
-    sink.put1(0x0f);
-    sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]);
-    sink.put1(bits as u8);
-}
-
-/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and REX
-fn put_rexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(bits & 0x0800, 0x0800, "Invalid encoding bits for RexMp3*");
-    let enc = EncodingBits::from(bits);
-    sink.put1(PREFIX[(enc.pp() - 1) as usize]);
-    rex_prefix(bits, rex, sink);
-    sink.put1(0x0f);
-    sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]);
-    sink.put1(bits as u8);
-}
-
-/// Emit three-byte opcode (0F 3[8A] XX) with mandatory prefix and an inferred REX prefix.
-fn put_dynrexmp3<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
-    debug_assert_eq!(
-        bits & 0x0800,
-        0x0800,
-        "Invalid encoding bits for DynRexMp3*"
-    );
-    let enc = EncodingBits::from(bits);
-    sink.put1(PREFIX[(enc.pp() - 1) as usize]);
-    if needs_rex(rex) {
-        rex_prefix(bits, rex, sink);
-    }
-    sink.put1(0x0f);
-    sink.put1(OP3_BYTE2[(enc.mm() - 2) as usize]);
-    sink.put1(bits as u8);
-}
-
-/// Defines the EVEX context for the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte). Table 2-36 in
-/// section 2.6.10 (Intel Software Development Manual, volume 2A) describes how these bits can be
-/// used together for certain classes of instructions; i.e., special care should be taken to ensure
-/// that instructions use an applicable correct `EvexContext`. Table 2-39 contains cases where
-/// opcodes can result in an #UD.
-#[allow(dead_code)]
-enum EvexContext {
-    RoundingRegToRegFP {
-        rc: EvexRoundingControl,
-    },
-    NoRoundingFP {
-        sae: bool,
-        length: EvexVectorLength,
-    },
-    MemoryOp {
-        broadcast: bool,
-        length: EvexVectorLength,
-    },
-    Other {
-        length: EvexVectorLength,
-    },
-}
-
-impl EvexContext {
-    /// Encode the `L'`, `L`, and `b` bits (bits 6:4 of EVEX P2 byte) for merging with the P2 byte.
-    fn bits(&self) -> u8 {
-        match self {
-            Self::RoundingRegToRegFP { rc } => 0b001 | rc.bits() << 1,
-            Self::NoRoundingFP { sae, length } => (*sae as u8) | length.bits() << 1,
-            Self::MemoryOp { broadcast, length } => (*broadcast as u8) | length.bits() << 1,
-            Self::Other { length } => length.bits() << 1,
-        }
-    }
-}
-
-/// The EVEX format allows choosing a vector length in the `L'` and `L` bits; see `EvexContext`.
-#[allow(dead_code)]
-enum EvexVectorLength {
-    V128,
-    V256,
-    V512,
-}
-
-impl EvexVectorLength {
-    /// Encode the `L'` and `L` bits for merging with the P2 byte.
-    fn bits(&self) -> u8 {
-        match self {
-            Self::V128 => 0b00,
-            Self::V256 => 0b01,
-            Self::V512 => 0b10,
-            // 0b11 is reserved (#UD).
-        }
-    }
-}
-
-/// The EVEX format allows defining rounding control in the `L'` and `L` bits; see `EvexContext`.
-#[allow(dead_code)]
-enum EvexRoundingControl {
-    RNE,
-    RD,
-    RU,
-    RZ,
-}
-
-impl EvexRoundingControl {
-    /// Encode the `L'` and `L` bits for merging with the P2 byte.
-    fn bits(&self) -> u8 {
-        match self {
-            Self::RNE => 0b00,
-            Self::RD => 0b01,
-            Self::RU => 0b10,
-            Self::RZ => 0b11,
-        }
-    }
-}
-
-/// Defines the EVEX masking behavior; masking support is described in section 2.6.4 of the Intel
-/// Software Development Manual, volume 2A.
-#[allow(dead_code)]
-enum EvexMasking {
-    None,
-    Merging { k: u8 },
-    Zeroing { k: u8 },
-}
-
-impl EvexMasking {
-    /// Encode the `z` bit for merging with the P2 byte.
-    fn z_bit(&self) -> u8 {
-        match self {
-            Self::None | Self::Merging { .. } => 0,
-            Self::Zeroing { .. } => 1,
-        }
-    }
-
-    /// Encode the `aaa` bits for merging with the P2 byte.
-    fn aaa_bits(&self) -> u8 {
-        match self {
-            Self::None => 0b000,
-            Self::Merging { k } | Self::Zeroing { k } => {
-                debug_assert!(*k <= 7);
-                *k
-            }
-        }
-    }
-}
-
-/// Encode an EVEX prefix, including the instruction opcode. To match the current recipe
-/// convention, the ModR/M byte is written separately in the recipe. This EVEX encoding function
-/// only encodes the `reg` (operand 1), `vvvv` (operand 2), `rm` (operand 3) form; other forms are
-/// possible (see section 2.6.2, Intel Software Development Manual, volume 2A), requiring
-/// refactoring of this function or separate functions for each form (e.g. as for the REX prefix).
-fn put_evex<CS: CodeSink + ?Sized>(
-    bits: u16,
-    reg: RegUnit,
-    vvvvv: RegUnit,
-    rm: RegUnit,
-    context: EvexContext,
-    masking: EvexMasking,
-    sink: &mut CS,
-) {
-    let enc = EncodingBits::from(bits);
-
-    // EVEX prefix.
-    sink.put1(0x62);
-
-    debug_assert!(enc.mm() < 0b100);
-    let mut p0 = enc.mm() & 0b11;
-    p0 |= evex2(rm, reg) << 4; // bits 3:2 are always unset
-    sink.put1(p0);
-
-    let mut p1 = enc.pp() | 0b100; // bit 2 is always set
-    p1 |= (!(vvvvv as u8) & 0b1111) << 3;
-    p1 |= (enc.rex_w() & 0b1) << 7;
-    sink.put1(p1);
-
-    let mut p2 = masking.aaa_bits();
-    p2 |= (!(vvvvv as u8 >> 4) & 0b1) << 3;
-    p2 |= context.bits() << 4;
-    p2 |= masking.z_bit() << 7;
-    sink.put1(p2);
-
-    // Opcode
-    sink.put1(enc.opcode_byte());
-
-    // ModR/M byte placed in recipe
-}
-
-/// Emit a ModR/M byte for reg-reg operands.
-fn modrm_rr<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
-    let reg = reg as u8 & 7;
-    let rm = rm as u8 & 7;
-    let mut b = 0b11000000;
-    b |= reg << 3;
-    b |= rm;
-    sink.put1(b);
-}
-
-/// Emit a ModR/M byte where the reg bits are part of the opcode.
-fn modrm_r_bits<CS: CodeSink + ?Sized>(rm: RegUnit, bits: u16, sink: &mut CS) {
-    let reg = (bits >> 12) as u8 & 7;
-    let rm = rm as u8 & 7;
-    let mut b = 0b11000000;
-    b |= reg << 3;
-    b |= rm;
-    sink.put1(b);
-}
-
-/// Emit a mode 00 ModR/M byte. This is a register-indirect addressing mode with no offset.
-/// Registers %rsp and %rbp are invalid for `rm`, %rsp indicates a SIB byte, and %rbp indicates an
-/// absolute immediate 32-bit address.
-fn modrm_rm<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
-    let reg = reg as u8 & 7;
-    let rm = rm as u8 & 7;
-    let mut b = 0b00000000;
-    b |= reg << 3;
-    b |= rm;
-    sink.put1(b);
-}
-
-/// Emit a mode 00 Mod/RM byte, with a rip-relative displacement in 64-bit mode. Effective address
-/// is calculated by adding displacement to 64-bit rip of next instruction. See intel Sw dev manual
-/// section 2.2.1.6.
-fn modrm_riprel<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
-    modrm_rm(0b101, reg, sink)
-}
-
-/// Emit a mode 01 ModR/M byte. This is a register-indirect addressing mode with 8-bit
-/// displacement.
-/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
-fn modrm_disp8<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
-    let reg = reg as u8 & 7;
-    let rm = rm as u8 & 7;
-    let mut b = 0b01000000;
-    b |= reg << 3;
-    b |= rm;
-    sink.put1(b);
-}
-
-/// Emit a mode 10 ModR/M byte. This is a register-indirect addressing mode with 32-bit
-/// displacement.
-/// Register %rsp is invalid for `rm`. It indicates the presence of a SIB byte.
-fn modrm_disp32<CS: CodeSink + ?Sized>(rm: RegUnit, reg: RegUnit, sink: &mut CS) {
-    let reg = reg as u8 & 7;
-    let rm = rm as u8 & 7;
-    let mut b = 0b10000000;
-    b |= reg << 3;
-    b |= rm;
-    sink.put1(b);
-}
-
-/// Emit a mode 00 ModR/M with a 100 RM indicating a SIB byte is present.
-fn modrm_sib<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
-    modrm_rm(0b100, reg, sink);
-}
-
-/// Emit a mode 01 ModR/M with a 100 RM indicating a SIB byte and 8-bit
-/// displacement are present.
-fn modrm_sib_disp8<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
-    modrm_disp8(0b100, reg, sink);
-}
-
-/// Emit a mode 10 ModR/M with a 100 RM indicating a SIB byte and 32-bit
-/// displacement are present.
-fn modrm_sib_disp32<CS: CodeSink + ?Sized>(reg: RegUnit, sink: &mut CS) {
-    modrm_disp32(0b100, reg, sink);
-}
-
-/// Emit a SIB byte with a base register and no scale+index.
-fn sib_noindex<CS: CodeSink + ?Sized>(base: RegUnit, sink: &mut CS) {
-    let base = base as u8 & 7;
-    // SIB        SS_III_BBB.
-    let mut b = 0b00_100_000;
-    b |= base;
-    sink.put1(b);
-}
-
-/// Emit a SIB byte with a scale, base, and index.
-fn sib<CS: CodeSink + ?Sized>(scale: u8, index: RegUnit, base: RegUnit, sink: &mut CS) {
-    // SIB        SS_III_BBB.
-    debug_assert_eq!(scale & !0x03, 0, "Scale out of range");
-    let scale = scale & 3;
-    let index = index as u8 & 7;
-    let base = base as u8 & 7;
-    let b: u8 = (scale << 6) | (index << 3) | base;
-    sink.put1(b);
-}
-
-/// Get the low 4 bits of an opcode for an integer condition code.
-///
-/// Add this offset to a base opcode for:
-///
-/// ---- 0x70: Short conditional branch.
-/// 0x0f 0x80: Long conditional branch.
-/// 0x0f 0x90: SetCC.
-///
-fn icc2opc(cond: IntCC) -> u16 {
-    use crate::ir::condcodes::IntCC::*;
-    match cond {
-        Overflow => 0x0,
-        NotOverflow => 0x1,
-        UnsignedLessThan => 0x2,
-        UnsignedGreaterThanOrEqual => 0x3,
-        Equal => 0x4,
-        NotEqual => 0x5,
-        UnsignedLessThanOrEqual => 0x6,
-        UnsignedGreaterThan => 0x7,
-        // 0x8 = Sign.
-        // 0x9 = !Sign.
-        // 0xa = Parity even.
-        // 0xb = Parity odd.
-        SignedLessThan => 0xc,
-        SignedGreaterThanOrEqual => 0xd,
-        SignedLessThanOrEqual => 0xe,
-        SignedGreaterThan => 0xf,
-    }
-}
-
-/// Get the low 4 bits of an opcode for a floating point condition code.
-///
-/// The ucomiss/ucomisd instructions set the FLAGS bits CF/PF/CF like this:
-///
-///    ZPC OSA
-/// UN 111 000
-/// GT 000 000
-/// LT 001 000
-/// EQ 100 000
-///
-/// Not all floating point condition codes are supported.
-fn fcc2opc(cond: FloatCC) -> u16 {
-    use crate::ir::condcodes::FloatCC::*;
-    match cond {
-        Ordered                    => 0xb, // EQ|LT|GT => *np (P=0)
-        Unordered                  => 0xa, // UN       => *p  (P=1)
-        OrderedNotEqual            => 0x5, // LT|GT    => *ne (Z=0),
-        UnorderedOrEqual           => 0x4, // UN|EQ    => *e  (Z=1)
-        GreaterThan                => 0x7, // GT       => *a  (C=0&Z=0)
-        GreaterThanOrEqual         => 0x3, // GT|EQ    => *ae (C=0)
-        UnorderedOrLessThan        => 0x2, // UN|LT    => *b  (C=1)
-        UnorderedOrLessThanOrEqual => 0x6, // UN|LT|EQ => *be (Z=1|C=1)
-        Equal |                            // EQ
-        NotEqual |                         // UN|LT|GT
-        LessThan |                         // LT
-        LessThanOrEqual |                  // LT|EQ
-        UnorderedOrGreaterThan |           // UN|GT
-        UnorderedOrGreaterThanOrEqual      // UN|GT|EQ
-        => panic!("{} not supported", cond),
-    }
-}
-
-/// Emit a single-byte branch displacement to `destination`.
-fn disp1<CS: CodeSink + ?Sized>(destination: Block, func: &Function, sink: &mut CS) {
-    let delta = func.offsets[destination].wrapping_sub(sink.offset() + 1);
-    sink.put1(delta as u8);
-}
-
-/// Emit a four-byte branch displacement to `destination`.
-fn disp4<CS: CodeSink + ?Sized>(destination: Block, func: &Function, sink: &mut CS) {
-    let delta = func.offsets[destination].wrapping_sub(sink.offset() + 4);
-    sink.put4(delta);
-}
-
-/// Emit a four-byte displacement to jump table `jt`.
-fn jt_disp4<CS: CodeSink + ?Sized>(jt: JumpTable, func: &Function, sink: &mut CS) {
-    let delta = func.jt_offsets[jt].wrapping_sub(sink.offset() + 4);
-    sink.put4(delta);
-    sink.reloc_jt(Reloc::X86PCRelRodata4, jt);
-}
-
-/// Emit a four-byte displacement to `constant`.
-fn const_disp4<CS: CodeSink + ?Sized>(constant: Constant, func: &Function, sink: &mut CS) {
-    let offset = func.dfg.constants.get_offset(constant);
-    let delta = offset.wrapping_sub(sink.offset() + 4);
-    sink.put4(delta);
-    sink.reloc_constant(Reloc::X86PCRelRodata4, offset);
-}
diff --git a/cranelift/codegen/src/isa/legacy/x86/enc_tables.rs b/cranelift/codegen/src/isa/legacy/x86/enc_tables.rs
deleted file mode 100644
index 72890cffd9..0000000000
--- a/cranelift/codegen/src/isa/legacy/x86/enc_tables.rs
+++ /dev/null
@@ -1,1894 +0,0 @@
-//! Encoding tables for x86 ISAs.
-
-use super::registers::*;
-use crate::bitset::BitSet;
-use crate::cursor::{Cursor, FuncCursor};
-use crate::flowgraph::ControlFlowGraph;
-use crate::ir::condcodes::{FloatCC, IntCC};
-use crate::ir::types::*;
-use crate::ir::{self, Function, Inst, InstBuilder, MemFlags};
-use crate::isa::constraints::*;
-use crate::isa::enc_tables::*;
-use crate::isa::encoding::base_size;
-use crate::isa::encoding::{Encoding, RecipeSizing};
-use crate::isa::RegUnit;
-use crate::isa::{self, TargetIsa};
-use crate::legalizer::expand_as_libcall;
-use crate::predicates;
-use crate::regalloc::RegDiversions;
-
-include!(concat!(env!("OUT_DIR"), "/encoding-x86.rs"));
-include!(concat!(env!("OUT_DIR"), "/legalize-x86.rs"));
-
-/// Whether the REX prefix is needed for encoding extended registers (via REX.RXB).
-///
-/// Normal x86 instructions have only 3 bits for encoding a register.
-/// The REX prefix adds REX.R, REX,X, and REX.B bits, interpreted as fourth bits.
-pub fn is_extended_reg(reg: RegUnit) -> bool {
-    // Extended registers have the fourth bit set.
-    reg as u8 & 0b1000 != 0
-}
-
-pub fn needs_sib_byte(reg: RegUnit) -> bool {
-    reg == RU::r12 as RegUnit || reg == RU::rsp as RegUnit
-}
-pub fn needs_offset(reg: RegUnit) -> bool {
-    reg == RU::r13 as RegUnit || reg == RU::rbp as RegUnit
-}
-pub fn needs_sib_byte_or_offset(reg: RegUnit) -> bool {
-    needs_sib_byte(reg) || needs_offset(reg)
-}
-
-fn test_input(
-    op_index: usize,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-    condition_func: fn(RegUnit) -> bool,
-) -> bool {
-    let in_reg = divert.reg(func.dfg.inst_args(inst)[op_index], &func.locations);
-    condition_func(in_reg)
-}
-
-fn test_result(
-    result_index: usize,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-    condition_func: fn(RegUnit) -> bool,
-) -> bool {
-    let out_reg = divert.reg(func.dfg.inst_results(inst)[result_index], &func.locations);
-    condition_func(out_reg)
-}
-
-fn size_plus_maybe_offset_for_inreg_0(
-    sizing: &RecipeSizing,
-    _enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    let needs_offset = test_input(0, inst, divert, func, needs_offset);
-    sizing.base_size + if needs_offset { 1 } else { 0 }
-}
-fn size_plus_maybe_offset_for_inreg_1(
-    sizing: &RecipeSizing,
-    _enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    let needs_offset = test_input(1, inst, divert, func, needs_offset);
-    sizing.base_size + if needs_offset { 1 } else { 0 }
-}
-fn size_plus_maybe_sib_for_inreg_0(
-    sizing: &RecipeSizing,
-    _enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    let needs_sib = test_input(0, inst, divert, func, needs_sib_byte);
-    sizing.base_size + if needs_sib { 1 } else { 0 }
-}
-fn size_plus_maybe_sib_for_inreg_1(
-    sizing: &RecipeSizing,
-    _enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    let needs_sib = test_input(1, inst, divert, func, needs_sib_byte);
-    sizing.base_size + if needs_sib { 1 } else { 0 }
-}
-fn size_plus_maybe_sib_or_offset_for_inreg_0(
-    sizing: &RecipeSizing,
-    _enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    let needs_sib_or_offset = test_input(0, inst, divert, func, needs_sib_byte_or_offset);
-    sizing.base_size + if needs_sib_or_offset { 1 } else { 0 }
-}
-fn size_plus_maybe_sib_or_offset_for_inreg_1(
-    sizing: &RecipeSizing,
-    _enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    let needs_sib_or_offset = test_input(1, inst, divert, func, needs_sib_byte_or_offset);
-    sizing.base_size + if needs_sib_or_offset { 1 } else { 0 }
-}
-
-/// Calculates the size while inferring if the first and second input registers (inreg0, inreg1)
-/// require a dynamic REX prefix and if the second input register (inreg1) requires a SIB or offset.
-fn size_plus_maybe_sib_or_offset_inreg1_plus_rex_prefix_for_inreg0_inreg1(
-    sizing: &RecipeSizing,
-    enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
-    let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
-        || test_input(1, inst, divert, func, is_extended_reg);
-    size_plus_maybe_sib_or_offset_for_inreg_1(sizing, enc, inst, divert, func)
-        + if needs_rex { 1 } else { 0 }
-}
-
-/// Calculates the size while inferring if the first and second input registers (inreg0, inreg1)
-/// require a dynamic REX prefix and if the second input register (inreg1) requires a SIB.
-fn size_plus_maybe_sib_inreg1_plus_rex_prefix_for_inreg0_inreg1(
-    sizing: &RecipeSizing,
-    enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
-    let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
-        || test_input(1, inst, divert, func, is_extended_reg);
-    size_plus_maybe_sib_for_inreg_1(sizing, enc, inst, divert, func) + if needs_rex { 1 } else { 0 }
-}
-
-/// Calculates the size while inferring if the first input register (inreg0) and first output
-/// register (outreg0) require a dynamic REX and if the first input register (inreg0) requires a
-/// SIB or offset.
-fn size_plus_maybe_sib_or_offset_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0(
-    sizing: &RecipeSizing,
-    enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
-    let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
-        || test_result(0, inst, divert, func, is_extended_reg);
-    size_plus_maybe_sib_or_offset_for_inreg_0(sizing, enc, inst, divert, func)
-        + if needs_rex { 1 } else { 0 }
-}
-
-/// Calculates the size while inferring if the first input register (inreg0) and first output
-/// register (outreg0) require a dynamic REX and if the first input register (inreg0) requires a
-/// SIB.
-fn size_plus_maybe_sib_for_inreg_0_plus_rex_prefix_for_inreg0_outreg0(
-    sizing: &RecipeSizing,
-    enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
-    let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
-        || test_result(0, inst, divert, func, is_extended_reg);
-    size_plus_maybe_sib_for_inreg_0(sizing, enc, inst, divert, func) + if needs_rex { 1 } else { 0 }
-}
-
-/// Infers whether a dynamic REX prefix will be emitted, for use with one input reg.
-///
-/// A REX prefix is known to be emitted if either:
-///  1. The EncodingBits specify that REX.W is to be set.
-///  2. Registers are used that require REX.R or REX.B bits for encoding.
-fn size_with_inferred_rex_for_inreg0(
-    sizing: &RecipeSizing,
-    _enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
-    let needs_rex = test_input(0, inst, divert, func, is_extended_reg);
-    sizing.base_size + if needs_rex { 1 } else { 0 }
-}
-
-/// Infers whether a dynamic REX prefix will be emitted, based on the second operand.
-fn size_with_inferred_rex_for_inreg1(
-    sizing: &RecipeSizing,
-    _enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
-    let needs_rex = test_input(1, inst, divert, func, is_extended_reg);
-    sizing.base_size + if needs_rex { 1 } else { 0 }
-}
-
-/// Infers whether a dynamic REX prefix will be emitted, based on the third operand.
-fn size_with_inferred_rex_for_inreg2(
-    sizing: &RecipeSizing,
-    _: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
-    let needs_rex = test_input(2, inst, divert, func, is_extended_reg);
-    sizing.base_size + if needs_rex { 1 } else { 0 }
-}
-
-/// Infers whether a dynamic REX prefix will be emitted, for use with two input registers.
-///
-/// A REX prefix is known to be emitted if either:
-///  1. The EncodingBits specify that REX.W is to be set.
-///  2. Registers are used that require REX.R or REX.B bits for encoding.
-fn size_with_inferred_rex_for_inreg0_inreg1(
-    sizing: &RecipeSizing,
-    _enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
-    let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
-        || test_input(1, inst, divert, func, is_extended_reg);
-    sizing.base_size + if needs_rex { 1 } else { 0 }
-}
-
-/// Infers whether a dynamic REX prefix will be emitted, based on second and third operand.
-fn size_with_inferred_rex_for_inreg1_inreg2(
-    sizing: &RecipeSizing,
-    _enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
-    let needs_rex = test_input(1, inst, divert, func, is_extended_reg)
-        || test_input(2, inst, divert, func, is_extended_reg);
-    sizing.base_size + if needs_rex { 1 } else { 0 }
-}
-
-/// Infers whether a dynamic REX prefix will be emitted, based on a single
-/// input register and a single output register.
-fn size_with_inferred_rex_for_inreg0_outreg0(
-    sizing: &RecipeSizing,
-    _enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
-    let needs_rex = test_input(0, inst, divert, func, is_extended_reg)
-        || test_result(0, inst, divert, func, is_extended_reg);
-    sizing.base_size + if needs_rex { 1 } else { 0 }
-}
-
-/// Infers whether a dynamic REX prefix will be emitted, based on a single output register.
-fn size_with_inferred_rex_for_outreg0(
-    sizing: &RecipeSizing,
-    _enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
-    let needs_rex = test_result(0, inst, divert, func, is_extended_reg);
-    sizing.base_size + if needs_rex { 1 } else { 0 }
-}
-
-/// Infers whether a dynamic REX prefix will be emitted, for use with CMOV.
-///
-/// CMOV uses 3 inputs, with the REX is inferred from reg1 and reg2.
-fn size_with_inferred_rex_for_cmov(
-    sizing: &RecipeSizing,
-    _enc: Encoding,
-    inst: Inst,
-    divert: &RegDiversions,
-    func: &Function,
-) -> u8 {
-    // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
-    let needs_rex = test_input(1, inst, divert, func, is_extended_reg)
-        || test_input(2, inst, divert, func, is_extended_reg);
-    sizing.base_size + if needs_rex { 1 } else { 0 }
-}
-
-/// If the value's definition is a constant immediate, returns its unpacked value, or None
-/// otherwise.
-fn maybe_iconst_imm(pos: &FuncCursor, value: ir::Value) -> Option<i64> {
-    if let ir::ValueDef::Result(inst, _) = &pos.func.dfg.value_def(value) {
-        if let ir::InstructionData::UnaryImm {
-            opcode: ir::Opcode::Iconst,
-            imm,
-        } = &pos.func.dfg[*inst]
-        {
-            let value: i64 = (*imm).into();
-            Some(value)
-        } else {
-            None
-        }
-    } else {
-        None
-    }
-}
-
-/// Expand the `sdiv` and `srem` instructions using `x86_sdivmodx`.
-fn expand_sdivrem(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    cfg: &mut ControlFlowGraph,
-    isa: &dyn TargetIsa,
-) {
-    let (x, y, is_srem) = match func.dfg[inst] {
-        ir::InstructionData::Binary {
-            opcode: ir::Opcode::Sdiv,
-            args,
-        } => (args[0], args[1], false),
-        ir::InstructionData::Binary {
-            opcode: ir::Opcode::Srem,
-            args,
-        } => (args[0], args[1], true),
-        _ => panic!("Need sdiv/srem: {}", func.dfg.display_inst(inst, None)),
-    };
-
-    let old_block = func.layout.pp_block(inst);
-    let result = func.dfg.first_result(inst);
-    let ty = func.dfg.value_type(result);
-
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-    pos.func.dfg.clear_results(inst);
-
-    let avoid_div_traps = isa.flags().avoid_div_traps();
-
-    // If we can tolerate native division traps, sdiv doesn't need branching.
-    if !avoid_div_traps && !is_srem {
-        let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
-        pos.ins().with_result(result).x86_sdivmodx(x, xhi, y);
-        pos.remove_inst();
-        return;
-    }
-
-    // Try to remove checks if the input value is an immediate other than 0 or -1. For these two
-    // immediates, we'd ideally replace conditional traps by traps, but this requires more
-    // manipulation of the dfg/cfg, which is out of scope here.
-    let (could_be_zero, could_be_minus_one) = if let Some(imm) = maybe_iconst_imm(&pos, y) {
-        (imm == 0, imm == -1)
-    } else {
-        (true, true)
-    };
-
-    // Put in an explicit division-by-zero trap if the environment requires it.
-    if avoid_div_traps && could_be_zero {
-        pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
-    }
-
-    if !could_be_minus_one {
-        let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
-        let reuse = if is_srem {
-            [None, Some(result)]
-        } else {
-            [Some(result), None]
-        };
-        pos.ins().with_results(reuse).x86_sdivmodx(x, xhi, y);
-        pos.remove_inst();
-        return;
-    }
-
-    // block handling the nominal case.
-    let nominal = pos.func.dfg.make_block();
-
-    // block handling the -1 divisor case.
-    let minus_one = pos.func.dfg.make_block();
-
-    // Final block with one argument representing the final result value.
-    let done = pos.func.dfg.make_block();
-
-    // Move the `inst` result value onto the `done` block.
-    pos.func.dfg.attach_block_param(done, result);
-
-    // Start by checking for a -1 divisor which needs to be handled specially.
-    let is_m1 = pos.ins().ifcmp_imm(y, -1);
-    pos.ins().brif(IntCC::Equal, is_m1, minus_one, &[]);
-    pos.ins().jump(nominal, &[]);
-
-    // Now it is safe to execute the `x86_sdivmodx` instruction which will still trap on division
-    // by zero.
-    pos.insert_block(nominal);
-    let xhi = pos.ins().sshr_imm(x, i64::from(ty.lane_bits()) - 1);
-    let (quot, rem) = pos.ins().x86_sdivmodx(x, xhi, y);
-    let divres = if is_srem { rem } else { quot };
-    pos.ins().jump(done, &[divres]);
-
-    // Now deal with the -1 divisor case.
-    pos.insert_block(minus_one);
-    let m1_result = if is_srem {
-        // x % -1 = 0.
-        pos.ins().iconst(ty, 0)
-    } else {
-        // Explicitly check for overflow: Trap when x == INT_MIN.
-        debug_assert!(avoid_div_traps, "Native trapping divide handled above");
-        let f = pos.ins().ifcmp_imm(x, -1 << (ty.lane_bits() - 1));
-        pos.ins()
-            .trapif(IntCC::Equal, f, ir::TrapCode::IntegerOverflow);
-        // x / -1 = -x.
-        pos.ins().irsub_imm(x, 0)
-    };
-
-    // Recycle the original instruction as a jump.
-    pos.func.dfg.replace(inst).jump(done, &[m1_result]);
-
-    // Finally insert a label for the completion.
-    pos.next_inst();
-    pos.insert_block(done);
-
-    cfg.recompute_block(pos.func, old_block);
-    cfg.recompute_block(pos.func, nominal);
-    cfg.recompute_block(pos.func, minus_one);
-    cfg.recompute_block(pos.func, done);
-}
-
-/// Expand the `udiv` and `urem` instructions using `x86_udivmodx`.
-fn expand_udivrem(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    _cfg: &mut ControlFlowGraph,
-    isa: &dyn TargetIsa,
-) {
-    let (x, y, is_urem) = match func.dfg[inst] {
-        ir::InstructionData::Binary {
-            opcode: ir::Opcode::Udiv,
-            args,
-        } => (args[0], args[1], false),
-        ir::InstructionData::Binary {
-            opcode: ir::Opcode::Urem,
-            args,
-        } => (args[0], args[1], true),
-        _ => panic!("Need udiv/urem: {}", func.dfg.display_inst(inst, None)),
-    };
-    let avoid_div_traps = isa.flags().avoid_div_traps();
-    let result = func.dfg.first_result(inst);
-    let ty = func.dfg.value_type(result);
-
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-    pos.func.dfg.clear_results(inst);
-
-    // Put in an explicit division-by-zero trap if the environment requires it.
-    if avoid_div_traps {
-        let zero_check = if let Some(imm) = maybe_iconst_imm(&pos, y) {
-            // Ideally, we'd just replace the conditional trap with a trap when the immediate is
-            // zero, but this requires more manipulation of the dfg/cfg, which is out of scope
-            // here.
-            imm == 0
-        } else {
-            true
-        };
-        if zero_check {
-            pos.ins().trapz(y, ir::TrapCode::IntegerDivisionByZero);
-        }
-    }
-
-    // Now it is safe to execute the `x86_udivmodx` instruction.
-    let xhi = pos.ins().iconst(ty, 0);
-    let reuse = if is_urem {
-        [None, Some(result)]
-    } else {
-        [Some(result), None]
-    };
-    pos.ins().with_results(reuse).x86_udivmodx(x, xhi, y);
-    pos.remove_inst();
-}
-
-/// Expand the `fmin` and `fmax` instructions using the x86 `x86_fmin` and `x86_fmax`
-/// instructions.
-fn expand_minmax(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    cfg: &mut ControlFlowGraph,
-    _isa: &dyn TargetIsa,
-) {
-    let (x, y, x86_opc, bitwise_opc) = match func.dfg[inst] {
-        ir::InstructionData::Binary {
-            opcode: ir::Opcode::Fmin,
-            args,
-        } => (args[0], args[1], ir::Opcode::X86Fmin, ir::Opcode::Bor),
-        ir::InstructionData::Binary {
-            opcode: ir::Opcode::Fmax,
-            args,
-        } => (args[0], args[1], ir::Opcode::X86Fmax, ir::Opcode::Band),
-        _ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)),
-    };
-    let old_block = func.layout.pp_block(inst);
-
-    // We need to handle the following conditions, depending on how x and y compare:
-    //
-    // 1. LT or GT: The native `x86_opc` min/max instruction does what we need.
-    // 2. EQ: We need to use `bitwise_opc` to make sure that
-    //    fmin(0.0, -0.0) -> -0.0 and fmax(0.0, -0.0) -> 0.0.
-    // 3. UN: We need to produce a quiet NaN that is canonical if the inputs are canonical.
-
-    // block handling case 1) where operands are ordered but not equal.
-    let one_block = func.dfg.make_block();
-
-    // block handling case 3) where one operand is NaN.
-    let uno_block = func.dfg.make_block();
-
-    // block that handles the unordered or equal cases 2) and 3).
-    let ueq_block = func.dfg.make_block();
-
-    // block handling case 2) where operands are ordered and equal.
-    let eq_block = func.dfg.make_block();
-
-    // Final block with one argument representing the final result value.
-    let done = func.dfg.make_block();
-
-    // The basic blocks are laid out to minimize branching for the common cases:
-    //
-    // 1) One branch not taken, one jump.
-    // 2) One branch taken.
-    // 3) Two branches taken, one jump.
-
-    // Move the `inst` result value onto the `done` block.
-    let result = func.dfg.first_result(inst);
-    let ty = func.dfg.value_type(result);
-    func.dfg.clear_results(inst);
-    func.dfg.attach_block_param(done, result);
-
-    // Test for case 1) ordered and not equal.
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-    let cmp_ueq = pos.ins().fcmp(FloatCC::UnorderedOrEqual, x, y);
-    pos.ins().brnz(cmp_ueq, ueq_block, &[]);
-    pos.ins().jump(one_block, &[]);
-
-    // Handle the common ordered, not equal (LT|GT) case.
-    pos.insert_block(one_block);
-    let one_inst = pos.ins().Binary(x86_opc, ty, x, y).0;
-    let one_result = pos.func.dfg.first_result(one_inst);
-    pos.ins().jump(done, &[one_result]);
-
-    // Case 3) Unordered.
-    // We know that at least one operand is a NaN that needs to be propagated. We simply use an
-    // `fadd` instruction which has the same NaN propagation semantics.
-    pos.insert_block(uno_block);
-    let uno_result = pos.ins().fadd(x, y);
-    pos.ins().jump(done, &[uno_result]);
-
-    // Case 2) or 3).
-    pos.insert_block(ueq_block);
-    // Test for case 3) (UN) one value is NaN.
-    // TODO: When we get support for flag values, we can reuse the above comparison.
-    let cmp_uno = pos.ins().fcmp(FloatCC::Unordered, x, y);
-    pos.ins().brnz(cmp_uno, uno_block, &[]);
-    pos.ins().jump(eq_block, &[]);
-
-    // We are now in case 2) where x and y compare EQ.
-    // We need a bitwise operation to get the sign right.
-    pos.insert_block(eq_block);
-    let bw_inst = pos.ins().Binary(bitwise_opc, ty, x, y).0;
-    let bw_result = pos.func.dfg.first_result(bw_inst);
-    // This should become a fall-through for this second most common case.
-    // Recycle the original instruction as a jump.
-    pos.func.dfg.replace(inst).jump(done, &[bw_result]);
-
-    // Finally insert a label for the completion.
-    pos.next_inst();
-    pos.insert_block(done);
-
-    cfg.recompute_block(pos.func, old_block);
-    cfg.recompute_block(pos.func, one_block);
-    cfg.recompute_block(pos.func, uno_block);
-    cfg.recompute_block(pos.func, ueq_block);
-    cfg.recompute_block(pos.func, eq_block);
-    cfg.recompute_block(pos.func, done);
-}
-
-/// This legalization converts a minimum/maximum operation into a sequence that matches the
-/// non-x86-friendly WebAssembly semantics of NaN handling. This logic is kept separate from
-/// [expand_minmax] above (the scalar version) for code clarity.
-fn expand_minmax_vector(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    _cfg: &mut ControlFlowGraph,
-    _isa: &dyn TargetIsa,
-) {
-    let ty = func.dfg.ctrl_typevar(inst);
-    debug_assert!(ty.is_vector());
-    let (x, y, x86_opcode, is_max) = match func.dfg[inst] {
-        ir::InstructionData::Binary {
-            opcode: ir::Opcode::Fmin,
-            args,
-        } => (args[0], args[1], ir::Opcode::X86Fmin, false),
-        ir::InstructionData::Binary {
-            opcode: ir::Opcode::Fmax,
-            args,
-        } => (args[0], args[1], ir::Opcode::X86Fmax, true),
-        _ => panic!("Expected fmin/fmax: {}", func.dfg.display_inst(inst, None)),
-    };
-
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-
-    // This sequence is complex due to how x86 handles NaNs and +0/-0. If x86 finds a NaN in
-    // either lane it returns the second operand; likewise, if both operands are in {+0.0, -0.0}
-    // it returns the second operand. To match the behavior of "return the minimum of the
-    // operands or a canonical NaN if either operand is NaN," we must compare in both
-    // directions.
-    let (forward_inst, dfg) = pos.ins().Binary(x86_opcode, ty, x, y);
-    let forward = dfg.first_result(forward_inst);
-    let (backward_inst, dfg) = pos.ins().Binary(x86_opcode, ty, y, x);
-    let backward = dfg.first_result(backward_inst);
-
-    let (value, mask) = if is_max {
-        // For maximum:
-        // Find any differences between the forward and backward `max` operation.
-        let difference = pos.ins().bxor(forward, backward);
-        // Merge in the differences.
-        let propagate_nans_and_plus_zero = pos.ins().bor(backward, difference);
-        let value = pos.ins().fsub(propagate_nans_and_plus_zero, difference);
-        // Discover which lanes have NaNs in them.
-        let find_nan_lanes_mask = pos.ins().fcmp(FloatCC::Unordered, difference, value);
-        (value, find_nan_lanes_mask)
-    } else {
-        // For minimum:
-        // If either lane is a NaN, we want to use these bits, not the second operand bits.
-        let propagate_nans = pos.ins().bor(backward, forward);
-        // Find which lanes contain a NaN with an unordered comparison, filling the mask with
-        // 1s.
-        let find_nan_lanes_mask = pos.ins().fcmp(FloatCC::Unordered, forward, propagate_nans);
-        let bitcast_find_nan_lanes_mask = pos.ins().raw_bitcast(ty, find_nan_lanes_mask);
-        // Then flood the value lane with all 1s if that lane is a NaN. This causes all NaNs
-        // along this code path to be quieted and negative: after the upcoming shift and and_not,
-        // all upper bits (sign, exponent, and payload MSB) will be 1s.
-        let tmp = pos.ins().bor(propagate_nans, bitcast_find_nan_lanes_mask);
-        (tmp, bitcast_find_nan_lanes_mask)
-    };
-
-    // During this lowering we will need to know how many bits to shift by and what type to
-    // convert to when using an integer shift. Recall that an IEEE754 number looks like:
-    // `[sign bit] [exponent bits] [significand bits]`
-    // A quiet NaN has all exponent bits set to 1 and the most significant bit of the
-    // significand set to 1; a signaling NaN has the same exponent but the MSB of the
-    // significand is set to 0. The payload of the NaN is the remaining significand bits, and
-    // WebAssembly assumes a canonical NaN is quiet and has 0s in its payload. To compute this
-    // canonical NaN, we create a mask for the top 10 bits on F32X4 (1 sign + 8 exp. + 1 MSB
-    // sig.) and the top 13 bits on F64X2 (1 sign + 11 exp. + 1 MSB sig.). This means that all
-    // NaNs produced with the mask will be negative (`-NaN`) which is allowed by the sign
-    // non-determinism in the spec: https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0
-    let (shift_by, ty_as_int) = match ty {
-        F32X4 => (10, I32X4),
-        F64X2 => (13, I64X2),
-        _ => unimplemented!("this legalization only understands 128-bit floating point types"),
-    };
-
-    // In order to clear the NaN payload for canonical NaNs, we shift right the NaN lanes (all
-    // 1s) leaving 0s in the top bits. Remember that non-NaN lanes are all 0s so this has
-    // little effect.
-    let mask_as_int = pos.ins().raw_bitcast(ty_as_int, mask);
-    let shift_mask = pos.ins().ushr_imm(mask_as_int, shift_by);
-    let shift_mask_as_float = pos.ins().raw_bitcast(ty, shift_mask);
-
-    // Finally, we replace the value with `value & ~shift_mask`. For non-NaN lanes, this is
-    // equivalent to `... & 1111...` but for NaN lanes this will only have 1s in the top bits,
-    // clearing the payload.
-    pos.func
-        .dfg
-        .replace(inst)
-        .band_not(value, shift_mask_as_float);
-}
-
-/// x86 has no unsigned-to-float conversions. We handle the easy case of zero-extending i32 to
-/// i64 with a pattern, the rest needs more code.
-///
-/// Note that this is the scalar implementation; for the vector implemenation see
-/// [expand_fcvt_from_uint_vector].
-fn expand_fcvt_from_uint(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    cfg: &mut ControlFlowGraph,
-    _isa: &dyn TargetIsa,
-) {
-    let x;
-    match func.dfg[inst] {
-        ir::InstructionData::Unary {
-            opcode: ir::Opcode::FcvtFromUint,
-            arg,
-        } => x = arg,
-        _ => panic!("Need fcvt_from_uint: {}", func.dfg.display_inst(inst, None)),
-    }
-    let xty = func.dfg.value_type(x);
-    let result = func.dfg.first_result(inst);
-    let ty = func.dfg.value_type(result);
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-
-    // Conversion from an unsigned int smaller than 64bit is easy on x86-64.
-    match xty {
-        ir::types::I8 | ir::types::I16 | ir::types::I32 => {
-            // TODO: This should be guarded by an ISA check.
-            let wide = pos.ins().uextend(ir::types::I64, x);
-            pos.func.dfg.replace(inst).fcvt_from_sint(ty, wide);
-            return;
-        }
-        ir::types::I64 => {}
-        _ => unimplemented!(),
-    }
-
-    let old_block = pos.func.layout.pp_block(inst);
-
-    // block handling the case where x >= 0.
-    let poszero_block = pos.func.dfg.make_block();
-
-    // block handling the case where x < 0.
-    let neg_block = pos.func.dfg.make_block();
-
-    // Final block with one argument representing the final result value.
-    let done = pos.func.dfg.make_block();
-
-    // Move the `inst` result value onto the `done` block.
-    pos.func.dfg.clear_results(inst);
-    pos.func.dfg.attach_block_param(done, result);
-
-    // If x as a signed int is not negative, we can use the existing `fcvt_from_sint` instruction.
-    let is_neg = pos.ins().icmp_imm(IntCC::SignedLessThan, x, 0);
-    pos.ins().brnz(is_neg, neg_block, &[]);
-    pos.ins().jump(poszero_block, &[]);
-
-    // Easy case: just use a signed conversion.
-    pos.insert_block(poszero_block);
-    let posres = pos.ins().fcvt_from_sint(ty, x);
-    pos.ins().jump(done, &[posres]);
-
-    // Now handle the negative case.
-    pos.insert_block(neg_block);
-
-    // Divide x by two to get it in range for the signed conversion, keep the LSB, and scale it
-    // back up on the FP side.
-    let ihalf = pos.ins().ushr_imm(x, 1);
-    let lsb = pos.ins().band_imm(x, 1);
-    let ifinal = pos.ins().bor(ihalf, lsb);
-    let fhalf = pos.ins().fcvt_from_sint(ty, ifinal);
-    let negres = pos.ins().fadd(fhalf, fhalf);
-
-    // Recycle the original instruction as a jump.
-    pos.func.dfg.replace(inst).jump(done, &[negres]);
-
-    // Finally insert a label for the completion.
-    pos.next_inst();
-    pos.insert_block(done);
-
-    cfg.recompute_block(pos.func, old_block);
-    cfg.recompute_block(pos.func, poszero_block);
-    cfg.recompute_block(pos.func, neg_block);
-    cfg.recompute_block(pos.func, done);
-}
-
-/// To convert packed unsigned integers to their float equivalents, we must legalize to a special
-/// AVX512 instruction (using MCSR rounding) or use a long sequence of instructions. This logic is
-/// separate from [expand_fcvt_from_uint] above (the scalar version), only due to how the transform
-/// groups are set up; TODO if we change the SIMD legalization groups, then this logic could be
-/// merged into [expand_fcvt_from_uint] (see https://github.com/bytecodealliance/wasmtime/issues/1745).
-fn expand_fcvt_from_uint_vector(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    _cfg: &mut ControlFlowGraph,
-    isa: &dyn TargetIsa,
-) {
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-
-    if let ir::InstructionData::Unary {
-        opcode: ir::Opcode::FcvtFromUint,
-        arg,
-    } = pos.func.dfg[inst]
-    {
-        let controlling_type = pos.func.dfg.ctrl_typevar(inst);
-        if controlling_type == F32X4 {
-            debug_assert_eq!(pos.func.dfg.value_type(arg), I32X4);
-            let x86_isa = isa
-                .as_any()
-                .downcast_ref::<isa::x86::Isa>()
-                .expect("the target ISA must be x86 at this point");
-            if x86_isa.isa_flags.use_avx512vl_simd() || x86_isa.isa_flags.use_avx512f_simd() {
-                // If we have certain AVX512 features, we can lower this instruction simply.
-                pos.func.dfg.replace(inst).x86_vcvtudq2ps(arg);
-            } else {
-                // Otherwise, we default to a very lengthy SSE4.1-compatible sequence: PXOR,
-                // PBLENDW, PSUB, CVTDQ2PS, PSRLD, CVTDQ2PS, ADDPS, ADDPS
-                let bitcast_arg = pos.ins().raw_bitcast(I16X8, arg);
-                let zero_constant = pos.func.dfg.constants.insert(vec![0; 16].into());
-                let zero = pos.ins().vconst(I16X8, zero_constant);
-                let low = pos.ins().x86_pblendw(zero, bitcast_arg, 0x55);
-                let bitcast_low = pos.ins().raw_bitcast(I32X4, low);
-                let high = pos.ins().isub(arg, bitcast_low);
-                let convert_low = pos.ins().fcvt_from_sint(F32X4, bitcast_low);
-                let shift_high = pos.ins().ushr_imm(high, 1);
-                let convert_high = pos.ins().fcvt_from_sint(F32X4, shift_high);
-                let double_high = pos.ins().fadd(convert_high, convert_high);
-                pos.func.dfg.replace(inst).fadd(double_high, convert_low);
-            }
-        } else {
-            unimplemented!("cannot legalize {}", pos.func.dfg.display_inst(inst, None))
-        }
-    }
-}
-
-fn expand_fcvt_to_sint(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    cfg: &mut ControlFlowGraph,
-    _isa: &dyn TargetIsa,
-) {
-    use crate::ir::immediates::{Ieee32, Ieee64};
-
-    let x = match func.dfg[inst] {
-        ir::InstructionData::Unary {
-            opcode: ir::Opcode::FcvtToSint,
-            arg,
-        } => arg,
-        _ => panic!("Need fcvt_to_sint: {}", func.dfg.display_inst(inst, None)),
-    };
-    let old_block = func.layout.pp_block(inst);
-    let xty = func.dfg.value_type(x);
-    let result = func.dfg.first_result(inst);
-    let ty = func.dfg.value_type(result);
-
-    // Final block after the bad value checks.
-    let done = func.dfg.make_block();
-
-    // block for checking failure cases.
-    let maybe_trap_block = func.dfg.make_block();
-
-    // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or overflow.
-    // It produces an INT_MIN result instead.
-    func.dfg.replace(inst).x86_cvtt2si(ty, x);
-
-    let mut pos = FuncCursor::new(func).after_inst(inst);
-    pos.use_srcloc(inst);
-
-    let is_done = pos
-        .ins()
-        .icmp_imm(IntCC::NotEqual, result, 1 << (ty.lane_bits() - 1));
-    pos.ins().brnz(is_done, done, &[]);
-    pos.ins().jump(maybe_trap_block, &[]);
-
-    // We now have the following possibilities:
-    //
-    // 1. INT_MIN was actually the correct conversion result.
-    // 2. The input was NaN -> trap bad_toint
-    // 3. The input was out of range -> trap int_ovf
-    //
-    pos.insert_block(maybe_trap_block);
-
-    // Check for NaN.
-    let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x);
-    pos.ins()
-        .trapnz(is_nan, ir::TrapCode::BadConversionToInteger);
-
-    // Check for case 1: INT_MIN is the correct result.
-    // Determine the smallest floating point number that would convert to INT_MIN.
-    let mut overflow_cc = FloatCC::LessThan;
-    let output_bits = ty.lane_bits();
-    let flimit = match xty {
-        ir::types::F32 =>
-        // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
-        // there are values less than -2^(N-1) that convert correctly to INT_MIN.
-        {
-            pos.ins().f32const(if output_bits < 32 {
-                overflow_cc = FloatCC::LessThanOrEqual;
-                Ieee32::fcvt_to_sint_negative_overflow(output_bits)
-            } else {
-                Ieee32::pow2(output_bits - 1).neg()
-            })
-        }
-        ir::types::F64 =>
-        // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
-        // there are values less than -2^(N-1) that convert correctly to INT_MIN.
-        {
-            pos.ins().f64const(if output_bits < 64 {
-                overflow_cc = FloatCC::LessThanOrEqual;
-                Ieee64::fcvt_to_sint_negative_overflow(output_bits)
-            } else {
-                Ieee64::pow2(output_bits - 1).neg()
-            })
-        }
-        _ => panic!("Can't convert {}", xty),
-    };
-    let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
-    pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);
-
-    // Finally, we could have a positive value that is too large.
-    let fzero = match xty {
-        ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
-        ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
-        _ => panic!("Can't convert {}", xty),
-    };
-    let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
-    pos.ins().trapnz(overflow, ir::TrapCode::IntegerOverflow);
-
-    pos.ins().jump(done, &[]);
-    pos.insert_block(done);
-
-    cfg.recompute_block(pos.func, old_block);
-    cfg.recompute_block(pos.func, maybe_trap_block);
-    cfg.recompute_block(pos.func, done);
-}
-
-fn expand_fcvt_to_sint_sat(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    cfg: &mut ControlFlowGraph,
-    _isa: &dyn TargetIsa,
-) {
-    use crate::ir::immediates::{Ieee32, Ieee64};
-
-    let x = match func.dfg[inst] {
-        ir::InstructionData::Unary {
-            opcode: ir::Opcode::FcvtToSintSat,
-            arg,
-        } => arg,
-        _ => panic!(
-            "Need fcvt_to_sint_sat: {}",
-            func.dfg.display_inst(inst, None)
-        ),
-    };
-
-    let old_block = func.layout.pp_block(inst);
-    let xty = func.dfg.value_type(x);
-    let result = func.dfg.first_result(inst);
-    let ty = func.dfg.value_type(result);
-
-    // Final block after the bad value checks.
-    let done_block = func.dfg.make_block();
-    let intmin_block = func.dfg.make_block();
-    let minsat_block = func.dfg.make_block();
-    let maxsat_block = func.dfg.make_block();
-    func.dfg.clear_results(inst);
-    func.dfg.attach_block_param(done_block, result);
-
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-
-    // The `x86_cvtt2si` performs the desired conversion, but it doesn't trap on NaN or
-    // overflow. It produces an INT_MIN result instead.
-    let cvtt2si = pos.ins().x86_cvtt2si(ty, x);
-
-    let is_done = pos
-        .ins()
-        .icmp_imm(IntCC::NotEqual, cvtt2si, 1 << (ty.lane_bits() - 1));
-    pos.ins().brnz(is_done, done_block, &[cvtt2si]);
-    pos.ins().jump(intmin_block, &[]);
-
-    // We now have the following possibilities:
-    //
-    // 1. INT_MIN was actually the correct conversion result.
-    // 2. The input was NaN -> replace the result value with 0.
-    // 3. The input was out of range -> saturate the result to the min/max value.
-    pos.insert_block(intmin_block);
-
-    // Check for NaN, which is truncated to 0.
-    let zero = pos.ins().iconst(ty, 0);
-    let is_nan = pos.ins().fcmp(FloatCC::Unordered, x, x);
-    pos.ins().brnz(is_nan, done_block, &[zero]);
-    pos.ins().jump(minsat_block, &[]);
-
-    // Check for case 1: INT_MIN is the correct result.
-    // Determine the smallest floating point number that would convert to INT_MIN.
-    pos.insert_block(minsat_block);
-    let mut overflow_cc = FloatCC::LessThan;
-    let output_bits = ty.lane_bits();
-    let flimit = match xty {
-        ir::types::F32 =>
-        // An f32 can represent `i16::min_value() - 1` exactly with precision to spare, so
-        // there are values less than -2^(N-1) that convert correctly to INT_MIN.
-        {
-            pos.ins().f32const(if output_bits < 32 {
-                overflow_cc = FloatCC::LessThanOrEqual;
-                Ieee32::fcvt_to_sint_negative_overflow(output_bits)
-            } else {
-                Ieee32::pow2(output_bits - 1).neg()
-            })
-        }
-        ir::types::F64 =>
-        // An f64 can represent `i32::min_value() - 1` exactly with precision to spare, so
-        // there are values less than -2^(N-1) that convert correctly to INT_MIN.
-        {
-            pos.ins().f64const(if output_bits < 64 {
-                overflow_cc = FloatCC::LessThanOrEqual;
-                Ieee64::fcvt_to_sint_negative_overflow(output_bits)
-            } else {
-                Ieee64::pow2(output_bits - 1).neg()
-            })
-        }
-        _ => panic!("Can't convert {}", xty),
-    };
-
-    let overflow = pos.ins().fcmp(overflow_cc, x, flimit);
-    let min_imm = match ty {
-        ir::types::I32 => i32::min_value() as i64,
-        ir::types::I64 => i64::min_value(),
-        _ => panic!("Don't know the min value for {}", ty),
-    };
-    let min_value = pos.ins().iconst(ty, min_imm);
-    pos.ins().brnz(overflow, done_block, &[min_value]);
-    pos.ins().jump(maxsat_block, &[]);
-
-    // Finally, we could have a positive value that is too large.
-    pos.insert_block(maxsat_block);
-    let fzero = match xty {
-        ir::types::F32 => pos.ins().f32const(Ieee32::with_bits(0)),
-        ir::types::F64 => pos.ins().f64const(Ieee64::with_bits(0)),
-        _ => panic!("Can't convert {}", xty),
-    };
-
-    let max_imm = match ty {
-        ir::types::I32 => i32::max_value() as i64,
-        ir::types::I64 => i64::max_value(),
-        _ => panic!("Don't know the max value for {}", ty),
-    };
-    let max_value = pos.ins().iconst(ty, max_imm);
-
-    let overflow = pos.ins().fcmp(FloatCC::GreaterThanOrEqual, x, fzero);
-    pos.ins().brnz(overflow, done_block, &[max_value]);
-
-    // Recycle the original instruction.
-    pos.func.dfg.replace(inst).jump(done_block, &[cvtt2si]);
-
-    // Finally insert a label for the completion.
-    pos.next_inst();
-    pos.insert_block(done_block);
-
-    cfg.recompute_block(pos.func, old_block);
-    cfg.recompute_block(pos.func, intmin_block);
-    cfg.recompute_block(pos.func, minsat_block);
-    cfg.recompute_block(pos.func, maxsat_block);
-    cfg.recompute_block(pos.func, done_block);
-}
-
-/// This legalization converts a vector of 32-bit floating point lanes to signed integer lanes
-/// using CVTTPS2DQ (see encoding of `x86_cvtt2si`). This logic is separate from [expand_fcvt_to_sint_sat]
-/// above (the scalar version), only due to how the transform groups are set up; TODO if we change
-/// the SIMD legalization groups, then this logic could be merged into [expand_fcvt_to_sint_sat]
-/// (see https://github.com/bytecodealliance/wasmtime/issues/1745).
-fn expand_fcvt_to_sint_sat_vector(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    _cfg: &mut ControlFlowGraph,
-    _isa: &dyn TargetIsa,
-) {
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-
-    if let ir::InstructionData::Unary {
-        opcode: ir::Opcode::FcvtToSintSat,
-        arg,
-    } = pos.func.dfg[inst]
-    {
-        let controlling_type = pos.func.dfg.ctrl_typevar(inst);
-        if controlling_type == I32X4 {
-            debug_assert_eq!(pos.func.dfg.value_type(arg), F32X4);
-            // We must both quiet any NaNs--setting that lane to 0--and saturate any
-            // lanes that might overflow during conversion to the highest/lowest signed integer
-            // allowed in that lane.
-
-            // Saturate NaNs: `fcmp eq` will not match if a lane contains a NaN. We use ANDPS to
-            // avoid doing the comparison twice (we need the zeroed lanes to find differences).
-            let zeroed_nans = pos.ins().fcmp(FloatCC::Equal, arg, arg);
-            let zeroed_nans_bitcast = pos.ins().raw_bitcast(F32X4, zeroed_nans);
-            let zeroed_nans_copy = pos.ins().band(arg, zeroed_nans_bitcast);
-
-            // Find differences with the zeroed lanes (we will only use the MSB: 1 if positive or
-            // NaN, 0 otherwise).
-            let differences = pos.ins().bxor(zeroed_nans_bitcast, arg);
-            let differences_bitcast = pos.ins().raw_bitcast(I32X4, differences);
-
-            // Convert the numeric lanes. CVTTPS2DQ will mark overflows with 0x80000000 (MSB set).
-            let converted = pos.ins().x86_cvtt2si(I32X4, zeroed_nans_copy);
-
-            // Create a mask of all 1s only on positive overflow, 0s otherwise. This uses the MSB
-            // of `differences` (1 when positive or NaN) and the MSB of `converted` (1 on positive
-            // overflow).
-            let tmp = pos.ins().band(differences_bitcast, converted);
-            let mask = pos.ins().sshr_imm(tmp, 31);
-
-            // Apply the mask to create 0x7FFFFFFF for positive overflow. XOR of all 0s (all other
-            // cases) has no effect.
-            pos.func.dfg.replace(inst).bxor(converted, mask);
-        } else {
-            unimplemented!("cannot legalize {}", pos.func.dfg.display_inst(inst, None))
-        }
-    }
-}
-
-fn expand_fcvt_to_uint(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    cfg: &mut ControlFlowGraph,
-    _isa: &dyn TargetIsa,
-) {
-    use crate::ir::immediates::{Ieee32, Ieee64};
-
-    let x = match func.dfg[inst] {
-        ir::InstructionData::Unary {
-            opcode: ir::Opcode::FcvtToUint,
-            arg,
-        } => arg,
-        _ => panic!("Need fcvt_to_uint: {}", func.dfg.display_inst(inst, None)),
-    };
-
-    let old_block = func.layout.pp_block(inst);
-    let xty = func.dfg.value_type(x);
-    let result = func.dfg.first_result(inst);
-    let ty = func.dfg.value_type(result);
-
-    // block handle numbers < 2^(N-1).
-    let below_uint_max_block = func.dfg.make_block();
-
-    // block handle numbers < 0.
-    let below_zero_block = func.dfg.make_block();
-
-    // block handling numbers >= 2^(N-1).
-    let large = func.dfg.make_block();
-
-    // Final block after the bad value checks.
-    let done = func.dfg.make_block();
-
-    // Move the `inst` result value onto the `done` block.
-    func.dfg.clear_results(inst);
-    func.dfg.attach_block_param(done, result);
-
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-
-    // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in
-    // the destination integer type.
-    let pow2nm1 = match xty {
-        ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
-        ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
-        _ => panic!("Can't convert {}", xty),
-    };
-    let is_large = pos.ins().ffcmp(x, pow2nm1);
-    pos.ins()
-        .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]);
-    pos.ins().jump(below_uint_max_block, &[]);
-
-    // We need to generate a specific trap code when `x` is NaN, so reuse the flags from the
-    // previous comparison.
-    pos.insert_block(below_uint_max_block);
-    pos.ins().trapff(
-        FloatCC::Unordered,
-        is_large,
-        ir::TrapCode::BadConversionToInteger,
-    );
-
-    // Now we know that x < 2^(N-1) and not NaN.
-    let sres = pos.ins().x86_cvtt2si(ty, x);
-    let is_neg = pos.ins().ifcmp_imm(sres, 0);
-    pos.ins()
-        .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]);
-    pos.ins().jump(below_zero_block, &[]);
-
-    pos.insert_block(below_zero_block);
-    pos.ins().trap(ir::TrapCode::IntegerOverflow);
-
-    // Handle the case where x >= 2^(N-1) and not NaN.
-    pos.insert_block(large);
-    let adjx = pos.ins().fsub(x, pow2nm1);
-    let lres = pos.ins().x86_cvtt2si(ty, adjx);
-    let is_neg = pos.ins().ifcmp_imm(lres, 0);
-    pos.ins()
-        .trapif(IntCC::SignedLessThan, is_neg, ir::TrapCode::IntegerOverflow);
-    let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1));
-
-    // Recycle the original instruction as a jump.
-    pos.func.dfg.replace(inst).jump(done, &[lfinal]);
-
-    // Finally insert a label for the completion.
-    pos.next_inst();
-    pos.insert_block(done);
-
-    cfg.recompute_block(pos.func, old_block);
-    cfg.recompute_block(pos.func, below_uint_max_block);
-    cfg.recompute_block(pos.func, below_zero_block);
-    cfg.recompute_block(pos.func, large);
-    cfg.recompute_block(pos.func, done);
-}
-
-fn expand_fcvt_to_uint_sat(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    cfg: &mut ControlFlowGraph,
-    _isa: &dyn TargetIsa,
-) {
-    use crate::ir::immediates::{Ieee32, Ieee64};
-
-    let x = match func.dfg[inst] {
-        ir::InstructionData::Unary {
-            opcode: ir::Opcode::FcvtToUintSat,
-            arg,
-        } => arg,
-        _ => panic!(
-            "Need fcvt_to_uint_sat: {}",
-            func.dfg.display_inst(inst, None)
-        ),
-    };
-
-    let old_block = func.layout.pp_block(inst);
-    let xty = func.dfg.value_type(x);
-    let result = func.dfg.first_result(inst);
-    let ty = func.dfg.value_type(result);
-
-    // block handle numbers < 2^(N-1).
-    let below_pow2nm1_or_nan_block = func.dfg.make_block();
-    let below_pow2nm1_block = func.dfg.make_block();
-
-    // block handling numbers >= 2^(N-1).
-    let large = func.dfg.make_block();
-
-    // block handling numbers < 2^N.
-    let uint_large_block = func.dfg.make_block();
-
-    // Final block after the bad value checks.
-    let done = func.dfg.make_block();
-
-    // Move the `inst` result value onto the `done` block.
-    func.dfg.clear_results(inst);
-    func.dfg.attach_block_param(done, result);
-
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-
-    // Start by materializing the floating point constant 2^(N-1) where N is the number of bits in
-    // the destination integer type.
-    let pow2nm1 = match xty {
-        ir::types::F32 => pos.ins().f32const(Ieee32::pow2(ty.lane_bits() - 1)),
-        ir::types::F64 => pos.ins().f64const(Ieee64::pow2(ty.lane_bits() - 1)),
-        _ => panic!("Can't convert {}", xty),
-    };
-    let zero = pos.ins().iconst(ty, 0);
-    let is_large = pos.ins().ffcmp(x, pow2nm1);
-    pos.ins()
-        .brff(FloatCC::GreaterThanOrEqual, is_large, large, &[]);
-    pos.ins().jump(below_pow2nm1_or_nan_block, &[]);
-
-    // We need to generate zero when `x` is NaN, so reuse the flags from the previous comparison.
-    pos.insert_block(below_pow2nm1_or_nan_block);
-    pos.ins().brff(FloatCC::Unordered, is_large, done, &[zero]);
-    pos.ins().jump(below_pow2nm1_block, &[]);
-
-    // Now we know that x < 2^(N-1) and not NaN. If the result of the cvtt2si is positive, we're
-    // done; otherwise saturate to the minimum unsigned value, that is 0.
-    pos.insert_block(below_pow2nm1_block);
-    let sres = pos.ins().x86_cvtt2si(ty, x);
-    let is_neg = pos.ins().ifcmp_imm(sres, 0);
-    pos.ins()
-        .brif(IntCC::SignedGreaterThanOrEqual, is_neg, done, &[sres]);
-    pos.ins().jump(done, &[zero]);
-
-    // Handle the case where x >= 2^(N-1) and not NaN.
-    pos.insert_block(large);
-    let adjx = pos.ins().fsub(x, pow2nm1);
-    let lres = pos.ins().x86_cvtt2si(ty, adjx);
-    let max_value = pos.ins().iconst(
-        ty,
-        match ty {
-            ir::types::I32 => u32::max_value() as i64,
-            ir::types::I64 => u64::max_value() as i64,
-            _ => panic!("Can't convert {}", ty),
-        },
-    );
-    let is_neg = pos.ins().ifcmp_imm(lres, 0);
-    pos.ins()
-        .brif(IntCC::SignedLessThan, is_neg, done, &[max_value]);
-    pos.ins().jump(uint_large_block, &[]);
-
-    pos.insert_block(uint_large_block);
-    let lfinal = pos.ins().iadd_imm(lres, 1 << (ty.lane_bits() - 1));
-
-    // Recycle the original instruction as a jump.
-    pos.func.dfg.replace(inst).jump(done, &[lfinal]);
-
-    // Finally insert a label for the completion.
-    pos.next_inst();
-    pos.insert_block(done);
-
-    cfg.recompute_block(pos.func, old_block);
-    cfg.recompute_block(pos.func, below_pow2nm1_or_nan_block);
-    cfg.recompute_block(pos.func, below_pow2nm1_block);
-    cfg.recompute_block(pos.func, large);
-    cfg.recompute_block(pos.func, uint_large_block);
-    cfg.recompute_block(pos.func, done);
-}
-
-// Lanes of an I32x4 filled with the max signed integer values converted to an F32x4.
-static MAX_SIGNED_I32X4S_AS_F32X4S: [u8; 16] = [
-    0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f, 0x00, 0x00, 0x00, 0x4f,
-];
-
-/// This legalization converts a vector of 32-bit floating point lanes to unsigned integer lanes
-/// using a long sequence of NaN quieting and truncation. This logic is separate from
-/// [expand_fcvt_to_uint_sat] above (the scalar version), only due to how the transform groups are
-/// set up; TODO if we change the SIMD legalization groups, then this logic could be merged into
-/// [expand_fcvt_to_uint_sat] (see https://github.com/bytecodealliance/wasmtime/issues/1745).
-fn expand_fcvt_to_uint_sat_vector(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    _cfg: &mut ControlFlowGraph,
-    _isa: &dyn TargetIsa,
-) {
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-
-    if let ir::InstructionData::Unary {
-        opcode: ir::Opcode::FcvtToUintSat,
-        arg,
-    } = pos.func.dfg[inst]
-    {
-        let controlling_type = pos.func.dfg.ctrl_typevar(inst);
-        if controlling_type == I32X4 {
-            debug_assert_eq!(pos.func.dfg.value_type(arg), F32X4);
-            // We must both quiet any NaNs--setting that lane to 0--and saturate any
-            // lanes that might overflow during conversion to the highest/lowest integer
-            // allowed in that lane.
-            let zeroes_constant = pos.func.dfg.constants.insert(vec![0x00; 16].into());
-            let max_signed_constant = pos
-                .func
-                .dfg
-                .constants
-                .insert(MAX_SIGNED_I32X4S_AS_F32X4S.as_ref().into());
-            let zeroes = pos.ins().vconst(F32X4, zeroes_constant);
-            let max_signed = pos.ins().vconst(F32X4, max_signed_constant);
-            // Clamp the input to 0 for negative floating point numbers. TODO we need to
-            // convert NaNs to 0 but this doesn't do that?
-            let ge_zero = pos.ins().x86_fmax(arg, zeroes);
-            // Find lanes that exceed the max signed value that CVTTPS2DQ knows how to convert.
-            // For floating point numbers above this, CVTTPS2DQ returns the undefined value
-            // 0x80000000.
-            let minus_max_signed = pos.ins().fsub(ge_zero, max_signed);
-            let le_max_signed =
-                pos.ins()
-                    .fcmp(FloatCC::LessThanOrEqual, max_signed, minus_max_signed);
-            // Identify lanes that have minus_max_signed > max_signed || minus_max_signed < 0.
-            // These lanes have the MSB set to 1 after the XOR. We are trying to calculate a
-            // valid, in-range addend.
-            let minus_max_signed_as_int = pos.ins().x86_cvtt2si(I32X4, minus_max_signed);
-            let le_max_signed_as_int = pos.ins().raw_bitcast(I32X4, le_max_signed);
-            let difference = pos
-                .ins()
-                .bxor(minus_max_signed_as_int, le_max_signed_as_int);
-            // Calculate amount to add above 0x7FFFFFF, zeroing out any lanes identified
-            // previously (MSB set to 1).
-            let zeroes_as_int = pos.ins().raw_bitcast(I32X4, zeroes);
-            let addend = pos.ins().x86_pmaxs(difference, zeroes_as_int);
-            // Convert the original clamped number to an integer and add back in the addend
-            // (the part of the value above 0x7FFFFFF, since CVTTPS2DQ overflows with these).
-            let converted = pos.ins().x86_cvtt2si(I32X4, ge_zero);
-            pos.func.dfg.replace(inst).iadd(converted, addend);
-        } else {
-            unreachable!(
-                "{} should not be legalized in expand_fcvt_to_uint_sat_vector",
-                pos.func.dfg.display_inst(inst, None)
-            )
-        }
-    }
-}
-
-/// Convert shuffle instructions.
-fn convert_shuffle(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    _cfg: &mut ControlFlowGraph,
-    _isa: &dyn TargetIsa,
-) {
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-
-    if let ir::InstructionData::Shuffle { args, mask, .. } = pos.func.dfg[inst] {
-        // A mask-building helper: in 128-bit SIMD, 0-15 indicate which lane to read from and a 1
-        // in the most significant position zeroes the lane.
-        let zero_unknown_lane_index = |b: u8| if b > 15 { 0b10000000 } else { b };
-
-        // We only have to worry about aliasing here because copies will be introduced later (in
-        // regalloc).
-        let a = pos.func.dfg.resolve_aliases(args[0]);
-        let b = pos.func.dfg.resolve_aliases(args[1]);
-        let mask = pos
-            .func
-            .dfg
-            .immediates
-            .get(mask)
-            .expect("The shuffle immediate should have been recorded before this point")
-            .clone();
-        if a == b {
-            // PSHUFB the first argument (since it is the same as the second).
-            let constructed_mask = mask
-                .iter()
-                // If the mask is greater than 15 it still may be referring to a lane in b.
-                .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b })
-                .map(zero_unknown_lane_index)
-                .collect();
-            let handle = pos.func.dfg.constants.insert(constructed_mask);
-            // Move the built mask into another XMM register.
-            let a_type = pos.func.dfg.value_type(a);
-            let mask_value = pos.ins().vconst(a_type, handle);
-            // Shuffle the single incoming argument.
-            pos.func.dfg.replace(inst).x86_pshufb(a, mask_value);
-        } else {
-            // PSHUFB the first argument, placing zeroes for unused lanes.
-            let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect();
-            let handle = pos.func.dfg.constants.insert(constructed_mask);
-            // Move the built mask into another XMM register.
-            let a_type = pos.func.dfg.value_type(a);
-            let mask_value = pos.ins().vconst(a_type, handle);
-            // Shuffle the first argument.
-            let shuffled_first_arg = pos.ins().x86_pshufb(a, mask_value);
-
-            // PSHUFB the second argument, placing zeroes for unused lanes.
-            let constructed_mask = mask
-                .iter()
-                .map(|b| b.wrapping_sub(16))
-                .map(zero_unknown_lane_index)
-                .collect();
-            let handle = pos.func.dfg.constants.insert(constructed_mask);
-            // Move the built mask into another XMM register.
-            let b_type = pos.func.dfg.value_type(b);
-            let mask_value = pos.ins().vconst(b_type, handle);
-            // Shuffle the second argument.
-            let shuffled_second_arg = pos.ins().x86_pshufb(b, mask_value);
-
-            // OR the vectors together to form the final shuffled value.
-            pos.func
-                .dfg
-                .replace(inst)
-                .bor(shuffled_first_arg, shuffled_second_arg);
-
-            // TODO when AVX512 is enabled we should replace this sequence with a single VPERMB
-        };
-    }
-}
-
-/// Because floats already exist in XMM registers, we can keep them there when executing a CLIF
-/// extractlane instruction
-fn convert_extractlane(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    _cfg: &mut ControlFlowGraph,
-    _isa: &dyn TargetIsa,
-) {
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-
-    if let ir::InstructionData::BinaryImm8 {
-        opcode: ir::Opcode::Extractlane,
-        arg,
-        imm: lane,
-    } = pos.func.dfg[inst]
-    {
-        // NOTE: the following legalization assumes that the upper bits of the XMM register do
-        // not need to be zeroed during extractlane.
-        let value_type = pos.func.dfg.value_type(arg);
-        if value_type.lane_type().is_float() {
-            // Floats are already in XMM registers and can stay there.
-            let shuffled = if lane != 0 {
-                // Replace the extractlane with a PSHUFD to get the float in the right place.
-                match value_type {
-                    F32X4 => {
-                        // Move the selected lane to the 0 lane.
-                        let shuffle_mask: u8 = 0b00_00_00_00 | lane;
-                        pos.ins().x86_pshufd(arg, shuffle_mask)
-                    }
-                    F64X2 => {
-                        assert_eq!(lane, 1);
-                        // Because we know the lane == 1, we move the upper 64 bits to the lower
-                        // 64 bits, leaving the top 64 bits as-is.
-                        let shuffle_mask = 0b11_10_11_10;
-                        let bitcast = pos.ins().raw_bitcast(F32X4, arg);
-                        pos.ins().x86_pshufd(bitcast, shuffle_mask)
-                    }
-                    _ => unreachable!(),
-                }
-            } else {
-                // Remove the extractlane instruction, leaving the float where it is.
-                arg
-            };
-            // Then we must bitcast to the right type.
-            pos.func
-                .dfg
-                .replace(inst)
-                .raw_bitcast(value_type.lane_type(), shuffled);
-        } else {
-            // For non-floats, lower with the usual PEXTR* instruction.
-            pos.func.dfg.replace(inst).x86_pextr(arg, lane);
-        }
-    }
-}
-
-/// Because floats exist in XMM registers, we can keep them there when executing a CLIF
-/// insertlane instruction
-fn convert_insertlane(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    _cfg: &mut ControlFlowGraph,
-    _isa: &dyn TargetIsa,
-) {
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-
-    if let ir::InstructionData::TernaryImm8 {
-        opcode: ir::Opcode::Insertlane,
-        args: [vector, replacement],
-        imm: lane,
-    } = pos.func.dfg[inst]
-    {
-        let value_type = pos.func.dfg.value_type(vector);
-        if value_type.lane_type().is_float() {
-            // Floats are already in XMM registers and can stay there.
-            match value_type {
-                F32X4 => {
-                    assert!(lane <= 3);
-                    let immediate = 0b00_00_00_00 | lane << 4;
-                    // Insert 32-bits from replacement (at index 00, bits 7:8) to vector (lane
-                    // shifted into bits 5:6).
-                    pos.func
-                        .dfg
-                        .replace(inst)
-                        .x86_insertps(vector, replacement, immediate)
-                }
-                F64X2 => {
-                    let replacement_as_vector = pos.ins().raw_bitcast(F64X2, replacement); // only necessary due to SSA types
-                    if lane == 0 {
-                        // Move the lowest quadword in replacement to vector without changing
-                        // the upper bits.
-                        pos.func
-                            .dfg
-                            .replace(inst)
-                            .x86_movsd(vector, replacement_as_vector)
-                    } else {
-                        assert_eq!(lane, 1);
-                        // Move the low 64 bits of replacement vector to the high 64 bits of the
-                        // vector.
-                        pos.func
-                            .dfg
-                            .replace(inst)
-                            .x86_movlhps(vector, replacement_as_vector)
-                    }
-                }
-                _ => unreachable!(),
-            };
-        } else {
-            // For non-floats, lower with the usual PINSR* instruction.
-            pos.func
-                .dfg
-                .replace(inst)
-                .x86_pinsr(vector, replacement, lane);
-        }
-    }
-}
-
-/// For SIMD or scalar integer negation, convert `ineg` to `vconst + isub` or `iconst + isub`.
-fn convert_ineg(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    _cfg: &mut ControlFlowGraph,
-    _isa: &dyn TargetIsa,
-) {
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-
-    if let ir::InstructionData::Unary {
-        opcode: ir::Opcode::Ineg,
-        arg,
-    } = pos.func.dfg[inst]
-    {
-        let value_type = pos.func.dfg.value_type(arg);
-        let zero_value = if value_type.is_vector() && value_type.lane_type().is_int() {
-            let zero_immediate = pos.func.dfg.constants.insert(vec![0; 16].into());
-            pos.ins().vconst(value_type, zero_immediate) // this should be legalized to a PXOR
-        } else if value_type.is_int() {
-            pos.ins().iconst(value_type, 0)
-        } else {
-            panic!("Can't convert ineg of type {}", value_type)
-        };
-        pos.func.dfg.replace(inst).isub(zero_value, arg);
-    } else {
-        unreachable!()
-    }
-}
-
-fn expand_dword_to_xmm<'f>(
-    pos: &mut FuncCursor<'_>,
-    arg: ir::Value,
-    arg_type: ir::Type,
-) -> ir::Value {
-    if arg_type == I64 {
-        let (arg_lo, arg_hi) = pos.ins().isplit(arg);
-        let arg = pos.ins().scalar_to_vector(I32X4, arg_lo);
-        let arg = pos.ins().insertlane(arg, arg_hi, 1);
-        let arg = pos.ins().raw_bitcast(I64X2, arg);
-        arg
-    } else {
-        pos.ins().bitcast(I64X2, arg)
-    }
-}
-
-fn contract_dword_from_xmm<'f>(
-    pos: &mut FuncCursor<'f>,
-    inst: ir::Inst,
-    ret: ir::Value,
-    ret_type: ir::Type,
-) {
-    if ret_type == I64 {
-        let ret = pos.ins().raw_bitcast(I32X4, ret);
-        let ret_lo = pos.ins().extractlane(ret, 0);
-        let ret_hi = pos.ins().extractlane(ret, 1);
-        pos.func.dfg.replace(inst).iconcat(ret_lo, ret_hi);
-    } else {
-        let ret = pos.ins().extractlane(ret, 0);
-        pos.func.dfg.replace(inst).ireduce(ret_type, ret);
-    }
-}
-
-// Masks for i8x16 unsigned right shift.
-static USHR_MASKS: [u8; 128] = [
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f,
-    0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,
-    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
-    0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f, 0x0f,
-    0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
-    0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
-    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
-];
-
-// Convert a vector unsigned right shift. x86 has implementations for i16x8 and up (see `x86_pslr`),
-// but for i8x16 we translate the shift to a i16x8 shift and mask off the upper bits. This same
-// conversion could be provided in the CDSL if we could use varargs there (TODO); i.e. `load_complex`
-// has a varargs field that we can't modify with the CDSL in legalize.rs.
-fn convert_ushr(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    _cfg: &mut ControlFlowGraph,
-    isa: &dyn TargetIsa,
-) {
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-
-    if let ir::InstructionData::Binary {
-        opcode: ir::Opcode::Ushr,
-        args: [arg0, arg1],
-    } = pos.func.dfg[inst]
-    {
-        // Note that for Wasm, the bounding of the shift index has happened during translation
-        let arg0_type = pos.func.dfg.value_type(arg0);
-        let arg1_type = pos.func.dfg.value_type(arg1);
-        assert!(!arg1_type.is_vector() && arg1_type.is_int());
-
-        // TODO it may be more clear to use scalar_to_vector here; the current issue is that
-        // scalar_to_vector has the restriction that the vector produced has a matching lane size
-        // (e.g. i32 -> i32x4) whereas bitcast allows moving any-to-any conversions (e.g. i32 ->
-        // i64x2). This matters because for some reason x86_psrl only allows i64x2 as the shift
-        // index type--this could be relaxed since it is not really meaningful.
-        let shift_index = pos.ins().bitcast(I64X2, arg1);
-
-        if arg0_type == I8X16 {
-            // First, shift the vector using an I16X8 shift.
-            let bitcasted = pos.ins().raw_bitcast(I16X8, arg0);
-            let shifted = pos.ins().x86_psrl(bitcasted, shift_index);
-            let shifted = pos.ins().raw_bitcast(I8X16, shifted);
-
-            // Then, fixup the even lanes that have incorrect upper bits. This uses the 128 mask
-            // bytes as a table that we index into. It is a substantial code-size increase but
-            // reduces the instruction count slightly.
-            let masks = pos.func.dfg.constants.insert(USHR_MASKS.as_ref().into());
-            let mask_address = pos.ins().const_addr(isa.pointer_type(), masks);
-            let mask_offset = pos.ins().ishl_imm(arg1, 4);
-            let mask =
-                pos.ins()
-                    .load_complex(arg0_type, MemFlags::new(), &[mask_address, mask_offset], 0);
-            pos.func.dfg.replace(inst).band(shifted, mask);
-        } else if arg0_type.is_vector() {
-            // x86 has encodings for these shifts.
-            pos.func.dfg.replace(inst).x86_psrl(arg0, shift_index);
-        } else if arg0_type == I64 {
-            // 64 bit shifts need to be legalized on x86_32.
-            let x86_isa = isa
-                .as_any()
-                .downcast_ref::<isa::x86::Isa>()
-                .expect("the target ISA must be x86 at this point");
-            if x86_isa.isa_flags.has_sse41() {
-                // if we have pinstrq/pextrq (SSE 4.1), legalize to that
-                let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type);
-                let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type);
-                let shifted = pos.ins().x86_psrl(value, amount);
-                contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type);
-            } else {
-                // otherwise legalize to libcall
-                expand_as_libcall(inst, func, isa);
-            }
-        } else {
-            // Everything else should be already legal.
-            unreachable!()
-        }
-    }
-}
-
-// Masks for i8x16 left shift.
-static SHL_MASKS: [u8; 128] = [
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe,
-    0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc, 0xfc,
-    0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
-    0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
-    0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
-    0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
-    0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
-];
-
-// Convert a vector left shift. x86 has implementations for i16x8 and up (see `x86_psll`),
-// but for i8x16 we translate the shift to a i16x8 shift and mask off the lower bits. This same
-// conversion could be provided in the CDSL if we could use varargs there (TODO); i.e. `load_complex`
-// has a varargs field that we can't modify with the CDSL in legalize.rs.
-fn convert_ishl(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    _cfg: &mut ControlFlowGraph,
-    isa: &dyn TargetIsa,
-) {
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-
-    if let ir::InstructionData::Binary {
-        opcode: ir::Opcode::Ishl,
-        args: [arg0, arg1],
-    } = pos.func.dfg[inst]
-    {
-        // Note that for Wasm, the bounding of the shift index has happened during translation
-        let arg0_type = pos.func.dfg.value_type(arg0);
-        let arg1_type = pos.func.dfg.value_type(arg1);
-        assert!(!arg1_type.is_vector() && arg1_type.is_int());
-
-        // TODO it may be more clear to use scalar_to_vector here; the current issue is that
-        // scalar_to_vector has the restriction that the vector produced has a matching lane size
-        // (e.g. i32 -> i32x4) whereas bitcast allows moving any-to-any conversions (e.g. i32 ->
-        // i64x2). This matters because for some reason x86_psrl only allows i64x2 as the shift
-        // index type--this could be relaxed since it is not really meaningful.
-        let shift_index = pos.ins().bitcast(I64X2, arg1);
-
-        if arg0_type == I8X16 {
-            // First, shift the vector using an I16X8 shift.
-            let bitcasted = pos.ins().raw_bitcast(I16X8, arg0);
-            let shifted = pos.ins().x86_psll(bitcasted, shift_index);
-            let shifted = pos.ins().raw_bitcast(I8X16, shifted);
-
-            // Then, fixup the even lanes that have incorrect lower bits. This uses the 128 mask
-            // bytes as a table that we index into. It is a substantial code-size increase but
-            // reduces the instruction count slightly.
-            let masks = pos.func.dfg.constants.insert(SHL_MASKS.as_ref().into());
-            let mask_address = pos.ins().const_addr(isa.pointer_type(), masks);
-            let mask_offset = pos.ins().ishl_imm(arg1, 4);
-            let mask =
-                pos.ins()
-                    .load_complex(arg0_type, MemFlags::new(), &[mask_address, mask_offset], 0);
-            pos.func.dfg.replace(inst).band(shifted, mask);
-        } else if arg0_type.is_vector() {
-            // x86 has encodings for these shifts.
-            pos.func.dfg.replace(inst).x86_psll(arg0, shift_index);
-        } else if arg0_type == I64 {
-            // 64 bit shifts need to be legalized on x86_32.
-            let x86_isa = isa
-                .as_any()
-                .downcast_ref::<isa::x86::Isa>()
-                .expect("the target ISA must be x86 at this point");
-            if x86_isa.isa_flags.has_sse41() {
-                // if we have pinstrq/pextrq (SSE 4.1), legalize to that
-                let value = expand_dword_to_xmm(&mut pos, arg0, arg0_type);
-                let amount = expand_dword_to_xmm(&mut pos, arg1, arg1_type);
-                let shifted = pos.ins().x86_psll(value, amount);
-                contract_dword_from_xmm(&mut pos, inst, shifted, arg0_type);
-            } else {
-                // otherwise legalize to libcall
-                expand_as_libcall(inst, func, isa);
-            }
-        } else {
-            // Everything else should be already legal.
-            unreachable!()
-        }
-    }
-}
-
-/// Convert an imul.i64x2 to a valid code sequence on x86, first with AVX512 and then with SSE2.
-fn convert_i64x2_imul(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    _cfg: &mut ControlFlowGraph,
-    isa: &dyn TargetIsa,
-) {
-    let mut pos = FuncCursor::new(func).at_inst(inst);
-    pos.use_srcloc(inst);
-
-    if let ir::InstructionData::Binary {
-        opcode: ir::Opcode::Imul,
-        args: [arg0, arg1],
-    } = pos.func.dfg[inst]
-    {
-        let ty = pos.func.dfg.ctrl_typevar(inst);
-        if ty == I64X2 {
-            let x86_isa = isa
-                .as_any()
-                .downcast_ref::<isa::x86::Isa>()
-                .expect("the target ISA must be x86 at this point");
-            if x86_isa.isa_flags.use_avx512dq_simd() || x86_isa.isa_flags.use_avx512vl_simd() {
-                // If we have certain AVX512 features, we can lower this instruction simply.
-                pos.func.dfg.replace(inst).x86_pmullq(arg0, arg1);
-            } else {
-                // Otherwise, we default to a very lengthy SSE2-compatible sequence. It splits each
-                // 64-bit lane into 32-bit high and low sections using shifting and then performs
-                // the following arithmetic per lane: with arg0 = concat(high0, low0) and arg1 =
-                // concat(high1, low1), calculate (high0 * low1) + (high1 * low0) + (low0 * low1).
-                let high0 = pos.ins().ushr_imm(arg0, 32);
-                let mul0 = pos.ins().x86_pmuludq(high0, arg1);
-                let high1 = pos.ins().ushr_imm(arg1, 32);
-                let mul1 = pos.ins().x86_pmuludq(high1, arg0);
-                let addhigh = pos.ins().iadd(mul0, mul1);
-                let high = pos.ins().ishl_imm(addhigh, 32);
-                let low = pos.ins().x86_pmuludq(arg0, arg1);
-                pos.func.dfg.replace(inst).iadd(low, high);
-            }
-        } else {
-            unreachable!(
-                "{} should be encodable; it cannot be legalized by convert_i64x2_imul",
-                pos.func.dfg.display_inst(inst, None)
-            );
-        }
-    }
-}
-
-fn expand_tls_value(
-    inst: ir::Inst,
-    func: &mut ir::Function,
-    _cfg: &mut ControlFlowGraph,
-    isa: &dyn TargetIsa,
-) {
-    use crate::settings::TlsModel;
-
-    assert!(
-        isa.triple().architecture == target_lexicon::Architecture::X86_64,
-        "Not yet implemented for {:?}",
-        isa.triple(),
-    );
-
-    if let ir::InstructionData::UnaryGlobalValue {
-        opcode: ir::Opcode::TlsValue,
-        global_value,
-    } = func.dfg[inst]
-    {
-        let ctrl_typevar = func.dfg.ctrl_typevar(inst);
-        assert_eq!(ctrl_typevar, ir::types::I64);
-
-        match isa.flags().tls_model() {
-            TlsModel::None => panic!("tls_model flag is not set."),
-            TlsModel::ElfGd => {
-                func.dfg.replace(inst).x86_elf_tls_get_addr(global_value);
-            }
-            TlsModel::Macho => {
-                func.dfg.replace(inst).x86_macho_tls_get_addr(global_value);
-            }
-            model => unimplemented!("tls_value for tls model {:?}", model),
-        }
-    } else {
-        unreachable!();
-    }
-}
diff --git a/cranelift/codegen/src/isa/legacy/x86/mod.rs b/cranelift/codegen/src/isa/legacy/x86/mod.rs
deleted file mode 100644
index e61fda1931..0000000000
--- a/cranelift/codegen/src/isa/legacy/x86/mod.rs
+++ /dev/null
@@ -1,199 +0,0 @@
-//! x86 Instruction Set Architectures.
-
-mod abi;
-mod binemit;
-mod enc_tables;
-mod registers;
-pub mod settings;
-#[cfg(feature = "unwind")]
-pub mod unwind;
-
-use super::super::settings as shared_settings;
-#[cfg(feature = "testing_hooks")]
-use crate::binemit::CodeSink;
-use crate::binemit::{emit_function, MemoryCodeSink};
-use crate::ir;
-use crate::isa::enc_tables::{self as shared_enc_tables, lookup_enclist, Encodings};
-use crate::isa::Builder as IsaBuilder;
-#[cfg(feature = "unwind")]
-use crate::isa::{unwind::systemv::RegisterMappingError, RegUnit};
-use crate::isa::{EncInfo, RegClass, RegInfo, TargetIsa};
-use crate::regalloc;
-use crate::result::CodegenResult;
-use crate::timing;
-use alloc::{borrow::Cow, boxed::Box, vec::Vec};
-use core::any::Any;
-use core::fmt;
-use core::hash::{Hash, Hasher};
-use target_lexicon::{PointerWidth, Triple};
-
-#[allow(dead_code)]
-struct Isa {
-    triple: Triple,
-    shared_flags: shared_settings::Flags,
-    isa_flags: settings::Flags,
-    cpumode: &'static [shared_enc_tables::Level1Entry<u16>],
-}
-
-/// Get an ISA builder for creating x86 targets.
-pub fn isa_builder(triple: Triple) -> IsaBuilder {
-    IsaBuilder {
-        triple,
-        setup: settings::builder(),
-        constructor: isa_constructor,
-    }
-}
-
-fn isa_constructor(
-    triple: Triple,
-    shared_flags: shared_settings::Flags,
-    builder: shared_settings::Builder,
-) -> Box<dyn TargetIsa> {
-    let level1 = match triple.pointer_width().unwrap() {
-        PointerWidth::U16 => unimplemented!("x86-16"),
-        PointerWidth::U32 => &enc_tables::LEVEL1_I32[..],
-        PointerWidth::U64 => &enc_tables::LEVEL1_I64[..],
-    };
-
-    let isa_flags = settings::Flags::new(&shared_flags, builder);
-
-    Box::new(Isa {
-        triple,
-        isa_flags,
-        shared_flags,
-        cpumode: level1,
-    })
-}
-
-impl TargetIsa for Isa {
-    fn name(&self) -> &'static str {
-        "x86"
-    }
-
-    fn triple(&self) -> &Triple {
-        &self.triple
-    }
-
-    fn flags(&self) -> &shared_settings::Flags {
-        &self.shared_flags
-    }
-
-    fn isa_flags(&self) -> Vec<shared_settings::Value> {
-        self.isa_flags.iter().collect()
-    }
-
-    fn hash_all_flags(&self, mut hasher: &mut dyn Hasher) {
-        self.shared_flags.hash(&mut hasher);
-        self.isa_flags.hash(&mut hasher);
-    }
-
-    fn uses_cpu_flags(&self) -> bool {
-        true
-    }
-
-    fn uses_complex_addresses(&self) -> bool {
-        true
-    }
-
-    fn register_info(&self) -> RegInfo {
-        registers::INFO.clone()
-    }
-
-    #[cfg(feature = "unwind")]
-    fn map_dwarf_register(&self, reg: RegUnit) -> Result<u16, RegisterMappingError> {
-        unwind::systemv::map_reg(self, reg).map(|r| r.0)
-    }
-
-    fn encoding_info(&self) -> EncInfo {
-        enc_tables::INFO.clone()
-    }
-
-    fn legal_encodings<'a>(
-        &'a self,
-        func: &'a ir::Function,
-        inst: &'a ir::InstructionData,
-        ctrl_typevar: ir::Type,
-    ) -> Encodings<'a> {
-        lookup_enclist(
-            ctrl_typevar,
-            inst,
-            func,
-            self.cpumode,
-            &enc_tables::LEVEL2[..],
-            &enc_tables::ENCLISTS[..],
-            &enc_tables::LEGALIZE_ACTIONS[..],
-            &enc_tables::RECIPE_PREDICATES[..],
-            &enc_tables::INST_PREDICATES[..],
-            self.isa_flags.predicate_view(),
-        )
-    }
-
-    fn legalize_signature(&self, sig: &mut Cow<ir::Signature>, current: bool) {
-        abi::legalize_signature(
-            sig,
-            &self.triple,
-            current,
-            &self.shared_flags,
-            &self.isa_flags,
-        )
-    }
-
-    fn regclass_for_abi_type(&self, ty: ir::Type) -> RegClass {
-        abi::regclass_for_abi_type(ty)
-    }
-
-    fn allocatable_registers(&self, _func: &ir::Function) -> regalloc::RegisterSet {
-        abi::allocatable_registers(&self.triple, &self.shared_flags)
-    }
-
-    #[cfg(feature = "testing_hooks")]
-    fn emit_inst(
-        &self,
-        func: &ir::Function,
-        inst: ir::Inst,
-        divert: &mut regalloc::RegDiversions,
-        sink: &mut dyn CodeSink,
-    ) {
-        binemit::emit_inst(func, inst, divert, sink, self)
-    }
-
-    fn emit_function_to_memory(&self, func: &ir::Function, sink: &mut MemoryCodeSink) {
-        emit_function(func, binemit::emit_inst, sink, self)
-    }
-
-    fn prologue_epilogue(&self, func: &mut ir::Function) -> CodegenResult<()> {
-        let _tt = timing::prologue_epilogue();
-        abi::prologue_epilogue(func, self)
-    }
-
-    fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
-        ir::condcodes::IntCC::UnsignedLessThan
-    }
-
-    fn unsigned_sub_overflow_condition(&self) -> ir::condcodes::IntCC {
-        ir::condcodes::IntCC::UnsignedLessThan
-    }
-
-    #[cfg(feature = "unwind")]
-    fn create_unwind_info(
-        &self,
-        func: &ir::Function,
-    ) -> CodegenResult<Option<super::super::unwind::UnwindInfo>> {
-        abi::create_unwind_info(func, self)
-    }
-
-    #[cfg(feature = "unwind")]
-    fn create_systemv_cie(&self) -> Option<gimli::write::CommonInformationEntry> {
-        Some(unwind::systemv::create_cie())
-    }
-
-    fn as_any(&self) -> &dyn Any {
-        self as &dyn Any
-    }
-}
-
-impl fmt::Display for Isa {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{}\n{}", self.shared_flags, self.isa_flags)
-    }
-}
diff --git a/cranelift/codegen/src/isa/legacy/x86/registers.rs b/cranelift/codegen/src/isa/legacy/x86/registers.rs
deleted file mode 100644
index a7518b268b..0000000000
--- a/cranelift/codegen/src/isa/legacy/x86/registers.rs
+++ /dev/null
@@ -1,86 +0,0 @@
-//! x86 register descriptions.
-
-use crate::isa::registers::{RegBank, RegClass, RegClassData, RegInfo, RegUnit};
-
-include!(concat!(env!("OUT_DIR"), "/registers-x86.rs"));
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::isa::RegUnit;
-    use alloc::string::{String, ToString};
-
-    #[test]
-    fn unit_encodings() {
-        fn gpr(unit: usize) -> Option<u16> {
-            Some(GPR.unit(unit))
-        }
-        // The encoding of integer registers is not alphabetical.
-        assert_eq!(INFO.parse_regunit("rax"), gpr(0));
-        assert_eq!(INFO.parse_regunit("rbx"), gpr(3));
-        assert_eq!(INFO.parse_regunit("rcx"), gpr(1));
-        assert_eq!(INFO.parse_regunit("rdx"), gpr(2));
-        assert_eq!(INFO.parse_regunit("rsi"), gpr(6));
-        assert_eq!(INFO.parse_regunit("rdi"), gpr(7));
-        assert_eq!(INFO.parse_regunit("rbp"), gpr(5));
-        assert_eq!(INFO.parse_regunit("rsp"), gpr(4));
-        assert_eq!(INFO.parse_regunit("r8"), gpr(8));
-        assert_eq!(INFO.parse_regunit("r15"), gpr(15));
-
-        fn fpr(unit: usize) -> Option<u16> {
-            Some(FPR.unit(unit))
-        }
-        assert_eq!(INFO.parse_regunit("xmm0"), fpr(0));
-        assert_eq!(INFO.parse_regunit("xmm15"), fpr(15));
-
-        // FIXME(#1306) Add these tests back in when FPR32 is re-added.
-        // fn fpr32(unit: usize) -> Option<u16> {
-        //    Some(FPR32.unit(unit))
-        // }
-        // assert_eq!(INFO.parse_regunit("xmm0"), fpr32(0));
-        // assert_eq!(INFO.parse_regunit("xmm31"), fpr32(31));
-    }
-
-    #[test]
-    fn unit_names() {
-        fn gpr(ru: RegUnit) -> String {
-            INFO.display_regunit(GPR.first + ru).to_string()
-        }
-        assert_eq!(gpr(0), "%rax");
-        assert_eq!(gpr(3), "%rbx");
-        assert_eq!(gpr(1), "%rcx");
-        assert_eq!(gpr(2), "%rdx");
-        assert_eq!(gpr(6), "%rsi");
-        assert_eq!(gpr(7), "%rdi");
-        assert_eq!(gpr(5), "%rbp");
-        assert_eq!(gpr(4), "%rsp");
-        assert_eq!(gpr(8), "%r8");
-        assert_eq!(gpr(15), "%r15");
-
-        fn fpr(ru: RegUnit) -> String {
-            INFO.display_regunit(FPR.first + ru).to_string()
-        }
-        assert_eq!(fpr(0), "%xmm0");
-        assert_eq!(fpr(15), "%xmm15");
-
-        // FIXME(#1306) Add these tests back in when FPR32 is re-added.
-        // fn fpr32(ru: RegUnit) -> String {
-        //    INFO.display_regunit(FPR32.first + ru).to_string()
-        // }
-        // assert_eq!(fpr32(0), "%xmm0");
-        // assert_eq!(fpr32(31), "%xmm31");
-    }
-
-    #[test]
-    fn regclasses() {
-        assert_eq!(GPR.intersect_index(GPR), Some(GPR.into()));
-        assert_eq!(GPR.intersect_index(ABCD), Some(ABCD.into()));
-        assert_eq!(GPR.intersect_index(FPR), None);
-        assert_eq!(ABCD.intersect_index(GPR), Some(ABCD.into()));
-        assert_eq!(ABCD.intersect_index(ABCD), Some(ABCD.into()));
-        assert_eq!(ABCD.intersect_index(FPR), None);
-        assert_eq!(FPR.intersect_index(FPR), Some(FPR.into()));
-        assert_eq!(FPR.intersect_index(GPR), None);
-        assert_eq!(FPR.intersect_index(ABCD), None);
-    }
-}
diff --git a/cranelift/codegen/src/isa/legacy/x86/settings.rs b/cranelift/codegen/src/isa/legacy/x86/settings.rs
deleted file mode 100644
index f13431c1a2..0000000000
--- a/cranelift/codegen/src/isa/legacy/x86/settings.rs
+++ /dev/null
@@ -1,52 +0,0 @@
-//! x86 Settings.
-
-use crate::settings::{self, detail, Builder, Value};
-use core::fmt;
-
-// Include code generated by `cranelift-codegen/meta/src/gen_settings.rs:`. This file contains a
-// public `Flags` struct with an impl for all of the settings defined in
-// `cranelift-codegen/meta/src/isa/x86/settings.rs`.
-include!(concat!(env!("OUT_DIR"), "/settings-x86.rs"));
-
-#[cfg(test)]
-mod tests {
-    use super::{builder, Flags};
-    use crate::settings::{self, Configurable};
-
-    #[test]
-    fn presets() {
-        let shared = settings::Flags::new(settings::builder());
-
-        // Nehalem has SSE4.1 but not BMI1.
-        let mut b0 = builder();
-        b0.enable("nehalem").unwrap();
-        let f0 = Flags::new(&shared, b0);
-        assert_eq!(f0.has_sse41(), true);
-        assert_eq!(f0.has_bmi1(), false);
-
-        let mut b1 = builder();
-        b1.enable("haswell").unwrap();
-        let f1 = Flags::new(&shared, b1);
-        assert_eq!(f1.has_sse41(), true);
-        assert_eq!(f1.has_bmi1(), true);
-    }
-    #[test]
-    fn display_presets() {
-        // Spot check that the flags Display impl does not cause a panic
-        let shared = settings::Flags::new(settings::builder());
-
-        let b0 = builder();
-        let f0 = Flags::new(&shared, b0);
-        let _ = format!("{}", f0);
-
-        let mut b1 = builder();
-        b1.enable("nehalem").unwrap();
-        let f1 = Flags::new(&shared, b1);
-        let _ = format!("{}", f1);
-
-        let mut b2 = builder();
-        b2.enable("haswell").unwrap();
-        let f2 = Flags::new(&shared, b2);
-        let _ = format!("{}", f2);
-    }
-}
diff --git a/cranelift/codegen/src/isa/legacy/x86/unwind.rs b/cranelift/codegen/src/isa/legacy/x86/unwind.rs
deleted file mode 100644
index 2eed8b74e4..0000000000
--- a/cranelift/codegen/src/isa/legacy/x86/unwind.rs
+++ /dev/null
@@ -1,531 +0,0 @@
-//! Module for x86 unwind generation for supported ABIs.
-
-pub mod systemv;
-pub mod winx64;
-
-use crate::ir::{Function, InstructionData, Opcode, ValueLoc};
-use crate::isa::x86::registers::{FPR, RU};
-use crate::isa::{RegUnit, TargetIsa};
-use crate::result::CodegenResult;
-use alloc::vec::Vec;
-use std::collections::HashMap;
-
-use crate::isa::unwind::input::{UnwindCode, UnwindInfo};
-
-pub(crate) fn create_unwind_info(
-    func: &Function,
-    isa: &dyn TargetIsa,
-) -> CodegenResult<Option<UnwindInfo<RegUnit>>> {
-    // Find last block based on max offset.
-    let last_block = func
-        .layout
-        .blocks()
-        .max_by_key(|b| func.offsets[*b])
-        .expect("at least a block");
-    // Find last instruction offset + size, and make it function size.
-    let function_size = func
-        .inst_offsets(last_block, &isa.encoding_info())
-        .fold(0, |_, (offset, _, size)| offset + size);
-
-    let entry_block = func.layout.entry_block().expect("missing entry block");
-    let prologue_end = func.prologue_end.unwrap();
-    let epilogues_start = func
-        .epilogues_start
-        .iter()
-        .map(|(i, b)| (*b, *i))
-        .collect::<HashMap<_, _>>();
-
-    let word_size = isa.pointer_bytes();
-
-    let mut stack_size = None;
-    let mut prologue_size = 0;
-    let mut prologue_unwind_codes = Vec::new();
-    let mut epilogues_unwind_codes = Vec::new();
-    let mut frame_register: Option<RegUnit> = None;
-
-    // Process only entry block and blocks with epilogues.
-    let mut blocks = func
-        .epilogues_start
-        .iter()
-        .map(|(_, b)| *b)
-        .collect::<Vec<_>>();
-    if !blocks.contains(&entry_block) {
-        blocks.push(entry_block);
-    }
-    blocks.sort_by_key(|b| func.offsets[*b]);
-
-    for block in blocks.iter() {
-        let mut in_prologue = block == &entry_block;
-        let mut in_epilogue = false;
-        let mut epilogue_pop_offsets = Vec::new();
-
-        let epilogue_start = epilogues_start.get(block);
-        let is_last_block = block == &last_block;
-
-        for (offset, inst, size) in func.inst_offsets(*block, &isa.encoding_info()) {
-            let offset = offset + size;
-
-            let unwind_codes;
-            if in_prologue {
-                // Check for prologue end (inclusive)
-                if prologue_end == inst {
-                    in_prologue = false;
-                }
-                prologue_size += size;
-                unwind_codes = &mut prologue_unwind_codes;
-            } else if !in_epilogue && epilogue_start == Some(&inst) {
-                // Now in an epilogue, emit a remember state instruction if not last block
-                in_epilogue = true;
-
-                epilogues_unwind_codes.push(Vec::new());
-                unwind_codes = epilogues_unwind_codes.last_mut().unwrap();
-
-                if !is_last_block {
-                    unwind_codes.push((offset, UnwindCode::RememberState));
-                }
-            } else if in_epilogue {
-                unwind_codes = epilogues_unwind_codes.last_mut().unwrap();
-            } else {
-                // Ignore normal instructions
-                continue;
-            }
-
-            match func.dfg[inst] {
-                InstructionData::Unary { opcode, arg } => {
-                    match opcode {
-                        Opcode::X86Push => {
-                            let reg = func.locations[arg].unwrap_reg();
-                            unwind_codes.push((
-                                offset,
-                                UnwindCode::StackAlloc {
-                                    size: word_size.into(),
-                                },
-                            ));
-                            unwind_codes.push((
-                                offset,
-                                UnwindCode::SaveRegister {
-                                    reg,
-                                    stack_offset: 0,
-                                },
-                            ));
-                        }
-                        Opcode::AdjustSpDown => {
-                            let stack_size =
-                                stack_size.expect("expected a previous stack size instruction");
-
-                            // This is used when calling a stack check function
-                            // We need to track the assignment to RAX which has the size of the stack
-                            unwind_codes
-                                .push((offset, UnwindCode::StackAlloc { size: stack_size }));
-                        }
-                        _ => {}
-                    }
-                }
-                InstructionData::UnaryImm { opcode, imm } => {
-                    match opcode {
-                        Opcode::Iconst => {
-                            let imm: i64 = imm.into();
-                            assert!(imm <= core::u32::MAX as i64);
-                            assert!(stack_size.is_none());
-
-                            // This instruction should only appear in a prologue to pass an
-                            // argument of the stack size to a stack check function.
-                            // Record the stack size so we know what it is when we encounter the adjustment
-                            // instruction (which will adjust via the register assigned to this instruction).
-                            stack_size = Some(imm as u32);
-                        }
-                        Opcode::AdjustSpDownImm => {
-                            let imm: i64 = imm.into();
-                            assert!(imm <= core::u32::MAX as i64);
-
-                            stack_size = Some(imm as u32);
-
-                            unwind_codes
-                                .push((offset, UnwindCode::StackAlloc { size: imm as u32 }));
-                        }
-                        Opcode::AdjustSpUpImm => {
-                            let imm: i64 = imm.into();
-                            assert!(imm <= core::u32::MAX as i64);
-
-                            stack_size = Some(imm as u32);
-
-                            unwind_codes
-                                .push((offset, UnwindCode::StackDealloc { size: imm as u32 }));
-                        }
-                        _ => {}
-                    }
-                }
-                InstructionData::Store {
-                    opcode: Opcode::Store,
-                    args: [arg1, arg2],
-                    offset: stack_offset,
-                    ..
-                } => {
-                    if let (ValueLoc::Reg(src), ValueLoc::Reg(dst)) =
-                        (func.locations[arg1], func.locations[arg2])
-                    {
-                        // If this is a save of an FPR, record an unwind operation
-                        // Note: the stack_offset here is relative to an adjusted SP
-                        if dst == (RU::rsp as RegUnit) && FPR.contains(src) {
-                            let stack_offset: i32 = stack_offset.into();
-                            unwind_codes.push((
-                                offset,
-                                UnwindCode::SaveRegister {
-                                    reg: src,
-                                    stack_offset: stack_offset as u32,
-                                },
-                            ));
-                        }
-                    }
-                }
-                InstructionData::CopySpecial { src, dst, .. } if frame_register.is_none() => {
-                    // Check for change in CFA register (RSP is always the starting CFA)
-                    if src == (RU::rsp as RegUnit) {
-                        unwind_codes.push((offset, UnwindCode::SetFramePointer { reg: dst }));
-                        frame_register = Some(dst);
-                    }
-                }
-                InstructionData::NullAry { opcode } => match opcode {
-                    Opcode::X86Pop => {
-                        epilogue_pop_offsets.push(offset);
-                    }
-                    _ => {}
-                },
-                InstructionData::MultiAry { opcode, .. } if in_epilogue => match opcode {
-                    Opcode::Return => {
-                        let args = func.dfg.inst_args(inst);
-                        for (i, arg) in args.iter().rev().enumerate() {
-                            // Only walk back the args for the pop instructions encountered
-                            if i >= epilogue_pop_offsets.len() {
-                                break;
-                            }
-
-                            let offset = epilogue_pop_offsets[i];
-
-                            let reg = func.locations[*arg].unwrap_reg();
-                            unwind_codes.push((offset, UnwindCode::RestoreRegister { reg }));
-                            unwind_codes.push((
-                                offset,
-                                UnwindCode::StackDealloc {
-                                    size: word_size.into(),
-                                },
-                            ));
-
-                            if Some(reg) == frame_register {
-                                unwind_codes.push((offset, UnwindCode::RestoreFramePointer));
-                                // Keep frame_register assigned for next epilogue.
-                            }
-                        }
-                        epilogue_pop_offsets.clear();
-
-                        // TODO ensure unwind codes sorted by offsets ?
-
-                        if !is_last_block {
-                            unwind_codes.push((offset, UnwindCode::RestoreState));
-                        }
-
-                        in_epilogue = false;
-                    }
-                    _ => {}
-                },
-                _ => {}
-            };
-        }
-    }
-
-    Ok(Some(UnwindInfo {
-        prologue_size,
-        prologue_unwind_codes,
-        epilogues_unwind_codes,
-        function_size,
-        word_size,
-        initial_sp_offset: word_size,
-    }))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::cursor::{Cursor, FuncCursor};
-    use crate::ir::{
-        types, AbiParam, ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind,
-    };
-    use crate::isa::{lookup_variant, BackendVariant, CallConv};
-    use crate::settings::{builder, Flags};
-    use crate::Context;
-    use std::str::FromStr;
-    use target_lexicon::triple;
-
-    #[test]
-    fn test_small_alloc() {
-        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
-            .expect("expect x86 ISA")
-            .finish(Flags::new(builder()));
-
-        let mut context = Context::for_function(create_function(
-            CallConv::WindowsFastcall,
-            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
-        ));
-
-        context.compile(&*isa).expect("expected compilation");
-
-        let unwind = create_unwind_info(&context.func, &*isa)
-            .expect("can create unwind info")
-            .expect("expected unwind info");
-
-        assert_eq!(
-            unwind,
-            UnwindInfo {
-                prologue_size: 9,
-                prologue_unwind_codes: vec![
-                    (2, UnwindCode::StackAlloc { size: 8 }),
-                    (
-                        2,
-                        UnwindCode::SaveRegister {
-                            reg: RU::rbp.into(),
-                            stack_offset: 0,
-                        }
-                    ),
-                    (
-                        5,
-                        UnwindCode::SetFramePointer {
-                            reg: RU::rbp.into(),
-                        }
-                    ),
-                    (9, UnwindCode::StackAlloc { size: 64 })
-                ],
-                epilogues_unwind_codes: vec![vec![
-                    (13, UnwindCode::StackDealloc { size: 64 }),
-                    (
-                        15,
-                        UnwindCode::RestoreRegister {
-                            reg: RU::rbp.into()
-                        }
-                    ),
-                    (15, UnwindCode::StackDealloc { size: 8 }),
-                    (15, UnwindCode::RestoreFramePointer)
-                ]],
-                function_size: 16,
-                word_size: 8,
-                initial_sp_offset: 8,
-            }
-        );
-    }
-
-    #[test]
-    fn test_medium_alloc() {
-        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
-            .expect("expect x86 ISA")
-            .finish(Flags::new(builder()));
-
-        let mut context = Context::for_function(create_function(
-            CallConv::WindowsFastcall,
-            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 10000)),
-        ));
-
-        context.compile(&*isa).expect("expected compilation");
-
-        let unwind = create_unwind_info(&context.func, &*isa)
-            .expect("can create unwind info")
-            .expect("expected unwind info");
-
-        assert_eq!(
-            unwind,
-            UnwindInfo {
-                prologue_size: 27,
-                prologue_unwind_codes: vec![
-                    (2, UnwindCode::StackAlloc { size: 8 }),
-                    (
-                        2,
-                        UnwindCode::SaveRegister {
-                            reg: RU::rbp.into(),
-                            stack_offset: 0,
-                        }
-                    ),
-                    (
-                        5,
-                        UnwindCode::SetFramePointer {
-                            reg: RU::rbp.into(),
-                        }
-                    ),
-                    (27, UnwindCode::StackAlloc { size: 10000 })
-                ],
-                epilogues_unwind_codes: vec![vec![
-                    (34, UnwindCode::StackDealloc { size: 10000 }),
-                    (
-                        36,
-                        UnwindCode::RestoreRegister {
-                            reg: RU::rbp.into()
-                        }
-                    ),
-                    (36, UnwindCode::StackDealloc { size: 8 }),
-                    (36, UnwindCode::RestoreFramePointer)
-                ]],
-                function_size: 37,
-                word_size: 8,
-                initial_sp_offset: 8,
-            }
-        );
-    }
-
-    #[test]
-    fn test_large_alloc() {
-        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
-            .expect("expect x86 ISA")
-            .finish(Flags::new(builder()));
-
-        let mut context = Context::for_function(create_function(
-            CallConv::WindowsFastcall,
-            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 1000000)),
-        ));
-
-        context.compile(&*isa).expect("expected compilation");
-
-        let unwind = create_unwind_info(&context.func, &*isa)
-            .expect("can create unwind info")
-            .expect("expected unwind info");
-
-        assert_eq!(
-            unwind,
-            UnwindInfo {
-                prologue_size: 27,
-                prologue_unwind_codes: vec![
-                    (2, UnwindCode::StackAlloc { size: 8 }),
-                    (
-                        2,
-                        UnwindCode::SaveRegister {
-                            reg: RU::rbp.into(),
-                            stack_offset: 0,
-                        }
-                    ),
-                    (
-                        5,
-                        UnwindCode::SetFramePointer {
-                            reg: RU::rbp.into(),
-                        }
-                    ),
-                    (27, UnwindCode::StackAlloc { size: 1000000 })
-                ],
-                epilogues_unwind_codes: vec![vec![
-                    (34, UnwindCode::StackDealloc { size: 1000000 }),
-                    (
-                        36,
-                        UnwindCode::RestoreRegister {
-                            reg: RU::rbp.into()
-                        }
-                    ),
-                    (36, UnwindCode::StackDealloc { size: 8 }),
-                    (36, UnwindCode::RestoreFramePointer)
-                ]],
-                function_size: 37,
-                word_size: 8,
-                initial_sp_offset: 8,
-            }
-        );
-    }
-
-    fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
-        let mut func =
-            Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
-
-        let block0 = func.dfg.make_block();
-        let mut pos = FuncCursor::new(&mut func);
-        pos.insert_block(block0);
-        pos.ins().return_(&[]);
-
-        if let Some(stack_slot) = stack_slot {
-            func.stack_slots.push(stack_slot);
-        }
-
-        func
-    }
-
-    #[test]
-    fn test_multi_return_func() {
-        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
-            .expect("expect x86 ISA")
-            .finish(Flags::new(builder()));
-
-        let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
-
-        context.compile(&*isa).expect("expected compilation");
-
-        let unwind = create_unwind_info(&context.func, &*isa)
-            .expect("can create unwind info")
-            .expect("expected unwind info");
-
-        assert_eq!(
-            unwind,
-            UnwindInfo {
-                prologue_size: 5,
-                prologue_unwind_codes: vec![
-                    (2, UnwindCode::StackAlloc { size: 8 }),
-                    (
-                        2,
-                        UnwindCode::SaveRegister {
-                            reg: RU::rbp.into(),
-                            stack_offset: 0,
-                        }
-                    ),
-                    (
-                        5,
-                        UnwindCode::SetFramePointer {
-                            reg: RU::rbp.into()
-                        }
-                    )
-                ],
-                epilogues_unwind_codes: vec![
-                    vec![
-                        (12, UnwindCode::RememberState),
-                        (
-                            12,
-                            UnwindCode::RestoreRegister {
-                                reg: RU::rbp.into()
-                            }
-                        ),
-                        (12, UnwindCode::StackDealloc { size: 8 }),
-                        (12, UnwindCode::RestoreFramePointer),
-                        (13, UnwindCode::RestoreState)
-                    ],
-                    vec![
-                        (
-                            15,
-                            UnwindCode::RestoreRegister {
-                                reg: RU::rbp.into()
-                            }
-                        ),
-                        (15, UnwindCode::StackDealloc { size: 8 }),
-                        (15, UnwindCode::RestoreFramePointer)
-                    ]
-                ],
-                function_size: 16,
-                word_size: 8,
-                initial_sp_offset: 8,
-            }
-        );
-    }
-
-    fn create_multi_return_function(call_conv: CallConv) -> Function {
-        let mut sig = Signature::new(call_conv);
-        sig.params.push(AbiParam::new(types::I32));
-        let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
-
-        let block0 = func.dfg.make_block();
-        let v0 = func.dfg.append_block_param(block0, types::I32);
-        let block1 = func.dfg.make_block();
-        let block2 = func.dfg.make_block();
-
-        let mut pos = FuncCursor::new(&mut func);
-        pos.insert_block(block0);
-        pos.ins().brnz(v0, block2, &[]);
-        pos.ins().jump(block1, &[]);
-
-        pos.insert_block(block1);
-        pos.ins().return_(&[]);
-
-        pos.insert_block(block2);
-        pos.ins().return_(&[]);
-
-        func
-    }
-}
diff --git a/cranelift/codegen/src/isa/legacy/x86/unwind/systemv.rs b/cranelift/codegen/src/isa/legacy/x86/unwind/systemv.rs
deleted file mode 100644
index 31fc64c9fb..0000000000
--- a/cranelift/codegen/src/isa/legacy/x86/unwind/systemv.rs
+++ /dev/null
@@ -1,235 +0,0 @@
-//! Unwind information for System V ABI (x86-64).
-
-use crate::ir::Function;
-use crate::isa::{
-    unwind::systemv::{RegisterMappingError, UnwindInfo},
-    RegUnit, TargetIsa,
-};
-use crate::result::CodegenResult;
-use gimli::{write::CommonInformationEntry, Encoding, Format, Register, X86_64};
-
-/// Creates a new x86-64 common information entry (CIE).
-pub fn create_cie() -> CommonInformationEntry {
-    use gimli::write::CallFrameInstruction;
-
-    let mut entry = CommonInformationEntry::new(
-        Encoding {
-            address_size: 8,
-            format: Format::Dwarf32,
-            version: 1,
-        },
-        1,  // Code alignment factor
-        -8, // Data alignment factor
-        X86_64::RA,
-    );
-
-    // Every frame will start with the call frame address (CFA) at RSP+8
-    // It is +8 to account for the push of the return address by the call instruction
-    entry.add_instruction(CallFrameInstruction::Cfa(X86_64::RSP, 8));
-
-    // Every frame will start with the return address at RSP (CFA-8 = RSP+8-8 = RSP)
-    entry.add_instruction(CallFrameInstruction::Offset(X86_64::RA, -8));
-
-    entry
-}
-
-/// Map Cranelift registers to their corresponding Gimli registers.
-pub fn map_reg(isa: &dyn TargetIsa, reg: RegUnit) -> Result<Register, RegisterMappingError> {
-    if isa.name() != "x86" || isa.pointer_bits() != 64 {
-        return Err(RegisterMappingError::UnsupportedArchitecture);
-    }
-
-    // Mapping from https://github.com/bytecodealliance/cranelift/pull/902 by @iximeow
-    const X86_GP_REG_MAP: [gimli::Register; 16] = [
-        X86_64::RAX,
-        X86_64::RCX,
-        X86_64::RDX,
-        X86_64::RBX,
-        X86_64::RSP,
-        X86_64::RBP,
-        X86_64::RSI,
-        X86_64::RDI,
-        X86_64::R8,
-        X86_64::R9,
-        X86_64::R10,
-        X86_64::R11,
-        X86_64::R12,
-        X86_64::R13,
-        X86_64::R14,
-        X86_64::R15,
-    ];
-    const X86_XMM_REG_MAP: [gimli::Register; 16] = [
-        X86_64::XMM0,
-        X86_64::XMM1,
-        X86_64::XMM2,
-        X86_64::XMM3,
-        X86_64::XMM4,
-        X86_64::XMM5,
-        X86_64::XMM6,
-        X86_64::XMM7,
-        X86_64::XMM8,
-        X86_64::XMM9,
-        X86_64::XMM10,
-        X86_64::XMM11,
-        X86_64::XMM12,
-        X86_64::XMM13,
-        X86_64::XMM14,
-        X86_64::XMM15,
-    ];
-
-    let reg_info = isa.register_info();
-    let bank = reg_info
-        .bank_containing_regunit(reg)
-        .ok_or_else(|| RegisterMappingError::MissingBank)?;
-    match bank.name {
-        "IntRegs" => {
-            // x86 GP registers have a weird mapping to DWARF registers, so we use a
-            // lookup table.
-            Ok(X86_GP_REG_MAP[(reg - bank.first_unit) as usize])
-        }
-        "FloatRegs" => Ok(X86_XMM_REG_MAP[(reg - bank.first_unit) as usize]),
-        _ => Err(RegisterMappingError::UnsupportedRegisterBank(bank.name)),
-    }
-}
-
-pub(crate) fn create_unwind_info(
-    func: &Function,
-    isa: &dyn TargetIsa,
-) -> CodegenResult<Option<UnwindInfo>> {
-    // Only System V-like calling conventions are supported
-    match isa.unwind_info_kind() {
-        crate::machinst::UnwindInfoKind::SystemV => {}
-        _ => return Ok(None),
-    }
-
-    if func.prologue_end.is_none() || isa.name() != "x86" || isa.pointer_bits() != 64 {
-        return Ok(None);
-    }
-
-    let unwind = match super::create_unwind_info(func, isa)? {
-        Some(u) => u,
-        None => {
-            return Ok(None);
-        }
-    };
-
-    struct RegisterMapper<'a, 'b>(&'a (dyn TargetIsa + 'b));
-    impl<'a, 'b> crate::isa::unwind::systemv::RegisterMapper<RegUnit> for RegisterMapper<'a, 'b> {
-        fn map(&self, reg: RegUnit) -> Result<u16, RegisterMappingError> {
-            Ok(map_reg(self.0, reg)?.0)
-        }
-        fn sp(&self) -> u16 {
-            X86_64::RSP.0
-        }
-        fn fp(&self) -> Option<u16> {
-            Some(X86_64::RBP.0)
-        }
-    }
-    let map = RegisterMapper(isa);
-
-    Ok(Some(UnwindInfo::build(unwind, &map)?))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::cursor::{Cursor, FuncCursor};
-    use crate::ir::{
-        types, AbiParam, ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind,
-    };
-    use crate::isa::{lookup_variant, BackendVariant, CallConv};
-    use crate::settings::{builder, Flags};
-    use crate::Context;
-    use gimli::write::Address;
-    use std::str::FromStr;
-    use target_lexicon::triple;
-
-    #[test]
-    fn test_simple_func() {
-        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
-            .expect("expect x86 ISA")
-            .finish(Flags::new(builder()));
-
-        let mut context = Context::for_function(create_function(
-            CallConv::SystemV,
-            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
-        ));
-
-        context.compile(&*isa).expect("expected compilation");
-
-        let fde = match isa
-            .create_unwind_info(&context.func)
-            .expect("can create unwind info")
-        {
-            Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
-                info.to_fde(Address::Constant(1234))
-            }
-            _ => panic!("expected unwind information"),
-        };
-
-        assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(1234), length: 16, lsda: None, instructions: [(2, CfaOffset(16)), (2, Offset(Register(6), -16)), (5, CfaRegister(Register(6))), (15, SameValue(Register(6))), (15, Cfa(Register(7), 8))] }");
-    }
-
-    fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
-        let mut func =
-            Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
-
-        let block0 = func.dfg.make_block();
-        let mut pos = FuncCursor::new(&mut func);
-        pos.insert_block(block0);
-        pos.ins().return_(&[]);
-
-        if let Some(stack_slot) = stack_slot {
-            func.stack_slots.push(stack_slot);
-        }
-
-        func
-    }
-
-    #[test]
-    fn test_multi_return_func() {
-        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
-            .expect("expect x86 ISA")
-            .finish(Flags::new(builder()));
-
-        let mut context = Context::for_function(create_multi_return_function(CallConv::SystemV));
-
-        context.compile(&*isa).expect("expected compilation");
-
-        let fde = match isa
-            .create_unwind_info(&context.func)
-            .expect("can create unwind info")
-        {
-            Some(crate::isa::unwind::UnwindInfo::SystemV(info)) => {
-                info.to_fde(Address::Constant(4321))
-            }
-            _ => panic!("expected unwind information"),
-        };
-
-        assert_eq!(format!("{:?}", fde), "FrameDescriptionEntry { address: Constant(4321), length: 16, lsda: None, instructions: [(2, CfaOffset(16)), (2, Offset(Register(6), -16)), (5, CfaRegister(Register(6))), (12, RememberState), (12, SameValue(Register(6))), (12, Cfa(Register(7), 8)), (13, RestoreState), (15, SameValue(Register(6))), (15, Cfa(Register(7), 8))] }");
-    }
-
-    fn create_multi_return_function(call_conv: CallConv) -> Function {
-        let mut sig = Signature::new(call_conv);
-        sig.params.push(AbiParam::new(types::I32));
-        let mut func = Function::with_name_signature(ExternalName::user(0, 0), sig);
-
-        let block0 = func.dfg.make_block();
-        let v0 = func.dfg.append_block_param(block0, types::I32);
-        let block1 = func.dfg.make_block();
-        let block2 = func.dfg.make_block();
-
-        let mut pos = FuncCursor::new(&mut func);
-        pos.insert_block(block0);
-        pos.ins().brnz(v0, block2, &[]);
-        pos.ins().jump(block1, &[]);
-
-        pos.insert_block(block1);
-        pos.ins().return_(&[]);
-
-        pos.insert_block(block2);
-        pos.ins().return_(&[]);
-
-        func
-    }
-}
diff --git a/cranelift/codegen/src/isa/legacy/x86/unwind/winx64.rs b/cranelift/codegen/src/isa/legacy/x86/unwind/winx64.rs
deleted file mode 100644
index 33e5463bb8..0000000000
--- a/cranelift/codegen/src/isa/legacy/x86/unwind/winx64.rs
+++ /dev/null
@@ -1,265 +0,0 @@
-//! Unwind information for Windows x64 ABI.
-
-use crate::ir::Function;
-use crate::isa::x86::registers::{FPR, GPR};
-use crate::isa::{unwind::winx64::UnwindInfo, RegUnit, TargetIsa};
-use crate::result::CodegenResult;
-
-pub(crate) fn create_unwind_info(
-    func: &Function,
-    isa: &dyn TargetIsa,
-) -> CodegenResult<Option<UnwindInfo>> {
-    // Only Windows fastcall is supported for unwind information
-    if !func.signature.call_conv.extends_windows_fastcall() || func.prologue_end.is_none() {
-        return Ok(None);
-    }
-
-    let unwind = match super::create_unwind_info(func, isa)? {
-        Some(u) => u,
-        None => {
-            return Ok(None);
-        }
-    };
-
-    Ok(Some(UnwindInfo::build::<RegUnit, RegisterMapper>(unwind)?))
-}
-
-struct RegisterMapper;
-
-impl crate::isa::unwind::winx64::RegisterMapper<RegUnit> for RegisterMapper {
-    fn map(reg: RegUnit) -> crate::isa::unwind::winx64::MappedRegister {
-        use crate::isa::unwind::winx64::MappedRegister;
-        if GPR.contains(reg) {
-            MappedRegister::Int(GPR.index_of(reg) as u8)
-        } else if FPR.contains(reg) {
-            MappedRegister::Xmm(reg as u8)
-        } else {
-            panic!()
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::cursor::{Cursor, FuncCursor};
-    use crate::ir::{ExternalName, InstBuilder, Signature, StackSlotData, StackSlotKind};
-    use crate::isa::unwind::winx64::UnwindCode;
-    use crate::isa::x86::registers::RU;
-    use crate::isa::{lookup_variant, BackendVariant, CallConv};
-    use crate::settings::{builder, Flags};
-    use crate::Context;
-    use std::str::FromStr;
-    use target_lexicon::triple;
-
-    #[test]
-    fn test_wrong_calling_convention() {
-        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
-            .expect("expect x86 ISA")
-            .finish(Flags::new(builder()));
-
-        let mut context = Context::for_function(create_function(CallConv::SystemV, None));
-
-        context.compile(&*isa).expect("expected compilation");
-
-        assert_eq!(
-            create_unwind_info(&context.func, &*isa).expect("can create unwind info"),
-            None
-        );
-    }
-
-    #[test]
-    fn test_small_alloc() {
-        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
-            .expect("expect x86 ISA")
-            .finish(Flags::new(builder()));
-
-        let mut context = Context::for_function(create_function(
-            CallConv::WindowsFastcall,
-            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 64)),
-        ));
-
-        context.compile(&*isa).expect("expected compilation");
-
-        let unwind = create_unwind_info(&context.func, &*isa)
-            .expect("can create unwind info")
-            .expect("expected unwind info");
-
-        assert_eq!(
-            unwind,
-            UnwindInfo {
-                flags: 0,
-                prologue_size: 9,
-                frame_register: None,
-                frame_register_offset: 0,
-                unwind_codes: vec![
-                    UnwindCode::PushRegister {
-                        instruction_offset: 2,
-                        reg: GPR.index_of(RU::rbp.into()) as u8
-                    },
-                    UnwindCode::StackAlloc {
-                        instruction_offset: 9,
-                        size: 64
-                    }
-                ]
-            }
-        );
-
-        assert_eq!(unwind.emit_size(), 8);
-
-        let mut buf = [0u8; 8];
-        unwind.emit(&mut buf);
-
-        assert_eq!(
-            buf,
-            [
-                0x01, // Version and flags (version 1, no flags)
-                0x09, // Prologue size
-                0x02, // Unwind code count (1 for stack alloc, 1 for push reg)
-                0x00, // Frame register + offset (no frame register)
-                0x09, // Prolog offset
-                0x72, // Operation 2 (small stack alloc), size = 0xB slots (e.g. (0x7 * 8) + 8 = 64 bytes)
-                0x02, // Prolog offset
-                0x50, // Operation 0 (save nonvolatile register), reg = 5 (RBP)
-            ]
-        );
-    }
-
-    #[test]
-    fn test_medium_alloc() {
-        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
-            .expect("expect x86 ISA")
-            .finish(Flags::new(builder()));
-
-        let mut context = Context::for_function(create_function(
-            CallConv::WindowsFastcall,
-            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 10000)),
-        ));
-
-        context.compile(&*isa).expect("expected compilation");
-
-        let unwind = create_unwind_info(&context.func, &*isa)
-            .expect("can create unwind info")
-            .expect("expected unwind info");
-
-        assert_eq!(
-            unwind,
-            UnwindInfo {
-                flags: 0,
-                prologue_size: 27,
-                frame_register: None,
-                frame_register_offset: 0,
-                unwind_codes: vec![
-                    UnwindCode::PushRegister {
-                        instruction_offset: 2,
-                        reg: GPR.index_of(RU::rbp.into()) as u8
-                    },
-                    UnwindCode::StackAlloc {
-                        instruction_offset: 27,
-                        size: 10000
-                    }
-                ]
-            }
-        );
-
-        assert_eq!(unwind.emit_size(), 12);
-
-        let mut buf = [0u8; 12];
-        unwind.emit(&mut buf);
-
-        assert_eq!(
-            buf,
-            [
-                0x01, // Version and flags (version 1, no flags)
-                0x1B, // Prologue size
-                0x03, // Unwind code count (2 for stack alloc, 1 for push reg)
-                0x00, // Frame register + offset (no frame register)
-                0x1B, // Prolog offset
-                0x01, // Operation 1 (large stack alloc), size is scaled 16-bits (info = 0)
-                0xE2, // Low size byte
-                0x04, // High size byte (e.g. 0x04E2 * 8 = 10000 bytes)
-                0x02, // Prolog offset
-                0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
-                0x00, // Padding
-                0x00, // Padding
-            ]
-        );
-    }
-
-    #[test]
-    fn test_large_alloc() {
-        let isa = lookup_variant(triple!("x86_64"), BackendVariant::Legacy)
-            .expect("expect x86 ISA")
-            .finish(Flags::new(builder()));
-
-        let mut context = Context::for_function(create_function(
-            CallConv::WindowsFastcall,
-            Some(StackSlotData::new(StackSlotKind::ExplicitSlot, 1000000)),
-        ));
-
-        context.compile(&*isa).expect("expected compilation");
-
-        let unwind = create_unwind_info(&context.func, &*isa)
-            .expect("can create unwind info")
-            .expect("expected unwind info");
-
-        assert_eq!(
-            unwind,
-            UnwindInfo {
-                flags: 0,
-                prologue_size: 27,
-                frame_register: None,
-                frame_register_offset: 0,
-                unwind_codes: vec![
-                    UnwindCode::PushRegister {
-                        instruction_offset: 2,
-                        reg: GPR.index_of(RU::rbp.into()) as u8
-                    },
-                    UnwindCode::StackAlloc {
-                        instruction_offset: 27,
-                        size: 1000000
-                    }
-                ]
-            }
-        );
-
-        assert_eq!(unwind.emit_size(), 12);
-
-        let mut buf = [0u8; 12];
-        unwind.emit(&mut buf);
-
-        assert_eq!(
-            buf,
-            [
-                0x01, // Version and flags (version 1, no flags)
-                0x1B, // Prologue size
-                0x04, // Unwind code count (3 for stack alloc, 1 for push reg)
-                0x00, // Frame register + offset (no frame register)
-                0x1B, // Prolog offset
-                0x11, // Operation 1 (large stack alloc), size is unscaled 32-bits (info = 1)
-                0x40, // Byte 1 of size
-                0x42, // Byte 2 of size
-                0x0F, // Byte 3 of size
-                0x00, // Byte 4 of size (size is 0xF4240 = 1000000 bytes)
-                0x02, // Prolog offset
-                0x50, // Operation 0 (push nonvolatile register), reg = 5 (RBP)
-            ]
-        );
-    }
-
-    fn create_function(call_conv: CallConv, stack_slot: Option<StackSlotData>) -> Function {
-        let mut func =
-            Function::with_name_signature(ExternalName::user(0, 0), Signature::new(call_conv));
-
-        let block0 = func.dfg.make_block();
-        let mut pos = FuncCursor::new(&mut func);
-        pos.insert_block(block0);
-        pos.ins().return_(&[]);
-
-        if let Some(stack_slot) = stack_slot {
-            func.stack_slots.push(stack_slot);
-        }
-
-        func
-    }
-}
diff --git a/cranelift/codegen/src/isa/mod.rs b/cranelift/codegen/src/isa/mod.rs
index 3331534c49..92edfd744f 100644
--- a/cranelift/codegen/src/isa/mod.rs
+++ b/cranelift/codegen/src/isa/mod.rs
@@ -84,12 +84,9 @@ pub(crate) mod aarch64;
 #[cfg(feature = "s390x")]
 mod s390x;
 
-#[cfg(any(feature = "x86", feature = "riscv"))]
+#[cfg(feature = "riscv")]
 mod legacy;
 
-#[cfg(feature = "x86")]
-use legacy::x86;
-
 #[cfg(feature = "riscv")]
 use legacy::riscv;
 
@@ -120,49 +117,19 @@ macro_rules! isa_builder {
     }};
 }
 
-/// The "variant" for a given target. On one platform (x86-64), we have two
-/// backends, the "old" and "new" one; the new one is the default if included
-/// in the build configuration and not otherwise specified.
-#[derive(Clone, Copy, Debug)]
-pub enum BackendVariant {
-    /// Any backend available.
-    Any,
-    /// A "legacy" backend: one that operates using legalizations and encodings.
-    Legacy,
-    /// A backend built on `MachInst`s and the `VCode` framework.
-    MachInst,
-}
-
-impl Default for BackendVariant {
-    fn default() -> Self {
-        BackendVariant::Any
-    }
-}
-
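-/// Look for an ISA for the given `triple`, selecting the backend variant given
-/// by `variant` if available.
+/// Look for an ISA for the given `triple`.
+/// Return a builder that can create a corresponding `TargetIsa`.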
-pub fn lookup_variant(triple: Triple, variant: BackendVariant) -> Result<Builder, LookupError> {
-    match (triple.architecture, variant) {
-        (Architecture::Riscv32 { .. }, _) | (Architecture::Riscv64 { .. }, _) => {
+pub fn lookup_variant(triple: Triple) -> Result<Builder, LookupError> {
+    match triple.architecture {
+        Architecture::Riscv32 { .. } | Architecture::Riscv64 { .. } => {
             isa_builder!(riscv, (feature = "riscv"), triple)
         }
-        (Architecture::X86_64, BackendVariant::Legacy) => {
-            isa_builder!(x86, (feature = "x86"), triple)
-        }
-        (Architecture::X86_64, BackendVariant::MachInst) => {
+        Architecture::X86_64 => {
             isa_builder!(x64, (feature = "x86"), triple)
         }
-        #[cfg(not(feature = "old-x86-backend"))]
-        (Architecture::X86_64, BackendVariant::Any) => {
-            isa_builder!(x64, (feature = "x86"), triple)
-        }
-        #[cfg(feature = "old-x86-backend")]
-        (Architecture::X86_64, BackendVariant::Any) => {
-            isa_builder!(x86, (feature = "x86"), triple)
-        }
-        (Architecture::Arm { .. }, _) => isa_builder!(arm32, (feature = "arm32"), triple),
-        (Architecture::Aarch64 { .. }, _) => isa_builder!(aarch64, (feature = "arm64"), triple),
-        (Architecture::S390x { .. }, _) => isa_builder!(s390x, (feature = "s390x"), triple),
+        Architecture::Arm { .. } => isa_builder!(arm32, (feature = "arm32"), triple),
+        Architecture::Aarch64 { .. } => isa_builder!(aarch64, (feature = "arm64"), triple),
+        Architecture::S390x { .. } => isa_builder!(s390x, (feature = "s390x"), triple),
         _ => Err(LookupError::Unsupported),
     }
 }
@@ -170,7 +137,7 @@ pub fn lookup_variant(triple: Triple, variant: BackendVariant) -> Result<Builder
 /// Look for an ISA for the given `triple`.
 /// Return a builder that can create a corresponding `TargetIsa`.
 pub fn lookup(triple: Triple) -> Result<Builder, LookupError> {
-    lookup_variant(triple, BackendVariant::Any)
+    lookup_variant(triple)
 }
 
 /// Look for a supported ISA with the given `name`.
@@ -292,11 +259,6 @@ pub trait TargetIsa: fmt::Display + Send + Sync {
     /// Get the ISA-dependent flag values that were used to make this trait object.
     fn isa_flags(&self) -> Vec<settings::Value>;
 
-    /// Get the variant of this ISA (Legacy or MachInst).
-    fn variant(&self) -> BackendVariant {
-        BackendVariant::Legacy
-    }
-
     /// Hashes all flags, both ISA-independent and ISA-specific, into the
     /// specified hasher.
     fn hash_all_flags(&self, hasher: &mut dyn Hasher);
diff --git a/cranelift/codegen/src/isa/s390x/lower.rs b/cranelift/codegen/src/isa/s390x/lower.rs
index cd0db1ae34..c100e36031 100644
--- a/cranelift/codegen/src/isa/s390x/lower.rs
+++ b/cranelift/codegen/src/isa/s390x/lower.rs
@@ -2962,45 +2962,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         | Opcode::IfcmpImm => {
             panic!("ALU+imm and ALU+carry ops should not appear here!");
         }
-
-        #[cfg(feature = "x86")]
-        Opcode::X86Udivmodx
-        | Opcode::X86Sdivmodx
-        | Opcode::X86Umulx
-        | Opcode::X86Smulx
-        | Opcode::X86Cvtt2si
-        | Opcode::X86Fmin
-        | Opcode::X86Fmax
-        | Opcode::X86Push
-        | Opcode::X86Pop
-        | Opcode::X86Bsr
-        | Opcode::X86Bsf
-        | Opcode::X86Pblendw
-        | Opcode::X86Pshufd
-        | Opcode::X86Pshufb
-        | Opcode::X86Pextr
-        | Opcode::X86Pinsr
-        | Opcode::X86Insertps
-        | Opcode::X86Movsd
-        | Opcode::X86Movlhps
-        | Opcode::X86Psll
-        | Opcode::X86Psrl
-        | Opcode::X86Psra
-        | Opcode::X86Ptest
-        | Opcode::X86Pmaxs
-        | Opcode::X86Pmaxu
-        | Opcode::X86Pmins
-        | Opcode::X86Pminu
-        | Opcode::X86Pmullq
-        | Opcode::X86Pmuludq
-        | Opcode::X86Punpckh
-        | Opcode::X86Punpckl
-        | Opcode::X86Vcvtudq2ps
-        | Opcode::X86Palignr
-        | Opcode::X86ElfTlsGetAddr
-        | Opcode::X86MachoTlsGetAddr => {
-            panic!("x86-specific opcode in supposedly arch-neutral IR!");
-        }
     }
 
     Ok(())
diff --git a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs
index 9115db0671..7119d5b260 100644
--- a/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs
+++ b/cranelift/codegen/src/isa/x64/inst/unwind/systemv.rs
@@ -109,7 +109,6 @@ mod tests {
     use target_lexicon::triple;
 
     #[test]
-    #[cfg_attr(feature = "old-x86-backend", ignore)]
     fn test_simple_func() {
         let isa = lookup(triple!("x86_64"))
             .expect("expect x86 ISA")
@@ -152,7 +151,6 @@ mod tests {
     }
 
     #[test]
-    #[cfg_attr(feature = "old-x86-backend", ignore)]
     fn test_multi_return_func() {
         let isa = lookup(triple!("x86_64"))
             .expect("expect x86 ISA")
diff --git a/cranelift/codegen/src/isa/x64/lower.rs b/cranelift/codegen/src/isa/x64/lower.rs
index da29a04314..9b51a27b07 100644
--- a/cranelift/codegen/src/isa/x64/lower.rs
+++ b/cranelift/codegen/src/isa/x64/lower.rs
@@ -6900,44 +6900,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             panic!("Branch opcode reached non-branch lowering logic!");
         }
 
-        Opcode::X86Udivmodx
-        | Opcode::X86Sdivmodx
-        | Opcode::X86Umulx
-        | Opcode::X86Smulx
-        | Opcode::X86Cvtt2si
-        | Opcode::X86Fmin
-        | Opcode::X86Fmax
-        | Opcode::X86Push
-        | Opcode::X86Pop
-        | Opcode::X86Bsr
-        | Opcode::X86Bsf
-        | Opcode::X86Pblendw
-        | Opcode::X86Pshufd
-        | Opcode::X86Pshufb
-        | Opcode::X86Pextr
-        | Opcode::X86Pinsr
-        | Opcode::X86Insertps
-        | Opcode::X86Movsd
-        | Opcode::X86Movlhps
-        | Opcode::X86Palignr
-        | Opcode::X86Psll
-        | Opcode::X86Psrl
-        | Opcode::X86Psra
-        | Opcode::X86Ptest
-        | Opcode::X86Pmaxs
-        | Opcode::X86Pmaxu
-        | Opcode::X86Pmins
-        | Opcode::X86Pminu
-        | Opcode::X86Pmullq
-        | Opcode::X86Pmuludq
-        | Opcode::X86Punpckh
-        | Opcode::X86Punpckl
-        | Opcode::X86Vcvtudq2ps
-        | Opcode::X86ElfTlsGetAddr
-        | Opcode::X86MachoTlsGetAddr => {
-            panic!("x86-specific opcode in supposedly arch-neutral IR!");
-        }
-
         Opcode::Nop => {
             // Nothing.
         }
diff --git a/cranelift/codegen/src/legalizer/mod.rs b/cranelift/codegen/src/legalizer/mod.rs
index 3b794a1e25..ee89ee35f0 100644
--- a/cranelift/codegen/src/legalizer/mod.rs
+++ b/cranelift/codegen/src/legalizer/mod.rs
@@ -21,9 +21,9 @@ use crate::ir::types::{I32, I64};
 use crate::ir::{self, InstBuilder, MemFlags};
 use crate::isa::TargetIsa;
 
-#[cfg(any(feature = "x86", feature = "riscv"))]
+#[cfg(feature = "riscv")]
 use crate::predicates;
-#[cfg(any(feature = "x86", feature = "riscv"))]
+#[cfg(feature = "riscv")]
 use alloc::vec::Vec;
 
 use crate::timing;
diff --git a/cranelift/codegen/src/machinst/adapter.rs b/cranelift/codegen/src/machinst/adapter.rs
index c0bc76417f..b60bf8300a 100644
--- a/cranelift/codegen/src/machinst/adapter.rs
+++ b/cranelift/codegen/src/machinst/adapter.rs
@@ -3,7 +3,7 @@
 use crate::binemit;
 use crate::ir;
 use crate::isa::{
-    BackendVariant, EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa,
+    EncInfo, Encoding, Encodings, Legalize, RegClass, RegInfo, TargetIsa,
 };
 use crate::machinst::*;
 use crate::regalloc::RegisterSet;
@@ -64,10 +64,6 @@ impl TargetIsa for TargetIsaAdapter {
         self.backend.isa_flags()
     }
 
-    fn variant(&self) -> BackendVariant {
-        BackendVariant::MachInst
-    }
-
     fn hash_all_flags(&self, hasher: &mut dyn Hasher) {
         self.backend.hash_all_flags(hasher);
     }
diff --git a/cranelift/filetests/filetests/isa/x64/amode-opt.clif b/cranelift/filetests/filetests/isa/x64/amode-opt.clif
index f5789b67ee..902df05a08 100644
--- a/cranelift/filetests/filetests/isa/x64/amode-opt.clif
+++ b/cranelift/filetests/filetests/isa/x64/amode-opt.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst
+target x86_64
 
 function %amode_add(i64, i64) -> i64 {
 block0(v0: i64, v1: i64):
diff --git a/cranelift/filetests/filetests/isa/x64/b1.clif b/cranelift/filetests/filetests/isa/x64/b1.clif
index cbd265a9ea..64049860dd 100644
--- a/cranelift/filetests/filetests/isa/x64/b1.clif
+++ b/cranelift/filetests/filetests/isa/x64/b1.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst
+target x86_64
 
 function %f0(b1, i32, i32) -> i32 {
 ; check:  pushq   %rbp
diff --git a/cranelift/filetests/filetests/isa/x64/basic.clif b/cranelift/filetests/filetests/isa/x64/basic.clif
index 8b43d70c7c..88b605d3d7 100644
--- a/cranelift/filetests/filetests/isa/x64/basic.clif
+++ b/cranelift/filetests/filetests/isa/x64/basic.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst
+target x86_64
 
 function %f(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
diff --git a/cranelift/filetests/filetests/isa/x64/branches.clif b/cranelift/filetests/filetests/isa/x64/branches.clif
index 99aec088ac..edcf36ec1a 100644
--- a/cranelift/filetests/filetests/isa/x64/branches.clif
+++ b/cranelift/filetests/filetests/isa/x64/branches.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst
+target x86_64
 
 function %f0(i32, i32) -> i32 {
 block0(v0: i32, v1: i32):
diff --git a/cranelift/filetests/filetests/isa/x64/call-conv.clif b/cranelift/filetests/filetests/isa/x64/call-conv.clif
index 8f619e2aa7..e2f0d8e81a 100644
--- a/cranelift/filetests/filetests/isa/x64/call-conv.clif
+++ b/cranelift/filetests/filetests/isa/x64/call-conv.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst
+target x86_64
 
-;; system_v has first param in %rdi, fascall in %rcx
+;; system_v has first param in %rdi, fastcall in %rcx
 function %one_arg(i32) system_v {
diff --git a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif
index f36caed88a..627e4700bb 100644
--- a/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif
+++ b/cranelift/filetests/filetests/isa/x64/clz-lzcnt.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst has_lzcnt
+target x86_64 has_lzcnt
 
 function %clz(i64) -> i64 {
 block0(v0: i64):
diff --git a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif
index 16c788ed84..853fd0d5f9 100644
--- a/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif
+++ b/cranelift/filetests/filetests/isa/x64/cmp-mem-bug.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst
+target x86_64
 
 function %f0(i64, i64) -> i64, i64 {
 block0(v0: i64, v1: i64):
diff --git a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif
index 5931451e11..42f82c653f 100644
--- a/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif
+++ b/cranelift/filetests/filetests/isa/x64/ctz-bmi1.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst has_bmi1
+target x86_64 has_bmi1
 
 function %ctz(i64) -> i64 {
 block0(v0: i64):
diff --git a/cranelift/filetests/filetests/isa/x64/div-checks.clif b/cranelift/filetests/filetests/isa/x64/div-checks.clif
index 3984aba42f..5a366914a2 100644
--- a/cranelift/filetests/filetests/isa/x64/div-checks.clif
+++ b/cranelift/filetests/filetests/isa/x64/div-checks.clif
@@ -1,6 +1,6 @@
 test compile
 set avoid_div_traps=false
-target x86_64 machinst
+target x86_64
 
 ;; We should get the checked-div/rem sequence (`srem` pseudoinst below) even
 ;; when `avoid_div_traps` above is false (i.e. even when the host is normally
diff --git a/cranelift/filetests/filetests/isa/x64/fastcall.clif b/cranelift/filetests/filetests/isa/x64/fastcall.clif
index fc8d3a801d..99397044d9 100644
--- a/cranelift/filetests/filetests/isa/x64/fastcall.clif
+++ b/cranelift/filetests/filetests/isa/x64/fastcall.clif
@@ -1,7 +1,7 @@
 test compile
 set enable_llvm_abi_extensions=true
 set unwind_info=true
-target x86_64 machinst
+target x86_64
 
 function %f0(i64, i64, i64, i64) -> i64 windows_fastcall {
 block0(v0: i64, v1: i64, v2: i64, v3: i64):
@@ -206,7 +206,7 @@ block0(v0: i64):
   v18 = load.f64 v0+136
   v19 = load.f64 v0+144
   v20 = load.f64 v0+152
-  
+
   v21 = fadd.f64 v1, v2
   v22 = fadd.f64 v3, v4
   v23 = fadd.f64 v5, v6
diff --git a/cranelift/filetests/filetests/isa/x64/floating-point.clif b/cranelift/filetests/filetests/isa/x64/floating-point.clif
index c1e30a3b19..2f2552aec1 100644
--- a/cranelift/filetests/filetests/isa/x64/floating-point.clif
+++ b/cranelift/filetests/filetests/isa/x64/floating-point.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst
+target x86_64
 
 function %f(f64) -> f64 {
 block0(v0: f64):
diff --git a/cranelift/filetests/filetests/isa/x64/heap.clif b/cranelift/filetests/filetests/isa/x64/heap.clif
index 2c77bc7ec2..b8c27f422e 100644
--- a/cranelift/filetests/filetests/isa/x64/heap.clif
+++ b/cranelift/filetests/filetests/isa/x64/heap.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst
+target x86_64
 
 function %f(i32, i64 vmctx) -> i64 {
     gv0 = vmctx
diff --git a/cranelift/filetests/filetests/isa/x64/i128.clif b/cranelift/filetests/filetests/isa/x64/i128.clif
index 61783e366d..75013a8170 100644
--- a/cranelift/filetests/filetests/isa/x64/i128.clif
+++ b/cranelift/filetests/filetests/isa/x64/i128.clif
@@ -1,6 +1,6 @@
 test compile
 set enable_llvm_abi_extensions=true
-target x86_64 machinst
+target x86_64
 
 function %f0(i128, i128) -> i128 {
 ; check:   pushq   %rbp
@@ -190,7 +190,7 @@ block0(v0: i128, v1: i128):
 ; nextln: orq     %rax, %r8
 ; nextln: andq    $$1, %r8
 ; nextln: setnz   %r8b
- 
+
     v4 = icmp slt v0, v1
 ; check:  cmpq    %rcx, %rsi
 ; nextln: setl    %r9b
@@ -201,7 +201,7 @@ block0(v0: i128, v1: i128):
 ; nextln: orq     %r9, %r10
 ; nextln: andq    $$1, %r10
 ; nextln: setnz   %r9b
- 
+
     v5 = icmp sle v0, v1
 ; check:  cmpq    %rcx, %rsi
 ; nextln: setl    %r10b
@@ -212,7 +212,7 @@ block0(v0: i128, v1: i128):
 ; nextln: orq     %r10, %r11
 ; nextln: andq    $$1, %r11
 ; nextln: setnz   %r10b
- 
+
     v6 = icmp sgt v0, v1
 ; check:  cmpq    %rcx, %rsi
 ; nextln: setnle  %r11b
@@ -307,7 +307,7 @@ block0(v0: i128):
 ; nextln: setz    %sil
 ; nextln: andb    %dil, %sil
 ; nextln: jnz     label1; j label2
- 
+
     jump block2
 
 block1:
@@ -725,7 +725,7 @@ block2(v6: i128):
 ; nextln: movq    %rbp, %rsp
 ; nextln: popq    %rbp
 ; nextln: ret
- 
+
 }
 
 function %f24(i128, i128, i64, i128, i128, i128) -> i128 {
@@ -1106,4 +1106,4 @@ block0(v0: i128, v1: i128):
 ; nextln: movq    %rcx, %rdx
 ; nextln: movq    %rbp, %rsp
 ; nextln: popq    %rbp
-; nextln: ret
\ No newline at end of file
+; nextln: ret
diff --git a/cranelift/filetests/filetests/isa/x64/load-op.clif b/cranelift/filetests/filetests/isa/x64/load-op.clif
index a4069b20ca..85c26dec3e 100644
--- a/cranelift/filetests/filetests/isa/x64/load-op.clif
+++ b/cranelift/filetests/filetests/isa/x64/load-op.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst
+target x86_64
 
 function %add_from_mem_u32_1(i64, i32) -> i32 {
 block0(v0: i64, v1: i32):
diff --git a/cranelift/filetests/filetests/isa/x64/move-elision.clif b/cranelift/filetests/filetests/isa/x64/move-elision.clif
index 5b23afb8d3..533eb5341b 100644
--- a/cranelift/filetests/filetests/isa/x64/move-elision.clif
+++ b/cranelift/filetests/filetests/isa/x64/move-elision.clif
@@ -1,6 +1,6 @@
 test compile
 set enable_simd
-target x86_64 machinst skylake
+target x86_64 skylake
 
 function %move_registers(i32x4) -> b8x16 {
 block0(v0: i32x4):
diff --git a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif
index 2049f53962..8326e681b0 100644
--- a/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif
+++ b/cranelift/filetests/filetests/isa/x64/popcnt-use-popcnt.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst has_popcnt has_sse42
+target x86_64 has_popcnt has_sse42
 
 function %popcnt(i64) -> i64 {
 block0(v0: i64):
diff --git a/cranelift/filetests/filetests/isa/x64/popcnt.clif b/cranelift/filetests/filetests/isa/x64/popcnt.clif
index df68f6b4b7..e3f8d0c49d 100644
--- a/cranelift/filetests/filetests/isa/x64/popcnt.clif
+++ b/cranelift/filetests/filetests/isa/x64/popcnt.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst
+target x86_64
 
 function %popcnt64(i64) -> i64 {
 block0(v0: i64):
diff --git a/cranelift/filetests/filetests/isa/x64/probestack.clif b/cranelift/filetests/filetests/isa/x64/probestack.clif
index 8e8b424c70..40944a797c 100644
--- a/cranelift/filetests/filetests/isa/x64/probestack.clif
+++ b/cranelift/filetests/filetests/isa/x64/probestack.clif
@@ -1,6 +1,6 @@
 test compile
 set enable_probestack=true
-target x86_64 machinst
+target x86_64
 
 function %f1() -> i64 {
 ss0 = explicit_slot 100000
diff --git a/cranelift/filetests/filetests/isa/x64/select-i128.clif b/cranelift/filetests/filetests/isa/x64/select-i128.clif
index af6996f85f..41c8a67fb1 100644
--- a/cranelift/filetests/filetests/isa/x64/select-i128.clif
+++ b/cranelift/filetests/filetests/isa/x64/select-i128.clif
@@ -1,6 +1,6 @@
 test compile
 set enable_llvm_abi_extensions=true
-target x86_64 machinst
+target x86_64
 
 function %f0(i32, i128, i128) -> i128 {
 ; check:   pushq   %rbp
@@ -24,6 +24,6 @@ block0(v0: i32, v1: i128, v2: i128):
 ; nextln: movq    %rbp, %rsp
 ; nextln: popq    %rbp
 ; nextln: ret
- 
+
 }
 
diff --git a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
index 52761b1ed0..b7251f9fe1 100644
--- a/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-bitwise-compile.clif
@@ -1,6 +1,6 @@
 test compile
 set enable_simd
-target x86_64 machinst skylake
+target x86_64 skylake
 
 function %bitselect_i16x8() -> i16x8 {
 block0:
diff --git a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif
index b50ff6328d..7e3dee77f0 100644
--- a/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-comparison-legalize.clif
@@ -1,6 +1,6 @@
 test compile
 set enable_simd
-target x86_64 machinst skylake
+target x86_64 skylake
 
 function %icmp_ne_32x4(i32x4, i32x4) -> b32x4 {
 block0(v0: i32x4, v1: i32x4):
diff --git a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif
index 2f6a8c7dfd..38894f6086 100644
--- a/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-lane-access-compile.clif
@@ -1,6 +1,6 @@
 test compile
 set enable_simd
-target x86_64 machinst has_ssse3 has_sse41
+target x86_64 has_ssse3 has_sse41
 
 ;; shuffle
 
diff --git a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif
index 72249faaef..29f4b2cdb0 100644
--- a/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif
+++ b/cranelift/filetests/filetests/isa/x64/simd-logical-compile.clif
@@ -1,6 +1,6 @@
 test compile
 set enable_simd
-target x86_64 machinst skylake
+target x86_64 skylake
 
 function %bnot_b32x4(b32x4) -> b32x4 {
 block0(v0: b32x4):
diff --git a/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif b/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif
index 31edd7bdca..c20f816fc2 100644
--- a/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif
+++ b/cranelift/filetests/filetests/isa/x64/store-stack-full-width-i32.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst
+target x86_64
 
 ;; The goal of this test is to ensure that stack spills of an integer value,
-;; which width is less than the machine word's size, cause the full word to be
+;; whose width is less than the machine word's size, cause the full word to be
diff --git a/cranelift/filetests/filetests/isa/x64/struct-arg.clif b/cranelift/filetests/filetests/isa/x64/struct-arg.clif
index e9001c5393..23fbb731b8 100644
--- a/cranelift/filetests/filetests/isa/x64/struct-arg.clif
+++ b/cranelift/filetests/filetests/isa/x64/struct-arg.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst
+target x86_64
 
 function u0:0(i64 sarg(64)) -> i8 system_v {
 block0(v0: i64):
diff --git a/cranelift/filetests/filetests/isa/x64/struct-ret.clif b/cranelift/filetests/filetests/isa/x64/struct-ret.clif
index ee59ff4963..90a6d6fbe4 100644
--- a/cranelift/filetests/filetests/isa/x64/struct-ret.clif
+++ b/cranelift/filetests/filetests/isa/x64/struct-ret.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst
+target x86_64
 
 function %f0(i64 sret) {
 block0(v0: i64):
diff --git a/cranelift/filetests/filetests/isa/x64/tls_elf.clif b/cranelift/filetests/filetests/isa/x64/tls_elf.clif
index 37a4698619..71bf7dada9 100644
--- a/cranelift/filetests/filetests/isa/x64/tls_elf.clif
+++ b/cranelift/filetests/filetests/isa/x64/tls_elf.clif
@@ -1,6 +1,6 @@
 test compile
 set tls_model=elf_gd
-target x86_64 machinst
+target x86_64
 
 function u0:0(i32) -> i64 {
 gv0 = symbol colocated tls u1:0
diff --git a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif
index ef43c3dd03..32c856c419 100644
--- a/cranelift/filetests/filetests/isa/x64/uextend-elision.clif
+++ b/cranelift/filetests/filetests/isa/x64/uextend-elision.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst
+target x86_64
 
 function %elide_uextend_add(i32, i32) -> i64 {
 block0(v0: i32, v1: i32):
diff --git a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif
index 5ddd4b20d3..6548930328 100644
--- a/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif
+++ b/cranelift/filetests/filetests/isa/x64/unused_jt_unreachable_block.clif
@@ -1,5 +1,5 @@
 test compile
-target x86_64 machinst
+target x86_64
 
 ;; From: https://github.com/bytecodealliance/wasmtime/issues/2670
 
diff --git a/cranelift/filetests/filetests/isa/x86/abcd.clif b/cranelift/filetests/filetests/isa/x86/abcd.clif
deleted file mode 100644
index bfea325055..0000000000
--- a/cranelift/filetests/filetests/isa/x86/abcd.clif
+++ /dev/null
@@ -1,13 +0,0 @@
-test regalloc
-target i686 legacy
-
-; %rdi can't be used in a movsbl instruction, so test that the register
-; allocator can move it to a register that can be.
-
-function %test(i32 [%rdi]) -> i32 system_v {
-block0(v0: i32 [%rdi]):
-    v1 = ireduce.i8 v0
-    v2 = sextend.i32 v1
-    return v2
-}
-; check: regmove v1, %rdi -> %rax
diff --git a/cranelift/filetests/filetests/isa/x86/abi-bool.clif b/cranelift/filetests/filetests/isa/x86/abi-bool.clif
deleted file mode 100644
index 5286de3c18..0000000000
--- a/cranelift/filetests/filetests/isa/x86/abi-bool.clif
+++ /dev/null
@@ -1,19 +0,0 @@
-test compile
-target x86_64 legacy haswell
-
-function %foo(i64, i64, i64, i32) -> b1 system_v {
-block3(v0: i64, v1: i64, v2: i64, v3: i32):
-    v5 = icmp ne v2, v2
-    v8 = iconst.i64 0
-    jump block2(v8, v3, v5)
-
-block2(v10: i64, v30: i32, v37: b1):
-    v18 = load.i32 notrap aligned v2
-    v27 = iadd.i64 v10, v10
-    v31 = icmp eq v30, v30
-    brz v31, block2(v27, v30, v37)
-    jump block0(v37)
-
-block0(v35: b1):
-    return v35
-}
diff --git a/cranelift/filetests/filetests/isa/x86/abi32.clif b/cranelift/filetests/filetests/isa/x86/abi32.clif
deleted file mode 100644
index 8ca530a695..0000000000
--- a/cranelift/filetests/filetests/isa/x86/abi32.clif
+++ /dev/null
@@ -1,20 +0,0 @@
-; Test the legalization of function signatures.
-test legalizer
-target i686 legacy
-
-; regex: V=v\d+
-
-function %f() {
-    sig0 = (i32) -> i32 system_v
-    ; check: sig0 = (i32 [0]) -> i32 [%rax] system_v
-
-    sig1 = (i64) -> b1 system_v
-    ; check: sig1 = (i32 [0], i32 [4]) -> b1 [%rax] system_v
-
-    sig2 = (f32, i64) -> f64 system_v
-    ; check: sig2 = (f32 [0], i32 [4], i32 [8]) -> f64 [%xmm0] system_v
-
-block0:
-    return
-}
-
diff --git a/cranelift/filetests/filetests/isa/x86/abi64.clif b/cranelift/filetests/filetests/isa/x86/abi64.clif
deleted file mode 100644
index 0da2aad424..0000000000
--- a/cranelift/filetests/filetests/isa/x86/abi64.clif
+++ /dev/null
@@ -1,37 +0,0 @@
-; Test the legalization of function signatures.
-test legalizer
-target x86_64 legacy
-
-; regex: V=v\d+
-
-function %f() {
-    sig0 = (i32) -> i32 system_v
-    ; check: sig0 = (i32 [%rdi]) -> i32 [%rax] system_v
-
-    sig1 = (i64) -> b1 system_v
-    ; check: sig1 = (i64 [%rdi]) -> b1 [%rax] system_v
-
-    sig2 = (f32, i64) -> f64 system_v
-    ; check: sig2 = (f32 [%xmm0], i64 [%rdi]) -> f64 [%xmm0] system_v
-
-    sig3 = () -> i128 system_v
-    ; check: sig3 = () -> i64 [%rax], i64 [%rdx] system_v
-
-    sig4 = (i128) -> i128 system_v
-    ; check: sig4 = (i64 [%rdi], i64 [%rsi]) -> i64 [%rax], i64 [%rdx] system_v
-
-block0:
-    return
-}
-
-function %pass_stack_int64(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 vmctx) baldrdash_system_v {
-    sig0 = (i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, i64 vmctx) baldrdash_system_v
-    fn0 = u0:0 sig0
-
-block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64, v5: i64, v6: i64, v7: i64, v8: i64, v9: i64, v10: i64, v11: i64, v12: i64, v13: i64, v14: i64, v15: i64, v16: i64, v17: i64, v18: i64, v19: i64, v20: i64):
-    call fn0(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20)
-    jump block1
-
-block1:
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif b/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif
deleted file mode 100644
index ca0ace1342..0000000000
--- a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs32.clif
+++ /dev/null
@@ -1,25 +0,0 @@
-; binary emission of 32-bit code.
-test binemit
-set opt_level=speed_and_size
-set emit_all_ones_funcaddrs
-target i686 legacy haswell
-
-; The binary encodings can be verified with the command:
-;
-;   sed -ne 's/^ *; asm: *//p' filetests/isa/x86/allones_funcaddrs32.clif | llvm-mc -show-encoding -triple=i386
-;
-
-; Tests from binary32.clif affected by emit_all_ones_funcaddrs.
-function %I32() {
-    sig0 = ()
-    fn0 = %foo()
-
-block0:
-
-    ; asm: movl $-1, %ecx
-    [-,%rcx]            v400 = func_addr.i32 fn0        ; bin: b9 Abs4(%foo) ffffffff
-    ; asm: movl $-1, %esi
-    [-,%rsi]            v401 = func_addr.i32 fn0        ; bin: be Abs4(%foo) ffffffff
-
-    return                                              ; bin: c3
-}
diff --git a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif b/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif
deleted file mode 100644
index 7fbb670df2..0000000000
--- a/cranelift/filetests/filetests/isa/x86/allones_funcaddrs64.clif
+++ /dev/null
@@ -1,27 +0,0 @@
-; binary emission of 64-bit code.
-test binemit
-set opt_level=speed_and_size
-set emit_all_ones_funcaddrs
-target x86_64 legacy haswell
-
-; The binary encodings can be verified with the command:
-;
-;   sed -ne 's/^ *; asm: *//p' filetests/isa/x86/allones_funcaddrs64.clif | llvm-mc -show-encoding -triple=x86_64
-;
-
-; Tests from binary64.clif affected by emit_all_ones_funcaddrs.
-function %I64() {
-    sig0 = ()
-    fn0 = %foo()
-
-block0:
-
-    ; asm: movabsq $-1, %rcx
-    [-,%rcx]            v400 = func_addr.i64 fn0        ; bin: 48 b9 Abs8(%foo) ffffffffffffffff
-    ; asm: movabsq $-1, %rsi
-    [-,%rsi]            v401 = func_addr.i64 fn0        ; bin: 48 be Abs8(%foo) ffffffffffffffff
-    ; asm: movabsq $-1, %r10
-    [-,%r10]            v402 = func_addr.i64 fn0        ; bin: 49 ba Abs8(%foo) ffffffffffffffff
-
-    return                                              ; bin: c3
-}
diff --git a/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif b/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif
deleted file mode 100644
index d2713829cd..0000000000
--- a/cranelift/filetests/filetests/isa/x86/baldrdash-table-sig-reg.clif
+++ /dev/null
@@ -1,14 +0,0 @@
-test compile
-set enable_probestack=false
-target i686 legacy
-
-function u0:0(i32 vmctx) baldrdash_system_v {
-    sig0 = (i32 vmctx, i32 sigid) baldrdash_system_v
-
-block0(v0: i32):
-    v2 = iconst.i32 0
-    v8 = iconst.i32 0
-    v9 = iconst.i32 0
-    call_indirect sig0, v8(v9, v2)
-    trap user0
-}
diff --git a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif b/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif
deleted file mode 100644
index 9099315878..0000000000
--- a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount.clif
+++ /dev/null
@@ -1,92 +0,0 @@
-test compile
-target x86_64 legacy baseline
-
-
-; clz/ctz on 64 bit operands
-
-function %i64_clz(i64) -> i64 {
-block0(v10: i64):
-  v11 = clz v10
-  ; check: x86_bsr
-  ; check: selectif.i64
-  return v11
-}
-
-function %i64_ctz(i64) -> i64 {
-block1(v20: i64):
-  v21 = ctz v20
-  ; check: x86_bsf
-  ; check: selectif.i64
-  return v21
-}
-
-
-; clz/ctz on 32 bit operands
-
-function %i32_clz(i32) -> i32 {
-block0(v10: i32):
-  v11 = clz v10
-  ; check: x86_bsr
-  ; check: selectif.i32
-  return v11
-}
-
-function %i32_ctz(i32) -> i32 {
-block1(v20: i32):
-  v21 = ctz v20
-  ; check: x86_bsf
-  ; check: selectif.i32
-  return v21
-}
-
-
-; popcount on 64 bit operands
-
-function %i64_popcount(i64) -> i64 {
-block0(v30: i64):
-  v31 = popcnt v30;
-  ; check: ushr_imm
-  ; check: iconst.i64
-  ; check: band
-  ; check: isub
-  ; check: ushr_imm
-  ; check: band
-  ; check: isub
-  ; check: ushr_imm
-  ; check: band
-  ; check: isub
-  ; check: ushr_imm
-  ; check: iadd
-  ; check: iconst.i64
-  ; check: band
-  ; check: iconst.i64
-  ; check: imul
-  ; check: ushr_imm
-  return v31;
-}
-
-
-; popcount on 32 bit operands
-
-function %i32_popcount(i32) -> i32 {
-block0(v40: i32):
-  v41 = popcnt v40;
-  ; check: ushr_imm
-  ; check: iconst.i32
-  ; check: band
-  ; check: isub
-  ; check: ushr_imm
-  ; check: band
-  ; check: isub
-  ; check: ushr_imm
-  ; check: band
-  ; check: isub
-  ; check: ushr_imm
-  ; check: iadd
-  ; check: iconst.i32
-  ; check: band
-  ; check: iconst.i32
-  ; check: imul
-  ; check: ushr_imm
-  return v41;
-}
diff --git a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif b/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif
deleted file mode 100644
index b2f36ff148..0000000000
--- a/cranelift/filetests/filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif
+++ /dev/null
@@ -1,87 +0,0 @@
-test binemit
-set opt_level=speed_and_size
-target x86_64 legacy baseline
-
-; The binary encodings can be verified with the command:
-;
-;   sed -ne 's/^ *; asm: *//p' filetests/isa/x86/baseline_clz_ctz_popcount_encoding.clif | llvm-mc -show-encoding -triple=x86_64
-;
-
-function %Foo() {
-block0:
-    ; 64-bit wide bsf
-
-    [-,%r11]                 v10 = iconst.i64 0x1234
-    ; asm: bsfq %r11, %rcx
-    [-,%rcx,%rflags]         v11, v12 = x86_bsf v10    ; bin: 49 0f bc cb
-
-    [-,%rdx]                 v14 = iconst.i64 0x5678
-    ; asm: bsfq %rdx, %r12
-    [-,%r12,%rflags]         v15, v16 = x86_bsf v14    ; bin: 4c 0f bc e2
-
-    ; asm: bsfq %rdx, %rdi
-    [-,%rdi,%rflags]         v17, v18 = x86_bsf v14    ; bin: 48 0f bc fa
-
-
-    ; 32-bit wide bsf
-
-    [-,%r11]                 v20 = iconst.i32 0x1234
-    ; asm: bsfl %r11d, %ecx
-    [-,%rcx,%rflags]         v21, v22 = x86_bsf v20    ; bin: 41 0f bc cb
-
-    [-,%rdx]                 v24 = iconst.i32 0x5678
-    ; asm: bsfl %edx, %r12d
-    [-,%r12,%rflags]         v25, v26 = x86_bsf v24    ; bin: 44 0f bc e2
-
-    ; asm: bsfl %edx, %esi
-    [-,%rsi,%rflags]         v27, v28 = x86_bsf v24    ; bin: 0f bc f2
-
-
-    ; 64-bit wide bsr
-
-    [-,%r11]                 v30 = iconst.i64 0x1234
-    ; asm: bsrq %r11, %rcx
-    [-,%rcx,%rflags]         v31, v32 = x86_bsr v30    ; bin: 49 0f bd cb
-
-    [-,%rdx]                 v34 = iconst.i64 0x5678
-    ; asm: bsrq %rdx, %r12
-    [-,%r12,%rflags]         v35, v36 = x86_bsr v34    ; bin: 4c 0f bd e2
-
-    ; asm: bsrq %rdx, %rdi
-    [-,%rdi,%rflags]         v37, v38 = x86_bsr v34    ; bin: 48 0f bd fa
-
-
-    ; 32-bit wide bsr
-
-    [-,%r11]                 v40 = iconst.i32 0x1234
-    ; asm: bsrl %r11d, %ecx
-    [-,%rcx,%rflags]         v41, v42 = x86_bsr v40    ; bin: 41 0f bd cb
-
-    [-,%rdx]                 v44 = iconst.i32 0x5678
-    ; asm: bsrl %edx, %r12d
-    [-,%r12,%rflags]         v45, v46 = x86_bsr v44    ; bin: 44 0f bd e2
-
-    ; asm: bsrl %edx, %esi
-    [-,%rsi,%rflags]         v47, v48 = x86_bsr v44    ; bin: 0f bd f2
-
-
-    ; 64-bit wide cmov
-
-    ; asm: cmoveq %r11, %rdx
-    [-,%rdx]     v51 = selectif.i64 eq v48, v30, v34   ; bin: 49 0f 44 d3
-
-    ; asm: cmoveq %rdi, %rdx
-    [-,%rdx]     v52 = selectif.i64 eq v48, v37, v34   ; bin: 48 0f 44 d7
-
-
-    ; 32-bit wide cmov
-
-    ; asm: cmovnel %r11d, %edx
-    [-,%rdx]    v60 = selectif.i32 ne v48, v40, v44    ; bin: 41 0f 45 d3
-
-    ; asm: cmovlel %esi, %edx
-    [-,%rdx]    v61 = selectif.i32 sle v48, v27, v44    ; bin: 0f 4e d6
-
-
-    trap user0
-}
diff --git a/cranelift/filetests/filetests/isa/x86/binary32-float.clif b/cranelift/filetests/filetests/isa/x86/binary32-float.clif
deleted file mode 100644
index cfac85f7b2..0000000000
--- a/cranelift/filetests/filetests/isa/x86/binary32-float.clif
+++ /dev/null
@@ -1,557 +0,0 @@
-; Binary emission of 32-bit floating point code.
-test binemit
-target i686 legacy haswell
-
-; The binary encodings can be verified with the command:
-;
-;   sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary32-float.clif | llvm-mc -show-encoding -triple=i386
-;
-
-function %F32() {
-    ss0 = incoming_arg 8, offset 0
-    ss1 = incoming_arg 1024, offset -1024
-    ss2 = incoming_arg 1024, offset -2048
-    ss3 = incoming_arg 8, offset -2056
-
-block0:
-    [-,%rcx]            v0 = iconst.i32 1
-    [-,%rsi]            v1 = iconst.i32 2
-
-    ; asm: cvtsi2ss %ecx, %xmm5
-    [-,%xmm5]           v10 = fcvt_from_sint.f32 v0             ; bin: f3 0f 2a e9
-    ; asm: cvtsi2ss %esi, %xmm2
-    [-,%xmm2]           v11 = fcvt_from_sint.f32 v1             ; bin: f3 0f 2a d6
-
-    ; asm: cvtss2sd %xmm2, %xmm5
-    [-,%xmm5]           v12 = fpromote.f64 v11                  ; bin: f3 0f 5a ea
-    ; asm: cvtss2sd %xmm5, %xmm2
-    [-,%xmm2]           v13 = fpromote.f64 v10                  ; bin: f3 0f 5a d5
-
-    ; asm: movd %ecx, %xmm5
-    [-,%xmm5]           v14 = bitcast.f32 v0                    ; bin: 66 0f 6e e9
-    ; asm: movd %esi, %xmm2
-    [-,%xmm2]           v15 = bitcast.f32 v1                    ; bin: 66 0f 6e d6
-
-    ; asm: movd %xmm5, %ecx
-    [-,%rcx]            v16 = bitcast.i32 v10                   ; bin: 66 0f 7e e9
-    ; asm: movd %xmm2, %esi
-    [-,%rsi]            v17 = bitcast.i32 v11                   ; bin: 66 0f 7e d6
-
-    ; asm: movaps %xmm2, %xmm5
-    [-,%xmm5]           v18 = copy v11                          ; bin: 0f 28 ea
-    ; asm: movaps %xmm5, %xmm2
-    [-,%xmm2]           v19 = copy v10                          ; bin: 0f 28 d5
-
-    ; asm: movaps %xmm2, %xmm5
-    regmove v19, %xmm2 -> %xmm5                                 ; bin: 0f 28 ea
-    ; asm: movaps %xmm5, %xmm2
-    regmove v19, %xmm5 -> %xmm2                                 ; bin: 0f 28 d5
-
-    ; Binary arithmetic.
-
-    ; asm: addss %xmm2, %xmm5
-    [-,%xmm5]           v20 = fadd v10, v11                     ; bin: f3 0f 58 ea
-    ; asm: addss %xmm5, %xmm2
-    [-,%xmm2]           v21 = fadd v11, v10                     ; bin: f3 0f 58 d5
-
-    ; asm: subss %xmm2, %xmm5
-    [-,%xmm5]           v22 = fsub v10, v11                     ; bin: f3 0f 5c ea
-    ; asm: subss %xmm5, %xmm2
-    [-,%xmm2]           v23 = fsub v11, v10                     ; bin: f3 0f 5c d5
-
-    ; asm: mulss %xmm2, %xmm5
-    [-,%xmm5]           v24 = fmul v10, v11                     ; bin: f3 0f 59 ea
-    ; asm: mulss %xmm5, %xmm2
-    [-,%xmm2]           v25 = fmul v11, v10                     ; bin: f3 0f 59 d5
-
-    ; asm: divss %xmm2, %xmm5
-    [-,%xmm5]           v26 = fdiv v10, v11                     ; bin: f3 0f 5e ea
-    ; asm: divss %xmm5, %xmm2
-    [-,%xmm2]           v27 = fdiv v11, v10                     ; bin: f3 0f 5e d5
-
-    ; Bitwise ops.
-    ; We use the *ps SSE instructions for everything because they are smaller.
-
-    ; asm: andps %xmm2, %xmm5
-    [-,%xmm5]           v30 = band v10, v11                     ; bin: 0f 54 ea
-    ; asm: andps %xmm5, %xmm2
-    [-,%xmm2]           v31 = band v11, v10                     ; bin: 0f 54 d5
-
-    ; asm: andnps %xmm2, %xmm5
-    [-,%xmm5]           v32 = band_not v11, v10                 ; bin: 0f 55 ea
-    ; asm: andnps %xmm5, %xmm2
-    [-,%xmm2]           v33 = band_not v10, v11                 ; bin: 0f 55 d5
-
-    ; asm: orps %xmm2, %xmm5
-    [-,%xmm5]           v34 = bor v10, v11                      ; bin: 0f 56 ea
-    ; asm: orps %xmm5, %xmm2
-    [-,%xmm2]           v35 = bor v11, v10                      ; bin: 0f 56 d5
-
-    ; asm: xorps %xmm2, %xmm5
-    [-,%xmm5]           v36 = bxor v10, v11                     ; bin: 0f 57 ea
-    ; asm: xorps %xmm5, %xmm2
-    [-,%xmm2]           v37 = bxor v11, v10                     ; bin: 0f 57 d5
-
-    ; Convert float to int. (No i64 dest on i386).
-
-    ; asm: cvttss2si %xmm5, %ecx
-    [-,%rcx]            v40 = x86_cvtt2si.i32 v10               ; bin: f3 0f 2c cd
-    ; asm: cvttss2si %xmm2, %esi
-    [-,%rsi]            v41 = x86_cvtt2si.i32 v11               ; bin: f3 0f 2c f2
-
-    ; Min/max.
-
-    ; asm: minss %xmm2, %xmm5
-    [-,%xmm5]           v42 = x86_fmin v10, v11                 ; bin: f3 0f 5d ea
-    ; asm: minss %xmm5, %xmm2
-    [-,%xmm2]           v43 = x86_fmin v11, v10                 ; bin: f3 0f 5d d5
-    ; asm: maxss %xmm2, %xmm5
-    [-,%xmm5]           v44 = x86_fmax v10, v11                 ; bin: f3 0f 5f ea
-    ; asm: maxss %xmm5, %xmm2
-    [-,%xmm2]           v45 = x86_fmax v11, v10                 ; bin: f3 0f 5f d5
-
-    ; Unary arithmetic.
-
-    ; asm: sqrtss %xmm5, %xmm2
-    [-,%xmm2]           v50 = sqrt v10                          ; bin: f3 0f 51 d5
-    ; asm: sqrtss %xmm2, %xmm5
-    [-,%xmm5]           v51 = sqrt v11                          ; bin: f3 0f 51 ea
-
-    ; asm: roundss $0, %xmm5, %xmm4
-    [-,%xmm4]           v52 = nearest v10                       ; bin: 66 0f 3a 0a e5 00
-    ; asm: roundss $0, %xmm2, %xmm5
-    [-,%xmm5]           v53 = nearest v11                       ; bin: 66 0f 3a 0a ea 00
-    ; asm: roundss $0, %xmm5, %xmm2
-    [-,%xmm2]           v54 = nearest v10                       ; bin: 66 0f 3a 0a d5 00
-
-    ; asm: roundss $1, %xmm5, %xmm4
-    [-,%xmm4]           v55 = floor v10                         ; bin: 66 0f 3a 0a e5 01
-    ; asm: roundss $1, %xmm2, %xmm5
-    [-,%xmm5]           v56 = floor v11                         ; bin: 66 0f 3a 0a ea 01
-    ; asm: roundss $1, %xmm5, %xmm2
-    [-,%xmm2]           v57 = floor v10                         ; bin: 66 0f 3a 0a d5 01
-
-    ; asm: roundss $2, %xmm5, %xmm4
-    [-,%xmm4]           v58 = ceil v10                          ; bin: 66 0f 3a 0a e5 02
-    ; asm: roundss $2, %xmm2, %xmm5
-    [-,%xmm5]           v59 = ceil v11                          ; bin: 66 0f 3a 0a ea 02
-    ; asm: roundss $2, %xmm5, %xmm2
-    [-,%xmm2]           v60 = ceil v10                          ; bin: 66 0f 3a 0a d5 02
-
-    ; asm: roundss $3, %xmm5, %xmm4
-    [-,%xmm4]           v61 = trunc v10                         ; bin: 66 0f 3a 0a e5 03
-    ; asm: roundss $3, %xmm2, %xmm5
-    [-,%xmm5]           v62 = trunc v11                         ; bin: 66 0f 3a 0a ea 03
-    ; asm: roundss $3, %xmm5, %xmm2
-    [-,%xmm2]           v63 = trunc v10                         ; bin: 66 0f 3a 0a d5 03
-
-    ; Load/Store
-
-    ; asm: movss (%ecx), %xmm5
-    [-,%xmm5]           v100 = load.f32 v0                      ; bin: heap_oob f3 0f 10 29
-    ; asm: movss (%esi), %xmm2
-    [-,%xmm2]           v101 = load.f32 v1                      ; bin: heap_oob f3 0f 10 16
-    ; asm: movss 50(%ecx), %xmm5
-    [-,%xmm5]           v110 = load.f32 v0+50                   ; bin: heap_oob f3 0f 10 69 32
-    ; asm: movss -50(%esi), %xmm2
-    [-,%xmm2]           v111 = load.f32 v1-50                   ; bin: heap_oob f3 0f 10 56 ce
-    ; asm: movss 10000(%ecx), %xmm5
-    [-,%xmm5]           v120 = load.f32 v0+10000                ; bin: heap_oob f3 0f 10 a9 00002710
-    ; asm: movss -10000(%esi), %xmm2
-    [-,%xmm2]           v121 = load.f32 v1-10000                ; bin: heap_oob f3 0f 10 96 ffffd8f0
-
-    ; asm: movss %xmm5, (%ecx)
-    [-]                 store.f32 v100, v0                      ; bin: heap_oob f3 0f 11 29
-    ; asm: movss %xmm2, (%esi)
-    [-]                 store.f32 v101, v1                      ; bin: heap_oob f3 0f 11 16
-    ; asm: movss %xmm5, 50(%ecx)
-    [-]                 store.f32 v100, v0+50                   ; bin: heap_oob f3 0f 11 69 32
-    ; asm: movss %xmm2, -50(%esi)
-    [-]                 store.f32 v101, v1-50                   ; bin: heap_oob f3 0f 11 56 ce
-    ; asm: movss %xmm5, 10000(%ecx)
-    [-]                 store.f32 v100, v0+10000                ; bin: heap_oob f3 0f 11 a9 00002710
-    ; asm: movss %xmm2, -10000(%esi)
-    [-]                 store.f32 v101, v1-10000                ; bin: heap_oob f3 0f 11 96 ffffd8f0
-
-    ; Spill / Fill.
-
-    ; asm: movss %xmm5, 1032(%esp)
-    [-,ss1]             v200 = spill v100                       ; bin: stk_ovf f3 0f 11 ac 24 00000408
-    ; asm: movss %xmm2, 1032(%esp)
-    [-,ss1]             v201 = spill v101                       ; bin: stk_ovf f3 0f 11 94 24 00000408
-
-    ; asm: movss 1032(%esp), %xmm5
-    [-,%xmm5]           v210 = fill v200                        ; bin: f3 0f 10 ac 24 00000408
-    ; asm: movss 1032(%esp), %xmm2
-    [-,%xmm2]           v211 = fill v201                        ; bin: f3 0f 10 94 24 00000408
-
-    ; asm: movss %xmm5, 1032(%esp)
-    regspill v100, %xmm5 -> ss1                                 ; bin: stk_ovf f3 0f 11 ac 24 00000408
-    ; asm: movss 1032(%esp), %xmm5
-    regfill v100, ss1 -> %xmm5                                  ; bin: f3 0f 10 ac 24 00000408
-
-    ; Comparisons.
-    ;
-    ; Only `supported_floatccs` are tested here. Others are handled by
-    ; legalization patterns.
-
-    ; asm: ucomiss %xmm2, %xmm5
-    ; asm: setnp %bl
-    [-,%rbx]            v300 = fcmp ord v10, v11                ; bin: 0f 2e ea 0f 9b c3
-    ; asm: ucomiss %xmm5, %xmm2
-    ; asm: setp %bl
-    [-,%rbx]            v301 = fcmp uno v11, v10                ; bin: 0f 2e d5 0f 9a c3
-    ; asm: ucomiss %xmm2, %xmm5
-    ; asm: setne %dl
-    [-,%rdx]            v302 = fcmp one v10, v11                ; bin: 0f 2e ea 0f 95 c2
-    ; asm: ucomiss %xmm5, %xmm2
-    ; asm: sete %dl
-    [-,%rdx]            v303 = fcmp ueq v11, v10                ; bin: 0f 2e d5 0f 94 c2
-    ; asm: ucomiss %xmm2, %xmm5
-    ; asm: seta %bl
-    [-,%rbx]            v304 = fcmp gt v10, v11                 ; bin: 0f 2e ea 0f 97 c3
-    ; asm: ucomiss %xmm5, %xmm2
-    ; asm: setae %bl
-    [-,%rbx]            v305 = fcmp ge v11, v10                 ; bin: 0f 2e d5 0f 93 c3
-    ; asm: ucomiss %xmm2, %xmm5
-    ; asm: setb %dl
-    [-,%rdx]            v306 = fcmp ult v10, v11                ; bin: 0f 2e ea 0f 92 c2
-    ; asm: ucomiss %xmm5, %xmm2
-    ; asm: setbe %dl
-    [-,%rdx]            v307 = fcmp ule v11, v10                ; bin: 0f 2e d5 0f 96 c2
-
-    ; asm: ucomiss %xmm2, %xmm5
-    [-,%rflags]         v310 = ffcmp v10, v11                   ; bin: 0f 2e ea
-    ; asm: ucomiss %xmm2, %xmm5
-    [-,%rflags]         v311 = ffcmp v11, v10                   ; bin: 0f 2e d5
-    ; asm: ucomiss %xmm5, %xmm5
-    [-,%rflags]         v312 = ffcmp v10, v10                   ; bin: 0f 2e ed
-
-    ; Load/Store Complex
-
-    [-,%rax]            v350 = iconst.i32 1
-    [-,%rbx]            v351 = iconst.i32 2
-
-    ; asm: movss (%rax,%rbx,1),%xmm5
-    [-,%xmm5]           v352 = load_complex.f32 v350+v351               ; bin: heap_oob f3 0f 10 2c 18
-    ; asm: movss 0x32(%rax,%rbx,1),%xmm5
-    [-,%xmm5]           v353 = load_complex.f32 v350+v351+50            ; bin: heap_oob f3 0f 10 6c 18 32
-    ; asm: movss -0x32(%rax,%rbx,1),%xmm5
-    [-,%xmm5]           v354 = load_complex.f32 v350+v351-50            ; bin: heap_oob f3 0f 10 6c 18 ce
-    ; asm: movss 0x2710(%rax,%rbx,1),%xmm5
-    [-,%xmm5]           v355 = load_complex.f32 v350+v351+10000         ; bin: heap_oob f3 0f 10 ac 18 00002710
-    ; asm: movss -0x2710(%rax,%rbx,1),%xmm5
-    [-,%xmm5]           v356 = load_complex.f32 v350+v351-10000         ; bin: heap_oob f3 0f 10 ac 18 ffffd8f0
-    ; asm: movss %xmm5,(%rax,%rbx,1)
-    [-]                 store_complex.f32 v100, v350+v351               ; bin: heap_oob f3 0f 11 2c 18
-    ; asm: movss %xmm5,0x32(%rax,%rbx,1)
-    [-]                 store_complex.f32 v100, v350+v351+50            ; bin: heap_oob f3 0f 11 6c 18 32
-    ; asm: movss %xmm2,-0x32(%rax,%rbx,1)
-    [-]                 store_complex.f32 v101, v350+v351-50            ; bin: heap_oob f3 0f 11 54 18 ce
-    ; asm: movss %xmm5,0x2710(%rax,%rbx,1)
-    [-]                 store_complex.f32 v100, v350+v351+10000         ; bin: heap_oob f3 0f 11 ac 18 00002710
-    ; asm: movss %xmm2,-0x2710(%rax,%rbx,1)
-    [-]                 store_complex.f32 v101, v350+v351-10000         ; bin: heap_oob f3 0f 11 94 18 ffffd8f0
-
-    return
-}
-
-function %F64() {
-    ss0 = incoming_arg 8, offset 0
-    ss1 = incoming_arg 1024, offset -1024
-    ss2 = incoming_arg 1024, offset -2048
-    ss3 = incoming_arg 8, offset -2056
-
-block0:
-    [-,%rcx]            v0 = iconst.i32 1
-    [-,%rsi]            v1 = iconst.i32 2
-
-    ; asm: cvtsi2sd %ecx, %xmm5
-    [-,%xmm5]           v10 = fcvt_from_sint.f64 v0             ; bin: f2 0f 2a e9
-    ; asm: cvtsi2sd %esi, %xmm2
-    [-,%xmm2]           v11 = fcvt_from_sint.f64 v1             ; bin: f2 0f 2a d6
-
-    ; asm: cvtsd2ss %xmm2, %xmm5
-    [-,%xmm5]           v12 = fdemote.f32 v11                   ; bin: f2 0f 5a ea
-    ; asm: cvtsd2ss %xmm5, %xmm2
-    [-,%xmm2]           v13 = fdemote.f32 v10                   ; bin: f2 0f 5a d5
-
-    ; No i64 <-> f64 bitcasts in 32-bit mode.
-
-    ; asm: movaps %xmm2, %xmm5
-    [-,%xmm5]           v18 = copy v11                          ; bin: 0f 28 ea
-    ; asm: movaps %xmm5, %xmm2
-    [-,%xmm2]           v19 = copy v10                          ; bin: 0f 28 d5
-
-    ; asm: movaps %xmm2, %xmm5
-    regmove v19, %xmm2 -> %xmm5                                 ; bin: 0f 28 ea
-    ; asm: movaps %xmm5, %xmm2
-    regmove v19, %xmm5 -> %xmm2                                 ; bin: 0f 28 d5
-
-    ; Binary arithmetic.
-
-    ; asm: addsd %xmm2, %xmm5
-    [-,%xmm5]           v20 = fadd v10, v11                     ; bin: f2 0f 58 ea
-    ; asm: addsd %xmm5, %xmm2
-    [-,%xmm2]           v21 = fadd v11, v10                     ; bin: f2 0f 58 d5
-
-    ; asm: subsd %xmm2, %xmm5
-    [-,%xmm5]           v22 = fsub v10, v11                     ; bin: f2 0f 5c ea
-    ; asm: subsd %xmm5, %xmm2
-    [-,%xmm2]           v23 = fsub v11, v10                     ; bin: f2 0f 5c d5
-
-    ; asm: mulsd %xmm2, %xmm5
-    [-,%xmm5]           v24 = fmul v10, v11                     ; bin: f2 0f 59 ea
-    ; asm: mulsd %xmm5, %xmm2
-    [-,%xmm2]           v25 = fmul v11, v10                     ; bin: f2 0f 59 d5
-
-    ; asm: divsd %xmm2, %xmm5
-    [-,%xmm5]           v26 = fdiv v10, v11                     ; bin: f2 0f 5e ea
-    ; asm: divsd %xmm5, %xmm2
-    [-,%xmm2]           v27 = fdiv v11, v10                     ; bin: f2 0f 5e d5
-
-    ; Bitwise ops.
-    ; We use the *ps SSE instructions for everything because they are smaller.
-
-    ; asm: andps %xmm2, %xmm5
-    [-,%xmm5]           v30 = band v10, v11                     ; bin: 0f 54 ea
-    ; asm: andps %xmm5, %xmm2
-    [-,%xmm2]           v31 = band v11, v10                     ; bin: 0f 54 d5
-
-    ; asm: andnps %xmm2, %xmm5
-    [-,%xmm5]           v32 = band_not v11, v10                 ; bin: 0f 55 ea
-    ; asm: andnps %xmm5, %xmm2
-    [-,%xmm2]           v33 = band_not v10, v11                 ; bin: 0f 55 d5
-
-    ; asm: orps %xmm2, %xmm5
-    [-,%xmm5]           v34 = bor v10, v11                      ; bin: 0f 56 ea
-    ; asm: orps %xmm5, %xmm2
-    [-,%xmm2]           v35 = bor v11, v10                      ; bin: 0f 56 d5
-
-    ; asm: xorps %xmm2, %xmm5
-    [-,%xmm5]           v36 = bxor v10, v11                     ; bin: 0f 57 ea
-    ; asm: xorps %xmm5, %xmm2
-    [-,%xmm2]           v37 = bxor v11, v10                     ; bin: 0f 57 d5
-
-    ; Convert float to int. (No i64 dest on i386).
-
-    ; asm: cvttsd2si %xmm5, %ecx
-    [-,%rcx]            v40 = x86_cvtt2si.i32 v10               ; bin: f2 0f 2c cd
-    ; asm: cvttsd2si %xmm2, %esi
-    [-,%rsi]            v41 = x86_cvtt2si.i32 v11               ; bin: f2 0f 2c f2
-
-    ; Min/max.
-
-    ; asm: minsd %xmm2, %xmm5
-    [-,%xmm5]           v42 = x86_fmin v10, v11                 ; bin: f2 0f 5d ea
-    ; asm: minsd %xmm5, %xmm2
-    [-,%xmm2]           v43 = x86_fmin v11, v10                 ; bin: f2 0f 5d d5
-    ; asm: maxsd %xmm2, %xmm5
-    [-,%xmm5]           v44 = x86_fmax v10, v11                 ; bin: f2 0f 5f ea
-    ; asm: maxsd %xmm5, %xmm2
-    [-,%xmm2]           v45 = x86_fmax v11, v10                 ; bin: f2 0f 5f d5
-
-    ; Unary arithmetic.
-
-    ; asm: sqrtsd %xmm5, %xmm2
-    [-,%xmm2]           v50 = sqrt v10                          ; bin: f2 0f 51 d5
-    ; asm: sqrtsd %xmm2, %xmm5
-    [-,%xmm5]           v51 = sqrt v11                          ; bin: f2 0f 51 ea
-
-    ; asm: roundsd $0, %xmm5, %xmm4
-    [-,%xmm4]           v52 = nearest v10                       ; bin: 66 0f 3a 0b e5 00
-    ; asm: roundsd $0, %xmm2, %xmm5
-    [-,%xmm5]           v53 = nearest v11                       ; bin: 66 0f 3a 0b ea 00
-    ; asm: roundsd $0, %xmm5, %xmm2
-    [-,%xmm2]           v54 = nearest v10                       ; bin: 66 0f 3a 0b d5 00
-
-    ; asm: roundsd $1, %xmm5, %xmm4
-    [-,%xmm4]           v55 = floor v10                         ; bin: 66 0f 3a 0b e5 01
-    ; asm: roundsd $1, %xmm2, %xmm5
-    [-,%xmm5]           v56 = floor v11                         ; bin: 66 0f 3a 0b ea 01
-    ; asm: roundsd $1, %xmm5, %xmm2
-    [-,%xmm2]           v57 = floor v10                         ; bin: 66 0f 3a 0b d5 01
-
-    ; asm: roundsd $2, %xmm5, %xmm4
-    [-,%xmm4]           v58 = ceil v10                          ; bin: 66 0f 3a 0b e5 02
-    ; asm: roundsd $2, %xmm2, %xmm5
-    [-,%xmm5]           v59 = ceil v11                          ; bin: 66 0f 3a 0b ea 02
-    ; asm: roundsd $2, %xmm5, %xmm2
-    [-,%xmm2]           v60 = ceil v10                          ; bin: 66 0f 3a 0b d5 02
-
-    ; asm: roundsd $3, %xmm5, %xmm4
-    [-,%xmm4]           v61 = trunc v10                         ; bin: 66 0f 3a 0b e5 03
-    ; asm: roundsd $3, %xmm2, %xmm5
-    [-,%xmm5]           v62 = trunc v11                         ; bin: 66 0f 3a 0b ea 03
-    ; asm: roundsd $3, %xmm5, %xmm2
-    [-,%xmm2]           v63 = trunc v10                         ; bin: 66 0f 3a 0b d5 03
-
-    ; Load/Store
-
-    ; asm: movsd (%ecx), %xmm5
-    [-,%xmm5]           v100 = load.f64 v0                      ; bin: heap_oob f2 0f 10 29
-    ; asm: movsd (%esi), %xmm2
-    [-,%xmm2]           v101 = load.f64 v1                      ; bin: heap_oob f2 0f 10 16
-    ; asm: movsd 50(%ecx), %xmm5
-    [-,%xmm5]           v110 = load.f64 v0+50                   ; bin: heap_oob f2 0f 10 69 32
-    ; asm: movsd -50(%esi), %xmm2
-    [-,%xmm2]           v111 = load.f64 v1-50                   ; bin: heap_oob f2 0f 10 56 ce
-    ; asm: movsd 10000(%ecx), %xmm5
-    [-,%xmm5]           v120 = load.f64 v0+10000                ; bin: heap_oob f2 0f 10 a9 00002710
-    ; asm: movsd -10000(%esi), %xmm2
-    [-,%xmm2]           v121 = load.f64 v1-10000                ; bin: heap_oob f2 0f 10 96 ffffd8f0
-
-    ; asm: movsd %xmm5, (%ecx)
-    [-]                 store.f64 v100, v0                      ; bin: heap_oob f2 0f 11 29
-    ; asm: movsd %xmm2, (%esi)
-    [-]                 store.f64 v101, v1                      ; bin: heap_oob f2 0f 11 16
-    ; asm: movsd %xmm5, 50(%ecx)
-    [-]                 store.f64 v100, v0+50                   ; bin: heap_oob f2 0f 11 69 32
-    ; asm: movsd %xmm2, -50(%esi)
-    [-]                 store.f64 v101, v1-50                   ; bin: heap_oob f2 0f 11 56 ce
-    ; asm: movsd %xmm5, 10000(%ecx)
-    [-]                 store.f64 v100, v0+10000                ; bin: heap_oob f2 0f 11 a9 00002710
-    ; asm: movsd %xmm2, -10000(%esi)
-    [-]                 store.f64 v101, v1-10000                ; bin: heap_oob f2 0f 11 96 ffffd8f0
-
-    ; Spill / Fill.
-
-    ; asm: movsd %xmm5, 1032(%esp)
-    [-,ss1]             v200 = spill v100                       ; bin: stk_ovf f2 0f 11 ac 24 00000408
-    ; asm: movsd %xmm2, 1032(%esp)
-    [-,ss1]             v201 = spill v101                       ; bin: stk_ovf f2 0f 11 94 24 00000408
-
-    ; asm: movsd 1032(%esp), %xmm5
-    [-,%xmm5]           v210 = fill v200                        ; bin: f2 0f 10 ac 24 00000408
-    ; asm: movsd 1032(%esp), %xmm2
-    [-,%xmm2]           v211 = fill v201                        ; bin: f2 0f 10 94 24 00000408
-
-    ; asm: movsd %xmm5, 1032(%esp)
-    regspill v100, %xmm5 -> ss1                                 ; bin: stk_ovf f2 0f 11 ac 24 00000408
-    ; asm: movsd 1032(%esp), %xmm5
-    regfill v100, ss1 -> %xmm5                                  ; bin: f2 0f 10 ac 24 00000408
-
-    ; Comparisons.
-    ;
-    ; Only `supported_floatccs` are tested here. Others are handled by
-    ; legalization patterns.
-
-    ; asm: ucomisd %xmm2, %xmm5
-    ; asm: setnp %bl
-    [-,%rbx]            v300 = fcmp ord v10, v11                ; bin: 66 0f 2e ea 0f 9b c3
-    ; asm: ucomisd %xmm5, %xmm2
-    ; asm: setp %bl
-    [-,%rbx]            v301 = fcmp uno v11, v10                ; bin: 66 0f 2e d5 0f 9a c3
-    ; asm: ucomisd %xmm2, %xmm5
-    ; asm: setne %dl
-    [-,%rdx]            v302 = fcmp one v10, v11                ; bin: 66 0f 2e ea 0f 95 c2
-    ; asm: ucomisd %xmm5, %xmm2
-    ; asm: sete %dl
-    [-,%rdx]            v303 = fcmp ueq v11, v10                ; bin: 66 0f 2e d5 0f 94 c2
-    ; asm: ucomisd %xmm2, %xmm5
-    ; asm: seta %bl
-    [-,%rbx]            v304 = fcmp gt v10, v11                 ; bin: 66 0f 2e ea 0f 97 c3
-    ; asm: ucomisd %xmm5, %xmm2
-    ; asm: setae %bl
-    [-,%rbx]            v305 = fcmp ge v11, v10                 ; bin: 66 0f 2e d5 0f 93 c3
-    ; asm: ucomisd %xmm2, %xmm5
-    ; asm: setb %dl
-    [-,%rdx]            v306 = fcmp ult v10, v11                ; bin: 66 0f 2e ea 0f 92 c2
-    ; asm: ucomisd %xmm5, %xmm2
-    ; asm: setbe %dl
-    [-,%rdx]            v307 = fcmp ule v11, v10                ; bin: 66 0f 2e d5 0f 96 c2
-
-    ; asm: ucomisd %xmm2, %xmm5
-    [-,%rflags]         v310 = ffcmp v10, v11                   ; bin: 66 0f 2e ea
-    ; asm: ucomisd %xmm2, %xmm5
-    [-,%rflags]         v311 = ffcmp v11, v10                   ; bin: 66 0f 2e d5
-    ; asm: ucomisd %xmm5, %xmm5
-    [-,%rflags]         v312 = ffcmp v10, v10                   ; bin: 66 0f 2e ed
-
-    return
-}
-
-function %cpuflags_float(f32 [%xmm0]) {
-block0(v0: f32 [%xmm0]):
-    ; asm: ucomiss %xmm0, %xmm0
-    [-,%rflags]         v1 = ffcmp v0, v0                       ; bin: 0f 2e c0
-
-    jump block1
-
-block1:
-    ; asm: jnp block1
-    brff ord v1, block1                                           ; bin: 7b fe
-    jump block2
-
-block2:
-    ; asm: jp block1
-    brff uno v1, block1                                           ; bin: 7a fc
-    jump block3
-
-block3:
-    ; asm: jne block1
-    brff one v1, block1                                           ; bin: 75 fa
-    jump block4
-
-block4:
-    ; asm: je block1
-    brff ueq v1, block1                                           ; bin: 74 f8
-    jump block5
-
-block5:
-    ; asm: ja block1
-    brff gt v1, block1                                            ; bin: 77 f6
-    jump block6
-
-block6:
-    ; asm: jae block1
-    brff ge v1, block1                                            ; bin: 73 f4
-    jump block7
-
-block7:
-    ; asm: jb block1
-    brff ult v1, block1                                           ; bin: 72 f2
-    jump block8
-
-block8:
-    ; asm: jbe block1
-    brff ule v1, block1                                           ; bin: 76 f0
-    jump block9
-
-block9:
-    ; asm: jp .+4; ud2
-    trapff ord v1, user0                                        ; bin: 7a 02 user0 0f 0b
-    ; asm: jnp .+4; ud2
-    trapff uno v1, user0                                        ; bin: 7b 02 user0 0f 0b
-    ; asm: je .+4; ud2
-    trapff one v1, user0                                        ; bin: 74 02 user0 0f 0b
-    ; asm: jne .+4; ud2
-    trapff ueq v1, user0                                        ; bin: 75 02 user0 0f 0b
-    ; asm: jna .+4; ud2
-    trapff gt v1, user0                                         ; bin: 76 02 user0 0f 0b
-    ; asm: jnae .+4; ud2
-    trapff ge v1, user0                                         ; bin: 72 02 user0 0f 0b
-    ; asm: jnb .+4; ud2
-    trapff ult v1, user0                                        ; bin: 73 02 user0 0f 0b
-    ; asm: jnbe .+4; ud2
-    trapff ule v1, user0                                        ; bin: 77 02 user0 0f 0b
-
-    ; asm: setnp %bl
-    [-,%rbx]            v10 = trueff ord v1                     ; bin: 0f 9b c3
-    ; asm: setp %bl
-    [-,%rbx]            v11 = trueff uno v1                     ; bin: 0f 9a c3
-    ; asm: setne %dl
-    [-,%rdx]            v12 = trueff one v1                     ; bin: 0f 95 c2
-    ; asm: sete %dl
-    [-,%rdx]            v13 = trueff ueq v1                     ; bin: 0f 94 c2
-    ; asm: seta %al
-    [-,%rax]            v14 = trueff gt v1                      ; bin: 0f 97 c0
-    ; asm: setae %al
-    [-,%rax]            v15 = trueff ge v1                      ; bin: 0f 93 c0
-    ; asm: setb %cl
-    [-,%rcx]            v16 = trueff ult v1                     ; bin: 0f 92 c1
-    ; asm: setbe %cl
-    [-,%rcx]            v17 = trueff ule v1                     ; bin: 0f 96 c1
-
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/binary32.clif b/cranelift/filetests/filetests/isa/x86/binary32.clif
deleted file mode 100644
index 11268d5c4e..0000000000
--- a/cranelift/filetests/filetests/isa/x86/binary32.clif
+++ /dev/null
@@ -1,721 +0,0 @@
-; binary emission of x86-32 code.
-test binemit
-set opt_level=speed_and_size
-target i686 legacy haswell
-
-; The binary encodings can be verified with the command:
-;
-;   sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary32.clif | llvm-mc -show-encoding -triple=i386
-;
-
-function %I32() {
-    sig0 = ()
-    fn0 = %foo()
-
-    gv0 = symbol %some_gv
-
-    ss0 = incoming_arg 8, offset 0
-    ss1 = incoming_arg 1024, offset -1024
-    ss2 = incoming_arg 1024, offset -2048
-    ss3 = incoming_arg 8, offset -2056
-
-block0:
-    ; asm: movl $1, %ecx
-    [-,%rcx]            v1 = iconst.i32 1        ; bin: b9 00000001
-    ; asm: movl $2, %esi
-    [-,%rsi]            v2 = iconst.i32 2        ; bin: be 00000002
-
-    ; asm: movb $1, %cl
-    [-,%rcx]            v9007 = bconst.b1 true      ; bin: b9 00000001
-
-    ; Integer Register-Register Operations.
-
-    ; asm: addl %esi, %ecx
-    [-,%rcx]             v10 = iadd v1, v2       ; bin: 01 f1
-    ; asm: addl %ecx, %esi
-    [-,%rsi]             v11 = iadd v2, v1       ; bin: 01 ce
-    ; asm: subl %esi, %ecx
-    [-,%rcx]             v12 = isub v1, v2       ; bin: 29 f1
-    ; asm: subl %ecx, %esi
-    [-,%rsi]             v13 = isub v2, v1       ; bin: 29 ce
-
-    ; asm: andl %esi, %ecx
-    [-,%rcx]             v14 = band v1, v2       ; bin: 21 f1
-    ; asm: andl %ecx, %esi
-    [-,%rsi]             v15 = band v2, v1       ; bin: 21 ce
-    ; asm: orl %esi, %ecx
-    [-,%rcx]             v16 = bor v1, v2        ; bin: 09 f1
-    ; asm: orl %ecx, %esi
-    [-,%rsi]             v17 = bor v2, v1        ; bin: 09 ce
-    ; asm: xorl %esi, %ecx
-    [-,%rcx]             v18 = bxor v1, v2       ; bin: 31 f1
-    ; asm: xorl %ecx, %esi
-    [-,%rsi]             v19 = bxor v2, v1       ; bin: 31 ce
-
-    ; Dynamic shifts take the shift amount in %rcx.
-
-    ; asm: shll %cl, %esi
-    [-,%rsi]             v20 = ishl v2, v1       ; bin: d3 e6
-    ; asm: shll %cl, %ecx
-    [-,%rcx]             v21 = ishl v1, v1       ; bin: d3 e1
-    ; asm: shrl %cl, %esi
-    [-,%rsi]             v22 = ushr v2, v1       ; bin: d3 ee
-    ; asm: shrl %cl, %ecx
-    [-,%rcx]             v23 = ushr v1, v1       ; bin: d3 e9
-    ; asm: sarl %cl, %esi
-    [-,%rsi]             v24 = sshr v2, v1       ; bin: d3 fe
-    ; asm: sarl %cl, %ecx
-    [-,%rcx]             v25 = sshr v1, v1       ; bin: d3 f9
-    ; asm: roll %cl, %esi
-    [-,%rsi]             v26 = rotl v2, v1       ; bin: d3 c6
-    ; asm: roll %cl, %ecx
-    [-,%rcx]             v27 = rotl v1, v1       ; bin: d3 c1
-    ; asm: rorl %cl, %esi
-    [-,%rsi]             v28 = rotr v2, v1       ; bin: d3 ce
-    ; asm: rorl %cl, %ecx
-    [-,%rcx]             v29 = rotr v1, v1       ; bin: d3 c9
-
-    ; Integer Register - Immediate 8-bit operations.
-    ; The 8-bit immediate is sign-extended.
-
-    ; asm: addl $-128, %ecx
-    [-,%rcx]             v30 = iadd_imm v1, -128 ; bin: 83 c1 80
-    ; asm: addl $10, %esi
-    [-,%rsi]             v31 = iadd_imm v2, 10   ; bin: 83 c6 0a
-
-    ; asm: andl $-128, %ecx
-    [-,%rcx]             v32 = band_imm v1, -128 ; bin: 83 e1 80
-    ; asm: andl $10, %esi
-    [-,%rsi]             v33 = band_imm v2, 10   ; bin: 83 e6 0a
-    ; asm: orl $-128, %ecx
-    [-,%rcx]             v34 = bor_imm v1, -128  ; bin: 83 c9 80
-    ; asm: orl $10, %esi
-    [-,%rsi]             v35 = bor_imm v2, 10    ; bin: 83 ce 0a
-    ; asm: xorl $-128, %ecx
-    [-,%rcx]             v36 = bxor_imm v1, -128 ; bin: 83 f1 80
-    ; asm: xorl $10, %esi
-    [-,%rsi]             v37 = bxor_imm v2, 10   ; bin: 83 f6 0a
-
-    ; Integer Register - Immediate 32-bit operations.
-
-    ; asm: addl $-128000, %ecx
-    [-,%rcx]             v40 = iadd_imm v1, -128000 ; bin: 81 c1 fffe0c00
-    ; asm: addl $1000000, %esi
-    [-,%rsi]             v41 = iadd_imm v2, 1000000 ; bin: 81 c6 000f4240
-
-    ; asm: andl $-128000, %ecx
-    [-,%rcx]             v42 = band_imm v1, -128000 ; bin: 81 e1 fffe0c00
-    ; asm: andl $1000000, %esi
-    [-,%rsi]             v43 = band_imm v2, 1000000 ; bin: 81 e6 000f4240
-    ; asm: orl $-128000, %ecx
-    [-,%rcx]             v44 = bor_imm v1, -128000  ; bin: 81 c9 fffe0c00
-    ; asm: orl $1000000, %esi
-    [-,%rsi]             v45 = bor_imm v2, 1000000  ; bin: 81 ce 000f4240
-    ; asm: xorl $-128000, %ecx
-    [-,%rcx]             v46 = bxor_imm v1, -128000 ; bin: 81 f1 fffe0c00
-    ; asm: xorl $1000000, %esi
-    [-,%rsi]             v47 = bxor_imm v2, 1000000 ; bin: 81 f6 000f4240
-
-    ; More arithmetic.
-
-    ; asm: imull %esi, %ecx
-    [-,%rcx]             v50 = imul v1, v2       ; bin: 0f af ce
-    ; asm: imull %ecx, %esi
-    [-,%rsi]             v51 = imul v2, v1       ; bin: 0f af f1
-
-    ; asm: movl $1, %eax
-    [-,%rax]      v52 = iconst.i32 1                    ; bin: b8 00000001
-    ; asm: movl $2, %edx
-    [-,%rdx]      v53 = iconst.i32 2                    ; bin: ba 00000002
-    ; asm: idivl %ecx
-    [-,%rax,%rdx] v54, v55 = x86_sdivmodx v52, v53, v1  ; bin: int_divz f7 f9
-    ; asm: idivl %esi
-    [-,%rax,%rdx] v56, v57 = x86_sdivmodx v52, v53, v2  ; bin: int_divz f7 fe
-    ; asm: divl %ecx
-    [-,%rax,%rdx] v58, v59 = x86_udivmodx v52, v53, v1  ; bin: int_divz f7 f1
-    ; asm: divl %esi
-    [-,%rax,%rdx] v60, v61 = x86_udivmodx v52, v53, v2  ; bin: int_divz f7 f6
-
-    ; Register copies.
-
-    ; asm: movl %esi, %ecx
-    [-,%rcx]             v80 = copy v2           ; bin: 89 f1
-    ; asm: movl %ecx, %esi
-    [-,%rsi]             v81 = copy v1           ; bin: 89 ce
-
-    ; Copy Special
-    ; asm: movl %esp, %ebp
-    copy_special %rsp -> %rbp                   ; bin: 89 e5
-    ; asm: movl %ebp, %esp
-    copy_special %rbp -> %rsp                   ; bin: 89 ec
-
-
-    ; Load/Store instructions.
-
-    ; Register indirect addressing with no displacement.
-
-    ; asm: movl %ecx, (%esi)
-    store v1, v2                                ; bin: heap_oob 89 0e
-    ; asm: movl %esi, (%ecx)
-    store v2, v1                                ; bin: heap_oob 89 31
-    ; asm: movw %cx, (%esi)
-    istore16 v1, v2                             ; bin: heap_oob 66 89 0e
-    ; asm: movw %si, (%ecx)
-    istore16 v2, v1                             ; bin: heap_oob 66 89 31
-    ; asm: movb %cl, (%esi)
-    istore8 v1, v2                              ; bin: heap_oob 88 0e
-    ; Can't store %sil in 32-bit mode (needs REX prefix).
-
-    ; asm: movl (%ecx), %edi
-    [-,%rdi]            v100 = load.i32 v1      ; bin: heap_oob 8b 39
-    ; asm: movl (%esi), %edx
-    [-,%rdx]            v101 = load.i32 v2      ; bin: heap_oob 8b 16
-    ; asm: movzwl (%ecx), %edi
-    [-,%rdi]            v102 = uload16.i32 v1   ; bin: heap_oob 0f b7 39
-    ; asm: movzwl (%esi), %edx
-    [-,%rdx]            v103 = uload16.i32 v2   ; bin: heap_oob 0f b7 16
-    ; asm: movswl (%ecx), %edi
-    [-,%rdi]            v104 = sload16.i32 v1   ; bin: heap_oob 0f bf 39
-    ; asm: movswl (%esi), %edx
-    [-,%rdx]            v105 = sload16.i32 v2   ; bin: heap_oob 0f bf 16
-    ; asm: movzbl (%ecx), %edi
-    [-,%rdi]            v106 = uload8.i32 v1    ; bin: heap_oob 0f b6 39
-    ; asm: movzbl (%esi), %edx
-    [-,%rdx]            v107 = uload8.i32 v2    ; bin: heap_oob 0f b6 16
-    ; asm: movsbl (%ecx), %edi
-    [-,%rdi]            v108 = sload8.i32 v1    ; bin: heap_oob 0f be 39
-    ; asm: movsbl (%esi), %edx
-    [-,%rdx]            v109 = sload8.i32 v2    ; bin: heap_oob 0f be 16
-
-    ; Register-indirect with 8-bit signed displacement.
-
-    ; asm: movl %ecx, 100(%esi)
-    store v1, v2+100                            ; bin: heap_oob 89 4e 64
-    ; asm: movl %esi, -100(%ecx)
-    store v2, v1-100                            ; bin: heap_oob 89 71 9c
-    ; asm: movw %cx, 100(%esi)
-    istore16 v1, v2+100                         ; bin: heap_oob 66 89 4e 64
-    ; asm: movw %si, -100(%ecx)
-    istore16 v2, v1-100                         ; bin: heap_oob 66 89 71 9c
-    ; asm: movb %cl, 100(%esi)
-    istore8 v1, v2+100                          ; bin: heap_oob 88 4e 64
-
-    ; asm: movl 50(%ecx), %edi
-    [-,%rdi]            v110 = load.i32 v1+50           ; bin: heap_oob 8b 79 32
-    ; asm: movl -50(%esi), %edx
-    [-,%rdx]            v111 = load.i32 v2-50           ; bin: heap_oob 8b 56 ce
-    ; asm: movzwl 50(%ecx), %edi
-    [-,%rdi]            v112 = uload16.i32 v1+50        ; bin: heap_oob 0f b7 79 32
-    ; asm: movzwl -50(%esi), %edx
-    [-,%rdx]            v113 = uload16.i32 v2-50        ; bin: heap_oob 0f b7 56 ce
-    ; asm: movswl 50(%ecx), %edi
-    [-,%rdi]            v114 = sload16.i32 v1+50        ; bin: heap_oob 0f bf 79 32
-    ; asm: movswl -50(%esi), %edx
-    [-,%rdx]            v115 = sload16.i32 v2-50        ; bin: heap_oob 0f bf 56 ce
-    ; asm: movzbl 50(%ecx), %edi
-    [-,%rdi]            v116 = uload8.i32 v1+50         ; bin: heap_oob 0f b6 79 32
-    ; asm: movzbl -50(%esi), %edx
-    [-,%rdx]            v117 = uload8.i32 v2-50         ; bin: heap_oob 0f b6 56 ce
-    ; asm: movsbl 50(%ecx), %edi
-    [-,%rdi]            v118 = sload8.i32 v1+50         ; bin: heap_oob 0f be 79 32
-    ; asm: movsbl -50(%esi), %edx
-    [-,%rdx]            v119 = sload8.i32 v2-50         ; bin: heap_oob 0f be 56 ce
-
-    ; Register-indirect with 32-bit signed displacement.
-
-    ; asm: movl %ecx, 10000(%esi)
-    store v1, v2+10000                          ; bin: heap_oob 89 8e 00002710
-    ; asm: movl %esi, -10000(%ecx)
-    store v2, v1-10000                          ; bin: heap_oob 89 b1 ffffd8f0
-    ; asm: movw %cx, 10000(%esi)
-    istore16 v1, v2+10000                       ; bin: heap_oob 66 89 8e 00002710
-    ; asm: movw %si, -10000(%ecx)
-    istore16 v2, v1-10000                       ; bin: heap_oob 66 89 b1 ffffd8f0
-    ; asm: movb %cl, 10000(%esi)
-    istore8 v1, v2+10000                        ; bin: heap_oob 88 8e 00002710
-
-    ; asm: movl 50000(%ecx), %edi
-    [-,%rdi]            v120 = load.i32 v1+50000           ; bin: heap_oob 8b b9 0000c350
-    ; asm: movl -50000(%esi), %edx
-    [-,%rdx]            v121 = load.i32 v2-50000           ; bin: heap_oob 8b 96 ffff3cb0
-    ; asm: movzwl 50000(%ecx), %edi
-    [-,%rdi]            v122 = uload16.i32 v1+50000        ; bin: heap_oob 0f b7 b9 0000c350
-    ; asm: movzwl -50000(%esi), %edx
-    [-,%rdx]            v123 = uload16.i32 v2-50000        ; bin: heap_oob 0f b7 96 ffff3cb0
-    ; asm: movswl 50000(%ecx), %edi
-    [-,%rdi]            v124 = sload16.i32 v1+50000        ; bin: heap_oob 0f bf b9 0000c350
-    ; asm: movswl -50000(%esi), %edx
-    [-,%rdx]            v125 = sload16.i32 v2-50000        ; bin: heap_oob 0f bf 96 ffff3cb0
-    ; asm: movzbl 50000(%ecx), %edi
-    [-,%rdi]            v126 = uload8.i32 v1+50000         ; bin: heap_oob 0f b6 b9 0000c350
-    ; asm: movzbl -50000(%esi), %edx
-    [-,%rdx]            v127 = uload8.i32 v2-50000         ; bin: heap_oob 0f b6 96 ffff3cb0
-    ; asm: movsbl 50000(%ecx), %edi
-    [-,%rdi]            v128 = sload8.i32 v1+50000         ; bin: heap_oob 0f be b9 0000c350
-    ; asm: movsbl -50000(%esi), %edx
-    [-,%rdx]            v129 = sload8.i32 v2-50000         ; bin: heap_oob 0f be 96 ffff3cb0
-
-    ; Bit-counting instructions.
-
-    ; asm: popcntl %esi, %ecx
-    [-,%rcx]            v200 = popcnt v2        ; bin: f3 0f b8 ce
-    ; asm: popcntl %ecx, %esi
-    [-,%rsi]            v201 = popcnt v1        ; bin: f3 0f b8 f1
-
-    ; asm: lzcntl %esi, %ecx
-    [-,%rcx]            v202 = clz v2           ; bin: f3 0f bd ce
-    ; asm: lzcntl %ecx, %esi
-    [-,%rsi]            v203 = clz v1           ; bin: f3 0f bd f1
-
-    ; asm: tzcntl %esi, %ecx
-    [-,%rcx]            v204 = ctz v2           ; bin: f3 0f bc ce
-    ; asm: tzcntl %ecx, %esi
-    [-,%rsi]            v205 = ctz v1           ; bin: f3 0f bc f1
-
-    ; Integer comparisons.
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: sete %bl
-    [-,%rbx]            v300 = icmp eq v1, v2   ; bin: 39 f1 0f 94 c3
-    ; asm: cmpl %ecx, %esi
-    ; asm: sete %dl
-    [-,%rdx]            v301 = icmp eq v2, v1   ; bin: 39 ce 0f 94 c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: setne %bl
-    [-,%rbx]            v302 = icmp ne v1, v2   ; bin: 39 f1 0f 95 c3
-    ; asm: cmpl %ecx, %esi
-    ; asm: setne %dl
-    [-,%rdx]            v303 = icmp ne v2, v1   ; bin: 39 ce 0f 95 c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: setl %bl
-    [-,%rbx]            v304 = icmp slt v1, v2  ; bin: 39 f1 0f 9c c3
-    ; asm: cmpl %ecx, %esi
-    ; asm: setl %dl
-    [-,%rdx]            v305 = icmp slt v2, v1  ; bin: 39 ce 0f 9c c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: setge %bl
-    [-,%rbx]            v306 = icmp sge v1, v2  ; bin: 39 f1 0f 9d c3
-    ; asm: cmpl %ecx, %esi
-    ; asm: setge %dl
-    [-,%rdx]            v307 = icmp sge v2, v1  ; bin: 39 ce 0f 9d c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: setg %bl
-    [-,%rbx]            v308 = icmp sgt v1, v2  ; bin: 39 f1 0f 9f c3
-    ; asm: cmpl %ecx, %esi
-    ; asm: setg %dl
-    [-,%rdx]            v309 = icmp sgt v2, v1  ; bin: 39 ce 0f 9f c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: setle %bl
-    [-,%rbx]            v310 = icmp sle v1, v2  ; bin: 39 f1 0f 9e c3
-    ; asm: cmpl %ecx, %esi
-    ; asm: setle %dl
-    [-,%rdx]            v311 = icmp sle v2, v1  ; bin: 39 ce 0f 9e c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: setb %bl
-    [-,%rbx]            v312 = icmp ult v1, v2  ; bin: 39 f1 0f 92 c3
-    ; asm: cmpl %ecx, %esi
-    ; asm: setb %dl
-    [-,%rdx]            v313 = icmp ult v2, v1  ; bin: 39 ce 0f 92 c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: setae %bl
-    [-,%rbx]            v314 = icmp uge v1, v2  ; bin: 39 f1 0f 93 c3
-    ; asm: cmpl %ecx, %esi
-    ; asm: setae %dl
-    [-,%rdx]            v315 = icmp uge v2, v1  ; bin: 39 ce 0f 93 c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: seta %bl
-    [-,%rbx]            v316 = icmp ugt v1, v2  ; bin: 39 f1 0f 97 c3
-    ; asm: cmpl %ecx, %esi
-    ; asm: seta %dl
-    [-,%rdx]            v317 = icmp ugt v2, v1  ; bin: 39 ce 0f 97 c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: setbe %bl
-    [-,%rbx]            v318 = icmp ule v1, v2  ; bin: 39 f1 0f 96 c3
-    ; asm: cmpl %ecx, %esi
-    ; asm: setbe %dl
-    [-,%rdx]            v319 = icmp ule v2, v1  ; bin: 39 ce 0f 96 c2
-
-    ; Bool-to-int conversions.
-
-    ; asm: movzbl %bl, %ecx
-    [-,%rcx]             v350 = bint.i32 v300   ; bin: 0f b6 cb
-    ; asm: movzbl %dl, %esi
-    [-,%rsi]             v351 = bint.i32 v301   ; bin: 0f b6 f2
-
-    ; asm: call foo
-    call fn0()                                  ; bin: stk_ovf e8 CallPCRel4(%foo-4) 00000000
-
-    ; asm: movl $0, %ecx
-    [-,%rcx]            v400 = func_addr.i32 fn0        ; bin: b9 Abs4(%foo) 00000000
-    ; asm: movl $0, %esi
-    [-,%rsi]            v401 = func_addr.i32 fn0        ; bin: be Abs4(%foo) 00000000
-
-    ; asm: call *%ecx
-    call_indirect sig0, v400()                  ; bin: stk_ovf ff d1
-    ; asm: call *%esi
-    call_indirect sig0, v401()                  ; bin: stk_ovf ff d6
-
-    ; asm: movl $0, %ecx
-    [-,%rcx]            v450 = symbol_value.i32 gv0    ; bin: b9 Abs4(%some_gv) 00000000
-    ; asm: movl $0, %esi
-    [-,%rsi]            v451 = symbol_value.i32 gv0    ; bin: be Abs4(%some_gv) 00000000
-
-    ; Spill / Fill.
-
-    ; asm: movl %ecx, 1032(%esp)
-    [-,ss1]             v500 = spill v1         ; bin: stk_ovf 89 8c 24 00000408
-    ; asm: movl %esi, 1032(%esp)
-    [-,ss1]             v501 = spill v2         ; bin: stk_ovf 89 b4 24 00000408
-
-    ; asm: movl 1032(%esp), %ecx
-    [-,%rcx]            v510 = fill v500        ; bin: 8b 8c 24 00000408
-    ; asm: movl 1032(%esp), %esi
-    [-,%rsi]            v511 = fill v501        ; bin: 8b b4 24 00000408
-
-    ; asm: movl %ecx, 1032(%esp)
-    regspill v1, %rcx -> ss1                    ; bin: stk_ovf 89 8c 24 00000408
-    ; asm: movl 1032(%esp), %ecx
-    regfill v1, ss1 -> %rcx                     ; bin: 8b 8c 24 00000408
-
-    ; Push and Pop
-    ; asm: pushl %ecx
-    x86_push v1                                 ; bin: stk_ovf 51
-    ; asm: popl %ecx
-    [-,%rcx]            v512 = x86_pop.i32      ; bin: 59
-
-    ; Adjust Stack Pointer Up
-    ; asm: addl $64, %esp
-    adjust_sp_up_imm 64                         ; bin: 83 c4 40
-    ; asm: addl $-64, %esp
-    adjust_sp_up_imm -64                        ; bin: 83 c4 c0
-    ; asm: addl $1024, %esp
-    adjust_sp_up_imm 1024                       ; bin: 81 c4 00000400
-    ; asm: addl $-1024, %esp
-    adjust_sp_up_imm -1024                      ; bin: 81 c4 fffffc00
-    ; asm: addl $2147483647, %esp
-    adjust_sp_up_imm 2147483647                 ; bin: 81 c4 7fffffff
-    ; asm: addl $-2147483648, %esp
-    adjust_sp_up_imm -2147483648                ; bin: 81 c4 80000000
-
-    ; Adjust Stack Pointer Down
-    ; asm: subl %ecx, %esp
-    adjust_sp_down v1                           ; bin: 29 cc
-    ; asm: subl %esi, %esp
-    adjust_sp_down v2                           ; bin: 29 f4
-    ; asm: addl $64, %esp
-    adjust_sp_down_imm 64                       ; bin: 83 ec 40
-    ; asm: addl $-64, %esp
-    adjust_sp_down_imm -64                      ; bin: 83 ec c0
-    ; asm: addl $1024, %esp
-    adjust_sp_down_imm 1024                     ; bin: 81 ec 00000400
-    ; asm: addl $-1024, %esp
-    adjust_sp_down_imm -1024                    ; bin: 81 ec fffffc00
-    ; asm: addl $2147483647, %esp
-    adjust_sp_down_imm 2147483647               ; bin: 81 ec 7fffffff
-    ; asm: addl $-2147483648, %esp
-    adjust_sp_down_imm -2147483648              ; bin: 81 ec 80000000
-
-    ; Shift immediates
-    ; asm: shll $2, %esi
-    [-,%rsi]             v513 = ishl_imm v2, 2    ; bin: c1 e6 02
-    ; asm: sarl $5, %esi
-    [-,%rsi]             v514 = sshr_imm v2, 5    ; bin: c1 fe 05
-    ; asm: shrl $8, %esi
-    [-,%rsi]             v515 = ushr_imm v2, 8    ; bin: c1 ee 08
-
-    ; Rotate immediates
-    ; asm: rolq $12, %esi
-    [-,%rsi]             v5101 = rotl_imm v2, 12   ; bin: c1 c6 0c
-    ; asm: rorq $5, %esi
-    [-,%rsi]             v5103 = rotr_imm v2, 5    ; bin: c1 ce 05
-
-    ; Load Complex
-    [-,%rax]            v521 = iconst.i32 1
-    [-,%rbx]            v522 = iconst.i32 1
-    ; asm: movl (%eax,%ebx,1), %ecx
-    [-,%rcx]            v526 = load_complex.i32 v521+v522         ; bin: heap_oob 8b 0c 18
-    ; asm: movl 1(%eax,%ebx,1), %ecx
-    [-,%rcx]            v528 = load_complex.i32 v521+v522+1       ; bin: heap_oob 8b 4c 18 01
-    ; asm: mov    0x100000(%eax,%ebx,1),%ecx
-    [-,%rcx]            v530 = load_complex.i32 v521+v522+0x1000  ; bin: heap_oob 8b 8c 18 00001000
-    ; asm: movzbl (%eax,%ebx,1),%ecx
-    [-,%rcx]            v532 = uload8_complex.i32 v521+v522         ; bin: heap_oob 0f b6 0c 18
-    ; asm: movsbl (%eax,%ebx,1),%ecx
-    [-,%rcx]            v534 = sload8_complex.i32 v521+v522         ; bin: heap_oob 0f be 0c 18
-    ; asm: movzwl (%eax,%ebx,1),%ecx
-    [-,%rcx]            v536 = uload16_complex.i32 v521+v522         ; bin: heap_oob 0f b7 0c 18
-    ; asm: movswl (%eax,%ebx,1),%ecx
-    [-,%rcx]            v538 = sload16_complex.i32 v521+v522         ; bin: heap_oob 0f bf 0c 18
-
-    ; Store Complex
-    [-,%rcx]            v601 = iconst.i32 1
-    ; asm: mov    %ecx,(%eax,%ebx,1)
-    store_complex v601, v521+v522        ; bin: heap_oob 89 0c 18
-    ; asm: mov    %ecx,0x1(%eax,%ebx,1)
-    store_complex v601, v521+v522+1      ; bin: heap_oob 89 4c 18 01
-    ; asm: mov    %ecx,0x100000(%eax,%ebx,1)
-    store_complex v601, v521+v522+0x1000 ; bin: heap_oob 89 8c 18 00001000
-    ; asm: mov    %cx,(%eax,%ebx,1)
-    istore16_complex v601, v521+v522     ; bin: heap_oob 66 89 0c 18
-    ; asm: mov    %cl,(%eax,%ebx,1)
-    istore8_complex v601, v521+v522      ; bin: heap_oob 88 0c 18
-
-    ; Carry Addition
-    ; asm: addl %esi, %ecx
-    [-,%rcx,%rflags] v701, v702 = iadd_ifcout v1, v2  ; bin: 01 f1
-    ; asm: adcl %esi, %ecx
-    [-,%rcx] v703 = iadd_ifcin v1, v2, v702           ; bin: 11 f1
-    ; asm: adcl %esi, %ecx
-    [-,%rcx,%rflags] v704, v705 = iadd_ifcarry v1, v2, v702 ; bin: 11 f1
-
-    ; Borrow Subtraction
-    ; asm: subl %esi, %ecx
-    [-,%rcx,%rflags] v706, v707 = isub_ifbout v1, v2         ; bin: 29 f1
-    ; asm: sbbl %esi, %ecx
-    [-,%rcx] v708 = isub_ifbin v1, v2, v707                  ; bin: 19 f1
-    ; asm: sbbl %esi, %ecx
-    [-,%rcx,%rflags] v709, v710 = isub_ifborrow v1, v2, v707 ; bin: 19 f1
-
-    ; asm: testl %ecx, %ecx
-    ; asm: je block1
-    brz v1, block1                                ; bin: 85 c9 74 0e
-    fallthrough block3
-
-block3:
-    ; asm: testl %esi, %esi
-    ; asm: je block1
-    brz v2, block1                                ; bin: 85 f6 74 0a
-    fallthrough block4
-
-block4:
-    ; asm: testl %ecx, %ecx
-    ; asm: jne block1
-    brnz v1, block1                               ; bin: 85 c9 75 06
-    fallthrough block5
-
-block5:
-    ; asm: testl %esi, %esi
-    ; asm: jne block1
-    brnz v2, block1                               ; bin: 85 f6 75 02
-
-    ; asm: jmp block2
-    jump block2                                   ; bin: eb 01
-
-    ; asm: block1:
-block1:
-    ; asm: ret
-    return                                      ; bin: c3
-
-    ; asm: block2:
-block2:
-    trap user0                                  ; bin: user0 0f 0b
-}
-
-; Special branch encodings only for I32 mode.
-function %special_branches() {
-block0:
-    [-,%rcx]            v1 = iconst.i32 1
-    [-,%rsi]            v2 = iconst.i32 2
-    [-,%rdi]            v3 = icmp eq v1, v2
-    [-,%rbx]            v4 = icmp ugt v1, v2
-
-    ; asm: testl $0xff, %edi
-    ; asm: je block1
-    brz v3, block1                                ; bin: f7 c7 000000ff 0f 84 00000015
-    fallthrough block2
-
-block2:
-    ; asm: testb %bl, %bl
-    ; asm: je block1
-    brz v4, block1                                ; bin: 84 db 74 11
-    fallthrough block3
-
-block3:
-    ; asm: testl $0xff, %edi
-    ; asm: jne block1
-    brnz v3, block1                               ; bin: f7 c7 000000ff 0f 85 00000005
-    fallthrough block4
-
-block4:
-    ; asm: testb %bl, %bl
-    ; asm: jne block1
-    brnz v4, block1                               ; bin: 84 db 75 01
-    fallthrough block5
-
-block5:
-    return
-
-block1:
-    return
-}
-
-; CPU flag instructions.
-function %cpu_flags() {
-block0:
-    [-,%rcx]            v1 = iconst.i32 1
-    [-,%rsi]            v2 = iconst.i32 2
-    jump block1
-
-block1:
-    ; asm: cmpl %esi, %ecx
-    [-,%rflags]         v10 = ifcmp v1, v2      ; bin: 39 f1
-    ; asm: cmpl %ecx, %esi
-    [-,%rflags]         v11 = ifcmp v2, v1      ; bin: 39 ce
-
-    ; asm: je block1
-    brif eq v11, block1                           ; bin: 74 fa
-    jump block2
-
-block2:
-    ; asm: jne block1
-    brif ne v11, block1                           ; bin: 75 f8
-    jump block3
-
-block3:
-    ; asm: jl block1
-    brif slt v11, block1                          ; bin: 7c f6
-    jump block4
-
-block4:
-    ; asm: jge block1
-    brif sge v11, block1                          ; bin: 7d f4
-    jump block5
-
-block5:
-    ; asm: jg block1
-    brif sgt v11, block1                          ; bin: 7f f2
-    jump block6
-
-block6:
-    ; asm: jle block1
-    brif sle v11, block1                          ; bin: 7e f0
-    jump block7
-
-block7:
-    ; asm: jb block1
-    brif ult v11, block1                          ; bin: 72 ee
-    jump block8
-
-block8:
-    ; asm: jae block1
-    brif uge v11, block1                          ; bin: 73 ec
-    jump block9
-
-block9:
-    ; asm: ja block1
-    brif ugt v11, block1                          ; bin: 77 ea
-    jump block10
-
-block10:
-    ; asm: jbe block1
-    brif ule v11, block1                          ; bin: 76 e8
-    jump block11
-
-block11:
-
-    ; asm: sete %bl
-    [-,%rbx]            v20 = trueif eq v11                           ; bin: 0f 94 c3
-    ; asm: setne %bl
-    [-,%rbx]            v21 = trueif ne v11                           ; bin: 0f 95 c3
-    ; asm: setl %dl
-    [-,%rdx]            v22 = trueif slt v11                          ; bin: 0f 9c c2
-    ; asm: setge %dl
-    [-,%rdx]            v23 = trueif sge v11                          ; bin: 0f 9d c2
-    ; asm: setg %bl
-    [-,%rbx]            v24 = trueif sgt v11                          ; bin: 0f 9f c3
-    ; asm: setle %bl
-    [-,%rbx]            v25 = trueif sle v11                          ; bin: 0f 9e c3
-    ; asm: setb %dl
-    [-,%rdx]            v26 = trueif ult v11                          ; bin: 0f 92 c2
-    ; asm: setae %dl
-    [-,%rdx]            v27 = trueif uge v11                          ; bin: 0f 93 c2
-    ; asm: seta %bl
-    [-,%rbx]            v28 = trueif ugt v11                          ; bin: 0f 97 c3
-    ; asm: setbe %bl
-    [-,%rbx]            v29 = trueif ule v11                          ; bin: 0f 96 c3
-
-    ; The trapif instructions are encoded as macros: a conditional jump over a ud2.
-    ; asm: jne .+4; ud2
-    trapif eq v11, user0                           ; bin: 75 02 user0 0f 0b
-    ; asm: je .+4; ud2
-    trapif ne v11, user0                           ; bin: 74 02 user0 0f 0b
-    ; asm: jnl .+4; ud2
-    trapif slt v11, user0                          ; bin: 7d 02 user0 0f 0b
-    ; asm: jnge .+4; ud2
-    trapif sge v11, user0                          ; bin: 7c 02 user0 0f 0b
-    ; asm: jng .+4; ud2
-    trapif sgt v11, user0                          ; bin: 7e 02 user0 0f 0b
-    ; asm: jnle .+4; ud2
-    trapif sle v11, user0                          ; bin: 7f 02 user0 0f 0b
-    ; asm: jnb .+4; ud2
-    trapif ult v11, user0                          ; bin: 73 02 user0 0f 0b
-    ; asm: jnae .+4; ud2
-    trapif uge v11, user0                          ; bin: 72 02 user0 0f 0b
-    ; asm: jna .+4; ud2
-    trapif ugt v11, user0                          ; bin: 76 02 user0 0f 0b
-    ; asm: jnbe .+4; ud2
-    trapif ule v11, user0                          ; bin: 77 02 user0 0f 0b
-    ; asm: jo .+4; ud2
-    trapif of v11, user0                          ; bin: 71 02 user0 0f 0b
-    ; asm: jno .+4; ud2
-    trapif nof v11, user0                          ; bin: 70 02 user0 0f 0b
-
-    ; Stack check.
-    ; asm: cmpl %esp, %ecx
-    [-,%rflags]         v40 = ifcmp_sp v1       ; bin: 39 e1
-    ; asm: cmpl %esp, %esi
-    [-,%rflags]         v41 = ifcmp_sp v2       ; bin: 39 e6
-
-    ; asm: cmpl $-100, %ecx
-    [-,%rflags]         v42 = ifcmp_imm v1, -100   ; bin: 83 f9 9c
-    ; asm: cmpl $100, %esi
-    [-,%rflags]         v43 = ifcmp_imm v2, 100    ; bin: 83 fe 64
-
-    ; asm: cmpl $-10000, %ecx
-    [-,%rflags]         v44 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0
-    ; asm: cmpl $10000, %esi
-    [-,%rflags]         v45 = ifcmp_imm v2, 10000  ; bin: 81 fe 00002710
-
-    return
-}
-
-; Tests for i32/i8 conversion instructions.
-function %I32_I8() {
-block0:
-    [-,%rcx]            v1 = iconst.i32 1
-
-    [-,%rcx]            v11 = ireduce.i8 v1             ; bin:
-
-    ; asm: movsbl %cl, %esi
-    [-,%rsi]            v20 = sextend.i32 v11           ; bin: 0f be f1
-
-    ; asm: movzbl %cl, %esi
-    [-,%rsi]            v30 = uextend.i32 v11           ; bin: 0f b6 f1
-
-    trap user0                                          ; bin: user0 0f 0b
-}
-
-; Tests for i32/i16 conversion instructions.
-function %I32_I16() {
-block0:
-    [-,%rcx]            v1 = iconst.i32 1
-
-    [-,%rcx]            v11 = ireduce.i16 v1            ; bin:
-
-    ; asm: movswl %cx, %esi
-    [-,%rsi]            v20 = sextend.i32 v11           ; bin: 0f bf f1
-
-    ; asm: movzwl %cx, %esi
-    [-,%rsi]            v30 = uextend.i32 v11           ; bin: 0f b7 f1
-
-    trap user0                                          ; bin: user0 0f 0b
-}
diff --git a/cranelift/filetests/filetests/isa/x86/binary64-float.clif b/cranelift/filetests/filetests/isa/x86/binary64-float.clif
deleted file mode 100644
index 6bf6f325b1..0000000000
--- a/cranelift/filetests/filetests/isa/x86/binary64-float.clif
+++ /dev/null
@@ -1,638 +0,0 @@
-; Binary emission of 64-bit floating point code.
-test binemit
-set opt_level=speed_and_size
-target x86_64 legacy haswell
-
-; The binary encodings can be verified with the command:
-;
-;   sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary64-float.clif | llvm-mc -show-encoding -triple=x86_64
-;
-
-function %F32() {
-    ss0 = incoming_arg 8, offset 0
-    ss1 = incoming_arg 1024, offset -1024
-    ss2 = incoming_arg 1024, offset -2048
-    ss3 = incoming_arg 8, offset -2056
-
-block0:
-    [-,%r11]            v0 = iconst.i32 1
-    [-,%rsi]            v1 = iconst.i32 2
-    [-,%rax]            v2 = iconst.i64 11
-    [-,%r14]            v3 = iconst.i64 12
-    [-,%r13]            v4 = iconst.i64 13
-
-    ; asm: cvtsi2ssl %r11d, %xmm5
-    [-,%xmm5]           v10 = fcvt_from_sint.f32 v0             ; bin: f3 41 0f 2a eb
-    ; asm: cvtsi2ssl %esi, %xmm10
-    [-,%xmm10]          v11 = fcvt_from_sint.f32 v1             ; bin: f3 44 0f 2a d6
-
-    ; asm: cvtsi2ssq %rax, %xmm5
-    [-,%xmm5]           v12 = fcvt_from_sint.f32 v2             ; bin: f3 48 0f 2a e8
-    ; asm: cvtsi2ssq %r14, %xmm10
-    [-,%xmm10]          v13 = fcvt_from_sint.f32 v3             ; bin: f3 4d 0f 2a d6
-
-    ; asm: cvtss2sd %xmm10, %xmm5
-    [-,%xmm5]           v14 = fpromote.f64 v11                  ; bin: f3 41 0f 5a ea
-    ; asm: cvtss2sd %xmm5, %xmm10
-    [-,%xmm10]          v15 = fpromote.f64 v10                  ; bin: f3 44 0f 5a d5
-
-    ; asm: movd %r11d, %xmm5
-    [-,%xmm5]           v16 = bitcast.f32 v0                    ; bin: 66 41 0f 6e eb
-    ; asm: movd %esi, %xmm10
-    [-,%xmm10]          v17 = bitcast.f32 v1                    ; bin: 66 44 0f 6e d6
-
-    ; asm: movd %xmm5, %ecx
-    [-,%rcx]            v18 = bitcast.i32 v10                   ; bin: 66 0f 7e e9
-    ; asm: movd %xmm10, %esi
-    [-,%rsi]            v19 = bitcast.i32 v11                   ; bin: 66 44 0f 7e d6
-
-    ; Binary arithmetic.
-
-    ; asm: addss %xmm10, %xmm5
-    [-,%xmm5]           v20 = fadd v10, v11                     ; bin: f3 41 0f 58 ea
-    ; asm: addss %xmm5, %xmm10
-    [-,%xmm10]          v21 = fadd v11, v10                     ; bin: f3 44 0f 58 d5
-
-    ; asm: subss %xmm10, %xmm5
-    [-,%xmm5]           v22 = fsub v10, v11                     ; bin: f3 41 0f 5c ea
-    ; asm: subss %xmm5, %xmm10
-    [-,%xmm10]          v23 = fsub v11, v10                     ; bin: f3 44 0f 5c d5
-
-    ; asm: mulss %xmm10, %xmm5
-    [-,%xmm5]           v24 = fmul v10, v11                     ; bin: f3 41 0f 59 ea
-    ; asm: mulss %xmm5, %xmm10
-    [-,%xmm10]          v25 = fmul v11, v10                     ; bin: f3 44 0f 59 d5
-
-    ; asm: divss %xmm10, %xmm5
-    [-,%xmm5]           v26 = fdiv v10, v11                     ; bin: f3 41 0f 5e ea
-    ; asm: divss %xmm5, %xmm10
-    [-,%xmm10]          v27 = fdiv v11, v10                     ; bin: f3 44 0f 5e d5
-
-    ; Bitwise ops.
-    ; We use the *ps SSE instructions for everything because they are smaller.
-
-    ; asm: andps %xmm10, %xmm5
-    [-,%xmm5]           v30 = band v10, v11                     ; bin: 41 0f 54 ea
-    ; asm: andps %xmm5, %xmm10
-    [-,%xmm10]          v31 = band v11, v10                     ; bin: 44 0f 54 d5
-
-    ; asm: andnps %xmm10, %xmm5
-    [-,%xmm5]           v32 = band_not v11, v10                 ; bin: 41 0f 55 ea
-    ; asm: andnps %xmm5, %xmm10
-    [-,%xmm10]          v33 = band_not v10, v11                 ; bin: 44 0f 55 d5
-
-    ; asm: orps %xmm10, %xmm5
-    [-,%xmm5]           v34 = bor v10, v11                      ; bin: 41 0f 56 ea
-    ; asm: orps %xmm5, %xmm10
-    [-,%xmm10]          v35 = bor v11, v10                      ; bin: 44 0f 56 d5
-
-    ; asm: xorps %xmm10, %xmm5
-    [-,%xmm5]           v36 = bxor v10, v11                     ; bin: 41 0f 57 ea
-    ; asm: xorps %xmm5, %xmm10
-    [-,%xmm10]          v37 = bxor v11, v10                     ; bin: 44 0f 57 d5
-
-    ; asm: movaps %xmm10, %xmm5
-    [-,%xmm5]           v38 = copy v11                          ; bin: 41 0f 28 ea
-    ; asm: movaps %xmm5, %xmm10
-    [-,%xmm10]          v39 = copy v10                          ; bin: 44 0f 28 d5
-
-    ; Copy to SSA
-
-    ; asm: movsd %xmm0, %xmm15
-    [-,%xmm15]          v400 = copy_to_ssa.f64 %xmm0            ; bin: f2 44 0f 10 f8
-    ; asm: movsd %xmm15, %xmm0
-    [-,%xmm0]           v401 = copy_to_ssa.f64 %xmm15           ; bin: f2 41 0f 10 c7
-    ; asm: movsd %xmm7, %xmm6.  Unfortunately we get a redundant REX prefix.
-    [-,%xmm6]           v402 = copy_to_ssa.f64 %xmm7            ; bin: f2 40 0f 10 f7
-    ; asm: movsd %xmm11, %xmm14
-    [-,%xmm14]          v403 = copy_to_ssa.f64 %xmm11           ; bin: f2 45 0f 10 f3
-
-    ; asm: movss %xmm0, %xmm15
-    [-,%xmm15]          v404 = copy_to_ssa.f32 %xmm0            ; bin: f3 44 0f 10 f8
-    ; asm: movss %xmm15, %xmm0
-    [-,%xmm0]           v405 = copy_to_ssa.f32 %xmm15           ; bin: f3 41 0f 10 c7
-    ; asm: movss %xmm7, %xmm6.  Unfortunately we get a redundant REX prefix.
-    [-,%xmm6]           v406 = copy_to_ssa.f32 %xmm7            ; bin: f3 40 0f 10 f7
-    ; asm: movss %xmm11, %xmm14
-    [-,%xmm14]          v407 = copy_to_ssa.f32 %xmm11           ; bin: f3 45 0f 10 f3
-
-    ; Convert float to int.
-
-    ; asm: cvttss2si %xmm5, %ecx
-    [-,%rcx]            v40 = x86_cvtt2si.i32 v10               ; bin: f3 0f 2c cd
-    ; asm: cvttss2si %xmm10, %esi
-    [-,%rsi]            v41 = x86_cvtt2si.i32 v11               ; bin: f3 41 0f 2c f2
-
-    ; asm: cvttss2si %xmm5, %rcx
-    [-,%rcx]            v42 = x86_cvtt2si.i64 v10               ; bin: f3 48 0f 2c cd
-    ; asm: cvttss2si %xmm10, %rsi
-    [-,%rsi]            v43 = x86_cvtt2si.i64 v11               ; bin: f3 49 0f 2c f2
-
-    ; Min/max.
-
-    ; asm: minss %xmm10, %xmm5
-    [-,%xmm5]           v44 = x86_fmin v10, v11                 ; bin: f3 41 0f 5d ea
-    ; asm: minss %xmm5, %xmm10
-    [-,%xmm10]          v45 = x86_fmin v11, v10                 ; bin: f3 44 0f 5d d5
-    ; asm: maxss %xmm10, %xmm5
-    [-,%xmm5]           v46 = x86_fmax v10, v11                 ; bin: f3 41 0f 5f ea
-    ; asm: maxss %xmm5, %xmm10
-    [-,%xmm10]          v47 = x86_fmax v11, v10                 ; bin: f3 44 0f 5f d5
-
-    ; Unary arithmetic.
-
-    ; asm: sqrtss %xmm5, %xmm10
-    [-,%xmm10]          v50 = sqrt v10                          ; bin: f3 44 0f 51 d5
-    ; asm: sqrtss %xmm10, %xmm5
-    [-,%xmm5]           v51 = sqrt v11                          ; bin: f3 41 0f 51 ea
-
-    ; asm: roundss $0, %xmm5, %xmm10
-    [-,%xmm10]          v52 = nearest v10                       ; bin: 66 44 0f 3a 0a d5 00
-    ; asm: roundss $0, %xmm10, %xmm5
-    [-,%xmm5]           v53 = nearest v11                       ; bin: 66 41 0f 3a 0a ea 00
-    ; asm: roundss $0, %xmm5, %xmm2
-    [-,%xmm2]           v54 = nearest v10                       ; bin: 66 0f 3a 0a d5 00
-
-    ; asm: roundss $1, %xmm5, %xmm10
-    [-,%xmm10]          v55 = floor v10                         ; bin: 66 44 0f 3a 0a d5 01
-    ; asm: roundss $1, %xmm10, %xmm5
-    [-,%xmm5]           v56 = floor v11                         ; bin: 66 41 0f 3a 0a ea 01
-    ; asm: roundss $1, %xmm5, %xmm2
-    [-,%xmm2]           v57 = floor v10                         ; bin: 66 0f 3a 0a d5 01
-
-    ; asm: roundss $2, %xmm5, %xmm10
-    [-,%xmm10]          v58 = ceil v10                          ; bin: 66 44 0f 3a 0a d5 02
-    ; asm: roundss $2, %xmm10, %xmm5
-    [-,%xmm5]           v59 = ceil v11                          ; bin: 66 41 0f 3a 0a ea 02
-    ; asm: roundss $2, %xmm5, %xmm2
-    [-,%xmm2]           v60 = ceil v10                          ; bin: 66 0f 3a 0a d5 02
-
-    ; asm: roundss $3, %xmm5, %xmm10
-    [-,%xmm10]          v61 = trunc v10                         ; bin: 66 44 0f 3a 0a d5 03
-    ; asm: roundss $3, %xmm10, %xmm5
-    [-,%xmm5]           v62 = trunc v11                         ; bin: 66 41 0f 3a 0a ea 03
-    ; asm: roundss $3, %xmm5, %xmm2
-    [-,%xmm2]           v63 = trunc v10                         ; bin: 66 0f 3a 0a d5 03
-
-    ; Load/Store
-
-    ; asm: movss (%r14), %xmm5
-    [-,%xmm5]           v100 = load.f32 v3                      ; bin: heap_oob f3 41 0f 10 2e
-    ; asm: movss (%rax), %xmm10
-    [-,%xmm10]          v101 = load.f32 v2                      ; bin: heap_oob f3 44 0f 10 10
-    ; asm: movss 50(%r14), %xmm5
-    [-,%xmm5]           v110 = load.f32 v3+50                   ; bin: heap_oob f3 41 0f 10 6e 32
-    ; asm: movss -50(%rax), %xmm10
-    [-,%xmm10]          v111 = load.f32 v2-50                   ; bin: heap_oob f3 44 0f 10 50 ce
-    ; asm: movss 10000(%r14), %xmm5
-    [-,%xmm5]           v120 = load.f32 v3+10000                ; bin: heap_oob f3 41 0f 10 ae 00002710
-    ; asm: movss -10000(%rax), %xmm10
-    [-,%xmm10]          v121 = load.f32 v2-10000                ; bin: heap_oob f3 44 0f 10 90 ffffd8f0
-
-    ; asm: movss %xmm5, (%r14)
-    [-]                 store.f32 v100, v3                      ; bin: heap_oob f3 41 0f 11 2e
-    ; asm: movss %xmm10, (%rax)
-    [-]                 store.f32 v101, v2                      ; bin: heap_oob f3 44 0f 11 10
-    ; asm: movss %xmm5, (%r13)
-    [-]                 store.f32 v100, v4                      ; bin: heap_oob f3 41 0f 11 6d 00
-    ; asm: movss %xmm10, (%r13)
-    [-]                 store.f32 v101, v4                      ; bin: heap_oob f3 45 0f 11 55 00
-    ; asm: movss %xmm5, 50(%r14)
-    [-]                 store.f32 v100, v3+50                   ; bin: heap_oob f3 41 0f 11 6e 32
-    ; asm: movss %xmm10, -50(%rax)
-    [-]                 store.f32 v101, v2-50                   ; bin: heap_oob f3 44 0f 11 50 ce
-    ; asm: movss %xmm5, 10000(%r14)
-    [-]                 store.f32 v100, v3+10000                ; bin: heap_oob f3 41 0f 11 ae 00002710
-    ; asm: movss %xmm10, -10000(%rax)
-    [-]                 store.f32 v101, v2-10000                ; bin: heap_oob f3 44 0f 11 90 ffffd8f0
-
-    ; Spill / Fill.
-
-    ; asm: movss %xmm5, 1032(%rsp)
-    [-,ss1]             v200 = spill v100                       ; bin: stk_ovf f3 0f 11 ac 24 00000408
-    ; asm: movss %xmm10, 1032(%rsp)
-    [-,ss1]             v201 = spill v101                       ; bin: stk_ovf f3 44 0f 11 94 24 00000408
-
-    ; asm: movss 1032(%rsp), %xmm5
-    [-,%xmm5]           v210 = fill v200                        ; bin: f3 0f 10 ac 24 00000408
-    ; asm: movss 1032(%rsp), %xmm10
-    [-,%xmm10]          v211 = fill v201                        ; bin: f3 44 0f 10 94 24 00000408
-
-    ; asm: movss %xmm5, 1032(%rsp)
-    regspill v100, %xmm5 -> ss1                                 ; bin: stk_ovf f3 0f 11 ac 24 00000408
-    ; asm: movss 1032(%rsp), %xmm5
-    regfill v100, ss1 -> %xmm5                                  ; bin: f3 0f 10 ac 24 00000408
-
-    ; Comparisons.
-    ;
-    ; Only `supported_floatccs` are tested here. Others are handled by
-    ; legalization patterns.
-
-    ; asm: ucomiss %xmm10, %xmm5
-    ; asm: setnp %bl
-    [-,%rbx]            v300 = fcmp ord v10, v11                ; bin: 41 0f 2e ea 0f 9b c3
-    ; asm: ucomiss %xmm5, %xmm10
-    ; asm: setp %bl
-    [-,%rbx]            v301 = fcmp uno v11, v10                ; bin: 44 0f 2e d5 0f 9a c3
-    ; asm: ucomiss %xmm10, %xmm5
-    ; asm: setne %dl
-    [-,%rdx]            v302 = fcmp one v10, v11                ; bin: 41 0f 2e ea 0f 95 c2
-    ; asm: ucomiss %xmm5, %xmm10
-    ; asm: sete %dl
-    [-,%rdx]            v303 = fcmp ueq v11, v10                ; bin: 44 0f 2e d5 0f 94 c2
-    ; asm: ucomiss %xmm10, %xmm5
-    ; asm: seta %bl
-    [-,%rbx]            v304 = fcmp gt v10, v11                 ; bin: 41 0f 2e ea 0f 97 c3
-    ; asm: ucomiss %xmm5, %xmm10
-    ; asm: setae %bl
-    [-,%rbx]            v305 = fcmp ge v11, v10                 ; bin: 44 0f 2e d5 0f 93 c3
-    ; asm: ucomiss %xmm10, %xmm5
-    ; asm: setb %dl
-    [-,%rdx]            v306 = fcmp ult v10, v11                ; bin: 41 0f 2e ea 0f 92 c2
-    ; asm: ucomiss %xmm5, %xmm10
-    ; asm: setbe %dl
-    [-,%rdx]            v307 = fcmp ule v11, v10                ; bin: 44 0f 2e d5 0f 96 c2
-
-    ; asm: ucomiss %xmm10, %xmm5
-    [-,%rflags]         v310 = ffcmp v10, v11                   ; bin: 41 0f 2e ea
-    ; asm: ucomiss %xmm10, %xmm5
-    [-,%rflags]         v311 = ffcmp v11, v10                   ; bin: 44 0f 2e d5
-    ; asm: ucomiss %xmm5, %xmm5
-    [-,%rflags]         v312 = ffcmp v10, v10                   ; bin: 0f 2e ed
-
-
-    ; Load/Store Complex
-
-    [-,%rax]            v350 = iconst.i64 1
-    [-,%rbx]            v351 = iconst.i64 2
-
-    ; asm: movss  (%rax,%rbx,1),%xmm5
-    [-,%xmm5]           v352 = load_complex.f32 v350+v351               ; bin: heap_oob f3 0f 10 2c 18
-    ; asm: movss 0x32(%rax,%rbx,1),%xmm5
-    [-,%xmm5]           v353 = load_complex.f32 v350+v351+50            ; bin: heap_oob f3 0f 10 6c 18 32
-    ; asm: movss -0x32(%rax,%rbx,1),%xmm10
-    [-,%xmm10]          v354 = load_complex.f32 v350+v351-50            ; bin: heap_oob f3 44 0f 10 54 18 ce
-    ; asm: 0x2710(%rax,%rbx,1),%xmm5
-    [-,%xmm5]           v355 = load_complex.f32 v350+v351+10000         ; bin: heap_oob f3 0f 10 ac 18 00002710
-    ; asm: -0x2710(%rax,%rbx,1),%xmm10
-    [-,%xmm10]          v356 = load_complex.f32 v350+v351-10000         ; bin: heap_oob f3 44 0f 10 94 18 ffffd8f0
-
-    ; asm: movsd %xmm5, (%rax,%rbx,1)
-    [-]                 store_complex.f32 v100, v350+v351               ; bin: heap_oob f3 0f 11 2c 18
-    ; asm: movsd %xmm5, 50(%rax,%rbx,1)
-    [-]                 store_complex.f32 v100, v350+v351+50            ; bin: heap_oob f3 0f 11 6c 18 32
-    ; asm: movsd %xmm10, -50(%rax,%rbx,1)
-    [-]                 store_complex.f32 v101, v350+v351-50            ; bin: heap_oob f3 44 0f 11 54 18 ce
-    ; asm: movsd %xmm5, 10000(%rax,%rbx,1)
-    [-]                 store_complex.f32 v100, v350+v351+10000         ; bin: heap_oob f3 0f 11 ac 18 00002710
-    ; asm: movsd %xmm10, -10000(%rax,%rbx,1)
-    [-]                 store_complex.f32 v101, v350+v351-10000         ; bin: heap_oob f3 44 0f 11 94 18 ffffd8f0
-
-    return
-}
-
-function %F64() {
-    ss0 = incoming_arg 8, offset 0
-    ss1 = incoming_arg 1024, offset -1024
-    ss2 = incoming_arg 1024, offset -2048
-    ss3 = incoming_arg 8, offset -2056
-
-block0:
-    [-,%r11]            v0 = iconst.i32 1
-    [-,%rsi]            v1 = iconst.i32 2
-    [-,%rax]            v2 = iconst.i64 11
-    [-,%r14]            v3 = iconst.i64 12
-    [-,%r13]            v4 = iconst.i64 13
-
-    ; asm: cvtsi2sdl %r11d, %xmm5
-    [-,%xmm5]           v10 = fcvt_from_sint.f64 v0             ; bin: f2 41 0f 2a eb
-    ; asm: cvtsi2sdl %esi, %xmm10
-    [-,%xmm10]          v11 = fcvt_from_sint.f64 v1             ; bin: f2 44 0f 2a d6
-
-    ; asm: cvtsi2sdq %rax, %xmm5
-    [-,%xmm5]           v12 = fcvt_from_sint.f64 v2             ; bin: f2 48 0f 2a e8
-    ; asm: cvtsi2sdq %r14, %xmm10
-    [-,%xmm10]          v13 = fcvt_from_sint.f64 v3             ; bin: f2 4d 0f 2a d6
-
-    ; asm: cvtsd2ss %xmm10, %xmm5
-    [-,%xmm5]           v14 = fdemote.f32 v11                   ; bin: f2 41 0f 5a ea
-    ; asm: cvtsd2ss %xmm5, %xmm10
-    [-,%xmm10]          v15 = fdemote.f32 v10                   ; bin: f2 44 0f 5a d5
-
-    ; asm: movq %rax, %xmm5
-    [-,%xmm5]           v16 = bitcast.f64 v2                    ; bin: 66 48 0f 6e e8
-    ; asm: movq %r14, %xmm10
-    [-,%xmm10]          v17 = bitcast.f64 v3                    ; bin: 66 4d 0f 6e d6
-
-    ; asm: movq %xmm5, %rcx
-    [-,%rcx]            v18 = bitcast.i64 v10                   ; bin: 66 48 0f 7e e9
-    ; asm: movq %xmm10, %rsi
-    [-,%rsi]            v19 = bitcast.i64 v11                   ; bin: 66 4c 0f 7e d6
-
-    ; Binary arithmetic.
-
-    ; asm: addsd %xmm10, %xmm5
-    [-,%xmm5]           v20 = fadd v10, v11                     ; bin: f2 41 0f 58 ea
-    ; asm: addsd %xmm5, %xmm10
-    [-,%xmm10]          v21 = fadd v11, v10                     ; bin: f2 44 0f 58 d5
-
-    ; asm: subsd %xmm10, %xmm5
-    [-,%xmm5]           v22 = fsub v10, v11                     ; bin: f2 41 0f 5c ea
-    ; asm: subsd %xmm5, %xmm10
-    [-,%xmm10]          v23 = fsub v11, v10                     ; bin: f2 44 0f 5c d5
-
-    ; asm: mulsd %xmm10, %xmm5
-    [-,%xmm5]           v24 = fmul v10, v11                     ; bin: f2 41 0f 59 ea
-    ; asm: mulsd %xmm5, %xmm10
-    [-,%xmm10]          v25 = fmul v11, v10                     ; bin: f2 44 0f 59 d5
-
-    ; asm: divsd %xmm10, %xmm5
-    [-,%xmm5]           v26 = fdiv v10, v11                     ; bin: f2 41 0f 5e ea
-    ; asm: divsd %xmm5, %xmm10
-    [-,%xmm10]          v27 = fdiv v11, v10                     ; bin: f2 44 0f 5e d5
-
-    ; Bitwise ops.
-    ; We use the *ps SSE instructions for everything because they are smaller.
-
-    ; asm: andps %xmm10, %xmm5
-    [-,%xmm5]           v30 = band v10, v11                     ; bin: 41 0f 54 ea
-    ; asm: andps %xmm5, %xmm10
-    [-,%xmm10]          v31 = band v11, v10                     ; bin: 44 0f 54 d5
-
-    ; asm: andnps %xmm10, %xmm5
-    [-,%xmm5]           v32 = band_not v11, v10                 ; bin: 41 0f 55 ea
-    ; asm: andnps %xmm5, %xmm10
-    [-,%xmm10]          v33 = band_not v10, v11                 ; bin: 44 0f 55 d5
-
-    ; asm: orps %xmm10, %xmm5
-    [-,%xmm5]           v34 = bor v10, v11                      ; bin: 41 0f 56 ea
-    ; asm: orps %xmm5, %xmm10
-    [-,%xmm10]          v35 = bor v11, v10                      ; bin: 44 0f 56 d5
-
-    ; asm: xorps %xmm10, %xmm5
-    [-,%xmm5]           v36 = bxor v10, v11                     ; bin: 41 0f 57 ea
-    ; asm: xorps %xmm5, %xmm10
-    [-,%xmm10]          v37 = bxor v11, v10                     ; bin: 44 0f 57 d5
-
-    ; asm: movaps %xmm10, %xmm5
-    [-,%xmm5]           v38 = copy v11                          ; bin: 41 0f 28 ea
-    ; asm: movaps %xmm5, %xmm10
-    [-,%xmm10]          v39 = copy v10                          ; bin: 44 0f 28 d5
-
-    ; Convert float to int.
-
-    ; asm: cvttsd2si %xmm5, %ecx
-    [-,%rcx]            v40 = x86_cvtt2si.i32 v10               ; bin: f2 0f 2c cd
-    ; asm: cvttsd2si %xmm10, %esi
-    [-,%rsi]            v41 = x86_cvtt2si.i32 v11               ; bin: f2 41 0f 2c f2
-
-    ; asm: cvttsd2si %xmm5, %rcx
-    [-,%rcx]            v42 = x86_cvtt2si.i64 v10               ; bin: f2 48 0f 2c cd
-    ; asm: cvttsd2si %xmm10, %rsi
-    [-,%rsi]            v43 = x86_cvtt2si.i64 v11               ; bin: f2 49 0f 2c f2
-
-    ; Min/max.
-
-    ; asm: minsd %xmm10, %xmm5
-    [-,%xmm5]           v44 = x86_fmin v10, v11                 ; bin: f2 41 0f 5d ea
-    ; asm: minsd %xmm5, %xmm10
-    [-,%xmm10]          v45 = x86_fmin v11, v10                 ; bin: f2 44 0f 5d d5
-    ; asm: maxsd %xmm10, %xmm5
-    [-,%xmm5]           v46 = x86_fmax v10, v11                 ; bin: f2 41 0f 5f ea
-    ; asm: maxsd %xmm5, %xmm10
-    [-,%xmm10]          v47 = x86_fmax v11, v10                 ; bin: f2 44 0f 5f d5
-
-    ; Unary arithmetic.
-
-    ; asm: sqrtsd %xmm5, %xmm10
-    [-,%xmm10]          v50 = sqrt v10                          ; bin: f2 44 0f 51 d5
-    ; asm: sqrtsd %xmm10, %xmm5
-    [-,%xmm5]           v51 = sqrt v11                          ; bin: f2 41 0f 51 ea
-
-    ; asm: roundsd $0, %xmm5, %xmm10
-    [-,%xmm10]          v52 = nearest v10                       ; bin: 66 44 0f 3a 0b d5 00
-    ; asm: roundsd $0, %xmm10, %xmm5
-    [-,%xmm5]           v53 = nearest v11                       ; bin: 66 41 0f 3a 0b ea 00
-    ; asm: roundsd $0, %xmm5, %xmm2
-    [-,%xmm2]           v54 = nearest v10                       ; bin: 66 0f 3a 0b d5 00
-
-    ; asm: roundsd $1, %xmm5, %xmm10
-    [-,%xmm10]          v55 = floor v10                         ; bin: 66 44 0f 3a 0b d5 01
-    ; asm: roundsd $1, %xmm10, %xmm5
-    [-,%xmm5]           v56 = floor v11                         ; bin: 66 41 0f 3a 0b ea 01
-    ; asm: roundsd $1, %xmm5, %xmm2
-    [-,%xmm2]           v57 = floor v10                         ; bin: 66 0f 3a 0b d5 01
-
-    ; asm: roundsd $2, %xmm5, %xmm10
-    [-,%xmm10]          v58 = ceil v10                          ; bin: 66 44 0f 3a 0b d5 02
-    ; asm: roundsd $2, %xmm10, %xmm5
-    [-,%xmm5]           v59 = ceil v11                          ; bin: 66 41 0f 3a 0b ea 02
-    ; asm: roundsd $2, %xmm5, %xmm2
-    [-,%xmm2]           v60 = ceil v10                          ; bin: 66 0f 3a 0b d5 02
-
-    ; asm: roundsd $3, %xmm5, %xmm10
-    [-,%xmm10]          v61 = trunc v10                         ; bin: 66 44 0f 3a 0b d5 03
-    ; asm: roundsd $3, %xmm10, %xmm5
-    [-,%xmm5]           v62 = trunc v11                         ; bin: 66 41 0f 3a 0b ea 03
-    ; asm: roundsd $3, %xmm5, %xmm2
-    [-,%xmm2]           v63 = trunc v10                         ; bin: 66 0f 3a 0b d5 03
-
-    ; Load/Store
-
-    ; asm: movsd (%r14), %xmm5
-    [-,%xmm5]           v100 = load.f64 v3                      ; bin: heap_oob f2 41 0f 10 2e
-    ; asm: movsd (%rax), %xmm10
-    [-,%xmm10]          v101 = load.f64 v2                      ; bin: heap_oob f2 44 0f 10 10
-    ; asm: movsd 50(%r14), %xmm5
-    [-,%xmm5]           v110 = load.f64 v3+50                   ; bin: heap_oob f2 41 0f 10 6e 32
-    ; asm: movsd -50(%rax), %xmm10
-    [-,%xmm10]          v111 = load.f64 v2-50                   ; bin: heap_oob f2 44 0f 10 50 ce
-    ; asm: movsd 10000(%r14), %xmm5
-    [-,%xmm5]           v120 = load.f64 v3+10000                ; bin: heap_oob f2 41 0f 10 ae 00002710
-    ; asm: movsd -10000(%rax), %xmm10
-    [-,%xmm10]          v121 = load.f64 v2-10000                ; bin: heap_oob f2 44 0f 10 90 ffffd8f0
-
-    ; asm: movsd %xmm5, (%r14)
-    [-]                 store.f64 v100, v3                      ; bin: heap_oob f2 41 0f 11 2e
-    ; asm: movsd %xmm10, (%rax)
-    [-]                 store.f64 v101, v2                      ; bin: heap_oob f2 44 0f 11 10
-    ; asm: movsd %xmm5, (%r13)
-    [-]                 store.f64 v100, v4                      ; bin: heap_oob f2 41 0f 11 6d 00
-    ; asm: movsd %xmm10, (%r13)
-    [-]                 store.f64 v101, v4                      ; bin: heap_oob f2 45 0f 11 55 00
-    ; asm: movsd %xmm5, 50(%r14)
-    [-]                 store.f64 v100, v3+50                   ; bin: heap_oob f2 41 0f 11 6e 32
-    ; asm: movsd %xmm10, -50(%rax)
-    [-]                 store.f64 v101, v2-50                   ; bin: heap_oob f2 44 0f 11 50 ce
-    ; asm: movsd %xmm5, 10000(%r14)
-    [-]                 store.f64 v100, v3+10000                ; bin: heap_oob f2 41 0f 11 ae 00002710
-    ; asm: movsd %xmm10, -10000(%rax)
-    [-]                 store.f64 v101, v2-10000                ; bin: heap_oob f2 44 0f 11 90 ffffd8f0
-
-    ; Spill / Fill.
-
-    ; asm: movsd %xmm5, 1032(%rsp)
-    [-,ss1]             v200 = spill v100                       ; bin: stk_ovf f2 0f 11 ac 24 00000408
-    ; asm: movsd %xmm10, 1032(%rsp)
-    [-,ss1]             v201 = spill v101                       ; bin: stk_ovf f2 44 0f 11 94 24 00000408
-
-    ; asm: movsd 1032(%rsp), %xmm5
-    [-,%xmm5]           v210 = fill v200                        ; bin: f2 0f 10 ac 24 00000408
-    ; asm: movsd 1032(%rsp), %xmm10
-    [-,%xmm10]          v211 = fill v201                        ; bin: f2 44 0f 10 94 24 00000408
-
-    ; asm: movsd %xmm5, 1032(%rsp)
-    regspill v100, %xmm5 -> ss1                                 ; bin: stk_ovf f2 0f 11 ac 24 00000408
-    ; asm: movsd 1032(%rsp), %xmm5
-    regfill v100, ss1 -> %xmm5                                  ; bin: f2 0f 10 ac 24 00000408
-
-    ; Comparisons.
-    ;
-    ; Only `supported_floatccs` are tested here. Others are handled by
-    ; legalization patterns.
-
-    ; asm: ucomisd %xmm10, %xmm5
-    ; asm: setnp %bl
-    [-,%rbx]            v300 = fcmp ord v10, v11                ; bin: 66 41 0f 2e ea 0f 9b c3
-    ; asm: ucomisd %xmm5, %xmm10
-    ; asm: setp %bl
-    [-,%rbx]            v301 = fcmp uno v11, v10                ; bin: 66 44 0f 2e d5 0f 9a c3
-    ; asm: ucomisd %xmm10, %xmm5
-    ; asm: setne %dl
-    [-,%rdx]            v302 = fcmp one v10, v11                ; bin: 66 41 0f 2e ea 0f 95 c2
-    ; asm: ucomisd %xmm5, %xmm10
-    ; asm: sete %dl
-    [-,%rdx]            v303 = fcmp ueq v11, v10                ; bin: 66 44 0f 2e d5 0f 94 c2
-    ; asm: ucomisd %xmm10, %xmm5
-    ; asm: seta %bl
-    [-,%rbx]            v304 = fcmp gt v10, v11                 ; bin: 66 41 0f 2e ea 0f 97 c3
-    ; asm: ucomisd %xmm5, %xmm10
-    ; asm: setae %bl
-    [-,%rbx]            v305 = fcmp ge v11, v10                 ; bin: 66 44 0f 2e d5 0f 93 c3
-    ; asm: ucomisd %xmm10, %xmm5
-    ; asm: setb %dl
-    [-,%rdx]            v306 = fcmp ult v10, v11                ; bin: 66 41 0f 2e ea 0f 92 c2
-    ; asm: ucomisd %xmm5, %xmm10
-    ; asm: setbe %dl
-    [-,%rdx]            v307 = fcmp ule v11, v10                ; bin: 66 44 0f 2e d5 0f 96 c2
-
-    ; asm: ucomisd %xmm10, %xmm5
-    [-,%rflags]         v310 = ffcmp v10, v11                   ; bin: 66 41 0f 2e ea
-    ; asm: ucomisd %xmm10, %xmm5
-    [-,%rflags]         v311 = ffcmp v11, v10                   ; bin: 66 44 0f 2e d5
-    ; asm: ucomisd %xmm5, %xmm5
-    [-,%rflags]         v312 = ffcmp v10, v10                   ; bin: 66 0f 2e ed
-
-    ; Load/Store Complex
-
-    [-,%rax]            v350 = iconst.i64 1
-    [-,%rbx]            v351 = iconst.i64 2
-    ; asm: movsd (%rax,%rbx,1),%xmm5
-    [-,%xmm5]           v352 = load_complex.f64 v350+v351               ; bin: heap_oob f2 0f 10 2c 18
-    ; asm: movsd 0x32(%rax,%rbx,1),%xmm5
-    [-,%xmm5]           v353 = load_complex.f64 v350+v351+50            ; bin: heap_oob f2 0f 10 6c 18 32
-    ; asm: movsd -0x32(%rax,%rbx,1),%xmm10
-    [-,%xmm10]          v354 = load_complex.f64 v350+v351-50            ; bin: heap_oob f2 44 0f 10 54 18 ce
-    ; asm: movsd 0x2710(%rax,%rbx,1),%xmm5
-    [-,%xmm5]           v355 = load_complex.f64 v350+v351+10000         ; bin: heap_oob f2 0f 10 ac 18 00002710
-    ; asm: movsd -0x2710(%rax,%rbx,1),%xmm10
-    [-,%xmm10]          v356 = load_complex.f64 v350+v351-10000         ; bin: heap_oob f2 44 0f 10 94 18 ffffd8f0
-
-    ; asm: movsd %xmm5, (%rax,%rbx,1)
-    [-]                 store_complex.f64 v100, v350+v351               ; bin: heap_oob f2 0f 11 2c 18
-    ; asm: movsd %xmm5, 50(%rax,%rbx,1)
-    [-]                 store_complex.f64 v100, v350+v351+50            ; bin: heap_oob f2 0f 11 6c 18 32
-    ; asm: movsd %xmm10, -50(%rax,%rbx,1)
-    [-]                 store_complex.f64 v101, v350+v351-50            ; bin: heap_oob f2 44 0f 11 54 18 ce
-    ; asm: movsd %xmm5, 10000(%rax,%rbx,1)
-    [-]                 store_complex.f64 v100, v350+v351+10000         ; bin: heap_oob f2 0f 11 ac 18 00002710
-    ; asm: movsd %xmm10, -10000(%rax,%rbx,1)
-    [-]                 store_complex.f64 v101, v350+v351-10000         ; bin: heap_oob f2 44 0f 11 94 18 ffffd8f0
-
-    return
-}
-
-function %cpuflags_float(f32 [%xmm0]) {
-block0(v0: f32 [%xmm0]):
-    ; asm: ucomiss %xmm0, %xmm0
-    [-,%rflags]         v1 = ffcmp v0, v0                       ; bin: 0f 2e c0
-
-    jump block1
-
-block1:
-    ; asm: jnp block1
-    brff ord v1, block1                                           ; bin: 7b fe
-    jump block2
-
-block2:
-    ; asm: jp block1
-    brff uno v1, block1                                           ; bin: 7a fc
-    jump block3
-
-block3:
-    ; asm: jne block1
-    brff one v1, block1                                           ; bin: 75 fa
-    jump block4
-
-block4:
-    ; asm: je block1
-    brff ueq v1, block1                                           ; bin: 74 f8
-    jump block5
-
-block5:
-    ; asm: ja block1
-    brff gt v1, block1                                            ; bin: 77 f6
-    jump block6
-
-block6:
-    ; asm: jae block1
-    brff ge v1, block1                                            ; bin: 73 f4
-    jump block7
-
-block7:
-    ; asm: jb block1
-    brff ult v1, block1                                           ; bin: 72 f2
-    jump block8
-
-block8:
-    ; asm: jbe block1
-    brff ule v1, block1                                           ; bin: 76 f0
-    jump block9
-
-block9:
-    ; asm: jp .+4; ud2
-    trapff ord v1, user0                                        ; bin: 7a 02 user0 0f 0b
-    ; asm: jnp .+4; ud2
-    trapff uno v1, user0                                        ; bin: 7b 02 user0 0f 0b
-    ; asm: je .+4; ud2
-    trapff one v1, user0                                        ; bin: 74 02 user0 0f 0b
-    ; asm: jne .+4; ud2
-    trapff ueq v1, user0                                        ; bin: 75 02 user0 0f 0b
-    ; asm: jna .+4; ud2
-    trapff gt v1, user0                                         ; bin: 76 02 user0 0f 0b
-    ; asm: jnae .+4; ud2
-    trapff ge v1, user0                                         ; bin: 72 02 user0 0f 0b
-    ; asm: jnb .+4; ud2
-    trapff ult v1, user0                                        ; bin: 73 02 user0 0f 0b
-    ; asm: jnbe .+4; ud2
-    trapff ule v1, user0                                        ; bin: 77 02 user0 0f 0b
-
-    ; asm: setnp %bl
-    [-,%rbx]            v10 = trueff ord v1                     ; bin: 0f 9b c3
-    ; asm: setp %bl
-    [-,%rbx]            v11 = trueff uno v1                     ; bin: 0f 9a c3
-    ; asm: setne %dl
-    [-,%rdx]            v12 = trueff one v1                     ; bin: 0f 95 c2
-    ; asm: sete %dl
-    [-,%rdx]            v13 = trueff ueq v1                     ; bin: 0f 94 c2
-    ; asm: seta %r10b
-    [-,%r10]            v14 = trueff gt v1                      ; bin: 41 0f 97 c2
-    ; asm: setae %r10b
-    [-,%r10]            v15 = trueff ge v1                      ; bin: 41 0f 93 c2
-    ; asm: setb %r14b
-    [-,%r14]            v16 = trueff ult v1                     ; bin: 41 0f 92 c6
-    ; asm: setbe %r14b
-    [-,%r14]            v17 = trueff ule v1                     ; bin: 41 0f 96 c6
-
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/binary64-pic.clif b/cranelift/filetests/filetests/isa/x86/binary64-pic.clif
deleted file mode 100644
index 4f2c650592..0000000000
--- a/cranelift/filetests/filetests/isa/x86/binary64-pic.clif
+++ /dev/null
@@ -1,83 +0,0 @@
-; binary emission of 64-bit code.
-test binemit
-set opt_level=speed_and_size
-set is_pic
-target x86_64 legacy haswell
-
-; The binary encodings can be verified with the command:
-;
-;   sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary64-pic.clif | llvm-mc -show-encoding -triple=x86_64
-;
-
-; Tests for i64 instructions.
-function %I64() {
-    sig0 = ()
-    fn0 = %foo()
-    fn1 = colocated %bar()
-
-    gv0 = symbol %some_gv
-    gv1 = symbol colocated %some_gv
-
-    ; Use incoming_arg stack slots because they won't be relocated by the frame
-    ; layout.
-    ss0 = incoming_arg 8, offset 0
-    ss1 = incoming_arg 1024, offset -1024
-    ss2 = incoming_arg 1024, offset -2048
-    ss3 = incoming_arg 8, offset -2056
-
-block0:
-
-    ; Colocated functions.
-
-    ; asm: call foo
-    call fn1()                                  ; bin: stk_ovf e8 CallPCRel4(%bar-4) 00000000
-
-    ; asm: lea 0x0(%rip), %rax
-    [-,%rax]            v0 = func_addr.i64 fn1        ; bin: 48 8d 05 PCRel4(%bar-4) 00000000
-    ; asm: lea 0x0(%rip), %rsi
-    [-,%rsi]            v1 = func_addr.i64 fn1        ; bin: 48 8d 35 PCRel4(%bar-4) 00000000
-    ; asm: lea 0x0(%rip), %r10
-    [-,%r10]            v2 = func_addr.i64 fn1        ; bin: 4c 8d 15 PCRel4(%bar-4) 00000000
-
-    ; asm: call *%rax
-    call_indirect sig0, v0()                  ; bin: stk_ovf ff d0
-    ; asm: call *%rsi
-    call_indirect sig0, v1()                  ; bin: stk_ovf ff d6
-    ; asm: call *%r10
-    call_indirect sig0, v2()                  ; bin: stk_ovf 41 ff d2
-
-    ; Non-colocated functions.
-
-    ; asm: call foo@PLT
-    call fn0()                                  ; bin: stk_ovf e8 CallPLTRel4(%foo-4) 00000000
-
-    ; asm: mov 0x0(%rip), %rax
-    [-,%rax]            v100 = func_addr.i64 fn0        ; bin: 48 8b 05 GOTPCRel4(%foo-4) 00000000
-    ; asm: mov 0x0(%rip), %rsi
-    [-,%rsi]            v101 = func_addr.i64 fn0        ; bin: 48 8b 35 GOTPCRel4(%foo-4) 00000000
-    ; asm: mov 0x0(%rip), %r10
-    [-,%r10]            v102 = func_addr.i64 fn0        ; bin: 4c 8b 15 GOTPCRel4(%foo-4) 00000000
-
-    ; asm: call *%rax
-    call_indirect sig0, v100()                  ; bin: stk_ovf ff d0
-    ; asm: call *%rsi
-    call_indirect sig0, v101()                  ; bin: stk_ovf ff d6
-    ; asm: call *%r10
-    call_indirect sig0, v102()                  ; bin: stk_ovf 41 ff d2
-
-    ; asm: mov 0x0(%rip), %rcx
-    [-,%rcx]            v3 = symbol_value.i64 gv0    ; bin: 48 8b 0d GOTPCRel4(%some_gv-4) 00000000
-    ; asm: mov 0x0(%rip), %rsi
-    [-,%rsi]            v4 = symbol_value.i64 gv0    ; bin: 48 8b 35 GOTPCRel4(%some_gv-4) 00000000
-    ; asm: mov 0x0(%rip), %r10
-    [-,%r10]            v5 = symbol_value.i64 gv0    ; bin: 4c 8b 15 GOTPCRel4(%some_gv-4) 00000000
-
-    ; asm: lea 0x0(%rip), %rcx
-    [-,%rcx]            v6 = symbol_value.i64 gv1    ; bin: 48 8d 0d PCRel4(%some_gv-4) 00000000
-    ; asm: lea 0x0(%rip), %rsi
-    [-,%rsi]            v7 = symbol_value.i64 gv1    ; bin: 48 8d 35 PCRel4(%some_gv-4) 00000000
-    ; asm: lea 0x0(%rip), %r10
-    [-,%r10]            v8 = symbol_value.i64 gv1    ; bin: 4c 8d 15 PCRel4(%some_gv-4) 00000000
-
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/binary64.clif b/cranelift/filetests/filetests/isa/x86/binary64.clif
deleted file mode 100644
index c5e1cf5099..0000000000
--- a/cranelift/filetests/filetests/isa/x86/binary64.clif
+++ /dev/null
@@ -1,1692 +0,0 @@
-; binary emission of x86-64 code.
-test binemit
-set opt_level=speed_and_size
-target x86_64 legacy haswell
-
-; The binary encodings can be verified with the command:
-;
-;   sed -ne 's/^ *; asm: *//p' filetests/isa/x86/binary64.clif | llvm-mc -show-encoding -triple=x86_64
-;
-
-; Tests for i64 instructions.
-function %I64() {
-    sig0 = ()
-    fn0 = %foo()
-    fn1 = colocated %bar()
-
-    gv0 = symbol %some_gv
-
-    ; Use incoming_arg stack slots because they won't be relocated by the frame
-    ; layout.
-    ss0 = incoming_arg 8, offset 0
-    ss1 = incoming_arg 1024, offset -1024
-    ss2 = incoming_arg 1024, offset -2048
-    ss3 = incoming_arg 8, offset -2056
-
-block0:
-
-    ; Integer Constants.
-
-    ; asm: movq $0x01020304f1f2f3f4, %rcx
-    [-,%rcx]            v1 = iconst.i64 0x0102_0304_f1f2_f3f4 ; bin: 48 b9 01020304f1f2f3f4
-    ; asm: movq $0x11020304f1f2f3f4, %rsi
-    [-,%rsi]            v2 = iconst.i64 0x1102_0304_f1f2_f3f4 ; bin: 48 be 11020304f1f2f3f4
-    ; asm: movq $0x21020304f1f2f3f4, %r10
-    [-,%r10]            v3 = iconst.i64 0x2102_0304_f1f2_f3f4 ; bin: 49 ba 21020304f1f2f3f4
-    ; asm: movl $0xff001122, %r8d                             # 32-bit zero-extended constant.
-    [-,%r8]             v4 = iconst.i64 0xff00_1122           ; bin: 41 b8 ff001122
-    ; asm: movq $0xffffffff88001122, %r14                     # 32-bit sign-extended constant.
-    [-,%r14]            v5 = iconst.i64 0xffff_ffff_8800_1122 ; bin: 49 c7 c6 88001122
-
-    ; asm: movb $1, %cl
-    [-,%rcx]            v9007 = bconst.b1 true      ; bin: b9 00000001
-    ; asm: movb $1, %sil
-    [-,%r10]            v9008 = bconst.b1 true      ; bin: 41 ba 00000001
-
-    ; Integer Register Operations.
-
-    ; asm: notq %rcx
-    [-,%rcx]             v4000 = bnot v1       ; bin: 48 f7 d1
-    ; asm: notq %rsi
-    [-,%rsi]             v4001 = bnot v2       ; bin: 48 f7 d6
-    ; asm: notq %r10
-    [-,%r10]             v4002 = bnot v3       ; bin: 49 f7 d2
-
-    ; Integer Register-Register Operations.
-
-    ; asm: addq %rsi, %rcx
-    [-,%rcx]             v10 = iadd v1, v2       ; bin: 48 01 f1
-    ; asm: addq %r10, %rsi
-    [-,%rsi]             v11 = iadd v2, v3       ; bin: 4c 01 d6
-    ; asm: addq %rcx, %r10
-    [-,%r10]             v12 = iadd v3, v1       ; bin: 49 01 ca
-
-    ; asm: subq %rsi, %rcx
-    [-,%rcx]             v20 = isub v1, v2       ; bin: 48 29 f1
-    ; asm: subq %r10, %rsi
-    [-,%rsi]             v21 = isub v2, v3       ; bin: 4c 29 d6
-    ; asm: subq %rcx, %r10
-    [-,%r10]             v22 = isub v3, v1       ; bin: 49 29 ca
-
-    ; asm: andq %rsi, %rcx
-    [-,%rcx]             v30 = band v1, v2       ; bin: 48 21 f1
-    ; asm: andq %r10, %rsi
-    [-,%rsi]             v31 = band v2, v3       ; bin: 4c 21 d6
-    ; asm: andq %rcx, %r10
-    [-,%r10]             v32 = band v3, v1       ; bin: 49 21 ca
-
-    ; asm: orq %rsi, %rcx
-    [-,%rcx]             v40 = bor v1, v2       ; bin: 48 09 f1
-    ; asm: orq %r10, %rsi
-    [-,%rsi]             v41 = bor v2, v3       ; bin: 4c 09 d6
-    ; asm: orq %rcx, %r10
-    [-,%r10]             v42 = bor v3, v1       ; bin: 49 09 ca
-
-    ; asm: xorq %rsi, %rcx
-    [-,%rcx]             v50 = bxor v1, v2       ; bin: 48 31 f1
-    ; asm: xorq %r10, %rsi
-    [-,%rsi]             v51 = bxor v2, v3       ; bin: 4c 31 d6
-    ; asm: xorq %rcx, %r10
-    [-,%r10]             v52 = bxor v3, v1       ; bin: 49 31 ca
-
-    ; asm: shlq %cl, %rsi
-    [-,%rsi]             v60 = ishl v2, v1       ; bin: 48 d3 e6
-    ; asm: shlq %cl, %r10
-    [-,%r10]             v61 = ishl v3, v1       ; bin: 49 d3 e2
-    ; asm: sarq %cl, %rsi
-    [-,%rsi]             v62 = sshr v2, v1       ; bin: 48 d3 fe
-    ; asm: sarq %cl, %r10
-    [-,%r10]             v63 = sshr v3, v1       ; bin: 49 d3 fa
-    ; asm: shrq %cl, %rsi
-    [-,%rsi]             v64 = ushr v2, v1       ; bin: 48 d3 ee
-    ; asm: shrq %cl, %r10
-    [-,%r10]             v65 = ushr v3, v1       ; bin: 49 d3 ea
-
-    ; asm: rolq %cl, %rsi
-    [-,%rsi]             v66 = rotl v2, v1       ; bin: 48 d3 c6
-    ; asm: rolq %cl, %r10
-    [-,%r10]             v67 = rotl v3, v1       ; bin: 49 d3 c2
-    ; asm: rorq %cl, %rsi
-    [-,%rsi]             v68 = rotr v2, v1       ; bin: 48 d3 ce
-    ; asm: rorq %cl, %r10
-    [-,%r10]             v69 = rotr v3, v1       ; bin: 49 d3 ca
-
-    ; Integer Register-Immediate Operations.
-    ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits.
-    ; Some take 8-bit immediates that are sign-extended to 64 bits.
-
-    ; asm: addq $-100000, %rcx
-    [-,%rcx]     v70 = iadd_imm v1, -100000     ; bin: 48 81 c1 fffe7960
-    ; asm: addq $100000, %rsi
-    [-,%rsi]     v71 = iadd_imm v2, 100000      ; bin: 48 81 c6 000186a0
-    ; asm: addq $0x7fffffff, %r10
-    [-,%r10]     v72 = iadd_imm v3, 0x7fff_ffff ; bin: 49 81 c2 7fffffff
-    ; asm: addq $100, %r8
-    [-,%r8]      v73 = iadd_imm v4, 100         ; bin: 49 83 c0 64
-    ; asm: addq $-100, %r14
-    [-,%r14]     v74 = iadd_imm v5, -100        ; bin: 49 83 c6 9c
-
-    ; asm: andq $-100000, %rcx
-    [-,%rcx]     v80 = band_imm v1, -100000     ; bin: 48 81 e1 fffe7960
-    ; asm: andq $100000, %rsi
-    [-,%rsi]     v81 = band_imm v2, 100000      ; bin: 48 81 e6 000186a0
-    ; asm: andq $0x7fffffff, %r10
-    [-,%r10]     v82 = band_imm v3, 0x7fff_ffff ; bin: 49 81 e2 7fffffff
-    ; asm: andq $100, %r8
-    [-,%r8]      v83 = band_imm v4, 100         ; bin: 49 83 e0 64
-    ; asm: andq $-100, %r14
-    [-,%r14]     v84 = band_imm v5, -100        ; bin: 49 83 e6 9c
-
-    ; asm: orq $-100000, %rcx
-    [-,%rcx]     v90 = bor_imm v1, -100000      ; bin: 48 81 c9 fffe7960
-    ; asm: orq $100000, %rsi
-    [-,%rsi]     v91 = bor_imm v2, 100000       ; bin: 48 81 ce 000186a0
-    ; asm: orq $0x7fffffff, %r10
-    [-,%r10]     v92 = bor_imm v3, 0x7fff_ffff  ; bin: 49 81 ca 7fffffff
-    ; asm: orq $100, %r8
-    [-,%r8]      v93 = bor_imm v4, 100          ; bin: 49 83 c8 64
-    ; asm: orq $-100, %r14
-    [-,%r14]     v94 = bor_imm v5, -100         ; bin: 49 83 ce 9c
-    ; asm: ret
-
-    ; asm: xorq $-100000, %rcx
-    [-,%rcx]     v100 = bxor_imm v1, -100000     ; bin: 48 81 f1 fffe7960
-    ; asm: xorq $100000, %rsi
-    [-,%rsi]     v101 = bxor_imm v2, 100000      ; bin: 48 81 f6 000186a0
-    ; asm: xorq $0x7fffffff, %r10
-    [-,%r10]     v102 = bxor_imm v3, 0x7fff_ffff ; bin: 49 81 f2 7fffffff
-    ; asm: xorq $100, %r8
-    [-,%r8]      v103 = bxor_imm v4, 100         ; bin: 49 83 f0 64
-    ; asm: xorq $-100, %r14
-    [-,%r14]     v104 = bxor_imm v5, -100        ; bin: 49 83 f6 9c
-
-    ; Register copies.
-
-    ; asm: movq %rsi, %rcx
-    [-,%rcx]             v110 = copy v2          ; bin: 48 89 f1
-    ; asm: movq %r10, %rsi
-    [-,%rsi]             v111 = copy v3          ; bin: 4c 89 d6
-    ; asm: movq %rcx, %r10
-    [-,%r10]             v112 = copy v1          ; bin: 49 89 ca
-
-    ; Copy Special
-    ; asm: movq %rsp, %rbp
-    copy_special %rsp -> %rbp                   ; bin: 48 89 e5
-    ; asm: movq %r10, %r11
-    copy_special %r10 -> %r11                   ; bin: 4d 89 d3
-    ; asm: movq %rsp, %r11
-    copy_special %rsp -> %r11                   ; bin: 49 89 e3
-    ; asm: movq %r10, %rsp
-    copy_special %r10 -> %rsp                   ; bin: 4c 89 d4
-
-    ; Copy to SSA
-
-    ; asm: movq %rax, %r15
-    [-,%r15]      v700 = copy_to_ssa.i64 %rax    ; bin: 49 89 c7
-    ; asm: movq %r15, %rax
-    [-,%rax]      v701 = copy_to_ssa.i64 %r15    ; bin: 4c 89 f8
-    ; asm: movq %rdi, %rsi
-    [-,%rsi]      v702 = copy_to_ssa.i64 %rdi    ; bin: 48 89 fe
-    ; asm: movq %r11, %r14
-    [-,%r14]      v703 = copy_to_ssa.i64 %r11    ; bin: 4d 89 de
-
-    ; asm: movl %eax, %r15d
-    [-,%r15]      v704 = copy_to_ssa.i32 %rax    ; bin: 41 89 c7
-    ; asm: movl %r15d, %eax
-    [-,%rax]      v705 = copy_to_ssa.i32 %r15    ; bin: 44 89 f8
-    ; asm: movl %edi, %esi.  Unfortunately we get a redundant REX prefix.
-    [-,%rsi]      v706 = copy_to_ssa.i32 %rdi    ; bin: 40 89 fe
-    ; asm: movl %r11, %r14
-    [-,%r14]      v707 = copy_to_ssa.i32 %r11    ; bin: 45 89 de
-
-    ; Load/Store instructions.
-
-    ; Register indirect addressing with no displacement.
-
-    ; asm: movq %rcx, (%r10)
-    store v1, v3                                ; bin: heap_oob 49 89 0a
-    ; asm: movq %r10, (%rcx)
-    store v3, v1                                ; bin: heap_oob 4c 89 11
-    ; asm: movl %ecx, (%r10)
-    istore32 v1, v3                             ; bin: heap_oob 41 89 0a
-    ; asm: movl %r10d, (%rcx)
-    istore32 v3, v1                             ; bin: heap_oob 44 89 11
-    ; asm: movw %cx, (%r10)
-    istore16 v1, v3                             ; bin: heap_oob 66 41 89 0a
-    ; asm: movw %r10w, (%rcx)
-    istore16 v3, v1                             ; bin: heap_oob 66 44 89 11
-    ; asm: movb %cl, (%r10)
-    istore8 v1, v3                              ; bin: heap_oob 41 88 0a
-    ; asm: movb %r10b, (%rcx)
-    istore8 v3, v1                              ; bin: heap_oob 44 88 11
-
-    ; asm: movq (%rcx), %r14
-    [-,%r14]            v120 = load.i64 v1      ; bin: heap_oob 4c 8b 31
-    ; asm: movq (%r10), %rdx
-    [-,%rdx]            v121 = load.i64 v3      ; bin: heap_oob 49 8b 12
-    ; asm: movl (%rcx), %r14d
-    [-,%r14]            v122 = uload32.i64 v1   ; bin: heap_oob 44 8b 31
-    ; asm: movl (%r10), %edx
-    [-,%rdx]            v123 = uload32.i64 v3   ; bin: heap_oob 41 8b 12
-    ; asm: movslq (%rcx), %r14
-    [-,%r14]            v124 = sload32.i64 v1   ; bin: heap_oob 4c 63 31
-    ; asm: movslq (%r10), %rdx
-    [-,%rdx]            v125 = sload32.i64 v3   ; bin: heap_oob 49 63 12
-    ; asm: movzwq (%rcx), %r14
-    [-,%r14]            v126 = uload16.i64 v1   ; bin: heap_oob 4c 0f b7 31
-    ; asm: movzwq (%r10), %rdx
-    [-,%rdx]            v127 = uload16.i64 v3   ; bin: heap_oob 49 0f b7 12
-    ; asm: movswq (%rcx), %r14
-    [-,%r14]            v128 = sload16.i64 v1   ; bin: heap_oob 4c 0f bf 31
-    ; asm: movswq (%r10), %rdx
-    [-,%rdx]            v129 = sload16.i64 v3   ; bin: heap_oob 49 0f bf 12
-    ; asm: movzbq (%rcx), %r14
-    [-,%r14]            v130 = uload8.i64 v1    ; bin: heap_oob 4c 0f b6 31
-    ; asm: movzbq (%r10), %rdx
-    [-,%rdx]            v131 = uload8.i64 v3    ; bin: heap_oob 49 0f b6 12
-    ; asm: movsbq (%rcx), %r14
-    [-,%r14]            v132 = sload8.i64 v1    ; bin: heap_oob 4c 0f be 31
-    ; asm: movsbq (%r10), %rdx
-    [-,%rdx]            v133 = sload8.i64 v3    ; bin: heap_oob 49 0f be 12
-
-    ; Register-indirect with 8-bit signed displacement.
-
-    ; asm: movq %rcx, 100(%r10)
-    store v1, v3+100                            ; bin: heap_oob 49 89 4a 64
-    ; asm: movq %r10, -100(%rcx)
-    store v3, v1-100                            ; bin: heap_oob 4c 89 51 9c
-    ; asm: movl %ecx, 100(%r10)
-    istore32 v1, v3+100                         ; bin: heap_oob 41 89 4a 64
-    ; asm: movl %r10d, -100(%rcx)
-    istore32 v3, v1-100                         ; bin: heap_oob 44 89 51 9c
-    ; asm: movw %cx, 100(%r10)
-    istore16 v1, v3+100                         ; bin: heap_oob 66 41 89 4a 64
-    ; asm: movw %r10w, -100(%rcx)
-    istore16 v3, v1-100                         ; bin: heap_oob 66 44 89 51 9c
-    ; asm: movb %cl, 100(%r10)
-    istore8 v1, v3+100                          ; bin: heap_oob 41 88 4a 64
-    ; asm: movb %r10b, 100(%rcx)
-    istore8 v3, v1+100                          ; bin: heap_oob 44 88 51 64
-
-    ; asm: movq 50(%rcx), %r10
-    [-,%r10]            v140 = load.i64 v1+50           ; bin: heap_oob 4c 8b 51 32
-    ; asm: movq -50(%r10), %rdx
-    [-,%rdx]            v141 = load.i64 v3-50           ; bin: heap_oob 49 8b 52 ce
-    ; asm: movl 50(%rcx), %edi
-    [-,%rdi]            v142 = uload32.i64 v1+50        ; bin: heap_oob 8b 79 32
-    ; asm: movl -50(%rsi), %edx
-    [-,%rdx]            v143 = uload32.i64 v2-50        ; bin: heap_oob 8b 56 ce
-    ; asm: movslq 50(%rcx), %rdi
-    [-,%rdi]            v144 = sload32.i64 v1+50        ; bin: heap_oob 48 63 79 32
-    ; asm: movslq -50(%rsi), %rdx
-    [-,%rdx]            v145 = sload32.i64 v2-50        ; bin: heap_oob 48 63 56 ce
-    ; asm: movzwq 50(%rcx), %rdi
-    [-,%rdi]            v146 = uload16.i64 v1+50        ; bin: heap_oob 48 0f b7 79 32
-    ; asm: movzwq -50(%rsi), %rdx
-    [-,%rdx]            v147 = uload16.i64 v2-50        ; bin: heap_oob 48 0f b7 56 ce
-    ; asm: movswq 50(%rcx), %rdi
-    [-,%rdi]            v148 = sload16.i64 v1+50        ; bin: heap_oob 48 0f bf 79 32
-    ; asm: movswq -50(%rsi), %rdx
-    [-,%rdx]            v149 = sload16.i64 v2-50        ; bin: heap_oob 48 0f bf 56 ce
-    ; asm: movzbq 50(%rcx), %rdi
-    [-,%rdi]            v150 = uload8.i64 v1+50         ; bin: heap_oob 48 0f b6 79 32
-    ; asm: movzbq -50(%rsi), %rdx
-    [-,%rdx]            v151 = uload8.i64 v2-50         ; bin: heap_oob 48 0f b6 56 ce
-    ; asm: movsbq 50(%rcx), %rdi
-    [-,%rdi]            v152 = sload8.i64 v1+50         ; bin: heap_oob 48 0f be 79 32
-    ; asm: movsbq -50(%rsi), %rdx
-    [-,%rdx]            v153 = sload8.i64 v2-50         ; bin: heap_oob 48 0f be 56 ce
-
-    ; Register-indirect with 32-bit signed displacement.
-
-    ; asm: movq %rcx, 10000(%r10)
-    store v1, v3+10000                          ; bin: heap_oob 49 89 8a 00002710
-    ; asm: movq %r10, -10000(%rcx)
-    store v3, v1-10000                          ; bin: heap_oob 4c 89 91 ffffd8f0
-    ; asm: movl %ecx, 10000(%rsi)
-    istore32 v1, v2+10000                       ; bin: heap_oob 89 8e 00002710
-    ; asm: movl %esi, -10000(%rcx)
-    istore32 v2, v1-10000                       ; bin: heap_oob 89 b1 ffffd8f0
-    ; asm: movw %cx, 10000(%rsi)
-    istore16 v1, v2+10000                       ; bin: heap_oob 66 89 8e 00002710
-    ; asm: movw %si, -10000(%rcx)
-    istore16 v2, v1-10000                       ; bin: heap_oob 66 89 b1 ffffd8f0
-    ; asm: movb %cl, 10000(%rsi)
-    istore8 v1, v2+10000                        ; bin: heap_oob 88 8e 00002710
-    ; asm: movb %sil, 10000(%rcx)
-    istore8 v2, v1+10000                        ; bin: heap_oob 40 88 b1 00002710
-
-    ; asm: movq 50000(%rcx), %r10
-    [-,%r10]            v160 = load.i64 v1+50000           ; bin: heap_oob 4c 8b 91 0000c350
-    ; asm: movq -50000(%r10), %rdx
-    [-,%rdx]            v161 = load.i64 v3-50000           ; bin: heap_oob 49 8b 92 ffff3cb0
-    ; asm: movl 50000(%rcx), %edi
-    [-,%rdi]            v162 = uload32.i64 v1+50000        ; bin: heap_oob 8b b9 0000c350
-    ; asm: movl -50000(%rsi), %edx
-    [-,%rdx]            v163 = uload32.i64 v2-50000        ; bin: heap_oob 8b 96 ffff3cb0
-    ; asm: movslq 50000(%rcx), %rdi
-    [-,%rdi]            v164 = sload32.i64 v1+50000        ; bin: heap_oob 48 63 b9 0000c350
-    ; asm: movslq -50000(%rsi), %rdx
-    [-,%rdx]            v165 = sload32.i64 v2-50000        ; bin: heap_oob 48 63 96 ffff3cb0
-    ; asm: movzwq 50000(%rcx), %rdi
-    [-,%rdi]            v166 = uload16.i64 v1+50000        ; bin: heap_oob 48 0f b7 b9 0000c350
-    ; asm: movzwq -50000(%rsi), %rdx
-    [-,%rdx]            v167 = uload16.i64 v2-50000        ; bin: heap_oob 48 0f b7 96 ffff3cb0
-    ; asm: movswq 50000(%rcx), %rdi
-    [-,%rdi]            v168 = sload16.i64 v1+50000        ; bin: heap_oob 48 0f bf b9 0000c350
-    ; asm: movswq -50000(%rsi), %rdx
-    [-,%rdx]            v169 = sload16.i64 v2-50000        ; bin: heap_oob 48 0f bf 96 ffff3cb0
-    ; asm: movzbq 50000(%rcx), %rdi
-    [-,%rdi]            v170 = uload8.i64 v1+50000         ; bin: heap_oob 48 0f b6 b9 0000c350
-    ; asm: movzbq -50000(%rsi), %rdx
-    [-,%rdx]            v171 = uload8.i64 v2-50000         ; bin: heap_oob 48 0f b6 96 ffff3cb0
-    ; asm: movsbq 50000(%rcx), %rdi
-    [-,%rdi]            v172 = sload8.i64 v1+50000         ; bin: heap_oob 48 0f be b9 0000c350
-    ; asm: movsbq -50000(%rsi), %rdx
-    [-,%rdx]            v173 = sload8.i64 v2-50000         ; bin: heap_oob 48 0f be 96 ffff3cb0
-
-
-    ; More arithmetic.
-
-    ; asm: imulq %rsi, %rcx
-    [-,%rcx]             v180 = imul v1, v2       ; bin: 48 0f af ce
-    ; asm: imulq %r10, %rsi
-    [-,%rsi]             v181 = imul v2, v3       ; bin: 49 0f af f2
-    ; asm: imulq %rcx, %r10
-    [-,%r10]             v182 = imul v3, v1       ; bin: 4c 0f af d1
-
-    [-,%rax]      v190 = iconst.i64 1
-    [-,%rdx]      v191 = iconst.i64 2
-    ; asm: idivq %rcx
-    [-,%rax,%rdx] v192, v193 = x86_sdivmodx v190, v191, v1  ; bin: int_divz 48 f7 f9
-    ; asm: idivq %rsi
-    [-,%rax,%rdx] v194, v195 = x86_sdivmodx v190, v191, v2  ; bin: int_divz 48 f7 fe
-    ; asm: idivq %r10
-    [-,%rax,%rdx] v196, v197 = x86_sdivmodx v190, v191, v3  ; bin: int_divz 49 f7 fa
-    ; asm: divq %rcx
-    [-,%rax,%rdx] v198, v199 = x86_udivmodx v190, v191, v1  ; bin: int_divz 48 f7 f1
-    ; asm: divq %rsi
-    [-,%rax,%rdx] v200, v201 = x86_udivmodx v190, v191, v2  ; bin: int_divz 48 f7 f6
-    ; asm: divq %r10
-    [-,%rax,%rdx] v202, v203 = x86_udivmodx v190, v191, v3  ; bin: int_divz 49 f7 f2
-
-    ; double-length multiply instructions, 64 bit
-    [-,%rax]       v1001 = iconst.i64 1
-    [-,%r15]       v1002 = iconst.i64 2
-    ; asm: mulq %r15
-    [-,%rax,%rdx]  v1003, v1004 = x86_umulx v1001, v1002 ; bin: 49 f7 e7
-    ; asm: imulq %r15
-    [-,%rax,%rdx]  v1005, v1006 = x86_smulx v1001, v1002 ; bin: 49 f7 ef
-
-    ; double-length multiply instructions, 32 bit
-    [-,%rax]       v1011 = iconst.i32 1
-    [-,%r15]       v1012 = iconst.i32 2
-    [-,%rcx]       v1017 = iconst.i32 3
-    ; asm: mull %r15d
-    [-,%rax,%rdx]  v1013, v1014 = x86_umulx v1011, v1012    ; bin: 41 f7 e7
-    ; asm: imull %r15d
-    [-,%rax,%rdx]  v1015, v1016 = x86_smulx v1011, v1012    ; bin: 41 f7 ef
-
-    ; asm: mull %ecx
-    [-,%rax,%rdx]  v1018, v1019 = x86_umulx v1011, v1017    ; bin: f7 e1
-    ; asm: imull %ecx
-    [-,%rax,%rdx]  v1020, v1021 = x86_smulx v1011, v1017    ; bin: f7 e9
-
-    ; Bit-counting instructions.
-
-    ; asm: popcntq %rsi, %rcx
-    [-,%rcx]            v210 = popcnt v2        ; bin: f3 48 0f b8 ce
-    ; asm: popcntq %r10, %rsi
-    [-,%rsi]            v211 = popcnt v3        ; bin: f3 49 0f b8 f2
-    ; asm: popcntq %rcx, %r10
-    [-,%r10]            v212 = popcnt v1        ; bin: f3 4c 0f b8 d1
-
-    ; asm: lzcntq %rsi, %rcx
-    [-,%rcx]            v213 = clz v2           ; bin: f3 48 0f bd ce
-    ; asm: lzcntq %r10, %rsi
-    [-,%rsi]            v214 = clz v3           ; bin: f3 49 0f bd f2
-    ; asm: lzcntq %rcx, %r10
-    [-,%r10]            v215 = clz v1           ; bin: f3 4c 0f bd d1
-
-    ; asm: tzcntq %rsi, %rcx
-    [-,%rcx]            v216 = ctz v2           ; bin: f3 48 0f bc ce
-    ; asm: tzcntq %r10, %rsi
-    [-,%rsi]            v217 = ctz v3           ; bin: f3 49 0f bc f2
-    ; asm: tzcntq %rcx, %r10
-    [-,%r10]            v218 = ctz v1           ; bin: f3 4c 0f bc d1
-
-    ; Integer comparisons.
-
-    ; asm: cmpq %rsi, %rcx
-    ; asm: sete %bl
-    [-,%rbx]            v300 = icmp eq v1, v2   ; bin: 48 39 f1 0f 94 c3
-    ; asm: cmpq %r10, %rsi
-    ; asm: sete %dl
-    [-,%rdx]            v301 = icmp eq v2, v3   ; bin: 4c 39 d6 0f 94 c2
-
-    ; asm: cmpq %rsi, %rcx
-    ; asm: setne %bl
-    [-,%rbx]            v302 = icmp ne v1, v2   ; bin: 48 39 f1 0f 95 c3
-    ; asm: cmpq %r10, %rsi
-    ; asm: setne %dl
-    [-,%rdx]            v303 = icmp ne v2, v3   ; bin: 4c 39 d6 0f 95 c2
-
-    ; asm: cmpq %rsi, %rcx
-    ; asm: setl %bl
-    [-,%rbx]            v304 = icmp slt v1, v2  ; bin: 48 39 f1 0f 9c c3
-    ; asm: cmpq %r10, %rsi
-    ; asm: setl %dl
-    [-,%rdx]            v305 = icmp slt v2, v3  ; bin: 4c 39 d6 0f 9c c2
-
-    ; asm: cmpq %rsi, %rcx
-    ; asm: setge %bl
-    [-,%rbx]            v306 = icmp sge v1, v2  ; bin: 48 39 f1 0f 9d c3
-    ; asm: cmpq %r10, %rsi
-    ; asm: setge %dl
-    [-,%rdx]            v307 = icmp sge v2, v3  ; bin: 4c 39 d6 0f 9d c2
-
-    ; asm: cmpq %rsi, %rcx
-    ; asm: setg %bl
-    [-,%rbx]            v308 = icmp sgt v1, v2  ; bin: 48 39 f1 0f 9f c3
-    ; asm: cmpq %r10, %rsi
-    ; asm: setg %dl
-    [-,%rdx]            v309 = icmp sgt v2, v3  ; bin: 4c 39 d6 0f 9f c2
-
-    ; asm: cmpq %rsi, %rcx
-    ; asm: setle %bl
-    [-,%rbx]            v310 = icmp sle v1, v2  ; bin: 48 39 f1 0f 9e c3
-    ; asm: cmpq %r10, %rsi
-    ; asm: setle %dl
-    [-,%rdx]            v311 = icmp sle v2, v3  ; bin: 4c 39 d6 0f 9e c2
-
-    ; asm: cmpq %rsi, %rcx
-    ; asm: setb %bl
-    [-,%rbx]            v312 = icmp ult v1, v2  ; bin: 48 39 f1 0f 92 c3
-    ; asm: cmpq %r10, %rsi
-    ; asm: setb %dl
-    [-,%rdx]            v313 = icmp ult v2, v3  ; bin: 4c 39 d6 0f 92 c2
-
-    ; asm: cmpq %rsi, %rcx
-    ; asm: setae %bl
-    [-,%rbx]            v314 = icmp uge v1, v2  ; bin: 48 39 f1 0f 93 c3
-    ; asm: cmpq %r10, %rsi
-    ; asm: setae %dl
-    [-,%rdx]            v315 = icmp uge v2, v3  ; bin: 4c 39 d6 0f 93 c2
-
-    ; asm: cmpq %rsi, %rcx
-    ; asm: seta %bl
-    [-,%rbx]            v316 = icmp ugt v1, v2  ; bin: 48 39 f1 0f 97 c3
-    ; asm: cmpq %r10, %rsi
-    ; asm: seta %dl
-    [-,%rdx]            v317 = icmp ugt v2, v3  ; bin: 4c 39 d6 0f 97 c2
-
-    ; asm: cmpq %rsi, %rcx
-    ; asm: setbe %bl
-    [-,%rbx]            v318 = icmp ule v1, v2  ; bin: 48 39 f1 0f 96 c3
-    ; asm: cmpq %r10, %rsi
-    ; asm: setbe %dl
-    [-,%rdx]            v319 = icmp ule v2, v3  ; bin: 4c 39 d6 0f 96 c2
-
-    ; asm: cmpq $37, %rcx
-    ; asm: setl %bl
-    [-,%rbx]            v320 = icmp_imm slt v1, 37     ; bin: 48 83 f9 25 0f 9c c3
-
-    ; asm: cmpq $100000, %rcx
-    ; asm: setl %bl
-    [-,%rbx]            v321 = icmp_imm slt v1, 100000 ; bin: 48 81 f9 000186a0 0f 9c c3
-
-    ; Bool-to-int conversions.
-
-    ; asm: movzbq %bl, %rcx
-    [-,%rcx]             v350 = bint.i64 v300   ; bin: 0f b6 cb
-    ; asm: movzbq %dl, %rsi
-    [-,%rsi]             v351 = bint.i64 v301   ; bin: 0f b6 f2
-
-    ; Colocated functions.
-
-    ; asm: call bar
-    call fn1()                                  ; bin: stk_ovf e8 CallPCRel4(%bar-4) 00000000
-
-    ; asm: lea 0x0(%rip), %rcx
-    [-,%rcx]            v400 = func_addr.i64 fn1        ; bin: 48 8d 0d PCRel4(%bar-4) 00000000
-    ; asm: lea 0x0(%rip), %rsi
-    [-,%rsi]            v401 = func_addr.i64 fn1        ; bin: 48 8d 35 PCRel4(%bar-4) 00000000
-    ; asm: lea 0x0(%rip), %r10
-    [-,%r10]            v402 = func_addr.i64 fn1        ; bin: 4c 8d 15 PCRel4(%bar-4) 00000000
-
-    ; asm: call *%rcx
-    call_indirect sig0, v400()                  ; bin: stk_ovf ff d1
-    ; asm: call *%rsi
-    call_indirect sig0, v401()                  ; bin: stk_ovf ff d6
-    ; asm: call *%r10
-    call_indirect sig0, v402()                  ; bin: stk_ovf 41 ff d2
-
-    ; Non-colocated functions. Note that there is no non-colocated non-PIC call.
-
-    ; asm: movabsq $0, %rcx
-    [-,%rcx]            v410 = func_addr.i64 fn0        ; bin: 48 b9 Abs8(%foo) 0000000000000000
-    ; asm: movabsq $0, %rsi
-    [-,%rsi]            v411 = func_addr.i64 fn0        ; bin: 48 be Abs8(%foo) 0000000000000000
-    ; asm: movabsq $0, %r10
-    [-,%r10]            v412 = func_addr.i64 fn0        ; bin: 49 ba Abs8(%foo) 0000000000000000
-
-    ; asm: call *%rcx
-    call_indirect sig0, v410()                  ; bin: stk_ovf ff d1
-    ; asm: call *%rsi
-    call_indirect sig0, v411()                  ; bin: stk_ovf ff d6
-    ; asm: call *%r10
-    call_indirect sig0, v412()                  ; bin: stk_ovf 41 ff d2
-
-    ; asm: movabsq $-1, %rcx
-    [-,%rcx]            v450 = symbol_value.i64 gv0    ; bin: 48 b9 Abs8(%some_gv) 0000000000000000
-    ; asm: movabsq $-1, %rsi
-    [-,%rsi]            v451 = symbol_value.i64 gv0    ; bin: 48 be Abs8(%some_gv) 0000000000000000
-    ; asm: movabsq $-1, %r10
-    [-,%r10]            v452 = symbol_value.i64 gv0    ; bin: 49 ba Abs8(%some_gv) 0000000000000000
-
-    ; Spill / Fill.
-
-    ; asm: movq %rcx, 1032(%rsp)
-    [-,ss1]             v500 = spill v1         ; bin: stk_ovf 48 89 8c 24 00000408
-    ; asm: movq %rsi, 1032(%rsp)
-    [-,ss1]             v501 = spill v2         ; bin: stk_ovf 48 89 b4 24 00000408
-    ; asm: movq %r10, 1032(%rsp)
-    [-,ss1]             v502 = spill v3         ; bin: stk_ovf 4c 89 94 24 00000408
-
-    ; asm: movq 1032(%rsp), %rcx
-    [-,%rcx]            v510 = fill v500        ; bin: 48 8b 8c 24 00000408
-    ; asm: movq 1032(%rsp), %rsi
-    [-,%rsi]            v511 = fill v501        ; bin: 48 8b b4 24 00000408
-    ; asm: movq 1032(%rsp), %r10
-    [-,%r10]            v512 = fill v502        ; bin: 4c 8b 94 24 00000408
-
-    ; asm: movq %rcx, 1032(%rsp)
-    regspill v1, %rcx -> ss1                    ; bin: stk_ovf 48 89 8c 24 00000408
-    ; asm: movq 1032(%rsp), %rcx
-    regfill v1, ss1 -> %rcx                     ; bin: 48 8b 8c 24 00000408
-
-    ; Push and Pop
-    ; asm: pushq %rcx
-    x86_push v1                                 ; bin: stk_ovf 51
-    ; asm: pushq %r10
-    x86_push v3                                 ; bin: stk_ovf 41 52
-    ; asm: popq %rcx
-    [-,%rcx]            v513 = x86_pop.i64      ; bin: 59
-    ; asm: popq %r10
-    [-,%r10]            v514 = x86_pop.i64      ; bin: 41 5a
-
-    ; Adjust Stack Pointer Up
-    ; asm: addq $64, %rsp
-    adjust_sp_up_imm 64                         ; bin: 48 83 c4 40
-    ; asm: addq $-64, %rsp
-    adjust_sp_up_imm -64                        ; bin: 48 83 c4 c0
-    ; asm: addq $1024, %rsp
-    adjust_sp_up_imm 1024                       ; bin: 48 81 c4 00000400
-    ; asm: addq $-1024, %rsp
-    adjust_sp_up_imm -1024                      ; bin: 48 81 c4 fffffc00
-    ; asm: addq $2147483647, %rsp
-    adjust_sp_up_imm 2147483647                 ; bin: 48 81 c4 7fffffff
-    ; asm: addq $-2147483648, %rsp
-    adjust_sp_up_imm -2147483648                ; bin: 48 81 c4 80000000
-
-    ; Adjust Stack Pointer Down
-    ; asm: subq %rcx, %rsp
-    adjust_sp_down v1                           ; bin: 48 29 cc
-    ; asm: subq %r10, %rsp
-    adjust_sp_down v3                           ; bin: 4c 29 d4
-    ; asm: subq $64, %rsp
-    adjust_sp_down_imm 64                       ; bin: 48 83 ec 40
-    ; asm: subq $-64, %rsp
-    adjust_sp_down_imm -64                      ; bin: 48 83 ec c0
-    ; asm: subq $1024, %rsp
-    adjust_sp_down_imm 1024                     ; bin: 48 81 ec 00000400
-    ; asm: subq $-1024, %rsp
-    adjust_sp_down_imm -1024                    ; bin: 48 81 ec fffffc00
-    ; asm: subq $2147483647, %rsp
-    adjust_sp_down_imm 2147483647               ; bin: 48 81 ec 7fffffff
-    ; asm: subq $-2147483648, %rsp
-    adjust_sp_down_imm -2147483648              ; bin: 48 81 ec 80000000
-
-    ; Shift immediates
-    ; asm: shlq $12, %rsi
-    [-,%rsi]             v515 = ishl_imm v2, 12   ; bin: 48 c1 e6 0c
-    ; asm: shlq $13, %r8
-    [-,%r8]              v516 = ishl_imm v4, 13   ; bin: 49 c1 e0 0d
-    ; asm: sarq $32, %rsi
-    [-,%rsi]             v517 = sshr_imm v2, 32   ; bin: 48 c1 fe 20
-    ; asm: sarq $33, %r8
-    [-,%r8]              v518 = sshr_imm v4, 33   ; bin: 49 c1 f8 21
-    ; asm: shrq $62, %rsi
-    [-,%rsi]             v519 = ushr_imm v2, 62   ; bin: 48 c1 ee 3e
-    ; asm: shrq $63, %r8
-    [-,%r8]              v520 = ushr_imm v4, 63   ; bin: 49 c1 e8 3f
-
-
-    ; Rotate immediates
-    ; asm: rolq $12, %rsi
-    [-,%rsi]             v5101 = rotl_imm v2, 12   ; bin: 48 c1 c6 0c
-    ; asm: rolq $13, %r8
-    [-,%r8]              v5102 = rotl_imm v4, 13   ; bin: 49 c1 c0 0d
-    ; asm: rorq $32, %rsi
-    [-,%rsi]             v5103 = rotr_imm v2, 32   ; bin: 48 c1 ce 20
-    ; asm: rorq $33, %r8
-    [-,%r8]              v5104 = rotr_imm v4, 33   ; bin: 49 c1 c8 21
-
-
-    ; Load Complex
-    [-,%rax]            v521 = iconst.i64 1
-    [-,%rbx]            v522 = iconst.i64 1
-    [-,%rdi]            v523 = iconst.i32 1
-    [-,%rsi]            v524 = iconst.i32 1
-    ; asm: movq (%rax,%rbx,1), %rcx
-    [-,%rcx]            v525 = load_complex.i64 v521+v522               ; bin: heap_oob 48 8b 0c 18
-    ; asm: movl (%rax,%rbx,1), %ecx
-    [-,%rcx]            v526 = load_complex.i32 v521+v522               ; bin: heap_oob 8b 0c 18
-    ; asm: movq 1(%rax,%rbx,1), %rcx
-    [-,%rcx]            v527 = load_complex.i64 v521+v522+1             ; bin: heap_oob 48 8b 4c 18 01
-    ; asm: movl 1(%rax,%rbx,1), %ecx
-    [-,%rcx]            v528 = load_complex.i32 v521+v522+1             ; bin: heap_oob 8b 4c 18 01
-    ; asm: mov    0x100000(%rax,%rbx,1),%rcx
-    [-,%rcx]            v529 = load_complex.i64 v521+v522+0x1000        ; bin: heap_oob 48 8b 8c 18 00001000
-    ; asm: mov    0x100000(%rax,%rbx,1),%ecx
-    [-,%rcx]            v530 = load_complex.i32 v521+v522+0x1000        ; bin: heap_oob 8b 8c 18 00001000
-    ; asm: movzbq (%rax,%rbx,1),%rcx
-    [-,%rcx]            v531 = uload8_complex.i64 v521+v522             ; bin: heap_oob 48 0f b6 0c 18
-    ; asm: movzbl (%rax,%rbx,1),%ecx
-    [-,%rcx]            v532 = uload8_complex.i32 v521+v522             ; bin: heap_oob 0f b6 0c 18
-    ; asm: movsbq (%rax,%rbx,1),%rcx
-    [-,%rcx]            v533 = sload8_complex.i64 v521+v522             ; bin: heap_oob 48 0f be 0c 18
-    ; asm: movsbl (%rax,%rbx,1),%ecx
-    [-,%rcx]            v534 = sload8_complex.i32 v521+v522             ; bin: heap_oob 0f be 0c 18
-    ; asm: movzwq (%rax,%rbx,1),%rcx
-    [-,%rcx]            v535 = uload16_complex.i64 v521+v522            ; bin: heap_oob 48 0f b7 0c 18
-    ; asm: movzwl (%rax,%rbx,1),%ecx
-    [-,%rcx]            v536 = uload16_complex.i32 v521+v522            ; bin: heap_oob 0f b7 0c 18
-    ; asm: movswq (%rax,%rbx,1),%rcx
-    [-,%rcx]            v537 = sload16_complex.i64 v521+v522            ; bin: heap_oob 48 0f bf 0c 18
-    ; asm: movswl (%rax,%rbx,1),%ecx
-    [-,%rcx]            v538 = sload16_complex.i32 v521+v522            ; bin: heap_oob 0f bf 0c 18
-    ; asm: mov    (%rax,%rbx,1),%ecx
-    [-,%rcx]            v539 = uload32_complex v521+v522                ; bin: heap_oob 8b 0c 18
-    ; asm: movslq (%rax,%rbx,1),%rcx
-    [-,%rcx]            v540 = sload32_complex v521+v522                ; bin: heap_oob 48 63 0c 18
-    [-,%r13]            v550 = iconst.i64 1
-    [-,%r14]            v551 = iconst.i64 1
-    ; asm: mov 0x0(%r13,%r14,1),%r12d
-    [-,%r12]            v552 = load_complex.i32 v550+v551               ; bin: heap_oob 47 8b 64 35 00
-
-    ; Store Complex
-    [-,%rcx]            v600 = iconst.i64 1
-    [-,%rcx]            v601 = iconst.i32 1
-    [-,%r10]            v602 = iconst.i64 1
-    [-,%r11]            v603 = iconst.i32 1
-    ; asm: mov    %rcx,(%rax,%rbx,1)
-    store_complex v600, v521+v522               ; bin: heap_oob 48 89 0c 18
-    ; asm: mov    %rcx,0x1(%rax,%rbx,1)
-    store_complex v600, v521+v522+1             ; bin: heap_oob 48 89 4c 18 01
-    ; asm: mov    %rcx,0x100000(%rax,%rbx,1)
-    store_complex v600, v521+v522+0x1000        ; bin: heap_oob 48 89 8c 18 00001000
-    ; asm: mov    %ecx,(%rax,%rbx,1)
-    store_complex v601, v521+v522               ; bin: heap_oob 89 0c 18
-    ; asm: mov    %ecx,0x1(%rax,%rbx,1)
-    store_complex v601, v521+v522+1             ; bin: heap_oob 89 4c 18 01
-    ; asm: mov    %ecx,0x100000(%rax,%rbx,1)
-    store_complex v601, v521+v522+0x1000        ; bin: heap_oob 89 8c 18 00001000
-    ; asm: mov    %ecx,(%rax,%rbx,1)
-    istore32_complex v600, v521+v522            ; bin: heap_oob 89 0c 18
-    ; asm: mov    %cx,(%rax,%rbx,1)
-    istore16_complex v600, v521+v522            ; bin: heap_oob 66 89 0c 18
-    ; asm: mov    %cx,(%rax,%rbx,1)
-    istore16_complex v601, v521+v522            ; bin: heap_oob 66 89 0c 18
-    ; asm: mov    %r10w,(%rax,%rbx,1)
-    istore16_complex v602, v521+v522            ; bin: heap_oob 66 44 89 14 18
-    ; asm: mov    %r11w,(%rax,%rbx,1)
-    istore16_complex v603, v521+v522            ; bin: heap_oob 66 44 89 1c 18
-    ; asm: mov    %cl,(%rax,%rbx,1)
-    istore8_complex v600, v521+v522             ; bin: heap_oob 88 0c 18
-    ; asm: mov    %cl,(%rax,%rbx,1)
-    istore8_complex v601, v521+v522             ; bin: heap_oob 88 0c 18
-
-    ; asm: testq %rcx, %rcx
-    ; asm: je block1
-    brz v1, block1                                ; bin: 48 85 c9 74 1b
-    fallthrough block3
-
-block3:
-    ; asm: testq %rsi, %rsi
-    ; asm: je block1
-    brz v2, block1                                ; bin: 48 85 f6 74 16
-    fallthrough block4
-
-block4:
-    ; asm: testq %r10, %r10
-    ; asm: je block1
-    brz v3, block1                                ; bin: 4d 85 d2 74 11
-    fallthrough block5
-
-block5:
-    ; asm: testq %rcx, %rcx
-    ; asm: jne block1
-    brnz v1, block1                               ; bin: 48 85 c9 75 0c
-    fallthrough block6
-
-block6:
-    ; asm: testq %rsi, %rsi
-    ; asm: jne block1
-    brnz v2, block1                               ; bin: 48 85 f6 75 07
-    fallthrough block7
-
-block7:
-    ; asm: testq %r10, %r10
-    ; asm: jne block1
-    brnz v3, block1                               ; bin: 4d 85 d2 75 02
-
-    ; asm: jmp block2
-    jump block2                                   ; bin: eb 01
-
-    ; asm: block1:
-block1:
-    return                                      ; bin: c3
-
-    ; asm: block2:
-block2:
-    ; Add a no-op instruction to prevent fold_redundant_jump from removing this block.
-    ; asm: notq %rcx
-    [-,%rcx]             v5000 = bnot v1        ; bin: 48 f7 d1
-    jump block1                                   ; bin: eb fa
-}
-
-; CPU flag instructions.
-function %cpu_flags_I64() {
-block0:
-    [-,%rcx]            v1 = iconst.i64 1
-    [-,%r10]            v2 = iconst.i64 2
-    jump block1
-
-block1:
-    ; asm: cmpq %r10, %rcx
-    [-,%rflags]         v10 = ifcmp v1, v2      ; bin: 4c 39 d1
-    ; asm: cmpq %rcx, %r10
-    [-,%rflags]         v11 = ifcmp v2, v1      ; bin: 49 39 ca
-
-    ; asm: je block1
-    brif eq v11, block1                           ; bin: 74 f8
-    jump block2
-
-block2:
-    ; asm: jne block1
-    brif ne v11, block1                           ; bin: 75 f6
-    jump block3
-
-block3:
-    ; asm: jl block1
-    brif slt v11, block1                          ; bin: 7c f4
-    jump block4
-
-block4:
-    ; asm: jge block1
-    brif sge v11, block1                          ; bin: 7d f2
-    jump block5
-
-block5:
-    ; asm: jg block1
-    brif sgt v11, block1                          ; bin: 7f f0
-    jump block6
-
-block6:
-    ; asm: jle block1
-    brif sle v11, block1                          ; bin: 7e ee
-    jump block7
-
-block7:
-    ; asm: jb block1
-    brif ult v11, block1                          ; bin: 72 ec
-    jump block8
-
-block8:
-    ; asm: jae block1
-    brif uge v11, block1                          ; bin: 73 ea
-    jump block9
-
-block9:
-    ; asm: ja block1
-    brif ugt v11, block1                          ; bin: 77 e8
-    jump block10
-
-block10:
-    ; asm: jbe block1
-    brif ule v11, block1                          ; bin: 76 e6
-    jump block11
-
-block11:
-
-    ; asm: sete %bl
-    [-,%rbx]            v20 = trueif eq v11                           ; bin: 0f 94 c3
-    ; asm: setne %bl
-    [-,%rbx]            v21 = trueif ne v11                           ; bin: 0f 95 c3
-    ; asm: setl %dl
-    [-,%rdx]            v22 = trueif slt v11                          ; bin: 0f 9c c2
-    ; asm: setge %dl
-    [-,%rdx]            v23 = trueif sge v11                          ; bin: 0f 9d c2
-    ; asm: setg %r10b
-    [-,%r10]            v24 = trueif sgt v11                          ; bin: 41 0f 9f c2
-    ; asm: setle %r10b
-    [-,%r10]            v25 = trueif sle v11                          ; bin: 41 0f 9e c2
-    ; asm: setb %r14b
-    [-,%r14]            v26 = trueif ult v11                          ; bin: 41 0f 92 c6
-    ; asm: setae %r14b
-    [-,%r14]            v27 = trueif uge v11                          ; bin: 41 0f 93 c6
-    ; asm: seta %r11b
-    [-,%r11]            v28 = trueif ugt v11                          ; bin: 41 0f 97 c3
-    ; asm: setbe %r11b
-    [-,%r11]            v29 = trueif ule v11                          ; bin: 41 0f 96 c3
-
-    ; The trapif instructions are encoded as macros: a conditional jump over a ud2.
-    ; asm: jne .+4; ud2
-    trapif eq v11, user0                           ; bin: 75 02 user0 0f 0b
-    ; asm: je .+4; ud2
-    trapif ne v11, user0                           ; bin: 74 02 user0 0f 0b
-    ; asm: jnl .+4; ud2
-    trapif slt v11, user0                          ; bin: 7d 02 user0 0f 0b
-    ; asm: jnge .+4; ud2
-    trapif sge v11, user0                          ; bin: 7c 02 user0 0f 0b
-    ; asm: jng .+4; ud2
-    trapif sgt v11, user0                          ; bin: 7e 02 user0 0f 0b
-    ; asm: jnle .+4; ud2
-    trapif sle v11, user0                          ; bin: 7f 02 user0 0f 0b
-    ; asm: jnb .+4; ud2
-    trapif ult v11, user0                          ; bin: 73 02 user0 0f 0b
-    ; asm: jnae .+4; ud2
-    trapif uge v11, user0                          ; bin: 72 02 user0 0f 0b
-    ; asm: jna .+4; ud2
-    trapif ugt v11, user0                          ; bin: 76 02 user0 0f 0b
-    ; asm: jnbe .+4; ud2
-    trapif ule v11, user0                          ; bin: 77 02 user0 0f 0b
-    ; asm: jo .+4; ud2
-    trapif of v11, user0                          ; bin: 71 02 user0 0f 0b
-    ; asm: jno .+4; ud2
-    trapif nof v11, user0                          ; bin: 70 02 user0 0f 0b
-
-    ; Debug trap.
-    debugtrap ; bin: cc
-
-    ; Stack check.
-    ; asm: cmpq %rsp, %rcx
-    [-,%rflags]         v40 = ifcmp_sp v1       ; bin: 48 39 e1
-    ; asm: cmpq %rsp, %r10
-    [-,%rflags]         v41 = ifcmp_sp v2       ; bin: 49 39 e2
-
-    ; asm: cmpq $-100, %rcx
-    [-,%rflags]         v522 = ifcmp_imm v1, -100   ; bin: 48 83 f9 9c
-    ; asm: cmpq $100, %r10
-    [-,%rflags]         v523 = ifcmp_imm v2, 100    ; bin: 49 83 fa 64
-
-    ; asm: cmpq $-10000, %rcx
-    [-,%rflags]         v524 = ifcmp_imm v1, -10000 ; bin: 48 81 f9 ffffd8f0
-    ; asm: cmpq $10000, %r10
-    [-,%rflags]         v525 = ifcmp_imm v2, 10000  ; bin: 49 81 fa 00002710
-
-
-    return
-}
-
-; Test for the encoding of outgoing_arg stack slots.
-function %outargs() {
-    ss0 = incoming_arg 16, offset -16
-    ss1 = outgoing_arg 8, offset 8
-    ss2 = outgoing_arg 8, offset 0
-
-block0:
-    [-,%rcx]            v1 = iconst.i64 1
-
-    ; asm: movq %rcx, 8(%rsp)
-    [-,ss1]             v10 = spill v1              ; bin: stk_ovf 48 89 8c 24 00000008
-    ; asm: movq %rcx, (%rsp)
-    [-,ss2]             v11 = spill v1              ; bin: stk_ovf 48 89 8c 24 00000000
-
-    return
-}
-
-; Tests for i32 instructions in 64-bit mode.
-;
-; Note that many i32 instructions can be encoded both with and without a REX
-; prefix if they only use the low 8 registers. Here, we are testing the REX
-; encodings which are chosen by default. Switching to non-REX encodings should
-; be done by an instruction shrinking pass.
-function %I32() {
-    sig0 = ()
-    fn0 = %foo()
-
-    ss0 = incoming_arg 8, offset 0
-    ss1 = incoming_arg 1024, offset -1024
-    ss2 = incoming_arg 1024, offset -2048
-    ss3 = incoming_arg 8, offset -2056
-
-block0:
-
-    ; Integer Constants.
-
-    ; asm: movl $0x01020304, %ecx
-    [-,%rcx]            v1 = iconst.i32 0x0102_0304           ; bin: b9 01020304
-    ; asm: movl $0x11020304, %esi
-    [-,%rsi]            v2 = iconst.i32 0x1102_0304           ; bin: be 11020304
-    ; asm: movl $0x21020304, %r10d
-    [-,%r10]            v3 = iconst.i32 0x2102_0304           ; bin: 41 ba 21020304
-    ; asm: movl $0xff001122, %r8d
-    [-,%r8]             v4 = iconst.i32 0xff00_1122           ; bin: 41 b8 ff001122
-    ; asm: movl $0x88001122, %r14d
-    [-,%r14]            v5 = iconst.i32 0xffff_ffff_8800_1122 ; bin: 41 be 88001122
-
-    ; Load/Store instructions.
-
-    ; Register indirect addressing with no displacement.
-
-    ; asm: movl (%rcx), %edi
-    [-,%rdi]            v10 = load.i32 v1      ; bin: heap_oob 8b 39
-    ; asm: movl (%rsi), %edx
-    [-,%rdx]            v11 = load.i32 v2      ; bin: heap_oob 8b 16
-    ; asm: movzwl (%rcx), %edi
-    [-,%rdi]            v12 = uload16.i32 v1   ; bin: heap_oob 0f b7 39
-    ; asm: movzwl (%rsi), %edx
-    [-,%rdx]            v13 = uload16.i32 v2   ; bin: heap_oob 0f b7 16
-    ; asm: movswl (%rcx), %edi
-    [-,%rdi]            v14 = sload16.i32 v1   ; bin: heap_oob 0f bf 39
-    ; asm: movswl (%rsi), %edx
-    [-,%rdx]            v15 = sload16.i32 v2   ; bin: heap_oob 0f bf 16
-    ; asm: movzbl (%rcx), %edi
-    [-,%rdi]            v16 = uload8.i32 v1    ; bin: heap_oob 0f b6 39
-    ; asm: movzbl (%rsi), %edx
-    [-,%rdx]            v17 = uload8.i32 v2    ; bin: heap_oob 0f b6 16
-    ; asm: movsbl (%rcx), %edi
-    [-,%rdi]            v18 = sload8.i32 v1    ; bin: heap_oob 0f be 39
-    ; asm: movsbl (%rsi), %edx
-    [-,%rdx]            v19 = sload8.i32 v2    ; bin: heap_oob 0f be 16
-
-    ; Register-indirect with 8-bit signed displacement.
-
-    ; asm: movl 50(%rcx), %edi
-    [-,%rdi]            v20 = load.i32 v1+50           ; bin: heap_oob 8b 79 32
-    ; asm: movl -50(%rsi), %edx
-    [-,%rdx]            v21 = load.i32 v2-50           ; bin: heap_oob 8b 56 ce
-    ; asm: movzwl 50(%rcx), %edi
-    [-,%rdi]            v22 = uload16.i32 v1+50        ; bin: heap_oob 0f b7 79 32
-    ; asm: movzwl -50(%rsi), %edx
-    [-,%rdx]            v23 = uload16.i32 v2-50        ; bin: heap_oob 0f b7 56 ce
-    ; asm: movswl 50(%rcx), %edi
-    [-,%rdi]            v24 = sload16.i32 v1+50        ; bin: heap_oob 0f bf 79 32
-    ; asm: movswl -50(%rsi), %edx
-    [-,%rdx]            v25 = sload16.i32 v2-50        ; bin: heap_oob 0f bf 56 ce
-    ; asm: movzbl 50(%rcx), %edi
-    [-,%rdi]            v26 = uload8.i32 v1+50         ; bin: heap_oob 0f b6 79 32
-    ; asm: movzbl -50(%rsi), %edx
-    [-,%rdx]            v27 = uload8.i32 v2-50         ; bin: heap_oob 0f b6 56 ce
-    ; asm: movsbl 50(%rcx), %edi
-    [-,%rdi]            v28 = sload8.i32 v1+50         ; bin: heap_oob 0f be 79 32
-    ; asm: movsbl -50(%rsi), %edx
-    [-,%rdx]            v29 = sload8.i32 v2-50         ; bin: heap_oob 0f be 56 ce
-
-    ; Register-indirect with 32-bit signed displacement.
-
-    ; asm: movl 50000(%rcx), %edi
-    [-,%rdi]            v30 = load.i32 v1+50000           ; bin: heap_oob 8b b9 0000c350
-    ; asm: movl -50000(%rsi), %edx
-    [-,%rdx]            v31 = load.i32 v2-50000           ; bin: heap_oob 8b 96 ffff3cb0
-    ; asm: movzwl 50000(%rcx), %edi
-    [-,%rdi]            v32 = uload16.i32 v1+50000        ; bin: heap_oob 0f b7 b9 0000c350
-    ; asm: movzwl -50000(%rsi), %edx
-    [-,%rdx]            v33 = uload16.i32 v2-50000        ; bin: heap_oob 0f b7 96 ffff3cb0
-    ; asm: movswl 50000(%rcx), %edi
-    [-,%rdi]            v34 = sload16.i32 v1+50000        ; bin: heap_oob 0f bf b9 0000c350
-    ; asm: movswl -50000(%rsi), %edx
-    [-,%rdx]            v35 = sload16.i32 v2-50000        ; bin: heap_oob 0f bf 96 ffff3cb0
-    ; asm: movzbl 50000(%rcx), %edi
-    [-,%rdi]            v36 = uload8.i32 v1+50000         ; bin: heap_oob 0f b6 b9 0000c350
-    ; asm: movzbl -50000(%rsi), %edx
-    [-,%rdx]            v37 = uload8.i32 v2-50000         ; bin: heap_oob 0f b6 96 ffff3cb0
-    ; asm: movsbl 50000(%rcx), %edi
-    [-,%rdi]            v38 = sload8.i32 v1+50000         ; bin: heap_oob 0f be b9 0000c350
-    ; asm: movsbl -50000(%rsi), %edx
-    [-,%rdx]            v39 = sload8.i32 v2-50000         ; bin: heap_oob 0f be 96 ffff3cb0
-
-    ; Integer Register Operations.
-
-    ; asm: notl %ecx
-    [-,%rcx]             v4000 = bnot v1       ; bin: f7 d1
-    ; asm: notl %esi
-    [-,%rsi]             v4001 = bnot v2       ; bin: f7 d6
-    ; asm: notl %r10d
-    [-,%r10]             v4002 = bnot v3       ; bin: 41 f7 d2
-
-    ; Integer Register-Register Operations.
-
-    ; asm: addl %esi, %ecx
-    [-,%rcx]             v40 = iadd v1, v2       ; bin: 01 f1
-    ; asm: addl %r10d, %esi
-    [-,%rsi]             v41 = iadd v2, v3       ; bin: 44 01 d6
-    ; asm: addl %ecx, %r10d
-    [-,%r10]             v42 = iadd v3, v1       ; bin: 41 01 ca
-
-    ; asm: subl %esi, %ecx
-    [-,%rcx]             v50 = isub v1, v2       ; bin: 29 f1
-    ; asm: subl %r10d, %esi
-    [-,%rsi]             v51 = isub v2, v3       ; bin: 44 29 d6
-    ; asm: subl %ecx, %r10d
-    [-,%r10]             v52 = isub v3, v1       ; bin: 41 29 ca
-
-    ; asm: andl %esi, %ecx
-    [-,%rcx]             v60 = band v1, v2       ; bin: 21 f1
-    ; asm: andl %r10d, %esi
-    [-,%rsi]             v61 = band v2, v3       ; bin: 44 21 d6
-    ; asm: andl %ecx, %r10d
-    [-,%r10]             v62 = band v3, v1       ; bin: 41 21 ca
-
-    ; asm: orl %esi, %ecx
-    [-,%rcx]             v70 = bor v1, v2       ; bin: 09 f1
-    ; asm: orl %r10d, %esi
-    [-,%rsi]             v71 = bor v2, v3       ; bin: 44 09 d6
-    ; asm: orl %ecx, %r10d
-    [-,%r10]             v72 = bor v3, v1       ; bin: 41 09 ca
-
-    ; asm: xorl %esi, %ecx
-    [-,%rcx]             v80 = bxor v1, v2       ; bin: 31 f1
-    ; asm: xorl %r10d, %esi
-    [-,%rsi]             v81 = bxor v2, v3       ; bin: 44 31 d6
-    ; asm: xorl %ecx, %r10d
-    [-,%r10]             v82 = bxor v3, v1       ; bin: 41 31 ca
-
-    ; asm: shll %cl, %esi
-    [-,%rsi]             v90 = ishl v2, v1       ; bin: d3 e6
-    ; asm: shll %cl, %r10d
-    [-,%r10]             v91 = ishl v3, v1       ; bin: 41 d3 e2
-    ; asm: sarl %cl, %esi
-    [-,%rsi]             v92 = sshr v2, v1       ; bin: d3 fe
-    ; asm: sarl %cl, %r10d
-    [-,%r10]             v93 = sshr v3, v1       ; bin: 41 d3 fa
-    ; asm: shrl %cl, %esi
-    [-,%rsi]             v94 = ushr v2, v1       ; bin: d3 ee
-    ; asm: shrl %cl, %r10d
-    [-,%r10]             v95 = ushr v3, v1       ; bin: 41 d3 ea
-
-    ; asm: roll %cl, %esi
-    [-,%rsi]             v96 = rotl v2, v1       ; bin: d3 c6
-    ; asm: roll %cl, %r10d
-    [-,%r10]             v97 = rotl v3, v1       ; bin: 41 d3 c2
-    ; asm: rorl %cl, %esi
-    [-,%rsi]             v98 = rotr v2, v1       ; bin: d3 ce
-    ; asm: rorl %cl, %r10d
-    [-,%r10]             v99 = rotr v3, v1       ; bin: 41 d3 ca
-
-    ; Integer Register-Immediate Operations.
-    ; These 64-bit ops all use a 32-bit immediate that is sign-extended to 64 bits.
-    ; Some take 8-bit immediates that are sign-extended to 64 bits.
-
-    ; asm: addl $-100000, %ecx
-    [-,%rcx]     v100 = iadd_imm v1, -100000     ; bin: 81 c1 fffe7960
-    ; asm: addl $100000, %esi
-    [-,%rsi]     v101 = iadd_imm v2, 100000      ; bin: 81 c6 000186a0
-    ; asm: addl $0x7fffffff, %r10d
-    [-,%r10]     v102 = iadd_imm v3, 0x7fff_ffff ; bin: 41 81 c2 7fffffff
-    ; asm: addl $100, %r8d
-    [-,%r8]      v103 = iadd_imm v4, 100         ; bin: 41 83 c0 64
-    ; asm: addl $-100, %r14d
-    [-,%r14]     v104 = iadd_imm v5, -100        ; bin: 41 83 c6 9c
-
-    ; asm: andl $-100000, %ecx
-    [-,%rcx]     v110 = band_imm v1, -100000     ; bin: 81 e1 fffe7960
-    ; asm: andl $100000, %esi
-    [-,%rsi]     v111 = band_imm v2, 100000      ; bin: 81 e6 000186a0
-    ; asm: andl $0x7fffffff, %r10d
-    [-,%r10]     v112 = band_imm v3, 0x7fff_ffff ; bin: 41 81 e2 7fffffff
-    ; asm: andl $100, %r8d
-    [-,%r8]      v113 = band_imm v4, 100         ; bin: 41 83 e0 64
-    ; asm: andl $-100, %r14d
-    [-,%r14]     v114 = band_imm v5, -100        ; bin: 41 83 e6 9c
-
-    ; asm: orl $-100000, %ecx
-    [-,%rcx]     v120 = bor_imm v1, -100000      ; bin: 81 c9 fffe7960
-    ; asm: orl $100000, %esi
-    [-,%rsi]     v121 = bor_imm v2, 100000       ; bin: 81 ce 000186a0
-    ; asm: orl $0x7fffffff, %r10d
-    [-,%r10]     v122 = bor_imm v3, 0x7fff_ffff  ; bin: 41 81 ca 7fffffff
-    ; asm: orl $100, %r8d
-    [-,%r8]      v123 = bor_imm v4, 100          ; bin: 41 83 c8 64
-    ; asm: orl $-100, %r14d
-    [-,%r14]     v124 = bor_imm v5, -100         ; bin: 41 83 ce 9c
-    ; asm: ret
-
-    ; asm: xorl $-100000, %ecx
-    [-,%rcx]     v130 = bxor_imm v1, -100000     ; bin: 81 f1 fffe7960
-    ; asm: xorl $100000, %esi
-    [-,%rsi]     v131 = bxor_imm v2, 100000      ; bin: 81 f6 000186a0
-    ; asm: xorl $0x7fffffff, %r10d
-    [-,%r10]     v132 = bxor_imm v3, 0x7fff_ffff ; bin: 41 81 f2 7fffffff
-    ; asm: xorl $100, %r8d
-    [-,%r8]      v133 = bxor_imm v4, 100         ; bin: 41 83 f0 64
-    ; asm: xorl $-100, %r14d
-    [-,%r14]     v134 = bxor_imm v5, -100        ; bin: 41 83 f6 9c
-
-    ; Register copies.
-
-    ; asm: movl %esi, %ecx
-    [-,%rcx]             v140 = copy v2          ; bin: 89 f1
-    ; asm: movl %r10d, %esi
-    [-,%rsi]             v141 = copy v3          ; bin: 44 89 d6
-    ; asm: movl %ecx, %r10d
-    [-,%r10]             v142 = copy v1          ; bin: 41 89 ca
-
-    ; More arithmetic.
-
-    ; asm: imull %esi, %ecx
-    [-,%rcx]             v150 = imul v1, v2       ; bin: 0f af ce
-    ; asm: imull %r10d, %esi
-    [-,%rsi]             v151 = imul v2, v3       ; bin: 41 0f af f2
-    ; asm: imull %ecx, %r10d
-    [-,%r10]             v152 = imul v3, v1       ; bin: 44 0f af d1
-
-    [-,%rax]      v160 = iconst.i32 1
-    [-,%rdx]      v161 = iconst.i32 2
-    ; asm: idivl %ecx
-    [-,%rax,%rdx] v162, v163 = x86_sdivmodx v160, v161, v1  ; bin: int_divz f7 f9
-    ; asm: idivl %esi
-    [-,%rax,%rdx] v164, v165 = x86_sdivmodx v160, v161, v2  ; bin: int_divz f7 fe
-    ; asm: idivl %r10d
-    [-,%rax,%rdx] v166, v167 = x86_sdivmodx v160, v161, v3  ; bin: int_divz 41 f7 fa
-    ; asm: divl %ecx
-    [-,%rax,%rdx] v168, v169 = x86_udivmodx v160, v161, v1  ; bin: int_divz f7 f1
-    ; asm: divl %esi
-    [-,%rax,%rdx] v170, v171 = x86_udivmodx v160, v161, v2  ; bin: int_divz f7 f6
-    ; asm: divl %r10d
-    [-,%rax,%rdx] v172, v173 = x86_udivmodx v160, v161, v3  ; bin: int_divz 41 f7 f2
-
-    ; Bit-counting instructions.
-
-    ; asm: popcntl %esi, %ecx
-    [-,%rcx]            v200 = popcnt v2         ; bin: f3 0f b8 ce
-    ; asm: popcntl %r10d, %esi
-    [-,%rsi]            v201 = popcnt v3         ; bin: f3 41 0f b8 f2
-    ; asm: popcntl %ecx, %r10d
-    [-,%r10]            v202 = popcnt v1         ; bin: f3 44 0f b8 d1
-
-    ; asm: lzcntl %esi, %ecx
-    [-,%rcx]            v203 = clz v2            ; bin: f3 0f bd ce
-    ; asm: lzcntl %r10d, %esi
-    [-,%rsi]            v204 = clz v3            ; bin: f3 41 0f bd f2
-    ; asm: lzcntl %ecx, %r10d
-    [-,%r10]            v205 = clz v1            ; bin: f3 44 0f bd d1
-
-    ; asm: tzcntl %esi, %ecx
-    [-,%rcx]            v206 = ctz v2            ; bin: f3 0f bc ce
-    ; asm: tzcntl %r10d, %esi
-    [-,%rsi]            v207 = ctz v3            ; bin: f3 41 0f bc f2
-    ; asm: tzcntl %ecx, %r10d
-    [-,%r10]            v208 = ctz v1            ; bin: f3 44 0f bc d1
-
-    ; Integer comparisons.
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: sete %bl
-    [-,%rbx]            v300 = icmp eq v1, v2   ; bin: 39 f1 0f 94 c3
-    ; asm: cmpl %r10d, %esi
-    ; asm: sete %dl
-    [-,%rdx]            v301 = icmp eq v2, v3   ; bin: 44 39 d6 0f 94 c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: setne %bl
-    [-,%rbx]            v302 = icmp ne v1, v2   ; bin: 39 f1 0f 95 c3
-    ; asm: cmpl %r10d, %esi
-    ; asm: setne %dl
-    [-,%rdx]            v303 = icmp ne v2, v3   ; bin: 44 39 d6 0f 95 c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: setl %bl
-    [-,%rbx]            v304 = icmp slt v1, v2  ; bin: 39 f1 0f 9c c3
-    ; asm: cmpl %r10d, %esi
-    ; asm: setl %dl
-    [-,%rdx]            v305 = icmp slt v2, v3  ; bin: 44 39 d6 0f 9c c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: setge %bl
-    [-,%rbx]            v306 = icmp sge v1, v2  ; bin: 39 f1 0f 9d c3
-    ; asm: cmpl %r10d, %esi
-    ; asm: setge %dl
-    [-,%rdx]            v307 = icmp sge v2, v3  ; bin: 44 39 d6 0f 9d c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: setg %bl
-    [-,%rbx]            v308 = icmp sgt v1, v2  ; bin: 39 f1 0f 9f c3
-    ; asm: cmpl %r10d, %esi
-    ; asm: setg %dl
-    [-,%rdx]            v309 = icmp sgt v2, v3  ; bin: 44 39 d6 0f 9f c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: setle %bl
-    [-,%rbx]            v310 = icmp sle v1, v2  ; bin: 39 f1 0f 9e c3
-    ; asm: cmpl %r10d, %esi
-    ; asm: setle %dl
-    [-,%rdx]            v311 = icmp sle v2, v3  ; bin: 44 39 d6 0f 9e c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: setb %bl
-    [-,%rbx]            v312 = icmp ult v1, v2  ; bin: 39 f1 0f 92 c3
-    ; asm: cmpl %r10d, %esi
-    ; asm: setb %dl
-    [-,%rdx]            v313 = icmp ult v2, v3  ; bin: 44 39 d6 0f 92 c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: setae %bl
-    [-,%rbx]            v314 = icmp uge v1, v2  ; bin: 39 f1 0f 93 c3
-    ; asm: cmpl %r10d, %esi
-    ; asm: setae %dl
-    [-,%rdx]            v315 = icmp uge v2, v3  ; bin: 44 39 d6 0f 93 c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: seta %bl
-    [-,%rbx]            v316 = icmp ugt v1, v2  ; bin: 39 f1 0f 97 c3
-    ; asm: cmpl %r10d, %esi
-    ; asm: seta %dl
-    [-,%rdx]            v317 = icmp ugt v2, v3  ; bin: 44 39 d6 0f 97 c2
-
-    ; asm: cmpl %esi, %ecx
-    ; asm: setbe %bl
-    [-,%rbx]            v318 = icmp ule v1, v2  ; bin: 39 f1 0f 96 c3
-    ; asm: cmpl %r10d, %esi
-    ; asm: setbe %dl
-    [-,%rdx]            v319 = icmp ule v2, v3  ; bin: 44 39 d6 0f 96 c2
-
-    ; asm: cmpl $37, %ecx
-    ; asm: setl %bl
-    [-,%rbx]            v320 = icmp_imm slt v1, 37  ; bin: 83 f9 25 0f 9c c3
-
-    ; asm: cmpl $100000, %ecx
-    ; asm: setl %bl
-    [-,%rbx]            v321 = icmp_imm slt v1, 100000 ; bin: 81 f9 000186a0 0f 9c c3
-
-    ; Bool-to-int conversions.
-
-    ; asm: movzbl %bl, %ecx
-    [-,%rcx]             v350 = bint.i32 v300   ; bin: 0f b6 cb
-    ; asm: movzbl %dl, %esi
-    [-,%rsi]             v351 = bint.i32 v301   ; bin: 0f b6 f2
-
-    ; Spill / Fill.
-
-    ; asm: movl %ecx, 1032(%rsp)
-    [-,ss1]             v500 = spill v1         ; bin: stk_ovf 89 8c 24 00000408
-    ; asm: movl %esi, 1032(%rsp)
-    [-,ss1]             v501 = spill v2         ; bin: stk_ovf 89 b4 24 00000408
-    ; asm: movl %r10d, 1032(%rsp)
-    [-,ss1]             v502 = spill v3         ; bin: stk_ovf 44 89 94 24 00000408
-
-    ; asm: movl 1032(%rsp), %ecx
-    [-,%rcx]            v510 = fill v500        ; bin: 8b 8c 24 00000408
-    ; asm: movl 1032(%rsp), %esi
-    [-,%rsi]            v511 = fill v501        ; bin: 8b b4 24 00000408
-    ; asm: movl 1032(%rsp), %r10d
-    [-,%r10]            v512 = fill v502        ; bin: 44 8b 94 24 00000408
-
-    ; asm: movl %ecx, 1032(%rsp)
-    regspill v1, %rcx -> ss1                    ; bin: stk_ovf 89 8c 24 00000408
-    ; asm: movl 1032(%rsp), %ecx
-    regfill v1, ss1 -> %rcx                     ; bin: 8b 8c 24 00000408
-
-    ; asm: cmpl %esi, %ecx
-    [-,%rflags]         v520 = ifcmp v1, v2      ; bin: 39 f1
-    ; asm: cmpl %r10d, %esi
-    [-,%rflags]         v521 = ifcmp v2, v3      ; bin: 44 39 d6
-
-    ; asm: cmpl $-100, %ecx
-    [-,%rflags]         v522 = ifcmp_imm v1, -100   ; bin: 83 f9 9c
-    ; asm: cmpl $100, %r10d
-    [-,%rflags]         v523 = ifcmp_imm v3, 100    ; bin: 41 83 fa 64
-
-    ; asm: cmpl $-10000, %ecx
-    [-,%rflags]         v524 = ifcmp_imm v1, -10000 ; bin: 81 f9 ffffd8f0
-    ; asm: cmpl $10000, %r10d
-    [-,%rflags]         v525 = ifcmp_imm v3, 10000  ; bin: 41 81 fa 00002710
-
-    ; asm: shll $2, %esi
-    [-,%rsi]             v526 = ishl_imm v2, 2    ; bin: c1 e6 02
-    ; asm: shll $12, %r10d
-    [-,%r10]             v527 = ishl_imm v3, 12   ; bin: 41 c1 e2 0c
-    ; asm: sarl $5, %esi
-    [-,%rsi]             v529 = sshr_imm v2, 5    ; bin: c1 fe 05
-    ; asm: sarl $32, %r10d
-    [-,%r10]             v530 = sshr_imm v3, 32   ; bin: 41 c1 fa 20
-    ; asm: shrl $8, %esi
-    [-,%rsi]             v532 = ushr_imm v2, 8    ; bin: c1 ee 08
-    ; asm: shrl $31, %r10d
-    [-,%r10]             v533 = ushr_imm v3, 31   ; bin: 41 c1 ea 1f
-
-    ; asm: testl %ecx, %ecx
-    ; asm: je block1x
-    brz v1, block1                                ; bin: 85 c9 74 18
-    fallthrough block3
-
-block3:
-    ; asm: testl %esi, %esi
-    ; asm: je block1x
-    brz v2, block1                                ; bin: 85 f6 74 14
-    fallthrough block4
-
-block4:
-    ; asm: testl %r10d, %r10d
-    ; asm: je block1x
-    brz v3, block1                                ; bin: 45 85 d2 74 0f
-    fallthrough block5
-
-block5:
-    ; asm: testl %ecx, %ecx
-    ; asm: jne block1x
-    brnz v1, block1                               ; bin: 85 c9 75 0b
-    fallthrough block6
-
-block6:
-    ; asm: testl %esi, %esi
-    ; asm: jne block1x
-    brnz v2, block1                               ; bin: 85 f6 75 07
-    fallthrough block7
-
-block7:
-    ; asm: testl %r10d, %r10d
-    ; asm: jne block1x
-    brnz v3, block1                               ; bin: 45 85 d2 75 02
-
-    ; asm: jmp block2x
-    jump block2                                   ; bin: eb 01
-
-    ; asm: block1x:
-block1:
-    return                                      ; bin: c3
-
-    ; asm: block2x:
-block2:
-    ; Add a no-op instruction to prevent fold_redundant_jump from removing this block.
-    ; asm: notl %ecx
-    [-,%rcx]             v5000 = bnot v1        ; bin: f7 d1
-    jump block1                                   ; bin: eb fb
-
-}
-
-; Tests for i32/i8 conversion instructions.
-function %I32_I8() {
-block0:
-    [-,%rcx]            v1 = iconst.i32 1
-    [-,%rsi]            v2 = iconst.i32 2
-    [-,%r10]            v3 = iconst.i32 3
-
-    [-,%rcx]            v11 = ireduce.i8 v1             ; bin:
-    [-,%rsi]            v12 = ireduce.i8 v2             ; bin:
-    [-,%r10]            v13 = ireduce.i8 v3             ; bin:
-
-    ; asm: movsbl %cl, %esi
-    [-,%rsi]            v20 = sextend.i32 v11           ; bin: 0f be f1
-    ; asm: movsbl %sil, %r10d
-    [-,%r10]            v21 = sextend.i32 v12           ; bin: 44 0f be d6
-    ; asm: movsbl %r10b, %ecx
-    [-,%rcx]            v22 = sextend.i32 v13           ; bin: 41 0f be ca
-
-    ; asm: movzbl %cl, %esi
-    [-,%rsi]            v30 = uextend.i32 v11           ; bin: 0f b6 f1
-    ; asm: movzbl %sil, %r10d
-    [-,%r10]            v31 = uextend.i32 v12           ; bin: 44 0f b6 d6
-    ; asm: movzbl %r10b, %ecx
-    [-,%rcx]            v32 = uextend.i32 v13           ; bin: 41 0f b6 ca
-
-    trap user0                                          ; bin: user0 0f 0b
-}
-
-; Tests for i32/i16 conversion instructions.
-function %I32_I16() {
-block0:
-    [-,%rcx]            v1 = iconst.i32 1
-    [-,%rsi]            v2 = iconst.i32 2
-    [-,%r10]            v3 = iconst.i32 3
-
-    [-,%rcx]            v11 = ireduce.i16 v1            ; bin:
-    [-,%rsi]            v12 = ireduce.i16 v2            ; bin:
-    [-,%r10]            v13 = ireduce.i16 v3            ; bin:
-
-    ; asm: movswl %cx, %esi
-    [-,%rsi]            v20 = sextend.i32 v11           ; bin: 0f bf f1
-    ; asm: movswl %si, %r10d
-    [-,%r10]            v21 = sextend.i32 v12           ; bin: 44 0f bf d6
-    ; asm: movswl %r10w, %ecx
-    [-,%rcx]            v22 = sextend.i32 v13           ; bin: 41 0f bf ca
-
-    ; asm: movzwl %cx, %esi
-    [-,%rsi]            v30 = uextend.i32 v11           ; bin: 0f b7 f1
-    ; asm: movzwl %si, %r10d
-    [-,%r10]            v31 = uextend.i32 v12           ; bin: 44 0f b7 d6
-    ; asm: movzwl %r10w, %ecx
-    [-,%rcx]            v32 = uextend.i32 v13           ; bin: 41 0f b7 ca
-
-    trap user0                                          ; bin: user0 0f 0b
-}
-
-; Tests for i64/i8 conversion instructions.
-function %I64_I8() {
-block0:
-    [-,%rcx]            v1 = iconst.i64 1
-    [-,%rsi]            v2 = iconst.i64 2
-    [-,%r10]            v3 = iconst.i64 3
-
-    [-,%rcx]            v11 = ireduce.i8 v1             ; bin:
-    [-,%rsi]            v12 = ireduce.i8 v2             ; bin:
-    [-,%r10]            v13 = ireduce.i8 v3             ; bin:
-
-    ; asm: movsbq %cl, %rsi
-    [-,%rsi]            v20 = sextend.i64 v11           ; bin: 48 0f be f1
-    ; asm: movsbq %sil, %r10
-    [-,%r10]            v21 = sextend.i64 v12           ; bin: 4c 0f be d6
-    ; asm: movsbq %r10b, %rcx
-    [-,%rcx]            v22 = sextend.i64 v13           ; bin: 49 0f be ca
-
-    ; asm: movzbl %cl, %esi
-    [-,%rsi]            v30 = uextend.i64 v11           ; bin: 0f b6 f1
-    ; asm: movzbl %sil, %r10d
-    [-,%r10]            v31 = uextend.i64 v12           ; bin: 44 0f b6 d6
-    ; asm: movzbl %r10b, %ecx
-    [-,%rcx]            v32 = uextend.i64 v13           ; bin: 41 0f b6 ca
-
-    trap user0                                          ; bin: user0 0f 0b
-}
-
-; Tests for i64/i16 conversion instructions.
-function %I64_I16() {
-block0:
-    [-,%rcx]            v1 = iconst.i64 1
-    [-,%rsi]            v2 = iconst.i64 2
-    [-,%r10]            v3 = iconst.i64 3
-
-    [-,%rcx]            v11 = ireduce.i16 v1            ; bin:
-    [-,%rsi]            v12 = ireduce.i16 v2            ; bin:
-    [-,%r10]            v13 = ireduce.i16 v3            ; bin:
-
-    ; asm: movswq %cx, %rsi
-    [-,%rsi]            v20 = sextend.i64 v11           ; bin: 48 0f bf f1
-    ; asm: movswq %si, %r10
-    [-,%r10]            v21 = sextend.i64 v12           ; bin: 4c 0f bf d6
-    ; asm: movswq %r10w, %rcx
-    [-,%rcx]            v22 = sextend.i64 v13           ; bin: 49 0f bf ca
-
-    ; asm: movzwl %cx, %esi
-    [-,%rsi]            v30 = uextend.i64 v11           ; bin: 0f b7 f1
-    ; asm: movzwl %si, %r10d
-    [-,%r10]            v31 = uextend.i64 v12           ; bin: 44 0f b7 d6
-    ; asm: movzwl %r10w, %ecx
-    [-,%rcx]            v32 = uextend.i64 v13           ; bin: 41 0f b7 ca
-
-    trap user0                                          ; bin: user0 0f 0b
-}
-
-; Tests for i64/i32 conversion instructions.
-function %I64_I32() {
-block0:
-    [-,%rcx]            v1 = iconst.i64 1
-    [-,%rsi]            v2 = iconst.i64 2
-    [-,%r10]            v3 = iconst.i64 3
-
-    [-,%rcx]            v11 = ireduce.i32 v1            ; bin:
-    [-,%rsi]            v12 = ireduce.i32 v2            ; bin:
-    [-,%r10]            v13 = ireduce.i32 v3            ; bin:
-
-    ; asm: movslq %ecx, %rsi
-    [-,%rsi]            v20 = sextend.i64 v11           ; bin: 48 63 f1
-    ; asm: movslq %esi, %r10
-    [-,%r10]            v21 = sextend.i64 v12           ; bin: 4c 63 d6
-    ; asm: movslq %r10d, %rcx
-    [-,%rcx]            v22 = sextend.i64 v13           ; bin: 49 63 ca
-
-    ; asm: movl %ecx, %esi
-    [-,%rsi]            v30 = uextend.i64 v11           ; bin: 89 ce
-    ; asm: movl %esi, %r10d
-    [-,%r10]            v31 = uextend.i64 v12           ; bin: 41 89 f2
-    ; asm: movl %r10d, %ecx
-    [-,%rcx]            v32 = uextend.i64 v13           ; bin: 44 89 d1
-
-    trap user0                                          ; bin: user0 0f 0b
-}
-
-; Tests for i64 jump table instructions.
-function %I64_JT(i64 [%rdi]) {
-    jt0 = jump_table [block1, block2, block3]
-
-block0(v0: i64 [%rdi]):
-    ; Note: The next two lines will need to change whenever instructions are
-    ;        added or removed from this test.
-    [-, %rax]           v1 = jump_table_base.i64 jt0    ; bin: 48 8d 05 00000039 PCRelRodata4(jt0)
-    [-, %r10]           v2 = jump_table_base.i64 jt0    ; bin: 4c 8d 15 00000032 PCRelRodata4(jt0)
-
-    [-, %rbx]           v10 = iconst.i64 1
-    [-, %r13]           v11 = iconst.i64 2
-
-    [-, %rax]           v20 = jump_table_entry.i64 v10, v1, 4, jt0      ; bin: 48 63 04 98
-    [-, %rax]           v21 = jump_table_entry.i64 v10, v2, 4, jt0      ; bin: 49 63 04 9a
-    [-, %rax]           v22 = jump_table_entry.i64 v11, v1, 4, jt0      ; bin: 4a 63 04 a8
-    [-, %rax]           v23 = jump_table_entry.i64 v11, v2, 4, jt0      ; bin: 4b 63 04 aa
-
-    [-, %r10]           v30 = jump_table_entry.i64 v10, v1, 4, jt0      ; bin: 4c 63 14 98
-    [-, %r10]           v31 = jump_table_entry.i64 v10, v2, 4, jt0      ; bin: 4d 63 14 9a
-    [-, %r10]           v32 = jump_table_entry.i64 v11, v1, 4, jt0      ; bin: 4e 63 14 a8
-    [-, %r10]           v33 = jump_table_entry.i64 v11, v2, 4, jt0      ; bin: 4f 63 14 aa
-
-    fallthrough block10
-
-block10:
-    indirect_jump_table_br v10, jt0             ; bin: ff e3
-block11:
-    indirect_jump_table_br v11, jt0             ; bin: 41 ff e5
-
-block1:
-    fallthrough block2
-block2:
-    fallthrough block3
-block3:
-    trap user0
-}
-
-function %r12_r13_loads() {
-block0:
-    [-,%r12]            v1 = iconst.i64 0x0123_4567_89ab_cdef
-    [-,%r13]            v2 = iconst.i64 0xfedc_ba98_7654_3210
-    [-,%rax]            v3 = iconst.i64 0x1
-
-    ;; Simple GPR load.
-    ; asm: movq (%r12), %rdx
-    [-,%rdx]            v4 = load.i64 notrap v1 ; bin: 49 8b 14 24
-    ; asm: movq (%r13), %rdx
-    [-,%rdx]            v5 = load.i64 notrap v2 ; bin: 49 8b 55 00
-
-    ;; Load with disp8.
-    ; asm: movq 0x1(%r12), %rdx
-    [-,%rdx]            v6 = load.i64 notrap v1+1 ; bin: 49 8b 54 24 01
-    ; asm: movq 0x1(%r13), %rdx
-    [-,%rdx]            v7 = load.i64 notrap v2+1 ; bin: 49 8b 55 01
-
-    ;; Load with disp32.
-    ; asm: movq 0x100(%r12), %rdx
-    [-,%rdx]            v8 = load.i64 notrap v1+256 ; bin: 49 8b 94 24 00000100
-    ; asm: movq 0x100(%r13), %rdx
-    [-,%rdx]            v9 = load.i64 notrap v2+256 ; bin: 49 8b 95 00000100
-
-    ;; Load for base+index.
-    ; asm: movq (%r12, %rax, 1), %rdx
-    [-,%rdx]            v10 = load_complex.i64 notrap v1+v3 ; bin: 49 8b 14 04
-    ; asm: movq (%r13, %rax, 1), %rdx
-    [-,%rdx]            v11 = load_complex.i64 notrap v2+v3 ; bin: 49 8b 54 05 00
-
-    ;; Now for FP values.
-    ; asm: movss (%r12), %xmm0
-    [-,%xmm0]            v12 = load.f32 notrap v1 ; bin: f3 41 0f 10 04 24
-    ; asm: movss (%r13), %xmm0
-    [-,%xmm0]            v13 = load.f32 notrap v2 ; bin: f3 41 0f 10 45 00
-
-    ;; Load with disp8.
-    ; asm: movss 0x1(%r12), %xmm0
-    [-,%xmm0]            v14 = load.f32 notrap v1+1 ; bin: f3 41 0f 10 44 24 01
-    ; asm: movss 0x1(%r13), %xmm0
-    [-,%xmm0]            v15 = load.f32 notrap v2+1 ; bin: f3 41 0f 10 45 01
-
-    ;; Load with disp32.
-    ; asm: movss 0x100(%r12), %xmm0
-    [-,%xmm0]            v16 = load.f32 notrap v1+256 ; bin: f3 41 0f 10 84 24 00000100
-    ; asm: movss 0x100(%r13), %xmm0
-    [-,%xmm0]            v17 = load.f32 notrap v2+256 ; bin: f3 41 0f 10 85 00000100
-
-    ;; Load for base+index.
-    ; asm: movss (%r12, %rax, 1), %xmm0
-    [-,%xmm0]            v18 = load_complex.f32 notrap v1+v3 ; bin: f3 41 0f 10 04 04
-    ; asm: movss (%r13, %rax, 1), %xmm0
-    [-,%xmm0]            v19 = load_complex.f32 notrap v2+v3 ; bin: f3 41 0f 10 44 05 00
-
-    return
-}
-
-function %r12_r13_stores() {
-block0:
-    [-,%r12]            v1 = iconst.i64 0x0123_4567_89ab_cdef
-    [-,%r13]            v2 = iconst.i64 0xfedc_ba98_7654_3210
-    [-,%rax]            v3 = iconst.i64 0x1
-    [-,%xmm0]           v4 = f32const 0x1.0
-
-    ;; Simple GPR load.
-    ; asm: movq %rax, (%r12)
-    store notrap v3, v1; bin: 49 89 04 24
-    ; asm: movq (%r13), %rdx
-    store notrap v3, v2; bin: 49 89 45 00
-
-    ; asm: movq %rax, 0x1(%r12)
-    store notrap v3, v1+1; bin: 49 89 44 24 01
-    ; asm: movq %rax, 0x1(%r13)
-    store notrap v3, v2+1; bin: 49 89 45 01
-
-    ; asm: movq %rax, 0x100(%r12)
-    store notrap v3, v1+256; bin: 49 89 84 24 00000100
-    ; asm: movq %rax, 0x100(%r13)
-    store notrap v3, v2+256; bin: 49 89 85 00000100
-
-    ; asm: movq %rax, (%r12, %rax, 1)
-    store_complex notrap v3, v1+v3; bin: 49 89 04 04
-    ; asm: movq %rax, (%r13, %rax, 1)
-    store_complex notrap v3, v2+v3; bin: 49 89 44 05 00
-
-    ; asm: movb %al, (%r12)
-    istore8 notrap v3, v1; bin: 41 88 04 24
-    ; asm: movb %al, (%r13)
-    istore8 notrap v3, v2; bin: 41 88 45 00
-
-    ; asm: movb %al, 0x1(%r12)
-    istore8 notrap v3, v1+1; bin: 41 88 44 24 01
-    ; asm: movb %al, 0x1(%r13)
-    istore8 notrap v3, v2+1; bin: 41 88 45 01
-
-    ; asm: movb %al, 0x100(%r12)
-    istore8 notrap v3, v1+256; bin: 41 88 84 24 00000100
-    ; asm: movb %al, 0x100(%r13)
-    istore8 notrap v3, v2+256; bin: 41 88 85 00000100
-
-    ; asm: movb %al, (%r12, %rax, 1)
-    istore8_complex notrap v3, v1+v3; bin: 41 88 04 04
-    ; asm: movb %al, (%r13, %rax, 1)
-    istore8_complex notrap v3, v2+v3; bin: 41 88 44 05 00
-
-    ; asm: movss %xmm0, (%r12)
-    store notrap v4, v1; bin: f3 41 0f 11 04 24
-    ; asm: movss %xmm0, (%r13)
-    store notrap v4, v2; bin: f3 41 0f 11 45 00
-
-    ; asm: movss %xmm0, 0x1(%r12)
-    store notrap v4, v1+1; bin: f3 41 0f 11 44 24 01
-    ; asm: movss %xmm0, 0x1(%r13)
-    store notrap v4, v2+1; bin: f3 41 0f 11 45 01
-
-    ; asm: movss %xmm0, 0x100(%r12)
-    store notrap v4, v1+256; bin: f3 41 0f 11 84 24 00000100
-    ; asm: movss %xmm0, 0x100(%r13)
-    store notrap v4, v2+256; bin: f3 41 0f 11 85 00000100
-
-    ; asm: movss %xmm0, (%r12, %rax, 1)
-    store_complex notrap v4, v1+v3; bin: f3 41 0f 11 04 04
-    ; asm: movss %xmm0, (%r13, %rax, 1)
-    store_complex notrap v4, v2+v3; bin: f3 41 0f 11 44 05 00
-
-    return
-}
-
-function %B64() {
-block0:
-    [-, %rax]   v1 = bconst.b64 true    ; bin: 40 b8 00000001
-    [-, %r10]   v0 = bconst.b64 true    ; bin: 41 ba 00000001
-    return
-}
-
-function %V128() {
-block0:
-    [-,%r10]            v3 = iconst.i64 0x2102_0304_f1f2_f3f4  ; bin: 49 ba 21020304f1f2f3f4
-    [-, %xmm9]          v4 = vconst.i32x4 [0 1 2 3]  ; bin: 44 0f 10 0d 0000000f PCRelRodata4(33)
-                        store v4, v3  ; bin: heap_oob 45 0f 11 0a
-
-    [-, %r11]           v5 = iconst.i64 0x1234
-    [-, %xmm2]          v6 = load.i32x4 v5  ; bin: heap_oob 41 0f 10 13
-                        return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/br-i128.clif b/cranelift/filetests/filetests/isa/x86/br-i128.clif
deleted file mode 100644
index fccc691aa3..0000000000
--- a/cranelift/filetests/filetests/isa/x86/br-i128.clif
+++ /dev/null
@@ -1,42 +0,0 @@
-test compile
-target x86_64 legacy
-
-function u0:0(i128) -> i8 fast {
-block0(v0: i128):
-    brz v0, block2
-    ; check: v0 = iconcat v3, v4
-    ; nextln: v5 = icmp_imm eq v3, 0
-    ; nextln: v6 = icmp_imm eq v4, 0
-    ; nextln: v7 = band v5, v6
-    ; nextln: brnz v7, block2
-    jump block1
-
-block1:
-    v1 = iconst.i8 0
-    return v1
-
-block2:
-    v2 = iconst.i8 1
-    return v2
-}
-
-function u0:1(i128) -> i8 fast {
-block0(v0: i128):
-    brnz v0, block2
-    ; check: v0 = iconcat v3, v4
-    ; nextln: brnz v3, block2
-    ; nextln: fallthrough block3
-
-    ; check: block3:
-    ; nextln: brnz.i64 v4, block2
-    jump block1
-    ; nextln: fallthrough block1
-
-block1:
-    v1 = iconst.i8 0
-    return v1
-
-block2:
-    v2 = iconst.i8 1
-    return v2
-}
diff --git a/cranelift/filetests/filetests/isa/x86/brz-i8.clif b/cranelift/filetests/filetests/isa/x86/brz-i8.clif
deleted file mode 100644
index fda005bc81..0000000000
--- a/cranelift/filetests/filetests/isa/x86/brz-i8.clif
+++ /dev/null
@@ -1,38 +0,0 @@
-test compile
-target x86_64 legacy
-
-function u0:0() -> b1 {
-block0:
-    v0 = iconst.i8 0
-    ; check: v0 = iconst.i8 0
-    brz v0, block1
-    ; nextln: v3 = uextend.i32 v0
-    ; nextln: brz v3, block1
-    jump block2
-
-block1:
-    v1 = bconst.b1 true
-    return v1
-
-block2:
-    v2 = bconst.b1 false
-    return v2
-}
-
-function u0:1() -> b1 {
-block0:
-    v0 = iconst.i8 0
-    ; check: v0 = iconst.i8 0
-    brnz v0, block1
-    ; nextln: v3 = uextend.i32 v0
-    ; nextln: brnz v3, block1
-    jump block2
-
-block1:
-    v1 = bconst.b1 false
-    return v1
-
-block2:
-    v2 = bconst.b1 true
-    return v2
-}
diff --git a/cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif b/cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif
deleted file mode 100644
index eb537d7c1a..0000000000
--- a/cranelift/filetests/filetests/isa/x86/brz-x86_32-i64.clif
+++ /dev/null
@@ -1,36 +0,0 @@
-test compile
-target i686 legacy
-
-function u0:0(i32, i32) -> i32 {
-block0(v0: i32, v1: i32):
-    v2 = iconcat v0, v1
-    ; check:      v6 = fill v0
-    ; nextln:     v3 = icmp_imm eq v6, 0
-    ; nextln:     v7 = fill v1
-    ; nextln:     v4 = icmp_imm eq v7, 0
-    ; nextln:     v5 = band v3, v4
-    ; nextln:     brnz v5, block1
-    brz v2, block1
-    jump block2
-block1:
-    trap unreachable
-block2:
-    trap unreachable
-}
-
-function u0:1(i32, i32) -> i32 {
-block0(v0: i32, v1: i32):
-    v2 = iconcat v0, v1
-    ; check:      v3 = fill v0
-    ; nextln:     brnz v3, block1
-    ; nextln:     fallthrough block3
-    ; check:  block3:
-    ; nextln:     v4 = fill.i32 v1
-    ; nextln:     brnz v4, block1
-    brnz v2, block1
-    jump block2
-block1:
-    trap unreachable
-block2:
-    trap unreachable
-}
diff --git a/cranelift/filetests/filetests/isa/x86/extend-i128.clif b/cranelift/filetests/filetests/isa/x86/extend-i128.clif
deleted file mode 100644
index e7da3f0387..0000000000
--- a/cranelift/filetests/filetests/isa/x86/extend-i128.clif
+++ /dev/null
@@ -1,37 +0,0 @@
-test compile
-target x86_64 legacy
-
-function u0:0() -> b1 {
-block0:
-    v0 = iconst.i64 0xffff_ffff_eeee_0000
-    ; check: v0 = iconst.i64 0xffff_ffff_eeee_0000
-    ; nextln: v2 -> v0
-    v1 = uextend.i128 v0
-    ; nextln: v7 = iconst.i64 0
-    ; nextln: v3 -> v7
-    ; nextln: v1 = iconcat v0, v7
-
-    v2, v3 = isplit v1
-    v4 = icmp_imm eq v2, 0xffff_ffff_eeee_0000
-    v5 = icmp_imm eq v3, 0
-
-    v6 = band v4, v5
-    return v6
-}
-
-function u0:1() -> b1 {
-block0:
-    v0 = iconst.i64 0xffff_ffff_eeee_0000
-    ; check: v0 = iconst.i64 0xffff_ffff_eeee_0000
-    ; nextln: v2 -> v0
-    v1 = sextend.i128 v0
-    ; nextln: v8 = copy v0
-    ; nextln: v7 = sshr_imm v8, 63
-    ; nextln: v3 -> v7
-
-    v2, v3 = isplit v1
-    v4 = icmp_imm eq v2, 0xffff_ffff_eeee_0000
-    v5 = icmp_imm eq v3, 0xffff_ffff_ffff_ffff
-    v6 = band v4, v5
-    return v6
-}
diff --git a/cranelift/filetests/filetests/isa/x86/extend-i64.clif b/cranelift/filetests/filetests/isa/x86/extend-i64.clif
deleted file mode 100644
index a3d892c488..0000000000
--- a/cranelift/filetests/filetests/isa/x86/extend-i64.clif
+++ /dev/null
@@ -1,37 +0,0 @@
-test compile
-target i686 legacy
-
-function u0:0() -> b1 {
-block0:
-    v0 = iconst.i32 0xffff_ee00
-    ; check: v0 = iconst.i32 0xffff_ee00
-    ; nextln: v2 -> v0
-    v1 = uextend.i64 v0
-    ; nextln: v7 = iconst.i32 0
-    ; nextln: v3 -> v7
-    ; nextln: v1 = iconcat v0, v7
-
-    v2, v3 = isplit v1
-    v4 = icmp_imm eq v2, 0xffff_ee00
-    v5 = icmp_imm eq v3, 0
-
-    v6 = band v4, v5
-    return v6
-}
-
-function u0:1() -> b1 {
-block0:
-    v0 = iconst.i32 0xffff_ee00
-    ; check: v0 = iconst.i32 0xffff_ee00
-    ; nextln: v2 -> v0
-    v1 = sextend.i64 v0
-    ; nextln: v10 = copy v0
-    ; nextln: v7 = sshr_imm v10, 31
-    ; nextln: v3 -> v7
-
-    v2, v3 = isplit v1
-    v4 = icmp_imm eq v2, 0xffff_ee00
-    v5 = icmp_imm eq v3, 0xffff_ffff
-    v6 = band v4, v5
-    return v6
-}
diff --git a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif
deleted file mode 100644
index 3bc9adf5bc..0000000000
--- a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants-32bit.clif
+++ /dev/null
@@ -1,17 +0,0 @@
-; Check that floating-point and integer constants equal to zero are optimized correctly.
-test binemit
-target i686 legacy
-
-function %foo() -> f32 fast {
-block0:
-  ; asm: xorps %xmm0, %xmm0
-  [-,%xmm0]    v0 = f32const 0.0     ; bin: 0f 57 c0
-  return v0
-}
-
-function %bar() -> f64 fast {
-block0:
-  ; asm: xorpd %xmm0, %xmm0
-  [-,%xmm0]    v1 = f64const 0.0     ; bin: 66 0f 57 c0
-  return v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif b/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif
deleted file mode 100644
index 6fff51c7b1..0000000000
--- a/cranelift/filetests/filetests/isa/x86/floating-point-zero-constants.clif
+++ /dev/null
@@ -1,31 +0,0 @@
-; Check that floating-point constants equal to zero are optimized correctly.
-test binemit
-target x86_64 legacy
-
-function %zero_const_32bit_no_rex() -> f32 fast {
-block0:
-  ; asm: xorps %xmm0, %xmm0
-  [-,%xmm0]    v0 = f32const 0.0     ; bin: 40 0f 57 c0
-  return v0
-}
-
-function %zero_const_32bit_rex() -> f32 fast {
-block0:
-  ; asm: xorps %xmm8, %xmm8
-  [-,%xmm8]    v1 = f32const 0.0     ; bin: 45 0f 57 c0
-  return v1
-}
-
-function %zero_const_64bit_no_rex() -> f64 fast {
-block0:
-  ; asm: xorpd %xmm0, %xmm0
-  [-,%xmm0]    v0 = f64const 0.0     ; bin: 66 40 0f 57 c0
-  return v0
-}
-
-function %zero_const_64bit_rex() -> f64 fast {
-block0:
-  ; asm: xorpd %xmm8, %xmm8
-  [-,%xmm8]    v1 = f64const 0.0     ; bin: 66 45 0f 57 c0
-  return v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif b/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif
deleted file mode 100644
index eda7b6dffd..0000000000
--- a/cranelift/filetests/filetests/isa/x86/i128-isplit-forward-jump.clif
+++ /dev/null
@@ -1,25 +0,0 @@
-test compile
-target x86_64 legacy
-
-function u0:0() -> i128 system_v {
-block0:
-    v0 = iconst.i64 0
-    v1 = iconst.i64 0
-    v2 = iconcat v0, v1
-    jump block5
-
-block2:
-    jump block4(v27)
-
-block4(v23: i128):
-    return v23
-
-block5:
-    v27 = bxor.i128 v2, v2
-    v32 = iconst.i32 0
-    brz v32, block2
-    jump block6
-
-block6:
-    trap user0
-}
diff --git a/cranelift/filetests/filetests/isa/x86/i128.clif b/cranelift/filetests/filetests/isa/x86/i128.clif
deleted file mode 100644
index b171c0ccfd..0000000000
--- a/cranelift/filetests/filetests/isa/x86/i128.clif
+++ /dev/null
@@ -1,46 +0,0 @@
-test compile
-target x86_64 legacy
-
-function u0:0(i64, i64) -> i128 fast {
-block0(v0: i64, v1: i64):
-;check: block0(v0: i64 [%rdi], v1: i64 [%rsi], v3: i64 [%rbp]):
-
-    v2 = iconcat.i64 v0, v1
-    ; check: regmove v0, %rdi -> %rax
-    ; check: regmove v1, %rsi -> %rdx
-
-    return v2
-    ; check: v4 = x86_pop.i64
-    ; check: return v0, v1, v4
-}
-
-function u0:1(i128) -> i64, i64 fast {
-block0(v0: i128):
-; check: block0(v3: i64 [%rdi], v4: i64 [%rsi], v5: i64 [%rbp]):
-
-    v1, v2 = isplit v0
-    ; check: regmove v3, %rdi -> %rax
-    ; check: regmove v4, %rsi -> %rdx
-
-    return v1, v2
-    ; check: v6 = x86_pop.i64
-    ; check: return v3, v4, v6
-}
-
-function u0:2(i64, i128) fast {
-; check: block0(v0: i64 [%rdi], v2: i64 [%rsi], v3: i64 [%rdx], v6: i64 [%rbp]):
-block0(v0: i64, v1: i128):
-    ; check: store v2, v0+8
-    ; check: store v3, v0+16
-    store v1, v0+8
-    return
-}
-
-function u0:3(i64) -> i128 fast {
-block0(v0: i64):
-    ; check: v2 = load.i64 v0+8
-    ; check: v3 = load.i64 v0+16
-    v1 = load.i128 v0+8
-    ; check: return v2, v3, v5
-    return v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif b/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif
deleted file mode 100644
index dd75cac4a1..0000000000
--- a/cranelift/filetests/filetests/isa/x86/ireduce-i16-to-i8.clif
+++ /dev/null
@@ -1,8 +0,0 @@
-test compile
-target x86_64 legacy
-
-function u0:0(i16) -> i8 fast {
-block0(v0: i16):
-    v1 = ireduce.i8 v0
-    return v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif b/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif
deleted file mode 100644
index 9aedb61001..0000000000
--- a/cranelift/filetests/filetests/isa/x86/isplit-not-legalized-twice.clif
+++ /dev/null
@@ -1,20 +0,0 @@
-test compile
-target x86_64 legacy
-
-function u0:0(i64, i64) -> i128 system_v {
-block0(v0: i64, v1: i64):
-    trap user0
-
-block30:
-    v245 = iconst.i64 0
-    v246 = iconcat v245, v245
-    ; The next instruction used to be legalized twice, causing a panic the second time.
-    v250, v251 = isplit.i128 v370
-    v252, v253 = isplit v246
-    trap user0
-
-block45:
-    v369 = iconst.i64 0
-    v370 = load.i128 v369
-    trap user0
-}
diff --git a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif b/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif
deleted file mode 100644
index 948fa34d99..0000000000
--- a/cranelift/filetests/filetests/isa/x86/isub_imm-i8.clif
+++ /dev/null
@@ -1,14 +0,0 @@
-test compile
-set opt_level=speed_and_size
-target x86_64 legacy
-
-function u0:0(i8) -> i8 fast {
-block0(v0: i8):
-    v1 = iconst.i8 0
-    v2 = isub v1, v0
-    ; check:  uextend.i32
-    ; nextln: iconst.i32
-    ; nextln: isub
-    ; nextln: ireduce.i8
-    return v2
-}
diff --git a/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif b/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif
deleted file mode 100644
index a08356ca53..0000000000
--- a/cranelift/filetests/filetests/isa/x86/jump_i128_param_unused.clif
+++ /dev/null
@@ -1,10 +0,0 @@
-test compile
-target x86_64 legacy
-
-function u0:0(i128) system_v {
-block0(v0: i128):
-    jump block1(v0)
-
-block1(v1: i128):
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif
deleted file mode 100644
index 9d88db9d17..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-bint-i8.clif
+++ /dev/null
@@ -1,10 +0,0 @@
-test compile
-
-target x86_64 legacy
-
-function u0:0() -> i8 fast {
-block0:
-    v14 = bconst.b1 false
-    v15 = bint.i8 v14
-    return v15
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif b/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif
deleted file mode 100644
index acdd21c9f0..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-bnot.clif
+++ /dev/null
@@ -1,28 +0,0 @@
-test compile
-
-target x86_64 legacy
-
-function u0:51(i64, i64) system_v {
-    ss0 = explicit_slot 0
-    ss1 = explicit_slot 1
-    ss2 = explicit_slot 1
-    ss3 = explicit_slot 1
-
-block0(v0: i64, v1: i64):
-    v2 = stack_addr.i64 ss1
-    v3 = load.i8 v1
-    store v3, v2
-    v4 = stack_addr.i64 ss2
-    v5 = stack_addr.i64 ss3
-    jump block1
-
-block1:
-    v6 = load.i8 v2
-    store v6, v5
-    v7 = load.i8 v5
-    v8 = bnot v7
-    store v8, v4
-    v9 = load.i8 v4
-    store v9, v0
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif b/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif
deleted file mode 100644
index f64108531c..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-br-icmp.clif
+++ /dev/null
@@ -1,46 +0,0 @@
-test legalizer
-
-target x86_64 legacy
-
-function %br_icmp(i64) fast {
-block0(v0: i64):
-    v1 = iconst.i64 0
-    br_icmp eq v0, v1, block1
-    jump block1
-
-block1:
-    return
-}
-
-; sameln: function %br_icmp(i64 [%rdi]) fast {
-; nextln:                                 block0(v0: i64):
-; nextln: [RexOp1pu_id#b8]                    v1 = iconst.i64 0
-; nextln: [RexOp1icscc#8039]                  v2 = icmp eq v0, v1
-; nextln: [RexOp1t8jccb#75]                   brnz v2, block1
-; nextln: [Op1jmpb#eb]                        jump block1
-; nextln: 
-; nextln:                                 block1:
-; nextln: [Op1ret#c3]                         return
-; nextln: }
-
-
-function %br_icmp_args(i64) fast {
-block0(v0: i64):
-    v1 = iconst.i64 0
-    br_icmp eq v0, v1, block1(v0)
-    jump block1(v0)
-
-block1(v2: i64):
-    return
-}
-
-; sameln: function %br_icmp_args(i64 [%rdi]) fast {
-; nextln:                                 block0(v0: i64):
-; nextln: [RexOp1pu_id#b8]                    v1 = iconst.i64 0
-; nextln: [RexOp1icscc#8039]                  v3 = icmp eq v0, v1
-; nextln: [RexOp1t8jccb#75]                   brnz v3, block1(v0)
-; nextln: [Op1jmpb#eb]                        jump block1(v0)
-; nextln: 
-; nextln:                                 block1(v2: i64):
-; nextln: [Op1ret#c3]                         return
-; nextln: }
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif b/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif
deleted file mode 100644
index c931d6cacf..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-br-table.clif
+++ /dev/null
@@ -1,31 +0,0 @@
-test compile
-set opt_level=speed_and_size
-target x86_64 legacy
-; regex: V=v\d+
-; regex: BB=block\d+
-
-function u0:0(i64) system_v {
-    ss0 = explicit_slot 1
-    jt0 = jump_table [block1]
-
-block0(v0: i64):
-    v1 = stack_addr.i64 ss0
-    v2 = load.i8 v1
-    br_table v2, block2, jt0
-; check:     $(oob=$V) = ifcmp_imm $(idx=$V), 1
-; block2 is replaced by block1 by fold_redundant_jump
-; nextln:    brif uge $oob, block1
-; nextln:    fallthrough $(inb=$BB)
-; check:   $inb:
-; nextln:    $(final_idx=$V) = uextend.i64 $idx
-; nextln:    $(base=$V) = jump_table_base.i64 jt0
-; nextln:    $(rel_addr=$V) = jump_table_entry $final_idx, $base, 4, jt0
-; nextln:    $(addr=$V) = iadd $base, $rel_addr
-; nextln:    indirect_jump_table_br $addr, jt0
-
-block2:
-    jump block1
-
-block1:
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif
deleted file mode 100644
index 7c135d54ae..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-byte-ops-i8.clif
+++ /dev/null
@@ -1,36 +0,0 @@
-test compile
-target x86_64 legacy
-
-; regex: V=v\d+
-
-function u0:0(i8, i8) fast {
-fn0 = %black_box(i8)
-ss0 = explicit_slot 1 ; black box
-
-block0(v0: i8, v1: i8):
-    v99 = stack_addr.i64 ss0
-
-    ; check: istore8 $(V), $(V)
-
-    v2 = band v0, v1
-    store v2, v99
-    v3 = bor v0, v1
-    store v3, v99
-    v4 = bxor v0, v1
-    store v4, v99
-    v5 = bnot v0
-    store v5, v99
-    v6 = band_not v0, v1
-    store v6, v99
-    v7 = bor_not v0, v1
-    store v7, v99
-    v8 = bxor_not v0, v1
-    store v8, v99
-    v9 = band_imm v0, 42
-    store v9, v99
-    v10 = bor_imm v0, 42
-    store v10, v99
-    v11 = bxor_imm v0, 42
-    store v11, v99
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-call.clif b/cranelift/filetests/filetests/isa/x86/legalize-call.clif
deleted file mode 100644
index b21099281e..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-call.clif
+++ /dev/null
@@ -1,14 +0,0 @@
-; Test legalization of a non-colocated call in 64-bit non-PIC mode.
-test legalizer
-set opt_level=speed_and_size
-target x86_64 legacy haswell
-
-function %call() {
-    fn0 = %foo()
-block0:
-    call fn0()
-    return
-}
-
-; check:  v0 = func_addr.i64 fn0
-; nextln: call_indirect sig0, v0()
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif
deleted file mode 100644
index af5e158b07..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-clz-ctz-i8.clif
+++ /dev/null
@@ -1,25 +0,0 @@
-test compile
-target x86_64 legacy
-
-; regex: V=v\d+
-
-function u0:0(i8) -> i8, i8 fast {
-block0(v0: i8):
-    v1 = clz v0
-    ; check: v3 = uextend.i32 v0
-    ; nextln: v6 = iconst.i32 -1
-    ; nextln: v7 = iconst.i32 31
-    ; nextln: v8, v9 = x86_bsr v3
-    ; nextln: v10 = selectif.i32 eq v9, v6, v8
-    ; nextln: v4 = isub v7, v10
-    ; nextln: v5 = iadd_imm v4, -24
-    ; nextln: v1 = ireduce.i8 v5
-    v2 = ctz v0
-    ; nextln: v11 = uextend.i32 v0
-    ; nextln: v12 = bor_imm v11, 256
-    ; nextln: v14 = iconst.i32 32
-    ; nextln: v15, v16 = x86_bsf v12
-    ; nextln: v13 = selectif.i32 eq v16, v14, v15
-    ; nextln: v2 = ireduce.i8 v13
-    return v1, v2
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-custom.clif b/cranelift/filetests/filetests/isa/x86/legalize-custom.clif
deleted file mode 100644
index 0c51e064dd..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-custom.clif
+++ /dev/null
@@ -1,133 +0,0 @@
-; Test the custom legalizations.
-test legalizer
-target i686 legacy
-target x86_64 legacy
-
-; regex: V=v\d+
-; regex: BB=block\d+
-
-function %cond_trap(i32) {
-block0(v1: i32):
-    trapz v1, user67
-    return
-    ; check: block0(v1: i32
-    ; nextln: $(f=$V) = ifcmp_imm v1, 0
-    ; nextln: trapif eq $f, user67
-    ; nextln: return
-}
-
-function %cond_trap2(i32) {
-block0(v1: i32):
-    trapnz v1, int_ovf
-    return
-    ; check: block0(v1: i32
-    ; nextln: $(f=$V) = ifcmp_imm v1, 0
-    ; nextln: trapif ne $f, int_ovf
-    ; nextln: return
-}
-
-function %cond_trap_b1(i32) {
-block0(v1: i32):
-    v2 = icmp_imm eq v1, 6
-    trapz v2, user7
-    return
-    ; check: block0(v1: i32
-    ; check: brnz v2, $(new=$BB)
-    ; check: jump $(trap=$BB)
-    ; check: $trap:
-    ; nextln: trap user7
-    ; check: $new:
-    ; nextln: return
-}
-
-function %cond_trap2_b1(i32) {
-block0(v1: i32):
-    v2 = icmp_imm eq v1, 6
-    trapnz v2, user9
-    return
-    ; check: block0(v1: i32
-    ; check: brz v2, $(new=$BB)
-    ; check: jump $(trap=$BB)
-    ; check: $trap:
-    ; nextln: trap user9
-    ; check: $new:
-    ; nextln: return
-}
-
-function %f32const() -> f32 {
-block0:
-    v1 = f32const 0x1.0p1
-    ; check: $(tmp=$V) = iconst.i32
-    ; check: v1 = bitcast.f32 $tmp
-    return v1
-}
-
-function %select_f64(f64, f64, i32) -> f64 {
-block0(v0: f64, v1: f64, v2: i32):
-    v3 = select v2, v0, v1
-    ; check:  brnz v2, $(new=$BB)(v0)
-    ; nextln: jump $new(v1)
-    ; check: $new(v3: f64):
-    ; nextln: return v3
-    return v3
-}
-
-function %f32_min(f32, f32) -> f32 {
-block0(v0: f32, v1: f32):
-    v2 = fmin v0, v1
-    return v2
-    ; check: $(vnat=$V) = x86_fmin.f32 v0, v1
-    ; nextln: jump $(done=$BB)($vnat)
-
-    ; check: $(uno=$BB):
-    ; nextln: $(vuno=$V) = fadd.f32 v0, v1
-    ; nextln: jump $(done=$BB)($vuno)
-
-    ; check: $(ueq=$BB):
-    ; check: $(veq=$V) = bor.f32 v0, v1
-    ; nextln: jump $(done=$BB)($veq)
-
-    ; check: $done(v2: f32):
-    ; nextln: return v2
-}
-
-function %ineg_legalized_i8() {
-block0:
-    v0 = iconst.i8 1
-    v1 = ineg v0
-    ; check: v2 = iconst.i32 1
-    ; nextln: v0 = ireduce.i8 v2
-    ; nextln: v3 = iconst.i8 0
-    ; nextln: v4 = uextend.i32 v3
-    ; nextln: v5 = uextend.i32 v0
-    ; nextln: v6 = isub v4, v5
-    ; nextln: v1 = ireduce.i8 v6
-
-    return
-}
-
-function %ineg_legalized_i16() {
-block0:
-    v0 = iconst.i16 1
-    v1 = ineg v0
-    ; check: v2 = iconst.i32 1
-    ; nextln: v0 = ireduce.i16 v2
-    ; nextln: v3 = iconst.i16 0
-    ; nextln: v4 = uextend.i32 v3
-    ; nextln: v5 = uextend.i32 v0
-    ; nextln: v6 = isub v4, v5
-    ; nextln: v1 = ireduce.i16 v6
-
-    return
-}
-
-function %ineg_legalized_i32() {
-block0:
-    v0 = iconst.i32 1
-    v1 = ineg v0
-    ; check: v0 = iconst.i32 1
-    ; nextln: v2 = iconst.i32 0
-    ; nextln: v1 = isub v2, v0
-
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif b/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif
deleted file mode 100644
index 9e579c1bcd..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-div-traps.clif
+++ /dev/null
@@ -1,192 +0,0 @@
-; Test the division legalizations.
-test legalizer
-; See also legalize-div.clif.
-set avoid_div_traps=1
-target x86_64 legacy
-
-; regex: V=v\d+
-; regex: BB=block\d+
-
-function %udiv(i64, i64) -> i64 {
-block0(v0: i64, v1: i64):
-    ; check: block0(
-    v2 = udiv v0, v1
-    ; nextln: $(fz=$V) = ifcmp_imm v1, 0
-    ; nextln: trapif eq $fz, int_divz
-    ; nextln: $(hi=$V) = iconst.i64 0
-    ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1
-    return v2
-    ; nextln: return $d
-}
-
-function %udiv_0(i64) -> i64 {
-block0(v0: i64):
-    ; check: block0(
-    v1 = iconst.i64 0
-    ; nextln: v1 = iconst.i64 0
-    v2 = udiv v0, v1
-    ; nextln: $(fz=$V) = ifcmp_imm v1, 0
-    ; nextln: trapif eq $fz, int_divz
-    ; nextln: $(hi=$V) = iconst.i64 0
-    ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1
-    return v2
-    ; nextln: return $d
-}
-
-function %udiv_minus_1(i64) -> i64 {
-block0(v0: i64):
-    ; check: block0(
-    v1 = iconst.i64 -1
-    ; nextln: v1 = iconst.i64 -1
-    v2 = udiv v0, v1
-    ; nextln: $(hi=$V) = iconst.i64 0
-    ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1
-    return v2
-    ; nextln: return $d
-}
-
-function %urem(i64, i64) -> i64 {
-block0(v0: i64, v1: i64):
-    ; check: block0(
-    v2 = urem v0, v1
-    ; nextln: $(fz=$V) = ifcmp_imm v1, 0
-    ; nextln: trapif eq $fz, int_divz
-    ; nextln: $(hi=$V) = iconst.i64 0
-    ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1
-    return v2
-    ; nextln: return $r
-}
-
-function %urem_0(i64) -> i64 {
-block0(v0: i64):
-    ; check: block0(
-    v1 = iconst.i64 0
-    ; nextln: v1 = iconst.i64 0
-    v2 = urem v0, v1
-    ; nextln: $(fz=$V) = ifcmp_imm v1, 0
-    ; nextln: trapif eq $fz, int_divz
-    ; nextln: $(hi=$V) = iconst.i64 0
-    ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1
-    return v2
-    ; nextln: return $r
-}
-
-function %urem_minus_1(i64) -> i64 {
-block0(v0: i64):
-    ; check: block0(
-    v1 = iconst.i64 -1
-    ; nextln: v1 = iconst.i64 -1
-    v2 = urem v0, v1
-    ; nextln: $(hi=$V) = iconst.i64 0
-    ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1
-    return v2
-    ; nextln: return $r
-}
-
-function %sdiv(i64, i64) -> i64 {
-block0(v0: i64, v1: i64):
-    ; check: block0(
-    v2 = sdiv v0, v1
-    ; nextln: $(fz=$V) = ifcmp_imm v1, 0
-    ; nextln: trapif eq $fz, int_divz
-    ; nextln: $(fm1=$V) = ifcmp_imm v1, -1
-    ; nextln: brif eq $fm1, $(m1=$BB)
-    ; check: $(hi=$V) = sshr_imm
-    ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1
-    ; nextln: jump $(done=$BB)($q)
-    ; check: $m1:
-    ; nextln: $(imin=$V) = iconst.i64 0x8000_0000_0000_0000
-    ; nextln: $(fm=$V) = ifcmp.i64 v0, $imin
-    ; nextln: trapif eq $fm, int_ovf
-    ; check: $done(v2: i64):
-    return v2
-    ; nextln: return v2
-}
-
-function %sdiv_0(i64) -> i64 {
-block0(v0: i64):
-    ; check: block0(
-    v1 = iconst.i64 0
-    ; nextln: v1 = iconst.i64 0
-    v2 = sdiv v0, v1
-    ; nextln: $(fz=$V) = ifcmp_imm v1, 0
-    ; nextln: trapif eq $fz, int_divz
-    ; check: $(hi=$V) = sshr_imm
-    ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1
-    return v2
-    ; nextln: return v2
-}
-
-function %sdiv_minus_1(i64) -> i64 {
-block0(v0: i64):
-    ; check: block0(
-    v1 = iconst.i64 -1
-    ; nextln: v1 = iconst.i64 -1
-    v2 = sdiv v0, v1
-    ; nextln: $(fm1=$V) = ifcmp_imm v1, -1
-    ; nextln: brif eq $fm1, $(m1=$BB)
-    ; check: $(hi=$V) = sshr_imm
-    ; nextln: $(q=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1
-    ; nextln: jump $(done=$BB)($q)
-    ; check: $m1:
-    ; nextln: $(imin=$V) = iconst.i64 0x8000_0000_0000_0000
-    ; nextln: $(fm=$V) = ifcmp.i64 v0, $imin
-    ; nextln: trapif eq $fm, int_ovf
-    ; check: $done(v2: i64):
-    return v2
-    ; nextln: return v2
-}
-
-; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1.
-; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern.
-function %srem(i64, i64) -> i64 {
-block0(v0: i64, v1: i64):
-    ; check: block0(
-    v2 = srem v0, v1
-    ; nextln: $(fz=$V) = ifcmp_imm v1, 0
-    ; nextln: trapif eq $fz, int_divz
-    ; nextln: $(fm1=$V) = ifcmp_imm v1, -1
-    ; nextln: brif eq $fm1, $(m1=$BB)
-    ; check: $(hi=$V) = sshr_imm
-    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1
-    ; nextln: jump $(done=$BB)($r)
-    ; check: $m1:
-    ; nextln: $(zero=$V) = iconst.i64 0
-    ; nextln: jump $(done=$BB)($zero)
-    ; check: $done(v2: i64):
-    return v2
-    ; nextln: return v2
-}
-
-function %srem_0(i64) -> i64 {
-block0(v0: i64):
-    ; check: block0(
-    v1 = iconst.i64 0
-    ; nextln: v1 = iconst.i64 0
-    v2 = srem v0, v1
-    ; nextln: $(fz=$V) = ifcmp_imm v1, 0
-    ; nextln: trapif eq $fz, int_divz
-    ; check: $(hi=$V) = sshr_imm
-    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1
-    return v2
-    ; nextln: return v2
-}
-
-function %srem_minus_1(i64) -> i64 {
-block0(v0: i64):
-    ; check: block0(
-    v1 = iconst.i64 -1
-    ; nextln: v1 = iconst.i64 -1
-    v2 = srem v0, v1
-    ; nextln: $(fm1=$V) = ifcmp_imm v1, -1
-    ; nextln: brif eq $fm1, $(m1=$BB)
-    ; check: $(hi=$V) = sshr_imm
-    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1
-    ; nextln: jump $(done=$BB)($r)
-    ; check: $m1:
-    ; nextln: $(zero=$V) = iconst.i64 0
-    ; nextln: jump $(done=$BB)($zero)
-    ; check: $done(v2: i64):
-    return v2
-    ; nextln: return v2
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-div.clif b/cranelift/filetests/filetests/isa/x86/legalize-div.clif
deleted file mode 100644
index b172a9aef3..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-div.clif
+++ /dev/null
@@ -1,57 +0,0 @@
-; Test the division legalizations.
-test legalizer
-; See also legalize-div-traps.clif.
-set avoid_div_traps=0
-target x86_64 legacy
-
-; regex: V=v\d+
-; regex: BB=block\d+
-
-function %udiv(i64, i64) -> i64 {
-block0(v0: i64, v1: i64):
-    ; check: block0(
-    v2 = udiv v0, v1
-    ; nextln: $(hi=$V) = iconst.i64 0
-    ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1
-    return v2
-    ; nextln: return $d
-}
-
-function %urem(i64, i64) -> i64 {
-block0(v0: i64, v1: i64):
-    ; check: block0(
-    v2 = urem v0, v1
-    ; nextln: $(hi=$V) = iconst.i64 0
-    ; nextln: $(d=$V), $(r=$V) = x86_udivmodx v0, $hi, v1
-    return v2
-    ; nextln: return $r
-}
-
-function %sdiv(i64, i64) -> i64 {
-block0(v0: i64, v1: i64):
-    ; check: block0(
-    v2 = sdiv v0, v1
-    ; check: $(hi=$V) = sshr_imm
-    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1
-    return v2
-    ; nextln: return $d
-}
-
-; The srem expansion needs to special-case x % -1 since x86_sdivmodx traps on INT_MIN/-1.
-; TODO: Add more explicit pattern matching once we've cleaned up the ifcmp+brif pattern.
-function %srem(i64, i64) -> i64 {
-block0(v0: i64, v1: i64):
-    ; check: block0(
-    v2 = srem v0, v1
-    ; nextln: $(fm1=$V) = ifcmp_imm v1, -1
-    ; nextln: brif eq $fm1, $(m1=$BB)
-    ; check: $(hi=$V) = sshr_imm
-    ; nextln: $(d=$V), $(r=$V) = x86_sdivmodx v0, $hi, v1
-    ; nextln: jump $(done=$BB)($r)
-    ; check: $m1:
-    ; nextln: $(zero=$V) = iconst.i64 0
-    ; nextln: jump $(done=$BB)($zero)
-    ; check: $done(v2: i64):
-    return v2
-    ; nextln: return v2
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif b/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif
deleted file mode 100644
index 43f57f8372..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-f64const-x64.clif
+++ /dev/null
@@ -1,13 +0,0 @@
-; Test the legalization of f64const.
-test legalizer
-target x86_64 legacy
-
-; regex: V=v\d+
-
-function %f64const() -> f64 {
-block0:
-    v1 = f64const 0x1.0p1
-    ; check: $(tmp=$V) = iconst.i64
-    ; check: v1 = bitcast.f64 $tmp
-    return v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif b/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif
deleted file mode 100644
index 32a256c9e7..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-fcvt_from_usint-i16.clif
+++ /dev/null
@@ -1,14 +0,0 @@
-test compile
-target x86_64 legacy
-
-function u0:0(i16) -> f64 fast {
-block0(v0: i16):
-    v1 = fcvt_from_uint.f64 v0
-    return v1
-}
-
-function u0:1(i16) -> f64 fast {
-block0(v0: i16):
-    v1 = fcvt_from_sint.f64 v0
-    return v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif b/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif
deleted file mode 100644
index 242a0f8dfa..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-heaps.clif
+++ /dev/null
@@ -1,123 +0,0 @@
-test legalizer
-set enable_heap_access_spectre_mitigation=false
-target x86_64 legacy
-
-; Test legalization for various forms of heap addresses.
-; regex: BB=block\d+
-
-function %heap_addrs(i32, i64, i64 vmctx) {
-    gv4 = vmctx
-    gv0 = iadd_imm.i64 gv4, 64
-    gv1 = iadd_imm.i64 gv4, 72
-    gv2 = iadd_imm.i64 gv4, 80
-    gv3 = load.i64 notrap aligned gv4+88
-
-    heap0 = static gv0, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i32
-    heap1 = static gv0, offset_guard 0x1000, bound 0x1_0000, index_type i32
-    heap2 = static gv0, min 0x1_0000, bound 0x1_0000_0000, offset_guard 0x8000_0000, index_type i64
-    heap3 = static gv0, offset_guard 0x1000, bound 0x1_0000, index_type i64
-    heap4 = dynamic gv1, min 0x1_0000, bound gv3, offset_guard 0x8000_0000, index_type i32
-    heap5 = dynamic gv1, bound gv3, offset_guard 0x1000, index_type i32
-    heap6 = dynamic gv1, min 0x1_0000, bound gv2, offset_guard 0x8000_0000, index_type i64
-    heap7 = dynamic gv1, bound gv2, offset_guard 0x1000, index_type i64
-
-    ; check: heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32
-    ; check: heap1 = static gv0, min 0, bound 0x0001_0000, offset_guard 4096, index_type i32
-    ; check: heap2 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i64
-    ; check: heap3 = static gv0, min 0, bound 0x0001_0000, offset_guard 4096, index_type i64
-    ; check: heap4 = dynamic gv1, min 0x0001_0000, bound gv3, offset_guard 0x8000_0000, index_type i32
-    ; check: heap5 = dynamic gv1, min 0, bound gv3, offset_guard 4096, index_type i32
-    ; check: heap6 = dynamic gv1, min 0x0001_0000, bound gv2, offset_guard 0x8000_0000, index_type i64
-    ; check: heap7 = dynamic gv1, min 0, bound gv2, offset_guard 4096, index_type i64
-
-block0(v0: i32, v1: i64, v3: i64):
-    ; The fast-path; 32-bit index, static heap with a sufficient bound, no bounds check needed!
-    v4 = heap_addr.i64 heap0, v0, 0
-    ; check:         v12 = uextend.i64 v0
-    ; check:         v13 = iadd_imm v3, 64
-    ; check:         v4 = iadd v13, v12
-
-    v5 = heap_addr.i64 heap1, v0, 0
-    ; check:         v14 = uextend.i64 v0
-    ; check:         v15 = icmp_imm ugt v14, 0x0001_0000
-    ; check:         brz v15, $(resume_1=$BB)
-    ; nextln:        jump $(trap_1=$BB)
-    ; check:     $trap_1:
-    ; nextln:        trap heap_oob
-    ; check:     $resume_1:
-    ; check:         v16 = iadd_imm.i64 v3, 64
-    ; check:         v5 = iadd v16, v14
-
-    v6 = heap_addr.i64 heap2, v1, 0
-    ; check:         v19 = iconst.i64 0x0001_0000_0000
-    ; check:         v17 = icmp.i64 ugt v1, v19
-    ; check:         brz v17, $(resume_2=$BB)
-    ; nextln:        jump $(trap_2=$BB)
-    ; check:     $trap_2:
-    ; nextln:        trap heap_oob
-    ; check:     $resume_2:
-    ; check:         v18 = iadd_imm.i64 v3, 64
-    ; check:         v6 = iadd v18, v1
-
-    v7 = heap_addr.i64 heap3, v1, 0
-    ; check:         v20 = icmp_imm.i64 ugt v1, 0x0001_0000
-    ; check:         brz v20, $(resume_3=$BB)
-    ; nextln:        jump $(trap_3=$BB)
-    ; check:     $trap_3:
-    ; nextln:        trap heap_oob
-    ; check:     $resume_3:
-    ; check:         v21 = iadd_imm.i64 v3, 64
-    ; check:         v7 = iadd v21, v1
-
-    v8 = heap_addr.i64 heap4, v0, 0
-    ; check:         v22 = uextend.i64 v0
-    ; check:         v23 = load.i64 notrap aligned v3+88
-    ; check:         v24 = iadd_imm v23, 0
-    ; check:         v25 = icmp ugt v22, v24
-    ; check:         brz v25, $(resume_4=$BB)
-    ; nextln:        jump $(trap_4=$BB)
-    ; check:     $trap_4:
-    ; nextln:        trap heap_oob
-    ; check:     $resume_4:
-    ; check:         v26 = iadd_imm.i64 v3, 72
-    ; check:         v8 = iadd v26, v22
-
-    v9 = heap_addr.i64 heap5, v0, 0
-    ; check:         v27 = uextend.i64 v0
-    ; check:         v28 = load.i64 notrap aligned v3+88
-    ; check:         v29 = iadd_imm v28, 0
-    ; check:         v30 = icmp ugt v27, v29
-    ; check:         brz v30, $(resume_5=$BB)
-    ; nextln:        jump $(trap_5=$BB)
-    ; check:     $trap_5:
-    ; nextln:        trap heap_oob
-    ; check:     $resume_5:
-    ; check:         v31 = iadd_imm.i64 v3, 72
-    ; check:         v9 = iadd v31, v27
-
-    v10 = heap_addr.i64 heap6, v1, 0
-    ; check:         v32 = iadd_imm.i64 v3, 80
-    ; check:         v33 = iadd_imm v32, 0
-    ; check:         v34 = icmp.i64 ugt v1, v33
-    ; check:         brz v34, $(resume_6=$BB)
-    ; nextln:        jump $(trap_6=$BB)
-    ; check:     $trap_6:
-    ; nextln:        trap heap_oob
-    ; check:     $resume_6:
-    ; check:         v35 = iadd_imm.i64 v3, 72
-    ; check:         v10 = iadd v35, v1
-
-    v11 = heap_addr.i64 heap7, v1, 0
-    ; check:         v36 = iadd_imm.i64 v3, 80
-    ; check:         v37 = iadd_imm v36, 0
-    ; check:         v38 = icmp.i64 ugt v1, v37
-    ; check:         brz v38, $(resume_7=$BB)
-    ; nextln:        jump $(trap_7=$BB)
-    ; check:     $trap_7:
-    ; nextln:        trap heap_oob
-    ; check:     $resume_7:
-    ; check:         v39 = iadd_imm.i64 v3, 72
-    ; check:         v11 = iadd v39, v1
-
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-i128.clif b/cranelift/filetests/filetests/isa/x86/legalize-i128.clif
deleted file mode 100644
index 276de82d4e..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-i128.clif
+++ /dev/null
@@ -1,20 +0,0 @@
-; Test the legalization of i128 instructions on x86_64.
-test legalizer
-target x86_64 legacy haswell
-
-; regex: V=v\d+
-
-function %imul(i128, i128) -> i128 {
-block0(v1: i128, v2: i128):
-    v10 = imul v1, v2
-    ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V)
-    ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V)
-    ; nextln: $(v11=$V) = imul $v1_msb, $v2_lsb
-    ; nextln: $(v12=$V) = imul $v1_lsb, $v2_msb
-    ; nextln: $(v13=$V) = iadd $v11, $v12
-    ; nextln: $(v99=$V), $(v14=$V) = x86_umulx $v1_lsb, $v2_lsb
-    ; nextln: $(v10_msb=$V) = iadd $v13, $v14
-    ; nextln: $(v10_lsb=$V) = imul $v1_lsb, $v2_lsb
-    ; nextln: v10 = iconcat $v10_lsb, $v10_msb
-    return v10
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-i64.clif b/cranelift/filetests/filetests/isa/x86/legalize-i64.clif
deleted file mode 100644
index 7e2d381947..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-i64.clif
+++ /dev/null
@@ -1,357 +0,0 @@
-; Test the legalization of i64 instructions on x86_32.
-test legalizer
-target i686 legacy haswell
-
-; regex: V=v\d+
-
-function %iadd(i64, i64) -> i64 {
-block0(v1: i64, v2: i64):
-    v10 = iadd v1, v2
-    ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V)
-    ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V)
-    ; nextln: $(v10_lsb=$V), $(carry=$V) = iadd_ifcout $v1_lsb, $v2_lsb
-    ; nextln: $(v10_msb=$V) = iadd_ifcin $v1_msb, $v2_msb, $carry
-    ; nextln: v10 = iconcat $v10_lsb, $v10_msb
-    return v10
-}
-
-function %isub(i64, i64) -> i64 {
-block0(v1: i64, v2: i64):
-    v10 = isub v1, v2
-    ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V)
-    ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V)
-    ; nextln: $(v10_lsb=$V), $(borrow=$V) = isub_ifbout $v1_lsb, $v2_lsb
-    ; nextln: $(v10_msb=$V) = isub_ifbin $v1_msb, $v2_msb, $borrow
-    ; nextln: v10 = iconcat $v10_lsb, $v10_msb
-    return v10
-}
-
-function %imul(i64, i64) -> i64 {
-block0(v1: i64, v2: i64):
-    v10 = imul v1, v2
-    ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V)
-    ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V)
-    ; nextln: $(v11=$V) = imul $v1_msb, $v2_lsb
-    ; nextln: $(v12=$V) = imul $v1_lsb, $v2_msb
-    ; nextln: $(v13=$V) = iadd $v11, $v12
-    ; nextln: $(v99=$V), $(v14=$V) = x86_umulx $v1_lsb, $v2_lsb
-    ; nextln: $(v10_msb=$V) = iadd $v13, $v14
-    ; nextln: $(v10_lsb=$V) = imul $v1_lsb, $v2_lsb
-    ; nextln: v10 = iconcat $v10_lsb, $v10_msb
-    return v10
-}
-
-function %icmp_eq(i64, i64) -> b1 {
-block0(v1: i64, v2: i64):
-    v10 = icmp eq v1, v2
-    ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V)
-    ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V)
-    ; nextln: $(v10_lsb=$V) = icmp eq $v1_lsb, $v2_lsb
-    ; nextln: $(v10_msb=$V) = icmp eq $v1_msb, $v2_msb
-    ; nextln: v10 = band $v10_lsb, $v10_msb
-    return v10
-}
-
-function %icmp_imm_eq(i64) -> b1 {
-block0(v1: i64):
-    v10 = icmp_imm eq v1, 0
-    ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V)
-    ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V)
-    ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V)
-    ; nextln: $(v2_lsb=$V) = iconst.i32 0
-    ; nextln: $(v2_msb=$V) = iconst.i32 0
-    ; nextln: $(v10_lsb=$V) = icmp eq $v1_lsb, $v2_lsb
-    ; nextln: $(v10_msb=$V) = icmp eq $v1_msb, $v2_msb
-    ; nextln: v10 = band $v10_lsb, $v10_msb
-    return v10
-}
-
-function %icmp_ne(i64, i64) -> b1 {
-block0(v1: i64, v2: i64):
-    v10 = icmp ne v1, v2
-    ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V)
-    ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V)
-    ; nextln: $(v10_lsb=$V) = icmp ne $v1_lsb, $v2_lsb
-    ; nextln: $(v10_msb=$V) = icmp ne $v1_msb, $v2_msb
-    ; nextln: v10 = bor $v10_lsb, $v10_msb
-    return v10
-}
-
-function %icmp_imm_ne(i64) -> b1 {
-block0(v1: i64):
-    v10 = icmp_imm ne v1, 0
-    ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V)
-    ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V)
-    ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V)
-    ; nextln: $(v2_lsb=$V) = iconst.i32 0
-    ; nextln: $(v2_msb=$V) = iconst.i32 0
-    ; nextln: $(v10_lsb=$V) = icmp ne $v1_lsb, $v2_lsb
-    ; nextln: $(v10_msb=$V) = icmp ne $v1_msb, $v2_msb
-    ; nextln: v10 = bor $v10_lsb, $v10_msb
-    return v10
-}
-
-function %icmp_sgt(i64, i64) -> b1 {
-block0(v1: i64, v2: i64):
-    v10 = icmp sgt v1, v2
-    ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V)
-    ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V)
-    ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb
-    ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb
-    ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb
-    ; nextln: $(c1=$V) = bnot $b2
-    ; nextln: $(c2=$V) = band $c1, $b3
-    ; nextln: v10 = bor $b1, $c2
-    return v10
-}
-
-function %icmp_imm_sgt(i64) -> b1 {
-block0(v1: i64):
-    v10 = icmp_imm sgt v1, 0
-    ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V)
-    ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V)
-    ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V)
-    ; nextln: $(v2_lsb=$V) = iconst.i32 0
-    ; nextln: $(v2_msb=$V) = iconst.i32 0
-    ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb
-    ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb
-    ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb
-    ; nextln: $(c1=$V) = bnot $b2
-    ; nextln: $(c2=$V) = band $c1, $b3
-    ; nextln: v10 = bor $b1, $c2
-    return v10
-}
-
-function %icmp_sge(i64, i64) -> b1 {
-block0(v1: i64, v2: i64):
-    v10 = icmp sge v1, v2
-    ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V)
-    ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V)
-    ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb
-    ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb
-    ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb
-    ; nextln: $(c1=$V) = bnot $b2
-    ; nextln: $(c2=$V) = band $c1, $b3
-    ; nextln: v10 = bor $b1, $c2
-    return v10
-}
-
-function %icmp_imm_sge(i64) -> b1 {
-block0(v1: i64):
-    v10 = icmp_imm sge v1, 0
-    ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V)
-    ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V)
-    ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V)
-    ; nextln: $(v2_lsb=$V) = iconst.i32 0
-    ; nextln: $(v2_msb=$V) = iconst.i32 0
-    ; nextln: $(b1=$V) = icmp sgt $v1_msb, $v2_msb
-    ; nextln: $(b2=$V) = icmp slt $v1_msb, $v2_msb
-    ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb
-    ; nextln: $(c1=$V) = bnot $b2
-    ; nextln: $(c2=$V) = band $c1, $b3
-    ; nextln: v10 = bor $b1, $c2
-    return v10
-}
-
-function %icmp_slt(i64, i64) -> b1 {
-block0(v1: i64, v2: i64):
-    v10 = icmp slt v1, v2
-    ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V)
-    ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V)
-    ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb
-    ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb
-    ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb
-    ; nextln: $(c1=$V) = bnot $b2
-    ; nextln: $(c2=$V) = band $c1, $b3
-    ; nextln: v10 = bor $b1, $c2
-    return v10
-}
-
-function %icmp_imm_slt(i64) -> b1 {
-block0(v1: i64):
-    v10 = icmp_imm slt v1, 0
-    ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V)
-    ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V)
-    ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V)
-    ; nextln: $(v2_lsb=$V) = iconst.i32 0
-    ; nextln: $(v2_msb=$V) = iconst.i32 0
-    ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb
-    ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb
-    ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb
-    ; nextln: $(c1=$V) = bnot $b2
-    ; nextln: $(c2=$V) = band $c1, $b3
-    ; nextln: v10 = bor $b1, $c2
-    return v10
-}
-
-function %icmp_sle(i64, i64) -> b1 {
-block0(v1: i64, v2: i64):
-    v10 = icmp sle v1, v2
-    ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V)
-    ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V)
-    ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb
-    ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb
-    ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb
-    ; nextln: $(c1=$V) = bnot $b2
-    ; nextln: $(c2=$V) = band $c1, $b3
-    ; nextln: v10 = bor $b1, $c2
-    return v10
-}
-
-function %icmp_imm_sle(i64) -> b1 {
-block0(v1: i64):
-    v10 = icmp_imm sle v1, 0
-    ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V)
-    ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V)
-    ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V)
-    ; nextln: $(v2_lsb=$V) = iconst.i32 0
-    ; nextln: $(v2_msb=$V) = iconst.i32 0
-    ; nextln: $(b1=$V) = icmp slt $v1_msb, $v2_msb
-    ; nextln: $(b2=$V) = icmp sgt $v1_msb, $v2_msb
-    ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb
-    ; nextln: $(c1=$V) = bnot $b2
-    ; nextln: $(c2=$V) = band $c1, $b3
-    ; nextln: v10 = bor $b1, $c2
-    return v10
-}
-
-function %icmp_ugt(i64, i64) -> b1 {
-block0(v1: i64, v2: i64):
-    v10 = icmp ugt v1, v2
-    ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V)
-    ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V)
-    ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb
-    ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb
-    ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb
-    ; nextln: $(c1=$V) = bnot $b2
-    ; nextln: $(c2=$V) = band $c1, $b3
-    ; nextln: v10 = bor $b1, $c2
-    return v10
-}
-
-function %icmp_imm_ugt(i64) -> b1 {
-block0(v1: i64):
-    v10 = icmp_imm ugt v1, 0
-    ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V)
-    ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V)
-    ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V)
-    ; nextln: $(v2_lsb=$V) = iconst.i32 0
-    ; nextln: $(v2_msb=$V) = iconst.i32 0
-    ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb
-    ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb
-    ; nextln: $(b3=$V) = icmp ugt $v1_lsb, $v2_lsb
-    ; nextln: $(c1=$V) = bnot $b2
-    ; nextln: $(c2=$V) = band $c1, $b3
-    ; nextln: v10 = bor $b1, $c2
-    return v10
-}
-
-function %icmp_uge(i64, i64) -> b1 {
-block0(v1: i64, v2: i64):
-    v10 = icmp uge v1, v2
-    ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V)
-    ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V)
-    ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb
-    ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb
-    ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb
-    ; nextln: $(c1=$V) = bnot $b2
-    ; nextln: $(c2=$V) = band $c1, $b3
-    ; nextln: v10 = bor $b1, $c2
-    return v10
-}
-
-function %icmp_imm_uge(i64) -> b1 {
-block0(v1: i64):
-    v10 = icmp_imm uge v1, 0
-    ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V)
-    ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V)
-    ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V)
-    ; nextln: $(v2_lsb=$V) = iconst.i32 0
-    ; nextln: $(v2_msb=$V) = iconst.i32 0
-    ; nextln: $(b1=$V) = icmp ugt $v1_msb, $v2_msb
-    ; nextln: $(b2=$V) = icmp ult $v1_msb, $v2_msb
-    ; nextln: $(b3=$V) = icmp uge $v1_lsb, $v2_lsb
-    ; nextln: $(c1=$V) = bnot $b2
-    ; nextln: $(c2=$V) = band $c1, $b3
-    ; nextln: v10 = bor $b1, $c2
-    return v10
-}
-
-function %icmp_ult(i64, i64) -> b1 {
-block0(v1: i64, v2: i64):
-    v10 = icmp ult v1, v2
-    ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V)
-    ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V)
-    ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb
-    ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb
-    ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb
-    ; nextln: $(c1=$V) = bnot $b2
-    ; nextln: $(c2=$V) = band $c1, $b3
-    ; nextln: v10 = bor $b1, $c2
-    return v10
-}
-
-function %icmp_imm_ult(i64) -> b1 {
-block0(v1: i64):
-    v10 = icmp_imm ult v1, 0
-    ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V)
-    ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V)
-    ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V)
-    ; nextln: $(v2_lsb=$V) = iconst.i32 0
-    ; nextln: $(v2_msb=$V) = iconst.i32 0
-    ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb
-    ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb
-    ; nextln: $(b3=$V) = icmp ult $v1_lsb, $v2_lsb
-    ; nextln: $(c1=$V) = bnot $b2
-    ; nextln: $(c2=$V) = band $c1, $b3
-    ; nextln: v10 = bor $b1, $c2
-    return v10
-}
-
-function %icmp_ule(i64, i64) -> b1 {
-block0(v1: i64, v2: i64):
-    v10 = icmp ule v1, v2
-    ; check: v1 = iconcat $(v1_lsb=$V), $(v1_msb=$V)
-    ; nextln: v2 = iconcat $(v2_lsb=$V), $(v2_msb=$V)
-    ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb
-    ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb
-    ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb
-    ; nextln: $(c1=$V) = bnot $b2
-    ; nextln: $(c2=$V) = band $c1, $b3
-    ; nextln: v10 = bor $b1, $c2
-    return v10
-}
-
-function %icmp_imm_ule(i64) -> b1 {
-block0(v1: i64):
-    v10 = icmp_imm ule v1, 0
-    ; check: $(v1_lsb=$V) -> $(v1_lsb_a=$V)
-    ; nextln: $(v1_msb=$V) -> $(v1_msb_a=$V)
-    ; nextln: v1 = iconcat $(v1_lsb_a=$V), $(v1_msb_a=$V)
-    ; nextln: $(v2_lsb=$V) = iconst.i32 0
-    ; nextln: $(v2_msb=$V) = iconst.i32 0
-    ; nextln: $(b1=$V) = icmp ult $v1_msb, $v2_msb
-    ; nextln: $(b2=$V) = icmp ugt $v1_msb, $v2_msb
-    ; nextln: $(b3=$V) = icmp ule $v1_lsb, $v2_lsb
-    ; nextln: $(c1=$V) = bnot $b2
-    ; nextln: $(c2=$V) = band $c1, $b3
-    ; nextln: v10 = bor $b1, $c2
-    return v10
-}
-
-function %ineg_legalized_i64() {
-block0:
-    v0 = iconst.i64 1
-    v1 = ineg v0
-    ; check: v2 = iconst.i32 1
-    ; nextln: v3 = iconst.i32 0
-    ; nextln: v0 = iconcat v2, v3
-    ; nextln: v5 = iconst.i32 0
-    ; nextln: v6 = iconst.i32 0
-    ; nextln: v4 = iconcat v5, v6
-    ; nextln: v7, v8 = isub_ifbout v5, v2
-    ; nextln: v9 = isub_ifbin v6, v3, v8
-    ; nextln: v1 = iconcat v7, v9
-
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif
deleted file mode 100644
index 32f2b3d3e7..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-icmp-i8.clif
+++ /dev/null
@@ -1,19 +0,0 @@
-test compile
-target x86_64 legacy
-
-; regex: V=v\d+
-
-function u0:0(i8, i8) -> i8 fast {
-block0(v0: i8, v1: i8):
-    v2 = icmp_imm sle v0, 0
-    ; check: $(e1=$V) = sextend.i32 v0
-    ; nextln: v2 = icmp_imm sle $e1, 0
-    v3 = bint.i8 v2
-    v4 = icmp eq v0, v1
-    ; check: $(e2=$V) = uextend.i32 v0
-    ; nextln: $(e3=$V) = uextend.i32 v1
-    ; nextln: v4 = icmp eq $e2, $e3
-    v5 = bint.i8 v4
-    v6 = iadd v3, v5
-    return v6
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif
deleted file mode 100644
index 1e6a70434a..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-iconst-i8.clif
+++ /dev/null
@@ -1,18 +0,0 @@
-test compile
-
-target x86_64 legacy
-
-function u0:0(i64) system_v {
-    ss0 = explicit_slot 0
-
-block0(v0: i64):
-    jump block1
-
-block1:
-; _0 = const 42u8
-    v1 = iconst.i8 42
-    store v1, v0
-; 
-; return
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif
deleted file mode 100644
index b1f5b12095..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-imul-i8.clif
+++ /dev/null
@@ -1,11 +0,0 @@
-test compile
-
-target x86_64 legacy
-
-function u0:0(i64, i8, i8) system_v {
-
-block0(v0: i64, v1: i8, v2: i8):
-    v11 = imul v1, v2
-    store v11, v0
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif
deleted file mode 100644
index 4f84d93d0b..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-imul-imm-i8.clif
+++ /dev/null
@@ -1,15 +0,0 @@
-test compile
-
-target x86_64 legacy
-
-function u0:0(i64, i8) system_v {
-    ss0 = explicit_slot 1
-
-block0(v0: i64, v1: i8):
-    v3 = stack_addr.i64 ss0
-    v5 = load.i8 v3
-    v6 = iconst.i8 2
-    v7 = imul_imm v5, 42
-    store v7, v0
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif b/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif
deleted file mode 100644
index a36a2d6ed0..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-ineg-x86_64.clif
+++ /dev/null
@@ -1,13 +0,0 @@
-; Test the custom legalization of ineg.i64 on x86_64.
-test legalizer
-target x86_64 legacy
-
-function %ineg_legalized_i64() {
-block0:
-    v0 = iconst.i64 1
-    v1 = ineg v0
-    ; check: v0 = iconst.i64 1
-    ; nextln: v2 = iconst.i64 0
-    ; nextln: v1 = isub v2, v0
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif b/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif
deleted file mode 100644
index 527710d4fe..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i128.clif
+++ /dev/null
@@ -1,11 +0,0 @@
-test compile
-target x86_64 legacy
-
-function u0:0(i64, i64) -> i64 {
-block0(v0: i64, v1: i64):
-    v2 = iconcat v0, v1
-    v3 = ireduce.i64 v2
-    ; check: v3 = copy v0
-    ; check: return v3
-    return v3
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif b/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif
deleted file mode 100644
index 3ad3f4c69f..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-ireduce-i64.clif
+++ /dev/null
@@ -1,11 +0,0 @@
-test compile
-target i686 legacy
-
-function u0:0(i32, i32) -> i32 {
-block0(v0: i32, v1: i32):
-    v2 = iconcat v0, v1
-    v3 = ireduce.i32 v2
-    ; check: v3 = fill v0
-    ; check: return v3
-    return v3
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif b/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif
deleted file mode 100644
index 0d042bf3ff..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-isplit-backwards.clif
+++ /dev/null
@@ -1,24 +0,0 @@
-test compile
-target x86_64 legacy
-
-function u0:0(i128) -> i64, i64 fast {
-; check: block0(v4: i64 [%rdi], v5: i64 [%rsi], v8: i64 [%rbp]):
-block0(v0: i128):
-    jump block2
-
-block1:
-    ; When this `isplit` is legalized, the bnot below is not yet legalized,
-    ; so there isn't a corresponding `iconcat` yet. We should try legalization
-    ; for this `isplit` again once all instrucions have been legalized.
-    v2, v3 = isplit.i128 v1
-    ; return v6, v7
-    return v2, v3
-
-block2:
-    ; check: v6 = bnot.i64 v4
-    ; check: v2 -> v6
-    ; check: v7 = bnot.i64 v5
-    ; check: v3 -> v7
-    v1 = bnot.i128 v0
-    jump block1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif b/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif
deleted file mode 100644
index 838a915bf0..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-libcall.clif
+++ /dev/null
@@ -1,15 +0,0 @@
-test legalizer
-
-; Pre-SSE 4.1, we need to use runtime library calls for floating point rounding operations.
-set is_pic
-target x86_64 legacy
-
-function %floor(f32) -> f32 {
-block0(v0: f32):
-    v1 = floor v0
-    return v1
-}
-; check: function %floor(f32 [%xmm0]) -> f32 [%xmm0] fast {
-; check: sig0 = (f32 [%xmm0]) -> f32 [%xmm0] system_v
-; check: fn0 = %FloorF32 sig0
-; check: v1 = call fn0(v0)
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif
deleted file mode 100644
index 4cbf3e088e..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-load-store-i8.clif
+++ /dev/null
@@ -1,31 +0,0 @@
-test compile
-
-target x86_64 legacy
-
-function u0:0(i64, i8, i8) system_v {
-    ss0 = explicit_slot 0
-    ss1 = explicit_slot 1
-    ss2 = explicit_slot 1
-    ss3 = explicit_slot 1
-    ss4 = explicit_slot 1
-
-block0(v0: i64, v1: i8, v2: i8):
-    v3 = stack_addr.i64 ss1
-    store v1, v3
-    v4 = stack_addr.i64 ss2
-    store v2, v4
-    v5 = stack_addr.i64 ss3
-    v6 = stack_addr.i64 ss4
-    jump block1
-
-block1:
-    v7 = load.i8 v3
-    store v7, v5
-    v8 = load.i8 v4
-    store v8, v6
-    v9 = load.i8 v5
-    v10 = load.i8 v6
-    v11 = imul v9, v10
-    store v11, v0
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-memory.clif b/cranelift/filetests/filetests/isa/x86/legalize-memory.clif
deleted file mode 100644
index 11a0f1d20f..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-memory.clif
+++ /dev/null
@@ -1,115 +0,0 @@
-; Test the legalization of memory objects.
-test legalizer
-set enable_heap_access_spectre_mitigation=false
-target x86_64 legacy
-
-; regex: V=v\d+
-; regex: BB=block\d+
-
-function %vmctx(i64 vmctx) -> i64 {
-    gv0 = vmctx
-    gv1 = iadd_imm.i64 gv0, -16
-
-block1(v1: i64):
-    v2 = global_value.i64 gv1
-    ; check: v2 = iadd_imm v1, -16
-    return v2
-    ; check: return v2
-}
-
-function %load(i64 vmctx) -> i64 {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0-16
-    gv2 = iadd_imm.i64 gv1, 32
-
-block1(v1: i64):
-    v2 = global_value.i64 gv2
-    ; check: $(p1=$V) = load.i64 notrap aligned v1-16
-    ; check: v2 = iadd_imm $p1, 32
-    return v2
-    ; check: return v2
-}
-
-function %symbol() -> i64 {
-    gv0 = symbol %something
-    gv1 = symbol u123:456
-
-block1:
-    v0 = global_value.i64 gv0
-    ; check: v0 = symbol_value.i64 gv0
-    v1 = global_value.i64 gv1
-    ; check: v1 = symbol_value.i64 gv1
-    v2 = bxor v0, v1
-    return v2
-}
-
-; SpiderMonkey VM-style static 4+2 GB heap.
-; This eliminates bounds checks completely for offsets < 2GB.
-function %staticheap_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v {
-    gv0 = vmctx
-    gv1 = iadd_imm.i64 gv0, 64
-    heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0x8000_0000
-
-block0(v0: i32, v999: i64):
-    ; check: block0(
-    v1 = heap_addr.i64 heap0, v0, 1
-    ; Boundscheck should be eliminated.
-    ; Checks here are assuming that no pipehole opts fold the load offsets.
-    ; nextln: $(xoff=$V) = uextend.i64 v0
-    ; check: $(hbase=$V) = iadd_imm v999, 64
-    ; nextln: v1 = iadd $hbase, $xoff
-    v2 = load.f32 v1+16
-    ; nextln: v2 = load.f32 v1+16
-    v3 = load.f32 v1+20
-    ; nextln: v3 = load.f32 v1+20
-    v4 = fadd v2, v3
-    return v4
-}
-
-function %staticheap_static_oob_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v {
-    gv0 = vmctx
-    gv1 = iadd_imm.i64 gv0, 64
-    heap0 = static gv1, min 0x1000, bound 0x1000_0000, offset_guard 0x8000_0000
-
-block0(v0: i32, v999: i64):
-    ; Everything after the obviously OOB access should be eliminated, leaving
-    ; the `trap heap_oob` instruction as the terminator of the block and moving
-    ; the remainder of the instructions into an inaccessible block.
-    ; check: block0(
-    ; nextln:     trap heap_oob
-    ; check: block1:
-    ; nextln:     v1 = iconst.i64 0
-    ; nextln:     v2 = load.f32 v1+16
-    ; nextln:     return v2
-    ; nextln: }
-    v1 = heap_addr.i64 heap0, v0, 0x1000_0001
-    v2 = load.f32 v1+16
-    return v2
-}
-
-
-; SpiderMonkey VM-style static 4+2 GB heap.
-; Offsets >= 2 GB do require a boundscheck.
-function %staticheap_sm64(i32, i64 vmctx) -> f32 baldrdash_system_v {
-    gv0 = vmctx
-    gv1 = iadd_imm.i64 gv0, 64
-    heap0 = static gv1, min 0x1000, bound 0x1_0000_0000, offset_guard 0x8000_0000
-
-block0(v0: i32, v999: i64):
-    ; check: block0(
-    v1 = heap_addr.i64 heap0, v0, 0x8000_0000
-    ; Boundscheck code
-    ; check: $(xoff=$V) = uextend.i64 v0
-    ; check: $(oob=$V) = icmp
-    ; nextln: brz $oob, $(ok=$BB)
-    ; nextln: jump $(trap_oob=$BB)
-    ; check: $trap_oob:
-    ; nextln: trap heap_oob
-    ; check: $ok:
-    ; Checks here are assuming that no pipehole opts fold the load offsets.
-    ; check: $(hbase=$V) = iadd_imm.i64 v999, 64
-    ; nextln: v1 = iadd $hbase, $xoff
-    v2 = load.f32 v1+0x7fff_ffff
-    ; nextln: v2 = load.f32 v1+0x7fff_ffff
-    return v2
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif b/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif
deleted file mode 100644
index 179ef824f3..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-mulhi.clif
+++ /dev/null
@@ -1,43 +0,0 @@
-test compile
-target x86_64 legacy baseline
-
-; umulhi/smulhi on 64 bit operands
-
-function %i64_umulhi(i64, i64) -> i64 {
-block0(v10: i64, v11: i64):
-  v12 = umulhi v10, v11
-  ; check: %rdi -> %rax
-  ; check: x86_umulx
-  ; check: %rdx -> %rax
-  return v12
-}
-
-function %i64_smulhi(i64, i64) -> i64 {
-block0(v20: i64, v21: i64):
-  v22 = smulhi v20, v21
-  ; check: %rdi -> %rax
-  ; check: x86_smulx
-  ; check: %rdx -> %rax
-  return v22
-}
-
-
-; umulhi/smulhi on 32 bit operands
-
-function %i32_umulhi(i32, i32) -> i32 {
-block0(v30: i32, v31: i32):
-  v32 = umulhi v30, v31
-  ; check: %rdi -> %rax
-  ; check: x86_umulx
-  ; check: %rdx -> %rax
-  return v32
-}
-
-function %i32_smulhi(i32, i32) -> i32 {
-block0(v40: i32, v41: i32):
-  v42 = smulhi v40, v41
-  ; check: %rdi -> %rax
-  ; check: x86_smulx
-  ; check: %rdx -> %rax
-  return v42
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif
deleted file mode 100644
index fb9c4f49b8..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-popcnt-i8.clif
+++ /dev/null
@@ -1,9 +0,0 @@
-test compile
-target x86_64 legacy
-
-function u0:0(i8) -> i8 fast {
-block0(v0: i8):
-    v1 = popcnt v0
-    ; check-not: sextend.i32 v0
-    return v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif
deleted file mode 100644
index f770ba5643..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-regmove-i8.clif
+++ /dev/null
@@ -1,36 +0,0 @@
-test compile
-
-target x86_64 legacy
-
-function u0:0(i64, i64, i64) system_v {
-    ss0 = explicit_slot 0
-    ss1 = explicit_slot 8
-    ss2 = explicit_slot 8
-    ss3 = explicit_slot 2
-    ss4 = explicit_slot 8
-    sig0 = (i64, i16, i64) system_v
-    fn0 = colocated u0:11 sig0
-
-block0(v0: i64, v1: i64, v2: i64):
-    v3 = stack_addr.i64 ss1
-    store v1, v3
-    v4 = stack_addr.i64 ss2
-    store v2, v4
-    v5 = stack_addr.i64 ss3
-    v6 = stack_addr.i64 ss4
-    jump block1
-
-block1:
-    v7 = load.i64 v3
-    v8 = load.i16 v7
-    store v8, v5
-    v9 = load.i64 v4
-    store v9, v6
-    v10 = load.i16 v5
-    v11 = load.i64 v6
-    call fn0(v0, v10, v11)
-    jump block2
-
-block2:
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif b/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif
deleted file mode 100644
index e058602615..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-rotate.clif
+++ /dev/null
@@ -1,35 +0,0 @@
-test compile
-target x86_64 legacy
-
-; regex: V=v\d+
-; regex: R=%[a-z0-9]+
-
-function %i32_rotr(i32, i32) -> i32 fast {
-block0(v0: i32, v1: i32):
-    ; check: regmove v1, $R -> %rcx
-    ; check: v2 = rotr v0, v1
-    v2 = rotr v0, v1
-    return v2
-}
-
-function %i32_rotr_imm_1(i32) -> i32 fast {
-block0(v0: i32):
-    ; check: $V = rotr_imm v0, 1
-    v2 = rotr_imm v0, 1
-    return v2
-}
-
-function %i32_rotl(i32, i32) -> i32 fast {
-block0(v0: i32, v1: i32):
-    ; check: regmove v1, $R -> %rcx
-    ; check: v2 = rotl v0, v1
-    v2 = rotl v0, v1
-    return v2
-}
-
-function %i32_rotl_imm_1(i32) -> i32 fast {
-block0(v0: i32):
-    ; check: $V = rotl_imm v0, 1
-    v2 = rotl_imm v0, 1
-    return v2
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif
deleted file mode 100644
index 9759a8b155..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-shlr-i8.clif
+++ /dev/null
@@ -1,24 +0,0 @@
-test compile
-target x86_64 legacy
-
-; regex: V=v\d+
-
-function u0:0(i8, i8) -> i8 fast {
-block0(v0: i8, v1: i8):
-    v2 = ishl v0, v1
-    ; check: $(e1=$V) = uextend.i32 v0
-    ; check: $(r1=$V) = ishl $e1, v1
-    ; check v2 = ireduce.i8 $r1
-    v3 = ushr v0, v1
-    ; check: $(e2=$V) = uextend.i32 v0
-    ; check: $(r2=$V) = ushr $e2, v1
-    ; check v2 = ireduce.i8 $r2
-    v4 = sshr v0, v1
-    ; check: $(e3=$V) = sextend.i32 v0
-    ; check: $(r3=$V) = sshr $e3, v1
-    ; check v2 = ireduce.i8 $r3
-
-    v5 = iadd v2, v3
-    v6 = iadd v4, v5
-    return v6
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-tables.clif b/cranelift/filetests/filetests/isa/x86/legalize-tables.clif
deleted file mode 100644
index 10912afe76..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-tables.clif
+++ /dev/null
@@ -1,73 +0,0 @@
-test legalizer
-target x86_64 legacy
-
-; Test legalization for various forms of table addresses.
-; regex: BB=block\d+
-
-function %table_addrs(i32, i64, i64 vmctx) {
-    gv4 = vmctx
-    gv0 = iadd_imm.i64 gv4, 72
-    gv1 = iadd_imm.i64 gv4, 80
-    gv2 = load.i32 notrap aligned gv4+88
-
-    table0 = dynamic gv0, min 0x1_0000, bound gv2, element_size 1, index_type i32
-    table1 = dynamic gv0, bound gv2, element_size 16, index_type i32
-    table2 = dynamic gv0, min 0x1_0000, bound gv1, element_size 1, index_type i64
-    table3 = dynamic gv0, bound gv1, element_size 16, index_type i64
-
-    ; check: table0 = dynamic gv0, min 0x0001_0000, bound gv2, element_size 1, index_type i32
-    ; check: table1 = dynamic gv0, min 0, bound gv2, element_size 16, index_type i32
-    ; check: table2 = dynamic gv0, min 0x0001_0000, bound gv1, element_size 1, index_type i64
-    ; check: table3 = dynamic gv0, min 0, bound gv1, element_size 16, index_type i64
-
-block0(v0: i32, v1: i64, v3: i64):
-    v4 = table_addr.i64 table0, v0, +0
-    ; check:         v8 = load.i32 notrap aligned v3+88
-    ; check:         v9 = icmp uge v0, v8
-    ; check:         brz v9, $(resume_1=$BB)
-    ; nextln:        jump $(trap_1=$BB)
-    ; check:     $trap_1:
-    ; nextln:        trap table_oob
-    ; check:     $resume_1:
-    ; check:         v10 = uextend.i64 v0
-    ; check:         v11 = iadd_imm.i64 v3, 72
-    ; check:         v4 = iadd v11, v10
-
-    v5 = table_addr.i64 table1, v0, +0
-    ; check:         v12 = load.i32 notrap aligned v3+88
-    ; check:         v13 = icmp.i32 uge v0, v12
-    ; check:         brz v13, $(resume_2=$BB)
-    ; nextln:        jump $(trap_2=$BB)
-    ; check:     $trap_2:
-    ; nextln:        trap table_oob
-    ; check:     $resume_2:
-    ; check:         v14 = uextend.i64 v0
-    ; check:         v15 = iadd_imm.i64 v3, 72
-    ; check:         v16 = ishl_imm v14, 4
-    ; check:         v5 = iadd v15, v16
-
-    v6 = table_addr.i64 table2, v1, +0
-    ; check:         v17 = iadd_imm.i64 v3, 80
-    ; check:         v18 = icmp.i64 uge v1, v17
-    ; check:         brz v18, $(resume_3=$BB)
-    ; nextln:        jump $(trap_3=$BB)
-    ; check:     $trap_3:
-    ; nextln:        trap table_oob
-    ; check:     $resume_3:
-    ; check:         v19 = iadd_imm.i64 v3, 72
-    ; check:         v6 = iadd v19, v1
-
-    v7 = table_addr.i64 table3, v1, +0
-    ; check:         v20 = iadd_imm.i64 v3, 80
-    ; check:         v21 = icmp.i64 uge v1, v20
-    ; check:         brz v21, $(resume_4=$BB)
-    ; nextln:        jump $(trap_4=$BB)
-    ; check:     $trap_4:
-    ; nextln:        trap table_oob
-    ; check:     $resume_4:
-    ; check:         v22 = iadd_imm.i64 v3, 72
-    ; check:         v23 = ishl_imm.i64 v1, 4
-    ; check:         v7 = iadd v22, v23
-
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif b/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif
deleted file mode 100644
index 7be308308c..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-urem-i8.clif
+++ /dev/null
@@ -1,15 +0,0 @@
-test compile
-target x86_64 legacy
-
-; regex: V=v\d+
-
-function u0:0(i8, i8) -> i8 fast {
-block0(v0: i8, v1: i8):
-    v2 = urem v0, v1
-    ; check: $(a=$V) = uextend.i32 v0
-    ; nextln: $(b=$V) = uextend.i32 v1
-    ; nextln: $(c=$V) = iconst.i32 0
-    ; nextln: $(V), $(r=$V) = x86_udivmodx $a, $c, $b
-    ; nextln: v2 = ireduce.i8 $r
-    return v2
-}
diff --git a/cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif b/cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif
deleted file mode 100644
index ff5d11a4d7..0000000000
--- a/cranelift/filetests/filetests/isa/x86/legalize-x86_32-shifts.clif
+++ /dev/null
@@ -1,51 +0,0 @@
-test compile
-set enable_simd
-target i686 legacy haswell
-
-function u0:1(i32) -> i64 system_v {
-    block1(v0: i32):
-        v1 = load.i64 notrap aligned v0+0
-        v2 = load.i32 notrap aligned v0+16
-        v3 = ishl v1, v2
-        return v3
-}
-
-function u0:2(i32) -> i64 system_v {
-    block1(v0: i32):
-        v1 = load.i64 notrap aligned v0+0
-        v2 = load.i64 notrap aligned v0+16
-        v3 = ishl v1, v2
-        return v3
-}
-
-function u0:3(i32) -> i32 system_v {
-    block1(v0: i32):
-        v1 = load.i32 notrap aligned v0+0
-        v2 = load.i64 notrap aligned v0+16
-        v3 = ishl v1, v2
-        return v3
-}
-
-function u0:4(i32) -> i64 system_v {
-    block1(v0: i32):
-        v1 = load.i64 notrap aligned v0+0
-        v2 = load.i32 notrap aligned v0+16
-        v3 = ushr v1, v2
-        return v3
-}
-
-function u0:5(i32) -> i64 system_v {
-    block1(v0: i32):
-        v1 = load.i64 notrap aligned v0+0
-        v2 = load.i64 notrap aligned v0+16
-        v3 = ushr v1, v2
-        return v3
-}
-
-function u0:6(i32) -> i32 system_v {
-    block1(v0: i32):
-        v1 = load.i32 notrap aligned v0+0
-        v2 = load.i64 notrap aligned v0+16
-        v3 = ushr v1, v2
-        return v3
-}
diff --git a/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif b/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif
deleted file mode 100644
index 4e0af65c9f..0000000000
--- a/cranelift/filetests/filetests/isa/x86/load-store-narrow.clif
+++ /dev/null
@@ -1,16 +0,0 @@
-test compile
-target i686 legacy
-
-function u0:0(i64, i32) system_v {
-block0(v0: i64, v1: i32):
-    v2 = bor v0, v0
-    store v2, v1
-    return
-}
-
-function u0:1(i32) -> i64 system_v {
-block0(v1: i32):
-    v0 = load.i64 v1
-    v2 = bor v0, v0
-    return v2
-}
diff --git a/cranelift/filetests/filetests/isa/x86/nop.clif b/cranelift/filetests/filetests/isa/x86/nop.clif
deleted file mode 100644
index cafa90eb4f..0000000000
--- a/cranelift/filetests/filetests/isa/x86/nop.clif
+++ /dev/null
@@ -1,10 +0,0 @@
-test compile
-
-target x86_64 legacy
-
-function %test(i32) -> i32 system_v {
-block0(v0: i32):
-    nop
-    v1 = iconst.i32 42
-    return v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif
deleted file mode 100644
index b5a9658b67..0000000000
--- a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants-32bit.clif
+++ /dev/null
@@ -1,52 +0,0 @@
-; Check that floating-point and integer constants equal to zero are optimized correctly.
-test binemit
-set opt_level=speed_and_size
-target i686 legacy
-
-function %foo() -> f32 fast {
-block0:
-  ; asm: xorps %xmm0, %xmm0
-  [-,%xmm0]    v0 = f32const 0.0     ; bin: 0f 57 c0
-  return v0
-}
-
-function %bar() -> f64 fast {
-block0:
-  ; asm: xorpd %xmm0, %xmm0
-  [-,%xmm0]    v1 = f64const 0.0     ; bin: 66 0f 57 c0
-  return v1
-}
-
-function %zero_dword() -> i32 fast {
-block0:
-  ; asm: xor %eax, %eax
-  [-,%rax]     v0 = iconst.i32 0     ; bin: 31 c0
-  ; asm: xor %edi, %edi
-  [-,%rdi]     v1 = iconst.i32 0     ; bin: 31 ff
-  return v0
-}
-
-function %zero_word() -> i16 fast {
-block0:
-  ; while you may expect this to be encoded like 6631c0, aka
-  ; xor %ax, %ax, the upper 16 bits of the register used for
-  ; i16 are left undefined, so it's not wrong to clear them.
-  ;
-  ; discarding the 66 prefix is shorter, so this test expects
-  ; that we do so.
-  ;
-  ; asm: xor %eax, %eax
-  [-,%rax]     v0 = iconst.i16 0     ; bin: 31 c0
-  ; asm: xor %edi, %edi
-  [-,%rdi]     v1 = iconst.i16 0     ; bin: 31 ff
-  return v0
-}
-
-function %zero_byte() -> i8 fast {
-block0:
-  ; asm: xor %eax, %eax
-  [-,%rax]     v0 = iconst.i8 0     ; bin: 31 c0
-  ; asm: xor %edi, %edi
-  [-,%rdi]     v1 = iconst.i8 0     ; bin: 31 ff
-  return v0
-}
diff --git a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif b/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif
deleted file mode 100644
index 8e469b8b7a..0000000000
--- a/cranelift/filetests/filetests/isa/x86/optimized-zero-constants.clif
+++ /dev/null
@@ -1,72 +0,0 @@
-; Check that floating-point constants equal to zero are optimized correctly.
-test binemit
-set opt_level=speed_and_size
-target x86_64 legacy
-
-function %zero_const_32bit_no_rex() -> f32 fast {
-block0:
-  ; asm: xorps %xmm0, %xmm0
-  [-,%xmm0]    v0 = f32const 0.0     ; bin: 0f 57 c0
-  return v0
-}
-
-function %zero_const_32bit_rex() -> f32 fast {
-block0:
-  ; asm: xorps %xmm8, %xmm8
-  [-,%xmm8]    v1 = f32const 0.0     ; bin: 45 0f 57 c0
-  return v1
-}
-
-function %zero_const_64bit_no_rex() -> f64 fast {
-block0:
-  ; asm: xorpd %xmm0, %xmm0
-  [-,%xmm0]    v0 = f64const 0.0     ; bin: 66 0f 57 c0
-  return v0
-}
-
-function %zero_const_64bit_rex() -> f64 fast {
-block0:
-  ; asm: xorpd %xmm8, %xmm8
-  [-,%xmm8]    v1 = f64const 0.0     ; bin: 66 45 0f 57 c0
-  return v1
-}
-
-function %imm_zero_register() -> i64 fast {
-block0:
-  ; asm: xor %eax, %eax
-  [-,%rax]     v0 = iconst.i64 0     ; bin: 31 c0
-  ; asm: xor %edi, %edi
-  [-,%rdi]     v1 = iconst.i64 0     ; bin: 31 ff
-  ; asm: xor %r8, r8
-  [-,%r8]      v2 = iconst.i64 0     ; bin: 45 31 c0
-  ; asm: xor %r15, %r15
-  [-,%r15]     v4 = iconst.i64 0     ; bin: 45 31 ff
-  return v0
-}
-
-function %zero_word() -> i16 fast {
-block0:
-  ; while you may expect this to be encoded like 6631c0, aka
-  ; xor %ax, %ax, the upper 16 bits of the register used for
-  ; i16 are left undefined, so it's not wrong to clear them.
-  ;
-  ; discarding the 66 prefix is shorter, so this test expects
-  ; that we do so.
-  ;
-  ; asm: xor %eax, %eax
-  [-,%rax]     v0 = iconst.i16 0     ; bin: 31 c0
-  ; asm: xor %edi, %edi
-  [-,%rdi]     v1 = iconst.i16 0     ; bin: 31 ff
-  return v0
-}
-
-function %zero_byte() -> i8 fast {
-block0:
-  ; asm: xor %r8d, %r8d
-  [-,%r15]     v0 = iconst.i8 0     ; bin: 45 31 ff
-  ; asm: xor %eax, eax
-  [-,%rax]     v1 = iconst.i8 0     ; bin: 31 c0
-  ; asm: xor %edi, %edi
-  [-,%rdi]     v2 = iconst.i8 0     ; bin: 31 ff
-  return v0
-}
diff --git a/cranelift/filetests/filetests/isa/x86/pinned-reg.clif b/cranelift/filetests/filetests/isa/x86/pinned-reg.clif
deleted file mode 100644
index b9bc230c33..0000000000
--- a/cranelift/filetests/filetests/isa/x86/pinned-reg.clif
+++ /dev/null
@@ -1,74 +0,0 @@
-test compile
-
-set enable_pinned_reg=true
-set use_pinned_reg_as_heap_base=true
-set opt_level=speed_and_size
-
-target x86_64 legacy
-
-; regex: V=v\d+
-
-; r15 is the pinned heap register. It must not be rewritten, so it must not be
-; used as a tied output register.
-function %tied_input() -> i64 system_v {
-block0:
-    v1 = get_pinned_reg.i64
-    v2 = iadd_imm v1, 42
-    return v2
-}
-
-; check: ,%r15]
-; sameln: v1 = get_pinned_reg.i64
-; nextln: regmove v1, %r15 -> %rax
-; nextln: ,%rax]
-; sameln: iadd_imm v1, 42
-
-;; It musn't be used even if this is a tied input used twice.
-function %tied_twice() -> i64 system_v {
-block0:
-    v1 = get_pinned_reg.i64
-    v2 = iadd v1, v1
-    return v2
-}
-
-; check: ,%r15]
-; sameln: v1 = get_pinned_reg.i64
-; nextln: regmove v1, %r15 -> %rax
-; nextln: ,%rax]
-; sameln: iadd v1, v1
-
-function %uses() -> i64 system_v {
-block0:
-    v1 = get_pinned_reg.i64
-    v2 = iadd_imm v1, 42
-    v3 = get_pinned_reg.i64
-    v4 = iadd v2, v3
-    return v4
-}
-
-; check: ,%r15]
-; sameln: v1 = get_pinned_reg.i64
-; nextln: regmove v1, %r15 -> %rax
-; nextln: ,%rax]
-; sameln: iadd_imm v1, 42
-; nextln: ,%r15
-; sameln: v3 = get_pinned_reg.i64
-; nextln: ,%rax]
-; sameln: iadd v2, v3
-
-; When the pinned register is used as the heap base, the final load instruction
-; must use the %r15 register, since x86 implements the complex addressing mode.
-function u0:1(i64 vmctx) -> i64 system_v {
-    gv0 = vmctx
-    heap0 = static gv0, min 0x000a_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32
-
-block0(v42: i64):
-    v5 = iconst.i32 42
-    v6 = heap_addr.i64 heap0, v5, 0
-    v7 = load.i64 v6
-    return v7
-}
-
-; check: ,%r15]
-; sameln: $(heap_base=$V) = get_pinned_reg.i64
-; nextln: load_complex.i64 $heap_base+
diff --git a/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif b/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif
deleted file mode 100644
index 4b4a05244c..0000000000
--- a/cranelift/filetests/filetests/isa/x86/probestack-adjusts-sp.clif
+++ /dev/null
@@ -1,28 +0,0 @@
-test compile
-set use_colocated_libcalls=1
-set probestack_func_adjusts_sp=1
-target x86_64 legacy
-
-; Like %big in probestack.clif, but with the probestack function adjusting
-; the stack pointer itself.
-
-function %big() system_v {
-    ss0 = explicit_slot 300000
-block0:
-    return
-}
-; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
-; nextln:    ss0 = explicit_slot 300000, offset -300016
-; nextln:    ss1 = incoming_arg 16, offset -16
-; nextln:    sig0 = (i64 [%rax]) probestack
-; nextln:    fn0 = colocated %Probestack sig0
-; nextln: 
-; nextln:                                 block0(v0: i64 [%rbp]):
-; nextln: [RexOp1pushq#50]                    x86_push v0
-; nextln: [RexOp1copysp#8089]                 copy_special %rsp -> %rbp
-; nextln: [RexOp1pu_id#b8,%rax]               v1 = iconst.i64 0x0004_93e0
-; nextln: [Op1call_id#e8]                     call fn0(v1)
-; nextln: [RexOp1adjustsp_id#8081]            adjust_sp_up_imm 0x0004_93e0
-; nextln: [RexOp1popq#58,%rbp]                v2 = x86_pop.i64
-; nextln: [Op1ret#c3]                         return v2
-; nextln: }
diff --git a/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif b/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif
deleted file mode 100644
index 6b9b4f3342..0000000000
--- a/cranelift/filetests/filetests/isa/x86/probestack-disabled.clif
+++ /dev/null
@@ -1,24 +0,0 @@
-test compile
-set use_colocated_libcalls=1
-set enable_probestack=0
-target x86_64 legacy
-
-; Like %big in probestack.clif, but with probes disabled.
-
-function %big() system_v {
-    ss0 = explicit_slot 300000
-block0:
-    return
-}
-; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
-; nextln:    ss0 = explicit_slot 300000, offset -300016
-; nextln:    ss1 = incoming_arg 16, offset -16
-; nextln: 
-; nextln:                                 block0(v0: i64 [%rbp]):
-; nextln: [RexOp1pushq#50]                    x86_push v0
-; nextln: [RexOp1copysp#8089]                 copy_special %rsp -> %rbp
-; nextln: [RexOp1adjustsp_id#d081]            adjust_sp_down_imm 0x0004_93e0
-; nextln: [RexOp1adjustsp_id#8081]            adjust_sp_up_imm 0x0004_93e0
-; nextln: [RexOp1popq#58,%rbp]                v1 = x86_pop.i64
-; nextln: [Op1ret#c3]                         return v1
-; nextln: }
diff --git a/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif b/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif
deleted file mode 100644
index 2837ddd0c9..0000000000
--- a/cranelift/filetests/filetests/isa/x86/probestack-noncolocated.clif
+++ /dev/null
@@ -1,27 +0,0 @@
-test compile
-target x86_64 legacy
-
-; Like %big in probestack.clif, but without a colocated libcall.
-
-function %big() system_v {
-    ss0 = explicit_slot 300000
-block0:
-    return
-}
-; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
-; nextln:    ss0 = explicit_slot 300000, offset -300016
-; nextln:    ss1 = incoming_arg 16, offset -16
-; nextln:    sig0 = (i64 [%rax]) -> i64 [%rax] probestack
-; nextln:    fn0 = %Probestack sig0
-; nextln: 
-; nextln:                                 block0(v0: i64 [%rbp]):
-; nextln: [RexOp1pushq#50]                    x86_push v0
-; nextln: [RexOp1copysp#8089]                 copy_special %rsp -> %rbp
-; nextln: [RexOp1pu_id#b8,%rax]               v1 = iconst.i64 0x0004_93e0
-; nextln: [RexOp1fnaddr8#80b8,%r11]           v2 = func_addr.i64 fn0
-; nextln: [RexOp1call_r#20ff,%rax]            v3 = call_indirect sig0, v2(v1)
-; nextln: [RexOp1adjustsp#8029]               adjust_sp_down v3
-; nextln: [RexOp1adjustsp_id#8081]            adjust_sp_up_imm 0x0004_93e0
-; nextln: [RexOp1popq#58,%rbp]                v4 = x86_pop.i64
-; nextln: [Op1ret#c3]                         return v4
-; nextln: }
diff --git a/cranelift/filetests/filetests/isa/x86/probestack-size.clif b/cranelift/filetests/filetests/isa/x86/probestack-size.clif
deleted file mode 100644
index efb1900170..0000000000
--- a/cranelift/filetests/filetests/isa/x86/probestack-size.clif
+++ /dev/null
@@ -1,74 +0,0 @@
-test compile
-set use_colocated_libcalls=1
-set probestack_size_log2=13
-target x86_64 legacy
-
-; Like %big in probestack.clif, but now the probestack size is bigger
-; and it no longer needs a probe.
-
-function %big() system_v {
-    ss0 = explicit_slot 4097
-block0:
-    return
-}
-
-; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
-; nextln:     ss0 = explicit_slot 4097, offset -4113
-; nextln:     ss1 = incoming_arg 16, offset -16
-; nextln: 
-; nextln:                                 block0(v0: i64 [%rbp]):
-; nextln: [RexOp1pushq#50]                    x86_push v0
-; nextln: [RexOp1copysp#8089]                 copy_special %rsp -> %rbp
-; nextln: [RexOp1adjustsp_id#d081]            adjust_sp_down_imm 4112
-; nextln: [RexOp1adjustsp_id#8081]            adjust_sp_up_imm 4112
-; nextln: [RexOp1popq#58,%rbp]                v1 = x86_pop.i64
-; nextln: [Op1ret#c3]                         return v1
-; nextln: }
-
-
-; Like %big; still doesn't need a probe.
-
-function %bigger() system_v {
-    ss0 = explicit_slot 8192
-block0:
-    return
-}
-
-; check: function %bigger(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
-; nextln:     ss0 = explicit_slot 8192, offset -8208
-; nextln:     ss1 = incoming_arg 16, offset -16
-; nextln: 
-; nextln:                                 block0(v0: i64 [%rbp]):
-; nextln: [RexOp1pushq#50]                    x86_push v0
-; nextln: [RexOp1copysp#8089]                 copy_special %rsp -> %rbp
-; nextln: [RexOp1adjustsp_id#d081]            adjust_sp_down_imm 8192
-; nextln: [RexOp1adjustsp_id#8081]            adjust_sp_up_imm 8192
-; nextln: [RexOp1popq#58,%rbp]                v1 = x86_pop.i64
-; nextln: [Op1ret#c3]                         return v1
-; nextln: }
-
-
-; Like %bigger; this needs a probe.
-
-function %biggest() system_v {
-    ss0 = explicit_slot 8193
-block0:
-    return
-}
-
-; check: function %biggest(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
-; nextln:     ss0 = explicit_slot 8193, offset -8209
-; nextln:     ss1 = incoming_arg 16, offset -16
-; nextln:     sig0 = (i64 [%rax]) -> i64 [%rax] probestack
-; nextln:     fn0 = colocated %Probestack sig0
-; nextln: 
-; nextln:                                 block0(v0: i64 [%rbp]):
-; nextln: [RexOp1pushq#50]                    x86_push v0
-; nextln: [RexOp1copysp#8089]                 copy_special %rsp -> %rbp
-; nextln: [RexOp1pu_id#b8,%rax]               v1 = iconst.i64 8208
-; nextln: [Op1call_id#e8,%rax]                v2 = call fn0(v1)
-; nextln: [RexOp1adjustsp#8029]               adjust_sp_down v2
-; nextln: [RexOp1adjustsp_id#8081]            adjust_sp_up_imm 8208
-; nextln: [RexOp1popq#58,%rbp]                v3 = x86_pop.i64
-; nextln: [Op1ret#c3]                         return v3
-; nextln: }
diff --git a/cranelift/filetests/filetests/isa/x86/probestack.clif b/cranelift/filetests/filetests/isa/x86/probestack.clif
deleted file mode 100644
index c434cf5f63..0000000000
--- a/cranelift/filetests/filetests/isa/x86/probestack.clif
+++ /dev/null
@@ -1,49 +0,0 @@
-test compile
-set use_colocated_libcalls=1
-target x86_64 legacy
-
-; A function with a big stack frame. This should have a stack probe.
-
-function %big() system_v {
-    ss0 = explicit_slot 4097
-block0:
-    return
-}
-; check: function %big(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
-; nextln:    ss0 = explicit_slot 4097, offset -4113
-; nextln:    ss1 = incoming_arg 16, offset -16
-; nextln:    sig0 = (i64 [%rax]) -> i64 [%rax] probestack
-; nextln:    fn0 = colocated %Probestack sig0
-; nextln: 
-; nextln:                                 block0(v0: i64 [%rbp]):
-; nextln: [RexOp1pushq#50]                    x86_push v0
-; nextln: [RexOp1copysp#8089]                 copy_special %rsp -> %rbp
-; nextln: [RexOp1pu_id#b8,%rax]               v1 = iconst.i64 4112
-; nextln: [Op1call_id#e8,%rax]                v2 = call fn0(v1)
-; nextln: [RexOp1adjustsp#8029]               adjust_sp_down v2
-; nextln: [RexOp1adjustsp_id#8081]            adjust_sp_up_imm 4112
-; nextln: [RexOp1popq#58,%rbp]                v3 = x86_pop.i64
-; nextln: [Op1ret#c3]                         return v3
-; nextln: }
-
-
-; A function with a small enough stack frame. This shouldn't have a stack probe.
-
-function %small() system_v {
-    ss0 = explicit_slot 4096
-block0:
-    return
-}
-
-; check: function %small(i64 fp [%rbp]) -> i64 fp [%rbp] system_v {
-; nextln:     ss0 = explicit_slot 4096, offset -4112
-; nextln:     ss1 = incoming_arg 16, offset -16
-; nextln: 
-; nextln:                                 block0(v0: i64 [%rbp]):
-; nextln: [RexOp1pushq#50]                    x86_push v0
-; nextln: [RexOp1copysp#8089]                 copy_special %rsp -> %rbp
-; nextln: [RexOp1adjustsp_id#d081]            adjust_sp_down_imm 4096
-; nextln: [RexOp1adjustsp_id#8081]            adjust_sp_up_imm 4096
-; nextln: [RexOp1popq#58,%rbp]                v1 = x86_pop.i64
-; nextln: [Op1ret#c3]                         return v1
-; nextln: }
diff --git a/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif b/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif
deleted file mode 100644
index 831928186b..0000000000
--- a/cranelift/filetests/filetests/isa/x86/prologue-epilogue.clif
+++ /dev/null
@@ -1,314 +0,0 @@
-test compile
-set opt_level=speed_and_size
-set is_pic
-set enable_probestack=false
-target x86_64 legacy haswell
-
-; An empty function.
-
-function %empty() {
-block0:
-    return
-}
-
-; check: function %empty(i64 fp [%rbp]) -> i64 fp [%rbp] fast {
-; nextln:     ss0 = incoming_arg 16, offset -16
-; nextln: 
-; nextln: block0(v0: i64 [%rbp]):
-; nextln:     x86_push v0
-; nextln:     copy_special %rsp -> %rbp
-; nextln:     v1 = x86_pop.i64
-; nextln:     return v1
-; nextln: }
-
-; A function with a single stack slot.
-
-function %one_stack_slot() {
-    ss0 = explicit_slot 168
-block0:
-    return
-}
-
-; check: function %one_stack_slot(i64 fp [%rbp]) -> i64 fp [%rbp] fast {
-; nextln:     ss0 = explicit_slot 168, offset -184
-; nextln:     ss1 = incoming_arg 16, offset -16
-; nextln: 
-; nextln: block0(v0: i64 [%rbp]):
-; nextln:     x86_push v0
-; nextln:     copy_special %rsp -> %rbp
-; nextln:     adjust_sp_down_imm 176
-; nextln:     adjust_sp_up_imm 176
-; nextln:     v1 = x86_pop.i64
-; nextln:     return v1
-; nextln: }
-
-; A function performing a call.
-
-function %call() {
-    fn0 = %foo()
-
-block0:
-    call fn0()
-    return
-}
-
-; check: function %call(i64 fp [%rbp]) -> i64 fp [%rbp] fast {
-; nextln:     ss0 = incoming_arg 16, offset -16
-; nextln:     sig0 = () fast
-; nextln:     fn0 = %foo sig0
-; nextln: 
-; nextln: block0(v0: i64 [%rbp]):
-; nextln:     x86_push v0
-; nextln:     copy_special %rsp -> %rbp
-; nextln:     call fn0()
-; nextln:     v1 = x86_pop.i64
-; nextln:     return v1
-; nextln: }
-
-; A function that uses a lot of registers but doesn't quite need to spill.
-
-function %no_spill(i64, i64) {
-block0(v0: i64, v1: i64):
-    v2 = load.i32 v0+0
-    v3 = load.i32 v0+8
-    v4 = load.i32 v0+16
-    v5 = load.i32 v0+24
-    v6 = load.i32 v0+32
-    v7 = load.i32 v0+40
-    v8 = load.i32 v0+48
-    v9 = load.i32 v0+56
-    v10 = load.i32 v0+64
-    v11 = load.i32 v0+72
-    v12 = load.i32 v0+80
-    v13 = load.i32 v0+88
-    v14 = load.i32 v0+96
-    store.i32 v2, v1+0
-    store.i32 v3, v1+8
-    store.i32 v4, v1+16
-    store.i32 v5, v1+24
-    store.i32 v6, v1+32
-    store.i32 v7, v1+40
-    store.i32 v8, v1+48
-    store.i32 v9, v1+56
-    store.i32 v10, v1+64
-    store.i32 v11, v1+72
-    store.i32 v12, v1+80
-    store.i32 v13, v1+88
-    store.i32 v14, v1+96
-    return
-}
-
-; check: function %no_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] fast {
-; nextln:     ss0 = incoming_arg 56, offset -56
-; nextln: 
-; nextln: block0(v0: i64 [%rdi], v1: i64 [%rsi], v15: i64 [%rbp], v16: i64 [%rbx], v17: i64 [%r12], v18: i64 [%r13], v19: i64 [%r14], v20: i64 [%r15]):
-; nextln:     x86_push v15
-; nextln:     copy_special %rsp -> %rbp
-; nextln:     x86_push v16
-; nextln:     x86_push v17
-; nextln:     x86_push v18
-; nextln:     x86_push v19
-; nextln:     x86_push v20
-; nextln:     v2 = load.i32 v0
-; nextln:     v3 = load.i32 v0+8
-; nextln:     v4 = load.i32 v0+16
-; nextln:     v5 = load.i32 v0+24
-; nextln:     v6 = load.i32 v0+32
-; nextln:     v7 = load.i32 v0+40
-; nextln:     v8 = load.i32 v0+48
-; nextln:     v9 = load.i32 v0+56
-; nextln:     v10 = load.i32 v0+64
-; nextln:     v11 = load.i32 v0+72
-; nextln:     v12 = load.i32 v0+80
-; nextln:     v13 = load.i32 v0+88
-; nextln:     v14 = load.i32 v0+96
-; nextln:     store v2, v1
-; nextln:     store v3, v1+8
-; nextln:     store v4, v1+16
-; nextln:     store v5, v1+24
-; nextln:     store v6, v1+32
-; nextln:     store v7, v1+40
-; nextln:     store v8, v1+48
-; nextln:     store v9, v1+56
-; nextln:     store v10, v1+64
-; nextln:     store v11, v1+72
-; nextln:     store v12, v1+80
-; nextln:     store v13, v1+88
-; nextln:     store v14, v1+96
-; nextln:     v26 = x86_pop.i64
-; nextln:     v25 = x86_pop.i64
-; nextln:     v24 = x86_pop.i64
-; nextln:     v23 = x86_pop.i64
-; nextln:     v22 = x86_pop.i64
-; nextln:     v21 = x86_pop.i64
-; nextln:     return v21, v22, v23, v24, v25, v26
-; nextln: }
-
-; This function requires too many registers and must spill.
-
-function %yes_spill(i64, i64) {
-block0(v0: i64, v1: i64):
-    v2 = load.i32 v0+0
-    v3 = load.i32 v0+8
-    v4 = load.i32 v0+16
-    v5 = load.i32 v0+24
-    v6 = load.i32 v0+32
-    v7 = load.i32 v0+40
-    v8 = load.i32 v0+48
-    v9 = load.i32 v0+56
-    v10 = load.i32 v0+64
-    v11 = load.i32 v0+72
-    v12 = load.i32 v0+80
-    v13 = load.i32 v0+88
-    v14 = load.i32 v0+96
-    v15 = load.i32 v0+104
-    store.i32 v2, v1+0
-    store.i32 v3, v1+8
-    store.i32 v4, v1+16
-    store.i32 v5, v1+24
-    store.i32 v6, v1+32
-    store.i32 v7, v1+40
-    store.i32 v8, v1+48
-    store.i32 v9, v1+56
-    store.i32 v10, v1+64
-    store.i32 v11, v1+72
-    store.i32 v12, v1+80
-    store.i32 v13, v1+88
-    store.i32 v14, v1+96
-    store.i32 v15, v1+104
-    return
-}
-
-; check: function %yes_spill(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 fp [%rbp], i64 csr [%rbx], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] fast {
-; check:     ss0 = spill_slot
-
-; check: block0(v16: i64 [%rdi], v17: i64 [%rsi], v48: i64 [%rbp], v49: i64 [%rbx], v50: i64 [%r12], v51: i64 [%r13], v52: i64 [%r14], v53: i64 [%r15]):
-; nextln:     x86_push v48
-; nextln:     copy_special %rsp -> %rbp
-; nextln:     x86_push v49
-; nextln:     x86_push v50
-; nextln:     x86_push v51
-; nextln:     x86_push v52
-; nextln:     x86_push v53
-; nextln:     adjust_sp_down_imm
-
-; check:      spill
-
-; check:      fill
-
-; check:     adjust_sp_up_imm
-; nextln:     v59 = x86_pop.i64
-; nextln:     v58 = x86_pop.i64
-; nextln:     v57 = x86_pop.i64
-; nextln:     v56 = x86_pop.i64
-; nextln:     v55 = x86_pop.i64
-; nextln:     v54 = x86_pop.i64
-; nextln:     return v54, v55, v56, v57, v58, v59
-; nextln: }
-
-; A function which uses diverted registers.
-
-function %divert(i32) -> i32 system_v {
-block0(v0: i32):
-    v2 = iconst.i32 0
-    v3 = iconst.i32 1
-    jump block1(v0, v3, v2)
-
-block1(v4: i32, v5: i32, v6: i32):
-    brz v4, block3
-    jump block2
-
-block2:
-    v7 = iadd v5, v6
-    v8 = iadd_imm v4, -1
-    jump block1(v8, v7, v5)
-
-block3:
-    return v5
-}
-
-; check: function %divert
-; check: regmove.i32 v5, %rcx -> %rbx
-; check: [Op1popq#58,%rbx]                   v15 = x86_pop.i64
-
-; Stack limit checking
-
-function %stack_limit(i64 stack_limit) {
-    ss0 = explicit_slot 168
-block0(v0: i64):
-    return
-}
-
-; check: function %stack_limit(i64 stack_limit [%rdi], i64 fp [%rbp]) -> i64 fp [%rbp] fast {
-; nextln:     ss0 = explicit_slot 168, offset -184
-; nextln:     ss1 = incoming_arg 16, offset -16
-; nextln: 
-; nextln: block0(v0: i64 [%rdi], v4: i64 [%rbp]):
-; nextln:     v1 = copy v0
-; nextln:     v2 = iadd_imm v1, 176
-; nextln:     v3 = ifcmp_sp v2
-; nextln:     trapif uge v3, stk_ovf
-; nextln:     x86_push v4
-; nextln:     copy_special %rsp -> %rbp
-; nextln:     adjust_sp_down_imm 176
-; nextln:     adjust_sp_up_imm 176
-; nextln:     v5 = x86_pop.i64
-; nextln:     return v5
-; nextln: }
-
-function %big_stack_limit(i64 stack_limit) {
-    ss0 = explicit_slot 40000
-block0(v0: i64):
-    return
-}
-
-; check: function %big_stack_limit(i64 stack_limit [%rdi], i64 fp [%rbp]) -> i64 fp [%rbp] fast {
-; nextln:     ss0 = explicit_slot 40000, offset -40016
-; nextln:     ss1 = incoming_arg 16, offset -16
-; nextln: 
-; nextln: block0(v0: i64 [%rdi], v5: i64 [%rbp]):
-; nextln:     v1 = copy v0
-; nextln:     v2 = ifcmp_sp v1
-; nextln:     trapif uge v2, stk_ovf
-; nextln:     v3 = iadd_imm v1, 0x9c40
-; nextln:     v4 = ifcmp_sp v3
-; nextln:     trapif uge v4, stk_ovf
-; nextln:     x86_push v5
-; nextln:     copy_special %rsp -> %rbp
-; nextln:     adjust_sp_down_imm 0x9c40
-; nextln:     adjust_sp_up_imm 0x9c40
-; nextln:     v6 = x86_pop.i64
-; nextln:     return v6
-; nextln: }
-
-function %limit_preamble(i64 vmctx) {
-    gv0 = vmctx
-    gv1 = load.i64 notrap aligned gv0
-    gv2 = load.i64 notrap aligned gv1+4
-    stack_limit = gv2
-    ss0 = explicit_slot 20
-block0(v0: i64):
-    return
-}
-
-; check: function %limit_preamble(i64 vmctx [%rdi], i64 fp [%rbp]) -> i64 fp [%rbp] fast {
-; nextln:     ss0 = explicit_slot 20, offset -36
-; nextln:     ss1 = incoming_arg 16, offset -16
-; nextln:     gv0 = vmctx
-; nextln:     gv1 = load.i64 notrap aligned gv0
-; nextln:     gv2 = load.i64 notrap aligned gv1+4
-; nextln:     stack_limit = gv2
-; nextln: 
-; nextln: block0(v0: i64 [%rdi], v5: i64 [%rbp]):
-; nextln:     v1 = load.i64 notrap aligned v0
-; nextln:     v2 = load.i64 notrap aligned v1+4
-; nextln:     v3 = iadd_imm v2, 32
-; nextln:     v4 = ifcmp_sp v3
-; nextln:     trapif uge v4, stk_ovf
-; nextln:     x86_push v5
-; nextln:     copy_special %rsp -> %rbp
-; nextln:     adjust_sp_down_imm 32
-; nextln:     adjust_sp_up_imm 32
-; nextln:     v6 = x86_pop.i64
-; nextln:     return v6
-; nextln: }
diff --git a/cranelift/filetests/filetests/isa/x86/relax_branch.clif b/cranelift/filetests/filetests/isa/x86/relax_branch.clif
deleted file mode 100644
index 0e123f8a36..0000000000
--- a/cranelift/filetests/filetests/isa/x86/relax_branch.clif
+++ /dev/null
@@ -1,132 +0,0 @@
-test binemit
-set opt_level=speed_and_size
-set avoid_div_traps
-set baldrdash_prologue_words=3
-set emit_all_ones_funcaddrs
-set enable_probestack=false
-target x86_64 legacy haswell
-
-; This checks that a branch that is too far away is getting relaxed. In
-; particular, the first block has to be non-empty but its encoding size must be
-; zero (i.e. not generate any code). See also issue #666 for more details.
-
-function u0:2691(i32 [%rdi], i32 [%rsi], i64 vmctx [%r14]) -> i64 uext [%rax] baldrdash_system_v {
-    ss0 = incoming_arg 24, offset -24
-    gv0 = vmctx
-    gv1 = iadd_imm.i64 gv0, 48
-    gv2 = load.i64 notrap aligned readonly gv0
-    heap0 = static gv2, min 0xd839_6000, bound 0x0001_0000_0000, offset_guard 0x8000_0000, index_type i32
-
-                                block0(v0: i32 [%rdi], v1: i32 [%rsi], v2: i64 [%r14]):
-@0005 [-]                           fallthrough block3(v0, v1)
-
-                                block3(v8: i32 [%rdi], v19: i32 [%rsi]):
-@0005 [RexOp1ldDisp8#808b,%rax]        v7 = load.i64 v2+48
-@0005 [RexOp1rcmp_ib#f083,%rflags]     v91 = ifcmp_imm v7, 0
-@0005 [trapif#00]                      trapif ne v91, interrupt
-[DynRexOp1umr#89,%rax]                 v105 = copy v8
-@000b [DynRexOp1r_ib#83,%rax]          v10 = iadd_imm v105, 1
-                                       v80 -> v10
-@0010 [Op1umr#89,%rcx]                 v92 = uextend.i64 v8
-@0010 [RexOp1ld#808b,%rdx]             v93 = load.i64 notrap aligned readonly v2
-                                       v95 -> v93
-@0010 [Op2ldWithIndex#4be,%rcx]        v12 = sload8_complex.i32 v93+v92
-[DynRexOp1umr#89,%rbx]                 v106 = copy v12
-@0017 [DynRexOp1r_ib#40c1,%rbx]        v14 = ishl_imm v106, 24
-@001a [DynRexOp1r_ib#70c1,%rbx]        v16 = sshr_imm v14, 24
-[DynRexOp1umr#89,%rdi]                 v107 = copy v16
-@001f [DynRexOp1r_ib#83,%rdi]          v18 = iadd_imm v107, 32
-[DynRexOp1umr#89,%r8]                  v108 = copy v19
-@0026 [DynRexOp1r_ib#83,%r8]           v21 = iadd_imm v108, 1
-                                       v82 -> v21
-@002b [Op1umr#89,%rsi]                 v94 = uextend.i64 v19
-@002b [Op2ldWithIndex#4be,%rdx]        v23 = sload8_complex.i32 v93+v94
-                                       v55 -> v23
-[DynRexOp1umr#89,%rsi]                 v109 = copy v23
-@0032 [DynRexOp1r_ib#40c1,%rsi]        v25 = ishl_imm v109, 24
-@0035 [DynRexOp1r_ib#70c1,%rsi]        v27 = sshr_imm v25, 24
-                                       v69 -> v27
-[DynRexOp1umr#89,%r9]                  v110 = copy v27
-@003a [DynRexOp1r_ib#83,%r9]           v29 = iadd_imm v110, 32
-                                       v68 -> v29
-@0042 [DynRexOp1r_ib#83,%rcx]          v31 = iadd_imm v12, -65
-@0045 [DynRexOp1r_ib#40c1,%rcx]        v33 = ishl_imm v31, 24
-@0048 [DynRexOp1r_ib#70c1,%rcx]        v35 = sshr_imm v33, 24
-@004c [DynRexOp1r_id#4081,%rcx]        v37 = band_imm v35, 255
-[DynRexOp1rcmp_ib#7083,%rflags]        v97 = ifcmp_imm v37, 26
-@0050 [Op1brib#70]                     brif sge v97, block6
-@0050 [-]                              fallthrough block10
-
-                                block10:
-[DynRexOp1umr#89,%rcx]              v101 = copy v18
-@0054 [Op1jmpb#eb]                  jump block5(v18, v101)
-
-                                block6:
-[DynRexOp1umr#89,%rcx]              v102 = copy.i32 v16
-@0059 [RexOp1rmov#89]               regmove v102, %rcx -> %rdi
-@0059 [RexOp1rmov#89]               regmove.i32 v16, %rbx -> %rcx
-@0059 [-]                           fallthrough block5(v102, v16)
-
-                                block5(v41: i32 [%rdi], v84: i32 [%rcx]):
-                                    v83 -> v84
-@005d [DynRexOp1r_id#4081,%rdi]     v43 = band_imm v41, 255
-@0062 [DynRexOp1r_ib#40c1,%rdi]     v45 = ishl_imm v43, 24
-                                    v52 -> v45
-@0065 [RexOp1rmov#89]               regmove v45, %rdi -> %rbx
-@0065 [DynRexOp1r_ib#70c1,%rbx]     v47 = sshr_imm v45, 24
-                                    v54 -> v47
-@0068 [RexOp1rmov#89]               regmove v47, %rbx -> %rdi
-@0068 [DynRexOp1icscc_ib#7083,%rbx] v49 = icmp_imm ne v47, 0
-@0068 [RexOp2urm_noflags#4b6,%r10]  v50 = bint.i32 v49
-@0076 [DynRexOp1r_ib#83,%rdx]       v57 = iadd_imm.i32 v23, -65
-@0079 [DynRexOp1r_ib#40c1,%rdx]     v59 = ishl_imm v57, 24
-@007c [DynRexOp1r_ib#70c1,%rdx]     v61 = sshr_imm v59, 24
-@0080 [DynRexOp1r_id#4081,%rdx]     v63 = band_imm v61, 255
-[DynRexOp1rcmp_ib#7083,%rflags]     v98 = ifcmp_imm v63, 26
-@0084 [RexOp1rmov#89]               regmove v47, %rdi -> %rbx
-@0084 [Op1brib#70]                  brif sge v98, block8
-@0084 [-]                           fallthrough block11
-
-                                block11:
-[DynRexOp1umr#89,%rdx]              v103 = copy.i32 v29
-@0088 [Op1jmpb#eb]                  jump block7(v29, v10, v21, v103)
-
-                                block8:
-[DynRexOp1umr#89,%rdx]              v104 = copy.i32 v27
-@008d [RexOp1rmov#89]               regmove v104, %rdx -> %r9
-@008d [RexOp1rmov#89]               regmove.i32 v27, %rsi -> %rdx
-@008d [-]                           fallthrough block7(v104, v10, v21, v27)
-
-                                block7(v67: i32 [%r9], v79: i32 [%rax], v81: i32 [%r8], v87: i32 [%rdx]):
-@0091 [DynRexOp1r_id#4081,%r9]           v71 = band_imm v67, 255
-@0094 [DynRexOp1r_ib#40c1,%r9]           v73 = ishl_imm v71, 24
-@0097 [DynRexOp1r_ib#70c1,%r9]           v75 = sshr_imm v73, 24
-@0098 [DynRexOp1icscc#39,%rbx]           v76 = icmp.i32 eq v47, v75
-@0098 [Op2urm_noflags_abcd#4b6,%rbx]     v77 = bint.i32 v76
-@0099 [DynRexOp1rr#21,%r10]              v78 = band.i32 v50, v77
-@009a [RexOp1tjccb#74]                   brz v78, block9
-@009a [-]                                fallthrough block12
-
-                                block12:
-[DynRexOp1umr#89,%rcx]              v99 = copy v81
-[DynRexOp1umr#89,%rdx]              v100 = copy v79
-@00a4 [RexOp1rmov#89]               regmove v100, %rdx -> %rdi
-@00a4 [RexOp1rmov#89]               regmove v99, %rcx -> %rsi
-@00a4 [Op1jmpd#e9]                  jump block3(v100, v99); bin: e9 ffffff2d
-
-                                block9:
-@00a7 [-]                           fallthrough block4
-
-                                block4:
-@00ad [DynRexOp1r_id#4081,%rcx]     v86 = band_imm.i32 v84, 255
-@00b3 [DynRexOp1r_id#4081,%rdx]     v89 = band_imm.i32 v87, 255
-@00b4 [DynRexOp1rr#29,%rcx]         v90 = isub v86, v89
-@00b5 [-]                           fallthrough block2(v90)
-
-                                block2(v5: i32 [%rcx]):
-@00b6 [-]                           fallthrough block1(v5)
-
-                                block1(v3: i32 [%rcx]):
-@00b6 [Op1umr#89,%rax]              v96 = uextend.i64 v3
-@00b6 [-]                           fallthrough_return v96
-}
diff --git a/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif b/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif
deleted file mode 100644
index a26e2d865c..0000000000
--- a/cranelift/filetests/filetests/isa/x86/saturating-float-cast.clif
+++ /dev/null
@@ -1,13 +0,0 @@
-test compile
-target x86_64 legacy
-
-function u0:0() -> f32 system_v {
-block0:
-    v0 = iconst.i8 255
-; check: v2 = iconst.i32 255
-; nextln: v0 = ireduce.i8 v2
-    v1 = fcvt_from_uint.f32 v0
-; nextln: v3 = uextend.i64 v0
-; nextln: v1 = fcvt_from_sint.f32 v3
-    return v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/select-i8.clif b/cranelift/filetests/filetests/isa/x86/select-i8.clif
deleted file mode 100644
index 44b7e32d12..0000000000
--- a/cranelift/filetests/filetests/isa/x86/select-i8.clif
+++ /dev/null
@@ -1,8 +0,0 @@
-test compile
-target x86_64 legacy
-
-function u0:0(b1, i8, i8) -> i8 {
-block0(v0: b1, v1: i8, v2: i8):
-    v3 = select v0, v1, v2
-    return v3
-}
diff --git a/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif b/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif
deleted file mode 100644
index 31b73da391..0000000000
--- a/cranelift/filetests/filetests/isa/x86/shrink-multiple-uses.clif
+++ /dev/null
@@ -1,18 +0,0 @@
-test shrink
-set opt_level=speed_and_size
-target x86_64 legacy
-
-function %test_multiple_uses(i32 [%rdi]) -> i32 {
-block0(v0: i32 [%rdi]):
-[DynRexOp1rcmp_ib#7083,%rflags]     v3 = ifcmp_imm v0, 0
-[Op2seti_abcd#490,%rax]             v1 = trueif eq v3
-[RexOp2urm_noflags#4b6,%rax]        v2 = bint.i32 v1
-[Op1brib#70]                        brif eq v3, block1
-[Op1jmpb#eb]                        jump block2
-
-block2:
-[Op1ret#c3]                         return v2
-
-block1:
-[Op2trap#40b]                       trap user0
-}
diff --git a/cranelift/filetests/filetests/isa/x86/shrink.clif b/cranelift/filetests/filetests/isa/x86/shrink.clif
deleted file mode 100644
index bb787832c9..0000000000
--- a/cranelift/filetests/filetests/isa/x86/shrink.clif
+++ /dev/null
@@ -1,40 +0,0 @@
-test binemit
-set opt_level=speed_and_size
-target x86_64 legacy
-
-; Test that instruction shrinking eliminates REX prefixes when possible.
-
-; The binary encodings can be verified with the command:
-;
-;   sed -ne 's/^ *; asm: *//p' filetests/isa/x86/shrink.clif | llvm-mc -show-encoding -triple=x86_64
-;
-
-function %test_shrinking(i32) -> i32 {
-block0(v0: i32 [ %rdi ]):
-           ; asm: movl $0x2,%eax
-[-,%rcx]   v1 = iconst.i32 2     ; bin: b9 00000002
-           ; asm: subl %ecx,%edi
-[-,%rdi]   v2 = isub v0, v1      ; bin: 29 cf
-           return v2
-}
-
-function %test_not_shrinking(i32) -> i32 {
-block0(v0: i32 [ %r8 ]):
-           ; asm: movl $0x2,%eax
-[-,%rcx]   v1 = iconst.i32 2     ; bin: b9 00000002
-           ; asm: subl %ecx,%edi
-[-,%r8]    v2 = isub v0, v1      ; bin: 41 29 c8
-           return v2
-}
-
-function %test_not_shrinking_i8() {
-block0:
-[-,%rsi]   v1 = iconst.i8 1
-           ; asm: movsbl %sil,%esi
-[-,%rsi]   v2 = sextend.i32 v1   ; bin: 40 0f be f6
-           ; asm: movzbl %sil,%esi
-[-,%rsi]   v3 = uextend.i32 v1   ; bin: 40 0f b6 f6
-           ; asm: movzbl %sil,%esi
-[-,%rsi]   v4 = uextend.i64 v1   ; bin: 40 0f b6 f6
-           trap user0
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif
deleted file mode 100644
index 0a8fbe7f0c..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-binemit.clif
+++ /dev/null
@@ -1,116 +0,0 @@
-test binemit
-set enable_simd
-target x86_64 legacy skylake
-
-function %arithmetic_i8x16(i8x16, i8x16) {
-block0(v0: i8x16 [%xmm6], v1: i8x16 [%xmm2]):
-[-, %xmm6]    v2 = iadd v0, v1        ; bin: 66 0f fc f2
-[-, %xmm6]    v3 = isub v0, v1        ; bin: 66 0f f8 f2
-[-, %xmm6]    v4 = sadd_sat v0, v1    ; bin: 66 0f ec f2
-[-, %xmm6]    v5 = ssub_sat v0, v1    ; bin: 66 0f e8 f2
-[-, %xmm6]    v6 = usub_sat v0, v1    ; bin: 66 0f d8 f2
-[-, %xmm6]    v7 = avg_round v0, v1   ; bin: 66 0f e0 f2
-[-, %xmm6]    v9 = iabs v1            ; bin: 66 0f 38 1c f2
-
-    return
-}
-
-function %arithmetic_i16x8(i16x8, i16x8) {
-block0(v0: i16x8 [%xmm3], v1: i16x8 [%xmm5]):
-[-, %xmm3]    v2 = iadd v0, v1        ; bin: 66 0f fd dd
-[-, %xmm3]    v3 = isub v0, v1        ; bin: 66 0f f9 dd
-[-, %xmm3]    v4 = imul v0, v1        ; bin: 66 0f d5 dd
-[-, %xmm3]    v5 = uadd_sat v0, v1    ; bin: 66 0f dd dd
-[-, %xmm3]    v6 = ssub_sat v0, v1    ; bin: 66 0f e9 dd
-[-, %xmm3]    v7 = usub_sat v0, v1    ; bin: 66 0f d9 dd
-[-, %xmm3]    v8 = avg_round v0, v1   ; bin: 66 0f e3 dd
-[-, %xmm3]    v9 = iabs v1            ; bin: 66 0f 38 1d dd
-
-    return
-}
-
-function %arithmetic_i32x4(i32x4, i32x4) {
-block0(v0: i32x4 [%xmm0], v1: i32x4 [%xmm1]):
-[-, %xmm0]    v2 = iadd v0, v1        ; bin: 66 0f fe c1
-[-, %xmm0]    v3 = isub v0, v1        ; bin: 66 0f fa c1
-[-, %xmm0]    v4 = imul v0, v1        ; bin: 66 0f 38 40 c1
-[-, %xmm0]    v5 = iabs v1            ; bin: 66 0f 38 1e c1
-
-    return
-}
-
-function %arithmetic_i64x2(i64x2, i64x2) {
-block0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm2]):
-[-, %xmm0]    v2 = iadd v0, v1        ; bin: 66 0f d4 c2
-[-, %xmm0]    v3 = isub v0, v1        ; bin: 66 0f fb c2
-
-    return
-}
-
-function %arithmetic_i64x2_rex(i64x2, i64x2) {
-block0(v0: i64x2 [%xmm8], v1: i64x2 [%xmm10]):
-[-, %xmm8]    v2 = iadd v0, v1        ; bin: 66 45 0f d4 c2
-[-, %xmm8]    v3 = isub v0, v1        ; bin: 66 45 0f fb c2
-
-    return
-}
-
-function %arithmetic_f32x4(f32x4, f32x4) {
-block0(v0: f32x4 [%xmm3], v1: f32x4 [%xmm5]):
-[-, %xmm3]    v2 = fadd v0, v1      ; bin: 0f 58 dd
-[-, %xmm3]    v3 = fsub v0, v1      ; bin: 0f 5c dd
-[-, %xmm3]    v4 = fmul v0, v1      ; bin: 0f 59 dd
-[-, %xmm3]    v5 = fdiv v0, v1      ; bin: 0f 5e dd
-[-, %xmm3]    v6 = x86_fmin v0, v1  ; bin: 0f 5d dd
-[-, %xmm3]    v7 = x86_fmax v0, v1  ; bin: 0f 5f dd
-[-, %xmm3]    v8 = sqrt v0          ; bin: 0f 51 db
-    return
-}
-
-function %arithmetic_f32x4_rex(f32x4, f32x4) {
-block0(v0: f32x4 [%xmm3], v1: f32x4 [%xmm10]):
-[-, %xmm3]    v2 = fadd v0, v1      ; bin: 41 0f 58 da
-[-, %xmm3]    v3 = fsub v0, v1      ; bin: 41 0f 5c da
-[-, %xmm3]    v4 = fmul v0, v1      ; bin: 41 0f 59 da
-[-, %xmm3]    v5 = fdiv v0, v1      ; bin: 41 0f 5e da
-[-, %xmm3]    v6 = x86_fmin v0, v1  ; bin: 41 0f 5d da
-[-, %xmm3]    v7 = x86_fmax v0, v1  ; bin: 41 0f 5f da
-[-, %xmm3]    v8 = sqrt v1          ; bin: 41 0f 51 da
-    return
-}
-
-function %arithmetic_f64x2(f64x2, f64x2) {
-block0(v0: f64x2 [%xmm3], v1: f64x2 [%xmm5]):
-[-, %xmm3]    v2 = fadd v0, v1      ; bin: 66 0f 58 dd
-[-, %xmm3]    v3 = fsub v0, v1      ; bin: 66 0f 5c dd
-[-, %xmm3]    v4 = fmul v0, v1      ; bin: 66 0f 59 dd
-[-, %xmm3]    v5 = fdiv v0, v1      ; bin: 66 0f 5e dd
-[-, %xmm3]    v6 = x86_fmin v0, v1  ; bin: 66 0f 5d dd
-[-, %xmm3]    v7 = x86_fmax v0, v1  ; bin: 66 0f 5f dd
-[-, %xmm3]    v8 = sqrt v0          ; bin: 66 0f 51 db
-    return
-}
-
-function %arithmetic_f64x2_rex(f64x2, f64x2) {
-block0(v0: f64x2 [%xmm11], v1: f64x2 [%xmm13]):
-[-, %xmm11]    v2 = fadd v0, v1      ; bin: 66 45 0f 58 dd
-[-, %xmm11]    v3 = fsub v0, v1      ; bin: 66 45 0f 5c dd
-[-, %xmm11]    v4 = fmul v0, v1      ; bin: 66 45 0f 59 dd
-[-, %xmm11]    v5 = fdiv v0, v1      ; bin: 66 45 0f 5e dd
-[-, %xmm11]    v6 = x86_fmin v0, v1  ; bin: 66 45 0f 5d dd
-[-, %xmm11]    v7 = x86_fmax v0, v1  ; bin: 66 45 0f 5f dd
-[-, %xmm11]    v8 = sqrt v0          ; bin: 66 45 0f 51 db
-    return
-}
-
-function %pmuludq(i64x2, i64x2) -> i64x2 {
-block0(v0: i64x2 [%xmm3], v1: i64x2 [%xmm5]):
-[-, %xmm3]    v2 = x86_pmuludq v0, v1      ; bin: 66 0f f4 dd
-    return v2
-}
-
-function %pmaddwd(i16x8, i16x8) -> i32x4 {
-block0(v0: i16x8 [%xmm8], v1: i16x8 [%xmm9]):
-[-, %xmm8]    v2 = widening_pairwise_dot_product_s v0, v1      ; bin: 66 45 0f f5 c1
-    return v2
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif
deleted file mode 100644
index 74bc68ee67..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif
+++ /dev/null
@@ -1,117 +0,0 @@
-test legalizer
-set enable_simd
-target x86_64 legacy skylake
-
-function %ineg_i32x4() -> b1 {
-; check:  const0 = 0x00000001000000010000000100000001
-; nextln: const1 = 0x00000000000000000000000000000000
-block0:
-    v0 = vconst.i32x4 [1 1 1 1]
-    v2 = ineg v0
-    ; check: v5 = vconst.i32x4 const1
-    ; nextln: v2 = isub v5, v0
-
-    v3 = extractlane v2, 0
-    v4 = icmp_imm eq v3, -1
-
-    return v4
-}
-
-function %ineg_legalized() {
-; check: const0 = 0x00000000000000000000000000000000
-block0:
-    v0 = vconst.i8x16 0x00
-    v1 = ineg v0
-    ; check: v6 = vconst.i8x16 const0
-    ; nextln: v1 = isub v6, v0
-
-    v2 = raw_bitcast.i16x8 v0
-    v3 = ineg v2
-    ; check: v7 = vconst.i16x8 const0
-    ; nextln: v3 = isub v7, v2
-
-    v4 = raw_bitcast.i64x2 v0
-    v5 = ineg v4
-    ; check: v8 = vconst.i64x2 const0
-    ; nextln: v5 = isub v8, v4
-
-    return
-}
-
-function %fneg_legalized() {
-; check: const2 = 0xffffffffffffffffffffffffffffffff
-block0:
-    v0 = vconst.f32x4 [0x1.0 0x2.0 0x3.0 0x4.0]
-    v1 = fneg v0
-    ; check: v4 = vconst.i32x4 const2
-    ; nextln: v5 = ishl_imm v4, 31
-    ; nextln: v6 = raw_bitcast.f32x4 v5
-    ; nextln: v1 = bxor v0, v6
-
-    v2 = vconst.f64x2 [0x1.0 0x2.0]
-    v3 = fneg v2
-    ; check: v7 = vconst.i64x2 const2
-    ; nextln: v8 = ishl_imm v7, 63
-    ; nextln: v9 = raw_bitcast.f64x2 v8
-    ; nextln: v3 = bxor v2, v9
-
-    return
-}
-
-function %fabs_legalized() {
-; check: const1 = 0xffffffffffffffffffffffffffffffff
-block0:
-    v0 = vconst.f64x2 [0x1.0 -0x2.0]
-    v1 = fabs v0
-    ; check: v2 = vconst.i64x2 const1
-    ; nextln: v3 = ushr_imm v2, 1
-    ; nextln: v4 = raw_bitcast.f64x2 v3
-    ; nextln: v1 = band v0, v4
-    return
-}
-
-function %imul_i64x2(i64x2, i64x2) {
-block0(v0:i64x2, v1:i64x2):
-    v2 = imul v0, v1
-    ; check: v3 = ushr_imm v0, 32
-    ; nextln: v4 = x86_pmuludq v3, v1
-    ; nextln: v5 = ushr_imm v1, 32
-    ; nextln: v6 = x86_pmuludq v5, v0
-    ; nextln: v7 = iadd v4, v6
-    ; nextln: v8 = ishl_imm v7, 32
-    ; nextln: v9 = x86_pmuludq v0, v1
-    ; nextln: v2 = iadd v9, v8
-    return
-}
-
-function %fmin_f32x4(f32x4, f32x4) {
-block0(v0:f32x4, v1:f32x4):
-    v2 = fmin v0, v1
-    ; check: v3 = x86_fmin v0, v1
-    ; nextln: v4 = x86_fmin v1, v0
-    ; nextln: v5 = bor v4, v3
-    ; nextln: v6 = fcmp uno v3, v5
-    ; nextln: v7 = raw_bitcast.f32x4 v6
-    ; nextln: v8 = bor v5, v7
-    ; nextln: v9 = raw_bitcast.i32x4 v7
-    ; nextln: v10 = ushr_imm v9, 10
-    ; nextln: v11 = raw_bitcast.f32x4 v10
-    ; nextln: v2 = band_not v8, v11
-    return
-}
-
-function %fmax_f64x2(f64x2, f64x2) {
-block0(v0:f64x2, v1:f64x2):
-    v2 = fmax v0, v1
-    ; check: v3 = x86_fmax v0, v1
-    ; nextln: v4 = x86_fmax v1, v0
-    ; nextln: v5 = bxor v3, v4
-    ; nextln: v6 = bor v4, v5
-    ; nextln: v7 = fsub v6, v5
-    ; nextln: v8 = fcmp uno v5, v7
-    ; nextln: v9 = raw_bitcast.i64x2 v8
-    ; nextln: v10 = ushr_imm v9, 13
-    ; nextln: v11 = raw_bitcast.f64x2 v10
-    ; nextln: v2 = band_not v7, v11
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif
deleted file mode 100644
index 0daf064713..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif
+++ /dev/null
@@ -1,17 +0,0 @@
-test binemit
-set enable_simd
-target x86_64 legacy skylake has_avx512dq=true
-
-function %imul_i64x2() {
-block0:
-    [-, %xmm1]    v0 = vconst.i64x2 [1 2]
-    [-, %xmm2]    v1 = vconst.i64x2 [2 2]
-    [-, %xmm14]   v2 = x86_pmullq v0, v1 ; bin: 62 72 f5 08 40 f2
-    ; 62, mandatory EVEX prefix
-    ; 72 = 0111 0010, R is set (MSB in %xmm14) while X, B, and R' are unset (note these are all inverted); mm is set to 0F38
-    ; f5 = 1111 0101, W is set (64-bit op), vvvv set to 1 (inverted), bit 2 always set, pp set to 01
-    ; 08 = 0000 1000, everything, LL' indicates 128-bit, V' is unset (inverted, %xmm1 has MSB of 0)
-    ; 40, opcode (correct)
-    ; f2 = 1111 0010, ModR/M byte using 0b110 from %xmm14 in reg and 0b010 from %xmm2 in r/m
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif
deleted file mode 100644
index 294902d45b..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-legalize.clif
+++ /dev/null
@@ -1,10 +0,0 @@
-test legalizer
-set enable_simd
-target x86_64 legacy skylake has_avx512dq=true
-
-function %imul_i64x2(i64x2, i64x2) {
-block0(v0:i64x2, v1:i64x2):
-    v2 = imul v0, v1
-    ; check: v2 = x86_pmullq v0, v1
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif
deleted file mode 100644
index 6f235e6b3b..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-binemit.clif
+++ /dev/null
@@ -1,9 +0,0 @@
-test binemit
-set enable_simd
-target x86_64 legacy has_avx512vl=true
-
-function %fcvt_from_uint(i32x4) {
-block0(v0: i32x4 [%xmm2]):
-[-, %xmm6]  v1 = x86_vcvtudq2ps v0 ; bin: 62 f1 7f 08 7a f2
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif
deleted file mode 100644
index cdadd3254d..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-avx512-conversion-legalize.clif
+++ /dev/null
@@ -1,10 +0,0 @@
-test legalizer
-set enable_simd
-target x86_64 legacy skylake has_avx512f=true
-
-function %fcvt_from_uint(i32x4) -> f32x4 {
-block0(v0:i32x4):
-    v1 = fcvt_from_uint.f32x4 v0
-    ; check: v1 = x86_vcvtudq2ps v0
-    return v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif
deleted file mode 100644
index 3131a8aa0c..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-binemit.clif
+++ /dev/null
@@ -1,99 +0,0 @@
-test binemit
-set enable_simd
-target x86_64 legacy skylake
-
-function %ishl_i16x8(i16x8, i64x2) -> i16x8 {
-block0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]):
-[-, %xmm2]  v2 = x86_psll v0, v1     ; bin: 66 0f f1 d1
-            return v2
-}
-
-function %ishl_i32x4(i32x4, i64x2) -> i32x4 {
-block0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]):
-[-, %xmm4]  v2 = x86_psll v0, v1      ; bin: 66 0f f2 e0
-            return v2
-}
-
-function %ishl_i64x2(i64x2, i64x2) -> i64x2 {
-block0(v0: i64x2 [%xmm6], v1: i64x2 [%xmm3]):
-[-, %xmm6]  v2 = x86_psll v0, v1      ; bin: 66 0f f3 f3
-            return v2
-}
-
-function %ushr_i16x8(i16x8, i64x2) -> i16x8 {
-block0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]):
-[-, %xmm2]  v2 = x86_psrl v0, v1     ; bin: 66 0f d1 d1
-            return v2
-}
-
-function %ushr_i32x4(i32x4, i64x2) -> i32x4 {
-block0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]):
-[-, %xmm4]  v2 = x86_psrl v0, v1      ; bin: 66 0f d2 e0
-            return v2
-}
-
-function %ushr_i64x2(i64x2, i64x2) -> i64x2 {
-block0(v0: i64x2 [%xmm6], v1: i64x2 [%xmm3]):
-[-, %xmm6]  v2 = x86_psrl v0, v1      ; bin: 66 0f d3 f3
-            return v2
-}
-
-function %sshr_i16x8(i16x8, i64x2) -> i16x8 {
-block0(v0: i16x8 [%xmm2], v1: i64x2 [%xmm1]):
-[-, %xmm2]  v2 = x86_psra v0, v1     ; bin: 66 0f e1 d1
-            return v2
-}
-
-function %sshr_i32x4(i32x4, i64x2) -> i32x4 {
-block0(v0: i32x4 [%xmm4], v1: i64x2 [%xmm0]):
-[-, %xmm4]  v2 = x86_psra v0, v1      ; bin: 66 0f e2 e0
-            return v2
-}
-
-function %ishl_imm_i16x8(i16x8) -> i16x8 {
-block0(v0: i16x8 [%xmm2]):
-[-, %xmm2]  v2 = ishl_imm v0, 3      ; bin: 66 0f 71 f2 03
-            return v2
-}
-
-function %ishl_imm_i32x4(i32x4) -> i32x4 {
-block0(v0: i32x4 [%xmm4]):
-[-, %xmm4]  v2 = ishl_imm v0, 10     ; bin: 66 0f 72 f4 0a
-            return v2
-}
-
-function %ishl_imm_i64x2(i64x2) -> i64x2 {
-block0(v0: i64x2 [%xmm6]):
-[-, %xmm6]  v2 = ishl_imm v0, 42     ; bin: 66 0f 73 f6 2a
-            return v2
-}
-
-function %ushr_imm_i16x8(i16x8) -> i16x8 {
-block0(v0: i16x8 [%xmm2]):
-[-, %xmm2]  v2 = ushr_imm v0, 3      ; bin: 66 0f 71 d2 03
-            return v2
-}
-
-function %ushr_imm_i32x4(i32x4) -> i32x4 {
-block0(v0: i32x4 [%xmm4]):
-[-, %xmm4]  v2 = ushr_imm v0, 10     ; bin: 66 0f 72 d4 0a
-            return v2
-}
-
-function %ushr_imm_i64x2(i64x2) -> i64x2 {
-block0(v0: i64x2 [%xmm6]):
-[-, %xmm6]  v2 = ushr_imm v0, 42     ; bin: 66 0f 73 d6 2a
-            return v2
-}
-
-function %sshr_imm_i16x8(i16x8) -> i16x8 {
-block0(v0: i16x8 [%xmm2]):
-[-, %xmm2]  v2 = sshr_imm v0, 3      ; bin: 66 0f 71 e2 03
-            return v2
-}
-
-function %sshr_imm_i32x4(i32x4) -> i32x4 {
-block0(v0: i32x4 [%xmm4]):
-[-, %xmm4]  v2 = sshr_imm v0, 10     ; bin: 66 0f 72 e4 0a
-            return v2
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif
deleted file mode 100644
index 7674f83e01..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif
+++ /dev/null
@@ -1,111 +0,0 @@
-test legalizer
-set enable_simd
-target x86_64 legacy skylake
-
-function %ushr_i8x16() -> i8x16 {
-block0:
-    v0 = iconst.i32 1
-    v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
-    v2 = ushr v1, v0
-    ; check:  v3 = bitcast.i64x2 v0
-    ; nextln: v4 = raw_bitcast.i16x8 v1
-    ; nextln: v5 = x86_psrl v4, v3
-    ; nextln: v6 = raw_bitcast.i8x16 v5
-    ; nextln: v7 = const_addr.i64 const1
-    ; nextln: v8 = ishl_imm v0, 4
-    ; nextln: v9 = load_complex.i8x16 v7+v8
-    ; nextln: v2 = band v6, v9
-    return v2
-}
-
-function %sshr_i8x16() -> i8x16 {
-block0:
-    v0 = iconst.i32 1
-    v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
-    v2 = sshr v1, v0
-    ; check:  v3 = iadd_imm v0, 8
-    ; nextln: v4 = bitcast.i64x2 v3
-
-    ; nextln: v5 = x86_punpckl v1, v1
-    ; nextln: v6 = raw_bitcast.i16x8 v5
-    ; nextln: v7 = x86_psra v6, v4
-
-    ; nextln: v8 = x86_punpckh v1, v1
-    ; nextln: v9 = raw_bitcast.i16x8 v8
-    ; nextln: v10 = x86_psra v9, v4
-
-    ; nextln: v2 = snarrow v7, v10
-    return v2
-}
-
-function %ishl_i8x16() -> i8x16 {
-block0:
-    v0 = iconst.i32 1
-    v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
-    v2 = ishl v1, v0
-    ; check:  v3 = bitcast.i64x2 v0
-    ; nextln: v4 = raw_bitcast.i16x8 v1
-    ; nextln: v5 = x86_psll v4, v3
-    ; nextln: v6 = raw_bitcast.i8x16 v5
-    ; nextln: v7 = const_addr.i64 const1
-    ; nextln: v8 = ishl_imm v0, 4
-    ; nextln: v9 = load_complex.i8x16 v7+v8
-    ; nextln: v2 = band v6, v9
-    return v2
-}
-
-function %ishl_i32x4() -> i32x4 {
-block0:
-    v0 = iconst.i32 1
-    v1 = vconst.i32x4 [1 2 4 8]
-    v2 = ishl v1, v0
-    ; check: v3 = bitcast.i64x2 v0
-    ; nextln: v2 = x86_psll v1, v3
-    return v2
-}
-
-function %ushr_i64x2() -> i64x2 {
-block0:
-    v0 = iconst.i32 1
-    v1 = vconst.i64x2 [1 2]
-    v2 = ushr v1, v0
-    ; check: v3 = bitcast.i64x2 v0
-    ; nextln: v2 = x86_psrl v1, v3
-    return v2
-}
-
-function %sshr_i16x8() -> i16x8 {
-block0:
-    v0 = iconst.i32 1
-    v1 = vconst.i16x8 [1 2 4 8 16 32 64 128]
-    v2 = sshr v1, v0
-    ; check: v3 = bitcast.i64x2 v0
-    ; nextln: v2 = x86_psra v1, v3
-    return v2
-}
-
-function %sshr_i64x2() -> i64x2 {
-block0:
-    v0 = iconst.i32 1
-    v1 = vconst.i64x2 [1 2]
-    v2 = sshr v1, v0
-    ; check:  v3 = x86_pextr v1, 0
-    ; nextln: v4 = sshr v3, v0
-    ; nextln: v5 = x86_pinsr v1, v4, 0
-    ; nextln: v6 = x86_pextr v1, 1
-    ; nextln: v7 = sshr v6, v0
-    ; nextln: v2 = x86_pinsr v5, v7, 1
-    return v2
-}
-
-function %bitselect_i16x8() -> i16x8 {
-block0:
-    v0 = vconst.i16x8 [0 0 0 0 0 0 0 0]
-    v1 = vconst.i16x8 [0 0 0 0 0 0 0 0]
-    v2 = vconst.i16x8 [0 0 0 0 0 0 0 0]
-    v3 = bitselect v0, v1, v2
-    ; check: v4 = band v1, v0
-    ; nextln: v5 = band_not v2, v0
-    ; nextln: v3 = bor v4, v5
-    return v3
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif
deleted file mode 100644
index 1d3db4a119..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-comparison-binemit.clif
+++ /dev/null
@@ -1,138 +0,0 @@
-test binemit
-set enable_simd
-target x86_64 legacy skylake
-
-function %icmp_i8x16() {
-block0:
-[-, %xmm3]  v0 = vconst.i8x16 0x00                                  ; bin: 66 0f ef db
-[-, %xmm4]  v1 = vconst.i8x16 0xffffffffffffffffffffffffffffffff    ; bin: 66 0f 74 e4
-[-, %xmm3]  v2 = icmp eq v0, v1                                     ; bin: 66 0f 74 dc
-            return
-}
-
-function %icmp_i16x8_rex() {
-block0:
-[-, %xmm0]  v0 = vconst.i16x8 0x00
-[-, %xmm15] v1 = vconst.i16x8 0xffffffffffffffffffffffffffffffff
-[-, %xmm0]  v2 = icmp eq v0, v1                                     ; bin: 66 41 0f 75 c7
-            return
-}
-
-function %icmp_i32x4() {
-block0:
-[-, %xmm0]  v0 = vconst.i32x4 0x00
-[-, %xmm4]  v1 = vconst.i32x4 0xffffffffffffffffffffffffffffffff
-[-, %xmm0]  v2 = icmp eq v0, v1                                     ; bin: 66 0f 76 c4
-            return
-}
-
-function %icmp_i64x2_rex() {
-block0:
-[-, %xmm8]  v0 = vconst.i64x2 0x00
-[-, %xmm1]  v1 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
-[-, %xmm8]  v2 = icmp eq v0, v1                                     ; bin: 66 44 0f 38 29 c1
-            return
-}
-
-function %icmp_sgt_i8x16(i8x16, i8x16) -> b8x16 {
-block0(v0: i8x16 [%xmm2], v1: i8x16 [%xmm1]):
-[-, %xmm2]  v2 = icmp sgt v0, v1     ; bin: 66 0f 64 d1
-            return v2
-}
-
-function %icmp_sgt_i16x8(i16x8, i16x8) -> b16x8 {
-block0(v0: i16x8 [%xmm4], v1: i16x8 [%xmm3]):
-[-, %xmm4]  v2 = icmp sgt v0, v1     ; bin: 66 0f 65 e3
-            return v2
-}
-
-function %icmp_sgt_i32x4(i32x4, i32x4) -> b32x4 {
-block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm5]):
-[-, %xmm6]  v2 = icmp sgt v0, v1     ; bin: 66 0f 66 f5
-            return v2
-}
-
-function %icmp_sgt_i64x2(i64x2, i64x2) -> b64x2 {
-block0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm7]):
-[-, %xmm0]  v2 = icmp sgt v0, v1     ; bin: 66 0f 38 37 c7
-            return v2
-}
-
-function %min_max_i8x16(i8x16, i8x16) {
-block0(v0: i8x16 [%xmm3], v1: i8x16 [%xmm1]):
-[-, %xmm3]  v2 = x86_pmaxs v0, v1     ; bin: 66 0f 38 3c d9
-[-, %xmm3]  v3 = x86_pmaxu v0, v1     ; bin: 66 0f de d9
-[-, %xmm3]  v4 = x86_pmins v0, v1     ; bin: 66 0f 38 38 d9
-[-, %xmm3]  v5 = x86_pminu v0, v1     ; bin: 66 0f da d9
-            return
-}
-
-function %min_max_i16x8(i16x8, i16x8) {
-block0(v0: i16x8 [%xmm2], v1: i16x8 [%xmm5]):
-[-, %xmm2]  v2 = x86_pmaxs v0, v1     ; bin: 66 0f ee d5
-[-, %xmm2]  v3 = x86_pmaxu v0, v1     ; bin: 66 0f 38 3e d5
-[-, %xmm2]  v4 = x86_pmins v0, v1     ; bin: 66 0f ea d5
-[-, %xmm2]  v5 = x86_pminu v0, v1     ; bin: 66 0f 38 3a d5
-            return
-}
-
-function %min_max_i32x4(i32x4, i32x4) {
-block0(v0: i32x4 [%xmm2], v1: i32x4 [%xmm4]):
-[-, %xmm2]  v2 = x86_pmaxs v0, v1     ; bin: 66 0f 38 3d d4
-[-, %xmm2]  v3 = x86_pmaxu v0, v1     ; bin: 66 0f 38 3f d4
-[-, %xmm2]  v4 = x86_pmins v0, v1     ; bin: 66 0f 38 39 d4
-[-, %xmm2]  v5 = x86_pminu v0, v1     ; bin: 66 0f 38 3b d4
-            return
-}
-
-function %fcmp_f32x4(f32x4, f32x4) {
-block0(v0: f32x4 [%xmm2], v1: f32x4 [%xmm4]):
-[-, %xmm2]  v2 = fcmp eq v0, v1     ; bin: 0f c2 d4 00
-[-, %xmm2]  v3 = fcmp lt v0, v1     ; bin: 0f c2 d4 01
-[-, %xmm2]  v4 = fcmp le v0, v1     ; bin: 0f c2 d4 02
-[-, %xmm2]  v5 = fcmp uno v0, v1    ; bin: 0f c2 d4 03
-[-, %xmm2]  v6 = fcmp ne v0, v1     ; bin: 0f c2 d4 04
-[-, %xmm2]  v7 = fcmp uge v0, v1    ; bin: 0f c2 d4 05
-[-, %xmm2]  v8 = fcmp ugt v0, v1    ; bin: 0f c2 d4 06
-[-, %xmm2]  v9 = fcmp ord v0, v1    ; bin: 0f c2 d4 07
-            return
-}
-
-function %fcmp_f32x4_rex(f32x4, f32x4) {
-block0(v0: f32x4 [%xmm8], v1: f32x4 [%xmm8]):
-[-, %xmm8]  v2 = fcmp eq v0, v1     ; bin: 45 0f c2 c0 00
-[-, %xmm8]  v3 = fcmp lt v0, v1     ; bin: 45 0f c2 c0 01
-[-, %xmm8]  v4 = fcmp le v0, v1     ; bin: 45 0f c2 c0 02
-[-, %xmm8]  v5 = fcmp uno v0, v1    ; bin: 45 0f c2 c0 03
-[-, %xmm8]  v6 = fcmp ne v0, v1     ; bin: 45 0f c2 c0 04
-[-, %xmm8]  v7 = fcmp uge v0, v1    ; bin: 45 0f c2 c0 05
-[-, %xmm8]  v8 = fcmp ugt v0, v1    ; bin: 45 0f c2 c0 06
-[-, %xmm8]  v9 = fcmp ord v0, v1    ; bin: 45 0f c2 c0 07
-            return
-}
-
-function %fcmp_f64x2(f64x2, f64x2) {
-block0(v0: f64x2 [%xmm2], v1: f64x2 [%xmm0]):
-[-, %xmm2]  v2 = fcmp eq v0, v1     ; bin: 66 0f c2 d0 00
-[-, %xmm2]  v3 = fcmp lt v0, v1     ; bin: 66 0f c2 d0 01
-[-, %xmm2]  v4 = fcmp le v0, v1     ; bin: 66 0f c2 d0 02
-[-, %xmm2]  v5 = fcmp uno v0, v1    ; bin: 66 0f c2 d0 03
-[-, %xmm2]  v6 = fcmp ne v0, v1     ; bin: 66 0f c2 d0 04
-[-, %xmm2]  v7 = fcmp uge v0, v1    ; bin: 66 0f c2 d0 05
-[-, %xmm2]  v8 = fcmp ugt v0, v1    ; bin: 66 0f c2 d0 06
-[-, %xmm2]  v9 = fcmp ord v0, v1    ; bin: 66 0f c2 d0 07
-            return
-}
-
-function %fcmp_f64x2_rex(f64x2, f64x2) {
-block0(v0: f64x2 [%xmm9], v1: f64x2 [%xmm11]):
-[-, %xmm9]  v2 = fcmp eq v0, v1     ; bin: 66 45 0f c2 cb 00
-[-, %xmm9]  v3 = fcmp lt v0, v1     ; bin: 66 45 0f c2 cb 01
-[-, %xmm9]  v4 = fcmp le v0, v1     ; bin: 66 45 0f c2 cb 02
-[-, %xmm9]  v5 = fcmp uno v0, v1    ; bin: 66 45 0f c2 cb 03
-[-, %xmm9]  v6 = fcmp ne v0, v1     ; bin: 66 45 0f c2 cb 04
-[-, %xmm9]  v7 = fcmp uge v0, v1    ; bin: 66 45 0f c2 cb 05
-[-, %xmm9]  v8 = fcmp ugt v0, v1    ; bin: 66 45 0f c2 cb 06
-[-, %xmm9]  v9 = fcmp ord v0, v1    ; bin: 66 45 0f c2 cb 07
-            return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif
deleted file mode 100644
index a6324a34cc..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-comparison-legalize.clif
+++ /dev/null
@@ -1,40 +0,0 @@
-test legalizer
-set enable_simd
-target x86_64 legacy skylake
-
-function %icmp_ne_32x4(i32x4, i32x4) -> b32x4 {
-; check: const0 = 0xffffffffffffffffffffffffffffffff
-block0(v0: i32x4, v1: i32x4):
-    v2 = icmp ne v0, v1
-    ; check: v3 = icmp eq v0, v1
-    ; nextln: v4 = vconst.b32x4 const0
-    ; nextln: v2 = bxor v4, v3
-    return v2
-}
-
-function %icmp_ugt_i32x4(i32x4, i32x4) -> b32x4 {
-; check: const0 = 0xffffffffffffffffffffffffffffffff
-block0(v0: i32x4, v1: i32x4):
-    v2 = icmp ugt v0, v1
-    ; check: v3 = x86_pmaxu v0, v1
-    ; nextln: v4 = icmp eq v3, v1
-    ; nextln: v5 = vconst.b32x4 const0
-    ; nextln: v2 = bxor v5, v4
-    return v2
-}
-
-function %icmp_sge_i16x8(i16x8, i16x8) -> b16x8 {
-block0(v0: i16x8, v1: i16x8):
-    v2 = icmp sge v0, v1
-    ; check: v3 = x86_pmins v0, v1
-    ; nextln: v2 = icmp eq v3, v1
-    return v2
-}
-
-function %icmp_uge_i8x16(i8x16, i8x16) -> b8x16 {
-block0(v0: i8x16, v1: i8x16):
-    v2 = icmp uge v0, v1
-    ; check: v3 = x86_pminu v0, v1
-    ; nextln: v2 = icmp eq v3, v1
-    return v2
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif
deleted file mode 100644
index f26b436931..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-conversion-binemit.clif
+++ /dev/null
@@ -1,26 +0,0 @@
-test binemit
-set enable_simd
-target x86_64 legacy nehalem
-
-; Ensure raw_bitcast emits no instructions.
-function %raw_bitcast_i16x8_to_b32x4() {
-block0:
-[-, %rbx]   v0 = bconst.b16 true
-[-, %xmm2]  v1 = scalar_to_vector.b16x8 v0
-[-, %xmm2]  v2 = raw_bitcast.i32x4 v1       ; bin:
-            return
-}
-
-function %conversions_i32x4(i32x4, i32x4) {
-block0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm4]):
-[-, %xmm2]  v2 = fcvt_from_sint.f32x4 v0    ; bin: 40 0f 5b d6
-[-, %xmm6]  v3 = x86_palignr v0, v1, 3      ; bin: 66 0f 3a 0f f4 03
-            return
-}
-
-function %conversions_i16x8(i16x8) {
-block0(v0: i16x8 [%xmm6]):
-[-, %xmm2]  v1 = swiden_low v0              ; bin: 66 0f 38 23 d6
-[-, %xmm11] v2 = uwiden_low v0              ; bin: 66 44 0f 38 33 de
-            return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif
deleted file mode 100644
index 6de14e181a..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-conversion-legalize.clif
+++ /dev/null
@@ -1,70 +0,0 @@
-test legalizer
-set enable_simd
-target x86_64 legacy skylake
-
-function %fcvt_from_uint(i32x4) -> f32x4 {
-block0(v0:i32x4):
-    v1 = fcvt_from_uint.f32x4 v0
-    ; check: v2 = raw_bitcast.i16x8 v0
-    ; nextln: v3 = vconst.i16x8 const0
-    ; nextln: v4 = x86_pblendw v3, v2, 85
-    ; nextln: v5 = raw_bitcast.i32x4 v4
-    ; nextln: v6 = isub v0, v5
-    ; nextln: v7 = fcvt_from_sint.f32x4 v5
-    ; nextln: v8 = ushr_imm v6, 1
-    ; nextln: v9 = fcvt_from_sint.f32x4 v8
-    ; nextln: v10 = fadd v9, v9
-    ; nextln: v1 = fadd v10, v7
-    return v1
-}
-
-function %fcvt_to_sint_sat(f32x4) -> i32x4 {
-block0(v0:f32x4):
-    v1 = fcvt_to_sint_sat.i32x4 v0
-    ; check: v2 = fcmp eq v0, v0
-    ; nextln: v3 = raw_bitcast.f32x4 v2
-    ; nextln: v4 = band v0, v3
-    ; nextln: v5 = bxor v3, v0
-    ; nextln: v6 = raw_bitcast.i32x4 v5
-    ; nextln: v7 = x86_cvtt2si.i32x4 v4
-    ; nextln: v8 = band v6, v7
-    ; nextln: v9 = sshr_imm v8, 31
-    ; nextln: v1 = bxor v7, v9
-    return v1
-}
-
-function %fcvt_to_uint_sat(f32x4) -> i32x4 {
-; check: const0 = 0x00000000000000000000000000000000
-; nextln: const1 = 0x4f0000004f0000004f0000004f000000
-block0(v0:f32x4):
-    v1 = fcvt_to_uint_sat.i32x4 v0
-    ; check: v2 = vconst.f32x4 const0
-    ; nextln: v3 = vconst.f32x4 const1
-    ; nextln: v4 = x86_fmax v0, v2
-    ; nextln: v5 = fsub v4, v3
-    ; nextln: v6 = fcmp le v3, v5
-    ; nextln: v7 = x86_cvtt2si.i32x4 v5
-    ; nextln: v8 = raw_bitcast.i32x4 v6
-    ; nextln: v9 = bxor v7, v8
-    ; nextln: v10 = raw_bitcast.i32x4 v2
-    ; nextln: v11 = x86_pmaxs v9, v10
-    ; nextln: v12 = x86_cvtt2si.i32x4 v4
-    ; nextln: v1 = iadd v12, v11
-    return v1
-}
-
-function %uwiden_high(i8x16) -> i16x8 {
-block0(v0: i8x16):
-    v1 = uwiden_high v0
-    ; check: v2 = x86_palignr v0, v0, 8
-    ; nextln: v1 = uwiden_low v2
-    return v1
-}
-
-function %swiden_high(i16x8) -> i32x4 {
-block0(v0: i16x8):
-    v1 = swiden_high v0
-    ; check: v2 = x86_palignr v0, v0, 8
-    ; nextln: v1 = swiden_low v2
-    return v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif
deleted file mode 100644
index 6240a08557..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit-for-size.clif
+++ /dev/null
@@ -1,34 +0,0 @@
-test binemit
-set opt_level=speed_and_size
-set enable_simd
-target x86_64 legacy
-
-;; These scalar_to_vector tests avoid the use of REX prefixes with the speed_and_size optimization flag.
-
-function %scalar_to_vector_b8() {
-block0:
-[-, %rax]   v0 = bconst.b8 true
-[-, %xmm0]  v1 = scalar_to_vector.b8x16 v0    ; bin: 66 0f 6e c0
-            return
-}
-
-function %scalar_to_vector_i16() {
-block0:
-[-, %rbx]   v0 = iconst.i16 42
-[-, %xmm2]  v1 = scalar_to_vector.i16x8 v0    ; bin: 66 0f 6e d3
-            return
-}
-
-function %scalar_to_vector_b32() {
-block0:
-[-, %rcx]   v0 = bconst.b32 false
-[-, %xmm3]  v1 = scalar_to_vector.b32x4 v0    ; bin: 66 0f 6e d9
-            return
-}
-
-function %scalar_to_vector_i64() {
-block0:
-[-, %rdx]   v0 = iconst.i64 42
-[-, %xmm7]  v1 = scalar_to_vector.i64x2 v0    ; bin: 66 48 0f 6e fa
-            return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif
deleted file mode 100644
index a8c14a6342..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif
+++ /dev/null
@@ -1,126 +0,0 @@
-test binemit
-set enable_simd
-target x86_64 legacy haswell
-
-; for insertlane, floats are legalized differently than integers and booleans; integers and
-; booleans use x86_pinsr which is manually placed in the IR so that it can be binemit-tested
-
-function %insertlane_b8() {
-block0:
-[-, %rax]   v0 = bconst.b8 true
-[-, %rbx]   v1 = bconst.b8 false
-[-, %xmm0]  v2 = splat.b8x16 v0
-[-, %xmm0]  v3 = x86_pinsr v2, v1, 10    ; bin: 66 0f 3a 20 c3 0a
-            return
-}
-
-function %insertlane_i16() {
-block0:
-[-, %rax]   v0 = iconst.i16 4
-[-, %rbx]   v1 = iconst.i16 5
-[-, %xmm1]  v2 = splat.i16x8 v0
-[-, %xmm1]  v3 = x86_pinsr v2, v1, 4    ; bin: 66 0f c4 cb 04
-            return
-}
-
-function %insertlane_i32() {
-block0:
-[-, %rax]   v0 = iconst.i32 42
-[-, %rbx]   v1 = iconst.i32 99
-[-, %xmm4]  v2 = splat.i32x4 v0
-[-, %xmm4]  v3 = x86_pinsr v2, v1, 2    ; bin: 66 0f 3a 22 e3 02
-            return
-}
-
-function %insertlane_b64() {
-block0:
-[-, %rax]   v0 = bconst.b64 true
-[-, %rbx]   v1 = bconst.b64 false
-[-, %xmm2]  v2 = splat.b64x2 v0
-[-, %xmm2]  v3 = x86_pinsr v2, v1, 1    ; bin: 66 48 0f 3a 22 d3 01
-            return
-}
-
-; for extractlane, floats are legalized differently than integers and booleans; integers and
-; booleans use x86_pextr which is manually placed in the IR so that it can be binemit-tested
-
-function %extractlane_b8() {
-block0:
-[-, %rax]   v0 = bconst.b8 true
-[-, %xmm0]  v1 = splat.b8x16 v0
-[-, %rax]   v2 = x86_pextr v1, 10    ; bin: 66 0f 3a 14 c0 0a
-            return
-}
-
-function %extractlane_i16() {
-block0:
-[-, %rax]   v0 = iconst.i16 4
-[-, %xmm1]  v1 = splat.i16x8 v0
-[-, %rax]   v2 = x86_pextr v1, 4    ; bin: 66 0f 3a 15 c8 04
-            return
-}
-
-function %extractlane_i32() {
-block0:
-[-, %rax]   v0 = iconst.i32 42
-[-, %xmm4]  v1 = splat.i32x4 v0
-[-, %rcx]   v2 = x86_pextr v1, 2    ; bin: 66 0f 3a 16 e1 02
-            return
-}
-
-function %extractlane_b64() {
-block0:
-[-, %rax]   v0 = bconst.b64 false
-[-, %xmm2]  v1 = splat.b64x2 v0
-[-, %rbx]   v2 = x86_pextr v1, 1    ; bin: 66 48 0f 3a 16 d3 01
-            return
-}
-
-;; shuffle
-
-function %pshufd() {
-block0:
-[-, %rax]   v0 = iconst.i32 42
-[-, %xmm0]  v1 = scalar_to_vector.i32x4 v0  ; bin: 66 0f 6e c0
-[-, %xmm0]  v2 = x86_pshufd v1, 0           ; bin: 66 0f 70 c0 00
-            return
-}
-
-function %pshufb() {
-block0:
-[-, %rax]   v0 = iconst.i8 42
-[-, %xmm0]  v1 = scalar_to_vector.i8x16 v0   ; bin: 66 0f 6e c0
-[-, %rbx]   v2 = iconst.i8 43
-[-, %xmm12] v3 = scalar_to_vector.i8x16 v2   ; bin: 66 44 0f 6e e3
-[-, %xmm0]  v4 = x86_pshufb v1, v3           ; bin: 66 41 0f 38 00 c4
-            return
-}
-
-;; blend
-
-function %pblendw(b16x8, b16x8) {
-block0(v0: b16x8 [%xmm10], v1: b16x8 [%xmm2]):
-[-, %xmm10] v2 = x86_pblendw v0, v1, 0x55   ; bin: 66 44 0f 3a 0e d2 55
-            return
-}
-
-;; pack/unpack
-
-function %unpack_high_i8x16(i8x16, i8x16) {
-block0(v0: i8x16 [%xmm0], v1: i8x16 [%xmm12]):
-[-, %xmm0]  v2 = x86_punpckh v0, v1         ; bin: 66 41 0f 68 c4
-            return
-}
-
-function %unpack_low_i32x4(i32x4, i32x4) {
-block0(v0: i32x4 [%xmm7], v1: i32x4 [%xmm6]):
-[-, %xmm7]  v2 = x86_punpckl v0, v1         ; bin: 66 0f 62 fe
-            return
-}
-
-function %narrowing_i16x8(i16x8, i16x8) {
-block0(v0: i16x8 [%xmm7], v1: i16x8 [%xmm8]):
-[-, %xmm7]  v2 = snarrow v0, v1             ; bin: 66 41 0f 63 f8
-[-, %xmm7]  v3 = unarrow v0, v1             ; bin: 66 41 0f 67 f8
-            return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif
deleted file mode 100644
index 91ff8eb9a0..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-compile.clif
+++ /dev/null
@@ -1,19 +0,0 @@
-test compile
-set opt_level=speed_and_size
-set enable_probestack=false
-set enable_simd
-target x86_64 legacy
-
-; Ensure that scalar_to_vector emits no instructions for floats (already exist in an XMM register)
-function %scalar_to_vector_f32() -> f32x4 baldrdash_system_v {
-block0:
-    v0 = f32const 0x0.42
-    v1 = scalar_to_vector.f32x4 v0
-    return v1
-}
-
-; check: block0
-; nextln: v2 = iconst.i32 0x3e84_0000
-; nextln: v0 = bitcast.f32 v2
-; nextln: [null_fpr#00,%xmm0]                 v1 = scalar_to_vector.f32x4 v0
-; nextln: return v1
diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif
deleted file mode 100644
index 284ef35180..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif
+++ /dev/null
@@ -1,101 +0,0 @@
-test legalizer
-set enable_simd
-target x86_64 legacy skylake
-
-;; shuffle
-
-function %shuffle_different_ssa_values() -> i8x16 {
-; check:  const2 = 0x80000000000000000000000000000000
-; nextln: const3 = 0x01808080808080808080808080808080
-block0:
-    v0 = vconst.i8x16 0x00
-    v1 = vconst.i8x16 0x01
-    v2 = shuffle v0, v1, 0x11000000000000000000000000000000     ; pick the second lane of v1, the rest use the first lane of v0
-    return v2
-}
-; check:  v1 = vconst.i8x16 const1
-; nextln: v3 = vconst.i8x16 const2
-; nextln: v4 = x86_pshufb v0, v3
-; nextln: v5 = vconst.i8x16 const3
-; nextln: v6 = x86_pshufb v1, v5
-; nextln: v2 = bor v4, v6
-
-function %shuffle_same_ssa_value() -> i8x16 {
-; check:  const1 = 0x03000000000000000000000000000000
-block0:
-    v1 = vconst.i8x16 0x01
-    v2 = shuffle v1, v1, 0x13000000000000000000000000000000     ; pick the fourth lane of v1 and the rest from the first lane of v1
-    return v2
-}
-; check:  v1 = vconst.i8x16 const0
-; nextln: v3 = vconst.i8x16 const1
-; nextln: v2 = x86_pshufb v1, v3
-
-;; splat
-
-function %splat_i32() -> i32x4 {
-block0:
-    v0 = iconst.i32 42
-    v1 = splat.i32x4 v0
-    return v1
-}
-; check:   block0:
-; nextln:     v0 = iconst.i32 42
-; nextln:     v2 = scalar_to_vector.i32x4 v0
-; nextln:     v1 = x86_pshufd v2, 0
-; nextln:     return v1
-; nextln: }
-
-function %splat_i64() -> i64x2 {
-block0:
-    v0 = iconst.i64 42
-    v1 = splat.i64x2 v0
-    return v1
-}
-; check:   block0:
-; nextln:     v0 = iconst.i64 42
-; nextln:     v2 = scalar_to_vector.i64x2 v0
-; nextln:     v1 = x86_pinsr v2, v0, 1
-; nextln:     return v1
-
-function %splat_b16() -> b16x8 {
-block0:
-    v0 = bconst.b16 true
-    v1 = splat.b16x8 v0
-    return v1
-}
-; check:   block0:
-; nextln:     v0 = bconst.b16 true
-; nextln:     v2 = scalar_to_vector.b16x8 v0
-; nextln:     v3 = x86_pinsr v2, v0, 1
-; nextln:     v4 = raw_bitcast.i32x4 v3
-; nextln:     v5 = x86_pshufd v4, 0
-; nextln:     v1 = raw_bitcast.b16x8 v5
-; nextln:     return v1
-
-function %splat_i8() -> i8x16 {
-; check: const0 = 0x00000000000000000000000000000000
-block0:
-    v0 = iconst.i8 42
-    v1 = splat.i8x16 v0
-    return v1
-}
-; check:   block0:
-; nextln:     v2 = iconst.i32 42
-; nextln:     v0 = ireduce.i8 v2
-; nextln:     v3 = scalar_to_vector.i8x16 v0
-; nextln:     v4 = vconst.i8x16 const0
-; nextln:     v1 = x86_pshufb v3, v4
-; nextln:     return v1
-
-function %swizzle() -> i8x16 {
-; check: const1 = 0x70707070707070707070707070707070
-block0:
-    v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
-    v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
-    v2 = swizzle.i8x16 v0, v1
-    ; check: v3 = vconst.i8x16 const1
-    ; nextln: v4 = uadd_sat v1, v3
-    ; nextln: v2 = x86_pshufb v0, v4
-    return v2
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif
deleted file mode 100644
index af5ca0fe63..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-logical-binemit.clif
+++ /dev/null
@@ -1,33 +0,0 @@
-test binemit
-set enable_simd
-target x86_64 legacy skylake
-
-function %bor_b16x8(b16x8, b16x8) -> b16x8 {
-block0(v0: b16x8 [%xmm2], v1: b16x8 [%xmm1]):
-[-, %xmm2]  v2 = bor v0, v1     ; bin: 66 0f eb d1
-            return v2
-}
-
-function %band_b64x2(b64x2, b64x2) -> b64x2 {
-block0(v0: b64x2 [%xmm6], v1: b64x2 [%xmm3]):
-[-, %xmm6]  v2 = band v0, v1      ; bin: 66 0f db f3
-            return v2
-}
-
-function %bxor_b32x4(b32x4, b32x4) -> b32x4 {
-block0(v0: b32x4 [%xmm4], v1: b32x4 [%xmm0]):
-[-, %xmm4]  v2 = bxor v0, v1      ; bin: 66 0f ef e0
-            return v2
-}
-
-function %band_not_b64x2(b64x2, b64x2) -> b64x2 {
-block0(v0: b64x2 [%xmm6], v1: b64x2 [%xmm3]):
-[-, %xmm3]  v2 = band_not v0, v1      ; bin: 66 0f df de
-            return v2
-}
-
-function %x86_ptest_f64x2(f64x2, f64x2) {
-block0(v0: f64x2 [%xmm0], v1: f64x2 [%xmm2]):
-[-, %rflags]  v2 = x86_ptest v0, v1      ; bin: 66 0f 38 17 c2
-            return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif
deleted file mode 100644
index 5e5bb7ac43..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-logical-legalize.clif
+++ /dev/null
@@ -1,31 +0,0 @@
-test legalizer
-set enable_simd
-target x86_64 legacy skylake
-
-function %bnot_b32x4(b32x4) -> b32x4 {
-; check: const0 = 0xffffffffffffffffffffffffffffffff
-block0(v0: b32x4):
-    v1 = bnot v0
-    ; check: v2 = vconst.b32x4 const0
-    ; nextln: v1 = bxor v2, v0
-    return v1
-}
-
-function %vany_true_b32x4(b32x4) -> b1 {
-block0(v0: b32x4):
-    v1 = vany_true v0
-    ; check: v2 = x86_ptest v0, v0
-    ; nextln: v1 = trueif ne v2
-    return v1
-}
-
-function %vall_true_i64x2(i64x2) -> b1 {
-; check: const0 = 0x00000000000000000000000000000000
-block0(v0: i64x2):
-    v1 = vall_true v0
-    ; check: v2 = vconst.i64x2 const0
-    ; nextln: v3 = icmp eq v0, v2
-    ; nextln: v4 = x86_ptest v3, v3
-    ; nextln: v1 = trueif eq v4
-    return v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif b/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif
deleted file mode 100644
index 6b6b91a915..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-logical-rodata.clif
+++ /dev/null
@@ -1,11 +0,0 @@
-test rodata
-set enable_simd
-target x86_64 legacy skylake
-
-function %bnot_b32x4(b32x4) -> b32x4 {
-block0(v0: b32x4):
-    v1 = bnot v0
-    return v1
-}
-
-; sameln: [FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF, FF]
diff --git a/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif
deleted file mode 100644
index 4f8b050d01..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-memory-binemit.clif
+++ /dev/null
@@ -1,85 +0,0 @@
-test binemit
-set enable_simd
-target x86_64 legacy skylake
-
-function %load_store_simple(i64) {
-block0(v0: i64 [%rax]):
-[-, %xmm0]    v10 = load.i32x4 v0   ; bin: heap_oob 0f 10 00
-[-]           store v10, v0         ; bin: heap_oob 0f 11 00
-
-              ; use REX prefix
-[-, %xmm8]    v12 = load.i8x16 v0   ; bin: heap_oob 44 0f 10 00
-[-]           store v12, v0         ; bin: heap_oob 44 0f 11 00
-
-    return
-}
-
-function %load_store_with_displacement(i64) {
-block0(v0: i64 [%rax]):
-              ; use 8-bit displacement
-[-, %xmm0]    v1 = load.f32x4 v0+42 ; bin: heap_oob 0f 10 40 2a
-[-]           store v1, v0+42       ; bin: heap_oob 0f 11 40 2a
-
-              ; use 8-bit displacement with REX prefix
-[-, %xmm8]    v2 = load.i8x16 v0   ; bin: heap_oob 44 0f 10 00
-[-]           store v2, v0         ; bin: heap_oob 44 0f 11 00
-
-              ; use 32-bit displacement
-[-, %xmm0]    v3 = load.f32x4 v0+256 ; bin: heap_oob 0f 10 80 00000100
-[-]           store v3, v0+256       ; bin: heap_oob 0f 11 80 00000100
-
-              ; use 32-bit displacement with REX prefix
-[-, %xmm8]    v4 = load.f32x4 v0+256 ; bin: heap_oob 44 0f 10 80 00000100
-[-]           store v4, v0+256       ; bin: heap_oob 44 0f 11 80 00000100
-
-    return
-}
-
-function %load_store_complex(i64, i64) {
-block0(v0: i64 [%rax], v1: i64 [%rbx]):
-              ; %xmm1 corresponds to ModR/M 0x04; the 0b100 in the R/M slot indicates a SIB byte follows
-              ; %rax and %rbx form the SIB 0x18
-[-, %xmm1]    v10 = load_complex.f64x2 v0+v1   ; bin: heap_oob 40 0f 10 0c 18
-              ; enabling bit 6 of the ModR/M byte indicates a disp8 follows
-[-]           store_complex v10, v0+v1+5       ; bin: heap_oob 40 0f 11 4c 18 05
-
-    return
-}
-
-function %copy_to_ssa() {
-block0:
-[-, %xmm1]    v0 = copy_to_ssa.i64x2 %xmm3  ; bin: 40 0f 28 cb
-[-, %xmm2]    v1 = copy_to_ssa.i64x2 %xmm15 ; bin: 41 0f 28 d7
-
-    return
-}
-
-function %uload_extend() {
-block0:
-    [-,%rdx]     v1 = iconst.i64 0x0123_4567_89ab_cdef
-    [-,%xmm2]    v3 = uload8x8 v1+0     ; bin: heap_oob 66 0f 38 30 12
-    [-,%xmm2]    v4 = uload8x8 v1+20    ; bin: heap_oob 66 0f 38 30 52 14
-    [-,%xmm2]    v5 = uload8x8 v1+256   ; bin: heap_oob 66 0f 38 30 92 00000100
-    [-,%xmm2]    v6 = uload16x4 v1+0    ; bin: heap_oob 66 0f 38 33 12
-    [-,%xmm2]    v7 = uload16x4 v1+20   ; bin: heap_oob 66 0f 38 33 52 14
-    [-,%xmm2]    v8 = uload16x4 v1+256  ; bin: heap_oob 66 0f 38 33 92 00000100
-    [-,%xmm10]   v9 = uload32x2 v1+0    ; bin: heap_oob 66 44 0f 38 35 12
-    [-,%xmm10]   v10 = uload32x2 v1+20  ; bin: heap_oob 66 44 0f 38 35 52 14
-    [-,%xmm10]   v11 = uload32x2 v1+256 ; bin: heap_oob 66 44 0f 38 35 92 00000100
-    return
-}
-
-function %sload_extend() {
-block0:
-    [-,%rdx]     v1 = iconst.i64 0x0123_4567_89ab_cdef
-    [-,%xmm2]    v3 = sload8x8 v1+0     ; bin: heap_oob 66 0f 38 20 12
-    [-,%xmm2]    v4 = sload8x8 v1+20    ; bin: heap_oob 66 0f 38 20 52 14
-    [-,%xmm2]    v5 = sload8x8 v1+256   ; bin: heap_oob 66 0f 38 20 92 00000100
-    [-,%xmm10]   v6 = sload16x4 v1+0    ; bin: heap_oob 66 44 0f 38 23 12
-    [-,%xmm10]   v7 = sload16x4 v1+20   ; bin: heap_oob 66 44 0f 38 23 52 14
-    [-,%xmm10]   v8 = sload16x4 v1+256  ; bin: heap_oob 66 44 0f 38 23 92 00000100
-    [-,%xmm2]    v9 = sload32x2 v1+0    ; bin: heap_oob 66 0f 38 25 12
-    [-,%xmm2]    v10 = sload32x2 v1+20  ; bin: heap_oob 66 0f 38 25 52 14
-    [-,%xmm2]    v11 = sload32x2 v1+256 ; bin: heap_oob 66 0f 38 25 92 00000100
-    return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif
deleted file mode 100644
index 4141a05b32..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-pextr-binemit.clif
+++ /dev/null
@@ -1,22 +0,0 @@
-test binemit
-set enable_simd
-target x86_64 legacy haswell
-
-function u0:0(i64 fp [%rbp]) -> i32 [%rax], i64 fp [%rbp] system_v {
-    ss0 = explicit_slot 32, offset -48
-    ss1 = spill_slot 16, offset -64
-    ss2 = incoming_arg 16, offset -16
-    sig0 = () system_v
-    fn0 = colocated u0:2 sig0
-
-block0(v5: i64 [%rbp]):
-[-]        x86_push v5
-[-]        copy_special %rsp -> %rbp
-[-]        adjust_sp_down_imm 48
-[-,%rax]   v0 = stack_addr.i64 ss0
-[-,%xmm15] v4 = load.i32x4 v0
-[-,%rax]   v2 = x86_pextr v4, 1 ; bin: 66 44 0f 3a 16 f8 01
-[-]        adjust_sp_up_imm 48
-[-]        v6 = x86_pop.i64
-[-]        return v2, v6
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif
deleted file mode 100644
index 23aee87655..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-vconst-binemit.clif
+++ /dev/null
@@ -1,29 +0,0 @@
-test binemit
-set opt_level=speed_and_size
-set enable_simd
-target x86_64 legacy
-
-function %vconst_b8() {
-block0:
-[-, %xmm2]  v0 = vconst.b8x16 0x01  ; bin: 0f 10 15 00000008 PCRelRodata4(15)
-[-, %xmm3]  v1 = vconst.b8x16 0x02  ; bin: 0f 10 1d 00000011 PCRelRodata4(31)
-            return
-}
-
-function %vconst_with_preamble() {
-const42 = i32x4 [1 0 0 0]
-const43 = i32x4 [2 0 0 0]
-
-block0:
-[-, %xmm2]  v0 = vconst.i32x4 const42  ; bin: 0f 10 15 00000008 PCRelRodata4(15)
-[-, %xmm3]  v1 = vconst.i32x4 const43  ; bin: 0f 10 1d 00000011 PCRelRodata4(31)
-            return
-}
-
-function %address_of_vconst() {
-const42 = i32x4 [1 0 0 0]
-
-block0:
-[-, %rax]   v0 = const_addr.i64 const42  ; bin: 48 8d 05 00000001 PCRelRodata4(8)
-            return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif
deleted file mode 100644
index 477984b344..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-vconst-compile.clif
+++ /dev/null
@@ -1,16 +0,0 @@
-test compile
-set enable_simd=true
-set enable_probestack=false
-target x86_64 legacy haswell
-
-; use baldrdash calling convention here for simplicity (avoids prologue, epilogue)
-function %vconst_i32() -> i32x4 baldrdash_system_v {
-block0:
-    v0 = vconst.i32x4 0x1234
-    return v0
-}
-; check:   const0 = 0x00000000000000000000000000001234
-; check:   block0:
-; nextln:     v0 = vconst.i32x4 const0
-; nextln:     return v0
-; nextln: }
diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif
deleted file mode 100644
index 07fa364752..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-vconst-optimized-binemit.clif
+++ /dev/null
@@ -1,10 +0,0 @@
-test binemit
-set enable_simd
-target x86_64 legacy
-
-function %vconst_optimizations() {
-block0:
-[-, %xmm4]  v0 = vconst.b8x16 0x00                                  ; bin: 66 0f ef e4
-[-, %xmm7]  v1 = vconst.b8x16 0xffffffffffffffffffffffffffffffff    ; bin: 66 0f 74 ff
-            return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif b/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif
deleted file mode 100644
index e7e63e65ea..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-vconst-rodata.clif
+++ /dev/null
@@ -1,49 +0,0 @@
-test rodata
-set enable_simd=true
-target x86_64 legacy haswell
-
-function %vconst_i32() -> i32x4 {
-block0:
-    v0 = vconst.i32x4 0x1234
-    return v0
-}
-
-; sameln: [34, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-
-function %vconst_b16() -> b16x8 {
-block0:
-    v0 = vconst.b16x8 [true false true false true false true true]
-    return v0
-}
-
-; sameln: [FF, FF, 0, 0, FF, FF, 0, 0, FF, FF, 0, 0, FF, FF, FF, FF]
-
-
-; Since both jump tables and constants are emitted after the function body, it is important that they do not interfere.
-; This test shows that even in the presence of jump tables, constants are emitted correctly
-function %vconst_with_jumptables() {
-jt0 = jump_table [block0]
-
-block10:
-    v10 = iconst.i64 0
-    br_table v10, block1, jt0
-block0:
-    jump block11
-block1:
-    jump block11
-block11:
-    v11 = vconst.i8x16 [1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16]
-    return
-}
-
-; sameln: [1, 2, 3, 4, 5, 6, 7, 8, 9, A, B, C, D, E, F, 10]
-
-function %vconst_preamble() -> b16x8 {
-const42 = i32x4 [0 1 2 3]
-const43 = i32x4 [4 5 6 7]
-block0:
-    v0 = vconst.b16x8 const42
-    return v0
-}
-
-; sameln: [0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 6, 0, 0, 0, 7, 0, 0, 0]
diff --git a/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif
deleted file mode 100644
index 275a5e4411..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif
+++ /dev/null
@@ -1,27 +0,0 @@
-test binemit
-set enable_simd
-target x86_64 legacy haswell
-
-function %vselect_i8x16(b8x16, i8x16, i8x16) {
-block0(v0: b8x16 [%xmm0], v1: i8x16 [%xmm3], v2: i8x16 [%xmm5]):
-[-, %xmm5]  v3 = vselect v0, v1, v2  ; bin: 66 0f 38 10 eb
-            return
-}
-
-function %vselect_i16x8(b16x8, i16x8, i16x8) {
-block0(v0: b16x8 [%xmm0], v1: i16x8 [%xmm3], v2: i16x8 [%xmm5]):
-[-, %xmm5]  v3 = vselect v0, v1, v2  ; bin: 66 0f 38 10 eb
-            return
-}
-
-function %vselect_i32x4(b32x4, i32x4, i32x4) {
-block0(v0: b32x4 [%xmm0], v1: i32x4 [%xmm3], v2: i32x4 [%xmm5]):
-[-, %xmm5]  v3 = vselect v0, v1, v2  ; bin: 66 0f 38 14 eb
-            return
-}
-
-function %vselect_i64x2(b64x2, i64x2, i64x2) {
-block0(v0: b64x2 [%xmm0], v1: i64x2 [%xmm3], v2: i64x2 [%xmm5]):
-[-, %xmm5]  v3 = vselect v0, v1, v2  ; bin: 66 0f 38 15 eb
-            return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif b/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif
deleted file mode 100644
index 648b3f5584..0000000000
--- a/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif
+++ /dev/null
@@ -1,45 +0,0 @@
-test legalizer
-set enable_simd
-target x86_64 legacy
-
-;; Test if vselect gets legalized if BLEND* instructions are not available
-
-function %vselect_i8x16(b8x16, i8x16, i8x16) -> i8x16 {
-block0(v0: b8x16, v1: i8x16, v2: i8x16):
-    v3 = vselect v0, v1, v2
-    ; check:  v4 = raw_bitcast.i8x16 v0
-    ; nextln: v5 = band v1, v4
-    ; nextln: v6 = band_not v2, v4
-    ; nextln: v3 = bor v5, v6
-    return v3
-}
-
-function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 {
-block0(v0: b16x8, v1: i16x8, v2: i16x8):
-    v3 = vselect v0, v1, v2
-    ; check:  v4 = raw_bitcast.i16x8 v0
-    ; nextln: v5 = band v1, v4
-    ; nextln: v6 = band_not v2, v4
-    ; nextln: v3 = bor v5, v6
-    return v3
-}
-
-function %vselect_i32x4(b32x4, i32x4, i32x4) -> i32x4 {
-block0(v0: b32x4, v1: i32x4, v2: i32x4):
-    v3 = vselect v0, v1, v2
-    ; check:  v4 = raw_bitcast.i32x4 v0
-    ; nextln: v5 = band v1, v4
-    ; nextln: v6 = band_not v2, v4
-    ; nextln: v3 = bor v5, v6
-    return v3
-}
-
-function %vselect_i64x2(b64x2, i64x2, i64x2) -> i64x2 {
-block0(v0: b64x2, v1: i64x2, v2: i64x2):
-    v3 = vselect v0, v1, v2
-    ; check:  v4 = raw_bitcast.i64x2 v0
-    ; nextln: v5 = band v1, v4
-    ; nextln: v6 = band_not v2, v4
-    ; nextln: v3 = bor v5, v6
-    return v3
-}
diff --git a/cranelift/filetests/filetests/isa/x86/stack-addr32.clif b/cranelift/filetests/filetests/isa/x86/stack-addr32.clif
deleted file mode 100644
index f06b3ec0eb..0000000000
--- a/cranelift/filetests/filetests/isa/x86/stack-addr32.clif
+++ /dev/null
@@ -1,33 +0,0 @@
-; binary emission of stack address instructions on i686.
-test binemit
-set opt_level=none
-target i686 legacy haswell
-
-; The binary encodings can be verified with the command:
-;
-;   sed -ne 's/^ *; asm: *//p' filetests/isa/x86/stack-addr32.clif | llvm-mc -show-encoding -triple=i686
-;
-
-function %stack_addr() {
-           ss0 = incoming_arg 8, offset 0
-           ss1 = incoming_arg 1024, offset -1024
-           ss2 = incoming_arg 1024, offset -2048
-           ss3 = incoming_arg 8, offset -2056
-           ss4 = explicit_slot 8, offset 0
-           ss5 = explicit_slot 8, offset 1024
-
-block0:
-[-,%rcx]   v0 = stack_addr.i32 ss0                      ; bin: 8d 8c 24 00000808
-[-,%rcx]   v1 = stack_addr.i32 ss1                      ; bin: 8d 8c 24 00000408
-[-,%rcx]   v2 = stack_addr.i32 ss2                      ; bin: 8d 8c 24 00000008
-[-,%rcx]   v3 = stack_addr.i32 ss3                      ; bin: 8d 8c 24 00000000
-[-,%rcx]   v4 = stack_addr.i32 ss4                      ; bin: 8d 8c 24 00000808
-[-,%rcx]   v5 = stack_addr.i32 ss5                      ; bin: 8d 8c 24 00000c08
-
-[-,%rcx]   v20 = stack_addr.i32 ss4+1                   ; bin: 8d 8c 24 00000809
-[-,%rcx]   v21 = stack_addr.i32 ss4+2                   ; bin: 8d 8c 24 0000080a
-[-,%rcx]   v22 = stack_addr.i32 ss4+2048                ; bin: 8d 8c 24 00001008
-[-,%rcx]   v23 = stack_addr.i32 ss4-4096                ; bin: 8d 8c 24 fffff808
-
-           return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/stack-addr64.clif b/cranelift/filetests/filetests/isa/x86/stack-addr64.clif
deleted file mode 100644
index 5b8d5d7ab7..0000000000
--- a/cranelift/filetests/filetests/isa/x86/stack-addr64.clif
+++ /dev/null
@@ -1,45 +0,0 @@
-; binary emission of stack address instructions on x86-64.
-test binemit
-set opt_level=none
-target x86_64 legacy haswell
-
-; The binary encodings can be verified with the command:
-;
-;   sed -ne 's/^ *; asm: *//p' filetests/isa/x86/stack-addr64.clif | llvm-mc -show-encoding -triple=x86_64
-;
-
-function %stack_addr() {
-           ss0 = incoming_arg 8, offset 0
-           ss1 = incoming_arg 1024, offset -1024
-           ss2 = incoming_arg 1024, offset -2048
-           ss3 = incoming_arg 8, offset -2056
-           ss4 = explicit_slot 8, offset 0
-           ss5 = explicit_slot 8, offset 1024
-
-block0:
-[-,%rcx]   v0 = stack_addr.i64 ss0                      ; bin: 48 8d 8c 24 00000808
-[-,%rcx]   v1 = stack_addr.i64 ss1                      ; bin: 48 8d 8c 24 00000408
-[-,%rcx]   v2 = stack_addr.i64 ss2                      ; bin: 48 8d 8c 24 00000008
-[-,%rcx]   v3 = stack_addr.i64 ss3                      ; bin: 48 8d 8c 24 00000000
-[-,%rcx]   v4 = stack_addr.i64 ss4                      ; bin: 48 8d 8c 24 00000808
-[-,%rcx]   v5 = stack_addr.i64 ss5                      ; bin: 48 8d 8c 24 00000c08
-
-[-,%rcx]   v20 = stack_addr.i64 ss4+1                   ; bin: 48 8d 8c 24 00000809
-[-,%rcx]   v21 = stack_addr.i64 ss4+2                   ; bin: 48 8d 8c 24 0000080a
-[-,%rcx]   v22 = stack_addr.i64 ss4+2048                ; bin: 48 8d 8c 24 00001008
-[-,%rcx]   v23 = stack_addr.i64 ss4-4096                ; bin: 48 8d 8c 24 fffff808
-
-[-,%r8]    v50 = stack_addr.i64 ss0                     ; bin: 4c 8d 84 24 00000808
-[-,%r8]    v51 = stack_addr.i64 ss1                     ; bin: 4c 8d 84 24 00000408
-[-,%r8]    v52 = stack_addr.i64 ss2                     ; bin: 4c 8d 84 24 00000008
-[-,%r8]    v53 = stack_addr.i64 ss3                     ; bin: 4c 8d 84 24 00000000
-[-,%r8]    v54 = stack_addr.i64 ss4                     ; bin: 4c 8d 84 24 00000808
-[-,%r8]    v55 = stack_addr.i64 ss5                     ; bin: 4c 8d 84 24 00000c08
-
-[-,%r8]    v70 = stack_addr.i64 ss4+1                   ; bin: 4c 8d 84 24 00000809
-[-,%r8]    v71 = stack_addr.i64 ss4+2                   ; bin: 4c 8d 84 24 0000080a
-[-,%r8]    v72 = stack_addr.i64 ss4+2048                ; bin: 4c 8d 84 24 00001008
-[-,%r8]    v73 = stack_addr.i64 ss4-4096                ; bin: 4c 8d 84 24 fffff808
-
-           return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif b/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif
deleted file mode 100644
index 508fae04d2..0000000000
--- a/cranelift/filetests/filetests/isa/x86/stack-load-store64.clif
+++ /dev/null
@@ -1,21 +0,0 @@
-; legalization of stack load and store instructions on x86-64.
-test legalizer
-set opt_level=none
-target x86_64 legacy haswell
-
-function %stack_load_and_store() {
-           ss0 = explicit_slot 8, offset 0
-
-block0:
-   v0 = stack_load.i64 ss0
-
-; check: v1 = stack_addr.i64 ss0
-; check: v0 = load.i64 notrap aligned v1
-
-   stack_store.i64 v0, ss0
-
-; check: v2 = stack_addr.i64 ss0
-; check: store notrap aligned v0, v2
-
-   return
-}
diff --git a/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif b/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif
deleted file mode 100644
index 0a9f973fac..0000000000
--- a/cranelift/filetests/filetests/isa/x86/stack-load-store8.clif
+++ /dev/null
@@ -1,19 +0,0 @@
-test compile
-target x86_64 legacy
-
-function u0:0(i8) -> i8 {
-    ss0 = explicit_slot 1
-
-block0(v0: i8):
-    stack_store v0, ss0
-    ; check: v2 = stack_addr.i64 ss0
-    ; nextln: v3 = uextend.i32 v0
-    ; nextln: istore8 notrap aligned v3, v2
-
-    v1 = stack_load.i8 ss0
-    ; check: v4 = stack_addr.i64 ss0
-    ; nextln: v5 = uload8.i32 notrap aligned v4
-    ; nextln: v1 = ireduce.i8 v5
-
-    return v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/struct-arg.clif b/cranelift/filetests/filetests/isa/x86/struct-arg.clif
deleted file mode 100644
index 8358e8633a..0000000000
--- a/cranelift/filetests/filetests/isa/x86/struct-arg.clif
+++ /dev/null
@@ -1,117 +0,0 @@
-test compile
-set is_pic
-target x86_64 legacy
-
-function u0:0(i64 sarg(64)) -> i8 system_v {
-block0(v0: i64):
-    v1 = load.i8 v0
-    return v1
-}
-
-; check: function u0:0(sarg_t sarg(64) [0], i64 fp [%rbp]) -> i8 [%rax], i64 fp [%rbp] system_v {
-; nextln:      ss0 = incoming_arg 64, offset 0
-; nextln:      ss1 = incoming_arg 16, offset -16
-
-; check:                                  block0(v3: sarg_t [ss0], v5: i64 [%rbp]):
-; nextln:  [RexOp1pushq#50]                    x86_push v5
-; nextln:  [RexOp1copysp#8089]                 copy_special %rsp -> %rbp
-; nextln:  [RexOp1spaddr_id#808d,%rax]         v2 = stack_addr.i64 ss0
-; nextln:                                      v0 -> v2
-; nextln:  [RexOp2ld#4b6,%rax]                 v4 = uload8.i32 v2
-; nextln:  [null#00,%rax]                      v1 = ireduce.i8 v4
-; nextln:  [RexOp1popq#58,%rbp]                v6 = x86_pop.i64
-; nextln:  [Op1ret#c3]                         return v1, v6
-; nextln:  }
-
-function u0:1(i64, i64 sarg(64)) -> i8 system_v {
-block0(v0: i64, v1: i64):
-    v2 = load.i8 v1
-    return v2
-}
-
-; check: function u0:1(i64 [%rdi], sarg_t sarg(64) [0], i64 fp [%rbp]) -> i8 [%rax], i64 fp [%rbp] system_v {
-; nextln:      ss0 = incoming_arg 64, offset 0
-; nextln:      ss1 = incoming_arg 16, offset -16
-
-; check:                                  block0(v0: i64 [%rdi], v4: sarg_t [ss0], v6: i64 [%rbp]):
-; nextln: [RexOp1pushq#50]                    x86_push v6
-; nextln: [RexOp1copysp#8089]                 copy_special %rsp -> %rbp
-; nextln: [RexOp1spaddr_id#808d,%rax]         v3 = stack_addr.i64 ss0
-; nextln:                                     v1 -> v3
-; nextln: [RexOp2ld#4b6,%rax]                 v5 = uload8.i32 v3
-; nextln: [null#00,%rax]                      v2 = ireduce.i8 v5
-; nextln: [RexOp1popq#58,%rbp]                v7 = x86_pop.i64
-; nextln: [Op1ret#c3]                         return v2, v7
-; nextln: }
-
-
-function u0:2(i64) -> i8 system_v {
-fn1 = u0:0(i64 sarg(64)) -> i8 system_v
-
-block0(v0: i64):
-    v1 = call fn1(v0)
-    return v1
-}
-
-; check: function u0:2(i64 [%rdi], i64 fp [%rbp]) -> i8 [%rax], i64 fp [%rbp] system_v {
-; nextln:     ss0 = outgoing_arg 64, offset 0
-; nextln:     ss1 = incoming_arg 16, offset -16
-; nextln:     sig0 = (sarg_t sarg(64) [0]) -> i8 [%rax] system_v
-; nextln:     sig1 = (i64 [%rdi], i64 [%rsi], i64 [%rdx]) system_v
-; nextln:     fn1 = u0:0 sig0
-; nextln:     fn2 = %Memcpy sig1
-
-; check:                                 block0(v0: i64 [%rdi], v5: i64 [%rbp]):
-; nextln: [RexOp1pushq#50]                    x86_push v5
-; nextln: [RexOp1copysp#8089]                 copy_special %rsp -> %rbp
-; nextln: [RexOp1adjustsp_ib#d083]            adjust_sp_down_imm 64
-; nextln: [RexOp1spaddr_id#808d,%rax]         v2 = stack_addr.i64 ss0
-; nextln: [RexOp1pu_id#b8,%rcx]               v3 = iconst.i64 64
-; nextln: [RexOp1rmov#8089]                   regmove v0, %rdi -> %rsi
-; nextln: [RexOp1rmov#8089]                   regmove v2, %rax -> %rdi
-; nextln: [RexOp1rmov#8089]                   regmove v3, %rcx -> %rdx
-; nextln: [Op1call_plt_id#e8]                 call fn2(v2, v0, v3)
-; nextln: [dummy_sarg_t#00,ss0]               v4 = dummy_sarg_t
-; nextln: [Op1call_plt_id#e8,%rax]            v1 = call fn1(v4)
-; nextln: [RexOp1adjustsp_ib#8083]            adjust_sp_up_imm 64
-; nextln: [RexOp1popq#58,%rbp]                v6 = x86_pop.i64
-; nextln: [Op1ret#c3]                         return v1, v6
-; nextln: }
-
-function u0:3(i64, i64) -> i8 system_v {
-fn1 = u0:0(i64, i64 sarg(64)) -> i8 system_v
-
-block0(v0: i64, v1: i64):
-    v2 = call fn1(v0, v1)
-    return v2
-}
-
-; check: function u0:3(i64 [%rdi], i64 [%rsi], i64 fp [%rbp], i64 csr [%r15]) -> i8 [%rax], i64 fp [%rbp], i64 csr [%r15] system_v {
-; nextln:     ss0 = outgoing_arg 64, offset 0
-; nextln:     ss1 = spill_slot 8, offset -32
-; nextln:     ss2 = incoming_arg 24, offset -24
-; nextln:     sig0 = (i64 [%rdi], sarg_t sarg(64) [0]) -> i8 [%rax] system_v
-; nextln:     sig1 = (i64 [%rdi], i64 [%rsi], i64 [%rdx]) system_v
-; nextln:     fn1 = u0:0 sig0
-; nextln:     fn2 = %Memcpy sig1
-
-; check:                                  block0(v6: i64 [%rdi], v1: i64 [%rsi], v8: i64 [%rbp], v9: i64 [%r15]):
-; nextln: [RexOp1pushq#50]                    x86_push v8
-; nextln: [RexOp1copysp#8089]                 copy_special %rsp -> %rbp
-; nextln: [RexOp1pushq#50]                    x86_push v9
-; nextln: [RexOp1adjustsp_ib#d083]            adjust_sp_down_imm 72
-; nextln: [RexOp1spillSib32#8089,ss1]         v0 = spill v6
-; nextln: [RexOp1spaddr_id#808d,%rax]         v3 = stack_addr.i64 ss0
-; nextln: [RexOp1pu_id#b8,%rcx]               v4 = iconst.i64 64
-; nextln: [RexOp1rmov#8089]                   regmove v3, %rax -> %rdi
-; nextln: [RexOp1rmov#8089]                   regmove v4, %rcx -> %rdx
-; nextln: [Op1call_plt_id#e8]                 call fn2(v3, v1, v4)
-; nextln: [dummy_sarg_t#00,ss0]               v5 = dummy_sarg_t
-; nextln: [RexOp1fillSib32#808b,%r15]         v7 = fill v0
-; nextln: [RexOp1rmov#8089]                   regmove v7, %r15 -> %rdi
-; nextln: [Op1call_plt_id#e8,%rax]            v2 = call fn1(v7, v5)
-; nextln: [RexOp1adjustsp_ib#8083]            adjust_sp_up_imm 72
-; nextln: [RexOp1popq#58,%r15]                v11 = x86_pop.i64
-; nextln: [RexOp1popq#58,%rbp]                v10 = x86_pop.i64
-; nextln: [Op1ret#c3]                         return v2, v10, v11
-; nextln: }
diff --git a/cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif
deleted file mode 100644
index c5144bfd97..0000000000
--- a/cranelift/filetests/filetests/isa/x86/systemv_x64_unwind.clif
+++ /dev/null
@@ -1,205 +0,0 @@
-test unwind
-set opt_level=speed_and_size
-set is_pic
-target x86_64-linux legacy haswell
-
-; check the unwind information with a function with no args
-function %no_args() system_v {
-block0:
-    return
-}
-; sameln: 0x00000000: CIE
-; nextln:         length: 0x00000014
-; nextln:        version: 0x01
-; nextln:     code_align: 1
-; nextln:     data_align: -8
-; nextln:    ra_register: 0x10
-; nextln:                 DW_CFA_def_cfa (r7, 8)
-; nextln:                 DW_CFA_offset (r16, 1)
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:   Instructions: Init State:
-; nextln: 
-; nextln: 
-; nextln: 0x00000018: FDE
-; nextln:         length: 0x00000024
-; nextln:    CIE_pointer: 0x00000000
-; nextln:     start_addr: 0x0000000000000000
-; nextln:     range_size: 0x0000000000000006 (end_addr = 0x0000000000000006)
-; nextln:   Instructions:
-; nextln:                 DW_CFA_advance_loc (1)
-; nextln:                 DW_CFA_def_cfa_offset (16)
-; nextln:                 DW_CFA_offset (r6, 2)
-; nextln:                 DW_CFA_advance_loc (3)
-; nextln:                 DW_CFA_def_cfa_register (r6)
-; nextln:                 DW_CFA_advance_loc (1)
-; nextln:                 DW_CFA_same_value (r6)
-; nextln:                 DW_CFA_def_cfa (r7, 8)
-; nextln:                 DW_CFA_nop
-
-; check a function with medium-sized stack alloc
-function %medium_stack() system_v {
-    ss0 = explicit_slot 100000
-block0:
-    return
-}
-; sameln: 0x00000000: CIE
-; nextln:         length: 0x00000014
-; nextln:        version: 0x01
-; nextln:     code_align: 1
-; nextln:     data_align: -8
-; nextln:    ra_register: 0x10
-; nextln:                 DW_CFA_def_cfa (r7, 8)
-; nextln:                 DW_CFA_offset (r16, 1)
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:   Instructions: Init State:
-; nextln: 
-; nextln: 
-; nextln: 0x00000018: FDE
-; nextln:         length: 0x00000024
-; nextln:    CIE_pointer: 0x00000000
-; nextln:     start_addr: 0x0000000000000000
-; nextln:     range_size: 0x000000000000001a (end_addr = 0x000000000000001a)
-; nextln:   Instructions:
-; nextln:                 DW_CFA_advance_loc (1)
-; nextln:                 DW_CFA_def_cfa_offset (16)
-; nextln:                 DW_CFA_offset (r6, 2)
-; nextln:                 DW_CFA_advance_loc (3)
-; nextln:                 DW_CFA_def_cfa_register (r6)
-; nextln:                 DW_CFA_advance_loc (21)
-; nextln:                 DW_CFA_same_value (r6)
-; nextln:                 DW_CFA_def_cfa (r7, 8)
-; nextln:                 DW_CFA_nop
-
-; check a function with large-sized stack alloc
-function %large_stack() system_v {
-    ss0 = explicit_slot 524288
-block0:
-    return
-}
-; sameln: 0x00000000: CIE
-; nextln:         length: 0x00000014
-; nextln:        version: 0x01
-; nextln:     code_align: 1
-; nextln:     data_align: -8
-; nextln:    ra_register: 0x10
-; nextln:                 DW_CFA_def_cfa (r7, 8)
-; nextln:                 DW_CFA_offset (r16, 1)
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:   Instructions: Init State:
-; nextln: 
-; nextln: 
-; nextln: 0x00000018: FDE
-; nextln:         length: 0x00000024
-; nextln:    CIE_pointer: 0x00000000
-; nextln:     start_addr: 0x0000000000000000
-; nextln:     range_size: 0x000000000000001a (end_addr = 0x000000000000001a)
-; nextln:   Instructions:
-; nextln:                 DW_CFA_advance_loc (1)
-; nextln:                 DW_CFA_def_cfa_offset (16)
-; nextln:                 DW_CFA_offset (r6, 2)
-; nextln:                 DW_CFA_advance_loc (3)
-; nextln:                 DW_CFA_def_cfa_register (r6)
-; nextln:                 DW_CFA_advance_loc (21)
-; nextln:                 DW_CFA_same_value (r6)
-; nextln:                 DW_CFA_def_cfa (r7, 8)
-; nextln:                 DW_CFA_nop
-; nextln: 
-
-; check a function that has CSRs
-function %lots_of_registers(i64, i64) system_v {
-block0(v0: i64, v1: i64):
-    v2 = load.i32 v0+0
-    v3 = load.i32 v0+8
-    v4 = load.i32 v0+16
-    v5 = load.i32 v0+24
-    v6 = load.i32 v0+32
-    v7 = load.i32 v0+40
-    v8 = load.i32 v0+48
-    v9 = load.i32 v0+56
-    v10 = load.i32 v0+64
-    v11 = load.i32 v0+72
-    v12 = load.i32 v0+80
-    v13 = load.i32 v0+88
-    v14 = load.i32 v0+96
-    store.i32 v2, v1+0
-    store.i32 v3, v1+8
-    store.i32 v4, v1+16
-    store.i32 v5, v1+24
-    store.i32 v6, v1+32
-    store.i32 v7, v1+40
-    store.i32 v8, v1+48
-    store.i32 v9, v1+56
-    store.i32 v10, v1+64
-    store.i32 v11, v1+72
-    store.i32 v12, v1+80
-    store.i32 v13, v1+88
-    store.i32 v14, v1+96
-    return
-}
-; sameln: 0x00000000: CIE
-; nextln:         length: 0x00000014
-; nextln:        version: 0x01
-; nextln:     code_align: 1
-; nextln:     data_align: -8
-; nextln:    ra_register: 0x10
-; nextln:                 DW_CFA_def_cfa (r7, 8)
-; nextln:                 DW_CFA_offset (r16, 1)
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:                 DW_CFA_nop
-; nextln:   Instructions: Init State:
-; nextln: 
-; nextln: 
-; nextln: 0x00000018: FDE
-; nextln:         length: 0x00000044
-; nextln:    CIE_pointer: 0x00000000
-; nextln:     start_addr: 0x0000000000000000
-; nextln:     range_size: 0x0000000000000074 (end_addr = 0x0000000000000074)
-; nextln:   Instructions:
-; nextln:                 DW_CFA_advance_loc (1)
-; nextln:                 DW_CFA_def_cfa_offset (16)
-; nextln:                 DW_CFA_offset (r6, 2)
-; nextln:                 DW_CFA_advance_loc (3)
-; nextln:                 DW_CFA_def_cfa_register (r6)
-; nextln:                 DW_CFA_advance_loc (1)
-; nextln:                 DW_CFA_offset (r3, 3)
-; nextln:                 DW_CFA_advance_loc (2)
-; nextln:                 DW_CFA_offset (r12, 4)
-; nextln:                 DW_CFA_advance_loc (2)
-; nextln:                 DW_CFA_offset (r13, 5)
-; nextln:                 DW_CFA_advance_loc (2)
-; nextln:                 DW_CFA_offset (r14, 6)
-; nextln:                 DW_CFA_advance_loc (2)
-; nextln:                 DW_CFA_offset (r15, 7)
-; nextln:                 DW_CFA_advance_loc (94)
-; nextln:                 DW_CFA_same_value (r15)
-; nextln:                 DW_CFA_advance_loc (2)
-; nextln:                 DW_CFA_same_value (r14)
-; nextln:                 DW_CFA_advance_loc (2)
-; nextln:                 DW_CFA_same_value (r13)
-; nextln:                 DW_CFA_advance_loc (2)
-; nextln:                 DW_CFA_same_value (r12)
-; nextln:                 DW_CFA_advance_loc (1)
-; nextln:                 DW_CFA_same_value (r3)
-; nextln:                 DW_CFA_advance_loc (1)
-; nextln:                 DW_CFA_same_value (r6)
-; nextln:                 DW_CFA_def_cfa (r7, 8)
-; nextln:                 DW_CFA_nop
diff --git a/cranelift/filetests/filetests/isa/x86/tls_elf.clif b/cranelift/filetests/filetests/isa/x86/tls_elf.clif
deleted file mode 100644
index 2c957e0b9a..0000000000
--- a/cranelift/filetests/filetests/isa/x86/tls_elf.clif
+++ /dev/null
@@ -1,18 +0,0 @@
-test regalloc
-set tls_model=elf_gd
-target x86_64 legacy
-
-function u0:0(i32) -> i32, i64 {
-gv0 = symbol colocated tls u1:0
-
-block0(v0: i32):
-    ; check: block0(v2: i32 [%rdi]):
-    ; nextln: [RexOp1spillSib32#89,ss0]           v0 = spill v2
-    v1 = global_value.i64 gv0
-    ; nextln: [elf_tls_get_addr#00,%rax]          v1 = x86_elf_tls_get_addr gv0
-    ; nextln: [RexOp1fillSib32#8b,%r15]           v3 = fill v0
-    return v0, v1
-    ; nextln: [RexOp1rmov#8089]                   regmove v1, %rax -> %rdx
-    ; nextln: [RexOp1rmov#89]                     regmove v3, %r15 -> %rax
-    ; nextln: [Op1ret#c3]                         return v3, v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/tls_enc.clif b/cranelift/filetests/filetests/isa/x86/tls_enc.clif
deleted file mode 100644
index d3481a15bf..0000000000
--- a/cranelift/filetests/filetests/isa/x86/tls_enc.clif
+++ /dev/null
@@ -1,11 +0,0 @@
-test binemit
-target x86_64 legacy
-
-function u0:0() -> i64, i64 {
-gv0 = symbol colocated tls u1:0
-
-block0:
-    [-, %rax] v0 = x86_elf_tls_get_addr gv0 ; bin: 66 48 8d 3d ElfX86_64TlsGd(u1:0-4) 00000000 66 66 48 e8 CallPLTRel4(%ElfTlsGetAddr-4) 00000000
-    [-, %rax] v1 = x86_macho_tls_get_addr gv0; bin: 48 8b 3d MachOX86_64Tlv(u1:0-4) 00000000 ff 17
-    return v0, v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/tls_macho.clif b/cranelift/filetests/filetests/isa/x86/tls_macho.clif
deleted file mode 100644
index 3747ac9f05..0000000000
--- a/cranelift/filetests/filetests/isa/x86/tls_macho.clif
+++ /dev/null
@@ -1,18 +0,0 @@
-test regalloc
-set tls_model=macho
-target x86_64 legacy
-
-function u0:0(i32) -> i32, i64 {
-gv0 = symbol colocated tls u1:0
-
-block0(v0: i32):
-    ; check: block0(v2: i32 [%rdi]):
-    ; nextln: [RexOp1spillSib32#89,ss0]           v0 = spill v2
-    v1 = global_value.i64 gv0
-    ; nextln: [macho_tls_get_addr#00,%rax]        v1 = x86_macho_tls_get_addr gv0
-    ; nextln: [RexOp1fillSib32#8b,%r15]           v3 = fill v0
-    return v0, v1
-    ; nextln: [RexOp1rmov#8089]                   regmove v1, %rax -> %rdx
-    ; nextln: [RexOp1rmov#89]                     regmove v3, %r15 -> %rax
-    ; nextln: [Op1ret#c3]                         return v3, v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif b/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif
deleted file mode 100644
index 931b6e0aca..0000000000
--- a/cranelift/filetests/filetests/isa/x86/uextend-i8-to-i16.clif
+++ /dev/null
@@ -1,14 +0,0 @@
-test compile
-target x86_64 legacy
-
-function u0:0(i8) -> i16 fast {
-block0(v0: i8):
-    v1 = uextend.i16 v0
-    return v1
-}
-
-function u0:1(i8) -> i16 fast {
-block0(v0: i8):
-    v1 = sextend.i16 v0
-    return v1
-}
diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif
deleted file mode 100644
index 13cf504d13..0000000000
--- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif
+++ /dev/null
@@ -1,255 +0,0 @@
-test compile
-set opt_level=speed_and_size
-set is_pic
-target x86_64 legacy haswell
-
-; check if for one arg we use the right register
-function %one_arg(i64) windows_fastcall {
-block0(v0: i64):
-    return
-}
-; check:  function %one_arg(i64 [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
-; nextln: ss0 = incoming_arg 16, offset -16
-; check:  block0(v0: i64 [%rcx], v1: i64 [%rbp]):
-; nextln: x86_push v1
-; nextln: copy_special %rsp -> %rbp
-; nextln: v2 = x86_pop.i64
-; nextln: return v2
-; nextln: }
-
-; check if we still use registers for 4 arguments
-function %four_args(i64, i64, i64, i64) windows_fastcall {
-block0(v0: i64, v1: i64, v2: i64, v3: i64):
-    return
-}
-; check:  function %four_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
-; nextln: ss0 = incoming_arg 16, offset -16
-; check:  block0(v0: i64 [%rcx], v1: i64 [%rdx], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [%rbp]):
-; nextln: x86_push v4
-; nextln: copy_special %rsp -> %rbp
-; nextln: v5 = x86_pop.i64
-; nextln: return v5
-; nextln: }
-
-; check if float arguments are passed through XMM registers
-function %four_float_args(f64, f64, f64, f64) windows_fastcall {
-block0(v0: f64, v1: f64, v2: f64, v3: f64):
-    return
-}
-; check:  function %four_float_args(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
-; nextln: ss0 = incoming_arg 16, offset -16
-; check:  block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v4: i64 [%rbp]):
-; nextln: x86_push v4
-; nextln: copy_special %rsp -> %rbp
-; nextln: v5 = x86_pop.i64
-; nextln: return v5
-; nextln: }
-
-; check if we use stack space for > 4 arguments
-function %five_args(i64, i64, i64, i64, i64) windows_fastcall {
-block0(v0: i64, v1: i64, v2: i64, v3: i64, v4: i64):
-    return
-}
-; check:  function %five_args(i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9], i64 [32], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
-; nextln: ss0 = incoming_arg 8, offset 32
-; nextln: ss1 = incoming_arg 16, offset -16
-; check:  block0(v0: i64 [%rcx], v1: i64 [%rdx], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [ss0], v5: i64 [%rbp]):
-; nextln: x86_push v5
-; nextln: copy_special %rsp -> %rbp
-; nextln: v6 = x86_pop.i64
-; nextln: return v6
-; nextln: }
-
-; check that we preserve xmm6 and above if we're using them locally
-function %float_callee_saves(f64, f64, f64, f64) windows_fastcall {
-block0(v0: f64, v1: f64, v2: f64, v3: f64):
-; explicitly use a callee-save register
-[-, %xmm6]  v4 = fadd v0, v1
-[-, %xmm7]  v5 = fadd v0, v1
-    return
-}
-; check:  function %float_callee_sav(f64 [%xmm0], f64 [%xmm1], f64 [%xmm2], f64 [%xmm3], i64 csr [%rsp], i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7]) -> i64 fp [%rbp], f64x2 csr [%xmm6], f64x2 csr [%xmm7] windows_fastcall {
-; nextln: ss0 = incoming_arg 48, offset -48
-; check:  block0(v0: f64 [%xmm0], v1: f64 [%xmm1], v2: f64 [%xmm2], v3: f64 [%xmm3], v6: i64 [%rsp], v7: i64 [%rbp], v8: f64x2 [%xmm6], v9: f64x2 [%xmm7]): 
-; nextln: x86_push v7
-; nextln: copy_special %rsp -> %rbp
-; nextln: adjust_sp_down_imm 32
-; nextln: store notrap aligned v8, v6+16
-; nextln: store notrap aligned v9, v6
-; nextln: v11 = load.f64x2 notrap aligned v6+16
-; nextln: v12 = load.f64x2 notrap aligned v6
-; nextln: adjust_sp_up_imm 32
-; nextln: v10 = x86_pop.i64
-; nextln: return v10, v11, v12
-; nextln: }
-
-function %mixed_int_float(i64, f64, i64, f32) windows_fastcall {
-block0(v0: i64, v1: f64, v2: i64, v3: f32):
-    return
-}
-; check:  function %mixed_int_float(i64 [%rcx], f64 [%xmm1], i64 [%r8], f32 [%xmm3], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
-; nextln: ss0 = incoming_arg 16, offset -16
-; check:  block0(v0: i64 [%rcx], v1: f64 [%xmm1], v2: i64 [%r8], v3: f32 [%xmm3], v4: i64 [%rbp]):
-; nextln: x86_push v4
-; nextln: copy_special %rsp -> %rbp
-; nextln: v5 = x86_pop.i64
-; nextln: return v5
-; nextln: }
-
-function %ret_val_float(f32, f64, i64, i64) -> f64 windows_fastcall {
-block0(v0: f32, v1: f64, v2: i64, v3: i64):
-    return v1
-}
-; check:  function %ret_val_float(f32 [%xmm0], f64 [%xmm1], i64 [%r8], i64 [%r9], i64 fp [%rbp]) -> f64 [%xmm0], i64 fp [%rbp] windows_fastcall {
-; nextln: ss0 = incoming_arg 16, offset -16
-; check:  block0(v0: f32 [%xmm0], v1: f64 [%xmm1], v2: i64 [%r8], v3: i64 [%r9], v4: i64 [%rbp]):
-; nextln: x86_push v4
-; nextln: copy_special %rsp -> %rbp
-; nextln: regmove v1, %xmm1 -> %xmm0
-; nextln: v5 = x86_pop.i64
-; nextln: return v1, v5
-; nextln: }
-
-function %ret_val_i128(i64, i64) -> i128 windows_fastcall {
-block0(v0: i64, v1: i64):
-    v2 = iconcat v0, v1
-    return v2
-}
-; check: function %ret_val_i128(i64 [%rdx], i64 [%r8], i64 sret [%rcx], i64 fp [%rbp]) -> i64 sret [%rax], i64 fp [%rbp] windows_fastcall {
-
-; check if i128 is passed by reference
-function %i128_arg(i128) windows_fastcall {
-block0(v0: i128):
-    return
-}
-; check: function %i128_arg(i64 ptr [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
-
-; check if vector types are passed by reference
-function %i32x4_arg(i32x4) windows_fastcall {
-block0(v0: i32x4):
-    return
-}
-; check: function %i32x4_arg(i64 ptr [%rcx], i64 fp [%rbp]) -> i64 fp [%rbp] windows_fastcall {
-
-function %internal_stack_arg_function_call(i64) -> i64 windows_fastcall {
-  fn0 = %foo(i64, i64, i64, i64) -> i64 windows_fastcall
-  fn1 = %foo2(i64, i64, i64, i64) -> i64 windows_fastcall
-block0(v0: i64):
-    v1 = load.i64 v0+0
-    v2 = load.i64 v0+8
-    v3 = load.i64 v0+16
-    v4 = load.i64 v0+24
-    v5 = load.i64 v0+32
-    v6 = load.i64 v0+40
-    v7 = load.i64 v0+48
-    v8 = load.i64 v0+56
-    v9 = load.i64 v0+64
-    v10 = call fn0(v1, v2, v3, v4)
-    store.i64 v1, v0+8
-    store.i64 v2, v0+16
-    store.i64 v3, v0+24
-    store.i64 v4, v0+32
-    store.i64 v5, v0+40
-    store.i64 v6, v0+48
-    store.i64 v7, v0+56
-    store.i64 v8, v0+64
-    store.i64 v9, v0+72
-    return v10
-}
-; check:  function %internal_stack_a(i64 [%rcx], i64 fp [%rbp], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15]) -> i64 [%rax], i64 fp [%rbp], i64 csr [%r12], i64 csr [%r13], i64 csr [%r14], i64 csr [%r15] windows_fastcall {
-; nextln: ss0 = spill_slot 8, offset -56
-; nextln: ss1 = spill_slot 8, offset -64
-; nextln: ss2 = spill_slot 8, offset -72
-; nextln: ss3 = spill_slot 8, offset -80
-; nextln: ss4 = spill_slot 8, offset -88
-; nextln: ss5 = spill_slot 8, offset -96
-; nextln: ss6 = spill_slot 8, offset -104
-; nextln: ss7 = spill_slot 8, offset -112
-; nextln: ss8 = spill_slot 8, offset -120
-; nextln: ss9 = spill_slot 8, offset -128
-; nextln: ss10 = incoming_arg 48, offset -48
-; nextln: ss11 = explicit_slot 32, offset -160
-; nextln: sig0 = (i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9]) -> i64 [%rax] windows_fastcall
-; nextln: sig1 = (i64 [%rcx], i64 [%rdx], i64 [%r8], i64 [%r9]) -> i64 [%rax] windows_fastcall
-; nextln: fn0 = %foo sig0
-; nextln: fn1 = %foo2 sig1
-; check:  block0(v11: i64 [%rcx], v52: i64 [%rbp], v53: i64 [%r12], v54: i64 [%r13], v55: i64 [%r14], v56: i64 [%r15]):
-; nextln: x86_push v52
-; nextln: copy_special %rsp -> %rbp
-; nextln: x86_push v53
-; nextln: x86_push v54
-; nextln: x86_push v55
-; nextln: x86_push v56
-; nextln: adjust_sp_down_imm 112
-; nextln: v0 = spill v11
-; nextln: v12 = copy_to_ssa.i64 %rcx
-; nextln: v13 = load.i64 v12
-; nextln: v1 = spill v13
-; nextln: v14 = fill_nop v0
-; nextln: v15 = load.i64 v14+8
-; nextln: v2 = spill v15
-; nextln: v16 = fill_nop v0
-; nextln: v17 = load.i64 v16+16
-; nextln: v3 = spill v17
-; nextln: v18 = fill_nop v0
-; nextln: v19 = load.i64 v18+24
-; nextln: v4 = spill v19
-; nextln: v20 = fill_nop v0
-; nextln: v21 = load.i64 v20+32
-; nextln: v5 = spill v21
-; nextln: v22 = fill_nop v0
-; nextln: v23 = load.i64 v22+40
-; nextln: v6 = spill v23
-; nextln: v24 = fill_nop v0
-; nextln: v25 = load.i64 v24+48
-; nextln: v7 = spill v25
-; nextln: v26 = fill_nop v0
-; nextln: v27 = load.i64 v26+56
-; nextln: v8 = spill v27
-; nextln: v28 = fill_nop v0
-; nextln: v29 = load.i64 v28+64
-; nextln: v9 = spill v29
-; nextln: v30 = fill v1
-; nextln: v31 = fill v2
-; nextln: v32 = fill v3
-; nextln: v33 = fill v4
-; nextln: regmove v30, %r15 -> %rcx
-; nextln: regmove v31, %r14 -> %rdx
-; nextln: regmove v32, %r13 -> %r8
-; nextln: regmove v33, %r12 -> %r9
-; nextln: v10 = call fn0(v30, v31, v32, v33)
-; nextln: v34 = fill v1
-; nextln: v35 = fill v0
-; nextln: store v34, v35+8
-; nextln: v36 = fill v2
-; nextln: v37 = fill_nop v0
-; nextln: store v36, v37+16
-; nextln: v38 = fill v3
-; nextln: v39 = fill_nop v0
-; nextln: store v38, v39+24
-; nextln: v40 = fill v4
-; nextln: v41 = fill_nop v0
-; nextln: store v40, v41+32
-; nextln: v42 = fill v5
-; nextln: v43 = fill_nop v0
-; nextln: store v42, v43+40
-; nextln: v44 = fill v6
-; nextln: v45 = fill_nop v0
-; nextln: store v44, v45+48
-; nextln: v46 = fill v7
-; nextln: v47 = fill_nop v0
-; nextln: store v46, v47+56
-; nextln: v48 = fill v8
-; nextln: v49 = fill_nop v0
-; nextln: store v48, v49+64
-; nextln: v50 = fill v9
-; nextln: v51 = fill_nop v0
-; nextln: store v50, v51+72
-; nextln: adjust_sp_up_imm 112
-; nextln: v61 = x86_pop.i64 
-; nextln: v60 = x86_pop.i64 
-; nextln: v59 = x86_pop.i64 
-; nextln: v58 = x86_pop.i64 
-; nextln: v57 = x86_pop.i64 
-; nextln: return v10, v57, v58, v59, v60, v61
-; nextln: }
diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif
deleted file mode 100644
index 547e131fbd..0000000000
--- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64_unwind.clif
+++ /dev/null
@@ -1,250 +0,0 @@
-test unwind
-set opt_level=speed_and_size
-set is_pic
-target x86_64-windows legacy haswell
-
-; check the unwind information with a leaf function with no args
-function %no_args_leaf() windows_fastcall {
-block0:
-    return
-}
-; sameln:                version: 1
-; nextln:                  flags: 0
-; nextln:          prologue size: 4
-; nextln:         frame register: 0
-; nextln:  frame register offset: 0
-; nextln:           unwind codes: 1
-; nextln:  
-; nextln:                 offset: 1
-; nextln:                     op: PushNonvolatileRegister
-; nextln:                   info: 5
-
-; check the unwind information with a non-leaf function with no args
-function %no_args() windows_fastcall {
-    fn0 = %foo()
-block0:
-    call fn0()
-    return
-}
-; sameln:                version: 1
-; nextln:                  flags: 0
-; nextln:          prologue size: 8
-; nextln:         frame register: 0
-; nextln:  frame register offset: 0
-; nextln:           unwind codes: 2
-; nextln:  
-; nextln:                 offset: 1
-; nextln:                     op: PushNonvolatileRegister
-; nextln:                   info: 5
-; nextln:  
-; nextln:                 offset: 8
-; nextln:                     op: SmallStackAlloc
-; nextln:                   info: 3
-
-; check a function with medium-sized stack alloc
-function %medium_stack() windows_fastcall {
-    ss0 = explicit_slot 100000
-block0:
-    return
-}
-; sameln:               version: 1
-; nextln:                 flags: 0
-; nextln:         prologue size: 17
-; nextln:        frame register: 0
-; nextln: frame register offset: 0
-; nextln:          unwind codes: 2
-; nextln: 
-; nextln:                offset: 1
-; nextln:                    op: PushNonvolatileRegister
-; nextln:                  info: 5
-; nextln:  
-; nextln:                offset: 17
-; nextln:                    op: LargeStackAlloc
-; nextln:                  info: 0
-; nextln:                 value: 12500 (u16)
-
-; check a function with large-sized stack alloc
-function %large_stack() windows_fastcall {
-    ss0 = explicit_slot 524288
-block0:
-    return
-}
-; sameln:                version: 1
-; nextln:                  flags: 0
-; nextln:          prologue size: 17
-; nextln:         frame register: 0
-; nextln:  frame register offset: 0
-; nextln:           unwind codes: 2
-; nextln:  
-; nextln:                 offset: 1
-; nextln:                     op: PushNonvolatileRegister
-; nextln:                   info: 5
-; nextln:  
-; nextln:                 offset: 17
-; nextln:                     op: LargeStackAlloc
-; nextln:                   info: 1
-; nextln:                  value: 524288 (u32)
-
-function %fpr_with_function_call(i64, i64) windows_fastcall {
-    fn0 = %foo(f64, f64, i64, i64, i64) windows_fastcall;
-block0(v0: i64, v1: i64):
-    v2 = load.f64 v0+0
-    v3 = load.f64 v0+8
-    v4 = load.i64 v0+16
-    v15 = load.f64 v0+104
-    v16 = load.f64 v0+112
-    v17 = load.f64 v0+120
-    v18 = load.f64 v0+128
-    v19 = load.f64 v0+136
-    v20 = load.f64 v0+144
-    v21 = load.f64 v0+152
-    v22 = load.f64 v0+160
-    v23 = load.f64 v0+168
-    call fn0(v2, v3, v4, v1, v1)
-    store.f64 v15, v1+104
-    store.f64 v16, v1+112
-    store.f64 v17, v1+120
-    store.f64 v18, v1+128
-    store.f64 v19, v1+136
-    store.f64 v20, v1+144
-    store.f64 v21, v1+152
-    store.f64 v22, v1+160
-    store.f64 v23, v1+168
-    return
-}
-; Only check the first unwind code here because this test specifically looks to
-; see that in a function that is not a leaf, a callee-save FPR is stored in an
-; area that does not overlap either the callee's shadow space or stack argument
-; space.
-;
-; sameln:                version: 1
-; nextln:                  flags: 0 
-; nextln:          prologue size: 22
-; nextln:         frame register: 0
-; nextln:  frame register offset: 0
-; nextln:           unwind codes: 4
-; nextln:  
-; nextln:                 offset: 1
-; nextln:                     op: PushNonvolatileRegister
-; nextln:                   info: 5
-; nextln:  
-; nextln:                 offset: 6
-; nextln:                     op: PushNonvolatileRegister
-; nextln:                   info: 15
-; nextln:  
-; nextln:                 offset: 13
-; nextln:                     op: LargeStackAlloc
-; nextln:                   info: 0
-; nextln:                  value: 23 (u16)
-; nextln:  
-; nextln:                 offset: 22
-; nextln:                     op: SaveXmm128
-; nextln:                   info: 15
-; nextln:                  value: 10 (u16)
-
-; check a function that has CSRs
-function %lots_of_registers(i64, i64) windows_fastcall {
-block0(v0: i64, v1: i64):
-    v2 = load.i32 v0+0
-    v3 = load.i32 v0+8
-    v4 = load.i32 v0+16
-    v5 = load.i32 v0+24
-    v6 = load.i32 v0+32
-    v7 = load.i32 v0+40
-    v8 = load.i32 v0+48
-    v9 = load.i32 v0+56
-    v10 = load.i32 v0+64
-    v11 = load.i32 v0+72
-    v12 = load.i32 v0+80
-    v13 = load.i32 v0+88
-    v14 = load.i32 v0+96
-    v15 = load.f64 v0+104
-    v16 = load.f64 v0+112
-    v17 = load.f64 v0+120
-    v18 = load.f64 v0+128
-    v19 = load.f64 v0+136
-    v20 = load.f64 v0+144
-    v21 = load.f64 v0+152
-    v22 = load.f64 v0+160
-    v23 = load.f64 v0+168
-    store.i32 v2, v1+0
-    store.i32 v3, v1+8
-    store.i32 v4, v1+16
-    store.i32 v5, v1+24
-    store.i32 v6, v1+32
-    store.i32 v7, v1+40
-    store.i32 v8, v1+48
-    store.i32 v9, v1+56
-    store.i32 v10, v1+64
-    store.i32 v11, v1+72
-    store.i32 v12, v1+80
-    store.i32 v13, v1+88
-    store.i32 v14, v1+96
-    store.f64 v15, v1+104
-    store.f64 v16, v1+112
-    store.f64 v17, v1+120
-    store.f64 v18, v1+128
-    store.f64 v19, v1+136
-    store.f64 v20, v1+144
-    store.f64 v21, v1+152
-    store.f64 v22, v1+160
-    store.f64 v23, v1+168
-    return
-}
-; sameln:                version: 1
-; nextln:                  flags: 0
-; nextln:          prologue size: 35
-; nextln:         frame register: 0
-; nextln:  frame register offset: 0
-; nextln:           unwind codes: 12
-; nextln:  
-; nextln:                 offset: 1
-; nextln:                     op: PushNonvolatileRegister
-; nextln:                   info: 5
-; nextln:  
-; nextln:                 offset: 5
-; nextln:                     op: PushNonvolatileRegister
-; nextln:                   info: 3
-; nextln:  
-; nextln:                 offset: 6
-; nextln:                     op: PushNonvolatileRegister
-; nextln:                   info: 6
-; nextln:  
-; nextln:                 offset: 7
-; nextln:                     op: PushNonvolatileRegister
-; nextln:                   info: 7
-; nextln:  
-; nextln:                 offset: 9
-; nextln:                     op: PushNonvolatileRegister
-; nextln:                   info: 12
-; nextln:  
-; nextln:                 offset: 11
-; nextln:                     op: PushNonvolatileRegister
-; nextln:                   info: 13
-; nextln:  
-; nextln:                 offset: 13
-; nextln:                     op: PushNonvolatileRegister
-; nextln:                   info: 14
-; nextln:  
-; nextln:                 offset: 15
-; nextln:                     op: PushNonvolatileRegister
-; nextln:                   info: 15
-; nextln:  
-; nextln:                 offset: 19
-; nextln:                     op: SmallStackAlloc
-; nextln:                   info: 8
-; nextln:  
-; nextln:                 offset: 24
-; nextln:                     op: SaveXmm128
-; nextln:                   info: 6
-; nextln:                  value: 3 (u16)
-; nextln:  
-; nextln:                 offset: 29
-; nextln:                     op: SaveXmm128
-; nextln:                   info: 7
-; nextln:                  value: 2 (u16)
-; nextln:  
-; nextln:                 offset: 35
-; nextln:                     op: SaveXmm128
-; nextln:                   info: 8
-; nextln:                  value: 1 (u16)
diff --git a/cranelift/filetests/filetests/legalizer/bitrev-i128.clif b/cranelift/filetests/filetests/legalizer/bitrev-i128.clif
deleted file mode 100644
index b58bf9bcb5..0000000000
--- a/cranelift/filetests/filetests/legalizer/bitrev-i128.clif
+++ /dev/null
@@ -1,89 +0,0 @@
-test legalizer
-target x86_64 legacy
-
-function %reverse_bits(i128) -> i128 {
-block0(v0: i128):
-    v1 = bitrev.i128 v0
-    return v1
-}
-
-; check: block0(v2: i64, v3: i64):
-; check: v0 = iconcat v2, v3
-; check: v33 = iconst.i64 0xaaaa_aaaa_aaaa_aaaa
-; check: v6 = band v2, v33
-; check: v7 = ushr_imm v6, 1
-; check: v34 = iconst.i64 0x5555_5555_5555_5555
-; check: v8 = band v2, v34
-; check: v9 = ishl_imm v8, 1
-; check: v10 = bor v7, v9
-; check: v35 = iconst.i64 0xcccc_cccc_cccc_cccc
-; check: v11 = band v10, v35
-; check: v12 = ushr_imm v11, 2
-; check: v36 = iconst.i64 0x3333_3333_3333_3333
-; check: v13 = band v10, v36
-; check: v14 = ishl_imm v13, 2
-; check: v15 = bor v12, v14
-; check: v37 = iconst.i64 0xf0f0_f0f0_f0f0_f0f0
-; check: v16 = band v15, v37
-; check: v17 = ushr_imm v16, 4
-; check: v38 = iconst.i64 0x0f0f_0f0f_0f0f_0f0f
-; check: v18 = band v15, v38
-; check: v19 = ishl_imm v18, 4
-; check: v20 = bor v17, v19
-; check: v39 = iconst.i64 0xff00_ff00_ff00_ff00
-; check: v21 = band v20, v39
-; check: v22 = ushr_imm v21, 8
-; check: v40 = iconst.i64 0x00ff_00ff_00ff_00ff
-; check: v23 = band v20, v40
-; check: v24 = ishl_imm v23, 8
-; check: v25 = bor v22, v24
-; check: v41 = iconst.i64 0xffff_0000_ffff_0000
-; check: v26 = band v25, v41
-; check: v27 = ushr_imm v26, 16
-; check: v42 = iconst.i64 0xffff_0000_ffff
-; check: v28 = band v25, v42
-; check: v29 = ishl_imm v28, 16
-; check: v30 = bor v27, v29
-; check: v31 = ushr_imm v30, 32
-; check: v32 = ishl_imm v30, 32
-; check: v4 = bor v31, v32
-; check: v70 = iconst.i64 0xaaaa_aaaa_aaaa_aaaa
-; check: v43 = band v3, v70
-; check: v44 = ushr_imm v43, 1
-; check: v71 = iconst.i64 0x5555_5555_5555_5555
-; check: v45 = band v3, v71
-; check: v46 = ishl_imm v45, 1
-; check: v47 = bor v44, v46
-; check: v72 = iconst.i64 0xcccc_cccc_cccc_cccc
-; check: v48 = band v47, v72
-; check: v49 = ushr_imm v48, 2
-; check: v73 = iconst.i64 0x3333_3333_3333_3333
-; check: v50 = band v47, v73
-; check: v51 = ishl_imm v50, 2
-; check: v52 = bor v49, v51
-; check: v74 = iconst.i64 0xf0f0_f0f0_f0f0_f0f0
-; check: v53 = band v52, v74
-; check: v54 = ushr_imm v53, 4
-; check: v75 = iconst.i64 0x0f0f_0f0f_0f0f_0f0f
-; check: v55 = band v52, v75
-; check: v56 = ishl_imm v55, 4
-; check: v57 = bor v54, v56
-; check: v76 = iconst.i64 0xff00_ff00_ff00_ff00
-; check: v58 = band v57, v76
-; check: v59 = ushr_imm v58, 8
-; check: v77 = iconst.i64 0x00ff_00ff_00ff_00ff
-; check: v60 = band v57, v77
-; check: v61 = ishl_imm v60, 8
-; check: v62 = bor v59, v61
-; check: v78 = iconst.i64 0xffff_0000_ffff_0000
-; check: v63 = band v62, v78
-; check: v64 = ushr_imm v63, 16
-; check: v79 = iconst.i64 0xffff_0000_ffff
-; check: v65 = band v62, v79
-; check: v66 = ishl_imm v65, 16
-; check: v67 = bor v64, v66
-; check: v68 = ushr_imm v67, 32
-; check: v69 = ishl_imm v67, 32
-; check: v5 = bor v68, v69
-; check: v1 = iconcat v5, v4
-; check: return v5, v4
diff --git a/cranelift/filetests/filetests/legalizer/bitrev.clif b/cranelift/filetests/filetests/legalizer/bitrev.clif
deleted file mode 100644
index 6c9ead0fe2..0000000000
--- a/cranelift/filetests/filetests/legalizer/bitrev.clif
+++ /dev/null
@@ -1,206 +0,0 @@
-test legalizer
-target x86_64 legacy
-
-function %reverse_bits_8(i8) -> i8 {
-block0(v0: i8):
-    v1 = bitrev.i8 v0
-    return v1
-}
-; check: v16 = uextend.i32 v0
-; check: v17 = band_imm v16, 170
-; check: v2 = ireduce.i8 v17
-; check: v18 = uextend.i32 v2
-; check: v19 = ushr_imm v18, 1
-; check: v3 = ireduce.i8 v19
-; check: v20 = uextend.i32 v0
-; check: v21 = band_imm v20, 85
-; check: v4 = ireduce.i8 v21
-; check: v22 = uextend.i32 v4
-; check: v23 = ishl_imm v22, 1
-; check: v5 = ireduce.i8 v23
-; check: v24 = uextend.i32 v3
-; check: v25 = uextend.i32 v5
-; check: v26 = bor v24, v25
-; check: v6 = ireduce.i8 v26
-; check: v27 = uextend.i32 v6
-; check: v28 = band_imm v27, 204
-; check: v7 = ireduce.i8 v28
-; check: v29 = uextend.i32 v7
-; check: v30 = ushr_imm v29, 2
-; check: v8 = ireduce.i8 v30
-; check: v31 = uextend.i32 v6
-; check: v32 = band_imm v31, 51
-; check: v9 = ireduce.i8 v32
-; check: v33 = uextend.i32 v9
-; check: v34 = ishl_imm v33, 2
-; check: v10 = ireduce.i8 v34
-; check: v35 = uextend.i32 v8
-; check: v36 = uextend.i32 v10
-; check: v37 = bor v35, v36
-; check: v11 = ireduce.i8 v37
-; check: v38 = uextend.i32 v11
-; check: v39 = band_imm v38, 240
-; check: v12 = ireduce.i8 v39
-; check: v40 = uextend.i32 v12
-; check: v41 = ushr_imm v40, 4
-; check: v13 = ireduce.i8 v41
-; check: v42 = uextend.i32 v11
-; check: v43 = band_imm v42, 15
-; check: v14 = ireduce.i8 v43
-; check: v44 = uextend.i32 v14
-; check: v45 = ishl_imm v44, 4
-; check: v15 = ireduce.i8 v45
-; check: v46 = uextend.i32 v13
-; check: v47 = uextend.i32 v15
-; check: v48 = bor v46, v47
-; check: v1 = ireduce.i8 v48
-; check: return v1
-
-function %reverse_bits_16(i16) -> i16 {
-block0(v0: i16):
-    v1 = bitrev.i16 v0
-    return v1
-}
-; check: v21 = uextend.i32 v0
-; check: v22 = band_imm v21, 0xaaaa
-; check: v2 = ireduce.i16 v22
-; check: v23 = uextend.i32 v2
-; check: v24 = ushr_imm v23, 1
-; check: v3 = ireduce.i16 v24
-; check: v25 = uextend.i32 v0
-; check: v26 = band_imm v25, 0x5555
-; check: v4 = ireduce.i16 v26
-; check: v27 = uextend.i32 v4
-; check: v28 = ishl_imm v27, 1
-; check: v5 = ireduce.i16 v28
-; check: v29 = uextend.i32 v3
-; check: v30 = uextend.i32 v5
-; check: v31 = bor v29, v30
-; check: v6 = ireduce.i16 v31
-; check: v32 = uextend.i32 v6
-; check: v33 = band_imm v32, 0xcccc
-; check: v7 = ireduce.i16 v33
-; check: v34 = uextend.i32 v7
-; check: v35 = ushr_imm v34, 2
-; check: v8 = ireduce.i16 v35
-; check: v36 = uextend.i32 v6
-; check: v37 = band_imm v36, 0x3333
-; check: v9 = ireduce.i16 v37
-; check: v38 = uextend.i32 v9
-; check: v39 = ishl_imm v38, 2
-; check: v10 = ireduce.i16 v39
-; check: v40 = uextend.i32 v8
-; check: v41 = uextend.i32 v10
-; check: v42 = bor v40, v41
-; check: v11 = ireduce.i16 v42
-; check: v43 = uextend.i32 v11
-; check: v44 = band_imm v43, 0xf0f0
-; check: v12 = ireduce.i16 v44
-; check: v45 = uextend.i32 v12
-; check: v46 = ushr_imm v45, 4
-; check: v13 = ireduce.i16 v46
-; check: v47 = uextend.i32 v11
-; check: v48 = band_imm v47, 3855
-; check: v14 = ireduce.i16 v48
-; check: v49 = uextend.i32 v14
-; check: v50 = ishl_imm v49, 4
-; check: v15 = ireduce.i16 v50
-; check: v51 = uextend.i32 v13
-; check: v52 = uextend.i32 v15
-; check: v53 = bor v51, v52
-; check: v16 = ireduce.i16 v53
-; check: v54 = uextend.i32 v16
-; check: v55 = band_imm v54, 0xff00
-; check: v17 = ireduce.i16 v55
-; check: v56 = uextend.i32 v17
-; check: v57 = ushr_imm v56, 8
-; check: v18 = ireduce.i16 v57
-; check: v58 = uextend.i32 v16
-; check: v59 = band_imm v58, 255
-; check: v19 = ireduce.i16 v59
-; check: v60 = uextend.i32 v19
-; check: v61 = ishl_imm v60, 8
-; check: v20 = ireduce.i16 v61
-; check: v62 = uextend.i32 v18
-; check: v63 = uextend.i32 v20
-; check: v64 = bor v62, v63
-; check: v1 = ireduce.i16 v64
-; check: return v1
-
-function %reverse_bits_32(i32) -> i32 {
-block0(v0: i32):
-    v1 = bitrev.i32 v0
-    return v1
-}
-; check: v24 = iconst.i32 0xaaaa_aaaa
-; check: v2 = band v0, v24
-; check: v3 = ushr_imm v2, 1
-; check: v4 = band_imm v0, 0x5555_5555
-; check: v5 = ishl_imm v4, 1
-; check: v6 = bor v3, v5
-; check: v25 = iconst.i32 0xcccc_cccc
-; check: v7 = band v6, v25
-; check: v8 = ushr_imm v7, 2
-; check: v9 = band_imm v6, 0x3333_3333
-; check: v10 = ishl_imm v9, 2
-; check: v11 = bor v8, v10
-; check: v26 = iconst.i32 0xf0f0_f0f0
-; check: v12 = band v11, v26
-; check: v13 = ushr_imm v12, 4
-; check: v14 = band_imm v11, 0x0f0f_0f0f
-; check: v15 = ishl_imm v14, 4
-; check: v16 = bor v13, v15
-; check: v27 = iconst.i32 0xff00_ff00
-; check: v17 = band v16, v27
-; check: v18 = ushr_imm v17, 8
-; check: v19 = band_imm v16, 0x00ff_00ff
-; check: v20 = ishl_imm v19, 8
-; check: v21 = bor v18, v20
-; check: v22 = ushr_imm v21, 16
-; check: v23 = ishl_imm v21, 16
-; check: v1 = bor v22, v23
-
-
-function %reverse_bits_64(i64) -> i64 {
-block0(v0: i64):
-    v1 = bitrev.i64 v0
-    return v1
-}
-; check: v29 = iconst.i64 0xaaaa_aaaa_aaaa_aaaa
-; check: v2 = band v0, v29
-; check: v3 = ushr_imm v2, 1
-; check: v30 = iconst.i64 0x5555_5555_5555_5555
-; check: v4 = band v0, v30
-; check: v5 = ishl_imm v4, 1
-; check: v6 = bor v3, v5
-; check: v31 = iconst.i64 0xcccc_cccc_cccc_cccc
-; check: v7 = band v6, v31
-; check: v8 = ushr_imm v7, 2
-; check: v32 = iconst.i64 0x3333_3333_3333_3333
-; check: v9 = band v6, v32
-; check: v10 = ishl_imm v9, 2
-; check: v11 = bor v8, v10
-; check: v33 = iconst.i64 0xf0f0_f0f0_f0f0_f0f0
-; check: v12 = band v11, v33
-; check: v13 = ushr_imm v12, 4
-; check: v34 = iconst.i64 0x0f0f_0f0f_0f0f_0f0f
-; check: v14 = band v11, v34
-; check: v15 = ishl_imm v14, 4
-; check: v16 = bor v13, v15
-; check: v35 = iconst.i64 0xff00_ff00_ff00_ff00
-; check: v17 = band v16, v35
-; check: v18 = ushr_imm v17, 8
-; check: v36 = iconst.i64 0x00ff_00ff_00ff_00ff
-; check: v19 = band v16, v36
-; check: v20 = ishl_imm v19, 8
-; check: v21 = bor v18, v20
-; check: v37 = iconst.i64 0xffff_0000_ffff_0000
-; check: v22 = band v21, v37
-; check: v23 = ushr_imm v22, 16
-; check: v38 = iconst.i64 0xffff_0000_ffff
-; check: v24 = band v21, v38
-; check: v25 = ishl_imm v24, 16
-; check: v26 = bor v23, v25
-; check: v27 = ushr_imm v26, 32
-; check: v28 = ishl_imm v26, 32
-; check: v1 = bor v27, v28
diff --git a/cranelift/filetests/filetests/legalizer/br_table_cond.clif b/cranelift/filetests/filetests/legalizer/br_table_cond.clif
deleted file mode 100644
index db464ae4d4..0000000000
--- a/cranelift/filetests/filetests/legalizer/br_table_cond.clif
+++ /dev/null
@@ -1,64 +0,0 @@
-test legalizer
-set enable_probestack=false
-set enable_jump_tables=false
-target x86_64 legacy
-
-; Test that when jump_tables_enables is false, all jump tables are eliminated.
-; regex: V=v\d+
-; regex: BB=block\d+
-
-function u0:0(i64 vmctx) baldrdash_system_v {
-    gv0 = vmctx
-    gv1 = iadd_imm.i64 gv0, 48
-    jt0 = jump_table [block2, block2, block7]
-    jt1 = jump_table [block8, block8]
-
-block0(v0: i64):
-    jump block5
-
-block5:
-    v1 = global_value.i64 gv1
-    v2 = load.i64 v1
-    trapnz v2, interrupt
-    v3 = iconst.i32 0
-    br_table v3, block3, jt0
-; check:  block5:
-; check:    $(val0=$V) = iconst.i32 0
-; nextln:   $(cmp0=$V) = icmp_imm eq $val0, 0
-; nextln:   brnz $cmp0, block2
-; nextln:   jump $(fail0=$BB)
-; check:  $fail0:
-; nextln:   $(cmp1=$V) = icmp_imm.i32 eq $val0, 1
-; nextln:   brnz $cmp1, block2
-; nextln:   jump $(fail1=$BB)
-; check:  $fail1:
-; nextln:   $(cmp2=$V) = icmp_imm.i32 eq $val0, 2
-; nextln:   brnz $cmp2, block7
-; nextln:   jump block3
-
-block7:
-    v4 = iconst.i32 0
-    br_table v4, block3, jt1
-; check:  block7:
-; check:    $(val1=$V) = iconst.i32 0
-; nextln:   $(cmp3=$V) = icmp_imm eq $val1, 0
-; nextln:   brnz $cmp3, block8
-; nextln:   jump $(fail3=$BB)
-; check:  $fail3:
-; nextln:   $(cmp4=$V) = icmp_imm.i32 eq $val1, 1
-; nextln:   brnz $cmp4, block8
-; nextln:   jump block3
-
-block8:
-    jump block5
-
-block3:
-    jump block2
-
-block2:
-    jump block1
-
-block1:
-    fallthrough_return
-}
-; not: jump_table
diff --git a/cranelift/filetests/filetests/legalizer/empty_br_table.clif b/cranelift/filetests/filetests/legalizer/empty_br_table.clif
deleted file mode 100644
index d320155470..0000000000
--- a/cranelift/filetests/filetests/legalizer/empty_br_table.clif
+++ /dev/null
@@ -1,17 +0,0 @@
-test legalizer
-set enable_probestack=false
-set enable_jump_tables=false
-target x86_64 legacy
-
-function u0:0(i64) {
-    jt0 = jump_table []
-
-block0(v0: i64):
-    br_table v0, block1, jt0
-; check:  block0(v0: i64):
-; nextln:   jump block1
-
-block1:
-    return
-}
-; not: jump_table
diff --git a/cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif b/cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif
deleted file mode 100644
index 6d72cc6499..0000000000
--- a/cranelift/filetests/filetests/legalizer/icmp_imm_i128.clif
+++ /dev/null
@@ -1,23 +0,0 @@
-test legalizer
-target x86_64 legacy
-
-function %icmp_imm_i128(i128) -> i8 {
-block0(v0: i128):
-    v1 = icmp_imm.i128 eq v0, 1
-    v2 = bint.i8 v1
-    return v2
-}
-
-; check: function %icmp_imm_i128(i64 [%rdi], i64 [%rsi]) -> i8 [%rax] fast {
-; nextln:                                 block0(v3: i64, v4: i64):
-; nextln:                                     v7 -> v3
-; nextln:                                     v8 -> v4
-; nextln: [-]                                 v0 = iconcat v3, v4
-; nextln: [RexOp1pu_id#b8]                    v5 = iconst.i64 1
-; nextln: [RexOp1pu_id#b8]                    v6 = iconst.i64 0
-; nextln: [RexOp1icscc#8039]                  v9 = icmp eq v7, v5
-; nextln: [RexOp1icscc#8039]                  v10 = icmp eq v8, v6
-; nextln: [RexOp1rr#21]                       v1 = band v9, v10
-; nextln: [RexOp2urm_noflags#4b6]             v2 = bint.i8 v1
-; nextln: [Op1ret#c3]                         return v2
-; nextln: }
diff --git a/cranelift/filetests/filetests/legalizer/pass_by_ref.clif b/cranelift/filetests/filetests/legalizer/pass_by_ref.clif
deleted file mode 100644
index 141330cf01..0000000000
--- a/cranelift/filetests/filetests/legalizer/pass_by_ref.clif
+++ /dev/null
@@ -1,31 +0,0 @@
-test legalizer
-target x86_64 legacy
-
-function %legalize_entry(i128) -> i64 windows_fastcall {
-block0(v0: i128):
-    v1, v2 = isplit v0
-    return v2
-}
-; check:  function %legalize_entry(i64 ptr [%rcx]) -> i64 [%rax] windows_fastcall {
-; nextln: block0(v3: i64):
-; nextln:     v4 = load.i64 v3
-; nextln:     v1 -> v4
-; nextln:     v5 = load.i64 v3+8
-; nextln:     v2 -> v5
-; nextln:     v0 = iconcat v4, v5
-; nextln:     return v2
-
-function %legalize_call() {
-    fn0 = %foo(i32x4) windows_fastcall
-block0:
-    v0 = vconst.i32x4 [1 2 3 4]
-    call fn0(v0)
-    return
-}
-; check:  ss0 = explicit_slot 16
-; check:  sig0 = (i64 ptr [%rcx]) windows_fastcall
-; check:  v0 = vconst.i32x4 const0
-; nextln: v1 = stack_addr.i64 ss0
-; nextln: store v0, v1
-; nextln: v2 = func_addr.i64 fn0
-; nextln: call_indirect sig0, v2(v1)
diff --git a/cranelift/filetests/filetests/legalizer/popcnt-i128.clif b/cranelift/filetests/filetests/legalizer/popcnt-i128.clif
deleted file mode 100644
index 8976ad0e25..0000000000
--- a/cranelift/filetests/filetests/legalizer/popcnt-i128.clif
+++ /dev/null
@@ -1,21 +0,0 @@
-test legalizer
-target x86_64 legacy haswell
-
-function %foo() -> i128 {
-block0:
-    v1 = iconst.i64 0x6400000042
-    v2 = iconst.i64 0x7F10100042
-    v3 = iconcat v1, v2
-    v4 = popcnt.i128 v3
-    return v4
-}
-
-; check: v1 = iconst.i64 0x0064_0000_0042
-; check: v2 = iconst.i64 0x007f_1010_0042
-; check: v3 = iconcat v1, v2
-; check: v5 = popcnt v1
-; check: v6 = popcnt v2
-; check: v7 = iadd v5, v6
-; check: v8 = iconst.i64 0
-; check: v4 = iconcat v7, v8
-; check: return v7, v8
diff --git a/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif b/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif
index bb21ec2553..a7c059f6c0 100644
--- a/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif
+++ b/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif
@@ -7,12 +7,15 @@ target x86_64
 ;; we need to make an alias `v3 -> v2`.
 
 function %replace_inst_with_alias() -> i32 {
+    sig0 = (i32, i32) -> i32, i32
+    fn0 = u0:0 sig0
+
 block0:
     v0 = iconst.i32 0
-    v1, v2 = x86_smulx v0, v0
+    v1, v2 = call fn0(v0, v0)
     v3 = isub v2, v0
     ; check:  v0 = iconst.i32 0
-    ; nextln: v1, v2 = x86_smulx v0, v0
+    ; nextln: v1, v2 = call fn0(v0, v0)
     ; nextln: v3 -> v2
     return v3
 }
diff --git a/cranelift/filetests/filetests/postopt/basic.clif b/cranelift/filetests/filetests/postopt/basic.clif
deleted file mode 100644
index 55a8d03738..0000000000
--- a/cranelift/filetests/filetests/postopt/basic.clif
+++ /dev/null
@@ -1,125 +0,0 @@
-test postopt
-target aarch64
-target i686 legacy
-
-; Test that compare+branch sequences are folded effectively on x86.
-
-function %br_icmp(i32, i32) -> i32 {
-block0(v0: i32, v1: i32):
-[DynRexOp1icscc#39,%rdx]  v2 = icmp slt v0, v1
-[Op1t8jccd_long#85]       brnz v2, block1
-[Op1jmpb#eb]              jump block2
-
-block2:
-[Op1ret#c3]         return v1
-
-block1:
-[Op1pu_id#b8,%rax]  v8 = iconst.i32 3
-[Op1ret#c3]         return v8
-}
-; sameln: function %br_icmp
-; nextln: block0(v0: i32, v1: i32):
-; nextln:    v9 = ifcmp v0, v1
-; nextln:    v2 = trueif slt v9
-; nextln:    brif slt v9, block1
-; nextln:    jump block2
-; nextln: 
-; nextln: block2:
-; nextln:    return v1
-; nextln: 
-; nextln: block1:
-; nextln:    v8 = iconst.i32 3
-; nextln:    return v8
-; nextln: }
-
-; Use brz instead of brnz, so the condition is inverted.
-
-function %br_icmp_inverse(i32, i32) -> i32 {
-block0(v0: i32, v1: i32):
-[DynRexOp1icscc#39,%rdx]  v2 = icmp slt v0, v1
-[Op1t8jccd_long#84]       brz v2, block1
-[Op1jmpb#eb]              jump block2
-
-block2:
-[Op1ret#c3]         return v1
-
-block1:
-[Op1pu_id#b8,%rax]  v8 = iconst.i32 3
-[Op1ret#c3]         return v8
-}
-; sameln: function %br_icmp_inverse
-; nextln: block0(v0: i32, v1: i32):
-; nextln:    v9 = ifcmp v0, v1
-; nextln:    v2 = trueif slt v9
-; nextln:    brif sge v9, block1
-; nextln:    jump block2
-; nextln: 
-; nextln: block2:
-; nextln:    return v1
-; nextln: 
-; nextln: block1:
-; nextln:    v8 = iconst.i32 3
-; nextln:    return v8
-; nextln: }
-
-; Use icmp_imm instead of icmp.
-
-function %br_icmp_imm(i32, i32) -> i32 {
-block0(v0: i32, v1: i32):
-[DynRexOp1icscc_ib#7083]  v2 = icmp_imm slt v0, 2
-[Op1t8jccd_long#84]       brz v2, block1
-[Op1jmpb#eb]              jump block2
-
-block2:
-[Op1ret#c3]         return v1
-
-block1:
-[Op1pu_id#b8,%rax]  v8 = iconst.i32 3
-[Op1ret#c3]         return v8
-}
-; sameln: function %br_icmp_imm
-; nextln: block0(v0: i32, v1: i32):
-; nextln:    v9 = ifcmp_imm v0, 2
-; nextln:    v2 = trueif slt v9
-; nextln:    brif sge v9, block1
-; nextln:    jump block2
-; nextln: 
-; nextln: block2:
-; nextln:    return v1
-; nextln: 
-; nextln: block1:
-; nextln:    v8 = iconst.i32 3
-; nextln:    return v8
-; nextln: }
-
-; Use fcmp instead of icmp.
-
-function %br_fcmp(f32, f32) -> f32 {
-block0(v0: f32, v1: f32):
-[Op2fcscc#42e,%rdx] v2 = fcmp gt v0, v1
-[Op1t8jccd_long#84] brz v2, block1
-[Op1jmpb#eb]        jump block2
-
-block2:
-[Op1ret#c3]         return v1
-
-block1:
-[Op1pu_id#b8,%rax]   v18 = iconst.i32 0x40a8_0000
-[Mp2frurm#56e,%xmm0] v8 = bitcast.f32 v18
-[Op1ret#c3]         return v8
-}
-; sameln: function %br_fcmp
-; nextln: block0(v0: f32, v1: f32):
-; nextln:    v19 = ffcmp v0, v1
-; nextln:    v2 = trueff gt v19
-; nextln:    brff ule v19, block1
-; nextln:    jump block2
-; nextln: 
-; nextln: block2:
-; nextln:    return v1
-; nextln: 
-; nextln: block1:
-; nextln:    v18 = iconst.i32 0x40a8_0000
-; nextln:    v8 = bitcast.f32 v18
-; nextln:    return v8
-; nextln: }
diff --git a/cranelift/filetests/filetests/postopt/complex_memory_ops.clif b/cranelift/filetests/filetests/postopt/complex_memory_ops.clif
deleted file mode 100644
index acedb71087..0000000000
--- a/cranelift/filetests/filetests/postopt/complex_memory_ops.clif
+++ /dev/null
@@ -1,94 +0,0 @@
-test postopt
-target x86_64 legacy
-
-function %dual_loads(i64, i64) -> i64 {
-block0(v0: i64, v1: i64):
-[RexOp1rr#8001]    v3 = iadd v0, v1
-                   v4 = load.i64 v3
-                   v5 = uload8.i64 v3
-                   v6 = sload8.i64 v3
-                   v7 = uload16.i64 v3
-                   v8 = sload16.i64 v3
-                   v9 = uload32.i64 v3
-                   v10 = sload32.i64 v3
-[Op1ret#c3]        return v10
-}
-
-; sameln: function %dual_loads
-; nextln: block0(v0: i64, v1: i64):
-; nextln:    v3 = iadd v0, v1
-; nextln:    v4 = load_complex.i64 v0+v1
-; nextln:    v5 = uload8_complex.i64 v0+v1
-; nextln:    v6 = sload8_complex.i64 v0+v1
-; nextln:    v7 = uload16_complex.i64 v0+v1
-; nextln:    v8 = sload16_complex.i64 v0+v1
-; nextln:    v9 = uload32_complex v0+v1
-; nextln:    v10 = sload32_complex v0+v1
-; nextln:    return v10
-; nextln: }
-
-function %dual_loads2(i64, i64) -> i64 {
-block0(v0: i64, v1: i64):
-[RexOp1rr#8001]    v3 = iadd v0, v1
-                   v4 = load.i64 v3+1
-                   v5 = uload8.i64 v3+1
-                   v6 = sload8.i64 v3+1
-                   v7 = uload16.i64 v3+1
-                   v8 = sload16.i64 v3+1
-                   v9 = uload32.i64 v3+1
-                   v10 = sload32.i64 v3+1
-[Op1ret#c3]        return v10
-}
-
-; sameln: function %dual_loads2
-; nextln: block0(v0: i64, v1: i64):
-; nextln:    v3 = iadd v0, v1
-; nextln:    v4 = load_complex.i64 v0+v1+1
-; nextln:    v5 = uload8_complex.i64 v0+v1+1
-; nextln:    v6 = sload8_complex.i64 v0+v1+1
-; nextln:    v7 = uload16_complex.i64 v0+v1+1
-; nextln:    v8 = sload16_complex.i64 v0+v1+1
-; nextln:    v9 = uload32_complex v0+v1+1
-; nextln:    v10 = sload32_complex v0+v1+1
-; nextln:    return v10
-; nextln: }
-
-function %dual_stores(i64, i64, i64) {
-block0(v0: i64, v1: i64, v2: i64):
-[RexOp1rr#8001]    v3 = iadd v0, v1
-[RexOp1st#8089]    store.i64 v2, v3
-[RexOp1st#88]      istore8.i64 v2, v3
-[RexMp1st#189]     istore16.i64 v2, v3
-[RexOp1st#89]      istore32.i64 v2, v3
-[Op1ret#c3]        return
-}
-
-; sameln: function %dual_stores
-; nextln: block0(v0: i64, v1: i64, v2: i64):
-; nextln:    v3 = iadd v0, v1
-; nextln:    store_complex v2, v0+v1
-; nextln:    istore8_complex v2, v0+v1
-; nextln:    istore16_complex v2, v0+v1
-; nextln:    istore32_complex v2, v0+v1
-; nextln:    return
-; nextln: }
-
-function %dual_stores2(i64, i64, i64) {
-block0(v0: i64, v1: i64, v2: i64):
-[RexOp1rr#8001]         v3 = iadd v0, v1
-[RexOp1stDisp8#8089]    store.i64 v2, v3+1
-[RexOp1stDisp8#88]      istore8.i64 v2, v3+1
-[RexMp1stDisp8#189]     istore16.i64 v2, v3+1
-[RexOp1stDisp8#89]      istore32.i64 v2, v3+1
-[Op1ret#c3]             return
-}
-
-; sameln: function %dual_stores2
-; nextln: block0(v0: i64, v1: i64, v2: i64):
-; nextln:    v3 = iadd v0, v1
-; nextln:    store_complex v2, v0+v1+1
-; nextln:    istore8_complex v2, v0+v1+1
-; nextln:    istore16_complex v2, v0+v1+1
-; nextln:    istore32_complex v2, v0+v1+1
-; nextln:    return
-; nextln: }
diff --git a/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif b/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif
deleted file mode 100644
index 84ddf3b884..0000000000
--- a/cranelift/filetests/filetests/postopt/fold_offset_into_address.clif
+++ /dev/null
@@ -1,32 +0,0 @@
-test postopt
-target x86_64 legacy
-
-; Fold the immediate of an iadd_imm into an address offset.
-
-function u0:0(i64 vmctx) -> i64 {
-block0(v0: i64):
-                     v1 = iadd_imm.i64 v0, 16
-[RexOp1ldDisp8#808b] v2 = load.i64 notrap aligned v1
-[Op1ret#c3]          return v2
-}
-
-; sameln: function u0:0(i64 vmctx) -> i64 fast {
-; nextln: block0(v0: i64):
-; nextln:                                     v1 = iadd_imm v0, 16
-; nextln: [RexOp1ldDisp8#808b]                v2 = load.i64 notrap aligned v0+16
-; nextln: [Op1ret#c3]                         return v2
-; nextln: }
-
-function u0:1(i64, i64 vmctx) {
-block0(v3: i64, v0: i64):
-                     v1 = iadd_imm.i64 v0, 16
-[RexOp1stDisp8#8089] store.i64 notrap aligned v3, v1
-[Op1ret#c3]          return
-}
-
-; sameln: function u0:1(i64, i64 vmctx) fast {
-; nextln: block0(v3: i64, v0: i64):
-; nextln:                                     v1 = iadd_imm v0, 16
-; nextln: [RexOp1stDisp8#8089]                store notrap aligned v3, v0+16
-; nextln: [Op1ret#c3]                         return
-; nextln: }
diff --git a/cranelift/filetests/filetests/regalloc/aliases.clif b/cranelift/filetests/filetests/regalloc/aliases.clif
deleted file mode 100644
index e3dcfbad90..0000000000
--- a/cranelift/filetests/filetests/regalloc/aliases.clif
+++ /dev/null
@@ -1,35 +0,0 @@
-test regalloc
-target x86_64 legacy haswell
-
-function %value_aliases(i32, f32, i64 vmctx) baldrdash_system_v {
-    gv0 = vmctx
-    heap0 = static gv0, min 0x0001_0000, bound 0x0001_0000_0000, offset_guard 0x8000_0000
-
-block0(v0: i32, v1: f32, v2: i64):
-    v3 = iconst.i32 0
-    jump block3(v3)
-
-block3(v4: i32):
-    v5 = heap_addr.i64 heap0, v4, 1
-    v6 = load.f32 v5
-    v7 -> v1
-    v8 = fdiv v6, v7
-    v9 = heap_addr.i64 heap0, v4, 1
-    store v8, v9
-    v10 = iconst.i32 4
-    v11 = iadd v4, v10
-    v12 -> v0
-    v13 = icmp ult v11, v12
-    v14 = bint.i32 v13
-    brnz v14, block3(v11)
-    jump block4
-
-block4:
-    jump block2
-
-block2:
-    jump block1
-
-block1:
-    return
-}
diff --git a/cranelift/filetests/filetests/regalloc/basic.clif b/cranelift/filetests/filetests/regalloc/basic.clif
deleted file mode 100644
index 48111253ae..0000000000
--- a/cranelift/filetests/filetests/regalloc/basic.clif
+++ /dev/null
@@ -1,80 +0,0 @@
-test regalloc
-
-; We can add more ISAs once they have defined encodings.
-target riscv32
-
-; regex: RX=%x\d+
-
-function %add(i32, i32) {
-block0(v1: i32, v2: i32):
-    v3 = iadd v1, v2
-; check: [R#0c,%x5]
-; sameln: iadd
-    return
-}
-
-; Function with a dead argument.
-function %dead_arg(i32, i32) -> i32{
-block0(v1: i32, v2: i32):
-; not: regmove
-; check: return v1
-    return v1
-}
-
-; Return a value from a different register.
-function %move1(i32, i32) -> i32 {
-block0(v1: i32, v2: i32):
-; not: regmove
-; check: regmove v2, %x11 -> %x10
-; nextln: return v2
-        return v2
-}
-
-; Swap two registers.
-function %swap(i32, i32) -> i32, i32 {
-block0(v1: i32, v2: i32):
-; not: regmove
-; check: regmove v2, %x11 -> $(tmp=$RX)
-; nextln: regmove v1, %x10 -> %x11
-; nextln: regmove v2, $tmp -> %x10
-; nextln: return v2, v1
-        return v2, v1
-}
-
-; Return a block argument.
-function %retblock(i32, i32) -> i32 {
-block0(v1: i32, v2: i32):
-    brnz v1, block1(v1)
-    jump block1(v2)
-
-block1(v10: i32):
-    return v10
-}
-
-; Pass a block argument as a function argument.
-function %callblock(i32, i32) -> i32 {
-    fn0 = %foo(i32) -> i32
-
-block0(v1: i32, v2: i32):
-    brnz v1, block1(v1)
-    jump block1(v2)
-
-block1(v10: i32):
-    v11 = call fn0(v10)
-    return v11
-}
-
-; Pass a block argument as a jump argument.
-function %jumpblock(i32, i32) -> i32 {
-    fn0 = %foo(i32) -> i32
-
-block0(v1: i32, v2: i32):
-    brnz v1, block1(v1, v2)
-    jump block1(v2, v1)
-
-block1(v10: i32, v11: i32):
-    jump block2(v10, v11)
-
-block2(v20: i32, v21: i32):
-    return v21
-}
diff --git a/cranelift/filetests/filetests/regalloc/coalesce.clif b/cranelift/filetests/filetests/regalloc/coalesce.clif
deleted file mode 100644
index 48395da1b3..0000000000
--- a/cranelift/filetests/filetests/regalloc/coalesce.clif
+++ /dev/null
@@ -1,157 +0,0 @@
-test regalloc
-target riscv32
-
-; Test the coalescer.
-; regex: V=v\d+
-; regex: WS=\s+
-; regex: LOC=%\w+
-; regex: BB=block\d+
-
-; This function is already CSSA, so no copies should be inserted.
-function %cssa(i32) -> i32 {
-block0(v0: i32):
-    ; not: copy
-    ; v0 is used by the branch and passed as an arg - that's no conflict.
-    brnz v0, block1(v0)
-    jump block2
-
-block2:
-    ; v0 is live across the branch above. That's no conflict.
-    v1 = iadd_imm v0, 7
-    jump block1(v1)
-
-block1(v10: i32):
-    v11 = iadd_imm v10, 7
-    return v11
-}
-
-function %trivial(i32) -> i32 {
-block0(v0: i32):
-    ; check:    brnz v0, $(splitEdge=$BB)
-    brnz v0, block1(v0)
-    jump block2
-
-block2:
-    ; not: copy
-    v1 = iadd_imm v0, 7
-    jump block1(v1)
-
-    ; check:  $splitEdge:
-    ; nextln:   $(cp1=$V) = copy.i32 v0
-    ; nextln:   jump block1($cp1)
-
-block1(v10: i32):
-    ; Use v0 in the destination block causes a conflict.
-    v11 = iadd v10, v0
-    return v11
-}
-
-; A value is used as an SSA argument twice in the same branch.
-function %dualuse(i32) -> i32 {
-block0(v0: i32):
-    ; check:  brnz v0, $(splitEdge=$BB)
-    brnz v0, block1(v0, v0)
-    jump block2
-
-block2:
-    v1 = iadd_imm v0, 7
-    v2 = iadd_imm v1, 56
-    jump block1(v1, v2)
-
-    ; check:  $splitEdge:
-    ; check:    $(cp1=$V) = copy.i32 v0
-    ; nextln:   jump block1($cp1, v0)
-
-block1(v10: i32, v11: i32):
-    v12 = iadd v10, v11
-    return v12
-}
-
-; Interference away from the branch
-; The interference can be broken with a copy at either branch.
-function %interference(i32) -> i32 {
-block0(v0: i32):
-    ; not:    copy
-    ; check:  brnz v0, $(splitEdge=$BB)
-    ; not:    copy
-    brnz v0, block1(v0)
-    jump block2
-
-block2:
-    v1 = iadd_imm v0, 7
-    ; v1 and v0 interfere here:
-    v2 = iadd_imm v0, 8
-    ; check: $(cp0=$V) = copy v1
-    ; check: jump block1($cp0)
-    jump block1(v1)
-
-    ; check:  $splitEdge:
-    ; not:      copy
-    ; nextln:   jump block1(v0)
-
-block1(v10: i32):
-    ; not: copy
-    v11 = iadd_imm v10, 7
-    return v11
-}
-
-; A loop where one induction variable is used as a backedge argument.
-function %fibonacci(i32) -> i32 {
-block0(v0: i32):
-    v1 = iconst.i32 1
-    v2 = iconst.i32 2
-    jump block1(v1, v2)
-
-    ; check:  $(splitEdge=$BB):
-    ; check:    $(nv11b=$V) = copy.i32 v11
-    ; not:      copy
-    ; check:    jump block1($nv11b, v12)
-
-block1(v10: i32, v11: i32):
-    ; v11 needs to be isolated because it interferes with v10.
-    ; check: block1(v10: i32 [$LOC], $(nv11a=$V): i32 [$LOC])
-    ; check: v11 = copy $nv11a
-    v12 = iadd v10, v11
-    v13 = icmp ult v12, v0
-    ; check:  brnz v13, $splitEdge
-    brnz v13, block1(v11, v12)
-    jump block2
-
-block2:
-    return v12
-}
-
-; Function arguments passed on the stack aren't allowed to be part of a virtual
-; register, at least for now. This is because the other values in the virtual
-; register would need to be spilled to the incoming_arg stack slot which we treat
-; as belonging to the caller.
-function %stackarg(i32, i32, i32, i32, i32, i32, i32, i32, i32) -> i32 {
-; check: ss0 = incoming_arg 4
-; not: incoming_arg
-block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32, v8: i32):
-    ; check: fill v8
-    ; not: v8
-    jump block1(v8)
-
-block1(v10: i32):
-    v11 = iadd_imm v10, 1
-    return v11
-}
-
-function %gvn_unremovable_phi(i32) system_v {
-block0(v0: i32):
-    v2 = iconst.i32 0
-    jump block2(v2, v0)
-
-block2(v3: i32, v4: i32):
-    brnz v3, block2(v3, v4)
-    jump block3
-
-block3:
-    v5 = iconst.i32 1
-    brnz v3, block2(v2, v5)
-    jump block4
-
-block4:
-    return
-}
diff --git a/cranelift/filetests/filetests/regalloc/coalescing-207.clif b/cranelift/filetests/filetests/regalloc/coalescing-207.clif
deleted file mode 100644
index c549cbd3d2..0000000000
--- a/cranelift/filetests/filetests/regalloc/coalescing-207.clif
+++ /dev/null
@@ -1,1527 +0,0 @@
-test regalloc
-target x86_64 legacy haswell
-
-; Reported as https://github.com/bytecodealliance/cranelift/issues/207
-;
-; The coalescer creates a virtual register with two interfering values.
-function %pr207(i64 vmctx, i32, i32) -> i32 system_v {
-    gv1 = vmctx
-    gv0 = iadd_imm.i64 gv1, -8
-    heap0 = static gv0, min 0, bound 0x5000, offset_guard 0x0040_0000
-    sig0 = (i64 vmctx, i32, i32) -> i32 system_v
-    sig1 = (i64 vmctx, i32, i32, i32) -> i32 system_v
-    sig2 = (i64 vmctx, i32, i32, i32) -> i32 system_v
-    fn0 = u0:2 sig0
-    fn1 = u0:0 sig1
-    fn2 = u0:1 sig2
-
-block0(v0: i64, v1: i32, v2: i32):
-    v3 = iconst.i32 0
-    v4 = iconst.i32 0
-    v5 = iconst.i32 0
-    v6 = iconst.i32 0x4ffe
-    v7 = icmp uge v5, v6
-    brz v7, block1
-    jump block100
-
-block100:
-    trap heap_oob
-
-block1:
-    v8 = uextend.i64 v5
-    v9 = iadd_imm.i64 v0, -8
-    v10 = load.i64 v9
-    v11 = iadd v10, v8
-    v12 = load.i32 v11+4
-    v13 = iconst.i32 1056
-    v14 = isub v12, v13
-    v15 = iconst.i32 0x4ffe
-    v16 = icmp.i32 uge v4, v15
-    brz v16, block2
-    jump block101
-
-block101:
-    trap heap_oob
-
-block2:
-    v17 = uextend.i64 v4
-    v18 = iadd_imm.i64 v0, -8
-    v19 = load.i64 v18
-    v20 = iadd v19, v17
-    store.i32 v14, v20+4
-    v21 = iconst.i32 0x4ffe
-    v22 = icmp.i32 uge v2, v21
-    brz v22, block3
-    jump block102
-
-block102:
-    trap heap_oob
-
-block3:
-    v23 = uextend.i64 v2
-    v24 = iadd_imm.i64 v0, -8
-    v25 = load.i64 v24
-    v26 = iadd v25, v23
-    v27 = sload8.i32 v26
-    v28 = iconst.i32 255
-    v29 = band v27, v28
-    v30 = iconst.i32 0
-    v31 = icmp eq v29, v30
-    v32 = bint.i32 v31
-    brnz v32, block90(v14, v1)
-    jump block103
-
-block103:
-    v33 = call fn0(v0, v1, v27)
-    v34 = iconst.i32 0
-    v35 = iconst.i32 0
-    v36 = icmp eq v33, v35
-    v37 = bint.i32 v36
-    brnz v37, block90(v14, v34)
-    jump block104
-
-block104:
-    v38 = iconst.i32 0x4ffe
-    v39 = icmp.i32 uge v2, v38
-    brz v39, block4
-    jump block105
-
-block105:
-    trap heap_oob
-
-block4:
-    v40 = uextend.i64 v2
-    v41 = iadd_imm.i64 v0, -8
-    v42 = load.i64 v41
-    v43 = iadd v42, v40
-    v44 = uload8.i32 v43+1
-    v45 = iconst.i32 0
-    v46 = icmp eq v44, v45
-    v47 = bint.i32 v46
-    brnz v47, block56(v33, v14)
-    jump block106
-
-block106:
-    v48 = iconst.i32 0x4ffe
-    v49 = icmp.i32 uge v33, v48
-    brz v49, block5
-    jump block107
-
-block107:
-    trap heap_oob
-
-block5:
-    v50 = uextend.i64 v33
-    v51 = iadd_imm.i64 v0, -8
-    v52 = load.i64 v51
-    v53 = iadd v52, v50
-    v54 = uload8.i32 v53+1
-    v55 = iconst.i32 0
-    v56 = icmp eq v54, v55
-    v57 = bint.i32 v56
-    brnz v57, block90(v14, v34)
-    jump block108
-
-block108:
-    v58 = iconst.i32 0x4ffe
-    v59 = icmp.i32 uge v2, v58
-    brz v59, block6
-    jump block109
-
-block109:
-    trap heap_oob
-
-block6:
-    v60 = uextend.i64 v2
-    v61 = iadd_imm.i64 v0, -8
-    v62 = load.i64 v61
-    v63 = iadd v62, v60
-    v64 = uload8.i32 v63+2
-    v65 = iconst.i32 0
-    v66 = icmp eq v64, v65
-    v67 = bint.i32 v66
-    brnz v67, block42
-    jump block110
-
-block110:
-    v68 = iconst.i32 0x4ffe
-    v69 = icmp.i32 uge v33, v68
-    brz v69, block7
-    jump block111
-
-block111:
-    trap heap_oob
-
-block7:
-    v70 = uextend.i64 v33
-    v71 = iadd_imm.i64 v0, -8
-    v72 = load.i64 v71
-    v73 = iadd v72, v70
-    v74 = uload8.i32 v73+2
-    v75 = iconst.i32 0
-    v76 = icmp eq v74, v75
-    v77 = bint.i32 v76
-    brnz v77, block90(v14, v34)
-    jump block112
-
-block112:
-    v78 = iconst.i32 0x4ffe
-    v79 = icmp.i32 uge v2, v78
-    brz v79, block8
-    jump block113
-
-block113:
-    trap heap_oob
-
-block8:
-    v80 = uextend.i64 v2
-    v81 = iadd_imm.i64 v0, -8
-    v82 = load.i64 v81
-    v83 = iadd v82, v80
-    v84 = uload8.i32 v83+3
-    v85 = iconst.i32 0
-    v86 = icmp eq v84, v85
-    v87 = bint.i32 v86
-    brnz v87, block46
-    jump block114
-
-block114:
-    v88 = iconst.i32 0x4ffe
-    v89 = icmp.i32 uge v33, v88
-    brz v89, block9
-    jump block115
-
-block115:
-    trap heap_oob
-
-block9:
-    v90 = uextend.i64 v33
-    v91 = iadd_imm.i64 v0, -8
-    v92 = load.i64 v91
-    v93 = iadd v92, v90
-    v94 = uload8.i32 v93+3
-    v95 = iconst.i32 0
-    v96 = icmp eq v94, v95
-    v97 = bint.i32 v96
-    brnz v97, block90(v14, v34)
-    jump block116
-
-block116:
-    v98 = iconst.i32 0x4ffe
-    v99 = icmp.i32 uge v2, v98
-    brz v99, block10
-    jump block117
-
-block117:
-    trap heap_oob
-
-block10:
-    v100 = uextend.i64 v2
-    v101 = iadd_imm.i64 v0, -8
-    v102 = load.i64 v101
-    v103 = iadd v102, v100
-    v104 = uload8.i32 v103+4
-    v105 = iconst.i32 0
-    v106 = icmp eq v104, v105
-    v107 = bint.i32 v106
-    brnz v107, block54
-    jump block118
-
-block118:
-    v108 = iconst.i32 1
-    v109 = iadd.i32 v2, v108
-    v110 = iconst.i32 1048
-    v111 = iadd.i32 v14, v110
-    v112 = iconst.i64 0
-    v113 = iconst.i32 0x4ffe
-    v114 = icmp uge v111, v113
-    brz v114, block11
-    jump block119
-
-block119:
-    trap heap_oob
-
-block11:
-    v115 = uextend.i64 v111
-    v116 = iadd_imm.i64 v0, -8
-    v117 = load.i64 v116
-    v118 = iadd v117, v115
-    store.i64 v112, v118
-    v119 = iconst.i32 1040
-    v120 = iadd.i32 v14, v119
-    v121 = iconst.i64 0
-    v122 = iconst.i32 0x4ffe
-    v123 = icmp uge v120, v122
-    brz v123, block12
-    jump block120
-
-block120:
-    trap heap_oob
-
-block12:
-    v124 = uextend.i64 v120
-    v125 = iadd_imm.i64 v0, -8
-    v126 = load.i64 v125
-    v127 = iadd v126, v124
-    store.i64 v121, v127
-    v128 = iconst.i64 0
-    v129 = iconst.i32 0x4ffe
-    v130 = icmp.i32 uge v14, v129
-    brz v130, block13
-    jump block121
-
-block121:
-    trap heap_oob
-
-block13:
-    v131 = uextend.i64 v14
-    v132 = iadd_imm.i64 v0, -8
-    v133 = load.i64 v132
-    v134 = iadd v133, v131
-    store.i64 v128, v134+1032
-    v135 = iconst.i64 0
-    v136 = iconst.i32 0x4ffe
-    v137 = icmp.i32 uge v14, v136
-    brz v137, block14
-    jump block122
-
-block122:
-    trap heap_oob
-
-block14:
-    v138 = uextend.i64 v14
-    v139 = iadd_imm.i64 v0, -8
-    v140 = load.i64 v139
-    v141 = iadd v140, v138
-    store.i64 v135, v141+1024
-    v142 = iconst.i32 -1
-    jump block15(v142, v27)
-
-block15(v143: i32, v144: i32):
-    v145 = iadd.i32 v33, v143
-    v146 = iconst.i32 1
-    v147 = iadd v145, v146
-    v148 = iconst.i32 0x4ffe
-    v149 = icmp uge v147, v148
-    brz v149, block16
-    jump block123
-
-block123:
-    trap heap_oob
-
-block16:
-    v150 = uextend.i64 v147
-    v151 = iadd_imm.i64 v0, -8
-    v152 = load.i64 v151
-    v153 = iadd v152, v150
-    v154 = uload8.i32 v153
-    v155 = iconst.i32 0
-    v156 = icmp eq v154, v155
-    v157 = bint.i32 v156
-    brnz v157, block89(v14)
-    jump block124
-
-block124:
-    v158 = iconst.i32 255
-    v159 = band.i32 v144, v158
-    v160 = iconst.i32 2
-    v161 = ishl v159, v160
-    v162 = iadd.i32 v14, v161
-    v163 = iconst.i32 2
-    v164 = iadd.i32 v143, v163
-    v165 = iconst.i32 0x4ffe
-    v166 = icmp uge v162, v165
-    brz v166, block17
-    jump block125
-
-block125:
-    trap heap_oob
-
-block17:
-    v167 = uextend.i64 v162
-    v168 = iadd_imm.i64 v0, -8
-    v169 = load.i64 v168
-    v170 = iadd v169, v167
-    store.i32 v164, v170
-    v171 = iconst.i32 1024
-    v172 = iadd.i32 v14, v171
-    v173 = iconst.i32 3
-    v174 = ushr.i32 v159, v173
-    v175 = iconst.i32 28
-    v176 = band v174, v175
-    v177 = iadd v172, v176
-    v178 = iconst.i32 0x4ffe
-    v179 = icmp uge v177, v178
-    brz v179, block18
-    jump block126
-
-block126:
-    trap heap_oob
-
-block18:
-    v180 = uextend.i64 v177
-    v181 = iadd_imm.i64 v0, -8
-    v182 = load.i64 v181
-    v183 = iadd v182, v180
-    v184 = load.i32 v183
-    v185 = iconst.i32 1
-    v186 = iconst.i32 31
-    v187 = band.i32 v144, v186
-    v188 = ishl v185, v187
-    v189 = bor v184, v188
-    v190 = iconst.i32 0x4ffe
-    v191 = icmp.i32 uge v177, v190
-    brz v191, block19
-    jump block127
-
-block127:
-    trap heap_oob
-
-block19:
-    v192 = uextend.i64 v177
-    v193 = iadd_imm.i64 v0, -8
-    v194 = load.i64 v193
-    v195 = iadd v194, v192
-    store.i32 v189, v195
-    v196 = iadd.i32 v109, v143
-    v197 = iconst.i32 1
-    v198 = iadd.i32 v143, v197
-    v199 = iconst.i32 1
-    v200 = iadd v196, v199
-    v201 = iconst.i32 0x4ffe
-    v202 = icmp uge v200, v201
-    brz v202, block20
-    jump block128
-
-block128:
-    trap heap_oob
-
-block20:
-    v203 = uextend.i64 v200
-    v204 = iadd_imm.i64 v0, -8
-    v205 = load.i64 v204
-    v206 = iadd v205, v203
-    v207 = uload8.i32 v206
-    brnz v207, block15(v198, v207)
-    jump block21
-
-block21:
-    v208 = iconst.i32 -1
-    v209 = iconst.i32 1
-    v210 = iconst.i32 -1
-    v211 = iconst.i32 1
-    v212 = iconst.i32 1
-    v213 = iadd.i32 v198, v212
-    v214 = iconst.i32 2
-    v215 = icmp ult v213, v214
-    v216 = bint.i32 v215
-    brnz v216, block38(v2, v211, v209, v210, v208, v198, v213, v33, v14)
-    jump block129
-
-block129:
-    v217 = iconst.i32 -1
-    v218 = iconst.i32 0
-    v219 = iconst.i32 1
-    v220 = iconst.i32 1
-    v221 = iconst.i32 1
-    v222 = copy.i32 v44
-    jump block22(v217, v221, v44, v220, v218, v219, v213, v222, v198, v33, v14)
-
-block22(v223: i32, v224: i32, v225: i32, v226: i32, v227: i32, v228: i32, v229: i32, v230: i32, v231: i32, v232: i32, v233: i32):
-    v234 = copy v228
-    v235 = iadd v223, v224
-    v236 = iadd.i32 v2, v235
-    v237 = iconst.i32 0x4ffe
-    v238 = icmp uge v236, v237
-    brz v238, block23
-    jump block130
-
-block130:
-    trap heap_oob
-
-block23:
-    v239 = uextend.i64 v236
-    v240 = iadd_imm.i64 v0, -8
-    v241 = load.i64 v240
-    v242 = iadd v241, v239
-    v243 = uload8.i32 v242
-    v244 = iconst.i32 255
-    v245 = band.i32 v225, v244
-    v246 = icmp ne v243, v245
-    v247 = bint.i32 v246
-    brnz v247, block24
-    jump block131
-
-block131:
-    v248 = icmp.i32 ne v224, v226
-    v249 = bint.i32 v248
-    brnz v249, block25
-    jump block132
-
-block132:
-    v250 = iadd.i32 v227, v226
-    v251 = iconst.i32 1
-    jump block27(v251, v250, v223, v226)
-
-block24:
-    v252 = icmp.i32 ule v243, v245
-    v253 = bint.i32 v252
-    brnz v253, block26
-    jump block133
-
-block133:
-    v254 = isub.i32 v234, v223
-    v255 = iconst.i32 1
-    jump block27(v255, v234, v223, v254)
-
-block25:
-    v256 = iconst.i32 1
-    v257 = iadd.i32 v224, v256
-    v258 = copy.i32 v227
-    jump block27(v257, v258, v223, v226)
-
-block26:
-    v259 = iconst.i32 1
-    v260 = iconst.i32 1
-    v261 = iadd.i32 v227, v260
-    v262 = iconst.i32 1
-    v263 = copy.i32 v227
-    jump block27(v259, v261, v263, v262)
-
-block27(v264: i32, v265: i32, v266: i32, v267: i32):
-    v268 = iadd v264, v265
-    v269 = icmp uge v268, v229
-    v270 = bint.i32 v269
-    brnz v270, block29
-    jump block134
-
-block134:
-    v271 = iadd.i32 v2, v268
-    v272 = iconst.i32 0x4ffe
-    v273 = icmp uge v271, v272
-    brz v273, block28
-    jump block135
-
-block135:
-    trap heap_oob
-
-block28:
-    v274 = uextend.i64 v271
-    v275 = iadd_imm.i64 v0, -8
-    v276 = load.i64 v275
-    v277 = iadd v276, v274
-    v278 = uload8.i32 v277
-    v279 = copy.i32 v265
-    jump block22(v266, v264, v278, v267, v279, v268, v229, v230, v231, v232, v233)
-
-block29:
-    jump block30
-
-block30:
-    v280 = iconst.i32 -1
-    v281 = iconst.i32 0
-    v282 = iconst.i32 1
-    v283 = iconst.i32 1
-    v284 = iconst.i32 1
-    jump block31(v280, v284, v230, v283, v281, v282, v229, v267, v266, v231, v232, v233)
-
-block31(v285: i32, v286: i32, v287: i32, v288: i32, v289: i32, v290: i32, v291: i32, v292: i32, v293: i32, v294: i32, v295: i32, v296: i32):
-    v297 = copy v290
-    v298 = iadd v285, v286
-    v299 = iadd.i32 v2, v298
-    v300 = iconst.i32 0x4ffe
-    v301 = icmp uge v299, v300
-    brz v301, block32
-    jump block136
-
-block136:
-    trap heap_oob
-
-block32:
-    v302 = uextend.i64 v299
-    v303 = iadd_imm.i64 v0, -8
-    v304 = load.i64 v303
-    v305 = iadd v304, v302
-    v306 = uload8.i32 v305
-    v307 = iconst.i32 255
-    v308 = band.i32 v287, v307
-    v309 = icmp ne v306, v308
-    v310 = bint.i32 v309
-    brnz v310, block33
-    jump block137
-
-block137:
-    v311 = icmp.i32 ne v286, v288
-    v312 = bint.i32 v311
-    brnz v312, block34
-    jump block138
-
-block138:
-    v313 = iadd.i32 v289, v288
-    v314 = iconst.i32 1
-    jump block36(v314, v313, v285, v288)
-
-block33:
-    v315 = icmp.i32 uge v306, v308
-    v316 = bint.i32 v315
-    brnz v316, block35
-    jump block139
-
-block139:
-    v317 = isub.i32 v297, v285
-    v318 = iconst.i32 1
-    jump block36(v318, v297, v285, v317)
-
-block34:
-    v319 = iconst.i32 1
-    v320 = iadd.i32 v286, v319
-    v321 = copy.i32 v289
-    jump block36(v320, v321, v285, v288)
-
-block35:
-    v322 = iconst.i32 1
-    v323 = iconst.i32 1
-    v324 = iadd.i32 v289, v323
-    v325 = iconst.i32 1
-    v326 = copy.i32 v289
-    jump block36(v322, v324, v326, v325)
-
-block36(v327: i32, v328: i32, v329: i32, v330: i32):
-    v331 = iadd v327, v328
-    v332 = icmp uge v331, v291
-    v333 = bint.i32 v332
-    brnz v333, block38(v2, v330, v292, v329, v293, v294, v291, v295, v296)
-    jump block140
-
-block140:
-    v334 = iadd.i32 v2, v331
-    v335 = iconst.i32 0x4ffe
-    v336 = icmp uge v334, v335
-    brz v336, block37
-    jump block141
-
-block141:
-    trap heap_oob
-
-block37:
-    v337 = uextend.i64 v334
-    v338 = iadd_imm.i64 v0, -8
-    v339 = load.i64 v338
-    v340 = iadd v339, v337
-    v341 = uload8.i32 v340
-    v342 = copy.i32 v328
-    jump block31(v329, v327, v341, v330, v342, v331, v291, v292, v293, v294, v295, v296)
-
-block38(v343: i32, v344: i32, v345: i32, v346: i32, v347: i32, v348: i32, v349: i32, v350: i32, v351: i32):
-    v352 = iconst.i32 1
-    v353 = iadd v346, v352
-    v354 = iconst.i32 1
-    v355 = iadd v347, v354
-    v356 = icmp ugt v353, v355
-    v357 = bint.i32 v356
-    brnz v357, block39(v344)
-    jump block142
-
-block142:
-    v358 = copy v345
-    jump block39(v358)
-
-block39(v359: i32):
-    v360 = iadd.i32 v343, v359
-    brnz.i32 v357, block40(v346)
-    jump block143
-
-block143:
-    v361 = copy.i32 v347
-    jump block40(v361)
-
-block40(v362: i32):
-    v363 = iconst.i32 1
-    v364 = iadd v362, v363
-    v365 = call fn1(v0, v343, v360, v364)
-    v366 = iconst.i32 0
-    v367 = icmp eq v365, v366
-    v368 = bint.i32 v367
-    brnz v368, block63
-    jump block144
-
-block144:
-    v369 = iconst.i32 1
-    v370 = iadd v362, v369
-    v371 = isub.i32 v348, v370
-    v372 = iconst.i32 1
-    v373 = iadd v371, v372
-    v374 = icmp ugt v362, v373
-    v375 = bint.i32 v374
-    v376 = copy v362
-    brnz v375, block41(v376)
-    jump block145
-
-block145:
-    v377 = copy v373
-    jump block41(v377)
-
-block41(v378: i32):
-    v379 = iconst.i32 1
-    v380 = iadd v378, v379
-    v381 = iconst.i32 0
-    jump block64(v380, v381)
-
-block42:
-    v382 = iconst.i32 8
-    v383 = ishl.i32 v29, v382
-    v384 = bor v383, v44
-    v385 = iconst.i32 0x4ffe
-    v386 = icmp.i32 uge v33, v385
-    brz v386, block43
-    jump block146
-
-block146:
-    trap heap_oob
-
-block43:
-    v387 = uextend.i64 v33
-    v388 = iadd_imm.i64 v0, -8
-    v389 = load.i64 v388
-    v390 = iadd v389, v387
-    v391 = uload8.i32 v390
-    jump block44(v391, v54, v33)
-
-block44(v392: i32, v393: i32, v394: i32):
-    v395 = iconst.i32 8
-    v396 = ishl v392, v395
-    v397 = iconst.i32 0xff00
-    v398 = band v396, v397
-    v399 = iconst.i32 255
-    v400 = band v393, v399
-    v401 = bor v398, v400
-    v402 = icmp eq v401, v384
-    v403 = bint.i32 v402
-    brnz v403, block56(v394, v14)
-    jump block147
-
-block147:
-    v404 = iconst.i32 2
-    v405 = iadd v394, v404
-    v406 = iconst.i32 1
-    v407 = iadd v394, v406
-    v408 = iconst.i32 0x4ffe
-    v409 = icmp uge v405, v408
-    brz v409, block45
-    jump block148
-
-block148:
-    trap heap_oob
-
-block45:
-    v410 = uextend.i64 v405
-    v411 = iadd_imm.i64 v0, -8
-    v412 = load.i64 v411
-    v413 = iadd v412, v410
-    v414 = uload8.i32 v413
-    brnz v414, block44(v401, v414, v407)
-    jump block90(v14, v34)
-
-block46:
-    v415 = iconst.i32 8
-    v416 = ishl.i32 v74, v415
-    v417 = iconst.i32 16
-    v418 = ishl.i32 v54, v417
-    v419 = bor v416, v418
-    v420 = iconst.i32 0x4ffe
-    v421 = icmp.i32 uge v33, v420
-    brz v421, block47
-    jump block149
-
-block149:
-    trap heap_oob
-
-block47:
-    v422 = uextend.i64 v33
-    v423 = iadd_imm.i64 v0, -8
-    v424 = load.i64 v423
-    v425 = iadd v424, v422
-    v426 = uload8.i32 v425
-    v427 = iconst.i32 24
-    v428 = ishl v426, v427
-    v429 = bor.i32 v419, v428
-    v430 = iconst.i32 16
-    v431 = ishl.i32 v44, v430
-    v432 = iconst.i32 24
-    v433 = ishl.i32 v29, v432
-    v434 = bor v431, v433
-    v435 = iconst.i32 8
-    v436 = ishl.i32 v64, v435
-    v437 = bor v434, v436
-    v438 = icmp eq v429, v437
-    v439 = bint.i32 v438
-    brnz v439, block56(v33, v14)
-    jump block48(v33, v429)
-
-block48(v440: i32, v441: i32):
-    v442 = iconst.i32 1
-    v443 = iadd v440, v442
-    v444 = iconst.i32 3
-    v445 = iadd v440, v444
-    v446 = iconst.i32 0x4ffe
-    v447 = icmp uge v445, v446
-    brz v447, block49
-    jump block150
-
-block150:
-    trap heap_oob
-
-block49:
-    v448 = uextend.i64 v445
-    v449 = iadd_imm.i64 v0, -8
-    v450 = load.i64 v449
-    v451 = iadd v450, v448
-    v452 = uload8.i32 v451
-    v453 = iconst.i32 0
-    v454 = icmp eq v452, v453
-    v455 = bint.i32 v454
-    brnz v455, block51(v14)
-    jump block151
-
-block151:
-    v456 = bor.i32 v441, v452
-    v457 = iconst.i32 8
-    v458 = ishl v456, v457
-    v459 = icmp ne v458, v437
-    v460 = bint.i32 v459
-    v461 = copy.i32 v443
-    brnz v460, block48(v461, v458)
-    jump block50
-
-block50:
-    jump block51(v14)
-
-block51(v462: i32):
-    v463 = iconst.i32 0
-    v464 = iconst.i32 1056
-    v465 = iadd v462, v464
-    v466 = iconst.i32 0x4ffe
-    v467 = icmp uge v463, v466
-    brz v467, block52
-    jump block152
-
-block152:
-    trap heap_oob
-
-block52:
-    v468 = uextend.i64 v463
-    v469 = iadd_imm.i64 v0, -8
-    v470 = load.i64 v469
-    v471 = iadd v470, v468
-    store.i32 v465, v471+4
-    v472 = iconst.i32 0
-    brnz.i32 v452, block53(v443)
-    jump block153
-
-block153:
-    v473 = copy v472
-    jump block53(v473)
-
-block53(v474: i32):
-    return v474
-
-block54:
-    v475 = iconst.i32 8
-    v476 = ishl.i32 v74, v475
-    v477 = iconst.i32 16
-    v478 = ishl.i32 v54, v477
-    v479 = bor v476, v478
-    v480 = bor v479, v94
-    v481 = iconst.i32 0x4ffe
-    v482 = icmp.i32 uge v33, v481
-    brz v482, block55
-    jump block154
-
-block154:
-    trap heap_oob
-
-block55:
-    v483 = uextend.i64 v33
-    v484 = iadd_imm.i64 v0, -8
-    v485 = load.i64 v484
-    v486 = iadd v485, v483
-    v487 = uload8.i32 v486
-    v488 = iconst.i32 24
-    v489 = ishl v487, v488
-    v490 = bor.i32 v480, v489
-    v491 = iconst.i32 16
-    v492 = ishl.i32 v44, v491
-    v493 = iconst.i32 24
-    v494 = ishl.i32 v29, v493
-    v495 = bor v492, v494
-    v496 = iconst.i32 8
-    v497 = ishl.i32 v64, v496
-    v498 = bor v495, v497
-    v499 = bor v498, v84
-    v500 = icmp ne v490, v499
-    v501 = bint.i32 v500
-    brnz v501, block57
-    jump block56(v33, v14)
-
-block56(v502: i32, v503: i32):
-    v504 = copy v502
-    jump block90(v503, v504)
-
-block57:
-    jump block58(v33, v490)
-
-block58(v505: i32, v506: i32):
-    v507 = iconst.i32 4
-    v508 = iadd v505, v507
-    v509 = iconst.i32 1
-    v510 = iadd v505, v509
-    v511 = iconst.i32 0x4ffe
-    v512 = icmp uge v508, v511
-    brz v512, block59
-    jump block155
-
-block155:
-    trap heap_oob
-
-block59:
-    v513 = uextend.i64 v508
-    v514 = iadd_imm.i64 v0, -8
-    v515 = load.i64 v514
-    v516 = iadd v515, v513
-    v517 = uload8.i32 v516
-    v518 = iconst.i32 0
-    v519 = icmp eq v517, v518
-    v520 = bint.i32 v519
-    brnz v520, block61(v14)
-    jump block156
-
-block156:
-    v521 = iconst.i32 8
-    v522 = ishl.i32 v506, v521
-    v523 = bor v522, v517
-    v524 = icmp ne v523, v499
-    v525 = bint.i32 v524
-    brnz v525, block58(v510, v523)
-    jump block60
-
-block60:
-    jump block61(v14)
-
-block61(v526: i32):
-    v527 = iconst.i32 0
-    brnz.i32 v517, block62(v510)
-    jump block157
-
-block157:
-    v528 = copy v527
-    jump block62(v528)
-
-block62(v529: i32):
-    v530 = copy v529
-    jump block90(v526, v530)
-
-block63:
-    v531 = isub.i32 v348, v359
-    v532 = iconst.i32 1
-    v533 = iadd v531, v532
-    jump block64(v359, v533)
-
-block64(v534: i32, v535: i32):
-    v536 = iconst.i32 1
-    v537 = iadd.i32 v343, v536
-    v538 = iconst.i32 0
-    v539 = isub v538, v362
-    v540 = iconst.i32 63
-    v541 = bor.i32 v349, v540
-    v542 = isub.i32 v348, v534
-    v543 = iconst.i32 1
-    v544 = iadd v542, v543
-    v545 = iconst.i32 0
-    v546 = copy.i32 v350
-    jump block65(v350, v546, v349, v541, v348, v351, v544, v534, v545, v535, v343, v364, v537, v539, v362)
-
-block65(v547: i32, v548: i32, v549: i32, v550: i32, v551: i32, v552: i32, v553: i32, v554: i32, v555: i32, v556: i32, v557: i32, v558: i32, v559: i32, v560: i32, v561: i32):
-    v562 = copy v556
-    v563 = isub v547, v548
-    v564 = icmp uge v563, v549
-    v565 = bint.i32 v564
-    brnz v565, block67(v547)
-    jump block158
-
-block158:
-    v566 = iconst.i32 0
-    v567 = call fn2(v0, v547, v566, v550)
-    brnz v567, block66
-    jump block159
-
-block159:
-    v568 = iadd v547, v550
-    jump block67(v568)
-
-block66:
-    v569 = isub.i32 v567, v548
-    v570 = icmp ult v569, v549
-    v571 = bint.i32 v570
-    brnz v571, block89(v552)
-    jump block160
-
-block160:
-    v572 = copy.i32 v567
-    jump block67(v572)
-
-block67(v573: i32):
-    v574 = iconst.i32 1
-    v575 = iadd.i32 v548, v551
-    v576 = iconst.i32 0x4ffe
-    v577 = icmp uge v575, v576
-    brz v577, block68
-    jump block161
-
-block161:
-    trap heap_oob
-
-block68:
-    v578 = uextend.i64 v575
-    v579 = iadd_imm.i64 v0, -8
-    v580 = load.i64 v579
-    v581 = iadd v580, v578
-    v582 = uload8.i32 v581
-    v583 = iconst.i32 31
-    v584 = band v582, v583
-    v585 = ishl.i32 v574, v584
-    v586 = iconst.i32 1024
-    v587 = iadd.i32 v552, v586
-    v588 = iconst.i32 3
-    v589 = ushr v582, v588
-    v590 = iconst.i32 28
-    v591 = band v589, v590
-    v592 = iadd v587, v591
-    v593 = iconst.i32 0x4ffe
-    v594 = icmp uge v592, v593
-    brz v594, block69
-    jump block162
-
-block162:
-    trap heap_oob
-
-block69:
-    v595 = uextend.i64 v592
-    v596 = iadd_imm.i64 v0, -8
-    v597 = load.i64 v596
-    v598 = iadd v597, v595
-    v599 = load.i32 v598
-    v600 = band.i32 v585, v599
-    v601 = iconst.i32 0
-    v602 = icmp eq v600, v601
-    v603 = bint.i32 v602
-    brnz v603, block74
-    jump block163
-
-block163:
-    v604 = iconst.i32 2
-    v605 = ishl.i32 v582, v604
-    v606 = iadd.i32 v552, v605
-    v607 = iconst.i32 0x4ffe
-    v608 = icmp uge v606, v607
-    brz v608, block70
-    jump block164
-
-block164:
-    trap heap_oob
-
-block70:
-    v609 = uextend.i64 v606
-    v610 = iadd_imm.i64 v0, -8
-    v611 = load.i64 v610
-    v612 = iadd v611, v609
-    v613 = load.i32 v612
-    v614 = isub.i32 v551, v613
-    v615 = iconst.i32 -1
-    v616 = icmp eq v614, v615
-    v617 = bint.i32 v616
-    brnz v617, block75
-    jump block165
-
-block165:
-    v618 = iconst.i32 1
-    v619 = iadd v614, v618
-    v620 = icmp ult v619, v554
-    v621 = bint.i32 v620
-    v622 = copy.i32 v553
-    brnz v621, block71(v622)
-    jump block166
-
-block166:
-    v623 = copy v619
-    jump block71(v623)
-
-block71(v624: i32):
-    v625 = copy v624
-    brnz.i32 v555, block72(v625)
-    jump block72(v619)
-
-block72(v626: i32):
-    brnz.i32 v562, block73(v626)
-    jump block73(v619)
-
-block73(v627: i32):
-    v628 = copy.i32 v554
-    v629 = copy.i32 v562
-    jump block87(v548, v627, v573, v549, v550, v551, v552, v553, v628, v629, v557, v558, v559, v560, v561)
-
-block74:
-    v630 = copy.i32 v549
-    v631 = copy.i32 v554
-    v632 = copy.i32 v562
-    jump block87(v548, v630, v573, v549, v550, v551, v552, v553, v631, v632, v557, v558, v559, v560, v561)
-
-block75:
-    v633 = icmp.i32 ugt v558, v555
-    v634 = bint.i32 v633
-    v635 = copy.i32 v558
-    brnz v634, block76(v635)
-    jump block167
-
-block167:
-    v636 = copy.i32 v555
-    jump block76(v636)
-
-block76(v637: i32):
-    v638 = iadd.i32 v557, v637
-    v639 = iconst.i32 0x4ffe
-    v640 = icmp uge v638, v639
-    brz v640, block77
-    jump block168
-
-block168:
-    trap heap_oob
-
-block77:
-    v641 = uextend.i64 v638
-    v642 = iadd_imm.i64 v0, -8
-    v643 = load.i64 v642
-    v644 = iadd v643, v641
-    v645 = uload8.i32 v644
-    v646 = iconst.i32 0
-    v647 = icmp eq v645, v646
-    v648 = bint.i32 v647
-    brnz v648, block82(v548, v549, v551, v552)
-    jump block169
-
-block169:
-    v649 = iadd.i32 v548, v637
-    v650 = iadd.i32 v559, v637
-    v651 = iadd.i32 v560, v637
-    jump block78(v645, v649, v651, v650)
-
-block78(v652: i32, v653: i32, v654: i32, v655: i32):
-    v656 = iconst.i32 255
-    v657 = band v652, v656
-    v658 = iconst.i32 0x4ffe
-    v659 = icmp uge v653, v658
-    brz v659, block79
-    jump block170
-
-block170:
-    trap heap_oob
-
-block79:
-    v660 = uextend.i64 v653
-    v661 = iadd_imm.i64 v0, -8
-    v662 = load.i64 v661
-    v663 = iadd v662, v660
-    v664 = uload8.i32 v663
-    v665 = icmp.i32 ne v657, v664
-    v666 = bint.i32 v665
-    v667 = copy.i32 v554
-    v668 = copy.i32 v562
-    brnz v666, block87(v548, v654, v573, v549, v550, v551, v552, v553, v667, v668, v557, v558, v559, v560, v561)
-    jump block171
-
-block171:
-    v669 = iconst.i32 1
-    v670 = iadd.i32 v653, v669
-    v671 = iconst.i32 1
-    v672 = iadd.i32 v654, v671
-    v673 = iconst.i32 0x4ffe
-    v674 = icmp.i32 uge v655, v673
-    brz v674, block80
-    jump block172
-
-block172:
-    trap heap_oob
-
-block80:
-    v675 = uextend.i64 v655
-    v676 = iadd_imm.i64 v0, -8
-    v677 = load.i64 v676
-    v678 = iadd v677, v675
-    v679 = uload8.i32 v678
-    v680 = iconst.i32 1
-    v681 = iadd.i32 v655, v680
-    brnz v679, block78(v679, v670, v672, v681)
-    jump block81
-
-block81:
-    jump block82(v548, v549, v551, v552)
-
-block82(v682: i32, v683: i32, v684: i32, v685: i32):
-    v686 = icmp.i32 ule v558, v555
-    v687 = bint.i32 v686
-    brnz v687, block90(v685, v682)
-    jump block173
-
-block173:
-    v688 = copy.i32 v561
-    jump block83(v688)
-
-block83(v689: i32):
-    v690 = iadd.i32 v557, v689
-    v691 = iconst.i32 0x4ffe
-    v692 = icmp uge v690, v691
-    brz v692, block84
-    jump block174
-
-block174:
-    trap heap_oob
-
-block84:
-    v693 = uextend.i64 v690
-    v694 = iadd_imm.i64 v0, -8
-    v695 = load.i64 v694
-    v696 = iadd v695, v693
-    v697 = uload8.i32 v696
-    v698 = iadd.i32 v682, v689
-    v699 = iconst.i32 0x4ffe
-    v700 = icmp uge v698, v699
-    brz v700, block85
-    jump block175
-
-block175:
-    trap heap_oob
-
-block85:
-    v701 = uextend.i64 v698
-    v702 = iadd_imm.i64 v0, -8
-    v703 = load.i64 v702
-    v704 = iadd v703, v701
-    v705 = uload8.i32 v704
-    v706 = icmp.i32 ne v697, v705
-    v707 = bint.i32 v706
-    brnz v707, block86
-    jump block176
-
-block176:
-    v708 = icmp.i32 ule v689, v555
-    v709 = bint.i32 v708
-    v710 = iconst.i32 -1
-    v711 = iadd.i32 v689, v710
-    v712 = iconst.i32 0
-    v713 = icmp eq v709, v712
-    v714 = bint.i32 v713
-    brnz v714, block83(v711)
-    jump block90(v685, v682)
-
-block86:
-    v715 = copy.i32 v554
-    v716 = copy.i32 v562
-    jump block88(v682, v554, v573, v683, v550, v684, v685, v553, v715, v562, v716, v557, v558, v559, v560, v561)
-
-block87(v717: i32, v718: i32, v719: i32, v720: i32, v721: i32, v722: i32, v723: i32, v724: i32, v725: i32, v726: i32, v727: i32, v728: i32, v729: i32, v730: i32, v731: i32):
-    v732 = copy v718
-    v733 = iconst.i32 0
-    jump block88(v717, v732, v719, v720, v721, v722, v723, v724, v725, v733, v726, v727, v728, v729, v730, v731)
-
-block88(v734: i32, v735: i32, v736: i32, v737: i32, v738: i32, v739: i32, v740: i32, v741: i32, v742: i32, v743: i32, v744: i32, v745: i32, v746: i32, v747: i32, v748: i32, v749: i32):
-    v750 = iadd v734, v735
-    v751 = copy v742
-    v752 = copy v743
-    v753 = copy v744
-    jump block65(v736, v750, v737, v738, v739, v740, v741, v751, v752, v753, v745, v746, v747, v748, v749)
-
-block89(v754: i32):
-    v755 = iconst.i32 0
-    jump block90(v754, v755)
-
-block90(v756: i32, v757: i32):
-    v758 = iconst.i32 0
-    v759 = iconst.i32 1056
-    v760 = iadd v756, v759
-    v761 = iconst.i32 0x4ffe
-    v762 = icmp uge v758, v761
-    brz v762, block91
-    jump block177
-
-block177:
-    trap heap_oob
-
-block91:
-    v763 = uextend.i64 v758
-    v764 = iadd_imm.i64 v0, -8
-    v765 = load.i64 v764
-    v766 = iadd v765, v763
-    store.i32 v760, v766+4
-    jump block92(v757)
-
-block92(v767: i32):
-    return v767
-}
-
-; Same problem from musl.wasm.
-function %musl(f64 [%xmm0], i64 vmctx [%rdi]) -> f64 [%xmm0] system_v {
-    gv0 = vmctx
-    heap0 = static gv0, min 0, bound 0x0001_0000_0000, offset_guard 0x8000_0000
-    sig0 = (f64 [%xmm0], i32 [%rdi], i64 vmctx [%rsi]) -> f64 [%xmm0] system_v
-    fn0 = u0:517 sig0
-
-block0(v0: f64, v1: i64):
-    v3 = iconst.i64 0
-    v4 = iconst.i32 0
-    v131 = iconst.i64 0
-    v5 = bitcast.f64 v131
-    v6 = iconst.i32 0
-    v7 = iconst.i32 0
-    v8 = iconst.i32 0
-    v132 = uextend.i64 v8
-    v133 = iadd_imm v1, 0
-    v134 = load.i64 v133
-    v9 = iadd v134, v132
-    v10 = load.i32 v9+4
-    v11 = iconst.i32 16
-    v12 = isub v10, v11
-    v135 = uextend.i64 v7
-    v136 = iadd_imm v1, 0
-    v137 = load.i64 v136
-    v13 = iadd v137, v135
-    store v12, v13+4
-    v14 = bitcast.i64 v0
-    v15 = iconst.i64 63
-    v16 = ushr v14, v15
-    v17 = ireduce.i32 v16
-    v18 = iconst.i64 32
-    v19 = ushr v14, v18
-    v20 = ireduce.i32 v19
-    v21 = iconst.i32 0x7fff_ffff
-    v22 = band v20, v21
-    v23 = iconst.i32 0x4086_232b
-    v24 = icmp ult v22, v23
-    v25 = bint.i32 v24
-    brnz v25, block10
-    jump block178
-
-block178:
-    v26 = iconst.i64 0x7fff_ffff_ffff_ffff
-    v27 = band v14, v26
-    v28 = iconst.i64 0x7ff0_0000_0000_0000
-    v29 = icmp ule v27, v28
-    v30 = bint.i32 v29
-    brnz v30, block9
-    jump block2(v12, v0)
-
-block10:
-    v31 = iconst.i32 0x3fd6_2e43
-    v32 = icmp.i32 ult v22, v31
-    v33 = bint.i32 v32
-    brnz v33, block8
-    jump block179
-
-block179:
-    v34 = iconst.i32 0x3ff0_a2b2
-    v35 = icmp.i32 uge v22, v34
-    v36 = bint.i32 v35
-    brnz v36, block6
-    jump block180
-
-block180:
-    v37 = iconst.i32 1
-    v38 = bxor.i32 v17, v37
-    v39 = isub v38, v17
-    jump block5(v0, v39)
-
-block9:
-    v138 = iconst.i64 0x4086_2e42_fefa_39ef
-    v40 = bitcast.f64 v138
-    v41 = fcmp ge v40, v0
-    v42 = bint.i32 v41
-    v139 = fcmp.f64 uno v0, v0
-    v140 = fcmp.f64 one v0, v0
-    v43 = bor v139, v140
-    v44 = bint.i32 v43
-    v45 = bor v42, v44
-    brnz v45, block7
-    jump block181
-
-block181:
-    v141 = iconst.i64 0x7fe0_0000_0000_0000
-    v46 = bitcast.f64 v141
-    v47 = fmul.f64 v0, v46
-    jump block2(v12, v47)
-
-block8:
-    v48 = iconst.i32 0x3e30_0000
-    v49 = icmp.i32 ule v22, v48
-    v50 = bint.i32 v49
-    brnz v50, block3
-    jump block182
-
-block182:
-    v51 = iconst.i32 0
-    v142 = iconst.i64 0
-    v52 = bitcast.f64 v142
-    v178 = copy.f64 v0
-    jump block4(v0, v178, v52, v51)
-
-block7:
-    v143 = iconst.i64 0xc086_232b_dd7a_bcd2
-    v53 = bitcast.f64 v143
-    v54 = fcmp.f64 ge v0, v53
-    v55 = bint.i32 v54
-    v56 = bor v55, v44
-    brnz v56, block6
-    jump block183
-
-block183:
-    v144 = iconst.i64 0xb6a0_0000_0000_0000
-    v57 = bitcast.f64 v144
-    v58 = fdiv v57, v0
-    v59 = fdemote.f32 v58
-    v145 = uextend.i64 v12
-    v146 = iadd_imm.i64 v1, 0
-    v147 = load.i64 v146
-    v60 = iadd v147, v145
-    store v59, v60+12
-    v148 = iconst.i64 0
-    v61 = bitcast.f64 v148
-    v149 = iconst.i64 0xc087_4910_d52d_3051
-    v62 = bitcast.f64 v149
-    v63 = fcmp gt v62, v0
-    v64 = bint.i32 v63
-    brnz v64, block2(v12, v61)
-    jump block6
-
-block6:
-    v150 = iconst.i64 0x3ff7_1547_652b_82fe
-    v66 = bitcast.f64 v150
-    v67 = fmul.f64 v0, v66
-    v69 = iconst.i32 3
-    v70 = ishl.i32 v17, v69
-    v71 = iconst.i32 5040
-    v72 = iadd v70, v71
-    v151 = uextend.i64 v72
-    v152 = iadd_imm.i64 v1, 0
-    v153 = load.i64 v152
-    v73 = iadd v153, v151
-    v74 = load.f64 v73
-    v75 = fadd v67, v74
-    v76 = x86_cvtt2si.i32 v75
-    v158 = iconst.i32 0x8000_0000
-    v154 = icmp ne v76, v158
-    brnz v154, block11
-    jump block184
-
-block184:
-    v155 = fcmp uno v75, v75
-    brz v155, block12
-    jump block185
-
-block185:
-    trap bad_toint
-
-block12:
-    v159 = iconst.i64 0xc1e0_0000_0020_0000
-    v156 = bitcast.f64 v159
-    v157 = fcmp ge v156, v75
-    brz v157, block13
-    jump block186
-
-block186:
-    trap int_ovf
-
-block13:
-    jump block11
-
-block11:
-    jump block5(v0, v76)
-
-block5(v77: f64, v78: i32):
-    v79 = fcvt_from_sint.f64 v78
-    v160 = iconst.i64 0xbfe6_2e42_fee0_0000
-    v80 = bitcast.f64 v160
-    v81 = fmul v79, v80
-    v82 = fadd v77, v81
-    v161 = iconst.i64 0x3dea_39ef_3579_3c76
-    v83 = bitcast.f64 v161
-    v84 = fmul v79, v83
-    v85 = fsub v82, v84
-    jump block4(v82, v85, v84, v78)
-
-block4(v86: f64, v87: f64, v108: f64, v113: i32):
-    v88 = fmul v87, v87
-    v162 = iconst.i64 0x3e66_3769_72be_a4d0
-    v89 = bitcast.f64 v162
-    v90 = fmul v88, v89
-    v163 = iconst.i64 0xbeeb_bd41_c5d2_6bf1
-    v91 = bitcast.f64 v163
-    v92 = fadd v90, v91
-    v93 = fmul v88, v92
-    v164 = iconst.i64 0x3f11_566a_af25_de2c
-    v94 = bitcast.f64 v164
-    v95 = fadd v93, v94
-    v96 = fmul v88, v95
-    v165 = iconst.i64 0xbf66_c16c_16be_bd93
-    v97 = bitcast.f64 v165
-    v98 = fadd v96, v97
-    v99 = fmul v88, v98
-    v166 = iconst.i64 0x3fc5_5555_5555_553e
-    v100 = bitcast.f64 v166
-    v101 = fadd v99, v100
-    v102 = fmul v88, v101
-    v103 = fsub v87, v102
-    v104 = fmul v87, v103
-    v167 = iconst.i64 0x4000_0000_0000_0000
-    v105 = bitcast.f64 v167
-    v106 = fsub v105, v103
-    v107 = fdiv v104, v106
-    v109 = fsub v107, v108
-    v110 = fadd v86, v109
-    v168 = iconst.i64 0x3ff0_0000_0000_0000
-    v111 = bitcast.f64 v168
-    v112 = fadd v110, v111
-    v169 = iconst.i32 0
-    v114 = icmp eq v113, v169
-    v115 = bint.i32 v114
-    brnz v115, block2(v12, v112)
-    jump block187
-
-block187:
-    v116 = call fn0(v112, v113, v1)
-    jump block2(v12, v116)
-
-block3:
-    v170 = iconst.i64 0x7fe0_0000_0000_0000
-    v117 = bitcast.f64 v170
-    v118 = fadd.f64 v0, v117
-    v171 = uextend.i64 v12
-    v172 = iadd_imm.i64 v1, 0
-    v173 = load.i64 v172
-    v119 = iadd v173, v171
-    store v118, v119
-    v174 = iconst.i64 0x3ff0_0000_0000_0000
-    v120 = bitcast.f64 v174
-    v121 = fadd.f64 v0, v120
-    jump block2(v12, v121)
-
-block2(v123: i32, v130: f64):
-    v122 = iconst.i32 0
-    v127 = iconst.i32 16
-    v128 = iadd v123, v127
-    v175 = uextend.i64 v122
-    v176 = iadd_imm.i64 v1, 0
-    v177 = load.i64 v176
-    v129 = iadd v177, v175
-    store v128, v129+4
-    jump block1(v130)
-
-block1(v2: f64):
-    return v2
-}
diff --git a/cranelift/filetests/filetests/regalloc/coalescing-216.clif b/cranelift/filetests/filetests/regalloc/coalescing-216.clif
deleted file mode 100644
index 4c9b27d6b0..0000000000
--- a/cranelift/filetests/filetests/regalloc/coalescing-216.clif
+++ /dev/null
@@ -1,87 +0,0 @@
-test regalloc
-target x86_64 legacy haswell
-
-; Reported as https://github.com/bytecodealliance/cranelift/issues/216 from the Binaryen fuzzer.
-;
-; The (old) coalescer creates a virtual register with two identical values.
-function %pr216(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] system_v {
-block0(v0: i32, v1: i64):
-    v3 = iconst.i64 0
-    v5 = iconst.i32 0
-    brz v5, block3(v3)
-    jump block4(v3, v3)
-
-block4(v11: i64, v29: i64):
-    v6 = iconst.i32 0
-    brz v6, block14
-    jump block15
-
-block15:
-    v9 = iconst.i32 -17
-    v12 = iconst.i32 0xffff_ffff_ffff_8000
-    jump block9(v12)
-
-block9(v10: i32):
-    brnz v10, block8(v9, v11, v11)
-    jump block16
-
-block16:
-    brz.i32 v9, block13
-    jump block17
-
-block17:
-    v13 = iconst.i32 0
-    brnz v13, block6(v11, v11)
-    jump block18
-
-block18:
-    v14 = iconst.i32 0
-    brz v14, block12
-    jump block11
-
-block12:
-    jump block4(v11, v11)
-
-block11:
-    jump block10(v11)
-
-block13:
-    v15 = iconst.i64 1
-    jump block10(v15)
-
-block10(v21: i64):
-    v16 = iconst.i32 0
-    brnz v16, block6(v21, v11)
-    jump block19
-
-block19:
-    v17 = iconst.i32 0xffff_ffff_ffff_9f35
-    jump block8(v17, v21, v11)
-
-block8(v8: i32, v23: i64, v28: i64):
-    jump block7(v8, v23, v28)
-
-block14:
-    v18 = iconst.i32 0
-    jump block7(v18, v11, v29)
-
-block7(v7: i32, v22: i64, v27: i64):
-    jump block6(v22, v27)
-
-block6(v20: i64, v25: i64):
-    v19 = iconst.i32 0xffc7
-    brnz v19, block4(v20, v25)
-    jump block5
-
-block5:
-    jump block3(v25)
-
-block3(v24: i64):
-    jump block2(v24)
-
-block2(v4: i64):
-    jump block1(v4)
-
-block1(v2: i64):
-    return v2
-}
diff --git a/cranelift/filetests/filetests/regalloc/coloring-227.clif b/cranelift/filetests/filetests/regalloc/coloring-227.clif
deleted file mode 100644
index d47a905637..0000000000
--- a/cranelift/filetests/filetests/regalloc/coloring-227.clif
+++ /dev/null
@@ -1,115 +0,0 @@
-test regalloc
-target x86_64 legacy haswell
-
-function %pr227(i32 [%rdi], i32 [%rsi], i32 [%rdx], i32 [%rcx], i64 vmctx [%r8]) system_v {
-    gv0 = vmctx
-    heap0 = static gv0, min 0, bound 0x0001_0000_0000, offset_guard 0x8000_0000
-
-                          block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i64):
-[RexOp1pu_id#b8]              v5 = iconst.i32 0
-[RexOp1pu_id#b8]              v6 = iconst.i32 0
-[RexOp1tjccb#74]              brz v6, block10
-[Op1jmpb#eb]                  jump block3(v5, v5, v5, v5, v5, v5, v0, v1, v2, v3)
-
-                          block3(v15: i32, v17: i32, v25: i32, v31: i32, v40: i32, v47: i32, v54: i32, v61: i32, v68: i32, v75: i32):
-[Op1jmpb#eb]                  jump block6
-
-                          block6:
-[RexOp1pu_id#b8]              v8 = iconst.i32 0
-[RexOp1tjccb#75]              brnz v8, block5
-[Op1jmpb#eb]                  jump block20
-
-                          block20:
-[RexOp1pu_id#b8]              v9 = iconst.i32 0
-[RexOp1pu_id#b8]              v11 = iconst.i32 0
-[DynRexOp1icscc#39]           v12 = icmp.i32 eq v15, v11
-[RexOp2urm_noflags#4b6]       v13 = bint.i32 v12
-[DynRexOp1rr#21]              v14 = band v9, v13
-[RexOp1tjccb#75]              brnz v14, block6
-[Op1jmpb#eb]                  jump block7
-
-                          block7:
-[RexOp1tjccb#74]              brz.i32 v17, block8
-[Op1jmpb#eb]                  jump block17
-
-                          block17:
-[RexOp1pu_id#b8]              v18 = iconst.i32 0
-[RexOp1tjccb#74]              brz v18, block9
-[Op1jmpb#eb]                  jump block16
-
-                          block16:
-[RexOp1pu_id#b8]              v21 = iconst.i32 0
-[RexOp1umr#89]                v79 = uextend.i64 v5
-[RexOp1r_ib#8083]             v80 = iadd_imm.i64 v4, 0
-[RexOp1ld#808b]               v81 = load.i64 v80
-[RexOp1rr#8001]               v22 = iadd v81, v79
-[RexMp1st#189]                istore16 v21, v22
-[Op1jmpb#eb]                  jump block9
-
-                          block9:
-[Op1jmpb#eb]                  jump block8
-
-                          block8:
-[RexOp1pu_id#b8]              v27 = iconst.i32 3
-[RexOp1pu_id#b8]              v28 = iconst.i32 4
-[DynRexOp1rr#09]              v35 = bor.i32 v31, v13
-[RexOp1tjccb#75]              brnz v35, block15(v27)
-[Op1jmpb#eb]                  jump block15(v28)
-
-                          block15(v36: i32):
-[Op1jmpb#eb]                  jump block3(v25, v36, v25, v31, v40, v47, v54, v61, v68, v75)
-
-                          block5:
-[Op1jmpb#eb]                  jump block4
-
-                          block4:
-[Op1jmpb#eb]                  jump block2(v40, v47, v54, v61, v68, v75)
-
-                          block10:
-[RexOp1pu_id#b8]              v43 = iconst.i32 0
-[Op1jmpb#eb]                  jump block2(v43, v5, v0, v1, v2, v3)
-
-                          block2(v7: i32, v45: i32, v52: i32, v59: i32, v66: i32, v73: i32):
-[RexOp1pu_id#b8]              v44 = iconst.i32 0
-[RexOp1tjccb#74]              brz v44, block12
-[Op1jmpb#eb]                  jump block18
-
-                          block18:
-[RexOp1pu_id#b8]              v50 = iconst.i32 11
-[RexOp1tjccb#74]              brz v50, block14
-[Op1jmpb#eb]                  jump block19
-
-                          block19:
-[RexOp1umr#89]                v82 = uextend.i64 v52
-[RexOp1r_ib#8083]             v83 = iadd_imm.i64 v4, 0
-[RexOp1ld#808b]               v84 = load.i64 v83
-[RexOp1rr#8001]               v57 = iadd v84, v82
-[RexOp1ld#8b]                 v58 = load.i32 v57
-[RexOp1umr#89]                v85 = uextend.i64 v58
-[RexOp1r_ib#8083]             v86 = iadd_imm.i64 v4, 0
-[RexOp1ld#808b]               v87 = load.i64 v86
-[RexOp1rr#8001]               v64 = iadd v87, v85
-[RexOp1st#88]                 istore8 v59, v64
-[RexOp1pu_id#b8]              v65 = iconst.i32 0
-[Op1jmpb#eb]                  jump block13(v65)
-
-                          block14:
-[Op1jmpb#eb]                  jump block13(v66)
-
-                          block13(v51: i32):
-[RexOp1umr#89]                v88 = uextend.i64 v45
-[RexOp1r_ib#8083]             v89 = iadd_imm.i64 v4, 0
-[RexOp1ld#808b]               v90 = load.i64 v89
-[RexOp1rr#8001]               v71 = iadd v90, v88
-[RexOp1st#89]                 store v51, v71
-[Op1jmpb#eb]                  jump block12
-
-                          block12:
-[Op1jmpb#eb]                  jump block11
-
-                          block11:
-[Op1jmpb#eb]                  jump block1
-
-                          block1:
-[Op1ret#c3]                   return
-}
diff --git a/cranelift/filetests/filetests/regalloc/constraints.clif b/cranelift/filetests/filetests/regalloc/constraints.clif
deleted file mode 100644
index 60cd731ed8..0000000000
--- a/cranelift/filetests/filetests/regalloc/constraints.clif
+++ /dev/null
@@ -1,82 +0,0 @@
-test regalloc
-target i686
-
-; regex: V=v\d+
-; regex: REG=%r([abcd]x|[sd]i)
-
-; Tied operands, both are killed at instruction.
-function %tied_easy() -> i32 {
-block0:
-    v0 = iconst.i32 12
-    v1 = iconst.i32 13
-    ; not: copy
-    ; check: isub
-    v2 = isub v0, v1
-    return v2
-}
-
-; Tied operand is live after instruction.
-function %tied_alive() -> i32 {
-block0:
-    v0 = iconst.i32 12
-    v1 = iconst.i32 13
-    ; check: $(v0c=$V) = copy v0
-    ; check: v2 = isub $v0c, v1
-    v2 = isub v0, v1
-    ; check: v3 = iadd v2, v0
-    v3 = iadd v2, v0
-    return v3
-}
-
-; Fixed register constraint.
-function %fixed_op() -> i32 {
-block0:
-    ; check: ,%rax]
-    ; sameln: v0 = iconst.i32 12
-    v0 = iconst.i32 12
-    v1 = iconst.i32 13
-    ; The dynamic shift amount must be in %rcx
-    ; check: regmove v0, %rax -> %rcx
-    v2 = ishl v1, v0
-    return v2
-}
-
-; Fixed register constraint twice.
-function %fixed_op_twice() -> i32 {
-block0:
-    ; check: ,%rax]
-    ; sameln: v0 = iconst.i32 12
-    v0 = iconst.i32 12
-    v1 = iconst.i32 13
-    ; The dynamic shift amount must be in %rcx
-    ; check: regmove v0, %rax -> %rcx
-    v2 = ishl v1, v0
-    ; check: regmove v0, %rcx -> $REG
-    ; check: regmove v2, $REG -> %rcx
-    v3 = ishl v0, v2
-
-    return v3
-}
-
-; Tied use of a diverted register.
-function %fixed_op_twice() -> i32 {
-block0:
-    ; check: ,%rax]
-    ; sameln: v0 = iconst.i32 12
-    v0 = iconst.i32 12
-    v1 = iconst.i32 13
-    ; The dynamic shift amount must be in %rcx
-    ; check: regmove v0, %rax -> %rcx
-    ; check: v2 = ishl v1, v0
-    v2 = ishl v1, v0
-
-    ; Now v0 is globally allocated to %rax, but diverted to %rcx.
-    ; Check that the tied def gets the diverted register.
-    v3 = isub v0, v2
-    ; not: regmove
-    ; check: ,%rcx]
-    ; sameln: isub
-    ; Move it into place for the return value.
-    ; check: regmove v3, %rcx -> %rax
-    return v3
-}
diff --git a/cranelift/filetests/filetests/regalloc/fallthrough-return.clif b/cranelift/filetests/filetests/regalloc/fallthrough-return.clif
deleted file mode 100644
index 90650aa4f0..0000000000
--- a/cranelift/filetests/filetests/regalloc/fallthrough-return.clif
+++ /dev/null
@@ -1,23 +0,0 @@
-test regalloc
-target x86_64 legacy
-
-; Test that fallthrough returns are visited by reload and coloring.
-
-function %foo() -> f64 {
-  fn0 = %bar()
-
-block0:
-  v0 = f64const 0.0
-  call fn0()
-  fallthrough_return v0
-}
-; check: fill v0
-
-function %foo() -> f64 {
-  fn0 = %bar() -> f64, f64
-
-block0:
-  v0, v1 = call fn0()
-  fallthrough_return v1
-}
-; check: regmove v1, %xmm1 -> %xmm0
diff --git a/cranelift/filetests/filetests/regalloc/ghost-param.clif b/cranelift/filetests/filetests/regalloc/ghost-param.clif
deleted file mode 100644
index 1d569727dd..0000000000
--- a/cranelift/filetests/filetests/regalloc/ghost-param.clif
+++ /dev/null
@@ -1,45 +0,0 @@
-test regalloc
-target x86_64 legacy haswell
-
-; This test case would create a block parameter that was a ghost value.
-; The coalescer would insert a copy of the ghost value, leading to verifier errors.
-;
-; We don't allow block parameters to be ghost values any longer.
-;
-; Test case by binaryen fuzzer!
-
-function %pr215(i64 vmctx [%rdi]) system_v {
-block0(v0: i64):
-    v10 = iconst.i64 0
-    v1 = bitcast.f64 v10
-    jump block5(v1)
-
-block5(v9: f64):
-    v11 = iconst.i64 0xffff_ffff_ff9a_421a
-    v4 = bitcast.f64 v11
-    v6 = iconst.i32 0
-    v7 = iconst.i32 1
-    brnz v7, block4(v6)
-    jump block8
-
-block8:
-    v8 = iconst.i32 0
-    jump block7(v8)
-
-block7(v5: i32):
-    brnz v5, block3(v4)
-    jump block5(v4)
-
-block4(v3: i32):
-    brnz v3, block2
-    jump block3(v9)
-
-block3(v2: f64):
-    jump block2
-
-block2:
-    jump block1
-
-block1:
-    return
-}
diff --git a/cranelift/filetests/filetests/regalloc/global-constraints.clif b/cranelift/filetests/filetests/regalloc/global-constraints.clif
deleted file mode 100644
index 1fe89ae823..0000000000
--- a/cranelift/filetests/filetests/regalloc/global-constraints.clif
+++ /dev/null
@@ -1,30 +0,0 @@
-test regalloc
-target i686 legacy
-
-; This test covers the troubles when values with global live ranges are defined
-; by instructions with constrained register classes.
-;
-; The icmp_imm instrutions write their b1 result to the ABCD register class on
-; 32-bit x86. So if we define 5 live values, they can't all fit.
-function %global_constraints(i32) {
-block0(v0: i32):
-    v1 = icmp_imm eq v0, 1
-    v2 = icmp_imm ugt v0, 2
-    v3 = icmp_imm sle v0, 3
-    v4 = icmp_imm ne v0, 4
-    v5 = icmp_imm sge v0, 5
-    brnz v5, block1
-    jump block2
-
-block2:
-    return
-
-block1:
-    ; Make sure v1-v5 are live in.
-    v10 = band v1, v2
-    v11 = bor v3, v4
-    v12 = bor v10, v11
-    v13 = bor v12, v5
-    trapnz v13, user0
-    return
-}
diff --git a/cranelift/filetests/filetests/regalloc/global-fixed.clif b/cranelift/filetests/filetests/regalloc/global-fixed.clif
deleted file mode 100644
index 6d31f7511a..0000000000
--- a/cranelift/filetests/filetests/regalloc/global-fixed.clif
+++ /dev/null
@@ -1,16 +0,0 @@
-test regalloc
-target x86_64 legacy haswell
-
-function %foo() system_v {
-block4:
-    v3 = iconst.i32 0
-    jump block3
-
-block3:
-    v9 = udiv v3, v3
-    jump block1
-
-block1:
-    v19 = iadd.i32 v9, v9
-    jump block3
-}
diff --git a/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif b/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif
deleted file mode 100644
index c4534b0f8b..0000000000
--- a/cranelift/filetests/filetests/regalloc/gpr-deref-safe-335.clif
+++ /dev/null
@@ -1,44 +0,0 @@
-test regalloc
-target x86_64 legacy
-
-function u0:587() fast {
-block0:
-    v97 = iconst.i32 0
-    v169 = iconst.i32 0
-    v1729 = iconst.i32 0
-    jump block100(v97, v97, v97, v97, v97)
-
-block100(v1758: i32, v1784: i32, v1845: i32, v1856: i32, v1870: i32):
-    v1762 = iconst.i32 0
-    v1769 = iconst.i32 0
-    v1774 = iconst.i32 0
-    v1864 = iconst.i32 0
-    v1897 = iconst.i32 0
-    jump block102(v1774, v1784, v1845, v1856, v1870, v1758, v1762, v169, v1729, v97, v169, v169, v169, v169)
-
-block102(v1785: i32, v1789: i32, v1843: i32, v1854: i32, v1868: i32, v1882: i32, v1890: i32, v1901: i32, v1921: i32, v1933: i32, v2058: i32, v2124: i32, v2236: i32, v2366: i32):
-    v1929 = iconst.i32 0
-    v1943 = iconst.i32 0
-    v1949 = iconst.i32 0
-    jump block123(v1897, v1769)
-
-block123(v1950: i32, v1979: i32):
-    v1955 = iconst.i32 0
-    brz v1955, block125
-    jump block122(v1929, v1843, v1864, v2058, v1882, v1897, v1943, v1868, v2124, v1901)
-
-block125:
-    v1961 = iadd_imm.i32 v1949, 0
-    v1952 = iconst.i32 0
-    v1962 = iconst.i64 0
-    v1963 = load.i32 v1962
-    brz v1963, block123(v1952, v1961)
-    jump block127
-
-block127:
-    v1966 = iconst.i32 0
-    jump block122(v1963, v1966, v1966, v1966, v1966, v1966, v1966, v1966, v1966, v1966)
-
-block122(v1967: i32, v1971: i32, v1972: i32, v1978: i32, v2032: i32, v2041: i32, v2053: i32, v2076: i32, v2085: i32, v2096: i32):
-    trap user0
-}
diff --git a/cranelift/filetests/filetests/regalloc/infinite-interference.clif b/cranelift/filetests/filetests/regalloc/infinite-interference.clif
deleted file mode 100644
index b7a7736405..0000000000
--- a/cranelift/filetests/filetests/regalloc/infinite-interference.clif
+++ /dev/null
@@ -1,37 +0,0 @@
-test regalloc
-target riscv32
-
-; Here, the coalescer initially builds vreg0 = [v1, v2, v3]
-;
-; There's interference between v1 and v2 at the brz instruction. Isolating v2 is not going to
-; resolve that conflict since v1 will just interfere with the inserted copy too.
-
-;function %c1(i32) -> i32 {
-;block0(v0: i32):
-;    v1 = iadd_imm v0, 1
-;    v2 = iconst.i32 1
-;    brz v1, block1(v2)
-;    jump block2
-;
-;block1(v3: i32):
-;    return v3
-;
-;block2:
-;    jump block1(v1)
-;}
-
-; Same thing with v1 and v2 swapped to reverse the order of definitions.
-
-function %c2(i32) -> i32 {
-block0(v0: i32):
-    v1 = iadd_imm v0, 1
-    v2 = iconst.i32 1
-    brz v2, block1(v1)
-    jump block2
-
-block1(v3: i32):
-    return v3
-
-block2:
-    jump block1(v2)
-}
diff --git a/cranelift/filetests/filetests/regalloc/iterate.clif b/cranelift/filetests/filetests/regalloc/iterate.clif
deleted file mode 100644
index 3272199bca..0000000000
--- a/cranelift/filetests/filetests/regalloc/iterate.clif
+++ /dev/null
@@ -1,164 +0,0 @@
-test regalloc
-target x86_64 legacy haswell
-
-function u0:9(i64 [%rdi], f32 [%xmm0], f64 [%xmm1], i32 [%rsi], i32 [%rdx], i64 vmctx [%r14]) -> i64 [%rax] baldrdash_system_v {
-block0(v0: i64, v1: f32, v2: f64, v3: i32, v4: i32, v5: i64):
-    v32 = iconst.i32 0
-    v6 = bitcast.f32 v32
-    v7 = iconst.i64 0
-    v33 = iconst.i64 0
-    v8 = bitcast.f64 v33
-    v34 = iconst.i32 0xbe99_999a
-    v9 = bitcast.f32 v34
-    v10 = iconst.i32 40
-    v11 = iconst.i32 -7
-    v35 = iconst.i32 0x40b0_0000
-    v12 = bitcast.f32 v35
-    v13 = iconst.i64 6
-    v36 = iconst.i64 0x4020_0000_0000_0000
-    v14 = bitcast.f64 v36
-    v44 = iconst.i64 0
-    v37 = icmp slt v0, v44
-    brnz v37, block2
-    jump block11
-
-block11:
-    v38 = fcvt_from_sint.f64 v0
-    jump block3(v38)
-
-block2:
-    v45 = iconst.i32 1
-    v39 = ushr.i64 v0, v45
-    v40 = band_imm.i64 v0, 1
-    v41 = bor v39, v40
-    v42 = fcvt_from_sint.f64 v41
-    v43 = fadd v42, v42
-    jump block3(v43)
-
-block3(v15: f64):
-    v16 = fpromote.f64 v9
-    v46 = uextend.i64 v10
-    v17 = fcvt_from_sint.f64 v46
-    v18 = fcvt_from_sint.f64 v11
-    v19 = fpromote.f64 v12
-    v54 = iconst.i64 0
-    v47 = icmp.i64 slt v13, v54
-    brnz v47, block4
-    jump block12
-
-block12:
-    v48 = fcvt_from_sint.f64 v13
-    jump block5(v48)
-
-block4:
-    v55 = iconst.i32 1
-    v49 = ushr.i64 v13, v55
-    v50 = band_imm.i64 v13, 1
-    v51 = bor v49, v50
-    v52 = fcvt_from_sint.f64 v51
-    v53 = fadd v52, v52
-    jump block5(v53)
-
-block5(v20: f64):
-    v63 = iconst.i64 0
-    v56 = icmp.i64 slt v7, v63
-    brnz v56, block6
-    jump block13
-
-block13:
-    v57 = fcvt_from_sint.f64 v7
-    jump block7(v57)
-
-block6:
-    v64 = iconst.i32 1
-    v58 = ushr.i64 v7, v64
-    v59 = band_imm.i64 v7, 1
-    v60 = bor v58, v59
-    v61 = fcvt_from_sint.f64 v60
-    v62 = fadd v61, v61
-    jump block7(v62)
-
-block7(v21: f64):
-    v22 = fadd v21, v14
-    v23 = fadd.f64 v20, v22
-    v24 = fadd.f64 v19, v23
-    v25 = fadd.f64 v18, v24
-    v26 = fadd.f64 v17, v25
-    v27 = fadd.f64 v2, v26
-    v28 = fadd.f64 v16, v27
-    v29 = fadd.f64 v15, v28
-    v30 = x86_cvtt2si.i64 v29
-    v69 = iconst.i64 0x8000_0000_0000_0000
-    v65 = icmp ne v30, v69
-    brnz v65, block8
-    jump block15
-
-block15:
-    v66 = fcmp uno v29, v29
-    brz v66, block9
-    jump block16
-
-block16:
-    trap bad_toint
-
-block9:
-    v70 = iconst.i64 0xc3e0_0000_0000_0000
-    v67 = bitcast.f64 v70
-    v68 = fcmp gt v67, v29
-    brz v68, block10
-    jump block17
-
-block17:
-    trap int_ovf
-
-block10:
-    jump block8
-
-block8:
-    jump block1(v30)
-
-block1(v31: i64):
-    return v31
-}
-
-function u0:26(i64 vmctx [%r14]) -> i64 [%rax] baldrdash_system_v {
-    gv1 = vmctx
-    gv0 = iadd_imm.i64 gv1, 48
-    sig0 = (i32 [%rdi], i64 [%rsi], i64 vmctx [%r14], i64 sigid [%rbx]) -> i64 [%rax] baldrdash_system_v
-
-block0(v0: i64):
-    v1 = iconst.i32 32
-    v2 = iconst.i64 64
-    v3 = iconst.i32 9
-    v4 = iconst.i64 1063
-    v5 = iadd_imm v0, 48
-    v6 = load.i32 v5
-    v7 = icmp uge v3, v6
-    ; If we're unlucky, there are no ABCD registers available for v7 at this branch.
-    brz v7, block2
-    jump block4
-
-block4:
-    trap heap_oob
-
-block2:
-    v8 = load.i64 v5+8
-    v9 = uextend.i64 v3
-    v16 = iconst.i64 16
-    v10 = imul v9, v16
-    v11 = iadd v8, v10
-    v12 = load.i64 v11
-    brnz v12, block3
-    jump block5
-
-block5:
-    trap icall_null
-
-block3:
-    v13 = load.i64 v11+8
-    v14 = call_indirect.i64 sig0, v12(v1, v2, v13, v4)
-    jump block1(v14)
-
-block1(v15: i64):
-    return v15
-}
diff --git a/cranelift/filetests/filetests/regalloc/multi-constraints.clif b/cranelift/filetests/filetests/regalloc/multi-constraints.clif
deleted file mode 100644
index 0a6b160f09..0000000000
--- a/cranelift/filetests/filetests/regalloc/multi-constraints.clif
+++ /dev/null
@@ -1,51 +0,0 @@
-test regalloc
-target x86_64 legacy haswell
-
-; Test combinations of constraints.
-;
-; The x86 ushr instruction requires its second operand to be passed in %rcx and its output is
-; tied to the first input operand.
-;
-; If we pass the same value to both operands, both constraints must be satisfied.
-
-; Found by the Binaryen fuzzer in PR221.
-;
-; Conditions triggering the problem:
-;
-; - The same value used for a tied operand and a fixed operand.
-; - The common value is already in %rcx.
-; - The tied output value is live outside the block.
-;
-; Under these conditions, Solver::add_tied_input() would create a variable for the tied input
-; without considering the fixed constraint.
-function %pr221(i64 [%rdi], i64 [%rsi], i64 [%rdx], i64 [%rcx]) -> i64 [%rax] {
-block0(v0: i64, v1: i64, v2: i64, v3: i64):
-    v4 = ushr v3, v3
-    jump block1
-
-block1:
-    return v4
-}
-
-; Found by the Binaryen fuzzer in PR218.
-;
-; This is a similar situation involving combined constraints on the ushr instruction:
-;
-; - The %rcx register is already in use by a globally live value.
-; - The ushr x, x result is also a globally live value.
-;
-; Since the ushr x, x result is forced to be placed in %rcx, we must set the replace_global_defines
-; flag so it can be reassigned to a different global register.
-function %pr218(i64 [%rdi], i64 [%rsi], i64 [%rdx], i64 [%rcx]) -> i64 [%rax] {
-block0(v0: i64, v1: i64, v2: i64, v3: i64):
-    ; check: regmove v3, %rcx ->
-    v4 = ushr v0, v0
-    ; check: v4 = copy
-    jump block1
-
-block1:
-    ; v3 is globally live in %rcx.
-    ; v4 is also globally live. Needs to be assigned something else for the trip across the CFG edge.
-    v5 = iadd v3, v4
-    return v5
-}
diff --git a/cranelift/filetests/filetests/regalloc/multiple-returns.clif b/cranelift/filetests/filetests/regalloc/multiple-returns.clif
deleted file mode 100644
index 8825a4df72..0000000000
--- a/cranelift/filetests/filetests/regalloc/multiple-returns.clif
+++ /dev/null
@@ -1,23 +0,0 @@
-test regalloc
-target x86_64 legacy
-
-; Return the same value twice. This needs a copy so that each value can be
-; allocated its own register.
-function %multiple_returns() -> i64, i64 {
-block0:
-    v2 = iconst.i64 0
-    return v2, v2
-}
-; check: v2 = iconst.i64 0
-; check: v3 = copy v2
-; check: return v2, v3
-
-; Same thing, now with a fallthrough_return.
-function %multiple_returns() -> i64, i64 {
-block0:
-    v2 = iconst.i64 0
-    fallthrough_return v2, v2
-}
-; check: v2 = iconst.i64 0
-; check: v3 = copy v2
-; check: fallthrough_return v2, v3
diff --git a/cranelift/filetests/filetests/regalloc/output-interference.clif b/cranelift/filetests/filetests/regalloc/output-interference.clif
deleted file mode 100644
index 1ba797f6c8..0000000000
--- a/cranelift/filetests/filetests/regalloc/output-interference.clif
+++ /dev/null
@@ -1,14 +0,0 @@
-test regalloc
-target x86_64 legacy haswell
-
-function %test(i64) -> i64 system_v {
-block0(v0: i64):
-    v2 = iconst.i64 12
-    ; This division clobbers two of its fixed input registers on x86.
-    ; These are FixedTied constraints that the spiller needs to resolve.
-    v5 = udiv v0, v2
-    v6 = iconst.i64 13
-    v9 = udiv v0, v6
-    v10 = iadd v5, v9
-    return v10
-}
diff --git a/cranelift/filetests/filetests/regalloc/reload-208.clif b/cranelift/filetests/filetests/regalloc/reload-208.clif
deleted file mode 100644
index 5e6a7e9864..0000000000
--- a/cranelift/filetests/filetests/regalloc/reload-208.clif
+++ /dev/null
@@ -1,112 +0,0 @@
-test regalloc
-target x86_64 legacy haswell
-
-; regex: V=v\d+
-; regex: BB=block\d+
-
-; Filed as https://github.com/bytecodealliance/cranelift/issues/208
-;
-; The verifier complains about a branch argument that is not in the same virtual register as the
-; corresponding block argument.
-;
-; The problem was the reload pass rewriting block arguments on "brnz v9, block3(v9)"
-
-function %pr208(i64 vmctx [%rdi]) system_v {
-    gv1 = vmctx
-    gv0 = iadd_imm.i64 gv1, -8
-    heap0 = static gv0, min 0, bound 0x5000, offset_guard 0x0040_0000
-    sig0 = (i64 vmctx [%rdi]) -> i32 [%rax] system_v
-    sig1 = (i64 vmctx [%rdi], i32 [%rsi]) system_v
-    fn0 = u0:1 sig0
-    fn1 = u0:3 sig1
-
-block0(v0: i64):
-    v1 = iconst.i32 0
-    v2 = call fn0(v0)
-    v20 = iconst.i32 0x4ffe
-    v16 = icmp uge v2, v20
-    brz v16, block5
-    jump block9
-
-block9:
-    trap heap_oob
-
-block5:
-    v17 = uextend.i64 v2
-    v18 = iadd_imm.i64 v0, -8
-    v19 = load.i64 v18
-    v3 = iadd v19, v17
-    v4 = load.i32 v3
-    v21 = iconst.i32 0
-    v5 = icmp eq v4, v21
-    v6 = bint.i32 v5
-    brnz v6, block2
-    jump block3(v4)
-
-    ; check: block5:
-    ; check:   jump block3(v4)
-    ; check: $(splitEdge=$BB):
-    ; nextln:  jump block3(v9)
-
-block3(v7: i32):
-    call fn1(v0, v7)
-    v26 = iconst.i32 0x4ffe
-    v22 = icmp uge v7, v26
-    brz v22, block6
-    jump block10
-
-block10:
-    trap heap_oob
-
-block6:
-    v23 = uextend.i64 v7
-    v24 = iadd_imm.i64 v0, -8
-    v25 = load.i64 v24
-    v8 = iadd v25, v23
-    v9 = load.i32 v8+56
-    ; check: v9 = spill
-    ; check: brnz $V, $splitEdge
-    brnz v9, block3(v9)
-    jump block4
-
-block4:
-    jump block2
-
-block2:
-    v10 = iconst.i32 0
-    v31 = iconst.i32 0x4ffe
-    v27 = icmp uge v10, v31
-    brz v27, block7
-    jump block11
-
-block11:
-    trap heap_oob
-
-block7:
-    v28 = uextend.i64 v10
-    v29 = iadd_imm.i64 v0, -8
-    v30 = load.i64 v29
-    v11 = iadd v30, v28
-    v12 = load.i32 v11+12
-    call fn1(v0, v12)
-    v13 = iconst.i32 0
-    v36 = iconst.i32 0x4ffe
-    v32 = icmp uge v13, v36
-    brz v32, block8
-    jump block12
-
-block12:
-    trap heap_oob
-
-block8:
-    v33 = uextend.i64 v13
-    v34 = iadd_imm.i64 v0, -8
-    v35 = load.i64 v34
-    v14 = iadd v35, v33
-    v15 = load.i32 v14+12
-    call fn1(v0, v15)
-    jump block1
-
-block1:
-    return
-}
diff --git a/cranelift/filetests/filetests/regalloc/reload-779.clif b/cranelift/filetests/filetests/regalloc/reload-779.clif
deleted file mode 100644
index 5dafe32b5c..0000000000
--- a/cranelift/filetests/filetests/regalloc/reload-779.clif
+++ /dev/null
@@ -1,23 +0,0 @@
-test compile
-target x86_64 legacy
-
-; Filed as https://github.com/bytecodealliance/cranelift/issues/779
-;
-; The copy_nop optimisation to reload (see Issue 773) was creating
-; copy_nop instructions for types for which there were no encoding.
-
-function u0:0(i64, i64, i64) system_v {
-    sig0 = () system_v
-    sig1 = (i16) system_v
-    fn1 = u0:94 sig0
-    fn2 = u0:95 sig1
-
-block0(v0: i64, v1: i64, v2: i64):
-    v3 = iconst.i16 0
-    jump block1(v3)
-
-block1(v4: i16):
-    call fn1()
-    call fn2(v4)
-    jump block1(v4)
-}
diff --git a/cranelift/filetests/filetests/regalloc/reload.clif b/cranelift/filetests/filetests/regalloc/reload.clif
deleted file mode 100644
index 1ae755a988..0000000000
--- a/cranelift/filetests/filetests/regalloc/reload.clif
+++ /dev/null
@@ -1,46 +0,0 @@
-test regalloc
-target riscv32 legacy enable_e
-
-; regex: V=v\d+
-
-; Check that we can handle a function return value that got spilled.
-function %spill_return() -> i32 {
-    fn0 = %foo() -> i32 system_v
-
-block0:
-    v0 = call fn0()
-    ; check: $(reg=$V) = call fn0
-    ; check: v0 = spill $reg
-    v2 = call fn0()
-    ; check: v2 = call fn0
-    return v0
-    ; check: $(reload=$V) = fill v0
-    ; check: return $reload
-}
-
-; Check that copies where the arg has been spilled are replaced with fills.
-;
-; RV32E has 6 registers for function arguments so the 7th, v6, will be placed
-; on the stack.
-function %spilled_copy_arg(i32, i32, i32, i32, i32, i32, i32) -> i32 {
-
-block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32):
-    ; not: copy
-    ; check: v10 = fill v6
-    v10 = copy v6
-    return v10
-}
-
-; Check that copies where the result has been spilled are replaced with spills.
-;
-; v1 is live across a call so it will be spilled.
-function %spilled_copy_result(i32) -> i32 {
-    fn0 = %foo(i32)
-
-block0(v0: i32):
-    ; not: copy
-    ; check: v1 = spill v0
-    v1 = copy v0
-    call fn0(v1)
-    return v1
-}
diff --git a/cranelift/filetests/filetests/regalloc/schedule-moves.clif b/cranelift/filetests/filetests/regalloc/schedule-moves.clif
deleted file mode 100644
index 701a91a15a..0000000000
--- a/cranelift/filetests/filetests/regalloc/schedule-moves.clif
+++ /dev/null
@@ -1,39 +0,0 @@
-test regalloc
-target i686 legacy haswell
-
-function %pr165() system_v {
-block0:
-    v0 = iconst.i32 0x0102_0304
-    v1 = iconst.i32 0x1102_0304
-    v2 = iconst.i32 0x2102_0304
-    v20 = ishl v1, v0
-    v21 = ishl v2, v0
-    v22 = sshr v1, v0
-    v23 = sshr v2, v0
-    v24 = ushr v1, v0
-    v25 = ushr v2, v0
-    istore8 v0, v1+0x2710
-    istore8 v1, v0+0x2710
-    return
-}
-
-; Same as above, but use so many registers that spilling is required.
-; Note: This is also a candidate for using xchg instructions.
-function %emergency_spill() system_v {
-block0:
-    v0 = iconst.i32 0x0102_0304
-    v1 = iconst.i32 0x1102_0304
-    v2 = iconst.i32 0x2102_0304
-    v3 = iconst.i32 0x3102_0304
-    v4 = iconst.i32 0x4102_0304
-    v20 = ishl v1, v0
-    v21 = ishl v2, v3
-    v22 = sshr v1, v0
-    v23 = sshr v2, v0
-    v24 = ushr v1, v0
-    v25 = ushr v2, v0
-    istore8 v0, v1+0x2710
-    istore8 v1, v0+0x2710
-    istore8 v3, v4+0x2710
-    return
-}
diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif
deleted file mode 100644
index b280db086f..0000000000
--- a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-2.clif
+++ /dev/null
@@ -1,100 +0,0 @@
-test compile
-set opt_level=speed
-set enable_pinned_reg=true
-target x86_64 legacy haswell
-
-function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v {
-block0(v0: i32, v1: i32, v2: i32, v3: i64):
-    v236 = iconst.i32 0x4de9_bd37
-    v424 = iconst.i32 0
-    jump block37(v424)
-
-block37(v65: i32):
-    v433 = iconst.i32 0
-    jump block40(v433)
-
-block40(v70: i32):
-    v75 = iconst.i32 0
-    v259 = iconst.i32 0
-    v78 -> v259
-    v449 = iconst.i32 0
-    v450, v451 = x86_sdivmodx v75, v449, v259
-    v79 -> v450
-    v269 = iconst.i32 0
-    v270 = ushr_imm v269, 31
-    v271 = iadd v269, v270
-    v98 -> v271
-    v100 = iconst.i32 -31
-    v272 = iconst.i32 0x4de9_bd37
-    v490, v273 = x86_smulx v100, v272
-    v493 = iconst.i32 0
-    jump block61(v493)
-
-block61(v103: i32):
-    v104 = iconst.i32 -23
-    v105 = iconst.i32 -23
-    v106 = popcnt v105
-    v500 = sshr_imm v104, 31
-    v501 = iconst.i32 0
-    jump block64(v501)
-
-block64(v107: i32):
-    v108 = iconst.i32 0
-    v109 = iconst.i32 0
-    v278 = iconst.i32 0
-    v507, v279 = x86_smulx v109, v278
-    v280 = isub v279, v109
-    v281 = sshr_imm v280, 11
-    v282 = iconst.i32 0
-    v283 = iadd v281, v282
-    v111 -> v283
-    v112 = rotr v108, v283
-    jump block65
-
-block65:
-    v509 = iconst.i32 0
-    v510, v511 = x86_sdivmodx v107, v509, v112
-    v113 -> v510
-    v114 = iconst.i32 0
-    v517 = iconst.i32 0
-    v518, v519 = x86_sdivmodx v103, v517, v114
-    v115 -> v518
-    v534 = iconst.i32 0
-    v122 -> v534
-    v541 = iconst.i32 0
-    v542, v543 = x86_sdivmodx v271, v541, v122
-    v123 -> v542
-    v289 = iconst.i32 0
-    v125 -> v289
-    v550 = iconst.i32 0
-    v551, v552 = x86_sdivmodx v79, v550, v289
-    v126 -> v551
-    v130 = iconst.i32 0
-    v558 = iconst.i32 0
-    v559, v560 = x86_sdivmodx v70, v558, v130
-    v131 -> v559
-    v305 = iconst.i32 0
-    v140 -> v305
-    v577 = iconst.i32 0
-    v578, v579 = x86_sdivmodx v65, v577, v305
-    v141 -> v578
-    v166 = iconst.i32 0
-    v167 = iconst.i32 -31
-    v318 = iconst.i32 0x4de9_bd37
-    v650, v319 = x86_smulx v167, v318
-    v320 = isub v319, v167
-    v321 = sshr_imm v320, 4
-    v322 = iconst.i32 0
-    v323 = iadd v321, v322
-    v169 -> v323
-    v652 = iconst.i32 0
-    v653, v654 = x86_sdivmodx v166, v652, v323
-    v170 -> v653
-    v171 = iconst.i32 -23
-    v172 = iconst.i32 -23
-    v173 = popcnt v172
-    v174 = popcnt v173
-    v660 = sshr_imm v171, 31
-    v661, v662 = x86_sdivmodx v171, v660, v174
-    trap user0
-}
diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif
deleted file mode 100644
index 1c2d1b2bc0..0000000000
--- a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var-3.clif
+++ /dev/null
@@ -1,137 +0,0 @@
-test compile
-set opt_level=speed
-set enable_pinned_reg=true
-target x86_64 legacy haswell
-
-function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v {
-block0(v0: i32, v1: i32, v2: i32, v3: i64):
-    v5 = iconst.i32 -8
-    v114 = iconst.i32 0
-    v16 = iconst.i32 -8
-    v17 = popcnt v16
-    v192 = ifcmp_imm v17, -1
-    trapif ne v192, user0
-    jump block12
-
-block12:
-    v122 = iconst.i32 0
-    v123 = ushr_imm v122, 31
-    v124 = iadd v122, v123
-    v20 -> v124
-    v25 = iconst.i32 -19
-    v204 = iconst.i32 0
-    v31 -> v204
-    v210 = ifcmp_imm v31, -1
-    trapif ne v210, user0
-    jump block18
-
-block18:
-    v215 = iconst.i32 0
-    jump block19(v215)
-
-block19(v32: i32):
-    v35 = iconst.i32 0
-    v218 = ifcmp_imm v35, -1
-    trapif ne v218, user0
-    jump block21
-
-block21:
-    v223 = iconst.i32 0
-    jump block22(v223)
-
-block22(v36: i32):
-    v136 = iconst.i32 0
-    v40 -> v136
-    v227 = ifcmp_imm v136, -1
-    trapif ne v227, user0
-    jump block24
-
-block24:
-    v232 = iconst.i32 0
-    jump block25(v232)
-
-block25(v41: i32):
-    v142 = iconst.i32 0
-    v45 -> v142
-    v236 = ifcmp_imm v142, -1
-    trapif ne v236, user0
-    jump block27
-
-block27:
-    v241 = iconst.i32 0
-    jump block28(v241)
-
-block28(v46: i32):
-    v49 = iconst.i32 0
-    v244 = ifcmp_imm v49, -1
-    trapif ne v244, user0
-    jump block30
-
-block30:
-    v254 = iconst.i32 0
-    v53 -> v254
-    v54 = iconst.i32 -23
-    v55 = popcnt v54
-    v143 = iconst.i32 0x4de9_bd37
-    v260, v144 = x86_smulx v55, v143
-    v145 = iconst.i32 0
-    v146 = sshr_imm v145, 4
-    v147 = iconst.i32 0
-    v148 = iadd v146, v147
-    v57 -> v148
-    v58 = ishl v53, v148
-    jump block35
-
-block35:
-    v262 = iconst.i32 0
-    v263, v264 = x86_sdivmodx v46, v262, v58
-    v59 -> v263
-    v270 = iconst.i32 0
-    v271, v272 = x86_sdivmodx v41, v270, v59
-    v60 -> v271
-    v61 = f32const 0.0
-    v280 = iconst.i32 0
-    v281 = ffcmp v61, v61
-    trapff ord v281, user0
-    jump block41(v280)
-
-block41(v62: i32):
-    v157 = iconst.i32 0
-    v158 = sshr_imm v157, 4
-    v159 = iconst.i32 0
-    v160 = iadd v158, v159
-    v75 -> v160
-    v308 = ifcmp_imm v160, -1
-    trapif ne v308, user0
-    jump block52
-
-block52:
-    v87 = iconst.i32 -23
-    v88 = iconst.i32 -23
-    v89 = popcnt v88
-    v161 = iconst.i32 0x4de9_bd37
-    v324, v162 = x86_smulx v89, v161
-    v163 = isub v162, v89
-    v164 = sshr_imm v163, 4
-    v165 = iconst.i32 0
-    v166 = iadd v164, v165
-    v91 -> v166
-    v326 = iconst.i32 0
-    v327, v328 = x86_sdivmodx v87, v326, v166
-    v92 -> v327
-    v351 = iconst.i32 0
-    v99 -> v351
-    v358 = iconst.i32 0
-    v359, v360 = x86_sdivmodx v36, v358, v99
-    v100 -> v359
-    v102 = iconst.i32 0
-    v103 = rotr.i32 v32, v102
-    v366 = iconst.i32 0
-    v367, v368 = x86_sdivmodx v25, v366, v103
-    v104 -> v367
-    v383 = iconst.i32 0
-    v107 -> v383
-    v390 = iconst.i32 0
-    v391, v392 = x86_sdivmodx v124, v390, v107
-    trap user0
-}
diff --git a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif b/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif
deleted file mode 100644
index 1aec10354f..0000000000
--- a/cranelift/filetests/filetests/regalloc/solver-fixedconflict-var.clif
+++ /dev/null
@@ -1,173 +0,0 @@
-test compile
-set opt_level=speed
-set enable_pinned_reg=true
-target x86_64 legacy haswell
-
-;; Test for the issue #1123; https://github.com/bytecodealliance/cranelift/issues/1123
-
-function u0:0(i32, i32, i32, i64 vmctx) -> i64 uext system_v {
-block0(v0: i32, v1: i32, v2: i32, v3: i64):
-    v351 = iconst.i32 0x4de9_bd37
-    v31 = iconst.i32 -23
-    v35 = iconst.i32 0
-    v36 = iconst.i32 -31
-    v357 = iconst.i32 0x4de9_bd37
-    v530, v358 = x86_smulx v36, v357
-    v359 = isub v358, v36
-    v360 = sshr_imm v359, 4
-    v361 = iconst.i32 0
-    v362 = iadd v360, v361
-    v38 -> v362
-    v532 = sshr_imm v35, 31
-    v533, v534 = x86_sdivmodx v35, v532, v362
-    v39 -> v533
-    v53 = iconst.i32 0
-    v547 = ifcmp_imm v53, -1
-    trapif ne v547, user0
-    jump block30
-
-block30:
-    v75 = iconst.i32 0
-    v581 = ifcmp_imm v75, -1
-    trapif ne v581, user0
-    jump block42
-
-block42:
-    v136 = iconst.i32 0
-    v691 = ifcmp_imm v136, -1
-    trapif ne v691, user0
-    jump block81
-
-block81:
-    v158 = iconst.i32 0
-    v725 = ifcmp_imm v158, -1
-    trapif ne v725, user0
-    jump block93
-
-block93:
-    v760 = iconst.i32 0
-    jump block106(v760)
-
-block106(v175: i32):
-    v179 = iconst.i32 0
-    v180 = icmp_imm eq v179, 0
-    v183 = iconst.i32 0
-    v766 = ifcmp_imm v183, -1
-    trapif ne v766, user0
-    jump block108
-
-block108:
-    v771 = iconst.i32 0
-    jump block109(v771)
-
-block109(v184: i32):
-    v785 = iconst.i32 0
-    v193 -> v785
-    v791 = ifcmp_imm v193, -1
-    trapif ne v791, user0
-    jump block117
-
-block117:
-    v796 = iconst.i32 0
-    jump block118(v796)
-
-block118(v194: i32):
-    v203 = iconst.i32 -63
-    v809 = iconst.i32 0
-    v207 -> v809
-    v815 = ifcmp_imm v207, -1
-    trapif ne v815, user0
-    jump block126
-
-block126:
-    v209 = iconst.i32 0
-    v823 = ifcmp_imm v209, -1
-    trapif ne v823, user0
-    jump block129
-
-block129:
-    v213 = iconst.i32 -23
-    v214 = iconst.i32 -19
-    v215 = icmp_imm eq v214, 0
-    v216 = bint.i32 v215
-    v217 = popcnt v216
-    v435 = iconst.i32 0x7df7_df7d
-    v831, v436 = x86_smulx v217, v435
-    v437 = isub v436, v217
-    v438 = sshr_imm v437, 5
-    v439 = ushr_imm v438, 31
-    v440 = iadd v438, v439
-    v219 -> v440
-    v220 = rotr v213, v440
-    v229 = iconst.i32 0
-    v841 = iconst.i32 0
-    v842, v843 = x86_sdivmodx v194, v841, v229
-    v230 -> v842
-    v849 = iconst.i32 0
-    v850, v851 = x86_sdivmodx v184, v849, v230
-    v231 -> v850
-    v232 = iconst.i32 0
-    v857 = iconst.i32 0
-    v858, v859 = x86_sdivmodx v175, v857, v232
-    v233 -> v858
-    v915 = iconst.i32 0
-    jump block163(v915)
-
-block163(v253: i32):
-    v255 = iconst.i32 0
-    v256 = iconst.i32 -23
-    v257 = iconst.i32 -19
-    v258 = icmp_imm eq v257, 0
-    v259 = bint.i32 v258
-    v260 = popcnt v259
-    v447 = iconst.i32 0x7df7_df7d
-    v921, v448 = x86_smulx v260, v447
-    v449 = isub v448, v260
-    v450 = sshr_imm v449, 5
-    v451 = ushr_imm v450, 31
-    v452 = iadd v450, v451
-    v262 -> v452
-    v263 = rotr v256, v452
-    v264 = popcnt v263
-    v265 = popcnt v264
-    v266 = popcnt v265
-    v267 = rotr v255, v266
-    v268 = popcnt v267
-    v923 = iconst.i32 0
-    v924, v925 = x86_sdivmodx v253, v923, v268
-    v269 -> v924
-    v276 = iconst.i32 0
-    v277 = iconst.i32 -63
-    v278 = popcnt v277
-    v947 = iconst.i32 0
-    v948, v949 = x86_sdivmodx v276, v947, v278
-    v279 -> v948
-    v309 = iconst.i32 0
-    v310 = iconst.i32 0
-    v311 = iconst.i32 0
-    v312 = icmp_imm eq v311, 0
-    v313 = bint.i32 v312
-    v314 = rotr v310, v313
-    v315 = iconst.i32 -31
-    v464 = iconst.i32 0
-    v1020, v465 = x86_smulx v315, v464
-    v466 = isub v465, v315
-    v467 = sshr_imm v466, 4
-    v468 = iconst.i32 0
-    v469 = iadd v467, v468
-    v317 -> v469
-    v1022 = iconst.i32 0
-    v1023, v1024 = x86_sdivmodx v314, v1022, v469
-    v318 -> v1023
-    v320 = iconst.i32 0
-    v321 = iconst.i32 -19
-    v322 = popcnt v321
-    v1030 = iconst.i32 0
-    v1031, v1032 = x86_sdivmodx v320, v1030, v322
-    v323 -> v1031
-    v1047 = iconst.i32 0
-    v325 -> v1047
-    v1054 = sshr_imm v309, 31
-    v1055, v1056 = x86_sdivmodx v309, v1054, v325
-    trap user0
-}
diff --git a/cranelift/filetests/filetests/regalloc/spill-noregs.clif b/cranelift/filetests/filetests/regalloc/spill-noregs.clif
deleted file mode 100644
index e3540f6a59..0000000000
--- a/cranelift/filetests/filetests/regalloc/spill-noregs.clif
+++ /dev/null
@@ -1,175 +0,0 @@
-test regalloc
-target x86_64 legacy
-
-; Test case found by the Binaryen fuzzer.
-;
-; The spiller panics with a
-; 'Ran out of GPR registers when inserting copy before v68 = icmp.i32 eq v66, v67',
-; cranelift-codegen/src/regalloc/spilling.rs:425:28 message.
-;
-; The process_reg_uses() function is trying to insert a copy before the icmp instruction in block4
-; and runs out of registers to spill. Note that block7 has a lot of dead parameter values.
-;
-; The spiller was not releasing register pressure for dead block parameters.
-
-function %pr223(i32 [%rdi], i64 vmctx [%rsi]) -> i64 [%rax] system_v {
-block0(v0: i32, v1: i64):
-    v2 = iconst.i32 0
-    v3 = iconst.i64 0
-    v4 = iconst.i32 0xffff_ffff_bb3f_4a2c
-    brz v4, block5
-    jump block1
-
-block1:
-    v5 = iconst.i32 0
-    v6 = copy.i64 v3
-    v7 = copy.i64 v3
-    v8 = copy.i64 v3
-    v9 = copy.i64 v3
-    v10 = copy.i64 v3
-    v11 = copy.i64 v3
-    v12 = copy.i64 v3
-    v13 = copy.i64 v3
-    v14 = copy.i64 v3
-    v15 = copy.i64 v3
-    v16 = copy.i64 v3
-    brnz v5, block4(v2, v3, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16)
-    jump block2
-
-block2:
-    v17 = iconst.i32 0
-    v18 = copy.i64 v3
-    v19 = copy.i64 v3
-    v20 = copy.i64 v3
-    v21 = copy.i64 v3
-    v22 = copy.i64 v3
-    v23 = copy.i64 v3
-    v24 = copy.i64 v3
-    v25 = copy.i64 v3
-    v26 = copy.i64 v3
-    v27 = copy.i64 v3
-    v28 = copy.i64 v3
-    brnz v17, block4(v2, v3, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28)
-    jump block3
-
-block3:
-    jump block1
-
-block4(v29: i32, v30: i64, v31: i64, v32: i64, v33: i64, v34: i64, v35: i64, v36: i64, v37: i64, v38: i64, v39: i64, v40: i64, v41: i64):
-    jump block7(v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41)
-
-block5:
-    jump block6
-
-block6:
-    v42 = copy.i64 v3
-    v43 = copy.i64 v3
-    v44 = copy.i64 v3
-    v45 = copy.i64 v3
-    v46 = copy.i64 v3
-    v47 = copy.i64 v3
-    v48 = copy.i64 v3
-    v49 = copy.i64 v3
-    v50 = copy.i64 v3
-    v51 = copy.i64 v3
-    v52 = copy.i64 v3
-    jump block7(v2, v3, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52)
-
-block7(v53: i32, v54: i64, v55: i64, v56: i64, v57: i64, v58: i64, v59: i64, v60: i64, v61: i64, v62: i64, v63: i64, v64: i64, v65: i64):
-    v66 = iconst.i32 0
-    v67 = iconst.i32 0
-    v68 = icmp eq v66, v67
-    v69 = bint.i32 v68
-    jump block8
-
-block8:
-    jump block9
-
-block9:
-    v70 = iconst.i32 0xffff_ffff_ffff_912f
-    brz v70, block10
-    jump block35
-
-block10:
-    v71 = iconst.i32 0
-    brz v71, block11
-    jump block27
-
-block11:
-    jump block12
-
-block12:
-    jump block13
-
-block13:
-    jump block14
-
-block14:
-    jump block15
-
-block15:
-    jump block16
-
-block16:
-    jump block17
-
-block17:
-    jump block18
-
-block18:
-    jump block19
-
-block19:
-    jump block20
-
-block20:
-    jump block21
-
-block21:
-    jump block22
-
-block22:
-    jump block23
-
-block23:
-    jump block24
-
-block24:
-    jump block25
-
-block25:
-    jump block26
-
-block26:
-    jump block27
-
-block27:
-    jump block28
-
-block28:
-    jump block29
-
-block29:
-    jump block30
-
-block30:
-    jump block31
-
-block31:
-    jump block32
-
-block32:
-    jump block33
-
-block33:
-    jump block34
-
-block34:
-    jump block35
-
-block35:
-    jump block36
-
-block36:
-    trap user0
-}
diff --git a/cranelift/filetests/filetests/regalloc/spill.clif b/cranelift/filetests/filetests/regalloc/spill.clif
deleted file mode 100644
index 2a3f2ad959..0000000000
--- a/cranelift/filetests/filetests/regalloc/spill.clif
+++ /dev/null
@@ -1,223 +0,0 @@
-test regalloc
-
-; Test the spiler on an ISA with few registers.
-; RV32E has 16 registers, where:
-; - %x0 is hardwired to zero.
-; - %x1 is the return address.
-; - %x2 is the stack pointer.
-; - %x3 is the global pointer.
-; - %x4 is the thread pointer.
-; - %x10-%x15 are function arguments.
-;
-; regex: V=v\d+
-; regex: WS=\s+
-
-target riscv32 legacy enable_e
-
-; In straight-line code, the first value defined is spilled.
-; That is in order:
-; 1. The argument v1.
-; 2. The link register.
-; 3. The first computed value, v2
-function %pyramid(i32) -> i32 {
-; check: ss0 = spill_slot 4
-; check: ss1 = spill_slot 4
-; check: ss2 = spill_slot 4
-; not: spill_slot
-block0(v1: i32):
-; check: block0($(rv1=$V): i32 [%x10], $(rlink=$V): i32 [%x1])
-    ; check: ,ss0]$WS v1 = spill $rv1
-    ; nextln: ,ss1]$WS $(link=$V) = spill $rlink
-    ; not: spill
-    v2 = iadd_imm v1, 12
-    ; check: $(r1v2=$V) = iadd_imm
-    ; nextln: ,ss2]$WS v2 = spill $r1v2
-    ; not: spill
-    v3 = iadd_imm v2, 12
-    v4 = iadd_imm v3, 12
-    v5 = iadd_imm v4, 12
-    v6 = iadd_imm v5, 12
-    v7 = iadd_imm v6, 12
-    v8 = iadd_imm v7, 12
-    v9 = iadd_imm v8, 12
-    v10 = iadd_imm v9, 12
-    v11 = iadd_imm v10, 12
-    v12 = iadd_imm v11, 12
-    v13 = iadd_imm v12, 12
-    v14 = iadd_imm v13, 12
-    v33 = iadd v13, v14
-    ; check: iadd v13
-    v32 = iadd v33, v12
-    v31 = iadd v32, v11
-    v30 = iadd v31, v10
-    v29 = iadd v30, v9
-    v28 = iadd v29, v8
-    v27 = iadd v28, v7
-    v26 = iadd v27, v6
-    v25 = iadd v26, v5
-    v24 = iadd v25, v4
-    v23 = iadd v24, v3
-    v22 = iadd v23, v2
-    ; check: $(r2v2=$V) = fill v2
-    ; check: v22 = iadd v23, $r2v2
-    v21 = iadd v22, v1
-    ; check: $(r2v1=$V) = fill v1
-    ; check: v21 = iadd v22, $r2v1
-    ; check: $(rlink2=$V) = fill $link
-    return v21
-    ; check: return v21, $rlink2
-}
-
-; All values live across a call must be spilled
-function %across_call(i32) {
-    fn0 = %foo(i32)
-block0(v1: i32):
-    ; check: v1 = spill
-    call fn0(v1)
-    ; check: call fn0
-    call fn0(v1)
-    ; check: fill v1
-    ; check: call fn0
-    return
-}
-
-; The same value used for two function arguments.
-function %doubleuse(i32) {
-    fn0 = %xx(i32, i32)
-block0(v0: i32):
-    ; check: $(c=$V) = copy v0
-    call fn0(v0, v0)
-    ; check: call fn0(v0, $c)
-    return
-}
-
-; The same value used as indirect callee and argument.
-function %doubleuse_icall1(i32) {
-    sig0 = (i32) system_v
-block0(v0: i32):
-    ; not:copy
-    call_indirect sig0, v0(v0)
-    return
-}
-
-; The same value used as indirect callee and two arguments.
-function %doubleuse_icall2(i32) {
-    sig0 = (i32, i32) system_v
-block0(v0: i32):
-    ; check: $(c=$V) = copy v0
-    call_indirect sig0, v0(v0, v0)
-    ; check: call_indirect sig0, v0(v0, $c)
-    return
-}
-
-; Two arguments on the stack.
-function %stackargs(i32, i32, i32, i32, i32, i32, i32, i32) -> i32 {
-; check: ss0 = incoming_arg 4
-; check: ss1 = incoming_arg 4, offset 4
-; not: incoming_arg
-block0(v0: i32, v1: i32, v2: i32, v3: i32, v4: i32, v5: i32, v6: i32, v7: i32):
-    ; unordered: fill v6
-    ; unordered: fill v7
-    v10 = iadd v6, v7
-    return v10
-}
-
-; More block arguments than registers.
-function %blockargs(i32) -> i32 {
-block0(v1: i32):
-    ; check: v1 = spill
-    v2 = iconst.i32 1
-    jump block1(v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2)
-
-block1(v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32, v20: i32, v21: i32):
-    v22 = iadd v10, v11
-    v23 = iadd v22, v12
-    v24 = iadd v23, v13
-    v25 = iadd v24, v14
-    v26 = iadd v25, v15
-    v27 = iadd v26, v16
-    v28 = iadd v27, v17
-    v29 = iadd v28, v18
-    v30 = iadd v29, v19
-    v31 = iadd v30, v20
-    v32 = iadd v31, v21
-    v33 = iadd v32, v1
-    return v33
-}
-
-; Spilling a block argument to make room for a branch operand.
-function %brargs(i32) -> i32 {
-block0(v1: i32):
-    ; check: v1 = spill
-    v2 = iconst.i32 1
-    brnz v1, block1(v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2, v2)
-    jump block2
-
-block2:
-    return v1
-
-block1(v10: i32, v11: i32, v12: i32, v13: i32, v14: i32, v15: i32, v16: i32, v17: i32, v18: i32, v19: i32, v20: i32, v21: i32):
-    v22 = iadd v10, v11
-    v23 = iadd v22, v12
-    v24 = iadd v23, v13
-    v25 = iadd v24, v14
-    v26 = iadd v25, v15
-    v27 = iadd v26, v16
-    v28 = iadd v27, v17
-    v29 = iadd v28, v18
-    v30 = iadd v29, v19
-    v31 = iadd v30, v20
-    v32 = iadd v31, v21
-    v33 = iadd v32, v1
-    return v33
-}
-
-; In straight-line code, the first value defined is spilled.
-; That is in order:
-; 1. The argument v1.
-; 2. The link register.
-; 3. The first computed value, v2
-function %use_spilled_value(i32) -> i32 {
-; check: ss0 = spill_slot 4
-; check: ss1 = spill_slot 4
-; check: ss2 = spill_slot 4
-block0(v1: i32):
-; check: block0($(rv1=$V): i32 [%x10], $(rlink=$V): i32 [%x1])
-    ; check: ,ss0]$WS v1 = spill $rv1
-    ; nextln: ,ss1]$WS $(link=$V) = spill $rlink
-    ; not: spill
-    v2 = iadd_imm v1, 12
-    ; check: $(r1v2=$V) = iadd_imm
-    ; nextln: ,ss2]$WS v2 = spill $r1v2
-    v3 = iadd_imm v2, 12
-    v4 = iadd_imm v3, 12
-    v5 = iadd_imm v4, 12
-    v6 = iadd_imm v5, 12
-    v7 = iadd_imm v6, 12
-    v8 = iadd_imm v7, 12
-    v9 = iadd_imm v8, 12
-    v10 = iadd_imm v9, 12
-    v11 = iadd_imm v10, 12
-    v12 = iadd_imm v11, 12
-    v13 = iadd_imm v12, 12
-    v14 = iadd_imm v13, 12
-
-    ; Here we have maximum register pressure, and v2 has been spilled.
-    ; What happens if we use it?
-    v33 = iadd v2, v14
-    v32 = iadd v33, v12
-    v31 = iadd v32, v11
-    v30 = iadd v31, v10
-    v29 = iadd v30, v9
-    v28 = iadd v29, v8
-    v27 = iadd v28, v7
-    v26 = iadd v27, v6
-    v25 = iadd v26, v5
-    v24 = iadd v25, v4
-    v23 = iadd v24, v3
-    v22 = iadd v23, v2
-    v21 = iadd v22, v1
-    v20 = iadd v21, v13
-    v19 = iadd v20, v2
-    return v21
-}
diff --git a/cranelift/filetests/filetests/regalloc/unreachable_code.clif b/cranelift/filetests/filetests/regalloc/unreachable_code.clif
deleted file mode 100644
index 219a299880..0000000000
--- a/cranelift/filetests/filetests/regalloc/unreachable_code.clif
+++ /dev/null
@@ -1,47 +0,0 @@
-; Use "test compile" here otherwise the dead blocks won't be eliminated.
-test compile
-
-set enable_probestack=0
-target x86_64 legacy haswell
-
-; This function contains unreachable blocks which trip up the register
-; allocator if they don't get cleared out.
-function %unreachable_blocks(i64 vmctx) -> i32 baldrdash_system_v {
-block0(v0: i64):
-    v1 = iconst.i32 0
-    v2 = iconst.i32 0
-    jump block2
-
-block2:
-    jump block4
-
-block4:
-    jump block2
-
-; Everything below this point is unreachable.
-
-block3(v3: i32):
-    v5 = iadd.i32 v2, v3
-    jump block6
-
-block6:
-    jump block6
-
-block7(v6: i32):
-    v7 = iadd.i32 v5, v6
-    jump block8
-
-block8:
-    jump block10
-
-block10:
-    jump block8
-
-block9(v8: i32):
-    v10 = iadd.i32 v7, v8
-    jump block1(v10)
-
-block1(v11: i32):
-    return v11
-}
-
diff --git a/cranelift/filetests/filetests/regalloc/x86-regres.clif b/cranelift/filetests/filetests/regalloc/x86-regres.clif
deleted file mode 100644
index 935b33c5b7..0000000000
--- a/cranelift/filetests/filetests/regalloc/x86-regres.clif
+++ /dev/null
@@ -1,49 +0,0 @@
-test regalloc
-target i686 legacy
-
-; regex: V=v\d+
-; regex: BB=block\d+
-
-; The value v9 appears both as the branch control and one of the block arguments
-; in the brnz instruction in block2. It also happens that v7 and v9 are assigned
-; to the same register, so v9 doesn't need to be moved before the brnz.
-;
-; This ended up confusong the constraint solver which had not made a record of
-; the fixed register assignment for v9 since it was already in the correct
-; register.
-function %pr147(i32) -> i32 system_v {
-block0(v0: i32):
-    v1 = iconst.i32 0
-    v2 = iconst.i32 1
-    v3 = iconst.i32 0
-    jump block2(v3, v2, v0)
-
-    ; check: $(splitEdge=$BB):
-    ; check:   jump block2($V, $V, v9)
-
-block2(v4: i32, v5: i32, v7: i32):
-    ; check: block2
-    v6 = iadd v4, v5
-    v8 = iconst.i32 -1
-    ; v7 is killed here and v9 gets the same register.
-    v9 = iadd v7, v8
-    ; check:   v9 = iadd v7, v8
-    ; Here v9 the brnz control appears to interfere with v9 the block argument,
-    ; so divert_fixed_input_conflicts() calls add_var(v9), which is ok. The
-    ; add_var sanity checks got confused when no fixed assignment could be
-    ; found for v9.
-    ;
-    ; We should be able to handle this situation without making copies of v9.
-    brnz v9, block2(v5, v6, v9)
-    ; check:   brnz v9, $splitEdge
-    jump block3
-
-block3:
-    return v5
-}
-
-function %select_i64(i64, i64, i32) -> i64 {
-block0(v0: i64, v1: i64, v2: i32):
-    v3 = select v2, v0, v1
-    return v3
-}
diff --git a/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif b/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif
deleted file mode 100644
index fd95cc2f4c..0000000000
--- a/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif
+++ /dev/null
@@ -1,57 +0,0 @@
-test compile
-target aarch64
-target x86_64 legacy
-
-; This checks that code shrink is allowed while relaxing code, when code shrink
-; has not run.
-
-function u0:0(i64, i64) -> i64 system_v {
-    ss1 = explicit_slot 8
-    sig0 = (i64) -> i64 system_v
-    fn0 = u0:8 sig0
-
-block0(v0: i64, v1: i64):
-    v3 = stack_addr.i64 ss1
-    v5 = call fn0(v1)
-    v6 = iconst.i64 0
-    v8 = iconst.i64 0
-    jump block3(v6, v1, v8)
-
-block3(v39: i64, v40: i64, v42: i64):
-    v9 = load.i64 v3
-    v11 = icmp_imm ugt v9, 1
-    v12 = bint.i8 v11
-    v13 = uextend.i32 v12
-    v14 = icmp_imm eq v13, 0
-    brnz v14, block4
-    jump block5
-
-block4:
-    v18 = icmp_imm.i64 eq v40, 0
-    v19 = bint.i8 v18
-    v20 = uextend.i32 v19
-    brz v20, block6
-    jump block7
-
-block7:
-    trap user0
-
-block5:
-    v22 = iconst.i32 1
-    v23 = ishl.i64 v39, v22
-    v25 = iconst.i64 1
-    v26 = band.i64 v42, v25
-    v27 = bor v23, v26
-    v28 = iconst.i32 1
-    v29 = ushr.i64 v42, v28
-    v30 = load.i64 v3
-    v31 = iconst.i32 1
-    v32 = ushr v30, v31
-    store v32, v3
-    jump block3(v27, v40, v29)
-
-block6:
-    v38 = iconst.i64 0
-    return v38
-}
-
diff --git a/cranelift/filetests/filetests/safepoint/basic.clif b/cranelift/filetests/filetests/safepoint/basic.clif
deleted file mode 100644
index 47acf2ad72..0000000000
--- a/cranelift/filetests/filetests/safepoint/basic.clif
+++ /dev/null
@@ -1,71 +0,0 @@
-test safepoint
-set enable_safepoints=true
-target x86_64 legacy
-
-function %test(i32, r64, r64) -> r64 {
-    block0(v0: i32, v1:r64, v2:r64):
-        jump block1(v0)
-    block1(v3: i32):
-        v4 = irsub_imm v3, 1
-        jump block2(v4)
-    block2(v5: i32):
-        resumable_trap interrupt
-        brz v5, block1(v5)
-        jump block3
-    block3:
-        v6 = null.r64
-        v7 = is_null v6
-        brnz v7, block2(v0)
-        jump block4
-    block4:
-        brnz v0, block5
-        jump block6
-    block5:
-        return v1
-    block6:
-        return v2
-}
-
-; sameln: function %test(i32 [%rdi], r64 [%rsi], r64 [%rdx]) -> r64 [%rax] fast {
-; nextln: block0(v0: i32 [%rdi], v1: r64 [%rsi], v2: r64 [%rdx]):
-; nextln:   v10 = copy v0
-; nextln:   jump block1(v10)
-; nextln: 
-; nextln: block7:
-; nextln:   regmove.i32 v5, %rcx -> %rax
-; nextln:   jump block1(v5)
-; nextln: 
-; nextln: block1(v3: i32 [%rax]):
-; nextln:   v8 = iconst.i32 1
-; nextln:   v4 = isub v8, v3
-; nextln:   jump block2(v4)
-; nextln: 
-; nextln: block8:
-; nextln:   v9 = copy.i32 v0
-; nextln:   regmove v9, %rax -> %rcx
-; nextln:   jump block2(v9)
-; nextln: 
-; nextln: block2(v5: i32 [%rcx]):
-; nextln:   safepoint v1, v2
-; nextln:   resumable_trap interrupt
-; nextln:   brz v5, block7
-; nextln:   jump block3
-; nextln: 
-; nextln: block3:
-; nextln:   v6 = null.r64 
-; nextln:   v7 = is_null v6
-; nextln:   brnz v7, block8
-; nextln:   jump block4
-; nextln: 
-; nextln: block4:
-; nextln:   brnz.i32 v0, block5
-; nextln:   jump block6
-; nextln: 
-; nextln: block5:
-; nextln:   regmove.r64 v1, %rsi -> %rax
-; nextln:   return v1
-; nextln: 
-; nextln: block6:
-; nextln:   regmove.r64 v2, %rdx -> %rax
-; nextln:   return v2
-; nextln: }
diff --git a/cranelift/filetests/filetests/safepoint/call.clif b/cranelift/filetests/filetests/safepoint/call.clif
deleted file mode 100644
index ffcf41fb46..0000000000
--- a/cranelift/filetests/filetests/safepoint/call.clif
+++ /dev/null
@@ -1,58 +0,0 @@
-test safepoint
-set enable_safepoints=true
-target x86_64 legacy
-
-function %direct() -> r64 {
-    fn0 = %none()
-    fn1 = %one() -> r64
-    fn2 = %two() -> i32, r64
-
-block0:
-    call fn0()
-    v1 = call fn1()
-    v2, v3 = call fn2()
-    brz v2, block2
-    jump block1
-block1:
-    return v1
-block2:
-    v4 = call fn1()
-    return v3
-}
-
-; sameln: function %direct() -> r64 [%rax] fast {
-; nextln: ss0 = spill_slot 8
-; nextln: ss1 = spill_slot 8
-; nextln: sig0 = () fast
-; nextln: sig1 = () -> r64 [%rax] fast
-; nextln: sig2 = () -> i32 [%rax], r64 [%rdx] fast
-; nextln: fn0 = %none sig0
-; nextln: fn1 = %one sig1
-; nextln: fn2 = %two sig2
-; nextln: 
-; nextln: block0:
-; nextln:   v5 = func_addr.i64 fn0
-; nextln:   call_indirect sig0, v5()
-; nextln:   v6 = func_addr.i64 fn1
-; nextln:   v9 = call_indirect sig1, v6()
-; nextln:   v1 = spill v9
-; nextln:   v7 = func_addr.i64 fn2
-; nextln:   safepoint v1
-; nextln:   v2, v10 = call_indirect sig2, v7()
-; nextln:   v3 = spill v10
-; nextln:   brz v2, block2
-; nextln:   jump block1
-; nextln: 
-; nextln: block1:
-; nextln:   v11 = fill.r64 v1
-; nextln:   regmove v11, %r15 -> %rax
-; nextln:   return v11
-; nextln: 
-; nextln: block2:
-; nextln:   v8 = func_addr.i64 fn1
-; nextln:   safepoint v3
-; nextln:   v4 = call_indirect sig1, v8()
-; nextln:   v12 = fill.r64 v3
-; nextln:   regmove v12, %r15 -> %rax
-; nextln:   return v12
-; nextln: }
diff --git a/cranelift/filetests/filetests/simple_preopt/simplify_instruction_into_alias_of_value.clif b/cranelift/filetests/filetests/simple_preopt/simplify_instruction_into_alias_of_value.clif
deleted file mode 100644
index 5d10588da3..0000000000
--- a/cranelift/filetests/filetests/simple_preopt/simplify_instruction_into_alias_of_value.clif
+++ /dev/null
@@ -1,18 +0,0 @@
-test simple_preopt
-target aarch64
-target x86_64
-
-;; The `isub` is a no-op, but we can't replace the whole `isub` instruction with
-;; its `v2` operand's instruction because `v2` is one of many results. Instead,
-;; we need to make an alias `v3 -> v2`.
-
-function %replace_inst_with_alias() -> i32 {
-block0:
-    v0 = iconst.i32 0
-    v1, v2 = x86_smulx v0, v0
-    v3 = isub v2, v0
-    ; check:  v0 = iconst.i32 0
-    ; nextln: v1, v2 = x86_smulx v0, v0
-    ; nextln: v3 -> v2
-    return v3
-}
diff --git a/cranelift/filetests/filetests/stack_maps/call.clif b/cranelift/filetests/filetests/stack_maps/call.clif
deleted file mode 100644
index 6563ad450a..0000000000
--- a/cranelift/filetests/filetests/stack_maps/call.clif
+++ /dev/null
@@ -1,103 +0,0 @@
-test stack_maps
-set enable_safepoints=true
-target x86_64 legacy
-
-function %icall_fast(r64) -> r64 fast {
-; check:  function %icall_fast
-; nextln:     ss0 = spill_slot 8, offset -32
-    fn0 = %none()
-block0(v0: r64):
-; check: ss0]         v0 = spill v2
-; check:              safepoint v0
-    call fn0()
-    return v0
-}
-; check:  Stack maps:
-; nextln: 
-; nextln: safepoint v0
-; nextln:   - mapped words: 4
-; nextln:   - live: [0]
-
-function %icall_sys_v(r64) -> r64 system_v {
-; check:  function %icall_sys_v
-; nextln:     ss0 = spill_slot 8, offset -32
-    fn0 = %none()
-block0(v0: r64):
-; check: ss0]         v0 = spill v2
-; check:              safepoint v0
-    call fn0()
-    return v0
-}
-; check:  Stack maps:
-; nextln: 
-; nextln: safepoint v0
-; nextln:   - mapped words: 4
-; nextln:   - live: [0]
-
-function %icall_fastcall(r64) -> r64 windows_fastcall {
-; check:  function %icall_fastcall
-; nextln:     ss0 = spill_slot 8, offset -32
-; nextln:     ss1 = incoming_arg 24, offset -24
-; nextln:     ss2 = explicit_slot 32, offset -64
-    fn0 = %none()
-block0(v0: r64):
-; check: ss0]         v0 = spill v2
-; check:              safepoint v0
-    call fn0()
-    return v0
-}
-; check:  Stack maps:
-; nextln: 
-; nextln: safepoint v0
-; nextln:   - mapped words: 8
-; nextln:   - live: [4]
-
-function %call_fast(r64) -> r64 fast {
-; check:  function %call_fast
-; nextln:     ss0 = spill_slot 8, offset -32
-    fn0 = colocated %none()
-block0(v0: r64):
-; check: ss0]         v0 = spill v1
-; check:              safepoint v0
-    call fn0()
-    return v0
-}
-; check:  Stack maps:
-; nextln: 
-; nextln: safepoint v0
-; nextln:   - mapped words: 4
-; nextln:   - live: [0]
-
-function %call_sys_v(r64) -> r64 system_v {
-; check:  function %call_sys_v
-; nextln:     ss0 = spill_slot 8, offset -32
-    fn0 = colocated %none()
-block0(v0: r64):
-; check: ss0]         v0 = spill v1
-; check:              safepoint v0
-    call fn0()
-    return v0
-}
-; check:  Stack maps:
-; nextln: 
-; nextln: safepoint v0
-; nextln:   - mapped words: 4
-; nextln:   - live: [0]
-
-function %call_fastcall(r64) -> r64 windows_fastcall {
-; check:  function %call_fastcall
-; nextln:     ss0 = spill_slot 8, offset -32
-; nextln:     ss1 = incoming_arg 24, offset -24
-; nextln:     ss2 = explicit_slot 32, offset -64
-    fn0 = colocated %none()
-block0(v0: r64):
-; check: ss0]         v0 = spill v1
-; check:              safepoint v0
-    call fn0()
-    return v0
-}
-; check:  Stack maps:
-; nextln: 
-; nextln: safepoint v0
-; nextln:   - mapped words: 8
-; nextln:   - live: [4]
diff --git a/cranelift/filetests/filetests/stack_maps/incoming_args.clif b/cranelift/filetests/filetests/stack_maps/incoming_args.clif
deleted file mode 100644
index e8231c3aad..0000000000
--- a/cranelift/filetests/filetests/stack_maps/incoming_args.clif
+++ /dev/null
@@ -1,30 +0,0 @@
-test stack_maps
-set enable_safepoints=true
-target x86_64 legacy
-
-;; Incoming args get included in stack maps.
-
-function %incoming_args(r64, r64, r64, r64, r64) -> r64 windows_fastcall {
-; check:  r64 [32]
-; nextln: ss0 = incoming_arg 8, offset 32
-; nextln: ss1 = incoming_arg 24, offset -24
-; nextln: ss2 = explicit_slot 32, offset -64
-
-    fn0 = %none()
-; nextln: sig0 = () fast
-; nextln: fn0 = %none sig0
-
-block0(v0: r64, v1: r64, v2: r64, v3: r64, v4: r64):
-; check: v4: r64 [ss0]
-
-    call fn0()
-; check:  safepoint v4
-; nextln: call_indirect
-    return v4
-}
-
-; check:  Stack maps:
-; nextln: 
-; nextln: safepoint v4
-; nextln:   - mapped words: 13
-; nextln:   - live: [12]
diff --git a/cranelift/filetests/filetests/verifier/flags.clif b/cranelift/filetests/filetests/verifier/flags.clif
deleted file mode 100644
index 088523d24a..0000000000
--- a/cranelift/filetests/filetests/verifier/flags.clif
+++ /dev/null
@@ -1,77 +0,0 @@
-test verifier
-target aarch64
-target i686
-
-; Simple, correct use of CPU flags.
-function %simple(i32) -> i32 {
-                    block0(v0: i32):
-    [DynRexOp1rcmp#39]        v1 = ifcmp v0, v0
-    [Op2seti_abcd#490]        v2 = trueif ugt v1
-    [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2
-    [Op1ret#c3]               return v3
-}
-
-; Overlapping flag values of different types.
-function %overlap(i32, f32) -> i32 {
-                    block0(v0: i32, v1: f32):
-    [DynRexOp1rcmp#39]        v2 = ifcmp v0, v0
-    [Op2fcmp#42e]             v3 = ffcmp v1, v1
-    [Op2setf_abcd#490]        v4 = trueff gt v3 ; error: conflicting live CPU flags: v2 and v3
-    [Op2seti_abcd#490]        v5 = trueif ugt v2
-    [Op1rr#21]                v6 = band v4, v5
-    [Op2urm_noflags_abcd#4b6] v7 = bint.i32 v6
-    [Op1ret#c3]               return v7
-}
-
-; CPU flags clobbered by arithmetic.
-function %clobbered(i32) -> i32 {
-                    block0(v0: i32):
-    [DynRexOp1rcmp#39]        v1 = ifcmp v0, v0
-    [DynRexOp1rr#01]          v2 = iadd v0, v0 ; error: encoding clobbers live CPU flags in v1
-    [Op2seti_abcd#490]        v3 = trueif ugt v1
-    [Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3
-    [Op1ret#c3]               return v4
-}
-
-; CPU flags not clobbered by load.
-function %live_across_load(i32) -> i32 {
-                    block0(v0: i32):
-    [DynRexOp1rcmp#39]        v1 = ifcmp v0, v0
-    [Op1ld#8b]                v2 = load.i32 v0
-    [Op2seti_abcd#490]        v3 = trueif ugt v1
-    [Op2urm_noflags_abcd#4b6] v4 = bint.i32 v3
-    [Op1ret#c3]               return v4
-}
-
-; Correct use of CPU flags across block.
-function %live_across_block(i32) -> i32 {
-                          block0(v0: i32):
-    [DynRexOp1rcmp#39]        v1 = ifcmp v0, v0
-    [Op1jmpb#eb]              jump block1
-                          block1:
-    [Op2seti_abcd#490]        v2 = trueif ugt v1
-    [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2
-    [Op1ret#c3]               return v3
-}
-
-function %live_across_block_backwards(i32) -> i32 {
-                          block0(v0: i32):
-    [Op1jmpb#eb]              jump block2
-                          block1:
-    [Op2seti_abcd#490]        v2 = trueif ugt v1
-    [Op2urm_noflags_abcd#4b6] v3 = bint.i32 v2
-    [Op1ret#c3]               return v3
-                          block2:
-    [DynRexOp1rcmp#39]        v1 = ifcmp v0, v0
-    [Op1jmpb#eb]              jump block1
-}
-
-; Flags live into loop.
-function %live_into_loop(i32) -> i32 {
-                    block0(v0: i32):
-    [DynRexOp1rcmp#39]  v1 = ifcmp v0, v0
-    [Op1jmpb#eb]        jump block1
-                    block1:
-    [Op2seti_abcd#490]  v2 = trueif ugt v1
-    [Op1jmpb#eb]        jump block1
-}
diff --git a/cranelift/filetests/filetests/wasm/multi-val-b1.clif b/cranelift/filetests/filetests/wasm/multi-val-b1.clif
deleted file mode 100644
index f41f867918..0000000000
--- a/cranelift/filetests/filetests/wasm/multi-val-b1.clif
+++ /dev/null
@@ -1,68 +0,0 @@
-test compile
-target x86_64 legacy haswell
-
-;; `b1` return values need to be legalized into bytes so that they can be stored
-;; in memory.
-
-function %return_4_b1s(b1, b1, b1, b1) -> b1, b1, b1, b1 {
-;; check: function %return_4_b1s(b1 [%rsi], b1 [%rdx], b1 [%rcx], b1 [%r8], i64 sret [%rdi], i64 fp [%rbp]) -> i64 sret [%rax], i64 fp [%rbp] fast {
-
-block0(v0: b1, v1: b1, v2: b1, v3: b1):
-; check: block0(v0: b1 [%rsi], v1: b1 [%rdx], v2: b1 [%rcx], v3: b1 [%r8], v4: i64 [%rdi], v13: i64 [%rbp]):
-
-    return v0, v1, v2, v3
-    ; check:  v5 = bint.i8 v0
-    ; nextln: v9 = uextend.i32 v5
-    ; nextln: istore8 notrap aligned v9, v4
-    ; nextln: v6 = bint.i8 v1
-    ; nextln: v10 = uextend.i32 v6
-    ; nextln: istore8 notrap aligned v10, v4+1
-    ; nextln: v7 = bint.i8 v2
-    ; nextln: v11 = uextend.i32 v7
-    ; nextln: istore8 notrap aligned v11, v4+2
-    ; nextln: v8 = bint.i8 v3
-    ; nextln: v12 = uextend.i32 v8
-    ; nextln: istore8 notrap aligned v12, v4+3
-}
-
-function %call_4_b1s() {
-; check: function %call_4_b1s(i64 fp [%rbp], i64 csr [%rbx]) -> i64 fp [%rbp], i64 csr [%rbx] fast {
-; nextln:    ss0 = sret_slot 4, offset -28
-
-    fn0 = colocated %return_4_b1s(b1, b1, b1, b1) -> b1, b1, b1, b1
-    ; check: sig0 = (b1 [%rsi], b1 [%rdx], b1 [%rcx], b1 [%r8], i64 sret [%rdi]) -> i64 sret [%rax] fast
-
-block0:
-; check: block0(v26: i64 [%rbp], v27: i64 [%rbx]):
-
-    v0 = bconst.b1 true
-    v1 = bconst.b1 false
-    v2 = bconst.b1 true
-    v3 = bconst.b1 false
-
-    ; check: v8 = stack_addr.i64 ss0
-    v4, v5, v6, v7 = call fn0(v0, v1, v2, v3)
-    ; check:  v9 = call fn0(v0, v1, v2, v3, v8)
-    ; nextln: v22 = uload8.i32 notrap aligned v9
-    ; nextln: v10 = ireduce.i8 v22
-    ; nextln: v11 = raw_bitcast.b8 v10
-    ; nextln: v12 = breduce.b1 v11
-    ; nextln: v4 -> v12
-    ; nextln: v23 = uload8.i32 notrap aligned v9+1
-    ; nextln: v13 = ireduce.i8 v23
-    ; nextln: v14 = raw_bitcast.b8 v13
-    ; nextln: v15 = breduce.b1 v14
-    ; nextln: v5 -> v15
-    ; nextln: v24 = uload8.i32 notrap aligned v9+2
-    ; nextln: v16 = ireduce.i8 v24
-    ; nextln: v17 = raw_bitcast.b8 v16
-    ; nextln: v18 = breduce.b1 v17
-    ; nextln: v6 -> v18
-    ; nextln: v25 = uload8.i32 notrap aligned v9+3
-    ; nextln: v19 = ireduce.i8 v25
-    ; nextln: v20 = raw_bitcast.b8 v19
-    ; nextln: v21 = breduce.b1 v20
-    ; nextln: v7 -> v21
-
-    return
-}
diff --git a/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif b/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif
deleted file mode 100644
index 06d0814dfb..0000000000
--- a/cranelift/filetests/filetests/wasm/multi-val-call-indirect.clif
+++ /dev/null
@@ -1,26 +0,0 @@
-test legalizer
-target x86_64 legacy haswell
-
-;; Indirect calls with many returns.
-
-function %call_indirect_many_rets(i64) {
-    ; check: ss0 = sret_slot 32
-
-    sig0 = () -> i64, i64, i64, i64
-    ; check: sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast
-
-block0(v0: i64):
-    v1, v2, v3, v4 = call_indirect sig0, v0()
-    ; check:  v5 = stack_addr.i64 ss0
-    ; nextln: v6 = call_indirect sig0, v0(v5)
-    ; nextln: v7 = load.i64 notrap aligned v6
-    ; nextln: v1 -> v7
-    ; nextln: v8 = load.i64 notrap aligned v6+8
-    ; nextln: v2 -> v8
-    ; nextln: v9 = load.i64 notrap aligned v6+16
-    ; nextln: v3 -> v9
-    ; nextln: v10 = load.i64 notrap aligned v6+24
-    ; nextln: v4 -> v10
-
-    return
-}
diff --git a/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif b/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif
deleted file mode 100644
index aae733ddf4..0000000000
--- a/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif
+++ /dev/null
@@ -1,24 +0,0 @@
-test legalizer
-target x86_64 legacy haswell
-
-;; Test if arguments are legalized if function uses sret
-
-function %call_indirect_with_split_arg(i64, i64, i64) {
-    ; check: ss0 = sret_slot 32
-    sig0 = (i128) -> i64, i64, i64, i64
-    ; check: sig0 = (i64 [%rsi], i64 [%rdx], i64 sret [%rdi]) -> i64 sret [%rax] fast
-block0(v0: i64, v1: i64, v2: i64):
-    v3 = iconcat v1, v2
-    v4, v5, v6, v7 = call_indirect sig0, v0(v3)
-    ; check: v8 = stack_addr.i64 ss0
-    ; check: v9 = call_indirect sig0, v0(v1, v2, v8)
-    ; check: v10 = load.i64 notrap aligned v9
-    ; check: v4 -> v10
-    ; check: v11 = load.i64 notrap aligned v9+8
-    ; check: v5 -> v11
-    ; check: v12 = load.i64 notrap aligned v9+16
-    ; check: v6 -> v12
-    ; check: v13 = load.i64 notrap aligned v9+24
-    ; check: v7 -> v13
-    return
-}
diff --git a/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif b/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif
deleted file mode 100644
index c58102aedc..0000000000
--- a/cranelift/filetests/filetests/wasm/multi-val-reuse-ret-ptr-stack-slot.clif
+++ /dev/null
@@ -1,61 +0,0 @@
-test legalizer
-target x86_64 legacy haswell
-
-;; Test that we don't reuse `sret` stack slots for multiple calls. We could do
-;; this one day, but it would require some care to ensure that we don't have
-;; subsequent calls overwrite the results of previous calls.
-
-function %foo() -> i32, f32 {
-    ; check:  ss0 = sret_slot 20
-    ; nextln: ss1 = sret_slot 20
-
-    fn0 = %f() -> i32, i32, i32, i32, i32
-    fn1 = %g() -> f32, f32, f32, f32, f32
-    ; check:  sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast
-    ; nextln: sig1 = (i64 sret [%rdi]) -> i64 sret [%rax] fast
-    ; nextln: fn0 = %f sig0
-    ; nextln: fn1 = %g sig1
-
-block0:
-    v0, v1, v2, v3, v4 = call fn0()
-    ; check:  v18 = stack_addr.i64 ss0
-    ; nextln: v25 = func_addr.i64 fn0
-    ; nextln: v19 = call_indirect sig0, v25(v18)
-    ; nextln: v20 = load.i32 notrap aligned v19
-    ; nextln: v0 -> v20
-    ; nextln: v21 = load.i32 notrap aligned v19+4
-    ; nextln: v1 -> v21
-    ; nextln: v22 = load.i32 notrap aligned v19+8
-    ; nextln: v2 -> v22
-    ; nextln: v23 = load.i32 notrap aligned v19+12
-    ; nextln: v3 -> v23
-    ; nextln: v24 = load.i32 notrap aligned v19+16
-    ; nextln: v4 -> v24
-
-    v5, v6, v7, v8, v9 = call fn1()
-    ; check:  v26 = stack_addr.i64 ss1
-    ; nextln: v33 = func_addr.i64 fn1
-    ; nextln: v27 = call_indirect sig1, v33(v26)
-    ; nextln: v28 = load.f32 notrap aligned v27
-    ; nextln: v5 -> v28
-    ; nextln: v29 = load.f32 notrap aligned v27+4
-    ; nextln: v6 -> v29
-    ; nextln: v30 = load.f32 notrap aligned v27+8
-    ; nextln: v7 -> v30
-    ; nextln: v31 = load.f32 notrap aligned v27+12
-    ; nextln: v8 -> v31
-    ; nextln: v32 = load.f32 notrap aligned v27+16
-    ; nextln: v9 -> v32
-
-    v10 = iadd v0, v1
-    v11 = iadd v2, v3
-    v12 = iadd v10, v11
-    v13 = iadd v12, v4
-
-    v14 = fadd v5, v6
-    v15 = fadd v7, v8
-    v16 = fadd v14, v15
-    v17 = fadd v16, v9
-
-    return v13, v17
-}
diff --git a/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif b/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif
deleted file mode 100644
index da9f25ed97..0000000000
--- a/cranelift/filetests/filetests/wasm/multi-val-sret-slot-alignment.clif
+++ /dev/null
@@ -1,51 +0,0 @@
-test legalizer
-target x86_64 legacy haswell
-
-;; Need to insert padding after the `i8`s so that the `i32` and `i64` are
-;; aligned.
-
-function %returner() -> i8, i32, i8, i64 {
-; check: function %returner(i64 sret [%rdi]) -> i64 sret [%rax] fast {
-
-block0:
-; check: block0(v4: i64):
-
-    v0 = iconst.i8 0
-    v1 = iconst.i32 1
-    v2 = iconst.i8 2
-    v3 = iconst.i64 3
-    return v0, v1, v2, v3
-    ; check:  v6 = uextend.i32 v0
-    ; nextln: istore8 notrap aligned v6, v4
-    ; nextln: store notrap aligned v1, v4+4
-    ; nextln: v7 = uextend.i32 v2
-    ; nextln: istore8 notrap aligned v7, v4+8
-    ; nextln: store notrap aligned v3, v4+16
-    ; nextln: return v4
-}
-
-function %caller() {
-    ; check:  ss0 = sret_slot 24
-
-    fn0 = %returner() -> i8, i32, i8, i64
-    ; check:  sig0 = (i64 sret [%rdi]) -> i64 sret [%rax] fast
-    ; nextln: fn0 = %returner sig0
-
-block0:
-    v0, v1, v2, v3 = call fn0()
-    ; check:  v4 = stack_addr.i64 ss0
-    ; nextln: v10 = func_addr.i64 fn0
-    ; nextln: v5 = call_indirect sig0, v10(v4)
-    ; nextln: v11 = uload8.i32 notrap aligned v5
-    ; nextln: v6 = ireduce.i8 v11
-    ; nextln: v0 -> v6
-    ; nextln: v7 = load.i32 notrap aligned v5+4
-    ; nextln: v1 -> v7
-    ; nextln: v12 = uload8.i32 notrap aligned v5+8
-    ; nextln: v8 = ireduce.i8 v12
-    ; nextln: v2 -> v8
-    ; nextln: v9 = load.i64 notrap aligned v5+16
-    ; nextln: v3 -> v9
-
-    return
-}
diff --git a/cranelift/filetests/src/function_runner.rs b/cranelift/filetests/src/function_runner.rs
index d764b916e5..6a7fb5a282 100644
--- a/cranelift/filetests/src/function_runner.rs
+++ b/cranelift/filetests/src/function_runner.rs
@@ -47,8 +47,8 @@ impl SingleFunctionCompiler {
     }
 
     /// Build a [SingleFunctionCompiler] using the host machine's ISA and the passed flags.
-    pub fn with_host_isa(flags: settings::Flags, variant: BackendVariant) -> Self {
-        let builder = builder_with_options(variant, true)
+    pub fn with_host_isa(flags: settings::Flags) -> Self {
+        let builder = builder_with_options(true)
             .expect("Unable to build a TargetIsa for the current host");
         let isa = builder.finish(flags);
         Self::new(isa)
@@ -58,7 +58,7 @@ impl SingleFunctionCompiler {
     /// ISA.
     pub fn with_default_host_isa() -> Self {
         let flags = settings::Flags::new(settings::builder());
-        Self::with_host_isa(flags, BackendVariant::Any)
+        Self::with_host_isa(flags)
     }
 
     /// Compile the passed [Function] to a `CompiledFunction`. This function will:
diff --git a/cranelift/filetests/src/test_run.rs b/cranelift/filetests/src/test_run.rs
index 4b9e528cfd..86b346e21b 100644
--- a/cranelift/filetests/src/test_run.rs
+++ b/cranelift/filetests/src/test_run.rs
@@ -48,11 +48,10 @@ impl SubTest for TestRun {
             );
             return Ok(());
         }
-        let variant = context.isa.unwrap().variant();
 
         let test_env = RuntestEnvironment::parse(&context.details.comments[..])?;
 
-        let mut compiler = SingleFunctionCompiler::with_host_isa(context.flags.clone(), variant);
+        let mut compiler = SingleFunctionCompiler::with_host_isa(context.flags.clone());
         for comment in context.details.comments.iter() {
             if let Some(command) = parse_run_command(comment.text, &func.signature)? {
                 trace!("Parsed run command: {}", command);
diff --git a/cranelift/interpreter/src/step.rs b/cranelift/interpreter/src/step.rs
index a82be29ace..93c6bf0a3f 100644
--- a/cranelift/interpreter/src/step.rs
+++ b/cranelift/interpreter/src/step.rs
@@ -1033,44 +1033,6 @@ where
         }
         Opcode::IaddPairwise => assign(binary_pairwise(arg(0)?, arg(1)?, ctrl_ty, Value::add)?),
 
-        // TODO: these instructions should be removed once the new backend makes these obsolete
-        // (see https://github.com/bytecodealliance/wasmtime/issues/1936); additionally, the
-        // "all-arch" feature for cranelift-codegen would become unnecessary for this crate.
-        Opcode::X86Udivmodx
-        | Opcode::X86Sdivmodx
-        | Opcode::X86Umulx
-        | Opcode::X86Smulx
-        | Opcode::X86Cvtt2si
-        | Opcode::X86Vcvtudq2ps
-        | Opcode::X86Fmin
-        | Opcode::X86Fmax
-        | Opcode::X86Push
-        | Opcode::X86Pop
-        | Opcode::X86Bsr
-        | Opcode::X86Bsf
-        | Opcode::X86Pshufd
-        | Opcode::X86Pshufb
-        | Opcode::X86Pblendw
-        | Opcode::X86Pextr
-        | Opcode::X86Pinsr
-        | Opcode::X86Insertps
-        | Opcode::X86Punpckh
-        | Opcode::X86Punpckl
-        | Opcode::X86Movsd
-        | Opcode::X86Movlhps
-        | Opcode::X86Psll
-        | Opcode::X86Psrl
-        | Opcode::X86Psra
-        | Opcode::X86Pmullq
-        | Opcode::X86Pmuludq
-        | Opcode::X86Ptest
-        | Opcode::X86Pmaxs
-        | Opcode::X86Pmaxu
-        | Opcode::X86Pmins
-        | Opcode::X86Pminu
-        | Opcode::X86Palignr
-        | Opcode::X86ElfTlsGetAddr
-        | Opcode::X86MachoTlsGetAddr => unimplemented!("x86 instruction: {}", inst.opcode()),
         Opcode::JumpTableBase | Opcode::JumpTableEntry | Opcode::IndirectJumpTableBr => {
             unimplemented!("Legacy instruction: {}", inst.opcode())
         }
diff --git a/cranelift/native/src/lib.rs b/cranelift/native/src/lib.rs
index b2364c6ad9..c2a5aa78b8 100644
--- a/cranelift/native/src/lib.rs
+++ b/cranelift/native/src/lib.rs
@@ -30,7 +30,7 @@ use target_lexicon::Triple;
 /// machine, or `Err(())` if the host machine is not supported
 /// in the current configuration.
 pub fn builder() -> Result<isa::Builder, &'static str> {
-    builder_with_options(isa::BackendVariant::Any, true)
+    builder_with_options(true)
 }
 
 /// Return an `isa` builder configured for the current host
@@ -40,17 +40,11 @@ pub fn builder() -> Result<isa::Builder, &'static str> {
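-/// Selects the given backend variant specifically; this is
-/// useful when more than oen backend exists for a given target
-/// (e.g., on x86-64).
+/// The `infer_native_flags` argument presumably controls whether ISA
+/// flags are inferred from features detected on the host CPU
+/// (NOTE(review): confirm against the function body).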
-pub fn builder_with_options(
-    variant: isa::BackendVariant,
-    infer_native_flags: bool,
-) -> Result<isa::Builder, &'static str> {
-    let mut isa_builder =
-        isa::lookup_variant(Triple::host(), variant).map_err(|err| match err {
-            isa::LookupError::SupportDisabled => {
-                "support for architecture disabled at compile time"
-            }
-            isa::LookupError::Unsupported => "unsupported architecture",
-        })?;
+pub fn builder_with_options(infer_native_flags: bool) -> Result<isa::Builder, &'static str> {
+    let mut isa_builder = isa::lookup_variant(Triple::host()).map_err(|err| match err {
+        isa::LookupError::SupportDisabled => "support for architecture disabled at compile time",
+        isa::LookupError::Unsupported => "unsupported architecture",
+    })?;
 
     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
     {
diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs
index a8f20230fc..98c46ab2e3 100644
--- a/cranelift/reader/src/parser.rs
+++ b/cranelift/reader/src/parser.rs
@@ -22,7 +22,7 @@ use cranelift_codegen::ir::{
     HeapStyle, JumpTable, JumpTableData, MemFlags, Opcode, SigRef, Signature, StackSlot,
     StackSlotData, StackSlotKind, Table, TableData, Type, Value, ValueLoc,
 };
-use cranelift_codegen::isa::{self, BackendVariant, CallConv, Encoding, RegUnit, TargetIsa};
+use cranelift_codegen::isa::{self, CallConv, Encoding, RegUnit, TargetIsa};
 use cranelift_codegen::packed_option::ReservedValue;
 use cranelift_codegen::{settings, settings::Configurable, timing};
 use smallvec::SmallVec;
@@ -1159,19 +1159,7 @@ impl<'a> Parser<'a> {
                         Ok(triple) => triple,
                         Err(err) => return err!(loc, err),
                     };
-                    // Look for `machinst` or `legacy` option before instantiating IsaBuilder.
-                    let variant = match words.peek() {
-                        Some(&"machinst") => {
-                            words.next();
-                            BackendVariant::MachInst
-                        }
-                        Some(&"legacy") => {
-                            words.next();
-                            BackendVariant::Legacy
-                        }
-                        _ => BackendVariant::Any,
-                    };
-                    let mut isa_builder = match isa::lookup_variant(triple, variant) {
+                    let mut isa_builder = match isa::lookup_variant(triple) {
                         Err(isa::LookupError::SupportDisabled) => {
                             continue;
                         }
diff --git a/cranelift/tests/bugpoint_test.clif b/cranelift/tests/bugpoint_test.clif
index b2e9acc37e..ced5b9e809 100644
--- a/cranelift/tests/bugpoint_test.clif
+++ b/cranelift/tests/bugpoint_test.clif
@@ -300,7 +300,8 @@ block0(v0: i64, v1: i64, v2: i64):
     v241 -> v1
     v256 -> v1
     v262 -> v1
-    v3, v4 = x86_sdivmodx v0, v1, v2
+    v3 = imul v0, v1
+    v4 = imul v1, v2
     store aligned v4, v3
     v5 = load.i64 aligned v2+8
     store aligned v5, v3+8
diff --git a/crates/bench-api/Cargo.toml b/crates/bench-api/Cargo.toml
index a67b48c501..2edf20800c 100644
--- a/crates/bench-api/Cargo.toml
+++ b/crates/bench-api/Cargo.toml
@@ -31,4 +31,3 @@ wat = "1.0"
 default = ["shuffling-allocator"]
 wasi-crypto = ["wasmtime-wasi-crypto"]
 wasi-nn = ["wasmtime-wasi-nn"]
-old-x86-backend = ["wasmtime/old-x86-backend"]
diff --git a/crates/cranelift/Cargo.toml b/crates/cranelift/Cargo.toml
index 49a9069dc1..331f660f79 100644
--- a/crates/cranelift/Cargo.toml
+++ b/crates/cranelift/Cargo.toml
@@ -27,4 +27,3 @@ thiserror = "1.0.4"
 
 [features]
 all-arch = ["cranelift-codegen/all-arch"]
-old-x86-backend = ["cranelift-codegen/old-x86-backend"]
diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml
index badd61a737..b38205bfeb 100644
--- a/crates/wasmtime/Cargo.toml
+++ b/crates/wasmtime/Cargo.toml
@@ -71,9 +71,6 @@ parallel-compilation = ["rayon"]
 # Enables support for automatic cache configuration to be enabled in `Config`.
 cache = ["wasmtime-cache"]
 
-# Use Cranelift's old x86 backend.
-old-x86-backend = ["wasmtime-cranelift/old-x86-backend"]
-
 # Enables support for "async stores" as well as defining host functions as
 # `async fn` and calling functions asynchronously.
 async = ["wasmtime-fiber", "wasmtime-runtime/async"]
diff --git a/crates/wasmtime/src/func.rs b/crates/wasmtime/src/func.rs
index 5615d438c2..dd8877e8f9 100644
--- a/crates/wasmtime/src/func.rs
+++ b/crates/wasmtime/src/func.rs
@@ -1129,9 +1129,7 @@ impl Func {
     /// and similarly if a function has multiple results you can bind that too
     ///
     /// ```
-    /// # #[cfg(not(feature = "old-x86-backend"))]
     /// # use wasmtime::*;
-    /// # #[cfg(not(feature = "old-x86-backend"))]
     /// # fn foo(add_with_overflow: &Func, mut store: Store<()>) -> anyhow::Result<()> {
     /// let typed = add_with_overflow.typed::<(u32, u32), (u32, i32), _>(&store)?;
     /// let (result, overflow) = typed.call(&mut store, (u32::max_value(), 2))?;
@@ -1564,12 +1562,10 @@ macro_rules! impl_host_abi {
         #[doc(hidden)]
         #[allow(non_snake_case)]
         #[repr(C)]
-        #[cfg(not(feature = "old-x86-backend"))]
         pub struct [<TupleRet $n>]<$($u,)*> {
             $($u: $u,)*
         }
 
-        #[cfg(not(feature = "old-x86-backend"))]
         #[allow(non_snake_case, unused_assignments)]
         impl<$t: Copy, $($u: Copy,)*> HostAbi for ($t, $($u,)*) {
             type Abi = $t;
diff --git a/crates/wasmtime/src/module/registry.rs b/crates/wasmtime/src/module/registry.rs
index 89f851c488..2c5e05df89 100644
--- a/crates/wasmtime/src/module/registry.rs
+++ b/crates/wasmtime/src/module/registry.rs
@@ -134,54 +134,7 @@ impl ModuleInfo for RegisteredModule {
             // Because we know we are in Wasm code, and we must be at some kind
             // of call/safepoint, then the Cranelift backend must have avoided
             // emitting a stack map for this location because no refs were live.
-            #[cfg(not(feature = "old-x86-backend"))]
             Err(_) => return None,
-
-            // ### Old x86_64 backend specific code.
-            //
-            // Because GC safepoints are technically only associated with a
-            // single PC, we should ideally only care about `Ok(index)` values
-            // returned from the binary search. However, safepoints are inserted
-            // right before calls, and there are two things that can disturb the
-            // PC/offset associated with the safepoint versus the PC we actually
-            // use to query for the stack map:
-            //
-            // 1. The `backtrace` crate gives us the PC in a frame that will be
-            //    *returned to*, and where execution will continue from, rather than
-            //    the PC of the call we are currently at. So we would need to
-            //    disassemble one instruction backwards to query the actual PC for
-            //    the stack map.
-            //
-            //    TODO: One thing we *could* do to make this a little less error
-            //    prone, would be to assert/check that the nearest GC safepoint
-            //    found is within `max_encoded_size(any kind of call instruction)`
-            //    our queried PC for the target architecture.
-            //
-            // 2. Cranelift's stack maps only handle the stack, not
-            //    registers. However, some references that are arguments to a call
-            //    may need to be in registers. In these cases, what Cranelift will
-            //    do is:
-            //
-            //      a. spill all the live references,
-            //      b. insert a GC safepoint for those references,
-            //      c. reload the references into registers, and finally
-            //      d. make the call.
-            //
-            //    Step (c) adds drift between the GC safepoint and the location of
-            //    the call, which is where we actually walk the stack frame and
-            //    collect its live references.
-            //
-            //    Luckily, the spill stack slots for the live references are still
-            //    up to date, so we can still find all the on-stack roots.
-            //    Furthermore, we do not have a moving GC, so we don't need to worry
-            //    whether the following code will reuse the references in registers
-            //    (which would not have been updated to point to the moved objects)
-            //    or reload from the stack slots (which would have been updated to
-            //    point to the moved objects).
-            #[cfg(feature = "old-x86-backend")]
-            Err(0) => return None,
-            #[cfg(feature = "old-x86-backend")]
-            Err(i) => i - 1,
         };
 
         Some(&info.stack_maps[index].stack_map)
diff --git a/examples/multi.rs b/examples/multi.rs
index df36671ceb..b243d83cd7 100644
--- a/examples/multi.rs
+++ b/examples/multi.rs
@@ -9,7 +9,6 @@
 
 use anyhow::Result;
 
-#[cfg(not(feature = "old-x86-backend"))]
 fn main() -> Result<()> {
     use wasmtime::*;
 
@@ -63,11 +62,3 @@ fn main() -> Result<()> {
 
     Ok(())
 }
-
-// Note that this example is not supported in the off-by-default feature of the
-// old x86 compiler backend for Cranelift. Wasmtime's default configuration
-// supports this example, however.
-#[cfg(feature = "old-x86-backend")]
-fn main() -> Result<()> {
-    Ok(())
-}
diff --git a/tests/all/debug/lldb.rs b/tests/all/debug/lldb.rs
index 3e72eaf49f..dada5deee8 100644
--- a/tests/all/debug/lldb.rs
+++ b/tests/all/debug/lldb.rs
@@ -133,44 +133,6 @@ check: exited with status
     Ok(())
 }
 
-#[test]
-#[ignore]
-#[cfg(all(
-    any(target_os = "linux", target_os = "macos"),
-    target_pointer_width = "64",
-    // Ignore test on new backend. The value this is looking for is
-    // not available at the point that the breakpoint is set when
-    // compiled by the new backend.
-    feature = "old-x86-backend",
-))]
-pub fn test_debug_dwarf_ptr() -> Result<()> {
-    let output = lldb_with_script(
-        &[
-            "-g",
-            "--opt-level",
-            "0",
-            "tests/all/debug/testsuite/reverse-str.wasm",
-        ],
-        r#"b reverse-str.c:9
-r
-p __vmctx->set(),&*s
-c"#,
-    )?;
-
-    check_lldb_output(
-        &output,
-        r#"
-check: Breakpoint 1: no locations (pending)
-check: stop reason = breakpoint 1.1
-check: frame #0
-sameln: reverse(s=(__ptr =
-check: "Hello, world."
-check: resuming
-"#,
-    )?;
-    Ok(())
-}
-
 #[test]
 #[ignore]
 #[cfg(all(
diff --git a/tests/all/debug/translate.rs b/tests/all/debug/translate.rs
index 2560a71b03..aa1b79343b 100644
--- a/tests/all/debug/translate.rs
+++ b/tests/all/debug/translate.rs
@@ -109,26 +109,3 @@ check:        DW_AT_decl_line	(10)
     )
 }
 
-#[test]
-#[ignore]
-#[cfg(all(
-    any(target_os = "linux", target_os = "macos"),
-    target_arch = "x86_64",
-    target_pointer_width = "64",
-    // Ignore test on new backend. This is a specific test with hardcoded
-    // offsets and the new backend compiles the return basic-block at a different
-    // offset, causing mismatches.
-    feature = "old-x86-backend",
-))]
-fn test_debug_dwarf5_translate_lines() -> Result<()> {
-    check_line_program(
-        "tests/all/debug/testsuite/fib-wasm-dwarf5.wasm",
-        r##"
-check:   Address            Line   Column File   ISA Discriminator Flags
-check: 0x000000000000013c     15      3      1   0             0
-# The important point is that the following offset must be _after_ the `ret` instruction.
-# FIXME: this +1 increment might vary on other archs.
-nextln: 0x000000000000013d     15      3      1   0             0  end_sequence
-    "##,
-    )
-}
diff --git a/tests/all/func.rs b/tests/all/func.rs
index da4c630fa4..eef7a9efd6 100644
--- a/tests/all/func.rs
+++ b/tests/all/func.rs
@@ -525,7 +525,6 @@ fn pass_cross_store_arg() -> anyhow::Result<()> {
 }
 
 #[test]
-#[cfg_attr(feature = "old-x86-backend", ignore)]
 fn externref_signature_no_reference_types() -> anyhow::Result<()> {
     let mut config = Config::new();
     config.wasm_reference_types(false);
@@ -569,7 +568,6 @@ fn trampolines_always_valid() -> anyhow::Result<()> {
 }
 
 #[test]
-#[cfg(not(feature = "old-x86-backend"))]
 fn typed_multiple_results() -> anyhow::Result<()> {
     let mut store = Store::<()>::default();
     let module = Module::new(
@@ -647,7 +645,6 @@ fn trap_doesnt_leak() -> anyhow::Result<()> {
 }
 
 #[test]
-#[cfg(not(feature = "old-x86-backend"))]
 fn wrap_multiple_results() -> anyhow::Result<()> {
     fn test<T>(store: &mut Store<()>, t: T) -> anyhow::Result<()>
     where
diff --git a/tests/all/gc.rs b/tests/all/gc.rs
index 73ffe03ece..27c9341ea0 100644
--- a/tests/all/gc.rs
+++ b/tests/all/gc.rs
@@ -189,7 +189,6 @@ fn many_live_refs() -> anyhow::Result<()> {
 }
 
 #[test]
-#[cfg(not(feature = "old-x86-backend"))] // uses atomic instrs not implemented here
 fn drop_externref_via_table_set() -> anyhow::Result<()> {
     let (mut store, module) = ref_types_module(
         r#"
@@ -285,7 +284,6 @@ fn global_drops_externref() -> anyhow::Result<()> {
 }
 
 #[test]
-#[cfg(not(feature = "old-x86-backend"))] // uses atomic instrs not implemented here
 fn table_drops_externref() -> anyhow::Result<()> {
     test_engine(&Engine::default())?;
 
@@ -336,7 +334,6 @@ fn table_drops_externref() -> anyhow::Result<()> {
 }
 
 #[test]
-#[cfg(not(feature = "old-x86-backend"))] // uses atomic instrs not implemented here
 fn gee_i_sure_hope_refcounting_is_atomic() -> anyhow::Result<()> {
     let mut config = Config::new();
     config.wasm_reference_types(true);
@@ -426,7 +423,6 @@ fn global_init_no_leak() -> anyhow::Result<()> {
 }
 
 #[test]
-#[cfg(not(feature = "old-x86-backend"))]
 fn no_gc_middle_of_args() -> anyhow::Result<()> {
     let (mut store, module) = ref_types_module(
         r#"
diff --git a/tests/all/relocs.rs b/tests/all/relocs.rs
index 6dab73cd74..fdd0730dfa 100644
--- a/tests/all/relocs.rs
+++ b/tests/all/relocs.rs
@@ -8,8 +8,6 @@
 //! 32-bits, and right now object files aren't supported larger than 4gb anyway
 //! so we would need a lot of other support necessary to exercise that.
 
-#![cfg(not(feature = "old-x86-backend"))] // multi-value not supported here
-
 use anyhow::Result;
 use wasmtime::*;
 
diff --git a/tests/all/wast.rs b/tests/all/wast.rs
index 675850df36..f700842193 100644
--- a/tests/all/wast.rs
+++ b/tests/all/wast.rs
@@ -25,12 +25,6 @@ fn run_wast(wast: &str, strategy: Strategy, pooling: bool) -> anyhow::Result<()>
     // by reference types.
     let reftypes = simd || feature_found(wast, "reference-types");
 
-    // Threads & simd aren't implemented in the old backend, so skip those
-    // tests.
-    if (threads || simd) && cfg!(feature = "old-x86-backend") {
-        return Ok(());
-    }
-
     let mut cfg = Config::new();
     cfg.wasm_simd(simd)
         .wasm_bulk_memory(bulk_mem)