Allow binding immediates to instructions (#1012)

This change should make the code clearer (and shorter) when adding encodings for instructions with specific immediates; e.g., a constant with a 0 immediate could be encoded as an XOR with something like `const.bind(...)` without explicitly creating the necessary predicates. It has several parts:

* Introduce the `Bindable` trait to instructions
* Convert all instruction bindings to use `Bindable::bind()`
* Add the ability to bind immediates to `BoundInstruction`

This is an attempt to reduce some of the issues in #955.
2019-10-10 08:54:46 -07:00
parent f1c25c2c5a
commit 6d690e5275
6 changed files with 477 additions and 341 deletions
--- a/cranelift/codegen/meta/src/isa/riscv/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/riscv/encodings.rs
@@ -1,7 +1,7 @@
 use crate::cdsl::ast::{Apply, Expr, Literal, VarPool};
 use crate::cdsl::encodings::{Encoding, EncodingBuilder};
 use crate::cdsl::instructions::{
-    BoundInstruction, InstSpec, InstructionPredicateNode, InstructionPredicateRegistry,
+    Bindable, BoundInstruction, InstSpec, InstructionPredicateNode, InstructionPredicateRegistry,
 };
 use crate::cdsl::recipes::{EncodingRecipeNumber, Recipes};
 use crate::cdsl::settings::SettingGroup;
@@ -13,27 +13,34 @@ use crate::shared::types::Reference::{R32, R64};
 use crate::shared::Definitions as SharedDefinitions;

 use super::recipes::RecipeGroup;
-
-fn enc(inst: impl Into<InstSpec>, recipe: EncodingRecipeNumber, bits: u16) -> EncodingBuilder {
-    EncodingBuilder::new(inst.into(), recipe, bits)
-}
+use crate::cdsl::formats::FormatRegistry;

 pub(crate) struct PerCpuModeEncodings<'defs> {
    pub inst_pred_reg: InstructionPredicateRegistry,
    pub enc32: Vec<Encoding>,
    pub enc64: Vec<Encoding>,
    recipes: &'defs Recipes,
+    formats: &'defs FormatRegistry,
 }

 impl<'defs> PerCpuModeEncodings<'defs> {
-    fn new(recipes: &'defs Recipes) -> Self {
+    fn new(recipes: &'defs Recipes, formats: &'defs FormatRegistry) -> Self {
        Self {
            inst_pred_reg: InstructionPredicateRegistry::new(),
            enc32: Vec::new(),
            enc64: Vec::new(),
            recipes,
+            formats,
        }
    }
+    fn enc(
+        &self,
+        inst: impl Into<InstSpec>,
+        recipe: EncodingRecipeNumber,
+        bits: u16,
+    ) -> EncodingBuilder {
+        EncodingBuilder::new(inst.into(), recipe, bits, self.formats)
+    }
    fn add32(&mut self, encoding: EncodingBuilder) {
        self.enc32
            .push(encoding.build(self.recipes, &mut self.inst_pred_reg));
@@ -169,7 +176,7 @@ pub(crate) fn define<'defs>(
    let use_m = isa_settings.predicate_by_name("use_m");

    // Definitions.
-    let mut e = PerCpuModeEncodings::new(&recipes.recipes);
+    let mut e = PerCpuModeEncodings::new(&recipes.recipes, &shared_defs.format_registry);

    // Basic arithmetic binary instructions are encoded in an R-type instruction.
    for &(inst, inst_imm, f3, f7) in &[
@@ -179,26 +186,26 @@ pub(crate) fn define<'defs>(
        (bor, Some(bor_imm), 0b110, 0b0000000),
        (band, Some(band_imm), 0b111, 0b0000000),
    ] {
-        e.add32(enc(inst.bind(I32), r_r, op_bits(f3, f7)));
-        e.add64(enc(inst.bind(I64), r_r, op_bits(f3, f7)));
+        e.add32(e.enc(inst.bind(I32), r_r, op_bits(f3, f7)));
+        e.add64(e.enc(inst.bind(I64), r_r, op_bits(f3, f7)));

        // Immediate versions for add/xor/or/and.
        if let Some(inst_imm) = inst_imm {
-            e.add32(enc(inst_imm.bind(I32), r_ii, opimm_bits(f3, 0)));
-            e.add64(enc(inst_imm.bind(I64), r_ii, opimm_bits(f3, 0)));
+            e.add32(e.enc(inst_imm.bind(I32), r_ii, opimm_bits(f3, 0)));
+            e.add64(e.enc(inst_imm.bind(I64), r_ii, opimm_bits(f3, 0)));
        }
    }

    // 32-bit ops in RV64.
-    e.add64(enc(iadd.bind(I32), r_r, op32_bits(0b000, 0b0000000)));
-    e.add64(enc(isub.bind(I32), r_r, op32_bits(0b000, 0b0100000)));
+    e.add64(e.enc(iadd.bind(I32), r_r, op32_bits(0b000, 0b0000000)));
+    e.add64(e.enc(isub.bind(I32), r_r, op32_bits(0b000, 0b0100000)));
    // There are no andiw/oriw/xoriw variations.
-    e.add64(enc(iadd_imm.bind(I32), r_ii, opimm32_bits(0b000, 0)));
+    e.add64(e.enc(iadd_imm.bind(I32), r_ii, opimm32_bits(0b000, 0)));

    // Use iadd_imm with %x0 to materialize constants.
-    e.add32(enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0)));
-    e.add64(enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0)));
-    e.add64(enc(iconst.bind(I64), r_iz, opimm_bits(0b0, 0)));
+    e.add32(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0)));
+    e.add64(e.enc(iconst.bind(I32), r_iz, opimm_bits(0b0, 0)));
+    e.add64(e.enc(iconst.bind(I64), r_iz, opimm_bits(0b0, 0)));

    // Dynamic shifts have the same masking semantics as the clif base instructions.
    for &(inst, inst_imm, f3, f7) in &[
@@ -206,17 +213,17 @@ pub(crate) fn define<'defs>(
        (ushr, ushr_imm, 0b101, 0b0),
        (sshr, sshr_imm, 0b101, 0b100000),
    ] {
-        e.add32(enc(inst.bind(I32).bind(I32), r_r, op_bits(f3, f7)));
-        e.add64(enc(inst.bind(I64).bind(I64), r_r, op_bits(f3, f7)));
-        e.add64(enc(inst.bind(I32).bind(I32), r_r, op32_bits(f3, f7)));
+        e.add32(e.enc(inst.bind(I32).bind(I32), r_r, op_bits(f3, f7)));
+        e.add64(e.enc(inst.bind(I64).bind(I64), r_r, op_bits(f3, f7)));
+        e.add64(e.enc(inst.bind(I32).bind(I32), r_r, op32_bits(f3, f7)));
        // Allow i32 shift amounts in 64-bit shifts.
-        e.add64(enc(inst.bind(I64).bind(I32), r_r, op_bits(f3, f7)));
-        e.add64(enc(inst.bind(I32).bind(I64), r_r, op32_bits(f3, f7)));
+        e.add64(e.enc(inst.bind(I64).bind(I32), r_r, op_bits(f3, f7)));
+        e.add64(e.enc(inst.bind(I32).bind(I64), r_r, op32_bits(f3, f7)));

        // Immediate shifts.
-        e.add32(enc(inst_imm.bind(I32), r_rshamt, opimm_bits(f3, f7)));
-        e.add64(enc(inst_imm.bind(I64), r_rshamt, opimm_bits(f3, f7)));
-        e.add64(enc(inst_imm.bind(I32), r_rshamt, opimm32_bits(f3, f7)));
+        e.add32(e.enc(inst_imm.bind(I32), r_rshamt, opimm_bits(f3, f7)));
+        e.add64(e.enc(inst_imm.bind(I64), r_rshamt, opimm_bits(f3, f7)));
+        e.add64(e.enc(inst_imm.bind(I32), r_rshamt, opimm32_bits(f3, f7)));
    }

    // Signed and unsigned integer 'less than'. There are no 'w' variants for comparing 32-bit
@@ -242,20 +249,20 @@ pub(crate) fn define<'defs>(
        let icmp_i32 = icmp.bind(I32);
        let icmp_i64 = icmp.bind(I64);
        e.add32(
-            enc(icmp_i32.clone(), r_ricmp, op_bits(0b010, 0b0000000))
+            e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b010, 0b0000000))
                .inst_predicate(icmp_instp(&icmp_i32, "slt")),
        );
        e.add64(
-            enc(icmp_i64.clone(), r_ricmp, op_bits(0b010, 0b0000000))
+            e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b010, 0b0000000))
                .inst_predicate(icmp_instp(&icmp_i64, "slt")),
        );

        e.add32(
-            enc(icmp_i32.clone(), r_ricmp, op_bits(0b011, 0b0000000))
+            e.enc(icmp_i32.clone(), r_ricmp, op_bits(0b011, 0b0000000))
                .inst_predicate(icmp_instp(&icmp_i32, "ult")),
        );
        e.add64(
-            enc(icmp_i64.clone(), r_ricmp, op_bits(0b011, 0b0000000))
+            e.enc(icmp_i64.clone(), r_ricmp, op_bits(0b011, 0b0000000))
                .inst_predicate(icmp_instp(&icmp_i64, "ult")),
        );

@@ -263,42 +270,51 @@ pub(crate) fn define<'defs>(
        let icmp_i32 = icmp_imm.bind(I32);
        let icmp_i64 = icmp_imm.bind(I64);
        e.add32(
-            enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b010, 0))
+            e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b010, 0))
                .inst_predicate(icmp_instp(&icmp_i32, "slt")),
        );
        e.add64(
-            enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b010, 0))
+            e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b010, 0))
                .inst_predicate(icmp_instp(&icmp_i64, "slt")),
        );

        e.add32(
-            enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b011, 0))
+            e.enc(icmp_i32.clone(), r_iicmp, opimm_bits(0b011, 0))
                .inst_predicate(icmp_instp(&icmp_i32, "ult")),
        );
        e.add64(
-            enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b011, 0))
+            e.enc(icmp_i64.clone(), r_iicmp, opimm_bits(0b011, 0))
                .inst_predicate(icmp_instp(&icmp_i64, "ult")),
        );
    }

    // Integer constants with the low 12 bits clear are materialized by lui.
-    e.add32(enc(iconst.bind(I32), r_u, lui_bits()));
-    e.add64(enc(iconst.bind(I32), r_u, lui_bits()));
-    e.add64(enc(iconst.bind(I64), r_u, lui_bits()));
+    e.add32(e.enc(iconst.bind(I32), r_u, lui_bits()));
+    e.add64(e.enc(iconst.bind(I32), r_u, lui_bits()));
+    e.add64(e.enc(iconst.bind(I64), r_u, lui_bits()));

    // "M" Standard Extension for Integer Multiplication and Division.
    // Gated by the `use_m` flag.
-    e.add32(enc(imul.bind(I32), r_r, op_bits(0b000, 0b00000001)).isa_predicate(use_m));
-    e.add64(enc(imul.bind(I64), r_r, op_bits(0b000, 0b00000001)).isa_predicate(use_m));
-    e.add64(enc(imul.bind(I32), r_r, op32_bits(0b000, 0b00000001)).isa_predicate(use_m));
+    e.add32(
+        e.enc(imul.bind(I32), r_r, op_bits(0b000, 0b00000001))
+            .isa_predicate(use_m),
+    );
+    e.add64(
+        e.enc(imul.bind(I64), r_r, op_bits(0b000, 0b00000001))
+            .isa_predicate(use_m),
+    );
+    e.add64(
+        e.enc(imul.bind(I32), r_r, op32_bits(0b000, 0b00000001))
+            .isa_predicate(use_m),
+    );

    // Control flow.

    // Unconditional branches.
-    e.add32(enc(jump, r_uj, jal_bits()));
-    e.add64(enc(jump, r_uj, jal_bits()));
-    e.add32(enc(call, r_uj_call, jal_bits()));
-    e.add64(enc(call, r_uj_call, jal_bits()));
+    e.add32(e.enc(jump, r_uj, jal_bits()));
+    e.add64(e.enc(jump, r_uj, jal_bits()));
+    e.add32(e.enc(call, r_uj_call, jal_bits()));
+    e.add64(e.enc(call, r_uj_call, jal_bits()));

    // Conditional branches.
    {
@@ -338,101 +354,81 @@ pub(crate) fn define<'defs>(
            ("uge", 0b111),
        ] {
            e.add32(
-                enc(br_icmp_i32.clone(), r_sb, branch_bits(f3))
+                e.enc(br_icmp_i32.clone(), r_sb, branch_bits(f3))
                    .inst_predicate(br_icmp_instp(&br_icmp_i32, cond)),
            );
            e.add64(
-                enc(br_icmp_i64.clone(), r_sb, branch_bits(f3))
+                e.enc(br_icmp_i64.clone(), r_sb, branch_bits(f3))
                    .inst_predicate(br_icmp_instp(&br_icmp_i64, cond)),
            );
        }
    }

    for &(inst, f3) in &[(brz, 0b000), (brnz, 0b001)] {
-        e.add32(enc(inst.bind(I32), r_sb_zero, branch_bits(f3)));
-        e.add64(enc(inst.bind(I64), r_sb_zero, branch_bits(f3)));
-        e.add32(enc(inst.bind(B1), r_sb_zero, branch_bits(f3)));
-        e.add64(enc(inst.bind(B1), r_sb_zero, branch_bits(f3)));
+        e.add32(e.enc(inst.bind(I32), r_sb_zero, branch_bits(f3)));
+        e.add64(e.enc(inst.bind(I64), r_sb_zero, branch_bits(f3)));
+        e.add32(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3)));
+        e.add64(e.enc(inst.bind(B1), r_sb_zero, branch_bits(f3)));
    }

    // Returns are a special case of jalr_bits using %x1 to hold the return address.
    // The return address is provided by a special-purpose `link` return value that
    // is added by legalize_signature().
-    e.add32(enc(return_, r_iret, jalr_bits()));
-    e.add64(enc(return_, r_iret, jalr_bits()));
-    e.add32(enc(call_indirect.bind(I32), r_icall, jalr_bits()));
-    e.add64(enc(call_indirect.bind(I64), r_icall, jalr_bits()));
+    e.add32(e.enc(return_, r_iret, jalr_bits()));
+    e.add64(e.enc(return_, r_iret, jalr_bits()));
+    e.add32(e.enc(call_indirect.bind(I32), r_icall, jalr_bits()));
+    e.add64(e.enc(call_indirect.bind(I64), r_icall, jalr_bits()));

    // Spill and fill.
-    e.add32(enc(spill.bind(I32), r_gp_sp, store_bits(0b010)));
-    e.add64(enc(spill.bind(I32), r_gp_sp, store_bits(0b010)));
-    e.add64(enc(spill.bind(I64), r_gp_sp, store_bits(0b011)));
-    e.add32(enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
-    e.add64(enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
-    e.add64(enc(fill.bind(I64), r_gp_fi, load_bits(0b011)));
+    e.add32(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010)));
+    e.add64(e.enc(spill.bind(I32), r_gp_sp, store_bits(0b010)));
+    e.add64(e.enc(spill.bind(I64), r_gp_sp, store_bits(0b011)));
+    e.add32(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
+    e.add64(e.enc(fill.bind(I32), r_gp_fi, load_bits(0b010)));
+    e.add64(e.enc(fill.bind(I64), r_gp_fi, load_bits(0b011)));

    // No-op fills, created by late-stage redundant-fill removal.
    for &ty in &[I64, I32] {
-        e.add64(enc(fill_nop.bind(ty), r_fillnull, 0));
-        e.add32(enc(fill_nop.bind(ty), r_fillnull, 0));
+        e.add64(e.enc(fill_nop.bind(ty), r_fillnull, 0));
+        e.add32(e.enc(fill_nop.bind(ty), r_fillnull, 0));
    }
-    e.add64(enc(fill_nop.bind(B1), r_fillnull, 0));
-    e.add32(enc(fill_nop.bind(B1), r_fillnull, 0));
+    e.add64(e.enc(fill_nop.bind(B1), r_fillnull, 0));
+    e.add32(e.enc(fill_nop.bind(B1), r_fillnull, 0));

    // Register copies.
-    e.add32(enc(copy.bind(I32), r_icopy, opimm_bits(0b000, 0)));
-    e.add64(enc(copy.bind(I64), r_icopy, opimm_bits(0b000, 0)));
-    e.add64(enc(copy.bind(I32), r_icopy, opimm32_bits(0b000, 0)));
+    e.add32(e.enc(copy.bind(I32), r_icopy, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy.bind(I64), r_icopy, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy.bind(I32), r_icopy, opimm32_bits(0b000, 0)));

-    e.add32(enc(regmove.bind(I32), r_irmov, opimm_bits(0b000, 0)));
-    e.add64(enc(regmove.bind(I64), r_irmov, opimm_bits(0b000, 0)));
-    e.add64(enc(regmove.bind(I32), r_irmov, opimm32_bits(0b000, 0)));
+    e.add32(e.enc(regmove.bind(I32), r_irmov, opimm_bits(0b000, 0)));
+    e.add64(e.enc(regmove.bind(I64), r_irmov, opimm_bits(0b000, 0)));
+    e.add64(e.enc(regmove.bind(I32), r_irmov, opimm32_bits(0b000, 0)));

-    e.add32(enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0)));
-    e.add64(enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0)));
-    e.add32(enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0)));
-    e.add64(enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0)));
+    e.add32(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy.bind(B1), r_icopy, opimm_bits(0b000, 0)));
+    e.add32(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0)));
+    e.add64(e.enc(regmove.bind(B1), r_irmov, opimm_bits(0b000, 0)));

    // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn
    // into a no-op.
    // The same encoding is generated for both the 64- and 32-bit architectures.
    for &ty in &[I64, I32, I16, I8] {
-        e.add32(enc(copy_nop.bind(ty), r_stacknull, 0));
-        e.add64(enc(copy_nop.bind(ty), r_stacknull, 0));
+        e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0));
+        e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0));
    }
    for &ty in &[F64, F32] {
-        e.add32(enc(copy_nop.bind(ty), r_stacknull, 0));
-        e.add64(enc(copy_nop.bind(ty), r_stacknull, 0));
+        e.add32(e.enc(copy_nop.bind(ty), r_stacknull, 0));
+        e.add64(e.enc(copy_nop.bind(ty), r_stacknull, 0));
    }

    // Copy-to-SSA
-    e.add32(enc(
-        copy_to_ssa.bind(I32),
-        r_copytossa,
-        opimm_bits(0b000, 0),
-    ));
-    e.add64(enc(
-        copy_to_ssa.bind(I64),
-        r_copytossa,
-        opimm_bits(0b000, 0),
-    ));
-    e.add64(enc(
-        copy_to_ssa.bind(I32),
-        r_copytossa,
-        opimm32_bits(0b000, 0),
-    ));
-    e.add32(enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
-    e.add64(enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
-    e.add32(enc(
-        copy_to_ssa.bind_ref(R32),
-        r_copytossa,
-        opimm_bits(0b000, 0),
-    ));
-    e.add64(enc(
-        copy_to_ssa.bind_ref(R64),
-        r_copytossa,
-        opimm_bits(0b000, 0),
-    ));
+    e.add32(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy_to_ssa.bind(I64), r_copytossa, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy_to_ssa.bind(I32), r_copytossa, opimm32_bits(0b000, 0)));
+    e.add32(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy_to_ssa.bind(B1), r_copytossa, opimm_bits(0b000, 0)));
+    e.add32(e.enc(copy_to_ssa.bind(R32), r_copytossa, opimm_bits(0b000, 0)));
+    e.add64(e.enc(copy_to_ssa.bind(R64), r_copytossa, opimm_bits(0b000, 0)));

    e
 }
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -5,8 +5,8 @@ use std::collections::HashMap;

 use crate::cdsl::encodings::{Encoding, EncodingBuilder};
 use crate::cdsl::instructions::{
-    InstSpec, Instruction, InstructionGroup, InstructionPredicate, InstructionPredicateNode,
-    InstructionPredicateRegistry,
+    vector, Bindable, InstSpec, Instruction, InstructionGroup, InstructionPredicate,
+    InstructionPredicateNode, InstructionPredicateRegistry,
 };
 use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
 use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};
@@ -20,23 +20,27 @@ use crate::shared::Definitions as SharedDefinitions;
 use crate::isa::x86::opcodes::*;

 use super::recipes::{RecipeGroup, Template};
+use crate::cdsl::formats::FormatRegistry;
+use crate::cdsl::instructions::BindParameter::Any;

-pub(crate) struct PerCpuModeEncodings {
+pub(crate) struct PerCpuModeEncodings<'defs> {
    pub enc32: Vec<Encoding>,
    pub enc64: Vec<Encoding>,
    pub recipes: Recipes,
    recipes_by_name: HashMap<String, EncodingRecipeNumber>,
    pub inst_pred_reg: InstructionPredicateRegistry,
+    formats: &'defs FormatRegistry,
 }

-impl PerCpuModeEncodings {
-    fn new() -> Self {
+impl<'defs> PerCpuModeEncodings<'defs> {
+    fn new(formats: &'defs FormatRegistry) -> Self {
        Self {
            enc32: Vec::new(),
            enc64: Vec::new(),
            recipes: Recipes::new(),
            recipes_by_name: HashMap::new(),
            inst_pred_reg: InstructionPredicateRegistry::new(),
+            formats,
        }
    }

@@ -69,7 +73,7 @@ impl PerCpuModeEncodings {
    {
        let (recipe, bits) = template.build();
        let recipe_number = self.add_recipe(recipe);
-        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
+        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits, self.formats);
        builder_closure(builder).build(&self.recipes, &mut self.inst_pred_reg)
    }

@@ -101,7 +105,7 @@ impl PerCpuModeEncodings {
    }
    fn enc32_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
        let recipe_number = self.add_recipe(recipe.clone());
-        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
+        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits, self.formats);
        let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
        self.enc32.push(encoding);
    }
@@ -134,7 +138,7 @@ impl PerCpuModeEncodings {
    }
    fn enc64_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
        let recipe_number = self.add_recipe(recipe.clone());
-        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
+        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits, self.formats);
        let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
        self.enc64.push(encoding);
    }
@@ -207,8 +211,8 @@ impl PerCpuModeEncodings {
    /// Add encodings for `inst.r64` to X86_64 with a REX.W prefix.
    fn enc_r32_r64_rex_only(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();
-        self.enc32(inst.bind_ref(R32), template.nonrex());
-        self.enc64(inst.bind_ref(R64), template.rex().w());
+        self.enc32(inst.bind(R32), template.nonrex());
+        self.enc64(inst.bind(R64), template.rex().w());
    }

    /// Add encodings for `inst` to X86_64 with and without a REX prefix.
@@ -281,18 +285,18 @@ impl PerCpuModeEncodings {
    /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit`
    /// argument to determine whether or not to set the REX.W bit.
    fn enc_i32_i64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) {
-        self.enc32(inst.clone().bind(I32).bind_any(), template.clone());
+        self.enc32(inst.clone().bind(I32).bind(Any), template.clone());

        // REX-less encoding must come after REX encoding so we don't use it by
        // default. Otherwise reg-alloc would never use r8 and up.
-        self.enc64(inst.clone().bind(I32).bind_any(), template.clone().rex());
-        self.enc64(inst.clone().bind(I32).bind_any(), template.clone());
+        self.enc64(inst.clone().bind(I32).bind(Any), template.clone().rex());
+        self.enc64(inst.clone().bind(I32).bind(Any), template.clone());

        if w_bit {
-            self.enc64(inst.clone().bind(I64).bind_any(), template.rex().w());
+            self.enc64(inst.clone().bind(I64).bind(Any), template.rex().w());
        } else {
-            self.enc64(inst.clone().bind(I64).bind_any(), template.clone().rex());
-            self.enc64(inst.clone().bind(I64).bind_any(), template);
+            self.enc64(inst.clone().bind(I64).bind(Any), template.clone().rex());
+            self.enc64(inst.clone().bind(I64).bind(Any), template);
        }
    }

@@ -366,12 +370,12 @@ impl PerCpuModeEncodings {

 // Definitions.

-pub(crate) fn define(
-    shared_defs: &SharedDefinitions,
+pub(crate) fn define<'defs>(
+    shared_defs: &'defs SharedDefinitions,
    settings: &SettingGroup,
    x86: &InstructionGroup,
    r: &RecipeGroup,
-) -> PerCpuModeEncodings {
+) -> PerCpuModeEncodings<'defs> {
    let shared = &shared_defs.instructions;
    let formats = &shared_defs.format_registry;

@@ -681,7 +685,7 @@ pub(crate) fn define(
    let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");

    // Definitions.
-    let mut e = PerCpuModeEncodings::new();
+    let mut e = PerCpuModeEncodings::new(formats);

    // The pinned reg is fixed to a certain value entirely user-controlled, so it generates nothing!
    e.enc64_rec(get_pinned_reg.bind(I64), rec_get_pinned_reg, 0);
@@ -742,15 +746,11 @@ pub(crate) fn define(
        e.enc64(regmove.bind(ty), rec_rmov.opcodes(&MOV_STORE).rex());
    }
    e.enc64(regmove.bind(I64), rec_rmov.opcodes(&MOV_STORE).rex().w());
-    e.enc64(regmove.bind(B64), rec_rmov.opcodes(&MOV_STORE).rex().w());
    e.enc_both(regmove.bind(B1), rec_rmov.opcodes(&MOV_STORE));
    e.enc_both(regmove.bind(I8), rec_rmov.opcodes(&MOV_STORE));
-    e.enc32(regmove.bind_ref(R32), rec_rmov.opcodes(&MOV_STORE));
-    e.enc64(regmove.bind_ref(R32), rec_rmov.opcodes(&MOV_STORE).rex());
-    e.enc64(
-        regmove.bind_ref(R64),
-        rec_rmov.opcodes(&MOV_STORE).rex().w(),
-    );
+    e.enc32(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE));
+    e.enc64(regmove.bind(R32), rec_rmov.opcodes(&MOV_STORE).rex());
+    e.enc64(regmove.bind(R64), rec_rmov.opcodes(&MOV_STORE).rex().w());

    e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(&ADD_IMM8_SIGN_EXTEND).rrr(0));
    e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(&ADD_IMM).rrr(0));
@@ -834,19 +834,19 @@ pub(crate) fn define(
        // Cannot use enc_i32_i64 for this pattern because instructions require
        // to bind any.
        e.enc32(
-            inst.bind(I32).bind_any(),
+            inst.bind(I32).bind(Any),
            rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
        );
        e.enc64(
-            inst.bind(I64).bind_any(),
+            inst.bind(I64).bind(Any),
            rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex().w(),
        );
        e.enc64(
-            inst.bind(I32).bind_any(),
+            inst.bind(I32).bind(Any),
            rec_rc.opcodes(&ROTATE_CL).rrr(rrr).rex(),
        );
        e.enc64(
-            inst.bind(I32).bind_any(),
+            inst.bind(I32).bind(Any),
            rec_rc.opcodes(&ROTATE_CL).rrr(rrr),
        );
    }
@@ -970,7 +970,7 @@ pub(crate) fn define(

    for recipe in &[rec_st, rec_stDisp8, rec_stDisp32] {
        e.enc_i32_i64_ld_st(store, true, recipe.opcodes(&MOV_STORE));
-        e.enc_x86_64(istore32.bind(I64).bind_any(), recipe.opcodes(&MOV_STORE));
+        e.enc_x86_64(istore32.bind(I64).bind(Any), recipe.opcodes(&MOV_STORE));
        e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(&MOV_STORE_16));
    }

@@ -979,14 +979,8 @@ pub(crate) fn define(
    // the corresponding st* recipes when a REX prefix is applied.

    for recipe in &[rec_st_abcd, rec_stDisp8_abcd, rec_stDisp32_abcd] {
-        e.enc_both(
-            istore8.bind(I32).bind_any(),
-            recipe.opcodes(&MOV_BYTE_STORE),
-        );
-        e.enc_x86_64(
-            istore8.bind(I64).bind_any(),
-            recipe.opcodes(&MOV_BYTE_STORE),
-        );
+        e.enc_both(istore8.bind(I32).bind(Any), recipe.opcodes(&MOV_BYTE_STORE));
+        e.enc_x86_64(istore8.bind(I64).bind(Any), recipe.opcodes(&MOV_BYTE_STORE));
    }

    e.enc_i32_i64(spill, rec_spillSib32.opcodes(&MOV_STORE));
@@ -1121,12 +1115,9 @@ pub(crate) fn define(
    );

    // Float loads and stores.
-    e.enc_both(load.bind(F32).bind_any(), rec_fld.opcodes(&MOVSS_LOAD));
-    e.enc_both(load.bind(F32).bind_any(), rec_fldDisp8.opcodes(&MOVSS_LOAD));
-    e.enc_both(
-        load.bind(F32).bind_any(),
-        rec_fldDisp32.opcodes(&MOVSS_LOAD),
-    );
+    e.enc_both(load.bind(F32).bind(Any), rec_fld.opcodes(&MOVSS_LOAD));
+    e.enc_both(load.bind(F32).bind(Any), rec_fldDisp8.opcodes(&MOVSS_LOAD));
+    e.enc_both(load.bind(F32).bind(Any), rec_fldDisp32.opcodes(&MOVSS_LOAD));

    e.enc_both(
        load_complex.bind(F32),
@@ -1141,12 +1132,9 @@ pub(crate) fn define(
        rec_fldWithIndexDisp32.opcodes(&MOVSS_LOAD),
    );

-    e.enc_both(load.bind(F64).bind_any(), rec_fld.opcodes(&MOVSD_LOAD));
-    e.enc_both(load.bind(F64).bind_any(), rec_fldDisp8.opcodes(&MOVSD_LOAD));
-    e.enc_both(
-        load.bind(F64).bind_any(),
-        rec_fldDisp32.opcodes(&MOVSD_LOAD),
-    );
+    e.enc_both(load.bind(F64).bind(Any), rec_fld.opcodes(&MOVSD_LOAD));
+    e.enc_both(load.bind(F64).bind(Any), rec_fldDisp8.opcodes(&MOVSD_LOAD));
+    e.enc_both(load.bind(F64).bind(Any), rec_fldDisp32.opcodes(&MOVSD_LOAD));

    e.enc_both(
        load_complex.bind(F64),
@@ -1161,13 +1149,13 @@ pub(crate) fn define(
        rec_fldWithIndexDisp32.opcodes(&MOVSD_LOAD),
    );

-    e.enc_both(store.bind(F32).bind_any(), rec_fst.opcodes(&MOVSS_STORE));
+    e.enc_both(store.bind(F32).bind(Any), rec_fst.opcodes(&MOVSS_STORE));
    e.enc_both(
-        store.bind(F32).bind_any(),
+        store.bind(F32).bind(Any),
        rec_fstDisp8.opcodes(&MOVSS_STORE),
    );
    e.enc_both(
-        store.bind(F32).bind_any(),
+        store.bind(F32).bind(Any),
        rec_fstDisp32.opcodes(&MOVSS_STORE),
    );

@@ -1184,13 +1172,13 @@ pub(crate) fn define(
        rec_fstWithIndexDisp32.opcodes(&MOVSS_STORE),
    );

-    e.enc_both(store.bind(F64).bind_any(), rec_fst.opcodes(&MOVSD_STORE));
+    e.enc_both(store.bind(F64).bind(Any), rec_fst.opcodes(&MOVSD_STORE));
    e.enc_both(
-        store.bind(F64).bind_any(),
+        store.bind(F64).bind(Any),
        rec_fstDisp8.opcodes(&MOVSD_STORE),
    );
    e.enc_both(
-        store.bind(F64).bind_any(),
+        store.bind(F64).bind(Any),
        rec_fstDisp32.opcodes(&MOVSD_STORE),
    );

@@ -1727,7 +1715,7 @@ pub(crate) fn define(

    // PSHUFB, 8-bit shuffle using two XMM registers.
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let instruction = x86_pshufb.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = x86_pshufb.bind(vector(ty, sse_vector_size));
        let template = rec_fa.nonrex().opcodes(&PSHUFB);
        e.enc32_isap(instruction.clone(), template.clone(), use_ssse3_simd);
        e.enc64_isap(instruction, template, use_ssse3_simd);
@@ -1735,7 +1723,7 @@ pub(crate) fn define(

    // PSHUFD, 32-bit shuffle using one XMM register and a u8 immediate.
    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
-        let instruction = x86_pshufd.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = x86_pshufd.bind(vector(ty, sse_vector_size));
        let template = rec_r_ib_unsigned_fpr.nonrex().opcodes(&PSHUFD);
        e.enc32(instruction.clone(), template.clone());
        e.enc64(instruction, template);
@@ -1745,7 +1733,7 @@ pub(crate) fn define(
    // to the Intel manual: "When the destination operand is an XMM register, the source operand is
    // written to the low doubleword of the register and the register is zero-extended to 128 bits."
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let instruction = scalar_to_vector.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = scalar_to_vector.bind(vector(ty, sse_vector_size));
        if ty.is_float() {
            e.enc_32_64_rec(instruction, rec_null_fpr, 0);
        } else {
@@ -1767,7 +1755,7 @@ pub(crate) fn define(
            _ => panic!("invalid size for SIMD insertlane"),
        };

-        let instruction = x86_pinsr.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = x86_pinsr.bind(vector(ty, sse_vector_size));
        let template = rec_r_ib_unsigned_r.opcodes(opcode);
        if ty.lane_bits() < 64 {
            e.enc_32_64_maybe_isap(instruction, template.nonrex(), isap);
@@ -1780,21 +1768,21 @@ pub(crate) fn define(

    // For legalizing insertlane with floats, INSERTPS from SSE4.1.
    {
-        let instruction = x86_insertps.bind_vector_from_lane(F32, sse_vector_size);
+        let instruction = x86_insertps.bind(vector(F32, sse_vector_size));
        let template = rec_fa_ib.nonrex().opcodes(&INSERTPS);
        e.enc_32_64_maybe_isap(instruction, template, Some(use_sse41_simd));
    }

    // For legalizing insertlane with floats,  MOVSD from SSE2.
    {
-        let instruction = x86_movsd.bind_vector_from_lane(F64, sse_vector_size);
+        let instruction = x86_movsd.bind(vector(F64, sse_vector_size));
        let template = rec_fa.nonrex().opcodes(&MOVSD_LOAD);
        e.enc_32_64_maybe_isap(instruction, template, None); // from SSE2
    }

    // For legalizing insertlane with floats, MOVLHPS from SSE.
    {
-        let instruction = x86_movlhps.bind_vector_from_lane(F64, sse_vector_size);
+        let instruction = x86_movlhps.bind(vector(F64, sse_vector_size));
        let template = rec_fa.nonrex().opcodes(&MOVLHPS);
        e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
    }
@@ -1808,7 +1796,7 @@ pub(crate) fn define(
            _ => panic!("invalid size for SIMD extractlane"),
        };

-        let instruction = x86_pextr.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = x86_pextr.bind(vector(ty, sse_vector_size));
        let template = rec_r_ib_unsigned_gpr.opcodes(opcode);
        if ty.lane_bits() < 64 {
            e.enc_32_64_maybe_isap(instruction, template.nonrex(), Some(use_sse41_simd));
@@ -1825,8 +1813,8 @@ pub(crate) fn define(
            ValueType::all_lane_types().filter(|t| allowed_simd_type(t) && *t != from_type)
        {
            let instruction = raw_bitcast
-                .bind_vector_from_lane(to_type, sse_vector_size)
-                .bind_vector_from_lane(from_type, sse_vector_size);
+                .bind(vector(to_type, sse_vector_size))
+                .bind(vector(from_type, sse_vector_size));
            e.enc_32_64_rec(instruction, rec_null_fpr, 0);
        }
    }
@@ -1837,7 +1825,7 @@ pub(crate) fn define(
        for lane_type in ValueType::all_lane_types().filter(allowed_simd_type) {
            e.enc_32_64_rec(
                raw_bitcast
-                    .bind_vector_from_lane(lane_type, sse_vector_size)
+                    .bind(vector(lane_type, sse_vector_size))
                    .bind(*float_type),
                rec_null_fpr,
                0,
@@ -1845,7 +1833,7 @@ pub(crate) fn define(
            e.enc_32_64_rec(
                raw_bitcast
                    .bind(*float_type)
-                    .bind_vector_from_lane(lane_type, sse_vector_size),
+                    .bind(vector(lane_type, sse_vector_size)),
                rec_null_fpr,
                0,
            );
@@ -1857,7 +1845,7 @@ pub(crate) fn define(
    // encoding first
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        let f_unary_const = formats.get(formats.by_name("UnaryConst"));
-        let instruction = vconst.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = vconst.bind(vector(ty, sse_vector_size));

        let is_zero_128bit =
            InstructionPredicate::new_is_all_zeroes_128bit(f_unary_const, "constant_handle");
@@ -1881,14 +1869,14 @@ pub(crate) fn define(
    // MOVQ + MOVHPD + MOVQ + MOVLPD (this allows the constants to be immediates instead of stored
    // in memory) but some performance measurements are needed.
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let instruction = vconst.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = vconst.bind(vector(ty, sse_vector_size));
        let template = rec_vconst.nonrex().opcodes(&MOVUPS_LOAD);
        e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
    }

    // SIMD bor using ORPS
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
-        let instruction = bor.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = bor.bind(vector(ty, sse_vector_size));
        let template = rec_fa.nonrex().opcodes(&ORPS);
        e.enc_32_64_maybe_isap(instruction, template, None); // from SSE
    }
@@ -1898,87 +1886,87 @@ pub(crate) fn define(
    // alignment or type-specific encodings, see https://github.com/CraneStation/cranelift/issues/1039).
    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
        // Store
-        let bound_store = store.bind_vector_from_lane(ty, sse_vector_size).bind_any();
+        let bound_store = store.bind(vector(ty, sse_vector_size)).bind(Any);
        e.enc_32_64(bound_store.clone(), rec_fst.opcodes(&MOVUPS_STORE));
        e.enc_32_64(bound_store.clone(), rec_fstDisp8.opcodes(&MOVUPS_STORE));
        e.enc_32_64(bound_store, rec_fstDisp32.opcodes(&MOVUPS_STORE));

        // Load
-        let bound_load = load.bind_vector_from_lane(ty, sse_vector_size).bind_any();
+        let bound_load = load.bind(vector(ty, sse_vector_size)).bind(Any);
        e.enc_32_64(bound_load.clone(), rec_fld.opcodes(&MOVUPS_LOAD));
        e.enc_32_64(bound_load.clone(), rec_fldDisp8.opcodes(&MOVUPS_LOAD));
        e.enc_32_64(bound_load, rec_fldDisp32.opcodes(&MOVUPS_LOAD));

        // Spill
-        let bound_spill = spill.bind_vector_from_lane(ty, sse_vector_size);
+        let bound_spill = spill.bind(vector(ty, sse_vector_size));
        e.enc_32_64(bound_spill, rec_fspillSib32.opcodes(&MOVUPS_STORE));
-        let bound_regspill = regspill.bind_vector_from_lane(ty, sse_vector_size);
+        let bound_regspill = regspill.bind(vector(ty, sse_vector_size));
        e.enc_32_64(bound_regspill, rec_fregspill32.opcodes(&MOVUPS_STORE));

        // Fill
-        let bound_fill = fill.bind_vector_from_lane(ty, sse_vector_size);
+        let bound_fill = fill.bind(vector(ty, sse_vector_size));
        e.enc_32_64(bound_fill, rec_ffillSib32.opcodes(&MOVUPS_LOAD));
-        let bound_regfill = regfill.bind_vector_from_lane(ty, sse_vector_size);
+        let bound_regfill = regfill.bind(vector(ty, sse_vector_size));
        e.enc_32_64(bound_regfill, rec_fregfill32.opcodes(&MOVUPS_LOAD));
-        let bound_fill_nop = fill_nop.bind_vector_from_lane(ty, sse_vector_size);
+        let bound_fill_nop = fill_nop.bind(vector(ty, sse_vector_size));
        e.enc_32_64_rec(bound_fill_nop, rec_ffillnull, 0);

        // Regmove
-        let bound_regmove = regmove.bind_vector_from_lane(ty, sse_vector_size);
+        let bound_regmove = regmove.bind(vector(ty, sse_vector_size));
        e.enc_32_64(bound_regmove, rec_frmov.opcodes(&MOVAPS_LOAD));

        // Copy
-        let bound_copy = copy.bind_vector_from_lane(ty, sse_vector_size);
+        let bound_copy = copy.bind(vector(ty, sse_vector_size));
        e.enc_32_64(bound_copy, rec_furm.opcodes(&MOVAPS_LOAD));
-        let bound_copy_nop = copy_nop.bind_vector_from_lane(ty, sse_vector_size);
+        let bound_copy_nop = copy_nop.bind(vector(ty, sse_vector_size));
        e.enc_32_64_rec(bound_copy_nop, rec_stacknull, 0);
    }

    // SIMD integer addition
    for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
-        let iadd = iadd.bind_vector_from_lane(ty.clone(), sse_vector_size);
+        let iadd = iadd.bind(vector(ty.clone(), sse_vector_size));
        e.enc_32_64(iadd, rec_fa.opcodes(*opcodes));
    }

    // SIMD integer saturating addition
    e.enc_32_64(
-        sadd_sat.bind_vector_from_lane(I8, sse_vector_size),
+        sadd_sat.bind(vector(I8, sse_vector_size)),
        rec_fa.opcodes(&PADDSB),
    );
    e.enc_32_64(
-        sadd_sat.bind_vector_from_lane(I16, sse_vector_size),
+        sadd_sat.bind(vector(I16, sse_vector_size)),
        rec_fa.opcodes(&PADDSW),
    );
    e.enc_32_64(
-        uadd_sat.bind_vector_from_lane(I8, sse_vector_size),
+        uadd_sat.bind(vector(I8, sse_vector_size)),
        rec_fa.opcodes(&PADDUSB),
    );
    e.enc_32_64(
-        uadd_sat.bind_vector_from_lane(I16, sse_vector_size),
+        uadd_sat.bind(vector(I16, sse_vector_size)),
        rec_fa.opcodes(&PADDUSW),
    );

    // SIMD integer subtraction
    for (ty, opcodes) in &[(I8, &PSUBB), (I16, &PSUBW), (I32, &PSUBD), (I64, &PSUBQ)] {
-        let isub = isub.bind_vector_from_lane(ty.clone(), sse_vector_size);
+        let isub = isub.bind(vector(ty.clone(), sse_vector_size));
        e.enc_32_64(isub, rec_fa.opcodes(*opcodes));
    }

    // SIMD integer saturating subtraction
    e.enc_32_64(
-        ssub_sat.bind_vector_from_lane(I8, sse_vector_size),
+        ssub_sat.bind(vector(I8, sse_vector_size)),
        rec_fa.opcodes(&PSUBSB),
    );
    e.enc_32_64(
-        ssub_sat.bind_vector_from_lane(I16, sse_vector_size),
+        ssub_sat.bind(vector(I16, sse_vector_size)),
        rec_fa.opcodes(&PSUBSW),
    );
    e.enc_32_64(
-        usub_sat.bind_vector_from_lane(I8, sse_vector_size),
+        usub_sat.bind(vector(I8, sse_vector_size)),
        rec_fa.opcodes(&PSUBUSB),
    );
    e.enc_32_64(
-        usub_sat.bind_vector_from_lane(I16, sse_vector_size),
+        usub_sat.bind(vector(I16, sse_vector_size)),
        rec_fa.opcodes(&PSUBUSW),
    );

@@ -1988,7 +1976,7 @@ pub(crate) fn define(
        (I16, &PMULLW[..], None),
        (I32, &PMULLD[..], Some(use_sse41_simd)),
    ] {
-        let imul = imul.bind_vector_from_lane(ty.clone(), sse_vector_size);
+        let imul = imul.bind(vector(ty.clone(), sse_vector_size));
        e.enc_32_64_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
    }

@@ -2002,7 +1990,7 @@ pub(crate) fn define(
            _ => panic!("invalid size for SIMD icmp"),
        };

-        let instruction = icmp.bind_vector_from_lane(ty, sse_vector_size);
+        let instruction = icmp.bind(vector(ty, sse_vector_size));
        let f_int_compare = formats.get(formats.by_name("IntCompare"));
        let has_eq_condition_code =
            InstructionPredicate::new_has_condition_code(f_int_compare, IntCC::Equal, "cond");
@@ -2020,10 +2008,10 @@ pub(crate) fn define(
    // Reference type instructions

    // Null references implemented as iconst 0.
-    e.enc32(null.bind_ref(R32), rec_pu_id_ref.opcodes(&MOV_IMM));
+    e.enc32(null.bind(R32), rec_pu_id_ref.opcodes(&MOV_IMM));

-    e.enc64(null.bind_ref(R64), rec_pu_id_ref.rex().opcodes(&MOV_IMM));
-    e.enc64(null.bind_ref(R64), rec_pu_id_ref.opcodes(&MOV_IMM));
+    e.enc64(null.bind(R64), rec_pu_id_ref.rex().opcodes(&MOV_IMM));
+    e.enc64(null.bind(R64), rec_pu_id_ref.opcodes(&MOV_IMM));

    // is_null, implemented by testing whether the value is 0.
    e.enc_r32_r64_rex_only(is_null, rec_is_zero.opcodes(&TEST_REG));
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs
@@ -1,5 +1,5 @@
 use crate::cdsl::ast::{var, ExprBuilder, Literal};
-use crate::cdsl::instructions::InstructionGroup;
+use crate::cdsl::instructions::{vector, Bindable, InstructionGroup};
 use crate::cdsl::types::ValueType;
 use crate::cdsl::xform::TransformGroupBuilder;
 use crate::shared::types::Float::F64;
@@ -322,10 +322,8 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct

    // SIMD splat: 8-bits
    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
-        let splat_any8x16 = splat.bind_vector_from_lane(ty, sse_vector_size);
-        let bitcast_f64_to_any8x16 = raw_bitcast
-            .bind_vector_from_lane(ty, sse_vector_size)
-            .bind(F64);
+        let splat_any8x16 = splat.bind(vector(ty, sse_vector_size));
+        let bitcast_f64_to_any8x16 = raw_bitcast.bind(vector(ty, sse_vector_size)).bind(F64);
        narrow.legalize(
            def!(y = splat_any8x16(x)),
            vec![
@@ -340,13 +338,13 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct

    // SIMD splat: 16-bits
    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) {
-        let splat_x16x8 = splat.bind_vector_from_lane(ty, sse_vector_size);
+        let splat_x16x8 = splat.bind(vector(ty, sse_vector_size));
        let raw_bitcast_any16x8_to_i32x4 = raw_bitcast
-            .bind_vector_from_lane(I32, sse_vector_size)
-            .bind_vector_from_lane(ty, sse_vector_size);
+            .bind(vector(I32, sse_vector_size))
+            .bind(vector(ty, sse_vector_size));
        let raw_bitcast_i32x4_to_any16x8 = raw_bitcast
-            .bind_vector_from_lane(ty, sse_vector_size)
-            .bind_vector_from_lane(I32, sse_vector_size);
+            .bind(vector(ty, sse_vector_size))
+            .bind(vector(I32, sse_vector_size));
        narrow.legalize(
            def!(y = splat_x16x8(x)),
            vec![
@@ -361,7 +359,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct

    // SIMD splat: 32-bits
    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
-        let splat_any32x4 = splat.bind_vector_from_lane(ty, sse_vector_size);
+        let splat_any32x4 = splat.bind(vector(ty, sse_vector_size));
        narrow.legalize(
            def!(y = splat_any32x4(x)),
            vec![
@@ -373,7 +371,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct

    // SIMD splat: 64-bits
    for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 64) {
-        let splat_any64x2 = splat.bind_vector_from_lane(ty, sse_vector_size);
+        let splat_any64x2 = splat.bind(vector(ty, sse_vector_size));
        narrow.legalize(
            def!(y = splat_any64x2(x)),
            vec![