#![allow(non_snake_case)]

use std::collections::HashMap;

use crate::cdsl::encodings::{Encoding, EncodingBuilder};
use crate::cdsl::instructions::{
    BoundInstruction, InstSpec, Instruction, InstructionGroup, InstructionPredicate,
    InstructionPredicateNode, InstructionPredicateRegistry,
};
use crate::cdsl::recipes::{EncodingRecipe, EncodingRecipeNumber, Recipes};
use crate::cdsl::settings::{SettingGroup, SettingPredicateNumber};

use crate::shared::types::Bool::B1;
use crate::shared::types::Float::{F32, F64};
use crate::shared::types::Int::{I16, I32, I64, I8};
use crate::shared::Definitions as SharedDefinitions;

use super::recipes::{RecipeGroup, Template};

/// Accumulator for x86 encodings, split by CPU mode (32-bit vs. 64-bit),
/// together with the recipe and instruction-predicate registries that the
/// encodings refer to.
pub struct PerCpuModeEncodings {
    /// Encodings valid in 32-bit (X86_32) mode.
    pub enc32: Vec<Encoding>,
    /// Encodings valid in 64-bit (X86_64) mode.
    pub enc64: Vec<Encoding>,
    /// All recipes referenced by the encodings above.
    pub recipes: Recipes,
    // Reverse map used to deduplicate recipes: an identical recipe that was
    // already pushed into `recipes` is reused rather than inserted twice.
    recipes_inverse: HashMap<EncodingRecipe, EncodingRecipeNumber>,
    /// Instruction predicates referenced by the encodings.
    pub inst_pred_reg: InstructionPredicateRegistry,
}

impl PerCpuModeEncodings {
    /// Create an empty set of per-CPU-mode encodings.
    fn new() -> Self {
        Self {
            enc32: Vec::new(),
            enc64: Vec::new(),
            recipes: Recipes::new(),
            recipes_inverse: HashMap::new(),
            inst_pred_reg: InstructionPredicateRegistry::new(),
        }
    }

    /// Insert `recipe` and return its number, reusing the number of a
    /// previously inserted identical recipe when possible.
    fn add_recipe(&mut self, recipe: EncodingRecipe) -> EncodingRecipeNumber {
        if let Some(found_index) = self.recipes_inverse.get(&recipe) {
            // Two recipes that compare equal must also agree on the name;
            // anything else indicates a recipe-definition bug. Passing the
            // format args directly avoids building the message (and
            // allocating) when the assertion holds.
            assert!(
                self.recipes[*found_index].name == recipe.name,
                "trying to insert different recipes with a same name ({})",
                recipe.name
            );
            *found_index
        } else {
            let index = self.recipes.push(recipe.clone());
            self.recipes_inverse.insert(recipe, index);
            index
        }
    }

    /// Build a single `Encoding` from a template, letting `builder_closure`
    /// attach any ISA or instruction predicates before the final build.
    fn make_encoding<T>(
        &mut self,
        inst: InstSpec,
        template: Template,
        builder_closure: T,
    ) -> Encoding
    where
        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
    {
        let (recipe, bits) = template.build();
        let recipe_number = self.add_recipe(recipe);
        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
        builder_closure(builder).build(&self.recipes, &mut self.inst_pred_reg)
    }

    /// Add an X86_32 encoding, customized by `builder_closure`.
    fn enc32_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T)
    where
        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
    {
        let encoding = self.make_encoding(inst.into(), template, builder_closure);
        self.enc32.push(encoding);
    }
    /// Add a plain X86_32 encoding.
    fn enc32(&mut self, inst: impl Into<InstSpec>, template: Template) {
        self.enc32_func(inst, template, |x| x);
    }
    /// Add an X86_32 encoding guarded by the ISA predicate `isap`.
    fn enc32_isap(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        isap: SettingPredicateNumber,
    ) {
        self.enc32_func(inst, template, |encoding| encoding.isa_predicate(isap));
    }
    /// Add an X86_32 encoding guarded by the instruction predicate `instp`.
    fn enc32_instp(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        self.enc32_func(inst, template, |encoding| encoding.inst_predicate(instp));
    }
    /// Add an X86_32 encoding from a bare recipe (no template).
    fn enc32_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
        let recipe_number = self.add_recipe(recipe.clone());
        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
        let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
        self.enc32.push(encoding);
    }

    /// Add an X86_64 encoding, customized by `builder_closure`.
    fn enc64_func<T>(&mut self, inst: impl Into<InstSpec>, template: Template, builder_closure: T)
    where
        T: FnOnce(EncodingBuilder) -> EncodingBuilder,
    {
        let encoding = self.make_encoding(inst.into(), template, builder_closure);
        self.enc64.push(encoding);
    }
    /// Add a plain X86_64 encoding.
    fn enc64(&mut self, inst: impl Into<InstSpec>, template: Template) {
        self.enc64_func(inst, template, |x| x);
    }
    /// Add an X86_64 encoding guarded by the ISA predicate `isap`.
    fn enc64_isap(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        isap: SettingPredicateNumber,
    ) {
        self.enc64_func(inst, template, |encoding| encoding.isa_predicate(isap));
    }
    /// Add an X86_64 encoding guarded by the instruction predicate `instp`.
    fn enc64_instp(
        &mut self,
        inst: impl Into<InstSpec>,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        self.enc64_func(inst, template, |encoding| encoding.inst_predicate(instp));
    }
    /// Add an X86_64 encoding from a bare recipe (no template).
    fn enc64_rec(&mut self, inst: impl Into<InstSpec>, recipe: &EncodingRecipe, bits: u16) {
        let recipe_number = self.add_recipe(recipe.clone());
        let builder = EncodingBuilder::new(inst.into(), recipe_number, bits);
        let encoding = builder.build(&self.recipes, &mut self.inst_pred_reg);
        self.enc64.push(encoding);
    }

    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with and without REX.
    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
    fn enc_i32_i64(&mut self, inst: impl Into<InstSpec>, template: Template) {
        let inst: InstSpec = inst.into();
        self.enc32(inst.bind(I32), template.nonrex());

        // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
        // reg-alloc would never use r8 and up.
        self.enc64(inst.bind(I32), template.rex());
        self.enc64(inst.bind(I32), template.nonrex());
        self.enc64(inst.bind(I64), template.rex().w());
    }

    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with and without REX.
    /// Add encodings for `inst.i64` to X86_64 with a REX.W prefix.
    fn enc_i32_i64_instp(
        &mut self,
        inst: &Instruction,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        self.enc32_func(inst.bind(I32), template.nonrex(), |builder| {
            builder.inst_predicate(instp.clone())
        });

        // REX-less encoding must come after REX encoding so we don't use it by default. Otherwise
        // reg-alloc would never use r8 and up.
        self.enc64_func(inst.bind(I32), template.rex(), |builder| {
            builder.inst_predicate(instp.clone())
        });
        self.enc64_func(inst.bind(I32), template.nonrex(), |builder| {
            builder.inst_predicate(instp.clone())
        });
        self.enc64_func(inst.bind(I64), template.rex().w(), |builder| {
            builder.inst_predicate(instp)
        });
    }

    /// Add encodings for `inst` to X86_64 with and without a REX prefix.
    fn enc_x86_64(&mut self, inst: impl Into<InstSpec> + Clone, template: Template) {
        // See above comment about the ordering of rex vs non-rex encodings.
        self.enc64(inst.clone(), template.rex());
        self.enc64(inst, template);
    }

    /// Add encodings for `inst` to X86_64 with and without a REX prefix,
    /// guarded by the instruction predicate `instp`.
    fn enc_x86_64_instp(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        // See above comment about the ordering of rex vs non-rex encodings.
        self.enc64_func(inst.clone(), template.rex(), |builder| {
            builder.inst_predicate(instp.clone())
        });
        self.enc64_func(inst, template, |builder| builder.inst_predicate(instp));
    }
    /// Add encodings for `inst` to X86_64 with and without a REX prefix,
    /// guarded by the ISA predicate `isap`.
    fn enc_x86_64_isap(
        &mut self,
        inst: impl Clone + Into<InstSpec>,
        template: Template,
        isap: SettingPredicateNumber,
    ) {
        // See above comment about the ordering of rex vs non-rex encodings.
        self.enc64_isap(inst.clone(), template.rex(), isap);
        self.enc64_isap(inst, template, isap);
    }

    /// Add all three encodings for `inst`:
    /// - X86_32
    /// - X86_64 with and without the REX prefix.
    fn enc_both(&mut self, inst: impl Clone + Into<InstSpec>, template: Template) {
        self.enc32(inst.clone(), template.clone());
        self.enc_x86_64(inst, template);
    }
    /// Like `enc_both`, guarded by the ISA predicate `isap`.
    fn enc_both_isap(
        &mut self,
        inst: BoundInstruction,
        template: Template,
        isap: SettingPredicateNumber,
    ) {
        self.enc32_isap(inst.clone(), template.clone(), isap);
        self.enc_x86_64_isap(inst, template, isap);
    }
    /// Like `enc_both`, guarded by the instruction predicate `instp`.
    fn enc_both_instp(
        &mut self,
        inst: BoundInstruction,
        template: Template,
        instp: InstructionPredicateNode,
    ) {
        self.enc32_instp(inst.clone(), template.clone(), instp.clone());
        self.enc_x86_64_instp(inst, template, instp);
    }

    /// Add encodings for `inst.i32` to X86_32.
    /// Add encodings for `inst.i32` to X86_64 with and without REX.
    /// Add encodings for `inst.i64` to X86_64 with a REX prefix, using the `w_bit`
    /// argument to determine whether or not to set the REX.W bit.
    fn enc_i32_i64_ld_st(&mut self, inst: &Instruction, w_bit: bool, template: Template) {
        // 32-bit mode: a single encoding for the i32 form.
        self.enc32(inst.clone().bind(I32).bind_any(), template.clone());

        // REX-less encoding must come after REX encoding so we don't use it by
        // default. Otherwise reg-alloc would never use r8 and up.
        self.enc64(inst.clone().bind(I32).bind_any(), template.clone().rex());
        self.enc64(inst.clone().bind(I32).bind_any(), template.clone());

        // For the i64 form: either a single REX.W encoding, or (when the W
        // bit is not needed) REX and REX-less encodings, in that order.
        if w_bit {
            self.enc64(inst.clone().bind(I64).bind_any(), template.rex().w());
        } else {
            self.enc64(inst.clone().bind(I64).bind_any(), template.clone().rex());
            self.enc64(inst.clone().bind(I64).bind_any(), template);
        }
    }
}

// Definitions.

pub fn define(
    shared_defs: &SharedDefinitions,
    settings: &SettingGroup,
    x86: &InstructionGroup,
    r: &RecipeGroup,
) -> PerCpuModeEncodings {
    let shared = &shared_defs.instructions;
    let formats = &shared_defs.format_registry;

    // Shorthands for instructions.
    let adjust_sp_down = shared.by_name("adjust_sp_down");
    let adjust_sp_down_imm = shared.by_name("adjust_sp_down_imm");
    let adjust_sp_up_imm = shared.by_name("adjust_sp_up_imm");
    let band = shared.by_name("band");
    let band_imm = shared.by_name("band_imm");
    let band_not = shared.by_name("band_not");
    let bconst = shared.by_name("bconst");
    let bint = shared.by_name("bint");
    let bitcast = shared.by_name("bitcast");
    let bnot = shared.by_name("bnot");
    let bor = shared.by_name("bor");
    let bor_imm = shared.by_name("bor_imm");
    let brff = shared.by_name("brff");
    let brif = shared.by_name("brif");
    let brnz = shared.by_name("brnz");
    let brz = shared.by_name("brz");
    let bxor = shared.by_name("bxor");
    let bxor_imm = shared.by_name("bxor_imm");
    let call = shared.by_name("call");
    let call_indirect = shared.by_name("call_indirect");
    let ceil = shared.by_name("ceil");
    let clz = shared.by_name("clz");
    let copy = shared.by_name("copy");
    let copy_nop = shared.by_name("copy_nop");
    let
copy_special = shared.by_name("copy_special"); + let ctz = shared.by_name("ctz"); + let debugtrap = shared.by_name("debugtrap"); + let f32const = shared.by_name("f32const"); + let f64const = shared.by_name("f64const"); + let fadd = shared.by_name("fadd"); + let fcmp = shared.by_name("fcmp"); + let fcvt_from_sint = shared.by_name("fcvt_from_sint"); + let fdemote = shared.by_name("fdemote"); + let fdiv = shared.by_name("fdiv"); + let ffcmp = shared.by_name("ffcmp"); + let fill = shared.by_name("fill"); + let floor = shared.by_name("floor"); + let fmul = shared.by_name("fmul"); + let fpromote = shared.by_name("fpromote"); + let fsub = shared.by_name("fsub"); + let func_addr = shared.by_name("func_addr"); + let iadd = shared.by_name("iadd"); + let iadd_imm = shared.by_name("iadd_imm"); + let icmp = shared.by_name("icmp"); + let icmp_imm = shared.by_name("icmp_imm"); + let iconst = shared.by_name("iconst"); + let ifcmp = shared.by_name("ifcmp"); + let ifcmp_imm = shared.by_name("ifcmp_imm"); + let ifcmp_sp = shared.by_name("ifcmp_sp"); + let imul = shared.by_name("imul"); + let indirect_jump_table_br = shared.by_name("indirect_jump_table_br"); + let ireduce = shared.by_name("ireduce"); + let ishl = shared.by_name("ishl"); + let ishl_imm = shared.by_name("ishl_imm"); + let istore16 = shared.by_name("istore16"); + let istore16_complex = shared.by_name("istore16_complex"); + let istore32 = shared.by_name("istore32"); + let istore32_complex = shared.by_name("istore32_complex"); + let istore8 = shared.by_name("istore8"); + let istore8_complex = shared.by_name("istore8_complex"); + let isub = shared.by_name("isub"); + let jump = shared.by_name("jump"); + let jump_table_base = shared.by_name("jump_table_base"); + let jump_table_entry = shared.by_name("jump_table_entry"); + let load = shared.by_name("load"); + let load_complex = shared.by_name("load_complex"); + let nearest = shared.by_name("nearest"); + let popcnt = shared.by_name("popcnt"); + let regfill = 
shared.by_name("regfill"); + let regmove = shared.by_name("regmove"); + let regspill = shared.by_name("regspill"); + let return_ = shared.by_name("return"); + let rotl = shared.by_name("rotl"); + let rotl_imm = shared.by_name("rotl_imm"); + let rotr = shared.by_name("rotr"); + let rotr_imm = shared.by_name("rotr_imm"); + let selectif = shared.by_name("selectif"); + let sextend = shared.by_name("sextend"); + let sload16 = shared.by_name("sload16"); + let sload16_complex = shared.by_name("sload16_complex"); + let sload32 = shared.by_name("sload32"); + let sload32_complex = shared.by_name("sload32_complex"); + let sload8 = shared.by_name("sload8"); + let sload8_complex = shared.by_name("sload8_complex"); + let spill = shared.by_name("spill"); + let sqrt = shared.by_name("sqrt"); + let sshr = shared.by_name("sshr"); + let sshr_imm = shared.by_name("sshr_imm"); + let stack_addr = shared.by_name("stack_addr"); + let store = shared.by_name("store"); + let store_complex = shared.by_name("store_complex"); + let symbol_value = shared.by_name("symbol_value"); + let trap = shared.by_name("trap"); + let trapff = shared.by_name("trapff"); + let trapif = shared.by_name("trapif"); + let trueff = shared.by_name("trueff"); + let trueif = shared.by_name("trueif"); + let trunc = shared.by_name("trunc"); + let uextend = shared.by_name("uextend"); + let uload16 = shared.by_name("uload16"); + let uload16_complex = shared.by_name("uload16_complex"); + let uload32 = shared.by_name("uload32"); + let uload32_complex = shared.by_name("uload32_complex"); + let uload8 = shared.by_name("uload8"); + let uload8_complex = shared.by_name("uload8_complex"); + let ushr = shared.by_name("ushr"); + let ushr_imm = shared.by_name("ushr_imm"); + let x86_bsf = x86.by_name("x86_bsf"); + let x86_bsr = x86.by_name("x86_bsr"); + let x86_cvtt2si = x86.by_name("x86_cvtt2si"); + let x86_fmax = x86.by_name("x86_fmax"); + let x86_fmin = x86.by_name("x86_fmin"); + let x86_pop = x86.by_name("x86_pop"); + let x86_push 
= x86.by_name("x86_push"); + let x86_sdivmodx = x86.by_name("x86_sdivmodx"); + let x86_smulx = x86.by_name("x86_smulx"); + let x86_udivmodx = x86.by_name("x86_udivmodx"); + let x86_umulx = x86.by_name("x86_umulx"); + + // Shorthands for recipes. + let rec_adjustsp = r.template("adjustsp"); + let rec_adjustsp_ib = r.template("adjustsp_ib"); + let rec_adjustsp_id = r.template("adjustsp_id"); + let rec_allones_fnaddr4 = r.template("allones_fnaddr4"); + let rec_allones_fnaddr8 = r.template("allones_fnaddr8"); + let rec_brfb = r.template("brfb"); + let rec_brfd = r.template("brfd"); + let rec_brib = r.template("brib"); + let rec_brid = r.template("brid"); + let rec_bsf_and_bsr = r.template("bsf_and_bsr"); + let rec_call_id = r.template("call_id"); + let rec_call_plt_id = r.template("call_plt_id"); + let rec_call_r = r.template("call_r"); + let rec_cmov = r.template("cmov"); + let rec_copysp = r.template("copysp"); + let rec_div = r.template("div"); + let rec_debugtrap = r.recipe("debugtrap"); + let rec_f32imm_z = r.template("f32imm_z"); + let rec_f64imm_z = r.template("f64imm_z"); + let rec_fa = r.template("fa"); + let rec_fax = r.template("fax"); + let rec_fcmp = r.template("fcmp"); + let rec_fcscc = r.template("fcscc"); + let rec_ffillSib32 = r.template("ffillSib32"); + let rec_fillSib32 = r.template("fillSib32"); + let rec_fld = r.template("fld"); + let rec_fldDisp32 = r.template("fldDisp32"); + let rec_fldDisp8 = r.template("fldDisp8"); + let rec_fldWithIndex = r.template("fldWithIndex"); + let rec_fldWithIndexDisp32 = r.template("fldWithIndexDisp32"); + let rec_fldWithIndexDisp8 = r.template("fldWithIndexDisp8"); + let rec_fnaddr4 = r.template("fnaddr4"); + let rec_fnaddr8 = r.template("fnaddr8"); + let rec_fregfill32 = r.template("fregfill32"); + let rec_fregspill32 = r.template("fregspill32"); + let rec_frmov = r.template("frmov"); + let rec_frurm = r.template("frurm"); + let rec_fspillSib32 = r.template("fspillSib32"); + let rec_fst = r.template("fst"); + let 
rec_fstDisp32 = r.template("fstDisp32"); + let rec_fstDisp8 = r.template("fstDisp8"); + let rec_fstWithIndex = r.template("fstWithIndex"); + let rec_fstWithIndexDisp32 = r.template("fstWithIndexDisp32"); + let rec_fstWithIndexDisp8 = r.template("fstWithIndexDisp8"); + let rec_furm = r.template("furm"); + let rec_furmi_rnd = r.template("furmi_rnd"); + let rec_got_fnaddr8 = r.template("got_fnaddr8"); + let rec_got_gvaddr8 = r.template("got_gvaddr8"); + let rec_gvaddr4 = r.template("gvaddr4"); + let rec_gvaddr8 = r.template("gvaddr8"); + let rec_icscc = r.template("icscc"); + let rec_icscc_ib = r.template("icscc_ib"); + let rec_icscc_id = r.template("icscc_id"); + let rec_indirect_jmp = r.template("indirect_jmp"); + let rec_jmpb = r.template("jmpb"); + let rec_jmpd = r.template("jmpd"); + let rec_jt_base = r.template("jt_base"); + let rec_jt_entry = r.template("jt_entry"); + let rec_ld = r.template("ld"); + let rec_ldDisp32 = r.template("ldDisp32"); + let rec_ldDisp8 = r.template("ldDisp8"); + let rec_ldWithIndex = r.template("ldWithIndex"); + let rec_ldWithIndexDisp32 = r.template("ldWithIndexDisp32"); + let rec_ldWithIndexDisp8 = r.template("ldWithIndexDisp8"); + let rec_mulx = r.template("mulx"); + let rec_null = r.recipe("null"); + let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8"); + let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8"); + let rec_popq = r.template("popq"); + let rec_pu_id = r.template("pu_id"); + let rec_pu_id_bool = r.template("pu_id_bool"); + let rec_pu_iq = r.template("pu_iq"); + let rec_pushq = r.template("pushq"); + let rec_ret = r.template("ret"); + let rec_r_ib = r.template("r_ib"); + let rec_r_id = r.template("r_id"); + let rec_rcmp = r.template("rcmp"); + let rec_rcmp_ib = r.template("rcmp_ib"); + let rec_rcmp_id = r.template("rcmp_id"); + let rec_rcmp_sp = r.template("rcmp_sp"); + let rec_regfill32 = r.template("regfill32"); + let rec_regspill32 = r.template("regspill32"); + let rec_rc = r.template("rc"); + let rec_rfumr = 
r.template("rfumr"); + let rec_rfurm = r.template("rfurm"); + let rec_rmov = r.template("rmov"); + let rec_rr = r.template("rr"); + let rec_rrx = r.template("rrx"); + let rec_setf_abcd = r.template("setf_abcd"); + let rec_seti_abcd = r.template("seti_abcd"); + let rec_spaddr4_id = r.template("spaddr4_id"); + let rec_spaddr8_id = r.template("spaddr8_id"); + let rec_spillSib32 = r.template("spillSib32"); + let rec_st = r.template("st"); + let rec_stacknull = r.recipe("stacknull"); + let rec_stDisp32 = r.template("stDisp32"); + let rec_stDisp32_abcd = r.template("stDisp32_abcd"); + let rec_stDisp8 = r.template("stDisp8"); + let rec_stDisp8_abcd = r.template("stDisp8_abcd"); + let rec_stWithIndex = r.template("stWithIndex"); + let rec_stWithIndexDisp32 = r.template("stWithIndexDisp32"); + let rec_stWithIndexDisp32_abcd = r.template("stWithIndexDisp32_abcd"); + let rec_stWithIndexDisp8 = r.template("stWithIndexDisp8"); + let rec_stWithIndexDisp8_abcd = r.template("stWithIndexDisp8_abcd"); + let rec_stWithIndex_abcd = r.template("stWithIndex_abcd"); + let rec_st_abcd = r.template("st_abcd"); + let rec_t8jccb_abcd = r.template("t8jccb_abcd"); + let rec_t8jccd_abcd = r.template("t8jccd_abcd"); + let rec_t8jccd_long = r.template("t8jccd_long"); + let rec_tjccb = r.template("tjccb"); + let rec_tjccd = r.template("tjccd"); + let rec_trap = r.template("trap"); + let rec_trapif = r.recipe("trapif"); + let rec_trapff = r.recipe("trapff"); + let rec_u_id = r.template("u_id"); + let rec_umr = r.template("umr"); + let rec_ur = r.template("ur"); + let rec_urm = r.template("urm"); + let rec_urm_noflags = r.template("urm_noflags"); + let rec_urm_noflags_abcd = r.template("urm_noflags_abcd"); + + // Predicates shorthands. 
+ let all_ones_funcaddrs_and_not_is_pic = + settings.predicate_by_name("all_ones_funcaddrs_and_not_is_pic"); + let is_pic = settings.predicate_by_name("is_pic"); + let not_all_ones_funcaddrs_and_not_is_pic = + settings.predicate_by_name("not_all_ones_funcaddrs_and_not_is_pic"); + let not_is_pic = settings.predicate_by_name("not_is_pic"); + let use_popcnt = settings.predicate_by_name("use_popcnt"); + let use_lzcnt = settings.predicate_by_name("use_lzcnt"); + let use_bmi1 = settings.predicate_by_name("use_bmi1"); + let use_sse41 = settings.predicate_by_name("use_sse41"); + + // Definitions. + let mut e = PerCpuModeEncodings::new(); + + e.enc_i32_i64(iadd, rec_rr.opcodes(vec![0x01])); + e.enc_i32_i64(isub, rec_rr.opcodes(vec![0x29])); + e.enc_i32_i64(band, rec_rr.opcodes(vec![0x21])); + e.enc_i32_i64(bor, rec_rr.opcodes(vec![0x09])); + e.enc_i32_i64(bxor, rec_rr.opcodes(vec![0x31])); + + // x86 has a bitwise not instruction NOT. + e.enc_i32_i64(bnot, rec_ur.opcodes(vec![0xf7]).rrr(2)); + + // Also add a `b1` encodings for the logic instructions. + // TODO: Should this be done with 8-bit instructions? It would improve partial register + // dependencies. 
+ e.enc_both(band.bind(B1), rec_rr.opcodes(vec![0x21])); + e.enc_both(bor.bind(B1), rec_rr.opcodes(vec![0x09])); + e.enc_both(bxor.bind(B1), rec_rr.opcodes(vec![0x31])); + + e.enc_i32_i64(imul, rec_rrx.opcodes(vec![0x0f, 0xaf])); + e.enc_i32_i64(x86_sdivmodx, rec_div.opcodes(vec![0xf7]).rrr(7)); + e.enc_i32_i64(x86_udivmodx, rec_div.opcodes(vec![0xf7]).rrr(6)); + + e.enc_i32_i64(x86_smulx, rec_mulx.opcodes(vec![0xf7]).rrr(5)); + e.enc_i32_i64(x86_umulx, rec_mulx.opcodes(vec![0xf7]).rrr(4)); + + e.enc_i32_i64(copy, rec_umr.opcodes(vec![0x89])); + e.enc_both(copy.bind(B1), rec_umr.opcodes(vec![0x89])); + e.enc_both(copy.bind(I8), rec_umr.opcodes(vec![0x89])); + e.enc_both(copy.bind(I16), rec_umr.opcodes(vec![0x89])); + + // TODO For x86-64, only define REX forms for now, since we can't describe the + // special regunit immediate operands with the current constraint language. + for &ty in &[I8, I16, I32] { + e.enc32(regmove.bind(ty), rec_rmov.opcodes(vec![0x89])); + e.enc64(regmove.bind(ty), rec_rmov.opcodes(vec![0x89]).rex()); + } + e.enc64(regmove.bind(I64), rec_rmov.opcodes(vec![0x89]).rex().w()); + e.enc_both(regmove.bind(B1), rec_rmov.opcodes(vec![0x89])); + e.enc_both(regmove.bind(I8), rec_rmov.opcodes(vec![0x89])); + + e.enc_i32_i64(iadd_imm, rec_r_ib.opcodes(vec![0x83]).rrr(0)); + e.enc_i32_i64(iadd_imm, rec_r_id.opcodes(vec![0x81]).rrr(0)); + + e.enc_i32_i64(band_imm, rec_r_ib.opcodes(vec![0x83]).rrr(4)); + e.enc_i32_i64(band_imm, rec_r_id.opcodes(vec![0x81]).rrr(4)); + + e.enc_i32_i64(bor_imm, rec_r_ib.opcodes(vec![0x83]).rrr(1)); + e.enc_i32_i64(bor_imm, rec_r_id.opcodes(vec![0x81]).rrr(1)); + + e.enc_i32_i64(bxor_imm, rec_r_ib.opcodes(vec![0x83]).rrr(6)); + e.enc_i32_i64(bxor_imm, rec_r_id.opcodes(vec![0x81]).rrr(6)); + + // TODO: band_imm.i64 with an unsigned 32-bit immediate can be encoded as band_imm.i32. Can + // even use the single-byte immediate for 0xffff_ffXX masks. + + // Immediate constants. 
+ e.enc32(iconst.bind(I32), rec_pu_id.opcodes(vec![0xb8])); + + e.enc64(iconst.bind(I32), rec_pu_id.rex().opcodes(vec![0xb8])); + e.enc64(iconst.bind(I32), rec_pu_id.opcodes(vec![0xb8])); + + // The 32-bit immediate movl also zero-extends to 64 bits. + let f_unary_imm = formats.get(formats.by_name("UnaryImm")); + let is_unsigned_int32 = InstructionPredicate::new_is_unsigned_int(f_unary_imm, "imm", 32, 0); + + e.enc64_func( + iconst.bind(I64), + rec_pu_id.opcodes(vec![0xb8]).rex(), + |encoding| encoding.inst_predicate(is_unsigned_int32.clone()), + ); + e.enc64_func( + iconst.bind(I64), + rec_pu_id.opcodes(vec![0xb8]), + |encoding| encoding.inst_predicate(is_unsigned_int32), + ); + + // Sign-extended 32-bit immediate. + e.enc64( + iconst.bind(I64), + rec_u_id.rex().opcodes(vec![0xc7]).rrr(0).w(), + ); + + // Finally, the 0xb8 opcode takes an 8-byte immediate with a REX.W prefix. + e.enc64(iconst.bind(I64), rec_pu_iq.opcodes(vec![0xb8]).rex().w()); + + // Bool constants. + e.enc_both(bconst.bind(B1), rec_pu_id_bool.opcodes(vec![0xb8])); + + // Shifts and rotates. + // Note that the dynamic shift amount is only masked by 5 or 6 bits; the 8-bit + // and 16-bit shifts would need explicit masking. + + for &(inst, rrr) in &[(rotl, 0), (rotr, 1), (ishl, 4), (ushr, 5), (sshr, 7)] { + // Cannot use enc_i32_i64 for this pattern because instructions require + // to bind any. + e.enc32( + inst.bind(I32).bind_any(), + rec_rc.opcodes(vec![0xd3]).rrr(rrr), + ); + e.enc64( + inst.bind(I64).bind_any(), + rec_rc.opcodes(vec![0xd3]).rrr(rrr).rex().w(), + ); + e.enc64( + inst.bind(I32).bind_any(), + rec_rc.opcodes(vec![0xd3]).rrr(rrr).rex(), + ); + e.enc64( + inst.bind(I32).bind_any(), + rec_rc.opcodes(vec![0xd3]).rrr(rrr), + ); + } + + for &(inst, rrr) in &[ + (rotl_imm, 0), + (rotr_imm, 1), + (ishl_imm, 4), + (ushr_imm, 5), + (sshr_imm, 7), + ] { + e.enc_i32_i64(inst, rec_r_ib.opcodes(vec![0xc1]).rrr(rrr)); + } + + // Population count. 
+ e.enc32_isap( + popcnt.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xb8]), + use_popcnt, + ); + e.enc64_isap( + popcnt.bind(I64), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xb8]).rex().w(), + use_popcnt, + ); + e.enc64_isap( + popcnt.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xb8]).rex(), + use_popcnt, + ); + e.enc64_isap( + popcnt.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xb8]), + use_popcnt, + ); + + // Count leading zero bits. + e.enc32_isap( + clz.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xbd]), + use_lzcnt, + ); + e.enc64_isap( + clz.bind(I64), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xbd]).rex().w(), + use_lzcnt, + ); + e.enc64_isap( + clz.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xbd]).rex(), + use_lzcnt, + ); + e.enc64_isap( + clz.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xbd]), + use_lzcnt, + ); + + // Count trailing zero bits. + e.enc32_isap( + ctz.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xbc]), + use_bmi1, + ); + e.enc64_isap( + ctz.bind(I64), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xbc]).rex().w(), + use_bmi1, + ); + e.enc64_isap( + ctz.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xbc]).rex(), + use_bmi1, + ); + e.enc64_isap( + ctz.bind(I32), + rec_urm.opcodes(vec![0xf3, 0x0f, 0xbc]), + use_bmi1, + ); + + // Loads and stores. 
+ let f_load_complex = formats.get(formats.by_name("LoadComplex")); + let is_load_complex_length_two = InstructionPredicate::new_length_equals(f_load_complex, 2); + + for recipe in &[rec_ldWithIndex, rec_ldWithIndexDisp8, rec_ldWithIndexDisp32] { + e.enc_i32_i64_instp( + load_complex, + recipe.opcodes(vec![0x8b]), + is_load_complex_length_two.clone(), + ); + e.enc_x86_64_instp( + uload32_complex, + recipe.opcodes(vec![0x8b]), + is_load_complex_length_two.clone(), + ); + + e.enc64_instp( + sload32_complex, + recipe.opcodes(vec![0x63]).rex().w(), + is_load_complex_length_two.clone(), + ); + + e.enc_i32_i64_instp( + uload16_complex, + recipe.opcodes(vec![0x0f, 0xb7]), + is_load_complex_length_two.clone(), + ); + e.enc_i32_i64_instp( + sload16_complex, + recipe.opcodes(vec![0x0f, 0xbf]), + is_load_complex_length_two.clone(), + ); + + e.enc_i32_i64_instp( + uload8_complex, + recipe.opcodes(vec![0x0f, 0xb6]), + is_load_complex_length_two.clone(), + ); + + e.enc_i32_i64_instp( + sload8_complex, + recipe.opcodes(vec![0x0f, 0xbe]), + is_load_complex_length_two.clone(), + ); + } + + let f_store_complex = formats.get(formats.by_name("StoreComplex")); + let is_store_complex_length_three = InstructionPredicate::new_length_equals(f_store_complex, 3); + + for recipe in &[rec_stWithIndex, rec_stWithIndexDisp8, rec_stWithIndexDisp32] { + e.enc_i32_i64_instp( + store_complex, + recipe.opcodes(vec![0x89]), + is_store_complex_length_three.clone(), + ); + e.enc_x86_64_instp( + istore32_complex, + recipe.opcodes(vec![0x89]), + is_store_complex_length_three.clone(), + ); + e.enc_both_instp( + istore16_complex.bind(I32), + recipe.opcodes(vec![0x66, 0x89]), + is_store_complex_length_three.clone(), + ); + e.enc_x86_64_instp( + istore16_complex.bind(I64), + recipe.opcodes(vec![0x66, 0x89]), + is_store_complex_length_three.clone(), + ); + } + + for recipe in &[ + rec_stWithIndex_abcd, + rec_stWithIndexDisp8_abcd, + rec_stWithIndexDisp32_abcd, + ] { + e.enc_both_instp( + 
istore8_complex.bind(I32), + recipe.opcodes(vec![0x88]), + is_store_complex_length_three.clone(), + ); + e.enc_x86_64_instp( + istore8_complex.bind(I64), + recipe.opcodes(vec![0x88]), + is_store_complex_length_three.clone(), + ); + } + + for recipe in &[rec_st, rec_stDisp8, rec_stDisp32] { + e.enc_i32_i64_ld_st(store, true, recipe.opcodes(vec![0x89])); + e.enc_x86_64(istore32.bind(I64).bind_any(), recipe.opcodes(vec![0x89])); + e.enc_i32_i64_ld_st(istore16, false, recipe.opcodes(vec![0x66, 0x89])); + } + + // Byte stores are more complicated because the registers they can address + // depends of the presence of a REX prefix. The st*_abcd recipes fall back to + // the corresponding st* recipes when a REX prefix is applied. + + for recipe in &[rec_st_abcd, rec_stDisp8_abcd, rec_stDisp32_abcd] { + e.enc_both(istore8.bind(I32).bind_any(), recipe.opcodes(vec![0x88])); + e.enc_x86_64(istore8.bind(I64).bind_any(), recipe.opcodes(vec![0x88])); + } + + e.enc_i32_i64(spill, rec_spillSib32.opcodes(vec![0x89])); + e.enc_i32_i64(regspill, rec_regspill32.opcodes(vec![0x89])); + + // Use a 32-bit write for spilling `b1`, `i8` and `i16` to avoid + // constraining the permitted registers. + // See MIN_SPILL_SLOT_SIZE which makes this safe. 
+ + e.enc_both(spill.bind(B1), rec_spillSib32.opcodes(vec![0x89])); + e.enc_both(regspill.bind(B1), rec_regspill32.opcodes(vec![0x89])); + for &ty in &[I8, I16] { + e.enc_both(spill.bind(ty), rec_spillSib32.opcodes(vec![0x89])); + e.enc_both(regspill.bind(ty), rec_regspill32.opcodes(vec![0x89])); + } + + for recipe in &[rec_ld, rec_ldDisp8, rec_ldDisp32] { + e.enc_i32_i64_ld_st(load, true, recipe.opcodes(vec![0x8b])); + e.enc_x86_64(uload32.bind(I64), recipe.opcodes(vec![0x8b])); + e.enc64(sload32.bind(I64), recipe.opcodes(vec![0x63]).rex().w()); + e.enc_i32_i64_ld_st(uload16, true, recipe.opcodes(vec![0x0f, 0xb7])); + e.enc_i32_i64_ld_st(sload16, true, recipe.opcodes(vec![0x0f, 0xbf])); + e.enc_i32_i64_ld_st(uload8, true, recipe.opcodes(vec![0x0f, 0xb6])); + e.enc_i32_i64_ld_st(sload8, true, recipe.opcodes(vec![0x0f, 0xbe])); + } + + e.enc_i32_i64(fill, rec_fillSib32.opcodes(vec![0x8b])); + e.enc_i32_i64(regfill, rec_regfill32.opcodes(vec![0x8b])); + + // Load 32 bits from `b1`, `i8` and `i16` spill slots. See `spill.b1` above. + + e.enc_both(fill.bind(B1), rec_fillSib32.opcodes(vec![0x8b])); + e.enc_both(regfill.bind(B1), rec_regfill32.opcodes(vec![0x8b])); + for &ty in &[I8, I16] { + e.enc_both(fill.bind(ty), rec_fillSib32.opcodes(vec![0x8b])); + e.enc_both(regfill.bind(ty), rec_regfill32.opcodes(vec![0x8b])); + } + + // Push and Pop. + e.enc32(x86_push.bind(I32), rec_pushq.opcodes(vec![0x50])); + e.enc_x86_64(x86_push.bind(I64), rec_pushq.opcodes(vec![0x50])); + + e.enc32(x86_pop.bind(I32), rec_popq.opcodes(vec![0x58])); + e.enc_x86_64(x86_pop.bind(I64), rec_popq.opcodes(vec![0x58])); + + // Copy Special + // For x86-64, only define REX forms for now, since we can't describe the + // special regunit immediate operands with the current constraint language. 
+ e.enc64(copy_special, rec_copysp.opcodes(vec![0x89]).rex().w()); + e.enc32(copy_special, rec_copysp.opcodes(vec![0x89])); + + // Stack-slot-to-the-same-stack-slot copy, which is guaranteed to turn + // into a no-op. + // The same encoding is generated for both the 64- and 32-bit architectures. + for &ty in &[I64, I32, I16, I8] { + e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); + e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); + } + for &ty in &[F64, F32] { + e.enc64_rec(copy_nop.bind(ty), rec_stacknull, 0); + e.enc32_rec(copy_nop.bind(ty), rec_stacknull, 0); + } + + // Adjust SP down by a dynamic value (or up, with a negative operand). + e.enc32(adjust_sp_down.bind(I32), rec_adjustsp.opcodes(vec![0x29])); + e.enc64( + adjust_sp_down.bind(I64), + rec_adjustsp.opcodes(vec![0x29]).rex().w(), + ); + + // Adjust SP up by an immediate (or down, with a negative immediate). + e.enc32(adjust_sp_up_imm, rec_adjustsp_ib.opcodes(vec![0x83])); + e.enc32(adjust_sp_up_imm, rec_adjustsp_id.opcodes(vec![0x81])); + e.enc64( + adjust_sp_up_imm, + rec_adjustsp_ib.opcodes(vec![0x83]).rex().w(), + ); + e.enc64( + adjust_sp_up_imm, + rec_adjustsp_id.opcodes(vec![0x81]).rex().w(), + ); + + // Adjust SP down by an immediate (or up, with a negative immediate). + e.enc32( + adjust_sp_down_imm, + rec_adjustsp_ib.opcodes(vec![0x83]).rrr(5), + ); + e.enc32( + adjust_sp_down_imm, + rec_adjustsp_id.opcodes(vec![0x81]).rrr(5), + ); + e.enc64( + adjust_sp_down_imm, + rec_adjustsp_ib.opcodes(vec![0x83]).rrr(5).rex().w(), + ); + e.enc64( + adjust_sp_down_imm, + rec_adjustsp_id.opcodes(vec![0x81]).rrr(5).rex().w(), + ); + + // Float loads and stores. 
+ e.enc_both( + load.bind(F32).bind_any(), + rec_fld.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + e.enc_both( + load.bind(F32).bind_any(), + rec_fldDisp8.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + e.enc_both( + load.bind(F32).bind_any(), + rec_fldDisp32.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndex.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndexDisp8.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + e.enc_both( + load_complex.bind(F32), + rec_fldWithIndexDisp32.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + + e.enc_both( + load.bind(F64).bind_any(), + rec_fld.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + e.enc_both( + load.bind(F64).bind_any(), + rec_fldDisp8.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + e.enc_both( + load.bind(F64).bind_any(), + rec_fldDisp32.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndex.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndexDisp8.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + e.enc_both( + load_complex.bind(F64), + rec_fldWithIndexDisp32.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + + e.enc_both( + store.bind(F32).bind_any(), + rec_fst.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + e.enc_both( + store.bind(F32).bind_any(), + rec_fstDisp8.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + e.enc_both( + store.bind(F32).bind_any(), + rec_fstDisp32.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndex.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndexDisp8.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + e.enc_both( + store_complex.bind(F32), + rec_fstWithIndexDisp32.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + + e.enc_both( + store.bind(F64).bind_any(), + rec_fst.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + e.enc_both( + store.bind(F64).bind_any(), + rec_fstDisp8.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + e.enc_both( + 
store.bind(F64).bind_any(), + rec_fstDisp32.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + + e.enc_both( + store_complex.bind(F64), + rec_fstWithIndex.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + e.enc_both( + store_complex.bind(F64), + rec_fstWithIndexDisp8.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + e.enc_both( + store_complex.bind(F64), + rec_fstWithIndexDisp32.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + + e.enc_both( + fill.bind(F32), + rec_ffillSib32.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + e.enc_both( + regfill.bind(F32), + rec_fregfill32.opcodes(vec![0xf3, 0x0f, 0x10]), + ); + e.enc_both( + fill.bind(F64), + rec_ffillSib32.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + e.enc_both( + regfill.bind(F64), + rec_fregfill32.opcodes(vec![0xf2, 0x0f, 0x10]), + ); + + e.enc_both( + spill.bind(F32), + rec_fspillSib32.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + e.enc_both( + regspill.bind(F32), + rec_fregspill32.opcodes(vec![0xf3, 0x0f, 0x11]), + ); + e.enc_both( + spill.bind(F64), + rec_fspillSib32.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + e.enc_both( + regspill.bind(F64), + rec_fregspill32.opcodes(vec![0xf2, 0x0f, 0x11]), + ); + + // Function addresses. + + // Non-PIC, all-ones funcaddresses. + e.enc32_isap( + func_addr.bind(I32), + rec_fnaddr4.opcodes(vec![0xb8]), + not_all_ones_funcaddrs_and_not_is_pic, + ); + e.enc64_isap( + func_addr.bind(I64), + rec_fnaddr8.opcodes(vec![0xb8]).rex().w(), + not_all_ones_funcaddrs_and_not_is_pic, + ); + + // Non-PIC, all-zeros funcaddresses. + e.enc32_isap( + func_addr.bind(I32), + rec_allones_fnaddr4.opcodes(vec![0xb8]), + all_ones_funcaddrs_and_not_is_pic, + ); + e.enc64_isap( + func_addr.bind(I64), + rec_allones_fnaddr8.opcodes(vec![0xb8]).rex().w(), + all_ones_funcaddrs_and_not_is_pic, + ); + + // 64-bit, colocated, both PIC and non-PIC. Use the lea instruction's pc-relative field. 
+ let f_func_addr = formats.get(formats.by_name("FuncAddr")); + let is_colocated_func = InstructionPredicate::new_is_colocated_func(f_func_addr, "func_ref"); + e.enc64_instp( + func_addr.bind(I64), + rec_pcrel_fnaddr8.opcodes(vec![0x8d]).rex().w(), + is_colocated_func, + ); + + // 64-bit, non-colocated, PIC. + e.enc64_isap( + func_addr.bind(I64), + rec_got_fnaddr8.opcodes(vec![0x8b]).rex().w(), + is_pic, + ); + + // Global addresses. + + // Non-PIC. + e.enc32_isap( + symbol_value.bind(I32), + rec_gvaddr4.opcodes(vec![0xb8]), + not_is_pic, + ); + e.enc64_isap( + symbol_value.bind(I64), + rec_gvaddr8.opcodes(vec![0xb8]).rex().w(), + not_is_pic, + ); + + // PIC, colocated. + e.enc64_func( + symbol_value.bind(I64), + rec_pcrel_gvaddr8.opcodes(vec![0x8d]).rex().w(), + |encoding| { + encoding + .isa_predicate(is_pic) + .inst_predicate(InstructionPredicate::new_is_colocated_data(formats)) + }, + ); + + // PIC, non-colocated. + e.enc64_isap( + symbol_value.bind(I64), + rec_got_gvaddr8.opcodes(vec![0x8b]).rex().w(), + is_pic, + ); + + // Stack addresses. + // + // TODO: Add encoding rules for stack_load and stack_store, so that they + // don't get legalized to stack_addr + load/store. + e.enc32(stack_addr.bind(I32), rec_spaddr4_id.opcodes(vec![0x8d])); + e.enc64( + stack_addr.bind(I64), + rec_spaddr8_id.opcodes(vec![0x8d]).rex().w(), + ); + + // Call/return + + // 32-bit, both PIC and non-PIC. + e.enc32(call, rec_call_id.opcodes(vec![0xe8])); + + // 64-bit, colocated, both PIC and non-PIC. Use the call instruction's pc-relative field. + let f_call = formats.get(formats.by_name("Call")); + let is_colocated_func = InstructionPredicate::new_is_colocated_func(f_call, "func_ref"); + e.enc64_instp(call, rec_call_id.opcodes(vec![0xe8]), is_colocated_func); + + // 64-bit, non-colocated, PIC. There is no 64-bit non-colocated non-PIC version, since non-PIC + // is currently using the large model, which requires calls be lowered to + // func_addr+call_indirect. 
+ e.enc64_isap(call, rec_call_plt_id.opcodes(vec![0xe8]), is_pic); + + e.enc32( + call_indirect.bind(I32), + rec_call_r.opcodes(vec![0xff]).rrr(2), + ); + e.enc64( + call_indirect.bind(I64), + rec_call_r.opcodes(vec![0xff]).rrr(2).rex(), + ); + e.enc64( + call_indirect.bind(I64), + rec_call_r.opcodes(vec![0xff]).rrr(2), + ); + + e.enc32(return_, rec_ret.opcodes(vec![0xc3])); + e.enc64(return_, rec_ret.opcodes(vec![0xc3])); + + // Branches. + e.enc32(jump, rec_jmpb.opcodes(vec![0xeb])); + e.enc64(jump, rec_jmpb.opcodes(vec![0xeb])); + e.enc32(jump, rec_jmpd.opcodes(vec![0xe9])); + e.enc64(jump, rec_jmpd.opcodes(vec![0xe9])); + + e.enc_both(brif, rec_brib.opcodes(vec![0x70])); + e.enc_both(brif, rec_brid.opcodes(vec![0x0f, 0x80])); + + // Not all float condition codes are legal, see `supported_floatccs`. + e.enc_both(brff, rec_brfb.opcodes(vec![0x70])); + e.enc_both(brff, rec_brfd.opcodes(vec![0x0f, 0x80])); + + // Note that the tjccd opcode will be prefixed with 0x0f. + e.enc_i32_i64(brz, rec_tjccb.opcodes(vec![0x74])); + e.enc_i32_i64(brz, rec_tjccd.opcodes(vec![0x84])); + e.enc_i32_i64(brnz, rec_tjccb.opcodes(vec![0x75])); + e.enc_i32_i64(brnz, rec_tjccd.opcodes(vec![0x85])); + + // Branch on a b1 value in a register only looks at the low 8 bits. See also + // bint encodings below. + // + // Start with the worst-case encoding for X86_32 only. The register allocator + // can't handle a branch with an ABCD-constrained operand. + e.enc32(brz.bind(B1), rec_t8jccd_long.opcodes(vec![0x84])); + e.enc32(brnz.bind(B1), rec_t8jccd_long.opcodes(vec![0x85])); + + e.enc_both(brz.bind(B1), rec_t8jccb_abcd.opcodes(vec![0x74])); + e.enc_both(brz.bind(B1), rec_t8jccd_abcd.opcodes(vec![0x84])); + e.enc_both(brnz.bind(B1), rec_t8jccb_abcd.opcodes(vec![0x75])); + e.enc_both(brnz.bind(B1), rec_t8jccd_abcd.opcodes(vec![0x85])); + + // Jump tables. 
+ e.enc64( + jump_table_entry.bind(I64).bind_any().bind_any(), + rec_jt_entry.opcodes(vec![0x63]).rex().w(), + ); + e.enc32( + jump_table_entry.bind(I32).bind_any().bind_any(), + rec_jt_entry.opcodes(vec![0x8b]), + ); + + e.enc64( + jump_table_base.bind(I64), + rec_jt_base.opcodes(vec![0x8d]).rex().w(), + ); + e.enc32(jump_table_base.bind(I32), rec_jt_base.opcodes(vec![0x8d])); + + e.enc_x86_64( + indirect_jump_table_br.bind(I64), + rec_indirect_jmp.opcodes(vec![0xff]).rrr(4), + ); + e.enc32( + indirect_jump_table_br.bind(I32), + rec_indirect_jmp.opcodes(vec![0xff]).rrr(4), + ); + + // Trap as ud2 + e.enc32(trap, rec_trap.opcodes(vec![0x0f, 0x0b])); + e.enc64(trap, rec_trap.opcodes(vec![0x0f, 0x0b])); + + // Debug trap as int3 + e.enc32_rec(debugtrap, rec_debugtrap, 0); + e.enc64_rec(debugtrap, rec_debugtrap, 0); + + e.enc32_rec(trapif, rec_trapif, 0); + e.enc64_rec(trapif, rec_trapif, 0); + e.enc32_rec(trapff, rec_trapff, 0); + e.enc64_rec(trapff, rec_trapff, 0); + + // Comparisons + e.enc_i32_i64(icmp, rec_icscc.opcodes(vec![0x39])); + e.enc_i32_i64(icmp_imm, rec_icscc_ib.opcodes(vec![0x83]).rrr(7)); + e.enc_i32_i64(icmp_imm, rec_icscc_id.opcodes(vec![0x81]).rrr(7)); + e.enc_i32_i64(ifcmp, rec_rcmp.opcodes(vec![0x39])); + e.enc_i32_i64(ifcmp_imm, rec_rcmp_ib.opcodes(vec![0x83]).rrr(7)); + e.enc_i32_i64(ifcmp_imm, rec_rcmp_id.opcodes(vec![0x81]).rrr(7)); + // TODO: We could special-case ifcmp_imm(x, 0) to TEST(x, x). + + e.enc32(ifcmp_sp.bind(I32), rec_rcmp_sp.opcodes(vec![0x39])); + e.enc64( + ifcmp_sp.bind(I64), + rec_rcmp_sp.opcodes(vec![0x39]).rex().w(), + ); + + // Convert flags to bool. + // This encodes `b1` as an 8-bit low register with the value 0 or 1. + e.enc_both(trueif, rec_seti_abcd.opcodes(vec![0x0f, 0x90])); + e.enc_both(trueff, rec_setf_abcd.opcodes(vec![0x0f, 0x90])); + + // Conditional move (a.k.a integer select). 
+ e.enc_i32_i64(selectif, rec_cmov.opcodes(vec![0x0f, 0x40])); + + // Bit scan forwards and reverse + e.enc_i32_i64(x86_bsf, rec_bsf_and_bsr.opcodes(vec![0x0f, 0xbc])); + e.enc_i32_i64(x86_bsr, rec_bsf_and_bsr.opcodes(vec![0x0f, 0xbd])); + + // Convert bool to int. + // + // This assumes that b1 is represented as an 8-bit low register with the value 0 + // or 1. + // + // Encode movzbq as movzbl, because it's equivalent and shorter. + e.enc32( + bint.bind(I32).bind(B1), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + ); + + e.enc64( + bint.bind(I64).bind(B1), + rec_urm_noflags.opcodes(vec![0x0f, 0xb6]).rex(), + ); + e.enc64( + bint.bind(I64).bind(B1), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + ); + e.enc64( + bint.bind(I32).bind(B1), + rec_urm_noflags.opcodes(vec![0x0f, 0xb6]).rex(), + ); + e.enc64( + bint.bind(I32).bind(B1), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + ); + + // Numerical conversions. + + // Reducing an integer is a no-op. + e.enc32_rec(ireduce.bind(I8).bind(I16), rec_null, 0); + e.enc32_rec(ireduce.bind(I8).bind(I32), rec_null, 0); + e.enc32_rec(ireduce.bind(I16).bind(I32), rec_null, 0); + + e.enc64_rec(ireduce.bind(I8).bind(I16), rec_null, 0); + e.enc64_rec(ireduce.bind(I8).bind(I32), rec_null, 0); + e.enc64_rec(ireduce.bind(I16).bind(I32), rec_null, 0); + e.enc64_rec(ireduce.bind(I8).bind(I64), rec_null, 0); + e.enc64_rec(ireduce.bind(I16).bind(I64), rec_null, 0); + e.enc64_rec(ireduce.bind(I32).bind(I64), rec_null, 0); + + // TODO: Add encodings for cbw, cwde, cdqe, which are sign-extending + // instructions for %al/%ax/%eax to %ax/%eax/%rax. 
+ + // movsbl + e.enc32( + sextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xbe]), + ); + e.enc64( + sextend.bind(I32).bind(I8), + rec_urm_noflags.opcodes(vec![0x0f, 0xbe]).rex(), + ); + e.enc64( + sextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xbe]), + ); + + // movswl + e.enc32( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xbf]), + ); + e.enc64( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xbf]).rex(), + ); + e.enc64( + sextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xbf]), + ); + + // movsbq + e.enc64( + sextend.bind(I64).bind(I8), + rec_urm_noflags.opcodes(vec![0x0f, 0xbe]).rex().w(), + ); + + // movswq + e.enc64( + sextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xbf]).rex().w(), + ); + + // movslq + e.enc64( + sextend.bind(I64).bind(I32), + rec_urm_noflags.opcodes(vec![0x63]).rex().w(), + ); + + // movzbl + e.enc32( + uextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + ); + e.enc64( + uextend.bind(I32).bind(I8), + rec_urm_noflags.opcodes(vec![0x0f, 0xb6]).rex(), + ); + e.enc64( + uextend.bind(I32).bind(I8), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + ); + + // movzwl + e.enc32( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xb7]), + ); + e.enc64( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xb7]).rex(), + ); + e.enc64( + uextend.bind(I32).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xb7]), + ); + + // movzbq, encoded as movzbl because it's equivalent and shorter. 
+ e.enc64( + uextend.bind(I64).bind(I8), + rec_urm_noflags.opcodes(vec![0x0f, 0xb6]).rex(), + ); + e.enc64( + uextend.bind(I64).bind(I8), + rec_urm_noflags_abcd.opcodes(vec![0x0f, 0xb6]), + ); + + // movzwq, encoded as movzwl because it's equivalent and shorter + e.enc64( + uextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xb7]).rex(), + ); + e.enc64( + uextend.bind(I64).bind(I16), + rec_urm_noflags.opcodes(vec![0x0f, 0xb7]), + ); + + // A 32-bit register copy clears the high 32 bits. + e.enc64( + uextend.bind(I64).bind(I32), + rec_umr.opcodes(vec![0x89]).rex(), + ); + e.enc64(uextend.bind(I64).bind(I32), rec_umr.opcodes(vec![0x89])); + + // Floating point + + // Floating-point constants equal to 0.0 can be encoded using either `xorps` or `xorpd`, for + // 32-bit and 64-bit floats respectively. + let f_unary_ieee32 = formats.get(formats.by_name("UnaryIeee32")); + let is_zero_32_bit_float = InstructionPredicate::new_is_zero_32bit_float(f_unary_ieee32, "imm"); + e.enc32_instp( + f32const, + rec_f32imm_z.opcodes(vec![0x0f, 0x57]), + is_zero_32_bit_float.clone(), + ); + + let f_unary_ieee64 = formats.get(formats.by_name("UnaryIeee64")); + let is_zero_64_bit_float = InstructionPredicate::new_is_zero_64bit_float(f_unary_ieee64, "imm"); + e.enc32_instp( + f64const, + rec_f64imm_z.opcodes(vec![0x66, 0x0f, 0x57]), + is_zero_64_bit_float.clone(), + ); + + e.enc_x86_64_instp( + f32const, + rec_f32imm_z.opcodes(vec![0x0f, 0x57]), + is_zero_32_bit_float, + ); + e.enc_x86_64_instp( + f64const, + rec_f64imm_z.opcodes(vec![0x66, 0x0f, 0x57]), + is_zero_64_bit_float, + ); + + // movd + e.enc_both( + bitcast.bind(F32).bind(I32), + rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]), + ); + e.enc_both( + bitcast.bind(I32).bind(F32), + rec_rfumr.opcodes(vec![0x66, 0x0f, 0x7e]), + ); + + // movq + e.enc64( + bitcast.bind(F64).bind(I64), + rec_frurm.opcodes(vec![0x66, 0x0f, 0x6e]).rex().w(), + ); + e.enc64( + bitcast.bind(I64).bind(F64), + rec_rfumr.opcodes(vec![0x66, 0x0f, 
0x7e]).rex().w(), + ); + + // movaps + e.enc_both(copy.bind(F32), rec_furm.opcodes(vec![0x0f, 0x28])); + e.enc_both(copy.bind(F64), rec_furm.opcodes(vec![0x0f, 0x28])); + + // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit + // immediate operands with the current constraint language. + e.enc32(regmove.bind(F32), rec_frmov.opcodes(vec![0x0f, 0x28])); + e.enc64(regmove.bind(F32), rec_frmov.opcodes(vec![0x0f, 0x28]).rex()); + + // TODO For x86-64, only define REX forms for now, since we can't describe the special regunit + // immediate operands with the current constraint language. + e.enc32(regmove.bind(F64), rec_frmov.opcodes(vec![0x0f, 0x28])); + e.enc64(regmove.bind(F64), rec_frmov.opcodes(vec![0x0f, 0x28]).rex()); + + // cvtsi2ss + e.enc_i32_i64( + fcvt_from_sint.bind(F32), + rec_frurm.opcodes(vec![0xf3, 0x0f, 0x2a]), + ); + + // cvtsi2sd + e.enc_i32_i64( + fcvt_from_sint.bind(F64), + rec_frurm.opcodes(vec![0xf2, 0x0f, 0x2a]), + ); + + // cvtss2sd + e.enc_both( + fpromote.bind(F64).bind(F32), + rec_furm.opcodes(vec![0xf3, 0x0f, 0x5a]), + ); + + // cvtsd2ss + e.enc_both( + fdemote.bind(F32).bind(F64), + rec_furm.opcodes(vec![0xf2, 0x0f, 0x5a]), + ); + + // cvttss2si + e.enc_both( + x86_cvtt2si.bind(I32).bind(F32), + rec_rfurm.opcodes(vec![0xf3, 0x0f, 0x2c]), + ); + e.enc64( + x86_cvtt2si.bind(I64).bind(F32), + rec_rfurm.opcodes(vec![0xf3, 0x0f, 0x2c]).rex().w(), + ); + + // cvttsd2si + e.enc_both( + x86_cvtt2si.bind(I32).bind(F64), + rec_rfurm.opcodes(vec![0xf2, 0x0f, 0x2c]), + ); + e.enc64( + x86_cvtt2si.bind(I64).bind(F64), + rec_rfurm.opcodes(vec![0xf2, 0x0f, 0x2c]).rex().w(), + ); + + // Exact square roots. + e.enc_both(sqrt.bind(F32), rec_furm.opcodes(vec![0xf3, 0x0f, 0x51])); + e.enc_both(sqrt.bind(F64), rec_furm.opcodes(vec![0xf2, 0x0f, 0x51])); + + // Rounding. The recipe looks at the opcode to pick an immediate. 
+ for inst in &[nearest, floor, ceil, trunc] { + e.enc_both_isap( + inst.bind(F32), + rec_furmi_rnd.opcodes(vec![0x66, 0x0f, 0x3a, 0x0a]), + use_sse41, + ); + e.enc_both_isap( + inst.bind(F64), + rec_furmi_rnd.opcodes(vec![0x66, 0x0f, 0x3a, 0x0b]), + use_sse41, + ); + } + + // Binary arithmetic ops. + for &(inst, opc) in &[ + (fadd, 0x58), + (fsub, 0x5c), + (fmul, 0x59), + (fdiv, 0x5e), + (x86_fmin, 0x5d), + (x86_fmax, 0x5f), + ] { + e.enc_both(inst.bind(F32), rec_fa.opcodes(vec![0xf3, 0x0f, opc])); + e.enc_both(inst.bind(F64), rec_fa.opcodes(vec![0xf2, 0x0f, opc])); + } + + // Binary bitwise ops. + for &(inst, opc) in &[(band, 0x54), (bor, 0x56), (bxor, 0x57)] { + e.enc_both(inst.bind(F32), rec_fa.opcodes(vec![0x0f, opc])); + e.enc_both(inst.bind(F64), rec_fa.opcodes(vec![0x0f, opc])); + } + + // The `andnps(x,y)` instruction computes `~x&y`, while band_not(x,y)` is `x&~y. + e.enc_both(band_not.bind(F32), rec_fax.opcodes(vec![0x0f, 0x55])); + e.enc_both(band_not.bind(F64), rec_fax.opcodes(vec![0x0f, 0x55])); + + // Comparisons. + // + // This only covers the condition codes in `supported_floatccs`, the rest are + // handled by legalization patterns. 
+ e.enc_both(fcmp.bind(F32), rec_fcscc.opcodes(vec![0x0f, 0x2e])); + e.enc_both(fcmp.bind(F64), rec_fcscc.opcodes(vec![0x66, 0x0f, 0x2e])); + e.enc_both(ffcmp.bind(F32), rec_fcmp.opcodes(vec![0x0f, 0x2e])); + e.enc_both(ffcmp.bind(F64), rec_fcmp.opcodes(vec![0x66, 0x0f, 0x2e])); + + e +} diff --git a/cranelift/codegen/meta/src/isa/x86/mod.rs b/cranelift/codegen/meta/src/isa/x86/mod.rs index 5407ec2e87..a8002890f7 100644 --- a/cranelift/codegen/meta/src/isa/x86/mod.rs +++ b/cranelift/codegen/meta/src/isa/x86/mod.rs @@ -1,15 +1,15 @@ use crate::cdsl::cpu_modes::CpuMode; -use crate::cdsl::instructions::InstructionPredicateMap; use crate::cdsl::isa::TargetIsa; -use crate::cdsl::recipes::Recipes; use crate::shared::types::Bool::B1; use crate::shared::types::Float::{F32, F64}; use crate::shared::types::Int::{I16, I32, I64, I8}; use crate::shared::Definitions as SharedDefinitions; +mod encodings; mod instructions; mod legalize; +mod recipes; mod registers; mod settings; @@ -51,12 +51,17 @@ pub fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa { x86_64.legalize_type(F32, x86_expand); x86_64.legalize_type(F64, x86_expand); + let recipes = recipes::define(shared_defs, &settings, ®s); + + let encodings = encodings::define(shared_defs, &settings, &inst_group, &recipes); + x86_32.set_encodings(encodings.enc32); + x86_64.set_encodings(encodings.enc64); + let encodings_predicates = encodings.inst_pred_reg.extract(); + + let recipes = encodings.recipes; + let cpu_modes = vec![x86_64, x86_32]; - let recipes = Recipes::new(); - - let encodings_predicates = InstructionPredicateMap::new(); - TargetIsa::new( "x86", inst_group, diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs new file mode 100644 index 0000000000..5c1d28ef20 --- /dev/null +++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs @@ -0,0 +1,2805 @@ +use std::rc::Rc; + +use crate::cdsl::ast::Literal; +use crate::cdsl::formats::{FormatRegistry, 
InstructionFormat};
use crate::cdsl::instructions::InstructionPredicate;
use crate::cdsl::recipes::{
    EncodingRecipe, EncodingRecipeBuilder, OperandConstraint, Register, Stack,
};
use crate::cdsl::regs::IsaRegs;
use crate::cdsl::settings::SettingGroup;
use crate::shared::Definitions as SharedDefinitions;

/// Helper data structure to create recipes and template recipes.
/// It contains all the recipes and recipe templates that might be used in the encodings crate of
/// this same directory.
pub struct RecipeGroup<'builder> {
    /// Memoized format pointer, to pass it to builders later.
    formats: &'builder FormatRegistry,

    /// Memoized registers description, to pass it to builders later.
    regs: &'builder IsaRegs,

    /// All the recipes explicitly created in this file. This is different from the final set of
    /// recipes, which is definitive only once encodings have generated new recipes on the fly.
    recipes: Vec<EncodingRecipe>,

    /// All the recipe templates created in this file.
    templates: Vec<Rc<Template<'builder>>>,
}

impl<'builder> RecipeGroup<'builder> {
    fn new(formats: &'builder FormatRegistry, regs: &'builder IsaRegs) -> Self {
        Self {
            formats,
            regs,
            recipes: Vec::new(),
            templates: Vec::new(),
        }
    }

    /// Build `recipe` against the format registry and record it in this group.
    fn add_recipe(&mut self, recipe: EncodingRecipeBuilder) {
        self.recipes.push(recipe.build(self.formats));
    }

    /// Wrap `recipe` in a default (non-specialized) `Template` and record it.
    fn add_template_recipe(&mut self, recipe: EncodingRecipeBuilder) -> Rc<Template<'builder>> {
        self.add_template(Template::new(recipe, self.formats, self.regs))
    }

    /// Record an already-configured `Template` and hand back a shared handle to it.
    fn add_template(&mut self, template: Template<'builder>) -> Rc<Template<'builder>> {
        let template = Rc::new(template);
        self.templates.push(Rc::clone(&template));
        template
    }

    /// Look up a plain (non-template) recipe by name.
    ///
    /// # Panics
    /// Panics when no recipe of that name was registered (it may be a template instead).
    pub fn recipe(&self, name: &str) -> &EncodingRecipe {
        self.recipes
            .iter()
            .find(|recipe| recipe.name == name)
            .unwrap_or_else(|| panic!("unknown recipe name: {}. Try template?", name))
    }

    /// Look up a recipe template by name.
    ///
    /// # Panics
    /// Panics when no template of that name was registered (it may be a plain recipe instead).
    pub fn template(&self, name: &str) -> &Template {
        self.templates
            .iter()
            .find(|recipe| recipe.name() == name)
            .unwrap_or_else(|| panic!("unknown tail recipe name: {}. Try recipe?", name))
    }
}

// Opcode representation.
//
// Cranelift requires each recipe to have a single encoding size in bytes, and x86 opcodes are
// variable length, so we use separate recipes for different styles of opcodes and prefixes. The
// opcode format is indicated by the recipe name prefix.
//
// The match case below does not include the REX prefix which goes after the mandatory prefix.
// VEX/XOP and EVEX prefixes are not yet supported. Encodings using any of these prefixes are
// represented by separate recipes.
//
// The encoding bits are:
//
// 0-7: The opcode byte <op>.
// 8-9: pp, mandatory prefix:
//    00: none (Op*)
//    01: 66   (Mp*)
//    10: F3   (Mp*)
//    11: F2   (Mp*)
// 10-11: mm, opcode map:
//    00: <op>        (Op1/Mp1)
//    01: 0F <op>     (Op2/Mp2)
//    10: 0F 38 <op>  (Op3/Mp3)
//    11: 0F 3A <op>  (Op3/Mp3)
// 12-14 rrr, opcode bits for the ModR/M byte for certain opcodes.
// 15: REX.W bit (or VEX.W/E)
//
// There is some redundancy between bits 8-11 and the recipe names, but we have enough bits, and
// the pp+mm format is ready for supporting VEX prefixes.
//
// TODO Cranelift doesn't actually require recipe to have different encoding sizes anymore, so this
// could be simplified.

/// Given a sequence of opcode bytes, compute the recipe name prefix and encoding bits.
+fn decode_opcodes(op_bytes: &[u8], rrr: u16, w: u16) -> (&'static str, u16) { + assert!(op_bytes.len() >= 1, "at least one opcode byte"); + + let prefix_bytes = &op_bytes[..op_bytes.len() - 1]; + let (name, mmpp) = match prefix_bytes { + [] => ("Op1", 0b000), + [0x66] => ("Mp1", 0b0001), + [0xf3] => ("Mp1", 0b0010), + [0xf2] => ("Mp1", 0b0011), + [0x0f] => ("Op2", 0b0100), + [0x66, 0x0f] => ("Mp2", 0b0101), + [0xf3, 0x0f] => ("Mp2", 0b0110), + [0xf2, 0x0f] => ("Mp2", 0b0111), + [0x0f, 0x38] => ("Op3", 0b1000), + [0x66, 0x0f, 0x38] => ("Mp3", 0b1001), + [0xf3, 0x0f, 0x38] => ("Mp3", 0b1010), + [0xf2, 0x0f, 0x38] => ("Mp3", 0b1011), + [0x0f, 0x3a] => ("Op3", 0b1100), + [0x66, 0x0f, 0x3a] => ("Mp3", 0b1101), + [0xf3, 0x0f, 0x3a] => ("Mp3", 0b1110), + [0xf2, 0x0f, 0x3a] => ("Mp3", 0b1111), + _ => { + panic!("unexpected opcode sequence: {:?}", op_bytes); + } + }; + + let opcode_byte = op_bytes[op_bytes.len() - 1] as u16; + (name, opcode_byte | (mmpp << 8) | (rrr << 12) | w << 15) +} + +/// Given a snippet of Rust code (or None), replace the `PUT_OP` macro with the +/// corresponding `put_*` function from the `binemit.rs` module. +fn replace_put_op(code: Option, prefix: &str) -> Option { + code.map(|code| code.replace("{{PUT_OP}}", &format!("put_{}", prefix.to_lowercase()))) +} + +/// Replaces constraints to a REX-prefixed register class by the equivalent non-REX register class. 
+fn replace_nonrex_constraints( + regs: &IsaRegs, + constraints: Vec, +) -> Vec { + constraints + .into_iter() + .map(|constraint| match constraint { + OperandConstraint::RegClass(rc_index) => { + let new_rc_index = if rc_index == regs.class_by_name("GPR") { + regs.class_by_name("GPR8") + } else if rc_index == regs.class_by_name("FPR") { + regs.class_by_name("FPR8") + } else { + rc_index + }; + OperandConstraint::RegClass(new_rc_index) + } + _ => constraint, + }) + .collect() +} + +/// Previously called a TailRecipe in the Python meta language, this allows to create multiple +/// variants of a single base EncodingRecipe (rex prefix, specialized w/rrr bits, different +/// opcodes). It serves as a prototype of an EncodingRecipe, which is then used when actually creating +/// Encodings, in encodings.rs. This is an idiosyncrasy of the x86 meta-language, and could be +/// reconsidered later. +#[derive(Clone)] +pub struct Template<'builder> { + /// Mapping of format indexes to format data, used in the build() method. + formats: &'builder FormatRegistry, + + /// Description of registers, used in the build() method. + regs: &'builder IsaRegs, + + /// The recipe template, which is to be specialized (by copy). + recipe: EncodingRecipeBuilder, + + /// Does this recipe requires a REX prefix? + requires_prefix: bool, + + /// Other recipe to use when REX-prefixed. + when_prefixed: Option>>, + + // Specialized parameters. + /// Should we include the REX prefix? + rex: bool, + /// Value of the W bit (0 or 1). + w_bit: u16, + /// Value of the RRR bits (between 0 and 0b111). + rrr_bits: u16, + /// Opcode bytes. 
+ op_bytes: Vec, +} + +impl<'builder> Template<'builder> { + fn new( + recipe: EncodingRecipeBuilder, + formats: &'builder FormatRegistry, + regs: &'builder IsaRegs, + ) -> Self { + Self { + formats, + regs, + recipe, + requires_prefix: false, + when_prefixed: None, + rex: false, + w_bit: 0, + rrr_bits: 0, + op_bytes: Vec::new(), + } + } + + fn name(&self) -> &str { + &self.recipe.name + } + fn requires_prefix(self, value: bool) -> Self { + Self { + requires_prefix: value, + ..self + } + } + fn when_prefixed(self, template: Rc>) -> Self { + assert!(self.when_prefixed.is_none()); + Self { + when_prefixed: Some(template), + ..self + } + } + + // Copy setters. + pub fn opcodes(&self, op_bytes: Vec) -> Self { + assert!(!op_bytes.is_empty()); + let mut copy = self.clone(); + copy.op_bytes = op_bytes; + copy + } + pub fn w(&self) -> Self { + let mut copy = self.clone(); + copy.w_bit = 1; + copy + } + pub fn rrr(&self, value: u16) -> Self { + assert!(value <= 0b111); + let mut copy = self.clone(); + copy.rrr_bits = value; + copy + } + pub fn nonrex(&self) -> Self { + assert!(!self.requires_prefix, "Tail recipe requires REX prefix."); + let mut copy = self.clone(); + copy.rex = false; + copy + } + pub fn rex(&self) -> Self { + if let Some(prefixed) = &self.when_prefixed { + let mut ret = prefixed.rex(); + // Forward specialized parameters. + ret.op_bytes = self.op_bytes.clone(); + ret.w_bit = self.w_bit; + ret.rrr_bits = self.rrr_bits; + return ret; + } + let mut copy = self.clone(); + copy.rex = true; + copy + } + + pub fn build(mut self) -> (EncodingRecipe, u16) { + let (name, bits) = decode_opcodes(&self.op_bytes, self.rrr_bits, self.w_bit); + + let (name, rex_prefix_size) = if self.rex { + ("Rex".to_string() + name, 1) + } else { + (name.into(), 0) + }; + + let size_addendum = self.op_bytes.len() as u64 + rex_prefix_size; + self.recipe.base_size += size_addendum; + + // Branch ranges are relative to the end of the instruction. 
+ self.recipe + .branch_range + .as_mut() + .map(|range| range.inst_size += size_addendum); + + self.recipe.emit = replace_put_op(self.recipe.emit, &name); + self.recipe.name = name + &self.recipe.name; + + if !self.rex { + let operands_in = self.recipe.operands_in.unwrap_or(Vec::new()); + self.recipe.operands_in = Some(replace_nonrex_constraints(self.regs, operands_in)); + let operands_out = self.recipe.operands_out.unwrap_or(Vec::new()); + self.recipe.operands_out = Some(replace_nonrex_constraints(self.regs, operands_out)); + } + + (self.recipe.build(self.formats), bits) + } +} + +/// Returns a predicate checking that the "cond" field of the instruction contains one of the +/// directly supported floating point condition codes. +fn supported_floatccs_predicate( + supported_cc: &[Literal], + format: &InstructionFormat, +) -> InstructionPredicate { + supported_cc + .iter() + .fold(InstructionPredicate::new(), |pred, literal| { + pred.or(InstructionPredicate::new_is_field_equal( + format, + "cond", + literal.to_rust_code(), + )) + }) +} + +/// Return an instruction predicate that checks if `iform.imm` is a valid `scale` for a SIB byte. +fn valid_scale(format: &InstructionFormat) -> InstructionPredicate { + ["1", "2", "4", "8"] + .iter() + .fold(InstructionPredicate::new(), |pred, &literal| { + pred.or(InstructionPredicate::new_is_field_equal( + format, + "imm", + literal.into(), + )) + }) +} + +pub fn define<'shared>( + shared_defs: &'shared SharedDefinitions, + settings: &'shared SettingGroup, + regs: &'shared IsaRegs, +) -> RecipeGroup<'shared> { + // The set of floating point condition codes that are directly supported. + // Other condition codes need to be reversed or expressed as two tests. 
+ let floatcc = shared_defs.operand_kinds.by_name("floatcc"); + let supported_floatccs: Vec = ["ord", "uno", "one", "ueq", "gt", "ge", "ult", "ule"] + .iter() + .map(|name| Literal::enumerator_for(floatcc, name)) + .collect(); + + let formats = &shared_defs.format_registry; + + // Register classes shorthands. + let abcd = regs.class_by_name("ABCD"); + let gpr = regs.class_by_name("GPR"); + let fpr = regs.class_by_name("FPR"); + let flag = regs.class_by_name("FLAG"); + + // Operand constraints shorthands. + let reg_rflags = Register::new(flag, regs.regunit_by_name(flag, "rflags")); + let reg_rax = Register::new(gpr, regs.regunit_by_name(gpr, "rax")); + let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx")); + let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx")); + + // Stack operand with a 32-bit signed displacement from either RBP or RSP. + let stack_gpr32 = Stack::new(gpr); + let stack_fpr32 = Stack::new(fpr); + + // Format shorthands, prefixed with f_. + let f_binary = formats.by_name("Binary"); + let f_binary_imm = formats.by_name("BinaryImm"); + let f_branch = formats.by_name("Branch"); + let f_branch_float = formats.by_name("BranchFloat"); + let f_branch_int = formats.by_name("BranchInt"); + let f_branch_table_entry = formats.by_name("BranchTableEntry"); + let f_branch_table_base = formats.by_name("BranchTableBase"); + let f_call = formats.by_name("Call"); + let f_call_indirect = formats.by_name("CallIndirect"); + let f_copy_special = formats.by_name("CopySpecial"); + let f_float_compare = formats.by_name("FloatCompare"); + let f_float_cond = formats.by_name("FloatCond"); + let f_float_cond_trap = formats.by_name("FloatCondTrap"); + let f_func_addr = formats.by_name("FuncAddr"); + let f_indirect_jump = formats.by_name("IndirectJump"); + let f_int_compare = formats.by_name("IntCompare"); + let f_int_compare_imm = formats.by_name("IntCompareImm"); + let f_int_cond = formats.by_name("IntCond"); + let f_int_cond_trap = 
formats.by_name("IntCondTrap"); + let f_int_select = formats.by_name("IntSelect"); + let f_jump = formats.by_name("Jump"); + let f_load = formats.by_name("Load"); + let f_load_complex = formats.by_name("LoadComplex"); + let f_multiary = formats.by_name("MultiAry"); + let f_nullary = formats.by_name("NullAry"); + let f_reg_fill = formats.by_name("RegFill"); + let f_reg_move = formats.by_name("RegMove"); + let f_reg_spill = formats.by_name("RegSpill"); + let f_stack_load = formats.by_name("StackLoad"); + let f_store = formats.by_name("Store"); + let f_store_complex = formats.by_name("StoreComplex"); + let f_ternary = formats.by_name("Ternary"); + let f_trap = formats.by_name("Trap"); + let f_unary = formats.by_name("Unary"); + let f_unary_bool = formats.by_name("UnaryBool"); + let f_unary_global_value = formats.by_name("UnaryGlobalValue"); + let f_unary_ieee32 = formats.by_name("UnaryIeee32"); + let f_unary_ieee64 = formats.by_name("UnaryIeee64"); + let f_unary_imm = formats.by_name("UnaryImm"); + + // Predicates shorthands. + let use_sse41 = settings.predicate_by_name("use_sse41"); + + // Definitions. + let mut recipes = RecipeGroup::new(formats, regs); + + // A null unary instruction that takes a GPR register. Can be used for identity copies and + // no-op conversions. + recipes.add_recipe( + EncodingRecipeBuilder::new("null", f_unary, 0) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .emit(""), + ); + recipes.add_recipe( + EncodingRecipeBuilder::new("stacknull", f_unary, 0) + .operands_in(vec![stack_gpr32]) + .operands_out(vec![stack_gpr32]) + .emit(""), + ); + + recipes + .add_recipe(EncodingRecipeBuilder::new("debugtrap", f_nullary, 1).emit("sink.put1(0xcc);")); + + // XX opcode, no ModR/M. + recipes.add_template_recipe(EncodingRecipeBuilder::new("trap", f_trap, 0).emit( + r#" + sink.trap(code, func.srclocs[inst]); + {{PUT_OP}}(bits, BASE_REX, sink); + "#, + )); + + // Macro: conditional jump over a ud2. 
+ recipes.add_recipe( + EncodingRecipeBuilder::new("trapif", f_int_cond_trap, 4) + .operands_in(vec![reg_rflags]) + .clobbers_flags(false) + .emit( + r#" + // Jump over a 2-byte ud2. + sink.put1(0x70 | (icc2opc(cond.inverse()) as u8)); + sink.put1(2); + // ud2. + sink.trap(code, func.srclocs[inst]); + sink.put1(0x0f); + sink.put1(0x0b); + "#, + ), + ); + + recipes.add_recipe( + EncodingRecipeBuilder::new("trapff", f_float_cond_trap, 4) + .operands_in(vec![reg_rflags]) + .clobbers_flags(false) + .inst_predicate(supported_floatccs_predicate( + &supported_floatccs, + formats.get(f_float_cond_trap), + )) + .emit( + r#" + // Jump over a 2-byte ud2. + sink.put1(0x70 | (fcc2opc(cond.inverse()) as u8)); + sink.put1(2); + // ud2. + sink.trap(code, func.srclocs[inst]); + sink.put1(0x0f); + sink.put1(0x0b); + "#, + ), + ); + + // XX /r + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rr", f_binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + ); + + // XX /r with operands swapped. (RM form). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rrx", f_binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + ); + + // XX /r with FPR ins and outs. A form. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fa", f_binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + ); + + // XX /r with FPR ins and outs. A form with input operands swapped. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("fax", f_binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![1]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + ); + + // XX /n for a unary operation with extension bits. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("ur", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + ); + + // XX /r, but for a unary operator with separate input/output register, like + // copies. MR form, preserving flags. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("umr", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); + "#, + ), + ); + + // Same as umr, but with FPR -> GPR registers. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rfumr", f_unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink); + modrm_rr(out_reg0, in_reg0, sink); + "#, + ), + ); + + // XX /r, but for a unary operator with separate input/output register. + // RM form. Clobbers FLAGS. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("urm", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r. Same as urm, but doesn't clobber FLAGS. + let urm_noflags = recipes.add_template_recipe( + EncodingRecipeBuilder::new("urm_noflags", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r. 
Same as urm_noflags, but input limited to ABCD. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("urm_noflags_abcd", f_unary, 1) + .operands_in(vec![abcd]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + formats, + regs, + ) + .when_prefixed(urm_noflags), + ); + + // XX /r, RM form, FPR -> FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("furm", f_unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r, RM form, GPR -> FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("frurm", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r, RM form, FPR -> GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rfurm", f_unary, 1) + .operands_in(vec![fpr]) + .operands_out(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // XX /r, RMI form for one of the roundXX SSE 4.1 instructions. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("furmi_rnd", f_unary, 2) + .operands_in(vec![fpr]) + .operands_out(vec![fpr]) + .isa_predicate(use_sse41) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + sink.put1(match opcode { + Opcode::Nearest => 0b00, + Opcode::Floor => 0b01, + Opcode::Ceil => 0b10, + Opcode::Trunc => 0b11, + x => panic!("{} unexpected for furmi_rnd", opcode), + }); + "#, + ), + ); + + // XX /r, for regmove instructions. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("rmov", f_reg_move, 1) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(dst, src), sink); + modrm_rr(dst, src, sink); + "#, + ), + ); + + // XX /r, for regmove instructions (FPR version, RM encoded). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("frmov", f_reg_move, 1) + .operands_in(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(src, dst), sink); + modrm_rr(src, dst, sink); + "#, + ), + ); + + // XX /n with one arg in %rcx, for shifts. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rc", f_binary, 1) + .operands_in(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rcx), + ]) + .operands_out(vec![0]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + "#, + ), + ); + + // XX /n for division: inputs in %rax, %rdx, r. Outputs in %rax, %rdx. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("div", f_ternary, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::FixedReg(reg_rdx), + OperandConstraint::RegClass(gpr), + ]) + .operands_out(vec![reg_rax, reg_rdx]) + .emit( + r#" + sink.trap(TrapCode::IntegerDivisionByZero, func.srclocs[inst]); + {{PUT_OP}}(bits, rex1(in_reg2), sink); + modrm_r_bits(in_reg2, bits, sink); + "#, + ), + ); + + // XX /n for {s,u}mulx: inputs in %rax, r. Outputs in %rdx(hi):%rax(lo) + recipes.add_template_recipe( + EncodingRecipeBuilder::new("mulx", f_binary, 1) + .operands_in(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::RegClass(gpr), + ]) + .operands_out(vec![ + OperandConstraint::FixedReg(reg_rax), + OperandConstraint::FixedReg(reg_rdx), + ]) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg1), sink); + modrm_r_bits(in_reg1, bits, sink); + "#, + ), + ); + + // XX /n ib with 8-bit immediate sign-extended. 
+ { + let format = formats.get(f_binary_imm); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("r_ib", f_binary_imm, 2) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int(format, "imm", 8, 0)) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + ); + + // XX /n id with 32-bit immediate sign-extended. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("r_id", f_binary_imm, 5) + .operands_in(vec![gpr]) + .operands_out(vec![0]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + format, "imm", 32, 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(in_reg0), sink); + modrm_r_bits(in_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + } + + { + // XX /n id with 32-bit immediate sign-extended. UnaryImm version. + let format = formats.get(f_unary_imm); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("u_id", f_unary_imm, 5) + .operands_out(vec![gpr]) + .inst_predicate(InstructionPredicate::new_is_signed_int( + format, "imm", 32, 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(out_reg0), sink); + modrm_r_bits(out_reg0, bits, sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + } + + // XX+rd id unary with 32-bit immediate. Note no recipe predicate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_id", f_unary_imm, 4) + .operands_out(vec![gpr]) + .emit( + r#" + // The destination register is encoded in the low bits of the opcode. + // No ModR/M. + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + + // XX+rd id unary with bool immediate. Note no recipe predicate. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_id_bool", f_unary_bool, 4) + .operands_out(vec![gpr]) + .emit( + r#" + // The destination register is encoded in the low bits of the opcode. + // No ModR/M. + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + let imm: u32 = if imm { 1 } else { 0 }; + sink.put4(imm); + "#, + ), + ); + + // XX+rd iq unary with 64-bit immediate. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pu_iq", f_unary_imm, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + let imm: i64 = imm.into(); + sink.put8(imm as u64); + "#, + ), + ); + + // XX /n Unary with floating point 32-bit immediate equal to zero. + { + let format = formats.get(f_unary_ieee32); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("f32imm_z", f_unary_ieee32, 1) + .operands_out(vec![fpr]) + .inst_predicate(InstructionPredicate::new_is_zero_32bit_float(format, "imm")) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + ); + } + + // XX /n Unary with floating point 64-bit immediate equal to zero. 
+ { + let format = formats.get(f_unary_ieee64); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("f64imm_z", f_unary_ieee64, 1) + .operands_out(vec![fpr]) + .inst_predicate(InstructionPredicate::new_is_zero_64bit_float(format, "imm")) + .emit( + r#" + {{PUT_OP}}(bits, rex2(out_reg0, out_reg0), sink); + modrm_rr(out_reg0, out_reg0, sink); + "#, + ), + ); + } + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pushq", f_unary, 0) + .operands_in(vec![gpr]) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + {{PUT_OP}}(bits | (in_reg0 & 7), rex1(in_reg0), sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("popq", f_nullary, 0) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + "#, + ), + ); + + // XX /r, for regmove instructions. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("copysp", f_copy_special, 1) + .clobbers_flags(false) + .emit( + r#" + {{PUT_OP}}(bits, rex2(dst, src), sink); + modrm_rr(dst, src, sink); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp", f_unary, 1) + .operands_in(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(RU::rsp.into(), in_reg0), sink); + modrm_rr(RU::rsp.into(), in_reg0, sink); + "#, + ), + ); + + { + let format = formats.get(f_unary_imm); + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp_ib", f_unary_imm, 2) + .inst_predicate(InstructionPredicate::new_is_signed_int(format, "imm", 8, 0)) + .emit( + r#" + {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); + modrm_r_bits(RU::rsp.into(), bits, sink); + let imm: i64 = imm.into(); + sink.put1(imm as u8); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("adjustsp_id", f_unary_imm, 5) + .inst_predicate(InstructionPredicate::new_is_signed_int( + format, "imm", 32, 0, + )) + .emit( + r#" + {{PUT_OP}}(bits, rex1(RU::rsp.into()), sink); + modrm_r_bits(RU::rsp.into(), bits, 
sink); + let imm: i64 = imm.into(); + sink.put4(imm as u32); + "#, + ), + ); + } + + // XX+rd id with Abs4 function relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fnaddr4", f_func_addr, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs4, + &func.dfg.ext_funcs[func_ref].name, + 0); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq with Abs8 function relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fnaddr8", f_func_addr, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs8, + &func.dfg.ext_funcs[func_ref].name, + 0); + sink.put8(0); + "#, + ), + ); + + // Similar to fnaddr4, but writes !0 (this is used by BaldrMonkey). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("allones_fnaddr4", f_func_addr, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs4, + &func.dfg.ext_funcs[func_ref].name, + 0); + // Write the immediate as `!0` for the benefit of BaldrMonkey. + sink.put4(!0); + "#, + ), + ); + + // Similar to fnaddr8, but writes !0 (this is used by BaldrMonkey). + recipes.add_template_recipe( + EncodingRecipeBuilder::new("allones_fnaddr8", f_func_addr, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs8, + &func.dfg.ext_funcs[func_ref].name, + 0); + // Write the immediate as `!0` for the benefit of BaldrMonkey. + sink.put8(!0); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("pcrel_fnaddr8", f_func_addr, 5) + .operands_out(vec![gpr]) + // rex2 gets passed 0 for r/m register because the upper bit of + // r/m doesn't get decoded when in rip-relative addressing mode. 
+ .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(Reloc::X86PCRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("got_fnaddr8", f_func_addr, 5) + .operands_out(vec![gpr]) + // rex2 gets passed 0 for r/m register because the upper bit of + // r/m doesn't get decoded when in rip-relative addressing mode. + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_riprel(out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(Reloc::X86GOTPCRel4, + &func.dfg.ext_funcs[func_ref].name, + -4); + sink.put4(0); + "#, + ), + ); + + // XX+rd id with Abs4 globalsym relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("gvaddr4", f_unary_global_value, 4) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs4, + &func.global_values[global_value].symbol_name(), + 0); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq with Abs8 globalsym relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("gvaddr8", f_unary_global_value, 8) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits | (out_reg0 & 7), rex1(out_reg0), sink); + sink.reloc_external(Reloc::Abs8, + &func.global_values[global_value].symbol_name(), + 0); + sink.put8(0); + "#, + ), + ); + + // XX+rd iq with PCRel4 globalsym relocation. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("pcrel_gvaddr8", f_unary_global_value, 5) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_rm(5, out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(Reloc::X86PCRel4, + &func.global_values[global_value].symbol_name(), + -4); + sink.put4(0); + "#, + ), + ); + + // XX+rd iq with Abs8 globalsym relocation. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("got_gvaddr8", f_unary_global_value, 5) + .operands_out(vec![gpr]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(0, out_reg0), sink); + modrm_rm(5, out_reg0, sink); + // The addend adjusts for the difference between the end of the + // instruction and the beginning of the immediate field. + sink.reloc_external(Reloc::X86GOTPCRel4, + &func.global_values[global_value].symbol_name(), + -4); + sink.put4(0); + "#, + ), + ); + + // Stack addresses. + // + // TODO Alternative forms for 8-bit immediates, when applicable. + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("spaddr4_id", f_stack_load, 6) + .operands_out(vec![gpr]) + .emit( + r#" + let sp = StackRef::sp(stack_slot, &func.stack_slots); + let base = stk_base(sp.base); + {{PUT_OP}}(bits, rex2(out_reg0, base), sink); + modrm_sib_disp8(out_reg0, sink); + sib_noindex(base, sink); + let imm : i32 = offset.into(); + sink.put4(sp.offset.checked_add(imm).unwrap() as u32); + "#, + ), + ); + + recipes.add_template_recipe( + EncodingRecipeBuilder::new("spaddr8_id", f_stack_load, 6) + .operands_out(vec![gpr]) + .emit( + r#" + let sp = StackRef::sp(stack_slot, &func.stack_slots); + let base = stk_base(sp.base); + {{PUT_OP}}(bits, rex2(base, out_reg0), sink); + modrm_sib_disp32(out_reg0, sink); + sib_noindex(base, sink); + let imm : i32 = offset.into(); + sink.put4(sp.offset.checked_add(imm).unwrap() as u32); + "#, + ), + ); + + // Store recipes. 
+ + { + // Simple stores. + let format = formats.get(f_store); + + // A predicate asking if the offset is zero. + let has_no_offset = InstructionPredicate::new_is_field_equal(format, "offset", "0".into()); + + // XX /r register-indirect store with no offset. + let st = recipes.add_template_recipe( + EncodingRecipeBuilder::new("st", f_store, 1) + .operands_in(vec![gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with no offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("st_abcd", f_store, 1) + .operands_in(vec![abcd, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } + "#, + ), + formats, + regs, + ) + .when_prefixed(st), + ); + + // XX /r register-indirect store of FPR with no offset. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("fst", f_store, 1) + .operands_in(vec![fpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else if needs_offset(in_reg1) { + modrm_disp8(in_reg1, in_reg0, sink); + sink.put1(0); + } else { + modrm_rm(in_reg1, in_reg0, sink); + } + "#, + ), + ); + + let has_small_offset = InstructionPredicate::new_is_signed_int(format, "offset", 8, 0); + + // XX /r register-indirect store with 8-bit offset. + let st_disp8 = recipes.add_template_recipe( + EncodingRecipeBuilder::new("stDisp8", f_store, 2) + .operands_in(vec![gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with 8-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. 
+ recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("stDisp8_abcd", f_store, 2) + .operands_in(vec![abcd, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + formats, + regs, + ) + .when_prefixed(st_disp8), + ); + + // XX /r register-indirect store with 8-bit offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstDisp8", f_store, 2) + .operands_in(vec![fpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp8(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with 32-bit offset. 
+ let st_disp32 = recipes.add_template_recipe( + EncodingRecipeBuilder::new("stDisp32", f_store, 5) + .operands_in(vec![gpr, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with 32-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template( + Template::new( + EncodingRecipeBuilder::new("stDisp32_abcd", f_store, 5) + .operands_in(vec![abcd, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + formats, + regs, + ) + .when_prefixed(st_disp32), + ); + + // XX /r register-indirect store with 32-bit offset of FPR. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstDisp32", f_store, 5) + .operands_in(vec![fpr, gpr]) + .clobbers_flags(false) + .compute_size("size_plus_maybe_sib_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + if needs_sib_byte(in_reg1) { + modrm_sib_disp32(in_reg0, sink); + sib_noindex(in_reg1, sink); + } else { + modrm_disp32(in_reg1, in_reg0, sink); + } + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + } + + { + // Complex stores. + let format = formats.get(f_store_complex); + + // A predicate asking if the offset is zero. + let has_no_offset = InstructionPredicate::new_is_field_equal(format, "offset", "0".into()); + + // XX /r register-indirect store with index and no offset. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndex", f_store_complex, 2) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with index and no offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndex_abcd", f_store_complex, 2) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + // XX /r register-indirect store with index and no offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndex", f_store_complex, 2) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_no_offset.clone()) + .clobbers_flags(false) + .compute_size("size_plus_maybe_offset_for_in_reg_1") + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + // The else branch always inserts an SIB byte. + if needs_offset(in_reg1) { + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + sink.put1(0); + } else { + modrm_sib(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + } + "#, + ), + ); + + let has_small_offset = InstructionPredicate::new_is_signed_int(format, "offset", 8, 0); + + // XX /r register-indirect store with index and 8-bit offset. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp8", f_store_complex, 3) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with index and 8-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp8_abcd", f_store_complex, 3) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + // XX /r register-indirect store with index and 8-bit offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndexDisp8", f_store_complex, 3) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_small_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp8(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put1(offset as u8); + "#, + ), + ); + + let has_big_offset = InstructionPredicate::new_is_signed_int(format, "offset", 32, 0); + + // XX /r register-indirect store with index and 32-bit offset. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp32", f_store_complex, 6) + .operands_in(vec![gpr, gpr, gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with index and 32-bit offset. + // Only ABCD allowed for stored value. This is for byte stores with no REX. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("stWithIndexDisp32_abcd", f_store_complex, 6) + .operands_in(vec![abcd, gpr, gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + + // XX /r register-indirect store with index and 32-bit offset of FPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fstWithIndexDisp32", f_store_complex, 6) + .operands_in(vec![fpr, gpr, gpr]) + .inst_predicate(has_big_offset.clone()) + .clobbers_flags(false) + .emit( + r#" + if !flags.notrap() { + sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]); + } + {{PUT_OP}}(bits, rex3(in_reg1, in_reg0, in_reg2), sink); + modrm_sib_disp32(in_reg0, sink); + sib(0, in_reg2, in_reg1, sink); + let offset: i32 = offset.into(); + sink.put4(offset as u32); + "#, + ), + ); + } + + // Unary spill with SIB and 32-bit displacement. 
+ recipes.add_template_recipe( + EncodingRecipeBuilder::new("spillSib32", f_unary, 6) + .operands_in(vec![gpr]) + .operands_out(vec![stack_gpr32]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let base = stk_base(out_stk0.base); + {{PUT_OP}}(bits, rex2(base, in_reg0), sink); + modrm_sib_disp32(in_reg0, sink); + sib_noindex(base, sink); + sink.put4(out_stk0.offset as u32); + "#, + ), + ); + + // Like spillSib32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fspillSib32", f_unary, 6) + .operands_in(vec![fpr]) + .operands_out(vec![stack_fpr32]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let base = stk_base(out_stk0.base); + {{PUT_OP}}(bits, rex2(base, in_reg0), sink); + modrm_sib_disp32(in_reg0, sink); + sib_noindex(base, sink); + sink.put4(out_stk0.offset as u32); + "#, + ), + ); + + // Regspill using RSP-relative addressing. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("regspill32", f_reg_spill, 6) + .operands_in(vec![gpr]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let dst = StackRef::sp(dst, &func.stack_slots); + let base = stk_base(dst.base); + {{PUT_OP}}(bits, rex2(base, src), sink); + modrm_sib_disp32(src, sink); + sib_noindex(base, sink); + sink.put4(dst.offset as u32); + "#, + ), + ); + + // Like regspill32, but targeting an FPR rather than a GPR. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fregspill32", f_reg_spill, 6) + .operands_in(vec![fpr]) + .clobbers_flags(false) + .emit( + r#" + sink.trap(TrapCode::StackOverflow, func.srclocs[inst]); + let dst = StackRef::sp(dst, &func.stack_slots); + let base = stk_base(dst.base); + {{PUT_OP}}(bits, rex2(base, src), sink); + modrm_sib_disp32(src, sink); + sib_noindex(base, sink); + sink.put4(dst.offset as u32); + "#, + ), + ); + + // Load recipes. 
    {
        // Simple loads.
        let format = formats.get(f_load);

        // A predicate asking if the offset is zero.
        let has_no_offset = InstructionPredicate::new_is_field_equal(format, "offset", "0".into());

        // XX /r load with no offset.
        // Even with a zero displacement, some base registers still force a SIB
        // byte or a zero disp8 byte; the compute_size callback accounts for that.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ld", f_load, 1)
                .operands_in(vec![gpr])
                .operands_out(vec![gpr])
                .inst_predicate(has_no_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_0")
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                    if needs_sib_byte(in_reg0) {
                        modrm_sib(out_reg0, sink);
                        sib_noindex(in_reg0, sink);
                    } else if needs_offset(in_reg0) {
                        modrm_disp8(in_reg0, out_reg0, sink);
                        sink.put1(0);
                    } else {
                        modrm_rm(in_reg0, out_reg0, sink);
                    }
                "#,
                ),
        );

        // XX /r float load with no offset.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fld", f_load, 1)
                .operands_in(vec![gpr])
                .operands_out(vec![fpr])
                .inst_predicate(has_no_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_or_offset_for_in_reg_0")
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                    if needs_sib_byte(in_reg0) {
                        modrm_sib(out_reg0, sink);
                        sib_noindex(in_reg0, sink);
                    } else if needs_offset(in_reg0) {
                        modrm_disp8(in_reg0, out_reg0, sink);
                        sink.put1(0);
                    } else {
                        modrm_rm(in_reg0, out_reg0, sink);
                    }
                "#,
                ),
        );

        let has_small_offset = InstructionPredicate::new_is_signed_int(format, "offset", 8, 0);

        // XX /r load with 8-bit offset.
        // XX /r load with a signed 8-bit displacement.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ldDisp8", f_load, 2)
                .operands_in(vec![gpr])
                .operands_out(vec![gpr])
                .inst_predicate(has_small_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_for_in_reg_0")
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                    if needs_sib_byte(in_reg0) {
                        modrm_sib_disp8(out_reg0, sink);
                        sib_noindex(in_reg0, sink);
                    } else {
                        modrm_disp8(in_reg0, out_reg0, sink);
                    }
                    let offset: i32 = offset.into();
                    sink.put1(offset as u8);
                "#,
                ),
        );

        // XX /r float load with 8-bit offset.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fldDisp8", f_load, 2)
                .operands_in(vec![gpr])
                .operands_out(vec![fpr])
                .inst_predicate(has_small_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_for_in_reg_0")
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                    if needs_sib_byte(in_reg0) {
                        modrm_sib_disp8(out_reg0, sink);
                        sib_noindex(in_reg0, sink);
                    } else {
                        modrm_disp8(in_reg0, out_reg0, sink);
                    }
                    let offset: i32 = offset.into();
                    sink.put1(offset as u8);
                "#,
                ),
        );

        let has_big_offset = InstructionPredicate::new_is_signed_int(format, "offset", 32, 0);

        // XX /r load with 32-bit offset.
        // XX /r load with a signed 32-bit displacement.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ldDisp32", f_load, 5)
                .operands_in(vec![gpr])
                .operands_out(vec![gpr])
                .inst_predicate(has_big_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_for_in_reg_0")
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                    if needs_sib_byte(in_reg0) {
                        modrm_sib_disp32(out_reg0, sink);
                        sib_noindex(in_reg0, sink);
                    } else {
                        modrm_disp32(in_reg0, out_reg0, sink);
                    }
                    let offset: i32 = offset.into();
                    sink.put4(offset as u32);
                "#,
                ),
        );

        // XX /r float load with 32-bit offset.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fldDisp32", f_load, 5)
                .operands_in(vec![gpr])
                .operands_out(vec![fpr])
                .inst_predicate(has_big_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_sib_for_in_reg_0")
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                    if needs_sib_byte(in_reg0) {
                        modrm_sib_disp32(out_reg0, sink);
                        sib_noindex(in_reg0, sink);
                    } else {
                        modrm_disp32(in_reg0, out_reg0, sink);
                    }
                    let offset: i32 = offset.into();
                    sink.put4(offset as u32);
                "#,
                ),
        );
    } // End of the simple-loads scope.

    {
        // Complex loads (base + index addressing).
        let format = formats.get(f_load_complex);

        // A predicate asking if the offset is zero.
        let has_no_offset = InstructionPredicate::new_is_field_equal(format, "offset", "0".into());

        // XX /r load with index and no offset.
        // XX /r load with index and no displacement.
        // in_reg0 = base, in_reg1 = index (scale factor 0, i.e. *1).
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ldWithIndex", f_load_complex, 2)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![gpr])
                .inst_predicate(has_no_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_offset_for_in_reg_0")
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                    // The else branch always inserts an SIB byte.
                    if needs_offset(in_reg0) {
                        modrm_sib_disp8(out_reg0, sink);
                        sib(0, in_reg1, in_reg0, sink);
                        sink.put1(0);
                    } else {
                        modrm_sib(out_reg0, sink);
                        sib(0, in_reg1, in_reg0, sink);
                    }
                "#,
                ),
        );

        // XX /r float load with index and no offset.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fldWithIndex", f_load_complex, 2)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![fpr])
                .inst_predicate(has_no_offset.clone())
                .clobbers_flags(false)
                .compute_size("size_plus_maybe_offset_for_in_reg_0")
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                    // The else branch always inserts an SIB byte.
                    if needs_offset(in_reg0) {
                        modrm_sib_disp8(out_reg0, sink);
                        sib(0, in_reg1, in_reg0, sink);
                        sink.put1(0);
                    } else {
                        modrm_sib(out_reg0, sink);
                        sib(0, in_reg1, in_reg0, sink);
                    }
                "#,
                ),
        );

        let has_small_offset = InstructionPredicate::new_is_signed_int(format, "offset", 8, 0);

        // XX /r load with index and 8-bit offset.
        // XX /r load with index and a signed 8-bit displacement.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ldWithIndexDisp8", f_load_complex, 3)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![gpr])
                .inst_predicate(has_small_offset.clone())
                .clobbers_flags(false)
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                    modrm_sib_disp8(out_reg0, sink);
                    sib(0, in_reg1, in_reg0, sink);
                    let offset: i32 = offset.into();
                    sink.put1(offset as u8);
                "#,
                ),
        );

        // XX /r float load with index and 8-bit offset.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fldWithIndexDisp8", f_load_complex, 3)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![fpr])
                .inst_predicate(has_small_offset.clone())
                .clobbers_flags(false)
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                    modrm_sib_disp8(out_reg0, sink);
                    sib(0, in_reg1, in_reg0, sink);
                    let offset: i32 = offset.into();
                    sink.put1(offset as u8);
                "#,
                ),
        );

        let has_big_offset = InstructionPredicate::new_is_signed_int(format, "offset", 32, 0);

        // XX /r load with index and 32-bit offset.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("ldWithIndexDisp32", f_load_complex, 6)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![gpr])
                .inst_predicate(has_big_offset.clone())
                .clobbers_flags(false)
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                    modrm_sib_disp32(out_reg0, sink);
                    sib(0, in_reg1, in_reg0, sink);
                    let offset: i32 = offset.into();
                    sink.put4(offset as u32);
                "#,
                ),
        );

        // XX /r float load with index and 32-bit offset.
        // XX /r float load with index and a signed 32-bit displacement.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("fldWithIndexDisp32", f_load_complex, 6)
                .operands_in(vec![gpr, gpr])
                .operands_out(vec![fpr])
                .inst_predicate(has_big_offset.clone())
                .clobbers_flags(false)
                .emit(
                    r#"
                    if !flags.notrap() {
                        sink.trap(TrapCode::HeapOutOfBounds, func.srclocs[inst]);
                    }
                    {{PUT_OP}}(bits, rex3(in_reg0, out_reg0, in_reg1), sink);
                    modrm_sib_disp32(out_reg0, sink);
                    sib(0, in_reg1, in_reg0, sink);
                    let offset: i32 = offset.into();
                    sink.put4(offset as u32);
                "#,
                ),
        );
    } // End of the complex-loads scope.

    // Unary fill with SIB and 32-bit displacement.
    // Counterpart of spillSib32: reloads a GPR from a stack slot.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("fillSib32", f_unary, 6)
            .operands_in(vec![stack_gpr32])
            .operands_out(vec![gpr])
            .clobbers_flags(false)
            .emit(
                r#"
                    let base = stk_base(in_stk0.base);
                    {{PUT_OP}}(bits, rex2(base, out_reg0), sink);
                    modrm_sib_disp32(out_reg0, sink);
                    sib_noindex(base, sink);
                    sink.put4(in_stk0.offset as u32);
                "#,
            ),
    );

    // Like fillSib32, but targeting an FPR rather than a GPR.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("ffillSib32", f_unary, 6)
            .operands_in(vec![stack_fpr32])
            .operands_out(vec![fpr])
            .clobbers_flags(false)
            .emit(
                r#"
                    let base = stk_base(in_stk0.base);
                    {{PUT_OP}}(bits, rex2(base, out_reg0), sink);
                    modrm_sib_disp32(out_reg0, sink);
                    sib_noindex(base, sink);
                    sink.put4(in_stk0.offset as u32);
                "#,
            ),
    );

    // Regfill with RSP-relative 32-bit displacement.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("regfill32", f_reg_fill, 6)
            .operands_in(vec![stack_gpr32])
            .clobbers_flags(false)
            .emit(
                r#"
                    let src = StackRef::sp(src, &func.stack_slots);
                    let base = stk_base(src.base);
                    {{PUT_OP}}(bits, rex2(base, dst), sink);
                    modrm_sib_disp32(dst, sink);
                    sib_noindex(base, sink);
                    sink.put4(src.offset as u32);
                "#,
            ),
    );

    // Like regfill32, but targeting an FPR rather than a GPR.
    // Regfill of an FPR with RSP-relative 32-bit displacement.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("fregfill32", f_reg_fill, 6)
            .operands_in(vec![stack_fpr32])
            .clobbers_flags(false)
            .emit(
                r#"
                    let src = StackRef::sp(src, &func.stack_slots);
                    let base = stk_base(src.base);
                    {{PUT_OP}}(bits, rex2(base, dst), sink);
                    modrm_sib_disp32(dst, sink);
                    sib_noindex(base, sink);
                    sink.put4(src.offset as u32);
                "#,
            ),
    );

    // Call/return.

    // Direct call with a 4-byte PC-relative relocation filled in by the linker.
    recipes.add_template_recipe(EncodingRecipeBuilder::new("call_id", f_call, 4).emit(
        r#"
            sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
            {{PUT_OP}}(bits, BASE_REX, sink);
            // The addend adjusts for the difference between the end of the
            // instruction and the beginning of the immediate field.
            sink.reloc_external(Reloc::X86CallPCRel4,
                                &func.dfg.ext_funcs[func_ref].name,
                                -4);
            sink.put4(0);
        "#,
    ));

    // Direct call through the PLT (position-independent code).
    recipes.add_template_recipe(EncodingRecipeBuilder::new("call_plt_id", f_call, 4).emit(
        r#"
            sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
            {{PUT_OP}}(bits, BASE_REX, sink);
            sink.reloc_external(Reloc::X86CallPLTRel4,
                                &func.dfg.ext_funcs[func_ref].name,
                                -4);
            sink.put4(0);
        "#,
    ));

    // Indirect call through a register.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("call_r", f_call_indirect, 1)
            .operands_in(vec![gpr])
            .emit(
                r#"
                    sink.trap(TrapCode::StackOverflow, func.srclocs[inst]);
                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
                    modrm_r_bits(in_reg0, bits, sink);
                "#,
            ),
    );

    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("ret", f_multiary, 0).emit("{{PUT_OP}}(bits, BASE_REX, sink);"),
    );

    // Branches.
    // Unconditional jumps: 8-bit ("b") and 32-bit ("d") displacement forms.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("jmpb", f_jump, 1)
            .branch_range((1, 8))
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits, BASE_REX, sink);
                    disp1(destination, func, sink);
                "#,
            ),
    );

    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("jmpd", f_jump, 4)
            .branch_range((4, 32))
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits, BASE_REX, sink);
                    disp4(destination, func, sink);
                "#,
            ),
    );

    // Conditional branches on integer condition codes; the condition is OR'ed
    // into the opcode via icc2opc.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("brib", f_branch_int, 1)
            .operands_in(vec![reg_rflags])
            .branch_range((1, 8))
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink);
                    disp1(destination, func, sink);
                "#,
            ),
    );

    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("brid", f_branch_int, 4)
            .operands_in(vec![reg_rflags])
            .branch_range((4, 32))
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits | icc2opc(cond), BASE_REX, sink);
                    disp4(destination, func, sink);
                "#,
            ),
    );

    // Conditional branches on float condition codes; restricted to the
    // condition codes that map directly onto a single Jcc.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("brfb", f_branch_float, 1)
            .operands_in(vec![reg_rflags])
            .branch_range((1, 8))
            .clobbers_flags(false)
            .inst_predicate(supported_floatccs_predicate(
                &supported_floatccs,
                formats.get(f_branch_float),
            ))
            .emit(
                r#"
                    {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink);
                    disp1(destination, func, sink);
                "#,
            ),
    );

    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("brfd", f_branch_float, 4)
            .operands_in(vec![reg_rflags])
            .branch_range((4, 32))
            .clobbers_flags(false)
            .inst_predicate(supported_floatccs_predicate(
                &supported_floatccs,
                formats.get(f_branch_float),
            ))
            .emit(
                r#"
                    {{PUT_OP}}(bits | fcc2opc(cond), BASE_REX, sink);
                    disp4(destination, func, sink);
                "#,
            ),
    );

    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("indirect_jmp", f_indirect_jump, 1)
            .operands_in(vec![gpr])
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
                    modrm_r_bits(in_reg0, bits, sink);
                "#,
            ),
    );

    // Jump-table entry load: base + scaled index, where the scale comes from
    // the immediate (must be a power of two, enforced by valid_scale).
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("jt_entry", f_branch_table_entry, 2)
            .operands_in(vec![gpr, gpr])
            .operands_out(vec![gpr])
            .clobbers_flags(false)
            .inst_predicate(valid_scale(formats.get(f_branch_table_entry)))
            .compute_size("size_plus_maybe_offset_for_in_reg_1")
            .emit(
                r#"
                    {{PUT_OP}}(bits, rex3(in_reg1, out_reg0, in_reg0), sink);
                    if needs_offset(in_reg1) {
                        modrm_sib_disp8(out_reg0, sink);
                        sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink);
                        sink.put1(0);
                    } else {
                        modrm_sib(out_reg0, sink);
                        sib(imm.trailing_zeros() as u8, in_reg0, in_reg1, sink);
                    }
                "#,
            ),
    );

    // RIP-relative lea of the jump table base.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("jt_base", f_branch_table_base, 5)
            .operands_out(vec![gpr])
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits, rex2(0, out_reg0), sink);
                    modrm_riprel(out_reg0, sink);

                    // No reloc is needed here as the jump table is emitted directly after
                    // the function body.
                    jt_disp4(table, func, sink);
                "#,
            ),
    );

    // Test flags and set a register.
    //
    // These setCC instructions only set the low 8 bits, and they can only write ABCD registers
    // without a REX prefix.
    //
    // Other instruction encodings accepting `b1` inputs have the same constraints and only look at
    // the low 8 bits of the input register.
    // setCC on an integer condition. The REX-prefixed template can target any
    // GPR; the non-prefixed fallback below is limited to the ABCD registers.
    let seti = recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("seti", f_int_cond, 1)
                .operands_in(vec![reg_rflags])
                .operands_out(vec![gpr])
                .clobbers_flags(false)
                .emit(
                    r#"
                    {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink);
                    modrm_r_bits(out_reg0, bits, sink);
                "#,
                ),
            formats,
            regs,
        )
        .requires_prefix(true),
    );

    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("seti_abcd", f_int_cond, 1)
                .operands_in(vec![reg_rflags])
                .operands_out(vec![abcd])
                .clobbers_flags(false)
                .emit(
                    r#"
                    {{PUT_OP}}(bits | icc2opc(cond), rex1(out_reg0), sink);
                    modrm_r_bits(out_reg0, bits, sink);
                "#,
                ),
            formats,
            regs,
        )
        .when_prefixed(seti),
    );

    // Same pair for float condition codes.
    let setf = recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("setf", f_float_cond, 1)
                .operands_in(vec![reg_rflags])
                .operands_out(vec![gpr])
                .clobbers_flags(false)
                .emit(
                    r#"
                    {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink);
                    modrm_r_bits(out_reg0, bits, sink);
                "#,
                ),
            formats,
            regs,
        )
        .requires_prefix(true),
    );

    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("setf_abcd", f_float_cond, 1)
                .operands_in(vec![reg_rflags])
                .operands_out(vec![abcd])
                .clobbers_flags(false)
                .emit(
                    r#"
                    {{PUT_OP}}(bits | fcc2opc(cond), rex1(out_reg0), sink);
                    modrm_r_bits(out_reg0, bits, sink);
                "#,
                ),
            formats,
            regs,
        )
        .when_prefixed(setf),
    );

    // Conditional move (a.k.a integer select)
    // (maybe-REX.W) 0F 4x modrm(r,r)
    // 1 byte, modrm(r,r), is after the opcode
    // The output is tied to the third input (operands_out(vec![2])), so the
    // false-case value flows through unchanged when the move is not taken.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("cmov", f_int_select, 1)
            .operands_in(vec![
                OperandConstraint::FixedReg(reg_rflags),
                OperandConstraint::RegClass(gpr),
                OperandConstraint::RegClass(gpr),
            ])
            .operands_out(vec![2])
            .clobbers_flags(false)
            .emit(
                r#"
                    {{PUT_OP}}(bits | icc2opc(cond), rex2(in_reg1, in_reg2), sink);
                    modrm_rr(in_reg1, in_reg2, sink);
                "#,
            ),
    );
// Bit scan forwards and reverse + recipes.add_template_recipe( + EncodingRecipeBuilder::new("bsf_and_bsr", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![ + OperandConstraint::RegClass(gpr), + OperandConstraint::FixedReg(reg_rflags), + ]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink); + modrm_rr(in_reg0, out_reg0, sink); + "#, + ), + ); + + // Compare and set flags. + + // XX /r, MR form. Compare two GPR registers and set flags. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rcmp", f_binary, 1) + .operands_in(vec![gpr, gpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink); + modrm_rr(in_reg0, in_reg1, sink); + "#, + ), + ); + + // Same as rcmp, but second operand is the stack pointer. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("rcmp_sp", f_unary, 1) + .operands_in(vec![gpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg0, RU::rsp.into()), sink); + modrm_rr(in_reg0, RU::rsp.into(), sink); + "#, + ), + ); + + // XX /r, RM form. Compare two FPR registers and set flags. + recipes.add_template_recipe( + EncodingRecipeBuilder::new("fcmp", f_binary, 1) + .operands_in(vec![fpr, fpr]) + .operands_out(vec![reg_rflags]) + .emit( + r#" + {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink); + modrm_rr(in_reg1, in_reg0, sink); + "#, + ), + ); + + { + let format = formats.get(f_binary_imm); + + let has_small_offset = InstructionPredicate::new_is_signed_int(format, "imm", 8, 0); + + // XX /n, MI form with imm8. 
        // Compare-with-immediate, imm8 form.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("rcmp_ib", f_binary_imm, 2)
                .operands_in(vec![gpr])
                .operands_out(vec![reg_rflags])
                .inst_predicate(has_small_offset)
                .emit(
                    r#"
                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
                    modrm_r_bits(in_reg0, bits, sink);
                    let imm: i64 = imm.into();
                    sink.put1(imm as u8);
                "#,
                ),
        );

        let has_big_offset = InstructionPredicate::new_is_signed_int(format, "imm", 32, 0);

        // XX /n, MI form with imm32.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("rcmp_id", f_binary_imm, 5)
                .operands_in(vec![gpr])
                .operands_out(vec![reg_rflags])
                .inst_predicate(has_big_offset)
                .emit(
                    r#"
                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
                    modrm_r_bits(in_reg0, bits, sink);
                    let imm: i64 = imm.into();
                    sink.put4(imm as u32);
                "#,
                ),
        );
    } // End of the immediate-compare scope.

    // Test-and-branch.
    //
    // This recipe represents the macro fusion of a test and a conditional branch.
    // This serves two purposes:
    //
    // 1. Guarantee that the test and branch get scheduled next to each other so
    //    macro fusion is guaranteed to be possible.
    // 2. Hide the status flags from Cranelift which doesn't currently model flags.
    //
    // The encoding bits affect both the test and the branch instruction:
    //
    // Bits 0-7 are the Jcc opcode.
    // Bits 8-15 control the test instruction which always has opcode byte 0x85.

    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("tjccb", f_branch, 1 + 2)
            .operands_in(vec![gpr])
            .branch_range((3, 8))
            .emit(
                r#"
                    // test r, r.
                    {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink);
                    modrm_rr(in_reg0, in_reg0, sink);
                    // Jcc instruction.
                    sink.put1(bits as u8);
                    disp1(destination, func, sink);
                "#,
            ),
    );

    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("tjccd", f_branch, 1 + 6)
            .operands_in(vec![gpr])
            .branch_range((7, 32))
            .emit(
                r#"
                    // test r, r.
                    {{PUT_OP}}((bits & 0xff00) | 0x85, rex2(in_reg0, in_reg0), sink);
                    modrm_rr(in_reg0, in_reg0, sink);
                    // Jcc instruction.
                    sink.put1(0x0f);
                    sink.put1(bits as u8);
                    disp4(destination, func, sink);
                "#,
            ),
    );

    // 8-bit test-and-branch.

    let t8jccb = recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("t8jccb", f_branch, 1 + 2)
                .operands_in(vec![gpr])
                .branch_range((3, 8))
                .emit(
                    r#"
                    // test8 r, r.
                    {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
                    modrm_rr(in_reg0, in_reg0, sink);
                    // Jcc instruction.
                    sink.put1(bits as u8);
                    disp1(destination, func, sink);
                "#,
                ),
            formats,
            regs,
        )
        .requires_prefix(true),
    );

    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("t8jccb_abcd", f_branch, 1 + 2)
                .operands_in(vec![abcd])
                .branch_range((3, 8))
                .emit(
                    r#"
                    // test8 r, r.
                    {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
                    modrm_rr(in_reg0, in_reg0, sink);
                    // Jcc instruction.
                    sink.put1(bits as u8);
                    disp1(destination, func, sink);
                "#,
                ),
            formats,
            regs,
        )
        .when_prefixed(t8jccb),
    );

    let t8jccd = recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("t8jccd", f_branch, 1 + 6)
                .operands_in(vec![gpr])
                .branch_range((7, 32))
                .emit(
                    r#"
                    // test8 r, r.
                    {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
                    modrm_rr(in_reg0, in_reg0, sink);
                    // Jcc instruction.
                    sink.put1(0x0f);
                    sink.put1(bits as u8);
                    disp4(destination, func, sink);
                "#,
                ),
            formats,
            regs,
        )
        .requires_prefix(true),
    );

    recipes.add_template(
        Template::new(
            EncodingRecipeBuilder::new("t8jccd_abcd", f_branch, 1 + 6)
                .operands_in(vec![abcd])
                .branch_range((7, 32))
                .emit(
                    r#"
                    // test8 r, r.
                    {{PUT_OP}}((bits & 0xff00) | 0x84, rex2(in_reg0, in_reg0), sink);
                    modrm_rr(in_reg0, in_reg0, sink);
                    // Jcc instruction.
                    sink.put1(0x0f);
                    sink.put1(bits as u8);
                    disp4(destination, func, sink);
                "#,
                ),
            formats,
            regs,
        )
        .when_prefixed(t8jccd),
    );

    // Worst case test-and-branch recipe for brz.b1 and brnz.b1 in 32-bit mode.
    // The register allocator can't handle a branch instruction with constrained
    // operands like the t8jccd_abcd above. This variant can accept the b1 operand in
    // any register, but it is larger because it uses a 32-bit test instruction with
    // a 0xff immediate.

    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("t8jccd_long", f_branch, 5 + 6)
            .operands_in(vec![gpr])
            .branch_range((11, 32))
            .emit(
                r#"
                    // test32 r, 0xff.
                    {{PUT_OP}}((bits & 0xff00) | 0xf7, rex1(in_reg0), sink);
                    modrm_r_bits(in_reg0, bits, sink);
                    sink.put4(0xff);
                    // Jcc instruction.
                    sink.put1(0x0f);
                    sink.put1(bits as u8);
                    disp4(destination, func, sink);
                "#,
            ),
    );

    // Comparison that produces a `b1` result in a GPR.
    //
    // This is a macro of a `cmp` instruction followed by a `setCC` instruction.
    //
    // TODO This is not a great solution because:
    //
    // - The cmp+setcc combination is not recognized by CPU's macro fusion.
    // - The 64-bit encoding has issues with REX prefixes. The `cmp` and `setCC`
    //   instructions may need a REX independently.
    // - Modeling CPU flags in the type system would be better.
    //
    // Since the `setCC` instructions only write an 8-bit register, we use that as
    // our `b1` representation: A `b1` value is represented as a GPR where the low 8
    // bits are known to be 0 or 1. The high bits are undefined.
    //
    // This bandaid macro doesn't support a REX prefix for the final `setCC`
    // instruction, so it is limited to the `ABCD` register class for booleans.
    // The omission of a `when_prefixed` alternative is deliberate here.
    // cmp + setCC macro producing a `b1` in an ABCD register.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("icscc", f_int_compare, 1 + 3)
            .operands_in(vec![gpr, gpr])
            .operands_out(vec![abcd])
            .emit(
                r#"
                    // Comparison instruction.
                    {{PUT_OP}}(bits, rex2(in_reg0, in_reg1), sink);
                    modrm_rr(in_reg0, in_reg1, sink);
                    // `setCC` instruction, no REX.
                    use crate::ir::condcodes::IntCC::*;
                    let setcc = match cond {
                        Equal => 0x94,
                        NotEqual => 0x95,
                        SignedLessThan => 0x9c,
                        SignedGreaterThanOrEqual => 0x9d,
                        SignedGreaterThan => 0x9f,
                        SignedLessThanOrEqual => 0x9e,
                        UnsignedLessThan => 0x92,
                        UnsignedGreaterThanOrEqual => 0x93,
                        UnsignedGreaterThan => 0x97,
                        UnsignedLessThanOrEqual => 0x96,
                    };
                    sink.put1(0x0f);
                    sink.put1(setcc);
                    modrm_rr(out_reg0, 0, sink);
                "#,
            ),
    );

    {
        let format = formats.get(f_int_compare_imm);

        let is_small_imm = InstructionPredicate::new_is_signed_int(format, "imm", 8, 0);

        // Immediate variant, imm8 form.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("icscc_ib", f_int_compare_imm, 2 + 3)
                .operands_in(vec![gpr])
                .operands_out(vec![abcd])
                .inst_predicate(is_small_imm)
                .emit(
                    r#"
                    // Comparison instruction.
                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
                    modrm_r_bits(in_reg0, bits, sink);
                    let imm: i64 = imm.into();
                    sink.put1(imm as u8);
                    // `setCC` instruction, no REX.
                    use crate::ir::condcodes::IntCC::*;
                    let setcc = match cond {
                        Equal => 0x94,
                        NotEqual => 0x95,
                        SignedLessThan => 0x9c,
                        SignedGreaterThanOrEqual => 0x9d,
                        SignedGreaterThan => 0x9f,
                        SignedLessThanOrEqual => 0x9e,
                        UnsignedLessThan => 0x92,
                        UnsignedGreaterThanOrEqual => 0x93,
                        UnsignedGreaterThan => 0x97,
                        UnsignedLessThanOrEqual => 0x96,
                    };
                    sink.put1(0x0f);
                    sink.put1(setcc);
                    modrm_rr(out_reg0, 0, sink);
                "#,
                ),
        );

        let is_big_imm = InstructionPredicate::new_is_signed_int(format, "imm", 32, 0);

        // Immediate variant, imm32 form.
        recipes.add_template_recipe(
            EncodingRecipeBuilder::new("icscc_id", f_int_compare_imm, 5 + 3)
                .operands_in(vec![gpr])
                .operands_out(vec![abcd])
                .inst_predicate(is_big_imm)
                .emit(
                    r#"
                    // Comparison instruction.
                    {{PUT_OP}}(bits, rex1(in_reg0), sink);
                    modrm_r_bits(in_reg0, bits, sink);
                    let imm: i64 = imm.into();
                    sink.put4(imm as u32);
                    // `setCC` instruction, no REX.
                    use crate::ir::condcodes::IntCC::*;
                    let setcc = match cond {
                        Equal => 0x94,
                        NotEqual => 0x95,
                        SignedLessThan => 0x9c,
                        SignedGreaterThanOrEqual => 0x9d,
                        SignedGreaterThan => 0x9f,
                        SignedLessThanOrEqual => 0x9e,
                        UnsignedLessThan => 0x92,
                        UnsignedGreaterThanOrEqual => 0x93,
                        UnsignedGreaterThan => 0x97,
                        UnsignedLessThanOrEqual => 0x96,
                    };
                    sink.put1(0x0f);
                    sink.put1(setcc);
                    modrm_rr(out_reg0, 0, sink);
                "#,
                ),
        );
    } // End of the int-compare-imm scope.

    // Make a FloatCompare instruction predicate with the supported condition codes.
    //
    // Same thing for floating point.
    //
    // The ucomiss/ucomisd instructions set the FLAGS bits CF/PF/CF like this:
    //
    //    ZPC OSA
    // UN 111 000
    // GT 000 000
    // LT 001 000
    // EQ 100 000
    //
    // Not all floating point condition codes are supported.
    // The omission of a `when_prefixed` alternative is deliberate here.
    // ucomis + setCC macro producing a `b1` in an ABCD register.
    // Condition codes that cannot be computed from a single setCC panic at
    // recipe-emission time; they are filtered out up front by the
    // supported_floatccs predicate.
    recipes.add_template_recipe(
        EncodingRecipeBuilder::new("fcscc", f_float_compare, 1 + 3)
            .operands_in(vec![fpr, fpr])
            .operands_out(vec![abcd])
            .inst_predicate(supported_floatccs_predicate(
                &supported_floatccs,
                formats.get(f_float_compare),
            ))
            .emit(
                r#"
                    // Comparison instruction.
                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
                    modrm_rr(in_reg1, in_reg0, sink);
                    // `setCC` instruction, no REX.
                    use crate::ir::condcodes::FloatCC::*;
                    let setcc = match cond {
                        Ordered                    => 0x9b, // EQ|LT|GT => setnp (P=0)
                        Unordered                  => 0x9a, // UN       => setp  (P=1)
                        OrderedNotEqual            => 0x95, // LT|GT    => setne (Z=0),
                        UnorderedOrEqual           => 0x94, // UN|EQ    => sete  (Z=1)
                        GreaterThan                => 0x97, // GT       => seta  (C=0&Z=0)
                        GreaterThanOrEqual         => 0x93, // GT|EQ    => setae (C=0)
                        UnorderedOrLessThan        => 0x92, // UN|LT    => setb  (C=1)
                        UnorderedOrLessThanOrEqual => 0x96, // UN|LT|EQ => setbe (Z=1|C=1)
                        Equal |                       // EQ
                        NotEqual |                    // UN|LT|GT
                        LessThan |                    // LT
                        LessThanOrEqual |             // LT|EQ
                        UnorderedOrGreaterThan |      // UN|GT
                        UnorderedOrGreaterThanOrEqual // UN|GT|EQ
                        => panic!("{} not supported by fcscc", cond),
                    };
                    sink.put1(0x0f);
                    sink.put1(setcc);
                    modrm_rr(out_reg0, 0, sink);
                "#,
            ),
    );

    // Return the fully populated recipe group to the caller.
    recipes
}