Remove the old x86 backend

bjorn3
2021-06-18 17:28:55 +02:00
parent e989caf337
commit 9e34df33b9
246 changed files with 76 additions and 28804 deletions

File diff suppressed because it is too large.


@@ -1,723 +0,0 @@
#![allow(non_snake_case)]
use crate::cdsl::instructions::{
AllInstructions, InstructionBuilder as Inst, InstructionGroup, InstructionGroupBuilder,
};
use crate::cdsl::operands::Operand;
use crate::cdsl::types::ValueType;
use crate::cdsl::typevar::{Interval, TypeSetBuilder, TypeVar};
use crate::shared::entities::EntityRefs;
use crate::shared::formats::Formats;
use crate::shared::immediates::Immediates;
use crate::shared::types;
#[allow(clippy::many_single_char_names)]
pub(crate) fn define(
mut all_instructions: &mut AllInstructions,
formats: &Formats,
immediates: &Immediates,
entities: &EntityRefs,
) -> InstructionGroup {
let mut ig = InstructionGroupBuilder::new(&mut all_instructions);
let iflags: &TypeVar = &ValueType::Special(types::Flag::IFlags.into()).into();
let iWord = &TypeVar::new(
"iWord",
"A scalar integer machine word",
TypeSetBuilder::new().ints(32..64).build(),
);
let nlo = &Operand::new("nlo", iWord).with_doc("Low part of numerator");
let nhi = &Operand::new("nhi", iWord).with_doc("High part of numerator");
let d = &Operand::new("d", iWord).with_doc("Denominator");
let q = &Operand::new("q", iWord).with_doc("Quotient");
let r = &Operand::new("r", iWord).with_doc("Remainder");
ig.push(
Inst::new(
"x86_udivmodx",
r#"
Extended unsigned division.
Concatenate the bits in `nhi` and `nlo` to form the numerator.
Interpret the bits as an unsigned number and divide by the unsigned
denominator `d`. Trap when `d` is zero or if the quotient is larger
than the range of the output.
Return both quotient and remainder.
"#,
&formats.ternary,
)
.operands_in(vec![nlo, nhi, d])
.operands_out(vec![q, r])
.can_trap(true),
);
ig.push(
Inst::new(
"x86_sdivmodx",
r#"
Extended signed division.
Concatenate the bits in `nhi` and `nlo` to form the numerator.
Interpret the bits as a signed number and divide by the signed
denominator `d`. Trap when `d` is zero or if the quotient is outside
the range of the output.
Return both quotient and remainder.
"#,
&formats.ternary,
)
.operands_in(vec![nlo, nhi, d])
.operands_out(vec![q, r])
.can_trap(true),
);
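// A minimal scalar sketch (not part of the original file) of the udivmodx
// semantics above, modelling the 64-bit case with u128 arithmetic: the
// numerator is (nhi << 64) | nlo, and `None` stands in for a trap.
fn udivmodx_model(nlo: u64, nhi: u64, d: u64) -> Option<(u64, u64)> {
    if d == 0 {
        return None; // division by zero traps
    }
    let n = ((nhi as u128) << 64) | nlo as u128;
    let q = n / d as u128;
    if q > u64::MAX as u128 {
        return None; // quotient exceeds the output range, traps
    }
    Some((q as u64, (n % d as u128) as u64))
}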
let argL = &Operand::new("argL", iWord);
let argR = &Operand::new("argR", iWord);
let resLo = &Operand::new("resLo", iWord);
let resHi = &Operand::new("resHi", iWord);
ig.push(
Inst::new(
"x86_umulx",
r#"
Unsigned integer multiplication, producing a double-length result.
Polymorphic over all scalar integer types, but does not support vector
types.
"#,
&formats.binary,
)
.operands_in(vec![argL, argR])
.operands_out(vec![resLo, resHi]),
);
ig.push(
Inst::new(
"x86_smulx",
r#"
Signed integer multiplication, producing a double-length result.
Polymorphic over all scalar integer types, but does not support vector
types.
"#,
&formats.binary,
)
.operands_in(vec![argL, argR])
.operands_out(vec![resLo, resHi]),
);
let Float = &TypeVar::new(
"Float",
"A scalar or vector floating point number",
TypeSetBuilder::new()
.floats(Interval::All)
.simd_lanes(Interval::All)
.build(),
);
let IntTo = &TypeVar::new(
"IntTo",
"An integer type with the same number of lanes",
TypeSetBuilder::new()
.ints(32..64)
.simd_lanes(Interval::All)
.build(),
);
let x = &Operand::new("x", Float);
let a = &Operand::new("a", IntTo);
ig.push(
Inst::new(
"x86_cvtt2si",
r#"
Convert with truncation floating point to signed integer.
The source floating point operand is converted to a signed integer by
rounding towards zero. If the result can't be represented in the output
type, returns the smallest signed value the output type can represent.
This instruction does not trap.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);
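// Illustrative model (an assumption, not original code) of the truncating
// conversion above for the f64-to-i32 case: round toward zero, and fold
// every unrepresentable input (NaN, overflow) to i32::MIN, x86's "integer
// indefinite" value.
fn cvtt2si_model(x: f64) -> i32 {
    let t = x.trunc();
    if t.is_nan() || t < i32::MIN as f64 || t > i32::MAX as f64 {
        i32::MIN
    } else {
        t as i32
    }
}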
let f32x4 = &TypeVar::new(
"f32x4",
"A floating point number",
TypeSetBuilder::new()
.floats(32..32)
.simd_lanes(4..4)
.build(),
);
let i32x4 = &TypeVar::new(
"i32x4",
"An integer type with the same number of lanes",
TypeSetBuilder::new().ints(32..32).simd_lanes(4..4).build(),
);
let x = &Operand::new("x", i32x4);
let a = &Operand::new("a", f32x4);
ig.push(
Inst::new(
"x86_vcvtudq2ps",
r#"
Convert unsigned integer to floating point.
Convert packed doubleword unsigned integers to packed single-precision floating-point
values. This instruction does not trap.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![a]),
);
let x = &Operand::new("x", Float);
let a = &Operand::new("a", Float);
let y = &Operand::new("y", Float);
ig.push(
Inst::new(
"x86_fmin",
r#"
Floating point minimum with x86 semantics.
This is equivalent to the C ternary operator `x < y ? x : y` which
differs from `fmin` when either operand is NaN or when comparing
+0.0 to -0.0.
When the two operands don't compare as LT, `y` is returned unchanged,
even if it is a signalling NaN.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"x86_fmax",
r#"
Floating point maximum with x86 semantics.
This is equivalent to the C ternary operator `x > y ? x : y` which
differs from `fmax` when either operand is NaN or when comparing
+0.0 to -0.0.
When the two operands don't compare as GT, `y` is returned unchanged,
even if it is a signalling NaN.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
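// Scalar model of the x86 min semantics just described; `x86_fmin_model` is
// an illustrative name, not original code. Note the asymmetry:
// x86_fmin_model(1.0, f64::NAN) is NaN, while f64::min(1.0, f64::NAN) is 1.0.
fn x86_fmin_model(x: f64, y: f64) -> f64 {
    // When the operands do not compare as LT (NaN involved, or -0.0 vs +0.0),
    // `y` is returned unchanged.
    if x < y { x } else { y }
}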
let x = &Operand::new("x", iWord);
ig.push(
Inst::new(
"x86_push",
r#"
Pushes a value onto the stack.
Decrements the stack pointer and stores the specified value onto the top.
This is polymorphic in i32 and i64. However, it is only implemented for i64
in 64-bit mode, and only for i32 in 32-bit mode.
"#,
&formats.unary,
)
.operands_in(vec![x])
.other_side_effects(true)
.can_store(true),
);
ig.push(
Inst::new(
"x86_pop",
r#"
Pops a value from the stack.
Loads a value from the top of the stack and then increments the stack
pointer.
This is polymorphic in i32 and i64. However, it is only implemented for i64
in 64-bit mode, and only for i32 in 32-bit mode.
"#,
&formats.nullary,
)
.operands_out(vec![x])
.other_side_effects(true)
.can_load(true),
);
let y = &Operand::new("y", iWord);
let rflags = &Operand::new("rflags", iflags);
ig.push(
Inst::new(
"x86_bsr",
r#"
Bit Scan Reverse -- returns the bit-index of the most significant 1
in the word. Result is undefined if the argument is zero. However, it
sets the Z flag depending on the argument, so it is at least easy to
detect and handle that case.
This is polymorphic in i32 and i64. It is implemented for both i64 and
i32 in 64-bit mode, and only for i32 in 32-bit mode.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![y, rflags]),
);
ig.push(
Inst::new(
"x86_bsf",
r#"
Bit Scan Forwards -- returns the bit-index of the least significant 1
in the word. Is otherwise identical to 'bsr', just above.
"#,
&formats.unary,
)
.operands_in(vec![x])
.operands_out(vec![y, rflags]),
);
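// Scalar models (illustrative, not original code) of the two instructions:
// both are undefined at zero, which the flags output is used to detect.
fn bsr_model(x: u64) -> Option<u32> {
    if x == 0 { None } else { Some(63 - x.leading_zeros()) }
}
fn bsf_model(x: u64) -> Option<u32> {
    if x == 0 { None } else { Some(x.trailing_zeros()) }
}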
let uimm8 = &immediates.uimm8;
let TxN = &TypeVar::new(
"TxN",
"A SIMD vector type",
TypeSetBuilder::new()
.ints(Interval::All)
.floats(Interval::All)
.bools(Interval::All)
.simd_lanes(Interval::All)
.includes_scalars(false)
.build(),
);
let a = &Operand::new("a", TxN).with_doc("A vector value (i.e. held in an XMM register)");
let b = &Operand::new("b", TxN).with_doc("A vector value (i.e. held in an XMM register)");
let i = &Operand::new("i", uimm8).with_doc("An ordering operand controlling the copying of data from the source to the destination; see PSHUFD in Intel manual for details");
ig.push(
Inst::new(
"x86_pshufd",
r#"
Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended
register and re-orders the data according to the passed immediate byte.
"#,
&formats.binary_imm8,
)
.operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN)
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"x86_pshufb",
r#"
Packed Shuffle Bytes -- re-orders data in an extended register using a shuffle
mask from either memory or another extended register
"#,
&formats.binary,
)
.operands_in(vec![a, b]) // TODO allow re-ordering from memory here (need more permissive type than TxN)
.operands_out(vec![a]),
);
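// Byte-level model (illustrative, not original code) of the PSHUFB shuffle
// used above: a set high bit in a mask byte zeroes that lane, otherwise the
// low four bits select the source byte.
fn pshufb_model(a: [u8; 16], mask: [u8; 16]) -> [u8; 16] {
    let mut out = [0u8; 16];
    for i in 0..16 {
        if mask[i] & 0x80 == 0 {
            out[i] = a[(mask[i] & 0x0f) as usize];
        }
    }
    out
}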
let mask = &Operand::new("mask", uimm8).with_doc("mask to select lanes from b");
ig.push(
Inst::new(
"x86_pblendw",
r#"
Blend packed words using an immediate mask. Each bit of the 8-bit immediate corresponds to a
lane in ``b``: if the bit is set, the lane is copied into ``a``.
"#,
&formats.ternary_imm8,
)
.operands_in(vec![a, b, mask])
.operands_out(vec![a]),
);
let Idx = &Operand::new("Idx", uimm8).with_doc("Lane index");
let x = &Operand::new("x", TxN);
let a = &Operand::new("a", &TxN.lane_of());
ig.push(
Inst::new(
"x86_pextr",
r#"
Extract lane ``Idx`` from ``x``.
The lane index, ``Idx``, is an immediate value, not an SSA value. It
must indicate a valid lane index for the type of ``x``.
"#,
&formats.binary_imm8,
)
.operands_in(vec![x, Idx])
.operands_out(vec![a]),
);
let IBxN = &TypeVar::new(
"IBxN",
"A SIMD vector type containing only booleans and integers",
TypeSetBuilder::new()
.ints(Interval::All)
.bools(Interval::All)
.simd_lanes(Interval::All)
.includes_scalars(false)
.build(),
);
let x = &Operand::new("x", IBxN);
let y = &Operand::new("y", &IBxN.lane_of()).with_doc("New lane value");
let a = &Operand::new("a", IBxN);
ig.push(
Inst::new(
"x86_pinsr",
r#"
Insert ``y`` into ``x`` at lane ``Idx``.
The lane index, ``Idx``, is an immediate value, not an SSA value. It
must indicate a valid lane index for the type of ``x``.
"#,
&formats.ternary_imm8,
)
.operands_in(vec![x, y, Idx])
.operands_out(vec![a]),
);
let FxN = &TypeVar::new(
"FxN",
"A SIMD vector type containing floats",
TypeSetBuilder::new()
.floats(Interval::All)
.simd_lanes(Interval::All)
.includes_scalars(false)
.build(),
);
let x = &Operand::new("x", FxN);
let y = &Operand::new("y", &FxN.lane_of()).with_doc("New lane value");
let a = &Operand::new("a", FxN);
ig.push(
Inst::new(
"x86_insertps",
r#"
Insert a lane of ``y`` into ``x``, using ``Idx`` to encode both which lane the value is
extracted from and which it is inserted to. This is similar to x86_pinsr but inserts
floats, which are already stored in an XMM register.
"#,
&formats.ternary_imm8,
)
.operands_in(vec![x, y, Idx])
.operands_out(vec![a]),
);
let x = &Operand::new("x", TxN);
let y = &Operand::new("y", TxN);
let a = &Operand::new("a", TxN);
ig.push(
Inst::new(
"x86_punpckh",
r#"
Unpack the high-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional
i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation
would result in ``a = [y3, x3, y2, x2]`` (using the Intel manual's right-to-left lane
ordering).
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"x86_punpckl",
r#"
Unpack the low-order lanes of ``x`` and ``y`` and interleave into ``a``. With notional
i8x4 vectors, where ``x = [x3, x2, x1, x0]`` and ``y = [y3, y2, y1, y0]``, this operation
would result in ``a = [y1, x1, y0, x0]`` (using the Intel manual's right-to-left lane
ordering).
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
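// Model of the notional i8x4 example in the docs above (illustrative, not
// original code); arrays are indexed lane 0 first, i.e. the rightmost lane
// in the Intel manual's ordering.
fn punpckl_model(x: [u8; 4], y: [u8; 4]) -> [u8; 4] {
    [x[0], y[0], x[1], y[1]] // == [y1, x1, y0, x0] right-to-left
}
fn punpckh_model(x: [u8; 4], y: [u8; 4]) -> [u8; 4] {
    [x[2], y[2], x[3], y[3]] // == [y3, x3, y2, x2] right-to-left
}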
let x = &Operand::new("x", FxN);
let y = &Operand::new("y", FxN);
let a = &Operand::new("a", FxN);
ig.push(
Inst::new(
"x86_movsd",
r#"
Move the low 64 bits of the float vector ``y`` to the low 64 bits of float vector ``x``
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"x86_movlhps",
r#"
Move the low 64 bits of the float vector ``y`` to the high 64 bits of float vector ``x``
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
let IxN = &TypeVar::new(
"IxN",
"A SIMD vector type containing integers",
TypeSetBuilder::new()
.ints(Interval::All)
.simd_lanes(Interval::All)
.includes_scalars(false)
.build(),
);
let I128 = &TypeVar::new(
"I128",
"A SIMD vector type containing one large integer (due to Cranelift type constraints, \
this uses the Cranelift I64X2 type but should be understood as one large value, i.e., the \
upper lane is concatenated with the lower lane to form the integer)",
TypeSetBuilder::new()
.ints(64..64)
.simd_lanes(2..2)
.includes_scalars(false)
.build(),
);
let x = &Operand::new("x", IxN).with_doc("Vector value to shift");
let y = &Operand::new("y", I128).with_doc("Number of bits to shift");
let a = &Operand::new("a", IxN);
ig.push(
Inst::new(
"x86_psll",
r#"
Shift Packed Data Left Logical -- This implements the behavior of the shared instruction
``ishl`` but alters the shift operand to live in an XMM register as expected by the PSLL*
family of instructions.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"x86_psrl",
r#"
Shift Packed Data Right Logical -- This implements the behavior of the shared instruction
``ushr`` but alters the shift operand to live in an XMM register as expected by the PSRL*
family of instructions.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"x86_psra",
r#"
Shift Packed Data Right Arithmetic -- This implements the behavior of the shared
instruction ``sshr`` but alters the shift operand to live in an XMM register as expected by
the PSRA* family of instructions.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
let I64x2 = &TypeVar::new(
"I64x2",
"A SIMD vector type containing two 64-bit integers",
TypeSetBuilder::new()
.ints(64..64)
.simd_lanes(2..2)
.includes_scalars(false)
.build(),
);
let x = &Operand::new("x", I64x2);
let y = &Operand::new("y", I64x2);
let a = &Operand::new("a", I64x2);
ig.push(
Inst::new(
"x86_pmullq",
r#"
Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with
lane-wise wrapping if the result overflows. This instruction is necessary to add distinct
encodings for CPUs with newer vector features.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"x86_pmuludq",
r#"
Multiply Packed Integers -- Using only the bottom 32 bits in each lane, multiply two 64x2
unsigned integers and receive a 64x2 result. This instruction avoids the need for handling
overflow as in `x86_pmullq`.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
let x = &Operand::new("x", TxN);
let y = &Operand::new("y", TxN);
let f = &Operand::new("f", iflags);
ig.push(
Inst::new(
"x86_ptest",
r#"
Logical Compare -- PTEST sets the ZF flag if the bitwise AND of the first
operand and the second source operand is all zeros, and sets the CF flag if
the bitwise AND of the second source operand and the logical NOT of the
first operand is all zeros.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![f]),
);
let x = &Operand::new("x", IxN);
let y = &Operand::new("y", IxN);
let a = &Operand::new("a", IxN);
ig.push(
Inst::new(
"x86_pmaxs",
r#"
Maximum of Packed Signed Integers -- Compare signed integers in the first and second
operand and return the maximum values.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"x86_pmaxu",
r#"
Maximum of Packed Unsigned Integers -- Compare unsigned integers in the first and second
operand and return the maximum values.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"x86_pmins",
r#"
Minimum of Packed Signed Integers -- Compare signed integers in the first and second
operand and return the minimum values.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
ig.push(
Inst::new(
"x86_pminu",
r#"
Minimum of Packed Unsigned Integers -- Compare unsigned integers in the first and second
operand and return the minimum values.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
let c = &Operand::new("c", uimm8)
.with_doc("The number of bytes to shift right; see PALIGNR in Intel manual for details");
ig.push(
Inst::new(
"x86_palignr",
r#"
Concatenate destination and source operands, extracting a byte-aligned result shifted to
the right by `c`.
"#,
&formats.ternary_imm8,
)
.operands_in(vec![x, y, c])
.operands_out(vec![a]),
);
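// Byte-level model of PALIGNR (illustrative, not original code): the two
// operands are concatenated with `x` in the high half, shifted right by `c`
// bytes, and the low 16 bytes are kept.
fn palignr_model(x: [u8; 16], y: [u8; 16], c: usize) -> [u8; 16] {
    let mut concat = [0u8; 32];
    concat[..16].copy_from_slice(&y);
    concat[16..].copy_from_slice(&x);
    let mut out = [0u8; 16];
    for i in 0..16 {
        if c + i < 32 {
            out[i] = concat[c + i];
        }
    }
    out
}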
let i64_t = &TypeVar::new(
"i64_t",
"A scalar 64bit integer",
TypeSetBuilder::new().ints(64..64).build(),
);
let GV = &Operand::new("GV", &entities.global_value);
let addr = &Operand::new("addr", i64_t);
ig.push(
Inst::new(
"x86_elf_tls_get_addr",
r#"
Elf tls get addr -- This implements the GD TLS model for ELF. The clobber output should
not be used.
"#,
&formats.unary_global_value,
)
// This is a bit overly broad to mark as clobbering *all* the registers, because it should
// only preserve caller-saved registers. There's no way to indicate this to register
// allocation yet, though, so mark as clobbering all registers instead.
.clobbers_all_regs(true)
.operands_in(vec![GV])
.operands_out(vec![addr]),
);
ig.push(
Inst::new(
"x86_macho_tls_get_addr",
r#"
Mach-O tls get addr -- This implements TLS access for Mach-O. The clobber output should
not be used.
"#,
&formats.unary_global_value,
)
// See above comment for x86_elf_tls_get_addr.
.clobbers_all_regs(true)
.operands_in(vec![GV])
.operands_out(vec![addr]),
);
ig.build()
}


@@ -1,827 +0,0 @@
use crate::cdsl::ast::{constant, var, ExprBuilder, Literal};
use crate::cdsl::instructions::{vector, Bindable, InstructionGroup};
use crate::cdsl::types::{LaneType, ValueType};
use crate::cdsl::xform::TransformGroupBuilder;
use crate::shared::types::Float::{F32, F64};
use crate::shared::types::Int::{I16, I32, I64, I8};
use crate::shared::Definitions as SharedDefinitions;
#[allow(clippy::many_single_char_names)]
pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &InstructionGroup) {
let mut expand = TransformGroupBuilder::new(
"x86_expand",
r#"
Legalize instructions by expansion.
Use x86-specific instructions if needed."#,
)
.isa("x86")
.chain_with(shared.transform_groups.by_name("expand_flags").id);
let mut narrow = TransformGroupBuilder::new(
"x86_narrow",
r#"
Legalize instructions by narrowing.
Use x86-specific instructions if needed."#,
)
.isa("x86")
.chain_with(shared.transform_groups.by_name("narrow_flags").id);
let mut narrow_avx = TransformGroupBuilder::new(
"x86_narrow_avx",
r#"
Legalize instructions by narrowing with CPU feature checks.
This special case converts using x86 AVX instructions where available."#,
)
.isa("x86");
// We cannot chain with the x86_narrow group until this group is built, see bottom of this
// function for where this is chained.
let mut widen = TransformGroupBuilder::new(
"x86_widen",
r#"
Legalize instructions by widening.
Use x86-specific instructions if needed."#,
)
.isa("x86")
.chain_with(shared.transform_groups.by_name("widen").id);
// List of instructions.
let insts = &shared.instructions;
let band = insts.by_name("band");
let bor = insts.by_name("bor");
let clz = insts.by_name("clz");
let ctz = insts.by_name("ctz");
let fcmp = insts.by_name("fcmp");
let fcvt_from_uint = insts.by_name("fcvt_from_uint");
let fcvt_to_sint = insts.by_name("fcvt_to_sint");
let fcvt_to_uint = insts.by_name("fcvt_to_uint");
let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat");
let fmax = insts.by_name("fmax");
let fmin = insts.by_name("fmin");
let iadd = insts.by_name("iadd");
let iconst = insts.by_name("iconst");
let imul = insts.by_name("imul");
let ineg = insts.by_name("ineg");
let isub = insts.by_name("isub");
let ishl = insts.by_name("ishl");
let ireduce = insts.by_name("ireduce");
let popcnt = insts.by_name("popcnt");
let sdiv = insts.by_name("sdiv");
let selectif = insts.by_name("selectif");
let smulhi = insts.by_name("smulhi");
let srem = insts.by_name("srem");
let tls_value = insts.by_name("tls_value");
let udiv = insts.by_name("udiv");
let umulhi = insts.by_name("umulhi");
let ushr = insts.by_name("ushr");
let ushr_imm = insts.by_name("ushr_imm");
let urem = insts.by_name("urem");
let x86_bsf = x86_instructions.by_name("x86_bsf");
let x86_bsr = x86_instructions.by_name("x86_bsr");
let x86_umulx = x86_instructions.by_name("x86_umulx");
let x86_smulx = x86_instructions.by_name("x86_smulx");
let imm = &shared.imm;
// Shift by a 64-bit amount is equivalent to a shift by that amount mod 32, so we can reduce
// the size of the shift amount. This is useful for x86_32, where an I64 shift amount is
// not encodable.
let a = var("a");
let x = var("x");
let y = var("y");
let z = var("z");
for &ty in &[I8, I16, I32] {
let ishl_by_i64 = ishl.bind(ty).bind(I64);
let ireduce = ireduce.bind(I32);
expand.legalize(
def!(a = ishl_by_i64(x, y)),
vec![def!(z = ireduce(y)), def!(a = ishl(x, z))],
);
}
for &ty in &[I8, I16, I32] {
let ushr_by_i64 = ushr.bind(ty).bind(I64);
let ireduce = ireduce.bind(I32);
expand.legalize(
def!(a = ushr_by_i64(x, y)),
vec![def!(z = ireduce(y)), def!(a = ushr(x, z))],
);
}
// Division and remainder.
//
// The srem expansion requires custom code because srem INT_MIN, -1 is not
// allowed to trap. The other ops need to check avoid_div_traps.
expand.custom_legalize(sdiv, "expand_sdivrem");
expand.custom_legalize(srem, "expand_sdivrem");
expand.custom_legalize(udiv, "expand_udivrem");
expand.custom_legalize(urem, "expand_udivrem");
// Double length (widening) multiplication.
let a = var("a");
let x = var("x");
let y = var("y");
let a1 = var("a1");
let a2 = var("a2");
let res_lo = var("res_lo");
let res_hi = var("res_hi");
expand.legalize(
def!(res_hi = umulhi(x, y)),
vec![def!((res_lo, res_hi) = x86_umulx(x, y))],
);
expand.legalize(
def!(res_hi = smulhi(x, y)),
vec![def!((res_lo, res_hi) = x86_smulx(x, y))],
);
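// Scalar sketch of what these rewrites rely on (illustrative, not original
// code): the widening multiply yields the double-length product in two
// halves, and umulhi/smulhi are just the high half.
fn umulx_model(x: u64, y: u64) -> (u64, u64) {
    let wide = (x as u128) * (y as u128);
    (wide as u64, (wide >> 64) as u64) // (res_lo, res_hi)
}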
// Floating point condition codes.
//
// The 8 condition codes in `supported_floatccs` are directly supported by a
// `ucomiss` or `ucomisd` instruction. The remaining codes need legalization
// patterns.
let floatcc_eq = Literal::enumerator_for(&imm.floatcc, "eq");
let floatcc_ord = Literal::enumerator_for(&imm.floatcc, "ord");
let floatcc_ueq = Literal::enumerator_for(&imm.floatcc, "ueq");
let floatcc_ne = Literal::enumerator_for(&imm.floatcc, "ne");
let floatcc_uno = Literal::enumerator_for(&imm.floatcc, "uno");
let floatcc_one = Literal::enumerator_for(&imm.floatcc, "one");
// Equality needs an explicit `ord` test which checks the parity bit.
expand.legalize(
def!(a = fcmp(floatcc_eq, x, y)),
vec![
def!(a1 = fcmp(floatcc_ord, x, y)),
def!(a2 = fcmp(floatcc_ueq, x, y)),
def!(a = band(a1, a2)),
],
);
expand.legalize(
def!(a = fcmp(floatcc_ne, x, y)),
vec![
def!(a1 = fcmp(floatcc_uno, x, y)),
def!(a2 = fcmp(floatcc_one, x, y)),
def!(a = bor(a1, a2)),
],
);
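// Scalar model of the two decompositions above (illustrative, not original
// code): eq == ord && ueq, ne == uno || one.
fn fcmp_eq_model(x: f64, y: f64) -> bool {
    let ord = x.partial_cmp(&y).is_some(); // neither operand is NaN
    let ueq = !(x < y) && !(x > y); // unordered or equal
    ord && ueq
}
fn fcmp_ne_model(x: f64, y: f64) -> bool {
    let uno = x.partial_cmp(&y).is_none();
    let one = x < y || x > y; // ordered and not equal
    uno || one
}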
let floatcc_lt = &Literal::enumerator_for(&imm.floatcc, "lt");
let floatcc_gt = &Literal::enumerator_for(&imm.floatcc, "gt");
let floatcc_le = &Literal::enumerator_for(&imm.floatcc, "le");
let floatcc_ge = &Literal::enumerator_for(&imm.floatcc, "ge");
let floatcc_ugt = &Literal::enumerator_for(&imm.floatcc, "ugt");
let floatcc_ult = &Literal::enumerator_for(&imm.floatcc, "ult");
let floatcc_uge = &Literal::enumerator_for(&imm.floatcc, "uge");
let floatcc_ule = &Literal::enumerator_for(&imm.floatcc, "ule");
// Inequalities that need to be reversed.
for &(cc, rev_cc) in &[
(floatcc_lt, floatcc_gt),
(floatcc_le, floatcc_ge),
(floatcc_ugt, floatcc_ult),
(floatcc_uge, floatcc_ule),
] {
expand.legalize(def!(a = fcmp(cc, x, y)), vec![def!(a = fcmp(rev_cc, y, x))]);
}
// We need to modify the CFG for min/max legalization.
expand.custom_legalize(fmin, "expand_minmax");
expand.custom_legalize(fmax, "expand_minmax");
// Conversions from unsigned need special handling.
expand.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint");
// Conversions from float to int can trap and modify the control flow graph.
expand.custom_legalize(fcvt_to_sint, "expand_fcvt_to_sint");
expand.custom_legalize(fcvt_to_uint, "expand_fcvt_to_uint");
expand.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat");
expand.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat");
// Count leading and trailing zeroes, for baseline x86_64
let c_minus_one = var("c_minus_one");
let c_thirty_one = var("c_thirty_one");
let c_thirty_two = var("c_thirty_two");
let c_sixty_three = var("c_sixty_three");
let c_sixty_four = var("c_sixty_four");
let index1 = var("index1");
let r2flags = var("r2flags");
let index2 = var("index2");
let intcc_eq = Literal::enumerator_for(&imm.intcc, "eq");
let imm64_minus_one = Literal::constant(&imm.imm64, -1);
let imm64_63 = Literal::constant(&imm.imm64, 63);
expand.legalize(
def!(a = clz.I64(x)),
vec![
def!(c_minus_one = iconst(imm64_minus_one)),
def!(c_sixty_three = iconst(imm64_63)),
def!((index1, r2flags) = x86_bsr(x)),
def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)),
def!(a = isub(c_sixty_three, index2)),
],
);
let imm64_31 = Literal::constant(&imm.imm64, 31);
expand.legalize(
def!(a = clz.I32(x)),
vec![
def!(c_minus_one = iconst(imm64_minus_one)),
def!(c_thirty_one = iconst(imm64_31)),
def!((index1, r2flags) = x86_bsr(x)),
def!(index2 = selectif(intcc_eq, r2flags, c_minus_one, index1)),
def!(a = isub(c_thirty_one, index2)),
],
);
let imm64_64 = Literal::constant(&imm.imm64, 64);
expand.legalize(
def!(a = ctz.I64(x)),
vec![
def!(c_sixty_four = iconst(imm64_64)),
def!((index1, r2flags) = x86_bsf(x)),
def!(a = selectif(intcc_eq, r2flags, c_sixty_four, index1)),
],
);
let imm64_32 = Literal::constant(&imm.imm64, 32);
expand.legalize(
def!(a = ctz.I32(x)),
vec![
def!(c_thirty_two = iconst(imm64_32)),
def!((index1, r2flags) = x86_bsf(x)),
def!(a = selectif(intcc_eq, r2flags, c_thirty_two, index1)),
],
);
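// Scalar model of the clz/ctz rewrites above (illustrative, not original
// code): BSR/BSF leave their result undefined at zero but set ZF, so the
// selectif substitutes a constant that makes the arithmetic come out right.
fn clz64_model(x: u64) -> u64 {
    let index = if x == 0 { -1 } else { i64::from(63 - x.leading_zeros()) };
    (63 - index) as u64 // yields 64 when x == 0
}
fn ctz64_model(x: u64) -> u64 {
    if x == 0 { 64 } else { u64::from(x.trailing_zeros()) }
}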
// Population count for baseline x86_64
let x = var("x");
let r = var("r");
let qv3 = var("qv3");
let qv4 = var("qv4");
let qv5 = var("qv5");
let qv6 = var("qv6");
let qv7 = var("qv7");
let qv8 = var("qv8");
let qv9 = var("qv9");
let qv10 = var("qv10");
let qv11 = var("qv11");
let qv12 = var("qv12");
let qv13 = var("qv13");
let qv14 = var("qv14");
let qv15 = var("qv15");
let qc77 = var("qc77");
#[allow(non_snake_case)]
let qc0F = var("qc0F");
let qc01 = var("qc01");
let imm64_1 = Literal::constant(&imm.imm64, 1);
let imm64_4 = Literal::constant(&imm.imm64, 4);
expand.legalize(
def!(r = popcnt.I64(x)),
vec![
def!(qv3 = ushr_imm(x, imm64_1)),
def!(qc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777_7777_7777))),
def!(qv4 = band(qv3, qc77)),
def!(qv5 = isub(x, qv4)),
def!(qv6 = ushr_imm(qv4, imm64_1)),
def!(qv7 = band(qv6, qc77)),
def!(qv8 = isub(qv5, qv7)),
def!(qv9 = ushr_imm(qv7, imm64_1)),
def!(qv10 = band(qv9, qc77)),
def!(qv11 = isub(qv8, qv10)),
def!(qv12 = ushr_imm(qv11, imm64_4)),
def!(qv13 = iadd(qv11, qv12)),
def!(qc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F_0F0F_0F0F))),
def!(qv14 = band(qv13, qc0F)),
def!(qc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101_0101_0101))),
def!(qv15 = imul(qv14, qc01)),
def!(r = ushr_imm(qv15, Literal::constant(&imm.imm64, 56))),
],
);
let lv3 = var("lv3");
let lv4 = var("lv4");
let lv5 = var("lv5");
let lv6 = var("lv6");
let lv7 = var("lv7");
let lv8 = var("lv8");
let lv9 = var("lv9");
let lv10 = var("lv10");
let lv11 = var("lv11");
let lv12 = var("lv12");
let lv13 = var("lv13");
let lv14 = var("lv14");
let lv15 = var("lv15");
let lc77 = var("lc77");
#[allow(non_snake_case)]
let lc0F = var("lc0F");
let lc01 = var("lc01");
expand.legalize(
def!(r = popcnt.I32(x)),
vec![
def!(lv3 = ushr_imm(x, imm64_1)),
def!(lc77 = iconst(Literal::constant(&imm.imm64, 0x7777_7777))),
def!(lv4 = band(lv3, lc77)),
def!(lv5 = isub(x, lv4)),
def!(lv6 = ushr_imm(lv4, imm64_1)),
def!(lv7 = band(lv6, lc77)),
def!(lv8 = isub(lv5, lv7)),
def!(lv9 = ushr_imm(lv7, imm64_1)),
def!(lv10 = band(lv9, lc77)),
def!(lv11 = isub(lv8, lv10)),
def!(lv12 = ushr_imm(lv11, imm64_4)),
def!(lv13 = iadd(lv11, lv12)),
def!(lc0F = iconst(Literal::constant(&imm.imm64, 0x0F0F_0F0F))),
def!(lv14 = band(lv13, lc0F)),
def!(lc01 = iconst(Literal::constant(&imm.imm64, 0x0101_0101))),
def!(lv15 = imul(lv14, lc01)),
def!(r = ushr_imm(lv15, Literal::constant(&imm.imm64, 24))),
],
);
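// Standalone model of the expansion above (illustrative, not original code).
// The 0x77.. masks stop each step from borrowing across nibble boundaries,
// so every nibble computes n - n/2 - n/4 - n/8 == popcount(n); the nibbles
// are then folded into bytes and summed horizontally by the multiply.
fn popcnt_model(x: u64) -> u64 {
    let m7 = 0x7777_7777_7777_7777u64;
    let v4 = (x >> 1) & m7;
    let v5 = x - v4;
    let v7 = (v4 >> 1) & m7;
    let v8 = v5 - v7;
    let v10 = (v7 >> 1) & m7;
    let v11 = v8 - v10; // per-nibble popcount
    let v13 = v11 + (v11 >> 4);
    let v14 = v13 & 0x0f0f_0f0f_0f0f_0f0f; // per-byte popcount
    v14.wrapping_mul(0x0101_0101_0101_0101) >> 56 // horizontal byte sum
}
// For any x, popcnt_model(x) == u64::from(x.count_ones()).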
expand.custom_legalize(ineg, "convert_ineg");
expand.custom_legalize(tls_value, "expand_tls_value");
widen.custom_legalize(ineg, "convert_ineg");
// To reduce compilation times, separate out large blocks of legalizations by theme.
define_simd(shared, x86_instructions, &mut narrow, &mut narrow_avx);
expand.build_and_add_to(&mut shared.transform_groups);
let narrow_id = narrow.build_and_add_to(&mut shared.transform_groups);
narrow_avx
.chain_with(narrow_id)
.build_and_add_to(&mut shared.transform_groups);
widen.build_and_add_to(&mut shared.transform_groups);
}
fn define_simd(
shared: &mut SharedDefinitions,
x86_instructions: &InstructionGroup,
narrow: &mut TransformGroupBuilder,
narrow_avx: &mut TransformGroupBuilder,
) {
let insts = &shared.instructions;
let band = insts.by_name("band");
let band_not = insts.by_name("band_not");
let bitcast = insts.by_name("bitcast");
let bitselect = insts.by_name("bitselect");
let bor = insts.by_name("bor");
let bnot = insts.by_name("bnot");
let bxor = insts.by_name("bxor");
let extractlane = insts.by_name("extractlane");
let fabs = insts.by_name("fabs");
let fcmp = insts.by_name("fcmp");
let fcvt_from_uint = insts.by_name("fcvt_from_uint");
let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat");
let fmax = insts.by_name("fmax");
let fmin = insts.by_name("fmin");
let fneg = insts.by_name("fneg");
let iadd_imm = insts.by_name("iadd_imm");
let icmp = insts.by_name("icmp");
let imax = insts.by_name("imax");
let imin = insts.by_name("imin");
let imul = insts.by_name("imul");
let ineg = insts.by_name("ineg");
let insertlane = insts.by_name("insertlane");
let ishl = insts.by_name("ishl");
let ishl_imm = insts.by_name("ishl_imm");
let raw_bitcast = insts.by_name("raw_bitcast");
let scalar_to_vector = insts.by_name("scalar_to_vector");
let splat = insts.by_name("splat");
let shuffle = insts.by_name("shuffle");
let sshr = insts.by_name("sshr");
let swizzle = insts.by_name("swizzle");
let trueif = insts.by_name("trueif");
let uadd_sat = insts.by_name("uadd_sat");
let umax = insts.by_name("umax");
let umin = insts.by_name("umin");
let snarrow = insts.by_name("snarrow");
let swiden_high = insts.by_name("swiden_high");
let swiden_low = insts.by_name("swiden_low");
let ushr_imm = insts.by_name("ushr_imm");
let ushr = insts.by_name("ushr");
let uwiden_high = insts.by_name("uwiden_high");
let uwiden_low = insts.by_name("uwiden_low");
let vconst = insts.by_name("vconst");
let vall_true = insts.by_name("vall_true");
let vany_true = insts.by_name("vany_true");
let vselect = insts.by_name("vselect");
let x86_palignr = x86_instructions.by_name("x86_palignr");
let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
let x86_pmaxu = x86_instructions.by_name("x86_pmaxu");
let x86_pmins = x86_instructions.by_name("x86_pmins");
let x86_pminu = x86_instructions.by_name("x86_pminu");
let x86_pshufb = x86_instructions.by_name("x86_pshufb");
let x86_pshufd = x86_instructions.by_name("x86_pshufd");
let x86_psra = x86_instructions.by_name("x86_psra");
let x86_ptest = x86_instructions.by_name("x86_ptest");
let x86_punpckh = x86_instructions.by_name("x86_punpckh");
let x86_punpckl = x86_instructions.by_name("x86_punpckl");
let imm = &shared.imm;
// Set up variables and immediates.
let uimm8_zero = Literal::constant(&imm.uimm8, 0x00);
let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
let uimm8_eight = Literal::constant(&imm.uimm8, 8);
let u128_zeroes = constant(vec![0x00; 16]);
let u128_ones = constant(vec![0xff; 16]);
let u128_seventies = constant(vec![0x70; 16]);
let a = var("a");
let b = var("b");
let c = var("c");
let d = var("d");
let e = var("e");
let f = var("f");
let g = var("g");
let h = var("h");
let x = var("x");
let y = var("y");
let z = var("z");
// Limit the SIMD vector size: eventually multiple vector sizes may be supported
// but for now only SSE-sized vectors are available.
let sse_vector_size: u64 = 128;
let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128;
// SIMD splat: 8-bits
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) {
let splat_any8x16 = splat.bind(vector(ty, sse_vector_size));
narrow.legalize(
def!(y = splat_any8x16(x)),
vec![
// Move into the lowest 8 bits of an XMM register.
def!(a = scalar_to_vector(x)),
// Zero out a different XMM register; the shuffle mask for moving the lowest byte
// to all other byte lanes is 0x0.
def!(b = vconst(u128_zeroes)),
// PSHUFB takes two XMM operands, one of which is a shuffle mask (i.e. b).
def!(y = x86_pshufb(a, b)),
],
);
}
// SIMD splat: 16-bits
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 16) {
let splat_x16x8 = splat.bind(vector(ty, sse_vector_size));
let raw_bitcast_any16x8_to_i32x4 = raw_bitcast
.bind(vector(I32, sse_vector_size))
.bind(vector(ty, sse_vector_size));
let raw_bitcast_i32x4_to_any16x8 = raw_bitcast
.bind(vector(ty, sse_vector_size))
.bind(vector(I32, sse_vector_size));
narrow.legalize(
def!(y = splat_x16x8(x)),
vec![
// Move into the lowest 16 bits of an XMM register.
def!(a = scalar_to_vector(x)),
// Insert the value again but in the next lowest 16 bits.
def!(b = insertlane(a, x, uimm8_one)),
// No instruction emitted; pretend this is an I32x4 so we can use PSHUFD.
def!(c = raw_bitcast_any16x8_to_i32x4(b)),
// Broadcast the bytes in the XMM register with PSHUFD.
def!(d = x86_pshufd(c, uimm8_zero)),
// No instruction emitted; pretend this is an X16x8 again.
def!(y = raw_bitcast_i32x4_to_any16x8(d)),
],
);
}
// SIMD splat: 32-bits
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 32) {
let splat_any32x4 = splat.bind(vector(ty, sse_vector_size));
narrow.legalize(
def!(y = splat_any32x4(x)),
vec![
// Translate to an x86 MOV to get the value in an XMM register.
def!(a = scalar_to_vector(x)),
// Broadcast the bytes in the XMM register with PSHUFD.
def!(y = x86_pshufd(a, uimm8_zero)),
],
);
}
// SIMD splat: 64-bits
for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 64) {
let splat_any64x2 = splat.bind(vector(ty, sse_vector_size));
narrow.legalize(
def!(y = splat_any64x2(x)),
vec![
// Move into the lowest 64 bits of an XMM register.
def!(a = scalar_to_vector(x)),
// Move into the highest 64 bits of the same XMM register.
def!(y = insertlane(a, x, uimm8_one)),
],
);
}
// SIMD swizzle; the following inefficient implementation is due to the Wasm SIMD spec requiring
// mask indexes greater than 15 to have the same semantics as a 0 index. For the spec discussion,
// see https://github.com/WebAssembly/simd/issues/93.
{
let swizzle = swizzle.bind(vector(I8, sse_vector_size));
narrow.legalize(
def!(a = swizzle(x, y)),
vec![
def!(b = vconst(u128_seventies)),
def!(c = uadd_sat(y, b)),
def!(a = x86_pshufb(x, c)),
],
);
}
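// Why the 0x70 trick works (illustrative, not original code): a valid lane
// index (< 16) stays below 0x80 after the saturating add, while any index
// >= 16 lands in 0x80..=0xff, so PSHUFB's high-bit rule forces those lanes
// to zero, matching the Wasm spec's handling of out-of-range indices.
fn swizzle_mask_byte(i: u8) -> u8 {
    i.saturating_add(0x70)
}
// swizzle_mask_byte(15) == 0x7f (still selects lane 15);
// swizzle_mask_byte(16) == 0x80 (high bit set, lane becomes zero).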
// SIMD bnot
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let bnot = bnot.bind(vector(ty, sse_vector_size));
narrow.legalize(
def!(y = bnot(x)),
vec![def!(a = vconst(u128_ones)), def!(y = bxor(a, x))],
);
}
// SIMD shift right (arithmetic, i16x8 and i32x4)
for ty in &[I16, I32] {
let sshr = sshr.bind(vector(*ty, sse_vector_size));
let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
narrow.legalize(
def!(a = sshr(x, y)),
vec![def!(b = bitcast_i64x2(y)), def!(a = x86_psra(x, b))],
);
}
// SIMD shift right (arithmetic, i8x16)
{
let sshr = sshr.bind(vector(I8, sse_vector_size));
let bitcast_i64x2 = bitcast.bind(vector(I64, sse_vector_size));
let raw_bitcast_i16x8 = raw_bitcast.bind(vector(I16, sse_vector_size));
let raw_bitcast_i16x8_again = raw_bitcast.bind(vector(I16, sse_vector_size));
narrow.legalize(
def!(z = sshr(x, y)),
vec![
// Since we will use the high byte of each 16x8 lane, shift an extra 8 bits.
def!(a = iadd_imm(y, uimm8_eight)),
def!(b = bitcast_i64x2(a)),
// Take the low 8 bytes of x, duplicate them in 16x8 lanes, then shift right.
def!(c = x86_punpckl(x, x)),
def!(d = raw_bitcast_i16x8(c)),
def!(e = x86_psra(d, b)),
// Take the high 8 bytes of x, duplicate them in 16x8 lanes, then shift right.
def!(f = x86_punpckh(x, x)),
def!(g = raw_bitcast_i16x8_again(f)),
def!(h = x86_psra(g, b)),
// Re-pack the vector.
def!(z = snarrow(e, h)),
],
);
}
// SIMD shift right (arithmetic, i64x2)
{
let sshr_vector = sshr.bind(vector(I64, sse_vector_size));
let sshr_scalar_lane0 = sshr.bind(I64);
let sshr_scalar_lane1 = sshr.bind(I64);
narrow.legalize(
def!(z = sshr_vector(x, y)),
vec![
// Use scalar operations to shift the first lane.
def!(a = extractlane(x, uimm8_zero)),
def!(b = sshr_scalar_lane0(a, y)),
def!(c = insertlane(x, b, uimm8_zero)),
// Do the same for the second lane.
def!(d = extractlane(x, uimm8_one)),
def!(e = sshr_scalar_lane1(d, y)),
def!(z = insertlane(c, e, uimm8_one)),
],
);
}
// SIMD select
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let bitselect = bitselect.bind(vector(ty, sse_vector_size)); // must bind both x/y and c
narrow.legalize(
def!(d = bitselect(c, x, y)),
vec![
def!(a = band(x, c)),
def!(b = band_not(y, c)),
def!(d = bor(a, b)),
],
);
}
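// Lane-independent model of the rewrite (illustrative, not original code):
// take bits of x where the mask c is 1 and bits of y where it is 0.
fn bitselect_model(c: u64, x: u64, y: u64) -> u64 {
    (x & c) | (y & !c)
}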
// SIMD vselect; replace with bitselect if BLEND* instructions are not available.
// This works because each lane of a boolean vector is filled with all zeroes or all ones.
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let vselect = vselect.bind(vector(ty, sse_vector_size));
let raw_bitcast = raw_bitcast.bind(vector(ty, sse_vector_size));
narrow.legalize(
def!(d = vselect(c, x, y)),
vec![def!(a = raw_bitcast(c)), def!(d = bitselect(a, x, y))],
);
}
// SIMD vany_true
let ne = Literal::enumerator_for(&imm.intcc, "ne");
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let vany_true = vany_true.bind(vector(ty, sse_vector_size));
narrow.legalize(
def!(y = vany_true(x)),
vec![def!(a = x86_ptest(x, x)), def!(y = trueif(ne, a))],
);
}
// SIMD vall_true
let eq = Literal::enumerator_for(&imm.intcc, "eq");
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
let vall_true = vall_true.bind(vector(ty, sse_vector_size));
if ty.is_int() {
// In the common case (Wasm's integer-only all_true), we do not require a
// bitcast.
narrow.legalize(
def!(y = vall_true(x)),
vec![
def!(a = vconst(u128_zeroes)),
def!(c = icmp(eq, x, a)),
def!(d = x86_ptest(c, c)),
def!(y = trueif(eq, d)),
],
);
} else {
// However, to support other types we must bitcast them to an integer vector to
// use icmp.
let lane_type_as_int = LaneType::int_from_bits(ty.lane_bits() as u16);
let raw_bitcast_to_int = raw_bitcast.bind(vector(lane_type_as_int, sse_vector_size));
narrow.legalize(
def!(y = vall_true(x)),
vec![
def!(a = vconst(u128_zeroes)),
def!(b = raw_bitcast_to_int(x)),
def!(c = icmp(eq, b, a)),
def!(d = x86_ptest(c, c)),
def!(y = trueif(eq, d)),
],
);
}
}
// SIMD icmp ne
let ne = Literal::enumerator_for(&imm.intcc, "ne");
for ty in ValueType::all_lane_types().filter(|ty| allowed_simd_type(ty) && ty.is_int()) {
let icmp_ = icmp.bind(vector(ty, sse_vector_size));
narrow.legalize(
def!(c = icmp_(ne, a, b)),
vec![def!(x = icmp(eq, a, b)), def!(c = bnot(x))],
);
}
// SIMD icmp greater-/less-than
let sgt = Literal::enumerator_for(&imm.intcc, "sgt");
let ugt = Literal::enumerator_for(&imm.intcc, "ugt");
let sge = Literal::enumerator_for(&imm.intcc, "sge");
let uge = Literal::enumerator_for(&imm.intcc, "uge");
let slt = Literal::enumerator_for(&imm.intcc, "slt");
let ult = Literal::enumerator_for(&imm.intcc, "ult");
let sle = Literal::enumerator_for(&imm.intcc, "sle");
let ule = Literal::enumerator_for(&imm.intcc, "ule");
for ty in &[I8, I16, I32] {
// greater-than
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(
def!(c = icmp_(ugt, a, b)),
vec![
def!(x = x86_pmaxu(a, b)),
def!(y = icmp(eq, x, b)),
def!(c = bnot(y)),
],
);
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(
def!(c = icmp_(sge, a, b)),
vec![def!(x = x86_pmins(a, b)), def!(c = icmp(eq, x, b))],
);
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(
def!(c = icmp_(uge, a, b)),
vec![def!(x = x86_pminu(a, b)), def!(c = icmp(eq, x, b))],
);
// less-than
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = icmp_(slt, a, b)), vec![def!(c = icmp(sgt, b, a))]);
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = icmp_(ult, a, b)), vec![def!(c = icmp(ugt, b, a))]);
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = icmp_(sle, a, b)), vec![def!(c = icmp(sge, b, a))]);
let icmp_ = icmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = icmp_(ule, a, b)), vec![def!(c = icmp(uge, b, a))]);
}
// SIMD integer min/max
for ty in &[I8, I16, I32] {
let imin = imin.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = imin(a, b)), vec![def!(c = x86_pmins(a, b))]);
let umin = umin.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = umin(a, b)), vec![def!(c = x86_pminu(a, b))]);
let imax = imax.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = imax(a, b)), vec![def!(c = x86_pmaxs(a, b))]);
let umax = umax.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = umax(a, b)), vec![def!(c = x86_pmaxu(a, b))]);
}
// SIMD fcmp greater-/less-than
let gt = Literal::enumerator_for(&imm.floatcc, "gt");
let lt = Literal::enumerator_for(&imm.floatcc, "lt");
let ge = Literal::enumerator_for(&imm.floatcc, "ge");
let le = Literal::enumerator_for(&imm.floatcc, "le");
let ugt = Literal::enumerator_for(&imm.floatcc, "ugt");
let ult = Literal::enumerator_for(&imm.floatcc, "ult");
let uge = Literal::enumerator_for(&imm.floatcc, "uge");
let ule = Literal::enumerator_for(&imm.floatcc, "ule");
for ty in &[F32, F64] {
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = fcmp_(gt, a, b)), vec![def!(c = fcmp(lt, b, a))]);
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = fcmp_(ge, a, b)), vec![def!(c = fcmp(le, b, a))]);
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = fcmp_(ult, a, b)), vec![def!(c = fcmp(ugt, b, a))]);
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = fcmp_(ule, a, b)), vec![def!(c = fcmp(uge, b, a))]);
}
for ty in &[F32, F64] {
let fneg = fneg.bind(vector(*ty, sse_vector_size));
let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16);
let uimm8_shift = Literal::constant(&imm.uimm8, lane_type_as_int.lane_bits() as i64 - 1);
let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size));
let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size));
narrow.legalize(
def!(b = fneg(a)),
vec![
def!(c = vconst(u128_ones)),
def!(d = ishl_imm(c, uimm8_shift)), // Create a mask of all 0s except the MSB.
def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type.
def!(b = bxor(a, e)), // Flip the MSB.
],
);
}
// SIMD fabs
for ty in &[F32, F64] {
let fabs = fabs.bind(vector(*ty, sse_vector_size));
let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16);
let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size));
let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size));
narrow.legalize(
def!(b = fabs(a)),
vec![
def!(c = vconst(u128_ones)),
def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB.
def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type.
def!(b = band(a, e)), // Unset the MSB.
],
);
}
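// Scalar bit-level model of the two mask-based rewrites above (illustrative,
// not original code): all-ones shifted left by 31 isolates the sign bit,
// all-ones shifted right by 1 keeps everything but the sign bit.
fn fneg_model(a: f32) -> f32 {
    f32::from_bits(a.to_bits() ^ 0x8000_0000) // flip the sign bit
}
fn fabs_model(a: f32) -> f32 {
    f32::from_bits(a.to_bits() & 0x7fff_ffff) // clear the sign bit
}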
// SIMD widen
for ty in &[I8, I16] {
let swiden_high = swiden_high.bind(vector(*ty, sse_vector_size));
narrow.legalize(
def!(b = swiden_high(a)),
vec![
def!(c = x86_palignr(a, a, uimm8_eight)),
def!(b = swiden_low(c)),
],
);
let uwiden_high = uwiden_high.bind(vector(*ty, sse_vector_size));
narrow.legalize(
def!(b = uwiden_high(a)),
vec![
def!(c = x86_palignr(a, a, uimm8_eight)),
def!(b = uwiden_low(c)),
],
);
}
narrow.custom_legalize(shuffle, "convert_shuffle");
narrow.custom_legalize(extractlane, "convert_extractlane");
narrow.custom_legalize(insertlane, "convert_insertlane");
narrow.custom_legalize(ineg, "convert_ineg");
narrow.custom_legalize(ushr, "convert_ushr");
narrow.custom_legalize(ishl, "convert_ishl");
narrow.custom_legalize(fcvt_to_sint_sat, "expand_fcvt_to_sint_sat_vector");
narrow.custom_legalize(fmin, "expand_minmax_vector");
narrow.custom_legalize(fmax, "expand_minmax_vector");
narrow_avx.custom_legalize(imul, "convert_i64x2_imul");
narrow_avx.custom_legalize(fcvt_from_uint, "expand_fcvt_from_uint_vector");
narrow_avx.custom_legalize(fcvt_to_uint_sat, "expand_fcvt_to_uint_sat_vector");
}


@@ -1,87 +1,25 @@
use crate::cdsl::cpu_modes::CpuMode;
use crate::cdsl::instructions::{InstructionGroupBuilder, InstructionPredicateMap};
use crate::cdsl::isa::TargetIsa;
use crate::cdsl::types::{ReferenceType, VectorType};
use crate::cdsl::recipes::Recipes;
use crate::cdsl::regs::IsaRegsBuilder;
use crate::shared::types::Bool::B1;
use crate::shared::types::Float::{F32, F64};
use crate::shared::types::Int::{I16, I32, I64, I8};
use crate::shared::types::Reference::{R32, R64};
use crate::shared::Definitions as SharedDefinitions;
mod encodings;
mod instructions;
mod legalize;
mod opcodes;
mod recipes;
mod registers;
pub(crate) mod settings;
pub(crate) fn define(shared_defs: &mut SharedDefinitions) -> TargetIsa {
let settings = settings::define(&shared_defs.settings);
let regs = registers::define();
let inst_group = instructions::define(
&mut shared_defs.all_instructions,
&shared_defs.formats,
&shared_defs.imm,
&shared_defs.entities,
);
legalize::define(shared_defs, &inst_group);
let inst_group = InstructionGroupBuilder::new(&mut shared_defs.all_instructions).build();
// CPU modes for 32-bit and 64-bit operations.
let mut x86_64 = CpuMode::new("I64");
let mut x86_32 = CpuMode::new("I32");
let expand_flags = shared_defs.transform_groups.by_name("expand_flags");
let x86_widen = shared_defs.transform_groups.by_name("x86_widen");
let x86_narrow = shared_defs.transform_groups.by_name("x86_narrow");
let x86_narrow_avx = shared_defs.transform_groups.by_name("x86_narrow_avx");
let x86_expand = shared_defs.transform_groups.by_name("x86_expand");
x86_32.legalize_monomorphic(expand_flags);
x86_32.legalize_default(x86_narrow);
x86_32.legalize_type(B1, expand_flags);
x86_32.legalize_type(I8, x86_widen);
x86_32.legalize_type(I16, x86_widen);
x86_32.legalize_type(I32, x86_expand);
x86_32.legalize_value_type(ReferenceType(R32), x86_expand);
x86_32.legalize_type(F32, x86_expand);
x86_32.legalize_type(F64, x86_expand);
x86_32.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx);
x86_32.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
x86_32.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);
x86_64.legalize_monomorphic(expand_flags);
x86_64.legalize_default(x86_narrow);
x86_64.legalize_type(B1, expand_flags);
x86_64.legalize_type(I8, x86_widen);
x86_64.legalize_type(I16, x86_widen);
x86_64.legalize_type(I32, x86_expand);
x86_64.legalize_type(I64, x86_expand);
x86_64.legalize_value_type(ReferenceType(R64), x86_expand);
x86_64.legalize_type(F32, x86_expand);
x86_64.legalize_type(F64, x86_expand);
x86_64.legalize_value_type(VectorType::new(I32.into(), 4), x86_narrow_avx);
x86_64.legalize_value_type(VectorType::new(I64.into(), 2), x86_narrow_avx);
x86_64.legalize_value_type(VectorType::new(F32.into(), 4), x86_narrow_avx);
let recipes = recipes::define(shared_defs, &settings, &regs);
let encodings = encodings::define(shared_defs, &settings, &inst_group, &recipes);
x86_32.set_encodings(encodings.enc32);
x86_64.set_encodings(encodings.enc64);
let encodings_predicates = encodings.inst_pred_reg.extract();
let recipes = encodings.recipes;
-let cpu_modes = vec![x86_64, x86_32];
+let cpu_modes = vec![];
TargetIsa::new(
"x86",
settings,
-regs,
-recipes,
+IsaRegsBuilder::new().build(),
+Recipes::new(),
cpu_modes,
-encodings_predicates,
+InstructionPredicateMap::new(),
)
}


@@ -1,721 +0,0 @@
//! Static, named definitions of instruction opcodes.
/// Empty opcode for use as a default.
pub static EMPTY: [u8; 0] = [];
/// Add with carry flag r{16,32,64} to r/m of the same size.
pub static ADC: [u8; 1] = [0x11];
/// Add r{16,32,64} to r/m of the same size.
pub static ADD: [u8; 1] = [0x01];
/// Add imm{16,32} to r/m{16,32,64}, possibly sign-extended.
pub static ADD_IMM: [u8; 1] = [0x81];
/// Add sign-extended imm8 to r/m{16,32,64}.
pub static ADD_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
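// Illustrative sketch (not part of the original file) of how a one-byte
// opcode such as ADD combines with a REX.W prefix and a register-direct
// ModRM byte; `encode_add_r64` and its register numbering are assumptions.
// E.g. `add rax, rcx` (dst = 0, src = 1) encodes as 48 01 c8.
fn encode_add_r64(dst: u8, src: u8) -> [u8; 3] {
    debug_assert!(dst < 8 && src < 8); // extended registers need REX.R/B bits
    let modrm = 0b1100_0000 | (src << 3) | dst; // mod = 11: register-direct
    [0x48, ADD[0], modrm]
}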
/// Add packed double-precision floating-point values from xmm2/mem to xmm1 and store result in
/// xmm1 (SSE2).
pub static ADDPD: [u8; 3] = [0x66, 0x0f, 0x58];
/// Add packed single-precision floating-point values from xmm2/mem to xmm1 and store result in
/// xmm1 (SSE).
pub static ADDPS: [u8; 2] = [0x0f, 0x58];
/// Add the low double-precision floating-point value from xmm2/mem to xmm1
/// and store the result in xmm1.
pub static ADDSD: [u8; 3] = [0xf2, 0x0f, 0x58];
/// Add the low single-precision floating-point value from xmm2/mem to xmm1
/// and store the result in xmm1.
pub static ADDSS: [u8; 3] = [0xf3, 0x0f, 0x58];
/// r/m{16,32,64} AND register of the same size (Intel docs have a typo).
pub static AND: [u8; 1] = [0x21];
/// imm{16,32} AND r/m{16,32,64}, possibly sign-extended.
pub static AND_IMM: [u8; 1] = [0x81];
/// r/m{16,32,64} AND sign-extended imm8.
pub static AND_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
/// Return the bitwise logical AND NOT of packed single-precision floating-point
/// values in xmm1 and xmm2/mem.
pub static ANDNPS: [u8; 2] = [0x0f, 0x55];
/// Return the bitwise logical AND of packed single-precision floating-point values
/// in xmm1 and xmm2/mem.
pub static ANDPS: [u8; 2] = [0x0f, 0x54];
/// Bit scan forward (stores index of first encountered 1 from the front).
pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc];
/// Bit scan reverse (stores index of first encountered 1 from the back).
pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd];
/// Select packed single-precision floating-point values from xmm1 and xmm2/m128
/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
pub static BLENDVPS: [u8; 4] = [0x66, 0x0f, 0x38, 0x14];
/// Select packed double-precision floating-point values from xmm1 and xmm2/m128
/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
pub static BLENDVPD: [u8; 4] = [0x66, 0x0f, 0x38, 0x15];
/// Call near, relative, displacement relative to next instruction (sign-extended).
pub static CALL_RELATIVE: [u8; 1] = [0xe8];
/// Move r/m{16,32,64} if overflow (OF=1).
pub static CMOV_OVERFLOW: [u8; 2] = [0x0f, 0x40];
/// Compare imm{16,32} with r/m{16,32,64} (sign-extended if 64).
pub static CMP_IMM: [u8; 1] = [0x81];
/// Compare imm8 with r/m{16,32,64}.
pub static CMP_IMM8: [u8; 1] = [0x83];
/// Compare r{16,32,64} with r/m of the same size.
pub static CMP_REG: [u8; 1] = [0x39];
/// Compare packed double-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of
/// imm8 as comparison predicate (SSE2).
pub static CMPPD: [u8; 3] = [0x66, 0x0f, 0xc2];
/// Compare packed single-precision floating-point value in xmm2/m32 and xmm1 using bits 2:0 of
/// imm8 as comparison predicate (SSE).
pub static CMPPS: [u8; 2] = [0x0f, 0xc2];
/// Convert four packed signed doubleword integers from xmm2/mem to four packed single-precision
/// floating-point values in xmm1 (SSE2).
pub static CVTDQ2PS: [u8; 2] = [0x0f, 0x5b];
/// Convert scalar double-precision floating-point value to scalar single-precision
/// floating-point value.
pub static CVTSD2SS: [u8; 3] = [0xf2, 0x0f, 0x5a];
/// Convert doubleword integer to scalar double-precision floating-point value.
pub static CVTSI2SD: [u8; 3] = [0xf2, 0x0f, 0x2a];
/// Convert doubleword integer to scalar single-precision floating-point value.
pub static CVTSI2SS: [u8; 3] = [0xf3, 0x0f, 0x2a];
/// Convert scalar single-precision floating-point value to scalar double-precision
/// float-point value.
pub static CVTSS2SD: [u8; 3] = [0xf3, 0x0f, 0x5a];
/// Convert four packed single-precision floating-point values from xmm2/mem to four packed signed
/// doubleword values in xmm1 using truncation (SSE2).
pub static CVTTPS2DQ: [u8; 3] = [0xf3, 0x0f, 0x5b];
/// Convert with truncation scalar double-precision floating-point value to signed
/// integer.
pub static CVTTSD2SI: [u8; 3] = [0xf2, 0x0f, 0x2c];
/// Convert with truncation scalar single-precision floating-point value to integer.
pub static CVTTSS2SI: [u8; 3] = [0xf3, 0x0f, 0x2c];
/// Unsigned divide for {16,32,64}-bit.
pub static DIV: [u8; 1] = [0xf7];
/// Divide packed double-precision floating-point values in xmm1 by packed double-precision
/// floating-point values in xmm2/mem (SSE2).
pub static DIVPD: [u8; 3] = [0x66, 0x0f, 0x5e];
/// Divide packed single-precision floating-point values in xmm1 by packed single-precision
/// floating-point values in xmm2/mem (SSE).
pub static DIVPS: [u8; 2] = [0x0f, 0x5e];
/// Divide low double-precision floating-point value in xmm1 by low double-precision
/// floating-point value in xmm2/m64.
pub static DIVSD: [u8; 3] = [0xf2, 0x0f, 0x5e];
/// Divide low single-precision floating-point value in xmm1 by low single-precision
/// floating-point value in xmm2/m32.
pub static DIVSS: [u8; 3] = [0xf3, 0x0f, 0x5e];
/// Signed divide for {16,32,64}-bit.
pub static IDIV: [u8; 1] = [0xf7];
/// Signed multiply for {16,32,64}-bit, generic registers.
pub static IMUL: [u8; 2] = [0x0f, 0xaf];
/// Signed multiply for {16,32,64}-bit, storing into RDX:RAX.
pub static IMUL_RDX_RAX: [u8; 1] = [0xf7];
/// Insert scalar single-precision floating-point value.
pub static INSERTPS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x21];
/// Either:
/// 1. Jump near, absolute indirect, RIP = 64-bit offset from register or memory.
/// 2. Jump far, absolute indirect, address given in m16:64.
pub static JUMP_ABSOLUTE: [u8; 1] = [0xff];
/// Jump near, relative, RIP = RIP + 32-bit displacement sign extended to 64 bits.
pub static JUMP_NEAR_RELATIVE: [u8; 1] = [0xe9];
/// Jump near (rel32) if overflow (OF=1).
pub static JUMP_NEAR_IF_OVERFLOW: [u8; 2] = [0x0f, 0x80];
/// Jump short, relative, RIP = RIP + 8-bit displacement sign extended to 64 bits.
pub static JUMP_SHORT: [u8; 1] = [0xeb];
/// Jump short (rel8) if equal (ZF=1).
pub static JUMP_SHORT_IF_EQUAL: [u8; 1] = [0x74];
/// Jump short (rel8) if not equal (ZF=0).
pub static JUMP_SHORT_IF_NOT_EQUAL: [u8; 1] = [0x75];
/// Jump short (rel8) if overflow (OF=1).
pub static JUMP_SHORT_IF_OVERFLOW: [u8; 1] = [0x70];
/// Store effective address for m in register r{16,32,64}.
pub static LEA: [u8; 1] = [0x8d];
/// Count the number of leading zero bits.
pub static LZCNT: [u8; 3] = [0xf3, 0x0f, 0xbd];
/// Return the maximum packed double-precision floating-point values between xmm1 and xmm2/m128
/// (SSE2).
pub static MAXPD: [u8; 3] = [0x66, 0x0f, 0x5f];
/// Return the maximum packed single-precision floating-point values between xmm1 and xmm2/m128
/// (SSE).
pub static MAXPS: [u8; 2] = [0x0f, 0x5f];
/// Return the maximum scalar double-precision floating-point value between
/// xmm2/m64 and xmm1.
pub static MAXSD: [u8; 3] = [0xf2, 0x0f, 0x5f];
/// Return the maximum scalar single-precision floating-point value between
/// xmm2/m32 and xmm1.
pub static MAXSS: [u8; 3] = [0xf3, 0x0f, 0x5f];
/// Return the minimum packed double-precision floating-point values between xmm1 and xmm2/m128
/// (SSE2).
pub static MINPD: [u8; 3] = [0x66, 0x0f, 0x5d];
/// Return the minimum packed single-precision floating-point values between xmm1 and xmm2/m128
/// (SSE).
pub static MINPS: [u8; 2] = [0x0f, 0x5d];
/// Return the minimum scalar double-precision floating-point value between
/// xmm2/m64 and xmm1.
pub static MINSD: [u8; 3] = [0xf2, 0x0f, 0x5d];
/// Return the minimum scalar single-precision floating-point value between
/// xmm2/m32 and xmm1.
pub static MINSS: [u8; 3] = [0xf3, 0x0f, 0x5d];
/// Move r8 to r/m8.
pub static MOV_BYTE_STORE: [u8; 1] = [0x88];
/// Move imm{16,32,64} to same-sized register.
pub static MOV_IMM: [u8; 1] = [0xb8];
/// Move imm{16,32} to r{16,32,64}, sign-extended if 64-bit target.
pub static MOV_IMM_SIGNEXTEND: [u8; 1] = [0xc7];
/// Move {r/m16, r/m32, r/m64} to same-sized register.
pub static MOV_LOAD: [u8; 1] = [0x8b];
/// Move r16 to r/m16.
pub static MOV_STORE_16: [u8; 2] = [0x66, 0x89];
/// Move {r16, r32, r64} to same-sized register or memory.
pub static MOV_STORE: [u8; 1] = [0x89];
/// Move aligned packed single-precision floating-point values from x/m to xmm (SSE).
pub static MOVAPS_LOAD: [u8; 2] = [0x0f, 0x28];
/// Move doubleword from r/m32 to xmm (SSE2). Quadword with REX prefix.
pub static MOVD_LOAD_XMM: [u8; 3] = [0x66, 0x0f, 0x6e];
/// Move doubleword from xmm to r/m32 (SSE2). Quadword with REX prefix.
pub static MOVD_STORE_XMM: [u8; 3] = [0x66, 0x0f, 0x7e];
/// Move packed single-precision floating-point values low to high (SSE).
pub static MOVLHPS: [u8; 2] = [0x0f, 0x16];
/// Move scalar double-precision floating-point value (from reg/mem to reg).
pub static MOVSD_LOAD: [u8; 3] = [0xf2, 0x0f, 0x10];
/// Move scalar double-precision floating-point value (from reg to reg/mem).
pub static MOVSD_STORE: [u8; 3] = [0xf2, 0x0f, 0x11];
/// Move scalar single-precision floating-point value (from reg to reg/mem).
pub static MOVSS_STORE: [u8; 3] = [0xf3, 0x0f, 0x11];
/// Move scalar single-precision floating-point value (from reg/mem to reg).
pub static MOVSS_LOAD: [u8; 3] = [0xf3, 0x0f, 0x10];
/// Move byte to register with sign-extension.
pub static MOVSX_BYTE: [u8; 2] = [0x0f, 0xbe];
/// Move word to register with sign-extension.
pub static MOVSX_WORD: [u8; 2] = [0x0f, 0xbf];
/// Move doubleword to register with sign-extension.
pub static MOVSXD: [u8; 1] = [0x63];
/// Move unaligned packed single-precision floating-point from x/m to xmm (SSE).
pub static MOVUPS_LOAD: [u8; 2] = [0x0f, 0x10];
/// Move unaligned packed single-precision floating-point value from xmm to x/m (SSE).
pub static MOVUPS_STORE: [u8; 2] = [0x0f, 0x11];
/// Move byte to register with zero-extension.
pub static MOVZX_BYTE: [u8; 2] = [0x0f, 0xb6];
/// Move word to register with zero-extension.
pub static MOVZX_WORD: [u8; 2] = [0x0f, 0xb7];
/// Unsigned multiply for {16,32,64}-bit, storing into RDX:RAX.
pub static MUL: [u8; 1] = [0xf7];
/// Multiply packed double-precision floating-point values from xmm2/mem to xmm1 and store result
/// in xmm1 (SSE2).
pub static MULPD: [u8; 3] = [0x66, 0x0f, 0x59];
/// Multiply packed single-precision floating-point values from xmm2/mem to xmm1 and store result
/// in xmm1 (SSE).
pub static MULPS: [u8; 2] = [0x0f, 0x59];
/// Multiply the low double-precision floating-point value in xmm2/m64 by the
/// low double-precision floating-point value in xmm1.
pub static MULSD: [u8; 3] = [0xf2, 0x0f, 0x59];
/// Multiply the low single-precision floating-point value in xmm2/m32 by the
/// low single-precision floating-point value in xmm1.
pub static MULSS: [u8; 3] = [0xf3, 0x0f, 0x59];
/// Reverse each bit of r/m{16,32,64}.
pub static NOT: [u8; 1] = [0xf7];
/// r/m{16,32,64} OR register of the same size.
pub static OR: [u8; 1] = [0x09];
/// imm{16,32} OR r/m{16,32,64}, possibly sign-extended.
pub static OR_IMM: [u8; 1] = [0x81];
/// r/m{16,32,64} OR sign-extended imm8.
pub static OR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
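// Illustrative sketch, not part of the original file: 0x81 and 0x83 head
// another opcode group (OR is digit /1); an encoder would normally prefer
// the shorter 0x83 form whenever the immediate round-trips through sign
// extension from 8 bits:
fn or_imm_form(imm: i32) -> (u8, usize) {
    if i8::try_from(imm).is_ok() {
        (0x83, 1) // one imm8 byte, sign-extended by the CPU
    } else {
        (0x81, 4) // full imm32
    }
}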
/// Return the bitwise logical OR of packed single-precision values in xmm and x/m (SSE).
pub static ORPS: [u8; 2] = [0x0f, 0x56];
/// Compute the absolute value of bytes in xmm2/m128 and store the unsigned result in xmm1 (SSSE3).
pub static PABSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x1c];
/// Compute the absolute value of 32-bit integers in xmm2/m128 and store the unsigned result in
/// xmm1 (SSSE3).
pub static PABSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x1e];
/// Compute the absolute value of 16-bit integers in xmm2/m128 and store the unsigned result in
/// xmm1 (SSSE3).
pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d];
/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed signed byte
/// integers in xmm1 using signed saturation (SSE2).
pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63];
/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 packed signed
/// word integers in xmm1 using signed saturation (SSE2).
pub static PACKSSDW: [u8; 3] = [0x66, 0x0f, 0x6b];
/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed unsigned byte
/// integers in xmm1 using unsigned saturation (SSE2).
pub static PACKUSWB: [u8; 3] = [0x66, 0x0f, 0x67];
/// Converts 4 packed signed doubleword integers from xmm1 and from xmm2/m128 into 8 packed unsigned
/// word integers in xmm1 using unsigned saturation (SSE4.1).
pub static PACKUSDW: [u8; 4] = [0x66, 0x0f, 0x38, 0x2b];
/// Add packed byte integers from xmm2/m128 and xmm1 (SSE2).
pub static PADDB: [u8; 3] = [0x66, 0x0f, 0xfc];
/// Add packed doubleword integers from xmm2/m128 and xmm1 (SSE2).
pub static PADDD: [u8; 3] = [0x66, 0x0f, 0xfe];
/// Add packed quadword integers from xmm2/m128 and xmm1 (SSE2).
pub static PADDQ: [u8; 3] = [0x66, 0x0f, 0xd4];
/// Add packed word integers from xmm2/m128 and xmm1 (SSE2).
pub static PADDW: [u8; 3] = [0x66, 0x0f, 0xfd];
/// Add packed signed byte integers from xmm2/m128 and xmm1 and saturate the results (SSE2).
pub static PADDSB: [u8; 3] = [0x66, 0x0f, 0xec];
/// Add packed signed word integers from xmm2/m128 and xmm1 and saturate the results (SSE2).
pub static PADDSW: [u8; 3] = [0x66, 0x0f, 0xed];
/// Add packed unsigned byte integers from xmm2/m128 and xmm1 and saturate the results (SSE2).
pub static PADDUSB: [u8; 3] = [0x66, 0x0f, 0xdc];
/// Add packed unsigned word integers from xmm2/m128 and xmm1 and saturate the results (SSE2).
pub static PADDUSW: [u8; 3] = [0x66, 0x0f, 0xdd];
/// Concatenate destination and source operands, extract a byte-aligned result into xmm1 that is
/// shifted to the right by the constant number of bytes in imm8 (SSSE3).
pub static PALIGNR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0f];
/// Bitwise AND of xmm2/m128 and xmm1 (SSE2).
pub static PAND: [u8; 3] = [0x66, 0x0f, 0xdb];
/// Bitwise AND NOT of xmm2/m128 and xmm1 (SSE2).
pub static PANDN: [u8; 3] = [0x66, 0x0f, 0xdf];
/// Average packed unsigned byte integers from xmm2/m128 and xmm1 with rounding (SSE2).
pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xe0];
/// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2).
pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xe3];
/// Select byte values from xmm1 and xmm2/m128 from mask specified in the high bit of each byte
/// in XMM0 and store the values into xmm1 (SSE4.1).
pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10];
/// Select words from xmm1 and xmm2/m128 from mask specified in imm8 and store the values into xmm1
/// (SSE4.1).
pub static PBLENDW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0e];
/// Compare packed data for equal (SSE2).
pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74];
/// Compare packed data for equal (SSE2).
pub static PCMPEQD: [u8; 3] = [0x66, 0x0f, 0x76];
/// Compare packed data for equal (SSE4.1).
pub static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29];
/// Compare packed data for equal (SSE2).
pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75];
/// Compare packed signed byte integers for greater than (SSE2).
pub static PCMPGTB: [u8; 3] = [0x66, 0x0f, 0x64];
/// Compare packed signed doubleword integers for greater than (SSE2).
pub static PCMPGTD: [u8; 3] = [0x66, 0x0f, 0x66];
/// Compare packed signed quadword integers for greater than (SSE4.2).
pub static PCMPGTQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x37];
/// Compare packed signed word integers for greater than (SSE2).
pub static PCMPGTW: [u8; 3] = [0x66, 0x0f, 0x65];
/// Extract doubleword or quadword, depending on REX.W (SSE4.1).
pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];
/// Extract byte (SSE4.1).
pub static PEXTRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x14];
/// Extract word (SSE4.1). A 3-byte SSE2 variant (0f c5) also exists, but it can only extract to a
/// register, not to m16.
pub static PEXTRW: [u8; 4] = [0x66, 0x0f, 0x3a, 0x15];
/// Insert doubleword or quadword, depending on REX.W (SSE4.1).
pub static PINSR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x22];
/// Insert byte (SSE4.1).
pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20];
/// Insert word (SSE2).
pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4];
/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in
/// xmm1 (SSE4.1).
pub static PMAXSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x3c];
/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed maximum
/// values in xmm1 (SSE4.1).
pub static PMAXSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3d];
/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed maximum values in
/// xmm1 (SSE2).
pub static PMAXSW: [u8; 3] = [0x66, 0x0f, 0xee];
/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed maximum values in
/// xmm1 (SSE2).
pub static PMAXUB: [u8; 3] = [0x66, 0x0f, 0xde];
/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed maximum
/// values in xmm1 (SSE4.1).
pub static PMAXUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3f];
/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed maximum values in
/// xmm1 (SSE4.1).
pub static PMAXUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3e];
/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in
/// xmm1 (SSE4.1).
pub static PMINSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x38];
/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed minimum
/// values in xmm1 (SSE4.1).
pub static PMINSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x39];
/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed minimum values in
/// xmm1 (SSE2).
pub static PMINSW: [u8; 3] = [0x66, 0x0f, 0xea];
/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed minimum values in
/// xmm1 (SSE2).
pub static PMINUB: [u8; 3] = [0x66, 0x0f, 0xda];
/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed minimum
/// values in xmm1 (SSE4.1).
pub static PMINUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3b];
/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed minimum values in
/// xmm1 (SSE4.1).
pub static PMINUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3a];
/// Sign extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
/// integers in xmm1 (SSE4.1).
pub static PMOVSXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x20];
/// Sign extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit
/// integers in xmm1 (SSE4.1).
pub static PMOVSXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x23];
/// Sign extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
/// integers in xmm1 (SSE4.1).
pub static PMOVSXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x25];
/// Zero extend 8 packed 8-bit integers in the low 8 bytes of xmm2/m64 to 8 packed 16-bit
/// integers in xmm1 (SSE4.1).
pub static PMOVZXBW: [u8; 4] = [0x66, 0x0f, 0x38, 0x30];
/// Zero extend 4 packed 16-bit integers in the low 8 bytes of xmm2/m64 to 4 packed 32-bit
/// integers in xmm1 (SSE4.1).
pub static PMOVZXWD: [u8; 4] = [0x66, 0x0f, 0x38, 0x33];
/// Zero extend 2 packed 32-bit integers in the low 8 bytes of xmm2/m64 to 2 packed 64-bit
/// integers in xmm1 (SSE4.1).
pub static PMOVZXDQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x35];
/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
/// the results in xmm1 (SSE2).
pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5];
/// Multiply the packed doubleword signed integers in xmm1 and xmm2/m128 and store the low 32
/// bits of each product in xmm1 (SSE4.1).
pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
/// Multiply the packed quadword signed integers in xmm2 and xmm3/m128 and store the low 64
/// bits of each product in xmm1 (AVX512VL/DQ). Requires an EVEX encoding.
pub static VPMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
/// Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers
/// in xmm2/m128, and store the quadword results in xmm1 (SSE2).
pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4];
/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and add the adjacent
/// doubleword results (SSE2).
pub static PMADDWD: [u8; 3] = [0x66, 0x0f, 0xf5];
/// Pop top of stack into r{16,32,64}; increment stack pointer.
pub static POP_REG: [u8; 1] = [0x58];
/// Return the count of bits set to 1.
pub static POPCNT: [u8; 3] = [0xf3, 0x0f, 0xb8];
/// Bitwise OR of xmm2/m128 and xmm1 (SSE2).
pub static POR: [u8; 3] = [0x66, 0x0f, 0xeb];
/// Shuffle bytes in xmm1 according to contents of xmm2/m128 (SSSE3).
pub static PSHUFB: [u8; 4] = [0x66, 0x0f, 0x38, 0x00];
/// Shuffle the doublewords in xmm2/m128 based on the encoding in imm8 and
/// store the result in xmm1 (SSE2).
pub static PSHUFD: [u8; 3] = [0x66, 0x0f, 0x70];
/// Shift words in xmm1 by imm8; the direction and sign-bit behavior are controlled by the RRR
/// digit used in the ModR/M byte (SSE2).
pub static PS_W_IMM: [u8; 3] = [0x66, 0x0f, 0x71];
/// Shift doublewords in xmm1 by imm8; the direction and sign-bit behavior are controlled by the RRR
/// digit used in the ModR/M byte (SSE2).
pub static PS_D_IMM: [u8; 3] = [0x66, 0x0f, 0x72];
/// Shift quadwords in xmm1 by imm8; the direction and sign-bit behavior are controlled by the RRR
/// digit used in the ModR/M byte (SSE2).
pub static PS_Q_IMM: [u8; 3] = [0x66, 0x0f, 0x73];
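// Illustrative sketch, not part of the original file: for the three
// immediate-shift groups above, the ModR/M reg digit selects the operation:
// /2 = logical right (PSRL*), /4 = arithmetic right (PSRA*), /6 = logical
// left (PSLL*); 0x73 also offers /3 (PSRLDQ) and /7 (PSLLDQ) for
// whole-register byte shifts:
enum SimdShift {
    LeftLogical,
    RightLogical,
    RightArithmetic,
}
fn simd_shift_digit(kind: SimdShift) -> u8 {
    match kind {
        SimdShift::RightLogical => 2,
        SimdShift::RightArithmetic => 4,
        SimdShift::LeftLogical => 6,
    }
}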
/// Shift words in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
pub static PSLLW: [u8; 3] = [0x66, 0x0f, 0xf1];
/// Shift doublewords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
pub static PSLLD: [u8; 3] = [0x66, 0x0f, 0xf2];
/// Shift quadwords in xmm1 left by xmm2/m128 while shifting in 0s (SSE2).
pub static PSLLQ: [u8; 3] = [0x66, 0x0f, 0xf3];
/// Shift words in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
pub static PSRLW: [u8; 3] = [0x66, 0x0f, 0xd1];
/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
pub static PSRLD: [u8; 3] = [0x66, 0x0f, 0xd2];
/// Shift quadwords in xmm1 right by xmm2/m128 while shifting in 0s (SSE2).
pub static PSRLQ: [u8; 3] = [0x66, 0x0f, 0xd3];
/// Shift words in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2).
pub static PSRAW: [u8; 3] = [0x66, 0x0f, 0xe1];
/// Shift doublewords in xmm1 right by xmm2/m128 while shifting in sign bits (SSE2).
pub static PSRAD: [u8; 3] = [0x66, 0x0f, 0xe2];
/// Subtract packed byte integers in xmm2/m128 from packed byte integers in xmm1 (SSE2).
pub static PSUBB: [u8; 3] = [0x66, 0x0f, 0xf8];
/// Subtract packed word integers in xmm2/m128 from packed word integers in xmm1 (SSE2).
pub static PSUBW: [u8; 3] = [0x66, 0x0f, 0xf9];
/// Subtract packed doubleword integers in xmm2/m128 from packed doubleword integers in xmm1 (SSE2).
pub static PSUBD: [u8; 3] = [0x66, 0x0f, 0xfa];
/// Subtract packed quadword integers in xmm2/m128 from xmm1 (SSE2).
pub static PSUBQ: [u8; 3] = [0x66, 0x0f, 0xfb];
/// Subtract packed signed byte integers in xmm2/m128 from packed signed byte integers in xmm1
/// and saturate results (SSE2).
pub static PSUBSB: [u8; 3] = [0x66, 0x0f, 0xe8];
/// Subtract packed signed word integers in xmm2/m128 from packed signed word integers in xmm1
/// and saturate results (SSE2).
pub static PSUBSW: [u8; 3] = [0x66, 0x0f, 0xe9];
/// Subtract packed unsigned byte integers in xmm2/m128 from packed unsigned byte integers in xmm1
/// and saturate results (SSE2).
pub static PSUBUSB: [u8; 3] = [0x66, 0x0f, 0xd8];
/// Subtract packed unsigned word integers in xmm2/m128 from packed unsigned word integers in xmm1
/// and saturate results (SSE2).
pub static PSUBUSW: [u8; 3] = [0x66, 0x0f, 0xd9];
/// Set ZF if xmm2/m128 AND xmm1 result is all 0s; set CF if xmm2/m128 AND NOT xmm1 result is all
/// 0s (SSE4.1).
pub static PTEST: [u8; 4] = [0x66, 0x0f, 0x38, 0x17];
/// Unpack and interleave high-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKHBW: [u8; 3] = [0x66, 0x0f, 0x68];
/// Unpack and interleave high-order words from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKHWD: [u8; 3] = [0x66, 0x0f, 0x69];
/// Unpack and interleave high-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKHDQ: [u8; 3] = [0x66, 0x0f, 0x6a];
/// Unpack and interleave high-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKHQDQ: [u8; 3] = [0x66, 0x0f, 0x6d];
/// Unpack and interleave low-order bytes from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKLBW: [u8; 3] = [0x66, 0x0f, 0x60];
/// Unpack and interleave low-order words from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKLWD: [u8; 3] = [0x66, 0x0f, 0x61];
/// Unpack and interleave low-order doublewords from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKLDQ: [u8; 3] = [0x66, 0x0f, 0x62];
/// Unpack and interleave low-order quadwords from xmm1 and xmm2/m128 into xmm1 (SSE2).
pub static PUNPCKLQDQ: [u8; 3] = [0x66, 0x0f, 0x6c];
/// Push r{16,32,64}.
pub static PUSH_REG: [u8; 1] = [0x50];
/// Bitwise logical exclusive OR of xmm2/m128 and xmm1 (SSE2).
pub static PXOR: [u8; 3] = [0x66, 0x0f, 0xef];
/// Near return to calling procedure.
pub static RET_NEAR: [u8; 1] = [0xc3];
/// General rotate group opcode, with the count in CL; the kind of rotation is selected by the
/// ModR/M reg digit.
pub static ROTATE_CL: [u8; 1] = [0xd3];
/// General rotate group opcode, with the count in imm8; the kind of rotation is selected by the
/// ModR/M reg digit.
pub static ROTATE_IMM8: [u8; 1] = [0xc1];
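// Illustrative sketch, not part of the original file: 0xd3 (count in CL) and
// 0xc1 (count in imm8) head the "group 2" rotate/shift family; the ModR/M
// reg digit picks the operation: /0 = ROL, /1 = ROR, /2 = RCL, /3 = RCR
// (the same group also encodes SHL /4, SHR /5, and SAR /7):
fn rotate_digit(right: bool, through_carry: bool) -> u8 {
    match (through_carry, right) {
        (false, false) => 0, // ROL
        (false, true) => 1,  // ROR
        (true, false) => 2,  // RCL
        (true, true) => 3,   // RCR
    }
}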
/// Round scalar double-precision floating-point values.
pub static ROUNDSD: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0b];
/// Round scalar single-precision floating-point values.
pub static ROUNDSS: [u8; 4] = [0x66, 0x0f, 0x3a, 0x0a];
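// Illustrative sketch, not part of the original file: the ROUNDS{S,D} imm8
// controls the operation: bits 1:0 select the rounding mode (00 = nearest,
// 01 = floor, 10 = ceil, 11 = truncate), setting bit 2 defers to MXCSR.RC
// instead, and bit 3 suppresses precision exceptions:
enum RoundMode {
    Nearest,
    Floor,
    Ceil,
    Trunc,
}
fn round_imm8(mode: RoundMode) -> u8 {
    match mode {
        RoundMode::Nearest => 0b00,
        RoundMode::Floor => 0b01,
        RoundMode::Ceil => 0b10,
        RoundMode::Trunc => 0b11,
    }
}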
/// Subtract with borrow r{16,32,64} from r/m of the same size.
pub static SBB: [u8; 1] = [0x19];
/// Set byte if overflow (OF=1).
pub static SET_BYTE_IF_OVERFLOW: [u8; 2] = [0x0f, 0x90];
/// Compute the square root of the packed double-precision floating-point values and store the
/// result in xmm1 (SSE2).
pub static SQRTPD: [u8; 3] = [0x66, 0x0f, 0x51];
/// Compute the square root of the packed single-precision floating-point values and store the
/// result in xmm1 (SSE).
pub static SQRTPS: [u8; 2] = [0x0f, 0x51];
/// Compute square root of scalar double-precision floating-point value.
pub static SQRTSD: [u8; 3] = [0xf2, 0x0f, 0x51];
/// Compute square root of scalar single-precision floating-point value.
pub static SQRTSS: [u8; 3] = [0xf3, 0x0f, 0x51];
/// Subtract r{16,32,64} from r/m of same size.
pub static SUB: [u8; 1] = [0x29];
/// Subtract packed double-precision floating-point values in xmm2/mem from xmm1 and store result
/// in xmm1 (SSE2).
pub static SUBPD: [u8; 3] = [0x66, 0x0f, 0x5c];
/// Subtract packed single-precision floating-point values in xmm2/mem from xmm1 and store result
/// in xmm1 (SSE).
pub static SUBPS: [u8; 2] = [0x0f, 0x5c];
/// Subtract the low double-precision floating-point value in xmm2/m64 from xmm1
/// and store the result in xmm1.
pub static SUBSD: [u8; 3] = [0xf2, 0x0f, 0x5c];
/// Subtract the low single-precision floating-point value in xmm2/m32 from xmm1
/// and store the result in xmm1.
pub static SUBSS: [u8; 3] = [0xf3, 0x0f, 0x5c];
/// AND r8 with r/m8; set SF, ZF, PF according to result.
pub static TEST_BYTE_REG: [u8; 1] = [0x84];
/// AND {r16, r32, r64} with r/m of the same size; set SF, ZF, PF according to result.
pub static TEST_REG: [u8; 1] = [0x85];
/// Count the number of trailing zero bits.
pub static TZCNT: [u8; 3] = [0xf3, 0x0f, 0xbc];
/// Compare low double-precision floating-point values in xmm1 and xmm2/mem64
/// and set the EFLAGS flags accordingly.
pub static UCOMISD: [u8; 3] = [0x66, 0x0f, 0x2e];
/// Compare low single-precision floating-point values in xmm1 and xmm2/mem32
/// and set the EFLAGS flags accordingly.
pub static UCOMISS: [u8; 2] = [0x0f, 0x2e];
/// Raise an invalid-opcode exception (UD2).
pub static UNDEFINED2: [u8; 2] = [0x0f, 0x0b];
/// Convert four packed unsigned doubleword integers from xmm2/m128/m32bcst to packed
/// single-precision floating-point values in xmm1 with writemask k1. Rounding behavior
/// is controlled by MXCSR but can be overridden by EVEX.L'L in static rounding mode
/// (AVX512VL, AVX512F).
pub static VCVTUDQ2PS: [u8; 3] = [0xf2, 0x0f, 0x7a];
/// imm{16,32} XOR r/m{16,32,64}, possibly sign-extended.
pub static XOR_IMM: [u8; 1] = [0x81];
/// r/m{16,32,64} XOR sign-extended imm8.
pub static XOR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
/// r/m{16,32,64} XOR register of the same size.
pub static XOR: [u8; 1] = [0x31];
/// Bitwise logical XOR of packed double-precision floating-point values (SSE2).
pub static XORPD: [u8; 3] = [0x66, 0x0f, 0x57];
/// Bitwise logical XOR of packed single-precision floating-point values (SSE).
pub static XORPS: [u8; 2] = [0x0f, 0x57];
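// Illustrative sketch, not part of the original file: XORPS of a register
// with itself is the idiomatic way to zero an XMM register and needs no
// prefix bytes; e.g. `xorps xmm0, xmm0` assembles to just 0f 57 c0:
fn encode_xorps_self(reg: u8) -> [u8; 3] {
    let rr = reg & 0b111;
    [0x0f, 0x57, 0b1100_0000 | (rr << 3) | rr]
}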

File diff suppressed because it is too large Load Diff

View File

@@ -1,43 +0,0 @@
use crate::cdsl::regs::{IsaRegs, IsaRegsBuilder, RegBankBuilder, RegClassBuilder};

pub(crate) fn define() -> IsaRegs {
    let mut regs = IsaRegsBuilder::new();

    // The 16 SSE registers, xmm0-xmm15.
    let builder = RegBankBuilder::new("FloatRegs", "xmm")
        .units(16)
        .track_pressure(true);
    let float_regs = regs.add_bank(builder);

    // The 16 general-purpose registers. Only the low eight have architectural
    // names; r8-r15 are referred to by index. r15 is the pinned register.
    let builder = RegBankBuilder::new("IntRegs", "r")
        .units(16)
        .names(vec!["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"])
        .track_pressure(true)
        .pinned_reg(15);
    let int_regs = regs.add_bank(builder);

    // A single-unit bank for the flags register; it is exempt from register
    // pressure tracking.
    let builder = RegBankBuilder::new("FlagRegs", "")
        .units(1)
        .names(vec!["rflags"])
        .track_pressure(false);
    let flag_reg = regs.add_bank(builder);

    let builder = RegClassBuilder::new_toplevel("GPR", int_regs);
    let gpr = regs.add_class(builder);

    let builder = RegClassBuilder::new_toplevel("FPR", float_regs);
    let fpr = regs.add_class(builder);

    let builder = RegClassBuilder::new_toplevel("FLAG", flag_reg);
    regs.add_class(builder);

    // Subclasses restricted to the first eight registers of each bank, for
    // encodings that cannot address all 16; ABCD further narrows GPR8 to
    // rax, rcx, rdx, rbx.
    let builder = RegClassBuilder::subclass_of("GPR8", gpr, 0, 8);
    let gpr8 = regs.add_class(builder);

    let builder = RegClassBuilder::subclass_of("ABCD", gpr8, 0, 4);
    regs.add_class(builder);

    let builder = RegClassBuilder::subclass_of("FPR8", fpr, 0, 8);
    regs.add_class(builder);

    regs.build()
}