diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index d7b3896f66..7303ff56d3 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -15,7 +15,7 @@ jobs:
     name: Rustfmt
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v1
+    - uses: actions/checkout@v2
       with:
         submodules: true
     - uses: ./.github/actions/install-rust
@@ -28,7 +28,7 @@ jobs:
     name: Doc - build the book
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v1
+    - uses: actions/checkout@v2
       with:
         submodules: true
     - run: |
@@ -50,7 +50,7 @@ jobs:
     name: Doc - build the API documentation
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v1
+    - uses: actions/checkout@v2
       with:
         submodules: true
     - uses: ./.github/actions/install-rust
@@ -70,7 +70,7 @@ jobs:
     name: Check
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v1
+    - uses: actions/checkout@v2
       with:
         submodules: true
     - uses: ./.github/actions/install-rust
@@ -112,7 +112,7 @@ jobs:
     name: Fuzz Targets
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v1
+    - uses: actions/checkout@v2
       with:
         submodules: true
     - uses: ./.github/actions/install-rust
@@ -127,7 +127,7 @@ jobs:
     name: Rebuild Peephole Optimizers
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v1
+    - uses: actions/checkout@v2
       with:
         submodules: true
     - name: Test `peepmatic`
@@ -175,7 +175,7 @@ jobs:
             os: windows-latest
             rust: stable
     steps:
-    - uses: actions/checkout@v1
+    - uses: actions/checkout@v2
       with:
         submodules: true
     - uses: ./.github/actions/install-rust
@@ -245,7 +245,7 @@ jobs:
     name: Meta deterministic check
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v1
+    - uses: actions/checkout@v2
       with:
         submodules: true
     - name: Install Rust
@@ -284,7 +284,7 @@ jobs:
           qemu: qemu-aarch64 -L /usr/aarch64-linux-gnu
           qemu_target: aarch64-linux-user
     steps:
-    - uses: actions/checkout@v1
+    - uses: actions/checkout@v2
       with:
         submodules: true
     - uses: ./.github/actions/install-rust
@@ -407,7 +407,7 @@ jobs:
     needs: [doc_book, doc_api, build]
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v1
+    - uses: actions/checkout@v2
       with:
         submodules: true
     - run: rustup update stable && rustup default stable
@@ -513,7 +513,7 @@ jobs:
       CARGO_AUDIT_VERSION: 0.11.2
     runs-on: ubuntu-latest
     steps:
-    - uses: actions/checkout@v1
+    - uses: actions/checkout@v2
     - uses: actions/cache@v1
       with:
         path: ${{ runner.tool_cache }}/cargo-audit
diff --git a/Cargo.lock b/Cargo.lock
index 0859dec9bd..be772cc0e1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2170,6 +2170,7 @@ dependencies = [
  "cfg-if",
  "lazy_static",
  "libc",
+ "log",
  "region",
  "rustc-demangle",
  "target-lexicon",
diff --git a/build.rs b/build.rs
index 28b1ca3636..3841b85303 100644
--- a/build.rs
+++ b/build.rs
@@ -189,7 +189,6 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
             ("simd", "simd_f64x2") => return true, // FIXME expected V128(F64x2([Value(Float64 { bits: 9221120237041090560 }), Value(Float64 { bits: 0 })])), got V128(0)
             ("simd", "simd_f64x2_arith") => return true, // FIXME expected V128(F64x2([Value(Float64 { bits: 9221120237041090560 }), Value(Float64 { bits: 13835058055282163712 })])), got V128(255211775190703847615975447847722024960)
             ("simd", "simd_i64x2_arith") => return true, // FIXME Unsupported feature: proposed SIMD operator I64x2Mul
-            ("simd", "simd_lane") => return true, // FIXME invalid u8 number: constant out of range: (v8x16.shuffle -1 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14...
             ("simd", "simd_load") => return true, // FIXME Unsupported feature: proposed SIMD operator I8x16Shl
             ("simd", "simd_splat") => return true, // FIXME Unsupported feature: proposed SIMD operator I8x16ShrS
 
diff --git a/cranelift/codegen/meta/src/isa/riscv/recipes.rs b/cranelift/codegen/meta/src/isa/riscv/recipes.rs
index 1036d15566..47acdbb042 100644
--- a/cranelift/codegen/meta/src/isa/riscv/recipes.rs
+++ b/cranelift/codegen/meta/src/isa/riscv/recipes.rs
@@ -64,7 +64,7 @@ pub(crate) fn define(shared_defs: &SharedDefinitions, regs: &IsaRegs) -> RecipeG
 
     // R-type with an immediate shift amount instead of rs2.
     recipes.push(
-        EncodingRecipeBuilder::new("Rshamt", &formats.binary_imm, 4)
+        EncodingRecipeBuilder::new("Rshamt", &formats.binary_imm64, 4)
             .operands_in(vec![gpr])
             .operands_out(vec![gpr])
             .emit("put_rshamt(bits, in_reg0, imm.into(), out_reg0, sink);"),
@@ -79,11 +79,11 @@ pub(crate) fn define(shared_defs: &SharedDefinitions, regs: &IsaRegs) -> RecipeG
     );
 
     recipes.push(
-        EncodingRecipeBuilder::new("Ii", &formats.binary_imm, 4)
+        EncodingRecipeBuilder::new("Ii", &formats.binary_imm64, 4)
             .operands_in(vec![gpr])
             .operands_out(vec![gpr])
             .inst_predicate(InstructionPredicate::new_is_signed_int(
-                &*formats.binary_imm,
+                &*formats.binary_imm64,
                 "imm",
                 12,
                 0,
diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs
index 7863e2bd85..65df907256 100644
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -1454,6 +1454,7 @@ fn define_alu(
     // x86 has a bitwise not instruction NOT.
     e.enc_i32_i64(bnot, rec_ur.opcodes(&NOT).rrr(2));
     e.enc_b32_b64(bnot, rec_ur.opcodes(&NOT).rrr(2));
+    e.enc_both(bnot.bind(B1), rec_ur.opcodes(&NOT).rrr(2));
 
     // Also add a `b1` encodings for the logic instructions.
     // TODO: Should this be done with 8-bit instructions? It would improve partial register
@@ -1633,6 +1634,7 @@ fn define_simd(
     let ushr_imm = shared.by_name("ushr_imm");
     let usub_sat = shared.by_name("usub_sat");
     let vconst = shared.by_name("vconst");
+    let vselect = shared.by_name("vselect");
     let x86_insertps = x86.by_name("x86_insertps");
     let x86_movlhps = x86.by_name("x86_movlhps");
     let x86_movsd = x86.by_name("x86_movsd");
@@ -1653,6 +1655,7 @@ fn define_simd(
     let x86_punpckl = x86.by_name("x86_punpckl");
 
     // Shorthands for recipes.
+    let rec_blend = r.template("blend");
     let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
     let rec_f_ib = r.template("f_ib");
     let rec_fa = r.template("fa");
@@ -1722,6 +1725,20 @@ fn define_simd(
         e.enc_both_inferred(instruction, template);
     }
 
+    // SIMD vselect; controlling value of vselect is a boolean vector, so each lane should be
+    // either all ones or all zeroes - it makes it possible to always use 8-bit PBLENDVB;
+    // for 32/64-bit lanes we can also use BLENDVPS and BLENDVPD
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        let opcode = match ty.lane_bits() {
+            32 => &BLENDVPS,
+            64 => &BLENDVPD,
+            _ => &PBLENDVB,
+        };
+        let instruction = vselect.bind(vector(ty, sse_vector_size));
+        let template = rec_blend.opcodes(opcode);
+        e.enc_both_inferred_maybe_isap(instruction, template, Some(use_sse41_simd));
+    }
+
     // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according
     // to the Intel manual: "When the destination operand is an XMM register, the source operand is
     // written to the low doubleword of the register and the register is zero-extended to 128 bits."
@@ -2269,8 +2286,7 @@ fn define_entity_ref(
     let rec_gvaddr8 = r.template("gvaddr8");
     let rec_pcrel_fnaddr8 = r.template("pcrel_fnaddr8");
     let rec_pcrel_gvaddr8 = r.template("pcrel_gvaddr8");
-    let rec_spaddr4_id = r.template("spaddr4_id");
-    let rec_spaddr8_id = r.template("spaddr8_id");
+    let rec_spaddr_id = r.template("spaddr_id");
 
     // Predicates shorthands.
     let all_ones_funcaddrs_and_not_is_pic =
@@ -2358,8 +2374,8 @@ fn define_entity_ref(
     //
     // TODO: Add encoding rules for stack_load and stack_store, so that they
     // don't get legalized to stack_addr + load/store.
-    e.enc32(stack_addr.bind(I32), rec_spaddr4_id.opcodes(&LEA));
-    e.enc64(stack_addr.bind(I64), rec_spaddr8_id.opcodes(&LEA).rex().w());
+    e.enc64(stack_addr.bind(I64), rec_spaddr_id.opcodes(&LEA).rex().w());
+    e.enc32(stack_addr.bind(I32), rec_spaddr_id.opcodes(&LEA));
 
     // Constant addresses (PIC).
     e.enc64(const_addr.bind(I64), rec_const_addr.opcodes(&LEA).rex().w());
diff --git a/cranelift/codegen/meta/src/isa/x86/instructions.rs b/cranelift/codegen/meta/src/isa/x86/instructions.rs
index 2675937353..f516a928dd 100644
--- a/cranelift/codegen/meta/src/isa/x86/instructions.rs
+++ b/cranelift/codegen/meta/src/isa/x86/instructions.rs
@@ -283,7 +283,7 @@ pub(crate) fn define(
     Packed Shuffle Doublewords -- copies data from either memory or lanes in an extended
     register and re-orders the data according to the passed immediate byte.
     "#,
-            &formats.extract_lane,
+            &formats.binary_imm8,
         )
         .operands_in(vec![a, i]) // TODO allow copying from memory here (need more permissive type than TxN)
         .operands_out(vec![a]),
@@ -314,7 +314,7 @@ pub(crate) fn define(
         The lane index, ``Idx``, is an immediate value, not an SSA value. It
         must indicate a valid lane index for the type of ``x``.
         "#,
-            &formats.extract_lane,
+            &formats.binary_imm8,
         )
         .operands_in(vec![x, Idx])
         .operands_out(vec![a]),
@@ -342,9 +342,9 @@ pub(crate) fn define(
         The lane index, ``Idx``, is an immediate value, not an SSA value. It
         must indicate a valid lane index for the type of ``x``.
         "#,
-            &formats.insert_lane,
+            &formats.ternary_imm8,
         )
-        .operands_in(vec![x, Idx, y])
+        .operands_in(vec![x, y, Idx])
         .operands_out(vec![a]),
     );
 
@@ -369,9 +369,9 @@ pub(crate) fn define(
         extracted from and which it is inserted to. This is similar to x86_pinsr but inserts
         floats, which are already stored in an XMM register.
         "#,
-            &formats.insert_lane,
+            &formats.ternary_imm8,
         )
-        .operands_in(vec![x, Idx, y])
+        .operands_in(vec![x, y, Idx])
         .operands_out(vec![a]),
     );
 
diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs
index 3b073c1fa6..5d7e3c7619 100644
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs
@@ -378,6 +378,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
     let vconst = insts.by_name("vconst");
     let vall_true = insts.by_name("vall_true");
     let vany_true = insts.by_name("vany_true");
+    let vselect = insts.by_name("vselect");
 
     let x86_packss = x86_instructions.by_name("x86_packss");
     let x86_pmaxs = x86_instructions.by_name("x86_pmaxs");
@@ -459,7 +460,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
                 // Move into the lowest 16 bits of an XMM register.
                 def!(a = scalar_to_vector(x)),
                 // Insert the value again but in the next lowest 16 bits.
-                def!(b = insertlane(a, uimm8_one, x)),
+                def!(b = insertlane(a, x, uimm8_one)),
                 // No instruction emitted; pretend this is an I32x4 so we can use PSHUFD.
                 def!(c = raw_bitcast_any16x8_to_i32x4(b)),
                 // Broadcast the bytes in the XMM register with PSHUFD.
@@ -493,7 +494,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
                 // Move into the lowest 64 bits of an XMM register.
                 def!(a = scalar_to_vector(x)),
                 // Move into the highest 64 bits of the same XMM register.
-                def!(y = insertlane(a, uimm8_one, x)),
+                def!(y = insertlane(a, x, uimm8_one)),
             ],
         );
     }
@@ -567,11 +568,11 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
                 // Use scalar operations to shift the first lane.
                 def!(a = extractlane(x, uimm8_zero)),
                 def!(b = sshr_scalar_lane0(a, y)),
-                def!(c = insertlane(x, uimm8_zero, b)),
+                def!(c = insertlane(x, b, uimm8_zero)),
                 // Do the same for the second lane.
                 def!(d = extractlane(x, uimm8_one)),
                 def!(e = sshr_scalar_lane1(d, y)),
-                def!(z = insertlane(c, uimm8_one, e)),
+                def!(z = insertlane(c, e, uimm8_one)),
             ],
         );
     }
@@ -589,6 +590,17 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
         );
     }
 
+    // SIMD vselect; replace with bitselect if BLEND* instructions are not available.
+    // This works, because each lane of boolean vector is filled with zeroes or ones.
+    for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
+        let vselect = vselect.bind(vector(ty, sse_vector_size));
+        let raw_bitcast = raw_bitcast.bind(vector(ty, sse_vector_size));
+        narrow.legalize(
+            def!(d = vselect(c, x, y)),
+            vec![def!(a = raw_bitcast(c)), def!(d = bitselect(a, x, y))],
+        );
+    }
+
     // SIMD vany_true
     let ne = Literal::enumerator_for(&imm.intcc, "ne");
     for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
index d34761d246..74dff216e7 100644
--- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs
+++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
@@ -54,6 +54,14 @@ pub static BIT_SCAN_FORWARD: [u8; 2] = [0x0f, 0xbc];
 /// Bit scan reverse (stores index of first encountered 1 from the back).
 pub static BIT_SCAN_REVERSE: [u8; 2] = [0x0f, 0xbd];
 
+/// Select packed single-precision floating-point values from xmm1 and xmm2/m128
+/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
+pub static BLENDVPS: [u8; 4] = [0x66, 0x0f, 0x38, 0x14];
+
+/// Select packed double-precision floating-point values from xmm1 and xmm2/m128
+/// from mask specified in XMM0 and store the values into xmm1 (SSE4.1).
+pub static BLENDVPD: [u8; 4] = [0x66, 0x0f, 0x38, 0x15];
+
 /// Call near, relative, displacement relative to next instruction (sign-extended).
 pub static CALL_RELATIVE: [u8; 1] = [0xe8];
 
@@ -335,6 +343,10 @@ pub static PAVGB: [u8; 3] = [0x66, 0x0f, 0xE0];
 /// Average packed unsigned word integers from xmm2/m128 and xmm1 with rounding (SSE2).
 pub static PAVGW: [u8; 3] = [0x66, 0x0f, 0xE3];
 
+/// Select byte values from xmm1 and xmm2/m128 from mask specified in the high bit of each byte
+/// in XMM0 and store the values into xmm1 (SSE4.1).
+pub static PBLENDVB: [u8; 4] = [0x66, 0x0f, 0x38, 0x10];
+
 /// Compare packed data for equal (SSE2).
 pub static PCMPEQB: [u8; 3] = [0x66, 0x0f, 0x74];
 
diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs
index 0af1dd29a3..0cfd83d373 100644
--- a/cranelift/codegen/meta/src/isa/x86/recipes.rs
+++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs
@@ -427,6 +427,7 @@ pub(crate) fn define<'shared>(
     let reg_rcx = Register::new(gpr, regs.regunit_by_name(gpr, "rcx"));
     let reg_rdx = Register::new(gpr, regs.regunit_by_name(gpr, "rdx"));
     let reg_r15 = Register::new(gpr, regs.regunit_by_name(gpr, "r15"));
+    let reg_xmm0 = Register::new(fpr, regs.regunit_by_name(fpr, "xmm0"));
 
     // Stack operand with a 32-bit signed displacement from either RBP or RSP.
     let stack_gpr32 = Stack::new(gpr);
@@ -607,12 +608,12 @@ pub(crate) fn define<'shared>(
     // XX /r with FPR ins and outs. A form with a byte immediate.
     {
         recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("fa_ib", &formats.insert_lane, 2)
+            EncodingRecipeBuilder::new("fa_ib", &formats.ternary_imm8, 2)
                 .operands_in(vec![fpr, fpr])
                 .operands_out(vec![0])
                 .inst_predicate(InstructionPredicate::new_is_unsigned_int(
-                    &*formats.insert_lane,
-                    "lane",
+                    &*formats.ternary_imm8,
+                    "imm",
                     8,
                     0,
                 ))
@@ -620,7 +621,7 @@ pub(crate) fn define<'shared>(
                     r#"
                     {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
                     modrm_rr(in_reg1, in_reg0, sink);
-                    let imm:i64 = lane.into();
+                    let imm: i64 = imm.into();
                     sink.put1(imm as u8);
                 "#,
                 ),
@@ -904,14 +905,32 @@ pub(crate) fn define<'shared>(
         .inferred_rex_compute_size("size_with_inferred_rex_for_inreg1"),
     );
 
+    // XX /r for BLEND* instructions
+    recipes.add_template_inferred(
+        EncodingRecipeBuilder::new("blend", &formats.ternary, 1)
+            .operands_in(vec![
+                OperandConstraint::FixedReg(reg_xmm0),
+                OperandConstraint::RegClass(fpr),
+                OperandConstraint::RegClass(fpr),
+            ])
+            .operands_out(vec![2])
+            .emit(
+                r#"
+                    {{PUT_OP}}(bits, rex2(in_reg1, in_reg2), sink);
+                    modrm_rr(in_reg1, in_reg2, sink);
+                "#,
+            ),
+        "size_with_inferred_rex_for_inreg1_inreg2",
+    );
+
     // XX /n ib with 8-bit immediate sign-extended.
     {
         recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("r_ib", &formats.binary_imm, 2)
+            EncodingRecipeBuilder::new("r_ib", &formats.binary_imm64, 2)
                 .operands_in(vec![gpr])
                 .operands_out(vec![0])
                 .inst_predicate(InstructionPredicate::new_is_signed_int(
-                    &*formats.binary_imm,
+                    &*formats.binary_imm64,
                     "imm",
                     8,
                     0,
@@ -928,11 +947,11 @@ pub(crate) fn define<'shared>(
         );
 
         recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("f_ib", &formats.binary_imm, 2)
+            EncodingRecipeBuilder::new("f_ib", &formats.binary_imm64, 2)
                 .operands_in(vec![fpr])
                 .operands_out(vec![0])
                 .inst_predicate(InstructionPredicate::new_is_signed_int(
-                    &*formats.binary_imm,
+                    &*formats.binary_imm64,
                     "imm",
                     8,
                     0,
@@ -951,11 +970,11 @@ pub(crate) fn define<'shared>(
         // XX /n id with 32-bit immediate sign-extended.
         recipes.add_template(
             Template::new(
-                EncodingRecipeBuilder::new("r_id", &formats.binary_imm, 5)
+                EncodingRecipeBuilder::new("r_id", &formats.binary_imm64, 5)
                     .operands_in(vec![gpr])
                     .operands_out(vec![0])
                     .inst_predicate(InstructionPredicate::new_is_signed_int(
-                        &*formats.binary_imm,
+                        &*formats.binary_imm64,
                         "imm",
                         32,
                         0,
@@ -977,20 +996,20 @@ pub(crate) fn define<'shared>(
     // XX /r ib with 8-bit unsigned immediate (e.g. for pshufd)
     {
         recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.extract_lane, 2)
+            EncodingRecipeBuilder::new("r_ib_unsigned_fpr", &formats.binary_imm8, 2)
                 .operands_in(vec![fpr])
                 .operands_out(vec![fpr])
                 .inst_predicate(InstructionPredicate::new_is_unsigned_int(
-                    &*formats.extract_lane,
-                    "lane",
+                    &*formats.binary_imm8,
+                    "imm",
                     8,
                     0,
-                )) // TODO if the format name is changed then "lane" should be renamed to something more appropriate--ordering mask? broadcast immediate?
+                ))
                 .emit(
                     r#"
                     {{PUT_OP}}(bits, rex2(in_reg0, out_reg0), sink);
                     modrm_rr(in_reg0, out_reg0, sink);
-                    let imm:i64 = lane.into();
+                    let imm: i64 = imm.into();
                     sink.put1(imm as u8);
                 "#,
                 ),
@@ -1001,17 +1020,17 @@ pub(crate) fn define<'shared>(
     // XX /r ib with 8-bit unsigned immediate (e.g. for extractlane)
     {
         recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.extract_lane, 2)
+            EncodingRecipeBuilder::new("r_ib_unsigned_gpr", &formats.binary_imm8, 2)
                 .operands_in(vec![fpr])
                 .operands_out(vec![gpr])
                 .inst_predicate(InstructionPredicate::new_is_unsigned_int(
-                    &*formats.extract_lane, "lane", 8, 0,
+                    &*formats.binary_imm8, "imm", 8, 0,
                 ))
                 .emit(
                     r#"
                     {{PUT_OP}}(bits, rex2(out_reg0, in_reg0), sink);
                     modrm_rr(out_reg0, in_reg0, sink); // note the flipped register in the ModR/M byte
-                    let imm:i64 = lane.into();
+                    let imm: i64 = imm.into();
                     sink.put1(imm as u8);
                 "#,
                 ), "size_with_inferred_rex_for_inreg0_outreg0"
@@ -1021,12 +1040,12 @@ pub(crate) fn define<'shared>(
     // XX /r ib with 8-bit unsigned immediate (e.g. for insertlane)
     {
         recipes.add_template_inferred(
-            EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.insert_lane, 2)
+            EncodingRecipeBuilder::new("r_ib_unsigned_r", &formats.ternary_imm8, 2)
                 .operands_in(vec![fpr, gpr])
                 .operands_out(vec![0])
                 .inst_predicate(InstructionPredicate::new_is_unsigned_int(
-                    &*formats.insert_lane,
-                    "lane",
+                    &*formats.ternary_imm8,
+                    "imm",
                     8,
                     0,
                 ))
@@ -1034,7 +1053,7 @@ pub(crate) fn define<'shared>(
                     r#"
                     {{PUT_OP}}(bits, rex2(in_reg1, in_reg0), sink);
                     modrm_rr(in_reg1, in_reg0, sink);
-                    let imm:i64 = lane.into();
+                    let imm: i64 = imm.into();
                     sink.put1(imm as u8);
                 "#,
                 ),
@@ -1432,23 +1451,7 @@ pub(crate) fn define<'shared>(
     // TODO Alternative forms for 8-bit immediates, when applicable.
 
     recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("spaddr4_id", &formats.stack_load, 6)
-            .operands_out(vec![gpr])
-            .emit(
-                r#"
-                    let sp = StackRef::sp(stack_slot, &func.stack_slots);
-                    let base = stk_base(sp.base);
-                    {{PUT_OP}}(bits, rex2(out_reg0, base), sink);
-                    modrm_sib_disp8(out_reg0, sink);
-                    sib_noindex(base, sink);
-                    let imm : i32 = offset.into();
-                    sink.put4(sp.offset.checked_add(imm).unwrap() as u32);
-                "#,
-            ),
-    );
-
-    recipes.add_template_recipe(
-        EncodingRecipeBuilder::new("spaddr8_id", &formats.stack_load, 6)
+        EncodingRecipeBuilder::new("spaddr_id", &formats.stack_load, 6)
             .operands_out(vec![gpr])
             .emit(
                 r#"
@@ -2871,12 +2874,12 @@ pub(crate) fn define<'shared>(
 
     {
         let has_small_offset =
-            InstructionPredicate::new_is_signed_int(&*formats.binary_imm, "imm", 8, 0);
+            InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 8, 0);
 
         // XX /n, MI form with imm8.
         recipes.add_template(
             Template::new(
-                EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm, 2)
+                EncodingRecipeBuilder::new("rcmp_ib", &formats.binary_imm64, 2)
                     .operands_in(vec![gpr])
                     .operands_out(vec![reg_rflags])
                     .inst_predicate(has_small_offset)
@@ -2894,12 +2897,12 @@ pub(crate) fn define<'shared>(
         );
 
         let has_big_offset =
-            InstructionPredicate::new_is_signed_int(&*formats.binary_imm, "imm", 32, 0);
+            InstructionPredicate::new_is_signed_int(&*formats.binary_imm64, "imm", 32, 0);
 
         // XX /n, MI form with imm32.
         recipes.add_template(
             Template::new(
-                EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm, 5)
+                EncodingRecipeBuilder::new("rcmp_id", &formats.binary_imm64, 5)
                     .operands_in(vec![gpr])
                     .operands_out(vec![reg_rflags])
                     .inst_predicate(has_big_offset)
diff --git a/cranelift/codegen/meta/src/shared/formats.rs b/cranelift/codegen/meta/src/shared/formats.rs
index 03c09e2e2b..204f3fccb1 100644
--- a/cranelift/codegen/meta/src/shared/formats.rs
+++ b/cranelift/codegen/meta/src/shared/formats.rs
@@ -4,7 +4,7 @@ use std::rc::Rc;
 
 pub(crate) struct Formats {
     pub(crate) binary: Rc<InstructionFormat>,
-    pub(crate) binary_imm: Rc<InstructionFormat>,
+    pub(crate) binary_imm64: Rc<InstructionFormat>,
     pub(crate) branch: Rc<InstructionFormat>,
     pub(crate) branch_float: Rc<InstructionFormat>,
     pub(crate) branch_icmp: Rc<InstructionFormat>,
@@ -17,14 +17,13 @@ pub(crate) struct Formats {
     pub(crate) cond_trap: Rc<InstructionFormat>,
     pub(crate) copy_special: Rc<InstructionFormat>,
     pub(crate) copy_to_ssa: Rc<InstructionFormat>,
-    pub(crate) extract_lane: Rc<InstructionFormat>,
+    pub(crate) binary_imm8: Rc<InstructionFormat>,
     pub(crate) float_compare: Rc<InstructionFormat>,
     pub(crate) float_cond: Rc<InstructionFormat>,
     pub(crate) float_cond_trap: Rc<InstructionFormat>,
     pub(crate) func_addr: Rc<InstructionFormat>,
     pub(crate) heap_addr: Rc<InstructionFormat>,
     pub(crate) indirect_jump: Rc<InstructionFormat>,
-    pub(crate) insert_lane: Rc<InstructionFormat>,
     pub(crate) int_compare: Rc<InstructionFormat>,
     pub(crate) int_compare_imm: Rc<InstructionFormat>,
     pub(crate) int_cond: Rc<InstructionFormat>,
@@ -45,6 +44,7 @@ pub(crate) struct Formats {
     pub(crate) store_complex: Rc<InstructionFormat>,
     pub(crate) table_addr: Rc<InstructionFormat>,
     pub(crate) ternary: Rc<InstructionFormat>,
+    pub(crate) ternary_imm8: Rc<InstructionFormat>,
     pub(crate) trap: Rc<InstructionFormat>,
     pub(crate) unary: Rc<InstructionFormat>,
     pub(crate) unary_bool: Rc<InstructionFormat>,
@@ -76,7 +76,9 @@ impl Formats {
 
             binary: Builder::new("Binary").value().value().build(),
 
-            binary_imm: Builder::new("BinaryImm").value().imm(&imm.imm64).build(),
+            binary_imm8: Builder::new("BinaryImm8").value().imm(&imm.uimm8).build(),
+
+            binary_imm64: Builder::new("BinaryImm64").value().imm(&imm.imm64).build(),
 
             // The select instructions are controlled by the second VALUE operand.
             // The first VALUE operand is the controlling flag which has a derived type.
@@ -88,23 +90,18 @@ impl Formats {
                 .typevar_operand(1)
                 .build(),
 
+            ternary_imm8: Builder::new("TernaryImm8")
+                .value()
+                .imm(&imm.uimm8)
+                .value()
+                .build(),
+
             // Catch-all for instructions with many outputs and inputs and no immediate
             // operands.
             multiary: Builder::new("MultiAry").varargs().build(),
 
             nullary: Builder::new("NullAry").build(),
 
-            insert_lane: Builder::new("InsertLane")
-                .value()
-                .imm_with_name("lane", &imm.uimm8)
-                .value()
-                .build(),
-
-            extract_lane: Builder::new("ExtractLane")
-                .value()
-                .imm_with_name("lane", &imm.uimm8)
-                .build(),
-
             shuffle: Builder::new("Shuffle")
                 .value()
                 .value()
diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index 3a85f80599..62cd1ebeb1 100644
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -559,9 +559,9 @@ fn define_simd_lane_access(
         The lane index, ``Idx``, is an immediate value, not an SSA value. It
         must indicate a valid lane index for the type of ``x``.
         "#,
-            &formats.insert_lane,
+            &formats.ternary_imm8,
         )
-        .operands_in(vec![x, Idx, y])
+        .operands_in(vec![x, y, Idx])
         .operands_out(vec![a]),
     );
 
@@ -579,7 +579,7 @@ fn define_simd_lane_access(
         may or may not be zeroed depending on the ISA but the type system should prevent using
         ``a`` as anything other than the extracted value.
         "#,
-            &formats.extract_lane,
+            &formats.binary_imm8,
         )
         .operands_in(vec![x, Idx])
         .operands_out(vec![a]),
@@ -2215,7 +2215,7 @@ pub(crate) fn define(
         Like `icmp_imm`, but returns integer CPU flags instead of testing
         a specific condition code.
         "#,
-            &formats.binary_imm,
+            &formats.binary_imm64,
         )
         .operands_in(vec![x, Y])
         .operands_out(vec![f]),
@@ -2460,7 +2460,7 @@ pub(crate) fn define(
         Polymorphic over all scalar integer types, but does not support vector
         types.
         "#,
-            &formats.binary_imm,
+            &formats.binary_imm64,
         )
         .operands_in(vec![x, Y])
         .operands_out(vec![a]),
@@ -2475,7 +2475,7 @@ pub(crate) fn define(
         Polymorphic over all scalar integer types, but does not support vector
         types.
         "#,
-            &formats.binary_imm,
+            &formats.binary_imm64,
         )
         .operands_in(vec![x, Y])
         .operands_out(vec![a]),
@@ -2489,7 +2489,7 @@ pub(crate) fn define(
 
         This operation traps if the divisor is zero.
         "#,
-            &formats.binary_imm,
+            &formats.binary_imm64,
         )
         .operands_in(vec![x, Y])
         .operands_out(vec![a]),
@@ -2505,7 +2505,7 @@ pub(crate) fn define(
         representable in `B` bits two's complement. This only happens
         when `x = -2^{B-1}, Y = -1`.
         "#,
-            &formats.binary_imm,
+            &formats.binary_imm64,
         )
         .operands_in(vec![x, Y])
         .operands_out(vec![a]),
@@ -2519,7 +2519,7 @@ pub(crate) fn define(
 
         This operation traps if the divisor is zero.
         "#,
-            &formats.binary_imm,
+            &formats.binary_imm64,
         )
         .operands_in(vec![x, Y])
         .operands_out(vec![a]),
@@ -2533,7 +2533,7 @@ pub(crate) fn define(
 
         This operation traps if the divisor is zero.
         "#,
-            &formats.binary_imm,
+            &formats.binary_imm64,
         )
         .operands_in(vec![x, Y])
         .operands_out(vec![a]),
@@ -2552,7 +2552,7 @@ pub(crate) fn define(
         Polymorphic over all scalar integer types, but does not support vector
         types.
         "#,
-            &formats.binary_imm,
+            &formats.binary_imm64,
         )
         .operands_in(vec![x, Y])
         .operands_out(vec![a]),
@@ -2952,7 +2952,7 @@ pub(crate) fn define(
         Polymorphic over all scalar integer types, but does not support vector
         types.
         "#,
-            &formats.binary_imm,
+            &formats.binary_imm64,
         )
         .operands_in(vec![x, Y])
         .operands_out(vec![a]),
@@ -2969,7 +2969,7 @@ pub(crate) fn define(
         Polymorphic over all scalar integer types, but does not support vector
         types.
         "#,
-            &formats.binary_imm,
+            &formats.binary_imm64,
         )
         .operands_in(vec![x, Y])
         .operands_out(vec![a]),
@@ -2986,7 +2986,7 @@ pub(crate) fn define(
         Polymorphic over all scalar integer types, but does not support vector
         types.
         "#,
-            &formats.binary_imm,
+            &formats.binary_imm64,
         )
         .operands_in(vec![x, Y])
         .operands_out(vec![a]),
@@ -3031,7 +3031,7 @@ pub(crate) fn define(
             r#"
         Rotate left by immediate.
         "#,
-            &formats.binary_imm,
+            &formats.binary_imm64,
         )
         .operands_in(vec![x, Y])
         .operands_out(vec![a]),
@@ -3043,7 +3043,7 @@ pub(crate) fn define(
             r#"
         Rotate right by immediate.
         "#,
-            &formats.binary_imm,
+            &formats.binary_imm64,
         )
         .operands_in(vec![x, Y])
         .operands_out(vec![a]),
@@ -3118,7 +3118,7 @@ pub(crate) fn define(
 
         The shift amount is masked to the size of ``x``.
         "#,
-            &formats.binary_imm,
+            &formats.binary_imm64,
         )
         .operands_in(vec![x, Y])
         .operands_out(vec![a]),
@@ -3132,7 +3132,7 @@ pub(crate) fn define(
 
         The shift amount is masked to the size of the register.
         "#,
-            &formats.binary_imm,
+            &formats.binary_imm64,
         )
         .operands_in(vec![x, Y])
         .operands_out(vec![a]),
@@ -3146,7 +3146,7 @@ pub(crate) fn define(
 
         The shift amount is masked to the size of the register.
         "#,
-            &formats.binary_imm,
+            &formats.binary_imm64,
         )
         .operands_in(vec![x, Y])
         .operands_out(vec![a]),
diff --git a/cranelift/codegen/src/binemit/stackmap.rs b/cranelift/codegen/src/binemit/stackmap.rs
index 10ae96a7cb..27d87f896f 100644
--- a/cranelift/codegen/src/binemit/stackmap.rs
+++ b/cranelift/codegen/src/binemit/stackmap.rs
@@ -15,7 +15,8 @@ const NUM_BITS: usize = core::mem::size_of::<Num>() * 8;
 /// The first value in the bitmap is of the lowest addressed slot on the stack.
 /// As all stacks in Isa's supported by Cranelift grow down, this means that
 /// first value is of the top of the stack and values proceed down the stack.
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "enable-serde", derive(serde::Deserialize, serde::Serialize))]
 pub struct Stackmap {
     bitmap: Vec<BitSet<Num>>,
     mapped_words: u32,
diff --git a/cranelift/codegen/src/bitset.rs b/cranelift/codegen/src/bitset.rs
index 8035d80b96..2cb0194b51 100644
--- a/cranelift/codegen/src/bitset.rs
+++ b/cranelift/codegen/src/bitset.rs
@@ -5,12 +5,14 @@
 //!
 //! If you would like to add support for larger bitsets in the future, you need to change the trait
 //! bound Into<u32> and the u32 in the implementation of `max_bits()`.
+
 use core::convert::{From, Into};
 use core::mem::size_of;
 use core::ops::{Add, BitOr, Shl, Sub};
 
 /// A small bitset built on a single primitive integer type
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
+#[cfg_attr(feature = "enable-serde", derive(serde::Serialize, serde::Deserialize))]
 pub struct BitSet<T>(pub T);
 
 impl<T> BitSet<T>
diff --git a/cranelift/codegen/src/ir/instructions.rs b/cranelift/codegen/src/ir/instructions.rs
index ccc47081a1..653926a242 100644
--- a/cranelift/codegen/src/ir/instructions.rs
+++ b/cranelift/codegen/src/ir/instructions.rs
@@ -306,7 +306,7 @@ impl InstructionData {
         let bit_width = ctrl_typevar.bits();
 
         match self {
-            Self::BinaryImm {
+            Self::BinaryImm64 {
                 opcode,
                 arg: _,
                 imm,
diff --git a/cranelift/codegen/src/isa/riscv/mod.rs b/cranelift/codegen/src/isa/riscv/mod.rs
index 8aa264f34f..e957367bbb 100644
--- a/cranelift/codegen/src/isa/riscv/mod.rs
+++ b/cranelift/codegen/src/isa/riscv/mod.rs
@@ -163,7 +163,7 @@ mod tests {
         let arg32 = func.dfg.append_block_param(block, types::I32);
 
         // Try to encode iadd_imm.i64 v1, -10.
-        let inst64 = InstructionData::BinaryImm {
+        let inst64 = InstructionData::BinaryImm64 {
             opcode: Opcode::IaddImm,
             arg: arg64,
             imm: immediates::Imm64::new(-10),
@@ -176,7 +176,7 @@ mod tests {
         );
 
         // Try to encode iadd_imm.i64 v1, -10000.
-        let inst64_large = InstructionData::BinaryImm {
+        let inst64_large = InstructionData::BinaryImm64 {
             opcode: Opcode::IaddImm,
             arg: arg64,
             imm: immediates::Imm64::new(-10000),
@@ -186,7 +186,7 @@ mod tests {
         assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
 
         // Create an iadd_imm.i32 which is encodable in RV64.
-        let inst32 = InstructionData::BinaryImm {
+        let inst32 = InstructionData::BinaryImm64 {
             opcode: Opcode::IaddImm,
             arg: arg32,
             imm: immediates::Imm64::new(10),
@@ -214,7 +214,7 @@ mod tests {
         let arg32 = func.dfg.append_block_param(block, types::I32);
 
         // Try to encode iadd_imm.i64 v1, -10.
-        let inst64 = InstructionData::BinaryImm {
+        let inst64 = InstructionData::BinaryImm64 {
             opcode: Opcode::IaddImm,
             arg: arg64,
             imm: immediates::Imm64::new(-10),
@@ -224,7 +224,7 @@ mod tests {
         assert!(isa.encode(&func, &inst64, types::I64).is_err());
 
         // Try to encode iadd_imm.i64 v1, -10000.
-        let inst64_large = InstructionData::BinaryImm {
+        let inst64_large = InstructionData::BinaryImm64 {
             opcode: Opcode::IaddImm,
             arg: arg64,
             imm: immediates::Imm64::new(-10000),
@@ -234,7 +234,7 @@ mod tests {
         assert!(isa.encode(&func, &inst64_large, types::I64).is_err());
 
         // Create an iadd_imm.i32 which is encodable in RV32.
-        let inst32 = InstructionData::BinaryImm {
+        let inst32 = InstructionData::BinaryImm64 {
             opcode: Opcode::IaddImm,
             arg: arg32,
             imm: immediates::Imm64::new(10),
diff --git a/cranelift/codegen/src/isa/x86/abi.rs b/cranelift/codegen/src/isa/x86/abi.rs
index c072594b61..d0308f73e8 100644
--- a/cranelift/codegen/src/isa/x86/abi.rs
+++ b/cranelift/codegen/src/isa/x86/abi.rs
@@ -245,24 +245,17 @@ pub fn legalize_signature(
         isa_flags,
     );
 
-    let sig_is_multi_return = sig.is_multi_return();
-
-    // If this is a multi-value return and we don't have enough available return
-    // registers to fit all of the return values, we need to backtrack and start
+    // If we don't have enough available return registers
+    // to fit all of the return values, we need to backtrack and start
     // assigning locations all over again with a different strategy. In order to
     // do that, we need a copy of the original assigner for the returns.
-    let backup_rets_for_struct_return = if sig_is_multi_return {
-        Some(rets.clone())
-    } else {
-        None
-    };
+    let mut backup_rets = rets.clone();
 
     if let Some(new_returns) = legalize_args(&sig.returns, &mut rets) {
-        if sig.is_multi_return()
-            && new_returns
-                .iter()
-                .filter(|r| r.purpose == ArgumentPurpose::Normal)
-                .any(|r| !r.location.is_reg())
+        if new_returns
+            .iter()
+            .filter(|r| r.purpose == ArgumentPurpose::Normal)
+            .any(|r| !r.location.is_reg())
         {
             // The return values couldn't all fit into available return
             // registers. Introduce the use of a struct-return parameter.
@@ -283,8 +276,6 @@ pub fn legalize_signature(
                 _ => unreachable!("return pointer should always get a register assignment"),
             }
 
-            let mut backup_rets = backup_rets_for_struct_return.unwrap();
-
             // We're using the first return register for the return pointer (like
             // sys v does).
             let mut ret_ptr_return = AbiParam {
diff --git a/cranelift/codegen/src/isa/x86/enc_tables.rs b/cranelift/codegen/src/isa/x86/enc_tables.rs
index c00ca97357..19b08d58a2 100644
--- a/cranelift/codegen/src/isa/x86/enc_tables.rs
+++ b/cranelift/codegen/src/isa/x86/enc_tables.rs
@@ -246,6 +246,20 @@ fn size_with_inferred_rex_for_inreg0_inreg1(
     sizing.base_size + if needs_rex { 1 } else { 0 }
 }
 
+/// Infers whether a dynamic REX prefix will be emitted, based on second and third operand.
+fn size_with_inferred_rex_for_inreg1_inreg2(
+    sizing: &RecipeSizing,
+    _enc: Encoding,
+    inst: Inst,
+    divert: &RegDiversions,
+    func: &Function,
+) -> u8 {
+    // No need to check for REX.W in `needs_rex` because `infer_rex().w()` is not allowed.
+    let needs_rex = test_input(1, inst, divert, func, is_extended_reg)
+        || test_input(2, inst, divert, func, is_extended_reg);
+    sizing.base_size + if needs_rex { 1 } else { 0 }
+}
+
 /// Infers whether a dynamic REX prefix will be emitted, based on a single
 /// input register and a single output register.
 fn size_with_inferred_rex_for_inreg0_outreg0(
@@ -1181,10 +1195,10 @@ fn convert_extractlane(
     let mut pos = FuncCursor::new(func).at_inst(inst);
     pos.use_srcloc(inst);
 
-    if let ir::InstructionData::ExtractLane {
+    if let ir::InstructionData::BinaryImm8 {
         opcode: ir::Opcode::Extractlane,
         arg,
-        lane,
+        imm: lane,
     } = pos.func.dfg[inst]
     {
         // NOTE: the following legalization assumes that the upper bits of the XMM register do
@@ -1237,10 +1251,10 @@ fn convert_insertlane(
     let mut pos = FuncCursor::new(func).at_inst(inst);
     pos.use_srcloc(inst);
 
-    if let ir::InstructionData::InsertLane {
+    if let ir::InstructionData::TernaryImm8 {
         opcode: ir::Opcode::Insertlane,
         args: [vector, replacement],
-        lane,
+        imm: lane,
     } = pos.func.dfg[inst]
     {
         let value_type = pos.func.dfg.value_type(vector);
@@ -1255,7 +1269,7 @@ fn convert_insertlane(
                     pos.func
                         .dfg
                         .replace(inst)
-                        .x86_insertps(vector, immediate, replacement)
+                        .x86_insertps(vector, replacement, immediate)
                 }
                 F64X2 => {
                     let replacement_as_vector = pos.ins().raw_bitcast(F64X2, replacement); // only necessary due to SSA types
@@ -1283,7 +1297,7 @@ fn convert_insertlane(
             pos.func
                 .dfg
                 .replace(inst)
-                .x86_pinsr(vector, lane, replacement);
+                .x86_pinsr(vector, replacement, lane);
         }
     }
 }
@@ -1326,7 +1340,7 @@ fn expand_dword_to_xmm<'f>(
     if arg_type == I64 {
         let (arg_lo, arg_hi) = pos.ins().isplit(arg);
         let arg = pos.ins().scalar_to_vector(I32X4, arg_lo);
-        let arg = pos.ins().insertlane(arg, 1, arg_hi);
+        let arg = pos.ins().insertlane(arg, arg_hi, 1);
         let arg = pos.ins().raw_bitcast(I64X2, arg);
         arg
     } else {
diff --git a/cranelift/codegen/src/legalizer/boundary.rs b/cranelift/codegen/src/legalizer/boundary.rs
index 7fb977a06a..185e4c74fa 100644
--- a/cranelift/codegen/src/legalizer/boundary.rs
+++ b/cranelift/codegen/src/legalizer/boundary.rs
@@ -757,12 +757,6 @@ pub fn handle_call_abi(
     {
         legalize_sret_call(isa, pos, sig_ref, inst);
     } else {
-        // OK, we need to fix the call arguments to match the ABI signature.
-        let abi_args = pos.func.dfg.signatures[sig_ref].params.len();
-        legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
-            func.dfg.signatures[sig_ref].params[abi_arg]
-        });
-
         if !pos.func.dfg.signatures[sig_ref].returns.is_empty() {
             inst = legalize_inst_results(pos, |func, abi_res| {
                 func.dfg.signatures[sig_ref].returns[abi_res]
@@ -770,6 +764,13 @@ pub fn handle_call_abi(
         }
     }
 
+    // Go back and fix the call arguments to match the ABI signature.
+    pos.goto_inst(inst);
+    let abi_args = pos.func.dfg.signatures[sig_ref].params.len();
+    legalize_inst_arguments(pos, cfg, abi_args, |func, abi_arg| {
+        func.dfg.signatures[sig_ref].params[abi_arg]
+    });
+
     debug_assert!(
         check_call_signature(&pos.func.dfg, inst).is_ok(),
         "Signature still wrong: {}, {}{}",
diff --git a/cranelift/codegen/src/peepmatic.rs b/cranelift/codegen/src/peepmatic.rs
index 1ea2031bfa..1e22019407 100644
--- a/cranelift/codegen/src/peepmatic.rs
+++ b/cranelift/codegen/src/peepmatic.rs
@@ -378,7 +378,7 @@ fn intcc_to_peepmatic(cc: IntCC) -> ConditionCode {
 
 fn get_immediate(dfg: &DataFlowGraph, inst: Inst, i: usize) -> Part<ValueOrInst> {
     return match dfg[inst] {
-        InstructionData::BinaryImm { imm, .. } if i == 0 => imm.into(),
+        InstructionData::BinaryImm64 { imm, .. } if i == 0 => imm.into(),
         InstructionData::BranchIcmp { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
         InstructionData::BranchInt { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
         InstructionData::IntCompare { cond, .. } if i == 0 => intcc_to_peepmatic(cond).into(),
diff --git a/cranelift/codegen/src/postopt.rs b/cranelift/codegen/src/postopt.rs
index 3e27c90a39..426aab00b8 100644
--- a/cranelift/codegen/src/postopt.rs
+++ b/cranelift/codegen/src/postopt.rs
@@ -341,7 +341,7 @@ fn optimize_complex_addresses(pos: &mut EncCursor, inst: Inst, isa: &dyn TargetI
                 }
                 _ => panic!("Unsupported load or store opcode"),
             },
-            InstructionData::BinaryImm {
+            InstructionData::BinaryImm64 {
                 opcode: Opcode::IaddImm,
                 arg,
                 imm,
diff --git a/cranelift/codegen/src/simple_preopt.rs b/cranelift/codegen/src/simple_preopt.rs
index 2f47c7d91b..6903d18f6c 100644
--- a/cranelift/codegen/src/simple_preopt.rs
+++ b/cranelift/codegen/src/simple_preopt.rs
@@ -142,7 +142,7 @@ fn package_up_divrem_info(
 /// Examine `inst` to see if it is a div or rem by a constant, and if so return the operands,
 /// signedness, operation size and div-vs-rem-ness in a handy bundle.
 fn get_div_info(inst: Inst, dfg: &DataFlowGraph) -> Option<DivRemByConstInfo> {
-    if let InstructionData::BinaryImm { opcode, arg, imm } = dfg[inst] {
+    if let InstructionData::BinaryImm64 { opcode, arg, imm } = dfg[inst] {
         let (is_signed, is_rem) = match opcode {
             Opcode::UdivImm => (false, false),
             Opcode::UremImm => (false, true),
@@ -656,7 +656,7 @@ mod simplify {
         dfg::ValueDef,
         immediates,
         instructions::{Opcode, ValueList},
-        types::{I16, I32, I8},
+        types::{B8, I16, I32, I8},
     };
     use std::marker::PhantomData;
 
@@ -704,7 +704,7 @@ mod simplify {
         imm: immediates::Imm64,
     ) -> bool {
         if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
-            if let InstructionData::BinaryImm {
+            if let InstructionData::BinaryImm64 {
                 opcode: Opcode::IshlImm,
                 arg: prev_arg,
                 imm: prev_imm,
@@ -784,7 +784,7 @@ mod simplify {
                         pos.func
                             .dfg
                             .replace(inst)
-                            .BinaryImm(new_opcode, ty, imm, args[0]);
+                            .BinaryImm64(new_opcode, ty, imm, args[0]);
 
                         // Repeat for BinaryImm simplification.
                         simplify(pos, inst, native_word_width);
@@ -804,7 +804,7 @@ mod simplify {
                         pos.func
                             .dfg
                             .replace(inst)
-                            .BinaryImm(new_opcode, ty, imm, args[1]);
+                            .BinaryImm64(new_opcode, ty, imm, args[1]);
                     }
                 }
             }
@@ -818,7 +818,7 @@ mod simplify {
                 }
             }
 
-            InstructionData::BinaryImm { opcode, arg, imm } => {
+            InstructionData::BinaryImm64 { opcode, arg, imm } => {
                 let ty = pos.func.dfg.ctrl_typevar(inst);
 
                 let mut arg = arg;
@@ -831,7 +831,7 @@ mod simplify {
                     | Opcode::BxorImm => {
                         // Fold binary_op(C2, binary_op(C1, x)) into binary_op(binary_op(C1, C2), x)
                         if let ValueDef::Result(arg_inst, _) = pos.func.dfg.value_def(arg) {
-                            if let InstructionData::BinaryImm {
+                            if let InstructionData::BinaryImm64 {
                                 opcode: prev_opcode,
                                 arg: prev_arg,
                                 imm: prev_imm,
@@ -855,7 +855,7 @@ mod simplify {
                                     pos.func
                                         .dfg
                                         .replace(inst)
-                                        .BinaryImm(opcode, ty, new_imm, new_arg);
+                                        .BinaryImm64(opcode, ty, new_imm, new_arg);
                                     imm = new_imm;
                                     arg = new_arg;
                                 }
@@ -935,6 +935,69 @@ mod simplify {
                 }
             }
 
+            InstructionData::Ternary {
+                opcode: Opcode::Bitselect,
+                args,
+            } => {
+                let old_cond_type = pos.func.dfg.value_type(args[0]);
+                if !old_cond_type.is_vector() {
+                    return;
+                }
+
+                // Replace bitselect with vselect if each lane of controlling mask is either
+                // all ones or all zeroes; on x86 bitselect is encoded using 3 instructions,
+                // while vselect can be encoded using single BLEND instruction.
+                if let ValueDef::Result(def_inst, _) = pos.func.dfg.value_def(args[0]) {
+                    let (cond_val, cond_type) = match pos.func.dfg[def_inst] {
+                        InstructionData::Unary {
+                            opcode: Opcode::RawBitcast,
+                            arg,
+                        } => {
+                            // If controlling mask is raw-bitcasted boolean vector then
+                            // we know each lane is either all zeroes or ones,
+                            // so we can use vselect instruction instead.
+                            let arg_type = pos.func.dfg.value_type(arg);
+                            if !arg_type.is_vector() || !arg_type.lane_type().is_bool() {
+                                return;
+                            }
+                            (arg, arg_type)
+                        }
+                        InstructionData::UnaryConst {
+                            opcode: Opcode::Vconst,
+                            constant_handle,
+                        } => {
+                            // If each byte of controlling mask is 0x00 or 0xFF then
+                            // we will always bitcast our way to vselect(B8x16, I8x16, I8x16).
+                            // Bitselect operates at bit level, so the lane types don't matter.
+                            let const_data = pos.func.dfg.constants.get(constant_handle);
+                            if !const_data.iter().all(|&b| b == 0 || b == 0xFF) {
+                                return;
+                            }
+                            let new_type = B8.by(old_cond_type.bytes() as u16).unwrap();
+                            (pos.ins().raw_bitcast(new_type, args[0]), new_type)
+                        }
+                        _ => return,
+                    };
+
+                    let lane_type = Type::int(cond_type.lane_bits() as u16).unwrap();
+                    let arg_type = lane_type.by(cond_type.lane_count()).unwrap();
+                    let old_arg_type = pos.func.dfg.value_type(args[1]);
+
+                    if arg_type != old_arg_type {
+                        // Operands types must match, we need to add bitcasts.
+                        let arg1 = pos.ins().raw_bitcast(arg_type, args[1]);
+                        let arg2 = pos.ins().raw_bitcast(arg_type, args[2]);
+                        let ret = pos.ins().vselect(cond_val, arg1, arg2);
+                        pos.func.dfg.replace(inst).raw_bitcast(old_arg_type, ret);
+                    } else {
+                        pos.func
+                            .dfg
+                            .replace(inst)
+                            .vselect(cond_val, args[1], args[2]);
+                    }
+                }
+            }
+
             _ => {}
         }
     }
diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs
index 7f6953c7e5..00fc31e212 100644
--- a/cranelift/codegen/src/verifier/mod.rs
+++ b/cranelift/codegen/src/verifier/mod.rs
@@ -756,10 +756,10 @@ impl<'a> Verifier<'a> {
             | UnaryIeee64 { .. }
             | UnaryBool { .. }
             | Binary { .. }
-            | BinaryImm { .. }
+            | BinaryImm8 { .. }
+            | BinaryImm64 { .. }
             | Ternary { .. }
-            | InsertLane { .. }
-            | ExtractLane { .. }
+            | TernaryImm8 { .. }
             | Shuffle { .. }
             | IntCompare { .. }
             | IntCompareImm { .. }
@@ -1912,20 +1912,20 @@ impl<'a> Verifier<'a> {
                     Ok(())
                 }
             }
-            ir::InstructionData::ExtractLane {
+            ir::InstructionData::BinaryImm8 {
                 opcode: ir::instructions::Opcode::Extractlane,
-                lane,
+                imm: lane,
                 arg,
                 ..
             }
-            | ir::InstructionData::InsertLane {
+            | ir::InstructionData::TernaryImm8 {
                 opcode: ir::instructions::Opcode::Insertlane,
-                lane,
+                imm: lane,
                 args: [arg, _],
                 ..
             } => {
                 // We must be specific about the opcodes above because other instructions are using
-                // the ExtractLane/InsertLane formats.
+                // the same formats.
                 let ty = self.func.dfg.value_type(arg);
                 if u16::from(lane) >= ty.lane_count() {
                     errors.fatal((
diff --git a/cranelift/codegen/src/write.rs b/cranelift/codegen/src/write.rs
index acf181af2b..ba4543d39f 100644
--- a/cranelift/codegen/src/write.rs
+++ b/cranelift/codegen/src/write.rs
@@ -508,7 +508,8 @@ pub fn write_operands(
             constant_handle, ..
         } => write!(w, " {}", constant_handle),
         Binary { args, .. } => write!(w, " {}, {}", args[0], args[1]),
-        BinaryImm { arg, imm, .. } => write!(w, " {}, {}", arg, imm),
+        BinaryImm8 { arg, imm, .. } => write!(w, " {}, {}", arg, imm),
+        BinaryImm64 { arg, imm, .. } => write!(w, " {}, {}", arg, imm),
         Ternary { args, .. } => write!(w, " {}, {}, {}", args[0], args[1], args[2]),
         MultiAry { ref args, .. } => {
             if args.is_empty() {
@@ -518,8 +519,7 @@ pub fn write_operands(
             }
         }
         NullAry { .. } => write!(w, " "),
-        InsertLane { lane, args, .. } => write!(w, " {}, {}, {}", args[0], lane, args[1]),
-        ExtractLane { lane, arg, .. } => write!(w, " {}, {}", arg, lane),
+        TernaryImm8 { imm, args, .. } => write!(w, " {}, {}, {}", args[0], args[1], imm),
         Shuffle { mask, args, .. } => {
             let data = dfg.immediates.get(mask).expect(
                 "Expected the shuffle mask to already be inserted into the immediates table",
diff --git a/cranelift/filetests/filetests/interpreter/fibonacci.clif b/cranelift/filetests/filetests/interpreter/fibonacci.clif
index 70515c4b20..bdce284f36 100644
--- a/cranelift/filetests/filetests/interpreter/fibonacci.clif
+++ b/cranelift/filetests/filetests/interpreter/fibonacci.clif
@@ -10,12 +10,12 @@ block0(v0: i32):
 
 block1(v4: i32, v5:i32):
     v6 = iconst.i32 1
-    v7 = irsub_imm v4, 2
+    v7 = iadd_imm v4, -2
     fallthrough block2(v7, v5, v6)
 
 block2(v10: i32, v11: i32, v12: i32): ; params: n, fib(n-1), fib(n-2)
     v13 = iadd v11, v12
-    v14 = irsub_imm v10, 1
+    v14 = iadd_imm v10, -1
     v15 = icmp_imm eq v14, 0
     brnz v15, block3(v13)
     jump block2(v14, v13, v11)
@@ -43,9 +43,9 @@ block0(v0: i32):
     fallthrough block1(v0)
 
 block1(v10: i32):
-    v11 = irsub_imm v10, 1
+    v11 = iadd_imm v10, -1
     v12 = call fn0(v11)
-    v13 = irsub_imm v10, 2
+    v13 = iadd_imm v10, -2
     v14 = call fn0(v13)
     v15 = iadd v12, v14
     return v15
diff --git a/cranelift/filetests/filetests/isa/x86/abi64.clif b/cranelift/filetests/filetests/isa/x86/abi64.clif
index 9494e78c67..d99761a4dc 100644
--- a/cranelift/filetests/filetests/isa/x86/abi64.clif
+++ b/cranelift/filetests/filetests/isa/x86/abi64.clif
@@ -14,6 +14,12 @@ function %f() {
     sig2 = (f32, i64) -> f64 system_v
     ; check: sig2 = (f32 [%xmm0], i64 [%rdi]) -> f64 [%xmm0] system_v
 
+    sig3 = () -> i128 system_v
+    ; check: sig3 = () -> i64 [%rax], i64 [%rdx] system_v
+
+    sig4 = (i128) -> i128 system_v
+    ; check: sig4 = (i64 [%rdi], i64 [%rsi]) -> i64 [%rax], i64 [%rdx] system_v
+
 block0:
     return
 }
diff --git a/cranelift/filetests/filetests/isa/x86/bnot-b1.clif b/cranelift/filetests/filetests/isa/x86/bnot-b1.clif
new file mode 100644
index 0000000000..ef3736c54c
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/bnot-b1.clif
@@ -0,0 +1,14 @@
+test binemit
+test run
+
+target x86_64
+
+function u0:323() -> b1 {
+block0:
+    [-,%rax] v221 = bconst.b1 false   ; bin: 40 b8 00000000
+    [-,%rcx] v222 = bconst.b1 true    ; bin: 40 b9 00000001
+    [-,%rax] v223 = bnot v221         ; bin: 40 f7 d0
+    [-,%rax] v224 = band v223, v222   ; bin: 40 21 c8
+    return v224
+}
+; run
diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitselect-to-vselect-run.clif b/cranelift/filetests/filetests/isa/x86/simd-bitselect-to-vselect-run.clif
new file mode 100644
index 0000000000..03cc645712
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/simd-bitselect-to-vselect-run.clif
@@ -0,0 +1,39 @@
+test run
+set opt_level=speed_and_size
+set enable_simd
+target x86_64 haswell
+
+;; Test if bitselect->vselect optimization works properly
+
+function %mask_from_icmp(i32x4, i32x4) -> i32x4 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = icmp sge v0, v1
+    v3 = raw_bitcast.i32x4 v2
+    v4 = bitselect v3, v0, v1
+    return v4
+}
+; run: %mask_from_icmp([5 6 7 8], [1 10 20 7]) == [5 10 20 8]
+
+function %mask_casted(i64x2, i64x2, i32x4) -> i64x2 {
+block0(v0: i64x2, v1: i64x2, v2: i32x4):
+    v3 = raw_bitcast.i64x2 v2
+    v4 = bitselect v3, v0, v1
+    return v4
+}
+; run: %mask_casted([0 0], [0xFFFFFF 0xFFFF4F], [0xFFF1 0 0xF 0]) == [0xFF000E 0xFFFF40]
+
+function %good_const_mask(i32x4, i32x4) -> i32x4 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = vconst.i32x4 [0x0000FF00 0x00FF00FF 0x00FF00FF 0xFF00FFFF]
+    v4 = bitselect v2, v0, v1
+    return v4
+}
+; run: %good_const_mask([0x1234 0x5678 0x1234 0x5678], [0xAAAA 0xAAAA 0xAAAA 0xAAAA]) == [0x12AA 0xAA78 0xAA34 0x5678]
+
+function %bad_const_mask(i32x4, i32x4) -> i32x4 {
+block0(v0: i32x4, v1: i32x4):
+    v2 = vconst.i32x4 [0x0000FF00 0x00FF00FF 0x00FF000F 0xFF00FFF0]
+    v4 = bitselect v2, v0, v1
+    return v4
+}
+; run: %bad_const_mask([0x1234 0x5678 0x1234 0x5678], [0xAAAA 0xAAAA 0xAAAA 0xAAAA]) == [0x12AA 0xAA78 0xAAA4 0x567A]
diff --git a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif
index 102719351b..7193aa2b54 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-bitwise-legalize.clif
@@ -91,10 +91,10 @@ block0:
     v2 = sshr v1, v0
     ; check:  v3 = x86_pextr v1, 0
     ; nextln: v4 = sshr v3, v0
-    ; nextln: v5 = x86_pinsr v1, 0, v4
+    ; nextln: v5 = x86_pinsr v1, v4, 0
     ; nextln: v6 = x86_pextr v1, 1
     ; nextln: v7 = sshr v6, v0
-    ; nextln: v2 = x86_pinsr v5, 1, v7
+    ; nextln: v2 = x86_pinsr v5, v7, 1
     return v2
 }
 
diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif
index a1ffac1822..e5eea1f637 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-binemit.clif
@@ -10,7 +10,7 @@ block0:
 [-, %rax]   v0 = bconst.b8 true
 [-, %rbx]   v1 = bconst.b8 false
 [-, %xmm0]  v2 = splat.b8x16 v0
-[-, %xmm0]  v3 = x86_pinsr v2, 10, v1    ; bin: 66 0f 3a 20 c3 0a
+[-, %xmm0]  v3 = x86_pinsr v2, v1, 10    ; bin: 66 0f 3a 20 c3 0a
             return
 }
 
@@ -19,7 +19,7 @@ block0:
 [-, %rax]   v0 = iconst.i16 4
 [-, %rbx]   v1 = iconst.i16 5
 [-, %xmm1]  v2 = splat.i16x8 v0
-[-, %xmm1]  v3 = x86_pinsr v2, 4, v1    ; bin: 66 0f c4 cb 04
+[-, %xmm1]  v3 = x86_pinsr v2, v1, 4    ; bin: 66 0f c4 cb 04
             return
 }
 
@@ -28,7 +28,7 @@ block0:
 [-, %rax]   v0 = iconst.i32 42
 [-, %rbx]   v1 = iconst.i32 99
 [-, %xmm4]  v2 = splat.i32x4 v0
-[-, %xmm4]  v3 = x86_pinsr v2, 2, v1    ; bin: 66 0f 3a 22 e3 02
+[-, %xmm4]  v3 = x86_pinsr v2, v1, 2    ; bin: 66 0f 3a 22 e3 02
             return
 }
 
@@ -37,7 +37,7 @@ block0:
 [-, %rax]   v0 = bconst.b64 true
 [-, %rbx]   v1 = bconst.b64 false
 [-, %xmm2]  v2 = splat.b64x2 v0
-[-, %xmm2]  v3 = x86_pinsr v2, 1, v1    ; bin: 66 48 0f 3a 22 d3 01
+[-, %xmm2]  v3 = x86_pinsr v2, v1, 1    ; bin: 66 48 0f 3a 22 d3 01
             return
 }
 
diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif
index 5480116404..0f22ed3669 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif
@@ -55,7 +55,7 @@ block0:
 ; check:   block0:
 ; nextln:     v0 = iconst.i64 42
 ; nextln:     v2 = scalar_to_vector.i64x2 v0
-; nextln:     v1 = x86_pinsr v2, 1, v0
+; nextln:     v1 = x86_pinsr v2, v0, 1
 ; nextln:     return v1
 
 function %splat_b16() -> b16x8 {
@@ -67,7 +67,7 @@ block0:
 ; check:   block0:
 ; nextln:     v0 = bconst.b16 true
 ; nextln:     v2 = scalar_to_vector.b16x8 v0
-; nextln:     v3 = x86_pinsr v2, 1, v0
+; nextln:     v3 = x86_pinsr v2, v0, 1
 ; nextln:     v4 = raw_bitcast.i32x4 v3
 ; nextln:     v5 = x86_pshufd v4, 0
 ; nextln:     v1 = raw_bitcast.b16x8 v5
diff --git a/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif b/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif
index 115a0be7cb..00ebae26f6 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif
@@ -62,7 +62,7 @@ block0:
     v1 = bconst.b8 true
     v2 = vconst.b8x16 [false false false false false false false false false false false false false
      false false false]
-    v3 = insertlane v2, 10, v1
+    v3 = insertlane v2, v1, 10
     v4 = extractlane v3, 10
     return v4
 }
@@ -72,7 +72,7 @@ function %insertlane_f32() -> b1 {
 block0:
     v0 = f32const 0x42.42
     v1 = vconst.f32x4 0x00
-    v2 = insertlane v1, 1, v0
+    v2 = insertlane v1, v0, 1
     v3 = extractlane v2, 1
     v4 = fcmp eq v3, v0
     return v4
@@ -83,7 +83,7 @@ function %insertlane_f64_lane1() -> b1 {
 block0:
     v0 = f64const 0x42.42
     v1 = vconst.f64x2 0x00
-    v2 = insertlane v1, 1, v0
+    v2 = insertlane v1, v0, 1
     v3 = extractlane v2, 1
     v4 = fcmp eq v3, v0
     return v4
@@ -94,7 +94,7 @@ function %insertlane_f64_lane0() -> b1 {
 block0:
     v0 = f64const 0x42.42
     v1 = vconst.f64x2 0x00
-    v2 = insertlane v1, 0, v0
+    v2 = insertlane v1, v0, 0
     v3 = extractlane v2, 0
     v4 = fcmp eq v3, v0
     return v4
@@ -135,7 +135,7 @@ block0:
     v1 = iconst.i32 99
 
     v2 = splat.i32x4 v0
-    v3 = insertlane v2, 2, v1
+    v3 = insertlane v2, v1, 2
 
     v4 = extractlane v3, 3
     v5 = icmp eq v4, v0
@@ -154,7 +154,7 @@ block0:
     v1 = f32const 0x99.99
 
     v2 = splat.f32x4 v0
-    v3 = insertlane v2, 2, v1
+    v3 = insertlane v2, v1, 2
 
     v4 = extractlane v3, 3
     v5 = fcmp eq v4, v0
diff --git a/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif
new file mode 100644
index 0000000000..a575c58f64
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/simd-vselect-binemit.clif
@@ -0,0 +1,27 @@
+test binemit
+set enable_simd
+target x86_64 haswell
+
+function %vselect_i8x16(b8x16, i8x16, i8x16) {
+block0(v0: b8x16 [%xmm0], v1: i8x16 [%xmm3], v2: i8x16 [%xmm5]):
+[-, %xmm5]  v3 = vselect v0, v1, v2  ; bin: 66 0f 38 10 eb
+            return
+}
+
+function %vselect_i16x8(b16x8, i16x8, i16x8) {
+block0(v0: b16x8 [%xmm0], v1: i16x8 [%xmm3], v2: i16x8 [%xmm5]):
+[-, %xmm5]  v3 = vselect v0, v1, v2  ; bin: 66 0f 38 10 eb
+            return
+}
+
+function %vselect_i32x4(b32x4, i32x4, i32x4) {
+block0(v0: b32x4 [%xmm0], v1: i32x4 [%xmm3], v2: i32x4 [%xmm5]):
+[-, %xmm5]  v3 = vselect v0, v1, v2  ; bin: 66 0f 38 14 eb
+            return
+}
+
+function %vselect_i64x2(b64x2, i64x2, i64x2) {
+block0(v0: b64x2 [%xmm0], v1: i64x2 [%xmm3], v2: i64x2 [%xmm5]):
+[-, %xmm5]  v3 = vselect v0, v1, v2  ; bin: 66 0f 38 15 eb
+            return
+}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif b/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif
new file mode 100644
index 0000000000..723539631d
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/simd-vselect-legalize-to-bitselect.clif
@@ -0,0 +1,45 @@
+test legalizer
+set enable_simd
+target x86_64
+
+;; Test if vselect gets legalized if BLEND* instructions are not available
+
+function %vselect_i8x16(b8x16, i8x16, i8x16) -> i8x16 {
+block0(v0: b8x16, v1: i8x16, v2: i8x16):
+    v3 = vselect v0, v1, v2
+    ; check:  v4 = raw_bitcast.i8x16 v0
+    ; nextln: v5 = band v1, v4
+    ; nextln: v6 = band_not v2, v4
+    ; nextln: v3 = bor v5, v6
+    return v3
+}
+
+function %vselect_i16x8(b16x8, i16x8, i16x8) -> i16x8 {
+block0(v0: b16x8, v1: i16x8, v2: i16x8):
+    v3 = vselect v0, v1, v2
+    ; check:  v4 = raw_bitcast.i16x8 v0
+    ; nextln: v5 = band v1, v4
+    ; nextln: v6 = band_not v2, v4
+    ; nextln: v3 = bor v5, v6
+    return v3
+}
+
+function %vselect_i32x4(b32x4, i32x4, i32x4) -> i32x4 {
+block0(v0: b32x4, v1: i32x4, v2: i32x4):
+    v3 = vselect v0, v1, v2
+    ; check:  v4 = raw_bitcast.i32x4 v0
+    ; nextln: v5 = band v1, v4
+    ; nextln: v6 = band_not v2, v4
+    ; nextln: v3 = bor v5, v6
+    return v3
+}
+
+function %vselect_i64x2(b64x2, i64x2, i64x2) -> i64x2 {
+block0(v0: b64x2, v1: i64x2, v2: i64x2):
+    v3 = vselect v0, v1, v2
+    ; check:  v4 = raw_bitcast.i64x2 v0
+    ; nextln: v5 = band v1, v4
+    ; nextln: v6 = band_not v2, v4
+    ; nextln: v3 = bor v5, v6
+    return v3
+}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-vselect-run.clif b/cranelift/filetests/filetests/isa/x86/simd-vselect-run.clif
new file mode 100644
index 0000000000..ac6feaa994
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/simd-vselect-run.clif
@@ -0,0 +1,43 @@
+test run
+set enable_simd
+target x86_64 haswell
+
+function %vselect_i8x16() -> i8x16 {
+block0:
+    v1 = vconst.b8x16 [false true false true false true true true true true false false false false false false]
+    v2 = vconst.i8x16 [100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115]
+    v3 = vconst.i8x16 [200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215]
+    v4 = vselect v1, v2, v3
+    return v4
+}
+; run: %vselect_i8x16() == [200 101 202 103 204 105 106 107 108 109 210 211 212 213 214 215]
+
+function %vselect_i16x8() -> i16x8 {
+block0:
+    v1 = vconst.b16x8 [false true false true false true true true]
+    v2 = vconst.i16x8 [100 101 102 103 104 105 106 107]
+    v3 = vconst.i16x8 [200 201 202 203 204 205 206 207]
+    v4 = vselect v1, v2, v3
+    return v4
+}
+; run: %vselect_i16x8() == [200 101 202 103 204 105 106 107]
+
+function %vselect_i32x4() -> i32x4 {
+block0:
+    v1 = vconst.b32x4 [false true false true]
+    v2 = vconst.i32x4 [100 101 102 103]
+    v3 = vconst.i32x4 [200 201 202 203]
+    v4 = vselect v1, v2, v3
+    return v4
+}
+; run: %vselect_i32x4() == [200 101 202 103]
+
+function %vselect_i64x2() -> i64x2 {
+block0:
+    v1 = vconst.b64x2 [false true]
+    v2 = vconst.i64x2 [100 101]
+    v3 = vconst.i64x2 [200 201]
+    v4 = vselect v1, v2, v3
+    return v4
+}
+; run: %vselect_i64x2() == [200 101]
diff --git a/cranelift/filetests/filetests/isa/x86/stack-addr32.clif b/cranelift/filetests/filetests/isa/x86/stack-addr32.clif
new file mode 100644
index 0000000000..4b8a153795
--- /dev/null
+++ b/cranelift/filetests/filetests/isa/x86/stack-addr32.clif
@@ -0,0 +1,33 @@
+; binary emission of stack address instructions on i686.
+test binemit
+set opt_level=none
+target i686 haswell
+
+; The binary encodings can be verified with the command:
+;
+;   sed -ne 's/^ *; asm: *//p' filetests/isa/x86/stack-addr32.clif | llvm-mc -show-encoding -triple=i686
+;
+
+function %stack_addr() {
+           ss0 = incoming_arg 8, offset 0
+           ss1 = incoming_arg 1024, offset -1024
+           ss2 = incoming_arg 1024, offset -2048
+           ss3 = incoming_arg 8, offset -2056
+           ss4 = explicit_slot 8, offset 0
+           ss5 = explicit_slot 8, offset 1024
+
+block0:
+[-,%rcx]   v0 = stack_addr.i32 ss0                      ; bin: 8d 8c 24 00000808
+[-,%rcx]   v1 = stack_addr.i32 ss1                      ; bin: 8d 8c 24 00000408
+[-,%rcx]   v2 = stack_addr.i32 ss2                      ; bin: 8d 8c 24 00000008
+[-,%rcx]   v3 = stack_addr.i32 ss3                      ; bin: 8d 8c 24 00000000
+[-,%rcx]   v4 = stack_addr.i32 ss4                      ; bin: 8d 8c 24 00000808
+[-,%rcx]   v5 = stack_addr.i32 ss5                      ; bin: 8d 8c 24 00000c08
+
+[-,%rcx]   v20 = stack_addr.i32 ss4+1                   ; bin: 8d 8c 24 00000809
+[-,%rcx]   v21 = stack_addr.i32 ss4+2                   ; bin: 8d 8c 24 0000080a
+[-,%rcx]   v22 = stack_addr.i32 ss4+2048                ; bin: 8d 8c 24 00001008
+[-,%rcx]   v23 = stack_addr.i32 ss4-4096                ; bin: 8d 8c 24 fffff808
+
+           return
+}
diff --git a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif
index 8e8d356479..cb1856ca3d 100644
--- a/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif
+++ b/cranelift/filetests/filetests/isa/x86/windows_fastcall_x64.clif
@@ -110,6 +110,13 @@ block0(v0: f32, v1: f64, v2: i64, v3: i64):
 ; nextln: return v1, v5
 ; nextln: }
 
+function %ret_val_i128(i64, i64) -> i128 windows_fastcall {
+block0(v0: i64, v1: i64):
+    v2 = iconcat v0, v1
+    return v2
+}
+; check: function %ret_val_i128(i64 [%rdx], i64 [%r8], i64 sret [%rcx], i64 fp [%rbp]) -> i64 sret [%rax], i64 fp [%rbp] windows_fastcall {
+
 function %internal_stack_arg_function_call(i64) -> i64 windows_fastcall {
   fn0 = %foo(i64, i64, i64, i64) -> i64 windows_fastcall
   fn1 = %foo2(i64, i64, i64, i64) -> i64 windows_fastcall
diff --git a/cranelift/filetests/filetests/legalizer/popcnt-i128.clif b/cranelift/filetests/filetests/legalizer/popcnt-i128.clif
index f4919f4781..6d07f32631 100644
--- a/cranelift/filetests/filetests/legalizer/popcnt-i128.clif
+++ b/cranelift/filetests/filetests/legalizer/popcnt-i128.clif
@@ -1,5 +1,5 @@
 test legalizer
-target i686
+target x86_64 haswell
 
 function %foo() -> i128 {
 block0:
@@ -10,22 +10,12 @@ block0:
     return v4
 }
 
-; check: v5 = iconst.i32 66
-; check: v6 = iconst.i32 100
-; check: v1 = iconcat v5, v6
-; check: v7 = iconst.i32 0x1010_0042
-; check: v8 = iconst.i32 127
-; check: v2 = iconcat v7, v8
+; check: v1 = iconst.i64 0x0064_0000_0042
+; check: v2 = iconst.i64 0x007f_1010_0042
 ; check: v3 = iconcat v1, v2
-; check: v9 = popcnt v1
-; check: v10 = popcnt v2
-; check: v12, v13 = isplit v9
-; check: v14, v15 = isplit v10
-; check: v16, v17 = iadd_ifcout v12, v14
-; check: v18 = iadd_ifcin v13, v15, v17
-; check: v11 = iconcat v16, v18
-; check: v20 = iconst.i32 0
-; check: v21 = iconst.i32 0
-; check: v19 = iconcat v20, v21
-; check: v4 = iconcat v11, v19
-; check: return v16, v18, v20, v21
+; check: v5 = popcnt v1
+; check: v6 = popcnt v2
+; check: v7 = iadd v5, v6
+; check: v8 = iconst.i64 0
+; check: v4 = iconcat v7, v8
+; check: return v7, v8
diff --git a/cranelift/filetests/filetests/parser/tiny.clif b/cranelift/filetests/filetests/parser/tiny.clif
index 3f825e6eac..42fa5a8157 100644
--- a/cranelift/filetests/filetests/parser/tiny.clif
+++ b/cranelift/filetests/filetests/parser/tiny.clif
@@ -67,13 +67,13 @@ function %lanes() {
 block0:
     v0 = iconst.i32x4 2
     v1 = extractlane v0, 3
-    v2 = insertlane v0, 1, v1
+    v2 = insertlane v0, v1, 1
 }
 ; sameln: function %lanes() fast {
 ; nextln: block0:
 ; nextln:     v0 = iconst.i32x4 2
 ; nextln:     v1 = extractlane v0, 3
-; nextln:     v2 = insertlane v0, 1, v1
+; nextln:     v2 = insertlane v0, v1, 1
 ; nextln: }
 
 ; Integer condition codes.
diff --git a/cranelift/filetests/filetests/simple_preopt/bitselect.clif b/cranelift/filetests/filetests/simple_preopt/bitselect.clif
new file mode 100644
index 0000000000..684d91ee31
--- /dev/null
+++ b/cranelift/filetests/filetests/simple_preopt/bitselect.clif
@@ -0,0 +1,50 @@
+test simple_preopt
+target x86_64
+
+;; Test replacement of bitselect with vselect for special masks
+
+function %mask_from_icmp(i8x16, i8x16) -> i8x16 {
+block0(v0: i8x16, v1: i8x16):
+    v2 = icmp eq v0, v1
+    v3 = raw_bitcast.i8x16 v2
+    v4 = bitselect v3, v0, v1
+    ; check: v4 = vselect v2, v0, v1
+    return v4
+}
+
+function %mask_casted(i8x16, i8x16, i32x4) -> i8x16 {
+block0(v0: i8x16, v1: i8x16, v2: i32x4):
+    v3 = raw_bitcast.i8x16 v2
+    v4 = bitselect v3, v0, v1
+    ; check: v4 = bitselect v3, v0, v1
+    return v4
+}
+
+function %good_const_mask_i8x16(i8x16, i8x16) -> i8x16 {
+block0(v0: i8x16, v1: i8x16):
+    v3 = vconst.i8x16 [0 0 0xFF 0 0 0xFF 0 0 0 0 0xFF 0 0 0 0 0xFF]
+    v4 = bitselect v3, v0, v1
+    ; check:  v5 = raw_bitcast.b8x16 v3
+    ; nextln: v4 = vselect v5, v0, v1
+    return v4
+}
+
+function %good_const_mask_i16x8(i16x8, i16x8) -> i16x8 {
+block0(v0: i16x8, v1: i16x8):
+    v3 = vconst.i16x8 [0x0000 0xFF00 0x0000 0x00FF 0x0000 0xFFFF 0x00FF 0xFFFF]
+    v4 = bitselect v3, v0, v1
+    ; check:  v5 = raw_bitcast.b8x16 v3
+    ; nextln: v6 = raw_bitcast.i8x16 v0
+    ; nextln: v7 = raw_bitcast.i8x16 v1
+    ; nextln: v8 = vselect v5, v6, v7
+    ; nextln: v4 = raw_bitcast.i16x8 v8
+    return v4
+}
+
+function %bad_const_mask(i8x16, i8x16) -> i8x16 {
+block0(v0: i8x16, v1: i8x16):
+    v3 = vconst.i8x16 [0 0 0xF0 0 0 0xFF 0 0 0 0 0xFF 0 0 0 0 0xFF]
+    v4 = bitselect v3, v0, v1
+    ; check: v4 = bitselect v3, v0, v1
+    return v4
+}
diff --git a/cranelift/filetests/filetests/verifier/simd-lane-index.clif b/cranelift/filetests/filetests/verifier/simd-lane-index.clif
index 2f7ca8d095..b8051a6b5a 100644
--- a/cranelift/filetests/filetests/verifier/simd-lane-index.clif
+++ b/cranelift/filetests/filetests/verifier/simd-lane-index.clif
@@ -6,7 +6,7 @@ function %insertlane_i32x4() {
 block0:
     v0 = vconst.i32x4 [0 0 0 0]
     v1 = iconst.i32 42
-    v2 = insertlane v0, 4, v1 ; error: The lane 4 does not index into the type i32x4
+    v2 = insertlane v0, v1, 4 ; error: The lane 4 does not index into the type i32x4
     return
 }
 
@@ -14,7 +14,7 @@ function %insertlane_b16x8() {
 block0:
     v0 = vconst.b16x8 [false false false false false false false false]
     v1 = bconst.b16 true
-    v2 = insertlane v0, 8, v1 ; error: The lane 8 does not index into the type b16x8
+    v2 = insertlane v0, v1, 8 ; error: The lane 8 does not index into the type b16x8
     return
 }
 
@@ -22,7 +22,7 @@ function %insertlane_f64x2() {
 block0:
     v0 = vconst.f64x2 0x00
     v1 = f64const 0x0.1
-    v2 = insertlane v0, 2, v1 ; error: The lane 2 does not index into the type f64x2
+    v2 = insertlane v0, v1, 2 ; error: The lane 2 does not index into the type f64x2
     return
 }
 
diff --git a/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif b/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif
new file mode 100644
index 0000000000..b57090d851
--- /dev/null
+++ b/cranelift/filetests/filetests/wasm/multi-val-call-legalize-args.clif
@@ -0,0 +1,24 @@
+test legalizer
+target x86_64 haswell
+
+;; Test if arguments are legalized if function uses sret
+
+function %call_indirect_with_split_arg(i64, i64, i64) {
+    ; check: ss0 = sret_slot 32
+    sig0 = (i128) -> i64, i64, i64, i64
+    ; check: sig0 = (i64 [%rsi], i64 [%rdx], i64 sret [%rdi]) -> i64 sret [%rax] fast
+block0(v0: i64, v1: i64, v2: i64):
+    v3 = iconcat v1, v2
+    v4, v5, v6, v7 = call_indirect sig0, v0(v3)
+    ; check: v8 = stack_addr.i64 ss0
+    ; check: v9 = call_indirect sig0, v0(v1, v2, v8)
+    ; check: v10 = load.i64 notrap aligned v9
+    ; check: v4 -> v10
+    ; check: v11 = load.i64 notrap aligned v9+8
+    ; check: v5 -> v11
+    ; check: v12 = load.i64 notrap aligned v9+16
+    ; check: v6 -> v12
+    ; check: v13 = load.i64 notrap aligned v9+24
+    ; check: v7 -> v13
+    return
+}
diff --git a/cranelift/interpreter/src/interpreter.rs b/cranelift/interpreter/src/interpreter.rs
index 3dbbe1d69d..5485713d7e 100644
--- a/cranelift/interpreter/src/interpreter.rs
+++ b/cranelift/interpreter/src/interpreter.rs
@@ -12,7 +12,7 @@ use cranelift_codegen::ir::{
 };
 use cranelift_reader::{DataValue, DataValueCastFailure};
 use log::trace;
-use std::ops::{Add, Sub};
+use std::ops::{Add, Div, Mul, Sub};
 use thiserror::Error;
 
 /// The valid control flow states.
@@ -150,20 +150,26 @@ impl Interpreter {
                 let arg1 = frame.get(&args[0]);
                 let arg2 = frame.get(&args[1]);
                 let result = match opcode {
-                    Iadd => binary_op!(Add::add[arg1, arg2]; [I8, I16, I32, I64, F32, F64]; inst),
-                    Isub => binary_op!(Sub::sub[arg1, arg2]; [I8, I16, I32, I64, F32, F64]; inst),
+                    Iadd => binary_op!(Add::add[arg1, arg2]; [I8, I16, I32, I64]; inst),
+                    Isub => binary_op!(Sub::sub[arg1, arg2]; [I8, I16, I32, I64]; inst),
+                    Imul => binary_op!(Mul::mul[arg1, arg2]; [I8, I16, I32, I64]; inst),
+                    Fadd => binary_op!(Add::add[arg1, arg2]; [F32, F64]; inst),
+                    Fsub => binary_op!(Sub::sub[arg1, arg2]; [F32, F64]; inst),
+                    Fmul => binary_op!(Mul::mul[arg1, arg2]; [F32, F64]; inst),
+                    Fdiv => binary_op!(Div::div[arg1, arg2]; [F32, F64]; inst),
                     _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
                 }?;
                 frame.set(first_result(frame.function, inst), result);
                 Ok(Continue)
             }
 
-            BinaryImm { opcode, arg, imm } => {
+            BinaryImm64 { opcode, arg, imm } => {
                 let imm = DataValue::from_integer(*imm, type_of(*arg, frame.function))?;
                 let arg = frame.get(&arg);
                 let result = match opcode {
-                    IaddImm => binary_op!(Add::add[arg, imm]; [I8, I16, I32, I64, F32, F64]; inst),
-                    IrsubImm => binary_op!(Sub::sub[arg, imm]; [I8, I16, I32, I64, F32, F64]; inst),
+                    IaddImm => binary_op!(Add::add[arg, imm]; [I8, I16, I32, I64]; inst),
+                    IrsubImm => binary_op!(Sub::sub[imm, arg]; [I8, I16, I32, I64]; inst),
+                    ImulImm => binary_op!(Mul::mul[arg, imm]; [I8, I16, I32, I64]; inst),
                     _ => unimplemented!("interpreter does not support opcode yet: {}", opcode),
                 }?;
                 frame.set(first_result(frame.function, inst), result);
@@ -349,9 +355,9 @@ mod tests {
     fn sanity() {
         let code = "function %test() -> b1 {
         block0:
-            v0 = iconst.i32 40
-            v1 = iadd_imm v0, 3
-            v2 = irsub_imm v1, 1
+            v0 = iconst.i32 1
+            v1 = iadd_imm v0, 1
+            v2 = irsub_imm v1, 44  ; 44 - 2 == 42 (see irsub_imm's semantics)
             v3 = icmp_imm eq v2, 42
             return v3
         }";
diff --git a/cranelift/peepmatic/crates/macro/Cargo.toml b/cranelift/peepmatic/crates/macro/Cargo.toml
index e13cffd988..560953e0df 100644
--- a/cranelift/peepmatic/crates/macro/Cargo.toml
+++ b/cranelift/peepmatic/crates/macro/Cargo.toml
@@ -13,3 +13,5 @@ syn = { version = "1.0.16", features = ['extra-traits'] }
 
 [lib]
 proc_macro = true
+test = false
+doctest = false
diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs
index 08942962ed..55e44f41f8 100644
--- a/cranelift/reader/src/parser.rs
+++ b/cranelift/reader/src/parser.rs
@@ -2752,11 +2752,17 @@ impl<'a> Parser<'a> {
                     args: [lhs, rhs],
                 }
             }
-            InstructionFormat::BinaryImm => {
+            InstructionFormat::BinaryImm8 => {
+                let arg = self.match_value("expected SSA value first operand")?;
+                self.match_token(Token::Comma, "expected ',' between operands")?;
+                let imm = self.match_uimm8("expected unsigned 8-bit immediate")?;
+                InstructionData::BinaryImm8 { opcode, arg, imm }
+            }
+            InstructionFormat::BinaryImm64 => {
                 let lhs = self.match_value("expected SSA value first operand")?;
                 self.match_token(Token::Comma, "expected ',' between operands")?;
                 let rhs = self.match_imm64("expected immediate integer second operand")?;
-                InstructionData::BinaryImm {
+                InstructionData::BinaryImm64 {
                     opcode,
                     arg: lhs,
                     imm: rhs,
@@ -2887,24 +2893,18 @@ impl<'a> Parser<'a> {
                 ctx.check_jt(table, self.loc)?;
                 InstructionData::IndirectJump { opcode, arg, table }
             }
-            InstructionFormat::InsertLane => {
+            InstructionFormat::TernaryImm8 => {
                 let lhs = self.match_value("expected SSA value first operand")?;
                 self.match_token(Token::Comma, "expected ',' between operands")?;
-                let lane = self.match_uimm8("expected lane number")?;
-                self.match_token(Token::Comma, "expected ',' between operands")?;
                 let rhs = self.match_value("expected SSA value last operand")?;
-                InstructionData::InsertLane {
+                self.match_token(Token::Comma, "expected ',' between operands")?;
+                let imm = self.match_uimm8("expected 8-bit immediate")?;
+                InstructionData::TernaryImm8 {
                     opcode,
-                    lane,
+                    imm,
                     args: [lhs, rhs],
                 }
             }
-            InstructionFormat::ExtractLane => {
-                let arg = self.match_value("expected SSA value last operand")?;
-                self.match_token(Token::Comma, "expected ',' between operands")?;
-                let lane = self.match_uimm8("expected lane number")?;
-                InstructionData::ExtractLane { opcode, lane, arg }
-            }
             InstructionFormat::Shuffle => {
                 let a = self.match_value("expected SSA value first operand")?;
                 self.match_token(Token::Comma, "expected ',' between operands")?;
diff --git a/cranelift/serde/src/serde_clif_json.rs b/cranelift/serde/src/serde_clif_json.rs
index 2d950cf3a8..80ee84633a 100644
--- a/cranelift/serde/src/serde_clif_json.rs
+++ b/cranelift/serde/src/serde_clif_json.rs
@@ -32,7 +32,12 @@ pub enum SerInstData {
         opcode: String,
         args: [String; 2],
     },
-    BinaryImm {
+    BinaryImm8 {
+        opcode: String,
+        arg: String,
+        imm: String,
+    },
+    BinaryImm64 {
         opcode: String,
         arg: String,
         imm: String,
@@ -41,6 +46,11 @@ pub enum SerInstData {
         opcode: String,
         args: [String; 3],
     },
+    TernaryImm8 {
+        opcode: String,
+        args: [String; 2],
+        imm: String,
+    },
     MultiAry {
         opcode: String,
         args: Vec<String>,
@@ -48,16 +58,6 @@ pub enum SerInstData {
     NullAry {
         opcode: String,
     },
-    InsertLane {
-        opcode: String,
-        args: [String; 2],
-        lane: String,
-    },
-    ExtractLane {
-        opcode: String,
-        arg: String,
-        lane: String,
-    },
     Shuffle {
         opcode: String,
         args: [String; 2],
@@ -292,7 +292,12 @@ pub fn get_inst_data(inst_index: Inst, func: &Function) -> SerInstData {
                 args: hold_args,
             }
         }
-        InstructionData::BinaryImm { opcode, arg, imm } => SerInstData::BinaryImm {
+        InstructionData::BinaryImm8 { opcode, arg, imm } => SerInstData::BinaryImm8 {
+            opcode: opcode.to_string(),
+            arg: arg.to_string(),
+            imm: imm.to_string(),
+        },
+        InstructionData::BinaryImm64 { opcode, arg, imm } => SerInstData::BinaryImm64 {
             opcode: opcode.to_string(),
             arg: arg.to_string(),
             imm: imm.to_string(),
@@ -323,19 +328,14 @@ pub fn get_inst_data(inst_index: Inst, func: &Function) -> SerInstData {
         InstructionData::NullAry { opcode } => SerInstData::NullAry {
             opcode: opcode.to_string(),
         },
-        InstructionData::InsertLane { opcode, args, lane } => {
+        InstructionData::TernaryImm8 { opcode, args, imm } => {
             let hold_args = [args[0].to_string(), args[1].to_string()];
-            SerInstData::InsertLane {
+            SerInstData::TernaryImm8 {
                 opcode: opcode.to_string(),
                 args: hold_args,
-                lane: lane.to_string(),
+                imm: imm.to_string(),
             }
         }
-        InstructionData::ExtractLane { opcode, arg, lane } => SerInstData::ExtractLane {
-            opcode: opcode.to_string(),
-            arg: arg.to_string(),
-            lane: lane.to_string(),
-        },
         InstructionData::UnaryConst {
             opcode,
             constant_handle,
diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs
index 6056d65f06..a8db0433bc 100644
--- a/cranelift/wasm/src/code_translator.rs
+++ b/cranelift/wasm/src/code_translator.rs
@@ -125,7 +125,11 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
                 GlobalVariable::Memory { gv, offset, ty } => {
                     let addr = builder.ins().global_value(environ.pointer_type(), gv);
                     let flags = ir::MemFlags::trusted();
-                    let val = state.pop1();
+                    let mut val = state.pop1();
+                    // Ensure SIMD values are cast to their default Cranelift type, I8x16.
+                    if ty.is_vector() {
+                        val = optionally_bitcast_vector(val, I8X16, builder);
+                    }
                     debug_assert_eq!(ty, builder.func.dfg.value_type(val));
                     builder.ins().store(flags, val, addr, offset);
                 }
@@ -1302,7 +1306,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
             let ty = type_of(op);
             let reduced = builder.ins().ireduce(ty.lane_type(), replacement);
             let vector = optionally_bitcast_vector(vector, ty, builder);
-            state.push1(builder.ins().insertlane(vector, *lane, reduced))
+            state.push1(builder.ins().insertlane(vector, reduced, *lane))
         }
         Operator::I32x4ReplaceLane { lane }
         | Operator::I64x2ReplaceLane { lane }
@@ -1310,7 +1314,7 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
         | Operator::F64x2ReplaceLane { lane } => {
             let (vector, replacement) = state.pop2();
             let vector = optionally_bitcast_vector(vector, type_of(op), builder);
-            state.push1(builder.ins().insertlane(vector, *lane, replacement))
+            state.push1(builder.ins().insertlane(vector, replacement, *lane))
         }
         Operator::V8x16Shuffle { lanes, .. } => {
             let (a, b) = pop2_with_bitcast(state, I8X16, builder);
diff --git a/crates/c-api/src/error.rs b/crates/c-api/src/error.rs
index db0e25eae1..cbb84873df 100644
--- a/crates/c-api/src/error.rs
+++ b/crates/c-api/src/error.rs
@@ -10,8 +10,8 @@ pub struct wasmtime_error_t {
 wasmtime_c_api_macros::declare_own!(wasmtime_error_t);
 
 impl wasmtime_error_t {
-    pub(crate) fn to_trap(&self) -> Box<wasm_trap_t> {
-        Box::new(wasm_trap_t::new(Trap::new(format!("{:?}", self.error))))
+    pub(crate) fn to_trap(self) -> Box<wasm_trap_t> {
+        Box::new(wasm_trap_t::new(Trap::from(self.error)))
     }
 }
 
diff --git a/crates/c-api/src/trap.rs b/crates/c-api/src/trap.rs
index 8a93cba14f..5f0cc2df98 100644
--- a/crates/c-api/src/trap.rs
+++ b/crates/c-api/src/trap.rs
@@ -53,7 +53,7 @@ pub extern "C" fn wasm_trap_new(
 #[no_mangle]
 pub extern "C" fn wasm_trap_message(trap: &wasm_trap_t, out: &mut wasm_message_t) {
     let mut buffer = Vec::new();
-    buffer.extend_from_slice(trap.trap.borrow().message().as_bytes());
+    buffer.extend_from_slice(trap.trap.borrow().to_string().as_bytes());
     buffer.reserve_exact(1);
     buffer.push(0);
     out.set_buffer(buffer);
diff --git a/crates/wasmtime/Cargo.toml b/crates/wasmtime/Cargo.toml
index 84291e1983..4334f72a64 100644
--- a/crates/wasmtime/Cargo.toml
+++ b/crates/wasmtime/Cargo.toml
@@ -23,6 +23,7 @@ cfg-if = "0.1.9"
 backtrace = "0.3.42"
 rustc-demangle = "0.1.16"
 lazy_static = "1.4"
+log = "0.4.8"
 wat = { version = "1.0.18", optional = true }
 
 [target.'cfg(target_os = "windows")'.dependencies]
diff --git a/crates/wasmtime/src/linker.rs b/crates/wasmtime/src/linker.rs
index ed9aacdd56..3a0682c18c 100644
--- a/crates/wasmtime/src/linker.rs
+++ b/crates/wasmtime/src/linker.rs
@@ -2,7 +2,8 @@ use crate::{
     Extern, ExternType, Func, FuncType, GlobalType, ImportType, Instance, IntoFunc, Module, Store,
     Trap,
 };
-use anyhow::{anyhow, bail, Context, Result};
+use anyhow::{anyhow, bail, Context, Error, Result};
+use log::warn;
 use std::collections::hash_map::{Entry, HashMap};
 use std::rc::Rc;
 
@@ -403,12 +404,7 @@ impl Linker {
                 let export_name = export.name().to_owned();
                 let func = Func::new(&self.store, func_ty.clone(), move |_, params, results| {
                     // Create a new instance for this command execution.
-                    let instance = Instance::new(&module, &imports).map_err(|error| match error
-                        .downcast::<Trap>()
-                    {
-                        Ok(trap) => trap,
-                        Err(error) => Trap::new(format!("{:?}", error)),
-                    })?;
+                    let instance = Instance::new(&module, &imports)?;
 
                     // `unwrap()` everything here because we know the instance contains a
                     // function export with the given name and signature because we're
@@ -436,14 +432,22 @@ impl Linker {
             } else if export.name() == "__indirect_function_table" && export.ty().table().is_some()
             {
                 // Allow an exported "__indirect_function_table" table for now.
+            } else if export.name() == "table" && export.ty().table().is_some() {
+                // Allow an exported "table" table for now.
             } else if export.name() == "__data_end" && export.ty().global().is_some() {
                 // Allow an exported "__data_end" memory for compatibility with toolchains
                 // which use --export-dynamic, which unfortunately doesn't work the way
                 // we want it to.
+                warn!("command module exporting '__data_end' is deprecated");
             } else if export.name() == "__heap_base" && export.ty().global().is_some() {
                 // Allow an exported "__data_end" memory for compatibility with toolchains
                 // which use --export-dynamic, which unfortunately doesn't work the way
                 // we want it to.
+                warn!("command module exporting '__heap_base' is deprecated");
+            } else if export.name() == "__rtti_base" && export.ty().global().is_some() {
+                // Allow an exported "__rtti_base" memory for compatibility with
+                // AssemblyScript.
+                warn!("command module exporting '__rtti_base' is deprecated; pass `--runtime half` to the AssemblyScript compiler");
             } else {
                 bail!("command export '{}' is not a function", export.name());
             }
@@ -529,7 +533,7 @@ impl Linker {
     ///
     /// Each import of `module` will be looked up in this [`Linker`] and must
     /// have previously been defined. If it was previously defined with an
-    /// incorrect signature or if it was not prevoiusly defined then an error
+    /// incorrect signature or if it was not previously defined then an error
     /// will be returned because the import can not be satisfied.
     ///
     /// Per the WebAssembly spec, instantiation includes running the module's
@@ -568,45 +572,41 @@ impl Linker {
     }
 
     fn compute_imports(&self, module: &Module) -> Result<Vec<Extern>> {
-        let mut imports = Vec::new();
+        module
+            .imports()
+            .map(|import| self.get(&import).ok_or_else(|| self.link_error(&import)))
+            .collect()
+    }
 
-        for import in module.imports() {
-            if let Some(item) = self.get(&import) {
-                imports.push(item);
+    fn link_error(&self, import: &ImportType) -> Error {
+        let mut options = Vec::new();
+        for i in self.map.keys() {
+            if &*self.strings[i.module] != import.module()
+                || &*self.strings[i.name] != import.name()
+            {
                 continue;
             }
-
-            let mut options = String::new();
-            for i in self.map.keys() {
-                if &*self.strings[i.module] != import.module()
-                    || &*self.strings[i.name] != import.name()
-                {
-                    continue;
-                }
-                options.push_str("  * ");
-                options.push_str(&format!("{:?}", i.kind));
-                options.push_str("\n");
-            }
-            if options.len() == 0 {
-                bail!(
-                    "unknown import: `{}::{}` has not been defined",
-                    import.module(),
-                    import.name()
-                )
-            }
-
-            bail!(
-                "incompatible import type for `{}::{}` specified\n\
-                 desired signature was: {:?}\n\
-                 signatures available:\n\n{}",
+            options.push(format!("  * {:?}\n", i.kind));
+        }
+        if options.is_empty() {
+            return anyhow!(
+                "unknown import: `{}::{}` has not been defined",
                 import.module(),
-                import.name(),
-                import.ty(),
-                options,
-            )
+                import.name()
+            );
         }
 
-        Ok(imports)
+        options.sort();
+
+        anyhow!(
+            "incompatible import type for `{}::{}` specified\n\
+                 desired signature was: {:?}\n\
+                 signatures available:\n\n{}",
+            import.module(),
+            import.name(),
+            import.ty(),
+            options.concat(),
+        )
     }
 
     /// Returns the [`Store`] that this linker is connected to.
@@ -614,7 +614,7 @@ impl Linker {
         &self.store
     }
 
-    /// Returns an iterator over all items defined in this `Linker`.
+    /// Returns an iterator over all items defined in this `Linker`, in arbitrary order.
     ///
     /// The iterator returned will yield 3-tuples where the first two elements
     /// are the module name and item name for the external item, and the third
@@ -716,15 +716,14 @@ impl Linker {
     }
 }
 
-/// Modules can be interpreted either as Commands (instance lifetime ends
-/// when the start function returns) or Reactor (instance persists).
+/// Modules can be interpreted either as Commands or Reactors.
 enum ModuleKind {
-    /// The instance is a Command, and this is its start function. The
-    /// instance should be consumed.
+    /// The instance is a Command, meaning an instance is created for each
+    /// exported function and lives for the duration of the function call.
     Command,
 
-    /// The instance is a Reactor, and this is its initialization function,
-    /// along with the instance itself, which should persist.
+    /// The instance is a Reactor, meaning one instance is created which
+    /// may live across multiple calls.
     Reactor,
 }
 
diff --git a/crates/wasmtime/src/runtime.rs b/crates/wasmtime/src/runtime.rs
index 2ba9f2fc4b..03a7c349d0 100644
--- a/crates/wasmtime/src/runtime.rs
+++ b/crates/wasmtime/src/runtime.rs
@@ -905,7 +905,7 @@ impl Store {
     /// });
     ///
     /// let trap = run().unwrap_err();
-    /// assert!(trap.message().contains("wasm trap: interrupt"));
+    /// assert!(trap.to_string().contains("wasm trap: interrupt"));
     /// # Ok(())
     /// # }
     /// ```
diff --git a/crates/wasmtime/src/trap.rs b/crates/wasmtime/src/trap.rs
index 4c160b4cfe..f8ec2acfae 100644
--- a/crates/wasmtime/src/trap.rs
+++ b/crates/wasmtime/src/trap.rs
@@ -20,6 +20,9 @@ enum TrapReason {
 
     /// An `i32` exit status describing an explicit program exit.
     I32Exit(i32),
+
+    /// A structured error describing a trap.
+    Error(Box<dyn std::error::Error + Send + Sync>),
 }
 
 impl fmt::Display for TrapReason {
@@ -27,6 +30,7 @@ impl fmt::Display for TrapReason {
         match self {
             TrapReason::Message(s) => write!(f, "{}", s),
             TrapReason::I32Exit(status) => write!(f, "Exited with i32 exit status {}", status),
+            TrapReason::Error(e) => write!(f, "{}", e),
         }
     }
 }
@@ -46,11 +50,12 @@ impl Trap {
     /// # Example
     /// ```
     /// let trap = wasmtime::Trap::new("unexpected error");
-    /// assert_eq!("unexpected error", trap.message());
+    /// assert!(trap.to_string().contains("unexpected error"));
     /// ```
     pub fn new<I: Into<String>>(message: I) -> Self {
         let info = FRAME_INFO.read().unwrap();
-        Trap::new_with_trace(&info, None, message.into(), Backtrace::new_unresolved())
+        let reason = TrapReason::Message(message.into());
+        Trap::new_with_trace(&info, None, reason, Backtrace::new_unresolved())
     }
 
     /// Creates a new `Trap` representing an explicit program exit with a classic `i32`
@@ -68,19 +73,7 @@ impl Trap {
     pub(crate) fn from_runtime(runtime_trap: wasmtime_runtime::Trap) -> Self {
         let info = FRAME_INFO.read().unwrap();
         match runtime_trap {
-            wasmtime_runtime::Trap::User(error) => {
-                // Since we're the only one using the wasmtime internals (in
-                // theory) we should only see user errors which were originally
-                // created from our own `Trap` type (see the trampoline module
-                // with functions).
-                //
-                // If this unwrap trips for someone we'll need to tweak the
-                // return type of this function to probably be `anyhow::Error`
-                // or something like that.
-                *error
-                    .downcast()
-                    .expect("only `Trap` user errors are supported")
-            }
+            wasmtime_runtime::Trap::User(error) => Trap::from(error),
             wasmtime_runtime::Trap::Jit {
                 pc,
                 backtrace,
@@ -100,7 +93,8 @@ impl Trap {
                 backtrace,
             } => Trap::new_wasm(&info, None, trap_code, backtrace),
             wasmtime_runtime::Trap::OOM { backtrace } => {
-                Trap::new_with_trace(&info, None, "out of memory".to_string(), backtrace)
+                let reason = TrapReason::Message("out of memory".to_string());
+                Trap::new_with_trace(&info, None, reason, backtrace)
             }
         }
     }
@@ -125,14 +119,14 @@ impl Trap {
             Interrupt => "interrupt",
             User(_) => unreachable!(),
         };
-        let msg = format!("wasm trap: {}", desc);
+        let msg = TrapReason::Message(format!("wasm trap: {}", desc));
         Trap::new_with_trace(info, trap_pc, msg, backtrace)
     }
 
     fn new_with_trace(
         info: &GlobalFrameInfo,
         trap_pc: Option<usize>,
-        message: String,
+        reason: TrapReason,
         native_trace: Backtrace,
     ) -> Self {
         let mut wasm_trace = Vec::new();
@@ -158,24 +152,13 @@ impl Trap {
         }
         Trap {
             inner: Arc::new(TrapInner {
-                reason: TrapReason::Message(message),
+                reason,
                 wasm_trace,
                 native_trace,
             }),
         }
     }
 
-    /// Returns a reference the `message` stored in `Trap`.
-    ///
-    /// In the case of an explicit exit, the exit status can be obtained by
-    /// calling `i32_exit_status`.
-    pub fn message(&self) -> &str {
-        match &self.inner.reason {
-            TrapReason::Message(message) => message,
-            TrapReason::I32Exit(_) => "explicitly exited",
-        }
-    }
-
     /// If the trap was the result of an explicit program exit with a classic
     /// `i32` exit status value, return the value, otherwise return `None`.
     pub fn i32_exit_status(&self) -> Option<i32> {
@@ -226,4 +209,30 @@ impl fmt::Display for Trap {
     }
 }
 
-impl std::error::Error for Trap {}
+impl std::error::Error for Trap {
+    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
+        match &self.inner.reason {
+            TrapReason::Error(e) => e.source(),
+            TrapReason::I32Exit(_) | TrapReason::Message(_) => None,
+        }
+    }
+}
+
+impl From<anyhow::Error> for Trap {
+    fn from(e: anyhow::Error) -> Trap {
+        Box::<dyn std::error::Error + Send + Sync>::from(e).into()
+    }
+}
+
+impl From<Box<dyn std::error::Error + Send + Sync>> for Trap {
+    fn from(e: Box<dyn std::error::Error + Send + Sync>) -> Trap {
+        // If the top-level error is already a trap, don't be redundant and just return it.
+        if let Some(trap) = e.downcast_ref::<Trap>() {
+            trap.clone()
+        } else {
+            let info = FRAME_INFO.read().unwrap();
+            let reason = TrapReason::Error(e.into());
+            Trap::new_with_trace(&info, None, reason, Backtrace::new_unresolved())
+        }
+    }
+}
diff --git a/crates/wast/src/wast.rs b/crates/wast/src/wast.rs
index 5c3c225311..5574525dae 100644
--- a/crates/wast/src/wast.rs
+++ b/crates/wast/src/wast.rs
@@ -123,7 +123,7 @@ impl WastContext {
     fn module(&mut self, instance_name: Option<&str>, module: &[u8]) -> Result<()> {
         let instance = match self.instantiate(module)? {
             Outcome::Ok(i) => i,
-            Outcome::Trap(e) => bail!("instantiation failed: {}", e.message()),
+            Outcome::Trap(e) => return Err(e).context("instantiation failed"),
         };
         if let Some(name) = instance_name {
             self.linker.instance(name, &instance)?;
@@ -189,7 +189,7 @@ impl WastContext {
             Outcome::Ok(values) => bail!("expected trap, got {:?}", values),
             Outcome::Trap(t) => t,
         };
-        let actual = trap.message();
+        let actual = trap.to_string();
         if actual.contains(expected)
             // `bulk-memory-operations/bulk.wast` checks for a message that
             // specifies which element is uninitialized, but our traps don't
diff --git a/crates/wiggle/generate/src/config.rs b/crates/wiggle/generate/src/config.rs
index 5b09f9dee6..6eecf28c96 100644
--- a/crates/wiggle/generate/src/config.rs
+++ b/crates/wiggle/generate/src/config.rs
@@ -1,12 +1,16 @@
-use std::collections::HashMap;
-use std::path::{Path, PathBuf};
-
-use proc_macro2::Span;
-use syn::{
-    braced, bracketed,
-    parse::{Parse, ParseStream},
-    punctuated::Punctuated,
-    Error, Ident, LitStr, Result, Token,
+use {
+    proc_macro2::Span,
+    std::{
+        collections::HashMap,
+        iter::FromIterator,
+        path::{Path, PathBuf},
+    },
+    syn::{
+        braced, bracketed,
+        parse::{Parse, ParseStream},
+        punctuated::Punctuated,
+        Error, Ident, LitStr, Result, Token,
+    },
 };
 
 #[derive(Debug, Clone)]
@@ -26,7 +30,8 @@ pub enum ConfigField {
 impl ConfigField {
     pub fn parse_pair(ident: &str, value: ParseStream, err_loc: Span) -> Result<Self> {
         match ident {
-            "witx" => Ok(ConfigField::Witx(value.parse()?)),
+            "witx" => Ok(ConfigField::Witx(WitxConf::Paths(value.parse()?))),
+            "witx_literal" => Ok(ConfigField::Witx(WitxConf::Literal(value.parse()?))),
             "ctx" => Ok(ConfigField::Ctx(value.parse()?)),
             "errors" => Ok(ConfigField::Error(value.parse()?)),
             _ => Err(Error::new(err_loc, "expected `witx`, `ctx`, or `errors`")),
@@ -79,6 +84,15 @@ impl Config {
             errors: errors.take().unwrap_or_default(),
         })
     }
+
+    /// Load the `witx` document for the configuration.
+    ///
+    /// # Panics
+    ///
+    /// This method will panic if the paths given in the `witx` field were not valid documents.
+    pub fn load_document(&self) -> witx::Document {
+        self.witx.load_document()
+    }
 }
 
 impl Parse for Config {
@@ -91,31 +105,110 @@ impl Parse for Config {
     }
 }
 
+/// The witx document(s) that will be loaded from a [`Config`](struct.Config.html).
+///
+/// A witx interface definition can be provided either as a collection of relative paths to
+/// documents, or as a single inlined string literal. Note that `(use ...)` directives are not
+/// permitted when providing a string literal.
 #[derive(Debug, Clone)]
-pub struct WitxConf {
-    pub paths: Vec<PathBuf>,
+pub enum WitxConf {
+    /// A collection of paths pointing to witx files.
+    Paths(Paths),
+    /// A single witx document, provided as a string literal.
+    Literal(Literal),
 }
 
 impl WitxConf {
+    /// Load the `witx` document.
+    ///
+    /// # Panics
+    ///
+    /// This method will panic if the paths given in the `witx` field were not valid documents, or
+    /// if any of the given documents were not syntactically valid.
+    pub fn load_document(&self) -> witx::Document {
+        match self {
+            Self::Paths(paths) => witx::load(paths.as_ref()).expect("loading witx"),
+            Self::Literal(doc) => witx::parse(doc.as_ref()).expect("parsing witx"),
+        }
+    }
+
+    /// If using the [`Paths`][paths] syntax, make all paths relative to a root directory.
+    ///
+    /// [paths]: enum.WitxConf.html#variant.Paths
     pub fn make_paths_relative_to<P: AsRef<Path>>(&mut self, root: P) {
-        self.paths.iter_mut().for_each(|p| {
-            if !p.is_absolute() {
-                *p = PathBuf::from(root.as_ref()).join(p.clone());
-            }
-        });
+        if let Self::Paths(paths) = self {
+            paths.as_mut().iter_mut().for_each(|p| {
+                if !p.is_absolute() {
+                    *p = PathBuf::from(root.as_ref()).join(p.clone());
+                }
+            });
+        }
     }
 }
 
-impl Parse for WitxConf {
+/// A collection of paths, pointing to witx documents.
+#[derive(Debug, Clone)]
+pub struct Paths(Vec<PathBuf>);
+
+impl Paths {
+    /// Create a new, empty collection of paths.
+    pub fn new() -> Self {
+        Default::default()
+    }
+}
+
+impl Default for Paths {
+    fn default() -> Self {
+        Self(Default::default())
+    }
+}
+
+impl AsRef<[PathBuf]> for Paths {
+    fn as_ref(&self) -> &[PathBuf] {
+        self.0.as_ref()
+    }
+}
+
+impl AsMut<[PathBuf]> for Paths {
+    fn as_mut(&mut self) -> &mut [PathBuf] {
+        self.0.as_mut()
+    }
+}
+
+impl FromIterator<PathBuf> for Paths {
+    fn from_iter<I>(iter: I) -> Self
+    where
+        I: IntoIterator<Item = PathBuf>,
+    {
+        Self(iter.into_iter().collect())
+    }
+}
+
+impl Parse for Paths {
     fn parse(input: ParseStream) -> Result<Self> {
         let content;
         let _ = bracketed!(content in input);
         let path_lits: Punctuated<LitStr, Token![,]> = content.parse_terminated(Parse::parse)?;
-        let paths = path_lits
+        Ok(path_lits
             .iter()
             .map(|lit| PathBuf::from(lit.value()))
-            .collect();
-        Ok(WitxConf { paths })
+            .collect())
+    }
+}
+
+/// A single witx document, provided as a string literal.
+#[derive(Debug, Clone)]
+pub struct Literal(String);
+
+impl AsRef<str> for Literal {
+    fn as_ref(&self) -> &str {
+        self.0.as_ref()
+    }
+}
+
+impl Parse for Literal {
+    fn parse(input: ParseStream) -> Result<Self> {
+        Ok(Self(input.parse::<syn::LitStr>()?.value()))
     }
 }
 
diff --git a/crates/wiggle/generate/src/names.rs b/crates/wiggle/generate/src/names.rs
index 47ab1312a6..f6266ec41f 100644
--- a/crates/wiggle/generate/src/names.rs
+++ b/crates/wiggle/generate/src/names.rs
@@ -1,4 +1,5 @@
-use heck::{CamelCase, ShoutySnakeCase, SnakeCase};
+use escaping::{escape_id, handle_2big_enum_variant, NamingConvention};
+use heck::{ShoutySnakeCase, SnakeCase};
 use proc_macro2::{Ident, TokenStream};
 use quote::{format_ident, quote};
 use witx::{AtomType, BuiltinType, Id, Type, TypeRef};
@@ -17,16 +18,20 @@ impl Names {
             runtime_mod,
         }
     }
+
     pub fn ctx_type(&self) -> Ident {
         self.ctx_type.clone()
     }
+
     pub fn runtime_mod(&self) -> TokenStream {
         self.runtime_mod.clone()
     }
+
     pub fn type_(&self, id: &Id) -> TokenStream {
-        let ident = format_ident!("{}", id.as_str().to_camel_case());
+        let ident = escape_id(id, NamingConvention::CamelCase);
         quote!(#ident)
     }
+
     pub fn builtin_type(&self, b: BuiltinType, lifetime: TokenStream) -> TokenStream {
         match b {
             BuiltinType::String => {
@@ -83,15 +88,12 @@ impl Names {
         }
     }
 
+    /// Convert an enum variant from its [`Id`][witx] name to its Rust [`Ident`][id] representation.
+    ///
+    /// [id]: https://docs.rs/proc-macro2/*/proc_macro2/struct.Ident.html
+    /// [witx]: https://docs.rs/witx/*/witx/struct.Id.html
     pub fn enum_variant(&self, id: &Id) -> Ident {
-        // FIXME this is a hack - just a proof of concept.
-        if id.as_str().starts_with('2') {
-            format_ident!("TooBig")
-        } else if id.as_str() == "type" {
-            format_ident!("Type")
-        } else {
-            format_ident!("{}", id.as_str().to_camel_case())
-        }
+        handle_2big_enum_variant(id).unwrap_or_else(|| escape_id(id, NamingConvention::CamelCase))
     }
 
     pub fn flag_member(&self, id: &Id) -> Ident {
@@ -102,34 +104,44 @@ impl Names {
         format_ident!("{}", id.as_str().to_shouty_snake_case())
     }
 
+    /// Convert a struct member from its [`Id`][witx] name to its Rust [`Ident`][id] representation.
+    ///
+    /// [id]: https://docs.rs/proc-macro2/*/proc_macro2/struct.Ident.html
+    /// [witx]: https://docs.rs/witx/*/witx/struct.Id.html
     pub fn struct_member(&self, id: &Id) -> Ident {
-        // FIXME this is a hack - just a proof of concept.
-        if id.as_str() == "type" {
-            format_ident!("type_")
-        } else {
-            format_ident!("{}", id.as_str().to_snake_case())
-        }
+        escape_id(id, NamingConvention::SnakeCase)
     }
 
+    /// Convert a module name from its [`Id`][witx] name to its Rust [`Ident`][id] representation.
+    ///
+    /// [id]: https://docs.rs/proc-macro2/*/proc_macro2/struct.Ident.html
+    /// [witx]: https://docs.rs/witx/*/witx/struct.Id.html
     pub fn module(&self, id: &Id) -> Ident {
-        format_ident!("{}", id.as_str().to_snake_case())
+        escape_id(id, NamingConvention::SnakeCase)
     }
 
+    /// Convert a trait name from its [`Id`][witx] name to its Rust [`Ident`][id] representation.
+    ///
+    /// [id]: https://docs.rs/proc-macro2/*/proc_macro2/struct.Ident.html
+    /// [witx]: https://docs.rs/witx/*/witx/struct.Id.html
     pub fn trait_name(&self, id: &Id) -> Ident {
-        format_ident!("{}", id.as_str().to_camel_case())
+        escape_id(id, NamingConvention::CamelCase)
     }
 
+    /// Convert a function name from its [`Id`][witx] name to its Rust [`Ident`][id] representation.
+    ///
+    /// [id]: https://docs.rs/proc-macro2/*/proc_macro2/struct.Ident.html
+    /// [witx]: https://docs.rs/witx/*/witx/struct.Id.html
     pub fn func(&self, id: &Id) -> Ident {
-        format_ident!("{}", id.as_str().to_snake_case())
+        escape_id(id, NamingConvention::SnakeCase)
     }
 
+    /// Convert a parameter name from its [`Id`][witx] name to its Rust [`Ident`][id] representation.
+    ///
+    /// [id]: https://docs.rs/proc-macro2/*/proc_macro2/struct.Ident.html
+    /// [witx]: https://docs.rs/witx/*/witx/struct.Id.html
     pub fn func_param(&self, id: &Id) -> Ident {
-        // FIXME this is a hack - just a proof of concept.
-        if id.as_str() == "in" {
-            format_ident!("in_")
-        } else {
-            format_ident!("{}", id.as_str().to_snake_case())
-        }
+        escape_id(id, NamingConvention::SnakeCase)
     }
 
     pub fn func_core_arg(&self, arg: &witx::CoreParamType) -> Ident {
@@ -192,3 +204,117 @@ impl Names {
         )
     }
 }
+
+/// Identifier escaping utilities.
+///
+/// This module most importantly exports an `escape_id` function that can be used to properly
+/// escape tokens that conflict with strict and reserved keywords, as of Rust's 2018 edition.
+///
+/// Weak keywords are not included as their semantic rules do not have the same implications as
+/// those of strict and reserved keywords. `union` for example, is permitted as the name of a
+/// variable. `dyn` was promoted to a strict keyword beginning in the 2018 edition.
+mod escaping {
+    use {
+        heck::{CamelCase, SnakeCase},
+        proc_macro2::Ident,
+        quote::format_ident,
+        witx::Id,
+    };
+
+    /// Identifier naming convention.
+    ///
+    /// Because shouty snake case values (identifiers that look `LIKE_THIS`) cannot potentially
+    /// conflict with any Rust keywords, this enum only include snake and camel case variants.
+    pub enum NamingConvention {
+        /// Snake case. Used to denote values `LikeThis`.
+        CamelCase,
+        /// Snake case. Used to denote values `like_this`.
+        SnakeCase,
+    }
+
+    /// Given a witx [`Id`][witx] and a [`NamingConvention`][naming], return a [`Ident`] word of
+    /// Rust syntax that accounts for escaping both strict and reserved keywords. If an identifier
+    /// would have conflicted with a keyword, a trailing underscode will be appended.
+    ///
+    /// [id]: https://docs.rs/proc-macro2/*/proc_macro2/struct.Ident.html
+    /// [naming]: enum.NamingConvention.html
+    /// [witx]: https://docs.rs/witx/*/witx/struct.Id.html
+    pub fn escape_id(id: &Id, conv: NamingConvention) -> Ident {
+        use NamingConvention::{CamelCase, SnakeCase};
+        match (conv, id.as_str()) {
+            // For camel-cased identifiers, `Self` is the only potential keyword conflict.
+            (CamelCase, "self") => format_ident!("Self_"),
+            (CamelCase, s) => format_ident!("{}", s.to_camel_case()),
+            // Snake-cased identifiers are where the bulk of conflicts can occur.
+            (SnakeCase, s) => {
+                let s = s.to_snake_case();
+                if STRICT.iter().chain(RESERVED).any(|k| *k == s) {
+                    // If the camel-cased string matched any strict or reserved keywords, then
+                    // append a trailing underscore to the identifier we generate.
+                    format_ident!("{}_", s)
+                } else {
+                    format_ident!("{}", s) // Otherwise, use the string as is.
+                }
+            }
+        }
+    }
+
+    /// Strict keywords.
+    ///
+    /// >  Strict keywords cannot be used as the names of:
+    /// >    * Items
+    /// >    * Variables and function parameters
+    /// >    * Fields and variants
+    /// >    * Type parameters
+    /// >    * Lifetime parameters or loop labels
+    /// >    * Macros or attributes
+    /// >    * Macro placeholders
+    /// >    * Crates
+    /// >
+    /// > - <cite>[The Rust Reference][ref]</cite>
+    ///
+    /// This list also includes keywords that were introduced in the 2018 edition of Rust.
+    ///
+    /// [ref]: https://doc.rust-lang.org/reference/keywords.html#strict-keywords
+    const STRICT: &[&str] = &[
+        "as", "async", "await", "break", "const", "continue", "crate", "dyn", "else", "enum",
+        "extern", "false", "fn", "for", "if", "impl", "in", "let", "loop", "match", "mod", "move",
+        "mut", "pub", "ref", "return", "self", "Self", "static", "struct", "super", "trait",
+        "true", "type", "unsafe", "use", "where", "while",
+    ];
+
+    /// Reserved keywords.
+    ///
+    /// > These keywords aren't used yet, but they are reserved for future use. They have the same
+    /// > restrictions as strict keywords. The reasoning behind this is to make current programs
+    /// > forward compatible with future versions of Rust by forbidding them to use these keywords.
+    /// >
+    /// > - <cite>[The Rust Reference][ref]</cite>
+    ///
+    /// This list also includes keywords that were introduced in the 2018 edition of Rust.
+    ///
+    /// [ref]: https://doc.rust-lang.org/reference/keywords.html#reserved-keywords
+    const RESERVED: &[&str] = &[
+        "abstract", "become", "box", "do", "final", "macro", "override", "priv", "try", "typeof",
+        "unsized", "virtual", "yield",
+    ];
+
+    /// Handle WASI's [`errno::2big`][err] variant.
+    ///
+    /// This is an unfortunate edge case that must account for when generating `enum` variants.
+    /// This will only return `Some(_)` if the given witx identifier *is* `2big`, otherwise this
+    /// function will return `None`.
+    ///
+    /// This functionality is a short-term fix that keeps WASI working. Instead of expanding these sort of special cases,
+    /// we should replace this function by having the user provide a mapping of witx identifiers to Rust identifiers in the
+    /// arguments to the macro.
+    ///
+    /// [err]: https://github.com/WebAssembly/WASI/blob/master/phases/snapshot/docs.md#-errno-enumu16
+    pub fn handle_2big_enum_variant(id: &Id) -> Option<Ident> {
+        if id.as_str() == "2big" {
+            Some(format_ident!("TooBig"))
+        } else {
+            None
+        }
+    }
+}
diff --git a/crates/wiggle/macro/src/lib.rs b/crates/wiggle/macro/src/lib.rs
index 214d14dd72..ed4a3dc6f0 100644
--- a/crates/wiggle/macro/src/lib.rs
+++ b/crates/wiggle/macro/src/lib.rs
@@ -95,7 +95,7 @@ pub fn from_witx(args: TokenStream) -> TokenStream {
         std::env::var("CARGO_MANIFEST_DIR").expect("CARGO_MANIFEST_DIR env var"),
     );
 
-    let doc = witx::load(&config.witx.paths).expect("loading witx");
+    let doc = config.load_document();
     let names = wiggle_generate::Names::new(&config.ctx.name, quote!(wiggle));
 
     let error_transform = wiggle_generate::ErrorTransform::new(&config.errors, &doc)
diff --git a/crates/wiggle/tests/keywords.rs b/crates/wiggle/tests/keywords.rs
new file mode 100644
index 0000000000..ea002fcc6d
--- /dev/null
+++ b/crates/wiggle/tests/keywords.rs
@@ -0,0 +1,64 @@
+//! Tests to check that keywords in `witx` files are escaped.
+//!
+//! No `#[test]` functions are defined below because the `wiggle::from_witx!` macro expanding into
+//! syntactically correct Rust code at compile time is the subject under test.
+
+/// Test that an enum variant that conflicts with a Rust keyword can be compiled properly.
+mod enum_test {
+    wiggle::from_witx!({
+        witx_literal:
+            "(typename $self
+                 (enum u8
+                     $self
+                     $2big
+                 )
+             )",
+        ctx: DummyCtx,
+    });
+}
+
+/// Test module, trait, function, and function parameter names conflicting with Rust keywords.
+///
+/// We use `self` because the camel-cased trait name `Self` is *also* a strict keyword. This lets
+/// us simultaneously test the name of the module and the generated trait.
+mod module_trait_fn_and_arg_test {
+    use wiggle_test::WasiCtx;
+    wiggle::from_witx!({
+        witx_literal:
+            "(module $self
+                 (@interface func (export \"fn\")
+                     (param $use u32)
+                     (param $virtual u32)
+                 )
+             )",
+        ctx: WasiCtx,
+    });
+    impl<'a> self_::Self_ for WasiCtx<'a> {
+        #[allow(unused_variables)]
+        fn fn_(&self, use_: u32, virtual_: u32) -> Result<(), ()> {
+            unimplemented!();
+        }
+    }
+}
+
+/// Test that a struct and member names conflicting with Rust keywords can be compiled properly.
+mod struct_test {
+    wiggle::from_witx!({
+        witx_literal:
+            "(typename $self
+                 (struct
+                     (field $become s32)
+                     (field $mut s32)
+                 )
+             )",
+        ctx: DummyCtx,
+    });
+}
+
+/// Test that a union variant that conflicts with a Rust keyword can be compiled properly.
+mod union_test {
+    wiggle::from_witx!({
+        witx: ["tests/keywords_union.witx"],
+        ctx: DummyCtx,
+    });
+}
diff --git a/crates/wiggle/tests/keywords_union.witx b/crates/wiggle/tests/keywords_union.witx
new file mode 100644
index 0000000000..14d29b70dd
--- /dev/null
+++ b/crates/wiggle/tests/keywords_union.witx
@@ -0,0 +1,15 @@
+(typename $union
+  (enum u8
+    $self
+    $power
+  )
+)
+
+(typename $self
+  (union $union
+    ;; A union variant that will expand to a strict keyword `Self`.
+    (field $self (@witx pointer f32))
+    ;; Oh it's true, that there's power in a union!
+    (field $power (@witx pointer f32))
+  )
+)
diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md
index f2cfcf34a8..6409a86d9f 100644
--- a/docs/SUMMARY.md
+++ b/docs/SUMMARY.md
@@ -39,6 +39,7 @@
 - [Writing WebAssembly](./wasm.md)
   - [Rust](./wasm-rust.md)
   - [C/C++](./wasm-c.md)
+  - [AssemblyScript](./wasm-assemblyscript.md)
   - [WebAssembly Text Format (`*.wat`)](./wasm-wat.md)
   - [Example: Markdown Parser](./wasm-markdown.md)
 - [Stability](stability.md)
diff --git a/docs/assemblyscript-hello-world/package-lock.json b/docs/assemblyscript-hello-world/package-lock.json
new file mode 100644
index 0000000000..f4568e8796
--- /dev/null
+++ b/docs/assemblyscript-hello-world/package-lock.json
@@ -0,0 +1,35 @@
+{
+  "name": "wasi-hello-world",
+  "version": "1.0.0",
+  "lockfileVersion": 1,
+  "requires": true,
+  "dependencies": {
+    "as-wasi": {
+      "version": "0.1.1",
+      "resolved": "https://registry.npmjs.org/as-wasi/-/as-wasi-0.1.1.tgz",
+      "integrity": "sha512-7PrSjsD/K2Pg95/2fu+4RJCfZLiuM0w0k5lMceaCf73EvH+7WPQTM1WW/vS0cizRTaEDj8Wz5ttoZBJSvsZpBQ=="
+    },
+    "assemblyscript": {
+      "version": "0.10.0",
+      "resolved": "https://registry.npmjs.org/assemblyscript/-/assemblyscript-0.10.0.tgz",
+      "integrity": "sha512-ErUNhHboD+zsB4oG6X1YICDAIo27Gq7LeNX6jVe+Q0W5cI51/fHwC8yJ68IukqvupmZgYPdp1JqqRXlS+BrUfA==",
+      "dev": true,
+      "requires": {
+        "binaryen": "93.0.0-nightly.20200514",
+        "long": "^4.0.0"
+      }
+    },
+    "binaryen": {
+      "version": "93.0.0-nightly.20200514",
+      "resolved": "https://registry.npmjs.org/binaryen/-/binaryen-93.0.0-nightly.20200514.tgz",
+      "integrity": "sha512-SRRItmNvhRVfoWWbRloO4i8IqkKH8rZ7/0QWRgLpM3umupK8gBpo9MY7Zp3pDysRSp+rVoqxvM5x4tFyCSa9zw==",
+      "dev": true
+    },
+    "long": {
+      "version": "4.0.0",
+      "resolved": "https://registry.npmjs.org/long/-/long-4.0.0.tgz",
+      "integrity": "sha512-XsP+KhQif4bjX1kbuSiySJFNAehNxgLb6hPRGJ9QsUr8ajHkuXGdrHmFUTUUXhDwVX2R5bY4JNZEwbUiMhV+MA==",
+      "dev": true
+    }
+  }
+}
diff --git a/docs/assemblyscript-hello-world/package.json b/docs/assemblyscript-hello-world/package.json
new file mode 100644
index 0000000000..2bcd9100a2
--- /dev/null
+++ b/docs/assemblyscript-hello-world/package.json
@@ -0,0 +1,18 @@
+{
+  "name": "wasi-hello-world",
+  "version": "1.0.0",
+  "description": "Hello world in Wasi with AS and as-wasi",
+  "main": "index.js",
+  "scripts": {
+    "build": "asc wasi-hello-world.ts -b wasi-hello-world.wasm -t wasi-hello-world.wat --runtime half",
+    "wasmtime": "wasmtime wasi-hello-world.wasm"
+  },
+  "author": "Aaron Turner",
+  "license": "MIT",
+  "devDependencies": {
+    "assemblyscript": "^0.10.0"
+  },
+  "dependencies": {
+    "as-wasi": "^0.1.1"
+  }
+}
diff --git a/docs/assemblyscript-hello-world/wasi-hello-world.ts b/docs/assemblyscript-hello-world/wasi-hello-world.ts
new file mode 100644
index 0000000000..1cd3065fe4
--- /dev/null
+++ b/docs/assemblyscript-hello-world/wasi-hello-world.ts
@@ -0,0 +1,5 @@
+import "wasi"
+
+import {Console} from "as-wasi"
+Console.log('Hello World!\n');
+
diff --git a/docs/wasm-assemblyscript.md b/docs/wasm-assemblyscript.md
new file mode 100644
index 0000000000..fddd9bd741
--- /dev/null
+++ b/docs/wasm-assemblyscript.md
@@ -0,0 +1,53 @@
+# AssemblyScript
+
+[AssemblyScript] 0.10.0 includes support for targeting WASI. To use it, add
+`import "wasi"` at the top of your entrypoint file.
+
+To create a program which can be run directly as a command, pass `--runtime half`
+to the AssemblyScript linker. This selects the [half runtime], which ensures that
+the generated wasm module doesn't contain any extraneous exports. (This isn't
+strictly required today, but the handling of extraneous exports may change in
+the future, so it's encouraged. As a bonus, it also reduces code size.)
+
+To create a program which can be loaded as a library and used from other modules,
+no special options are needed.
+
+Let's walk through a simple hello world example.
+
+## `wasi-hello-world.ts`
+
+```typescript
+{{#include ./assemblyscript-hello-world/wasi-hello-world.ts}}
+```
+
+This uses [as-wasi] as a dependency to make working with the AssemblyScript WASI
+bindings easier. Then, you can run:
+
+```sh
+asc wasi-hello-world.ts -b wasi-hello-world.wasm
+```
+
+to compile it to wasm, and
+
+```sh
+wasmtime wasi-hello-world.wasm
+```
+
+to run it from the command-line. Or you can instantiate it using the [Wasmtime API].
+
+## `package.json`
+
+It can also be packaged using a `package.json` file:
+
+```json
+{{#include ./assemblyscript-hello-world/package.json}}
+```
+
+You can also [browse this source code online][code] and clone the wasmtime
+repository to run the example locally.
+
+[code]: https://github.com/bytecodealliance/wasmtime/blob/master/docs/assemblyscript-hello-world
+[AssemblyScript]: https://assemblyscript.org
+[as-wasi]: https://github.com/jedisct1/as-wasi
+[half runtime]: https://docs.assemblyscript.org/details/runtime#runtime-variants
+[Wasmtime API]: ./lang.md
diff --git a/docs/wasm.md b/docs/wasm.md
index b524cc811e..fb442f6006 100644
--- a/docs/wasm.md
+++ b/docs/wasm.md
@@ -9,4 +9,5 @@ check out your language's documentation for WebAssembly as well.
 
 * [Rust](wasm-rust.md)
 * [C/C++](wasm-c.md)
+* [AssemblyScript](wasm-assemblyscript.md)
 * [WebAssembly Text Format (`*.wat`)](wasm-wat.md)
diff --git a/examples/interrupt.rs b/examples/interrupt.rs
index 696d8a2d95..6a42867cc2 100644
--- a/examples/interrupt.rs
+++ b/examples/interrupt.rs
@@ -32,7 +32,7 @@ fn main() -> Result<()> {
     let trap = run().unwrap_err();
 
     println!("trap received...");
-    assert!(trap.message().contains("wasm trap: interrupt"));
+    assert!(trap.to_string().contains("wasm trap: interrupt"));
 
     Ok(())
 }
diff --git a/src/commands/run.rs b/src/commands/run.rs
index 0e3d3e4661..4e40d057fd 100644
--- a/src/commands/run.rs
+++ b/src/commands/run.rs
@@ -162,8 +162,6 @@ impl RunCommand {
                 // a message and exit.
                 if let Some(trap) = e.downcast_ref::<Trap>() {
                     // Print the error message in the usual way.
-                    eprintln!("Error: {:?}", e);
-
                     if let Some(status) = trap.i32_exit_status() {
                         // On Windows, exit status 3 indicates an abort (see below),
                         // so return 1 indicating a non-zero status to avoid ambiguity.
@@ -173,6 +171,8 @@ impl RunCommand {
                         process::exit(status);
                     }
 
+                    eprintln!("Error: {:?}", e);
+
                     // If the program exited because of a trap, return an error code
                     // to the outside environment indicating a more severe problem
                     // than a simple failure.
diff --git a/tests/all/custom_signal_handler.rs b/tests/all/custom_signal_handler.rs
index 990c2af378..50425f0dfc 100644
--- a/tests/all/custom_signal_handler.rs
+++ b/tests/all/custom_signal_handler.rs
@@ -113,10 +113,10 @@ mod tests {
                 .unwrap_err()
                 .downcast::<Trap>()?;
             assert!(
-                trap.message()
-                    .starts_with("wasm trap: out of bounds memory access"),
+                trap.to_string()
+                    .contains("wasm trap: out of bounds memory access"),
                 "bad trap message: {:?}",
-                trap.message()
+                trap.to_string()
             );
         }
 
@@ -140,8 +140,8 @@ mod tests {
                 .unwrap_err()
                 .downcast::<Trap>()?;
             assert!(trap
-                .message()
-                .starts_with("wasm trap: out of bounds memory access"));
+                .to_string()
+                .contains("wasm trap: out of bounds memory access"));
         }
         Ok(())
     }
diff --git a/tests/all/func.rs b/tests/all/func.rs
index 16da165bab..4d8f5d8702 100644
--- a/tests/all/func.rs
+++ b/tests/all/func.rs
@@ -181,7 +181,7 @@ fn trap_smoke() -> Result<()> {
     let store = Store::default();
     let f = Func::wrap(&store, || -> Result<(), Trap> { Err(Trap::new("test")) });
     let err = f.call(&[]).unwrap_err().downcast::<Trap>()?;
-    assert_eq!(err.message(), "test");
+    assert!(err.to_string().contains("test"));
     assert!(err.i32_exit_status().is_none());
     Ok(())
 }
@@ -203,7 +203,7 @@ fn trap_import() -> Result<()> {
     .err()
     .unwrap()
     .downcast::<Trap>()?;
-    assert_eq!(trap.message(), "foo");
+    assert!(trap.to_string().contains("foo"));
     Ok(())
 }
 
@@ -397,9 +397,8 @@ fn func_write_nothing() -> anyhow::Result<()> {
     let ty = FuncType::new(Box::new([]), Box::new([ValType::I32]));
     let f = Func::new(&store, ty, |_, _, _| Ok(()));
     let err = f.call(&[]).unwrap_err().downcast::<Trap>()?;
-    assert_eq!(
-        err.message(),
-        "function attempted to return an incompatible value"
-    );
+    assert!(err
+        .to_string()
+        .contains("function attempted to return an incompatible value"));
     Ok(())
 }
diff --git a/tests/all/import_calling_export.rs b/tests/all/import_calling_export.rs
index 7affdd8759..9e7f96b078 100644
--- a/tests/all/import_calling_export.rs
+++ b/tests/all/import_calling_export.rs
@@ -90,9 +90,8 @@ fn test_returns_incorrect_type() -> Result<()> {
         .call(&[])
         .expect_err("the execution should fail")
         .downcast::<Trap>()?;
-    assert_eq!(
-        trap.message(),
-        "function attempted to return an incompatible value"
-    );
+    assert!(trap
+        .to_string()
+        .contains("function attempted to return an incompatible value"));
     Ok(())
 }
diff --git a/tests/all/linker.rs b/tests/all/linker.rs
index b84d7e9c1d..b2c7a64da5 100644
--- a/tests/all/linker.rs
+++ b/tests/all/linker.rs
@@ -66,7 +66,40 @@ fn link_twice_bad() -> Result<()> {
 }
 
 #[test]
-fn interposition() -> Result<()> {
+fn function_interposition() -> Result<()> {
+    let store = Store::default();
+    let mut linker = Linker::new(&store);
+    linker.allow_shadowing(true);
+    let mut module = Module::new(
+        &store,
+        r#"(module (func (export "green") (result i32) (i32.const 7)))"#,
+    )?;
+    for _ in 0..4 {
+        let instance = linker.instantiate(&module)?;
+        linker.define(
+            "red",
+            "green",
+            instance.get_export("green").unwrap().clone(),
+        )?;
+        module = Module::new(
+            &store,
+            r#"(module
+                (import "red" "green" (func (result i32)))
+                (func (export "green") (result i32) (i32.mul (call 0) (i32.const 2)))
+            )"#,
+        )?;
+    }
+    let instance = linker.instantiate(&module)?;
+    let func = instance.get_export("green").unwrap().into_func().unwrap();
+    let func = func.get0::<i32>()?;
+    assert_eq!(func()?, 112);
+    Ok(())
+}
+
+// Same as `function_interposition`, but the linker's name for the function
+// differs from the module's name.
+#[test]
+fn function_interposition_renamed() -> Result<()> {
     let store = Store::default();
     let mut linker = Linker::new(&store);
     linker.allow_shadowing(true);
@@ -95,3 +128,32 @@ fn interposition() -> Result<()> {
     assert_eq!(func()?, 112);
     Ok(())
 }
+
+// Similar to `function_interposition`, but use `Linker::instance` instead of
+// `Linker::define`.
+#[test]
+fn module_interposition() -> Result<()> {
+    let store = Store::default();
+    let mut linker = Linker::new(&store);
+    linker.allow_shadowing(true);
+    let mut module = Module::new(
+        &store,
+        r#"(module (func (export "export") (result i32) (i32.const 7)))"#,
+    )?;
+    for _ in 0..4 {
+        let instance = linker.instantiate(&module)?;
+        linker.instance("instance", &instance)?;
+        module = Module::new(
+            &store,
+            r#"(module
+                (import "instance" "export" (func (result i32)))
+                (func (export "export") (result i32) (i32.mul (call 0) (i32.const 2)))
+            )"#,
+        )?;
+    }
+    let instance = linker.instantiate(&module)?;
+    let func = instance.get_export("export").unwrap().into_func().unwrap();
+    let func = func.get0::<i32>()?;
+    assert_eq!(func()?, 112);
+    Ok(())
+}
diff --git a/tests/all/traps.rs b/tests/all/traps.rs
index f4ae458f48..575e6741b8 100644
--- a/tests/all/traps.rs
+++ b/tests/all/traps.rs
@@ -25,7 +25,7 @@ fn test_trap_return() -> Result<()> {
         .expect("error calling function")
         .downcast::<Trap>()?;
 
-    assert_eq!(e.message(), "test 123");
+    assert!(e.to_string().contains("test 123"));
 
     Ok(())
 }
@@ -64,9 +64,9 @@ fn test_trap_trace() -> Result<()> {
     assert_eq!(trace[1].func_offset(), 1);
     assert_eq!(trace[1].module_offset(), 0x21);
     assert!(
-        e.message().contains("unreachable"),
+        e.to_string().contains("unreachable"),
         "wrong message: {}",
-        e.message()
+        e.to_string()
     );
 
     Ok(())
@@ -103,7 +103,7 @@ fn test_trap_trace_cb() -> Result<()> {
     assert_eq!(trace[0].func_index(), 2);
     assert_eq!(trace[1].module_name().unwrap(), "hello_mod");
     assert_eq!(trace[1].func_index(), 1);
-    assert_eq!(e.message(), "cb throw");
+    assert!(e.to_string().contains("cb throw"));
 
     Ok(())
 }
@@ -135,7 +135,7 @@ fn test_trap_stack_overflow() -> Result<()> {
         assert_eq!(trace[i].func_index(), 0);
         assert_eq!(trace[i].func_name(), Some("run"));
     }
-    assert!(e.message().contains("call stack exhausted"));
+    assert!(e.to_string().contains("call stack exhausted"));
 
     Ok(())
 }
@@ -234,7 +234,11 @@ fn trap_start_function_import() -> Result<()> {
     let sig = FuncType::new(Box::new([]), Box::new([]));
     let func = Func::new(&store, sig, |_, _, _| Err(Trap::new("user trap")));
     let err = Instance::new(&module, &[func.into()]).err().unwrap();
-    assert_eq!(err.downcast_ref::<Trap>().unwrap().message(), "user trap");
+    assert!(err
+        .downcast_ref::<Trap>()
+        .unwrap()
+        .to_string()
+        .contains("user trap"));
     Ok(())
 }
 
@@ -373,7 +377,9 @@ fn call_signature_mismatch() -> Result<()> {
         .unwrap()
         .downcast::<Trap>()
         .unwrap();
-    assert_eq!(err.message(), "wasm trap: indirect call type mismatch");
+    assert!(err
+        .to_string()
+        .contains("wasm trap: indirect call type mismatch"));
     Ok(())
 }