Cranelift: implement heap_{load,store} instruction legalization (#5351)

* Cranelift: implement `heap_{load,store}` instruction legalization

This does not remove `heap_addr` yet, but it does factor out the common
bounds-check-and-compute-the-native-address functionality that is shared between
all of `heap_{addr,load,store}`.

Finally, this adds a missing optimization: for static memories, the explicit
bounds check is now deduplicated with the Spectre mitigation's comparison
(a minimal model of these bounds checks is sketched below).

* Cranelift: Enable `heap_load_store_*` run tests on all targets
Nick Fitzgerald
2022-11-30 11:12:49 -08:00
committed by GitHub
parent 830885383f
commit 79f7fa6079
27 changed files with 566 additions and 471 deletions
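The bounds checks that this commit centralizes all come down to a little integer
arithmetic. The sketch below is a standalone model of the static-memory cases,
using ad hoc helper names rather than Cranelift's API (only `offset_plus_size`
mirrors a function in the patch); it covers the guard-page elision for 32-bit
indices and the explicit `index > bound - (offset + access_size)` check.

/// Widening add of the constant parts of the access; cannot overflow because
/// both operands fit in a `u64` (mirrors `offset_plus_size` in the patch).
fn offset_plus_size(offset: u32, size: u8) -> u64 {
    offset as u64 + size as u64
}

/// True if a static heap of `bound` bytes with `guard_size` guard bytes never
/// needs an explicit bounds check for a 32-bit index: any `index <= u32::MAX`
/// either lands in bounds or inside the guard region.
fn static_check_elidable_for_i32_index(
    offset: u32,
    access_size: u8,
    bound: u64,
    guard_size: u64,
) -> bool {
    let limit = bound
        .checked_add(guard_size)
        .expect("heap bound + guard size must fit in a 64-bit address space");
    u64::from(u32::MAX) <= limit.saturating_sub(offset_plus_size(offset, access_size))
}

/// Explicit static-heap bounds check: the access is in bounds iff
/// `index + offset + access_size <= bound`, evaluated without overflow by
/// moving the constants to the right-hand side.
fn static_access_in_bounds(index: u64, offset: u32, access_size: u8, bound: u64) -> bool {
    let constant = offset_plus_size(offset, access_size);
    if constant > bound {
        return false; // every index is out of bounds; the legalizer emits an unconditional trap
    }
    index <= bound - constant // mirrors `icmp_imm ugt index, bound - (offset + access_size)`
}

fn main() {
    // 4 GiB memory with 2 GiB guard regions: a 32-bit index never needs a check.
    assert!(static_check_elidable_for_i32_index(0, 4, 1 << 32, 2 << 30));
    // Small heap, no guard: an explicit check is required, and 4092 is the
    // largest in-bounds index for a 4-byte access into a 4096-byte heap.
    assert!(!static_check_elidable_for_i32_index(0, 4, 4096, 0));
    assert!(static_access_in_bounds(4092, 0, 4, 4096));
    assert!(!static_access_in_bounds(4093, 0, 4, 4096));
}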

View File

@@ -6,11 +6,73 @@
use crate::cursor::{Cursor, FuncCursor};
use crate::flowgraph::ControlFlowGraph;
use crate::ir::condcodes::IntCC;
-use crate::ir::immediates::{Uimm32, Uimm8};
+use crate::ir::immediates::{HeapImmData, Offset32, Uimm32, Uimm8};
use crate::ir::{self, InstBuilder, RelSourceLoc};
use crate::isa::TargetIsa;
use crate::trace;
/// Expand a `heap_load` instruction according to the definition of the heap.
pub fn expand_heap_load(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
isa: &dyn TargetIsa,
heap_imm: ir::HeapImm,
index: ir::Value,
) {
let HeapImmData {
flags,
heap,
offset,
} = func.dfg.heap_imms[heap_imm];
let result_ty = func.dfg.ctrl_typevar(inst);
let access_size = result_ty.bytes();
let access_size = u8::try_from(access_size).unwrap();
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let addr =
bounds_check_and_compute_addr(&mut pos, cfg, isa, heap, index, offset.into(), access_size);
pos.func
.dfg
.replace(inst)
.load(result_ty, flags, addr, Offset32::new(0));
}
/// Expand a `heap_store` instruction according to the definition of the heap.
pub fn expand_heap_store(
inst: ir::Inst,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
isa: &dyn TargetIsa,
heap_imm: ir::HeapImm,
index: ir::Value,
value: ir::Value,
) {
let HeapImmData {
flags,
heap,
offset,
} = func.dfg.heap_imms[heap_imm];
let store_ty = func.dfg.value_type(value);
let access_size = u8::try_from(store_ty.bytes()).unwrap();
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let addr =
bounds_check_and_compute_addr(&mut pos, cfg, isa, heap, index, offset.into(), access_size);
pos.func
.dfg
.replace(inst)
.store(flags, value, addr, Offset32::new(0));
}
/// Expand a `heap_addr` instruction according to the definition of the heap.
pub fn expand_heap_addr(
inst: ir::Inst,
@@ -18,156 +80,199 @@ pub fn expand_heap_addr(
cfg: &mut ControlFlowGraph, cfg: &mut ControlFlowGraph,
isa: &dyn TargetIsa, isa: &dyn TargetIsa,
heap: ir::Heap, heap: ir::Heap,
index_operand: ir::Value, index: ir::Value,
offset_immediate: Uimm32, offset: Uimm32,
access_size: Uimm8, access_size: Uimm8,
) { ) {
trace!( let mut pos = FuncCursor::new(func).at_inst(inst);
"expanding heap_addr: {:?}: {}", pos.use_srcloc(inst);
inst,
func.dfg.display_inst(inst) let addr =
); bounds_check_and_compute_addr(&mut pos, cfg, isa, heap, index, offset.into(), access_size);
// Replace the `heap_addr` and its result value with the legalized native
// address.
let addr_inst = pos.func.dfg.value_def(addr).unwrap_inst();
pos.func.dfg.replace_with_aliases(inst, addr_inst);
pos.func.layout.remove_inst(inst);
}
/// Helper used to emit bounds checks (as necessary) and compute the native
/// address of a heap access.
///
/// Returns the `ir::Value` holding the native address of the heap access.
fn bounds_check_and_compute_addr(
pos: &mut FuncCursor,
cfg: &mut ControlFlowGraph,
isa: &dyn TargetIsa,
heap: ir::Heap,
// Dynamic operand indexing into the heap.
index: ir::Value,
// Static immediate added to the index.
offset: u32,
// Static size of the heap access.
access_size: u8,
) -> ir::Value {
let pointer_type = isa.pointer_type();
let spectre = isa.flags().enable_heap_access_spectre_mitigation();
let offset_and_size = offset_plus_size(offset, access_size);
let ir::HeapData { let ir::HeapData {
offset_guard_size, base: _,
min_size,
offset_guard_size: guard_size,
style, style,
.. index_type,
} = &func.heaps[heap]; } = pos.func.heaps[heap].clone();
match *style {
ir::HeapStyle::Dynamic { bound_gv } => dynamic_addr(
isa,
inst,
heap,
index_operand,
u32::from(offset_immediate),
u8::from(access_size),
bound_gv,
func,
),
ir::HeapStyle::Static { bound } => static_addr(
isa,
inst,
heap,
index_operand,
u32::from(offset_immediate),
u8::from(access_size),
bound.into(),
(*offset_guard_size).into(),
func,
cfg,
),
}
}
/// Expand a `heap_addr` for a dynamic heap. let index = cast_index_to_pointer_ty(index, index_type, pointer_type, pos);
fn dynamic_addr(
isa: &dyn TargetIsa,
inst: ir::Inst,
heap: ir::Heap,
index: ir::Value,
offset: u32,
access_size: u8,
bound_gv: ir::GlobalValue,
func: &mut ir::Function,
) {
let index_ty = func.dfg.value_type(index);
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
let min_size = func.heaps[heap].min_size.into();
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
let index = cast_index_to_pointer_ty(index, index_ty, addr_ty, &mut pos);
// Start with the bounds check. Trap if `index + offset + access_size > bound`.
let bound = pos.ins().global_value(addr_ty, bound_gv);
let (cc, lhs, bound) = if offset == 0 && access_size == 1 {
// `index > bound - 1` is the same as `index >= bound`.
(IntCC::UnsignedGreaterThanOrEqual, index, bound)
} else if offset_plus_size(offset, access_size) <= min_size {
// We know that `bound >= min_size`, so here we can compare `offset >
// bound - (offset + access_size)` without wrapping.
let adj_bound = pos
.ins()
.iadd_imm(bound, -(offset_plus_size(offset, access_size) as i64));
trace!(
" inserting: {}",
pos.func.dfg.display_value_inst(adj_bound)
);
(IntCC::UnsignedGreaterThan, index, adj_bound)
} else {
// We need an overflow check for the adjusted offset.
let access_size_val = pos
.ins()
.iconst(addr_ty, offset_plus_size(offset, access_size) as i64);
let adj_offset =
pos.ins()
.uadd_overflow_trap(index, access_size_val, ir::TrapCode::HeapOutOfBounds);
trace!(
" inserting: {}",
pos.func.dfg.display_value_inst(adj_offset)
);
(IntCC::UnsignedGreaterThan, adj_offset, bound)
};
let spectre_oob_comparison = if isa.flags().enable_heap_access_spectre_mitigation() {
// When we emit a spectre-guarded heap access, we do a `select
// is_out_of_bounds, NULL, addr` to compute the address, and so the load
// will trap if the address is out of bounds, which means we don't need
// to do another explicit bounds check like we do below.
Some(SpectreOobComparison {
cc,
lhs,
rhs: bound,
})
} else {
let oob = pos.ins().icmp(cc, lhs, bound);
trace!(" inserting: {}", pos.func.dfg.display_value_inst(oob));
let trapnz = pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
trace!(" inserting: {}", pos.func.dfg.display_inst(trapnz));
None
};
// We need to emit code that will trap (or compute an address that will trap
// when accessed) if
//
// index + offset + access_size > bound
//
// or if the `index + offset + access_size` addition overflows.
//
// Note that we ultimately want a 64-bit integer (we only target 64-bit
// architectures at the moment) and that `offset` is a `u32` and
// `access_size` is a `u8`. This means that we can add the latter together
// as `u64`s without fear of overflow, and we only have to be concerned with
// whether adding in `index` will overflow.
//
// Finally, the following right-hand sides of the matches do have a little
// bit of duplicated code across them, but I think writing it this way is
// worth it for readability and seeing very clearly each of our cases for
// different bounds checks and optimizations of those bounds checks. It is
// intentionally written in a straightforward case-matching style that will
// hopefully make it easy to port to ISLE one day.
match style {
// ====== Dynamic Memories ======
//
// 1. First special case for when `offset + access_size == 1`:
//
// index + 1 > bound
// ==> index >= bound
//
// 1.a. When Spectre mitigations are enabled, avoid duplicating
// bounds checks between the mitigations and the regular bounds
// checks.
ir::HeapStyle::Dynamic { bound_gv } if offset_and_size == 1 && spectre => {
let bound = pos.ins().global_value(pointer_type, bound_gv);
compute_addr( compute_addr(
isa, isa,
inst, pos,
heap, heap,
addr_ty, pointer_type,
index, index,
offset, offset,
pos.func, Some(SpectreOobComparison {
spectre_oob_comparison, cc: IntCC::UnsignedGreaterThanOrEqual,
); lhs: index,
rhs: bound,
}),
)
}
// 1.b. Emit explicit `index >= bound` bounds checks.
ir::HeapStyle::Dynamic { bound_gv } if offset_and_size == 1 => {
let bound = pos.ins().global_value(pointer_type, bound_gv);
let oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThanOrEqual, index, bound);
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
compute_addr(isa, pos, heap, pointer_type, index, offset, None)
} }
/// Expand a `heap_addr` for a static heap. // 2. Second special case for when `offset + access_size <= min_size`.
fn static_addr(
isa: &dyn TargetIsa,
inst: ir::Inst,
heap: ir::Heap,
index: ir::Value,
offset: u32,
access_size: u8,
bound: u64,
guard_size: u64,
func: &mut ir::Function,
cfg: &mut ControlFlowGraph,
) {
let index_ty = func.dfg.value_type(index);
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// The goal here is to trap if `index + offset + access_size > bound`.
// //
// This first case is a trivial case where we can statically trap. // We know that `bound >= min_size`, so we can do the following
if offset_plus_size(offset, access_size) > bound { // comparison, without fear of the right-hand side wrapping around:
// This will simply always trap since `offset >= 0`. //
let trap = pos.ins().trap(ir::TrapCode::HeapOutOfBounds); // index + offset + access_size > bound
trace!(" inserting: {}", pos.func.dfg.display_inst(trap)); // ==> index > bound - (offset + access_size)
let iconst = pos.func.dfg.replace(inst).iconst(addr_ty, 0); //
trace!(" inserting: {}", pos.func.dfg.display_value_inst(iconst)); // 2.a. Dedupe bounds checks with Spectre mitigations.
ir::HeapStyle::Dynamic { bound_gv } if offset_and_size <= min_size.into() && spectre => {
let bound = pos.ins().global_value(pointer_type, bound_gv);
let adjusted_bound = pos.ins().iadd_imm(bound, -(offset_and_size as i64));
compute_addr(
isa,
pos,
heap,
pointer_type,
index,
offset,
Some(SpectreOobComparison {
cc: IntCC::UnsignedGreaterThan,
lhs: index,
rhs: adjusted_bound,
}),
)
}
// 2.b. Emit explicit `index > bound - (offset + access_size)` bounds
// checks.
ir::HeapStyle::Dynamic { bound_gv } if offset_and_size <= min_size.into() => {
let bound = pos.ins().global_value(pointer_type, bound_gv);
let adjusted_bound = pos.ins().iadd_imm(bound, -(offset_and_size as i64));
let oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThan, index, adjusted_bound);
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
compute_addr(isa, pos, heap, pointer_type, index, offset, None)
}
// 3. General case for dynamic memories:
//
// index + offset + access_size > bound
//
// And we have to handle the overflow case in the left-hand side.
//
// 3.a. Dedupe bounds checks with Spectre mitigations.
ir::HeapStyle::Dynamic { bound_gv } if spectre => {
let access_size_val = pos.ins().iconst(pointer_type, offset_and_size as i64);
let adjusted_index =
pos.ins()
.uadd_overflow_trap(index, access_size_val, ir::TrapCode::HeapOutOfBounds);
let bound = pos.ins().global_value(pointer_type, bound_gv);
compute_addr(
isa,
pos,
heap,
pointer_type,
index,
offset,
Some(SpectreOobComparison {
cc: IntCC::UnsignedGreaterThan,
lhs: adjusted_index,
rhs: bound,
}),
)
}
// 3.b. Emit an explicit `index + offset + access_size > bound`
// check.
ir::HeapStyle::Dynamic { bound_gv } => {
let access_size_val = pos.ins().iconst(pointer_type, offset_and_size as i64);
let adjusted_index =
pos.ins()
.uadd_overflow_trap(index, access_size_val, ir::TrapCode::HeapOutOfBounds);
let bound = pos.ins().global_value(pointer_type, bound_gv);
let oob = pos
.ins()
.icmp(IntCC::UnsignedGreaterThan, adjusted_index, bound);
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
compute_addr(isa, pos, heap, pointer_type, index, offset, None)
}
// ====== Static Memories ======
//
// With static memories we know the size of the heap bound at compile
// time.
//
// 1. First special case: trap immediately if `offset + access_size >
// bound`, since we will end up being out-of-bounds regardless of the
// given `index`.
ir::HeapStyle::Static { bound } if offset_and_size > bound.into() => {
pos.ins().trap(ir::TrapCode::HeapOutOfBounds);
// Split the block, as the trap is a terminator instruction. // Split the block, as the trap is a terminator instruction.
let curr_block = pos.current_block().expect("Cursor is not in a block"); let curr_block = pos.current_block().expect("Cursor is not in a block");
@@ -175,99 +280,120 @@ fn static_addr(
pos.insert_block(new_block); pos.insert_block(new_block);
cfg.recompute_block(pos.func, curr_block); cfg.recompute_block(pos.func, curr_block);
cfg.recompute_block(pos.func, new_block); cfg.recompute_block(pos.func, new_block);
return;
let null = pos.ins().iconst(pointer_type, 0);
return null;
} }
// After the trivial case is done we're now mostly interested in trapping if // 2. Second special case for when we can completely omit explicit
// bounds checks for 32-bit static memories.
// //
// index + offset + size > bound // First, let's rewrite our comparison to move all of the constants
// to one side:
// //
// We know `bound - offset - access_size` here is non-negative from the // index + offset + access_size > bound
// above comparison, so we can rewrite that as // ==> index > bound - (offset + access_size)
// //
// index > bound - offset - size // We know the subtraction on the right-hand side won't wrap because
// we didn't hit the first special case.
// //
// Additionally, we add our guard pages (if any) to the right-hand side, // Additionally, we add our guard pages (if any) to the right-hand
// since we can rely on the virtual memory subsystem at runtime to catch // side, since we can rely on the virtual memory subsystem at runtime
// out-of-bound accesses within the range `bound .. bound + guard_size`. So // to catch out-of-bound accesses within the range `bound .. bound +
// now we are dealing with // guard_size`. So now we are dealing with
// //
// index > bound + guard_size - offset - size // index > bound + guard_size - (offset + access_size)
// //
// (Note that `bound + guard_size` cannot overflow for correctly-configured // Note that `bound + guard_size` cannot overflow for
// heaps, as otherwise the heap wouldn't fit in a 64-bit memory space.) // correctly-configured heaps, as otherwise the heap wouldn't fit in
// a 64-bit memory space.
// //
// If we know the right-hand side is greater than or equal to 4GiB - 1, aka // The complement of our should-this-trap comparison expression is
// 0xffff_ffff, then with a 32-bit index we're guaranteed: // the should-this-not-trap comparison expression:
// //
// index <= 0xffff_ffff <= bound + guard_size - offset - access_size // index <= bound + guard_size - (offset + access_size)
// //
// meaning that `index` is always either in bounds or within the guard page // If we know the right-hand side is greater than or equal to
// region, neither of which require emitting an explicit bounds check. // `u32::MAX`, then
assert!( //
bound.checked_add(guard_size).is_some(), // index <= u32::MAX <= bound + guard_size - (offset + access_size)
"heap's configuration doesn't fit in a 64-bit memory space" //
); // This expression is always true when the heap is indexed with
let mut spectre_oob_comparison = None; // 32-bit integers because `index` cannot be larger than
let index = cast_index_to_pointer_ty(index, index_ty, addr_ty, &mut pos); // `u32::MAX`. This means that `index` is always either in bounds or
if index_ty == ir::types::I32 // within the guard page region, neither of which require emitting an
&& bound + guard_size - offset_plus_size(offset, access_size) >= 0xffff_ffff // explicit bounds check.
ir::HeapStyle::Static { bound }
if index_type == ir::types::I32
&& u64::from(u32::MAX)
<= u64::from(bound) + u64::from(guard_size) - offset_and_size =>
{ {
// Happy path! No bounds checks necessary! compute_addr(isa, pos, heap, pointer_type, index, offset, None)
} else {
// Since we have to emit explicit bounds checks anyways, ignore the
// guard pages and test against the precise limit.
let limit = bound - offset_plus_size(offset, access_size);
// Here we want to test the condition `index > limit` and if that's true
// then this is an out-of-bounds access and needs to trap.
let oob = pos
.ins()
.icmp_imm(IntCC::UnsignedGreaterThan, index, limit as i64);
trace!(" inserting: {}", pos.func.dfg.display_value_inst(oob));
let trapnz = pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
trace!(" inserting: {}", pos.func.dfg.display_inst(trapnz));
if isa.flags().enable_heap_access_spectre_mitigation() {
let limit = pos.ins().iconst(addr_ty, limit as i64);
trace!(" inserting: {}", pos.func.dfg.display_value_inst(limit));
spectre_oob_comparison = Some(SpectreOobComparison {
cc: IntCC::UnsignedGreaterThan,
lhs: index,
rhs: limit,
});
}
} }
// 3. General case for static memories.
//
// We have to explicitly test whether
//
// index > bound - (offset + access_size)
//
// and trap if so.
//
// Since we have to emit explicit bounds checks, we might as well be
// precise, not rely on the virtual memory subsystem at all, and not
// factor in the guard pages here.
//
// 3.a. Dedupe the Spectre mitigation and the explicit bounds check.
ir::HeapStyle::Static { bound } if spectre => {
// NB: this subtraction cannot wrap because we didn't hit the first
// special case.
let adjusted_bound = u64::from(bound) - offset_and_size;
let adjusted_bound = pos.ins().iconst(pointer_type, adjusted_bound as i64);
compute_addr( compute_addr(
isa, isa,
inst, pos,
heap, heap,
addr_ty, pointer_type,
index, index,
offset, offset,
pos.func, Some(SpectreOobComparison {
spectre_oob_comparison, cc: IntCC::UnsignedGreaterThan,
); lhs: index,
rhs: adjusted_bound,
}),
)
}
// 3.b. Emit the explicit `index > bound - (offset + access_size)`
// check.
ir::HeapStyle::Static { bound } => {
// See comment in 3.a. above.
let adjusted_bound = u64::from(bound) - offset_and_size;
let oob = pos
.ins()
.icmp_imm(IntCC::UnsignedGreaterThan, index, adjusted_bound as i64);
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
compute_addr(isa, pos, heap, pointer_type, index, offset, None)
}
}
} }
fn cast_index_to_pointer_ty( fn cast_index_to_pointer_ty(
index: ir::Value, index: ir::Value,
index_ty: ir::Type, index_ty: ir::Type,
addr_ty: ir::Type, pointer_ty: ir::Type,
pos: &mut FuncCursor, pos: &mut FuncCursor,
) -> ir::Value { ) -> ir::Value {
if index_ty == addr_ty { if index_ty == pointer_ty {
return index; return index;
} }
// Note that using 64-bit heaps on a 32-bit host is not currently supported, // Note that using 64-bit heaps on a 32-bit host is not currently supported,
// would require at least a bounds check here to ensure that the truncation // would require at least a bounds check here to ensure that the truncation
// from 64-to-32 bits doesn't lose any upper bits. For now though we're // from 64-to-32 bits doesn't lose any upper bits. For now though we're
// mostly interested in the 32-bit-heaps-on-64-bit-hosts cast. // mostly interested in the 32-bit-heaps-on-64-bit-hosts cast.
assert!(index_ty.bits() < addr_ty.bits()); assert!(index_ty.bits() < pointer_ty.bits());
// Convert `index` to `addr_ty`. // Convert `index` to `addr_ty`.
let extended_index = pos.ins().uextend(addr_ty, index); let extended_index = pos.ins().uextend(pointer_ty, index);
// Add debug value-label alias so that debuginfo can name the extended // Add debug value-label alias so that debuginfo can name the extended
// value as the address // value as the address
@@ -287,24 +413,22 @@ struct SpectreOobComparison {
rhs: ir::Value, rhs: ir::Value,
} }
/// Emit code for the base address computation of a `heap_addr` instruction. /// Emit code for the base address computation of a `heap_addr` instruction,
/// without any bounds checks (other than optional Spectre mitigations).
fn compute_addr( fn compute_addr(
isa: &dyn TargetIsa, isa: &dyn TargetIsa,
inst: ir::Inst, pos: &mut FuncCursor,
heap: ir::Heap, heap: ir::Heap,
addr_ty: ir::Type, addr_ty: ir::Type,
index: ir::Value, index: ir::Value,
offset: u32, offset: u32,
func: &mut ir::Function,
// If we are performing Spectre mitigation with conditional selects, the // If we are performing Spectre mitigation with conditional selects, the
// values to compare and the condition code that indicates an out-of bounds // values to compare and the condition code that indicates an out-of bounds
// condition; on this condition, the conditional move will choose a // condition; on this condition, the conditional move will choose a
// speculatively safe address (a zero / null pointer) instead. // speculatively safe address (a zero / null pointer) instead.
spectre_oob_comparison: Option<SpectreOobComparison>, spectre_oob_comparison: Option<SpectreOobComparison>,
) { ) -> ir::Value {
debug_assert_eq!(func.dfg.value_type(index), addr_ty); debug_assert_eq!(pos.func.dfg.value_type(index), addr_ty);
let mut pos = FuncCursor::new(func).at_inst(inst);
pos.use_srcloc(inst);
// Add the heap base address base // Add the heap base address base
let base = if isa.flags().enable_pinned_reg() && isa.flags().use_pinned_reg_as_heap_base() { let base = if isa.flags().enable_pinned_reg() && isa.flags().use_pinned_reg_as_heap_base() {
@@ -339,30 +463,26 @@ fn compute_addr(
let cmp = pos.ins().icmp(cc, lhs, rhs); let cmp = pos.ins().icmp(cc, lhs, rhs);
trace!(" inserting: {}", pos.func.dfg.display_value_inst(cmp)); trace!(" inserting: {}", pos.func.dfg.display_value_inst(cmp));
let value = pos let value = pos.ins().select_spectre_guard(cmp, zero, final_addr);
.func
.dfg
.replace(inst)
.select_spectre_guard(cmp, zero, final_addr);
trace!(" inserting: {}", pos.func.dfg.display_value_inst(value)); trace!(" inserting: {}", pos.func.dfg.display_value_inst(value));
value
} else if offset == 0 { } else if offset == 0 {
let addr = pos.func.dfg.replace(inst).iadd(base, index); let addr = pos.ins().iadd(base, index);
trace!(" inserting: {}", pos.func.dfg.display_value_inst(addr)); trace!(" inserting: {}", pos.func.dfg.display_value_inst(addr));
addr
} else { } else {
let final_base = pos.ins().iadd(base, index); let final_base = pos.ins().iadd(base, index);
trace!( trace!(
" inserting: {}", " inserting: {}",
pos.func.dfg.display_value_inst(final_base) pos.func.dfg.display_value_inst(final_base)
); );
let addr = pos let addr = pos.ins().iadd_imm(final_base, offset as i64);
.func
.dfg
.replace(inst)
.iadd_imm(final_base, offset as i64);
trace!(" inserting: {}", pos.func.dfg.display_value_inst(addr)); trace!(" inserting: {}", pos.func.dfg.display_value_inst(addr));
addr
} }
} }
#[inline]
fn offset_plus_size(offset: u32, size: u8) -> u64 { fn offset_plus_size(offset: u32, size: u8) -> u64 {
// Cannot overflow because we are widening to `u64`. // Cannot overflow because we are widening to `u64`.
offset as u64 + size as u64 offset as u64 + size as u64
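For dynamic memories, the match arms above either fold `offset + access_size`
into the bound (when it is known to be at most `min_size`) or guard the
addition with `uadd_overflow_trap`. A standalone model of that check, again
with an ad hoc helper name rather than Cranelift's API:

/// A dynamic-heap access is in bounds iff `index + offset + access_size`
/// neither wraps (the `uadd_overflow_trap` case) nor exceeds the heap's
/// current bound loaded from `bound_gv`.
fn dynamic_access_in_bounds(index: u64, offset: u32, access_size: u8, bound: u64) -> bool {
    let constant = offset as u64 + access_size as u64; // widening add, cannot wrap
    match index.checked_add(constant) {
        None => false, // overflow: `uadd_overflow_trap` fires
        // Otherwise compare against the bound; with Spectre mitigations this
        // same comparison feeds `select_spectre_guard` instead of a `trapnz`.
        Some(end) => end <= bound,
    }
}

fn main() {
    assert!(dynamic_access_in_bounds(0, 16, 8, 0x1_0000));
    assert!(!dynamic_access_in_bounds(0xffff_fff0, 16, 8, 0x1_0000));
    assert!(!dynamic_access_in_bounds(u64::MAX, 0, 1, u64::MAX));
}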

View File

@@ -26,7 +26,7 @@ mod heap;
mod table;
use self::globalvalue::expand_global_value;
-use self::heap::expand_heap_addr;
+use self::heap::{expand_heap_addr, expand_heap_load, expand_heap_store};
use self::table::expand_table_addr;
fn imm_const(pos: &mut FuncCursor, arg: Value, imm: Imm64, is_signed: bool) -> Value {
@@ -78,6 +78,16 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa:
offset,
size,
} => expand_heap_addr(inst, &mut pos.func, cfg, isa, heap, arg, offset, size),
+InstructionData::HeapLoad {
+opcode: ir::Opcode::HeapLoad,
+heap_imm,
+arg,
+} => expand_heap_load(inst, &mut pos.func, cfg, isa, heap_imm, arg),
+InstructionData::HeapStore {
+opcode: ir::Opcode::HeapStore,
+heap_imm,
+args,
+} => expand_heap_store(inst, &mut pos.func, cfg, isa, heap_imm, args[0], args[1]),
InstructionData::StackLoad {
opcode: ir::Opcode::StackLoad,
stack_slot,

View File

@@ -35,18 +35,12 @@ block0(v0: i64, v1: i32):
; block0: ; block0:
; mov w6, w1 ; mov w6, w1
; add x7, x0, x1, UXTW
; movz x5, #0
; subs xzr, x6, #65536 ; subs xzr, x6, #65536
; b.ls label1 ; b label2 ; csel x0, x5, x7, hi
; block1:
; add x8, x0, x1, UXTW
; movz x7, #0
; subs xzr, x6, #65536
; csel x0, x7, x8, hi
; csdb ; csdb
; ret ; ret
; block2:
; udf #0xc11f
function %dynamic_heap_check_with_offset(i64 vmctx, i32) -> i64 { function %dynamic_heap_check_with_offset(i64 vmctx, i32) -> i64 {
gv0 = vmctx gv0 = vmctx
@@ -59,16 +53,16 @@ block0(v0: i64, v1: i32):
} }
; block0: ; block0:
; mov w11, w1 ; mov w10, w1
; ldr x10, [x0]
; movz x9, #24 ; movz x9, #24
; adds x11, x11, x9 ; adds x11, x10, x9
; b.lo 8 ; udf ; b.lo 8 ; udf
; add x12, x0, x1, UXTW ; ldr x12, [x0]
; add x12, x12, #16 ; add x13, x0, x1, UXTW
; movz x13, #0 ; add x13, x13, #16
; subs xzr, x11, x10 ; movz x10, #0
; csel x0, x13, x12, hi ; subs xzr, x11, x12
; csel x0, x10, x13, hi
; csdb ; csdb
; ret ; ret
@@ -82,19 +76,13 @@ block0(v0: i64, v1: i32):
} }
; block0: ; block0:
; mov w9, w1 ; mov w8, w1
; movz w10, #65512 ; add x9, x0, x1, UXTW
; subs xzr, x9, x10 ; add x9, x9, #16
; b.ls label1 ; b label2 ; movz w6, #65512
; block1: ; movz x10, #0
; add x11, x0, x1, UXTW ; subs xzr, x8, x6
; add x11, x11, #16 ; csel x0, x10, x9, hi
; movz w10, #65512
; movz x12, #0
; subs xzr, x9, x10
; csel x0, x12, x11, hi
; csdb ; csdb
; ret ; ret
; block2:
; udf #0xc11f

View File

@@ -32,19 +32,13 @@ block0(v0: i64, v1: i32):
} }
; block0: ; block0:
; uext.w a7,a1 ; uext.w a6,a1
; lui a6,16 ; add a5,a0,a6
; ule t3,a7,a6##ty=i64 ; lui a3,16
; bne t3,zero,taken(label1),not_taken(label2) ; ugt a6,a6,a3##ty=i64
; block1: ; li a7,0
; add t3,a0,a7 ; selectif_spectre_guard a0,a7,a5##test=a6
; lui a6,16
; ugt t4,a7,a6##ty=i64
; li t0,0
; selectif_spectre_guard a0,t0,t3##test=t4
; ret ; ret
; block2:
; udf##trap_code=heap_oob
function %dynamic_heap_check_with_offset(i64 vmctx, i32) -> i64 { function %dynamic_heap_check_with_offset(i64 vmctx, i32) -> i64 {
gv0 = vmctx gv0 = vmctx
@@ -57,17 +51,17 @@ block0(v0: i64, v1: i32):
} }
; block0: ; block0:
; uext.w t0,a1 ; uext.w t4,a1
; ld t4,0(a0)
; li a7,24 ; li a7,24
; add t1,t0,a7 ; add t0,t4,a7
; ult t2,t1,t0##ty=i64 ; ult t1,t0,t4##ty=i64
; trap_if t2,heap_oob ; trap_if t1,heap_oob
; add t0,a0,t0 ; ld t1,0(a0)
; addi t0,t0,16 ; add t2,a0,t4
; ugt t4,t1,t4##ty=i64 ; addi t2,t2,16
; ugt t4,t0,t1##ty=i64
; li t1,0 ; li t1,0
; selectif_spectre_guard a0,t1,t0##test=t4 ; selectif_spectre_guard a0,t1,t2##test=t4
; ret ; ret
function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 { function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 {
@@ -80,20 +74,13 @@ block0(v0: i64, v1: i32):
} }
; block0: ; block0:
; uext.w t3,a1 ; uext.w a6,a1
; lui a7,16 ; add a7,a0,a6
; addi a7,a7,4072 ; addi a7,a7,16
; ule t0,t3,a7##ty=i64 ; lui a4,16
; bne t0,zero,taken(label1),not_taken(label2) ; addi a4,a4,4072
; block1: ; ugt t3,a6,a4##ty=i64
; add t0,a0,t3 ; li t4,0
; addi t0,t0,16 ; selectif_spectre_guard a0,t4,a7##test=t3
; lui t4,16
; addi t4,t4,4072
; ugt t1,t3,t4##ty=i64
; li a0,0
; selectif_spectre_guard a0,a0,t0##test=t1
; ret ; ret
; block2:
; udf##trap_code=heap_oob

View File

@@ -32,16 +32,11 @@ block0(v0: i64, v1: i32):
; block0: ; block0:
; llgfr %r4, %r3 ; llgfr %r4, %r3
; clgfi %r4, 65536
; jgnh label1 ; jg label2
; block1:
; agr %r2, %r4 ; agr %r2, %r4
; lghi %r5, 0 ; lghi %r3, 0
; clgfi %r4, 65536 ; clgfi %r4, 65536
; locgrh %r2, %r5 ; locgrh %r2, %r3
; br %r14 ; br %r14
; block2:
; trap
function %dynamic_heap_check_with_offset(i64 vmctx, i32) -> i64 { function %dynamic_heap_check_with_offset(i64 vmctx, i32) -> i64 {
gv0 = vmctx gv0 = vmctx
@@ -53,19 +48,17 @@ block0(v0: i64, v1: i32):
return v2 return v2
} }
; stmg %r7, %r15, 56(%r15)
; block0: ; block0:
; llgfr %r7, %r3 ; llgfr %r5, %r3
; lg %r4, 0(%r2) ; lghi %r4, 24
; lghi %r5, 24 ; algfr %r4, %r3
; algfr %r5, %r3
; jle 6 ; trap ; jle 6 ; trap
; agr %r2, %r7 ; lg %r3, 0(%r2)
; aghi %r2, 16 ; agrk %r5, %r2, %r5
; lghi %r3, 0 ; aghik %r2, %r5, 16
; clgr %r5, %r4 ; lghi %r5, 0
; locgrh %r2, %r3 ; clgr %r4, %r3
; lmg %r7, %r15, 56(%r15) ; locgrh %r2, %r5
; br %r14 ; br %r14
function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 { function %static_heap_check_with_offset(i64 vmctx, i32) -> i64 {
@@ -79,15 +72,10 @@ block0(v0: i64, v1: i32):
; block0: ; block0:
; llgfr %r5, %r3 ; llgfr %r5, %r3
; agr %r2, %r5
; aghi %r2, 16
; lghi %r4, 0
; clgfi %r5, 65512 ; clgfi %r5, 65512
; jgnh label1 ; jg label2 ; locgrh %r2, %r4
; block1:
; agrk %r3, %r2, %r5
; aghik %r2, %r3, 16
; lghi %r3, 0
; clgfi %r5, 65512
; locgrh %r2, %r3
; br %r14 ; br %r14
; block2:
; trap

View File

@@ -20,11 +20,11 @@ block0(v0: i32, v1: i64):
; movq %rsp, %rbp ; movq %rsp, %rbp
; block0: ; block0:
; movl %edi, %eax ; movl %edi, %eax
; movq 8(%rsi), %r10 ; movq %rax, %r10
; movq %rax, %r11 ; addq %r10, $32768, %r10
; addq %r11, $32768, %r11
; jnb ; ud2 heap_oob ; ; jnb ; ud2 heap_oob ;
; cmpq %r10, %r11 ; movq 8(%rsi), %r11
; cmpq %r11, %r10
; jbe label1; j label2 ; jbe label1; j label2
; block1: ; block1:
; addq %rax, 0(%rsi), %rax ; addq %rax, 0(%rsi), %rax

View File

@@ -33,14 +33,14 @@ block0(v0: i32, v1: i64):
; movq %rsp, %rbp ; movq %rsp, %rbp
; block0: ; block0:
; movl %edi, %eax ; movl %edi, %eax
; movq 8(%rsi), %rdi ; movq %rax, %rdi
; movq %rax, %rcx ; addq %rdi, $32768, %rdi
; addq %rcx, $32768, %rcx
; jnb ; ud2 heap_oob ; ; jnb ; ud2 heap_oob ;
; movq 8(%rsi), %rcx
; addq %rax, 0(%rsi), %rax ; addq %rax, 0(%rsi), %rax
; addq %rax, $32768, %rax ; addq %rax, $32768, %rax
; xorq %rsi, %rsi, %rsi ; xorq %rsi, %rsi, %rsi
; cmpq %rdi, %rcx ; cmpq %rcx, %rdi
; cmovnbeq %rsi, %rax, %rax ; cmovnbeq %rsi, %rax, %rax
; movq %rbp, %rsp ; movq %rbp, %rsp
; popq %rbp ; popq %rbp
@@ -64,19 +64,14 @@ block0(v0: i64, v1: i32):
; movq %rsp, %rbp ; movq %rsp, %rbp
; block0: ; block0:
; movl %esi, %r9d ; movl %esi, %r9d
; cmpq $4096, %r9
; jbe label1; j label2
; block1:
; movq %r9, %rax ; movq %r9, %rax
; addq %rax, 0(%rdi), %rax ; addq %rax, 0(%rdi), %rax
; xorq %r10, %r10, %r10 ; xorq %r8, %r8, %r8
; cmpq $4096, %r9 ; cmpq $4096, %r9
; cmovnbeq %r10, %rax, %rax ; cmovnbeq %r8, %rax, %rax
; movq %rbp, %rsp ; movq %rbp, %rsp
; popq %rbp ; popq %rbp
; ret ; ret
; block2:
; ud2 heap_oob
;; When a static memory is the "right" size (4GB memory, 2GB guard regions), the
;; Spectre mitigation is not present. Cranelift relies on the memory permissions
@@ -113,17 +108,16 @@ block0(v0: i64, v1: i32):
; pushq %rbp ; pushq %rbp
; movq %rsp, %rbp ; movq %rsp, %rbp
; block0: ; block0:
; movq %rdi, %rax ; movl %esi, %esi
; movl %esi, %edi ; movq %rsi, %r11
; movq 0(%rax), %rsi ; addq %r11, $24, %r11
; movq %rdi, %rcx
; addq %rcx, $24, %rcx
; jnb ; ud2 heap_oob ; ; jnb ; ud2 heap_oob ;
; addq %rax, %rdi, %rax ; movq %rdi, %rax
; addq %rax, %rsi, %rax
; addq %rax, $16, %rax ; addq %rax, $16, %rax
; xorq %rdi, %rdi, %rdi ; xorq %rsi, %rsi, %rsi
; cmpq %rsi, %rcx ; cmpq 0(%rdi), %r11
; cmovnbeq %rdi, %rax, %rax ; cmovnbeq %rsi, %rax, %rax
; movq %rbp, %rsp ; movq %rbp, %rsp
; popq %rbp ; popq %rbp
; ret ; ret
@@ -141,18 +135,13 @@ block0(v0: i64, v1: i32):
; movq %rsp, %rbp ; movq %rsp, %rbp
; block0: ; block0:
; movl %esi, %r10d ; movl %esi, %r10d
; cmpq $65512, %r10
; jbe label1; j label2
; block1:
; movq %rdi, %rax ; movq %rdi, %rax
; addq %rax, %r10, %rax ; addq %rax, %r10, %rax
; addq %rax, $16, %rax ; addq %rax, $16, %rax
; xorq %r11, %r11, %r11 ; xorq %r9, %r9, %r9
; cmpq $65512, %r10 ; cmpq $65512, %r10
; cmovnbeq %r11, %rax, %rax ; cmovnbeq %r9, %rax, %rax
; movq %rbp, %rsp ; movq %rbp, %rsp
; popq %rbp ; popq %rbp
; ret ; ret
; block2:
; ud2 heap_oob

View File

@@ -20,13 +20,14 @@ block0(v0: i64, v1: i32):
; check: block0(v0: i64, v1: i32):
; nextln: v4 = uextend.i64 v1
-; nextln: v5 = load.i64 notrap aligned v0+88
-; nextln: v6 = iconst.i64 4
-; nextln: v7 = uadd_overflow_trap v4, v6, heap_oob ; v6 = 4
+; nextln: v5 = iconst.i64 4
+; nextln: v6 = uadd_overflow_trap v4, v5, heap_oob ; v5 = 4
+; nextln: v7 = load.i64 notrap aligned v0+88
; nextln: v8 = load.i64 notrap aligned v0+80
; nextln: v9 = iadd v8, v4
; nextln: v10 = iconst.i64 0
-; nextln: v11 = icmp ugt v7, v5
-; nextln: v2 = select_spectre_guard v11, v10, v9 ; v10 = 0
+; nextln: v11 = icmp ugt v6, v7
+; nextln: v12 = select_spectre_guard v11, v10, v9 ; v10 = 0
+; nextln: v2 -> v12
; nextln: v3 = load.i32 little heap v2
; nextln: return v3

View File

@@ -17,5 +17,6 @@ block0(v0: i64, v1: i32):
; check: block0(v0: i64, v1: i32):
; nextln: v3 = uextend.i64 v1
; nextln: v4 = load.i64 notrap aligned v0
-; nextln: v2 = iadd v4, v3
+; nextln: v5 = iadd v4, v3
+; nextln: v2 -> v5
; nextln: return v2

View File

@@ -3,6 +3,8 @@ set enable_heap_access_spectre_mitigation=true
target x86_64
;; The offset guard is not large enough to avoid explicit bounds checks.
+;; Additionally, the explicit bounds check gets deduped with the Spectre
+;; mitigation.
function %test(i64 vmctx, i32) -> i64 {
gv0 = vmctx
@@ -16,19 +18,11 @@ block0(v0: i64, v1: i32):
; check: block0(v0: i64, v1: i32):
; nextln: v3 = uextend.i64 v1
-; nextln: v10 = iconst.i64 4092
-; nextln: v4 = icmp ugt v3, v10 ; v10 = 4092
-; nextln: brz v4, block2
-; nextln: jump block1
-; nextln:
-; nextln: block1:
-; nextln: trap heap_oob
-; nextln:
-; nextln: block2:
-; nextln: v5 = iconst.i64 4092
-; nextln: v6 = load.i64 notrap aligned v0
-; nextln: v7 = iadd v6, v3
-; nextln: v8 = iconst.i64 0
-; nextln: v9 = icmp.i64 ugt v3, v5 ; v5 = 4092
-; nextln: v2 = select_spectre_guard v9, v8, v7 ; v8 = 0
+; nextln: v4 = iconst.i64 4092
+; nextln: v5 = load.i64 notrap aligned v0
+; nextln: v6 = iadd v5, v3
+; nextln: v7 = iconst.i64 0
+; nextln: v8 = icmp ugt v3, v4 ; v4 = 4092
+; nextln: v9 = select_spectre_guard v8, v7, v6 ; v7 = 0
+; nextln: v2 -> v9
; nextln: return v2

View File

@@ -1,14 +1,15 @@
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=false
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, i32, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0

View File

@@ -1,14 +1,15 @@
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=true
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, i32, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0

View File

@@ -1,14 +1,15 @@
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=false
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, i32, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0

View File

@@ -1,14 +1,15 @@
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=true
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, i32, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0

View File

@@ -1,14 +1,15 @@
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=false
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, i64, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0

View File

@@ -1,14 +1,15 @@
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=true
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, i64, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0

View File

@@ -1,14 +1,15 @@
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=false
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, i64, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0

View File

@@ -1,14 +1,15 @@
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=true
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, i64, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0

View File

@@ -1,14 +1,15 @@
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=false
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, i32, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0

View File

@@ -1,14 +1,15 @@
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=true
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, i32, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0

View File

@@ -1,14 +1,15 @@
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=false
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, i32, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0

View File

@@ -1,14 +1,15 @@
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=true
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, i32, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0

View File

@@ -1,14 +1,15 @@
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=false
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, i64, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0

View File

@@ -1,14 +1,15 @@
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=true
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, i64, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0

View File

@@ -1,14 +1,15 @@
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=false
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, i64, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0

View File

@@ -1,14 +1,15 @@
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=true
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, i64, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0

View File

@@ -39,14 +39,15 @@ function generate_one_test() {
;; !!! GENERATED BY 'make-heap-load-store-tests.sh' DO NOT EDIT !!!
test interpret
-;; test run
-;; target x86_64
-;; target s390x
-;; target aarch64
-;; target riscv64
+test run
set enable_heap_access_spectre_mitigation=${enable_spectre}
+target x86_64
+target s390x
+target aarch64
+target riscv64
function %do_store(i64 vmctx, ${index_type}, i32) {
gv0 = vmctx
gv1 = load.i64 notrap aligned gv0+0