Cranelift: Make heap_addr return calculated base + index + offset (#5231)
* Cranelift: Make `heap_addr` return calculated `base + index + offset`
Rather than return just the `base + index`.
(Note: I've chosen to use the nomenclature "index" for the dynamic operand and
"offset" for the static immediate.)
This moves the addition of the `offset` into `heap_addr`, instead of leaving it
for the subsequent memory operation, so that we can Spectre-guard the full
address, and not allow speculative execution to read the first 4GiB of memory.
Before this commit, we were effectively doing
load(spectre_guard(base + index) + offset)
Now we are effectively doing
load(spectre_guard(base + index + offset))
Finally, this also corrects `heap_addr`'s documented semantics to say that it
returns an address that will trap on access if `index + offset + access_size` is
out of bounds for the given heap, rather than saying that the `heap_addr` itself
will trap. This matches the implemented behavior for static memories, and after
https://github.com/bytecodealliance/wasmtime/pull/5190 lands (which is blocked
on this commit) will also match the implemented behavior for dynamic memories.
* Update heap_addr docs
* Factor out `offset + size` to a helper
This commit is contained in:
@@ -116,6 +116,15 @@ impl InstructionFormatBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
pub fn imm_with_name(mut self, name: &'static str, operand_kind: &OperandKind) -> Self {
|
||||
let field = FormatField {
|
||||
kind: operand_kind.clone(),
|
||||
member: name,
|
||||
};
|
||||
self.0.imm_fields.push(field);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn typevar_operand(mut self, operand_index: usize) -> Self {
|
||||
assert!(self.0.typevar_operand.is_none());
|
||||
assert!(operand_index < self.0.num_value_operands);
|
||||
|
||||
@@ -202,7 +202,8 @@ impl Formats {
|
||||
heap_addr: Builder::new("HeapAddr")
|
||||
.imm(&entities.heap)
|
||||
.value()
|
||||
.imm(&imm.uimm32)
|
||||
.imm_with_name("offset", &imm.uimm32)
|
||||
.imm_with_name("size", &imm.uimm8)
|
||||
.build(),
|
||||
|
||||
// Accessing a WebAssembly table.
|
||||
|
||||
22
cranelift/codegen/meta/src/shared/instructions.rs
Normal file → Executable file
22
cranelift/codegen/meta/src/shared/instructions.rs
Normal file → Executable file
@@ -1128,26 +1128,30 @@ pub(crate) fn define(
|
||||
);
|
||||
|
||||
let H = &Operand::new("H", &entities.heap);
|
||||
let p = &Operand::new("p", HeapOffset);
|
||||
let Size = &Operand::new("Size", &imm.uimm32).with_doc("Size in bytes");
|
||||
let index = &Operand::new("index", HeapOffset);
|
||||
let Offset = &Operand::new("Offset", &imm.uimm32).with_doc("Static offset immediate in bytes");
|
||||
let Size = &Operand::new("Size", &imm.uimm8).with_doc("Static size immediate in bytes");
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"heap_addr",
|
||||
r#"
|
||||
Bounds check and compute absolute address of heap memory.
|
||||
Bounds check and compute absolute address of ``index + Offset`` in heap memory.
|
||||
|
||||
Verify that the offset range ``p .. p + Size - 1`` is in bounds for the
|
||||
heap H, and generate an absolute address that is safe to dereference.
|
||||
Verify that the range ``index .. index + Offset + Size`` is in bounds for the
|
||||
heap ``H``, and generate an absolute address that is safe to dereference.
|
||||
|
||||
1. If ``p + Size`` is not greater than the heap bound, return an
|
||||
absolute address corresponding to a byte offset of ``p`` from the
|
||||
1. If ``index + Offset + Size`` is less than or equal to the heap bound, return an
|
||||
absolute address corresponding to a byte offset of ``index + Offset`` from the
|
||||
heap's base address.
|
||||
2. If ``p + Size`` is greater than the heap bound, generate a trap.
|
||||
|
||||
2. If ``index + Offset + Size`` is greater than the heap bound, return the
|
||||
``NULL`` pointer or any other address that is guaranteed to generate a trap
|
||||
when accessed.
|
||||
"#,
|
||||
&formats.heap_addr,
|
||||
)
|
||||
.operands_in(vec![H, p, Size])
|
||||
.operands_in(vec![H, index, Offset, Size])
|
||||
.operands_out(vec![addr]),
|
||||
);
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
use crate::cursor::{Cursor, FuncCursor};
|
||||
use crate::flowgraph::ControlFlowGraph;
|
||||
use crate::ir::condcodes::IntCC;
|
||||
use crate::ir::immediates::Uimm32;
|
||||
use crate::ir::immediates::{Uimm32, Uimm8};
|
||||
use crate::ir::{self, InstBuilder, RelSourceLoc};
|
||||
use crate::isa::TargetIsa;
|
||||
|
||||
@@ -17,16 +17,18 @@ pub fn expand_heap_addr(
|
||||
cfg: &mut ControlFlowGraph,
|
||||
isa: &dyn TargetIsa,
|
||||
heap: ir::Heap,
|
||||
offset: ir::Value,
|
||||
access_size: Uimm32,
|
||||
index_operand: ir::Value,
|
||||
offset_immediate: Uimm32,
|
||||
access_size: Uimm8,
|
||||
) {
|
||||
match func.heaps[heap].style {
|
||||
ir::HeapStyle::Dynamic { bound_gv } => dynamic_addr(
|
||||
isa,
|
||||
inst,
|
||||
heap,
|
||||
offset,
|
||||
u64::from(access_size),
|
||||
index_operand,
|
||||
u32::from(offset_immediate),
|
||||
u8::from(access_size),
|
||||
bound_gv,
|
||||
func,
|
||||
),
|
||||
@@ -34,8 +36,9 @@ pub fn expand_heap_addr(
|
||||
isa,
|
||||
inst,
|
||||
heap,
|
||||
offset,
|
||||
u64::from(access_size),
|
||||
index_operand,
|
||||
u32::from(offset_immediate),
|
||||
u8::from(access_size),
|
||||
bound.into(),
|
||||
func,
|
||||
cfg,
|
||||
@@ -48,35 +51,40 @@ fn dynamic_addr(
|
||||
isa: &dyn TargetIsa,
|
||||
inst: ir::Inst,
|
||||
heap: ir::Heap,
|
||||
offset: ir::Value,
|
||||
access_size: u64,
|
||||
index: ir::Value,
|
||||
offset: u32,
|
||||
access_size: u8,
|
||||
bound_gv: ir::GlobalValue,
|
||||
func: &mut ir::Function,
|
||||
) {
|
||||
let offset_ty = func.dfg.value_type(offset);
|
||||
let index_ty = func.dfg.value_type(index);
|
||||
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
|
||||
let min_size = func.heaps[heap].min_size.into();
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
let offset = cast_offset_to_pointer_ty(offset, offset_ty, addr_ty, &mut pos);
|
||||
let index = cast_index_to_pointer_ty(index, index_ty, addr_ty, &mut pos);
|
||||
|
||||
// Start with the bounds check. Trap if `offset + access_size > bound`.
|
||||
// Start with the bounds check. Trap if `index + offset + access_size > bound`.
|
||||
let bound = pos.ins().global_value(addr_ty, bound_gv);
|
||||
let (cc, lhs, bound) = if access_size == 1 {
|
||||
// `offset > bound - 1` is the same as `offset >= bound`.
|
||||
(IntCC::UnsignedGreaterThanOrEqual, offset, bound)
|
||||
} else if access_size <= min_size {
|
||||
// We know that bound >= min_size, so here we can compare `offset > bound - access_size`
|
||||
// without wrapping.
|
||||
let adj_bound = pos.ins().iadd_imm(bound, -(access_size as i64));
|
||||
(IntCC::UnsignedGreaterThan, offset, adj_bound)
|
||||
let (cc, lhs, bound) = if offset == 0 && access_size == 1 {
|
||||
// `index > bound - 1` is the same as `index >= bound`.
|
||||
(IntCC::UnsignedGreaterThanOrEqual, index, bound)
|
||||
} else if offset_plus_size(offset, access_size) <= min_size {
|
||||
// We know that `bound >= min_size`, so here we can compare `offset >
|
||||
// bound - (offset + access_size)` without wrapping.
|
||||
let adj_bound = pos
|
||||
.ins()
|
||||
.iadd_imm(bound, -(offset_plus_size(offset, access_size) as i64));
|
||||
(IntCC::UnsignedGreaterThan, index, adj_bound)
|
||||
} else {
|
||||
// We need an overflow check for the adjusted offset.
|
||||
let access_size_val = pos.ins().iconst(addr_ty, access_size as i64);
|
||||
let access_size_val = pos
|
||||
.ins()
|
||||
.iconst(addr_ty, offset_plus_size(offset, access_size) as i64);
|
||||
let adj_offset =
|
||||
pos.ins()
|
||||
.uadd_overflow_trap(offset, access_size_val, ir::TrapCode::HeapOutOfBounds);
|
||||
.uadd_overflow_trap(index, access_size_val, ir::TrapCode::HeapOutOfBounds);
|
||||
(IntCC::UnsignedGreaterThan, adj_offset, bound)
|
||||
};
|
||||
let oob = pos.ins().icmp(cc, lhs, bound);
|
||||
@@ -93,6 +101,7 @@ fn dynamic_addr(
|
||||
inst,
|
||||
heap,
|
||||
addr_ty,
|
||||
index,
|
||||
offset,
|
||||
pos.func,
|
||||
spectre_oob_comparison,
|
||||
@@ -104,26 +113,27 @@ fn static_addr(
|
||||
isa: &dyn TargetIsa,
|
||||
inst: ir::Inst,
|
||||
heap: ir::Heap,
|
||||
mut offset: ir::Value,
|
||||
access_size: u64,
|
||||
index: ir::Value,
|
||||
offset: u32,
|
||||
access_size: u8,
|
||||
bound: u64,
|
||||
func: &mut ir::Function,
|
||||
cfg: &mut ControlFlowGraph,
|
||||
) {
|
||||
let offset_ty = func.dfg.value_type(offset);
|
||||
let index_ty = func.dfg.value_type(index);
|
||||
let addr_ty = func.dfg.value_type(func.dfg.first_result(inst));
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
// The goal here is to trap if `offset + access_size > bound`.
|
||||
// The goal here is to trap if `index + offset + access_size > bound`.
|
||||
//
|
||||
// This first case is a trivial case where we can easily trap.
|
||||
if access_size > bound {
|
||||
// This first case is a trivial case where we can statically trap.
|
||||
if offset_plus_size(offset, access_size) > bound {
|
||||
// This will simply always trap since `offset >= 0`.
|
||||
pos.ins().trap(ir::TrapCode::HeapOutOfBounds);
|
||||
pos.func.dfg.replace(inst).iconst(addr_ty, 0);
|
||||
|
||||
// Split Block, as the trap is a terminator instruction.
|
||||
// Split the block, as the trap is a terminator instruction.
|
||||
let curr_block = pos.current_block().expect("Cursor is not in a block");
|
||||
let new_block = pos.func.dfg.make_block();
|
||||
pos.insert_block(new_block);
|
||||
@@ -132,29 +142,29 @@ fn static_addr(
|
||||
return;
|
||||
}
|
||||
|
||||
// After the trivial case is done we're now mostly interested in trapping
|
||||
// if `offset > bound - access_size`. We know `bound - access_size` here is
|
||||
// non-negative from the above comparison.
|
||||
// After the trivial case is done we're now mostly interested in trapping if
|
||||
// `index > bound - offset - access_size`. We know `bound - offset -
|
||||
// access_size` here is non-negative from the above comparison.
|
||||
//
|
||||
// If we can know `bound - access_size >= 4GB` then with a 32-bit offset
|
||||
// we're guaranteed:
|
||||
// If we can know `bound - offset - access_size >= 4GB` then with a 32-bit
|
||||
// offset we're guaranteed:
|
||||
//
|
||||
// bound - access_size >= 4GB > offset
|
||||
// bound - offset - access_size >= 4GB > index
|
||||
//
|
||||
// or, in other words, `offset < bound - access_size`, meaning we can't trap
|
||||
// for any value of `offset`.
|
||||
// or, in other words, `index < bound - offset - access_size`, meaning we
|
||||
// can't trap for any value of `index`.
|
||||
//
|
||||
// With that we have an optimization here where with 32-bit offsets and
|
||||
// `bound - access_size >= 4GB` we can omit a bounds check.
|
||||
let limit = bound - access_size;
|
||||
let limit = bound - offset as u64 - access_size as u64;
|
||||
let mut spectre_oob_comparison = None;
|
||||
offset = cast_offset_to_pointer_ty(offset, offset_ty, addr_ty, &mut pos);
|
||||
if offset_ty != ir::types::I32 || limit < 0xffff_ffff {
|
||||
// Here we want to test the condition `offset > limit` and if that's
|
||||
let index = cast_index_to_pointer_ty(index, index_ty, addr_ty, &mut pos);
|
||||
if index_ty != ir::types::I32 || limit < 0xffff_ffff {
|
||||
// Here we want to test the condition `index > limit` and if that's
|
||||
// true then this is an out-of-bounds access and needs to trap. For ARM
|
||||
// and other RISC architectures it's easier to test against an immediate
|
||||
// that's even instead of odd, so if `limit` is odd then we instead test
|
||||
// for `offset >= limit + 1`.
|
||||
// for `index >= limit + 1`.
|
||||
//
|
||||
// The thinking behind this is that:
|
||||
//
|
||||
@@ -164,10 +174,10 @@ fn static_addr(
|
||||
// should mean that `A >= B + 1` is an equivalent check for `A > B`
|
||||
let (cc, lhs, limit_imm) = if limit & 1 == 1 {
|
||||
let limit = limit as i64 + 1;
|
||||
(IntCC::UnsignedGreaterThanOrEqual, offset, limit)
|
||||
(IntCC::UnsignedGreaterThanOrEqual, index, limit)
|
||||
} else {
|
||||
let limit = limit as i64;
|
||||
(IntCC::UnsignedGreaterThan, offset, limit)
|
||||
(IntCC::UnsignedGreaterThan, index, limit)
|
||||
};
|
||||
let oob = pos.ins().icmp_imm(cc, lhs, limit_imm);
|
||||
pos.ins().trapnz(oob, ir::TrapCode::HeapOutOfBounds);
|
||||
@@ -182,29 +192,30 @@ fn static_addr(
|
||||
inst,
|
||||
heap,
|
||||
addr_ty,
|
||||
index,
|
||||
offset,
|
||||
pos.func,
|
||||
spectre_oob_comparison,
|
||||
);
|
||||
}
|
||||
|
||||
fn cast_offset_to_pointer_ty(
|
||||
offset: ir::Value,
|
||||
offset_ty: ir::Type,
|
||||
fn cast_index_to_pointer_ty(
|
||||
index: ir::Value,
|
||||
index_ty: ir::Type,
|
||||
addr_ty: ir::Type,
|
||||
pos: &mut FuncCursor,
|
||||
) -> ir::Value {
|
||||
if offset_ty == addr_ty {
|
||||
return offset;
|
||||
if index_ty == addr_ty {
|
||||
return index;
|
||||
}
|
||||
// Note that using 64-bit heaps on a 32-bit host is not currently supported,
|
||||
// would require at least a bounds check here to ensure that the truncation
|
||||
// from 64-to-32 bits doesn't lose any upper bits. For now though we're
|
||||
// mostly interested in the 32-bit-heaps-on-64-bit-hosts cast.
|
||||
assert!(offset_ty.bits() < addr_ty.bits());
|
||||
assert!(index_ty.bits() < addr_ty.bits());
|
||||
|
||||
// Convert `offset` to `addr_ty`.
|
||||
let extended_offset = pos.ins().uextend(addr_ty, offset);
|
||||
// Convert `index` to `addr_ty`.
|
||||
let extended_index = pos.ins().uextend(addr_ty, index);
|
||||
|
||||
// Add debug value-label alias so that debuginfo can name the extended
|
||||
// value as the address
|
||||
@@ -213,9 +224,9 @@ fn cast_offset_to_pointer_ty(
|
||||
pos.func
|
||||
.stencil
|
||||
.dfg
|
||||
.add_value_label_alias(extended_offset, loc, offset);
|
||||
.add_value_label_alias(extended_index, loc, index);
|
||||
|
||||
extended_offset
|
||||
extended_index
|
||||
}
|
||||
|
||||
/// Emit code for the base address computation of a `heap_addr` instruction.
|
||||
@@ -224,7 +235,8 @@ fn compute_addr(
|
||||
inst: ir::Inst,
|
||||
heap: ir::Heap,
|
||||
addr_ty: ir::Type,
|
||||
offset: ir::Value,
|
||||
index: ir::Value,
|
||||
offset: u32,
|
||||
func: &mut ir::Function,
|
||||
// If we are performing Spectre mitigation with conditional selects, the
|
||||
// values to compare and the condition code that indicates an out-of bounds
|
||||
@@ -232,7 +244,7 @@ fn compute_addr(
|
||||
// speculatively safe address (a zero / null pointer) instead.
|
||||
spectre_oob_comparison: Option<(IntCC, ir::Value, ir::Value)>,
|
||||
) {
|
||||
debug_assert_eq!(func.dfg.value_type(offset), addr_ty);
|
||||
debug_assert_eq!(func.dfg.value_type(index), addr_ty);
|
||||
let mut pos = FuncCursor::new(func).at_inst(inst);
|
||||
pos.use_srcloc(inst);
|
||||
|
||||
@@ -245,14 +257,33 @@ fn compute_addr(
|
||||
};
|
||||
|
||||
if let Some((cc, a, b)) = spectre_oob_comparison {
|
||||
let final_addr = pos.ins().iadd(base, offset);
|
||||
let final_base = pos.ins().iadd(base, index);
|
||||
// NB: The addition of the offset immediate must happen *before* the
|
||||
// `select_spectre_guard`. If it happens after, then we potentially are
|
||||
// letting speculative execution read the whole first 4GiB of memory.
|
||||
let final_addr = if offset == 0 {
|
||||
final_base
|
||||
} else {
|
||||
pos.ins().iadd_imm(final_base, offset as i64)
|
||||
};
|
||||
let zero = pos.ins().iconst(addr_ty, 0);
|
||||
let cmp = pos.ins().icmp(cc, a, b);
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.select_spectre_guard(cmp, zero, final_addr);
|
||||
} else if offset == 0 {
|
||||
pos.func.dfg.replace(inst).iadd(base, index);
|
||||
} else {
|
||||
pos.func.dfg.replace(inst).iadd(base, offset);
|
||||
let final_base = pos.ins().iadd(base, index);
|
||||
pos.func
|
||||
.dfg
|
||||
.replace(inst)
|
||||
.iadd_imm(final_base, offset as i64);
|
||||
}
|
||||
}
|
||||
|
||||
fn offset_plus_size(offset: u32, size: u8) -> u64 {
|
||||
// Cannot overflow because we are widening to `u64`.
|
||||
offset as u64 + size as u64
|
||||
}
|
||||
|
||||
@@ -72,8 +72,9 @@ pub fn simple_legalize(func: &mut ir::Function, cfg: &mut ControlFlowGraph, isa:
|
||||
opcode: ir::Opcode::HeapAddr,
|
||||
heap,
|
||||
arg,
|
||||
imm,
|
||||
} => expand_heap_addr(inst, &mut pos.func, cfg, isa, heap, arg, imm),
|
||||
offset,
|
||||
size,
|
||||
} => expand_heap_addr(inst, &mut pos.func, cfg, isa, heap, arg, offset, size),
|
||||
InstructionData::StackLoad {
|
||||
opcode: ir::Opcode::StackLoad,
|
||||
stack_slot,
|
||||
|
||||
@@ -476,7 +476,13 @@ pub fn write_operands(w: &mut dyn Write, dfg: &DataFlowGraph, inst: Inst) -> fmt
|
||||
dynamic_stack_slot,
|
||||
..
|
||||
} => write!(w, " {}, {}", arg, dynamic_stack_slot),
|
||||
HeapAddr { heap, arg, imm, .. } => write!(w, " {}, {}, {}", heap, arg, imm),
|
||||
HeapAddr {
|
||||
heap,
|
||||
arg,
|
||||
offset,
|
||||
size,
|
||||
..
|
||||
} => write!(w, " {}, {}, {}, {}", heap, arg, offset, size),
|
||||
TableAddr { table, arg, .. } => write!(w, " {}, {}", table, arg),
|
||||
Load {
|
||||
flags, arg, offset, ..
|
||||
|
||||
Reference in New Issue
Block a user