Rework bounds checking for atomic operations (#5239)

Before, we would emit a `heap_addr` to translate the given Wasm memory address
into a native memory address and pass the result into the libcall that
implemented the atomic operation. The libcall would then treat that address as
a Wasm memory address and pass it to `validate_atomic_addr` to be
bounds-checked a second time. This was a bit nonsensical: we were validating a
native memory address as if it were a Wasm memory address.

Now we no longer emit a `heap_addr` to translate the Wasm memory address into a
native memory address. Instead, we pass the Wasm memory address itself to the
libcall, and the libcall is responsible for the bounds check (by calling
`validate_atomic_addr`, which now takes the correct kind of memory address).
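
In sketch form (illustrative Rust only — simplified names and signatures, not
Wasmtime's actual API), the new division of labor looks like this:

    // Before: JIT code ran `heap_addr` (one bounds check, producing a native
    // pointer), then the libcall re-ran a *Wasm* bounds check on that *native*
    // pointer. After: the libcall receives the Wasm address and owns the check.
    fn memory_atomic_notify_libcall(
        memory_len: u64, // current byte length of the Wasm memory
        wasm_addr: u64,  // guest address, *not* a native pointer
    ) -> Result<(), &'static str> {
        // `memory.atomic.notify` is a 4-byte, 4-aligned access per the spec.
        if wasm_addr % 4 != 0 {
            return Err("heap misaligned");
        }
        if wasm_addr.saturating_add(4) >= memory_len {
            return Err("heap out of bounds");
        }
        Ok(()) // ...then perform the notify against `base + wasm_addr`
    }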
Nick Fitzgerald authored on 2022-11-09 16:19:43 -08:00, committed by GitHub
commit 47fa1ad6a8 (parent 86679489ef)
4 changed files with 74 additions and 52 deletions

cranelift/wasm/src/code_translator.rs

@@ -1066,21 +1066,24 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
                 let heap = state.get_heap(builder.func, memarg.memory, environ)?;
                 let timeout = state.pop1(); // 64 (fixed)
                 let expected = state.pop1(); // 32 or 64 (per the `Ixx` in `IxxAtomicWait`)
-                let (_flags, addr) = prepare_atomic_addr(
-                    memarg,
-                    u8::try_from(implied_ty.bytes()).unwrap(),
-                    builder,
-                    state,
-                    environ,
-                )?;
                 assert!(builder.func.dfg.value_type(expected) == implied_ty);
+                let addr = state.pop1();
+                let effective_addr = if memarg.offset == 0 {
+                    addr
+                } else {
+                    let index_type = builder.func.heaps[heap].index_type;
+                    let offset = builder.ins().iconst(index_type, memarg.offset as i64);
+                    builder
+                        .ins()
+                        .uadd_overflow_trap(addr, offset, ir::TrapCode::HeapOutOfBounds)
+                };
                 // `fn translate_atomic_wait` can inspect the type of `expected` to figure out what
                 // code it needs to generate, if it wants.
                 let res = environ.translate_atomic_wait(
                     builder.cursor(),
                     heap_index,
                     heap,
-                    addr,
+                    effective_addr,
                     expected,
                     timeout,
                 )?;
@@ -1090,12 +1093,23 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
                 let heap_index = MemoryIndex::from_u32(memarg.memory);
                 let heap = state.get_heap(builder.func, memarg.memory, environ)?;
                 let count = state.pop1(); // 32 (fixed)
-                // `memory.atomic.notify` is defined to have an access size of 4
-                // bytes in the spec, even though it doesn't necessarily access memory.
-                let (_flags, addr) = prepare_atomic_addr(memarg, 4, builder, state, environ)?;
-                let res =
-                    environ.translate_atomic_notify(builder.cursor(), heap_index, heap, addr, count)?;
+                let addr = state.pop1();
+                let effective_addr = if memarg.offset == 0 {
+                    addr
+                } else {
+                    let index_type = builder.func.heaps[heap].index_type;
+                    let offset = builder.ins().iconst(index_type, memarg.offset as i64);
+                    builder
+                        .ins()
+                        .uadd_overflow_trap(addr, offset, ir::TrapCode::HeapOutOfBounds)
+                };
+                let res = environ.translate_atomic_notify(
+                    builder.cursor(),
+                    heap_index,
+                    heap,
+                    effective_addr,
+                    count,
+                )?;
                 state.push1(res);
             }
             Operator::I32AtomicLoad { memarg } => {
Operator::I32AtomicLoad { memarg } => { Operator::I32AtomicLoad { memarg } => {
@@ -2324,13 +2338,12 @@ fn prepare_addr<FE: FuncEnvironment + ?Sized>(
     Ok((flags, addr))
 }

-fn prepare_atomic_addr<FE: FuncEnvironment + ?Sized>(
+fn align_atomic_addr(
     memarg: &MemArg,
     loaded_bytes: u8,
     builder: &mut FunctionBuilder,
     state: &mut FuncTranslationState,
-    environ: &mut FE,
-) -> WasmResult<(MemFlags, Value)> {
+) {
     // Atomic addresses must all be aligned correctly, and for now we check
     // alignment before we check out-of-bounds-ness. The order of this check may
     // need to be updated depending on the outcome of the official threads
@@ -2358,7 +2371,16 @@ fn prepare_atomic_addr<FE: FuncEnvironment + ?Sized>(
         let f = builder.ins().icmp_imm(IntCC::NotEqual, misalignment, 0);
         builder.ins().trapnz(f, ir::TrapCode::HeapMisaligned);
     }
+}
+
+fn prepare_atomic_addr<FE: FuncEnvironment + ?Sized>(
+    memarg: &MemArg,
+    loaded_bytes: u8,
+    builder: &mut FunctionBuilder,
+    state: &mut FuncTranslationState,
+    environ: &mut FE,
+) -> WasmResult<(MemFlags, Value)> {
+    align_atomic_addr(memarg, loaded_bytes, builder, state);
     prepare_addr(memarg, loaded_bytes, builder, state, environ)
 }
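
Note the new translator-side pattern above: with `heap_addr` gone, the static
`memarg.offset` is folded into the guest address using `uadd_overflow_trap`,
which traps instead of letting the unsigned addition wrap (a wrapped sum could
otherwise alias back into bounds). A standalone sketch of that arithmetic —
plain Rust with a hypothetical `effective_addr` helper standing in for the
Cranelift IR, assuming a 64-bit index type:

    // What `uadd_overflow_trap(addr, offset, HeapOutOfBounds)` computes,
    // modeled as plain Rust on a u64 index type.
    fn effective_addr(addr: u64, offset: u64) -> Result<u64, &'static str> {
        // An `addr + offset` that overflows can never be in bounds, so the
        // IR instruction traps rather than wrapping around.
        addr.checked_add(offset).ok_or("heap out of bounds")
    }

    fn main() {
        assert_eq!(effective_addr(0x1000, 8), Ok(0x1008));
        assert!(effective_addr(u64::MAX, 1).is_err()); // would wrap: trap
    }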

crates/cranelift/src/func_environ.rs

@@ -1981,6 +1981,7 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'module_environment> {
         expected: ir::Value,
         timeout: ir::Value,
     ) -> WasmResult<ir::Value> {
+        let addr = self.cast_memory_index_to_i64(&mut pos, addr, memory_index);
         let implied_ty = pos.func.dfg.value_type(expected);
         let (func_sig, memory_index, func_idx) =
             self.get_memory_atomic_wait(&mut pos.func, memory_index, implied_ty);
@@ -2006,6 +2007,7 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'module_environment> {
         addr: ir::Value,
         count: ir::Value,
     ) -> WasmResult<ir::Value> {
+        let addr = self.cast_memory_index_to_i64(&mut pos, addr, memory_index);
         let func_sig = self
             .builtin_function_signatures
             .memory_atomic_notify(&mut pos.func);
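
Both shims now widen the guest address to `i64` before crossing the libcall
boundary (the `cast_memory_index_to_i64` lines above). For a 32-bit memory that
widening must be a zero-extension, since Wasm addresses are unsigned: a
sign-extension would corrupt any address at or above 2 GiB. A small
illustration in plain Rust (not Wasmtime code):

    fn main() {
        // A 32-bit Wasm address with the top bit set (>= 2 GiB).
        let wasm_addr: u32 = 0x8000_0010;

        // Zero-extension preserves the address value...
        assert_eq!(wasm_addr as u64, 0x0000_0000_8000_0010);

        // ...while sign-extending through i32 would smear the top bit.
        assert_eq!(wasm_addr as i32 as i64 as u64, 0xFFFF_FFFF_8000_0010);
    }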

crates/environ/src/builtin.rs

@@ -42,11 +42,11 @@ macro_rules! foreach_builtin_function {
             /// Returns an index for Wasm's `global.set` instruction for `externref`s.
             externref_global_set(vmctx: vmctx, global: i32, val: reference);
             /// Returns an index for wasm's `memory.atomic.notify` instruction.
-            memory_atomic_notify(vmctx: vmctx, memory: i32, addr: pointer, count: i32) -> i32;
+            memory_atomic_notify(vmctx: vmctx, memory: i32, addr: i64, count: i32) -> i32;
             /// Returns an index for wasm's `memory.atomic.wait32` instruction.
-            memory_atomic_wait32(vmctx: vmctx, memory: i32, addr: pointer, expected: i32, timeout: i64) -> i32;
+            memory_atomic_wait32(vmctx: vmctx, memory: i32, addr: i64, expected: i32, timeout: i64) -> i32;
             /// Returns an index for wasm's `memory.atomic.wait64` instruction.
-            memory_atomic_wait64(vmctx: vmctx, memory: i32, addr: pointer, expected: i64, timeout: i64) -> i32;
+            memory_atomic_wait64(vmctx: vmctx, memory: i32, addr: i64, expected: i64, timeout: i64) -> i32;
             /// Invoked when fuel has run out while executing a function.
             out_of_gas(vmctx: vmctx);
             /// Invoked when we reach a new epoch.

crates/runtime/src/libcalls.rs

@@ -434,17 +434,12 @@ unsafe fn externref_global_set(vmctx: *mut VMContext, index: u32, externref: *mut u8)
 unsafe fn memory_atomic_notify(
     vmctx: *mut VMContext,
     memory_index: u32,
-    addr: *mut u8,
+    addr: u64,
     _count: u32,
 ) -> Result<u32, TrapReason> {
-    let addr = addr as usize;
     let memory = MemoryIndex::from_u32(memory_index);
     let instance = (*vmctx).instance();
-    // this should never overflow since addr + 4 either hits a guard page
-    // or it's been validated to be in-bounds already. Double-check for now
-    // just to be sure.
-    let addr_to_check = addr.checked_add(4).unwrap();
-    validate_atomic_addr(instance, memory, addr_to_check)?;
+    validate_atomic_addr(instance, memory, addr, 4, 4)?;
     Err(
         anyhow::anyhow!("unimplemented: wasm atomics (fn memory_atomic_notify) unsupported",)
             .into(),
@@ -455,17 +450,13 @@ unsafe fn memory_atomic_notify(
 unsafe fn memory_atomic_wait32(
     vmctx: *mut VMContext,
     memory_index: u32,
-    addr: *mut u8,
+    addr: u64,
     _expected: u32,
     _timeout: u64,
 ) -> Result<u32, TrapReason> {
-    let addr = addr as usize;
     let memory = MemoryIndex::from_u32(memory_index);
     let instance = (*vmctx).instance();
-    // see wasmtime_memory_atomic_notify for why this shouldn't overflow
-    // but we still double-check
-    let addr_to_check = addr.checked_add(4).unwrap();
-    validate_atomic_addr(instance, memory, addr_to_check)?;
+    validate_atomic_addr(instance, memory, addr, 4, 4)?;
     Err(
         anyhow::anyhow!("unimplemented: wasm atomics (fn memory_atomic_wait32) unsupported",)
             .into(),
@@ -476,40 +467,47 @@ unsafe fn memory_atomic_wait32(
 unsafe fn memory_atomic_wait64(
     vmctx: *mut VMContext,
     memory_index: u32,
-    addr: *mut u8,
+    addr: u64,
     _expected: u64,
     _timeout: u64,
 ) -> Result<u32, TrapReason> {
-    let addr = addr as usize;
     let memory = MemoryIndex::from_u32(memory_index);
     let instance = (*vmctx).instance();
-    // see wasmtime_memory_atomic_notify for why this shouldn't overflow
-    // but we still double-check
-    let addr_to_check = addr.checked_add(8).unwrap();
-    validate_atomic_addr(instance, memory, addr_to_check)?;
+    validate_atomic_addr(instance, memory, addr, 8, 8)?;
     Err(
         anyhow::anyhow!("unimplemented: wasm atomics (fn memory_atomic_wait64) unsupported",)
             .into(),
     )
 }

+macro_rules! ensure {
+    ($cond:expr, $trap:expr) => {
+        if !($cond) {
+            return Err($trap);
+        }
+    };
+}
+
-/// For atomic operations we still check the actual address despite this also
-/// being checked via the `heap_addr` instruction in cranelift. The reason for
-/// that is because the `heap_addr` instruction can defer to a later segfault to
-/// actually recognize the out-of-bounds whereas once we're running Rust code
-/// here we don't want to segfault.
-///
-/// In the situations where bounds checks were elided in JIT code (because oob
-/// would then be later guaranteed to segfault) this manual check is here
-/// so we don't segfault from Rust.
+/// In the configurations where bounds checks were elided in JIT code (because
+/// we are using static memories with virtual memory guard pages) this manual
+/// check is here so we don't segfault from Rust. For other configurations,
+/// these checks are required anyways.
 unsafe fn validate_atomic_addr(
     instance: &Instance,
     memory: MemoryIndex,
-    addr: usize,
+    addr: u64,
+    access_size: u64,
+    access_alignment: u64,
 ) -> Result<(), TrapCode> {
-    if addr > instance.get_memory(memory).current_length() {
-        return Err(TrapCode::HeapOutOfBounds);
-    }
+    debug_assert!(access_alignment.is_power_of_two());
+    ensure!(addr % access_alignment == 0, TrapCode::HeapMisaligned);
+
+    let length = u64::try_from(instance.get_memory(memory).current_length()).unwrap();
+    ensure!(
+        addr.saturating_add(access_size) < length,
+        TrapCode::HeapOutOfBounds
+    );
+
     Ok(())
 }
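
As a quick worked example of the new check — a mirror of the
`validate_atomic_addr` logic above, with the `Instance` lookup replaced by an
explicit `length` parameter (hypothetical `check` helper) so it runs standalone:

    #[derive(Debug, PartialEq)]
    enum TrapCode {
        HeapMisaligned,
        HeapOutOfBounds,
    }

    // Same checks, in the same order, as `validate_atomic_addr` above.
    fn check(addr: u64, size: u64, align: u64, length: u64) -> Result<(), TrapCode> {
        debug_assert!(align.is_power_of_two());
        if addr % align != 0 {
            return Err(TrapCode::HeapMisaligned);
        }
        if addr.saturating_add(size) >= length {
            return Err(TrapCode::HeapOutOfBounds);
        }
        Ok(())
    }

    fn main() {
        const PAGE: u64 = 65536; // one Wasm page
        assert_eq!(check(0, 4, 4, PAGE), Ok(()));                        // aligned, in bounds
        assert_eq!(check(2, 4, 4, PAGE), Err(TrapCode::HeapMisaligned)); // misaligned i32 atomic
        assert_eq!(check(PAGE, 8, 8, PAGE), Err(TrapCode::HeapOutOfBounds)); // past the end
        // Alignment is checked before bounds, matching the order above.
        assert_eq!(check(PAGE + 2, 4, 4, PAGE), Err(TrapCode::HeapMisaligned));
    }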