Don't re-capture backtraces when propagating traps through host frames (#5049)

* Add a benchmark for traps with many Wasm<-->host calls on the stack * Add a test for expected Wasm stack traces with Wasm<-->host calls on the stack when we trap * Don't re-capture backtraces when propagating traps through host frames This fixes some accidentally quadratic code where we would re-capture a Wasm stack trace (takes `O(n)` time) every time we propagated a trap through a host frame back to Wasm (can happen `O(n)` times). And `O(n) * O(n) = O(n^2)`, of course. Whoops. After this commit, trapping with a call stack that is `n` frames deep of Wasm-to-host-to-Wasm calls just captures a single backtrace and is therefore just a proper `O(n)` time operation, as it is intended to be. Now we explicitly track whether we need to capture a Wasm backtrace or not when raising a trap. This unfortunately isn't as straightforward as one might hope, however, because of the split between `wasmtime::Trap` and `wasmtime_runtime::Trap`. We need to decide whether or not to capture a Wasm backtrace inside `wasmtime_runtime` but in order to determine whether to do that or not we need to reflect on the `anyhow::Error` and see if it is a `wasmtime::Trap` that already has a backtrace or not. This can't be done the straightforward way because it would introduce a cyclic dependency between the `wasmtime` and `wasmtime-runtime` crates. We can't merge those two `Trap` types -- at least not without effectively merging the whole `wasmtime` and `wasmtime-runtime` crates together, which would be a good idea in a perfect world but would be a *ton* of ocean boiling from where we currently are -- because `wasmtime::Trap` does symbolication of stack traces which relies on module registration information data that resides inside the `wasmtime` crate and therefore can't be moved into `wasmtime-runtime`.
We resolve this problem by adding a boolean to `wasmtime_runtime::raise_user_trap` that controls whether we should capture a Wasm backtrace or not, and then determine whether we need a backtrace or not at each of that function's call sites, which are in `wasmtime` and therefore can do the reflection to determine whether the user trap already has a backtrace or not. Phew! Fixes #5037 * debug assert that we don't record unnecessary backtraces for traps * Add assertions around `needs_backtrace` Unfortunately we can't do: `debug_assert_eq!(needs_backtrace, trap.inner.backtrace.get().is_some());` because `needs_backtrace` doesn't consider whether Wasm backtraces have been disabled via config. * Consolidate `needs_backtrace` calculation followed by calling `raise_user_trap` into one place
2022-10-13 07:22:46 -07:00
parent f96491f333
commit a2f846f124
9 changed files with 221 additions and 25 deletions
--- a/crates/runtime/src/component/transcode.rs
+++ b/crates/runtime/src/component/transcode.rs
@@ -86,7 +86,12 @@ mod trampolines {
                    });
                    match result {
                        Ok(Ok(ret)) => transcoders!(@convert_ret ret _retptr $($result)?),
-                        Ok(Err(err)) => crate::traphandlers::raise_trap(err.into()),
+                        Ok(Err(err)) => crate::traphandlers::raise_trap(
+                            crate::traphandlers::TrapReason::User {
+                                error: err,
+                                needs_backtrace: true,
+                            },
+                        ),
                        Err(panic) => crate::traphandlers::resume_panic(panic),
                    }
                }
--- a/crates/runtime/src/libcalls.rs
+++ b/crates/runtime/src/libcalls.rs
@@ -165,13 +165,23 @@ pub mod trampolines {
    }
 }

-unsafe fn memory32_grow(vmctx: *mut VMContext, delta: u64, memory_index: u32) -> Result<*mut u8> {
+unsafe fn memory32_grow(
+    vmctx: *mut VMContext,
+    delta: u64,
+    memory_index: u32,
+) -> Result<*mut u8, TrapReason> {
    let instance = (*vmctx).instance_mut();
    let memory_index = MemoryIndex::from_u32(memory_index);
-    let result = match instance.memory_grow(memory_index, delta)? {
-        Some(size_in_bytes) => size_in_bytes / (wasmtime_environ::WASM_PAGE_SIZE as usize),
-        None => usize::max_value(),
-    };
+    let result =
+        match instance
+            .memory_grow(memory_index, delta)
+            .map_err(|error| TrapReason::User {
+                error,
+                needs_backtrace: true,
+            })? {
+            Some(size_in_bytes) => size_in_bytes / (wasmtime_environ::WASM_PAGE_SIZE as usize),
+            None => usize::max_value(),
+        };
    Ok(result as *mut _)
 }

--- a/crates/runtime/src/traphandlers.rs
+++ b/crates/runtime/src/traphandlers.rs
@@ -95,8 +95,11 @@ pub unsafe fn raise_trap(reason: TrapReason) -> ! {
 /// Only safe to call when wasm code is on the stack, aka `catch_traps` must
 /// have been previously called. Additionally no Rust destructors can be on the
 /// stack. They will be skipped and not executed.
-pub unsafe fn raise_user_trap(data: Error) -> ! {
-    raise_trap(TrapReason::User(data))
+pub unsafe fn raise_user_trap(error: Error, needs_backtrace: bool) -> ! {
+    raise_trap(TrapReason::User {
+        error,
+        needs_backtrace,
+    })
 }

 /// Raises a trap from inside library code immediately.
@@ -138,7 +141,12 @@ pub struct Trap {
 #[derive(Debug)]
 pub enum TrapReason {
    /// A user-raised trap through `raise_user_trap`.
-    User(Error),
+    User {
+        /// The actual user trap error.
+        error: Error,
+        /// Whether we need to capture a backtrace for this error or not.
+        needs_backtrace: bool,
+    },

    /// A trap raised from Cranelift-generated code with the pc listed of where
    /// the trap came from.
@@ -149,6 +157,22 @@ pub enum TrapReason {
 }

 impl TrapReason {
+    /// Create a new `TrapReason::User` that does not have a backtrace yet.
+    pub fn user_without_backtrace(error: Error) -> Self {
+        TrapReason::User {
+            error,
+            needs_backtrace: true,
+        }
+    }
+
+    /// Create a new `TrapReason::User` that already has a backtrace.
+    pub fn user_with_backtrace(error: Error) -> Self {
+        TrapReason::User {
+            error,
+            needs_backtrace: false,
+        }
+    }
+
    /// Is this a JIT trap?
    pub fn is_jit(&self) -> bool {
        matches!(self, TrapReason::Jit(_))
@@ -157,7 +181,7 @@ impl TrapReason {

 impl From<Error> for TrapReason {
    fn from(err: Error) -> Self {
-        TrapReason::User(err)
+        TrapReason::user_without_backtrace(err)
    }
 }

@@ -381,7 +405,21 @@ impl CallThreadState {
    }

    fn unwind_with(&self, reason: UnwindReason) -> ! {
-        let backtrace = self.capture_backtrace(None);
+        let backtrace = match reason {
+            // Panics don't need backtraces. There is nowhere to attach the
+            // hypothetical backtrace to and it doesn't really make sense to try
+            // in the first place since this is a Rust problem rather than a
+            // Wasm problem.
+            UnwindReason::Panic(_)
+            // And if we are just propagating an existing trap that already has
+            // a backtrace attached to it, then there is no need to capture a
+            // new backtrace either.
+            | UnwindReason::Trap(TrapReason::User {
+                needs_backtrace: false,
+                ..
+            }) => None,
+            UnwindReason::Trap(_) => self.capture_backtrace(None),
+        };
        unsafe {
            (*self.unwind.get()).as_mut_ptr().write((reason, backtrace));
            wasmtime_longjmp(self.jmp_buf.get());
--- a/crates/wasmtime/src/component/func/host.rs
+++ b/crates/wasmtime/src/component/func/host.rs
@@ -1,7 +1,7 @@
 use crate::component::func::{Memory, MemoryMut, Options};
 use crate::component::storage::slice_to_storage_mut;
 use crate::component::{ComponentNamedList, ComponentType, Lift, Lower, Type, Val};
-use crate::{AsContextMut, StoreContextMut, ValRaw};
+use crate::{AsContextMut, StoreContextMut, Trap, ValRaw};
 use anyhow::{anyhow, bail, Context, Result};
 use std::any::Any;
 use std::mem::{self, MaybeUninit};
@@ -265,7 +265,7 @@ fn validate_inbounds<T: ComponentType>(memory: &[u8], ptr: &ValRaw) -> Result<us
 unsafe fn handle_result(func: impl FnOnce() -> Result<()>) {
    match panic::catch_unwind(AssertUnwindSafe(func)) {
        Ok(Ok(())) => {}
-        Ok(Err(e)) => wasmtime_runtime::raise_user_trap(e),
+        Ok(Err(e)) => Trap::raise(e),
        Err(e) => wasmtime_runtime::resume_panic(e),
    }
 }
--- a/crates/wasmtime/src/func.rs
+++ b/crates/wasmtime/src/func.rs
@@ -11,9 +11,8 @@ use std::pin::Pin;
 use std::ptr::NonNull;
 use std::sync::Arc;
 use wasmtime_runtime::{
-    raise_user_trap, ExportFunction, InstanceHandle, VMCallerCheckedAnyfunc, VMContext,
-    VMFunctionBody, VMFunctionImport, VMHostFuncContext, VMOpaqueContext, VMSharedSignatureIndex,
-    VMTrampoline,
+    ExportFunction, InstanceHandle, VMCallerCheckedAnyfunc, VMContext, VMFunctionBody,
+    VMFunctionImport, VMHostFuncContext, VMOpaqueContext, VMSharedSignatureIndex, VMTrampoline,
 };

 /// A WebAssembly function which can be called.
@@ -1887,7 +1886,7 @@ macro_rules! impl_into_func {

                    match result {
                        CallResult::Ok(val) => val,
-                        CallResult::Trap(trap) => raise_user_trap(trap),
+                        CallResult::Trap(err) => Trap::raise(err),
                        CallResult::Panic(panic) => wasmtime_runtime::resume_panic(panic),
                    }
                }
--- a/crates/wasmtime/src/trampoline/func.rs
+++ b/crates/wasmtime/src/trampoline/func.rs
@@ -56,7 +56,7 @@ unsafe extern "C" fn stub_fn<F>(
        // call-site, which gets unwrapped in `Trap::from_runtime` later on as we
        // convert from the internal `Trap` type to our own `Trap` type in this
        // crate.
-        Ok(Err(trap)) => wasmtime_runtime::raise_user_trap(trap.into()),
+        Ok(Err(trap)) => Trap::raise(trap.into()),

        // And finally if the imported function panicked, then we trigger the
        // form of unwinding that's safe to jump over wasm code on all
--- a/crates/wasmtime/src/trap.rs
+++ b/crates/wasmtime/src/trap.rs
@@ -252,6 +252,15 @@ impl Trap {
        Trap::new_with_trace(TrapReason::I32Exit(status), None)
    }

+    // Same safety requirements and caveats as
+    // `wasmtime_runtime::raise_user_trap`.
+    pub(crate) unsafe fn raise(error: anyhow::Error) -> ! {
+        let needs_backtrace = error
+            .downcast_ref::<Trap>()
+            .map_or(true, |trap| trap.trace().is_none());
+        wasmtime_runtime::raise_user_trap(error, needs_backtrace)
+    }
+
    #[cold] // see Trap::new
    pub(crate) fn from_runtime_box(
        store: &StoreOpaque,
@@ -264,9 +273,14 @@ impl Trap {
    pub(crate) fn from_runtime(store: &StoreOpaque, runtime_trap: wasmtime_runtime::Trap) -> Self {
        let wasmtime_runtime::Trap { reason, backtrace } = runtime_trap;
        match reason {
-            wasmtime_runtime::TrapReason::User(error) => {
+            wasmtime_runtime::TrapReason::User {
+                error,
+                needs_backtrace,
+            } => {
                let trap = Trap::from(error);
                if let Some(backtrace) = backtrace {
+                    debug_assert!(needs_backtrace);
+                    debug_assert!(trap.inner.backtrace.get().is_none());
                    trap.record_backtrace(TrapBacktrace::new(store, backtrace, None));
                }
                trap
@@ -359,12 +373,15 @@ impl Trap {
    fn record_backtrace(&self, backtrace: TrapBacktrace) {
        // When a trap is created on top of the wasm stack, the trampoline will
        // re-raise it via
-        // `wasmtime_runtime::raise_user_trap(trap.into::<Box<dyn Error>>())`
-        // after panic::catch_unwind. We don't want to overwrite the first
-        // backtrace recorded, as it is most precise.
-        // FIXME: make sure backtraces are only created once per trap! they are
-        // actually kinda expensive to create.
-        let _ = self.inner.backtrace.try_insert(backtrace);
+        // `wasmtime_runtime::raise_user_trap(trap.into::<Box<dyn Error>>(),
+        // ..)` after `panic::catch_unwind`. We don't want to overwrite the
+        // first backtrace recorded, as it is most precise. However, this should
+        // never happen in the first place because we thread `needs_backtrace`
+        // booleans through all calls to `raise_user_trap` to avoid capturing
+        // unnecessary backtraces! So debug assert that we don't ever capture
+        // unnecessary backtraces.
+        let result = self.inner.backtrace.try_insert(backtrace);
+        debug_assert!(result.is_ok());
    }
 }