From c8f55ed688ad733dee333f23e51b108e0cb9b41b Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 31 Aug 2021 08:34:31 -0700 Subject: [PATCH] Optimize codegen slightly calling wasm functions Currently wasm-calls work with `Result<T, Trap>` internally but `Trap` is an enum defined in `wasmtime-runtime` which is actually quite large. Since traps are supposed to be rare this commit changes these functions to return a `Box<Trap>` which is un-boxed later up in the `wasmtime` crate within a `#[cold]` function. --- crates/runtime/src/traphandlers.rs | 44 ++++++++++++---------- crates/runtime/src/traphandlers/macos.rs | 3 +- crates/runtime/src/traphandlers/unix.rs | 7 ++-- crates/runtime/src/traphandlers/windows.rs | 2 +- crates/wasmtime/src/engine.rs | 2 +- crates/wasmtime/src/func.rs | 2 +- crates/wasmtime/src/trap.rs | 5 +++ 7 files changed, 38 insertions(+), 27 deletions(-) diff --git a/crates/runtime/src/traphandlers.rs b/crates/runtime/src/traphandlers.rs index e4c4faf6a9..ad78dcb05b 100644 --- a/crates/runtime/src/traphandlers.rs +++ b/crates/runtime/src/traphandlers.rs @@ -173,7 +173,7 @@ pub unsafe fn catch_traps<'a, F>( signal_handler: Option<*const SignalHandler<'static>>, callee: *mut VMContext, mut closure: F, -) -> Result<(), Trap> +) -> Result<(), Box<Trap>> where F: FnMut(*mut VMContext), { @@ -227,26 +227,31 @@ impl CallThreadState { self, interrupts: *mut VMInterrupts, closure: impl FnOnce(&CallThreadState) -> i32, - ) -> Result<(), Trap> { + ) -> Result<(), Box<Trap>> { let ret = tls::set(&self, || closure(&self))?; if ret != 0 { - return Ok(()); + Ok(()) + } else { + Err(unsafe { self.read_trap(interrupts) }) } - match unsafe { (*self.unwind.get()).as_ptr().read() } { - UnwindReason::UserTrap(data) => Err(Trap::User(data)), - UnwindReason::LibTrap(trap) => Err(trap), + } + + #[cold] + unsafe fn read_trap(&self, interrupts: *mut VMInterrupts) -> Box<Trap> { + Box::new(match (*self.unwind.get()).as_ptr().read() { + UnwindReason::UserTrap(data) => Trap::User(data), + 
UnwindReason::LibTrap(trap) => trap, UnwindReason::JitTrap { backtrace, pc } => { - let maybe_interrupted = unsafe { - (*interrupts).stack_limit.load(SeqCst) == wasmtime_environ::INTERRUPTED - }; - Err(Trap::Jit { + let maybe_interrupted = + (*interrupts).stack_limit.load(SeqCst) == wasmtime_environ::INTERRUPTED; + Trap::Jit { pc, backtrace, maybe_interrupted, - }) + } } UnwindReason::Panic(panic) => std::panic::resume_unwind(panic), - } + }) } fn unwind_with(&self, reason: UnwindReason) -> ! { @@ -372,17 +377,16 @@ mod tls { thread_local!(static PTR: Cell<(Ptr, bool)> = Cell::new((ptr::null(), false))); #[inline(never)] // see module docs for why this is here - pub fn replace(val: Ptr) -> Result<Ptr, Trap> { + pub fn replace(val: Ptr) -> Result<Ptr, Box<Trap>> { PTR.with(|p| { // When a new value is configured that means that we may be // entering WebAssembly so check to see if this thread has // performed per-thread initialization for traps. - let (prev, mut initialized) = p.get(); + let (prev, initialized) = p.get(); if !initialized { super::super::sys::lazy_per_thread_init()?; - initialized = true; } - p.set((val, initialized)); + p.set((val, true)); Ok(prev) }) } @@ -390,7 +394,7 @@ mod tls { #[inline(never)] /// Eagerly initialize thread-local runtime functionality. This will be performed /// lazily by the runtime if users do not perform it eagerly. - pub fn initialize() -> Result<(), Trap> { + pub fn initialize() -> Result<(), Box<Trap>> { PTR.with(|p| { let (state, initialized) = p.get(); if initialized { @@ -420,7 +424,7 @@ mod tls { /// /// This is not a safe operation since it's intended to only be used /// with stack switching found with fibers and async wasmtime. - pub unsafe fn take() -> Result<TlsRestore, Trap> { + pub unsafe fn take() -> Result<TlsRestore, Box<Trap>> { // Our tls pointer must be set at this time, and it must not be // null. 
We need to restore the previous pointer since we're // removing ourselves from the call-stack, and in the process we @@ -437,7 +441,7 @@ mod tls { /// /// This is unsafe because it's intended to only be used within the /// context of stack switching within wasmtime. - pub unsafe fn replace(self) -> Result<(), super::Trap> { + pub unsafe fn replace(self) -> Result<(), Box<super::Trap>> { // We need to configure our previous TLS pointer to whatever is in // TLS at this time, and then we set the current state to ourselves. let prev = raw::get(); @@ -452,7 +456,7 @@ mod tls { /// execution of `closure` any call to `with` will yield `ptr`, unless this /// is recursively called again. #[inline] - pub fn set<R>(state: &CallThreadState, closure: impl FnOnce() -> R) -> Result<R, Trap> { + pub fn set<R>(state: &CallThreadState, closure: impl FnOnce() -> R) -> Result<R, Box<Trap>> { struct Reset<'a>(&'a CallThreadState); impl Drop for Reset<'_> { diff --git a/crates/runtime/src/traphandlers/macos.rs b/crates/runtime/src/traphandlers/macos.rs index c5f7f2527e..1520e95f91 100644 --- a/crates/runtime/src/traphandlers/macos.rs +++ b/crates/runtime/src/traphandlers/macos.rs @@ -409,7 +409,8 @@ unsafe extern "C" fn unwind(wasm_pc: *const u8) -> ! { /// unhandled thread-level exceptions get automatically forwarded to the /// task-level port which is where we'd expected things like breakpad/crashpad /// exception handlers to get registered. -pub fn lazy_per_thread_init() -> Result<(), Trap> { +#[cold] +pub fn lazy_per_thread_init() -> Result<(), Box<Trap>> { unsafe { assert!(WASMTIME_PORT != MACH_PORT_NULL); let this_thread = mach_thread_self(); diff --git a/crates/runtime/src/traphandlers/unix.rs b/crates/runtime/src/traphandlers/unix.rs index cd635239b4..d202cf297c 100644 --- a/crates/runtime/src/traphandlers/unix.rs +++ b/crates/runtime/src/traphandlers/unix.rs @@ -251,7 +251,8 @@ unsafe fn set_pc(cx: *mut libc::c_void, pc: usize, arg1: usize) { /// always large enough for our signal handling code. 
Override it by creating /// and registering our own alternate stack that is large enough and has a guard /// page. -pub fn lazy_per_thread_init() -> Result<(), Trap> { +#[cold] +pub fn lazy_per_thread_init() -> Result<(), Box<Trap>> { // This thread local is purely used to register a `Stack` to get deallocated // when the thread exists. Otherwise this function is only ever called at // most once per-thread. @@ -273,7 +274,7 @@ pub fn lazy_per_thread_init() -> Result<(), Trap> { Ok(()) }); - unsafe fn allocate_sigaltstack() -> Result<Option<Stack>, Trap> { + unsafe fn allocate_sigaltstack() -> Result<Option<Stack>, Box<Trap>> { // Check to see if the existing sigaltstack, if it exists, is big // enough. If so we don't need to allocate our own. let mut old_stack = mem::zeroed(); @@ -303,7 +304,7 @@ pub fn lazy_per_thread_init() -> Result<(), Trap> { 0, ); if ptr == libc::MAP_FAILED { - return Err(Trap::oom()); + return Err(Box::new(Trap::oom())); } // Prepare the stack with readable/writable memory and then register it diff --git a/crates/runtime/src/traphandlers/windows.rs b/crates/runtime/src/traphandlers/windows.rs index a2a45654c5..5ca3037a27 100644 --- a/crates/runtime/src/traphandlers/windows.rs +++ b/crates/runtime/src/traphandlers/windows.rs @@ -74,7 +74,7 @@ unsafe extern "system" fn exception_handler(exception_info: PEXCEPTION_POINTERS) }) } -pub fn lazy_per_thread_init() -> Result<(), Trap> { +pub fn lazy_per_thread_init() -> Result<(), Box<Trap>> { // Unused on Windows Ok(()) } diff --git a/crates/wasmtime/src/engine.rs b/crates/wasmtime/src/engine.rs index 8eab0e28bf..62bcca848f 100644 --- a/crates/wasmtime/src/engine.rs +++ b/crates/wasmtime/src/engine.rs @@ -87,7 +87,7 @@ impl Engine { /// latency of WebAssembly calls are extra-important, which is not /// necessarily true of all embeddings. 
pub fn tls_eager_initialize() -> Result<(), Trap> { - wasmtime_runtime::tls_eager_initialize().map_err(Trap::from_runtime) + wasmtime_runtime::tls_eager_initialize().map_err(Trap::from_runtime_box) } /// Returns the configuration settings that this engine is using. diff --git a/crates/wasmtime/src/func.rs b/crates/wasmtime/src/func.rs index 3bb4391e12..c66a43aa6e 100644 --- a/crates/wasmtime/src/func.rs +++ b/crates/wasmtime/src/func.rs @@ -1056,7 +1056,7 @@ pub(crate) fn invoke_wasm_and_catch_traps( ); exit_wasm(store, exit); store.0.entering_native_hook()?; - result.map_err(Trap::from_runtime) + result.map_err(Trap::from_runtime_box) } } diff --git a/crates/wasmtime/src/trap.rs b/crates/wasmtime/src/trap.rs index a3cd869f07..61da2dcce5 100644 --- a/crates/wasmtime/src/trap.rs +++ b/crates/wasmtime/src/trap.rs @@ -161,6 +161,11 @@ impl Trap { ) } + #[cold] // see Trap::new + pub(crate) fn from_runtime_box(runtime_trap: Box<wasmtime_runtime::Trap>) -> Self { + Self::from_runtime(*runtime_trap) + } + #[cold] // see Trap::new pub(crate) fn from_runtime(runtime_trap: wasmtime_runtime::Trap) -> Self { match runtime_trap {