Implement wasm trap handlers. (#27)

* Implement wasm trap handlers.

This adds signal handlers based on SpiderMonkey's signal-handler code.
The functionality for looking up the trap code and wasm bytecode offset
isn't yet implemented, but this is a start.

I considered rewriting this code in Rust, but decided against it for now
as C++ allows us to talk to the relevant OS APIs more directly.

Fixes #15.

* Compile with -std=c++11.

* Refactor InstallState initialization.

* Compile with -fPIC.

* Factor out the code for calling a wasm function with a given index.

* Fix unclear wording in a comment.
This commit is contained in:
Dan Gohman
2018-11-27 06:05:58 -08:00
committed by GitHub
parent 8e1e75f1f4
commit 35627cf37f
9 changed files with 1132 additions and 19 deletions

View File

@@ -1,15 +1,17 @@
use cranelift_codegen::binemit::Reloc;
use cranelift_codegen::isa::TargetIsa;
use cranelift_entity::{EntityRef, PrimaryMap};
use cranelift_wasm::{DefinedFuncIndex, MemoryIndex, TableIndex};
use cranelift_wasm::{DefinedFuncIndex, FuncIndex, MemoryIndex, TableIndex};
use instance::Instance;
use memory::LinearMemory;
use region::protect;
use region::Protection;
use signalhandlers::{ensure_eager_signal_handlers, ensure_full_signal_handlers, TrapContext};
use std::mem::transmute;
use std::ptr::{self, write_unaligned};
use std::string::String;
use std::vec::Vec;
use traphandlers::call_wasm;
use wasmtime_environ::{
compile_module, Compilation, Export, Module, ModuleTranslation, Relocation, RelocationTarget,
};
@@ -165,22 +167,10 @@ pub fn finish_instantiation(
.map(LinearMemory::base_addr)
.collect::<Vec<_>>();
let vmctx = make_vmctx(instance, &mut mem_base_addrs);
let mut vmctx = make_vmctx(instance, &mut mem_base_addrs);
if let Some(start_index) = module.start_func {
let code_buf =
&compilation.functions[module
.defined_func_index(start_index)
.expect("imported start functions not supported yet")];
// Rather than writing inline assembly to jump to the code region, we use the fact that
// the Rust ABI for calling a function with no arguments and no return matches the one of
// the generated code. Thanks to this, we can transmute the code region into a first-class
// Rust function and call it.
unsafe {
let start_func = transmute::<_, fn(*const *mut u8)>(code_buf.as_ptr());
start_func(vmctx.as_ptr());
}
execute_by_index(module, compilation, &mut vmctx, start_index)?;
}
Ok(vmctx)
@@ -199,18 +189,39 @@ pub fn execute(
None => return Err(format!("no export named \"{}\"", function)),
};
execute_by_index(module, compilation, vmctx, fn_index)
}
/// Invoke the compiled function identified by `fn_index`, passing it `vmctx`.
///
/// Signal handlers are installed (if they are not already) before the call,
/// and the call itself goes through `call_wasm` so that a trap raised by the
/// generated code is reported as an `Err` rather than terminating the process.
///
/// # Errors
///
/// Returns `Err` if the signal handlers could not be installed, or if
/// `call_wasm` reports a trap.
///
/// # Panics
///
/// Panics if `fn_index` refers to an imported function, since only functions
/// defined in `module` have an entry in `compilation.functions`.
fn execute_by_index(
    module: &Module,
    compilation: &Compilation,
    vmctx: &mut Vec<*mut u8>,
    fn_index: FuncIndex,
) -> Result<(), String> {
    let code_buf =
        &compilation.functions[module
            .defined_func_index(fn_index)
            .expect("imported functions not supported yet")];

    let mut traps = TrapContext {
        triedToInstallSignalHandlers: false,
        haveSignalHandlers: false,
    };

    // Ensure that our signal handlers are ready for action.
    ensure_eager_signal_handlers();
    ensure_full_signal_handlers(&mut traps);
    if !traps.haveSignalHandlers {
        return Err("failed to install signal handlers".to_string());
    }

    // Rather than writing inline assembly to jump to the code region, we use the fact that
    // the Rust ABI for calling a function with no arguments and no return values matches the one
    // of the generated code. Thanks to this, we can transmute the code region into a first-class
    // Rust function and call it.
    let func = unsafe { transmute::<_, fn(*const *mut u8)>(code_buf.as_ptr()) };

    // The function is invoked exactly once, and only from inside `call_wasm`,
    // so that any trap it raises unwinds back to this point.
    call_wasm(|| func(vmctx.as_mut_ptr()))?;
    Ok(())
}

View File

@@ -36,13 +36,19 @@ extern crate wasmtime_environ;
#[cfg(not(feature = "std"))]
#[macro_use]
extern crate alloc;
#[macro_use]
extern crate lazy_static;
extern crate libc;
mod execute;
mod instance;
mod memory;
mod signalhandlers;
mod traphandlers;
pub use execute::{compile_and_link_module, execute, finish_instantiation};
pub use instance::Instance;
pub use traphandlers::{call_wasm, LookupCodeSegment, RecordTrap, Unwind};
#[cfg(not(feature = "std"))]
mod std {

View File

@@ -0,0 +1,101 @@
//! Interface to low-level signal-handling mechanisms.
#![allow(non_upper_case_globals)]
#![allow(non_camel_case_types)]
#![allow(non_snake_case)]
use std::borrow::{Borrow, BorrowMut};
use std::sync::RwLock;
include!(concat!(env!("OUT_DIR"), "/signalhandlers.rs"));
/// Bookkeeping for a one-time installation step: whether it has been
/// attempted yet and, if so, whether it succeeded.
struct InstallState {
    /// Becomes `true` as soon as the installation has been attempted.
    tried: bool,
    /// Becomes `true` only when the attempted installation succeeded.
    success: bool,
}

impl InstallState {
    /// Create the initial state: nothing attempted, nothing installed.
    fn new() -> Self {
        let not_yet = false;
        InstallState {
            success: not_yet,
            tried: not_yet,
        }
    }
}
lazy_static! {
// Shared record of the eager signal handler installation; updated by
// `ensure_eager_signal_handlers` and consulted by `ensure_full_signal_handlers`.
static ref EAGER_INSTALL_STATE: RwLock<InstallState> = RwLock::new(InstallState::new());
// Shared record of the Darwin Mach port setup; updated by
// `ensure_darwin_mach_ports` (macOS/iOS builds only).
static ref LAZY_INSTALL_STATE: RwLock<InstallState> = RwLock::new(InstallState::new());
}
/// Perform the low-overhead part of signal handler initialization.
///
/// We want to do this eagerly to ensure a more-deterministic global process
/// state. This is especially relevant for signal handlers since handler
/// ordering depends on installation order: the wasm signal handler must run
/// *before* the other crash handlers and since POSIX signal handlers work
/// LIFO, this function needs to be called at the end of the startup process,
/// after other handlers have been installed.
///
/// It is fine to call this function multiple times; only the first call does
/// any work, and its outcome is remembered in `EAGER_INSTALL_STATE`.
pub fn ensure_eager_signal_handlers() {
    let mut guard = EAGER_INSTALL_STATE.write().unwrap();
    let state = guard.borrow_mut();

    if !state.tried {
        // Mark the attempt first so that it is never repeated, even on failure.
        state.tried = true;
        assert!(state.success == false);

        if unsafe { EnsureEagerSignalHandlers() } {
            state.success = true;
        }
    }
}
/// Run the Darwin-specific `EnsureDarwinMachPorts` setup at most once,
/// recording the outcome in `LAZY_INSTALL_STATE`.
#[cfg(any(target_os = "macos", target_os = "ios"))]
fn ensure_darwin_mach_ports() {
    let mut guard = LAZY_INSTALL_STATE.write().unwrap();
    let state = guard.borrow_mut();

    if !state.tried {
        // Mark the attempt first so that it is never repeated, even on failure.
        state.tried = true;
        assert!(state.success == false);

        if unsafe { EnsureDarwinMachPorts() } {
            state.success = true;
        }
    }
}
/// Assuming `ensure_eager_signal_handlers` has already been called,
/// this function performs the full installation of signal handlers which must
/// be performed per-thread. This operation may incur some overhead and
/// so should be done only when needed to use wasm.
///
/// The outcome is reported through `cx`: `triedToInstallSignalHandlers` is set
/// on the first call, and `haveSignalHandlers` is set only if every required
/// installation step succeeded. Subsequent calls with the same `cx` do nothing.
///
/// # Panics
///
/// Panics if `ensure_eager_signal_handlers` has not been called yet, or if
/// `cx.haveSignalHandlers` is already set on the first attempt.
pub fn ensure_full_signal_handlers(cx: &mut TrapContext) {
    if cx.triedToInstallSignalHandlers {
        return;
    }

    cx.triedToInstallSignalHandlers = true;
    assert!(!cx.haveSignalHandlers);

    {
        // The read lock is scoped so it is released before any further setup.
        let locked = EAGER_INSTALL_STATE.read().unwrap();
        let state = locked.borrow();
        assert!(state.tried);
        if !state.success {
            return;
        }
    }

    #[cfg(any(target_os = "macos", target_os = "ios"))]
    {
        ensure_darwin_mach_ports();
        // Do not claim to have working handlers if the Mach port setup failed.
        if !LAZY_INSTALL_STATE.read().unwrap().success {
            return;
        }
    }

    cx.haveSignalHandlers = true;
}

View File

@@ -0,0 +1,102 @@
//! WebAssembly trap handling, which is built on top of the lower-level
//! signal-handling mechanisms.
use libc::c_int;
use signalhandlers::{jmp_buf, CodeSegment};
use std::cell::{Cell, RefCell};
use std::mem;
use std::ptr;
// Currently we use setjmp/longjmp to unwind out of a signal handler
// and back to the point where WebAssembly was called (via `call_wasm`).
// This works because WebAssembly code currently does not use any EH
// or require any cleanups, and we never unwind through non-wasm frames.
// In the future, we'll likely replace this with fancier stack unwinding.
extern "C" {
// C `setjmp`: saves the calling context into `env`. `call_wasm` treats a
// nonzero return value as "we got here via `longjmp`".
fn setjmp(env: *mut jmp_buf) -> c_int;
// C `longjmp`: resumes the context saved in `env`, making the matching
// `setjmp` return `val`. It never returns to its caller.
fn longjmp(env: *const jmp_buf, val: c_int) -> !;
}
/// Information about a trap, as stored in `TRAP_DATA` by `RecordTrap`.
#[derive(Copy, Clone, Debug)]
struct TrapData {
/// The `pc` value that was handed to `RecordTrap`.
pc: *const u8,
}
thread_local! {
// The trap most recently recorded on this thread by `RecordTrap`;
// starts out with a null `pc`.
static TRAP_DATA: Cell<TrapData> = Cell::new(TrapData { pc: ptr::null() });
// Stack of saved `setjmp` contexts for this thread: `call_wasm` pushes one
// before running wasm code and `Unwind` pops the most recent one.
static JMP_BUFS: RefCell<Vec<jmp_buf>> = RefCell::new(Vec::new());
}
/// Remember the program counter of a trap in thread-local storage so that
/// `call_wasm` can report it once control has unwound back to it.
///
/// The `_codeSegment` argument is currently unused.
#[doc(hidden)]
#[allow(non_snake_case)]
#[no_mangle]
pub extern "C" fn RecordTrap(pc: *const u8, _codeSegment: *const CodeSegment) {
    // TODO: Look up the wasm bytecode offset and trap code and record them instead.
    let trap = TrapData { pc };
    TRAP_DATA.with(|slot| slot.set(trap));
}
/// Initiate an unwind.
///
/// Pops the most recently saved context off this thread's `JMP_BUFS` stack and
/// `longjmp`s to it with the value `1`, so this function does not return to
/// its caller. It panics if no context has been saved.
#[doc(hidden)]
#[allow(non_snake_case)]
#[no_mangle]
pub extern "C" fn Unwind() {
    // Take the context out of the stack first; the `RefCell` borrow ends with
    // this statement, before control leaves via `longjmp`.
    let target = JMP_BUFS.with(|bufs| bufs.borrow_mut().pop().unwrap());
    unsafe { longjmp(&target, 1) }
}
/// Return the CodeSegment containing the given pc, if any exist in the process.
/// This method does not take a lock.
#[doc(hidden)]
#[allow(non_snake_case)]
#[no_mangle]
pub extern "C" fn LookupCodeSegment(_pc: *const ::std::os::raw::c_void) -> *const CodeSegment {
// TODO: Implement this.
unsafe { mem::transmute(-1isize) }
}
/// A simple guard to ensure that `JMP_BUFS` is reset when we're done.
struct ScopeGuard {
    /// Length of this thread's `JMP_BUFS` stack when the guard was created.
    orig_num_bufs: usize,
}

impl ScopeGuard {
    /// Snapshot the current length of this thread's `JMP_BUFS` stack.
    fn new() -> Self {
        let orig_num_bufs = JMP_BUFS.with(|bufs| bufs.borrow().len());
        ScopeGuard { orig_num_bufs }
    }
}
impl Drop for ScopeGuard {
    /// Discard any `JMP_BUFS` entries pushed since this guard was created.
    fn drop(&mut self) {
        let orig_num_bufs = self.orig_num_bufs;
        // `truncate` only ever shrinks the stack, so no placeholder `jmp_buf`
        // value is needed and a stack that is already shorter is left alone.
        JMP_BUFS.with(|bufs| bufs.borrow_mut().truncate(orig_num_bufs));
    }
}
/// Call the wasm function poined to by `f`.
pub fn call_wasm<F>(f: F) -> Result<(), String>
where
F: FnOnce(),
{
// In case wasm code calls Rust that panics and unwinds past this point,
// ensure that JMP_BUFS is unwound to its incoming state.
let _ = ScopeGuard::new();
JMP_BUFS.with(|bufs| {
let mut buf = unsafe { mem::uninitialized() };
if unsafe { setjmp(&mut buf) } != 0 {
return TRAP_DATA.with(|data| Err(format!("wasm trap at {:?}", data.get().pc)));
}
bufs.borrow_mut().push(buf);
f();
Ok(())
})
}