* Validate modules while translating This commit is a change to cranelift-wasm to validate each function body as it is translated. Additionally top-level module translation functions will perform module validation. This commit builds on changes in wasmparser to perform module validation interwtwined with parsing and translation. This will be necessary for future wasm features such as module linking where the type behind a function index, for example, can be far away in another module. Additionally this also brings a nice benefit where parsing the binary only happens once (instead of having an up-front serial validation step) and validation can happen in parallel for each function. Most of the changes in this commit are plumbing to make sure everything lines up right. The major functional change here is that module compilation should be faster by validating in parallel (or skipping function validation entirely in the case of a cache hit). Otherwise from a user-facing perspective nothing should be that different. This commit does mean that cranelift's translation now inherently validates the input wasm module. This means that the Spidermonkey integration of cranelift-wasm will also be validating the function as it's being translated with cranelift. The associated PR for wasmparser (bytecodealliance/wasmparser#62) provides the necessary tools to create a `FuncValidator` for Gecko, but this is something I'll want careful review for before landing! * Read function operators until EOF This way we can let the validator take care of any issues with mismatched `end` instructions and/or trailing operators/bytes.
442 lines
14 KiB
Rust
442 lines
14 KiB
Rust
//! Define the `instantiate` function, which takes a byte array containing an
|
|
//! encoded wasm module and returns a live wasm instance. Also, define
|
|
//! `CompiledModule` to allow compiling and instantiating to be done as separate
|
|
//! steps.
|
|
|
|
use crate::code_memory::CodeMemory;
|
|
use crate::compiler::{Compilation, Compiler};
|
|
use crate::link::link_module;
|
|
use crate::object::ObjectUnwindInfo;
|
|
use object::File as ObjectFile;
|
|
use serde::{Deserialize, Serialize};
|
|
use std::any::Any;
|
|
use std::collections::HashMap;
|
|
use std::sync::Arc;
|
|
use thiserror::Error;
|
|
use wasmtime_debug::create_gdbjit_image;
|
|
use wasmtime_environ::entity::{BoxedSlice, PrimaryMap};
|
|
use wasmtime_environ::isa::TargetIsa;
|
|
use wasmtime_environ::wasm::{DefinedFuncIndex, SignatureIndex};
|
|
use wasmtime_environ::{
|
|
CompileError, DataInitializer, DataInitializerLocation, FunctionAddressMap, Module,
|
|
ModuleEnvironment, ModuleTranslation, StackMapInformation, TrapInformation,
|
|
};
|
|
use wasmtime_profiling::ProfilingAgent;
|
|
use wasmtime_runtime::{
|
|
GdbJitImageRegistration, Imports, InstanceHandle, InstantiationError, RuntimeMemoryCreator,
|
|
SignatureRegistry, StackMapRegistry, VMExternRefActivationsTable, VMFunctionBody, VMInterrupts,
|
|
VMTrampoline,
|
|
};
|
|
|
|
/// An error condition while setting up a wasm instance, be it validation,
|
|
/// compilation, or instantiation.
|
|
#[derive(Error, Debug)]
|
|
pub enum SetupError {
|
|
/// The module did not pass validation.
|
|
#[error("Validation error: {0}")]
|
|
Validate(String),
|
|
|
|
/// A wasm translation error occured.
|
|
#[error("WebAssembly failed to compile")]
|
|
Compile(#[from] CompileError),
|
|
|
|
/// Some runtime resource was unavailable or insufficient, or the start function
|
|
/// trapped.
|
|
#[error("Instantiation failed during setup")]
|
|
Instantiate(#[from] InstantiationError),
|
|
|
|
/// Debug information generation error occured.
|
|
#[error("Debug information error")]
|
|
DebugInfo(#[from] anyhow::Error),
|
|
}
|
|
|
|
/// Contains all compilation artifacts.
|
|
#[derive(Serialize, Deserialize)]
|
|
pub struct CompilationArtifacts {
|
|
/// Module metadata.
|
|
module: Module,
|
|
|
|
/// ELF image with functions code.
|
|
obj: Box<[u8]>,
|
|
|
|
/// Unwind information for function code.
|
|
unwind_info: Box<[ObjectUnwindInfo]>,
|
|
|
|
/// Data initiailizers.
|
|
data_initializers: Box<[OwnedDataInitializer]>,
|
|
|
|
/// Descriptions of compiled functions
|
|
funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,
|
|
|
|
/// Debug info presence flags.
|
|
debug_info: bool,
|
|
}
|
|
|
|
#[derive(Serialize, Deserialize, Clone)]
|
|
struct FunctionInfo {
|
|
traps: Vec<TrapInformation>,
|
|
address_map: FunctionAddressMap,
|
|
stack_maps: Vec<StackMapInformation>,
|
|
}
|
|
|
|
impl CompilationArtifacts {
|
|
/// Builds compilation artifacts.
|
|
pub fn build(compiler: &Compiler, data: &[u8]) -> Result<Self, SetupError> {
|
|
let environ = ModuleEnvironment::new(
|
|
compiler.frontend_config(),
|
|
compiler.tunables(),
|
|
compiler.features(),
|
|
);
|
|
|
|
let mut translation = environ
|
|
.translate(data)
|
|
.map_err(|error| SetupError::Compile(CompileError::Wasm(error)))?;
|
|
|
|
let Compilation {
|
|
obj,
|
|
unwind_info,
|
|
funcs,
|
|
} = compiler.compile(&mut translation)?;
|
|
|
|
let ModuleTranslation {
|
|
module,
|
|
data_initializers,
|
|
..
|
|
} = translation;
|
|
|
|
let data_initializers = data_initializers
|
|
.into_iter()
|
|
.map(OwnedDataInitializer::new)
|
|
.collect::<Vec<_>>()
|
|
.into_boxed_slice();
|
|
|
|
let obj = obj.write().map_err(|_| {
|
|
SetupError::Instantiate(InstantiationError::Resource(
|
|
"failed to create image memory".to_string(),
|
|
))
|
|
})?;
|
|
|
|
Ok(Self {
|
|
module,
|
|
obj: obj.into_boxed_slice(),
|
|
unwind_info: unwind_info.into_boxed_slice(),
|
|
data_initializers,
|
|
funcs: funcs
|
|
.into_iter()
|
|
.map(|(_, func)| FunctionInfo {
|
|
stack_maps: func.stack_maps,
|
|
traps: func.traps,
|
|
address_map: func.address_map,
|
|
})
|
|
.collect(),
|
|
debug_info: compiler.tunables().debug_info,
|
|
})
|
|
}
|
|
}
|
|
|
|
struct FinishedFunctions(BoxedSlice<DefinedFuncIndex, *mut [VMFunctionBody]>);
|
|
|
|
unsafe impl Send for FinishedFunctions {}
|
|
unsafe impl Sync for FinishedFunctions {}
|
|
|
|
/// Container for data needed for an Instance function to exist.
|
|
pub struct ModuleCode {
|
|
code_memory: CodeMemory,
|
|
#[allow(dead_code)]
|
|
dbg_jit_registration: Option<GdbJitImageRegistration>,
|
|
}
|
|
|
|
/// A compiled wasm module, ready to be instantiated.
|
|
pub struct CompiledModule {
|
|
module: Arc<Module>,
|
|
code: Arc<ModuleCode>,
|
|
finished_functions: FinishedFunctions,
|
|
trampolines: PrimaryMap<SignatureIndex, VMTrampoline>,
|
|
data_initializers: Box<[OwnedDataInitializer]>,
|
|
funcs: PrimaryMap<DefinedFuncIndex, FunctionInfo>,
|
|
obj: Box<[u8]>,
|
|
unwind_info: Box<[ObjectUnwindInfo]>,
|
|
}
|
|
|
|
impl CompiledModule {
|
|
/// Compile a data buffer into a `CompiledModule`, which may then be instantiated.
|
|
pub fn new<'data>(
|
|
compiler: &Compiler,
|
|
data: &'data [u8],
|
|
profiler: &dyn ProfilingAgent,
|
|
) -> Result<Self, SetupError> {
|
|
let artifacts = CompilationArtifacts::build(compiler, data)?;
|
|
Self::from_artifacts(artifacts, compiler.isa(), profiler)
|
|
}
|
|
|
|
/// Creates `CompiledModule` directly from `CompilationArtifacts`.
|
|
pub fn from_artifacts(
|
|
artifacts: CompilationArtifacts,
|
|
isa: &dyn TargetIsa,
|
|
profiler: &dyn ProfilingAgent,
|
|
) -> Result<Self, SetupError> {
|
|
let CompilationArtifacts {
|
|
module,
|
|
obj,
|
|
unwind_info,
|
|
data_initializers,
|
|
funcs,
|
|
debug_info,
|
|
} = artifacts;
|
|
|
|
// Allocate all of the compiled functions into executable memory,
|
|
// copying over their contents.
|
|
let (code_memory, code_range, finished_functions, trampolines) =
|
|
build_code_memory(isa, &obj, &module, &unwind_info).map_err(|message| {
|
|
SetupError::Instantiate(InstantiationError::Resource(format!(
|
|
"failed to build code memory for functions: {}",
|
|
message
|
|
)))
|
|
})?;
|
|
|
|
// Register GDB JIT images; initialize profiler and load the wasm module.
|
|
let dbg_jit_registration = if debug_info {
|
|
let bytes = create_dbg_image(obj.to_vec(), code_range, &module, &finished_functions)?;
|
|
|
|
profiler.module_load(&module, &finished_functions, Some(&bytes));
|
|
|
|
let reg = GdbJitImageRegistration::register(bytes);
|
|
Some(reg)
|
|
} else {
|
|
profiler.module_load(&module, &finished_functions, None);
|
|
None
|
|
};
|
|
|
|
let finished_functions = FinishedFunctions(finished_functions.into_boxed_slice());
|
|
|
|
Ok(Self {
|
|
module: Arc::new(module),
|
|
code: Arc::new(ModuleCode {
|
|
code_memory,
|
|
dbg_jit_registration,
|
|
}),
|
|
finished_functions,
|
|
trampolines,
|
|
data_initializers,
|
|
funcs,
|
|
obj,
|
|
unwind_info,
|
|
})
|
|
}
|
|
|
|
/// Extracts `CompilationArtifacts` from the compiled module.
|
|
pub fn to_compilation_artifacts(&self) -> CompilationArtifacts {
|
|
CompilationArtifacts {
|
|
module: (*self.module).clone(),
|
|
obj: self.obj.clone(),
|
|
unwind_info: self.unwind_info.clone(),
|
|
data_initializers: self.data_initializers.clone(),
|
|
funcs: self.funcs.clone(),
|
|
debug_info: self.code.dbg_jit_registration.is_some(),
|
|
}
|
|
}
|
|
|
|
/// Crate an `Instance` from this `CompiledModule`.
|
|
///
|
|
/// Note that if only one instance of this module is needed, it may be more
|
|
/// efficient to call the top-level `instantiate`, since that avoids copying
|
|
/// the data initializers.
|
|
///
|
|
/// # Unsafety
|
|
///
|
|
/// See `InstanceHandle::new`
|
|
pub unsafe fn instantiate(
|
|
&self,
|
|
imports: Imports<'_>,
|
|
signature_registry: &mut SignatureRegistry,
|
|
mem_creator: Option<&dyn RuntimeMemoryCreator>,
|
|
interrupts: Arc<VMInterrupts>,
|
|
host_state: Box<dyn Any>,
|
|
externref_activations_table: *mut VMExternRefActivationsTable,
|
|
stack_map_registry: *mut StackMapRegistry,
|
|
) -> Result<InstanceHandle, InstantiationError> {
|
|
// Compute indices into the shared signature table.
|
|
let signatures = {
|
|
self.module
|
|
.signatures
|
|
.values()
|
|
.map(|(wasm_sig, native)| {
|
|
signature_registry.register(wasm_sig.clone(), native.clone())
|
|
})
|
|
.collect::<PrimaryMap<_, _>>()
|
|
};
|
|
|
|
let mut trampolines = HashMap::new();
|
|
for (i, trampoline) in self.trampolines.iter() {
|
|
trampolines.insert(signatures[i], trampoline.clone());
|
|
}
|
|
|
|
let finished_functions = self.finished_functions.0.clone();
|
|
|
|
InstanceHandle::new(
|
|
self.module.clone(),
|
|
self.code.clone(),
|
|
finished_functions,
|
|
trampolines,
|
|
imports,
|
|
mem_creator,
|
|
signatures.into_boxed_slice(),
|
|
host_state,
|
|
interrupts,
|
|
externref_activations_table,
|
|
stack_map_registry,
|
|
)
|
|
}
|
|
|
|
/// Returns data initializers to pass to `InstanceHandle::initialize`
|
|
pub fn data_initializers(&self) -> Vec<DataInitializer<'_>> {
|
|
self.data_initializers
|
|
.iter()
|
|
.map(|init| DataInitializer {
|
|
location: init.location.clone(),
|
|
data: &*init.data,
|
|
})
|
|
.collect()
|
|
}
|
|
|
|
/// Return a reference-counting pointer to a module.
|
|
pub fn module(&self) -> &Arc<Module> {
|
|
&self.module
|
|
}
|
|
|
|
/// Return a reference to a mutable module (if possible).
|
|
pub fn module_mut(&mut self) -> Option<&mut Module> {
|
|
Arc::get_mut(&mut self.module)
|
|
}
|
|
|
|
/// Returns the map of all finished JIT functions compiled for this module
|
|
pub fn finished_functions(&self) -> &BoxedSlice<DefinedFuncIndex, *mut [VMFunctionBody]> {
|
|
&self.finished_functions.0
|
|
}
|
|
|
|
/// Returns the stack map information for all functions defined in this
|
|
/// module.
|
|
///
|
|
/// The iterator returned iterates over the span of the compiled function in
|
|
/// memory with the stack maps associated with those bytes.
|
|
pub fn stack_maps(
|
|
&self,
|
|
) -> impl Iterator<Item = (*mut [VMFunctionBody], &[StackMapInformation])> {
|
|
self.finished_functions()
|
|
.values()
|
|
.copied()
|
|
.zip(self.funcs.values().map(|f| f.stack_maps.as_slice()))
|
|
}
|
|
|
|
/// Iterates over all functions in this module, returning information about
|
|
/// how to decode traps which happen in the function.
|
|
pub fn trap_information(
|
|
&self,
|
|
) -> impl Iterator<
|
|
Item = (
|
|
DefinedFuncIndex,
|
|
*mut [VMFunctionBody],
|
|
&[TrapInformation],
|
|
&FunctionAddressMap,
|
|
),
|
|
> {
|
|
self.finished_functions()
|
|
.iter()
|
|
.zip(self.funcs.values())
|
|
.map(|((i, alloc), func)| (i, *alloc, func.traps.as_slice(), &func.address_map))
|
|
}
|
|
|
|
/// Returns all ranges convered by JIT code.
|
|
pub fn jit_code_ranges<'a>(&'a self) -> impl Iterator<Item = (usize, usize)> + 'a {
|
|
self.code.code_memory.published_ranges()
|
|
}
|
|
|
|
/// Returns module's JIT code.
|
|
pub fn code(&self) -> &Arc<ModuleCode> {
|
|
&self.code
|
|
}
|
|
}
|
|
|
|
/// Similar to `DataInitializer`, but owns its own copy of the data rather
|
|
/// than holding a slice of the original module.
|
|
#[derive(Clone, Serialize, Deserialize)]
|
|
pub struct OwnedDataInitializer {
|
|
/// The location where the initialization is to be performed.
|
|
location: DataInitializerLocation,
|
|
|
|
/// The initialization data.
|
|
data: Box<[u8]>,
|
|
}
|
|
|
|
impl OwnedDataInitializer {
|
|
fn new(borrowed: DataInitializer<'_>) -> Self {
|
|
Self {
|
|
location: borrowed.location.clone(),
|
|
data: borrowed.data.to_vec().into_boxed_slice(),
|
|
}
|
|
}
|
|
}
|
|
|
|
fn create_dbg_image(
|
|
obj: Vec<u8>,
|
|
code_range: (*const u8, usize),
|
|
module: &Module,
|
|
finished_functions: &PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>,
|
|
) -> Result<Vec<u8>, SetupError> {
|
|
let funcs = finished_functions
|
|
.values()
|
|
.map(|allocated: &*mut [VMFunctionBody]| (*allocated) as *const u8)
|
|
.collect::<Vec<_>>();
|
|
create_gdbjit_image(obj, code_range, module.num_imported_funcs, &funcs)
|
|
.map_err(SetupError::DebugInfo)
|
|
}
|
|
|
|
fn build_code_memory(
|
|
isa: &dyn TargetIsa,
|
|
obj: &[u8],
|
|
module: &Module,
|
|
unwind_info: &Box<[ObjectUnwindInfo]>,
|
|
) -> Result<
|
|
(
|
|
CodeMemory,
|
|
(*const u8, usize),
|
|
PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>,
|
|
PrimaryMap<SignatureIndex, VMTrampoline>,
|
|
),
|
|
String,
|
|
> {
|
|
let obj = ObjectFile::parse(obj).map_err(|_| "Unable to read obj".to_string())?;
|
|
|
|
let mut code_memory = CodeMemory::new();
|
|
|
|
let allocation = code_memory.allocate_for_object(&obj, unwind_info)?;
|
|
|
|
// Second, create a PrimaryMap from result vector of pointers.
|
|
let mut finished_functions = PrimaryMap::new();
|
|
for (i, fat_ptr) in allocation.funcs() {
|
|
let fat_ptr: *mut [VMFunctionBody] = fat_ptr;
|
|
assert_eq!(
|
|
Some(finished_functions.push(fat_ptr)),
|
|
module.defined_func_index(i)
|
|
);
|
|
}
|
|
|
|
let mut trampolines = PrimaryMap::new();
|
|
for (i, fat_ptr) in allocation.trampolines() {
|
|
let fat_ptr =
|
|
unsafe { std::mem::transmute::<*const VMFunctionBody, VMTrampoline>(fat_ptr.as_ptr()) };
|
|
assert_eq!(trampolines.push(fat_ptr), i);
|
|
}
|
|
|
|
let code_range = allocation.code_range();
|
|
|
|
link_module(&obj, &module, code_range, &finished_functions);
|
|
|
|
let code_range = (code_range.as_ptr(), code_range.len());
|
|
|
|
// Make all code compiled thus far executable.
|
|
code_memory.publish(isa);
|
|
|
|
Ok((code_memory, code_range, finished_functions, trampolines))
|
|
}
|