Refactor module instantiation in the runtime.

This commit refactors module instantiation in the runtime so that different
instance allocation strategies can be implemented.

It adds an `InstanceAllocator` trait and moves the existing allocation logic
behind an `OnDemandInstanceAllocator` struct that implements it.
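
A rough sketch of the shape of this split (the signatures below are simplified
for illustration; the commit's real trait also deals with imports, host state,
signature lookup, and so on, and `OnDemandInstanceAllocator`'s internals are
elided):

```rust
use std::sync::Arc;
// `Module`, `InstanceHandle`, and `InstantiationError` are the runtime
// crate's existing types, visible in the diff below.

pub trait InstanceAllocator {
    /// Allocates all resources (vmctx, tables, memories) for one instance.
    fn allocate(&self, module: Arc<Module>) -> Result<InstanceHandle, InstantiationError>;

    /// Deallocates an instance previously returned by `allocate`.
    ///
    /// # Safety
    /// The handle must not be used after this call.
    unsafe fn deallocate(&self, handle: &InstanceHandle);
}

/// Preserves the pre-existing behavior: each instance's memory is
/// obtained from the system allocator as it is requested.
pub struct OnDemandInstanceAllocator;
```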

The Wasmtime API has been updated so that a `Config` can specify an instance
allocation strategy, which determines how instances are allocated.
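
On the embedding side this looks roughly like the following (a sketch:
`allocation_strategy` and `InstanceAllocationStrategy` reflect the public API
as it later stabilized and may differ from this commit's exact names):

```rust
use wasmtime::{Config, InstanceAllocationStrategy};

fn configure() -> Config {
    let mut config = Config::new();
    // On-demand allocation is the existing (and default) behavior; a
    // pooling strategy is the planned alternative.
    config.allocation_strategy(InstanceAllocationStrategy::OnDemand);
    config
}
```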

This change is in preparation for an alternative *pooling* instance allocator
that can reserve all needed host process address space in advance.
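
For illustration only (no pooling allocator is included in this commit, and
the types below are hypothetical), reserving address space in advance usually
means creating one large `PROT_NONE` mapping and carving it into fixed-size
instance slots, with pages committed lazily as they are used. A minimal sketch
assuming the `libc` crate on a Unix target:

```rust
use std::io;

/// Hypothetical pool: one big uncommitted reservation split into
/// fixed-size slots, one per potential instance.
struct InstancePool {
    base: *mut u8,
    slot_size: usize,
    capacity: usize,
}

impl InstancePool {
    fn new(slot_size: usize, capacity: usize) -> io::Result<Self> {
        let len = slot_size * capacity;
        // Reserve the whole range without committing it; PROT_NONE pages
        // cost no physical memory until their protection is relaxed.
        let base = unsafe {
            libc::mmap(
                std::ptr::null_mut(),
                len,
                libc::PROT_NONE,
                libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
                -1,
                0,
            )
        };
        if base == libc::MAP_FAILED {
            return Err(io::Error::last_os_error());
        }
        Ok(Self { base: base.cast(), slot_size, capacity })
    }
}
```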

This commit also makes changes to the `wasmtime_environ` crate to represent
compiled modules in a way that reduces copying at instantiation time.
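
The key idea is visible in the diff below: passive segment payloads now live
in the shared `Module`, and each instance records only which segments have
been dropped, using an `EntitySet` bit set, so instantiation no longer clones
segment contents into per-instance `HashMap`s. Schematically (simplified; the
field names follow the diff, assuming the `wasmtime_environ` index types are
in scope):

```rust
use std::cell::RefCell;
use std::collections::HashMap;

// Owned once by the compiled module and shared between instances:
pub struct Module {
    /// Maps a wasm element-segment index to a slot in `passive_elements`.
    pub passive_elements_map: HashMap<ElemIndex, usize>,
    pub passive_elements: Vec<Box<[FuncIndex]>>,
    // ... the same pattern is used for passive data segments ...
}

// Per instance: construction is just two empty bit sets.
pub(crate) struct Instance {
    dropped_elements: RefCell<EntitySet<ElemIndex>>,
    dropped_data: RefCell<EntitySet<DataIndex>>,
    // ...
}
```

With this layout, `elem.drop` and `data.drop` flip a bit instead of removing a
per-instance heap allocation, as the hunks below show.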
Author: Peter Huene
Date:   2020-11-25 16:10:09 -08:00
parent 8854dec01d
commit b58afbf849
14 changed files with 829 additions and 686 deletions


@@ -4,12 +4,11 @@
use crate::export::Export;
use crate::externref::{StackMapRegistry, VMExternRefActivationsTable};
use crate::imports::Imports;
use crate::memory::{DefaultMemoryCreator, RuntimeLinearMemory, RuntimeMemoryCreator};
use crate::memory::{RuntimeLinearMemory, RuntimeMemoryCreator};
use crate::table::{Table, TableElement};
use crate::traphandlers::Trap;
use crate::vmcontext::{
VMBuiltinFunctionsArray, VMCallerCheckedAnyfunc, VMContext, VMFunctionBody, VMFunctionImport,
VMBuiltinFunctionsArray, VMCallerCheckedAnyfunc, VMContext, VMFunctionImport,
VMGlobalDefinition, VMGlobalImport, VMInterrupts, VMMemoryDefinition, VMMemoryImport,
VMSharedSignatureIndex, VMTableDefinition, VMTableImport,
};
@@ -17,23 +16,24 @@ use crate::{ExportFunction, ExportGlobal, ExportMemory, ExportTable};
use indexmap::IndexMap;
use memoffset::offset_of;
use more_asserts::assert_lt;
use std::alloc::{self, Layout};
use std::alloc::Layout;
use std::any::Any;
use std::cell::RefCell;
use std::collections::HashMap;
use std::convert::TryFrom;
use std::ptr::NonNull;
use std::rc::Rc;
use std::sync::Arc;
use std::{mem, ptr, slice};
use thiserror::Error;
use wasmtime_environ::entity::{packed_option::ReservedValue, BoxedSlice, EntityRef, PrimaryMap};
use wasmtime_environ::entity::{packed_option::ReservedValue, BoxedSlice, EntityRef, EntitySet};
use wasmtime_environ::wasm::{
DataIndex, DefinedFuncIndex, DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex,
ElemIndex, EntityIndex, FuncIndex, GlobalIndex, GlobalInit, MemoryIndex, SignatureIndex,
TableElementType, TableIndex, WasmType,
DataIndex, DefinedGlobalIndex, DefinedMemoryIndex, DefinedTableIndex, ElemIndex, EntityIndex,
FuncIndex, GlobalIndex, MemoryIndex, TableElementType, TableIndex,
};
use wasmtime_environ::{ir, DataInitializer, Module, ModuleType, TableElements, VMOffsets};
use wasmtime_environ::{ir, Module, VMOffsets};
mod allocator;
pub use allocator::*;
/// Runtime representation of an instance value, which erases all `Instance`
/// information since instances are just a collection of values.
@@ -56,14 +56,13 @@ pub(crate) struct Instance {
/// WebAssembly table data.
tables: BoxedSlice<DefinedTableIndex, Table>,
/// Passive elements in this instantiation. As `elem.drop`s happen, these
/// entries get removed. A missing entry is considered equivalent to an
/// empty slice.
passive_elements: RefCell<HashMap<ElemIndex, Box<[*mut VMCallerCheckedAnyfunc]>>>,
/// Stores the dropped passive element segments in this instantiation by index.
/// If the index is present in the set, the segment has been dropped.
dropped_elements: RefCell<EntitySet<ElemIndex>>,
/// Passive data segments from our module. As `data.drop`s happen, entries
/// get removed. A missing entry is considered equivalent to an empty slice.
passive_data: RefCell<HashMap<DataIndex, Arc<[u8]>>>,
/// Stores the dropped passive data segments in this instantiation by index.
/// If the index is present in the set, the segment has been dropped.
dropped_data: RefCell<EntitySet<DataIndex>>,
/// Hosts can store arbitrary per-instance information here.
host_state: Box<dyn Any>,
@@ -551,11 +550,21 @@ impl Instance {
// https://webassembly.github.io/bulk-memory-operations/core/exec/instructions.html#exec-table-init
let table = self.get_table(table_index);
let passive_elements = self.passive_elements.borrow();
let elem = passive_elements
.get(&elem_index)
.map(|e| &**e)
.unwrap_or_else(|| &[]);
let elem_index = self.module.passive_elements_map.get(&elem_index);
let elem = match elem_index {
Some(index) => {
if self
.dropped_elements
.borrow()
.contains(ElemIndex::new(*index))
{
&[]
} else {
self.module.passive_elements[*index].as_ref()
}
}
None => &[],
};
if src
.checked_add(len)
@@ -567,8 +576,14 @@ impl Instance {
// TODO(#983): investigate replacing this get/set loop with a `memcpy`.
for (dst, src) in (dst..dst + len).zip(src..src + len) {
let elem = self
.get_caller_checked_anyfunc(elem[src as usize])
.map_or(ptr::null_mut(), |f: &VMCallerCheckedAnyfunc| {
f as *const VMCallerCheckedAnyfunc as *mut _
});
table
.set(dst, TableElement::FuncRef(elem[src as usize]))
.set(dst, TableElement::FuncRef(elem))
.expect("should never panic because we already did the bounds check above");
}
@@ -579,10 +594,14 @@ impl Instance {
pub(crate) fn elem_drop(&self, elem_index: ElemIndex) {
// https://webassembly.github.io/reference-types/core/exec/instructions.html#exec-elem-drop
let mut passive_elements = self.passive_elements.borrow_mut();
passive_elements.remove(&elem_index);
// Note that we don't check that we actually removed an element because
// dropping a non-passive element is a no-op (not a trap).
if let Some(index) = self.module.passive_elements_map.get(&elem_index) {
self.dropped_elements
.borrow_mut()
.insert(ElemIndex::new(*index));
}
// Note that we don't check that we actually removed a segment because
// dropping a non-passive segment is a no-op (not a trap).
}
/// Do a `memory.copy`
@@ -701,10 +720,17 @@ impl Instance {
// https://webassembly.github.io/bulk-memory-operations/core/exec/instructions.html#exec-memory-init
let memory = self.get_memory(memory_index);
let passive_data = self.passive_data.borrow();
let data = passive_data
.get(&data_index)
.map_or(&[][..], |data| &**data);
let data_index = self.module.passive_data_map.get(&data_index);
let data = match data_index {
Some(index) => {
if self.dropped_data.borrow().contains(DataIndex::new(*index)) {
&[]
} else {
self.module.passive_data[*index].as_ref()
}
}
None => &[],
};
if src
.checked_add(len)
@@ -729,8 +755,14 @@ impl Instance {
/// Drop the given data segment, truncating its length to zero.
pub(crate) fn data_drop(&self, data_index: DataIndex) {
let mut passive_data = self.passive_data.borrow_mut();
passive_data.remove(&data_index);
if let Some(index) = self.module.passive_data_map.get(&data_index) {
self.dropped_data
.borrow_mut()
.insert(DataIndex::new(*index));
}
// Note that we don't check that we actually removed a segment because
// dropping a non-passive segment is a no-op (not a trap).
}
/// Get a table by index regardless of whether it is locally-defined or an
@@ -780,197 +812,8 @@ pub struct InstanceHandle {
}
impl InstanceHandle {
/// Create a new `InstanceHandle` pointing at a new `Instance`.
///
/// # Unsafety
///
/// This method is not necessarily inherently unsafe to call, but in general
/// the APIs of an `Instance` are quite unsafe and have not been really
/// audited for safety that much. As a result the unsafety here on this
/// method is a low-overhead way of saying "this is an extremely unsafe type
/// to work with".
///
/// Extreme care must be taken when working with `InstanceHandle` and it's
/// recommended to have relatively intimate knowledge of how it works
/// internally if you'd like to do so. If possible it's recommended to use
/// the `wasmtime` crate API rather than this type since that is vetted for
/// safety.
///
/// It is your responsibility to ensure that the given raw
/// `externref_activations_table` and `stack_map_registry` outlive this
/// instance.
pub unsafe fn new(
module: Arc<Module>,
finished_functions: &PrimaryMap<DefinedFuncIndex, *mut [VMFunctionBody]>,
imports: Imports,
mem_creator: Option<&dyn RuntimeMemoryCreator>,
lookup_shared_signature: &dyn Fn(SignatureIndex) -> VMSharedSignatureIndex,
host_state: Box<dyn Any>,
interrupts: *const VMInterrupts,
externref_activations_table: *mut VMExternRefActivationsTable,
stack_map_registry: *mut StackMapRegistry,
) -> Result<Self, InstantiationError> {
debug_assert!(!externref_activations_table.is_null());
debug_assert!(!stack_map_registry.is_null());
let tables = create_tables(&module);
let memories = create_memories(&module, mem_creator.unwrap_or(&DefaultMemoryCreator {}))?;
let vmctx_tables = tables
.values()
.map(Table::vmtable)
.collect::<PrimaryMap<DefinedTableIndex, _>>()
.into_boxed_slice();
let vmctx_memories = memories
.values()
.map(|a| a.vmmemory())
.collect::<PrimaryMap<DefinedMemoryIndex, _>>()
.into_boxed_slice();
let vmctx_globals = create_globals(&module);
let offsets = VMOffsets::new(mem::size_of::<*const u8>() as u8, &module);
let passive_data = RefCell::new(module.passive_data.clone());
let handle = {
let instance = Instance {
module,
offsets,
memories,
tables,
passive_elements: Default::default(),
passive_data,
host_state,
vmctx: VMContext {},
};
let layout = instance.alloc_layout();
let instance_ptr = alloc::alloc(layout) as *mut Instance;
if instance_ptr.is_null() {
alloc::handle_alloc_error(layout);
}
ptr::write(instance_ptr, instance);
InstanceHandle {
instance: instance_ptr,
}
};
let instance = handle.instance();
let mut ptr = instance.signature_ids_ptr();
for sig in handle.module().types.values() {
*ptr = match sig {
ModuleType::Function(sig) => lookup_shared_signature(*sig),
_ => VMSharedSignatureIndex::new(u32::max_value()),
};
ptr = ptr.add(1);
}
debug_assert_eq!(imports.functions.len(), handle.module().num_imported_funcs);
ptr::copy(
imports.functions.as_ptr(),
instance.imported_functions_ptr() as *mut VMFunctionImport,
imports.functions.len(),
);
debug_assert_eq!(imports.tables.len(), handle.module().num_imported_tables);
ptr::copy(
imports.tables.as_ptr(),
instance.imported_tables_ptr() as *mut VMTableImport,
imports.tables.len(),
);
debug_assert_eq!(
imports.memories.len(),
handle.module().num_imported_memories
);
ptr::copy(
imports.memories.as_ptr(),
instance.imported_memories_ptr() as *mut VMMemoryImport,
imports.memories.len(),
);
debug_assert_eq!(imports.globals.len(), handle.module().num_imported_globals);
ptr::copy(
imports.globals.as_ptr(),
instance.imported_globals_ptr() as *mut VMGlobalImport,
imports.globals.len(),
);
ptr::copy(
vmctx_tables.values().as_slice().as_ptr(),
instance.tables_ptr() as *mut VMTableDefinition,
vmctx_tables.len(),
);
ptr::copy(
vmctx_memories.values().as_slice().as_ptr(),
instance.memories_ptr() as *mut VMMemoryDefinition,
vmctx_memories.len(),
);
ptr::copy(
vmctx_globals.values().as_slice().as_ptr(),
instance.globals_ptr() as *mut VMGlobalDefinition,
vmctx_globals.len(),
);
ptr::write(
instance.builtin_functions_ptr() as *mut VMBuiltinFunctionsArray,
VMBuiltinFunctionsArray::initialized(),
);
*instance.interrupts() = interrupts;
*instance.externref_activations_table() = externref_activations_table;
*instance.stack_map_registry() = stack_map_registry;
for (index, sig) in instance.module.functions.iter() {
let type_index = lookup_shared_signature(*sig);
let (func_ptr, vmctx) =
if let Some(def_index) = instance.module.defined_func_index(index) {
(
NonNull::new(finished_functions[def_index] as *mut _).unwrap(),
instance.vmctx_ptr(),
)
} else {
let import = instance.imported_function(index);
(import.body, import.vmctx)
};
ptr::write(
instance.anyfunc_ptr(index),
VMCallerCheckedAnyfunc {
func_ptr,
type_index,
vmctx,
},
);
}
// Perform infallible initialization in this constructor, while fallible
// initialization is deferred to the `initialize` method.
initialize_passive_elements(instance);
initialize_globals(instance);
Ok(handle)
}
/// Finishes the instantiation process started by `Instance::new`.
///
/// Only safe to call immediately after instantiation.
pub unsafe fn initialize(
&self,
is_bulk_memory: bool,
data_initializers: &[DataInitializer<'_>],
) -> Result<(), InstantiationError> {
// Check initializer bounds before initializing anything. Only do this
// when bulk memory is disabled, since the bulk memory proposal changes
// instantiation such that the intermediate results of failed
// initializations are visible.
if !is_bulk_memory {
check_table_init_bounds(self.instance())?;
check_memory_init_bounds(self.instance(), data_initializers)?;
}
// Apply fallible initializers. Note that this can "leak" state even if
// it fails.
initialize_tables(self.instance())?;
initialize_memories(self.instance(), data_initializers)?;
Ok(())
pub(crate) unsafe fn new(instance: *mut Instance) -> Self {
Self { instance }
}
/// Create a new `InstanceHandle` pointing at the instance
@@ -1126,305 +969,4 @@ impl InstanceHandle {
instance: self.instance,
}
}
/// Deallocates memory associated with this instance.
///
/// Note that this is unsafe because there might be other handles to this
/// `InstanceHandle` elsewhere, and there's nothing preventing usage of
/// this handle after this function is called.
pub unsafe fn dealloc(&self) {
let instance = self.instance();
let layout = instance.alloc_layout();
ptr::drop_in_place(self.instance);
alloc::dealloc(self.instance.cast(), layout);
}
}
fn check_table_init_bounds(instance: &Instance) -> Result<(), InstantiationError> {
for init in &instance.module().table_elements {
let start = get_table_init_start(init, instance);
let table = instance.get_table(init.table_index);
let size = usize::try_from(table.size()).unwrap();
if size < start + init.elements.len() {
return Err(InstantiationError::Link(LinkError(
"table out of bounds: elements segment does not fit".to_owned(),
)));
}
}
Ok(())
}
/// Compute the offset for a memory data initializer.
fn get_memory_init_start(init: &DataInitializer<'_>, instance: &Instance) -> usize {
let mut start = init.location.offset;
if let Some(base) = init.location.base {
let val = unsafe {
if let Some(def_index) = instance.module.defined_global_index(base) {
*instance.global(def_index).as_u32()
} else {
*(*instance.imported_global(base).from).as_u32()
}
};
start += usize::try_from(val).unwrap();
}
start
}
/// Return a byte-slice view of a memory's data.
unsafe fn get_memory_slice<'instance>(
init: &DataInitializer<'_>,
instance: &'instance Instance,
) -> &'instance mut [u8] {
let memory = if let Some(defined_memory_index) = instance
.module
.defined_memory_index(init.location.memory_index)
{
instance.memory(defined_memory_index)
} else {
let import = instance.imported_memory(init.location.memory_index);
let foreign_instance = (&mut *(import).vmctx).instance();
let foreign_memory = &mut *(import).from;
let foreign_index = foreign_instance.memory_index(foreign_memory);
foreign_instance.memory(foreign_index)
};
slice::from_raw_parts_mut(memory.base, memory.current_length)
}
fn check_memory_init_bounds(
instance: &Instance,
data_initializers: &[DataInitializer<'_>],
) -> Result<(), InstantiationError> {
for init in data_initializers {
let start = get_memory_init_start(init, instance);
unsafe {
let mem_slice = get_memory_slice(init, instance);
if mem_slice.get_mut(start..start + init.data.len()).is_none() {
return Err(InstantiationError::Link(LinkError(
"memory out of bounds: data segment does not fit".into(),
)));
}
}
}
Ok(())
}
/// Allocate memory for just the tables of the current module.
fn create_tables(module: &Module) -> BoxedSlice<DefinedTableIndex, Table> {
let num_imports = module.num_imported_tables;
let mut tables: PrimaryMap<DefinedTableIndex, _> =
PrimaryMap::with_capacity(module.table_plans.len() - num_imports);
for table in &module.table_plans.values().as_slice()[num_imports..] {
tables.push(Table::new(table));
}
tables.into_boxed_slice()
}
/// Compute the offset for a table element initializer.
fn get_table_init_start(init: &TableElements, instance: &Instance) -> usize {
let mut start = init.offset;
if let Some(base) = init.base {
let val = unsafe {
if let Some(def_index) = instance.module.defined_global_index(base) {
*instance.global(def_index).as_u32()
} else {
*(*instance.imported_global(base).from).as_u32()
}
};
start += usize::try_from(val).unwrap();
}
start
}
/// Initialize the table memory from the provided initializers.
fn initialize_tables(instance: &Instance) -> Result<(), InstantiationError> {
for init in &instance.module().table_elements {
let start = get_table_init_start(init, instance);
let table = instance.get_table(init.table_index);
if start
.checked_add(init.elements.len())
.map_or(true, |end| end > table.size() as usize)
{
return Err(InstantiationError::Trap(Trap::wasm(
ir::TrapCode::TableOutOfBounds,
)));
}
for (i, func_idx) in init.elements.iter().enumerate() {
let item = match table.element_type() {
TableElementType::Func => instance
.get_caller_checked_anyfunc(*func_idx)
.map_or(ptr::null_mut(), |f: &VMCallerCheckedAnyfunc| {
f as *const VMCallerCheckedAnyfunc as *mut VMCallerCheckedAnyfunc
})
.into(),
TableElementType::Val(_) => {
assert!(*func_idx == FuncIndex::reserved_value());
TableElement::ExternRef(None)
}
};
table.set(u32::try_from(start + i).unwrap(), item).unwrap();
}
}
Ok(())
}
/// Initialize the `Instance::passive_elements` map by resolving the
/// `Module::passive_elements`'s `FuncIndex`s into `VMCallerCheckedAnyfunc`s for
/// this instance.
fn initialize_passive_elements(instance: &Instance) {
let mut passive_elements = instance.passive_elements.borrow_mut();
debug_assert!(
passive_elements.is_empty(),
"should only be called once, at initialization time"
);
passive_elements.extend(
instance
.module
.passive_elements
.iter()
.filter(|(_, segments)| !segments.is_empty())
.map(|(idx, segments)| {
(
*idx,
segments
.iter()
.map(|s| {
instance.get_caller_checked_anyfunc(*s).map_or(
ptr::null_mut(),
|f: &VMCallerCheckedAnyfunc| {
f as *const VMCallerCheckedAnyfunc as *mut _
},
)
})
.collect(),
)
}),
);
}
/// Allocate memory for just the memories of the current module.
fn create_memories(
module: &Module,
mem_creator: &dyn RuntimeMemoryCreator,
) -> Result<BoxedSlice<DefinedMemoryIndex, Box<dyn RuntimeLinearMemory>>, InstantiationError> {
let num_imports = module.num_imported_memories;
let mut memories: PrimaryMap<DefinedMemoryIndex, _> =
PrimaryMap::with_capacity(module.memory_plans.len() - num_imports);
for plan in &module.memory_plans.values().as_slice()[num_imports..] {
memories.push(
mem_creator
.new_memory(plan)
.map_err(InstantiationError::Resource)?,
);
}
Ok(memories.into_boxed_slice())
}
/// Initialize the instance's memories from the provided data initializers.
fn initialize_memories(
instance: &Instance,
data_initializers: &[DataInitializer<'_>],
) -> Result<(), InstantiationError> {
for init in data_initializers {
let memory = instance.get_memory(init.location.memory_index);
let start = get_memory_init_start(init, instance);
if start
.checked_add(init.data.len())
.map_or(true, |end| end > memory.current_length)
{
return Err(InstantiationError::Trap(Trap::wasm(
ir::TrapCode::HeapOutOfBounds,
)));
}
unsafe {
let mem_slice = get_memory_slice(init, instance);
let end = start + init.data.len();
let to_init = &mut mem_slice[start..end];
to_init.copy_from_slice(init.data);
}
}
Ok(())
}
/// Allocate memory for just the globals of the current module,
/// with initializers applied.
fn create_globals(module: &Module) -> BoxedSlice<DefinedGlobalIndex, VMGlobalDefinition> {
let num_imports = module.num_imported_globals;
let mut vmctx_globals = PrimaryMap::with_capacity(module.globals.len() - num_imports);
for _ in &module.globals.values().as_slice()[num_imports..] {
vmctx_globals.push(VMGlobalDefinition::new());
}
vmctx_globals.into_boxed_slice()
}
fn initialize_globals(instance: &Instance) {
let module = instance.module();
let num_imports = module.num_imported_globals;
for (index, global) in module.globals.iter().skip(num_imports) {
let def_index = module.defined_global_index(index).unwrap();
unsafe {
let to = instance.global_ptr(def_index);
match global.initializer {
GlobalInit::I32Const(x) => *(*to).as_i32_mut() = x,
GlobalInit::I64Const(x) => *(*to).as_i64_mut() = x,
GlobalInit::F32Const(x) => *(*to).as_f32_bits_mut() = x,
GlobalInit::F64Const(x) => *(*to).as_f64_bits_mut() = x,
GlobalInit::V128Const(x) => *(*to).as_u128_bits_mut() = x.0,
GlobalInit::GetGlobal(x) => {
let from = if let Some(def_x) = module.defined_global_index(x) {
instance.global(def_x)
} else {
*instance.imported_global(x).from
};
*to = from;
}
GlobalInit::RefFunc(f) => {
*(*to).as_anyfunc_mut() = instance.get_caller_checked_anyfunc(f).unwrap()
as *const VMCallerCheckedAnyfunc;
}
GlobalInit::RefNullConst => match global.wasm_ty {
WasmType::FuncRef => *(*to).as_anyfunc_mut() = ptr::null(),
WasmType::ExternRef => *(*to).as_externref_mut() = None,
ty => panic!("unsupported reference type for global: {:?}", ty),
},
GlobalInit::Import => panic!("locally-defined global initialized as import"),
}
}
}
}
/// A link error while instantiating a module.
#[derive(Error, Debug)]
#[error("Link error: {0}")]
pub struct LinkError(pub String);
/// An error while instantiating a module.
#[derive(Error, Debug)]
pub enum InstantiationError {
/// Insufficient resources available for execution.
#[error("Insufficient resources: {0}")]
Resource(String),
/// A wasm link error occurred.
#[error("Failed to link module")]
Link(#[from] LinkError),
/// A trap occurred during instantiation, after linking.
#[error("Trap occurred during instantiation")]
Trap(Trap),
}