Add a pooling allocator mode based on copy-on-write mappings of memfds.

As first suggested by Jan on the Zulip here [1], a cheap and effective
way to obtain copy-on-write semantics of a "backing image" for a Wasm
memory is to mmap a file with `MAP_PRIVATE`. The `memfd` mechanism
provided by the Linux kernel allows us to create anonymous,
in-memory-only files that we can use for this mapping, so we can
construct the image contents on the fly and then effectively create
a CoW overlay. Furthermore, and importantly,
`madvise(MADV_DONTNEED, ...)`
will discard the CoW overlay, returning the mapping to its original
state.
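
A minimal sketch of that trick, using raw `libc` calls rather than
the `rustix` wrappers this commit actually uses (function name and
sizes are illustrative only):

    use std::ffi::CString;

    const IMAGE_LEN: usize = 65536; // one Wasm page; illustrative

    unsafe fn cow_image_sketch() {
        // An anonymous, in-memory-only file holding the heap image.
        let name = CString::new("wasm-heap-image").unwrap();
        let fd = libc::memfd_create(name.as_ptr(), libc::MFD_CLOEXEC);
        assert!(fd >= 0);
        // ftruncate() zero-fills; a real image would be written in here.
        assert_eq!(libc::ftruncate(fd, IMAGE_LEN as libc::off_t), 0);

        // MAP_PRIVATE: writes land in private CoW pages, never the file.
        let base = libc::mmap(
            std::ptr::null_mut(),
            IMAGE_LEN,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE,
            fd,
            0,
        );
        assert_ne!(base, libc::MAP_FAILED);

        // An instantiation runs and dirties a page...
        *(base as *mut u8) = 42;

        // ...and MADV_DONTNEED throws the dirty CoW pages away, reverting
        // the mapping to the pristine file contents with no re-mmap.
        assert_eq!(libc::madvise(base, IMAGE_LEN, libc::MADV_DONTNEED), 0);
        assert_eq!(*(base as *const u8), 0);
    }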

By itself this is almost enough for a very fast
instantiation-termination loop of the same image over and over,
without changing the address space mapping at all (which is
expensive). The only missing bit is how to implement
heap *growth*. But here memfds can help us again: if we create another
anonymous file and map it where the extended parts of the heap would
go, we can take advantage of the fact that a `mmap()` mapping can
be *larger than the file itself*, with accesses beyond the end
generating a `SIGBUS`, and the fact that we can cheaply resize the
file with `ftruncate`, even after a mapping exists. So we can map the
"heap extension" file once with the maximum memory-slot size and grow
the memfd itself as `memory.grow` operations occur.
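
A sketch of the growth half, under the same caveats (raw `libc`,
illustrative names and sizes):

    const MAX_HEAP: usize = 16 << 20; // maximum slot size; illustrative

    unsafe fn growable_heap_sketch() {
        let name = std::ffi::CString::new("wasm-heap-extension").unwrap();
        let fd = libc::memfd_create(name.as_ptr(), libc::MFD_CLOEXEC);
        assert!(fd >= 0);

        // The file is zero bytes long, but the mapping spans the maximum
        // heap size; touching pages past the file's end raises SIGBUS.
        let base = libc::mmap(
            std::ptr::null_mut(),
            MAX_HEAP,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE,
            fd,
            0,
        );
        assert_ne!(base, libc::MAP_FAILED);

        // memory.grow: one ftruncate() makes the first 128 KiB accessible,
        // without touching the mapping at all.
        assert_eq!(libc::ftruncate(fd, 128 * 1024), 0);
        *(base as *mut u8).add(64 * 1024) = 1; // now in bounds
    }

Note that nothing is ever written through the mapping into the file
itself (`MAP_PRIVATE` keeps all instance data in private pages); the
file's length serves purely as a watermark for which addresses are in
bounds.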

Together, the CoW technique and the heap-growth technique give us a
fast path consisting of only `madvise()` and `ftruncate()` calls
when we re-instantiate the same module over and over, as long as we
can reuse the same slot. This fast path avoids all whole-process
address-space locks in the Linux kernel, which should mean it is
highly scalable. It also avoids the cost of copying data when a
page is first touched, which the `uffd` heap backend pays when
servicing page faults; the kernel's own optimized CoW logic (the
same logic used by all file mmaps) is used instead.
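
So the steady-state teardown of a reused slot reduces, roughly, to
two cheap syscalls (again only a sketch, with hypothetical parameter
names):

    // Reset one slot so the same image can be instantiated again:
    // discard all CoW-dirtied pages, then shrink the extension file
    // so the grown region traps again. No mmap/munmap, and no
    // address-space lock taken.
    unsafe fn reset_slot(
        base: *mut libc::c_void,
        mapped_len: usize,
        extension_fd: libc::c_int,
    ) {
        assert_eq!(libc::madvise(base, mapped_len, libc::MADV_DONTNEED), 0);
        assert_eq!(libc::ftruncate(extension_fd, 0), 0);
    }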

[1] https://bytecodealliance.zulipchat.com/#narrow/stream/206238-general/topic/Copy.20on.20write.20based.20instance.20reuse/near/266657772
Author: Chris Fallin
Date:   2022-01-18 16:42:24 -08:00
Commit: b73ac83c37 (parent 90e7cef56c)
26 changed files with 1070 additions and 135 deletions

@@ -4,28 +4,37 @@ use crate::memory::{DefaultMemoryCreator, Memory};
use crate::table::Table;
use crate::traphandlers::Trap;
use crate::vmcontext::{
VMBuiltinFunctionsArray, VMCallerCheckedAnyfunc, VMContext, VMGlobalDefinition,
VMSharedSignatureIndex,
VMBuiltinFunctionsArray, VMCallerCheckedAnyfunc, VMGlobalDefinition, VMSharedSignatureIndex,
};
use crate::ModuleMemFds;
use crate::Store;
use anyhow::Result;
use std::alloc;
use std::any::Any;
use std::convert::TryFrom;
use std::marker;
use std::ptr::{self, NonNull};
use std::slice;
use std::sync::Arc;
use thiserror::Error;
use wasmtime_environ::{
DefinedFuncIndex, DefinedMemoryIndex, DefinedTableIndex, EntityRef, EntitySet, FunctionInfo,
GlobalInit, HostPtr, MemoryInitialization, MemoryInitializer, Module, ModuleType, PrimaryMap,
SignatureIndex, TableInitializer, TrapCode, VMOffsets, WasmType, WASM_PAGE_SIZE,
DefinedFuncIndex, DefinedMemoryIndex, DefinedTableIndex, EntityRef, FunctionInfo, GlobalInit,
MemoryInitialization, MemoryInitializer, Module, ModuleType, PrimaryMap, SignatureIndex,
TableInitializer, TrapCode, WasmType, WASM_PAGE_SIZE,
};
#[cfg(feature = "pooling-allocator")]
mod pooling;
#[cfg(feature = "memfd-allocator")]
mod memfd;
#[cfg(feature = "memfd-allocator")]
pub use self::memfd::MemFdSlot;
#[cfg(not(feature = "memfd-allocator"))]
mod memfd_disabled;
#[cfg(not(feature = "memfd-allocator"))]
pub use self::memfd_disabled::MemFdSlot;
#[cfg(feature = "pooling-allocator")]
pub use self::pooling::{
InstanceLimits, ModuleLimits, PoolingAllocationStrategy, PoolingInstanceAllocator,
@@ -39,6 +48,9 @@ pub struct InstanceAllocationRequest<'a> {
/// The base address of where JIT functions are located.
pub image_base: usize,
/// If using MemFD-based memories, the backing MemFDs.
pub memfds: Option<Arc<ModuleMemFds>>,
/// Descriptors about each compiled function, such as the offset from
/// `image_base`.
pub functions: &'a PrimaryMap<DefinedFuncIndex, FunctionInfo>,
@@ -376,9 +388,23 @@ fn check_memory_init_bounds(
fn initialize_memories(
instance: &mut Instance,
module: &Module,
initializers: &[MemoryInitializer],
) -> Result<(), InstantiationError> {
for init in initializers {
// Check whether this is a MemFD memory; if so, we can skip
// all initializers.
let memory = init.memory_index;
if let Some(defined_index) = module.defined_memory_index(memory) {
// We can only skip if there is actually a MemFD image. In
// some situations the MemFD image creation code will bail
// (e.g. due to an out of bounds data segment) and so we
// need to fall back on the usual initialization below.
if instance.memories[defined_index].is_memfd_with_image() {
continue;
}
}
instance
.memory_init_segment(
init.memory_index,
@@ -432,6 +458,14 @@ fn initialize_instance(
match &module.memory_initialization {
MemoryInitialization::Paged { map, out_of_bounds } => {
for (index, pages) in map {
// We can only skip if there is actually a MemFD image. In
// some situations the MemFD image creation code will bail
// (e.g. due to an out of bounds data segment) and so we
// need to fall back on the usual initialization below.
if instance.memories[index].is_memfd_with_image() {
continue;
}
let memory = instance.memory(index);
let slice =
unsafe { slice::from_raw_parts_mut(memory.base, memory.current_length) };
@@ -453,7 +487,7 @@ fn initialize_instance(
}
}
MemoryInitialization::Segmented(initializers) => {
initialize_memories(instance, initializers)?;
initialize_memories(instance, module, initializers)?;
}
}
@@ -691,19 +725,8 @@ unsafe impl InstanceAllocator for OnDemandInstanceAllocator {
let host_state = std::mem::replace(&mut req.host_state, Box::new(()));
let mut handle = {
let instance = Instance {
module: req.module.clone(),
offsets: VMOffsets::new(HostPtr, &req.module),
memories,
tables,
dropped_elements: EntitySet::with_capacity(req.module.passive_elements.len()),
dropped_data: EntitySet::with_capacity(req.module.passive_data_map.len()),
host_state,
wasm_data: &*req.wasm_data,
vmctx: VMContext {
_marker: marker::PhantomPinned,
},
};
let instance =
Instance::create_raw(&req.module, &*req.wasm_data, memories, tables, host_state);
let layout = instance.alloc_layout();
let instance_ptr = alloc::alloc(layout) as *mut Instance;
if instance_ptr.is_null() {

@@ -0,0 +1,290 @@
//! memfd mapping logic for use by the pooling allocator.
use crate::memfd::MemoryMemFd;
use crate::InstantiationError;
use anyhow::Result;
use libc::c_void;
use rustix::fd::AsRawFd;
use std::convert::TryFrom;
use std::fs::File;
use std::sync::Arc;
/// A single slot handled by the memfd instance-heap mechanism.
///
/// The mmap scheme is:
///
/// base ==> (points here)
/// - (image.offset bytes) anonymous zero memory, pre-image
/// - (image.len bytes) CoW mapping of memfd heap image
/// - (up to extension_offset) anonymous zero memory, post-image
/// - (up to static_size) heap expansion region; CoW mapping of per-slot memfd
///
/// The ordering of mmaps to set this up is:
///
/// - once, when pooling allocator is created:
/// - one large mmap to create 8GiB * instances * memories slots
///
/// - per instantiation of new image in a slot:
/// - mmap of anonymous zero memory, from 0 to initial heap size
/// - mmap of CoW'd memfd image, from `image.offset` to
/// `image.offset + image.len`. This overwrites part of the
/// anonymous zero memory, potentially splitting it into a pre-
/// and post-region.
/// - mmap of CoW'd extension file, past the initial heap size up to
/// the end of the max memory size (just before the
/// post-guard). This is always adjacent to the above mmaps, but
/// does not overlap/overwrite them.
#[derive(Debug)]
pub struct MemFdSlot {
/// The base of the actual heap memory. Bytes at this address are
/// what is seen by the Wasm guest code.
base: usize,
/// The maximum static memory size, plus post-guard.
static_size: usize,
/// The memfd image that backs this memory. May be `None`, in
/// which case the memory is all zeroes.
pub(crate) image: Option<Arc<MemoryMemFd>>,
/// The offset at which the "extension file", which is used to
/// allow for efficient heap growth, is mapped. This is always
/// immediately after the end of the initial memory size.
extension_offset: usize,
/// The anonymous memfd, owned by this slot, which we mmap in the
/// area where the heap may grow during runtime. We use the
/// ftruncate() syscall (invoked via `File::set_len()`) to set its
/// size. We never write any data to it -- we CoW-map it so we can
/// throw away dirty data on termination. Instead, we just use its
/// size as a "watermark" that delineates the boundary between
/// safe-to-access memory and SIGBUS-causing memory. (This works
/// because one can mmap a file beyond its end, and is good
/// because ftruncate does not take the process-wide lock that
/// mmap and mprotect do.)
extension_file: File,
/// Whether this slot may have "dirty" pages (pages written by an
/// instantiation). Set by `instantiate()` and cleared by
/// `clear_and_remain_ready()`, and used in assertions to ensure
/// those methods are called properly.
dirty: bool,
}
impl MemFdSlot {
pub(crate) fn create(
base_addr: *mut c_void,
static_size: usize,
) -> Result<Self, InstantiationError> {
let base = base_addr as usize;
// Create a MemFD for the memory growth first -- this covers
// extended heap beyond the initial image.
let extension_memfd = memfd::MemfdOptions::new()
.allow_sealing(true)
.create("wasm-anonymous-heap")
.map_err(|e| InstantiationError::Resource(e.into()))?;
// Seal the ability to write the extension file (make it
// permanently read-only). This is a defense-in-depth
// mitigation to make extra-sure that we don't leak
// information between instantiations. See note in `memfd.rs`
// for more about why we use seals.
extension_memfd
.add_seal(memfd::FileSeal::SealWrite)
.map_err(|e| InstantiationError::Resource(e.into()))?;
extension_memfd
.add_seal(memfd::FileSeal::SealSeal)
.map_err(|e| InstantiationError::Resource(e.into()))?;
let extension_file = extension_memfd.into_file();
extension_file
.set_len(0)
.map_err(|e| InstantiationError::Resource(e.into()))?;
Ok(MemFdSlot {
base,
static_size,
image: None,
extension_file,
extension_offset: 0,
dirty: false,
})
}
pub(crate) fn set_heap_limit(&mut self, size_bytes: usize) -> Result<()> {
assert!(size_bytes >= self.extension_offset);
// This is all that is needed to make the new memory
// accessible; we don't need to mprotect anything. (The
// mapping itself is always R+W for the max possible heap
// size, and only the anonymous-backing file length catches
// out-of-bounds accesses.)
self.extension_file
.set_len(u64::try_from(size_bytes - self.extension_offset).unwrap())?;
Ok(())
}
pub(crate) fn instantiate(
&mut self,
initial_size_bytes: usize,
maybe_image: Option<&Arc<MemoryMemFd>>,
) -> Result<(), InstantiationError> {
assert!(!self.dirty);
if let Some(existing_image) = &self.image {
// Fast-path: previously instantiated with the same image,
// so the mappings are already correct; there is no need
// to mmap anything. Given that we asserted not-dirty
// above, any dirty pages will have already been thrown
// away by madvise() during the previous termination.
if let Some(image) = maybe_image {
if existing_image.fd.as_file().as_raw_fd() == image.fd.as_file().as_raw_fd() {
self.dirty = true;
return Ok(());
}
}
}
// Otherwise, we need to redo (i) the anonymous-mmap backing
// for the initial heap size, (ii) the extension-file backing,
// and (iii) the initial-heap-image mapping if present.
// Security/audit note: we map all of these MAP_PRIVATE, so
// all instance data is local to the mapping, not propagated
// to the backing fd. We throw away this CoW overlay with
// madvise() below, from base up to extension_offset (which is
// at least initial_size_bytes, and extended when the
// extension file is, so it covers all three mappings) when
// terminating the instance.
// Anonymous mapping behind the initial heap size: this gives
// zeroes for any "holes" in the initial heap image. Anonymous
// mmap memory is faster to fault in than a CoW of a file,
// even a file with zero holes, because the kernel's CoW path
// unconditionally copies *something* (even if just a page of
// zeroes). Anonymous zero pages are fast: the kernel
// pre-zeroes them, and even if it runs out of those, a memset
// is half as expensive as a memcpy (only writes, no reads).
if initial_size_bytes > 0 {
unsafe {
let ptr = rustix::io::mmap_anonymous(
self.base as *mut c_void,
initial_size_bytes,
rustix::io::ProtFlags::READ | rustix::io::ProtFlags::WRITE,
rustix::io::MapFlags::PRIVATE | rustix::io::MapFlags::FIXED,
)
.map_err(|e| InstantiationError::Resource(e.into()))?;
assert_eq!(ptr as usize, self.base);
}
}
// An "extension file": this allows us to grow the heap by
// doing just an ftruncate(), without changing any
// mappings. This is important to avoid the process-wide mmap
// lock on Linux.
self.extension_offset = initial_size_bytes;
let extension_map_len = self.static_size - initial_size_bytes;
if extension_map_len > 0 {
unsafe {
let fd = rustix::fd::BorrowedFd::borrow_raw_fd(self.extension_file.as_raw_fd());
let ptr = rustix::io::mmap(
(self.base + initial_size_bytes) as *mut c_void,
extension_map_len,
rustix::io::ProtFlags::READ | rustix::io::ProtFlags::WRITE,
rustix::io::MapFlags::PRIVATE | rustix::io::MapFlags::FIXED,
&fd,
0,
)
.map_err(|e| InstantiationError::Resource(e.into()))?;
assert_eq!(ptr as usize, self.base + initial_size_bytes);
}
}
// Finally, the initial memory image.
if let Some(image) = maybe_image {
if image.len > 0 {
let image = image.clone();
unsafe {
let fd = rustix::fd::BorrowedFd::borrow_raw_fd(image.fd.as_file().as_raw_fd());
let ptr = rustix::io::mmap(
(self.base + image.offset) as *mut c_void,
image.len,
rustix::io::ProtFlags::READ | rustix::io::ProtFlags::WRITE,
rustix::io::MapFlags::PRIVATE | rustix::io::MapFlags::FIXED,
&fd,
image.offset as u64,
)
.map_err(|e| InstantiationError::Resource(e.into()))?;
assert_eq!(ptr as usize, self.base + image.offset);
}
self.image = Some(image);
}
}
self.dirty = true;
Ok(())
}
pub(crate) fn clear_and_remain_ready(&mut self) -> Result<()> {
assert!(self.dirty);
// madvise the image range; that's it! This will throw away
// dirty pages, which are CoW-private pages on top of the
// initial heap image memfd.
unsafe {
rustix::io::madvise(
self.base as *mut c_void,
self.extension_offset,
rustix::io::Advice::LinuxDontNeed,
)?;
}
// truncate the extension file down to zero bytes to reset heap length.
self.extension_file
.set_len(0)
.map_err(|e| InstantiationError::Resource(e.into()))?;
self.dirty = false;
Ok(())
}
pub(crate) fn has_image(&self) -> bool {
self.image.is_some()
}
pub(crate) fn is_dirty(&self) -> bool {
self.dirty
}
}
#[cfg(feature = "memfd-allocator")]
impl Drop for MemFdSlot {
fn drop(&mut self) {
// The MemFdSlot may be dropped if there is an error during
// instantiation: for example, if a memory-growth limiter
// disallows a guest from having a memory of a certain size,
// after we've already initialized the MemFdSlot.
//
// We need to return this region of the large pool mmap to a
// safe state (with no module-specific mappings). The
// MemFdSlot will not be returned to the MemoryPool, so a new
// MemFdSlot will be created and overwrite the mappings anyway
// on the slot's next use; but for safety and to avoid
// resource leaks it's better not to have stale mappings to a
// possibly-otherwise-dead module's image.
//
// To "wipe the slate clean", let's do a mmap of anonymous
// memory over the whole region, with PROT_NONE. Note that we
// *can't* simply munmap, because that leaves a hole in the
// middle of the pooling allocator's big memory area that some
// other random mmap may swoop in and take, to be trampled
// over by the next MemFdSlot later.
//
// Since we're in drop(), we can't sanely return an error if
// this mmap fails. Let's ignore the failure if so; the next
// MemFdSlot to be created for this slot will try to overwrite
// the existing stale mappings, and return a failure properly
// if we still cannot map new memory.
unsafe {
let _ = rustix::io::mmap_anonymous(
self.base as *mut _,
self.static_size,
rustix::io::ProtFlags::empty(),
rustix::io::MapFlags::FIXED | rustix::io::MapFlags::NORESERVE,
);
}
}
}

@@ -0,0 +1,49 @@
//! Shims for MemFdSlot when the memfd allocator is not
//! included. Enables unconditional use of the type and its methods
//! throughout higher-level code.
use crate::InstantiationError;
use anyhow::Result;
use std::sync::Arc;
/// A placeholder for MemFdSlot when we have not included the pooling
/// allocator.
///
/// To allow MemFdSlot to be unconditionally passed around in various
/// places (e.g. a `Memory`), we define a zero-sized type when memfd is
/// not included in the build.
#[cfg(not(feature = "memfd-allocator"))]
#[derive(Debug)]
pub struct MemFdSlot;
#[cfg(not(feature = "memfd-allocator"))]
#[allow(dead_code)]
impl MemFdSlot {
pub(crate) fn create(_: *mut libc::c_void, _: usize) -> Result<Self, InstantiationError> {
panic!("create() on invalid MemFdSlot");
}
pub(crate) fn instantiate(
&mut self,
_: usize,
_: Option<&Arc<crate::memfd::MemoryMemFd>>,
) -> Result<(), InstantiationError> {
panic!("instantiate() on invalid MemFdSlot");
}
pub(crate) fn clear_and_remain_ready(&mut self) -> Result<()> {
Ok(())
}
pub(crate) fn has_image(&self) -> bool {
false
}
pub(crate) fn is_dirty(&self) -> bool {
false
}
pub(crate) fn set_heap_limit(&mut self, _: usize) -> Result<()> {
panic!("set_heap_limit on invalid MemFdSlot");
}
}

@@ -7,19 +7,21 @@
//! Using the pooling instance allocator can speed up module instantiation
//! when modules can be constrained based on configurable limits.
use super::MemFdSlot;
use super::{
initialize_instance, initialize_vmcontext, InstanceAllocationRequest, InstanceAllocator,
InstanceHandle, InstantiationError,
};
use crate::{instance::Instance, Memory, Mmap, Table, VMContext};
use crate::{instance::Instance, Memory, Mmap, ModuleMemFds, Table};
use anyhow::{anyhow, bail, Context, Result};
use libc::c_void;
use rand::Rng;
use std::convert::TryFrom;
use std::marker;
use std::mem;
use std::sync::{Arc, Mutex};
use std::sync::Arc;
use std::sync::Mutex;
use wasmtime_environ::{
EntitySet, HostPtr, MemoryStyle, Module, PrimaryMap, Tunables, VMOffsets, VMOffsetsFields,
HostPtr, MemoryIndex, MemoryStyle, Module, PrimaryMap, Tunables, VMOffsets, VMOffsetsFields,
WASM_PAGE_SIZE,
};
@@ -284,7 +286,6 @@ struct InstancePool {
free_list: Mutex<Vec<usize>>,
memories: MemoryPool,
tables: TablePool,
empty_module: Arc<Module>,
}
impl InstancePool {
@@ -332,14 +333,8 @@ impl InstancePool {
free_list: Mutex::new((0..max_instances).collect()),
memories: MemoryPool::new(module_limits, instance_limits, tunables)?,
tables: TablePool::new(module_limits, instance_limits)?,
empty_module: Arc::new(Module::default()),
};
// Use a default module to initialize the instances to start
for i in 0..instance_limits.count as usize {
pool.initialize(module_limits, i);
}
Ok(pool)
}
@@ -348,41 +343,26 @@ impl InstancePool {
&mut *(self.mapping.as_mut_ptr().add(index * self.instance_size) as *mut Instance)
}
fn initialize(&self, limits: &ModuleLimits, index: usize) {
unsafe {
let instance = self.instance(index);
// Write a default instance with preallocated memory/table map storage to the ptr
std::ptr::write(
instance as _,
Instance {
module: self.empty_module.clone(),
offsets: VMOffsets::new(HostPtr, &self.empty_module),
memories: PrimaryMap::with_capacity(limits.memories as usize),
tables: PrimaryMap::with_capacity(limits.tables as usize),
dropped_elements: EntitySet::new(),
dropped_data: EntitySet::new(),
host_state: Box::new(()),
wasm_data: &[],
vmctx: VMContext {
_marker: marker::PhantomPinned,
},
},
);
}
}
unsafe fn setup_instance(
&self,
index: usize,
mut req: InstanceAllocationRequest,
) -> Result<InstanceHandle, InstantiationError> {
let instance = self.instance(index);
let host_state = std::mem::replace(&mut req.host_state, Box::new(()));
let instance_data = Instance::create_raw(
&req.module,
&*req.wasm_data,
PrimaryMap::default(),
PrimaryMap::default(),
host_state,
);
instance.module = req.module.clone();
instance.offsets = VMOffsets::new(HostPtr, instance.module.as_ref());
instance.host_state = std::mem::replace(&mut req.host_state, Box::new(()));
instance.wasm_data = &*req.wasm_data;
// Instances are uninitialized memory at first; we need to
// write an empty but initialized `Instance` struct into the
// chosen slot before we do anything else with it. (This is
// paired with a `drop_in_place` in deallocate below.)
let instance = self.instance(index);
std::ptr::write(instance as _, instance_data);
// set_instance_memories and _tables will need the store before we can completely
// initialize the vmcontext.
@@ -391,8 +371,10 @@ impl InstancePool {
}
Self::set_instance_memories(
index,
instance,
self.memories.get(index),
&self.memories,
&req.memfds,
self.memories.max_wasm_pages,
)?;
@@ -448,20 +430,44 @@ impl InstancePool {
let instance = unsafe { &mut *handle.instance };
// Decommit any linear memories that were used
for (memory, base) in instance.memories.values_mut().zip(self.memories.get(index)) {
for ((def_mem_idx, memory), base) in
instance.memories.iter_mut().zip(self.memories.get(index))
{
let mut memory = mem::take(memory);
debug_assert!(memory.is_static());
// Reset any faulted guard pages as the physical memory may be reused for another instance in the future
#[cfg(all(feature = "uffd", target_os = "linux"))]
memory
.reset_guard_pages()
.expect("failed to reset guard pages");
drop(&mut memory); // require mutable on all platforms, not just uffd
match memory {
Memory::Static {
memfd_slot: Some(mut memfd_slot),
..
} => {
let mem_idx = instance.module.memory_index(def_mem_idx);
// If there was any error clearing the memfd, just
// drop it here, and let the drop handler for the
// MemFdSlot unmap in a way that retains the
// address space reservation.
if memfd_slot.clear_and_remain_ready().is_ok() {
self.memories.return_memfd_slot(index, mem_idx, memfd_slot);
}
}
let size = memory.byte_size();
drop(memory);
decommit_memory_pages(base, size).expect("failed to decommit linear memory pages");
_ => {
// Reset any faulted guard pages as the physical
// memory may be reused for another instance in
// the future.
#[cfg(all(feature = "uffd", target_os = "linux"))]
memory
.reset_guard_pages()
.expect("failed to reset guard pages");
// require mutable on all platforms, not just uffd
drop(&mut memory);
let size = memory.byte_size();
drop(memory);
decommit_memory_pages(base, size)
.expect("failed to decommit linear memory pages");
}
}
}
instance.memories.clear();
@@ -481,50 +487,81 @@ impl InstancePool {
decommit_table_pages(base, size).expect("failed to decommit table pages");
}
instance.tables.clear();
instance.dropped_elements.clear();
// Drop all `global` values which need a destructor, such as externref
// values which now need their reference count dropped.
instance.drop_globals();
// Drop any host state
instance.host_state = Box::new(());
// And finally reset the module/offsets back to their original. This
// should put everything back in a relatively pristine state for each
// fresh allocation later on.
instance.module = self.empty_module.clone();
instance.offsets = VMOffsets::new(HostPtr, &self.empty_module);
instance.wasm_data = &[];
// We've now done all of the pooling-allocator-specific
// teardown, so we can drop the Instance and let destructors
// take care of any other fields (host state, globals, etc.).
unsafe {
std::ptr::drop_in_place(instance as *mut _);
}
// The instance is now uninitialized memory and cannot be
// touched again until we write a fresh Instance in-place with
// std::ptr::write in allocate() above.
self.free_list.lock().unwrap().push(index);
}
fn set_instance_memories(
instance_idx: usize,
instance: &mut Instance,
mut memories: impl Iterator<Item = *mut u8>,
memories: &MemoryPool,
maybe_memfds: &Option<Arc<ModuleMemFds>>,
max_pages: u64,
) -> Result<(), InstantiationError> {
let module = instance.module.as_ref();
debug_assert!(instance.memories.is_empty());
for plan in
(&module.memory_plans.values().as_slice()[module.num_imported_memories..]).iter()
for (memory_index, plan) in module
.memory_plans
.iter()
.skip(module.num_imported_memories)
{
let defined_index = module
.defined_memory_index(memory_index)
.expect("should be a defined memory since we skipped imported ones");
let memory = unsafe {
std::slice::from_raw_parts_mut(
memories.next().unwrap(),
memories.get_base(instance_idx, memory_index),
(max_pages as usize) * (WASM_PAGE_SIZE as usize),
)
};
instance.memories.push(
Memory::new_static(plan, memory, commit_memory_pages, unsafe {
&mut *instance.store()
})
.map_err(InstantiationError::Resource)?,
);
if let Some(memfds) = maybe_memfds {
let image = memfds.get_memory_image(defined_index);
let mut slot = memories.take_memfd_slot(instance_idx, memory_index)?;
let initial_size = plan.memory.minimum * WASM_PAGE_SIZE as u64;
// If instantiation fails, we can propagate the error
// upward and drop the slot. This will cause the Drop
// handler to attempt to map the range with PROT_NONE
// memory, to reserve the space while releasing any
// stale mappings. The next use of this slot will then
// create a new MemFdSlot that will try to map over
// this, returning errors as well if the mapping
// errors persist. The unmap-on-drop is best effort;
// if it fails, then we can still soundly continue
// using the rest of the pool and allowing the rest of
// the process to continue, because we never perform a
// mmap that would leave an open space for someone
// else to come in and map something.
slot.instantiate(initial_size as usize, image)
.map_err(|e| InstantiationError::Resource(e.into()))?;
instance.memories.push(
Memory::new_static(plan, memory, None, Some(slot), unsafe {
&mut *instance.store()
})
.map_err(InstantiationError::Resource)?,
);
} else {
instance.memories.push(
Memory::new_static(plan, memory, Some(commit_memory_pages), None, unsafe {
&mut *instance.store()
})
.map_err(InstantiationError::Resource)?,
);
}
}
debug_assert!(instance.dropped_data.is_empty());
@@ -566,17 +603,6 @@ impl InstancePool {
}
}
impl Drop for InstancePool {
fn drop(&mut self) {
unsafe {
for i in 0..self.max_instances {
let ptr = self.mapping.as_mut_ptr().add(i * self.instance_size) as *mut Instance;
std::ptr::drop_in_place(ptr);
}
}
}
}
/// Represents a pool of WebAssembly linear memories.
///
/// A linear memory is divided into accessible pages and guard pages.
@@ -589,6 +615,10 @@ impl Drop for InstancePool {
#[derive(Debug)]
struct MemoryPool {
mapping: Mmap,
// If using the memfd allocation scheme, the MemFd slots. We
// dynamically transfer ownership of a slot to a Memory when in
// use.
memfd_slots: Vec<Mutex<Option<MemFdSlot>>>,
// The size, in bytes, of each linear memory's reservation plus the guard
// region allocated for it.
memory_size: usize,
@@ -673,8 +703,18 @@ impl MemoryPool {
let mapping = Mmap::accessible_reserved(0, allocation_size)
.context("failed to create memory pool mapping")?;
let num_memfd_slots = if cfg!(feature = "memfd-allocator") {
max_instances * max_memories
} else {
0
};
let memfd_slots: Vec<_> = std::iter::repeat_with(|| Mutex::new(None))
.take(num_memfd_slots)
.collect();
let pool = Self {
mapping,
memfd_slots,
memory_size,
initial_memory_offset,
max_memories,
@@ -689,17 +729,43 @@ impl MemoryPool {
Ok(pool)
}
fn get(&self, instance_index: usize) -> impl Iterator<Item = *mut u8> {
fn get_base(&self, instance_index: usize, memory_index: MemoryIndex) -> *mut u8 {
debug_assert!(instance_index < self.max_instances);
let memory_index = memory_index.as_u32() as usize;
debug_assert!(memory_index < self.max_memories);
let idx = instance_index * self.max_memories + memory_index;
let offset = self.initial_memory_offset + idx * self.memory_size;
unsafe { self.mapping.as_mut_ptr().offset(offset as isize) }
}
let base: *mut u8 = unsafe {
self.mapping.as_mut_ptr().add(
self.initial_memory_offset + instance_index * self.memory_size * self.max_memories,
) as _
};
fn get<'a>(&'a self, instance_index: usize) -> impl Iterator<Item = *mut u8> + 'a {
(0..self.max_memories)
.map(move |i| self.get_base(instance_index, MemoryIndex::from_u32(i as u32)))
}
let size = self.memory_size;
(0..self.max_memories).map(move |i| unsafe { base.add(i * size) })
/// Take ownership of the given memfd slot. Must be returned via
/// `return_memfd_slot` when the instance is done using it.
fn take_memfd_slot(
&self,
instance_index: usize,
memory_index: MemoryIndex,
) -> Result<MemFdSlot, InstantiationError> {
let idx = instance_index * self.max_memories + (memory_index.as_u32() as usize);
let maybe_slot = self.memfd_slots[idx].lock().unwrap().take();
maybe_slot.map(|slot| Ok(slot)).unwrap_or_else(|| {
MemFdSlot::create(
self.get_base(instance_index, memory_index) as *mut c_void,
self.memory_size,
)
})
}
/// Return ownership of the given memfd slot.
fn return_memfd_slot(&self, instance_index: usize, memory_index: MemoryIndex, slot: MemFdSlot) {
assert!(!slot.is_dirty());
let idx = instance_index * self.max_memories + (memory_index.as_u32() as usize);
*self.memfd_slots[idx].lock().unwrap() = Some(slot);
}
}
@@ -1413,6 +1479,7 @@ mod test {
host_state: Box::new(()),
store: StorePtr::empty(),
wasm_data: &[],
memfds: None,
},
)
.expect("allocation should succeed"),
@@ -1437,6 +1504,7 @@ mod test {
host_state: Box::new(()),
store: StorePtr::empty(),
wasm_data: &[],
memfds: None,
},
) {
Err(InstantiationError::Limit(3)) => {}

@@ -577,6 +577,7 @@ mod test {
PoolingAllocationStrategy::Random,
InstanceAllocationRequest {
module: module.clone(),
memfds: None,
image_base: 0,
functions,
imports: Imports {