Add a pooling allocator mode based on copy-on-write mappings of memfds.
As first suggested by Jan on the Zulip here [1], a cheap and effective way to obtain copy-on-write semantics of a "backing image" for a Wasm memory is to mmap a file with `MAP_PRIVATE`. The `memfd` mechanism provided by the Linux kernel allows us to create anonymous, in-memory-only files that we can use for this mapping, so we can construct the image contents on-the-fly and then effectively create a CoW overlay. Furthermore, and importantly, `madvise(MADV_DONTNEED, ...)` will discard the CoW overlay, returning the mapping to its original state.

By itself this is almost enough for a very fast instantiation-termination loop of the same image over and over, without changing the address-space mapping at all (which is expensive). The only missing bit is how to implement heap *growth*. But here memfds can help us again: if we create another anonymous file and map it where the extended parts of the heap would go, we can take advantage of the fact that an `mmap()` mapping can be *larger than the file itself*, with accesses beyond the end generating a `SIGBUS`, and the fact that we can cheaply resize the file with `ftruncate()`, even after a mapping exists. So we can map the "heap extension" file once with the maximum memory-slot size and grow the memfd itself as `memory.grow` operations occur.

The above CoW technique and heap-growth technique together give us a fastpath of `madvise()` and `ftruncate()` only when we re-instantiate the same module over and over, as long as we can reuse the same slot. This fastpath avoids all whole-process address-space locks in the Linux kernel, which should mean it is highly scalable. It also avoids the cost of copying data on read, as the `uffd` heap backend does when servicing pagefaults; the kernel's own optimized CoW logic (same as used by all file mmaps) is used instead.

[1] https://bytecodealliance.zulipchat.com/#narrow/stream/206238-general/topic/Copy.20on.20write.20based.20instance.20reuse/near/266657772
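To make the scheme concrete, here is a hypothetical, minimal sketch of the fastpath described above, written against raw libc for clarity. The patch itself goes through the `rustix` and `memfd` crates, places each mapping at a fixed address inside a pre-reserved slot, and checks every return value; all of that is elided in this sketch.

    // Illustrative only; not the patch's code. Error handling omitted.
    use std::fs::File;
    use std::os::unix::io::FromRawFd;

    unsafe fn cow_memfd_fastpath() -> std::io::Result<()> {
        const WASM_PAGE: usize = 64 * 1024;

        // Build the initial heap image in an anonymous, in-memory-only file.
        let image_fd = libc::memfd_create(b"wasm-heap-image\0".as_ptr().cast(), 0);
        libc::ftruncate(image_fd, WASM_PAGE as libc::off_t);
        libc::pwrite(image_fd, b"data".as_ptr().cast(), 4, 0);

        // MAP_PRIVATE gives a copy-on-write overlay: guest writes stay
        // local to this mapping and never reach the memfd itself.
        let base = libc::mmap(
            std::ptr::null_mut(),
            WASM_PAGE,
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE,
            image_fd,
            0,
        );

        // Heap growth: map an empty "extension" memfd *larger than the
        // file itself*. Accesses past EOF raise SIGBUS, so the file
        // length acts as the heap limit, with no mprotect involved.
        let ext_fd = libc::memfd_create(b"wasm-anonymous-heap\0".as_ptr().cast(), 0);
        let ext = File::from_raw_fd(ext_fd);
        let _ext_base = libc::mmap(
            std::ptr::null_mut(), // the patch uses MAP_FIXED just past the image
            1 << 30,              // maximum memory-slot size
            libc::PROT_READ | libc::PROT_WRITE,
            libc::MAP_PRIVATE,
            ext_fd,
            0,
        );

        // memory.grow is now a single ftruncate (File::set_len), which
        // takes no whole-process address-space lock.
        ext.set_len((10 * WASM_PAGE) as u64)?;

        // Instance teardown: discard the dirty CoW overlay and reset the
        // heap limit. The slot is pristine again, with the address-space
        // mapping untouched.
        libc::madvise(base, WASM_PAGE, libc::MADV_DONTNEED);
        ext.set_len(0)?;
        Ok(())
    }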
crates/runtime/src/instance/allocator.rs
@@ -4,28 +4,37 @@ use crate::memory::{DefaultMemoryCreator, Memory};
 use crate::table::Table;
 use crate::traphandlers::Trap;
 use crate::vmcontext::{
-    VMBuiltinFunctionsArray, VMCallerCheckedAnyfunc, VMContext, VMGlobalDefinition,
-    VMSharedSignatureIndex,
+    VMBuiltinFunctionsArray, VMCallerCheckedAnyfunc, VMGlobalDefinition, VMSharedSignatureIndex,
 };
+use crate::ModuleMemFds;
 use crate::Store;
 use anyhow::Result;
 use std::alloc;
 use std::any::Any;
 use std::convert::TryFrom;
-use std::marker;
 use std::ptr::{self, NonNull};
 use std::slice;
 use std::sync::Arc;
 use thiserror::Error;
 use wasmtime_environ::{
-    DefinedFuncIndex, DefinedMemoryIndex, DefinedTableIndex, EntityRef, EntitySet, FunctionInfo,
-    GlobalInit, HostPtr, MemoryInitialization, MemoryInitializer, Module, ModuleType, PrimaryMap,
-    SignatureIndex, TableInitializer, TrapCode, VMOffsets, WasmType, WASM_PAGE_SIZE,
+    DefinedFuncIndex, DefinedMemoryIndex, DefinedTableIndex, EntityRef, FunctionInfo, GlobalInit,
+    MemoryInitialization, MemoryInitializer, Module, ModuleType, PrimaryMap, SignatureIndex,
+    TableInitializer, TrapCode, WasmType, WASM_PAGE_SIZE,
 };

 #[cfg(feature = "pooling-allocator")]
 mod pooling;

+#[cfg(feature = "memfd-allocator")]
+mod memfd;
+#[cfg(feature = "memfd-allocator")]
+pub use self::memfd::MemFdSlot;
+
+#[cfg(not(feature = "memfd-allocator"))]
+mod memfd_disabled;
+#[cfg(not(feature = "memfd-allocator"))]
+pub use self::memfd_disabled::MemFdSlot;
+
 #[cfg(feature = "pooling-allocator")]
 pub use self::pooling::{
     InstanceLimits, ModuleLimits, PoolingAllocationStrategy, PoolingInstanceAllocator,
@@ -39,6 +48,9 @@ pub struct InstanceAllocationRequest<'a> {
     /// The base address of where JIT functions are located.
     pub image_base: usize,

+    /// If using MemFD-based memories, the backing MemFDs.
+    pub memfds: Option<Arc<ModuleMemFds>>,
+
     /// Descriptors about each compiled function, such as the offset from
     /// `image_base`.
     pub functions: &'a PrimaryMap<DefinedFuncIndex, FunctionInfo>,
@@ -376,9 +388,23 @@ fn check_memory_init_bounds(

 fn initialize_memories(
     instance: &mut Instance,
+    module: &Module,
     initializers: &[MemoryInitializer],
 ) -> Result<(), InstantiationError> {
     for init in initializers {
+        // Check whether this is a MemFD memory; if so, we can skip
+        // all initializers.
+        let memory = init.memory_index;
+        if let Some(defined_index) = module.defined_memory_index(memory) {
+            // We can only skip if there is actually a MemFD image. In
+            // some situations the MemFD image creation code will bail
+            // (e.g. due to an out of bounds data segment) and so we
+            // need to fall back on the usual initialization below.
+            if instance.memories[defined_index].is_memfd_with_image() {
+                continue;
+            }
+        }
+
         instance
             .memory_init_segment(
                 init.memory_index,
@@ -432,6 +458,14 @@ fn initialize_instance(
     match &module.memory_initialization {
         MemoryInitialization::Paged { map, out_of_bounds } => {
             for (index, pages) in map {
+                // We can only skip if there is actually a MemFD image. In
+                // some situations the MemFD image creation code will bail
+                // (e.g. due to an out of bounds data segment) and so we
+                // need to fall back on the usual initialization below.
+                if instance.memories[index].is_memfd_with_image() {
+                    continue;
+                }
+
                 let memory = instance.memory(index);
                 let slice =
                     unsafe { slice::from_raw_parts_mut(memory.base, memory.current_length) };
@@ -453,7 +487,7 @@ fn initialize_instance(
             }
         }
         MemoryInitialization::Segmented(initializers) => {
-            initialize_memories(instance, initializers)?;
+            initialize_memories(instance, module, initializers)?;
         }
     }

@@ -691,19 +725,8 @@ unsafe impl InstanceAllocator for OnDemandInstanceAllocator {
         let host_state = std::mem::replace(&mut req.host_state, Box::new(()));

         let mut handle = {
-            let instance = Instance {
-                module: req.module.clone(),
-                offsets: VMOffsets::new(HostPtr, &req.module),
-                memories,
-                tables,
-                dropped_elements: EntitySet::with_capacity(req.module.passive_elements.len()),
-                dropped_data: EntitySet::with_capacity(req.module.passive_data_map.len()),
-                host_state,
-                wasm_data: &*req.wasm_data,
-                vmctx: VMContext {
-                    _marker: marker::PhantomPinned,
-                },
-            };
+            let instance =
+                Instance::create_raw(&req.module, &*req.wasm_data, memories, tables, host_state);
             let layout = instance.alloc_layout();
             let instance_ptr = alloc::alloc(layout) as *mut Instance;
             if instance_ptr.is_null() {

crates/runtime/src/instance/allocator/memfd.rs (new file, 290 lines)
@@ -0,0 +1,290 @@
//! memfd mapping logic for use by the pooling allocator.

use crate::memfd::MemoryMemFd;
use crate::InstantiationError;
use anyhow::Result;
use libc::c_void;
use rustix::fd::AsRawFd;
use std::convert::TryFrom;
use std::fs::File;
use std::sync::Arc;

/// A single slot handled by the memfd instance-heap mechanism.
///
/// The mmap scheme is:
///
/// base ==> (points here)
/// - (image.offset bytes)      anonymous zero memory, pre-image
/// - (image.len bytes)         CoW mapping of memfd heap image
/// - (up to extension_offset)  anonymous zero memory, post-image
/// - (up to static_size)       heap expansion region; CoW mapping of per-slot memfd
///
/// The ordering of mmaps to set this up is:
///
/// - once, when pooling allocator is created:
///   - one large mmap to create 8GiB * instances * memories slots
///
/// - per instantiation of new image in a slot:
///   - mmap of anonymous zero memory, from 0 to initial heap size
///   - mmap of CoW'd memfd image, from `image.offset` to
///     `image.offset + image.len`. This overwrites part of the
///     anonymous zero memory, potentially splitting it into a pre-
///     and post-region.
///   - mmap of CoW'd extension file, past the initial heap size up to
///     the end of the max memory size (just before the
///     post-guard). This is always adjacent to the above mmaps, but
///     does not overlap/overwrite them.
#[derive(Debug)]
pub struct MemFdSlot {
    /// The base of the actual heap memory. Bytes at this address are
    /// what is seen by the Wasm guest code.
    base: usize,
    /// The maximum static memory size, plus post-guard.
    static_size: usize,
    /// The memfd image that backs this memory. May be `None`, in
    /// which case the memory is all zeroes.
    pub(crate) image: Option<Arc<MemoryMemFd>>,
    /// The offset at which the "extension file", which is used to
    /// allow for efficient heap growth, is mapped. This is always
    /// immediately after the end of the initial memory size.
    extension_offset: usize,
    /// The anonymous memfd, owned by this slot, which we mmap in the
    /// area where the heap may grow during runtime. We use the
    /// ftruncate() syscall (invoked via `File::set_len()`) to set its
    /// size. We never write any data to it -- we CoW-map it so we can
    /// throw away dirty data on termination. Instead, we just use its
    /// size as a "watermark" that delineates the boundary between
    /// safe-to-access memory and SIGBUS-causing memory. (This works
    /// because one can mmap a file beyond its end, and is good
    /// because ftruncate does not take the process-wide lock that
    /// mmap and mprotect do.)
    extension_file: File,
    /// Whether this slot may have "dirty" pages (pages written by an
    /// instantiation). Set by `instantiate()` and cleared by
    /// `clear_and_remain_ready()`, and used in assertions to ensure
    /// those methods are called properly.
    dirty: bool,
}

impl MemFdSlot {
    pub(crate) fn create(
        base_addr: *mut c_void,
        static_size: usize,
    ) -> Result<Self, InstantiationError> {
        let base = base_addr as usize;

        // Create a MemFD for the memory growth first -- this covers
        // extended heap beyond the initial image.
        let extension_memfd = memfd::MemfdOptions::new()
            .allow_sealing(true)
            .create("wasm-anonymous-heap")
            .map_err(|e| InstantiationError::Resource(e.into()))?;
        // Seal the ability to write the extension file (make it
        // permanently read-only). This is a defense-in-depth
        // mitigation to make extra-sure that we don't leak
        // information between instantiations. See note in `memfd.rs`
        // for more about why we use seals.
        extension_memfd
            .add_seal(memfd::FileSeal::SealWrite)
            .map_err(|e| InstantiationError::Resource(e.into()))?;
        extension_memfd
            .add_seal(memfd::FileSeal::SealSeal)
            .map_err(|e| InstantiationError::Resource(e.into()))?;
        let extension_file = extension_memfd.into_file();
        extension_file
            .set_len(0)
            .map_err(|e| InstantiationError::Resource(e.into()))?;

        Ok(MemFdSlot {
            base,
            static_size,
            image: None,
            extension_file,
            extension_offset: 0,
            dirty: false,
        })
    }

    pub(crate) fn set_heap_limit(&mut self, size_bytes: usize) -> Result<()> {
        assert!(size_bytes >= self.extension_offset);
        // This is all that is needed to make the new memory
        // accessible; we don't need to mprotect anything. (The
        // mapping itself is always R+W for the max possible heap
        // size, and only the anonymous-backing file length catches
        // out-of-bounds accesses.)
        self.extension_file
            .set_len(u64::try_from(size_bytes - self.extension_offset).unwrap())?;
        Ok(())
    }

    pub(crate) fn instantiate(
        &mut self,
        initial_size_bytes: usize,
        maybe_image: Option<&Arc<MemoryMemFd>>,
    ) -> Result<(), InstantiationError> {
        assert!(!self.dirty);

        if let Some(existing_image) = &self.image {
            // Fast-path: previously instantiated with the same image,
            // so the mappings are already correct; there is no need
            // to mmap anything. Given that we asserted not-dirty
            // above, any dirty pages will have already been thrown
            // away by madvise() during the previous termination.
            if let Some(image) = maybe_image {
                if existing_image.fd.as_file().as_raw_fd() == image.fd.as_file().as_raw_fd() {
                    self.dirty = true;
                    return Ok(());
                }
            }
        }

        // Otherwise, we need to redo (i) the anonymous-mmap backing
        // for the initial heap size, (ii) the extension-file backing,
        // and (iii) the initial-heap-image mapping if present.

        // Security/audit note: we map all of these MAP_PRIVATE, so
        // all instance data is local to the mapping, not propagated
        // to the backing fd. We throw away this CoW overlay with
        // madvise() below, from base up to extension_offset (which is
        // at least initial_size_bytes, and extended when the
        // extension file is, so it covers all three mappings) when
        // terminating the instance.

        // Anonymous mapping behind the initial heap size: this gives
        // zeroes for any "holes" in the initial heap image. Anonymous
        // mmap memory is faster to fault in than a CoW of a file,
        // even a file with zero holes, because the kernel's CoW path
        // unconditionally copies *something* (even if just a page of
        // zeroes). Anonymous zero pages are fast: the kernel
        // pre-zeroes them, and even if it runs out of those, a memset
        // is half as expensive as a memcpy (only writes, no reads).
        if initial_size_bytes > 0 {
            unsafe {
                let ptr = rustix::io::mmap_anonymous(
                    self.base as *mut c_void,
                    initial_size_bytes,
                    rustix::io::ProtFlags::READ | rustix::io::ProtFlags::WRITE,
                    rustix::io::MapFlags::PRIVATE | rustix::io::MapFlags::FIXED,
                )
                .map_err(|e| InstantiationError::Resource(e.into()))?;
                assert_eq!(ptr as usize, self.base);
            }
        }

        // An "extension file": this allows us to grow the heap by
        // doing just an ftruncate(), without changing any
        // mappings. This is important to avoid the process-wide mmap
        // lock on Linux.
        self.extension_offset = initial_size_bytes;
        let extension_map_len = self.static_size - initial_size_bytes;
        if extension_map_len > 0 {
            unsafe {
                let fd = rustix::fd::BorrowedFd::borrow_raw_fd(self.extension_file.as_raw_fd());
                let ptr = rustix::io::mmap(
                    (self.base + initial_size_bytes) as *mut c_void,
                    extension_map_len,
                    rustix::io::ProtFlags::READ | rustix::io::ProtFlags::WRITE,
                    rustix::io::MapFlags::PRIVATE | rustix::io::MapFlags::FIXED,
                    &fd,
                    0,
                )
                .map_err(|e| InstantiationError::Resource(e.into()))?;
                assert_eq!(ptr as usize, self.base + initial_size_bytes);
            }
        }

        // Finally, the initial memory image.
        if let Some(image) = maybe_image {
            if image.len > 0 {
                let image = image.clone();

                unsafe {
                    let fd = rustix::fd::BorrowedFd::borrow_raw_fd(image.fd.as_file().as_raw_fd());
                    let ptr = rustix::io::mmap(
                        (self.base + image.offset) as *mut c_void,
                        image.len,
                        rustix::io::ProtFlags::READ | rustix::io::ProtFlags::WRITE,
                        rustix::io::MapFlags::PRIVATE | rustix::io::MapFlags::FIXED,
                        &fd,
                        image.offset as u64,
                    )
                    .map_err(|e| InstantiationError::Resource(e.into()))?;
                    assert_eq!(ptr as usize, self.base + image.offset);
                }

                self.image = Some(image);
            }
        }

        self.dirty = true;
        Ok(())
    }

    pub(crate) fn clear_and_remain_ready(&mut self) -> Result<()> {
        assert!(self.dirty);
        // madvise the image range; that's it! This will throw away
        // dirty pages, which are CoW-private pages on top of the
        // initial heap image memfd.
        unsafe {
            rustix::io::madvise(
                self.base as *mut c_void,
                self.extension_offset,
                rustix::io::Advice::LinuxDontNeed,
            )?;
        }

        // truncate the extension file down to zero bytes to reset heap length.
        self.extension_file
            .set_len(0)
            .map_err(|e| InstantiationError::Resource(e.into()))?;
        self.dirty = false;
        Ok(())
    }

    pub(crate) fn has_image(&self) -> bool {
        self.image.is_some()
    }

    pub(crate) fn is_dirty(&self) -> bool {
        self.dirty
    }
}

#[cfg(feature = "memfd-allocator")]
impl Drop for MemFdSlot {
    fn drop(&mut self) {
        // The MemFdSlot may be dropped if there is an error during
        // instantiation: for example, if a memory-growth limiter
        // disallows a guest from having a memory of a certain size,
        // after we've already initialized the MemFdSlot.
        //
        // We need to return this region of the large pool mmap to a
        // safe state (with no module-specific mappings). The
        // MemFdSlot will not be returned to the MemoryPool, so a new
        // MemFdSlot will be created and overwrite the mappings anyway
        // on the slot's next use; but for safety and to avoid
        // resource leaks it's better not to have stale mappings to a
        // possibly-otherwise-dead module's image.
        //
        // To "wipe the slate clean", let's do a mmap of anonymous
        // memory over the whole region, with PROT_NONE. Note that we
        // *can't* simply munmap, because that leaves a hole in the
        // middle of the pooling allocator's big memory area that some
        // other random mmap may swoop in and take, to be trampled
        // over by the next MemFdSlot later.
        //
        // Since we're in drop(), we can't sanely return an error if
        // this mmap fails. Let's ignore the failure if so; the next
        // MemFdSlot to be created for this slot will try to overwrite
        // the existing stale mappings, and return a failure properly
        // if we still cannot map new memory.
        unsafe {
            let _ = rustix::io::mmap_anonymous(
                self.base as *mut _,
                self.static_size,
                rustix::io::ProtFlags::empty(),
                rustix::io::MapFlags::FIXED | rustix::io::MapFlags::NORESERVE,
            );
        }
    }
}
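
Taken together, the methods above give each slot a lifecycle of create, instantiate, grow, clear, repeat. The following hypothetical driver loop is illustrative only and is not part of this commit; the real callers are `take_memfd_slot()` and `return_memfd_slot()` in the pooling allocator further below:

    // Illustrative sketch: reuse one slot for the same image over and
    // over. After the first iteration, each round trip costs one
    // madvise() plus two ftruncate() calls and no mmap at all.
    fn reuse_slot(
        base: *mut libc::c_void,
        static_size: usize,
        image: &std::sync::Arc<crate::memfd::MemoryMemFd>,
        initial_size: usize,
    ) -> anyhow::Result<()> {
        let mut slot = MemFdSlot::create(base, static_size)?;
        loop {
            // First pass performs the three mmaps; subsequent passes
            // hit the same-image fast path and skip them entirely.
            slot.instantiate(initial_size, Some(image))?;
            // memory.grow: one ftruncate on the extension memfd.
            slot.set_heap_limit(initial_size + 65536)?;
            // Termination: madvise(MADV_DONTNEED) + ftruncate(0).
            slot.clear_and_remain_ready()?;
        }
    }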

crates/runtime/src/instance/allocator/memfd_disabled.rs (new file, 49 lines)
@@ -0,0 +1,49 @@
//! Shims for MemFdSlot when the memfd allocator is not
//! included. Enables unconditional use of the type and its methods
//! throughout higher-level code.

use crate::InstantiationError;
use anyhow::Result;
use std::sync::Arc;

/// A placeholder for MemFdSlot when we have not included the pooling
/// allocator.
///
/// To allow MemFdSlot to be unconditionally passed around in various
/// places (e.g. a `Memory`), we define a zero-sized type when memfd is
/// not included in the build.
#[cfg(not(feature = "memfd-allocator"))]
#[derive(Debug)]
pub struct MemFdSlot;

#[cfg(not(feature = "memfd-allocator"))]
#[allow(dead_code)]
impl MemFdSlot {
    pub(crate) fn create(_: *mut libc::c_void, _: usize) -> Result<Self, InstantiationError> {
        panic!("create() on invalid MemFdSlot");
    }

    pub(crate) fn instantiate(
        &mut self,
        _: usize,
        _: Option<&Arc<crate::memfd::MemoryMemFd>>,
    ) -> Result<Self, InstantiationError> {
        panic!("instantiate() on invalid MemFdSlot");
    }

    pub(crate) fn clear_and_remain_ready(&mut self) -> Result<()> {
        Ok(())
    }

    pub(crate) fn has_image(&self) -> bool {
        false
    }

    pub(crate) fn is_dirty(&self) -> bool {
        false
    }

    pub(crate) fn set_heap_limit(&mut self, _: usize) -> Result<()> {
        panic!("set_heap_limit on invalid MemFdSlot");
    }
}

crates/runtime/src/instance/allocator/pooling.rs
@@ -7,19 +7,21 @@
 //! Using the pooling instance allocator can speed up module instantiation
 //! when modules can be constrained based on configurable limits.

+use super::MemFdSlot;
 use super::{
     initialize_instance, initialize_vmcontext, InstanceAllocationRequest, InstanceAllocator,
     InstanceHandle, InstantiationError,
 };
-use crate::{instance::Instance, Memory, Mmap, Table, VMContext};
+use crate::{instance::Instance, Memory, Mmap, ModuleMemFds, Table};
 use anyhow::{anyhow, bail, Context, Result};
+use libc::c_void;
 use rand::Rng;
 use std::convert::TryFrom;
-use std::marker;
 use std::mem;
-use std::sync::{Arc, Mutex};
+use std::sync::Arc;
+use std::sync::Mutex;
 use wasmtime_environ::{
-    EntitySet, HostPtr, MemoryStyle, Module, PrimaryMap, Tunables, VMOffsets, VMOffsetsFields,
+    HostPtr, MemoryIndex, MemoryStyle, Module, PrimaryMap, Tunables, VMOffsets, VMOffsetsFields,
     WASM_PAGE_SIZE,
 };

@@ -284,7 +286,6 @@ struct InstancePool {
     free_list: Mutex<Vec<usize>>,
     memories: MemoryPool,
     tables: TablePool,
-    empty_module: Arc<Module>,
 }

 impl InstancePool {
@@ -332,14 +333,8 @@ impl InstancePool {
             free_list: Mutex::new((0..max_instances).collect()),
             memories: MemoryPool::new(module_limits, instance_limits, tunables)?,
             tables: TablePool::new(module_limits, instance_limits)?,
-            empty_module: Arc::new(Module::default()),
         };

-        // Use a default module to initialize the instances to start
-        for i in 0..instance_limits.count as usize {
-            pool.initialize(module_limits, i);
-        }
-
         Ok(pool)
     }

@@ -348,41 +343,26 @@ impl InstancePool {
         &mut *(self.mapping.as_mut_ptr().add(index * self.instance_size) as *mut Instance)
     }

-    fn initialize(&self, limits: &ModuleLimits, index: usize) {
-        unsafe {
-            let instance = self.instance(index);
-
-            // Write a default instance with preallocated memory/table map storage to the ptr
-            std::ptr::write(
-                instance as _,
-                Instance {
-                    module: self.empty_module.clone(),
-                    offsets: VMOffsets::new(HostPtr, &self.empty_module),
-                    memories: PrimaryMap::with_capacity(limits.memories as usize),
-                    tables: PrimaryMap::with_capacity(limits.tables as usize),
-                    dropped_elements: EntitySet::new(),
-                    dropped_data: EntitySet::new(),
-                    host_state: Box::new(()),
-                    wasm_data: &[],
-                    vmctx: VMContext {
-                        _marker: marker::PhantomPinned,
-                    },
-                },
-            );
-        }
-    }
-
     unsafe fn setup_instance(
         &self,
         index: usize,
         mut req: InstanceAllocationRequest,
     ) -> Result<InstanceHandle, InstantiationError> {
-        let instance = self.instance(index);
+        let host_state = std::mem::replace(&mut req.host_state, Box::new(()));
+        let instance_data = Instance::create_raw(
+            &req.module,
+            &*req.wasm_data,
+            PrimaryMap::default(),
+            PrimaryMap::default(),
+            host_state,
+        );

-        instance.module = req.module.clone();
-        instance.offsets = VMOffsets::new(HostPtr, instance.module.as_ref());
-        instance.host_state = std::mem::replace(&mut req.host_state, Box::new(()));
-        instance.wasm_data = &*req.wasm_data;
+        // Instances are uninitialized memory at first; we need to
+        // write an empty but initialized `Instance` struct into the
+        // chosen slot before we do anything else with it. (This is
+        // paired with a `drop_in_place` in deallocate below.)
+        let instance = self.instance(index);
+        std::ptr::write(instance as _, instance_data);

         // set_instance_memories and _tables will need the store before we can completely
         // initialize the vmcontext.
@@ -391,8 +371,10 @@ impl InstancePool {
         }

         Self::set_instance_memories(
+            index,
             instance,
-            self.memories.get(index),
+            &self.memories,
+            &req.memfds,
             self.memories.max_wasm_pages,
         )?;

@@ -448,20 +430,44 @@ impl InstancePool {
         let instance = unsafe { &mut *handle.instance };

         // Decommit any linear memories that were used
-        for (memory, base) in instance.memories.values_mut().zip(self.memories.get(index)) {
+        for ((def_mem_idx, memory), base) in
+            instance.memories.iter_mut().zip(self.memories.get(index))
+        {
             let mut memory = mem::take(memory);
             debug_assert!(memory.is_static());

-            // Reset any faulted guard pages as the physical memory may be reused for another instance in the future
-            #[cfg(all(feature = "uffd", target_os = "linux"))]
-            memory
-                .reset_guard_pages()
-                .expect("failed to reset guard pages");
-            drop(&mut memory); // require mutable on all platforms, not just uffd
+            match memory {
+                Memory::Static {
+                    memfd_slot: Some(mut memfd_slot),
+                    ..
+                } => {
+                    let mem_idx = instance.module.memory_index(def_mem_idx);
+                    // If there was any error clearing the memfd, just
+                    // drop it here, and let the drop handler for the
+                    // MemFdSlot unmap in a way that retains the
+                    // address space reservation.
+                    if memfd_slot.clear_and_remain_ready().is_ok() {
+                        self.memories.return_memfd_slot(index, mem_idx, memfd_slot);
+                    }
+                }

-            let size = memory.byte_size();
-            drop(memory);
-            decommit_memory_pages(base, size).expect("failed to decommit linear memory pages");
+                _ => {
+                    // Reset any faulted guard pages as the physical
+                    // memory may be reused for another instance in
+                    // the future.
+                    #[cfg(all(feature = "uffd", target_os = "linux"))]
+                    memory
+                        .reset_guard_pages()
+                        .expect("failed to reset guard pages");
+                    // require mutable on all platforms, not just uffd
+                    drop(&mut memory);
+
+                    let size = memory.byte_size();
+                    drop(memory);
+                    decommit_memory_pages(base, size)
+                        .expect("failed to decommit linear memory pages");
+                }
+            }
         }

         instance.memories.clear();
@@ -481,50 +487,81 @@ impl InstancePool {
             decommit_table_pages(base, size).expect("failed to decommit table pages");
         }

         instance.tables.clear();
-        instance.dropped_elements.clear();
-
-        // Drop all `global` values which need a destructor, such as externref
-        // values which now need their reference count dropped.
-        instance.drop_globals();
-
-        // Drop any host state
-        instance.host_state = Box::new(());
-
-        // And finally reset the module/offsets back to their original. This
-        // should put everything back in a relatively pristine state for each
-        // fresh allocation later on.
-        instance.module = self.empty_module.clone();
-        instance.offsets = VMOffsets::new(HostPtr, &self.empty_module);
-        instance.wasm_data = &[];
+        // We've now done all of the pooling-allocator-specific
+        // teardown, so we can drop the Instance and let destructors
+        // take care of any other fields (host state, globals, etc.).
+        unsafe {
+            std::ptr::drop_in_place(instance as *mut _);
+        }
+        // The instance is now uninitialized memory and cannot be
+        // touched again until we write a fresh Instance in-place with
+        // std::ptr::write in allocate() above.

         self.free_list.lock().unwrap().push(index);
     }

     fn set_instance_memories(
+        instance_idx: usize,
         instance: &mut Instance,
-        mut memories: impl Iterator<Item = *mut u8>,
+        memories: &MemoryPool,
+        maybe_memfds: &Option<Arc<ModuleMemFds>>,
         max_pages: u64,
     ) -> Result<(), InstantiationError> {
         let module = instance.module.as_ref();

         debug_assert!(instance.memories.is_empty());

-        for plan in
-            (&module.memory_plans.values().as_slice()[module.num_imported_memories..]).iter()
+        for (memory_index, plan) in module
+            .memory_plans
+            .iter()
+            .skip(module.num_imported_memories)
         {
+            let defined_index = module
+                .defined_memory_index(memory_index)
+                .expect("should be a defined memory since we skipped imported ones");
+
             let memory = unsafe {
                 std::slice::from_raw_parts_mut(
-                    memories.next().unwrap(),
+                    memories.get_base(instance_idx, memory_index),
                     (max_pages as usize) * (WASM_PAGE_SIZE as usize),
                 )
             };
-            instance.memories.push(
-                Memory::new_static(plan, memory, commit_memory_pages, unsafe {
-                    &mut *instance.store()
-                })
-                .map_err(InstantiationError::Resource)?,
-            );
+
+            if let Some(memfds) = maybe_memfds {
+                let image = memfds.get_memory_image(defined_index);
+                let mut slot = memories.take_memfd_slot(instance_idx, memory_index)?;
+                let initial_size = plan.memory.minimum * WASM_PAGE_SIZE as u64;
+
+                // If instantiation fails, we can propagate the error
+                // upward and drop the slot. This will cause the Drop
+                // handler to attempt to map the range with PROT_NONE
+                // memory, to reserve the space while releasing any
+                // stale mappings. The next use of this slot will then
+                // create a new MemFdSlot that will try to map over
+                // this, returning errors as well if the mapping
+                // errors persist. The unmap-on-drop is best effort;
+                // if it fails, then we can still soundly continue
+                // using the rest of the pool and allowing the rest of
+                // the process to continue, because we never perform a
+                // mmap that would leave an open space for someone
+                // else to come in and map something.
+                slot.instantiate(initial_size as usize, image)
+                    .map_err(|e| InstantiationError::Resource(e.into()))?;
+
+                instance.memories.push(
+                    Memory::new_static(plan, memory, None, Some(slot), unsafe {
+                        &mut *instance.store()
+                    })
+                    .map_err(InstantiationError::Resource)?,
+                );
+            } else {
+                instance.memories.push(
+                    Memory::new_static(plan, memory, Some(commit_memory_pages), None, unsafe {
+                        &mut *instance.store()
+                    })
+                    .map_err(InstantiationError::Resource)?,
+                );
+            }
         }

         debug_assert!(instance.dropped_data.is_empty());
@@ -566,17 +603,6 @@ impl InstancePool {
         }
     }

-impl Drop for InstancePool {
-    fn drop(&mut self) {
-        unsafe {
-            for i in 0..self.max_instances {
-                let ptr = self.mapping.as_mut_ptr().add(i * self.instance_size) as *mut Instance;
-                std::ptr::drop_in_place(ptr);
-            }
-        }
-    }
-}
-
 /// Represents a pool of WebAssembly linear memories.
 ///
 /// A linear memory is divided into accessible pages and guard pages.
@@ -589,6 +615,10 @@ impl Drop for InstancePool {
 #[derive(Debug)]
 struct MemoryPool {
     mapping: Mmap,
+    // If using the memfd allocation scheme, the MemFd slots. We
+    // dynamically transfer ownership of a slot to a Memory when in
+    // use.
+    memfd_slots: Vec<Mutex<Option<MemFdSlot>>>,
     // The size, in bytes, of each linear memory's reservation plus the guard
     // region allocated for it.
     memory_size: usize,
@@ -673,8 +703,18 @@ impl MemoryPool {
         let mapping = Mmap::accessible_reserved(0, allocation_size)
            .context("failed to create memory pool mapping")?;

+        let num_memfd_slots = if cfg!(feature = "memfd-allocator") {
+            max_instances * max_memories
+        } else {
+            0
+        };
+        let memfd_slots: Vec<_> = std::iter::repeat_with(|| Mutex::new(None))
+            .take(num_memfd_slots)
+            .collect();
+
         let pool = Self {
             mapping,
+            memfd_slots,
             memory_size,
             initial_memory_offset,
             max_memories,
@@ -689,17 +729,43 @@ impl MemoryPool {
         Ok(pool)
     }

-    fn get(&self, instance_index: usize) -> impl Iterator<Item = *mut u8> {
+    fn get_base(&self, instance_index: usize, memory_index: MemoryIndex) -> *mut u8 {
         debug_assert!(instance_index < self.max_instances);
+        let memory_index = memory_index.as_u32() as usize;
+        debug_assert!(memory_index < self.max_memories);
+        let idx = instance_index * self.max_memories + memory_index;
+        let offset = self.initial_memory_offset + idx * self.memory_size;
+        unsafe { self.mapping.as_mut_ptr().offset(offset as isize) }
+    }

-        let base: *mut u8 = unsafe {
-            self.mapping.as_mut_ptr().add(
-                self.initial_memory_offset + instance_index * self.memory_size * self.max_memories,
-            ) as _
-        };
+    fn get<'a>(&'a self, instance_index: usize) -> impl Iterator<Item = *mut u8> + 'a {
+        (0..self.max_memories)
+            .map(move |i| self.get_base(instance_index, MemoryIndex::from_u32(i as u32)))
+    }

-        let size = self.memory_size;
-        (0..self.max_memories).map(move |i| unsafe { base.add(i * size) })
-    }
+    /// Take ownership of the given memfd slot. Must be returned via
+    /// `return_memfd_slot` when the instance is done using it.
+    fn take_memfd_slot(
+        &self,
+        instance_index: usize,
+        memory_index: MemoryIndex,
+    ) -> Result<MemFdSlot, InstantiationError> {
+        let idx = instance_index * self.max_memories + (memory_index.as_u32() as usize);
+        let maybe_slot = self.memfd_slots[idx].lock().unwrap().take();
+
+        maybe_slot.map(|slot| Ok(slot)).unwrap_or_else(|| {
+            MemFdSlot::create(
+                self.get_base(instance_index, memory_index) as *mut c_void,
+                self.memory_size,
+            )
+        })
+    }
+
+    /// Return ownership of the given memfd slot.
+    fn return_memfd_slot(&self, instance_index: usize, memory_index: MemoryIndex, slot: MemFdSlot) {
+        assert!(!slot.is_dirty());
+        let idx = instance_index * self.max_memories + (memory_index.as_u32() as usize);
+        *self.memfd_slots[idx].lock().unwrap() = Some(slot);
+    }
 }

@@ -1413,6 +1479,7 @@ mod test {
                     host_state: Box::new(()),
                     store: StorePtr::empty(),
                     wasm_data: &[],
+                    memfds: None,
                 },
             )
             .expect("allocation should succeed"),
@@ -1437,6 +1504,7 @@ mod test {
                     host_state: Box::new(()),
                     store: StorePtr::empty(),
                     wasm_data: &[],
+                    memfds: None,
                 },
             ) {
                 Err(InstantiationError::Limit(3)) => {}

@@ -577,6 +577,7 @@ mod test {
             PoolingAllocationStrategy::Random,
             InstanceAllocationRequest {
                 module: module.clone(),
+                memfds: None,
                 image_base: 0,
                 functions,
                 imports: Imports {