Implement user fault handling with userfaultfd on Linux.

This commit implements the `uffd` feature, which enables support for the
`userfaultfd` system call on Linux in the pooling instance allocator.

By handling page faults in userland, we are able to detect guard page accesses
without having to constantly change memory page protections.

This should help reduce the number of syscalls, as well as kernel lock
contention, when many threads are allocating and deallocating instances.

Additionally, the user fault handler can lazily initialize an instance's
linear memories (implementation to come).
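
Below is a minimal sketch (not part of this commit) of the userfaultfd flow the
pooling allocator builds on: register a mapping with a userfault file descriptor,
then service missing-page faults from a dedicated thread instead of toggling page
protections through the kernel. It assumes the `userfaultfd` crate, a 4 KiB page
size, and a page-aligned, page-sized region; `spawn_zero_fill_handler` and
`PAGE_SIZE` are illustrative names, and the handler simply zero-fills the
faulting page.

use std::thread;
use userfaultfd::{Event, FeatureFlags, Uffd, UffdBuilder};

const PAGE_SIZE: usize = 4096; // assumed page size for this sketch

fn spawn_zero_fill_handler(base: *mut u8, len: usize) -> std::io::Result<thread::JoinHandle<()>> {
    let uffd: Uffd = UffdBuilder::new()
        .close_on_exec(true)
        .require_features(FeatureFlags::EVENT_UNMAP)
        .create()
        .expect("failed to create userfaultfd");
    // Register the region so missing-page faults on it are delivered to `uffd`.
    uffd.register(base as _, len).expect("failed to register region");
    thread::Builder::new()
        .name("page fault handler".into())
        .spawn(move || {
            while let Ok(Some(event)) = uffd.read_event() {
                if let Event::Pagefault { addr, .. } = event {
                    // Round the faulting address down to its page, zero-fill it, and
                    // wake the faulting thread (the `true` argument) once populated.
                    let page = (addr as usize) & !(PAGE_SIZE - 1);
                    unsafe {
                        uffd.zeropage(page as _, PAGE_SIZE, true).expect("zeropage failed");
                    }
                }
            }
        })
}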
Peter Huene
2021-02-10 20:29:20 -08:00
parent e71ccbf9bc
commit a2c439117a
8 changed files with 874 additions and 8 deletions


@@ -122,6 +122,7 @@ jobs:
- run: cargo check --manifest-path crates/wasmtime/Cargo.toml --features jitdump
- run: cargo check --manifest-path crates/wasmtime/Cargo.toml --features cache
- run: cargo check --manifest-path crates/wasmtime/Cargo.toml --features async
- run: cargo check --manifest-path crates/wasmtime/Cargo.toml --features uffd
# Check some feature combinations of the `wasmtime-c-api` crate
- run: cargo check --manifest-path crates/c-api/Cargo.toml --no-default-features


@@ -89,6 +89,7 @@ jitdump = ["wasmtime/jitdump"]
vtune = ["wasmtime/vtune"]
wasi-crypto = ["wasmtime-wasi-crypto"]
wasi-nn = ["wasmtime-wasi-nn"]
uffd = ["wasmtime/uffd"]
# Try the experimental, work-in-progress new x86_64 backend. This is not stable
# as of June 2020.


@@ -37,3 +37,9 @@ cc = "1.0"
[badges]
maintenance = { status = "actively-developed" }
[features]
default = []
# Enables support for userfaultfd in the pooling allocator when building on Linux
uffd = ["userfaultfd"]


@@ -67,6 +67,11 @@ pub(crate) struct Instance {
/// Hosts can store arbitrary per-instance information here.
host_state: Box<dyn Any>,
/// Stores guard page faults in memory relating to the instance.
/// This is used for the pooling allocator with uffd enabled on Linux.
#[cfg(all(feature = "uffd", target_os = "linux"))]
guard_page_faults: RefCell<Vec<(*mut u8, usize, unsafe fn(*mut u8, usize) -> bool)>>,
/// Additional context used by compiled wasm code. This field is last, and
/// represents a dynamically-sized array that extends beyond the nominal
/// end of the struct (similar to a flexible array member).
@@ -376,6 +381,10 @@ impl Instance {
/// Returns `None` if memory can't be grown by the specified amount
/// of pages.
pub(crate) fn memory_grow(&self, memory_index: DefinedMemoryIndex, delta: u32) -> Option<u32> {
// Reset all guard pages before growing any memory
#[cfg(all(feature = "uffd", target_os = "linux"))]
self.reset_guard_pages().ok()?;
let result = self
.memories
.get(memory_index)
@@ -803,6 +812,40 @@ impl Instance {
(foreign_table_index, foreign_instance)
}
}
/// Records a faulted guard page.
///
/// This is used to track faulted guard pages that need to be reset.
#[cfg(all(feature = "uffd", target_os = "linux"))]
pub(crate) fn record_guard_page_fault(
&self,
page_addr: *mut u8,
size: usize,
reset: unsafe fn(*mut u8, usize) -> bool,
) {
self.guard_page_faults
.borrow_mut()
.push((page_addr, size, reset));
}
/// Resets previously faulted guard pages.
///
/// This is used to reset the protection of any guard pages that were previously faulted.
///
/// Resetting the guard pages is required before growing memory.
#[cfg(all(feature = "uffd", target_os = "linux"))]
pub(crate) fn reset_guard_pages(&self) -> Result<(), String> {
let mut faults = self.guard_page_faults.borrow_mut();
for (addr, len, reset) in faults.drain(..) {
unsafe {
if !reset(addr, len) {
return Err("failed to reset previously faulted memory guard page".into());
}
}
}
Ok(())
}
}
/// A handle holding an `Instance` of a WebAssembly module.


@@ -525,6 +525,8 @@ unsafe impl InstanceAllocator for OnDemandInstanceAllocator {
)),
dropped_data: RefCell::new(EntitySet::with_capacity(req.module.passive_data.len())),
host_state: req.host_state,
#[cfg(all(feature = "uffd", target_os = "linux"))]
guard_page_faults: RefCell::new(Vec::new()),
vmctx: VMContext {},
};
let layout = instance.alloc_layout();


@@ -31,6 +31,11 @@ cfg_if::cfg_if! {
if #[cfg(windows)] {
mod windows;
use windows as imp;
} else if #[cfg(all(feature = "uffd", target_os = "linux"))] {
mod uffd;
use uffd as imp;
use imp::{PageFaultHandler, reset_guard_page};
use std::sync::atomic::{AtomicBool, Ordering};
} else if #[cfg(target_os = "linux")] {
mod linux;
use linux as imp;
@@ -335,6 +340,9 @@ impl Iterator for BasePointerIterator {
/// structure depending on the limits used to create the pool.
///
/// The pool maintains a free list for fast instance allocation.
///
/// The userfault handler relies on how instances are stored in the mapping,
/// so make sure the uffd implementation is kept up-to-date.
#[derive(Debug)]
struct InstancePool {
mapping: Mmap,
@@ -413,6 +421,8 @@ impl InstancePool {
dropped_elements: RefCell::new(EntitySet::new()),
dropped_data: RefCell::new(EntitySet::new()),
host_state: Box::new(()),
#[cfg(all(feature = "uffd", target_os = "linux"))]
guard_page_faults: RefCell::new(Vec::new()),
vmctx: VMContext {},
},
);
@@ -523,6 +533,12 @@ impl InstancePool {
) -> Result<(), InstantiationError> {
let module = instance.module.as_ref();
// Reset all guard pages before reusing the instance
#[cfg(all(feature = "uffd", target_os = "linux"))]
instance
.reset_guard_pages()
.map_err(InstantiationError::Resource)?;
instance.memories.clear();
for plan in
@@ -590,6 +606,10 @@ impl Drop for InstancePool {
///
/// Each instance index into the pool returns an iterator over the base addresses
/// of the instance's linear memories.
///
///
/// The userfault handler relies on how memories are stored in the mapping,
/// so make sure the uffd implementation is kept up-to-date.
#[derive(Debug)]
struct MemoryPool {
mapping: Mmap,
@@ -646,6 +666,9 @@ impl MemoryPool {
///
/// Each instance index into the pool returns an iterator over the base addresses
/// of the instance's tables.
///
/// The userfault handler relies on how tables are stored in the mapping,
/// so make sure the uffd implementation is kept up-to-date.
#[derive(Debug)]
struct TablePool {
mapping: Mmap,
@@ -710,6 +733,9 @@ impl TablePool {
///
/// The top of the stack (starting stack pointer) is returned when a stack is allocated
/// from the pool.
///
/// The userfault handler relies on how stacks are stored in the mapping,
/// so make sure the uffd implementation is kept up-to-date.
#[derive(Debug)]
struct StackPool {
mapping: Mmap,
@@ -717,6 +743,8 @@ struct StackPool {
max_instances: usize,
page_size: usize,
free_list: Mutex<Vec<usize>>,
#[cfg(all(feature = "uffd", target_os = "linux"))]
faulted_guard_pages: Arc<[AtomicBool]>,
}
impl StackPool {
@@ -745,6 +773,11 @@ impl StackPool {
max_instances,
page_size,
free_list: Mutex::new((0..max_instances).collect()),
#[cfg(all(feature = "uffd", target_os = "linux"))]
faulted_guard_pages: std::iter::repeat_with(|| false.into())
.take(max_instances)
.collect::<Vec<_>>()
.into(),
})
}
@@ -774,11 +807,25 @@ impl StackPool {
.as_mut_ptr()
.add((index * self.stack_size) + self.page_size);
cfg_if::cfg_if! {
if #[cfg(all(feature = "uffd", target_os = "linux"))] {
// Check to see if a guard page needs to be reset
if self.faulted_guard_pages[index].swap(false, Ordering::SeqCst) {
if !reset_guard_page(bottom_of_stack.sub(self.page_size), self.page_size) {
return Err(FiberStackError::Resource(
"failed to reset stack guard page".into(),
));
}
}
} else {
// Make the stack accessible (excluding the guard page)
if !make_accessible(bottom_of_stack, size_without_guard) {
return Err(FiberStackError::Resource(
"failed to make instance memory accessible".into(),
));
}
}
}
// The top of the stack should be returned
@@ -824,6 +871,8 @@ pub struct PoolingInstanceAllocator {
instance_limits: InstanceLimits,
instances: mem::ManuallyDrop<InstancePool>,
stacks: mem::ManuallyDrop<StackPool>,
#[cfg(all(feature = "uffd", target_os = "linux"))]
_fault_handler: PageFaultHandler,
}
impl PoolingInstanceAllocator {
@@ -866,19 +915,28 @@ impl PoolingInstanceAllocator {
));
}
let instances = InstancePool::new(&module_limits, &instance_limits)?;
let stacks = StackPool::new(&instance_limits, stack_size)?;
#[cfg(all(feature = "uffd", target_os = "linux"))]
let _fault_handler = PageFaultHandler::new(&instances, &stacks)?;
Ok(Self {
strategy,
module_limits,
instance_limits,
instances: mem::ManuallyDrop::new(instances),
stacks: mem::ManuallyDrop::new(stacks),
#[cfg(all(feature = "uffd", target_os = "linux"))]
_fault_handler,
})
}
}
impl Drop for PoolingInstanceAllocator {
fn drop(&mut self) {
// Manually drop the pools before the fault handler (if uffd is enabled)
// This ensures that any fault handler thread monitoring the pool memory terminates
unsafe {
mem::ManuallyDrop::drop(&mut self.instances);
mem::ManuallyDrop::drop(&mut self.stacks);


@@ -0,0 +1,752 @@
//! Implements user-mode page fault handling with the `userfaultfd` ("uffd") system call on Linux.
//!
//! Handling page faults for memory accesses in regions relating to WebAssembly instances
//! enables the implementation of guard pages in user space rather than kernel space.
//!
//! This reduces the number of system calls and kernel locks needed to provide correct
//! WebAssembly memory semantics.
//!
//! Additionally, linear memories and WebAssembly tables can be lazy-initialized upon access.
//!
//! This feature requires Linux kernel 4.11 or newer.
use super::{InstancePool, StackPool};
use crate::{instance::Instance, Mmap};
use std::convert::TryInto;
use std::ptr;
use std::sync::{
atomic::{AtomicBool, Ordering},
Arc,
};
use std::thread;
use userfaultfd::{Event, FeatureFlags, IoctlFlags, Uffd, UffdBuilder};
use wasmtime_environ::{wasm::DefinedMemoryIndex, WASM_PAGE_SIZE};
pub unsafe fn make_accessible(_addr: *mut u8, _len: usize) -> bool {
// A no-op when userfaultfd is used
true
}
pub unsafe fn reset_guard_page(addr: *mut u8, len: usize) -> bool {
// Guard pages are READ_WRITE with uffd until faulted
region::protect(addr, len, region::Protection::READ_WRITE).is_ok()
}
pub unsafe fn decommit(addr: *mut u8, len: usize) {
// Use MADV_DONTNEED to mark the pages as missing
// This will cause a missing page fault for next access on any page in the given range
assert_eq!(
libc::madvise(addr as _, len, libc::MADV_DONTNEED),
0,
"madvise failed to mark pages as missing: {}",
std::io::Error::last_os_error()
);
}
pub fn create_memory_map(_accessible_size: usize, mapping_size: usize) -> Result<Mmap, String> {
// Allocate a single read-write region at once
// As writable pages need to count towards commit charge, use MAP_NORESERVE to override.
// This implies that the kernel is configured to allow overcommit, or else
// this allocation will almost certainly fail unless there is enough physical memory to back the allocation.
// The consequence of not reserving is that our process may segfault on any write to a memory
// page that cannot be backed (i.e. out of memory conditions).
if mapping_size == 0 {
return Ok(Mmap::new());
}
unsafe {
let ptr = libc::mmap(
ptr::null_mut(),
mapping_size,
libc::PROT_READ | libc::PROT_WRITE,
libc::MAP_PRIVATE | libc::MAP_ANON | libc::MAP_NORESERVE,
-1,
0,
);
if ptr as isize == -1_isize {
return Err(format!(
"failed to allocate pool memory: {}",
std::io::Error::last_os_error()
));
}
Ok(Mmap::from_raw(ptr as usize, mapping_size))
}
}
/// Represents a location of a page fault within monitored regions of memory.
enum AddressLocation<'a> {
/// The address location is in a WebAssembly table page.
/// The fault handler will zero the page as tables are initialized at instantiation-time.
TablePage {
/// The address of the page being accessed.
page_addr: *mut u8,
/// The length of the page being accessed.
len: usize,
},
/// The address location is in a WebAssembly linear memory page.
/// The fault handler will copy the pages from initialization data if necessary.
MemoryPage {
/// The address of the page being accessed.
page_addr: *mut u8,
/// The length of the page being accessed.
len: usize,
/// The instance related to the memory page that was accessed.
instance: &'a Instance,
/// The index of the memory that was accessed.
memory_index: usize,
/// The Wasm page index to initialize if the access was not a guard page.
page_index: Option<usize>,
},
/// The address location is in an execution stack.
/// The fault handler will zero the page.
StackPage {
/// The address of the page being accessed.
page_addr: *mut u8,
/// The length of the page being accessed.
len: usize,
/// The index of the stack that was accessed.
index: usize,
/// Whether or not the access was to a guard page.
guard_page: bool,
},
}
/// Used to resolve fault addresses to address locations.
///
/// This implementation relies heavily on how the various resource pools utilize their memory.
///
/// `usize` is used here instead of pointers to keep this `Send` as it gets sent to the handler thread.
struct AddressLocator {
instances_start: usize,
instance_size: usize,
max_instances: usize,
memories_start: usize,
memories_end: usize,
memory_size: usize,
max_memories: usize,
tables_start: usize,
tables_end: usize,
table_size: usize,
stacks_start: usize,
stacks_end: usize,
stack_size: usize,
page_size: usize,
}
impl AddressLocator {
fn new(instances: &InstancePool, stacks: &StackPool) -> Self {
let instances_start = instances.mapping.as_ptr() as usize;
let memories_start = instances.memories.mapping.as_ptr() as usize;
let memories_end = memories_start + instances.memories.mapping.len();
let tables_start = instances.tables.mapping.as_ptr() as usize;
let tables_end = tables_start + instances.tables.mapping.len();
let stacks_start = stacks.mapping.as_ptr() as usize;
let stacks_end = stacks_start + stacks.mapping.len();
let stack_size = stacks.stack_size;
// Should always have instances
debug_assert!(instances_start != 0);
Self {
instances_start,
instance_size: instances.instance_size,
max_instances: instances.max_instances,
memories_start,
memories_end,
memory_size: instances.memories.memory_size,
max_memories: instances.memories.max_memories,
tables_start,
tables_end,
table_size: instances.tables.table_size,
stacks_start,
stacks_end,
stack_size,
page_size: instances.tables.page_size,
}
}
/// This is super-duper unsafe as it is used from the handler thread
/// to access instance data without any locking primitives.
///
/// It is assumed that the thread that owns the instance being accessed is
/// currently suspended waiting on a fault to be handled.
///
/// Of course a stray faulting memory access from a thread that does not own
/// the instance might introduce a race, but this implementation considers
/// such to be a serious bug.
///
/// If the assumption holds true, accessing the instance data from the handler thread
/// should, in theory, be safe.
unsafe fn get_instance(&self, index: usize) -> &mut Instance {
debug_assert!(index < self.max_instances);
&mut *((self.instances_start + (index * self.instance_size)) as *mut Instance)
}
unsafe fn get_location(&self, addr: usize) -> Option<AddressLocation> {
// Check for a memory location
if addr >= self.memories_start && addr < self.memories_end {
let index = (addr - self.memories_start) / self.memory_size;
let memory_index = index % self.max_memories;
let memory_start = self.memories_start + (index * self.memory_size);
let page_index = (addr - memory_start) / (WASM_PAGE_SIZE as usize);
let instance = self.get_instance(index / self.max_memories);
let init_page_index = instance
.memories
.get(
DefinedMemoryIndex::from_u32(memory_index as u32)
.try_into()
.unwrap(),
)
.and_then(|m| {
if page_index < m.size() as usize {
Some(page_index)
} else {
None
}
});
return Some(AddressLocation::MemoryPage {
page_addr: (memory_start + page_index * (WASM_PAGE_SIZE as usize)) as _,
len: WASM_PAGE_SIZE as usize,
instance,
memory_index,
page_index: init_page_index,
});
}
// Check for a table location
if addr >= self.tables_start && addr < self.tables_end {
let index = (addr - self.tables_start) / self.table_size;
let table_start = self.tables_start + (index * self.table_size);
let table_offset = addr - table_start;
let page_index = table_offset / self.page_size;
return Some(AddressLocation::TablePage {
page_addr: (table_start + (page_index * self.page_size)) as _,
len: self.page_size,
});
}
// Check for a stack location
if addr >= self.stacks_start && addr < self.stacks_end {
let index = (addr - self.stacks_start) / self.stack_size;
let stack_start = self.stacks_start + (index * self.stack_size);
let stack_offset = addr - stack_start;
let page_offset = (stack_offset / self.page_size) * self.page_size;
return Some(AddressLocation::StackPage {
page_addr: (stack_start + page_offset) as _,
len: self.page_size,
index,
guard_page: stack_offset < self.page_size,
});
}
None
}
}
fn wake_guard_page_access(uffd: &Uffd, page_addr: *const u8, len: usize) -> Result<(), String> {
unsafe {
// Set the page to NONE to induce a SIGSEGV for the access on the next retry
region::protect(page_addr, len, region::Protection::NONE)
.map_err(|e| format!("failed to change guard page protection: {}", e))?;
uffd.wake(page_addr as _, len).map_err(|e| {
format!(
"failed to wake page at {:p} with length {}: {}",
page_addr, len, e
)
})?;
Ok(())
}
}
fn handler_thread(
uffd: Uffd,
locator: AddressLocator,
mut registrations: usize,
faulted_stack_guard_pages: Arc<[AtomicBool]>,
) -> Result<(), String> {
loop {
match uffd.read_event().expect("failed to read event") {
Some(Event::Unmap { start, end }) => {
log::trace!("memory region unmapped: {:p}-{:p}", start, end);
let (start, end) = (start as usize, end as usize);
if (start == locator.memories_start && end == locator.memories_end)
|| (start == locator.tables_start && end == locator.tables_end)
|| (start == locator.stacks_start && end == locator.stacks_end)
{
registrations -= 1;
if registrations == 0 {
break;
}
} else {
panic!("unexpected memory region unmapped");
}
}
Some(Event::Pagefault {
addr: access_addr, ..
}) => {
unsafe {
match locator.get_location(access_addr as usize) {
Some(AddressLocation::TablePage { page_addr, len }) => {
log::trace!(
"handling fault in table at address {:p} on page {:p}",
access_addr,
page_addr,
);
// Tables are always initialized upon instantiation, so zero the page
uffd.zeropage(page_addr as _, len, true).map_err(|e| {
format!(
"failed to zero page at {:p} with length {}: {}",
page_addr, len, e
)
})?;
}
Some(AddressLocation::MemoryPage {
page_addr,
len,
instance,
memory_index,
page_index,
}) => {
log::trace!(
"handling fault in linear memory at address {:p} on page {:p}",
access_addr,
page_addr
);
match page_index {
Some(page_index) => {
// TODO: copy the memory initialization data rather than zero the page
uffd.zeropage(page_addr as _, len, true).map_err(|e| {
format!(
"failed to zero page at {:p} with length {}: {}",
page_addr, len, e
)
})?;
}
None => {
log::trace!("out of bounds memory access at {:p}", access_addr);
// Record the guard page fault with the instance so it can be reset later.
instance.record_guard_page_fault(
page_addr,
len,
reset_guard_page,
);
wake_guard_page_access(&uffd, page_addr, len)?;
}
}
}
Some(AddressLocation::StackPage {
page_addr,
len,
index,
guard_page,
}) => {
log::trace!(
"handling fault in stack {} at address {:p}",
index,
access_addr,
);
if guard_page {
// Logging as trace as stack guard pages might be a trap condition in the future
log::trace!("stack overflow fault at {:p}", access_addr);
// Mark the stack as having a faulted guard page
// The next time the stack is used the guard page will be reset
faulted_stack_guard_pages[index].store(true, Ordering::SeqCst);
wake_guard_page_access(&uffd, page_addr, len)?;
continue;
}
// Always zero stack pages
uffd.zeropage(page_addr as _, len, true).map_err(|e| {
format!(
"failed to zero page at {:p} with length {}: {}",
page_addr, len, e
)
})?;
}
None => {
return Err(format!(
"failed to locate fault address {:p} in registered memory regions",
access_addr
));
}
}
}
}
Some(_) => continue,
None => break,
}
}
Ok(())
}
#[derive(Debug)]
pub struct PageFaultHandler {
thread: Option<thread::JoinHandle<Result<(), String>>>,
}
impl PageFaultHandler {
pub(super) fn new(instances: &InstancePool, stacks: &StackPool) -> Result<Self, String> {
let uffd = UffdBuilder::new()
.close_on_exec(true)
.require_features(FeatureFlags::EVENT_UNMAP)
.create()
.map_err(|e| format!("failed to create user fault descriptor: {}", e))?;
// Register the ranges with the userfault fd
let mut registrations = 0;
for (start, len) in &[
(
instances.memories.mapping.as_ptr() as usize,
instances.memories.mapping.len(),
),
(
instances.tables.mapping.as_ptr() as usize,
instances.tables.mapping.len(),
),
(stacks.mapping.as_ptr() as usize, stacks.mapping.len()),
] {
if *start == 0 || *len == 0 {
continue;
}
let ioctls = uffd
.register(*start as _, *len)
.map_err(|e| format!("failed to register user fault range: {}", e))?;
if !ioctls.contains(IoctlFlags::WAKE | IoctlFlags::COPY | IoctlFlags::ZEROPAGE) {
return Err(format!(
"required user fault ioctls not supported; found: {:?}",
ioctls,
));
}
registrations += 1;
}
let thread = if registrations == 0 {
log::trace!("user fault handling disabled as there are no regions to monitor");
None
} else {
log::trace!(
"user fault handling enabled on {} memory regions",
registrations
);
let locator = AddressLocator::new(&instances, &stacks);
let faulted_stack_guard_pages = stacks.faulted_guard_pages.clone();
Some(
thread::Builder::new()
.name("page fault handler".into())
.spawn(move || {
handler_thread(uffd, locator, registrations, faulted_stack_guard_pages)
})
.map_err(|e| format!("failed to spawn page fault handler thread: {}", e))?,
)
};
Ok(Self { thread })
}
}
impl Drop for PageFaultHandler {
fn drop(&mut self) {
if let Some(thread) = self.thread.take() {
thread
.join()
.expect("failed to join page fault handler thread")
.expect("fault handler thread failed");
}
}
}
#[cfg(test)]
mod test {
use super::*;
use crate::{
table::max_table_element_size, Imports, InstanceAllocationRequest, InstanceLimits,
ModuleLimits, PoolingAllocationStrategy, VMSharedSignatureIndex,
};
use wasmtime_environ::{
entity::PrimaryMap,
wasm::{Memory, Table, TableElementType, WasmType},
MemoryPlan, MemoryStyle, Module, TablePlan, TableStyle,
};
#[cfg(target_pointer_width = "64")]
#[test]
fn test_address_locator() {
let module_limits = ModuleLimits {
imported_functions: 0,
imported_tables: 0,
imported_memories: 0,
imported_globals: 0,
types: 0,
functions: 0,
tables: 3,
memories: 2,
globals: 0,
table_elements: 1000,
memory_pages: 2,
};
let instance_limits = InstanceLimits {
count: 3,
address_space_size: (WASM_PAGE_SIZE * 10) as u64,
};
let instances =
InstancePool::new(&module_limits, &instance_limits).expect("should allocate");
let stacks = StackPool::new(&instance_limits, 8192).expect("should allocate");
let locator = AddressLocator::new(&instances, &stacks);
assert_eq!(locator.instances_start, instances.mapping.as_ptr() as usize);
assert_eq!(locator.instance_size, 4096);
assert_eq!(locator.max_instances, 3);
assert_eq!(
locator.memories_start,
instances.memories.mapping.as_ptr() as usize
);
assert_eq!(
locator.memories_end,
locator.memories_start + instances.memories.mapping.len()
);
assert_eq!(locator.memory_size, (WASM_PAGE_SIZE * 10) as usize);
assert_eq!(locator.max_memories, 2);
assert_eq!(
locator.tables_start,
instances.tables.mapping.as_ptr() as usize
);
assert_eq!(
locator.tables_end,
locator.tables_start + instances.tables.mapping.len()
);
assert_eq!(locator.table_size, 8192);
assert_eq!(locator.stacks_start, stacks.mapping.as_ptr() as usize);
assert_eq!(
locator.stacks_end,
locator.stacks_start + stacks.mapping.len()
);
assert_eq!(locator.stack_size, 12288);
unsafe {
assert!(locator.get_location(0).is_none());
assert!(locator
.get_location(std::cmp::max(
locator.memories_end,
std::cmp::max(locator.tables_end, locator.stacks_end)
))
.is_none());
let mut module = Module::new();
for _ in 0..module_limits.memories {
module.memory_plans.push(MemoryPlan {
memory: Memory {
minimum: 2,
maximum: Some(2),
shared: false,
},
style: MemoryStyle::Static { bound: 1 },
offset_guard_size: 0,
});
}
for _ in 0..module_limits.tables {
module.table_plans.push(TablePlan {
table: Table {
wasm_ty: WasmType::FuncRef,
ty: TableElementType::Func,
minimum: 800,
maximum: Some(900),
},
style: TableStyle::CallerChecksSignature,
});
}
module_limits
.validate_module(&module)
.expect("should validate");
let mut handles = Vec::new();
let module = Arc::new(module);
let finished_functions = &PrimaryMap::new();
// Allocate the maximum number of instances with the maximum number of memories and tables
for _ in 0..instances.max_instances {
handles.push(
instances
.allocate(
PoolingAllocationStrategy::Random,
InstanceAllocationRequest {
module: module.clone(),
finished_functions,
imports: Imports {
functions: &[],
tables: &[],
memories: &[],
globals: &[],
},
lookup_shared_signature: &|_| VMSharedSignatureIndex::default(),
host_state: Box::new(()),
interrupts: std::ptr::null(),
externref_activations_table: std::ptr::null_mut(),
stack_map_registry: std::ptr::null_mut(),
},
)
.expect("instance should allocate"),
);
}
// Validate memory locations
for instance_index in 0..instances.max_instances {
for memory_index in 0..instances.memories.max_memories {
let memory_start = locator.memories_start
+ (instance_index * locator.memory_size * locator.max_memories)
+ (memory_index * locator.memory_size);
// Test for access to first page
match locator.get_location(memory_start + 10000) {
Some(AddressLocation::MemoryPage {
page_addr,
len,
instance: _,
memory_index: mem_index,
page_index,
}) => {
assert_eq!(page_addr, memory_start as _);
assert_eq!(len, WASM_PAGE_SIZE as usize);
assert_eq!(mem_index, memory_index);
assert_eq!(page_index, Some(0));
}
_ => panic!("expected a memory page location"),
}
// Test for access to second page
match locator.get_location(memory_start + 1024 + WASM_PAGE_SIZE as usize) {
Some(AddressLocation::MemoryPage {
page_addr,
len,
instance: _,
memory_index: mem_index,
page_index,
}) => {
assert_eq!(page_addr, (memory_start + WASM_PAGE_SIZE as usize) as _);
assert_eq!(len, WASM_PAGE_SIZE as usize);
assert_eq!(mem_index, memory_index);
assert_eq!(page_index, Some(1));
}
_ => panic!("expected a memory page location"),
}
// Test for guard page
match locator.get_location(memory_start + 10 + 9 * WASM_PAGE_SIZE as usize) {
Some(AddressLocation::MemoryPage {
page_addr,
len,
instance: _,
memory_index: mem_index,
page_index,
}) => {
assert_eq!(
page_addr,
(memory_start + (9 * WASM_PAGE_SIZE as usize)) as _
);
assert_eq!(len, WASM_PAGE_SIZE as usize);
assert_eq!(mem_index, memory_index);
assert_eq!(page_index, None);
}
_ => panic!("expected a memory page location"),
}
}
}
// Validate table locations
for instance_index in 0..instances.max_instances {
for table_index in 0..instances.tables.max_tables {
let table_start = locator.tables_start
+ (instance_index * locator.table_size * instances.tables.max_tables)
+ (table_index * locator.table_size);
// Check for an access of index 107 (first page)
match locator.get_location(table_start + (107 * max_table_element_size())) {
Some(AddressLocation::TablePage { page_addr, len }) => {
assert_eq!(page_addr, table_start as _);
assert_eq!(len, locator.page_size);
}
_ => panic!("expected a table page location"),
}
// Check for an access of index 799 (second page)
match locator.get_location(table_start + (799 * max_table_element_size())) {
Some(AddressLocation::TablePage { page_addr, len }) => {
assert_eq!(page_addr, (table_start + locator.page_size) as _);
assert_eq!(len, locator.page_size);
}
_ => panic!("expected a table page location"),
}
}
}
// Validate stack locations
for stack_index in 0..instances.max_instances {
let stack_start = locator.stacks_start + (stack_index * locator.stack_size);
// Check for stack page location
match locator.get_location(stack_start + locator.page_size * 2) {
Some(AddressLocation::StackPage {
page_addr,
len,
index,
guard_page,
}) => {
assert_eq!(page_addr, (stack_start + locator.page_size * 2) as _);
assert_eq!(len, locator.page_size);
assert_eq!(index, stack_index);
assert!(!guard_page);
}
_ => panic!("expected a stack page location"),
}
// Check for guard page
match locator.get_location(stack_start) {
Some(AddressLocation::StackPage {
page_addr,
len,
index,
guard_page,
}) => {
assert_eq!(page_addr, stack_start as _);
assert_eq!(len, locator.page_size);
assert_eq!(index, stack_index);
assert!(guard_page);
}
_ => panic!("expected a stack page location"),
}
}
for handle in handles.drain(..) {
instances.deallocate(&handle);
}
}
}
}


@@ -73,3 +73,6 @@ experimental_x64 = ["wasmtime-jit/experimental_x64"]
# Enables support for "async stores" as well as defining host functions as
# `async fn` and calling functions asynchronously.
async = ["wasmtime-fiber"]
# Enables userfaultfd support in the runtime's pooling allocator when building on Linux
uffd = ["wasmtime-runtime/uffd"]
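
For reference, a downstream crate could opt in to this support with a manifest entry like the following (hypothetical consumer Cargo.toml; the version number is illustrative):

[dependencies]
wasmtime = { version = "0.23", features = ["uffd"] }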