diff --git a/crates/environ/src/module.rs b/crates/environ/src/module.rs
index d941801658..204a64c350 100644
--- a/crates/environ/src/module.rs
+++ b/crates/environ/src/module.rs
@@ -95,19 +95,6 @@ impl MemoryPlan {
             },
         }
     }
-
-    /// Determine whether a data segment (memory initializer) is
-    /// possibly out-of-bounds. Returns `true` if the initializer has a
-    /// dynamic location and this question cannot be resolved
-    /// pre-instantiation; hence, this method's result should not be
-    /// used to signal an error, only to exit optimized/simple fastpaths.
-    pub fn initializer_possibly_out_of_bounds(&self, init: &MemoryInitializer) -> bool {
-        match init.end() {
-            // Not statically known, so possibly out of bounds (we can't guarantee in-bounds).
-            None => true,
-            Some(end) => end > self.memory.minimum * (WASM_PAGE_SIZE as u64),
-        }
-    }
 }
 
 /// A WebAssembly linear memory initializer.
@@ -126,28 +113,19 @@ pub struct MemoryInitializer {
     pub data: Range<u32>,
 }
 
-impl MemoryInitializer {
-    /// If this initializer has a definite, static, non-overflowed end address, return it.
-    pub fn end(&self) -> Option<u64> {
-        if self.base.is_some() {
-            return None;
-        }
-        self.offset.checked_add(self.data.len() as u64)
-    }
-}
-
 /// The type of WebAssembly linear memory initialization to use for a module.
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub enum MemoryInitialization {
     /// Memory initialization is segmented.
     ///
-    /// Segmented initialization can be used for any module, but it is required if:
+    /// Segmented initialization can be used for any module, but it is required
+    /// if:
     ///
     /// * A data segment references an imported memory.
     /// * A data segment uses a global base.
     ///
-    /// Segmented initialization is performed by processing the complete set of data segments
-    /// when the module is instantiated.
+    /// Segmented initialization is performed by processing the complete set of
+    /// data segments when the module is instantiated.
    ///
     /// This is the default memory initialization type.
     Segmented(Vec<MemoryInitializer>),
@@ -159,13 +137,16 @@ pub enum MemoryInitialization {
     /// * All data segments must reference defined memories.
     /// * All data segments must not use a global base.
     ///
-    /// Paged initialization is performed by copying (or mapping) entire WebAssembly pages to each linear memory.
+    /// Paged initialization is performed by copying (or mapping) entire
+    /// WebAssembly pages to each linear memory.
     ///
-    /// The `uffd` feature makes use of this type of memory initialization because it can instruct the kernel
-    /// to back an entire WebAssembly page from an existing set of in-memory pages.
+    /// The `uffd` feature makes use of this type of memory initialization
+    /// because it can instruct the kernel to back an entire WebAssembly page
+    /// from an existing set of in-memory pages.
     ///
-    /// By processing the data segments at module compilation time, the uffd fault handler doesn't have to do
-    /// any work to point the kernel at the right linear memory page to use.
+    /// By processing the data segments at module compilation time, the uffd
+    /// fault handler doesn't have to do any work to point the kernel at the
+    /// right linear memory page to use.
     Paged {
         /// The map of defined memory index to a list of initialization pages.
         ///
         /// These indices, like those in `MemoryInitializer`, point within a data
         /// segment that will come as an auxiliary descriptor with other data
         /// such as the compiled code for the wasm module.
-        map: PrimaryMap<DefinedMemoryIndex, Vec<(u64, Range<u32>)>>,
-        /// Whether or not an out-of-bounds data segment was observed.
-        /// This is used to fail module instantiation after the pages are
-        /// initialized.
-        out_of_bounds: bool,
+        map: PrimaryMap<MemoryIndex, Vec<(u64, Range<u32>)>>,
     },
 }
 
@@ -192,88 +170,66 @@ impl ModuleTranslation<'_> {
     /// initialization then this won't change anything. Otherwise if it is
     /// compatible then the `memory_initialization` field will be updated.
     pub fn try_paged_init(&mut self) {
-        let initializers = match &self.module.memory_initialization {
-            MemoryInitialization::Segmented(list) => list,
-            MemoryInitialization::Paged { .. } => return,
-        };
-        let page_size = u64::from(WASM_PAGE_SIZE);
-        let num_defined_memories =
-            self.module.memory_plans.len() - self.module.num_imported_memories;
-        let mut out_of_bounds = false;
+        // This method only attempts to transform a `Segmented` memory init
+        // into a `Paged` one, no other state.
+        if !self.module.memory_initialization.is_segmented() {
+            return;
+        }
 
         // Initially all memories start out as all zeros, represented with a
         // lack of entries in the `BTreeMap` here. The map indexes byte offset
         // (which is always wasm-page-aligned) to the contents of the page, with
         // missing entries implicitly as all zeros.
-        let mut page_contents = PrimaryMap::with_capacity(num_defined_memories);
-        for _ in 0..num_defined_memories {
+        let mut page_contents = PrimaryMap::with_capacity(self.module.memory_plans.len());
+        for _ in 0..self.module.memory_plans.len() {
             page_contents.push(BTreeMap::new());
         }
 
-        assert_eq!(initializers.len(), self.data.len());
-        for (initializer, data) in initializers.iter().zip(&self.data) {
-            let memory_index = match (
-                self.module.defined_memory_index(initializer.memory_index),
-                initializer.base.is_some(),
-            ) {
-                (None, _) | (_, true) => {
-                    // If the initializer references an imported memory or uses a global base,
-                    // the complete set of segments will need to be processed at module instantiation
-                    return;
+        // Perform a "dry run" of memory initialization which will fail if we
+        // can't switch to paged initialization. When data is written it's
+        // transformed into the representation of `page_contents`.
+        let mut data = self.data.iter();
+        let ok = self.module.memory_initialization.init_memory(
+            InitMemory::CompileTime(&self.module),
+            &mut |memory, offset, data_range| {
+                let data = data.next().unwrap();
+                assert_eq!(data.len(), data_range.len());
+                // If an initializer references an imported memory then
+                // everything will need to be processed in-order anyway to
+                // handle the dynamic limits of the memory specified.
+                if self.module.defined_memory_index(memory).is_none() {
+                    return false;
+                };
+                let page_size = u64::from(WASM_PAGE_SIZE);
+                let contents = &mut page_contents[memory];
+                let mut page_index = offset / page_size;
+                let mut page_offset = (offset % page_size) as usize;
+                let mut data = &data[..];
+
+                while !data.is_empty() {
+                    // If this page hasn't been seen before, then it starts out
+                    // as all zeros.
+                    let page = contents
+                        .entry(page_index)
+                        .or_insert_with(|| vec![0; page_size as usize]);
+                    let page = &mut page[page_offset..];
+
+                    let len = std::cmp::min(data.len(), page.len());
+                    page[..len].copy_from_slice(&data[..len]);
+
+                    page_index += 1;
+                    page_offset = 0;
+                    data = &data[len..];
                 }
-                (Some(index), false) => index,
-            };
-            if out_of_bounds {
-                continue;
-            }
-            // Perform a bounds check on the segment
-            //
-            // As this segment is referencing a defined memory without a global
-            // base, the last byte written to by the segment cannot exceed the
-            // memory's initial minimum size
-            let len = u64::try_from(initializer.data.len()).unwrap();
-            let end = match initializer.offset.checked_add(len) {
-                Some(end) => end,
-                None => {
-                    out_of_bounds = true;
-                    continue;
-                }
-            };
-            let memory = &self.module.memory_plans[initializer.memory_index].memory;
-            let initial_memory_end = memory.minimum * page_size;
-            if end > initial_memory_end {
-                out_of_bounds = true;
-                continue;
-            }
+                true
+            },
+        );
 
-            // Perform the same style of initialization that instantiating the
-            // module performs at this point, except initialize our
-            // `page_contents` map which is indexed by page number and contains
-            // the actual page contents.
-            //
-            // This is done iteratively page-by-page until the entire data
-            // segment has been copied into the page map.
-            let contents = &mut page_contents[memory_index];
-            let mut page_index = initializer.offset / page_size;
-            let mut page_offset = (initializer.offset % page_size) as usize;
-            let mut data = &data[..];
-
-            while !data.is_empty() {
-                // If this page hasn't been seen before, then it starts out as
-                // all zeros.
-                let page = contents
-                    .entry(page_index)
-                    .or_insert_with(|| vec![0; page_size as usize]);
-                let page = &mut page[page_offset..];
-
-                let len = std::cmp::min(data.len(), page.len());
-                page[..len].copy_from_slice(&data[..len]);
-
-                page_index += 1;
-                page_offset = 0;
-                data = &data[len..];
-            }
+        // If anything failed above or hit an unknown case then bail out
+        // entirely since this module cannot use paged initialization.
+        if !ok {
+            return;
+        }
 
         // If we've gotten this far then we're switching to paged
@@ -301,7 +257,7 @@ impl ModuleTranslation<'_> {
             let index = map.push(page_offsets);
             assert_eq!(index, memory);
         }
-        self.module.memory_initialization = MemoryInitialization::Paged { map, out_of_bounds };
+        self.module.memory_initialization = MemoryInitialization::Paged { map };
     }
 }
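The sparse page map that `try_paged_init` builds is easy to see in isolation. Below is a minimal, self-contained sketch (standalone types, not Wasmtime's actual `PrimaryMap`/`ModuleTranslation` machinery) of the same splat loop, showing how a segment that straddles a wasm page boundary ends up split across two `BTreeMap` entries:

```rust
use std::collections::BTreeMap;

const PAGE_SIZE: u64 = 65536;

/// Splat `data` into a sparse page map at byte `offset`, splitting it
/// across page boundaries exactly like the loop in `try_paged_init`.
fn splat(pages: &mut BTreeMap<u64, Vec<u8>>, mut offset: u64, mut data: &[u8]) {
    while !data.is_empty() {
        let page_index = offset / PAGE_SIZE;
        let page_offset = (offset % PAGE_SIZE) as usize;
        // Pages not seen before start out as all zeros.
        let page = pages
            .entry(page_index)
            .or_insert_with(|| vec![0; PAGE_SIZE as usize]);
        let len = std::cmp::min(data.len(), PAGE_SIZE as usize - page_offset);
        page[page_offset..page_offset + len].copy_from_slice(&data[..len]);
        offset += len as u64;
        data = &data[len..];
    }
}

fn main() {
    let mut pages = BTreeMap::new();
    // A 4-byte segment starting 2 bytes before a page boundary lands in
    // two sparse entries: the tail of page 0 and the head of page 1.
    splat(&mut pages, PAGE_SIZE - 2, b"abcd");
    assert_eq!(pages.len(), 2);
    assert_eq!(&pages[&0][PAGE_SIZE as usize - 2..], b"ab");
    assert_eq!(&pages[&1][..2], b"cd");
    println!("segment split across pages 0 and 1");
}
```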
@@ -311,6 +267,167 @@ impl Default for MemoryInitialization {
     }
 }
 
+impl MemoryInitialization {
+    /// Returns whether this initialization is of the form
+    /// `MemoryInitialization::Segmented`.
+    pub fn is_segmented(&self) -> bool {
+        match self {
+            MemoryInitialization::Segmented(_) => true,
+            _ => false,
+        }
+    }
+
+    /// Performs the memory initialization steps for this set of initializers.
+    ///
+    /// This will perform wasm initialization in compliance with the wasm spec
+    /// and how data segments are processed. This doesn't necessarily need to
+    /// be called only as part of instantiation, however, as it's structured to
+    /// also allow learning about memory ahead of time at compile time.
+    ///
+    /// The various callbacks provided here are used to drive the smaller bits
+    /// of initialization, such as:
+    ///
+    /// * `memory_size_in_pages` - gets the current size, in wasm pages, of the
+    ///   memory specified. For compile-time purposes this would be the memory
+    ///   type's minimum size.
+    ///
+    /// * `get_global_as_u64` - gets the value of the global specified. The
+    ///   global is statically known, via validation, to be of the correct
+    ///   type (either u32 or u64 depending on the memory), but the value
+    ///   returned here is always a `u64`. At compile time globals are not
+    ///   available yet, so initializers that need one cause `init_memory` to
+    ///   bail out with `false`.
+    ///
+    /// * `write` - a callback used to actually write data. This indicates that
+    ///   the specified memory must receive the specified range of data at the
+    ///   specified offset. This can return `false` if it wants to fail.
+    ///
+    /// This function will return `true` if all memory initializers are
+    /// processed successfully. If any initializer hits an error, or needs a
+    /// global value that isn't available yet, then `false` will be returned.
+    /// At compile-time this typically means that the "error" in question needs
+    /// to be deferred to runtime, and at runtime this means that an invalid
+    /// initializer has been found and a trap should be generated.
+    pub fn init_memory(
+        &self,
+        state: InitMemory<'_>,
+        write: &mut dyn FnMut(MemoryIndex, u64, &Range<u32>) -> bool,
+    ) -> bool {
+        let initializers = match self {
+            // Fall through below to the segmented memory one-by-one
+            // initialization.
+            MemoryInitialization::Segmented(list) => list,
+
+            // If previously switched to paged initialization then pass through
+            // all those parameters here to the `write` callback.
+            //
+            // Note that existence of `Paged` already guarantees that all
+            // indices are in-bounds.
+            MemoryInitialization::Paged { map } => {
+                for (index, pages) in map {
+                    for (page_index, page) in pages {
+                        debug_assert_eq!(page.end - page.start, WASM_PAGE_SIZE);
+                        let result = write(index, *page_index * u64::from(WASM_PAGE_SIZE), page);
+                        if !result {
+                            return result;
+                        }
+                    }
+                }
+                return true;
+            }
+        };
+
+        for initializer in initializers {
+            let MemoryInitializer {
+                memory_index,
+                base,
+                offset,
+                ref data,
+            } = *initializer;
+
+            // First up determine the start/end range and verify that they're
+            // in-bounds for the initial size of the memory at `memory_index`.
+            // Note that this can bail if we don't have access to globals yet
+            // (e.g. this is a task happening before instantiation at
+            // compile-time).
+            let base = match base {
+                Some(index) => match &state {
+                    InitMemory::Runtime {
+                        get_global_as_u64, ..
+                    } => get_global_as_u64(index),
+                    InitMemory::CompileTime(_) => return false,
+                },
+                None => 0,
+            };
+            let start = match base.checked_add(offset) {
+                Some(start) => start,
+                None => return false,
+            };
+            let len = u64::try_from(data.len()).unwrap();
+            let end = match start.checked_add(len) {
+                Some(end) => end,
+                None => return false,
+            };
+
+            let cur_size_in_pages = match &state {
+                InitMemory::CompileTime(module) => module.memory_plans[memory_index].memory.minimum,
+                InitMemory::Runtime {
+                    memory_size_in_pages,
+                    ..
+                } => memory_size_in_pages(memory_index),
+            };
+
+            // Note that this multiplication can overflow if `cur_size_in_pages`
+            // is `1 << 48`, the maximum number of minimum pages for 64-bit
+            // memories. If this overflow happens, though, then there's no need
+            // to check the `end` value since `end` fits in a `u64` and it is
+            // naturally less than the overflowed value.
+            //
+            // This is a bit esoteric though because it's impossible to actually
+            // create a memory of `u64::MAX + 1` bytes, so this is largely just
+            // here to avoid having the multiplication here overflow in debug
+            // mode.
+            if let Some(max) = cur_size_in_pages.checked_mul(u64::from(WASM_PAGE_SIZE)) {
+                if end > max {
+                    return false;
+                }
+            }
+
+            // The limits of the data segment have been validated at this point
+            // so the `write` callback is called with the range of data being
+            // written. Any erroneous result is propagated upwards.
+            let result = write(memory_index, start, data);
+            if !result {
+                return result;
+            }
+        }
+
+        return true;
+    }
+}
+
+/// Argument to [`MemoryInitialization::init_memory`] indicating the current
+/// status of the instance.
+pub enum InitMemory<'a> {
+    /// This evaluation of memory initializers is happening at compile time.
+    /// This means that the current state of memories is whatever their initial
+    /// state is, and additionally that globals are not available yet, so data
+    /// segments with global offsets cannot be evaluated.
+    CompileTime(&'a Module),
+
+    /// Evaluation of memory initializers is happening at runtime when the
+    /// instance is available, and callbacks are provided to learn about the
+    /// instance's state.
+    Runtime {
+        /// Returns the size, in wasm pages, of the memory specified.
+        memory_size_in_pages: &'a dyn Fn(MemoryIndex) -> u64,
+        /// Returns the value of the global, as a `u64`. Note that this may
+        /// involve zero-extending a 32-bit global to a 64-bit number.
+        get_global_as_u64: &'a dyn Fn(GlobalIndex) -> u64,
+    },
+}
+
 /// Implementation styles for WebAssembly tables.
 #[derive(Debug, Clone, Hash, Serialize, Deserialize)]
 pub enum TableStyle {
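The `init_memory`/`InitMemory` contract above is small enough to model outside of Wasmtime. The sketch below (hypothetical `Initializer` type, not the real `MemoryInitializer`) mirrors the two failure modes: a global base that can't be resolved at compile time causes a deferral, and an overflowing or out-of-bounds range causes a hard `false`:

```rust
/// A standalone model of the `init_memory` contract: each initializer is
/// applied through a `write` callback that can veto, and anything that
/// can't be resolved yet (a global base at "compile time") aborts the run.
struct Initializer {
    base_global: Option<u32>, // `Some` means the offset comes from a global
    offset: u64,
    len: u64,
}

fn dry_run(
    inits: &[Initializer],
    memory_len: u64,
    write: &mut dyn FnMut(u64, u64) -> bool,
) -> bool {
    for init in inits {
        // Globals aren't known pre-instantiation, so defer to runtime.
        if init.base_global.is_some() {
            return false;
        }
        match init.offset.checked_add(init.len) {
            Some(end) if end <= memory_len => {}
            _ => return false, // overflow or out-of-bounds
        }
        if !write(init.offset, init.len) {
            return false;
        }
    }
    true
}

fn main() {
    // An in-bounds static initializer: the dry run succeeds.
    let inits = [Initializer { base_global: None, offset: 0, len: 4096 }];
    assert!(dry_run(&inits, 65536, &mut |_, _| true));

    // An initializer based on a global defers (returns false) rather than
    // erroring, mirroring `InitMemory::CompileTime`.
    let dynamic = [Initializer { base_global: Some(0), offset: 0, len: 4 }];
    assert!(!dry_run(&dynamic, 65536, &mut |_, _| true));
}
```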
diff --git a/crates/runtime/src/instance/allocator.rs b/crates/runtime/src/instance/allocator.rs
index 320537defc..a215df0e54 100644
--- a/crates/runtime/src/instance/allocator.rs
+++ b/crates/runtime/src/instance/allocator.rs
@@ -18,8 +18,8 @@ use std::sync::Arc;
 use thiserror::Error;
 use wasmtime_environ::{
     DefinedFuncIndex, DefinedMemoryIndex, DefinedTableIndex, EntityRef, FunctionInfo, GlobalInit,
-    MemoryInitialization, MemoryInitializer, Module, ModuleType, PrimaryMap, SignatureIndex,
-    TableInitializer, TrapCode, WasmType, WASM_PAGE_SIZE,
+    InitMemory, MemoryInitialization, MemoryInitializer, Module, ModuleType, PrimaryMap,
+    SignatureIndex, TableInitializer, TrapCode, WasmType, WASM_PAGE_SIZE,
 };
 
 #[cfg(feature = "pooling-allocator")]
@@ -379,34 +379,60 @@ fn check_memory_init_bounds(
     Ok(())
 }
 
-fn initialize_memories(
-    instance: &mut Instance,
-    module: &Module,
-    initializers: &[MemoryInitializer],
-) -> Result<(), InstantiationError> {
-    for init in initializers {
-        // Check whether we can skip all initializers (due to, e.g.,
-        // memfd).
-        let memory = init.memory_index;
-        if let Some(defined_index) = module.defined_memory_index(memory) {
-            // We can only skip if there is actually a MemFD image. In
-            // some situations the MemFD image creation code will bail
-            // (e.g. due to an out of bounds data segment) and so we
-            // need to fall back on the usual initialization below.
-            if !instance.memories[defined_index].needs_init() {
-                continue;
-            }
-        }
+fn initialize_memories(instance: &mut Instance, module: &Module) -> Result<(), InstantiationError> {
+    let memory_size_in_pages =
+        &|memory| (instance.get_memory(memory).current_length as u64) / u64::from(WASM_PAGE_SIZE);
 
-        instance
-            .memory_init_segment(
-                init.memory_index,
-                init.data.clone(),
-                get_memory_init_start(init, instance)?,
-                0,
-                init.data.end - init.data.start,
-            )
-            .map_err(InstantiationError::Trap)?;
+    // Loads the `global` value and returns it as a `u64`, but zero-extends
+    // 32-bit globals which can be used as the base for 32-bit memories.
+    let get_global_as_u64 = &|global| unsafe {
+        let def = if let Some(def_index) = instance.module.defined_global_index(global) {
+            instance.global(def_index)
+        } else {
+            &*instance.imported_global(global).from
+        };
+        if module.globals[global].wasm_ty == WasmType::I64 {
+            *def.as_u64()
+        } else {
+            u64::from(*def.as_u32())
+        }
+    };
+
+    // Delegates to the `init_memory` method which is sort of a duplicate of
+    // `instance.memory_init_segment` but is used at compile-time in other
+    // contexts so is shared here to have only one method of memory
+    // initialization.
+    //
+    // This call to `init_memory` notably implements all the bells and whistles
+    // so errors only happen if an out-of-bounds segment is found, in which case
+    // a trap is returned.
+    let ok = module.memory_initialization.init_memory(
+        InitMemory::Runtime {
+            memory_size_in_pages,
+            get_global_as_u64,
+        },
+        &mut |memory_index, offset, data| {
+            // If this initializer applies to a defined memory but that memory
+            // doesn't need initialization, due to something like uffd or memfd
+            // pre-initializing it via mmap magic, then this initializer can be
+            // skipped entirely.
+            if let Some(memory_index) = module.defined_memory_index(memory_index) {
+                if !instance.memories[memory_index].needs_init() {
+                    return true;
+                }
+            }
+            let memory = instance.get_memory(memory_index);
+            let dst_slice =
+                unsafe { slice::from_raw_parts_mut(memory.base, memory.current_length) };
+            let dst = &mut dst_slice[usize::try_from(offset).unwrap()..][..data.len()];
+            dst.copy_from_slice(instance.wasm_data(data.clone()));
+            true
+        },
+    );
+    if !ok {
+        return Err(InstantiationError::Trap(Trap::wasm(
+            TrapCode::HeapOutOfBounds,
+        )));
     }
 
     Ok(())
@@ -416,16 +442,11 @@ fn check_init_bounds(instance: &mut Instance, module: &Module) -> Result<(), Ins
     check_table_init_bounds(instance, module)?;
 
     match &instance.module.memory_initialization {
-        MemoryInitialization::Paged { out_of_bounds, .. } => {
-            if *out_of_bounds {
-                return Err(InstantiationError::Link(LinkError(
-                    "memory out of bounds: data segment does not fit".into(),
-                )));
-            }
-        }
         MemoryInitialization::Segmented(initializers) => {
             check_memory_init_bounds(instance, initializers)?;
         }
+        // Statically validated already to have everything in-bounds.
+        MemoryInitialization::Paged { .. } => {}
     }
 
     Ok(())
@@ -448,40 +469,7 @@ fn initialize_instance(
     initialize_tables(instance, module)?;
 
     // Initialize the memories
-    match &module.memory_initialization {
-        MemoryInitialization::Paged { map, out_of_bounds } => {
-            for (index, pages) in map {
-                // Check whether the memory actually needs
-                // initialization. It may not if we're using a CoW
-                // mechanism like memfd.
-                if !instance.memories[index].needs_init() {
-                    continue;
-                }
-
-                let memory = instance.memory(index);
-                let slice =
-                    unsafe { slice::from_raw_parts_mut(memory.base, memory.current_length) };
-
-                for (page_index, page) in pages {
-                    debug_assert_eq!(page.end - page.start, WASM_PAGE_SIZE);
-                    let start = (*page_index * u64::from(WASM_PAGE_SIZE)) as usize;
-                    let end = start + WASM_PAGE_SIZE as usize;
-                    slice[start..end].copy_from_slice(instance.wasm_data(page.clone()));
-                }
-            }
-
-            // Check for out of bound access after initializing the pages to maintain
-            // the expected behavior of the bulk memory spec.
-            if *out_of_bounds {
-                return Err(InstantiationError::Trap(Trap::wasm(
-                    TrapCode::HeapOutOfBounds,
-                )));
-            }
-        }
-        MemoryInitialization::Segmented(initializers) => {
-            initialize_memories(instance, module, initializers)?;
-        }
-    }
+    initialize_memories(instance, &module)?;
 
     Ok(())
 }
diff --git a/crates/runtime/src/instance/allocator/pooling.rs b/crates/runtime/src/instance/allocator/pooling.rs
index a0ab594694..9990e797d8 100644
--- a/crates/runtime/src/instance/allocator/pooling.rs
+++ b/crates/runtime/src/instance/allocator/pooling.rs
@@ -1069,7 +1069,7 @@ unsafe impl InstanceAllocator for PoolingInstanceAllocator {
         cfg_if::cfg_if! {
             if #[cfg(all(feature = "uffd", target_os = "linux"))] {
                 match &module.memory_initialization {
-                    wasmtime_environ::MemoryInitialization::Paged{ out_of_bounds, .. } => {
+                    wasmtime_environ::MemoryInitialization::Paged { .. } => {
                         if !is_bulk_memory {
                             super::check_init_bounds(instance, module)?;
                         }
@@ -1079,13 +1079,6 @@ unsafe impl InstanceAllocator for PoolingInstanceAllocator {
 
                         // Don't initialize the memory; the fault handler will back the pages when accessed
 
-                        // If there was an out of bounds access observed in initialization, return a trap
-                        if *out_of_bounds {
-                            return Err(InstantiationError::Trap(crate::traphandlers::Trap::wasm(
-                                wasmtime_environ::TrapCode::HeapOutOfBounds,
-                            )));
-                        }
-
                         Ok(())
                     },
                     _ => initialize_instance(instance, module, is_bulk_memory)
diff --git a/crates/runtime/src/instance/allocator/pooling/uffd.rs b/crates/runtime/src/instance/allocator/pooling/uffd.rs
index 787aec0397..c1f59641cb 100644
--- a/crates/runtime/src/instance/allocator/pooling/uffd.rs
+++ b/crates/runtime/src/instance/allocator/pooling/uffd.rs
@@ -263,6 +263,7 @@ unsafe fn initialize_wasm_page(
 ) -> Result<()> {
     // Check for paged initialization and copy the page if present in the initialization data
     if let MemoryInitialization::Paged { map, .. } = &instance.module.memory_initialization {
+        let memory_index = instance.module().memory_index(memory_index);
         let pages = &map[memory_index];
         let pos = pages.binary_search_by_key(&(page_index as u64), |k| k.0);
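One subtlety in the `get_global_as_u64` closure shown in the allocator.rs hunk above is that 32-bit global bases are zero-extended, never sign-extended, when widened to `u64`. A quick standalone check of why the distinction matters for memory offsets:

```rust
fn main() {
    // A 32-bit global holding 0xffff_ffff used as a memory base.
    let g32: u32 = u32::MAX;

    // What the allocator does: zero-extend, yielding 4 GiB - 1.
    let zero_extended = u64::from(g32);
    assert_eq!(zero_extended, 0xffff_ffff);

    // Sign-extension would instead wrap the value to u64::MAX; 32-bit
    // memory offsets are unsigned, so that would be incorrect here.
    let sign_extended = g32 as i32 as i64 as u64;
    assert_eq!(sign_extended, u64::MAX);
}
```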
diff --git a/crates/runtime/src/memfd.rs b/crates/runtime/src/memfd.rs
index 5cf4cfbcb4..cf0da3e0df 100644
--- a/crates/runtime/src/memfd.rs
+++ b/crates/runtime/src/memfd.rs
@@ -6,12 +6,10 @@ use anyhow::Result;
 use libc::c_void;
 use memfd::{Memfd, MemfdOptions};
 use rustix::fd::AsRawFd;
-use rustix::fs::FileExt;
+use std::io::Write;
 use std::sync::Arc;
 use std::{convert::TryFrom, ops::Range};
-use wasmtime_environ::{
-    DefinedMemoryIndex, MemoryInitialization, MemoryInitializer, MemoryPlan, Module, PrimaryMap,
-};
+use wasmtime_environ::{DefinedMemoryIndex, InitMemory, Module, PrimaryMap};
 
 /// MemFDs containing backing images for certain memories in a module.
 ///
@@ -21,7 +19,7 @@ pub struct ModuleMemFds {
     memories: PrimaryMap<DefinedMemoryIndex, Option<Arc<MemoryMemFd>>>,
 }
 
-const MAX_MEMFD_IMAGE_SIZE: u64 = 1024 * 1024 * 1024; // limit to 1GiB.
+const MAX_MEMFD_IMAGE_SIZE: usize = 1024 * 1024 * 1024; // limit to 1GiB.
 
 impl ModuleMemFds {
     pub(crate) fn get_memory_image(
@@ -54,33 +52,6 @@ pub struct MemoryMemFd {
     pub offset: usize,
 }
 
-fn unsupported_initializer(segment: &MemoryInitializer, plan: &MemoryPlan) -> bool {
-    // If the segment has a base that is dynamically determined
-    // (by a global value, which may be a function of an imported
-    // module, for example), then we cannot build a single static
-    // image that is used for every instantiation. So we skip this
-    // memory entirely.
-    let end = match segment.end() {
-        None => {
-            return true;
-        }
-        Some(end) => end,
-    };
-
-    // Cannot be out-of-bounds. If there is a *possibility* it may
-    // be, then we just fall back on ordinary initialization.
-    if plan.initializer_possibly_out_of_bounds(segment) {
-        return true;
-    }
-
-    // Must fit in our max size.
-    if end > MAX_MEMFD_IMAGE_SIZE {
-        return true;
-    }
-
-    false
-}
-
 fn create_memfd() -> Result<Memfd> {
     // Create the memfd. It needs a name, but the
     // documentation for `memfd_create()` says that names can
@@ -97,124 +68,104 @@ impl ModuleMemFds {
     /// instantiation and execution by using memfd-backed memories.
     pub fn new(module: &Module, wasm_data: &[u8]) -> Result<Option<Arc<ModuleMemFds>>> {
         let page_size = region::page::size() as u64;
+        let page_align = |x: u64| x & !(page_size - 1);
+        let page_align_up = |x: u64| page_align(x + page_size - 1);
+
+        // First build up an in-memory image for each memory. This in-memory
+        // representation is discarded if the memory initializers aren't "of
+        // the right shape" where the desired shape is:
+        //
+        // * Only initializers for defined memories.
+        // * Only initializers with static offsets (no globals).
+        // * Only in-bounds initializers.
+        //
+        // The `init_memory` method of `MemoryInitialization` is used here to
+        // do most of the validation for us, and otherwise the data chunks are
+        // collected into the `images` array here.
+        let mut images: PrimaryMap<DefinedMemoryIndex, Vec<u8>> = PrimaryMap::default();
         let num_defined_memories = module.memory_plans.len() - module.num_imported_memories;
-
-        // Allocate a memfd file initially for every memory. We'll
-        // release those and set `excluded_memories` for those that we
-        // determine during initializer processing we cannot support a
-        // static image (e.g. due to dynamically-located segments).
-        let mut memfds: PrimaryMap<DefinedMemoryIndex, Option<Memfd>> = PrimaryMap::default();
-        let mut sizes: PrimaryMap<DefinedMemoryIndex, u64> = PrimaryMap::default();
-        let mut excluded_memories: PrimaryMap<DefinedMemoryIndex, bool> = PrimaryMap::new();
         for _ in 0..num_defined_memories {
-            memfds.push(None);
-            sizes.push(0);
-            excluded_memories.push(false);
+            images.push(Vec::new());
         }
+        let ok = module.memory_initialization.init_memory(
+            InitMemory::CompileTime(module),
+            &mut |memory, offset, data_range| {
+                // Memfd-based initialization of an imported memory isn't
+                // implemented right now, although it might theoretically be
+                // possible for statically-known-in-bounds segments with
+                // page-aligned portions.
+                let memory = match module.defined_memory_index(memory) {
+                    Some(index) => index,
+                    None => return false,
+                };
+
+                // Splat the `data_range` into the `image` for this memory,
+                // updating it as necessary with 0s for holes and such.
+                let image = &mut images[memory];
+                let data = &wasm_data[data_range.start as usize..data_range.end as usize];
+                let offset = offset as usize;
+                let new_image_len = offset + data.len();
+                if image.len() < new_image_len {
+                    if new_image_len > MAX_MEMFD_IMAGE_SIZE {
+                        return false;
+                    }
+                    image.resize(new_image_len, 0);
+                }
+                image[offset..][..data.len()].copy_from_slice(data);
+                true
+            },
+        );
+
+        // If any initializer wasn't applicable then we skip memfds entirely.
+        if !ok {
+            return Ok(None);
+        }
 
-        let round_up_page = |len: u64| (len + page_size - 1) & !(page_size - 1);
-
-        match &module.memory_initialization {
-            &MemoryInitialization::Segmented(ref segments) => {
-                for (i, segment) in segments.iter().enumerate() {
-                    let defined_memory = match module.defined_memory_index(segment.memory_index) {
-                        Some(defined_memory) => defined_memory,
-                        None => continue,
-                    };
-                    if excluded_memories[defined_memory] {
-                        continue;
-                    }
-
-                    if unsupported_initializer(segment, &module.memory_plans[segment.memory_index])
-                    {
-                        memfds[defined_memory] = None;
-                        excluded_memories[defined_memory] = true;
-                        continue;
-                    }
-
-                    if memfds[defined_memory].is_none() {
-                        memfds[defined_memory] = Some(create_memfd()?);
-                    }
-                    let memfd = memfds[defined_memory].as_mut().unwrap();
-
-                    let end = round_up_page(segment.end().expect("must have statically-known end"));
-                    if end > sizes[defined_memory] {
-                        sizes[defined_memory] = end;
-                        memfd.as_file().set_len(end)?;
-                    }
-
-                    let base = segments[i].offset;
-                    let data = &wasm_data[segment.data.start as usize..segment.data.end as usize];
-                    memfd.as_file().write_at(data, base)?;
-                }
-            }
-            &MemoryInitialization::Paged { ref map, .. } => {
-                for (defined_memory, pages) in map {
-                    let top = pages
-                        .iter()
-                        .map(|(base, range)| *base + range.len() as u64)
-                        .max()
-                        .unwrap_or(0);
-
-                    let memfd = create_memfd()?;
-                    memfd.as_file().set_len(top)?;
-
-                    for (base, range) in pages {
-                        let data = &wasm_data[range.start as usize..range.end as usize];
-                        memfd.as_file().write_at(data, *base)?;
-                    }
-
-                    memfds[defined_memory] = Some(memfd);
-                    sizes[defined_memory] = top;
-                }
-            }
-        }
-
-        // Now finalize each memory.
-        let mut memories: PrimaryMap<DefinedMemoryIndex, Option<Arc<MemoryMemFd>>> =
-            PrimaryMap::default();
-        for (defined_memory, maybe_memfd) in memfds {
-            let memfd = match maybe_memfd {
-                Some(memfd) => memfd,
+        // With an in-memory representation of all memory images a `memfd` is
+        // now created and the data is pushed into the memfd. Note that the
+        // memfd representation will trim leading and trailing pages of zeros
+        // to store as little data as possible in the memfd. This is not only a
+        // performance improvement in the sense of "copy less data to the
+        // kernel" but it's also more performant to fault in zeros from
+        // anonymous-backed pages instead of memfd-backed pages-of-zeros (as
+        // the kernel knows anonymous mappings are always zero and has a cache
+        // of zero'd pages).
+        let mut memories = PrimaryMap::default();
+        for (defined_memory, image) in images {
+            // Find the first nonzero byte, and if all the bytes are zero then
+            // we can skip the memfd for this memory since there's no
+            // meaningful initialization.
+            let nonzero_start = match image.iter().position(|b| *b != 0) {
+                Some(i) => i as u64,
                 None => {
                     memories.push(None);
                     continue;
                 }
             };
-            let size = sizes[defined_memory];
 
-            // Find leading and trailing zero data so that the mmap
-            // can precisely map only the nonzero data; anon-mmap zero
-            // memory is faster for anything that doesn't actually
-            // have content.
-            let mut page_data = vec![0; page_size as usize];
-            let mut page_is_nonzero = |page| {
-                let offset = page_size * page;
-                memfd.as_file().read_at(&mut page_data[..], offset).unwrap();
-                page_data.iter().any(|byte| *byte != 0)
-            };
-            let n_pages = size / page_size;
+            // Find the last nonzero byte, which must exist at this point since
+            // we found one going forward. Add one to get the exclusive end of
+            // the nonzero data, which may also be the length of the image.
+            let nonzero_end = image.iter().rposition(|b| *b != 0).unwrap() as u64 + 1;
 
-            let mut offset = 0;
-            for page in 0..n_pages {
-                if page_is_nonzero(page) {
-                    break;
-                }
-                offset += page_size;
-            }
-            let len = if offset == size {
-                0
-            } else {
-                let mut len = 0;
-                for page in (0..n_pages).rev() {
-                    if page_is_nonzero(page) {
-                        len = (page + 1) * page_size - offset;
-                        break;
-                    }
-                }
-                len
-            };
+            // The offset of this image must be OS-page-aligned since we'll be
+            // starting the mmap at an aligned address. Align down the start
+            // index to the first index that's page aligned.
+            let offset = page_align(nonzero_start);
+
+            // The length of the image must also be page aligned and may reach
+            // beyond the end of the `image` array we have already. Take the
+            // length of the nonzero portion and then align it up to the page size.
+            let len = page_align_up(nonzero_end - offset);
+
+            // Write the nonzero data to the memfd and then use `set_len` to
+            // ensure that the length of the memfd is page-aligned where the gap
+            // at the end, if any, is filled with zeros.
+            let memfd = create_memfd()?;
+            memfd
+                .as_file()
+                .write_all(&image[offset as usize..nonzero_end as usize])?;
+            memfd.as_file().set_len(len)?;
 
             // Seal the memfd's data and length.
             //
@@ -239,11 +190,12 @@ impl ModuleMemFds {
             assert_eq!(offset % page_size, 0);
             assert_eq!(len % page_size, 0);
 
-            memories.push(Some(Arc::new(MemoryMemFd {
+            let idx = memories.push(Some(Arc::new(MemoryMemFd {
                 fd: memfd,
                 offset: usize::try_from(offset).unwrap(),
                 len: usize::try_from(len).unwrap(),
             })));
+            assert_eq!(idx, defined_memory);
         }
 
         Ok(Some(Arc::new(ModuleMemFds { memories })))
@@ -457,7 +409,7 @@ impl MemFdSlot {
                 rustix::io::ProtFlags::READ | rustix::io::ProtFlags::WRITE,
                 rustix::io::MapFlags::PRIVATE | rustix::io::MapFlags::FIXED,
                 image.fd.as_file(),
-                image.offset as u64,
+                0,
             )
             .map_err(|e| InstantiationError::Resource(e.into()))?;
             assert_eq!(ptr as usize, self.base + image.offset);
@@ -580,17 +532,19 @@ mod test {
     use super::MemoryMemFd;
     use crate::mmap::Mmap;
     use anyhow::Result;
-    use rustix::fs::FileExt;
+    use std::io::Write;
 
     fn create_memfd_with_data(offset: usize, data: &[u8]) -> Result<MemoryMemFd> {
+        // Offset must be page-aligned.
         let page_size = region::page::size();
-        let memfd = create_memfd()?;
-        // Offset and length have to be page-aligned.
         assert_eq!(offset & (page_size - 1), 0);
-        let image_len = offset + data.len();
-        let image_len = (image_len + page_size - 1) & !(page_size - 1);
+        let memfd = create_memfd()?;
+        memfd.as_file().write_all(data)?;
+
+        // The image length is rounded up to the nearest page size.
+        let image_len = (data.len() + page_size - 1) & !(page_size - 1);
         memfd.as_file().set_len(image_len as u64)?;
-        memfd.as_file().write_at(data, offset as u64)?;
+
         Ok(MemoryMemFd {
             fd: memfd,
             len: image_len,
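The zero-trimming arithmetic in `ModuleMemFds::new` above can be checked in isolation. Here is a minimal sketch (hypothetical 4 KiB page constant rather than `region::page::size()`) of finding the nonzero core of an image and aligning it to page boundaries:

```rust
const PAGE: u64 = 4096;

fn page_align(x: u64) -> u64 {
    x & !(PAGE - 1)
}

fn page_align_up(x: u64) -> u64 {
    page_align(x + PAGE - 1)
}

/// Returns `(offset, len)` of the page-aligned region covering all nonzero
/// bytes of `image`, or `None` if the image is entirely zero.
fn trim(image: &[u8]) -> Option<(u64, u64)> {
    let start = image.iter().position(|b| *b != 0)? as u64;
    // A last nonzero byte must exist since we found one going forward;
    // add one to get the exclusive end of the nonzero data.
    let end = image.iter().rposition(|b| *b != 0).unwrap() as u64 + 1;
    let offset = page_align(start);
    let len = page_align_up(end - offset);
    Some((offset, len))
}

fn main() {
    // A 16 KiB image with one nonzero byte in the second page: the memfd
    // only needs to cover that single page.
    let mut image = vec![0u8; 16384];
    image[5000] = 1;
    assert_eq!(trim(&image), Some((4096, 4096)));

    // An all-zero image needs no memfd at all.
    assert_eq!(trim(&vec![0u8; 16384]), None);
}
```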
diff --git a/tests/all/memory.rs b/tests/all/memory.rs
index d2e2e5afa3..a43f823bf2 100644
--- a/tests/all/memory.rs
+++ b/tests/all/memory.rs
@@ -431,3 +431,46 @@ fn dynamic_extra_growth_unchanged_pointer() -> Result<()> {
 
     Ok(())
 }
+
+// This test exercises trying to create memories of the maximum 64-bit memory
+// size of `1 << 48` pages. This should always fail, but in the process of
+// determining this failure we shouldn't hit any overflows or anything like
+// that (checked via debug-mode tests).
+#[test]
+fn memory64_maximum_minimum() -> Result<()> {
+    let mut config = Config::new();
+    config.wasm_memory64(true);
+    let engine = Engine::new(&config)?;
+    let mut store = Store::new(&engine, ());
+
+    assert!(Memory::new(&mut store, MemoryType::new64(1 << 48, None)).is_err());
+
+    let module = Module::new(
+        &engine,
+        &format!(
+            r#"
+                (module
+                    (memory i64 {})
+                )
+            "#,
+            1u64 << 48,
+        ),
+    )?;
+    assert!(Instance::new(&mut store, &module, &[]).is_err());
+
+    let module = Module::new(
+        &engine,
+        &format!(
+            r#"
+                (module
+                    (memory i64 {})
+                    (data (i64.const 0) "")
+                )
+            "#,
+            1u64 << 48,
+        ),
+    )?;
+    assert!(Instance::new(&mut store, &module, &[]).is_err());
+
+    Ok(())
+}
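Finally, the overflow edge this new test (and the `checked_mul` in `init_memory`) guards against is plain `u64` arithmetic:

```rust
fn main() {
    const WASM_PAGE_SIZE: u64 = 65536;

    // The maximum minimum size of a 64-bit memory is 1 << 48 pages; in
    // bytes that is exactly 1 << 64, which does not fit in a u64.
    let pages: u64 = 1 << 48;
    assert_eq!(pages.checked_mul(WASM_PAGE_SIZE), None);

    // Any smaller page count still converts to bytes without overflow.
    assert_eq!(
        (pages - 1).checked_mul(WASM_PAGE_SIZE),
        Some(u64::MAX - WASM_PAGE_SIZE + 1),
    );
}
```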