//! Copy-on-write initialization support: creation of backing images for
//! modules, and logic to support mapping these backing images into memory.

#![cfg_attr(not(unix), allow(unused_imports, unused_variables))]

use crate::MmapVec;
use anyhow::Result;
use libc::c_void;
use std::fs::File;
use std::sync::Arc;
use std::{convert::TryFrom, ops::Range};
use wasmtime_environ::{DefinedMemoryIndex, MemoryInitialization, MemoryStyle, Module, PrimaryMap};

/// Backing images for memories in a module.
///
/// This is meant to be built once, when a module is first loaded/constructed,
/// and then used many times for instantiation.
pub struct ModuleMemoryImages {
    memories: PrimaryMap<DefinedMemoryIndex, Option<Arc<MemoryImage>>>,
}

impl ModuleMemoryImages {
    /// Get the MemoryImage for a given memory.
    pub fn get_memory_image(&self, defined_index: DefinedMemoryIndex) -> Option<&Arc<MemoryImage>> {
        self.memories[defined_index].as_ref()
    }
}

/// One backing image for one memory.
#[derive(Debug, PartialEq)]
pub struct MemoryImage {
    /// The file descriptor source of this image.
    ///
    /// This might be an mmap'd `*.cwasm` file or on Linux it could also be a
    /// `Memfd` as an anonymous file in memory. In either case this is used as
    /// the backing-source for the CoW image.
    fd: FdSource,

    /// Length of image, in bytes.
    ///
    /// Note that initial memory size may be larger; leading and trailing zeroes
    /// are truncated (handled by backing fd).
    ///
    /// Must be a multiple of the system page size.
    len: usize,

    /// Image starts this many bytes into `fd` source.
    ///
    /// This is 0 for anonymous-backed memfd files and is the offset of the
    /// data section in a `*.cwasm` file for `*.cwasm`-backed images.
    ///
    /// Must be a multiple of the system page size.
    fd_offset: u64,

    /// Image starts this many bytes into heap space.
    ///
    /// Must be a multiple of the system page size.
    linear_memory_offset: usize,
}

#[derive(Debug)]
enum FdSource {
    #[cfg(unix)]
    Mmap(Arc<File>),
    #[cfg(target_os = "linux")]
    Memfd(memfd::Memfd),
}

impl FdSource {
    #[cfg(unix)]
    fn as_file(&self) -> &File {
        match self {
            FdSource::Mmap(ref file) => file,
            #[cfg(target_os = "linux")]
            FdSource::Memfd(ref memfd) => memfd.as_file(),
        }
    }
}

impl PartialEq for FdSource {
    fn eq(&self, other: &FdSource) -> bool {
        cfg_if::cfg_if! {
            if #[cfg(unix)] {
                use rustix::fd::AsRawFd;
                self.as_file().as_raw_fd() == other.as_file().as_raw_fd()
            } else {
                drop(other);
                match *self {}
            }
        }
    }
}

impl MemoryImage {
    fn new(
        page_size: u32,
        offset: u64,
        data: &[u8],
        mmap: Option<&MmapVec>,
    ) -> Result<Option<MemoryImage>> {
        // Sanity-check that various parameters are page-aligned.
        let len = data.len();
        assert_eq!(offset % u64::from(page_size), 0);
        assert_eq!((len as u32) % page_size, 0);
        let linear_memory_offset = match usize::try_from(offset) {
            Ok(offset) => offset,
            Err(_) => return Ok(None),
        };

        // If a backing `mmap` is present then `data` should be a sub-slice of
        // the `mmap`. The sanity-checks here double-check that. Additionally
        // compilation should have ensured that the `data` section is
        // page-aligned within `mmap`, so that's also all double-checked here.
        //
        // Finally if the `mmap` itself comes from a backing file on disk, such
        // as a `*.cwasm` file, then that's a valid source of data for the
        // memory image so we simply return referencing that.
        //
        // Note that this path is platform-agnostic in the sense that all
        // platforms we support can memory-map copy-on-write data from files,
        // but for now this is still a Linux-specific region of Wasmtime.
        // Some work will be needed to get this file compiling for macOS and
        // Windows.
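        //
        // (Illustrative example with hypothetical numbers, not taken from the
        // original comments: if the `*.cwasm` mapping starts at file offset
        // `mmap.original_offset() == 0x1000` and `data` begins 0x3000 bytes
        // past the start of the mapping, then the `fd_offset` computed below
        // is 0x1000 + 0x3000 = 0x4000, i.e. the page-aligned position of the
        // data section within the file itself.)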
        #[cfg(not(windows))]
        if let Some(mmap) = mmap {
            let start = mmap.as_ptr() as usize;
            let end = start + mmap.len();
            let data_start = data.as_ptr() as usize;
            let data_end = data_start + data.len();
            assert!(start <= data_start && data_end <= end);
            assert_eq!((start as u32) % page_size, 0);
            assert_eq!((data_start as u32) % page_size, 0);
            assert_eq!((data_end as u32) % page_size, 0);
            assert_eq!((mmap.original_offset() as u32) % page_size, 0);

            if let Some(file) = mmap.original_file() {
                return Ok(Some(MemoryImage {
                    fd: FdSource::Mmap(file.clone()),
                    fd_offset: u64::try_from(mmap.original_offset() + (data_start - start))
                        .unwrap(),
                    linear_memory_offset,
                    len,
                }));
            }
        }

        // If `mmap` doesn't come from a file then platform-specific mechanisms
        // may be used to place the data in a form that's amenable to an mmap.
        cfg_if::cfg_if! {
            if #[cfg(target_os = "linux")] {
                // On Linux `memfd_create` is used to create an anonymous
                // in-memory file to represent the heap image. This anonymous
                // file is then used as the basis for further mmaps.

                use std::io::Write;

                let memfd = create_memfd()?;
                memfd.as_file().write_all(data)?;

                // Seal the memfd's data and length.
                //
                // This is a defense-in-depth security mitigation. The
                // memfd will serve as the starting point for the heap of
                // every instance of this module. If anything were to
                // write to this, it could affect every execution. The
                // memfd object itself is owned by the machinery here and
                // not exposed elsewhere, but it is still an ambient open
                // file descriptor at the syscall level, so some other
                // vulnerability that allowed writes to arbitrary fds
                // could modify it. Or we could have some issue with the
                // way that we map it into each instance. To be
                // extra-super-sure that it never changes, and because
                // this costs very little, we use the kernel's "seal" API
                // to make the memfd image permanently read-only.
                memfd.add_seals(&[
                    memfd::FileSeal::SealGrow,
                    memfd::FileSeal::SealShrink,
                    memfd::FileSeal::SealWrite,
                    memfd::FileSeal::SealSeal,
                ])?;

                Ok(Some(MemoryImage {
                    fd: FdSource::Memfd(memfd),
                    fd_offset: 0,
                    linear_memory_offset,
                    len,
                }))
            } else {
                // Other platforms don't have an easily available way of
                // representing the heap image as an mmap-source right now. We
                // could theoretically create a file and immediately unlink it
                // but that means that data may likely be preserved to disk
                // which isn't what we want here.
                Ok(None)
            }
        }
    }

    unsafe fn map_at(&self, base: usize) -> Result<()> {
        cfg_if::cfg_if! {
            if #[cfg(unix)] {
                let ptr = rustix::mm::mmap(
                    (base + self.linear_memory_offset) as *mut c_void,
                    self.len,
                    rustix::mm::ProtFlags::READ | rustix::mm::ProtFlags::WRITE,
                    rustix::mm::MapFlags::PRIVATE | rustix::mm::MapFlags::FIXED,
                    self.fd.as_file(),
                    self.fd_offset,
                )?;
                assert_eq!(ptr as usize, base + self.linear_memory_offset);
                Ok(())
            } else {
                match self.fd {}
            }
        }
    }

    unsafe fn remap_as_zeros_at(&self, base: usize) -> Result<()> {
        cfg_if::cfg_if! {
            if #[cfg(unix)] {
                let ptr = rustix::mm::mmap_anonymous(
                    (base + self.linear_memory_offset) as *mut c_void,
                    self.len,
                    rustix::mm::ProtFlags::READ | rustix::mm::ProtFlags::WRITE,
                    rustix::mm::MapFlags::PRIVATE | rustix::mm::MapFlags::FIXED,
                )?;
                assert_eq!(ptr as usize, base + self.linear_memory_offset);
                Ok(())
            } else {
                match self.fd {}
            }
        }
    }
}

#[cfg(target_os = "linux")]
fn create_memfd() -> Result<memfd::Memfd> {
    // Create the memfd. It needs a name, but the
    // documentation for `memfd_create()` says that names can
    // be duplicated with no issues.
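    //
    // (Illustrative note, not from the original comments: at the syscall
    // level the `memfd` crate call below corresponds roughly to
    // `memfd_create("wasm-memory-image", MFD_CLOEXEC | MFD_ALLOW_SEALING)`;
    // `allow_sealing(true)` is what permits the `add_seals` call earlier in
    // this file to succeed.)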
    memfd::MemfdOptions::new()
        .allow_sealing(true)
        .create("wasm-memory-image")
        .map_err(|e| e.into())
}

impl ModuleMemoryImages {
    /// Create a new `ModuleMemoryImages` for the given module. This can be
    /// passed in as part of an `InstanceAllocationRequest` to speed up
    /// instantiation and execution by using copy-on-write-backed memories.
    pub fn new(
        module: &Module,
        wasm_data: &[u8],
        mmap: Option<&MmapVec>,
    ) -> Result<Option<ModuleMemoryImages>> {
        let map = match &module.memory_initialization {
            MemoryInitialization::Static { map } => map,
            _ => return Ok(None),
        };
        let mut memories = PrimaryMap::with_capacity(map.len());
        let page_size = crate::page_size() as u32;
        for (memory_index, init) in map {
            // mmap-based-initialization only works for defined memories with a
            // known starting point of all zeros, so bail out if the memory is
            // imported.
            let defined_memory = match module.defined_memory_index(memory_index) {
                Some(idx) => idx,
                None => return Ok(None),
            };

            // If there's no initialization for this memory known then we don't
            // need an image for the memory so push `None` and move on.
            let init = match init {
                Some(init) => init,
                None => {
                    memories.push(None);
                    continue;
                }
            };

            // Get the image for this wasm module as a subslice of `wasm_data`,
            // and then use that to try to create the `MemoryImage`. If this
            // creation fails then we fail creating `ModuleMemoryImages` since
            // this memory couldn't be represented.
            let data = &wasm_data[init.data.start as usize..init.data.end as usize];
            let image = match MemoryImage::new(page_size, init.offset, data, mmap)? {
                Some(image) => image,
                None => return Ok(None),
            };

            let idx = memories.push(Some(Arc::new(image)));
            assert_eq!(idx, defined_memory);
        }

        Ok(Some(ModuleMemoryImages { memories }))
    }
}

/// Slot management of a copy-on-write image which can be reused for the pooling
/// allocator.
///
/// This data structure manages a slot of linear memory, primarily in the
/// pooling allocator, which optionally has a contiguous memory image in the
/// middle of it. Pictorially this data structure manages a virtual memory
/// region that looks like:
///
/// ```ignore
/// +--------------------+-------------------+--------------+--------------+
/// |      anonymous     |      optional     |   anonymous  |  PROT_NONE   |
/// |        zero        |       memory      |     zero     |    memory    |
/// |       memory       |       image       |    memory    |              |
/// +--------------------+-------------------+--------------+--------------+
/// |                     <------+---------->
/// |<-----+------------>        \
/// |       \                 image.len
/// |        \
/// |   image.linear_memory_offset
/// |
/// \
///  self.base is this virtual address
///
/// <------------------+------------------------------------------------>
///                     \
///                 static_size
///
/// <------------------+---------------------------------->
///                     \
///                 accessible
/// ```
///
/// When a `MemoryImageSlot` is created it's told what the `static_size` and
/// `accessible` limits are. Initially there is assumed to be no image in linear
/// memory.
///
/// When [`MemoryImageSlot::instantiate`] is called then the method will perform
/// a "synchronization" to take the image from its prior state to the new state
/// for the image specified. The first instantiation for example will mmap the
/// heap image into place. Upon reuse of a slot nothing happens except possibly
/// shrinking `self.accessible`. When a new image is used then the old image is
/// mapped to anonymous zero memory and then the new image is mapped in place.
///
/// A `MemoryImageSlot` is either `dirty` or it isn't.
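///
/// As a rough lifecycle sketch (illustrative only; `image`, `style`, and
/// `keep_resident` are stand-ins for caller-provided values):
///
/// ```ignore
/// slot.instantiate(64 << 10, Some(&image), &style)?; // slot becomes dirty
/// // ... a guest instance runs and may write anywhere below `accessible` ...
/// slot.clear_and_remain_ready(keep_resident)?;       // slot is pristine again
/// slot.instantiate(64 << 10, Some(&image), &style)?; // cheap reuse of mappings
/// ```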
/// When a `MemoryImageSlot`
/// is dirty then it is assumed that any memory beneath `self.accessible` could
/// have any value. Instantiation cannot happen into a `dirty` slot, however, so
/// the [`MemoryImageSlot::clear_and_remain_ready`] method returns this memory
/// back to its original state to mark `dirty = false`. This is done by
/// resetting all anonymous memory back to zero and the image itself back to
/// its initial contents.
///
/// On Linux this is achieved with the `madvise(MADV_DONTNEED)` syscall. This
/// syscall will release the physical pages back to the OS but retain the
/// original mappings, effectively resetting everything back to its initial
/// state. Non-Linux platforms will replace all memory below `self.accessible`
/// with a fresh zero'd mmap, meaning that reuse is effectively not supported.
#[derive(Debug)]
pub struct MemoryImageSlot {
    /// The base address in virtual memory of the actual heap memory.
    ///
    /// Bytes at this address are what is seen by the Wasm guest code.
    ///
    /// Note that this is stored as `usize` instead of `*mut u8` to not deal
    /// with `Send`/`Sync`.
    base: usize,

    /// The maximum static memory size which `self.accessible` can grow to.
    static_size: usize,

    /// An optional image that is currently being used in this linear memory.
    ///
    /// This can be `None` in which case memory is originally all zeros. When
    /// `Some` the image describes where it's located within linear memory.
    image: Option<Arc<MemoryImage>>,

    /// The size of the heap that is readable and writable.
    ///
    /// Note that this may extend beyond the actual linear memory heap size in
    /// the case of dynamic memories in use. Memory accesses to memory below
    /// `self.accessible` may still page fault as pages are lazily brought in
    /// but the faults will always be resolved by the kernel.
    accessible: usize,

    /// Whether this slot may have "dirty" pages (pages written by an
    /// instantiation). Set by `instantiate()` and cleared by
    /// `clear_and_remain_ready()`, and used in assertions to ensure
    /// those methods are called properly.
    ///
    /// Invariant: if !dirty, then this memory slot contains a clean
    /// CoW mapping of `image`, if `Some(..)`, and anonymous-zero
    /// memory beyond the image up to `static_size`. The addresses
    /// from offset 0 to `self.accessible` are R+W and set to zero or the
    /// initial image content, as appropriate. Everything between
    /// `self.accessible` and `self.static_size` is inaccessible.
    dirty: bool,

    /// Whether this MemoryImageSlot is responsible, when it is dropped, for
    /// mapping anonymous memory in place (to hold the reservation while
    /// overwriting any mappings specific to this slot). Defaults to on,
    /// unless the caller knows what they are doing.
    clear_on_drop: bool,
}

impl MemoryImageSlot {
    /// Create a new MemoryImageSlot. Assumes that there is an anonymous
    /// mmap backing in the given range to start.
    ///
    /// The `accessible` parameter describes how much of linear memory is
    /// already mapped as R/W with all zero-bytes. The `static_size` value is
    /// the maximum size of this image which `accessible` cannot grow beyond,
    /// and all memory from `accessible` to `static_size` should be mapped as
    /// `PROT_NONE` backed by zero-bytes.
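    ///
    /// A minimal usage sketch (hypothetical values; in practice `base_addr`
    /// points into a larger anonymous mmap owned by the pooling allocator):
    ///
    /// ```ignore
    /// // 4 MiB reservation, nothing accessible yet.
    /// let mut slot = MemoryImageSlot::create(base_addr, 0, 4 << 20);
    /// // Make the first 64 KiB readable/writable.
    /// slot.set_heap_limit(64 << 10)?;
    /// ```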
    pub(crate) fn create(base_addr: *mut c_void, accessible: usize, static_size: usize) -> Self {
        let base = base_addr as usize;
        MemoryImageSlot {
            base,
            static_size,
            accessible,
            image: None,
            dirty: false,
            clear_on_drop: true,
        }
    }

    #[cfg(feature = "pooling-allocator")]
    pub(crate) fn dummy() -> MemoryImageSlot {
        MemoryImageSlot {
            base: 0,
            static_size: 0,
            image: None,
            accessible: 0,
            dirty: false,
            clear_on_drop: false,
        }
    }

    /// Inform the MemoryImageSlot that it should *not* clear the underlying
    /// address space when dropped. This should be used only when the
    /// caller will clear or reuse the address space in some other
    /// way.
    pub(crate) fn no_clear_on_drop(&mut self) {
        self.clear_on_drop = false;
    }

    pub(crate) fn set_heap_limit(&mut self, size_bytes: usize) -> Result<()> {
        assert!(size_bytes <= self.static_size);

        // If the heap limit already addresses accessible bytes then no
        // syscalls are necessary since the data is already mapped into the
        // process and waiting to go.
        //
        // This is used for "dynamic" memories where memory is not always
        // decommitted during recycling (but it's still always reset).
        if size_bytes <= self.accessible {
            return Ok(());
        }

        // Otherwise use `mprotect` to make the new pages read/write.
        self.set_protection(self.accessible..size_bytes, true)?;
        self.accessible = size_bytes;

        Ok(())
    }

    /// Prepares this slot for the instantiation of a new instance with the
    /// provided linear memory image.
    ///
    /// The `initial_size_bytes` parameter indicates the required initial size
    /// of the heap for the instance. The `maybe_image` is an optional initial
    /// image for linear memory to contain. The `style` is the way compiled
    /// code will be accessing this memory.
    ///
    /// The purpose of this method is to take a previously pristine slot
    /// (`!self.dirty`) and transform its prior state into state necessary for
    /// the given parameters. This could include, for example:
    ///
    /// * More memory may be made read/write if `initial_size_bytes` is larger
    ///   than `self.accessible`.
    /// * For `MemoryStyle::Static` linear memory may be made `PROT_NONE` if
    ///   `self.accessible` is larger than `initial_size_bytes`.
    /// * If no image was previously in place or if the wrong image was
    ///   previously in place then `mmap` may be used to set up the initial
    ///   image.
    pub(crate) fn instantiate(
        &mut self,
        initial_size_bytes: usize,
        maybe_image: Option<&Arc<MemoryImage>>,
        style: &MemoryStyle,
    ) -> Result<()> {
        assert!(!self.dirty);
        assert!(initial_size_bytes <= self.static_size);

        // First order of business is to blow away the previous linear memory
        // image if it doesn't match the image specified here. If one is
        // detected then it's reset with anonymous memory which means that all
        // of memory up to `self.accessible` will now be read/write and zero.
        //
        // Note that this is intentionally a "small mmap" which only covers the
        // extent of the prior initialization image in order to preserve
        // resident memory that might come before or after the image.
        if self.image.as_ref() != maybe_image {
            self.remove_image()?;
        }

        // The next order of business is to ensure that `self.accessible` is
        // appropriate. First up is to grow the read/write portion of memory if
        // it's not large enough to accommodate `initial_size_bytes`.
        if self.accessible < initial_size_bytes {
            self.set_protection(self.accessible..initial_size_bytes, true)?;
            self.accessible = initial_size_bytes;
        }

        // Next, if the "static" style of memory is being used then that means
        // that the addressable heap must be shrunk to match
        // `initial_size_bytes`.
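        //
        // (Illustrative numbers, not from the original comments: if a prior
        // instance grew this slot's heap to 1 MiB but the new instance only
        // needs 64 KiB, then for a static memory the 64 KiB..1 MiB range is
        // re-protected as PROT_NONE by the `set_protection` call below.)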
        // The shrink is required because the "static" flavor of memory relies
        // on page faults to indicate out-of-bounds accesses to memory.
        //
        // Note that "dynamic" memories do not shrink the heap here. A dynamic
        // memory performs dynamic bounds checks so if the remaining heap is
        // still addressable then that's ok since it still won't get accessed.
        if initial_size_bytes < self.accessible {
            match style {
                MemoryStyle::Static { .. } => {
                    self.set_protection(initial_size_bytes..self.accessible, false)?;
                    self.accessible = initial_size_bytes;
                }
                MemoryStyle::Dynamic { .. } => {}
            }
        }

        // Now that memory is sized appropriately the final operation is to
        // place the new image into linear memory. Note that this operation is
        // skipped if `self.image` matches `maybe_image`.
        assert!(initial_size_bytes <= self.accessible);
        if self.image.as_ref() != maybe_image {
            if let Some(image) = maybe_image.as_ref() {
                assert!(
                    image.linear_memory_offset.checked_add(image.len).unwrap()
                        <= initial_size_bytes
                );
                if image.len > 0 {
                    unsafe {
                        image.map_at(self.base)?;
                    }
                }
            }
            self.image = maybe_image.cloned();
        }

        // Flag ourselves as `dirty` which means that the next operation on
        // this slot is required to be `clear_and_remain_ready`.
        self.dirty = true;

        Ok(())
    }

    pub(crate) fn remove_image(&mut self) -> Result<()> {
        if let Some(image) = &self.image {
            unsafe {
                image.remap_as_zeros_at(self.base)?;
            }
            self.image = None;
        }
        Ok(())
    }

    /// Resets this linear memory slot back to a "pristine state".
    ///
    /// This will reset the memory back to its original contents on Linux or
    /// reset the contents back to zero on other platforms. The `keep_resident`
    /// argument is the maximum amount of memory to keep resident in this
    /// process's memory on Linux. Up to that much memory will be `memset` to
    /// zero where the rest of it will be reset or released with `madvise`.
    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    pub(crate) fn clear_and_remain_ready(&mut self, keep_resident: usize) -> Result<()> {
        assert!(self.dirty);

        unsafe {
            self.reset_all_memory_contents(keep_resident)?;
        }

        self.dirty = false;
        Ok(())
    }

    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    unsafe fn reset_all_memory_contents(&mut self, keep_resident: usize) -> Result<()> {
        if !cfg!(target_os = "linux") {
            // If we're not on Linux then there's no generic platform way to
            // reset memory back to its original state, so instead reset memory
            // back to entirely zeros with an anonymous backing.
            //
            // Additionally the previous image, if any, is dropped here
            // since it's no longer applicable to this mapping.
            return self.reset_with_anon_memory();
        }

        match &self.image {
            Some(image) => {
                assert!(self.accessible >= image.linear_memory_offset + image.len);
                if image.linear_memory_offset < keep_resident {
                    // If the image starts below the `keep_resident` then
                    // memory looks something like this:
                    //
                    //              up to `keep_resident` bytes
                    //                    |
                    //          +--------------------------+   remaining_memset
                    //          |                          |  /
                    //          <-------------->           <------->
                    //
                    //                            image_end
                    // 0   linear_memory_offset       |       accessible
                    // |            |                 |           |
                    // +----------------+--------------+---------+--------+
                    // |  dirty memory  |    image     |  dirty memory    |
                    // +----------------+--------------+---------+--------+
                    //
                    // <------+-------> <-----+-----> <---+---> <--+--->
                    //        |               |           |        |
                    //        |               |           |        |
                    //   memset (1)           /           |   madvise (4)
                    //                 madvise (2)        /
                    //                                   /
                    //                             memset (3)
                    //
                    //
                    // In this situation there are two disjoint regions that
                    // are `memset` manually to zero. Note that `memset (3)`
                    // may be zero bytes large.
                    // Furthermore `madvise (4)` may also be zero bytes large.

                    let image_end = image.linear_memory_offset + image.len;
                    let mem_after_image = self.accessible - image_end;
                    let remaining_memset =
                        (keep_resident - image.linear_memory_offset).min(mem_after_image);

                    // This is memset (1)
                    std::ptr::write_bytes(self.base as *mut u8, 0u8, image.linear_memory_offset);

                    // This is madvise (2)
                    self.madvise_reset(image.linear_memory_offset, image.len)?;

                    // This is memset (3)
                    std::ptr::write_bytes(
                        (self.base + image_end) as *mut u8,
                        0u8,
                        remaining_memset,
                    );

                    // This is madvise (4)
                    self.madvise_reset(
                        image_end + remaining_memset,
                        mem_after_image - remaining_memset,
                    )?;
                } else {
                    // If the image starts after the `keep_resident` threshold
                    // then we memset the start of linear memory and then use
                    // madvise below for the rest of it, including the image.
                    //
                    // 0        keep_resident                   accessible
                    // |             |                               |
                    // +----------------+---+----------+------------------+
                    // |  dirty memory  |    image     |   dirty memory   |
                    // +----------------+---+----------+------------------+
                    //
                    // <------+-------> <-------------+----------------->
                    //        |                       |
                    //        |                       |
                    //   memset (1)              madvise (2)
                    //
                    // Here only a single memset is necessary since the image
                    // started after the threshold which we're keeping
                    // resident. Note that the memset may be zero bytes here.

                    // This is memset (1)
                    std::ptr::write_bytes(self.base as *mut u8, 0u8, keep_resident);

                    // This is madvise (2)
                    self.madvise_reset(keep_resident, self.accessible - keep_resident)?;
                }
            }

            // If there's no memory image for this slot then memset the first
            // bytes in the memory back to zero while using `madvise` to purge
            // the rest.
            None => {
                let size_to_memset = keep_resident.min(self.accessible);
                std::ptr::write_bytes(self.base as *mut u8, 0u8, size_to_memset);
                self.madvise_reset(size_to_memset, self.accessible - size_to_memset)?;
            }
        }

        Ok(())
    }

    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    unsafe fn madvise_reset(&self, base: usize, len: usize) -> Result<()> {
        assert!(base + len <= self.accessible);
        if len == 0 {
            return Ok(());
        }
        cfg_if::cfg_if! {
            if #[cfg(target_os = "linux")] {
                rustix::mm::madvise(
                    (self.base + base) as *mut c_void,
                    len,
                    rustix::mm::Advice::LinuxDontNeed,
                )?;
                Ok(())
            } else {
                unreachable!();
            }
        }
    }

    fn set_protection(&self, range: Range<usize>, readwrite: bool) -> Result<()> {
        assert!(range.start <= range.end);
        assert!(range.end <= self.static_size);
        let start = self.base.checked_add(range.start).unwrap();
        if range.len() == 0 {
            return Ok(());
        }

        unsafe {
            cfg_if::cfg_if! {
                if #[cfg(unix)] {
                    let flags = if readwrite {
                        rustix::mm::MprotectFlags::READ | rustix::mm::MprotectFlags::WRITE
                    } else {
                        rustix::mm::MprotectFlags::empty()
                    };
                    rustix::mm::mprotect(start as *mut _, range.len(), flags)?;
                } else {
                    use windows_sys::Win32::System::Memory::*;

                    let failure = if readwrite {
                        VirtualAlloc(start as _, range.len(), MEM_COMMIT, PAGE_READWRITE)
                            .is_null()
                    } else {
                        VirtualFree(start as _, range.len(), MEM_DECOMMIT) == 0
                    };
                    if failure {
                        return Err(std::io::Error::last_os_error().into());
                    }
                }
            }
        }

        Ok(())
    }

    pub(crate) fn has_image(&self) -> bool {
        self.image.is_some()
    }

    #[allow(dead_code)] // ignore warnings as this is only used in some cfgs
    pub(crate) fn is_dirty(&self) -> bool {
        self.dirty
    }

    /// Map anonymous zeroed memory across the whole slot,
    /// inaccessible. Used both during instantiate and during drop.
    fn reset_with_anon_memory(&mut self) -> Result<()> {
        if self.static_size == 0 {
            assert!(self.image.is_none());
            assert_eq!(self.accessible, 0);
            return Ok(());
        }

        unsafe {
            cfg_if::cfg_if! {
                if #[cfg(unix)] {
                    let ptr = rustix::mm::mmap_anonymous(
                        self.base as *mut c_void,
                        self.static_size,
                        rustix::mm::ProtFlags::empty(),
                        rustix::mm::MapFlags::PRIVATE | rustix::mm::MapFlags::FIXED,
                    )?;
                    assert_eq!(ptr as usize, self.base);
                } else {
                    use windows_sys::Win32::System::Memory::*;
                    if VirtualFree(self.base as _, self.static_size, MEM_DECOMMIT) == 0 {
                        return Err(std::io::Error::last_os_error().into());
                    }
                }
            }
        }

        self.image = None;
        self.accessible = 0;

        Ok(())
    }
}

impl Drop for MemoryImageSlot {
    fn drop(&mut self) {
        // The MemoryImageSlot may be dropped if there is an error during
        // instantiation: for example, if a memory-growth limiter
        // disallows a guest from having a memory of a certain size,
        // after we've already initialized the MemoryImageSlot.
        //
        // We need to return this region of the large pool mmap to a
        // safe state (with no module-specific mappings). The
        // MemoryImageSlot will not be returned to the MemoryPool, so a new
        // MemoryImageSlot will be created and overwrite the mappings anyway
        // on the slot's next use; but for safety and to avoid
        // resource leaks it's better not to have stale mappings to a
        // possibly-otherwise-dead module's image.
        //
        // To "wipe the slate clean", let's do a mmap of anonymous
        // memory over the whole region, with PROT_NONE. Note that we
        // *can't* simply munmap, because that leaves a hole in the
        // middle of the pooling allocator's big memory area that some
        // other random mmap may swoop in and take, to be trampled
        // over by the next MemoryImageSlot later.
        //
        // Since we're in drop(), we can't sanely return an error if
        // this mmap fails. Instead the result is unwrapped here to
        // trigger a panic if something goes wrong. Otherwise, if this
        // reset-the-mapping step failed silently then on reuse, depending
        // on precisely where errors happened, stale memory could leak
        // through.
        //
        // The exception to all of this is if the `clear_on_drop` flag
        // (which is set by default) is false. If so, the owner of
        // this MemoryImageSlot has indicated that it will clean up in some
        // other way.
        if self.clear_on_drop {
            self.reset_with_anon_memory().unwrap();
        }
    }
}

#[cfg(all(test, target_os = "linux"))]
mod test {
    use std::sync::Arc;

    use super::{create_memfd, FdSource, MemoryImage, MemoryImageSlot, MemoryStyle};
    use crate::mmap::Mmap;
    use anyhow::Result;
    use std::io::Write;

    fn create_memfd_with_data(offset: usize, data: &[u8]) -> Result<MemoryImage> {
        // Offset must be page-aligned.
        let page_size = crate::page_size();
        assert_eq!(offset & (page_size - 1), 0);
        let memfd = create_memfd()?;
        memfd.as_file().write_all(data)?;

        // The image length is rounded up to the nearest page size.
        let image_len = (data.len() + page_size - 1) & !(page_size - 1);
        memfd.as_file().set_len(image_len as u64)?;

        Ok(MemoryImage {
            fd: FdSource::Memfd(memfd),
            len: image_len,
            fd_offset: 0,
            linear_memory_offset: offset,
        })
    }

    #[test]
    fn instantiate_no_image() {
        let style = MemoryStyle::Static { bound: 4 << 30 };
        // 4 MiB mmap'd area, not accessible
        let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap();
        // Create a MemoryImageSlot on top of it
        let mut memfd = MemoryImageSlot::create(mmap.as_mut_ptr() as *mut _, 0, 4 << 20);
        memfd.no_clear_on_drop();
        assert!(!memfd.is_dirty());
        // instantiate with 64 KiB initial size
        memfd.instantiate(64 << 10, None, &style).unwrap();
        assert!(memfd.is_dirty());
        // We should be able to access this 64 KiB (try both ends) and
        // it should consist of zeroes.
        let slice = mmap.as_mut_slice();
        assert_eq!(0, slice[0]);
        assert_eq!(0, slice[65535]);
        slice[1024] = 42;
        assert_eq!(42, slice[1024]);
        // grow the heap
        memfd.set_heap_limit(128 << 10).unwrap();
        let slice = mmap.as_slice();
        assert_eq!(42, slice[1024]);
        assert_eq!(0, slice[131071]);
        // instantiate again; we should see zeroes, even as the
        // reuse-anon-mmap-opt kicks in
        memfd.clear_and_remain_ready(0).unwrap();
        assert!(!memfd.is_dirty());
        memfd.instantiate(64 << 10, None, &style).unwrap();
        let slice = mmap.as_slice();
        assert_eq!(0, slice[1024]);
    }

    #[test]
    fn instantiate_image() {
        let style = MemoryStyle::Static { bound: 4 << 30 };
        // 4 MiB mmap'd area, not accessible
        let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap();
        // Create a MemoryImageSlot on top of it
        let mut memfd = MemoryImageSlot::create(mmap.as_mut_ptr() as *mut _, 0, 4 << 20);
        memfd.no_clear_on_drop();
        // Create an image with some data.
        let image = Arc::new(create_memfd_with_data(4096, &[1, 2, 3, 4]).unwrap());
        // Instantiate with this image
        memfd.instantiate(64 << 10, Some(&image), &style).unwrap();
        assert!(memfd.has_image());
        let slice = mmap.as_mut_slice();
        assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
        slice[4096] = 5;
        // Clear and re-instantiate same image
        memfd.clear_and_remain_ready(0).unwrap();
        memfd.instantiate(64 << 10, Some(&image), &style).unwrap();
        let slice = mmap.as_slice();
        // Should not see mutation from above
        assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
        // Clear and re-instantiate no image
        memfd.clear_and_remain_ready(0).unwrap();
        memfd.instantiate(64 << 10, None, &style).unwrap();
        assert!(!memfd.has_image());
        let slice = mmap.as_slice();
        assert_eq!(&[0, 0, 0, 0], &slice[4096..4100]);
        // Clear and re-instantiate image again
        memfd.clear_and_remain_ready(0).unwrap();
        memfd.instantiate(64 << 10, Some(&image), &style).unwrap();
        let slice = mmap.as_slice();
        assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
        // Create another image with different data.
        let image2 = Arc::new(create_memfd_with_data(4096, &[10, 11, 12, 13]).unwrap());
        memfd.clear_and_remain_ready(0).unwrap();
        memfd.instantiate(128 << 10, Some(&image2), &style).unwrap();
        let slice = mmap.as_slice();
        assert_eq!(&[10, 11, 12, 13], &slice[4096..4100]);
        // Instantiate the original image again; we should notice it's
        // a different image and not reuse the mappings.
        memfd.clear_and_remain_ready(0).unwrap();
        memfd.instantiate(64 << 10, Some(&image), &style).unwrap();
        let slice = mmap.as_slice();
        assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn memset_instead_of_madvise() {
        let style = MemoryStyle::Static { bound: 100 };
        let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap();
        let mut memfd = MemoryImageSlot::create(mmap.as_mut_ptr() as *mut _, 0, 4 << 20);
        memfd.no_clear_on_drop();

        // Test basics with the image
        for image_off in [0, 4096, 8 << 10] {
            let image = Arc::new(create_memfd_with_data(image_off, &[1, 2, 3, 4]).unwrap());
            for amt_to_memset in [0, 4096, 10 << 12, 1 << 20, 10 << 20] {
                memfd.instantiate(64 << 10, Some(&image), &style).unwrap();
                assert!(memfd.has_image());
                let slice = mmap.as_mut_slice();
                if image_off > 0 {
                    assert_eq!(slice[image_off - 1], 0);
                }
                assert_eq!(slice[image_off + 5], 0);
                assert_eq!(&[1, 2, 3, 4], &slice[image_off..][..4]);
                slice[image_off] = 5;
                assert_eq!(&[5, 2, 3, 4], &slice[image_off..][..4]);
                memfd.clear_and_remain_ready(amt_to_memset).unwrap();
            }
        }

        // Test without an image
        for amt_to_memset in [0, 4096, 10 << 12, 1 << 20, 10 << 20] {
            memfd.instantiate(64 << 10, None, &style).unwrap();
            for chunk in mmap.as_mut_slice()[..64 << 10].chunks_mut(1024) {
                assert_eq!(chunk[0], 0);
                chunk[0] = 5;
            }
            memfd.clear_and_remain_ready(amt_to_memset).unwrap();
        }
    }

    #[test]
    #[cfg(target_os = "linux")]
    fn dynamic() {
        let style = MemoryStyle::Dynamic { reserve: 200 };
        let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap();
        let mut memfd = MemoryImageSlot::create(mmap.as_mut_ptr() as *mut _, 0, 4 << 20);
        memfd.no_clear_on_drop();
        let image = Arc::new(create_memfd_with_data(4096, &[1, 2, 3, 4]).unwrap());
        let initial = 64 << 10;

        // Instantiate the image and test that memory remains accessible after
        // it's cleared.
        memfd.instantiate(initial, Some(&image), &style).unwrap();
        assert!(memfd.has_image());
        let slice = mmap.as_mut_slice();
        assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);
        slice[4096] = 5;
        assert_eq!(&[5, 2, 3, 4], &slice[4096..4100]);
        memfd.clear_and_remain_ready(0).unwrap();
        assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);

        // Re-instantiate and make sure it preserves memory. Grow a bit and set
        // data beyond the initial size.
        memfd.instantiate(initial, Some(&image), &style).unwrap();
        assert_eq!(&[1, 2, 3, 4], &slice[4096..4100]);

        memfd.set_heap_limit(initial * 2).unwrap();
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);
        slice[initial] = 100;
        assert_eq!(&[100, 0], &slice[initial..initial + 2]);
        memfd.clear_and_remain_ready(0).unwrap();

        // Test that memory is still accessible, but it's been reset
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);

        // Instantiate again, and again memory beyond the initial size should
        // still be accessible. Grow into it again and make sure it works.
        memfd.instantiate(initial, Some(&image), &style).unwrap();
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);
        memfd.set_heap_limit(initial * 2).unwrap();
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);
        slice[initial] = 100;
        assert_eq!(&[100, 0], &slice[initial..initial + 2]);
        memfd.clear_and_remain_ready(0).unwrap();

        // Reset the image to none and double-check everything is back to zero
        memfd.instantiate(64 << 10, None, &style).unwrap();
        assert!(!memfd.has_image());
        assert_eq!(&[0, 0, 0, 0], &slice[4096..4100]);
        assert_eq!(&[0, 0], &slice[initial..initial + 2]);
    }
}
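
// An additional illustrative test (a sketch, not part of the original test
// suite): exercises `remove_image` directly to show that removing an image
// remaps its extent to anonymous zero memory in place. It uses only APIs
// defined in this file plus the same `Mmap` helpers used by the tests above.
#[cfg(all(test, target_os = "linux"))]
mod remove_image_sketch {
    use super::{create_memfd, FdSource, MemoryImage, MemoryImageSlot, MemoryStyle};
    use crate::mmap::Mmap;
    use std::io::Write;
    use std::sync::Arc;

    #[test]
    fn remove_image_zeroes_in_place() {
        let style = MemoryStyle::Static { bound: 4 << 30 };
        let mut mmap = Mmap::accessible_reserved(0, 4 << 20).unwrap();
        let mut slot = MemoryImageSlot::create(mmap.as_mut_ptr() as *mut _, 0, 4 << 20);
        slot.no_clear_on_drop();

        // Build a one-page image at offset 4096 containing [1, 2, 3, 4],
        // mirroring `create_memfd_with_data` in the module above.
        let page_size = crate::page_size();
        let memfd = create_memfd().unwrap();
        memfd.as_file().write_all(&[1, 2, 3, 4]).unwrap();
        memfd.as_file().set_len(page_size as u64).unwrap();
        let image = Arc::new(MemoryImage {
            fd: FdSource::Memfd(memfd),
            len: page_size,
            fd_offset: 0,
            linear_memory_offset: 4096,
        });

        // Map the image in, then remove it: the image bytes should read as
        // zero afterwards while the rest of the heap stays accessible.
        slot.instantiate(64 << 10, Some(&image), &style).unwrap();
        assert!(slot.has_image());
        assert_eq!(&[1, 2, 3, 4], &mmap.as_slice()[4096..4100]);
        slot.remove_image().unwrap();
        assert!(!slot.has_image());
        assert_eq!(&[0, 0, 0, 0], &mmap.as_slice()[4096..4100]);
    }
}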