Use mmap'd *.cwasm as a source for memory initialization images (#3787)

* Skip memfd creation with precompiled modules

This commit updates the memfd support internally to not actually use a
memfd if a compiled module originally came from disk via the
`wasmtime::Module::deserialize_file` API. In this situation we already
have a file descriptor open and there's no need to copy a module's heap
image to a new file descriptor.
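
For reference, a minimal sketch of the API in question (the `.cwasm` path is hypothetical and assumed to have been produced beforehand by `Module::serialize` or `wasmtime compile`):

```rust
use wasmtime::{Engine, Module};

fn main() -> anyhow::Result<()> {
    let engine = Engine::default();
    // SAFETY: the file must be a trusted artifact previously produced for a
    // compatible engine; deserializing arbitrary files is not safe.
    let module = unsafe { Module::deserialize_file(&engine, "module.cwasm")? };
    // With this change the already-open file can back the initial memory
    // images directly, so no memfd needs to be created for `module`.
    drop(module);
    Ok(())
}
```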

To facilitate a new source of `mmap`, the currently memfd-specific logic for
creating a heap image is generalized into a new form of
`MemoryInitialization` which is attempted for all modules at
module-compile-time. This means that the artifact serialized to disk
contains the memory image in its entirety, ready to be mapped. Furthermore
the memory image is carefully padded and aligned to the target system's
page size, notably meaning that the data section in the final object file
starts at a page-aligned offset and its size is also page-aligned.
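
As a rough sketch of that alignment requirement (the helper and page size below are illustrative, not the actual wasmtime internals):

```rust
/// Round `len` up to the next multiple of `page_size`, which must be a
/// power of two. Both the data section's offset in the object file and its
/// total size are padded this way so the image can be mapped directly.
fn round_up_to_page(len: usize, page_size: usize) -> usize {
    assert!(page_size.is_power_of_two());
    (len + page_size - 1) & !(page_size - 1)
}

fn main() {
    let page_size = 0x1000; // illustrative; the real value comes from the OS
    assert_eq!(round_up_to_page(1, page_size), 0x1000);
    assert_eq!(round_up_to_page(0x1000, page_size), 0x1000);
    assert_eq!(round_up_to_page(0x1001, page_size), 0x2000);
}
```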

This means that when a precompiled module is mapped from disk we can
reuse the underlying `File` to mmap all initial memory images. The offset
within the memory-mapped file differs between the memfd and non-memfd
cases, but that's just another piece of state to track in the memfd
implementation.
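
A simplified sketch of what reusing the backing file looks like, using a raw `libc::mmap` call for illustration (the helper name is made up, the `libc` crate is assumed, and `offset` must be page-aligned):

```rust
use std::fs::File;
use std::os::unix::io::AsRawFd;

/// Hypothetical helper: map `len` bytes of `file` starting at `offset`
/// copy-on-write, so instance writes never reach the file itself.
unsafe fn map_image(file: &File, offset: libc::off_t, len: usize) -> std::io::Result<*mut u8> {
    let ptr = libc::mmap(
        std::ptr::null_mut(),
        len,
        libc::PROT_READ | libc::PROT_WRITE,
        libc::MAP_PRIVATE, // copy-on-write: modifications stay private
        file.as_raw_fd(),
        offset, // 0 for a memfd; the image's file offset for an on-disk `.cwasm`
    );
    if ptr == libc::MAP_FAILED {
        Err(std::io::Error::last_os_error())
    } else {
        Ok(ptr.cast())
    }
}
```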

In the limit this waters down the term "memfd" for this technique of
quickly initializing memory, because a memfd is no longer used
unconditionally (only when a backing file isn't available).
It does, however, open an avenue for porting this support to other OSes in
the future: while `memfd_create` is Linux-specific, both macOS and Windows
support mapping a file copy-on-write. That porting isn't done in this PR
and is left for a future refactoring.

Closes #3758

* Enable "memfd" support on all unix systems

Cordon off the Linux-specific bits and enable the memfd support to
compile and run on platforms like macOS which have a Linux-like `mmap`.
This only works if a module is mapped from a precompiled module file on
disk, but that's better than not supporting it at all!
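
A sketch of roughly how the Linux-specific piece gets cordoned off (function names here are made up for illustration; the real wasmtime code is organized differently):

```rust
use std::fs::File;
use std::io;

/// Linux-only: create an anonymous, memory-backed file to hold a heap image.
#[cfg(target_os = "linux")]
fn anonymous_image_file() -> io::Result<File> {
    use std::os::unix::io::FromRawFd;
    let fd = unsafe { libc::memfd_create(b"wasm-heap-image\0".as_ptr().cast(), 0) };
    if fd < 0 {
        return Err(io::Error::last_os_error());
    }
    Ok(unsafe { File::from_raw_fd(fd) })
}

/// Other unixes: no anonymous file is available, so only images that are
/// already backed by an on-disk `.cwasm` file can be mapped copy-on-write.
#[cfg(all(unix, not(target_os = "linux")))]
fn anonymous_image_file() -> io::Result<File> {
    Err(io::Error::new(
        io::ErrorKind::Unsupported,
        "anonymous heap-image files require Linux's memfd_create",
    ))
}
```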

* Fix linux compile

* Use `Arc<File>` instead of `MmapVecFileBacking`

* Use a named struct instead of mysterious tuples

* Comment about unsafety in `Module::deserialize_file`

* Fix tests

* Fix uffd compile

* Always align data segments

No need to have conditional alignment since their sizes are all aligned
anyway

* Update comment in build.rs

* Use rustix, not `region`

* Fix some confusing logic/names around memory indexes

These functions all work with memory indexes in general, not specifically
with defined-memory indexes.
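
For context, a hedged sketch of that distinction (the index types and `defined_memory_index` accessor exist in `wasmtime_environ`, though this particular helper is only illustrative):

```rust
use wasmtime_environ::{DefinedMemoryIndex, MemoryIndex, Module};

/// A `MemoryIndex` ranges over every memory a module knows about, imported
/// memories included, while a `DefinedMemoryIndex` covers only memories the
/// module defines itself. The renamed functions take the former.
fn local_image_index(module: &Module, index: MemoryIndex) -> Option<DefinedMemoryIndex> {
    // Imported memories have no locally-defined image, hence the `Option`.
    module.defined_memory_index(index)
}
```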
Author: Alex Crichton
Date: 2022-02-10 15:40:40 -06:00 (committed by GitHub)
Parent: 1cb08d4e67
Commit: c0c368d151
18 changed files with 629 additions and 280 deletions


@@ -1,10 +1,10 @@
//! Memory management for executable code.
use crate::unwind::UnwindRegistration;
use crate::MmapVec;
use anyhow::{bail, Context, Result};
use object::read::{File, Object, ObjectSection};
use std::mem::ManuallyDrop;
use wasmtime_runtime::MmapVec;
/// Management of executable memory within a `MmapVec`
///


@@ -5,9 +5,9 @@
use crate::code_memory::CodeMemory;
use crate::debug::create_gdbjit_image;
use crate::{MmapVec, ProfilingAgent};
use anyhow::{anyhow, bail, Context, Result};
use object::write::{Object, StandardSegment};
use crate::ProfilingAgent;
use anyhow::{anyhow, bail, Context, Error, Result};
use object::write::{Object, StandardSegment, WritableBuffer};
use object::{File, Object as _, ObjectSection, SectionKind};
use serde::{Deserialize, Serialize};
use std::convert::TryFrom;
@@ -23,7 +23,7 @@ use wasmtime_environ::{
};
use wasmtime_runtime::{
CompiledModuleId, CompiledModuleIdAllocator, GdbJitImageRegistration, InstantiationError,
VMFunctionBody, VMTrampoline,
MmapVec, VMFunctionBody, VMTrampoline,
};
/// This is the name of the section in the final ELF image which contains
@@ -167,6 +167,7 @@ pub fn finish_compile(
debuginfo,
has_unparsed_debuginfo,
data,
data_align,
passive_data,
..
} = translation;
@@ -179,8 +180,13 @@ pub fn finish_compile(
SectionKind::ReadOnlyData,
);
let mut total_data_len = 0;
for data in data.iter() {
obj.append_section_data(data_id, data, 1);
for data in data {
let offset = obj.append_section_data(data_id, &data, data_align.unwrap_or(1));
// All data segments are expected to be adjacent to one another, and with a
// higher alignment each data segment's size needs to already be aligned for
// that to hold, so assert that the offset this segment was placed at is
// directly after the previous segment.
assert_eq!(offset as usize, total_data_len);
total_data_len += data.len();
}
for data in passive_data.iter() {
@@ -266,7 +272,7 @@ pub fn finish_compile(
bincode::serialize_into(&mut bytes, &info)?;
obj.append_section_data(info_id, &bytes, 1);
return Ok((MmapVec::from_obj(obj)?, info));
return Ok((mmap_vec_from_obj(obj)?, info));
fn push_debug<'a, T>(obj: &mut Object, section: &T)
where
@@ -285,6 +291,74 @@ pub fn finish_compile(
}
}
/// Creates a new `MmapVec` from serializing the specified `obj`.
///
/// The returned `MmapVec` will contain the serialized version of `obj` and
/// is sized appropriately to the exact size of the object serialized.
pub fn mmap_vec_from_obj(obj: Object) -> Result<MmapVec> {
let mut result = ObjectMmap::default();
return match obj.emit(&mut result) {
Ok(()) => {
assert!(result.mmap.is_some(), "no reserve");
let mmap = result.mmap.expect("reserve not called");
assert_eq!(mmap.len(), result.len);
Ok(mmap)
}
Err(e) => match result.err.take() {
Some(original) => Err(original.context(e)),
None => Err(e.into()),
},
};
/// Helper struct to implement the `WritableBuffer` trait from the `object`
/// crate.
///
/// This enables writing an object directly into an mmap'd memory so it's
/// immediately usable for execution after compilation. This implementation
/// relies on a call to `reserve` happening once up front with all the needed
/// data, and the mmap internally does not attempt to grow afterwards.
#[derive(Default)]
struct ObjectMmap {
mmap: Option<MmapVec>,
len: usize,
err: Option<Error>,
}
impl WritableBuffer for ObjectMmap {
fn len(&self) -> usize {
self.len
}
fn reserve(&mut self, additional: usize) -> Result<(), ()> {
assert!(self.mmap.is_none(), "cannot reserve twice");
self.mmap = match MmapVec::with_capacity(additional) {
Ok(mmap) => Some(mmap),
Err(e) => {
self.err = Some(e);
return Err(());
}
};
Ok(())
}
fn resize(&mut self, new_len: usize) {
// Resizing always appends 0 bytes and since new mmaps start out as 0
// bytes we don't actually need to do anything as part of this other
// than update our own length.
if new_len <= self.len {
return;
}
self.len = new_len;
}
fn write_bytes(&mut self, val: &[u8]) {
let mmap = self.mmap.as_mut().expect("write before reserve");
mmap[self.len..][..val.len()].copy_from_slice(val);
self.len += val.len();
}
}
}
/// This is intended to mirror the type tables in `wasmtime_environ`, except that
/// it doesn't store the native signatures which are no longer needed past compilation.
#[derive(Serialize, Deserialize)]


@@ -25,16 +25,14 @@ mod debug;
mod demangling;
mod instantiate;
mod link;
mod mmap_vec;
mod profiling;
mod unwind;
pub use crate::code_memory::CodeMemory;
pub use crate::instantiate::{
finish_compile, subslice_range, CompiledModule, CompiledModuleInfo, SetupError,
SymbolizeContext, TypeTables,
finish_compile, mmap_vec_from_obj, subslice_range, CompiledModule, CompiledModuleInfo,
SetupError, SymbolizeContext, TypeTables,
};
pub use crate::mmap_vec::MmapVec;
pub use demangling::*;
pub use profiling::*;


@@ -1,258 +0,0 @@
use anyhow::{Context, Error, Result};
use object::write::{Object, WritableBuffer};
use std::ops::{Deref, DerefMut, Range, RangeTo};
use std::path::Path;
use std::sync::Arc;
use wasmtime_runtime::Mmap;
/// A type akin to `Vec<u8>`, but backed by `mmap` and able to be split.
///
/// This type is a non-growable owned list of bytes. It can be segmented into
/// disjoint separately owned views akin to the `split_at` method on slices in
/// Rust. An `MmapVec` is backed by an OS-level memory allocation and is not
suitable for lots of small allocations (since it works at page
/// granularity).
///
/// An `MmapVec` is an owned value which means that owners have the ability to
/// get exclusive access to the underlying bytes, enabling mutation.
pub struct MmapVec {
mmap: Arc<Mmap>,
range: Range<usize>,
}
impl MmapVec {
/// Consumes an existing `mmap` and wraps it up into an `MmapVec`.
///
/// The returned `MmapVec` will have the `size` specified, which can be
/// smaller than the region mapped by the `Mmap`. The returned `MmapVec`
/// will only have at most `size` bytes accessible.
pub fn new(mmap: Mmap, size: usize) -> MmapVec {
assert!(size <= mmap.len());
MmapVec {
mmap: Arc::new(mmap),
range: 0..size,
}
}
/// Creates a new zero-initialized `MmapVec` with the given `size`.
///
/// This method will return a new `MmapVec` suitably sized to hold `size`
/// bytes. All bytes will be initialized to zero since this is a fresh OS
/// page allocation.
pub fn with_capacity(size: usize) -> Result<MmapVec> {
Ok(MmapVec::new(Mmap::with_at_least(size)?, size))
}
/// Creates a new `MmapVec` from the contents of an existing `slice`.
///
/// A new `MmapVec` is allocated to hold the contents of `slice` and then
/// `slice` is copied into the new mmap. It's recommended to avoid this
/// method if possible to avoid the need to copy data around.
pub fn from_slice(slice: &[u8]) -> Result<MmapVec> {
let mut result = MmapVec::with_capacity(slice.len())?;
result.copy_from_slice(slice);
Ok(result)
}
/// Creates a new `MmapVec` from serializing the specified `obj`.
///
/// The returned `MmapVec` will contain the serialized version of `obj` and
/// is sized appropriately to the exact size of the object serialized.
pub fn from_obj(obj: Object) -> Result<MmapVec> {
let mut result = ObjectMmap::default();
match obj.emit(&mut result) {
Ok(()) => {
assert!(result.mmap.is_some(), "no reserve");
let mmap = result.mmap.expect("reserve not called");
assert_eq!(mmap.len(), result.len);
Ok(mmap)
}
Err(e) => match result.err.take() {
Some(original) => Err(original.context(e)),
None => Err(e.into()),
},
}
}
/// Creates a new `MmapVec` which is the `path` specified mmap'd into
/// memory.
///
/// This function will attempt to open the file located at `path` and will
/// then use that file to learn about its size and map the full contents
/// into memory. This will return an error if the file doesn't exist or if
/// it's too large to be fully mapped into memory.
pub fn from_file(path: &Path) -> Result<MmapVec> {
let mmap = Mmap::from_file(path)
.with_context(|| format!("failed to create mmap for file: {}", path.display()))?;
let len = mmap.len();
Ok(MmapVec::new(mmap, len))
}
/// Returns whether the original mmap was created from a readonly mapping.
pub fn is_readonly(&self) -> bool {
self.mmap.is_readonly()
}
/// "Drains" leading bytes up to the end specified in `range` from this
/// `MmapVec`, returning a separately owned `MmapVec` which retains access
/// to the bytes.
///
/// This method is similar to the `Vec` type's `drain` method, except that
/// the return value is not an iterator but rather a new `MmapVec`. The
/// purpose of this method is the ability to split-off new `MmapVec` values
/// which are sub-slices of the original one.
///
/// Once data has been drained from an `MmapVec` it is no longer accessible
from the original `MmapVec`; it's only accessible from the returned
/// `MmapVec`. In other words ownership of the drain'd bytes is returned
/// through the `MmapVec` return value.
///
/// This `MmapVec` will shrink by `range.end` bytes, and it will only refer
/// to the bytes that come after the drain range.
///
/// This is an `O(1)` operation which does not involve copies.
pub fn drain(&mut self, range: RangeTo<usize>) -> MmapVec {
let amt = range.end;
assert!(amt <= (self.range.end - self.range.start));
// Create a new `MmapVec` which refers to the same underlying mmap, but
// has a disjoint range from ours. Our own range is adjusted to be
// disjoint just after `ret` is created.
let ret = MmapVec {
mmap: self.mmap.clone(),
range: self.range.start..self.range.start + amt,
};
self.range.start += amt;
return ret;
}
/// Marks the specified `range` within this `mmap` as read/write.
pub unsafe fn make_writable(&self, range: Range<usize>) -> Result<()> {
self.mmap
.make_writable(range.start + self.range.start..range.end + self.range.start)
}
/// Marks the specified `range` within this `mmap` as read/execute.
pub unsafe fn make_executable(&self, range: Range<usize>) -> Result<()> {
self.mmap
.make_executable(range.start + self.range.start..range.end + self.range.start)
}
}
impl Deref for MmapVec {
type Target = [u8];
fn deref(&self) -> &[u8] {
&self.mmap.as_slice()[self.range.clone()]
}
}
impl DerefMut for MmapVec {
fn deref_mut(&mut self) -> &mut [u8] {
debug_assert!(!self.is_readonly());
// SAFETY: The underlying mmap is protected behind an `Arc` which means
// that there can be many references to it. We are guaranteed, though,
// that each reference to the underlying `mmap` has a disjoint `range`
// listed that it can access. This means that despite having shared
// access to the mmap itself we have exclusive ownership of the bytes
// specified in `self.range`. This should allow us to safely hand out
// mutable access to these bytes if so desired.
unsafe {
let slice = std::slice::from_raw_parts_mut(self.mmap.as_mut_ptr(), self.mmap.len());
&mut slice[self.range.clone()]
}
}
}
/// Helper struct to implement the `WritableBuffer` trait from the `object`
/// crate.
///
/// This enables writing an object directly into an mmap'd memory so it's
/// immediately usable for execution after compilation. This implementation
/// relies on a call to `reserve` happening once up front with all the needed
/// data, and the mmap internally does not attempt to grow afterwards.
#[derive(Default)]
struct ObjectMmap {
mmap: Option<MmapVec>,
len: usize,
err: Option<Error>,
}
impl WritableBuffer for ObjectMmap {
fn len(&self) -> usize {
self.len
}
fn reserve(&mut self, additional: usize) -> Result<(), ()> {
assert!(self.mmap.is_none(), "cannot reserve twice");
self.mmap = match MmapVec::with_capacity(additional) {
Ok(mmap) => Some(mmap),
Err(e) => {
self.err = Some(e);
return Err(());
}
};
Ok(())
}
fn resize(&mut self, new_len: usize) {
// Resizing always appends 0 bytes and since new mmaps start out as 0
// bytes we don't actually need to do anything as part of this other
// than update our own length.
if new_len <= self.len {
return;
}
self.len = new_len;
}
fn write_bytes(&mut self, val: &[u8]) {
let mmap = self.mmap.as_mut().expect("write before reserve");
mmap[self.len..][..val.len()].copy_from_slice(val);
self.len += val.len();
}
}
#[cfg(test)]
mod tests {
use super::MmapVec;
#[test]
fn smoke() {
let mut mmap = MmapVec::with_capacity(10).unwrap();
assert_eq!(mmap.len(), 10);
assert_eq!(&mmap[..], &[0; 10]);
mmap[0] = 1;
mmap[2] = 3;
assert!(mmap.get(10).is_none());
assert_eq!(mmap[0], 1);
assert_eq!(mmap[2], 3);
}
#[test]
fn drain() {
let mut mmap = MmapVec::from_slice(&[1, 2, 3, 4]).unwrap();
assert_eq!(mmap.len(), 4);
assert!(mmap.drain(..0).is_empty());
assert_eq!(mmap.len(), 4);
let one = mmap.drain(..1);
assert_eq!(one.len(), 1);
assert_eq!(one[0], 1);
assert_eq!(mmap.len(), 3);
assert_eq!(&mmap[..], &[2, 3, 4]);
drop(one);
assert_eq!(mmap.len(), 3);
let two = mmap.drain(..2);
assert_eq!(two.len(), 2);
assert_eq!(two[0], 2);
assert_eq!(two[1], 3);
assert_eq!(mmap.len(), 1);
assert_eq!(mmap[0], 4);
drop(two);
assert!(mmap.drain(..0).is_empty());
assert!(mmap.drain(..1).len() == 1);
assert!(mmap.is_empty());
assert!(mmap.drain(..0).is_empty());
}
}