Add a Module::deserialize_file method (#3266)

* Add a `Module::deserialize_file` method

This commit adds a new method to the `wasmtime::Module` type,
`deserialize_file`. This is intended to be the same as the `deserialize`
method except that the serialized module is provided as an on-disk file.
This enables Wasmtime to internally use `mmap` to avoid copying bytes
around and generally makes loading a module much faster.

A C API is also added in this commit so that various bindings can use
this accelerated path. Another option perhaps for a Rust-based
API is to have an API taking a `File` itself to allow for a custom file
descriptor in one way or another, but for now that's left for a possible
future refactoring if we find a use case.

* Fix compat with main - handle readonly mmap

* wip

* Try to fix Windows support
This commit is contained in:
Alex Crichton
2021-08-31 13:05:51 -05:00
committed by GitHub
parent 4378ea8e01
commit 9e0c910023
9 changed files with 339 additions and 20 deletions

View File

@@ -165,6 +165,26 @@ WASM_API_EXTERN wasmtime_error_t *wasmtime_module_deserialize(
wasmtime_module_t **ret wasmtime_module_t **ret
); );
/**
 * \brief Deserialize a module from an on-disk file.
 *
 * This function is the same as #wasmtime_module_deserialize except that it
 * reads the data for the serialized module from the path on disk. This can be
 * faster than the alternative which may require copying the data around.
 *
 * This function does not take ownership of any of its arguments, but the
 * returned error and module are owned by the caller.
 *
 * This function is not safe to receive arbitrary user input. See the Rust
 * documentation for more information on what inputs are safe to pass in here
 * (e.g. only that of #wasmtime_module_serialize)
 *
 * \param engine the engine that the module is deserialized for
 * \param path NUL-terminated path of the file holding the serialized module;
 *        the path must be valid UTF-8, otherwise an error is returned
 * \param ret location where the deserialized module is stored on success
 */
WASM_API_EXTERN wasmtime_error_t *wasmtime_module_deserialize_file(
wasm_engine_t *engine,
const char *path,
wasmtime_module_t **ret
);
#ifdef __cplusplus #ifdef __cplusplus
} // extern "C" } // extern "C"
#endif #endif

View File

@@ -3,6 +3,9 @@ use crate::{
wasm_extern_t, wasm_importtype_t, wasm_importtype_vec_t, wasm_store_t, wasmtime_error_t, wasm_extern_t, wasm_importtype_t, wasm_importtype_vec_t, wasm_store_t, wasmtime_error_t,
wasmtime_moduletype_t, StoreRef, wasmtime_moduletype_t, StoreRef,
}; };
use anyhow::Context;
use std::ffi::CStr;
use std::os::raw::c_char;
use wasmtime::{Engine, Extern, Module}; use wasmtime::{Engine, Extern, Module};
#[derive(Clone)] #[derive(Clone)]
@@ -202,3 +205,19 @@ pub unsafe extern "C" fn wasmtime_module_deserialize(
*out = Box::into_raw(Box::new(wasmtime_module_t { module })); *out = Box::into_raw(Box::new(wasmtime_module_t { module }));
}) })
} }
/// C entry point which deserializes a module from the file located at `path`.
///
/// `path` is read as a NUL-terminated C string and must be valid UTF-8;
/// otherwise an error with the message "input path is not valid utf-8" is
/// produced. The path is then handed to `Module::deserialize_file`, and the
/// outcome is passed through `handle_result`, whose callback stores a newly
/// boxed `wasmtime_module_t` into `out`.
///
/// # Safety
///
/// `path` is dereferenced via `CStr::from_ptr`, so it must point to a valid
/// NUL-terminated string. The file contents are subject to the same
/// requirements as `Module::deserialize_file`, which is itself `unsafe`.
#[no_mangle]
pub unsafe extern "C" fn wasmtime_module_deserialize_file(
    engine: &wasm_engine_t,
    path: *const c_char,
    out: &mut *mut wasmtime_module_t,
) -> Option<Box<wasmtime_error_t>> {
    let deserialized = match CStr::from_ptr(path).to_str() {
        Ok(utf8_path) => Module::deserialize_file(&engine.engine, utf8_path),
        Err(not_utf8) => Err(not_utf8).context("input path is not valid utf-8"),
    };
    handle_result(deserialized, |module| {
        let boxed = Box::new(wasmtime_module_t { module });
        *out = Box::into_raw(boxed);
    })
}

View File

@@ -136,23 +136,27 @@ impl CodeMemory {
unsafe { unsafe {
let text_mut = let text_mut =
std::slice::from_raw_parts_mut(ret.text.as_ptr() as *mut u8, ret.text.len()); std::slice::from_raw_parts_mut(ret.text.as_ptr() as *mut u8, ret.text.len());
let text_offset = ret.text.as_ptr() as usize - ret.mmap.as_ptr() as usize;
let text_range = text_offset..text_offset + text_mut.len();
let mut text_section_readwrite = false;
for (offset, r) in text.relocations() { for (offset, r) in text.relocations() {
// If the text section was mapped at readonly we need to make it
// briefly read/write here as we apply relocations.
if !text_section_readwrite && self.mmap.is_readonly() {
self.mmap
.make_writable(text_range.clone())
.expect("unable to make memory writable");
text_section_readwrite = true;
}
crate::link::apply_reloc(&ret.obj, text_mut, offset, r); crate::link::apply_reloc(&ret.obj, text_mut, offset, r);
} }
// Switch the executable portion from read/write to // Switch the executable portion from read/write to
// read/execute, notably not using read/write/execute to prevent // read/execute, notably not using read/write/execute to prevent
// modifications. // modifications.
assert!( self.mmap
ret.text.as_ptr() as usize % region::page::size() == 0, .make_executable(text_range.clone())
"text section is not page-aligned" .expect("unable to make memory executable");
);
region::protect(
ret.text.as_ptr() as *mut _,
ret.text.len(),
region::Protection::READ_EXECUTE,
)
.expect("unable to make memory readonly and executable");
// With all our memory set up use the platform-specific // With all our memory set up use the platform-specific
// `UnwindRegistration` implementation to inform the general // `UnwindRegistration` implementation to inform the general

View File

@@ -1,6 +1,7 @@
use anyhow::{Error, Result}; use anyhow::{Context, Error, Result};
use object::write::{Object, WritableBuffer}; use object::write::{Object, WritableBuffer};
use std::ops::{Deref, DerefMut, Range, RangeTo}; use std::ops::{Deref, DerefMut, Range, RangeTo};
use std::path::Path;
use std::sync::Arc; use std::sync::Arc;
use wasmtime_runtime::Mmap; use wasmtime_runtime::Mmap;
@@ -73,6 +74,25 @@ impl MmapVec {
} }
} }
/// Builds an `MmapVec` whose contents are the file at `path`, mapped into
/// memory.
///
/// The mapping itself is created by `Mmap::from_file`; the resulting
/// `MmapVec` spans the full length reported by that mapping.
///
/// # Errors
///
/// Any failure reported by `Mmap::from_file` is returned with additional
/// context naming the offending path.
pub fn from_file(path: &Path) -> Result<MmapVec> {
    Mmap::from_file(path)
        .map(|mapping| {
            let mapped_len = mapping.len();
            MmapVec::new(mapping, mapped_len)
        })
        .with_context(|| format!("failed to create mmap for file: {}", path.display()))
}
/// Returns whether the original mmap was created from a readonly mapping.
pub fn is_readonly(&self) -> bool {
self.mmap.is_readonly()
}
/// "Drains" leading bytes up to the end specified in `range` from this /// "Drains" leading bytes up to the end specified in `range` from this
/// `MmapVec`, returning a separately owned `MmapVec` which retains access /// `MmapVec`, returning a separately owned `MmapVec` which retains access
/// to the bytes. /// to the bytes.
@@ -105,6 +125,18 @@ impl MmapVec {
self.range.start += amt; self.range.start += amt;
return ret; return ret;
} }
/// Switches the pages covering `range` to read/write.
///
/// `range` is relative to the start of this `MmapVec`; it is shifted by
/// this view's starting offset before being forwarded to the underlying
/// mmap's `make_writable`.
///
/// # Safety
///
/// NOTE(review): the precise contract is not spelled out here; presumably
/// callers must ensure nothing else relies on the previous protection of
/// the affected pages — confirm.
pub unsafe fn make_writable(&self, range: Range<usize>) -> Result<()> {
    let view_start = self.range.start;
    let absolute = (range.start + view_start)..(range.end + view_start);
    self.mmap.make_writable(absolute)
}
/// Switches the pages covering `range` to read/execute.
///
/// `range` is relative to the start of this `MmapVec`; it is shifted by
/// this view's starting offset before being forwarded to the underlying
/// mmap's `make_executable`.
///
/// # Safety
///
/// NOTE(review): the precise contract is not spelled out here; presumably
/// callers must ensure nothing else relies on the previous protection of
/// the affected pages — confirm.
pub unsafe fn make_executable(&self, range: Range<usize>) -> Result<()> {
    let view_start = self.range.start;
    let absolute = (range.start + view_start)..(range.end + view_start);
    self.mmap.make_executable(absolute)
}
} }
impl Deref for MmapVec { impl Deref for MmapVec {
@@ -117,6 +149,7 @@ impl Deref for MmapVec {
impl DerefMut for MmapVec { impl DerefMut for MmapVec {
fn deref_mut(&mut self) -> &mut [u8] { fn deref_mut(&mut self) -> &mut [u8] {
debug_assert!(!self.is_readonly());
// SAFETY: The underlying mmap is protected behind an `Arc` which means // SAFETY: The underlying mmap is protected behind an `Arc` which means
// there there can be many references to it. We are guaranteed, though, // there there can be many references to it. We are guaranteed, though,
// that each reference to the underlying `mmap` has a disjoint `range` // that each reference to the underlying `mmap` has a disjoint `range`

View File

@@ -30,7 +30,7 @@ anyhow = "1.0.38"
mach = "0.3.2" mach = "0.3.2"
[target.'cfg(target_os = "windows")'.dependencies] [target.'cfg(target_os = "windows")'.dependencies]
winapi = { version = "0.3.7", features = ["winbase", "memoryapi", "errhandlingapi"] } winapi = { version = "0.3.7", features = ["winbase", "memoryapi", "errhandlingapi", "handleapi"] }
[target.'cfg(target_os = "linux")'.dependencies] [target.'cfg(target_os = "linux")'.dependencies]
userfaultfd = { version = "0.3.0", optional = true } userfaultfd = { version = "0.3.0", optional = true }

View File

@@ -1,9 +1,14 @@
//! Low-level abstraction for allocating and managing zero-filled pages //! Low-level abstraction for allocating and managing zero-filled pages
//! of memory. //! of memory.
use anyhow::{bail, Result}; use anyhow::anyhow;
use anyhow::{bail, Context, Result};
use more_asserts::assert_le; use more_asserts::assert_le;
use std::convert::TryFrom;
use std::fs::File;
use std::io; use std::io;
use std::ops::Range;
use std::path::Path;
use std::ptr; use std::ptr;
use std::slice; use std::slice;
@@ -22,6 +27,7 @@ pub struct Mmap {
// the coordination all happens at the OS layer. // the coordination all happens at the OS layer.
ptr: usize, ptr: usize,
len: usize, len: usize,
file: Option<File>,
} }
impl Mmap { impl Mmap {
@@ -34,6 +40,7 @@ impl Mmap {
Self { Self {
ptr: empty.as_ptr() as usize, ptr: empty.as_ptr() as usize,
len: 0, len: 0,
file: None,
} }
} }
@@ -44,6 +51,117 @@ impl Mmap {
Self::accessible_reserved(rounded_size, rounded_size) Self::accessible_reserved(rounded_size, rounded_size)
} }
/// Creates a new `Mmap` by opening the file located at `path` and mapping
/// it into memory.
///
/// The memory is mapped in read-only mode for the entire file. If portions
/// of the mapping need to be modified or executed, use `make_writable` and
/// `make_executable` to alter the permissions of the relevant pages.
///
/// The returned `Mmap` has a length equal to the length of the file and
/// keeps the opened `File` alive in its `file` field, which is also how
/// `is_readonly` identifies file-backed mappings.
///
/// # Errors
///
/// Returns an error if the file cannot be opened, its metadata cannot be
/// read, its length does not fit in a `usize`, or the operating system
/// refuses to create the mapping.
pub fn from_file(path: &Path) -> Result<Self> {
#[cfg(unix)]
{
use std::os::unix::prelude::*;
let file = File::open(path).context("failed to open file")?;
let len = file
.metadata()
.context("failed to get file metadata")?
.len();
// The file length is a `u64`; it must fit in the address space.
let len = usize::try_from(len).map_err(|_| anyhow!("file too large to map"))?;
// Map the whole file read-only. `MAP_PRIVATE` is used so that any
// later writes to the mapping stay private to this process rather
// than being carried through to the file.
let ptr = unsafe {
libc::mmap(
ptr::null_mut(),
len,
libc::PROT_READ,
libc::MAP_PRIVATE,
file.as_raw_fd(),
0,
)
};
// `mmap` signals failure by returning `MAP_FAILED`, i.e. `-1`.
if ptr as isize == -1_isize {
return Err(io::Error::last_os_error())
.context(format!("mmap failed to allocate {:#x} bytes", len));
}
Ok(Self {
ptr: ptr as usize,
len,
file: Some(file),
})
}
#[cfg(windows)]
{
use std::fs::OpenOptions;
use std::os::windows::prelude::*;
use winapi::um::handleapi::*;
use winapi::um::memoryapi::*;
use winapi::um::winnt::*;
unsafe {
// Open the file with read/execute access and only share for
// read. This will enable us to perform the proper mmap below
// while also disallowing other processes modifying the file
// and having those modifications show up in our address space.
let file = OpenOptions::new()
.read(true)
.access_mode(FILE_GENERIC_READ | FILE_GENERIC_EXECUTE)
.share_mode(FILE_SHARE_READ)
.open(path)
.context("failed to open file")?;
let len = file
.metadata()
.context("failed to get file metadata")?
.len();
// The file length is a `u64`; it must fit in the address space.
let len = usize::try_from(len).map_err(|_| anyhow!("file too large to map"))?;
// Create a file mapping that allows PAGE_EXECUTE_READ which
// we'll be using for mapped text sections in ELF images later.
let mapping = CreateFileMappingW(
file.as_raw_handle().cast(),
ptr::null_mut(),
PAGE_EXECUTE_READ,
0,
0,
ptr::null(),
);
if mapping.is_null() {
return Err(io::Error::last_os_error())
.context("failed to create file mapping");
}
// Create a view for the entire file using `FILE_MAP_EXECUTE`
// here so that we can later change the text section to execute.
let ptr = MapViewOfFile(mapping, FILE_MAP_READ | FILE_MAP_EXECUTE, 0, 0, len);
// Capture the OS error before calling `CloseHandle`, which could
// otherwise overwrite the thread's last-error value. The mapping
// handle is closed on both the success and the failure path.
let err = io::Error::last_os_error();
CloseHandle(mapping);
if ptr.is_null() {
return Err(err)
.context(format!("failed to create map view of {:#x} bytes", len));
}
// Build the `Mmap` before adjusting protections so that the early
// return below drops it, which unmaps the view via `Drop`.
let ret = Self {
ptr: ptr as usize,
len,
file: Some(file),
};
// Protect the entire file as PAGE_READONLY to start (i.e.
// remove the execute bit)
let mut old = 0;
if VirtualProtect(ret.ptr as *mut _, ret.len, PAGE_READONLY, &mut old) == 0 {
return Err(io::Error::last_os_error())
.context("failed change pages to `PAGE_READONLY`");
}
Ok(ret)
}
}
}
/// Create a new `Mmap` pointing to `accessible_size` bytes of page-aligned accessible memory, /// Create a new `Mmap` pointing to `accessible_size` bytes of page-aligned accessible memory,
/// within a reserved mapping of `mapping_size` bytes. `accessible_size` and `mapping_size` /// within a reserved mapping of `mapping_size` bytes. `accessible_size` and `mapping_size`
/// must be native page-size multiples. /// must be native page-size multiples.
@@ -83,6 +201,7 @@ impl Mmap {
Self { Self {
ptr: ptr as usize, ptr: ptr as usize,
len: mapping_size, len: mapping_size,
file: None,
} }
} else { } else {
// Reserve the mapping size. // Reserve the mapping size.
@@ -107,6 +226,7 @@ impl Mmap {
let mut result = Self { let mut result = Self {
ptr: ptr as usize, ptr: ptr as usize,
len: mapping_size, len: mapping_size,
file: None,
}; };
if accessible_size != 0 { if accessible_size != 0 {
@@ -152,6 +272,7 @@ impl Mmap {
Self { Self {
ptr: ptr as usize, ptr: ptr as usize,
len: mapping_size, len: mapping_size,
file: None,
} }
} else { } else {
// Reserve the mapping size. // Reserve the mapping size.
@@ -164,6 +285,7 @@ impl Mmap {
let mut result = Self { let mut result = Self {
ptr: ptr as usize, ptr: ptr as usize,
len: mapping_size, len: mapping_size,
file: None,
}; };
if accessible_size != 0 { if accessible_size != 0 {
@@ -234,6 +356,7 @@ impl Mmap {
/// Return the allocated memory as a mutable slice of u8. /// Return the allocated memory as a mutable slice of u8.
pub fn as_mut_slice(&mut self) -> &mut [u8] { pub fn as_mut_slice(&mut self) -> &mut [u8] {
debug_assert!(!self.is_readonly());
unsafe { slice::from_raw_parts_mut(self.ptr as *mut u8, self.len) } unsafe { slice::from_raw_parts_mut(self.ptr as *mut u8, self.len) }
} }
@@ -257,9 +380,65 @@ impl Mmap {
self.len() == 0 self.len() == 0
} }
#[allow(dead_code)] /// Returns whether the underlying mapping is readonly, meaning that
pub(crate) unsafe fn from_raw(ptr: usize, len: usize) -> Self { /// attempts to write will fault.
Self { ptr, len } pub fn is_readonly(&self) -> bool {
self.file.is_some()
}
/// Changes the protection of `range` within this `Mmap` to read/write.
///
/// For file-backed mappings on Windows the pages are switched to
/// `PAGE_WRITECOPY`; in every other case the `region` crate is used to
/// apply read/write protection.
///
/// # Panics
///
/// Panics if `range` extends past the end of the mapping, if its start is
/// greater than its end, or if its start is not page-aligned.
pub unsafe fn make_writable(&self, range: Range<usize>) -> Result<()> {
    assert!(range.start <= self.len());
    assert!(range.end <= self.len());
    assert!(range.start <= range.end);
    assert!(
        range.start % region::page::size() == 0,
        "changing of protections isn't page-aligned",
    );

    let byte_len = range.end - range.start;
    let start_ptr = self.as_ptr().add(range.start);

    // On Windows when we have a file mapping we need to specifically use
    // `PAGE_WRITECOPY` to ensure that pages are COW'd into place because
    // we don't want our modifications to go back to the original file.
    #[cfg(windows)]
    {
        use winapi::um::memoryapi::*;
        use winapi::um::winnt::*;

        if self.file.is_some() {
            let mut previous = 0;
            let status =
                VirtualProtect(start_ptr as *mut _, byte_len, PAGE_WRITECOPY, &mut previous);
            return if status == 0 {
                Err(io::Error::last_os_error())
                    .context("failed to change pages to `PAGE_WRITECOPY`")
            } else {
                Ok(())
            };
        }
    }

    // If we're not on Windows or if we're on Windows with an anonymous
    // mapping then we can use the `region` crate.
    Ok(region::protect(
        start_ptr,
        byte_len,
        region::Protection::READ_WRITE,
    )?)
}
/// Makes the specified `range` within this `Mmap` to be read/execute.
pub unsafe fn make_executable(&self, range: Range<usize>) -> Result<()> {
assert!(range.start <= self.len());
assert!(range.end <= self.len());
assert!(range.start <= range.end);
assert!(
range.start % region::page::size() == 0,
"changing of protections isn't page-aligned",
);
region::protect(
self.as_ptr().add(range.start),
range.end - range.start,
region::Protection::READ_EXECUTE,
)?;
Ok(())
} }
} }
@@ -276,10 +455,15 @@ impl Drop for Mmap {
fn drop(&mut self) { fn drop(&mut self) {
if self.len != 0 { if self.len != 0 {
use winapi::ctypes::c_void; use winapi::ctypes::c_void;
use winapi::um::memoryapi::VirtualFree; use winapi::um::memoryapi::*;
use winapi::um::winnt::MEM_RELEASE; use winapi::um::winnt::MEM_RELEASE;
let r = unsafe { VirtualFree(self.ptr as *mut c_void, 0, MEM_RELEASE) }; if self.file.is_none() {
assert_ne!(r, 0); let r = unsafe { VirtualFree(self.ptr as *mut c_void, 0, MEM_RELEASE) };
assert_ne!(r, 0);
} else {
let r = unsafe { UnmapViewOfFile(self.ptr as *mut c_void) };
assert_ne!(r, 0);
}
} }
} }
} }

View File

@@ -474,6 +474,25 @@ impl Module {
module.into_module(engine) module.into_module(engine)
} }
/// Same as [`deserialize`], except that the serialized module is loaded
/// from the file located at `path` to produce a [`Module`].
///
/// This method is provided because it can be faster than [`deserialize`]
/// since the data doesn't need to be copied around, but rather the module
/// can be used directly from an mmap'd view of the file provided.
///
/// # Safety
///
/// For more information see the documentation of the [`deserialize`]
/// method for why this function is `unsafe`.
///
/// [`deserialize`]: Module::deserialize
pub unsafe fn deserialize_file(engine: &Engine, path: impl AsRef<Path>) -> Result<Module> {
    let check_version = engine.config().deserialize_check_wasmtime_version;
    let serialized = SerializedModule::from_file(path.as_ref(), check_version)?;
    serialized.into_module(engine)
}
fn from_parts( fn from_parts(
engine: &Engine, engine: &Engine,
mut modules: Vec<Arc<CompiledModule>>, mut modules: Vec<Arc<CompiledModule>>,

View File

@@ -55,6 +55,7 @@ use object::{Bytes, File, Object, ObjectSection};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::collections::BTreeMap; use std::collections::BTreeMap;
use std::convert::TryFrom; use std::convert::TryFrom;
use std::path::Path;
use std::str::FromStr; use std::str::FromStr;
use std::sync::Arc; use std::sync::Arc;
use wasmtime_environ::{Compiler, FlagValue, Tunables}; use wasmtime_environ::{Compiler, FlagValue, Tunables};
@@ -367,6 +368,15 @@ impl<'a> SerializedModule<'a> {
Self::from_mmap(MmapVec::from_slice(bytes)?, check_version) Self::from_mmap(MmapVec::from_slice(bytes)?, check_version)
} }
/// Loads a serialized module from the file at `path`.
///
/// The file is mapped into memory with `MmapVec::from_file` and the
/// mapping is then handed to `from_mmap` together with `check_version`.
/// A failure to create the mapping is returned with context naming `path`.
pub fn from_file(path: &Path, check_version: bool) -> Result<Self> {
    let mapped = MmapVec::from_file(path)
        .with_context(|| format!("failed to create file mapping for: {}", path.display()))?;
    Self::from_mmap(mapped, check_version)
}
pub fn from_mmap(mut mmap: MmapVec, check_version: bool) -> Result<Self> { pub fn from_mmap(mut mmap: MmapVec, check_version: bool) -> Result<Self> {
// Artifacts always start with an ELF file, so read that first. // Artifacts always start with an ELF file, so read that first.
// Afterwards we continually read ELF files until we see the `u64::MAX` // Afterwards we continually read ELF files until we see the `u64::MAX`

View File

@@ -1,7 +1,8 @@
use anyhow::{bail, Result}; use anyhow::{bail, Result};
use std::fs;
use wasmtime::*; use wasmtime::*;
fn serialize(engine: &Engine, wat: &'static str) -> Result<Vec<u8>> { fn serialize(engine: &Engine, wat: &str) -> Result<Vec<u8>> {
let module = Module::new(&engine, wat)?; let module = Module::new(&engine, wat)?;
Ok(module.serialize()?) Ok(module.serialize()?)
} }
@@ -68,3 +69,32 @@ fn test_module_serialize_fail() -> Result<()> {
} }
Ok(()) Ok(())
} }
/// Round-trips modules through an on-disk file: each module is serialized,
/// written to a temporary directory, loaded back with
/// `Module::deserialize_file`, instantiated, and its `run` export is checked
/// to return 42.
#[test]
fn test_deserialize_from_file() -> Result<()> {
    // Serializes `wat`, stores it in a fresh temporary directory, then loads
    // it from the file and invokes its exported `run` function.
    fn serialize_and_call(wat: &str) -> Result<()> {
        let mut store = Store::<()>::default();
        let tmp_dir = tempfile::TempDir::new()?;
        let serialized = serialize(store.engine(), wat)?;
        let module_path = tmp_dir.path().join("module.bin");
        fs::write(&module_path, &serialized)?;
        let module = unsafe { Module::deserialize_file(store.engine(), &module_path)? };
        let instance = Instance::new(&mut store, &module, &[])?;
        let run = instance.get_typed_func::<(), i32, _>(&mut store, "run")?;
        let answer = run.call(&mut store, ())?;
        assert_eq!(answer, 42);
        Ok(())
    }

    // The second module exercises a call between functions.
    let modules = [
        "(module (func (export \"run\") (result i32) i32.const 42))",
        "(module
(func (export \"run\") (result i32)
call $answer)
(func $answer (result i32)
i32.const 42))
",
    ];
    for wat in &modules {
        serialize_and_call(wat)?;
    }
    Ok(())
}