Expand Memory docs and add examples (#1357)

Try to thoroughly document unsafety of `Memory` and how it can be used
safely.

cc #1272
This commit is contained in:
Alex Crichton
2020-03-18 14:39:55 -05:00
committed by GitHub
parent 5bd03d282f
commit d2666b2f3b

View File

@@ -468,6 +468,159 @@ impl Table {
/// It is intended that `Memory` is safe to share between threads. At this time
/// this is not implemented in `wasmtime`, however. This is planned to be
/// implemented though!
///
/// # `Memory` and Safety
///
/// Linear memory is a lynchpin of safety for WebAssembly, but it turns out
/// there are very few ways to safely inspect the contents of a memory from the
/// host (Rust). This is because memory safety is quite tricky when working with
/// a `Memory` and we're still working out the best idioms to encapsulate
/// everything safely where it's efficient and ergonomic. This section of
/// documentation, however, is intended to help explain what is and isn't
/// safe when working with `Memory`.
///
/// For safety purposes you can think of a `Memory` as a glorified
/// `Rc<UnsafeCell<Vec<u8>>>`. There are a few consequences of this
/// interpretation:
///
/// * At any time someone else may have access to the memory (hence the `Rc`).
/// This could be a wasm instance, other host code, or a set of wasm instances
/// which all reference a `Memory`. When in doubt assume someone else has a
/// handle to your `Memory`.
///
/// * At any time, memory can be read from or written to (hence the
/// `UnsafeCell`). Anyone with a handle to a wasm memory can read/write to it.
/// Primarily other instances can execute the `load` and `store` family of
/// instructions, as well as any other which modifies or reads memory.
///
/// * At any time memory may grow (hence the `Vec<..>`). Growth may relocate the
/// base memory pointer (similar to how `vec.push(...)` can change the result
/// of `.as_ptr()`).
///
/// So given that we're working roughly with `Rc<UnsafeCell<Vec<u8>>>` that's a
/// lot to keep in mind! Hopefully, though, it sets the stage for what you can
/// safely do with memories.
///
/// Let's run through a few safe examples first of how you can use a `Memory`.
///
/// ```rust
/// use wasmtime::Memory;
///
/// fn safe_examples(mem: &Memory) {
/// // Just like wasm, it's safe to read memory almost at any time. The
/// // gotcha here is that we need to be sure to load from the correct base
/// // pointer and perform the bounds check correctly. So long as this is
/// // all self contained here (e.g. not arbitrary code in the middle) we're
/// // good to go.
/// let byte = unsafe { mem.data_unchecked()[0x123] };
///
/// // Short-lived borrows of memory are safe, but they must be scoped and
/// // not have code which modifies/etc `Memory` while the borrow is active.
/// // For example if you want to read a string from memory it is safe to do
/// // so:
/// let string_base = 0xdead;
/// let string_len = 0xbeef;
/// let string = unsafe {
/// let bytes = &mem.data_unchecked()[string_base..][..string_len];
/// match std::str::from_utf8(bytes) {
/// Ok(s) => s.to_string(), // copy out of wasm memory
/// Err(_) => panic!("not valid utf-8"),
/// }
/// };
///
/// // Additionally like wasm you can write to memory at any point in time,
/// // again making sure that after you get the unchecked slice you don't
/// // execute code which could read/write/modify `Memory`:
/// unsafe {
/// mem.data_unchecked_mut()[0x123] = 3;
/// }
///
/// // When working with *borrows* that point directly into wasm memory you
/// // need to be extremely careful. Any functionality that operates on a
/// // borrow into wasm memory needs to be thoroughly audited to effectively
/// // not touch the `Memory` at all
/// let data_base = 0xfeed;
/// let data_len = 0xface;
/// unsafe {
/// let data = &mem.data_unchecked()[data_base..][..data_len];
/// host_function_that_doesnt_touch_memory(data);
///
/// // effectively the same rules apply to mutable borrows
/// let data_mut = &mut mem.data_unchecked_mut()[data_base..][..data_len];
/// host_function_that_doesnt_touch_memory(data_mut);
/// }
/// }
/// # fn host_function_that_doesnt_touch_memory(_: &[u8]){}
/// ```
///
/// It's worth also, however, covering some examples of **incorrect**,
/// **unsafe** usages of `Memory`. Do not do these things!
///
/// ```rust
/// use wasmtime::Memory;
///
/// // NOTE: All code in this function is not safe to execute and may cause
/// // segfaults/undefined behavior at runtime. Do not copy/paste these examples
/// // into production code!
/// unsafe fn unsafe_examples(mem: &Memory) {
/// // First and foremost, any borrow can be invalidated at any time via the
/// // `Memory::grow` function. This can relocate memory which causes any
/// // previous pointer to be possibly invalid now.
/// let pointer: &u8 = &mem.data_unchecked()[0x100];
/// mem.grow(1); // invalidates `pointer`!
/// // println!("{}", *pointer); // FATAL: use-after-free
///
/// // Note that the use-after-free also applies to slices, whether they're
/// // slices of bytes or strings.
/// let slice: &[u8] = &mem.data_unchecked()[0x100..0x102];
/// mem.grow(1); // invalidates `slice`!
/// // println!("{:?}", slice); // FATAL: use-after-free
///
/// // Due to the reference-counted nature of `Memory` note that literal
/// // calls to `Memory::grow` are not sufficient to audit for. You'll need
/// // to be careful that any mutation of `Memory` doesn't happen while
/// // you're holding an active borrow.
/// let slice: &[u8] = &mem.data_unchecked()[0x100..0x102];
/// some_other_function(mem); // may invalidate `slice`!
/// // println!("{:?}", slice); // FATAL: maybe a use-after-free
///
/// // An especially subtle aspect of accessing a wasm instance's memory is
/// // that you need to be extremely careful about aliasing. Anyone at any
/// // time can call `data_unchecked()` or `data_unchecked_mut()`, which
/// // means you can easily have aliasing mutable references:
/// let ref1: &u8 = &mem.data_unchecked()[0x100];
/// let ref2: &mut u8 = &mut mem.data_unchecked_mut()[0x100];
/// // *ref2 = *ref1; // FATAL: violates Rust's aliasing rules
///
/// // Note that aliasing applies to slices as well; for example, this is
/// // not valid because the slices overlap
/// let slice1: &mut [u8] = &mut mem.data_unchecked_mut()[0x100..][..3];
/// let slice2: &mut [u8] = &mut mem.data_unchecked_mut()[0x102..][..4];
/// // println!("{:?} {:?}", slice1, slice2); // FATAL: aliasing mutable pointers
/// }
/// # fn some_other_function(_mem: &Memory) {}
/// ```
///
/// Overall there are some general rules of thumb when working with `Memory` and
/// getting raw pointers inside of it:
///
/// * If you never have a "long lived" pointer into memory, you're good.
/// * Long-lived pointers must always respect Rust's aliasing rules. It's ok for
/// shared borrows to overlap with each other, but mutable borrows must
/// overlap with nothing.
/// * Long-lived pointers are only valid if `Memory` isn't used in an unsafe way
/// while the pointer is valid. This includes both aliasing and growth.
///
/// At this point it's worth reiterating again that working with `Memory` is
/// pretty tricky and that's not great! Proposals such as [interface types] are
/// intended to prevent wasm modules from even needing to import/export memory
/// in the first place, which obviates the need for all of these safety caveats!
/// Additionally over time we're still working out the best idioms to expose in
/// `wasmtime`, so if you've got ideas or questions please feel free to [open an
/// issue]!
///
/// [interface types]: https://github.com/webassembly/interface-types
/// [open an issue]: https://github.com/bytecodealliance/wasmtime/issues/new
#[derive(Clone)]
pub struct Memory {
store: Store,
@@ -482,6 +635,23 @@ impl Memory {
/// The `store` argument is a general location for cache information, and
/// otherwise the memory will immediately be allocated according to the
/// type's configuration. All WebAssembly memory is initialized to zero.
///
/// # Examples
///
/// ```
/// # use wasmtime::*;
/// # fn main() -> anyhow::Result<()> {
/// let store = Store::default();
///
/// let memory_ty = MemoryType::new(Limits::new(1, None));
/// let memory = Memory::new(&store, memory_ty);
///
/// let module = Module::new(&store, "(module (memory (import \"\" \"\") 1))")?;
/// let instance = Instance::new(&module, &[memory.into()])?;
/// // ...
/// # Ok(())
/// # }
/// ```
pub fn new(store: &Store, ty: MemoryType) -> Memory {
let (wasmtime_handle, wasmtime_export) =
generate_memory_export(store, &ty).expect("generated memory");
@@ -494,6 +664,21 @@ impl Memory {
}
/// Returns the underlying type of this memory.
///
/// # Examples
///
/// ```
/// # use wasmtime::*;
/// # fn main() -> anyhow::Result<()> {
/// let store = Store::default();
/// let module = Module::new(&store, "(module (memory (export \"mem\") 1))")?;
/// let instance = Instance::new(&module, &[])?;
/// let memory = instance.get_export("mem").unwrap().memory().unwrap();
/// let ty = memory.ty();
/// assert_eq!(ty.limits().min(), 1);
/// # Ok(())
/// # }
/// ```
pub fn ty(&self) -> &MemoryType {
// Hand out a borrow of the type stored on this handle rather than a copy;
// the returned reference lives only as long as the borrow of `self`.
&self.ty
}
@@ -522,6 +707,9 @@ impl Memory {
/// your program. Additionally `Memory` can be shared and used in any number
/// of wasm instances, so calling any wasm code should be considered
/// dangerous while you're holding a slice of memory.
///
/// For more information and examples see the documentation on the
/// [`Memory`] type.
pub unsafe fn data_unchecked(&self) -> &[u8] {
// Delegates to the mutable accessor; the `&mut [u8]` it returns is coerced
// to a shared slice by this function's return type, so both accessors view
// the same underlying bytes.
self.data_unchecked_mut()
}
@@ -538,6 +726,9 @@ impl Memory {
/// function twice. Extreme caution should be used when using this method,
/// and in general you probably want to resort to unsafe accessors and the
/// `data` methods below.
///
/// For more information and examples see the documentation on the
/// [`Memory`] type.
pub unsafe fn data_unchecked_mut(&self) -> &mut [u8] {
let definition = &*self.wasmtime_export.definition;
slice::from_raw_parts_mut(definition.base, definition.current_length)
@@ -549,6 +740,9 @@ impl Memory {
/// When reading and manipulating memory be sure to read up on the caveats
/// of [`Memory::data_unchecked`] to make sure that you can safely
/// read/write the memory.
///
/// For more information and examples see the documentation on the
/// [`Memory`] type.
pub fn data_ptr(&self) -> *mut u8 {
// Reads the current `base` pointer out of the memory definition each time
// it is called; nothing is cached on `self`.
// NOTE(review): the dereference relies on `wasmtime_export.definition`
// pointing at a live definition for as long as this `Memory` exists —
// confirm against where `wasmtime_export` is created.
unsafe { (*self.wasmtime_export.definition).base }
}
@@ -556,6 +750,9 @@ impl Memory {
/// Returns the byte length of this memory.
///
/// The returned value will be a multiple of the wasm page size, 64k.
///
/// For more information and examples see the documentation on the
/// [`Memory`] type.
pub fn data_size(&self) -> usize {
// Reads `current_length` from the memory definition on every call, so the
// value reflects whatever the definition holds at that moment.
// NOTE(review): the dereference relies on `wasmtime_export.definition`
// pointing at a live definition for as long as this `Memory` exists —
// confirm against where `wasmtime_export` is created.
unsafe { (*self.wasmtime_export.definition).current_length }
}
@@ -579,6 +776,26 @@ impl Memory {
///
/// Returns an error if memory could not be grown, for example if it exceeds
/// the maximum limits of this memory.
///
/// # Examples
///
/// ```
/// # use wasmtime::*;
/// # fn main() -> anyhow::Result<()> {
/// let store = Store::default();
/// let module = Module::new(&store, "(module (memory (export \"mem\") 1 2))")?;
/// let instance = Instance::new(&module, &[])?;
/// let memory = instance.get_export("mem").unwrap().memory().unwrap();
///
/// assert_eq!(memory.size(), 1);
/// assert_eq!(memory.grow(1)?, 1);
/// assert_eq!(memory.size(), 2);
/// assert!(memory.grow(1).is_err());
/// assert_eq!(memory.size(), 2);
/// assert_eq!(memory.grow(0)?, 2);
/// # Ok(())
/// # }
/// ```
pub fn grow(&self, delta: u32) -> Result<u32> {
let index = self
.wasmtime_handle