From d2666b2f3b26ebbcb81ef00a0a7198d9dd88ff1b Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 18 Mar 2020 14:39:55 -0500 Subject: [PATCH] Expand `Memory` docs and add examples (#1357) Try to thoroughly document unsafety of `Memory` and how it can be used safely. cc #1272 --- crates/api/src/externals.rs | 217 ++++++++++++++++++++++++++++++++++++ 1 file changed, 217 insertions(+) diff --git a/crates/api/src/externals.rs b/crates/api/src/externals.rs index fb5efa91c6..3390c74ca9 100644 --- a/crates/api/src/externals.rs +++ b/crates/api/src/externals.rs @@ -468,6 +468,159 @@ impl Table { /// It is intended that `Memory` is safe to share between threads. At this time /// this is not implemented in `wasmtime`, however. This is planned to be /// implemented though! +/// +/// # `Memory` and Safety +/// +/// Linear memory is a lynchpin of safety for WebAssembly, but it turns out +/// there are very few ways to safely inspect the contents of a memory from the +/// host (Rust). This is because memory safety is quite tricky when working with +/// a `Memory` and we're still working out the best idioms to encapsulate +/// everything safely where it's efficient and ergonomic. This section of +/// documentation, however, is intended to help educate a bit what is and isn't +/// safe when working with `Memory`. +/// +/// For safety purposes you can think of a `Memory` as a glorified +/// `Rc<UnsafeCell<Vec<u8>>>`. There's a few consequences of this +/// interpretation: +/// +/// * At any time someone else may have access to the memory (hence the `Rc`). +/// This could be a wasm instance, other host code, or a set of wasm instances +/// which all reference a `Memory`. When in doubt assume someone else has a +/// handle to your `Memory`. +/// +/// * At any time, memory can be read from or written to (hence the +/// `UnsafeCell`). Anyone with a handle to a wasm memory can read/write to it.
+/// Primarily other instances can execute the `load` and `store` family of +/// instructions, as well as any other which modifies or reads memory. +/// +/// * At any time memory may grow (hence the `Vec<..>`). Growth may relocate the +/// base memory pointer (similar to how `vec.push(...)` can change the result +/// of `.as_ptr()`) +/// +/// So given that we're working roughly with `Rc<UnsafeCell<Vec<u8>>>` that's a +/// lot to keep in mind! It's hopefully though sort of setting the stage as to +/// what you can safely do with memories. +/// +/// Let's run through a few safe examples first of how you can use a `Memory`. +/// +/// ```rust +/// use wasmtime::Memory; +/// +/// fn safe_examples(mem: &Memory) { +/// // Just like wasm, it's safe to read memory almost at any time. The +/// // gotcha here is that we need to be sure to load from the correct base +/// // pointer and perform the bounds check correctly. So long as this is +/// // all self contained here (e.g. not arbitrary code in the middle) we're +/// // good to go. +/// let byte = unsafe { mem.data_unchecked()[0x123] }; +/// +/// // Short-lived borrows of memory are safe, but they must be scoped and +/// // not have code which modifies/etc `Memory` while the borrow is active.
+/// // For example if you want to read a string from memory it is safe to do +/// // so: +/// let string_base = 0xdead; +/// let string_len = 0xbeef; +/// let string = unsafe { +/// let bytes = &mem.data_unchecked()[string_base..][..string_len]; +/// match std::str::from_utf8(bytes) { +/// Ok(s) => s.to_string(), // copy out of wasm memory +/// Err(_) => panic!("not valid utf-8"), +/// } +/// }; +/// +/// // Additionally like wasm you can write to memory at any point in time, +/// // again making sure that after you get the unchecked slice you don't +/// // execute code which could read/write/modify `Memory`: +/// unsafe { +/// mem.data_unchecked_mut()[0x123] = 3; +/// } +/// +/// // When working with *borrows* that point directly into wasm memory you +/// // need to be extremely careful. Any functionality that operates on a +/// // borrow into wasm memory needs to be thoroughly audited to effectively +/// // not touch the `Memory` at all +/// let data_base = 0xfeed; +/// let data_len = 0xface; +/// unsafe { +/// let data = &mem.data_unchecked()[data_base..][..data_len]; +/// host_function_that_doesnt_touch_memory(data); +/// +/// // effectively the same rules apply to mutable borrows +/// let data_mut = &mut mem.data_unchecked_mut()[data_base..][..data_len]; +/// host_function_that_doesnt_touch_memory(data_mut); +/// } +/// } +/// # fn host_function_that_doesnt_touch_memory(_: &[u8]){} +/// ``` +/// +/// It's worth also, however, covering some examples of **incorrect**, +/// **unsafe** usages of `Memory`. Do not do these things! +/// +/// ```rust +/// use wasmtime::Memory; +/// +/// // NOTE: All code in this function is not safe to execute and may cause +/// // segfaults/undefined behavior at runtime. Do not copy/paste these examples +/// // into production code! +/// unsafe fn unsafe_examples(mem: &Memory) { +/// // First and foremost, any borrow can be invalidated at any time via the +/// // `Memory::grow` function.
This can relocate memory which causes any +/// // previous pointer to be possibly invalid now. +/// let pointer: &u8 = &mem.data_unchecked()[0x100]; +/// mem.grow(1); // invalidates `pointer`! +/// // println!("{}", *pointer); // FATAL: use-after-free +/// +/// // Note that the use-after-free also applies to slices, whether they're +/// // slices of bytes or strings. +/// let slice: &[u8] = &mem.data_unchecked()[0x100..0x102]; +/// mem.grow(1); // invalidates `slice`! +/// // println!("{:?}", slice); // FATAL: use-after-free +/// +/// // Due to the reference-counted nature of `Memory` note that literal +/// // calls to `Memory::grow` are not sufficient to audit for. You'll need +/// // to be careful that any mutation of `Memory` doesn't happen while +/// // you're holding an active borrow. +/// let slice: &[u8] = &mem.data_unchecked()[0x100..0x102]; +/// some_other_function(mem); // may invalidate `slice`! +/// // println!("{:?}", slice); // FATAL: maybe a use-after-free +/// +/// // An especially subtle aspect of accessing a wasm instance's memory is +/// // that you need to be extremely careful about aliasing. 
Anyone at any +/// // time can call `data_unchecked()` or `data_unchecked_mut()`, which +/// // means you can easily have aliasing mutable references: +/// let ref1: &u8 = &mem.data_unchecked()[0x100]; +/// let ref2: &mut u8 = &mut mem.data_unchecked_mut()[0x100]; +/// // *ref2 = *ref1; // FATAL: violates Rust's aliasing rules +/// +/// // Note that aliasing applies to strings as well, for example this is +/// // not valid because the slices overlap +/// let slice1: &mut [u8] = &mut mem.data_unchecked_mut()[0x100..][..3]; +/// let slice2: &mut [u8] = &mut mem.data_unchecked_mut()[0x102..][..4]; +/// // println!("{:?} {:?}", slice1, slice2); // FATAL: aliasing mutable pointers +/// } +/// # fn some_other_function(_mem: &Memory) {} +/// ``` +/// +/// Overall there's some general rules of thumb when working with `Memory` and +/// getting raw pointers inside of it: +/// +/// * If you never have a "long lived" pointer into memory, you're good. +/// * Long-lived pointers must always respect Rust's aliasing rules. It's ok for +/// shared borrows to overlap with each other, but mutable borrows must +/// overlap with nothing. +/// * Long-lived pointers are only valid if `Memory` isn't used in an unsafe way +/// while the pointer is valid. This includes both aliasing and growth. +/// +/// At this point it's worth reiterating again that working with `Memory` is +/// pretty tricky and that's not great! Proposals such as [interface types] are +/// intended to prevent wasm modules from even needing to import/export memory +/// in the first place, which obviates the need for all of these safety caveats! +/// Additionally over time we're still working out the best idioms to expose in +/// `wasmtime`, so if you've got ideas or questions please feel free to [open an +/// issue]!
+/// +/// [interface types]: https://github.com/webassembly/interface-types +/// [open an issue]: https://github.com/bytecodealliance/wasmtime/issues/new #[derive(Clone)] pub struct Memory { store: Store, @@ -482,6 +635,23 @@ impl Memory { /// The `store` argument is a general location for cache information, and /// otherwise the memory will immediately be allocated according to the /// type's configuration. All WebAssembly memory is initialized to zero. + /// + /// # Examples + /// + /// ``` + /// # use wasmtime::*; + /// # fn main() -> anyhow::Result<()> { + /// let store = Store::default(); + /// + /// let memory_ty = MemoryType::new(Limits::new(1, None)); + /// let memory = Memory::new(&store, memory_ty); + /// + /// let module = Module::new(&store, "(module (memory (import \"\" \"\") 1))")?; + /// let instance = Instance::new(&module, &[memory.into()])?; + /// // ... + /// # Ok(()) + /// # } + /// ``` pub fn new(store: &Store, ty: MemoryType) -> Memory { let (wasmtime_handle, wasmtime_export) = generate_memory_export(store, &ty).expect("generated memory"); @@ -494,6 +664,21 @@ impl Memory { } /// Returns the underlying type of this memory. + /// + /// # Examples + /// + /// ``` + /// # use wasmtime::*; + /// # fn main() -> anyhow::Result<()> { + /// let store = Store::default(); + /// let module = Module::new(&store, "(module (memory (export \"mem\") 1))")?; + /// let instance = Instance::new(&module, &[])?; + /// let memory = instance.get_export("mem").unwrap().memory().unwrap(); + /// let ty = memory.ty(); + /// assert_eq!(ty.limits().min(), 1); + /// # Ok(()) + /// # } + /// ``` pub fn ty(&self) -> &MemoryType { &self.ty } @@ -522,6 +707,9 @@ impl Memory { /// your program. Additionally `Memory` can be shared and used in any number /// of wasm instances, so calling any wasm code should be considered /// dangerous while you're holding a slice of memory. + /// + /// For more information and examples see the documentation on the + /// [`Memory`] type. 
pub unsafe fn data_unchecked(&self) -> &[u8] { self.data_unchecked_mut() } @@ -538,6 +726,9 @@ impl Memory { /// function twice. Extreme caution should be used when using this method, /// and in general you probably want to result to unsafe accessors and the /// `data` methods below. + /// + /// For more information and examples see the documentation on the + /// [`Memory`] type. pub unsafe fn data_unchecked_mut(&self) -> &mut [u8] { let definition = &*self.wasmtime_export.definition; slice::from_raw_parts_mut(definition.base, definition.current_length) @@ -549,6 +740,9 @@ impl Memory { /// When reading and manipulating memory be sure to read up on the caveats /// of [`Memory::data_unchecked`] to make sure that you can safely /// read/write the memory. + /// + /// For more information and examples see the documentation on the + /// [`Memory`] type. pub fn data_ptr(&self) -> *mut u8 { unsafe { (*self.wasmtime_export.definition).base } } @@ -556,6 +750,9 @@ impl Memory { /// Returns the byte length of this memory. /// /// The returned value will be a multiple of the wasm page size, 64k. + /// + /// For more information and examples see the documentation on the + /// [`Memory`] type. pub fn data_size(&self) -> usize { unsafe { (*self.wasmtime_export.definition).current_length } } @@ -579,6 +776,26 @@ impl Memory { /// /// Returns an error if memory could not be grown, for example if it exceeds /// the maximum limits of this memory. 
+ /// + /// # Examples + /// + /// ``` + /// # use wasmtime::*; + /// # fn main() -> anyhow::Result<()> { + /// let store = Store::default(); + /// let module = Module::new(&store, "(module (memory (export \"mem\") 1 2))")?; + /// let instance = Instance::new(&module, &[])?; + /// let memory = instance.get_export("mem").unwrap().memory().unwrap(); + /// + /// assert_eq!(memory.size(), 1); + /// assert_eq!(memory.grow(1)?, 1); + /// assert_eq!(memory.size(), 2); + /// assert!(memory.grow(1).is_err()); + /// assert_eq!(memory.size(), 2); + /// assert_eq!(memory.grow(0)?, 2); + /// # Ok(()) + /// # } + /// ``` pub fn grow(&self, delta: u32) -> Result { let index = self .wasmtime_handle