From da5c82b786245bd6aa8879c78275eac5131900ff Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 25 Aug 2021 12:14:13 -0500 Subject: [PATCH] Fix a possible use-after-free introduced in #3231 (#3238) In #3231 the wasm data sections were moved from the `wasmtime_environ::Module` structure into the `CompilationArtifacts`. Each `wasmtime_runtime::Instance` holds raw pointers into the data section owned by the compilation artifacts under the assumption that the runtime keeps the artifacts alive while the module is in use. Data is needed beyond original initialization for `memory.init` instructions as well as lazy-initialization with the `uffd` feature. The intention of #3231 was that all `CompiledModule` structures, which own `CompilationArtifacts` were owned by a store's `ModuleRegistry`, so this was already taken care of. It turns out, however, that empty modules which contain no functions are not held within a `ModuleRegistry` since there was no need prior to retain them. This commit remedies this mistake by retaining the `CompiledModule` structure, even if there aren't any functions compiled in. This should unblock #3235 and fixes the spurious error found there. The test here, at least on Linux, will deterministically reproduce the error before this commit since `uffd` was initializing wasm memory with free'd host memory. --- crates/wasmtime/src/module/registry.rs | 31 ++++++++++----- tests/all/pooling_allocator.rs | 52 ++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 10 deletions(-) diff --git a/crates/wasmtime/src/module/registry.rs b/crates/wasmtime/src/module/registry.rs index 0ff2e5073a..6f9faa81bc 100644 --- a/crates/wasmtime/src/module/registry.rs +++ b/crates/wasmtime/src/module/registry.rs @@ -27,7 +27,10 @@ fn func_by_pc(module: &CompiledModule, pc: usize) -> Option<(DefinedFuncIndex, u /// /// The `BTreeMap` is used to quickly locate a module based on a program counter value. #[derive(Default)] -pub struct ModuleRegistry(BTreeMap>); +pub struct ModuleRegistry { + modules_with_code: BTreeMap>, + modules_without_code: Vec>, +} impl ModuleRegistry { /// Fetches information about a registered module given a program counter value. @@ -37,7 +40,7 @@ impl ModuleRegistry { } fn module(&self, pc: usize) -> Option<&Arc> { - let (end, info) = self.0.range(pc..).next()?; + let (end, info) = self.modules_with_code.range(pc..).next()?; if pc < info.start || *end < pc { return None; } @@ -50,32 +53,40 @@ impl ModuleRegistry { let compiled_module = module.compiled_module(); let (start, end) = compiled_module.code().range(); - // Ignore modules with no code or finished functions - if start == end || compiled_module.finished_functions().is_empty() { + // If there's not actually any functions in this module then we may + // still need to preserve it for its data segments. Instances of this + // module will hold a pointer to the data stored in the module itself, + // and for schemes like uffd this performs lazy initialization which + // could use the module in the future. For that reason we continue to + // register empty modules and retain them. + if compiled_module.finished_functions().is_empty() { + self.modules_without_code.push(compiled_module.clone()); return; } // The module code range is exclusive for end, so make it inclusive as it // may be a valid PC value + assert!(start < end); let end = end - 1; // Ensure the module isn't already present in the registry - // This is expected when a module is instantiated multiple times in the same store - if let Some(m) = self.0.get(&end) { + // This is expected when a module is instantiated multiple times in the + // same store + if let Some(m) = self.modules_with_code.get(&end) { assert_eq!(m.start, start); return; } // Assert that this module's code doesn't collide with any other registered modules - if let Some((_, prev)) = self.0.range(end..).next() { + if let Some((_, prev)) = self.modules_with_code.range(end..).next() { assert!(prev.start > end); } - if let Some((prev_end, _)) = self.0.range(..=start).next_back() { + if let Some((prev_end, _)) = self.modules_with_code.range(..=start).next_back() { assert!(*prev_end < start); } - let prev = self.0.insert( + let prev = self.modules_with_code.insert( end, Arc::new(RegisteredModule { start, @@ -98,7 +109,7 @@ impl ModuleRegistry { impl Drop for ModuleRegistry { fn drop(&mut self) { let mut info = GLOBAL_MODULES.write().unwrap(); - for end in self.0.keys() { + for end in self.modules_with_code.keys() { info.unregister(*end); } } diff --git a/tests/all/pooling_allocator.rs b/tests/all/pooling_allocator.rs index 44f91c5828..b11a3c86b9 100644 --- a/tests/all/pooling_allocator.rs +++ b/tests/all/pooling_allocator.rs @@ -459,3 +459,55 @@ fn instantiation_limit() -> Result<()> { Ok(()) } + +#[test] +fn preserve_data_segments() -> Result<()> { + let mut config = Config::new(); + config.allocation_strategy(InstanceAllocationStrategy::Pooling { + strategy: PoolingAllocationStrategy::NextAvailable, + module_limits: ModuleLimits { + memory_pages: 1, + table_elements: 10, + ..Default::default() + }, + instance_limits: InstanceLimits { count: 2 }, + }); + let engine = Engine::new(&config)?; + let m = Module::new( + &engine, + r#" + (module + (memory (export "mem") 1 1) + (data (i32.const 0) "foo")) + "#, + )?; + let mut store = Store::new(&engine, ()); + let i = Instance::new(&mut store, &m, &[])?; + + // Drop the module. This should *not* drop the actual data referenced by the + // module, especially when uffd is enabled. If uffd is enabled we'll lazily + // fault in the memory of the module, which means it better still be alive + // after we drop this. + drop(m); + + // Spray some stuff on the heap. If wasm data lived on the heap this should + // paper over things and help us catch use-after-free here if it would + // otherwise happen. + let mut strings = Vec::new(); + for _ in 0..1000 { + let mut string = String::new(); + for _ in 0..1000 { + string.push('g'); + } + strings.push(string); + } + drop(strings); + + let mem = i.get_memory(&mut store, "mem").unwrap(); + + // This will segfault with uffd enabled, and then the uffd will lazily + // initialize the memory. Hopefully it's still `foo`! + assert!(mem.data(&store).starts_with(b"foo")); + + Ok(()) +}