From 0cde30197d959c1b16b17e0f0367e14805aeaef9 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Fri, 6 Dec 2019 15:48:46 -0800 Subject: [PATCH] fuzzing: Add initial API call fuzzer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only generate *valid* sequences of API calls. To do this, we keep track of what objects we've already created in earlier API calls via the `Scope` struct. To generate even-more-pathological sequences of API calls, we use [swarm testing]: > In swarm testing, the usual practice of potentially including all features > in every test case is abandoned. Rather, a large “swarm” of randomly > generated configurations, each of which omits some features, is used, with > configurations receiving equal resources. [swarm testing]: https://www.cs.utah.edu/~regehr/papers/swarm12.pdf There are more public APIs and instance introspection APIs that we have than this fuzzer exercises right now. We will need a better generator of valid Wasm than `wasm-opt -ttf` to really get the most out of those currently-unexercised APIs, since the Wasm modules generated by `wasm-opt -ttf` don't import and export a huge variety of things. --- crates/fuzzing/src/generators.rs | 15 ++- crates/fuzzing/src/generators/api.rs | 182 +++++++++++++++++++++++++++ crates/fuzzing/src/lib.rs | 6 +- crates/fuzzing/src/oracles.rs | 128 ++++++++++++++++++- fuzz/Cargo.toml | 6 + fuzz/fuzz_targets/api_calls.rs | 27 ++++ 6 files changed, 360 insertions(+), 4 deletions(-) create mode 100644 crates/fuzzing/src/generators/api.rs create mode 100755 fuzz/fuzz_targets/api_calls.rs diff --git a/crates/fuzzing/src/generators.rs b/crates/fuzzing/src/generators.rs index 043ce47baa..1fb6a29c0f 100644 --- a/crates/fuzzing/src/generators.rs +++ b/crates/fuzzing/src/generators.rs @@ -8,15 +8,28 @@ //! wrapper over an external tool, such that the wrapper implements the //! `Arbitrary` trait for the wrapped external tool. 
+pub mod api; + use arbitrary::{Arbitrary, Unstructured}; +use std::fmt; /// A Wasm test case generator that is powered by Binaryen's `wasm-opt -ttf`. -#[derive(Debug)] +#[derive(Clone)] pub struct WasmOptTtf { /// The raw, encoded Wasm bytes. pub wasm: Vec<u8>, } +impl fmt::Debug for WasmOptTtf { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "WasmOptTtf {{ wasm: wat::parse_str(r###\"\n{}\n\"###).unwrap() }}", + wasmprinter::print_bytes(&self.wasm).expect("valid wasm should always disassemble") + ) + } +} + impl Arbitrary for WasmOptTtf { fn arbitrary<U>(input: &mut U) -> Result<Self, U::Error> where diff --git a/crates/fuzzing/src/generators/api.rs b/crates/fuzzing/src/generators/api.rs new file mode 100644 index 0000000000..debae64f05 --- /dev/null +++ b/crates/fuzzing/src/generators/api.rs @@ -0,0 +1,182 @@ +//! Generating sequences of Wasmtime API calls. +//! +//! We only generate *valid* sequences of API calls. To do this, we keep track +//! of what objects we've already created in earlier API calls via the `Scope` +//! struct. +//! +//! To generate even-more-pathological sequences of API calls, we use [swarm +//! testing]: +//! +//! > In swarm testing, the usual practice of potentially including all features +//! > in every test case is abandoned. Rather, a large “swarm” of randomly +//! > generated configurations, each of which omits some features, is used, with +//! > configurations receiving equal resources. +//! +//! 
[swarm testing]: https://www.cs.utah.edu/~regehr/papers/swarm12.pdf + +use arbitrary::{Arbitrary, Unstructured}; +use std::collections::HashSet; + +struct Swarm { + config_debug_info: bool, + module_new: bool, + module_drop: bool, + instance_new: bool, + instance_drop: bool, + call_exported_func: bool, +} + +impl Arbitrary for Swarm { + fn arbitrary<U>(input: &mut U) -> Result<Self, U::Error> + where + U: Unstructured + ?Sized, + { + Ok(Swarm { + config_debug_info: bool::arbitrary(input)?, + module_new: bool::arbitrary(input)?, + module_drop: bool::arbitrary(input)?, + instance_new: bool::arbitrary(input)?, + instance_drop: bool::arbitrary(input)?, + call_exported_func: bool::arbitrary(input)?, + }) + } +} + +/// A call to one of Wasmtime's public APIs. +#[derive(Clone, Debug)] +#[allow(missing_docs)] +pub enum ApiCall { + ConfigNew, + ConfigDebugInfo(bool), + EngineNew, + StoreNew, + ModuleNew { id: usize, wasm: super::WasmOptTtf }, + ModuleDrop { id: usize }, + InstanceNew { id: usize, module: usize }, + InstanceDrop { id: usize }, + CallExportedFunc { instance: usize, nth: usize }, +} +use ApiCall::*; + +#[derive(Default)] +struct Scope { + id_counter: usize, + modules: HashSet<usize>, + instances: HashSet<usize>, +} + +impl Scope { + fn next_id(&mut self) -> usize { + let id = self.id_counter; + self.id_counter = id + 1; + id + } +} + +/// A sequence of API calls. +#[derive(Debug)] +pub struct ApiCalls { + /// The API calls. + pub calls: Vec<ApiCall>, +} + +impl Arbitrary for ApiCalls { + fn arbitrary<U>(input: &mut U) -> Result<Self, U::Error> + where + U: Unstructured + ?Sized, + { + let swarm = Swarm::arbitrary(input)?; + let mut calls = vec![]; + + arbitrary_config(input, &swarm, &mut calls)?; + calls.push(EngineNew); + calls.push(StoreNew); + + let mut scope = Scope::default(); + + for _ in 0..input.container_size()? 
{ + let mut choices: Vec<fn(_, &mut Scope) -> Result<ApiCall, U::Error>> = vec![]; + + if swarm.module_new { + choices.push(|input, scope| { + let id = scope.next_id(); + scope.modules.insert(id); + let wasm = super::WasmOptTtf::arbitrary(input)?; + Ok(ModuleNew { id, wasm }) + }); + } + if swarm.module_drop && !scope.modules.is_empty() { + choices.push(|input, scope| { + let modules: Vec<_> = scope.modules.iter().cloned().collect(); + let id = arbitrary_choice(input, &modules)?.cloned().unwrap(); + scope.modules.remove(&id); + Ok(ModuleDrop { id }) + }); + } + if swarm.instance_new && !scope.modules.is_empty() { + choices.push(|input, scope| { + let modules: Vec<_> = scope.modules.iter().cloned().collect(); + let module = arbitrary_choice(input, &modules)?.cloned().unwrap(); + let id = scope.next_id(); + scope.instances.insert(id); + Ok(InstanceNew { id, module }) + }); + } + if swarm.instance_drop && !scope.instances.is_empty() { + choices.push(|input, scope| { + let instances: Vec<_> = scope.instances.iter().cloned().collect(); + let id = arbitrary_choice(input, &instances)?.cloned().unwrap(); + scope.instances.remove(&id); + Ok(InstanceDrop { id }) + }); + } + if swarm.call_exported_func && !scope.instances.is_empty() { + choices.push(|input, scope| { + let instances: Vec<_> = scope.instances.iter().cloned().collect(); + let instance = arbitrary_choice(input, &instances)?.cloned().unwrap(); + let nth = usize::arbitrary(input)?; + Ok(CallExportedFunc { instance, nth }) + }); + } + + if let Some(c) = arbitrary_choice(input, &choices)? { + calls.push(c(input, &mut scope)?); + } else { + break; + } + } + + Ok(ApiCalls { calls }) + } +} + +fn arbitrary_choice<'a, T, U>(input: &mut U, choices: &'a [T]) -> Result<Option<&'a T>, U::Error> +where + U: Unstructured + ?Sized, +{ + if choices.is_empty() { + Ok(None) + } else { + let i = usize::arbitrary(input)? 
% choices.len(); + Ok(Some(&choices[i])) + } +} + +fn arbitrary_config<U>( + input: &mut U, + swarm: &Swarm, + calls: &mut Vec<ApiCall>, +) -> Result<(), U::Error> +where + U: Unstructured + ?Sized, +{ + calls.push(ConfigNew); + + if swarm.config_debug_info && bool::arbitrary(input)? { + calls.push(ConfigDebugInfo(bool::arbitrary(input)?)); + } + + // TODO: flags, features, and compilation strategy. + + Ok(()) +} diff --git a/crates/fuzzing/src/lib.rs b/crates/fuzzing/src/lib.rs index df232f8072..394038d0e4 100644 --- a/crates/fuzzing/src/lib.rs +++ b/crates/fuzzing/src/lib.rs @@ -107,8 +107,12 @@ fn my_fuzzing_regression_test() {{ } } -fn scratch_dir() -> PathBuf { +pub(crate) fn scratch_dir() -> PathBuf { let dir = Path::new(env!("CARGO_MANIFEST_DIR")) + // Pop "fuzzing". + .join("..") + // Pop "crates". + .join("..") .join("target") .join("scratch"); diff --git a/crates/fuzzing/src/oracles.rs b/crates/fuzzing/src/oracles.rs index b5de268f5d..1068cb8521 100644 --- a/crates/fuzzing/src/oracles.rs +++ b/crates/fuzzing/src/oracles.rs @@ -12,11 +12,11 @@ pub mod dummy; -use dummy::dummy_imports; +use dummy::{dummy_imports, dummy_value}; use std::cell::RefCell; use std::collections::HashMap; use std::rc::Rc; -use wasmtime::{Config, Engine, HostRef, Instance, Module, Store}; +use wasmtime::*; use wasmtime_environ::{isa, settings}; use wasmtime_jit::{native, CompilationStrategy, CompiledModule, Compiler, NullResolver}; @@ -83,3 +83,127 @@ pub fn compile(wasm: &[u8], compilation_strategy: CompilationStrategy) { let global_exports = Rc::new(RefCell::new(HashMap::new())); let _ = CompiledModule::new(&mut compiler, wasm, &mut resolver, global_exports, false); } + +/// Invoke the given API calls. 
+pub fn make_api_calls(api: crate::generators::api::ApiCalls) { + use crate::generators::api::ApiCall; + + let mut config: Option<Config> = None; + let mut engine: Option<HostRef<Engine>> = None; + let mut store: Option<HostRef<Store>> = None; + let mut modules: HashMap<usize, HostRef<Module>> = Default::default(); + let mut instances: HashMap<usize, HostRef<Instance>> = Default::default(); + + for call in api.calls { + match call { + ApiCall::ConfigNew => { + assert!(config.is_none()); + config = Some(Config::new()); + } + + ApiCall::ConfigDebugInfo(b) => { + config.as_mut().unwrap().debug_info(b); + } + + ApiCall::EngineNew => { + assert!(engine.is_none()); + engine = Some(HostRef::new(Engine::new(config.as_ref().unwrap()))); + } + + ApiCall::StoreNew => { + assert!(store.is_none()); + store = Some(HostRef::new(Store::new(engine.as_ref().unwrap()))); + } + + ApiCall::ModuleNew { id, wasm } => { + let module = HostRef::new(match Module::new(store.as_ref().unwrap(), &wasm.wasm) { + Ok(m) => m, + Err(_) => continue, + }); + let old = modules.insert(id, module); + assert!(old.is_none()); + } + + ApiCall::ModuleDrop { id } => { + drop(modules.remove(&id)); + } + + ApiCall::InstanceNew { id, module } => { + let module = match modules.get(&module) { + Some(m) => m, + None => continue, + }; + + let imports = { + let module = module.borrow(); + match dummy_imports(store.as_ref().unwrap(), module.imports()) { + Ok(imps) => imps, + Err(_) => { + // There are some value types that we can't synthesize a + // dummy value for (e.g. anyrefs) and for modules that + // import things of these types we skip instantiation. + continue; + } + } + }; + + // Don't unwrap this: there can be instantiation-/link-time errors that + // aren't caught during validation or compilation. For example, an imported + // table might not have room for an element segment that we want to + // initialize into it. 
+ if let Ok(instance) = Instance::new(store.as_ref().unwrap(), &module, &imports) { + instances.insert(id, HostRef::new(instance)); + } + } + + ApiCall::InstanceDrop { id } => { + drop(instances.remove(&id)); + } + + ApiCall::CallExportedFunc { instance, nth } => { + let instance = match instances.get(&instance) { + Some(i) => i, + None => { + // Note that we aren't guaranteed to instantiate valid + // modules, see comments in `InstanceNew` for details on + // that. But the API call generator can't know if + // instantiation failed, so we might not actually have + // this instance. When that's the case, just skip the + // API call and keep going. + continue; + } + }; + + let funcs = { + let instance = instance.borrow(); + instance + .exports() + .iter() + .filter_map(|e| match e { + Extern::Func(f) => Some(f.clone()), + _ => None, + }) + .collect::<Vec<_>>() + }; + + if funcs.is_empty() { + continue; + } + + let nth = nth % funcs.len(); + let f = funcs[nth].borrow(); + let ty = f.r#type(); + let params = match ty + .params() + .iter() + .map(|valty| dummy_value(valty)) + .collect::<Result<Vec<_>, _>>() + { + Ok(p) => p, + Err(_) => continue, + }; + let _ = f.call(&params); + } + } + } +} diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 3fb5420242..637c730c5f 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -10,6 +10,8 @@ cargo-fuzz = true [dependencies] arbitrary = "0.2.0" +env_logger = "0.7.1" +log = "0.4.8" wasmtime-fuzzing = { path = "../crates/fuzzing", features = ["env_logger"] } wasmtime-jit = { path = "../crates/jit" } libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer-sys.git" } @@ -29,3 +31,7 @@ path = "fuzz_targets/instantiate.rs" [[bin]] name = "instantiate_translated" path = "fuzz_targets/instantiate_translated.rs" + +[[bin]] +name = "api_calls" +path = "fuzz_targets/api_calls.rs" diff --git a/fuzz/fuzz_targets/api_calls.rs b/fuzz/fuzz_targets/api_calls.rs new file mode 100755 index 0000000000..9cdc2685f8 --- /dev/null +++ b/fuzz/fuzz_targets/api_calls.rs 
@@ -0,0 +1,27 @@ +#![no_main] + +use libfuzzer_sys::fuzz_target; +use std::sync::Once; +use wasmtime_fuzzing::{generators::api::ApiCalls, oracles}; + +fuzz_target!(|api: ApiCalls| { + static INIT_LOGGING: Once = Once::new(); + INIT_LOGGING.call_once(|| env_logger::init()); + + log::debug!( + "If this fuzz test fails, here is a regression tests: +``` +#[test] +fn my_regression_test() {{ + use wasmtime_fuzzing::generators::{{ + api::{{ApiCall::*, ApiCalls}}, + WasmOptTtf, + }}; + wasmtime_fuzzing::oracles::make_api_calls({:#?}); +}} +```", + api + ); + + oracles::make_api_calls(api); +});