diff --git a/.gitmodules b/.gitmodules index ee264b99c4..725b4b4dc2 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,3 +13,7 @@ [submodule "crates/wasi-crypto/spec"] path = crates/wasi-crypto/spec url = https://github.com/WebAssembly/wasi-crypto.git +[submodule "crates/fuzzing/wasm-spec-interpreter/ocaml/spec"] + path = crates/fuzzing/wasm-spec-interpreter/ocaml/spec + url = https://github.com/WebAssembly/spec + shallow = true diff --git a/Cargo.lock b/Cargo.lock index 59bb30fc78..e88dcf86f7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -950,6 +950,12 @@ dependencies = [ "cipher", ] +[[package]] +name = "cty" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7313c0d620d0cb4dbd9d019e461a4beb501071ff46ec0ab933efb4daa76d73e3" + [[package]] name = "curve25519-dalek" version = "3.1.0" @@ -1899,6 +1905,35 @@ dependencies = [ "memchr", ] +[[package]] +name = "ocaml-boxroot-sys" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5186393bfbee4ce2bc5bbb82beafb77e85c1d0a557e3cfc8c8a0d63d7845fed5" +dependencies = [ + "cc", +] + +[[package]] +name = "ocaml-interop" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "feed1425b18c848323dceeed5fc35a085b07b58783b040560e3a05dfe2427753" +dependencies = [ + "ocaml-boxroot-sys", + "ocaml-sys", + "static_assertions", +] + +[[package]] +name = "ocaml-sys" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e7dee4179690e6af128fce2fab845ab45a833d04e117f5fdcafab1ba26ce518" +dependencies = [ + "cty", +] + [[package]] name = "once_cell" version = "1.7.2" @@ -2832,6 +2867,12 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.8.0" @@ -3465,6 +3506,15 @@ dependencies = [ "wasm-encoder", ] +[[package]] +name = "wasm-spec-interpreter" +version = "0.1.0" +dependencies = [ + "lazy_static", + "ocaml-interop", + "wat", +] + [[package]] name = "wasmi" version = "0.7.0" @@ -3727,6 +3777,7 @@ dependencies = [ "rayon", "wasm-encoder", "wasm-smith", + "wasm-spec-interpreter", "wasmi", "wasmparser", "wasmprinter", diff --git a/crates/fuzzing/Cargo.toml b/crates/fuzzing/Cargo.toml index beb35779b0..def0bfa381 100644 --- a/crates/fuzzing/Cargo.toml +++ b/crates/fuzzing/Cargo.toml @@ -19,7 +19,13 @@ wasmtime = { path = "../wasmtime" } wasmtime-wast = { path = "../wast" } wasm-encoder = "0.6.0" wasm-smith = "0.6.0" +wasm-spec-interpreter = { path = "./wasm-spec-interpreter" } wasmi = "0.7.0" [dev-dependencies] wat = "1.0.37" + +# We only build the library containing the OCaml spec interpreter if the OCaml +# toolchain is available--which is assumed here to be the case when fuzzing. 
+[target.'cfg(fuzzing)'.dependencies] +wasm-spec-interpreter = { path = "./wasm-spec-interpreter", features = ["build-libinterpret"] } diff --git a/crates/fuzzing/wasm-spec-interpreter/Cargo.toml b/crates/fuzzing/wasm-spec-interpreter/Cargo.toml new file mode 100644 index 0000000000..bd37c83e6c --- /dev/null +++ b/crates/fuzzing/wasm-spec-interpreter/Cargo.toml @@ -0,0 +1,22 @@ +[package] +authors = ["The Wasmtime Project Developers"] +description = "A Rust-to-OCaml wrapper for the WebAssembly specification interpreter" +name = "wasm-spec-interpreter" +version = "0.1.0" +publish = false +edition = "2018" +license = "Apache-2.0 WITH LLVM-exception" + +# Until https://gitlab.com/ocaml-rust/ocaml-boxroot/-/issues/1 is resolved and +# this crate can use the `without-ocamlopt` feature to avoid build failures, it +# is better to only build the OCaml dependencies when fuzzing (see the +# `build-libinterpret` feature set by this crate's parent). +[dependencies] +ocaml-interop = { version = "0.8", optional = true } +lazy_static = { version = "1.4", optional = true } + +[dev-dependencies] +wat = "1.0" + +[features] +build-libinterpret = ["ocaml-interop", "lazy_static"] diff --git a/crates/fuzzing/wasm-spec-interpreter/README.md b/crates/fuzzing/wasm-spec-interpreter/README.md new file mode 100644 index 0000000000..9e00a4c442 --- /dev/null +++ b/crates/fuzzing/wasm-spec-interpreter/README.md @@ -0,0 +1,39 @@ +wasm-spec-interpreter +===================== + +This project shows how to use `ocaml-interop` to call into the Wasm spec +interpreter. 
There are several steps to making this work: + - building the OCaml Wasm spec interpreter as a static library + - building a Rust-to-OCaml FFI bridge using `ocaml-interop` and a custom OCaml + wrapper + - linking both things into a Rust crate + +### Dependencies + +This crate only builds in an environment with: +- `make` (the Wasm spec interpreter uses a `Makefile`) +- `ocamlopt`, `ocamlbuild` (available with, e.g., `dnf install ocaml`) +- Linux tools (e.g. `ar`); currently it is easiest to build the static + libraries in a single environment but this could be fixed in the future (TODO) + +Remember to retrieve the Wasm spec submodule: + +``` +git clone ... --recursive +``` + +### Build + +``` +RUSTFLAGS=--cfg=fuzzing cargo build +``` + +Use `FFI_LIB_DIR=path/to/lib/...` to specify a different location for the static +library (this is mainly for debugging). If the `--cfg=fuzzing` configuration is +not provided, this crate will build successfully but fail at runtime. + +### Test + +``` +RUSTFLAGS=--cfg=fuzzing cargo test +``` diff --git a/crates/fuzzing/wasm-spec-interpreter/build.rs b/crates/fuzzing/wasm-spec-interpreter/build.rs new file mode 100644 index 0000000000..61e8c316eb --- /dev/null +++ b/crates/fuzzing/wasm-spec-interpreter/build.rs @@ -0,0 +1,54 @@ +/// Build the OCaml code and statically link it into the Rust library; see the +/// [ocaml-interop +/// example](https://github.com/tezedge/ocaml-interop/blob/master/testing/rust-caller/build.rs) +/// for more details. After playing with this a bit, I discovered that the best +/// approach to avoid missing symbols was to imitate `dune`: I observed `rm -rf +/// _build && dune build ./ocaml/interpret.exe.o --display=verbose` and used +/// that as a pattern, now encoded in `ocaml/Makefile` for easier debugging. 
+use std::{env, process::Command}; + +const LIB_NAME: &'static str = "interpret"; +const OCAML_DIR: &'static str = "ocaml"; + +fn main() { + if cfg!(feature = "build-libinterpret") { + build(); + } +} + +fn build() { + let out_dir = &env::var("OUT_DIR").unwrap(); + + // Re-run if changed. + println!("cargo:rerun-if-changed={}/{}.ml", OCAML_DIR, LIB_NAME); + println!("cargo:rerun-if-changed={}/Makefile", OCAML_DIR); + + if let Some(other_dir) = env::var_os("FFI_LIB_DIR") { + // Link with a library provided in the `FFI_LIB_DIR`. + println!("cargo:rustc-link-search={}", other_dir.to_str().unwrap()); + println!("cargo:rustc-link-lib=static={}", LIB_NAME); + } else { + // Build the library to link to. + build_lib(out_dir, OCAML_DIR); + println!("cargo:rustc-link-search={}", out_dir); + println!("cargo:rustc-link-lib=static={}", LIB_NAME); + } + + // Enabling this feature alerts the compiler to use the `with_library` + // module. + println!("cargo:rustc-cfg=feature=\"has-libinterpret\""); +} + +// Build the OCaml library into Cargo's `out` directory. +fn build_lib(out_dir: &str, ocaml_dir: &str) { + let status = Command::new("make") + .arg(format!("BUILD_DIR={}", out_dir)) + .current_dir(ocaml_dir) + .status() + .expect("Failed to execute 'make' command to build OCaml library"); + + assert!( + status.success(), + "Failed to build the OCaml library using 'make'." + ) +} diff --git a/crates/fuzzing/wasm-spec-interpreter/ocaml/.gitignore b/crates/fuzzing/wasm-spec-interpreter/ocaml/.gitignore new file mode 100644 index 0000000000..e35d8850c9 --- /dev/null +++ b/crates/fuzzing/wasm-spec-interpreter/ocaml/.gitignore @@ -0,0 +1 @@ +_build diff --git a/crates/fuzzing/wasm-spec-interpreter/ocaml/Makefile b/crates/fuzzing/wasm-spec-interpreter/ocaml/Makefile new file mode 100644 index 0000000000..de7b79d934 --- /dev/null +++ b/crates/fuzzing/wasm-spec-interpreter/ocaml/Makefile @@ -0,0 +1,33 @@ +# Build a library allowing FFI access to the Wasm spec interpreter. 
+ +OCAML_FLAGS := -g -keep-locs -runtime-variant _pic +# By default, we build in a sub-directory but we can override this with `make +# BUILD_DIR=...`. +BUILD_DIR := _build +# Currently the WebAssembly spec interpreter is buried in a Git submodule as is +# its build directory, `_build`. Cargo may not like that files are changing +# outside of `target` (TODO). +SPEC_DIR := spec/interpreter +SPEC_BUILD_DIR := $(SPEC_DIR)/_build +SPEC_LIB := $(SPEC_BUILD_DIR)/wasm.cmxa + + +# Build and package the static library, `libinterpret.a`. +$(BUILD_DIR)/libinterpret.a: $(BUILD_DIR)/interpret.lib.o + ar qs $@ $^ +$(BUILD_DIR)/interpret.lib.o: $(SPEC_LIB) $(BUILD_DIR)/interpret.cmx + ocamlopt $(OCAML_FLAGS) -I $(SPEC_BUILD_DIR) -o $@ -output-complete-obj $^ +$(BUILD_DIR)/interpret.cmx: interpret.ml $(SPEC_BUILD_DIR) $(BUILD_DIR) + ocamlopt $(OCAML_FLAGS) -I $(SPEC_BUILD_DIR) -o $@ -c -impl $< +$(BUILD_DIR): + mkdir -p $@ + + +# We also need to be able to build the spec's `wasm.cmxa`. +$(SPEC_LIB): + make -C $(SPEC_DIR) libopt + + +clean: + rm -rf $(BUILD_DIR) + make -C $(SPEC_DIR) clean diff --git a/crates/fuzzing/wasm-spec-interpreter/ocaml/README.md b/crates/fuzzing/wasm-spec-interpreter/ocaml/README.md new file mode 100644 index 0000000000..a7f4d218dd --- /dev/null +++ b/crates/fuzzing/wasm-spec-interpreter/ocaml/README.md @@ -0,0 +1,7 @@ +This directory contains the necessary parts for building a library with FFI +access to the Wasm spec interpreter. Its major parts: + - `spec`: the Wasm spec code as a Git submodule (you may need to retrieve it: + `git clone https://github.com/bytecodealliance/wasm-spec-mirror`).
+ - `interpret.ml`: a shim layer for calling the Wasm spec code and exposing it + for FFI access + - `Makefile`: the steps for gluing these pieces together into a static library diff --git a/crates/fuzzing/wasm-spec-interpreter/ocaml/interpret.ml b/crates/fuzzing/wasm-spec-interpreter/ocaml/interpret.ml new file mode 100644 index 0000000000..ead9aad28a --- /dev/null +++ b/crates/fuzzing/wasm-spec-interpreter/ocaml/interpret.ml @@ -0,0 +1,66 @@ +(* This module exposes an [interpret] function to Rust. It wraps several different calls from the +WebAssembly specification interpreter in a way that we can access across the FFI boundary. To +understand this better, see: + - the OCaml manual documentation re: calling OCaml from C, https://ocaml.org/manual/intfc.html#s%3Ac-advexample + - the [ocaml-interop] example, https://github.com/tezedge/ocaml-interop/blob/master/testing/rust-caller/ocaml/callable.ml +*) + +(* Here we access the WebAssembly specification interpreter; this must be linked in. *) +open Wasm + +(** Enumerate the types of values we pass across the FFI boundary. This must match `Value` in +`src/lib.rs` *) +type ffi_value = + | I32 of int32 + | I64 of int64 + | F32 of int32 + | F64 of int64 + +(** Helper for converting the FFI values to their spec interpreter type. *) +let convert_to_wasm (v: ffi_value) : Values.value = match v with +| I32 n -> Values.Num (I32 n) +| I64 n -> Values.Num (I64 n) +| F32 n -> Values.Num (F32 (F32.of_bits n)) +| F64 n -> Values.Num (F64 (F64.of_bits n)) + +(** Helper for converting the spec interpreter values to their FFI type. *) +let convert_from_wasm (v: Values.value) : ffi_value = match v with +| Values.Num (I32 n) -> I32 n +| Values.Num (I64 n) -> I64 n +| Values.Num (F32 n) -> F32 (F32.to_bits n) +| Values.Num (F64 n) -> F64 (F64.to_bits n) +| _ -> failwith "Unknown type" + +(** Parse the given WebAssembly module binary into an Ast.module_. 
At some point in the future this +should also be able to parse the textual form (TODO). *) +let parse bytes = + (* Optionally, use Bytes.unsafe_to_string here to avoid the copy *) + let bytes_as_str = Bytes.to_string bytes in + Decode.decode "default" bytes_as_str + +(** Return true if an export is a function. *) +let match_exported_func export = match export with +| (_, Instance.ExternFunc(func)) -> true +| _ -> false + +(** Extract a function from its export or fail. *) +let extract_exported_func export = match export with +| (_, Instance.ExternFunc(func)) -> func +| _ -> failwith "" + +(** Interpret the first exported function with the given parameters and return the result. *) +let interpret_exn module_bytes params = + let params' = List.map convert_to_wasm params in + let module_ = parse module_bytes in + let instance = Eval.init module_ [] in + let func = extract_exported_func (List.find match_exported_func instance.exports) in + let returns = Eval.invoke func params' in + let returns' = List.map convert_from_wasm returns in + returns' (* TODO eventually we should hash the memory state and return the hash *) + +let interpret module_bytes params = + try Ok(interpret_exn module_bytes params) with + | _ as e -> Error(Printexc.to_string e) + +let () = + Callback.register "interpret" interpret; diff --git a/crates/fuzzing/wasm-spec-interpreter/ocaml/spec b/crates/fuzzing/wasm-spec-interpreter/ocaml/spec new file mode 160000 index 0000000000..46cf27ce07 --- /dev/null +++ b/crates/fuzzing/wasm-spec-interpreter/ocaml/spec @@ -0,0 +1 @@ +Subproject commit 46cf27ce076e1213e45b89ffdc99df4499b3cb33 diff --git a/crates/fuzzing/wasm-spec-interpreter/src/lib.rs b/crates/fuzzing/wasm-spec-interpreter/src/lib.rs new file mode 100644 index 0000000000..a2f46a3a9a --- /dev/null +++ b/crates/fuzzing/wasm-spec-interpreter/src/lib.rs @@ -0,0 +1,33 @@ +//! This library provides a way to interpret Wasm functions in the official Wasm +//! 
specification interpreter, written in OCaml, from Rust. +//! +//! In order to not break Wasmtime's build, this library will always compile. It +//! does depend on certain tools (see `README.md`) that may or may not be +//! available in the environment: +//! - when the tools are available, we build and link to an OCaml static +//! library (see `with_library` module) +//! - when the tools are not available, this library will panic at runtime (see +//! `without_library` module). + +/// Enumerate the kinds of Wasm values. +#[derive(Clone, Debug, PartialEq)] +pub enum Value { + I32(i32), + I64(i64), + F32(i32), + F64(i64), +} + +#[cfg(feature = "has-libinterpret")] +mod with_library; +#[cfg(feature = "has-libinterpret")] +pub use with_library::*; + +#[cfg(not(feature = "has-libinterpret"))] +mod without_library; +#[cfg(not(feature = "has-libinterpret"))] +pub use without_library::*; + +// If the user is fuzzing, we expect the OCaml library to have been built. +#[cfg(all(fuzzing, not(feature = "has-libinterpret")))] +compile_error!("The OCaml library was not built."); diff --git a/crates/fuzzing/wasm-spec-interpreter/src/with_library.rs b/crates/fuzzing/wasm-spec-interpreter/src/with_library.rs new file mode 100644 index 0000000000..c8d7fec42b --- /dev/null +++ b/crates/fuzzing/wasm-spec-interpreter/src/with_library.rs @@ -0,0 +1,98 @@ +//! Interpret WebAssembly modules using the OCaml spec interpreter. +//! ``` +//! # use wasm_spec_interpreter::{Value, interpret}; +//! let module = wat::parse_file("tests/add.wat").unwrap(); +//! let parameters = vec![Value::I32(42), Value::I32(1)]; +//! let results = interpret(&module, parameters).unwrap(); +//! assert_eq!(results, &[Value::I32(43)]); +//! ``` +use crate::Value; +use lazy_static::lazy_static; +use ocaml_interop::{OCamlRuntime, ToOCaml}; +use std::sync::Mutex; + +lazy_static!
{ + static ref INTERPRET: Mutex<()> = Mutex::new(()); +} + +/// Interpret the first exported function in the passed WebAssembly module (in Wasm form, +/// currently, not WAT) with the given parameters. +pub fn interpret(module: &[u8], parameters: Vec<Value>) -> Result<Vec<Value>, String> { + // The OCaml runtime is not re-entrant + // (https://ocaml.org/manual/intfc.html#ss:parallel-execution-long-running-c-code). + // We need to make sure that only one Rust thread is executing at a time + // (using this lock) or we can observe `SIGSEGV` failures while running + // `cargo test`. + let _lock = INTERPRET.lock().unwrap(); + // Here we use an unsafe approach to initializing the `OCamlRuntime` based + // on the discussion in https://github.com/tezedge/ocaml-interop/issues/35. + // This was the recommendation to resolve seeing errors like `boxroot is not + // setup` followed by a `SIGSEGV`; this is similar to the testing approach + // in + // https://github.com/tezedge/ocaml-interop/blob/master/testing/rust-caller/src/lib.rs + // and is only as safe as the OCaml code running underneath. + OCamlRuntime::init_persistent(); + let ocaml_runtime = unsafe { OCamlRuntime::recover_handle() }; + // Parse and execute, returning results converted to Rust. + let module = module.to_boxroot(ocaml_runtime); + let parameters = parameters.to_boxroot(ocaml_runtime); + let results = ocaml_bindings::interpret(ocaml_runtime, &module, &parameters); + results.to_rust(ocaml_runtime) +} + +// Here we declare which functions we will use from the OCaml library. See +// https://docs.rs/ocaml-interop/0.8.4/ocaml_interop/index.html#example. +mod ocaml_bindings { + use super::*; + use ocaml_interop::{ + impl_conv_ocaml_variant, ocaml, OCamlBytes, OCamlInt32, OCamlInt64, OCamlList, + }; + + // Using this macro converts the enum both ways: Rust to OCaml and OCaml to + // Rust. See + // https://docs.rs/ocaml-interop/0.8.4/ocaml_interop/macro.impl_conv_ocaml_variant.html. + impl_conv_ocaml_variant!
{ + Value { + Value::I32(i: OCamlInt32), + Value::I64(i: OCamlInt64), + Value::F32(i: OCamlInt32), + Value::F64(i: OCamlInt64), + } + } + + // These functions must be exposed from OCaml with: + // `Callback.register "interpret" interpret` + // + // In Rust, this function becomes: + // `pub fn interpret(_: &mut OCamlRuntime, ...: OCamlRef<...>) -> BoxRoot<...>;` + ocaml! { + pub fn interpret(module: OCamlBytes, params: OCamlList<Value>) -> Result<OCamlList<Value>, String>; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn multiple() { + let module = wat::parse_file("tests/add.wat").unwrap(); + let parameters = vec![Value::I32(42), Value::I32(1)]; + let results1 = interpret(&module, parameters.clone()).unwrap(); + let results2 = interpret(&module, parameters.clone()).unwrap(); + assert_eq!(results1, results2); + let results3 = interpret(&module, parameters).unwrap(); + assert_eq!(results2, results3); + } + + #[test] + fn oob() { + let module = wat::parse_file("tests/oob.wat").unwrap(); + let parameters = vec![]; + let results = interpret(&module, parameters); + assert_eq!( + results, + Err("Error(_, \"out of bounds memory access\")".to_string()) + ); + } +} diff --git a/crates/fuzzing/wasm-spec-interpreter/src/without_library.rs b/crates/fuzzing/wasm-spec-interpreter/src/without_library.rs new file mode 100644 index 0000000000..899a592196 --- /dev/null +++ b/crates/fuzzing/wasm-spec-interpreter/src/without_library.rs @@ -0,0 +1,17 @@ +//! Panic when interpreting WebAssembly modules; see the rationale for this in +//! `lib.rs`. +//! +//! ```should_panic +//! # use wasm_spec_interpreter::interpret; +//! let _ = interpret(&[], vec![]); +//! ``` + +use crate::Value; + +#[allow(dead_code)] +pub fn interpret(_module: &[u8], _parameters: Vec<Value>) -> Result<Vec<Value>, String> { + panic!( + "wasm-spec-interpreter was built without its Rust-to-OCaml shim \ + library; re-compile with the dependencies listed in its README.md."
+ ); +} diff --git a/crates/fuzzing/wasm-spec-interpreter/tests/add.wat b/crates/fuzzing/wasm-spec-interpreter/tests/add.wat new file mode 100644 index 0000000000..e664c99b4e --- /dev/null +++ b/crates/fuzzing/wasm-spec-interpreter/tests/add.wat @@ -0,0 +1,5 @@ +(module + (func (export "add") (param $a i32) (param $b i32) (result i32) + local.get $a + local.get $b + i32.add)) diff --git a/crates/fuzzing/wasm-spec-interpreter/tests/oob.wat b/crates/fuzzing/wasm-spec-interpreter/tests/oob.wat new file mode 100644 index 0000000000..8cdbc6e978 --- /dev/null +++ b/crates/fuzzing/wasm-spec-interpreter/tests/oob.wat @@ -0,0 +1,5 @@ +(module + (memory (;0;) 0 0) + (func (export "oob") + i32.const 42 + f32.load align=1))