Add a crate to interface with the WebAssembly spec interpreter

The WebAssembly spec interpreter is written in OCaml and the new crate
uses `ocaml-interop` along with a small OCaml wrapper to interpret Wasm
modules in-process. The build process for this crate is currently
Linux-specific: it requires several OCaml packages (e.g. `apt install -y
ocaml-nox ocamlbuild`) as well as `make`, `cp`, and `ar`.
This commit is contained in:
Andrew Brown
2021-07-28 13:12:47 -07:00
parent 2e95d4e7c6
commit a7f592a026
16 changed files with 442 additions and 0 deletions

4
.gitmodules vendored
View File

@@ -13,3 +13,7 @@
[submodule "crates/wasi-crypto/spec"]
path = crates/wasi-crypto/spec
url = https://github.com/WebAssembly/wasi-crypto.git
[submodule "crates/fuzzing/wasm-spec-interpreter/ocaml/spec"]
path = crates/fuzzing/wasm-spec-interpreter/ocaml/spec
url = https://github.com/WebAssembly/spec
shallow = true

51
Cargo.lock generated
View File

@@ -950,6 +950,12 @@ dependencies = [
"cipher",
]
[[package]]
name = "cty"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7313c0d620d0cb4dbd9d019e461a4beb501071ff46ec0ab933efb4daa76d73e3"
[[package]]
name = "curve25519-dalek"
version = "3.1.0"
@@ -1899,6 +1905,35 @@ dependencies = [
"memchr",
]
[[package]]
name = "ocaml-boxroot-sys"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5186393bfbee4ce2bc5bbb82beafb77e85c1d0a557e3cfc8c8a0d63d7845fed5"
dependencies = [
"cc",
]
[[package]]
name = "ocaml-interop"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "feed1425b18c848323dceeed5fc35a085b07b58783b040560e3a05dfe2427753"
dependencies = [
"ocaml-boxroot-sys",
"ocaml-sys",
"static_assertions",
]
[[package]]
name = "ocaml-sys"
version = "0.20.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e7dee4179690e6af128fce2fab845ab45a833d04e117f5fdcafab1ba26ce518"
dependencies = [
"cty",
]
[[package]]
name = "once_cell"
version = "1.7.2"
@@ -2832,6 +2867,12 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
[[package]]
name = "static_assertions"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "strsim"
version = "0.8.0"
@@ -3465,6 +3506,15 @@ dependencies = [
"wasm-encoder",
]
[[package]]
name = "wasm-spec-interpreter"
version = "0.1.0"
dependencies = [
"lazy_static",
"ocaml-interop",
"wat",
]
[[package]]
name = "wasmi"
version = "0.7.0"
@@ -3727,6 +3777,7 @@ dependencies = [
"rayon",
"wasm-encoder",
"wasm-smith",
"wasm-spec-interpreter",
"wasmi",
"wasmparser",
"wasmprinter",

View File

@@ -19,7 +19,13 @@ wasmtime = { path = "../wasmtime" }
wasmtime-wast = { path = "../wast" }
wasm-encoder = "0.6.0"
wasm-smith = "0.6.0"
wasm-spec-interpreter = { path = "./wasm-spec-interpreter" }
wasmi = "0.7.0"
[dev-dependencies]
wat = "1.0.37"
# We only build the library containing the OCaml spec interpreter if the OCaml
# toolchain is available--which is assumed here to be the case when fuzzing.
[target.'cfg(fuzzing)'.dependencies]
wasm-spec-interpreter = { path = "./wasm-spec-interpreter", features = ["build-libinterpret"] }

View File

@@ -0,0 +1,22 @@
[package]
authors = ["The Wasmtime Project Developers"]
description = "A Rust-to-OCaml wrapper for the WebAssembly specification interpreter"
name = "wasm-spec-interpreter"
version = "0.1.0"
publish = false
edition = "2018"
license = "Apache-2.0 WITH LLVM-exception"
# Until https://gitlab.com/ocaml-rust/ocaml-boxroot/-/issues/1 is resolved and
# this crate can use the `without-ocamlopt` feature to avoid build failures, it
# is better to only build the OCaml dependencies when fuzzing (see the
# `build-libinterpret` feature set by this crate's parent).
[dependencies]
ocaml-interop = { version = "0.8", optional = true }
lazy_static = { version = "1.4", optional = true }
[dev-dependencies]
wat = "1.0"
[features]
build-libinterpret = ["ocaml-interop", "lazy_static"]

View File

@@ -0,0 +1,39 @@
wasm-spec-interpreter
=====================
This project shows how to use `ocaml-interop` to call into the Wasm spec
interpreter. There are several steps to making this work:
- building the OCaml Wasm spec interpreter as a static library
- building a Rust-to-OCaml FFI bridge using `ocaml-interop` and a custom OCaml
wrapper
- linking both things into a Rust crate
### Dependencies
This crate only builds in an environment with:
- `make` (the Wasm spec interpreter uses a `Makefile`)
- `ocamlopt`, `ocamlbuild` (available with, e.g., `dnf install ocaml`)
- Linux tools (e.g. `ar`); currently it is easiest to build the static
libraries in a single environment but this could be fixed in the future (TODO)
Remember to retrieve the Wasm spec submodule:
```
git clone ... --recursive
```
### Build
```
RUSTFLAGS=--cfg=fuzzing cargo build
```
Use `FFI_LIB_DIR=path/to/lib/...` to specify a different location for the static
library (this is mainly for debugging). If the `--cfg=fuzzing` configuration is
not provided, this crate will build successfully but fail at runtime.
### Test
```
RUSTFLAGS=--cfg=fuzzing cargo test
```

View File

@@ -0,0 +1,54 @@
/// Build the OCaml code and statically link it into the Rust library; see the
/// [ocaml-interop
/// example](https://github.com/tezedge/ocaml-interop/blob/master/testing/rust-caller/build.rs)
/// for more details. After playing with this a bit, I discovered that the best
/// approach to avoid missing symbols was to imitate `dune`: I observed `rm -rf
/// _build && dune build ./ocaml/interpret.exe.o --display=verbose` and used
/// that as a pattern, now encoded in `ocaml/Makefile` for easier debugging.
use std::{env, process::Command};
/// Base name of the static library (`lib<name>.a`) and of the OCaml shim
/// source file (`<name>.ml`).
const LIB_NAME: &str = "interpret";
/// Directory, relative to the crate root, that holds the OCaml sources and
/// their `Makefile`.
const OCAML_DIR: &str = "ocaml";
/// Build-script entry point: the OCaml library is only built when the
/// `build-libinterpret` feature is enabled; otherwise this is a no-op.
fn main() {
    let build_requested = cfg!(feature = "build-libinterpret");
    if build_requested {
        build();
    }
}
/// Locate (or build) the OCaml static library and emit the Cargo directives
/// needed to link it into this crate.
///
/// If the `FFI_LIB_DIR` environment variable is set, the library is expected
/// to already exist in that directory; otherwise it is built into Cargo's
/// `OUT_DIR` with `make`.
///
/// # Panics
///
/// Panics if `OUT_DIR` is unset, if `FFI_LIB_DIR` is not valid UTF-8, or if
/// building the library fails.
fn build() {
    let out_dir = env::var("OUT_DIR").expect("Cargo sets OUT_DIR for build scripts");

    // Re-run if changed. Once any `rerun-if-*` directive is emitted Cargo only
    // watches what is listed, so the `FFI_LIB_DIR` override must be declared
    // explicitly or changing it would not re-run this script.
    println!("cargo:rerun-if-changed={}/{}.ml", OCAML_DIR, LIB_NAME);
    println!("cargo:rerun-if-changed={}/Makefile", OCAML_DIR);
    println!("cargo:rerun-if-env-changed=FFI_LIB_DIR");

    let search_dir = match env::var_os("FFI_LIB_DIR") {
        // Link with a library provided in the `FFI_LIB_DIR`.
        Some(other_dir) => other_dir
            .into_string()
            .expect("FFI_LIB_DIR must be valid UTF-8"),
        // Build the library to link to.
        None => {
            build_lib(&out_dir, OCAML_DIR);
            out_dir
        }
    };
    println!("cargo:rustc-link-search={}", search_dir);
    println!("cargo:rustc-link-lib=static={}", LIB_NAME);

    // Enabling this feature alerts the compiler to use the `with_library`
    // module.
    println!("cargo:rustc-cfg=feature=\"has-libinterpret\"");
}
// Run `make` inside `ocaml_dir`, directing its output into `out_dir` (Cargo's
// `out` directory) via the `BUILD_DIR` variable. Panics if `make` cannot be
// started or exits unsuccessfully.
fn build_lib(out_dir: &str, ocaml_dir: &str) {
    let build_dir_arg = format!("BUILD_DIR={}", out_dir);
    let mut make = Command::new("make");
    make.arg(build_dir_arg).current_dir(ocaml_dir);

    let exit = make
        .status()
        .expect("Failed to execute 'make' command to build OCaml library");
    if !exit.success() {
        panic!("Failed to build the OCaml library using 'make'.");
    }
}

View File

@@ -0,0 +1 @@
_build

View File

@@ -0,0 +1,33 @@
# Build a library allowing FFI access to the Wasm spec interpreter.
OCAML_FLAGS := -g -keep-locs -runtime-variant _pic

# By default, we build in a sub-directory but we can override this with `make
# BUILD_DIR=...`.
BUILD_DIR := _build

# Currently the WebAssembly spec interpreter is buried in a Git submodule as is
# its build directory, `_build`. Cargo may not like that files are changing
# outside of `target` (TODO).
SPEC_DIR := spec/interpreter
SPEC_BUILD_DIR := $(SPEC_DIR)/_build
SPEC_LIB := $(SPEC_BUILD_DIR)/wasm.cmxa

# Build and package the static library, `libinterpret.a`. `ar rcs` replaces an
# existing member instead of appending a second copy, so rebuilding after a
# source change never leaves a stale object in the archive.
$(BUILD_DIR)/libinterpret.a: $(BUILD_DIR)/interpret.lib.o
	ar rcs $@ $^

$(BUILD_DIR)/interpret.lib.o: $(SPEC_LIB) $(BUILD_DIR)/interpret.cmx
	ocamlopt $(OCAML_FLAGS) -I $(SPEC_BUILD_DIR) -o $@ -output-complete-obj $^

# The shim is compiled against the spec library's build output, so it depends
# on `$(SPEC_LIB)`. The build directory is an order-only prerequisite: it must
# exist, but its timestamp must not trigger rebuilds.
$(BUILD_DIR)/interpret.cmx: interpret.ml $(SPEC_LIB) | $(BUILD_DIR)
	ocamlopt $(OCAML_FLAGS) -I $(SPEC_BUILD_DIR) -o $@ -c -impl $<

$(BUILD_DIR):
	mkdir -p $@

# We also need to be able to build the spec's `wasm.cmxa`.
$(SPEC_LIB):
	$(MAKE) -C $(SPEC_DIR) libopt

.PHONY: clean
clean:
	rm -rf $(BUILD_DIR)
	$(MAKE) -C $(SPEC_DIR) clean

View File

@@ -0,0 +1,7 @@
This directory contains the necessary parts for building a library with FFI
access to the Wasm spec interpreter. Its major parts:
- `spec`: the Wasm spec code as a Git submodule (you may need to retrieve it:
`git clone https://github.com/bytecodealliance/wasm-spec-mirror`).
- `interpret.ml`: a shim layer for calling the Wasm spec code and exposing it
for FFI access
- `Makefile`: the steps for gluing these pieces together into a static library

View File

@@ -0,0 +1,66 @@
(* This module exposes an [interpret] function to Rust. It wraps several different calls from the
WebAssembly specification interpreter in a way that we can access across the FFI boundary. To
understand this better, see:
- the OCaml manual documentation re: calling OCaml from C, https://ocaml.org/manual/intfc.html#s%3Ac-advexample
- the [ocaml-interop] example, https://github.com/tezedge/ocaml-interop/blob/master/testing/rust-caller/ocaml/callable.ml
*)
(* Here we access the WebAssembly specification interpreter; this must be linked in. *)
open Wasm
(** Enumerate the types of values we pass across the FFI boundary. This must match `Value` in
`src/lib.rs`. Floating-point values are carried as integers holding their bit patterns: the
conversion helpers in this file use [F32.of_bits]/[F32.to_bits] and [F64.of_bits]/[F64.to_bits]
to translate them. *)
type ffi_value =
| I32 of int32
| I64 of int64
| F32 of int32
| F64 of int64
(** Translate a value received over the FFI boundary into the spec interpreter's value type;
    float payloads are rebuilt from their bit patterns. *)
let convert_to_wasm : ffi_value -> Values.value = function
  | I32 bits -> Values.Num (I32 bits)
  | I64 bits -> Values.Num (I64 bits)
  | F32 bits -> Values.Num (F32 (F32.of_bits bits))
  | F64 bits -> Values.Num (F64 (F64.of_bits bits))
(** Translate a spec interpreter value into its FFI representation; float payloads are reduced
    to their bit patterns. Any value that is not one of the four numeric kinds is rejected. *)
let convert_from_wasm : Values.value -> ffi_value = function
  | Values.Num (I32 x) -> I32 x
  | Values.Num (I64 x) -> I64 x
  | Values.Num (F32 x) -> F32 (F32.to_bits x)
  | Values.Num (F64 x) -> F64 (F64.to_bits x)
  | _ -> failwith "Unknown type"
(** Decode a WebAssembly module from its binary encoding into an Ast.module_. Parsing the
    textual form is not supported yet (TODO). *)
let parse bytes =
  (* [Bytes.to_string] copies; [Bytes.unsafe_to_string] could avoid that copy. *)
  bytes |> Bytes.to_string |> Decode.decode "default"
(** Return true if an export is a function. The function payload itself is not needed here, so
    it is matched with a wildcard rather than bound to an unused name. *)
let match_exported_func export = match export with
  | (_, Instance.ExternFunc _) -> true
  | _ -> false
(** Extract a function from its export, or fail with a descriptive message if the export is
    not a function. *)
let extract_exported_func export = match export with
  | (_, Instance.ExternFunc func) -> func
  | _ -> failwith "expected the export to be a function"
(** Instantiate the module, invoke its first exported function with [params] and return the
    converted results. Nothing is caught here: any exception raised along the way propagates
    to the caller. *)
let interpret_exn module_bytes params =
  let args = List.map convert_to_wasm params in
  let instance = Eval.init (parse module_bytes) [] in
  let func =
    instance.exports |> List.find match_exported_func |> extract_exported_func in
  (* TODO eventually we should hash the memory state and return the hash *)
  Eval.invoke func args |> List.map convert_from_wasm
(** Exception-free wrapper around [interpret_exn]: the results on success, or the printed form
    of whichever exception was raised. *)
let interpret module_bytes params =
  match interpret_exn module_bytes params with
  | returns -> Ok returns
  | exception e -> Error (Printexc.to_string e)
(* Register [interpret] under the name "interpret" so it can be looked up from outside OCaml. *)
let () = Callback.register "interpret" interpret

View File

@@ -0,0 +1,33 @@
//! This library provides a way to interpret Wasm functions in the official Wasm
//! specification interpreter, written in OCaml, from Rust.
//!
//! In order to not break Wasmtime's build, this library will always compile. It
//! does depend on certain tools (see `README.md`) that may or may not be
//! available in the environment:
//! - when the tools are available, we build and link to an OCaml static
//! library (see `with_library` module)
//! - when the tools are not available, this library will panic at runtime (see
//! `without_library` module).
/// Enumerate the kinds of Wasm values.
///
/// Floating-point values are carried as integers holding their bit patterns;
/// the OCaml shim (`ocaml/interpret.ml`) converts them with `of_bits` and
/// `to_bits`. That shim's `ffi_value` type must match this enum.
#[derive(Clone, Debug, PartialEq)]
pub enum Value {
/// A 32-bit integer.
I32(i32),
/// A 64-bit integer.
I64(i64),
/// A 32-bit float, stored as its bit pattern.
F32(i32),
/// A 64-bit float, stored as its bit pattern.
F64(i64),
}
#[cfg(feature = "has-libinterpret")]
mod with_library;
#[cfg(feature = "has-libinterpret")]
pub use with_library::*;
#[cfg(not(feature = "has-libinterpret"))]
mod without_library;
#[cfg(not(feature = "has-libinterpret"))]
pub use without_library::*;
// If the user is fuzzing, we expect the OCaml library to have been built.
#[cfg(all(fuzzing, not(feature = "has-libinterpret")))]
compile_error!("The OCaml library was not built.");

View File

@@ -0,0 +1,98 @@
//! Interpret WebAssembly modules using the OCaml spec interpreter.
//! ```
//! # use wasm_spec_interpreter::{Value, interpret};
//! let module = wat::parse_file("tests/add.wat").unwrap();
//! let parameters = vec![Value::I32(42), Value::I32(1)];
//! let results = interpret(&module, parameters).unwrap();
//! assert_eq!(results, &[Value::I32(43)]);
//! ```
use crate::Value;
use lazy_static::lazy_static;
use ocaml_interop::{OCamlRuntime, ToOCaml};
use std::sync::Mutex;
lazy_static! {
/// Global lock taken by `interpret` so that only one Rust thread at a time
/// calls into the OCaml runtime.
static ref INTERPRET: Mutex<()> = Mutex::new(());
}
/// Interpret the first exported function of the passed WebAssembly module (in
/// binary Wasm form, currently, not WAT) with the given parameters.
///
/// # Errors
///
/// Returns `Err` with the printed form of the OCaml exception when the OCaml
/// side raises one (the `oob` test shows an out-of-bounds memory access
/// surfacing this way).
///
/// # Panics
///
/// Panics if the `INTERPRET` lock is poisoned.
pub fn interpret(module: &[u8], parameters: Vec<Value>) -> Result<Vec<Value>, String> {
// The OCaml runtime is not re-entrant
// (https://ocaml.org/manual/intfc.html#ss:parallel-execution-long-running-c-code).
// We need to make sure that only one Rust thread is executing at a time
// (using this lock) or we can observe `SIGSEGV` failures while running
// `cargo test`. The guard is held until this function returns.
let _lock = INTERPRET.lock().unwrap();
// Here we use an unsafe approach to initializing the `OCamlRuntime` based
// on the discussion in https://github.com/tezedge/ocaml-interop/issues/35.
// This was the recommendation to resolve seeing errors like `boxroot is not
// setup` followed by a `SIGSEGV`; this is similar to the testing approach
// in
// https://github.com/tezedge/ocaml-interop/blob/master/testing/rust-caller/src/lib.rs
// and is only as safe as the OCaml code running underneath.
OCamlRuntime::init_persistent();
// NOTE(review): soundness presumably relies on `init_persistent` having run
// just above and on `_lock` keeping other threads out — confirm against the
// `ocaml-interop` documentation for `recover_handle`.
let ocaml_runtime = unsafe { OCamlRuntime::recover_handle() };
// Parse and execute, returning results converted to Rust.
let module = module.to_boxroot(ocaml_runtime);
let parameters = parameters.to_boxroot(ocaml_runtime);
let results = ocaml_bindings::interpret(ocaml_runtime, &module, &parameters);
results.to_rust(ocaml_runtime)
}
// Here we declare which functions we will use from the OCaml library. See
// https://docs.rs/ocaml-interop/0.8.4/ocaml_interop/index.html#example.
mod ocaml_bindings {
use super::*;
use ocaml_interop::{
impl_conv_ocaml_variant, ocaml, OCamlBytes, OCamlInt32, OCamlInt64, OCamlList,
};
// Using this macro converts the enum both ways: Rust to OCaml and OCaml to
// Rust. See
// https://docs.rs/ocaml-interop/0.8.4/ocaml_interop/macro.impl_conv_ocaml_variant.html.
// The variants are listed in the same order as `ffi_value` in
// `ocaml/interpret.ml`, which states that the two definitions must match.
// Floats cross the boundary as integer bit patterns, hence the integer OCaml
// types for `F32` and `F64`.
impl_conv_ocaml_variant! {
Value {
Value::I32(i: OCamlInt32),
Value::I64(i: OCamlInt64),
Value::F32(i: OCamlInt32),
Value::F64(i: OCamlInt64),
}
}
// These functions must be exposed from OCaml with:
// `Callback.register "interpret" interpret`
//
// In Rust, this function becomes:
// `pub fn interpret(_: &mut OCamlRuntime, ...: OCamlRef<...>) -> BoxRoot<...>;`
ocaml! {
pub fn interpret(module: OCamlBytes, params: OCamlList<Value>) -> Result<OCamlList<Value>, String>;
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Interpreting the same module several times in a row yields the same
    /// results each time.
    #[test]
    fn multiple() {
        let wasm = wat::parse_file("tests/add.wat").unwrap();
        let args = vec![Value::I32(42), Value::I32(1)];

        let first = interpret(&wasm, args.clone()).unwrap();
        let second = interpret(&wasm, args.clone()).unwrap();
        assert_eq!(first, second);

        let third = interpret(&wasm, args).unwrap();
        assert_eq!(second, third);
    }

    /// An out-of-bounds memory access is reported as an `Err` carrying the
    /// printed OCaml exception.
    #[test]
    fn oob() {
        let wasm = wat::parse_file("tests/oob.wat").unwrap();
        let outcome = interpret(&wasm, Vec::new());
        let expected = Err(String::from("Error(_, \"out of bounds memory access\")"));
        assert_eq!(outcome, expected);
    }
}

View File

@@ -0,0 +1,17 @@
//! Panic when interpreting WebAssembly modules; see the rationale for this in
//! `lib.rs`.
//!
//! ```should_panic
//! # use wasm_spec_interpreter::interpret;
//! let _ = interpret(&[], vec![]);
//! ```
use crate::Value;
/// Stand-in for the real interpreter, compiled when the Rust-to-OCaml shim
/// library was not built.
///
/// # Panics
///
/// Always panics, pointing the user at the README for the missing build
/// dependencies; both arguments are ignored.
#[allow(dead_code)]
pub fn interpret(_module: &[u8], _parameters: Vec<Value>) -> Result<Vec<Value>, String> {
    panic!(
        "wasm-spec-interpreter was built without its Rust-to-OCaml shim library; \
         re-compile with the dependencies listed in its README.md."
    )
}

View File

@@ -0,0 +1,5 @@
(module
(func (export "add") (param $a i32) (param $b i32) (result i32)
local.get $a
local.get $b
i32.add))

View File

@@ -0,0 +1,5 @@
(module
(memory (;0;) 0 0)
(func (export "oob")
i32.const 42
f32.load align=1))