Minor instantiation benchmark updates (#3790)

This commit has a few minor updates and some improvements to the instantiation benchmark harness:

* A `once_cell::unsync::Lazy` type is now used to guard creation of modules/engines/etc. This makes running a single benchmark much faster, since the harness no longer compiles the modules of all the other benchmarks that are filtered out. Unfortunately I couldn't find a way in criterion to test whether a `BenchmarkId` is filtered out or not, so we rely on runtime laziness to initialize the state on the first run of the benchmarks that actually execute.
* All files located in `benches/instantiation` are now loaded for benchmarking instead of a hardcoded list. This makes it a bit easier to throw files into the directory and have them benchmarked, instead of having to recompile when working with new files.
* Finally, a module deserialization benchmark was added to measure the time it takes to deserialize a precompiled module from disk (inspired by discussion on #3787).

While I was at it I also upped some limits to be able to instantiate cfallin's `spidermonkey.wasm`.
2022-02-10 15:40:30 -06:00
parent 520a7f26d7
commit 1cb08d4e67
3 changed files with 120 additions and 97 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3496,6 +3496,7 @@ dependencies = [
 "memchr",
 "more-asserts",
 "num_cpus",
+ "once_cell",
 "pretty_env_logger",
 "rayon",
 "rustix",
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -62,6 +62,7 @@ winapi = { version = "0.3.9", features = ['memoryapi'] }
 memchr = "2.4"
 async-trait = "0.1"
 wat = "1.0.41"
+once_cell = "1.9.0"

 [build-dependencies]
 anyhow = "1.0.19"
--- a/benches/instantiation.rs
+++ b/benches/instantiation.rs
@@ -1,6 +1,7 @@
 use anyhow::Result;
 use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
-use std::path::{Path, PathBuf};
+use once_cell::unsync::Lazy;
+use std::path::Path;
 use std::process::Command;
 use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering::SeqCst};
 use std::sync::Arc;
@@ -16,7 +17,6 @@ fn store(engine: &Engine) -> Store<WasiCtx> {
 fn instantiate(pre: &InstancePre<WasiCtx>, engine: &Engine) -> Result<()> {
    let mut store = store(engine);
    let _instance = pre.instantiate(&mut store)?;
-
    Ok(())
 }

@@ -34,27 +34,32 @@ fn bench_sequential(c: &mut Criterion, path: &Path) {
    let mut group = c.benchmark_group("sequential");

    for strategy in strategies() {
-        let mut config = Config::default();
-        config.allocation_strategy(strategy.clone());
-
-        let engine = Engine::new(&config).expect("failed to create engine");
-        let module = Module::from_file(&engine, path)
-            .unwrap_or_else(|e| panic!("failed to load benchmark `{}`: {:?}", path.display(), e));
-        let mut linker = Linker::new(&engine);
-        wasmtime_wasi::add_to_linker(&mut linker, |cx| cx).unwrap();
-        let pre = linker
-            .instantiate_pre(&mut store(&engine), &module)
-            .expect("failed to pre-instantiate");
-
-        group.bench_function(
-            BenchmarkId::new(
-                benchmark_name(&strategy),
-                path.file_name().unwrap().to_str().unwrap(),
-            ),
-            |b| {
-                b.iter(|| instantiate(&pre, &engine).expect("failed to instantiate module"));
-            },
+        let id = BenchmarkId::new(
+            benchmark_name(&strategy),
+            path.file_name().unwrap().to_str().unwrap(),
        );
+        let state = Lazy::new(|| {
+            let mut config = Config::default();
+            config.allocation_strategy(strategy.clone());
+
+            let engine = Engine::new(&config).expect("failed to create engine");
+            let module = Module::from_file(&engine, path).unwrap_or_else(|e| {
+                panic!("failed to load benchmark `{}`: {:?}", path.display(), e)
+            });
+            let mut linker = Linker::new(&engine);
+            wasmtime_wasi::add_to_linker(&mut linker, |cx| cx).unwrap();
+            let pre = linker
+                .instantiate_pre(&mut store(&engine), &module)
+                .expect("failed to pre-instantiate");
+            (engine, pre)
+        });
+
+        group.bench_function(id, |b| {
+            let (engine, pre) = &*state;
+            b.iter(|| {
+                instantiate(&pre, &engine).expect("failed to instantiate module");
+            });
+        });
    }

    group.finish();
@@ -64,79 +69,101 @@ fn bench_parallel(c: &mut Criterion, path: &Path) {
    let mut group = c.benchmark_group("parallel");

    for strategy in strategies() {
-        let mut config = Config::default();
-        config.allocation_strategy(strategy.clone());
+        let state = Lazy::new(|| {
+            let mut config = Config::default();
+            config.allocation_strategy(strategy.clone());

-        let engine = Engine::new(&config).expect("failed to create engine");
-        let module = Module::from_file(&engine, path).expect("failed to load WASI example module");
-        let mut linker = Linker::new(&engine);
-        wasmtime_wasi::add_to_linker(&mut linker, |cx| cx).unwrap();
-        let pre = Arc::new(
-            linker
-                .instantiate_pre(&mut store(&engine), &module)
-                .expect("failed to pre-instantiate"),
-        );
+            let engine = Engine::new(&config).expect("failed to create engine");
+            let module =
+                Module::from_file(&engine, path).expect("failed to load WASI example module");
+            let mut linker = Linker::new(&engine);
+            wasmtime_wasi::add_to_linker(&mut linker, |cx| cx).unwrap();
+            let pre = Arc::new(
+                linker
+                    .instantiate_pre(&mut store(&engine), &module)
+                    .expect("failed to pre-instantiate"),
+            );
+            (engine, pre)
+        });

        for threads in 1..=num_cpus::get_physical() {
-            group.bench_function(
-                BenchmarkId::new(
-                    benchmark_name(&strategy),
-                    format!(
-                        "{}: with {} background thread{}",
-                        path.file_name().unwrap().to_str().unwrap(),
-                        threads,
-                        if threads == 1 { "" } else { "s" }
-                    ),
-                ),
-                |b| {
-                    // Spin up N-1 threads doing background instantiations to
-                    // simulate concurrent instantiations.
-                    let done = Arc::new(AtomicBool::new(false));
-                    let count = Arc::new(AtomicUsize::new(0));
-                    let workers = (0..threads - 1)
-                        .map(|_| {
-                            let pre = pre.clone();
-                            let done = done.clone();
-                            let engine = engine.clone();
-                            let count = count.clone();
-                            thread::spawn(move || {
-                                count.fetch_add(1, SeqCst);
-                                while !done.load(SeqCst) {
-                                    instantiate(&pre, &engine).unwrap();
-                                }
-                            })
-                        })
-                        .collect::<Vec<_>>();
-
-                    // Wait for our workers to all get started and have
-                    // instantiated their first module, at which point they'll
-                    // all be spinning.
-                    while count.load(SeqCst) != threads - 1 {
-                        thread::yield_now();
-                    }
-
-                    // Now that our background work is configured we can
-                    // benchmark the amount of time it takes to instantiate this
-                    // module.
-                    b.iter(|| {
-                        instantiate(&pre, &engine).expect("failed to instantiate module");
-                    });
-
-                    // Shut down this benchmark iteration by signalling to
-                    // worker threads they should exit and then wait for them to
-                    // have reached the exit point.
-                    done.store(true, SeqCst);
-                    for t in workers {
-                        t.join().unwrap();
-                    }
-                },
+            let name = format!(
+                "{}: with {} thread{}",
+                path.file_name().unwrap().to_str().unwrap(),
+                threads,
+                if threads == 1 { "" } else { "s" }
            );
+            let id = BenchmarkId::new(benchmark_name(&strategy), name);
+            group.bench_function(id, |b| {
+                let (engine, pre) = &*state;
+                // Spin up N-1 threads doing background instantiations to
+                // simulate concurrent instantiations.
+                let done = Arc::new(AtomicBool::new(false));
+                let count = Arc::new(AtomicUsize::new(0));
+                let workers = (0..threads - 1)
+                    .map(|_| {
+                        let pre = pre.clone();
+                        let done = done.clone();
+                        let engine = engine.clone();
+                        let count = count.clone();
+                        thread::spawn(move || {
+                            count.fetch_add(1, SeqCst);
+                            while !done.load(SeqCst) {
+                                instantiate(&pre, &engine).unwrap();
+                            }
+                        })
+                    })
+                    .collect::<Vec<_>>();
+
+                // Wait for our workers to all get started and have
+                // instantiated their first module, at which point they'll
+                // all be spinning.
+                while count.load(SeqCst) != threads - 1 {
+                    thread::yield_now();
+                }
+
+                // Now that our background work is configured we can
+                // benchmark the amount of time it takes to instantiate this
+                // module.
+                b.iter(|| {
+                    instantiate(&pre, &engine).expect("failed to instantiate module");
+                });
+
+                // Shut down this benchmark iteration by signalling to
+                // worker threads they should exit and then wait for them to
+                // have reached the exit point.
+                done.store(true, SeqCst);
+                for t in workers {
+                    t.join().unwrap();
+                }
+            });
        }
    }

    group.finish();
 }

+fn bench_deserialize_module(c: &mut Criterion, path: &Path) {
+    let mut group = c.benchmark_group("deserialize");
+
+    let name = path.file_name().unwrap().to_str().unwrap();
+    let tmpfile = tempfile::NamedTempFile::new().unwrap();
+    let state = Lazy::new(|| {
+        let engine = Engine::default();
+        let module = Module::from_file(&engine, path).expect("failed to load WASI example module");
+        std::fs::write(tmpfile.path(), module.serialize().unwrap()).unwrap();
+        (engine, tmpfile.path())
+    });
+    group.bench_function(BenchmarkId::new("deserialize", name), |b| {
+        let (engine, path) = &*state;
+        b.iter(|| unsafe {
+            Module::deserialize_file(&engine, path).unwrap();
+        });
+    });
+
+    group.finish();
+}
+
 fn build_wasi_example() {
    println!("Building WASI example module...");
    if !Command::new("cargo")
@@ -166,19 +193,12 @@ fn build_wasi_example() {

 fn bench_instantiation(c: &mut Criterion) {
    build_wasi_example();
-    let modules = &[
-        "empty.wat",
-        "small_memory.wat",
-        "data_segments.wat",
-        "wasi.wasm",
-    ];
-    for module in modules {
-        let mut path = PathBuf::new();
-        path.push("benches");
-        path.push("instantiation");
-        path.push(module);
+
+    for file in std::fs::read_dir("benches/instantiation").unwrap() {
+        let path = file.unwrap().path();
        bench_sequential(c, &path);
        bench_parallel(c, &path);
+        bench_deserialize_module(c, &path);
    }
 }

@@ -190,8 +210,9 @@ fn strategies() -> impl Iterator<Item = InstanceAllocationStrategy> {
        InstanceAllocationStrategy::Pooling {
            strategy: Default::default(),
            module_limits: ModuleLimits {
-                functions: 20_000,
+                functions: 40_000,
                memory_pages: 1_000,
+                types: 200,
                ..ModuleLimits::default()
            },
            instance_limits: InstanceLimits::default(),