Files
wasmtime/tests/all/wast.rs
Alex Crichton 8bb183f16e Implement the relaxed SIMD proposal (#5892)
* Initial support for the Relaxed SIMD proposal

This commit adds initial scaffolding and support for the Relaxed SIMD
proposal for WebAssembly. Codegen support is supported on the x64 and
AArch64 backends on this time.

The purpose of this commit is to get all the boilerplate out of the way
in terms of plumbing through a new feature, adding tests, etc. The tests
are copied from the upstream repository at this time while the
WebAssembly/testsuite repository hasn't been updated.

A summary of changes made in this commit are:

* Lowerings for all relaxed simd opcodes have been added, currently all
  exhibiting deterministic behavior. This means that few lowerings are
  optimal on the x86 backend, but on the AArch64 backend, for example,
  all lowerings should be optimal.

* Support is added to codegen to, eventually, conditionally generate
  different code based on input codegen flags. This is intended to
  enable codegen to more efficient instructions on x86 by default, for
  example, while still allowing embedders to force
  architecture-independent semantics and behavior. One good example of
  this is the `f32x4.relaxed_fmadd` instruction which when deterministic
  forces the `fma` instruction, but otherwise if the backend doesn't
  have support for `fma` then intermediate operations are performed
  instead.

* Lowerings of `iadd_pairwise` for `i16x8` and `i32x4` were added to the
  x86 backend as they're now exercised by the deterministic lowerings of
  relaxed simd instructions.

* Sample codegen tests for added for x86 and aarch64 for some relaxed
  simd instructions.

* Wasmtime embedder support for the relaxed-simd proposal and forcing
  determinism have been added to `Config` and the CLI.

* Support has been added to the `*.wast` runtime execution for the
  `(either ...)` matcher used in the relaxed-simd proposal.

* Tests for relaxed-simd are run both with a default `Engine` as well as
  a "force deterministic" `Engine` to test both configurations.

* All tests from the upstream repository were copied into Wasmtime.
  These tests should be deleted when WebAssembly/testsuite is updated.

* x64: Add x86-specific lowerings for relaxed simd

This commit builds on the prior commit and adds an array of `x86_*`
instructions to Cranelift which have semantics that match their
corresponding x86 equivalents. Translation for relaxed simd is then
additionally updated to conditionally generate different CLIF for
relaxed simd instructions depending on whether the target is x86 or not.
This means that for AArch64 no changes are made but for x86 most relaxed
instructions now lower to some x86-equivalent with slightly different
semantics than the "deterministic" lowering.

* Add libcall support for fma to Wasmtime

This will be required to implement the `f32x4.relaxed_madd` instruction
(and others) when an x86 host doesn't specify the `has_fma` feature.

* Ignore relaxed-simd tests on s390x and riscv64

* Enable relaxed-simd tests on s390x

* Update cranelift/codegen/meta/src/shared/instructions.rs

Co-authored-by: Andrew Brown <andrew.brown@intel.com>

* Add a FIXME from review

* Add notes about deterministic semantics

* Don't default `has_native_fma` to `true`

* Review comments and rebase fixes

---------

Co-authored-by: Andrew Brown <andrew.brown@intel.com>
2023-03-07 15:52:41 +00:00

191 lines
6.7 KiB
Rust

use anyhow::Context;
use bstr::ByteSlice;
use once_cell::sync::Lazy;
use std::path::Path;
use std::sync::{Condvar, Mutex};
use wasmtime::{
Config, Engine, InstanceAllocationStrategy, PoolingAllocationConfig, Store, Strategy,
};
use wasmtime_environ::WASM_PAGE_SIZE;
use wasmtime_wast::WastContext;
include!(concat!(env!("OUT_DIR"), "/wast_testsuite_tests.rs"));
// Each of the tests included from `wast_testsuite_tests` will call this
// function which actually executes the `wast` test suite given the `strategy`
// to compile it.
fn run_wast(wast: &str, strategy: Strategy, pooling: bool) -> anyhow::Result<()> {
drop(env_logger::try_init());
let wast_bytes = std::fs::read(wast).with_context(|| format!("failed to read `{}`", wast))?;
match strategy {
Strategy::Cranelift => {}
_ => unimplemented!(),
}
let wast = Path::new(wast);
let memory64 = feature_found(wast, "memory64");
let multi_memory = feature_found(wast, "multi-memory");
let threads = feature_found(wast, "threads");
let reference_types = !(threads && feature_found(wast, "proposals"));
let relaxed_simd = feature_found(wast, "relaxed-simd");
let use_shared_memory = feature_found_src(&wast_bytes, "shared_memory")
|| feature_found_src(&wast_bytes, "shared)");
if pooling && use_shared_memory {
eprintln!("skipping pooling test with shared memory");
return Ok(());
}
let mut cfg = Config::new();
cfg.wasm_multi_memory(multi_memory)
.wasm_threads(threads)
.wasm_memory64(memory64)
.wasm_reference_types(reference_types)
.wasm_relaxed_simd(relaxed_simd)
.cranelift_debug_verifier(true);
cfg.wasm_component_model(feature_found(wast, "component-model"));
if feature_found(wast, "canonicalize-nan") {
cfg.cranelift_nan_canonicalization(true);
}
let test_allocates_lots_of_memory = wast.ends_with("more-than-4gb.wast");
// By default we'll allocate huge chunks (6gb) of the address space for each
// linear memory. This is typically fine but when we emulate tests with QEMU
// it turns out that it causes memory usage to balloon massively. Leave a
// knob here so on CI we can cut down the memory usage of QEMU and avoid the
// OOM killer.
//
// Locally testing this out this drops QEMU's memory usage running this
// tests suite from 10GiB to 600MiB. Previously we saw that crossing the
// 10GiB threshold caused our processes to get OOM killed on CI.
if std::env::var("WASMTIME_TEST_NO_HOG_MEMORY").is_ok() {
// The pooling allocator hogs ~6TB of virtual address space for each
// store, so if we don't to hog memory then ignore pooling tests.
if pooling {
return Ok(());
}
// If the test allocates a lot of memory, that's considered "hogging"
// memory, so skip it.
if test_allocates_lots_of_memory {
return Ok(());
}
// Don't use 4gb address space reservations when not hogging memory, and
// also don't reserve lots of memory after dynamic memories for growth
// (makes growth slower).
if use_shared_memory {
cfg.static_memory_maximum_size(2 * WASM_PAGE_SIZE as u64);
} else {
cfg.static_memory_maximum_size(0);
}
cfg.dynamic_memory_reserved_for_growth(0);
}
let _pooling_lock = if pooling {
// Some memory64 tests take more than 4gb of resident memory to test,
// but we don't want to configure the pooling allocator to allow that
// (that's a ton of memory to reserve), so we skip those tests.
if test_allocates_lots_of_memory {
return Ok(());
}
// The limits here are crafted such that the wast tests should pass.
// However, these limits may become insufficient in the future as the wast tests change.
// If a wast test fails because of a limit being "exceeded" or if memory/table
// fails to grow, the values here will need to be adjusted.
let mut pool = PoolingAllocationConfig::default();
pool.instance_count(450)
.instance_memories(2)
.instance_tables(4)
.instance_memory_pages(805);
cfg.allocation_strategy(InstanceAllocationStrategy::Pooling(pool));
Some(lock_pooling())
} else {
None
};
let mut engines = vec![(Engine::new(&cfg)?, "default")];
// For tests that use relaxed-simd test both the default engine and the
// guaranteed-deterministic engine to ensure that both the 'native'
// semantics of the instructions plus the canonical semantics work.
if relaxed_simd {
engines.push((
Engine::new(cfg.relaxed_simd_deterministic(true))?,
"deterministic",
));
}
for (engine, desc) in engines {
let store = Store::new(&engine, ());
let mut wast_context = WastContext::new(store);
wast_context.register_spectest(use_shared_memory)?;
wast_context
.run_buffer(wast.to_str().unwrap(), &wast_bytes)
.with_context(|| format!("failed to run spec test with {desc} engine"))?;
}
Ok(())
}
fn feature_found(path: &Path, name: &str) -> bool {
path.iter().any(|part| match part.to_str() {
Some(s) => s.contains(name),
None => false,
})
}
fn feature_found_src(bytes: &[u8], name: &str) -> bool {
bytes.contains_str(name)
}
// The pooling tests make about 6TB of address space reservation which means
// that we shouldn't let too many of them run concurrently at once. On
// high-cpu-count systems (e.g. 80 threads) this leads to mmap failures because
// presumably too much of the address space has been reserved with our limits
// specified above. By keeping the number of active pooling-related tests to a
// specified maximum we can put a cap on the virtual address space reservations
// made.
fn lock_pooling() -> impl Drop {
const MAX_CONCURRENT_POOLING: u32 = 8;
static ACTIVE: Lazy<MyState> = Lazy::new(MyState::default);
#[derive(Default)]
struct MyState {
lock: Mutex<u32>,
waiters: Condvar,
}
impl MyState {
fn lock(&self) -> impl Drop + '_ {
let state = self.lock.lock().unwrap();
let mut state = self
.waiters
.wait_while(state, |cnt| *cnt >= MAX_CONCURRENT_POOLING)
.unwrap();
*state += 1;
LockGuard { state: self }
}
}
struct LockGuard<'a> {
state: &'a MyState,
}
impl Drop for LockGuard<'_> {
fn drop(&mut self) {
*self.state.lock.lock().unwrap() -= 1;
self.state.waiters.notify_one();
}
}
ACTIVE.lock()
}