Implement the relaxed SIMD proposal (#5892)

* Initial support for the Relaxed SIMD proposal

This commit adds initial scaffolding and support for the Relaxed SIMD
proposal for WebAssembly. Codegen is supported on the x64 and AArch64
backends at this time.

The purpose of this commit is to get all the boilerplate out of the way
in terms of plumbing through a new feature, adding tests, etc. The tests
are copied from the upstream repository at this time while the
WebAssembly/testsuite repository hasn't been updated.

A summary of the changes made in this commit:

* Lowerings for all relaxed simd opcodes have been added, currently all
  exhibiting deterministic behavior. This means that few lowerings are
  optimal on the x86 backend, but on the AArch64 backend, for example,
  all lowerings should be optimal.

* Support is added to codegen to, eventually, conditionally generate
  different code based on input codegen flags. This is intended to
  enable codegen to use more efficient instructions on x86 by default, for
  example, while still allowing embedders to force
  architecture-independent semantics and behavior. One good example of
  this is the `f32x4.relaxed_fmadd` instruction which when deterministic
  forces the `fma` instruction, but otherwise if the backend doesn't
  have support for `fma` then intermediate operations are performed
  instead.

* Lowerings of `iadd_pairwise` for `i16x8` and `i32x4` were added to the
  x86 backend as they're now exercised by the deterministic lowerings of
  relaxed simd instructions.

* Sample codegen tests were added for x86 and aarch64 for some relaxed
  simd instructions.

* Wasmtime embedder support for the relaxed-simd proposal and forcing
  determinism have been added to `Config` and the CLI.

* Support has been added to the `*.wast` runtime execution for the
  `(either ...)` matcher used in the relaxed-simd proposal.

* Tests for relaxed-simd are run both with a default `Engine` as well as
  a "force deterministic" `Engine` to test both configurations.

* All tests from the upstream repository were copied into Wasmtime.
  These tests should be deleted when WebAssembly/testsuite is updated.

* x64: Add x86-specific lowerings for relaxed simd

This commit builds on the prior commit and adds an array of `x86_*`
instructions to Cranelift which have semantics that match their
corresponding x86 equivalents. Translation for relaxed simd is then
additionally updated to conditionally generate different CLIF for
relaxed simd instructions depending on whether the target is x86 or not.
This means that for AArch64 no changes are made but for x86 most relaxed
instructions now lower to some x86-equivalent with slightly different
semantics than the "deterministic" lowering.

* Add libcall support for fma to Wasmtime

This will be required to implement the `f32x4.relaxed_madd` instruction
(and others) when an x86 host doesn't specify the `has_fma` feature.

* Ignore relaxed-simd tests on s390x and riscv64

* Enable relaxed-simd tests on s390x

* Update cranelift/codegen/meta/src/shared/instructions.rs

Co-authored-by: Andrew Brown <andrew.brown@intel.com>

* Add a FIXME from review

* Add notes about deterministic semantics

* Don't default `has_native_fma` to `true`

* Review comments and rebase fixes

---------

Co-authored-by: Andrew Brown <andrew.brown@intel.com>
This commit is contained in:
Alex Crichton
2023-03-07 09:52:41 -06:00
committed by GitHub
parent e2dcb19099
commit 8bb183f16e
34 changed files with 1727 additions and 37 deletions

View File

@@ -35,6 +35,10 @@ pub const SUPPORTED_WASM_FEATURES: &[(&str, &str)] = &[
("multi-value", "enables support for multi-value functions"),
("reference-types", "enables support for reference types"),
("simd", "enables support for proposed SIMD instructions"),
(
"relaxed-simd",
"enables support for the relaxed simd proposal",
),
("threads", "enables support for WebAssembly threads"),
("memory64", "enables support for 64-bit memories"),
#[cfg(feature = "component-model")]
@@ -235,6 +239,17 @@ pub struct CommonOptions {
/// stack overflow is reported.
#[clap(long)]
pub max_wasm_stack: Option<usize>,
/// Whether or not to force deterministic and host-independent behavior of
/// the relaxed-simd instructions.
///
/// By default these instructions may have architecture-specific behavior as
/// allowed by the specification, but this can be used to force the behavior
/// of these instructions to match the deterministic behavior classified in
/// the specification. Note that enabling this option may come at a
/// performance cost.
#[clap(long)]
pub relaxed_simd_deterministic: bool,
}
impl CommonOptions {
@@ -329,12 +344,15 @@ impl CommonOptions {
config.max_wasm_stack(max);
}
config.relaxed_simd_deterministic(self.relaxed_simd_deterministic);
Ok(config)
}
pub fn enable_wasm_features(&self, config: &mut Config) {
let WasmFeatures {
simd,
relaxed_simd,
bulk_memory,
reference_types,
multi_value,
@@ -348,6 +366,9 @@ impl CommonOptions {
if let Some(enable) = simd {
config.wasm_simd(enable);
}
if let Some(enable) = relaxed_simd {
config.wasm_relaxed_simd(enable);
}
if let Some(enable) = bulk_memory {
config.wasm_bulk_memory(enable);
}
@@ -400,6 +421,7 @@ pub struct WasmFeatures {
pub multi_value: Option<bool>,
pub bulk_memory: Option<bool>,
pub simd: Option<bool>,
pub relaxed_simd: Option<bool>,
pub threads: Option<bool>,
pub multi_memory: Option<bool>,
pub memory64: Option<bool>,
@@ -450,6 +472,7 @@ fn parse_wasm_features(features: &str) -> Result<WasmFeatures> {
multi_value: all.or(values["multi-value"]),
bulk_memory: all.or(values["bulk-memory"]),
simd: all.or(values["simd"]),
relaxed_simd: all.or(values["relaxed-simd"]),
threads: all.or(values["threads"]),
multi_memory: all.or(values["multi-memory"]),
memory64: all.or(values["memory64"]),
@@ -560,6 +583,7 @@ mod test {
multi_value,
bulk_memory,
simd,
relaxed_simd,
threads,
multi_memory,
memory64,
@@ -572,6 +596,7 @@ mod test {
assert_eq!(threads, Some(true));
assert_eq!(multi_memory, Some(true));
assert_eq!(memory64, Some(true));
assert_eq!(relaxed_simd, Some(true));
Ok(())
}
@@ -585,6 +610,7 @@ mod test {
multi_value,
bulk_memory,
simd,
relaxed_simd,
threads,
multi_memory,
memory64,
@@ -597,6 +623,7 @@ mod test {
assert_eq!(threads, Some(false));
assert_eq!(multi_memory, Some(false));
assert_eq!(memory64, Some(false));
assert_eq!(relaxed_simd, Some(false));
Ok(())
}
@@ -613,6 +640,7 @@ mod test {
multi_value,
bulk_memory,
simd,
relaxed_simd,
threads,
multi_memory,
memory64,
@@ -625,6 +653,7 @@ mod test {
assert_eq!(threads, None);
assert_eq!(multi_memory, Some(true));
assert_eq!(memory64, Some(true));
assert_eq!(relaxed_simd, None);
Ok(())
}
@@ -662,6 +691,7 @@ mod test {
feature_test!(test_multi_value_feature, multi_value, "multi-value");
feature_test!(test_bulk_memory_feature, bulk_memory, "bulk-memory");
feature_test!(test_simd_feature, simd, "simd");
feature_test!(test_relaxed_simd_feature, relaxed_simd, "relaxed-simd");
feature_test!(test_threads_feature, threads, "threads");
feature_test!(test_multi_memory_feature, multi_memory, "multi-memory");
feature_test!(test_memory64_feature, memory64, "memory64");

View File

@@ -2153,4 +2153,16 @@ impl<'module_environment> cranelift_wasm::FuncEnvironment for FuncEnvironment<'m
fn unsigned_add_overflow_condition(&self) -> ir::condcodes::IntCC {
self.isa.unsigned_add_overflow_condition()
}
/// Reports whether lowerings for relaxed SIMD instructions are forced to be
/// deterministic, as recorded in the `relaxed_simd_deterministic` tunable.
fn relaxed_simd_deterministic(&self) -> bool {
self.tunables.relaxed_simd_deterministic
}
/// Forwards to the target ISA's `has_native_fma` query.
// NOTE(review): presumably consulted to choose between a native `fma`
// lowering and a fallback for relaxed SIMD multiply-add — confirm against
// the translation code that calls this.
fn has_native_fma(&self) -> bool {
self.isa.has_native_fma()
}
/// Reports whether the target ISA's triple names the x86_64 architecture.
fn is_x86(&self) -> bool {
    matches!(
        self.isa.triple().architecture,
        target_lexicon::Architecture::X86_64
    )
}
}

View File

@@ -545,6 +545,8 @@ fn libcall_name(call: LibCall) -> &'static str {
LibCall::CeilF64 => LC::CeilF64,
LibCall::TruncF32 => LC::TruncF32,
LibCall::TruncF64 => LC::TruncF64,
LibCall::FmaF32 => LC::FmaF32,
LibCall::FmaF64 => LC::FmaF64,
_ => panic!("unknown libcall to give a name to: {call:?}"),
};
other.symbol()

View File

@@ -166,4 +166,6 @@ libcalls! {
CeilF64 = "libcall_ceilf64"
TruncF32 = "libcall_truncf32"
TruncF64 = "libcall_truncf64"
FmaF32 = "libcall_fmaf32"
FmaF64 = "libcall_fmaf64"
}

View File

@@ -45,6 +45,10 @@ pub struct Tunables {
/// Flag for the component module whether adapter modules have debug
/// assertions baked into them.
pub debug_adapter_modules: bool,
/// Whether or not lowerings for relaxed simd instructions are forced to
/// be deterministic.
pub relaxed_simd_deterministic: bool,
}
impl Default for Tunables {
@@ -91,6 +95,7 @@ impl Default for Tunables {
guard_before_linear_memory: true,
generate_address_map: true,
debug_adapter_modules: false,
relaxed_simd_deterministic: false,
}
}
}

View File

@@ -296,6 +296,8 @@ impl CodeMemory {
obj::LibCall::CeilF64 => libcalls::relocs::ceilf64 as usize,
obj::LibCall::TruncF32 => libcalls::relocs::truncf32 as usize,
obj::LibCall::TruncF64 => libcalls::relocs::truncf64 as usize,
obj::LibCall::FmaF32 => libcalls::relocs::fmaf32 as usize,
obj::LibCall::FmaF64 => libcalls::relocs::fmaf64 as usize,
};
*self.mmap.as_mut_ptr().add(offset).cast::<usize>() = libcall;
}

View File

@@ -584,4 +584,12 @@ pub mod relocs {
(x.abs() + TOINT_64 - TOINT_64).copysign(x)
}
}
/// Libcall computing the fused multiply-add `a * b + c` for `f32`.
///
/// Exposed with the C ABI so its address can be used as a relocation target.
pub extern "C" fn fmaf32(a: f32, b: f32, c: f32) -> f32 {
    f32::mul_add(a, b, c)
}
/// Libcall computing the fused multiply-add `a * b + c` for `f64`.
///
/// Exposed with the C ABI so its address can be used as a relocation target.
pub extern "C" fn fmaf64(a: f64, b: f64, c: f64) -> f64 {
    f64::mul_add(a, b, c)
}
}

View File

@@ -682,6 +682,56 @@ impl Config {
self
}
/// Configures whether the WebAssembly Relaxed SIMD proposal will be
/// enabled for compilation.
///
/// The [WebAssembly Relaxed SIMD proposal][proposal] is not, at the time of
/// this writing, at stage 4. The relaxed SIMD proposal adds new
/// instructions to WebAssembly which, for some specific inputs, are allowed
/// to produce different results on different hosts. More-or-less this
/// proposal enables exposing platform-specific semantics of SIMD
/// instructions in a controlled fashion to a WebAssembly program. From an
/// embedder's perspective this means that WebAssembly programs may execute
/// differently depending on whether the host is x86_64 or AArch64, for
/// example.
///
/// By default Wasmtime lowers relaxed SIMD instructions to the fastest
/// lowering for the platform it's running on. This means that, by default,
/// some relaxed SIMD instructions may have different results for the same
/// inputs across x86_64 and AArch64. This behavior can be disabled through
/// the [`Config::relaxed_simd_deterministic`] option which will force
/// deterministic behavior across all platforms, as classified by the
/// specification, at the cost of performance.
///
/// Note that this proposal cannot be enabled while the SIMD proposal is
/// disabled (see [`Config::wasm_simd`]): such a configuration is rejected
/// with an error when the configuration is validated.
///
/// This is `false` by default.
///
/// [proposal]: https://github.com/webassembly/relaxed-simd
pub fn wasm_relaxed_simd(&mut self, enable: bool) -> &mut Self {
self.features.relaxed_simd = enable;
self
}
/// Configures whether the instructions of the [relaxed SIMD
/// proposal][proposal] are forced to have deterministic behavior.
///
/// The relaxed SIMD proposal introduces instructions that are allowed to
/// have different behavior on different architectures, primarily to afford
/// an efficient implementation on all architectures. This means, however,
/// that the same module may execute differently on one host than another,
/// which typically is not otherwise the case. This option is provided to
/// force Wasmtime to generate deterministic code for all relaxed simd
/// instructions, at the cost of performance, for all architectures. When
/// this option is enabled then the deterministic behavior of all
/// instructions in the relaxed SIMD proposal is selected.
///
/// The proposal itself is enabled separately through
/// [`Config::wasm_relaxed_simd`].
///
/// This is `false` by default.
///
/// [proposal]: https://github.com/webassembly/relaxed-simd
pub fn relaxed_simd_deterministic(&mut self, enable: bool) -> &mut Self {
self.tunables.relaxed_simd_deterministic = enable;
self
}
/// Configures whether the [WebAssembly bulk memory operations
/// proposal][proposal] will be enabled for compilation.
///
@@ -1560,6 +1610,10 @@ impl Config {
}
}
if self.features.relaxed_simd && !self.features.simd {
bail!("cannot disable the simd proposal but enable the relaxed simd proposal");
}
// Apply compiler settings and flags
for (k, v) in self.compiler_config.settings.iter() {
compiler.set(k, v)?;
@@ -1608,6 +1662,7 @@ impl fmt::Debug for Config {
.field("wasm_reference_types", &self.features.reference_types)
.field("wasm_bulk_memory", &self.features.bulk_memory)
.field("wasm_simd", &self.features.simd)
.field("wasm_relaxed_simd", &self.features.relaxed_simd)
.field("wasm_multi_value", &self.features.multi_value)
.field(
"static_memory_maximum_size",

View File

@@ -309,6 +309,7 @@ impl Metadata {
epoch_interruption,
static_memory_bound_is_maximum,
guard_before_linear_memory,
relaxed_simd_deterministic,
// This doesn't affect compilation, it's just a runtime setting.
dynamic_memory_growth_reserve: _,
@@ -364,6 +365,11 @@ impl Metadata {
other.guard_before_linear_memory,
"guard before linear memory",
)?;
Self::check_bool(
relaxed_simd_deterministic,
other.relaxed_simd_deterministic,
"relaxed simd deterministic semantics",
)?;
Ok(())
}

View File

@@ -39,6 +39,14 @@ fn extract_lane_as_i64(bytes: u128, lane: usize) -> i64 {
pub fn match_val(actual: &Val, expected: &WastRetCore) -> Result<()> {
match (actual, expected) {
(_, WastRetCore::Either(expected)) => {
for expected in expected {
if match_val(actual, expected).is_ok() {
return Ok(());
}
}
match_val(actual, &expected[0])
}
(Val::I32(a), WastRetCore::I32(b)) => match_int(a, b),
(Val::I64(a), WastRetCore::I64(b)) => match_int(a, b),
// Note that these float comparisons are comparing bits, not float