diff --git a/Cargo.lock b/Cargo.lock index 7709f83295..2baa72a57a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -590,6 +590,7 @@ dependencies = [ "itertools 0.9.0", "log", "serde", + "smallvec", "target-lexicon", "thiserror", "wasmparser 0.63.0", diff --git a/cranelift/wasm/Cargo.toml b/cranelift/wasm/Cargo.toml index bae037044e..d4725d4286 100644 --- a/cranelift/wasm/Cargo.toml +++ b/cranelift/wasm/Cargo.toml @@ -20,6 +20,7 @@ hashbrown = { version = "0.7", optional = true } itertools = "0.9.0" log = { version = "0.4.6", default-features = false } serde = { version = "1.0.94", features = ["derive"], optional = true } +smallvec = "1.0.0" thiserror = "1.0.4" [dev-dependencies] diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index 7c827802ba..9faf8c0767 100644 --- a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -22,6 +22,55 @@ //! //! That is why `translate_function_body` takes an object having the `WasmRuntime` trait as //! argument. +//! +//! There is extra complexity associated with translation of 128-bit SIMD instructions. +//! Wasm only considers there to be a single 128-bit vector type. But CLIF's type system +//! distinguishes different lane configurations, so considers 8X16, 16X8, 32X4 and 64X2 to be +//! different types. The result is that, in wasm, it's perfectly OK to take the output of (eg) +//! an `add.16x8` and use that as an operand of a `sub.32x4`, without using any cast. But when +//! translated into CLIF, that will cause a verifier error due to the apparent type mismatch. +//! +//! This file works around that problem by liberally inserting `bitcast` instructions in many +//! places -- mostly, before the use of vector values, either as arguments to CLIF instructions +//! or as block actual parameters. These are no-op casts which nevertheless have different +//! input and output types, and are used (mostly) to "convert" 16X8, 32X4 and 64X2-typed vectors +//! 
to the "canonical" type, 8X16. Hence the functions `optionally_bitcast_vector`, +//! `bitcast_arguments`, `pop*_with_bitcast`, `canonicalise_then_jump`, +//! `canonicalise_then_br{z,nz}`, `is_non_canonical_v128` and `canonicalise_v128_values`. +//! Note that the `bitcast*` functions are occasionally used to convert to some type other than +//! 8X16, but the `canonicalise*` functions always convert to type 8X16. +//! +//! Be careful when adding support for new vector instructions. And when adding new jumps, even +//! if they apparently don't have any connection to vectors. Never generate any kind of +//! (inter-block) jump directly. Instead use `canonicalise_then_jump` and +//! `canonicalise_then_br{z,nz}`. +//! +//! The use of bitcasts is ugly and inefficient, but currently unavoidable: +//! +//! * they make the logic in this file fragile: miss out a bitcast for any reason, and there is +//! the risk of the system failing in the verifier. At least for debug builds. +//! +//! * in the new backends, they potentially interfere with pattern matching on CLIF -- the +//! patterns need to take into account the presence of bitcast nodes. +//! +//! * in the new backends, they get translated into machine-level vector-register-copy +//! instructions, none of which are actually necessary. We then depend on the register +//! allocator to coalesce them all out. +//! +//! * they increase the total number of CLIF nodes that have to be processed, hence slowing down +//! the compilation pipeline. Also, the extra coalescing work generates a slowdown. +//! +//! A better solution which would avoid all four problems would be to remove the 8X16, 16X8, +//! 32X4 and 64X2 types from CLIF and instead have a single V128 type. +//! +//! For further background see also: +//! https://github.com/bytecodealliance/wasmtime/issues/1147 +//! ("Too many raw_bitcasts in SIMD code") +//! https://github.com/bytecodealliance/cranelift/pull/1251 +//! 
("Add X128 type to represent WebAssembly's V128 type") +//! https://github.com/bytecodealliance/cranelift/pull/1236 +//! ("Relax verification to allow I8X16 to act as a default vector type") + use super::{hash_map, HashMap}; use crate::environ::{FuncEnvironment, GlobalVariable, ReturnMode, WasmResult}; use crate::state::{ControlStackFrame, ElseData, FuncTranslationState}; @@ -40,6 +89,7 @@ use cranelift_codegen::ir::{ }; use cranelift_codegen::packed_option::ReservedValue; use cranelift_frontend::{FunctionBuilder, Variable}; +use smallvec::SmallVec; use std::cmp; use std::convert::TryFrom; use std::vec::Vec; @@ -188,7 +238,7 @@ pub fn translate_operator( let (params, results) = blocktype_params_results(validator, *ty)?; let loop_body = block_with_params(builder, params.clone(), environ)?; let next = block_with_params(builder, results.clone(), environ)?; - builder.ins().jump(loop_body, state.peekn(params.len())); + canonicalise_then_jump(builder, loop_body, state.peekn(params.len())); state.push_loop(loop_body, next, params.len(), results.len()); // Pop the initial `Block` actuals and replace them with the `Block`'s @@ -213,24 +263,21 @@ pub fn translate_operator( // up discovering an `else`, then we will allocate a block for it // and go back and patch the jump. let destination = block_with_params(builder, results.clone(), environ)?; - let branch_inst = builder - .ins() - .brz(val, destination, state.peekn(params.len())); + let branch_inst = + canonicalise_then_brz(builder, val, destination, state.peekn(params.len())); (destination, ElseData::NoElse { branch_inst }) } else { // The `if` type signature is not valid without an `else` block, // so we eagerly allocate the `else` block here. 
let destination = block_with_params(builder, results.clone(), environ)?; let else_block = block_with_params(builder, params.clone(), environ)?; - builder - .ins() - .brz(val, else_block, state.peekn(params.len())); + canonicalise_then_brz(builder, val, else_block, state.peekn(params.len())); builder.seal_block(else_block); (destination, ElseData::WithElse { else_block }) }; let next_block = builder.create_block(); - builder.ins().jump(next_block, &[]); + canonicalise_then_jump(builder, next_block, &[]); builder.seal_block(next_block); // Only predecessor is the current block. builder.switch_to_block(next_block); @@ -272,7 +319,11 @@ pub fn translate_operator( debug_assert_eq!(params.len(), num_return_values); let else_block = block_with_params(builder, params.clone(), environ)?; - builder.ins().jump(destination, state.peekn(params.len())); + canonicalise_then_jump( + builder, + destination, + state.peekn(params.len()), + ); state.popn(params.len()); builder.change_jump_destination(branch_inst, else_block); @@ -280,9 +331,11 @@ pub fn translate_operator( else_block } ElseData::WithElse { else_block } => { - builder - .ins() - .jump(destination, state.peekn(num_return_values)); + canonicalise_then_jump( + builder, + destination, + state.peekn(num_return_values), + ); state.popn(num_return_values); else_block } @@ -314,9 +367,7 @@ pub fn translate_operator( if !builder.is_unreachable() || !builder.is_pristine() { let return_count = frame.num_return_values(); let return_args = state.peekn_mut(return_count); - let next_block_types = builder.func.dfg.block_param_types(next_block); - bitcast_arguments(return_args, &next_block_types, builder); - builder.ins().jump(frame.following_code(), return_args); + canonicalise_then_jump(builder, frame.following_code(), return_args); // You might expect that if we just finished an `if` block that // didn't have a corresponding `else` block, then we would clean // up our duplicate set of parameters that we pushed earlier @@ -372,17 
+423,8 @@ pub fn translate_operator( }; (return_count, frame.br_destination()) }; - - // Bitcast any vector arguments to their default type, I8X16, before jumping. let destination_args = state.peekn_mut(return_count); - let destination_types = builder.func.dfg.block_param_types(br_destination); - bitcast_arguments( - destination_args, - &destination_types[..return_count], - builder, - ); - - builder.ins().jump(br_destination, destination_args); + canonicalise_then_jump(builder, br_destination, destination_args); state.popn(return_count); state.reachable = false; } @@ -462,17 +504,8 @@ pub fn translate_operator( frame.set_branched_to_exit(); frame.br_destination() }; - - // Bitcast any vector arguments to their default type, I8X16, before jumping. let destination_args = state.peekn_mut(return_count); - let destination_types = builder.func.dfg.block_param_types(real_dest_block); - bitcast_arguments( - destination_args, - &destination_types[..return_count], - builder, - ); - - builder.ins().jump(real_dest_block, destination_args); + canonicalise_then_jump(builder, real_dest_block, destination_args); } state.popn(return_count); } @@ -494,7 +527,7 @@ pub fn translate_operator( match environ.return_mode() { ReturnMode::NormalReturns => builder.ins().return_(return_args), ReturnMode::FallthroughReturn => { - builder.ins().jump(br_destination, return_args) + canonicalise_then_jump(builder, br_destination, return_args) } }; } @@ -1361,7 +1394,8 @@ pub fn translate_operator( let data = value.bytes().to_vec().into(); let handle = builder.func.dfg.constants.insert(data); let value = builder.ins().vconst(I8X16, handle); - // the v128.const is typed in CLIF as a I8x16 but raw_bitcast to a different type before use + // the v128.const is typed in CLIF as a I8x16 but raw_bitcast to a different type + // before use state.push1(value) } Operator::I8x16Splat | Operator::I16x8Splat => { @@ -2317,15 +2351,10 @@ fn translate_br_if( ) { let val = state.pop1(); let (br_destination, 
inputs) = translate_br_if_args(relative_depth, state); - - // Bitcast any vector arguments to their default type, I8X16, before jumping. - let destination_types = builder.func.dfg.block_param_types(br_destination); - bitcast_arguments(inputs, &destination_types[..inputs.len()], builder); - - builder.ins().brnz(val, br_destination, inputs); + canonicalise_then_brnz(builder, val, br_destination, inputs); let next_block = builder.create_block(); - builder.ins().jump(next_block, &[]); + canonicalise_then_jump(builder, next_block, &[]); builder.seal_block(next_block); // The only predecessor is the current block. builder.switch_to_block(next_block); } @@ -2530,7 +2559,7 @@ fn type_of(operator: &Operator) -> Type { /// Some SIMD operations only operate on I8X16 in CLIF; this will convert them to that type by /// adding a raw_bitcast if necessary. -pub fn optionally_bitcast_vector( +fn optionally_bitcast_vector( value: Value, needed_type: Type, builder: &mut FunctionBuilder, @@ -2542,6 +2571,80 @@ pub fn optionally_bitcast_vector( } } +#[inline(always)] +fn is_non_canonical_v128(ty: ir::Type) -> bool { + match ty { + B8X16 | B16X8 | B32X4 | B64X2 | I64X2 | I32X4 | I16X8 | F32X4 | F64X2 => true, + _ => false, + } +} + +/// Cast to I8X16, any vector values in `values` that are of "non-canonical" type (meaning, not +/// I8X16), and return them in a slice. A pre-scan is made to determine whether any casts are +/// actually necessary, and if not, the original slice is returned. Otherwise the cast values +/// are returned in a slice that belongs to the caller-supplied `SmallVec`. +fn canonicalise_v128_values<'a>( + tmp_canonicalised: &'a mut SmallVec<[ir::Value; 16]>, + builder: &mut FunctionBuilder, + values: &'a [ir::Value], +) -> &'a [ir::Value] { + debug_assert!(tmp_canonicalised.is_empty()); + // First figure out if any of the parameters need to be cast. Mostly they don't need to be. 
+ let any_non_canonical = values + .iter() + .any(|v| is_non_canonical_v128(builder.func.dfg.value_type(*v))); + // Hopefully we take this exit most of the time, hence doing no heap allocation. + if !any_non_canonical { + return values; + } + // Otherwise we'll have to cast, and push the resulting `Value`s into `tmp_canonicalised`. + for v in values { + tmp_canonicalised.push(if is_non_canonical_v128(builder.func.dfg.value_type(*v)) { + builder.ins().raw_bitcast(I8X16, *v) + } else { + *v + }); + } + tmp_canonicalised.as_slice() +} + +/// Generate a `jump` instruction, but first cast all 128-bit vector values to I8X16 if they +/// don't have that type. This is done in a somewhat roundabout way so as to ensure that we +/// almost never have to do any heap allocation. +fn canonicalise_then_jump( + builder: &mut FunctionBuilder, + destination: ir::Block, + params: &[ir::Value], +) -> ir::Inst { + let mut tmp_canonicalised = SmallVec::<[ir::Value; 16]>::new(); + let canonicalised = canonicalise_v128_values(&mut tmp_canonicalised, builder, params); + builder.ins().jump(destination, canonicalised) +} + +/// The same but for a `brz` instruction. +fn canonicalise_then_brz( + builder: &mut FunctionBuilder, + cond: ir::Value, + destination: ir::Block, + params: &[Value], +) -> ir::Inst { + let mut tmp_canonicalised = SmallVec::<[ir::Value; 16]>::new(); + let canonicalised = canonicalise_v128_values(&mut tmp_canonicalised, builder, params); + builder.ins().brz(cond, destination, canonicalised) +} + +/// The same but for a `brnz` instruction. 
+fn canonicalise_then_brnz( + builder: &mut FunctionBuilder, + cond: ir::Value, + destination: ir::Block, + params: &[Value], +) -> ir::Inst { + let mut tmp_canonicalised = SmallVec::<[ir::Value; 16]>::new(); + let canonicalised = canonicalise_v128_values(&mut tmp_canonicalised, builder, params); + builder.ins().brnz(cond, destination, canonicalised) +} + /// A helper for popping and bitcasting a single value; since SIMD values can lose their type by /// using v128 (i.e. CLIF's I8x16) we must re-type the values using a bitcast to avoid CLIF /// typing issues. diff --git a/cranelift/wasmtests/pr2303.wat b/cranelift/wasmtests/pr2303.wat new file mode 100644 index 0000000000..f86831e3ee --- /dev/null +++ b/cranelift/wasmtests/pr2303.wat @@ -0,0 +1,15 @@ +(module + (memory (export "mem") 1 1) + (func (export "runif") (param $cond i32) + i32.const 48 + (v128.load (i32.const 0)) + (v128.load (i32.const 16)) + (if (param v128) (param v128) (result v128 v128) + (local.get $cond) + (then i64x2.add + (v128.load (i32.const 32))) + (else i32x4.sub + (v128.load (i32.const 0)))) + i16x8.mul + v128.store) +)