diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs index 9bdb751b26..8824329c77 100644 --- a/cranelift/codegen/meta/src/isa/x86/encodings.rs +++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs @@ -1785,7 +1785,7 @@ pub(crate) fn define( let allowed_simd_type = |t: &LaneType| t.lane_bits() >= 8 && t.lane_bits() < 128; // PSHUFB, 8-bit shuffle using two XMM registers. - for ty in ValueType::all_lane_types().filter(|t| t.lane_bits() == 8) { + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { let instruction = x86_pshufb.bind_vector_from_lane(ty, sse_vector_size); let template = rec_fa.nonrex().opcodes(vec![0x66, 0x0f, 0x38, 00]); e.enc32_isap(instruction.clone(), template.clone(), use_ssse3_simd); @@ -1804,7 +1804,7 @@ pub(crate) fn define( // SIMD scalar_to_vector; this uses MOV to copy the scalar value to an XMM register; according // to the Intel manual: "When the destination operand is an XMM register, the source operand is - // written to the low doubleword of the register and the regiser is zero-extended to 128 bits." + // written to the low doubleword of the register and the register is zero-extended to 128 bits." for ty in ValueType::all_lane_types().filter(allowed_simd_type) { let instruction = scalar_to_vector.bind_vector_from_lane(ty, sse_vector_size); if ty.is_float() { @@ -1929,6 +1929,13 @@ pub(crate) fn define( e.enc_32_64_maybe_isap(instruction, template, None); // from SSE } + // SIMD bor using ORPS + for ty in ValueType::all_lane_types().filter(allowed_simd_type) { + let instruction = bor.bind_vector_from_lane(ty, sse_vector_size); + let template = rec_fa.nonrex().opcodes(vec![0x0f, 0x56]); + e.enc_32_64_maybe_isap(instruction, template, None); // from SSE + } + // Reference type instructions // Null references implemented as iconst 0. 
diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs index dfd7f84334..e37759e892 100644 --- a/cranelift/codegen/meta/src/isa/x86/legalize.rs +++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs @@ -45,6 +45,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct let selectif = insts.by_name("selectif"); let smulhi = insts.by_name("smulhi"); let splat = insts.by_name("splat"); + let shuffle = insts.by_name("shuffle"); let srem = insts.by_name("srem"); let udiv = insts.by_name("udiv"); let umulhi = insts.by_name("umulhi"); @@ -380,6 +381,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct ); } + narrow.custom_legalize(shuffle, "convert_shuffle"); narrow.custom_legalize(extractlane, "convert_extractlane"); narrow.custom_legalize(insertlane, "convert_insertlane"); diff --git a/cranelift/codegen/meta/src/isa/x86/recipes.rs b/cranelift/codegen/meta/src/isa/x86/recipes.rs index 11a9972d98..bee51883a4 100644 --- a/cranelift/codegen/meta/src/isa/x86/recipes.rs +++ b/cranelift/codegen/meta/src/isa/x86/recipes.rs @@ -396,11 +396,11 @@ pub(crate) fn define<'shared>( let f_trap = formats.by_name("Trap"); let f_unary = formats.by_name("Unary"); let f_unary_bool = formats.by_name("UnaryBool"); + let f_unary_const = formats.by_name("UnaryConst"); let f_unary_global_value = formats.by_name("UnaryGlobalValue"); let f_unary_ieee32 = formats.by_name("UnaryIeee32"); let f_unary_ieee64 = formats.by_name("UnaryIeee64"); let f_unary_imm = formats.by_name("UnaryImm"); - let f_unary_imm128 = formats.by_name("UnaryImm128"); // Predicates shorthands. 
let use_sse41 = settings.predicate_by_name("use_sse41"); @@ -2437,14 +2437,14 @@ pub(crate) fn define<'shared>( ); recipes.add_template_recipe( - EncodingRecipeBuilder::new("vconst", f_unary_imm128, 5) + EncodingRecipeBuilder::new("vconst", f_unary_const, 5) .operands_out(vec![fpr]) .clobbers_flags(false) .emit( r#" {{PUT_OP}}(bits, rex2(0, out_reg0), sink); modrm_riprel(out_reg0, sink); - const_disp4(imm, func, sink); + const_disp4(constant_handle, func, sink); "#, ), ); diff --git a/cranelift/codegen/meta/src/shared/formats.rs b/cranelift/codegen/meta/src/shared/formats.rs index 5309afc1b0..74a2f6bb8f 100644 --- a/cranelift/codegen/meta/src/shared/formats.rs +++ b/cranelift/codegen/meta/src/shared/formats.rs @@ -6,10 +6,10 @@ pub(crate) fn define(imm: &Immediates, entities: &EntityRefs) -> FormatRegistry registry.insert(Builder::new("Unary").value()); registry.insert(Builder::new("UnaryImm").imm(&imm.imm64)); - registry.insert(Builder::new("UnaryImm128").imm(&imm.uimm128)); registry.insert(Builder::new("UnaryIeee32").imm(&imm.ieee32)); registry.insert(Builder::new("UnaryIeee64").imm(&imm.ieee64)); registry.insert(Builder::new("UnaryBool").imm(&imm.boolean)); + registry.insert(Builder::new("UnaryConst").imm(&imm.pool_constant)); registry.insert(Builder::new("UnaryGlobalValue").imm(&entities.global_value)); registry.insert(Builder::new("Binary").value().value()); @@ -43,6 +43,12 @@ pub(crate) fn define(imm: &Immediates, entities: &EntityRefs) -> FormatRegistry .value() .imm_with_name("lane", &imm.uimm8), ); + registry.insert( + Builder::new("Shuffle") + .value() + .value() + .imm_with_name("mask", &imm.uimm128), + ); registry.insert(Builder::new("IntCompare").imm(&imm.intcc).value().value()); registry.insert( diff --git a/cranelift/codegen/meta/src/shared/immediates.rs b/cranelift/codegen/meta/src/shared/immediates.rs index 30c1a73970..0b5e84c521 100644 --- a/cranelift/codegen/meta/src/shared/immediates.rs +++ b/cranelift/codegen/meta/src/shared/immediates.rs @@ 
-23,6 +23,12 @@ pub(crate) struct Immediates { /// const. pub uimm128: OperandKind, + /// A constant stored in the constant pool. + /// + /// This operand is used to pass constants to instructions like vconst while storing the + /// actual bytes in the constant pool. + pub pool_constant: OperandKind, + /// A 32-bit immediate signed offset. /// /// This is used to represent an immediate address offset in load/store instructions. @@ -84,6 +90,12 @@ impl Immediates { uimm128: Builder::new_imm("uimm128") .doc("A 128-bit immediate unsigned integer.") + .rust_type("ir::Immediate") + .build(), + + pool_constant: Builder::new_imm("poolConstant") + .doc("A constant stored in the constant pool.") + .default_member("constant_handle") .rust_type("ir::Constant") .build(), diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs index bae1327b60..b52a8dafed 100644 --- a/cranelift/codegen/meta/src/shared/instructions.rs +++ b/cranelift/codegen/meta/src/shared/instructions.rs @@ -1090,7 +1090,7 @@ pub(crate) fn define( let N = &operand_doc( "N", - &imm.uimm128, + &imm.pool_constant, "The 16 immediate bytes of a 128-bit vector", ); let a = &operand_doc("a", TxN, "A constant vector value"); @@ -1108,6 +1108,41 @@ pub(crate) fn define( .operands_out(vec![a]), ); + let mask = &operand_doc( + "mask", + &imm.uimm128, + "The 16 immediate bytes used for selecting the elements to shuffle", + ); + let Tx16 = &TypeVar::new( + "Tx16", + "A SIMD vector with exactly 16 lanes of 8-bit values; eventually this may support other \ + lane counts and widths", + TypeSetBuilder::new() + .ints(8..8) + .bools(8..8) + .simd_lanes(16..16) + .includes_scalars(false) + .build(), + ); + let a = &operand_doc("a", Tx16, "A vector value"); + let b = &operand_doc("b", Tx16, "A vector value"); + + ig.push( + Inst::new( + "shuffle", + r#" + SIMD vector shuffle. + + Shuffle two vectors using the given immediate bytes. 
For each of the 16 bytes of the + immediate, a value i of 0-15 selects the i-th element of the first vector and a value i of + 16-31 selects the (i-16)th element of the second vector. Immediate values outside of the + 0-31 range place a 0 in the resulting vector lane. + "#, + ) + .operands_in(vec![a, b, mask]) + .operands_out(vec![a]), + ); + let a = &operand_doc("a", Ref, "A constant reference null value"); ig.push( diff --git a/cranelift/codegen/src/ir/dfg.rs b/cranelift/codegen/src/ir/dfg.rs index f4e2875952..5b3054b59d 100644 --- a/cranelift/codegen/src/ir/dfg.rs +++ b/cranelift/codegen/src/ir/dfg.rs @@ -5,7 +5,7 @@ use crate::ir; use crate::ir::builder::ReplaceBuilder; use crate::ir::extfunc::ExtFuncData; use crate::ir::instructions::{BranchInfo, CallInfo, InstructionData}; -use crate::ir::{types, ConstantPool}; +use crate::ir::{types, ConstantPool, Immediate}; use crate::ir::{ Ebb, FuncRef, Inst, SigRef, Signature, Type, Value, ValueLabelAssignments, ValueList, ValueListPool, @@ -19,6 +19,7 @@ use core::mem; use core::ops::{Index, IndexMut}; use core::u16; use std::collections::HashMap; +use std::vec::Vec; /// A data flow graph defines all instructions and extended basic blocks in a function as well as /// the data flow dependencies between them. 
The DFG also tracks values which can be either @@ -70,6 +71,9 @@ pub struct DataFlowGraph { /// Constants used within the function pub constants: ConstantPool, + + /// Stores large immediates that otherwise will not fit on InstructionData + pub immediates: PrimaryMap<Immediate, Vec<u8>>, } impl DataFlowGraph { @@ -85,6 +89,7 @@ impl DataFlowGraph { ext_funcs: PrimaryMap::new(), values_labels: None, constants: ConstantPool::new(), + immediates: PrimaryMap::new(), } } @@ -98,7 +103,8 @@ impl DataFlowGraph { self.signatures.clear(); self.ext_funcs.clear(); self.values_labels = None; - self.constants.clear() + self.constants.clear(); + self.immediates.clear(); } /// Get the total number of instructions created in this function, whether they are currently diff --git a/cranelift/codegen/src/ir/entities.rs b/cranelift/codegen/src/ir/entities.rs index 1e09d18840..1f8e1fc6a2 100644 --- a/cranelift/codegen/src/ir/entities.rs +++ b/cranelift/codegen/src/ir/entities.rs @@ -181,6 +181,29 @@ impl Constant { } } +/// An opaque reference to an immediate. +/// +/// Some immediates (e.g. SIMD shuffle masks) are too large to store in the +/// [`InstructionData`](super::instructions::InstructionData) struct and therefore must be +/// tracked separately in [`DataFlowGraph::immediates`](super::dfg::DataFlowGraph). `Immediate` +/// provides a way to reference values stored there. +#[derive(Copy, Clone, PartialEq, Eq, Hash)] +pub struct Immediate(u32); +entity_impl!(Immediate, "imm"); + +impl Immediate { + /// Create an immediate reference from its number. + /// + /// This method is for use by the parser. + pub fn with_number(n: u32) -> Option<Self> { + if n < u32::MAX { + Some(Immediate(n)) + } else { + None + } + } +} + /// An opaque reference to a [jump table](https://en.wikipedia.org/wiki/Branch_table). 
/// /// `JumpTable`s are used for indirect branching and are specialized for dense, diff --git a/cranelift/codegen/src/ir/mod.rs b/cranelift/codegen/src/ir/mod.rs index 930f5d496d..2a0293b31b 100644 --- a/cranelift/codegen/src/ir/mod.rs +++ b/cranelift/codegen/src/ir/mod.rs @@ -31,7 +31,8 @@ pub use crate::ir::builder::{InsertBuilder, InstBuilder, InstBuilderBase, InstIn pub use crate::ir::constant::{ConstantData, ConstantOffset, ConstantPool}; pub use crate::ir::dfg::{DataFlowGraph, ValueDef}; pub use crate::ir::entities::{ - Constant, Ebb, FuncRef, GlobalValue, Heap, Inst, JumpTable, SigRef, StackSlot, Table, Value, + Constant, Ebb, FuncRef, GlobalValue, Heap, Immediate, Inst, JumpTable, SigRef, StackSlot, + Table, Value, }; pub use crate::ir::extfunc::{ AbiParam, ArgumentExtension, ArgumentPurpose, ExtFuncData, Signature, diff --git a/cranelift/codegen/src/isa/x86/enc_tables.rs b/cranelift/codegen/src/isa/x86/enc_tables.rs index 732fcd9628..94333116e6 100644 --- a/cranelift/codegen/src/isa/x86/enc_tables.rs +++ b/cranelift/codegen/src/isa/x86/enc_tables.rs @@ -899,6 +899,80 @@ fn expand_fcvt_to_uint_sat( cfg.recompute_ebb(pos.func, done); } +/// Convert shuffle instructions. +fn convert_shuffle( + inst: ir::Inst, + func: &mut ir::Function, + _cfg: &mut ControlFlowGraph, + _isa: &dyn TargetIsa, +) { + let mut pos = FuncCursor::new(func).at_inst(inst); + pos.use_srcloc(inst); + + if let ir::InstructionData::Shuffle { args, mask, .. } = pos.func.dfg[inst] { + // A mask-building helper: in 128-bit SIMD, 0-15 indicate which lane to read from and a 1 + // in the most significant position zeroes the lane. + let zero_unknown_lane_index = |b: u8| if b > 15 { 0b10000000 } else { b }; + + // We only have to worry about aliasing here because copies will be introduced later (in + // regalloc). 
+ let a = pos.func.dfg.resolve_aliases(args[0]); + let b = pos.func.dfg.resolve_aliases(args[1]); + let mask = pos + .func + .dfg + .immediates + .get(mask) + .expect("The shuffle immediate should have been recorded before this point") + .clone(); + if a == b { + // PSHUFB the first argument (since it is the same as the second). + let constructed_mask = mask + .iter() + // If the mask is greater than 15 it still may be referring to a lane in b. + .map(|&b| if b > 15 { b.wrapping_sub(16) } else { b }) + .map(zero_unknown_lane_index) + .collect(); + let handle = pos.func.dfg.constants.insert(constructed_mask); + // Move the built mask into another XMM register. + let a_type = pos.func.dfg.value_type(a); + let mask_value = pos.ins().vconst(a_type, handle); + // Shuffle the single incoming argument. + pos.func.dfg.replace(inst).x86_pshufb(a, mask_value); + } else { + // PSHUFB the first argument, placing zeroes for unused lanes. + let constructed_mask = mask.iter().cloned().map(zero_unknown_lane_index).collect(); + let handle = pos.func.dfg.constants.insert(constructed_mask); + // Move the built mask into another XMM register. + let a_type = pos.func.dfg.value_type(a); + let mask_value = pos.ins().vconst(a_type, handle); + // Shuffle the first argument. + let shuffled_first_arg = pos.ins().x86_pshufb(a, mask_value); + + // PSHUFB the second argument, placing zeroes for unused lanes. + let constructed_mask = mask + .iter() + .map(|b| b.wrapping_sub(16)) + .map(zero_unknown_lane_index) + .collect(); + let handle = pos.func.dfg.constants.insert(constructed_mask); + // Move the built mask into another XMM register. + let b_type = pos.func.dfg.value_type(b); + let mask_value = pos.ins().vconst(b_type, handle); + // Shuffle the second argument. + let shuffled_second_arg = pos.ins().x86_pshufb(b, mask_value); + + // OR the vectors together to form the final shuffled value. 
+ pos.func + .dfg + .replace(inst) + .bor(shuffled_first_arg, shuffled_second_arg); + + // TODO when AVX512 is enabled we should replace this sequence with a single VPERMB + }; + } +} + /// Because floats already exist in XMM registers, we can keep them there when executing a CLIF /// extractlane instruction fn convert_extractlane( diff --git a/cranelift/codegen/src/verifier/mod.rs b/cranelift/codegen/src/verifier/mod.rs index c76090e57d..21f0c72cae 100644 --- a/cranelift/codegen/src/verifier/mod.rs +++ b/cranelift/codegen/src/verifier/mod.rs @@ -706,7 +706,6 @@ impl<'a> Verifier<'a> { // Exhaustive list so we can't forget to add new formats Unary { .. } | UnaryImm { .. } - | UnaryImm128 { .. } | UnaryIeee32 { .. } | UnaryIeee64 { .. } | UnaryBool { .. } @@ -715,6 +714,8 @@ impl<'a> Verifier<'a> { | Ternary { .. } | InsertLane { .. } | ExtractLane { .. } + | UnaryConst { .. } + | Shuffle { .. } | IntCompare { .. } | IntCompareImm { .. } | IntCond { .. } diff --git a/cranelift/codegen/src/write.rs b/cranelift/codegen/src/write.rs index 4b616cab2e..e3c8bdb2fa 100644 --- a/cranelift/codegen/src/write.rs +++ b/cranelift/codegen/src/write.rs @@ -488,11 +488,6 @@ pub fn write_operands( match dfg[inst] { Unary { arg, .. } => write!(w, " {}", arg), UnaryImm { imm, .. } => write!(w, " {}", imm), - UnaryImm128 { imm, .. } => { - let data = dfg.constants.get(imm); - let uimm128 = Uimm128::from(&data[..]); - write!(w, " {}", uimm128) - } UnaryIeee32 { imm, .. } => write!(w, " {}", imm), UnaryIeee64 { imm, .. } => write!(w, " {}", imm), UnaryBool { imm, .. } => write!(w, " {}", imm), @@ -510,6 +505,20 @@ pub fn write_operands( NullAry { .. } => write!(w, " "), InsertLane { lane, args, .. } => write!(w, " {}, {}, {}", args[0], lane, args[1]), ExtractLane { lane, arg, .. } => write!(w, " {}, {}", arg, lane), + UnaryConst { + constant_handle, .. 
+ } => { + let data = dfg.constants.get(constant_handle); + let uimm128 = Uimm128::from(&data[..]); + write!(w, " {}", uimm128) + } + Shuffle { mask, args, .. } => { + let data = dfg.immediates.get(mask).expect( + "Expected the shuffle mask to already be inserted into the immediates table", + ); + let uimm128 = Uimm128::from(&data[..]); + write!(w, " {}, {}, {}", args[0], args[1], uimm128) + } IntCompare { cond, args, .. } => write!(w, " {} {}, {}", cond, args[0], args[1]), IntCompareImm { cond, arg, imm, .. } => write!(w, " {} {}, {}", cond, arg, imm), IntCond { cond, arg, .. } => write!(w, " {} {}", cond, arg), diff --git a/cranelift/filetests/filetests/isa/x86/shuffle-legalize.clif b/cranelift/filetests/filetests/isa/x86/shuffle-legalize.clif new file mode 100644 index 0000000000..d192489448 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/shuffle-legalize.clif @@ -0,0 +1,31 @@ +test legalizer +set enable_simd +target x86_64 skylake + +function %test_shuffle_different_ssa_values() -> i8x16 { +ebb0: + v0 = vconst.i8x16 0x00 + v1 = vconst.i8x16 0x01 + v2 = shuffle v0, v1, 0x11000000000000000000000000000000 ; pick the second lane of v1, the rest use the first lane of v0 + return v2 +} + +; check: v1 = vconst.i8x16 0x01 +; nextln: v3 = vconst.i8x16 0x80000000000000000000000000000000 +; nextln: v4 = x86_pshufb v0, v3 +; nextln: v5 = vconst.i8x16 0x01808080808080808080808080808080 +; nextln: v6 = x86_pshufb v1, v5 +; nextln: v2 = bor v4, v6 + + + +function %test_shuffle_same_ssa_value() -> i8x16 { +ebb0: + v1 = vconst.i8x16 0x01 + v2 = shuffle v1, v1, 0x13000000000000000000000000000000 ; pick the fourth lane of v1 and the rest from the first lane of v1 + return v2 +} + +; check: v1 = vconst.i8x16 0x01 +; nextln: v3 = vconst.i8x16 0x03000000000000000000000000000000 +; nextln: v2 = x86_pshufb v1, v3 diff --git a/cranelift/filetests/filetests/isa/x86/shuffle-run.clif b/cranelift/filetests/filetests/isa/x86/shuffle-run.clif new file mode 100644 index 
0000000000..60fd7d7b25 --- /dev/null +++ b/cranelift/filetests/filetests/isa/x86/shuffle-run.clif @@ -0,0 +1,44 @@ +test run +set enable_simd + +function %test_shuffle_different_ssa_values() -> b1 { +ebb0: + v0 = vconst.i8x16 0x00 + v1 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 42] + v2 = shuffle v0, v1, [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 31] ; use the first lane of v0 throughout except use the last lane of v1 + v3 = extractlane.i8x16 v2, 15 + v4 = iconst.i8 42 + v5 = icmp eq v3, v4 + return v5 +} + +; run + +function %test_shuffle_same_ssa_value() -> b1 { +ebb0: + v0 = vconst.i8x16 0x01000000_00000000_00000000_00000000 ; note where lane 15 is when written with hexadecimal syntax + v1 = shuffle v0, v0, 0x0f0f0f0f_0f0f0f0f_0f0f0f0f_0f0f0f0f ; use the last lane of v0 to fill all lanes + v2 = extractlane.i8x16 v1, 4 + v3 = iconst.i8 0x01 + v4 = icmp eq v2, v3 + return v4 +} + +; run + +function %compare_shuffle() -> b1 { +ebb0: + v1 = vconst.i32x4 [0 1 2 3] + v2 = raw_bitcast.i8x16 v1 ; we have to cast because shuffle is type-limited to Tx16 + ; keep each lane in place from the first vector + v3 = shuffle v2, v2, [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15] + v4 = raw_bitcast.i32x4 v3 + v5 = extractlane.i32x4 v4, 3 + v6 = icmp_imm eq v5, 3 + v7 = extractlane.i32x4 v4, 0 + v8 = icmp_imm eq v7, 0 + v9 = band v6, v8 + return v9 +} + +; run diff --git a/cranelift/filetests/filetests/isa/x86/vconst-rodata.clif b/cranelift/filetests/filetests/isa/x86/vconst-rodata.clif index 99e8455ed4..34c203dce6 100644 --- a/cranelift/filetests/filetests/isa/x86/vconst-rodata.clif +++ b/cranelift/filetests/filetests/isa/x86/vconst-rodata.clif @@ -1,6 +1,5 @@ test rodata set enable_simd=true -set probestack_enabled=false target x86_64 haswell function %test_vconst_i32() -> i32x4 { diff --git a/cranelift/reader/src/parser.rs b/cranelift/reader/src/parser.rs index 7965a4a31a..bc9436fe0c 100644 --- a/cranelift/reader/src/parser.rs +++ b/cranelift/reader/src/parser.rs @@ -2243,23 +2243,6 @@ impl<'a> 
Parser<'a> { opcode, imm: self.match_imm64("expected immediate integer operand")?, }, - InstructionFormat::UnaryImm128 => match explicit_control_type { - None => { - return err!( - self.loc, - "Expected {:?} to have a controlling type variable, e.g. inst.i32x4", - opcode - ) - } - Some(ty) => { - let uimm128 = self.match_uimm128_or_literals(ty)?; - let constant_handle = ctx.function.dfg.constants.insert(uimm128.0.to_vec()); - InstructionData::UnaryImm128 { - opcode, - imm: constant_handle, - } - } - }, InstructionFormat::UnaryIeee32 => InstructionData::UnaryIeee32 { opcode, imm: self.match_ieee32("expected immediate 32-bit float operand")?, @@ -2442,6 +2425,36 @@ impl<'a> Parser<'a> { let lane = self.match_uimm8("expected lane number")?; InstructionData::ExtractLane { opcode, lane, arg } } + InstructionFormat::UnaryConst => match explicit_control_type { + None => { + return err!( + self.loc, + "Expected {:?} to have a controlling type variable, e.g. inst.i32x4", + opcode + ) + } + Some(controlling_type) => { + let uimm128 = self.match_uimm128_or_literals(controlling_type)?; + let constant_handle = ctx.function.dfg.constants.insert(uimm128.to_vec()); + InstructionData::UnaryConst { + opcode, + constant_handle, + } + } + }, + InstructionFormat::Shuffle => { + let a = self.match_value("expected SSA value first operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let b = self.match_value("expected SSA value second operand")?; + self.match_token(Token::Comma, "expected ',' between operands")?; + let uimm128 = self.match_uimm128_or_literals(I8X16)?; + let mask = ctx.function.dfg.immediates.push(uimm128.to_vec()); + InstructionData::Shuffle { + opcode, + mask, + args: [a, b], + } + } InstructionFormat::IntCompare => { let cond = self.match_enum("expected intcc condition code")?; let lhs = self.match_value("expected SSA value first operand")?; diff --git a/cranelift/serde/src/serde_clif_json.rs b/cranelift/serde/src/serde_clif_json.rs index 
90935e9234..0d19ee5fa0 100644 --- a/cranelift/serde/src/serde_clif_json.rs +++ b/cranelift/serde/src/serde_clif_json.rs @@ -1,4 +1,3 @@ -use cranelift_codegen::ir::immediates::Uimm128; use cranelift_codegen::ir::{Ebb, Function, Inst, InstructionData, Signature}; use serde_derive::{Deserialize, Serialize}; @@ -59,6 +58,11 @@ pub enum SerInstData { arg: String, lane: String, }, + Shuffle { + opcode: String, + args: [String; 2], + mask: String, + }, IntCompare { opcode: String, args: [String; 2], @@ -262,14 +266,6 @@ pub fn get_inst_data(inst_index: Inst, func: &Function) -> SerInstData { opcode: opcode.to_string(), imm: imm.to_string(), }, - InstructionData::UnaryImm128 { opcode, imm } => { - let data = func.dfg.constants.get(imm); - let uimm128 = Uimm128::from(&data[..]); - SerInstData::UnaryImm { - opcode: opcode.to_string(), - imm: uimm128.to_string(), - } - } InstructionData::UnaryIeee32 { opcode, imm } => SerInstData::UnaryIeee32 { opcode: opcode.to_string(), imm: imm.to_string(), @@ -340,6 +336,28 @@ pub fn get_inst_data(inst_index: Inst, func: &Function) -> SerInstData { arg: arg.to_string(), lane: lane.to_string(), }, + InstructionData::UnaryConst { + opcode, + constant_handle, + } => { + let constant = func.dfg.constants.get(constant_handle); + SerInstData::UnaryImm { + opcode: opcode.to_string(), + imm: format!("{:?}", constant), + } + } + InstructionData::Shuffle { opcode, args, mask } => { + let mask = func + .dfg + .immediates + .get(mask) + .expect("Expected shuffle mask to already be inserted in immediate mapping"); + SerInstData::Shuffle { + opcode: opcode.to_string(), + args: [args[0].to_string(), args[1].to_string()], + mask: format!("{:?}", mask), + } + } InstructionData::IntCompare { opcode, args, cond } => { let hold_args = [args[0].to_string(), args[1].to_string()]; SerInstData::IntCompare { diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs index c420147550..a7bb331a39 100644 --- 
a/cranelift/wasm/src/code_translator.rs +++ b/cranelift/wasm/src/code_translator.rs @@ -974,9 +974,20 @@ pub fn translate_operator( builder, )) } + Operator::V8x16Shuffle { lanes, .. } => { + let (vector_a, vector_b) = state.pop2(); + let a = optionally_bitcast_vector(vector_a, I8X16, builder); + let b = optionally_bitcast_vector(vector_b, I8X16, builder); + let mask = builder.func.dfg.immediates.push(lanes.to_vec()); + let shuffled = builder.ins().shuffle(a, b, mask); + state.push1(shuffled) + // At this point the original types of a and b are lost; users of this value (i.e. this + // WASM-to-CLIF translator) may need to raw_bitcast for type-correctness. This is due + // to WASM using the less specific v128 type for certain operations and more specific + // types (e.g. i8x16) for others. + } Operator::V128Load { .. } | Operator::V128Store { .. } - | Operator::V8x16Shuffle { .. } | Operator::I8x16Eq | Operator::I8x16Ne | Operator::I8x16LtS