Add x86 SIMD integer multiplication (imul) encodings for i16x8 and i32x4.

Review notes:
- %imul_i16x8 returns v10 (the three lane checks and-ed together), matching
  %imul_i32x4; returning v4 alone would leave the lane 1 and lane 3 checks unused.
- The `pcmpeq_mapping` context line in encodings.rs has lost its generic arguments
  (`HashMap)>`); restore it from the target file before applying this patch.

diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs
index f1df48f13b..680e0f0764 100644
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -1945,6 +1945,16 @@ pub(crate) fn define(
         e.enc_32_64(isub, rec_fa.opcodes(*opcodes));
     }
 
+    // SIMD integer multiplication: the x86 ISA does not have instructions for multiplying I8x16
+    // and I64x2 and these are (at the time of writing) not necessary for WASM SIMD.
+    for (ty, opcodes, isap) in &[
+        (I16, &PMULLW[..], None),
+        (I32, &PMULLD[..], Some(use_sse41_simd)),
+    ] {
+        let imul = imul.bind_vector_from_lane(ty.clone(), sse_vector_size);
+        e.enc_32_64_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
+    }
+
     // SIMD icmp using PCMPEQ*
     let mut pcmpeq_mapping: HashMap)> = HashMap::new();
     pcmpeq_mapping.insert(8, (&PCMPEQB, None));
diff --git a/cranelift/codegen/meta/src/isa/x86/opcodes.rs b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
index 706774a3d8..33b7d71c38 100644
--- a/cranelift/codegen/meta/src/isa/x86/opcodes.rs
+++ b/cranelift/codegen/meta/src/isa/x86/opcodes.rs
@@ -281,6 +281,14 @@ pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20];
 /// Insert word (SSE2).
 pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4];
 
+/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
+/// the results in xmm1 (SSE2).
+pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5];
+
+/// Multiply the packed doubleword signed integers in xmm1 and xmm2/m128 and store the low 32
+/// bits of each product in xmm1 (SSE4.1).
+pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
+
 /// Pop top of stack into r{16,32,64}; increment stack pointer.
 pub static POP_REG: [u8; 1] = [0x58];
 
diff --git a/cranelift/codegen/meta/src/shared/instructions.rs b/cranelift/codegen/meta/src/shared/instructions.rs
index d8f5474c29..49c7900e9b 100644
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -1722,8 +1722,7 @@ pub(crate) fn define(
         Wrapping integer multiplication: `a := x y \pmod{2^B}`.
 
         This instruction does not depend on the signed/unsigned interpretation
-        of the
-        operands.
+        of the operands.
 
         Polymorphic over all integer types (vector and scalar).
         "#,
diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif
index 8244177728..e2714a91dc 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic.clif
@@ -120,3 +120,47 @@ ebb0:
 
     return ; bin: c3
 }
+
+function %imul_i32x4() -> b1 {
+ebb0:
+[-, %xmm0] v0 = vconst.i32x4 [-1 0 1 -2147483647] ; e.g. -2147483647 == 0x80_00_00_01
+[-, %xmm1] v1 = vconst.i32x4 [2 2 2 2]
+[-, %xmm0] v2 = imul v0, v1 ; bin: 66 0f 38 40 c1
+
+    v3 = extractlane v2, 0
+    v4 = icmp_imm eq v3, -2
+
+    v5 = extractlane v2, 1
+    v6 = icmp_imm eq v5, 0
+
+    v7 = extractlane v2, 3
+    v8 = icmp_imm eq v7, 2 ; 0x80_00_00_01 * 2 == 0x1_00_00_00_02 (and the 1 is dropped)
+
+    v9 = band v4, v6
+    v10 = band v8, v9
+    return v10
+}
+; run
+
+function %imul_i16x8() -> b1 {
+ebb0:
+[-, %xmm1] v0 = vconst.i16x8 [-1 0 1 32767 0 0 0 0] ; e.g. 32767 == 0x7f_ff
+[-, %xmm2] v1 = vconst.i16x8 [2 2 2 2 0 0 0 0]
+[-, %xmm1] v2 = imul v0, v1 ; bin: 66 0f d5 ca
+
+    v3 = extractlane v2, 0
+    v4 = icmp_imm eq v3, 0xfffe ; TODO -2 will not work here and below because v3 is being
+                                ; uextend-ed, not sextend-ed
+
+    v5 = extractlane v2, 1
+    v6 = icmp_imm eq v5, 0
+
+    v7 = extractlane v2, 3
+    v8 = icmp_imm eq v7, 0xfffe ; 0x7f_ff * 2 == 0xff_fe
+
+    v9 = band v4, v6
+    v10 = band v8, v9
+
+    return v10
+}
+; run
diff --git a/cranelift/wasm/src/code_translator.rs b/cranelift/wasm/src/code_translator.rs
index ce2aa8dab7..8313212af9 100644
--- a/cranelift/wasm/src/code_translator.rs
+++ b/cranelift/wasm/src/code_translator.rs
@@ -1008,6 +1008,10 @@ pub fn translate_operator(
             let a = state.pop1();
             state.push1(builder.ins().ineg(a))
         }
+        Operator::I16x8Mul | Operator::I32x4Mul => {
+            let (a, b) = state.pop2();
+            state.push1(builder.ins().imul(a, b))
+        }
         Operator::I8x16Eq
         | Operator::I8x16Ne
         | Operator::I8x16LtS
@@ -1074,13 +1078,11 @@ pub fn translate_operator(
         | Operator::I16x8AddSaturateU
         | Operator::I16x8SubSaturateS
         | Operator::I16x8SubSaturateU
-        | Operator::I16x8Mul
         | Operator::I32x4AnyTrue
         | Operator::I32x4AllTrue
         | Operator::I32x4Shl
         | Operator::I32x4ShrS
         | Operator::I32x4ShrU
-        | Operator::I32x4Mul
         | Operator::I64x2AnyTrue
         | Operator::I64x2AllTrue
         | Operator::I64x2Shl