Introduce the x86-specific `x86_pmullq` instruction for I64x2 multiplication.

Summary of the hunks below:
  * encodings.rs: the EVEX PMULLQ encoding (predicated on AVX512DQ) is attached to `x86_pmullq`
    instead of to `imul.i64x2`.
  * instructions.rs: defines the `x86_pmullq` instruction (binary format, I64x2 operands).
  * legalize.rs: expands `imul.i64x2` into `x86_pmullq`. The instruction is looked up locally in
    that hunk because `def!` needs a binding named `x86_pmullq` in scope.
  * aarch64/lower_inst.rs: lists the new opcode alongside the other x86-only opcodes.
  * filetests: cover the legalization and the binary emission of `x86_pmullq`.

Note: leading whitespace inside the hunks was reconstructed; apply with whitespace tolerance if a
context line fails to match.

diff --git a/cranelift/codegen/meta/src/isa/x86/encodings.rs b/cranelift/codegen/meta/src/isa/x86/encodings.rs
index 65df907256..536a4c7608 100644
--- a/cranelift/codegen/meta/src/isa/x86/encodings.rs
+++ b/cranelift/codegen/meta/src/isa/x86/encodings.rs
@@ -1645,6 +1645,7 @@ fn define_simd(
     let x86_pmaxu = x86.by_name("x86_pmaxu");
     let x86_pmins = x86.by_name("x86_pmins");
     let x86_pminu = x86.by_name("x86_pminu");
+    let x86_pmullq = x86.by_name("x86_pmullq");
     let x86_pshufb = x86.by_name("x86_pshufb");
     let x86_pshufd = x86.by_name("x86_pshufd");
     let x86_psll = x86.by_name("x86_psll");
@@ -2101,9 +2102,8 @@ fn define_simd(
 
     // SIMD integer multiplication for I64x2 using a AVX512.
     {
-        let imul = imul.bind(vector(I64, sse_vector_size));
         e.enc_32_64_maybe_isap(
-            imul,
+            x86_pmullq,
             rec_evex_reg_vvvv_rm_128.opcodes(&PMULLQ).w(),
             Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL
         );
diff --git a/cranelift/codegen/meta/src/isa/x86/instructions.rs b/cranelift/codegen/meta/src/isa/x86/instructions.rs
index f516a928dd..3e258713a0 100644
--- a/cranelift/codegen/meta/src/isa/x86/instructions.rs
+++ b/cranelift/codegen/meta/src/isa/x86/instructions.rs
@@ -532,6 +532,23 @@ pub(crate) fn define(
         .operands_out(vec![a]),
     );
 
+    let x = &Operand::new("x", I64x2);
+    let y = &Operand::new("y", I64x2);
+    let a = &Operand::new("a", I64x2);
+    ig.push(
+        Inst::new(
+            "x86_pmullq",
+            r#"
+        Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with
+        lane-wise wrapping if the result overflows. This instruction is necessary to add distinct
+        encodings for CPUs with newer vector features.
+        "#,
+            &formats.binary,
+        )
+        .operands_in(vec![x, y])
+        .operands_out(vec![a]),
+    );
+
     let x = &Operand::new("x", TxN);
     let y = &Operand::new("y", TxN);
     let f = &Operand::new("f", iflags);
diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs
index 5d7e3c7619..6e7864794e 100644
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs
@@ -359,6 +359,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
     let icmp = insts.by_name("icmp");
     let imax = insts.by_name("imax");
     let imin = insts.by_name("imin");
+    let imul = insts.by_name("imul");
     let ineg = insts.by_name("ineg");
     let insertlane = insts.by_name("insertlane");
     let ishl = insts.by_name("ishl");
@@ -763,6 +764,17 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
         );
     }
 
+    // SIMD imul
+    {
+        // `def!` refers to instructions through local bindings (like `imul` below), so the
+        // x86-specific instruction has to be looked up before it can appear in the expansion.
+        let x86_pmullq = x86_instructions.by_name("x86_pmullq");
+        let imul = imul.bind(vector(I64, sse_vector_size));
+        // NOTE(review): the only encoding added for x86_pmullq is predicated on AVX512DQ;
+        // confirm this unconditional expansion is intended for targets without that feature.
+        narrow.legalize(def!(c = imul(a, b)), vec![def!(c = x86_pmullq(a, b))]);
+    }
+
     narrow.custom_legalize(shuffle, "convert_shuffle");
     narrow.custom_legalize(extractlane, "convert_extractlane");
     narrow.custom_legalize(insertlane, "convert_insertlane");
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index 56fd628932..443137f43f 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1911,6 +1911,7 @@ pub(crate) fn lower_insn_to_regs>(
         | Opcode::X86Pmaxu
         | Opcode::X86Pmins
         | Opcode::X86Pminu
+        | Opcode::X86Pmullq
         | Opcode::X86Packss
         | Opcode::X86Punpckh
         | Opcode::X86Punpckl
diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif
index c4f7b886dc..f9984cdd9c 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif
@@ -69,3 +69,10 @@ block0:
     ; nextln: v1 = band v0, v4
     return
 }
+
+function %imul(i64x2, i64x2) {
+block0(v0:i64x2, v1:i64x2):
+    v2 = imul v0, v1
+    ; check: v2 = x86_pmullq v0, v1
+    return
+}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif b/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif
index f328dbf206..d9729ae161 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-avx512-arithmetic-binemit.clif
@@ -6,7 +6,7 @@ function %imul_i64x2() {
 block0:
     [-, %xmm1] v0 = vconst.i64x2 [1 2]
     [-, %xmm2] v1 = vconst.i64x2 [2 2]
-    [-, %xmm14] v2 = imul v0, v1 ; bin: 62 72 f5 08 40 f2
+    [-, %xmm14] v2 = x86_pmullq v0, v1 ; bin: 62 72 f5 08 40 f2
     ; 62, mandatory EVEX prefix
     ; 72 = 0111 0010, R is set (MSB in %xmm14) while X, B, and R' are unset (note these are all inverted); mm is set to 0F38
     ; f5 = 1111 0101, W is set (64-bit op), vvvv set to 1 (inverted), bit 2 always set, pp set to 01