Add x86-specific instruction for i64x2 multiplication
Without this special instruction, it is impossible to legalize `imul` on i64x2 to both the AVX512 instruction and the SSE instruction sequence. This extra instruction would be rendered unnecessary by the x64 backend.
This commit is contained in:
@@ -1645,6 +1645,7 @@ fn define_simd(
|
|||||||
let x86_pmaxu = x86.by_name("x86_pmaxu");
|
let x86_pmaxu = x86.by_name("x86_pmaxu");
|
||||||
let x86_pmins = x86.by_name("x86_pmins");
|
let x86_pmins = x86.by_name("x86_pmins");
|
||||||
let x86_pminu = x86.by_name("x86_pminu");
|
let x86_pminu = x86.by_name("x86_pminu");
|
||||||
|
let x86_pmullq = x86.by_name("x86_pmullq");
|
||||||
let x86_pshufb = x86.by_name("x86_pshufb");
|
let x86_pshufb = x86.by_name("x86_pshufb");
|
||||||
let x86_pshufd = x86.by_name("x86_pshufd");
|
let x86_pshufd = x86.by_name("x86_pshufd");
|
||||||
let x86_psll = x86.by_name("x86_psll");
|
let x86_psll = x86.by_name("x86_psll");
|
||||||
@@ -2101,9 +2102,8 @@ fn define_simd(
|
|||||||
|
|
||||||
// SIMD integer multiplication for I64x2 using AVX512.
|
// SIMD integer multiplication for I64x2 using AVX512.
|
||||||
{
|
{
|
||||||
let imul = imul.bind(vector(I64, sse_vector_size));
|
|
||||||
e.enc_32_64_maybe_isap(
|
e.enc_32_64_maybe_isap(
|
||||||
imul,
|
x86_pmullq,
|
||||||
rec_evex_reg_vvvv_rm_128.opcodes(&PMULLQ).w(),
|
rec_evex_reg_vvvv_rm_128.opcodes(&PMULLQ).w(),
|
||||||
Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL
|
Some(use_avx512dq_simd), // TODO need an OR predicate to join with AVX512VL
|
||||||
);
|
);
|
||||||
|
|||||||
@@ -532,6 +532,23 @@ pub(crate) fn define(
|
|||||||
.operands_out(vec![a]),
|
.operands_out(vec![a]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let x = &Operand::new("x", I64x2);
|
||||||
|
let y = &Operand::new("y", I64x2);
|
||||||
|
let a = &Operand::new("a", I64x2);
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"x86_pmullq",
|
||||||
|
r#"
|
||||||
|
Multiply Packed Integers -- Multiply two 64x2 integers and receive a 64x2 result with
|
||||||
|
lane-wise wrapping if the result overflows. This instruction is necessary to add distinct
|
||||||
|
encodings for CPUs with newer vector features.
|
||||||
|
"#,
|
||||||
|
&formats.binary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x, y])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
let x = &Operand::new("x", TxN);
|
let x = &Operand::new("x", TxN);
|
||||||
let y = &Operand::new("y", TxN);
|
let y = &Operand::new("y", TxN);
|
||||||
let f = &Operand::new("f", iflags);
|
let f = &Operand::new("f", iflags);
|
||||||
|
|||||||
@@ -359,6 +359,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
|
|||||||
let icmp = insts.by_name("icmp");
|
let icmp = insts.by_name("icmp");
|
||||||
let imax = insts.by_name("imax");
|
let imax = insts.by_name("imax");
|
||||||
let imin = insts.by_name("imin");
|
let imin = insts.by_name("imin");
|
||||||
|
let imul = insts.by_name("imul");
|
||||||
let ineg = insts.by_name("ineg");
|
let ineg = insts.by_name("ineg");
|
||||||
let insertlane = insts.by_name("insertlane");
|
let insertlane = insts.by_name("insertlane");
|
||||||
let ishl = insts.by_name("ishl");
|
let ishl = insts.by_name("ishl");
|
||||||
@@ -763,6 +764,12 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD imul
|
||||||
|
{
|
||||||
|
let imul = imul.bind(vector(I64, sse_vector_size));
|
||||||
|
narrow.legalize(def!(c = imul(a, b)), vec![def!(c = x86_pmullq(a, b))]);
|
||||||
|
}
|
||||||
|
|
||||||
narrow.custom_legalize(shuffle, "convert_shuffle");
|
narrow.custom_legalize(shuffle, "convert_shuffle");
|
||||||
narrow.custom_legalize(extractlane, "convert_extractlane");
|
narrow.custom_legalize(extractlane, "convert_extractlane");
|
||||||
narrow.custom_legalize(insertlane, "convert_insertlane");
|
narrow.custom_legalize(insertlane, "convert_insertlane");
|
||||||
|
|||||||
@@ -1911,6 +1911,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
|||||||
| Opcode::X86Pmaxu
|
| Opcode::X86Pmaxu
|
||||||
| Opcode::X86Pmins
|
| Opcode::X86Pmins
|
||||||
| Opcode::X86Pminu
|
| Opcode::X86Pminu
|
||||||
|
| Opcode::X86Pmullq
|
||||||
| Opcode::X86Packss
|
| Opcode::X86Packss
|
||||||
| Opcode::X86Punpckh
|
| Opcode::X86Punpckh
|
||||||
| Opcode::X86Punpckl
|
| Opcode::X86Punpckl
|
||||||
|
|||||||
@@ -69,3 +69,10 @@ block0:
|
|||||||
; nextln: v1 = band v0, v4
|
; nextln: v1 = band v0, v4
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function %imul(i64x2, i64x2) {
|
||||||
|
block0(v0:i64x2, v1:i64x2):
|
||||||
|
v2 = imul v0, v1
|
||||||
|
; check: v2 = x86_pmullq v0, v1
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ function %imul_i64x2() {
|
|||||||
block0:
|
block0:
|
||||||
[-, %xmm1] v0 = vconst.i64x2 [1 2]
|
[-, %xmm1] v0 = vconst.i64x2 [1 2]
|
||||||
[-, %xmm2] v1 = vconst.i64x2 [2 2]
|
[-, %xmm2] v1 = vconst.i64x2 [2 2]
|
||||||
[-, %xmm14] v2 = imul v0, v1 ; bin: 62 72 f5 08 40 f2
|
[-, %xmm14] v2 = x86_pmullq v0, v1 ; bin: 62 72 f5 08 40 f2
|
||||||
; 62, mandatory EVEX prefix
|
; 62, mandatory EVEX prefix
|
||||||
; 72 = 0111 0010, R is set (MSB in %xmm14) while X, B, and R' are unset (note these are all inverted); mm is set to 0F38
|
; 72 = 0111 0010, R is set (MSB in %xmm14) while X, B, and R' are unset (note these are all inverted); mm is set to 0F38
|
||||||
; f5 = 1111 0101, W is set (64-bit op), vvvv set to 1 (inverted), bit 2 always set, pp set to 01
|
; f5 = 1111 0101, W is set (64-bit op), vvvv set to 1 (inverted), bit 2 always set, pp set to 01
|
||||||
|
|||||||
Reference in New Issue
Block a user