Add i64x2 integer multiplication using AVX512DQ
This commit is contained in:
@@ -1612,6 +1612,7 @@ fn define_simd(
|
|||||||
let x86_ptest = x86.by_name("x86_ptest");
|
let x86_ptest = x86.by_name("x86_ptest");
|
||||||
|
|
||||||
// Shorthands for recipes.
|
// Shorthands for recipes.
|
||||||
|
let rec_evex_reg_vvvv_rm_128 = r.template("evex_reg_vvvv_rm_128");
|
||||||
let rec_f_ib = r.template("f_ib");
|
let rec_f_ib = r.template("f_ib");
|
||||||
let rec_fa = r.template("fa");
|
let rec_fa = r.template("fa");
|
||||||
let rec_fa_ib = r.template("fa_ib");
|
let rec_fa_ib = r.template("fa_ib");
|
||||||
@@ -1647,6 +1648,7 @@ fn define_simd(
|
|||||||
let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd");
|
let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd");
|
||||||
let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
|
let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
|
||||||
let use_sse42_simd = settings.predicate_by_name("use_sse42_simd");
|
let use_sse42_simd = settings.predicate_by_name("use_sse42_simd");
|
||||||
|
let use_avx512dq_simd = settings.predicate_by_name("use_avx512dq_simd");
|
||||||
|
|
||||||
// SIMD vector size: eventually multiple vector sizes may be supported but for now only
|
// SIMD vector size: eventually multiple vector sizes may be supported but for now only
|
||||||
// SSE-sized vectors are available.
|
// SSE-sized vectors are available.
|
||||||
@@ -1927,6 +1929,16 @@ fn define_simd(
|
|||||||
e.enc_32_64_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
|
e.enc_32_64_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD integer multiplication for I64x2 using AVX512.
|
||||||
|
{
|
||||||
|
let imul = imul.bind(vector(I64, sse_vector_size));
|
||||||
|
e.enc_32_64_maybe_isap(
|
||||||
|
imul,
|
||||||
|
rec_evex_reg_vvvv_rm_128.opcodes(&PMULLQ).w(),
|
||||||
|
Some(use_avx512dq_simd), // TODO need an AND predicate to also require AVX512VL (the 128-bit form needs both)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// SIMD integer average with rounding.
|
// SIMD integer average with rounding.
|
||||||
for (ty, opcodes) in &[(I8, &PAVGB[..]), (I16, &PAVGW[..])] {
|
for (ty, opcodes) in &[(I8, &PAVGB[..]), (I16, &PAVGW[..])] {
|
||||||
let avgr = avg_round.bind(vector(*ty, sse_vector_size));
|
let avgr = avg_round.bind(vector(*ty, sse_vector_size));
|
||||||
|
|||||||
@@ -421,6 +421,10 @@ pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5];
|
|||||||
/// bits of each product in xmm1 (SSE4.1).
|
/// bits of each product in xmm1 (SSE4.1).
|
||||||
pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
|
pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
|
||||||
|
|
||||||
|
/// Multiply the packed quadword signed integers in xmm2 and xmm3/m128 and store the low 64
|
||||||
|
/// bits of each product in xmm1 (AVX512VL/DQ). Requires an EVEX encoding.
|
||||||
|
pub static PMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
|
||||||
|
|
||||||
/// Pop top of stack into r{16,32,64}; increment stack pointer.
|
/// Pop top of stack into r{16,32,64}; increment stack pointer.
|
||||||
pub static POP_REG: [u8; 1] = [0x58];
|
pub static POP_REG: [u8; 1] = [0x58];
|
||||||
|
|
||||||
|
|||||||
17
filetests/isa/x86/simd-avx512-arithmetic-binemit.clif
Normal file
17
filetests/isa/x86/simd-avx512-arithmetic-binemit.clif
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
test binemit
|
||||||
|
set enable_simd
|
||||||
|
target x86_64 skylake has_avx512dq=true
|
||||||
|
|
||||||
|
function %imul_i64x2() {
|
||||||
|
block0:
|
||||||
|
[-, %xmm1] v0 = vconst.i64x2 [1 2]
|
||||||
|
[-, %xmm2] v1 = vconst.i64x2 [2 2]
|
||||||
|
[-, %xmm19] v2 = imul v0, v1 ; bin: 62 e2 f5 08 40 da
|
||||||
|
; 62, mandatory EVEX prefix
|
||||||
|
; e2 = 1110 0010, R, X, B are unset (inverted) while R' is set (MSB in %xmm19); mm is set to 0F38
|
||||||
|
; f5 = 1111 0101, W is set (64-bit op), vvvv set to 1 (inverted), bit 2 always set, pp set to 01
|
||||||
|
; 08 = 0000 1000, everything else is unset: L'L = 00 indicates 128-bit, V' is unset (inverted, %xmm1 has MSB of 0)
|
||||||
|
; 40, opcode (correct)
|
||||||
|
; da = 1101 1010, ModR/M byte using mod 0b11 (register-direct), 0b011 from %xmm19 in reg and 0b010 from %xmm2 in r/m
|
||||||
|
return
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user