Add x86_pmuludq
This instruction multiplies the low 32 bits of each 64-bit lane of two 64x2 unsigned integer vectors, producing a 64-bit product per lane (an i64x2 result); it is needed for lowering Wasm's i64x2.mul.
This commit is contained in:
@@ -1646,6 +1646,7 @@ fn define_simd(
|
||||
let x86_pmins = x86.by_name("x86_pmins");
|
||||
let x86_pminu = x86.by_name("x86_pminu");
|
||||
let x86_pmullq = x86.by_name("x86_pmullq");
|
||||
let x86_pmuludq = x86.by_name("x86_pmuludq");
|
||||
let x86_pshufb = x86.by_name("x86_pshufb");
|
||||
let x86_pshufd = x86.by_name("x86_pshufd");
|
||||
let x86_psll = x86.by_name("x86_psll");
|
||||
@@ -2100,6 +2101,9 @@ fn define_simd(
|
||||
e.enc_both_inferred_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
|
||||
}
|
||||
|
||||
// SIMD multiplication with lane expansion.
|
||||
e.enc_both_inferred(x86_pmuludq, rec_fa.opcodes(&PMULUDQ));
|
||||
|
||||
// SIMD integer multiplication for I64x2 using AVX512.
|
||||
{
|
||||
e.enc_32_64_maybe_isap(
|
||||
|
||||
@@ -475,10 +475,11 @@ pub(crate) fn define(
|
||||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
let I64x2 = &TypeVar::new(
|
||||
"I64x2",
|
||||
"A SIMD vector type containing one large integer (the upper lane is concatenated with \
|
||||
the lower lane to form the integer)",
|
||||
let I128 = &TypeVar::new(
|
||||
"I128",
|
||||
"A SIMD vector type containing one large integer (due to Cranelift type constraints, \
|
||||
this uses the Cranelift I64X2 type but should be understood as one large value, i.e., the \
|
||||
upper lane is concatenated with the lower lane to form the integer)",
|
||||
TypeSetBuilder::new()
|
||||
.ints(64..64)
|
||||
.simd_lanes(2..2)
|
||||
@@ -487,7 +488,7 @@ pub(crate) fn define(
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", IxN).with_doc("Vector value to shift");
|
||||
let y = &Operand::new("y", I64x2).with_doc("Number of bits to shift");
|
||||
let y = &Operand::new("y", I128).with_doc("Number of bits to shift");
|
||||
let a = &Operand::new("a", IxN);
|
||||
|
||||
ig.push(
|
||||
@@ -532,6 +533,16 @@ pub(crate) fn define(
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let I64x2 = &TypeVar::new(
|
||||
"I64x2",
|
||||
"A SIMD vector type containing two 64-bit integers",
|
||||
TypeSetBuilder::new()
|
||||
.ints(64..64)
|
||||
.simd_lanes(2..2)
|
||||
.includes_scalars(false)
|
||||
.build(),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", I64x2);
|
||||
let y = &Operand::new("y", I64x2);
|
||||
let a = &Operand::new("a", I64x2);
|
||||
@@ -549,6 +560,20 @@ pub(crate) fn define(
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
ig.push(
|
||||
Inst::new(
|
||||
"x86_pmuludq",
|
||||
r#"
|
||||
Multiply Packed Integers -- Using only the bottom 32 bits in each lane, multiply two 64x2
|
||||
unsigned integers and receive a 64x2 result. This instruction avoids the need for handling
|
||||
overflow as in `x86_pmullq`.
|
||||
"#,
|
||||
&formats.binary,
|
||||
)
|
||||
.operands_in(vec![x, y])
|
||||
.operands_out(vec![a]),
|
||||
);
|
||||
|
||||
let x = &Operand::new("x", TxN);
|
||||
let y = &Operand::new("y", TxN);
|
||||
let f = &Operand::new("f", iflags);
|
||||
|
||||
@@ -473,6 +473,10 @@ pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
|
||||
/// bits of each product in xmm1 (AVX512VL/DQ). Requires an EVEX encoding.
|
||||
pub static PMULLQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
|
||||
|
||||
/// Multiply packed unsigned doubleword integers in xmm1 by packed unsigned doubleword integers
|
||||
/// in xmm2/m128, and store the quadword results in xmm1 (SSE2).
|
||||
pub static PMULUDQ: [u8; 3] = [0x66, 0x0f, 0xf4];
|
||||
|
||||
/// Pop top of stack into r{16,32,64}; increment stack pointer.
|
||||
pub static POP_REG: [u8; 1] = [0x58];
|
||||
|
||||
|
||||
Reference in New Issue
Block a user