Add x86 encoding for SIMD imul

Only i16x8 and i32x4 are encoded in this commit mainly because i8x16 and i64x2 do not have simple encodings in x86. i64x2 is not required by the SIMD spec and there is discussion (https://github.com/WebAssembly/simd/pull/98#issuecomment-530092217) about removing i8x16.
This commit is contained in:
Andrew Brown
2019-09-18 14:44:06 -07:00
parent 168ad7fda3
commit 630cb3ee62
5 changed files with 67 additions and 4 deletions

View File

@@ -1945,6 +1945,16 @@ pub(crate) fn define(
e.enc_32_64(isub, rec_fa.opcodes(*opcodes));
}
// SIMD integer multiplication: the x86 ISA does not have simple encodings for multiplying I8x16
// and I64x2, and these are (at the time of writing) not necessary for WASM SIMD.
for (ty, opcodes, isap) in &[
(I16, &PMULLW[..], None),
(I32, &PMULLD[..], Some(use_sse41_simd)),
] {
let imul = imul.bind_vector_from_lane(ty.clone(), sse_vector_size);
e.enc_32_64_maybe_isap(imul, rec_fa.opcodes(opcodes), *isap);
}
// SIMD icmp using PCMPEQ*
let mut pcmpeq_mapping: HashMap<u64, (&[u8], Option<SettingPredicateNumber>)> = HashMap::new();
pcmpeq_mapping.insert(8, (&PCMPEQB, None));

View File

@@ -281,6 +281,14 @@ pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20];
/// Insert word (SSE2).
///
/// Opcode bytes: `66 0F C4`.
pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4];
/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
/// each product in xmm1 (SSE2).
///
/// Opcode bytes: `66 0F D5`. Used to encode `imul` on `i16x8` vectors; no ISA predicate is
/// attached to that encoding.
pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5];
/// Multiply the packed doubleword signed integers in xmm1 and xmm2/m128 and store the low 32
/// bits of each product in xmm1 (SSE4.1).
///
/// Opcode bytes: `66 0F 38 40`. Used to encode `imul` on `i32x4` vectors; that encoding is gated
/// on the SSE4.1 ISA predicate.
pub static PMULLD: [u8; 4] = [0x66, 0x0f, 0x38, 0x40];
/// Pop top of stack into r{16,32,64}; increment stack pointer.
///
/// NOTE(review): this is a single opcode byte; presumably the destination register is folded
/// into it by the recipe that emits it -- confirm at the use site.
pub static POP_REG: [u8; 1] = [0x58];

View File

@@ -1722,8 +1722,7 @@ pub(crate) fn define(
Wrapping integer multiplication: `a := x y \pmod{2^B}`.
This instruction does not depend on the signed/unsigned interpretation
of the
operands.
of the operands.
Polymorphic over all integer types (vector and scalar).
"#,

View File

@@ -120,3 +120,47 @@ ebb0:
return ; bin: c3
}
function %imul_i32x4() -> b1 {
ebb0:
; Multiply four i32 lanes by 2 and check lanes 0, 1 and 3 of the product (lane 2 is not checked).
[-, %xmm0] v0 = vconst.i32x4 [-1 0 1 -2147483647] ; e.g. -2147483647 == 0x80_00_00_01
[-, %xmm1] v1 = vconst.i32x4 [2 2 2 2]
[-, %xmm0] v2 = imul v0, v1 ; bin: 66 0f 38 40 c1
v3 = extractlane v2, 0
v4 = icmp_imm eq v3, -2 ; -1 * 2 == -2
v5 = extractlane v2, 1
v6 = icmp_imm eq v5, 0 ; 0 * 2 == 0
v7 = extractlane v2, 3
v8 = icmp_imm eq v7, 2 ; 0x80_00_00_01 * 2 == 0x1_00_00_00_02 (and the 1 is dropped)
v9 = band v4, v6
v10 = band v8, v9 ; all three lane checks must hold
return v10
}
; run
function %imul_i16x8() -> b1 {
ebb0:
; Multiply i16 lanes by 2 and check lanes 0, 1 and 3 of the product (the other lanes are not
; checked).
[-, %xmm1] v0 = vconst.i16x8 [-1 0 1 32767 0 0 0 0] ; e.g. 32767 == 0x7f_ff
[-, %xmm2] v1 = vconst.i16x8 [2 2 2 2 0 0 0 0]
[-, %xmm1] v2 = imul v0, v1 ; bin: 66 0f d5 ca
v3 = extractlane v2, 0
v4 = icmp_imm eq v3, 0xfffe ; TODO -2 will not work here and below because v3 is being
; uextend-ed, not sextend-ed
v5 = extractlane v2, 1
v6 = icmp_imm eq v5, 0 ; 0 * 2 == 0
v7 = extractlane v2, 3
v8 = icmp_imm eq v7, 0xfffe ; 0x7f_ff * 2 == 0xff_fe
v9 = band v4, v6
v10 = band v8, v9 ; all three lane checks must hold
return v10
}
; run

View File

@@ -1008,6 +1008,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
let a = state.pop1();
state.push1(builder.ins().ineg(a))
}
Operator::I16x8Mul | Operator::I32x4Mul => {
let (a, b) = state.pop2();
state.push1(builder.ins().imul(a, b))
}
Operator::I8x16Eq
| Operator::I8x16Ne
| Operator::I8x16LtS
@@ -1074,13 +1078,11 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I16x8AddSaturateU
| Operator::I16x8SubSaturateS
| Operator::I16x8SubSaturateU
| Operator::I16x8Mul
| Operator::I32x4AnyTrue
| Operator::I32x4AllTrue
| Operator::I32x4Shl
| Operator::I32x4ShrS
| Operator::I32x4ShrU
| Operator::I32x4Mul
| Operator::I64x2AnyTrue
| Operator::I64x2AllTrue
| Operator::I64x2Shl