Add x86 implementation for SIMD iadd

This commit is contained in:
Andrew Brown
2019-09-11 10:58:21 -07:00
parent cd426cb7bc
commit 766cf8ddfd
3 changed files with 65 additions and 4 deletions

View File

@@ -1999,6 +1999,17 @@ pub(crate) fn define(
e.enc_32_64(bound_regmove, rec_frmov.opcodes(vec![0x0f, 0x28])); e.enc_32_64(bound_regmove, rec_frmov.opcodes(vec![0x0f, 0x28]));
} }
// SIMD integer addition
for (ty, opcodes) in &[
(I8, &[0x66, 0x0f, 0xfc]), // PADDB from SSE2
(I16, &[0x66, 0x0f, 0xfd]), // PADDW from SSE2
(I32, &[0x66, 0x0f, 0xfe]), // PADDD from SSE2
(I64, &[0x66, 0x0f, 0xd4]), // PADDQ from SSE2
] {
let iadd = iadd.bind_vector_from_lane(ty.clone(), sse_vector_size);
e.enc_32_64(iadd, rec_fa.opcodes(opcodes.to_vec()));
}
// Reference type instructions // Reference type instructions
// Null references implemented as iconst 0. // Null references implemented as iconst 0.

View File

@@ -0,0 +1,50 @@
test run
test binemit
set enable_simd
target x86_64 skylake
function %iadd_i32x4() -> b1 {
; Adds the constant vectors [1 1 1 1] and [1 2 3 4] lane-wise and yields true only when
; lane 0 of the sum equals 2 and lane 3 equals 5.
ebb0:
[-, %xmm0] v0 = vconst.i32x4 [1 1 1 1]
[-, %xmm1] v1 = vconst.i32x4 [1 2 3 4]
; Expected bytes: 66 0f fe is PADDD; ModRM c1 = mod 11, reg 000 (xmm0), r/m 001 (xmm1).
[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fe c1
v3 = extractlane v2, 0
v4 = icmp_imm eq v3, 2
v5 = extractlane v2, 3
v6 = icmp_imm eq v5, 5
; TODO replace extractlanes with vector comparison
; Both lane comparisons must hold for the function to yield true.
v7 = band v4, v6
return v7
}
; run
function %iadd_i8x16_with_overflow() -> b1 {
; Adds 2 to every 8-bit lane holding 255 and yields true when lane 0 of the sum is 1,
; i.e. when the lane addition wraps modulo 256 (255 + 2 = 257 -> 1).
ebb0:
[-, %xmm0] v0 = vconst.i8x16 [255 255 255 255 255 255 255 255 255 255 255 255 255 255 255 255]
[-, %xmm7] v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
; Expected bytes: 66 0f fc is PADDB; ModRM c7 = mod 11, reg 000 (xmm0), r/m 111 (xmm7).
[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fc c7
v3 = extractlane v2, 0
v4 = icmp_imm eq v3, 1
; TODO replace extractlane with vector comparison
return v4
}
; run
function %iadd_i16x8(i16x8, i16x8) -> i16x8 {
; Encoding-only case: adds the two i16x8 arguments; only the emitted bytes are verified.
ebb0(v0: i16x8 [%xmm1], v1: i16x8 [%xmm2]):
; Expected bytes: 66 0f fd is PADDW; ModRM ca = mod 11, reg 001 (xmm1), r/m 010 (xmm2).
[-, %xmm1] v2 = iadd v0, v1 ; bin: 66 0f fd ca
return v2
}
function %iadd_i64x2(i64x2, i64x2) -> i64x2 {
; Encoding-only case: adds the two i64x2 arguments; only the emitted bytes are verified.
ebb0(v0: i64x2 [%xmm3], v1: i64x2 [%xmm4]):
; Expected bytes: 66 0f d4 is PADDQ; ModRM dc = mod 11, reg 011 (xmm3), r/m 100 (xmm4).
[-, %xmm3] v2 = iadd v0, v1 ; bin: 66 0f d4 dc
return v2
}

View File

@@ -986,6 +986,10 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
// to WASM using the less specific v128 type for certain operations and more specific // to WASM using the less specific v128 type for certain operations and more specific
// types (e.g. i8x16) for others. // types (e.g. i8x16) for others.
} }
Operator::I8x16Add | Operator::I16x8Add | Operator::I32x4Add | Operator::I64x2Add => {
let (a, b) = state.pop2();
state.push1(builder.ins().iadd(a, b))
}
Operator::V128Load { .. } Operator::V128Load { .. }
| Operator::V128Store { .. } | Operator::V128Store { .. }
| Operator::I8x16Eq | Operator::I8x16Eq
@@ -1041,7 +1045,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I8x16Shl | Operator::I8x16Shl
| Operator::I8x16ShrS | Operator::I8x16ShrS
| Operator::I8x16ShrU | Operator::I8x16ShrU
| Operator::I8x16Add
| Operator::I8x16AddSaturateS | Operator::I8x16AddSaturateS
| Operator::I8x16AddSaturateU | Operator::I8x16AddSaturateU
| Operator::I8x16Sub | Operator::I8x16Sub
@@ -1054,7 +1057,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I16x8Shl | Operator::I16x8Shl
| Operator::I16x8ShrS | Operator::I16x8ShrS
| Operator::I16x8ShrU | Operator::I16x8ShrU
| Operator::I16x8Add
| Operator::I16x8AddSaturateS | Operator::I16x8AddSaturateS
| Operator::I16x8AddSaturateU | Operator::I16x8AddSaturateU
| Operator::I16x8Sub | Operator::I16x8Sub
@@ -1067,7 +1069,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I32x4Shl | Operator::I32x4Shl
| Operator::I32x4ShrS | Operator::I32x4ShrS
| Operator::I32x4ShrU | Operator::I32x4ShrU
| Operator::I32x4Add
| Operator::I32x4Sub | Operator::I32x4Sub
| Operator::I32x4Mul | Operator::I32x4Mul
| Operator::I64x2Neg | Operator::I64x2Neg
@@ -1076,7 +1077,6 @@ pub fn translate_operator<FE: FuncEnvironment + ?Sized>(
| Operator::I64x2Shl | Operator::I64x2Shl
| Operator::I64x2ShrS | Operator::I64x2ShrS
| Operator::I64x2ShrU | Operator::I64x2ShrU
| Operator::I64x2Add
| Operator::I64x2Sub | Operator::I64x2Sub
| Operator::F32x4Abs | Operator::F32x4Abs
| Operator::F32x4Neg | Operator::F32x4Neg