Implement iabs for x86 SIMD
This only covers the vector types needed to implement the Wasm SIMD spec: `i8x16`, `i16x8`, and `i32x4`.
This commit is contained in:
@@ -1638,6 +1638,7 @@ fn define_simd(
|
|||||||
let fill_nop = shared.by_name("fill_nop");
|
let fill_nop = shared.by_name("fill_nop");
|
||||||
let fmul = shared.by_name("fmul");
|
let fmul = shared.by_name("fmul");
|
||||||
let fsub = shared.by_name("fsub");
|
let fsub = shared.by_name("fsub");
|
||||||
|
let iabs = shared.by_name("iabs");
|
||||||
let iadd = shared.by_name("iadd");
|
let iadd = shared.by_name("iadd");
|
||||||
let icmp = shared.by_name("icmp");
|
let icmp = shared.by_name("icmp");
|
||||||
let imul = shared.by_name("imul");
|
let imul = shared.by_name("imul");
|
||||||
@@ -2184,6 +2185,12 @@ fn define_simd(
|
|||||||
e.enc_both_inferred(avgr, rec_fa.opcodes(opcodes));
|
e.enc_both_inferred(avgr, rec_fa.opcodes(opcodes));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD integer absolute value.
|
||||||
|
for (ty, opcodes) in &[(I8, &PABSB[..]), (I16, &PABSW[..]), (I32, &PABSD)] {
|
||||||
|
let iabs = iabs.bind(vector(*ty, sse_vector_size));
|
||||||
|
e.enc_both_inferred_maybe_isap(iabs, rec_furm.opcodes(opcodes), Some(use_ssse3_simd));
|
||||||
|
}
|
||||||
|
|
||||||
// SIMD logical operations
|
// SIMD logical operations
|
||||||
let band = shared.by_name("band");
|
let band = shared.by_name("band");
|
||||||
let band_not = shared.by_name("band_not");
|
let band_not = shared.by_name("band_not");
|
||||||
|
|||||||
@@ -303,6 +303,17 @@ pub static OR_IMM8_SIGN_EXTEND: [u8; 1] = [0x83];
|
|||||||
/// Return the bitwise logical OR of packed single-precision values in xmm and x/m (SSE).
|
/// Return the bitwise logical OR of packed single-precision values in xmm and x/m (SSE).
|
||||||
pub static ORPS: [u8; 2] = [0x0f, 0x56];
|
pub static ORPS: [u8; 2] = [0x0f, 0x56];
|
||||||
|
|
||||||
|
/// Compute the absolute value of bytes in xmm2/m128 and store the unsigned result in xmm1 (SSSE3).
|
||||||
|
pub static PABSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x1c];
|
||||||
|
|
||||||
|
/// Compute the absolute value of 32-bit integers in xmm2/m128 and store the unsigned result in
|
||||||
|
/// xmm1 (SSSE3).
|
||||||
|
pub static PABSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x1e];
|
||||||
|
|
||||||
|
/// Compute the absolute value of 16-bit integers in xmm2/m128 and store the unsigned result in
|
||||||
|
/// xmm1 (SSSE3).
|
||||||
|
pub static PABSW: [u8; 4] = [0x66, 0x0f, 0x38, 0x1d];
|
||||||
|
|
||||||
/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed signed byte
|
/// Converts 8 packed signed word integers from xmm1 and from xmm2/m128 into 16 packed signed byte
|
||||||
/// integers in xmm1 using signed saturation (SSE2).
|
/// integers in xmm1 using signed saturation (SSE2).
|
||||||
pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63];
|
pub static PACKSSWB: [u8; 3] = [0x66, 0x0f, 0x63];
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ block0(v0: i8x16 [%xmm6], v1: i8x16 [%xmm2]):
|
|||||||
[-, %xmm6] v5 = ssub_sat v0, v1 ; bin: 66 0f e8 f2
|
[-, %xmm6] v5 = ssub_sat v0, v1 ; bin: 66 0f e8 f2
|
||||||
[-, %xmm6] v6 = usub_sat v0, v1 ; bin: 66 0f d8 f2
|
[-, %xmm6] v6 = usub_sat v0, v1 ; bin: 66 0f d8 f2
|
||||||
[-, %xmm6] v7 = avg_round v0, v1 ; bin: 66 0f e0 f2
|
[-, %xmm6] v7 = avg_round v0, v1 ; bin: 66 0f e0 f2
|
||||||
|
[-, %xmm6] v9 = iabs v1 ; bin: 66 0f 38 1c f2
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -23,6 +24,7 @@ block0(v0: i16x8 [%xmm3], v1: i16x8 [%xmm5]):
|
|||||||
[-, %xmm3] v6 = ssub_sat v0, v1 ; bin: 66 0f e9 dd
|
[-, %xmm3] v6 = ssub_sat v0, v1 ; bin: 66 0f e9 dd
|
||||||
[-, %xmm3] v7 = usub_sat v0, v1 ; bin: 66 0f d9 dd
|
[-, %xmm3] v7 = usub_sat v0, v1 ; bin: 66 0f d9 dd
|
||||||
[-, %xmm3] v8 = avg_round v0, v1 ; bin: 66 0f e3 dd
|
[-, %xmm3] v8 = avg_round v0, v1 ; bin: 66 0f e3 dd
|
||||||
|
[-, %xmm3] v9 = iabs v1 ; bin: 66 0f 38 1d dd
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -32,6 +34,7 @@ block0(v0: i32x4 [%xmm0], v1: i32x4 [%xmm1]):
|
|||||||
[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fe c1
|
[-, %xmm0] v2 = iadd v0, v1 ; bin: 66 0f fe c1
|
||||||
[-, %xmm0] v3 = isub v0, v1 ; bin: 66 0f fa c1
|
[-, %xmm0] v3 = isub v0, v1 ; bin: 66 0f fa c1
|
||||||
[-, %xmm0] v4 = imul v0, v1 ; bin: 66 0f 38 40 c1
|
[-, %xmm0] v4 = imul v0, v1 ; bin: 66 0f 38 40 c1
|
||||||
|
[-, %xmm0] v5 = iabs v1 ; bin: 66 0f 38 1e c1
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -270,3 +270,10 @@ block0:
|
|||||||
return v5
|
return v5
|
||||||
}
|
}
|
||||||
; run
|
; run
|
||||||
|
|
||||||
|
function %iabs(i32x4) -> i32x4 {
|
||||||
|
block0(v0: i32x4):
|
||||||
|
v1 = iabs v0
|
||||||
|
return v1
|
||||||
|
}
|
||||||
|
; run: %iabs([-42 -1 0 1]) == [42 1 0 1]
|
||||||
|
|||||||
Reference in New Issue
Block a user