Add x86 SIMD instructions for min and max

Only the I8, I16, and I32 versions are included: the I64 variants (e.g. VPMAXSQ, VPMINSQ) exist only as AVX-512 instructions, and Cranelift lacks support for AVX.
This commit is contained in:
Andrew Brown
2019-10-25 10:12:35 -07:00
parent f053595748
commit 0ab5760fd7
5 changed files with 194 additions and 0 deletions

View File

@@ -25,3 +25,30 @@ ebb0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm7]):
[-, %xmm0] v2 = icmp sgt v0, v1 ; bin: 66 0f 38 37 c7
return v2
}
; Encoding checks for the four 8-bit-lane min/max instructions, with v0 in %xmm3 and v1 in %xmm1.
; The unsigned forms (PMAXUB/PMINUB) live in the two-byte 0f opcode map, while the signed forms
; (PMAXSB/PMINSB) live in the three-byte 0f 38 map. The final byte d9 is the ModRM byte:
; mod=11, reg=%xmm3 (destination, same register as v0), r/m=%xmm1 (v1).
function %min_max_i8x16(i8x16, i8x16) {
ebb0(v0: i8x16 [%xmm3], v1: i8x16 [%xmm1]):
[-, %xmm3] v2 = x86_pmaxs v0, v1 ; bin: 66 0f 38 3c d9
[-, %xmm3] v3 = x86_pmaxu v0, v1 ; bin: 66 0f de d9
[-, %xmm3] v4 = x86_pmins v0, v1 ; bin: 66 0f 38 38 d9
[-, %xmm3] v5 = x86_pminu v0, v1 ; bin: 66 0f da d9
return
}
; Encoding checks for the four 16-bit-lane min/max instructions, with v0 in %xmm2 and v1 in %xmm5.
; For this lane width the signed forms (PMAXSW/PMINSW) live in the two-byte 0f opcode map and the
; unsigned forms (PMAXUW/PMINUW) live in the three-byte 0f 38 map, the opposite of the 8-bit case.
; The final byte d5 is the ModRM byte: mod=11, reg=%xmm2 (destination), r/m=%xmm5 (v1).
function %min_max_i16x8(i16x8, i16x8) {
ebb0(v0: i16x8 [%xmm2], v1: i16x8 [%xmm5]):
[-, %xmm2] v2 = x86_pmaxs v0, v1 ; bin: 66 0f ee d5
[-, %xmm2] v3 = x86_pmaxu v0, v1 ; bin: 66 0f 38 3e d5
[-, %xmm2] v4 = x86_pmins v0, v1 ; bin: 66 0f ea d5
[-, %xmm2] v5 = x86_pminu v0, v1 ; bin: 66 0f 38 3a d5
return
}
; Encoding checks for the four 32-bit-lane min/max instructions, with v0 in %xmm2 and v1 in %xmm4.
; All four forms (PMAXSD/PMAXUD/PMINSD/PMINUD) live in the three-byte 0f 38 opcode map.
; The final byte d4 is the ModRM byte: mod=11, reg=%xmm2 (destination), r/m=%xmm4 (v1).
function %min_max_i32x4(i32x4, i32x4) {
ebb0(v0: i32x4 [%xmm2], v1: i32x4 [%xmm4]):
[-, %xmm2] v2 = x86_pmaxs v0, v1 ; bin: 66 0f 38 3d d4
[-, %xmm2] v3 = x86_pmaxu v0, v1 ; bin: 66 0f 38 3f d4
[-, %xmm2] v4 = x86_pmins v0, v1 ; bin: 66 0f 38 39 d4
[-, %xmm2] v5 = x86_pminu v0, v1 ; bin: 66 0f 38 3b d4
return
}

View File

@@ -66,3 +66,44 @@ ebb0:
return v8
}
; run
; Signed max on i8x16: lane 0 compares 1 against 0xff (-1 as a signed byte), so a signed max must
; select 1 there and the whole result must equal v0.
function %maxs_i8x16() -> b1 {
ebb0:
v0 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] ; 1 will be greater than -1 == 0xff with
; signed max
v1 = vconst.i8x16 [0xff 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
v2 = x86_pmaxs v0, v1
; Compare lane-by-lane with the expected vector. Reducing v2 directly would also succeed for an
; unsigned max, because that yields 0xff in lane 0, which is still non-zero.
v3 = icmp eq v2, v0
v8 = vall_true v3
return v8
}
; run
; Unsigned max on i16x8: lane 0 compares 0 against 0xffff, so an unsigned max keeps 0xffff and
; every result lane is non-zero. A signed max would select 0 in lane 0 and the reduction would
; yield false.
function %maxu_i16x8() -> b1 {
ebb0:
v0 = vconst.i16x8 [0 1 1 1 1 1 1 1]
v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1] ; -1 == 0xffff will be greater with unsigned max
v2 = x86_pmaxu v0, v1
v8 = vall_true v2
return v8
}
; run
; Signed min on i32x4: lane 0 compares 0 against -1, so a signed min keeps -1 and every result
; lane is non-zero. An unsigned min would select 0 in lane 0 and the reduction would yield false.
function %mins_i32x4() -> b1 {
ebb0:
v0 = vconst.i32x4 [0 1 1 1]
v1 = vconst.i32x4 [-1 1 1 1] ; -1 == 0xffffffff will be less with signed min
v2 = x86_pmins v0, v1
v8 = vall_true v2
return v8
}
; run
; Unsigned min on i8x16: every lane compares 1 against 2, so the result is 1 in each lane and the
; reduction yields true.
; NOTE(review): 1 and 2 are both non-negative as signed bytes, so a signed min gives the same
; result, and a max would give 2, which is also non-zero. This case only shows that the
; instruction executes; it does not distinguish x86_pminu from the other three operations.
function %minu_i8x16() -> b1 {
ebb0:
v0 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] ; 1 < 2 with unsigned min
v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
v2 = x86_pminu v0, v1
v8 = vall_true v2
return v8
}
; run