Add x86 SIMD instructions for min and max
Only the I8, I16, and I32 versions are included; the I64 versions are omitted because they require AVX-512 encodings and Cranelift lacks support for AVX.
This commit is contained in:
@@ -517,6 +517,10 @@ pub(crate) fn define(
|
|||||||
let x86_pop = x86.by_name("x86_pop");
|
let x86_pop = x86.by_name("x86_pop");
|
||||||
let x86_pextr = x86.by_name("x86_pextr");
|
let x86_pextr = x86.by_name("x86_pextr");
|
||||||
let x86_pinsr = x86.by_name("x86_pinsr");
|
let x86_pinsr = x86.by_name("x86_pinsr");
|
||||||
|
let x86_pmaxs = x86.by_name("x86_pmaxs");
|
||||||
|
let x86_pmaxu = x86.by_name("x86_pmaxu");
|
||||||
|
let x86_pmins = x86.by_name("x86_pmins");
|
||||||
|
let x86_pminu = x86.by_name("x86_pminu");
|
||||||
let x86_pshufd = x86.by_name("x86_pshufd");
|
let x86_pshufd = x86.by_name("x86_pshufd");
|
||||||
let x86_pshufb = x86.by_name("x86_pshufb");
|
let x86_pshufb = x86.by_name("x86_pshufb");
|
||||||
let x86_psll = x86.by_name("x86_psll");
|
let x86_psll = x86.by_name("x86_psll");
|
||||||
@@ -2047,6 +2051,25 @@ pub(crate) fn define(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD min/max
|
||||||
|
for (ty, inst, opcodes, isa_predicate) in &[
|
||||||
|
(I8, x86_pmaxs, &PMAXSB[..], Some(use_sse41_simd)),
|
||||||
|
(I16, x86_pmaxs, &PMAXSW[..], None),
|
||||||
|
(I32, x86_pmaxs, &PMAXSD[..], Some(use_sse41_simd)),
|
||||||
|
(I8, x86_pmaxu, &PMAXUB[..], None),
|
||||||
|
(I16, x86_pmaxu, &PMAXUW[..], Some(use_sse41_simd)),
|
||||||
|
(I32, x86_pmaxu, &PMAXUD[..], Some(use_sse41_simd)),
|
||||||
|
(I8, x86_pmins, &PMINSB[..], Some(use_sse41_simd)),
|
||||||
|
(I16, x86_pmins, &PMINSW[..], None),
|
||||||
|
(I32, x86_pmins, &PMINSD[..], Some(use_sse41_simd)),
|
||||||
|
(I8, x86_pminu, &PMINUB[..], None),
|
||||||
|
(I16, x86_pminu, &PMINUW[..], Some(use_sse41_simd)),
|
||||||
|
(I32, x86_pminu, &PMINUD[..], Some(use_sse41_simd)),
|
||||||
|
] {
|
||||||
|
let inst_ = inst.bind(vector(*ty, sse_vector_size));
|
||||||
|
e.enc_32_64_maybe_isap(inst_, rec_fa.opcodes(opcodes), *isa_predicate);
|
||||||
|
}
|
||||||
|
|
||||||
// Reference type instructions
|
// Reference type instructions
|
||||||
|
|
||||||
// Null references implemented as iconst 0.
|
// Null references implemented as iconst 0.
|
||||||
|
|||||||
@@ -487,5 +487,60 @@ pub(crate) fn define(
|
|||||||
.operands_out(vec![f]),
|
.operands_out(vec![f]),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let x = &Operand::new("x", IxN);
|
||||||
|
let y = &Operand::new("y", IxN);
|
||||||
|
let a = &Operand::new("a", IxN);
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"x86_pmaxs",
|
||||||
|
r#"
|
||||||
|
Maximum of Packed Signed Integers -- Compare signed integers in the first and second
|
||||||
|
operand and return the maximum values.
|
||||||
|
"#,
|
||||||
|
&formats.binary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x, y])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"x86_pmaxu",
|
||||||
|
r#"
|
||||||
|
Maximum of Packed Unsigned Integers -- Compare unsigned integers in the first and second
|
||||||
|
operand and return the maximum values.
|
||||||
|
"#,
|
||||||
|
&formats.binary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x, y])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"x86_pmins",
|
||||||
|
r#"
|
||||||
|
Minimum of Packed Signed Integers -- Compare signed integers in the first and second
|
||||||
|
operand and return the minimum values.
|
||||||
|
"#,
|
||||||
|
&formats.binary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x, y])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
|
ig.push(
|
||||||
|
Inst::new(
|
||||||
|
"x86_pminu",
|
||||||
|
r#"
|
||||||
|
Minimum of Packed Unsigned Integers -- Compare unsigned integers in the first and second
|
||||||
|
operand and return the minimum values.
|
||||||
|
"#,
|
||||||
|
&formats.binary,
|
||||||
|
)
|
||||||
|
.operands_in(vec![x, y])
|
||||||
|
.operands_out(vec![a]),
|
||||||
|
);
|
||||||
|
|
||||||
ig.build()
|
ig.build()
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -311,6 +311,54 @@ pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20];
|
|||||||
/// Insert word (SSE2).
|
/// Insert word (SSE2).
|
||||||
pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4];
|
pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4];
|
||||||
|
|
||||||
|
/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed maximum values in
|
||||||
|
/// xmm1 (SSE4.1).
|
||||||
|
pub static PMAXSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x3c];
|
||||||
|
|
||||||
|
/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed maximum
|
||||||
|
/// values in xmm1 (SSE4.1).
|
||||||
|
pub static PMAXSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3d];
|
||||||
|
|
||||||
|
/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed maximum values in
|
||||||
|
/// xmm1 (SSE2).
|
||||||
|
pub static PMAXSW: [u8; 3] = [0x66, 0x0f, 0xee];
|
||||||
|
|
||||||
|
/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed maximum values in
|
||||||
|
/// xmm1 (SSE2).
|
||||||
|
pub static PMAXUB: [u8; 3] = [0x66, 0x0f, 0xde];
|
||||||
|
|
||||||
|
/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed maximum
|
||||||
|
/// values in xmm1 (SSE4.1).
|
||||||
|
pub static PMAXUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3f];
|
||||||
|
|
||||||
|
/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed maximum values in
|
||||||
|
/// xmm1 (SSE4.1).
|
||||||
|
pub static PMAXUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3e];
|
||||||
|
|
||||||
|
/// Compare packed signed byte integers in xmm1 and xmm2/m128 and store packed minimum values in
|
||||||
|
/// xmm1 (SSE4.1).
|
||||||
|
pub static PMINSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x38];
|
||||||
|
|
||||||
|
/// Compare packed signed doubleword integers in xmm1 and xmm2/m128 and store packed minimum
|
||||||
|
/// values in xmm1 (SSE4.1).
|
||||||
|
pub static PMINSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x39];
|
||||||
|
|
||||||
|
/// Compare packed signed word integers in xmm1 and xmm2/m128 and store packed minimum values in
|
||||||
|
/// xmm1 (SSE2).
|
||||||
|
pub static PMINSW: [u8; 3] = [0x66, 0x0f, 0xea];
|
||||||
|
|
||||||
|
/// Compare packed unsigned byte integers in xmm1 and xmm2/m128 and store packed minimum values in
|
||||||
|
/// xmm1 (SSE2).
|
||||||
|
pub static PMINUB: [u8; 3] = [0x66, 0x0f, 0xda];
|
||||||
|
|
||||||
|
/// Compare packed unsigned doubleword integers in xmm1 and xmm2/m128 and store packed minimum
|
||||||
|
/// values in xmm1 (SSE4.1).
|
||||||
|
pub static PMINUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3b];
|
||||||
|
|
||||||
|
/// Compare packed unsigned word integers in xmm1 and xmm2/m128 and store packed minimum values in
|
||||||
|
/// xmm1 (SSE4.1).
|
||||||
|
pub static PMINUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3a];
|
||||||
|
|
||||||
/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
|
/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
|
||||||
/// the results in xmm1 (SSE2).
|
/// the results in xmm1 (SSE2).
|
||||||
pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5];
|
pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5];
|
||||||
|
|||||||
@@ -25,3 +25,30 @@ ebb0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm7]):
|
|||||||
[-, %xmm0] v2 = icmp sgt v0, v1 ; bin: 66 0f 38 37 c7
|
[-, %xmm0] v2 = icmp sgt v0, v1 ; bin: 66 0f 38 37 c7
|
||||||
return v2
|
return v2
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function %min_max_i8x16(i8x16, i8x16) {
|
||||||
|
ebb0(v0: i8x16 [%xmm3], v1: i8x16 [%xmm1]):
|
||||||
|
[-, %xmm3] v2 = x86_pmaxs v0, v1 ; bin: 66 0f 38 3c d9
|
||||||
|
[-, %xmm3] v3 = x86_pmaxu v0, v1 ; bin: 66 0f de d9
|
||||||
|
[-, %xmm3] v4 = x86_pmins v0, v1 ; bin: 66 0f 38 38 d9
|
||||||
|
[-, %xmm3] v5 = x86_pminu v0, v1 ; bin: 66 0f da d9
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
function %min_max_i16x8(i16x8, i16x8) {
|
||||||
|
ebb0(v0: i16x8 [%xmm2], v1: i16x8 [%xmm5]):
|
||||||
|
[-, %xmm2] v2 = x86_pmaxs v0, v1 ; bin: 66 0f ee d5
|
||||||
|
[-, %xmm2] v3 = x86_pmaxu v0, v1 ; bin: 66 0f 38 3e d5
|
||||||
|
[-, %xmm2] v4 = x86_pmins v0, v1 ; bin: 66 0f ea d5
|
||||||
|
[-, %xmm2] v5 = x86_pminu v0, v1 ; bin: 66 0f 38 3a d5
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
function %min_max_i32x4(i32x4, i32x4) {
|
||||||
|
ebb0(v0: i32x4 [%xmm2], v1: i32x4 [%xmm4]):
|
||||||
|
[-, %xmm2] v2 = x86_pmaxs v0, v1 ; bin: 66 0f 38 3d d4
|
||||||
|
[-, %xmm2] v3 = x86_pmaxu v0, v1 ; bin: 66 0f 38 3f d4
|
||||||
|
[-, %xmm2] v4 = x86_pmins v0, v1 ; bin: 66 0f 38 39 d4
|
||||||
|
[-, %xmm2] v5 = x86_pminu v0, v1 ; bin: 66 0f 38 3b d4
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|||||||
@@ -66,3 +66,44 @@ ebb0:
|
|||||||
return v8
|
return v8
|
||||||
}
|
}
|
||||||
; run
|
; run
|
||||||
|
|
||||||
|
function %maxs_i8x16() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] ; 1 will be greater than -1 == 0xff with
|
||||||
|
; signed max
|
||||||
|
v1 = vconst.i8x16 [0xff 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
|
||||||
|
v2 = x86_pmaxs v0, v1
|
||||||
|
v8 = vall_true v2
|
||||||
|
return v8
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %maxu_i16x8() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.i16x8 [0 1 1 1 1 1 1 1]
|
||||||
|
v1 = vconst.i16x8 [-1 1 1 1 1 1 1 1] ; -1 == 0xffff will be greater with unsigned max
|
||||||
|
v2 = x86_pmaxu v0, v1
|
||||||
|
v8 = vall_true v2
|
||||||
|
return v8
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %mins_i32x4() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.i32x4 [0 1 1 1]
|
||||||
|
v1 = vconst.i32x4 [-1 1 1 1] ; -1 == 0xffffffff will be less with signed min
|
||||||
|
v2 = x86_pmins v0, v1
|
||||||
|
v8 = vall_true v2
|
||||||
|
return v8
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %minu_i8x16() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.i8x16 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1] ; 1 < 2 with unsigned min
|
||||||
|
v1 = vconst.i8x16 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
|
||||||
|
v2 = x86_pminu v0, v1
|
||||||
|
v8 = vall_true v2
|
||||||
|
return v8
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|||||||
Reference in New Issue
Block a user