Add x86 SIMD instructions for min and max

Only the I8, I16, and I32 versions are included: the I64 versions are only available as AVX-512 encodings, and Cranelift lacks support for AVX.
This commit is contained in:
Andrew Brown
2019-10-25 10:12:35 -07:00
parent f053595748
commit 0ab5760fd7
5 changed files with 194 additions and 0 deletions

View File

@@ -517,6 +517,10 @@ pub(crate) fn define(
let x86_pop = x86.by_name("x86_pop");
let x86_pextr = x86.by_name("x86_pextr");
let x86_pinsr = x86.by_name("x86_pinsr");
// Packed integer min/max instructions (`s` = signed, `u` = unsigned), looked up by name in the
// x86 instruction group.
let x86_pmaxs = x86.by_name("x86_pmaxs");
let x86_pmaxu = x86.by_name("x86_pmaxu");
let x86_pmins = x86.by_name("x86_pmins");
let x86_pminu = x86.by_name("x86_pminu");
let x86_pshufd = x86.by_name("x86_pshufd");
let x86_pshufb = x86.by_name("x86_pshufb");
let x86_psll = x86.by_name("x86_psll");
@@ -2047,6 +2051,25 @@ pub(crate) fn define(
}
}
// SIMD min/max
for (ty, inst, opcodes, isa_predicate) in &[
(I8, x86_pmaxs, &PMAXSB[..], Some(use_sse41_simd)),
(I16, x86_pmaxs, &PMAXSW[..], None),
(I32, x86_pmaxs, &PMAXSD[..], Some(use_sse41_simd)),
(I8, x86_pmaxu, &PMAXUB[..], None),
(I16, x86_pmaxu, &PMAXUW[..], Some(use_sse41_simd)),
(I32, x86_pmaxu, &PMAXUD[..], Some(use_sse41_simd)),
(I8, x86_pmins, &PMINSB[..], Some(use_sse41_simd)),
(I16, x86_pmins, &PMINSW[..], None),
(I32, x86_pmins, &PMINSD[..], Some(use_sse41_simd)),
(I8, x86_pminu, &PMINUB[..], None),
(I16, x86_pminu, &PMINUW[..], Some(use_sse41_simd)),
(I32, x86_pminu, &PMINUD[..], Some(use_sse41_simd)),
] {
let inst_ = inst.bind(vector(*ty, sse_vector_size));
e.enc_32_64_maybe_isap(inst_, rec_fa.opcodes(opcodes), *isa_predicate);
}
// Reference type instructions
// Null references implemented as iconst 0.

View File

@@ -487,5 +487,60 @@ pub(crate) fn define(
.operands_out(vec![f]),
);
// Operands used by the packed integer min/max instructions: `x` and `y` are passed as inputs
// and `a` as the output, all three typed by `IxN`.
let x = &Operand::new("x", IxN);
let y = &Operand::new("y", IxN);
let a = &Operand::new("a", IxN);
// Add `x86_pmaxs` (packed signed maximum) to the instruction group: binary format, inputs
// `x` and `y`, output `a`. The raw string is presumably the instruction's documentation text.
ig.push(
Inst::new(
"x86_pmaxs",
r#"
Maximum of Packed Signed Integers -- Compare signed integers in the first and second
operand and return the maximum values.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
// Add `x86_pmaxu` (packed unsigned maximum) to the instruction group: binary format, inputs
// `x` and `y`, output `a`. The raw string is presumably the instruction's documentation text.
ig.push(
Inst::new(
"x86_pmaxu",
r#"
Maximum of Packed Unsigned Integers -- Compare unsigned integers in the first and second
operand and return the maximum values.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
// Add `x86_pmins` (packed signed minimum) to the instruction group: binary format, inputs
// `x` and `y`, output `a`. The raw string is presumably the instruction's documentation text.
ig.push(
Inst::new(
"x86_pmins",
r#"
Minimum of Packed Signed Integers -- Compare signed integers in the first and second
operand and return the minimum values.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
// Add `x86_pminu` (packed unsigned minimum) to the instruction group: binary format, inputs
// `x` and `y`, output `a`. The raw string is presumably the instruction's documentation text.
ig.push(
Inst::new(
"x86_pminu",
r#"
Minimum of Packed Unsigned Integers -- Compare unsigned integers in the first and second
operand and return the minimum values.
"#,
&formats.binary,
)
.operands_in(vec![x, y])
.operands_out(vec![a]),
);
ig.build()
}

View File

@@ -311,6 +311,54 @@ pub static PINSRB: [u8; 4] = [0x66, 0x0f, 0x3a, 0x20];
/// Insert word (SSE2).
pub static PINSRW: [u8; 3] = [0x66, 0x0f, 0xc4];
/// `PMAXSB xmm1, xmm2/m128`: per-lane maximum of packed signed byte integers; the result is
/// stored in xmm1 (SSE4.1).
pub static PMAXSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x3c];
/// `PMAXSD xmm1, xmm2/m128`: per-lane maximum of packed signed doubleword integers; the result
/// is stored in xmm1 (SSE4.1).
pub static PMAXSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3d];
/// `PMAXSW xmm1, xmm2/m128`: per-lane maximum of packed signed word integers; the result is
/// stored in xmm1 (SSE2).
pub static PMAXSW: [u8; 3] = [0x66, 0x0f, 0xee];
/// `PMAXUB xmm1, xmm2/m128`: per-lane maximum of packed unsigned byte integers; the result is
/// stored in xmm1 (SSE2).
pub static PMAXUB: [u8; 3] = [0x66, 0x0f, 0xde];
/// `PMAXUD xmm1, xmm2/m128`: per-lane maximum of packed unsigned doubleword integers; the
/// result is stored in xmm1 (SSE4.1).
pub static PMAXUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3f];
/// `PMAXUW xmm1, xmm2/m128`: per-lane maximum of packed unsigned word integers; the result is
/// stored in xmm1 (SSE4.1).
pub static PMAXUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3e];
/// `PMINSB xmm1, xmm2/m128`: per-lane minimum of packed signed byte integers; the result is
/// stored in xmm1 (SSE4.1).
pub static PMINSB: [u8; 4] = [0x66, 0x0f, 0x38, 0x38];
/// `PMINSD xmm1, xmm2/m128`: per-lane minimum of packed signed doubleword integers; the result
/// is stored in xmm1 (SSE4.1).
pub static PMINSD: [u8; 4] = [0x66, 0x0f, 0x38, 0x39];
/// `PMINSW xmm1, xmm2/m128`: per-lane minimum of packed signed word integers; the result is
/// stored in xmm1 (SSE2).
pub static PMINSW: [u8; 3] = [0x66, 0x0f, 0xea];
/// `PMINUB xmm1, xmm2/m128`: per-lane minimum of packed unsigned byte integers; the result is
/// stored in xmm1 (SSE2).
pub static PMINUB: [u8; 3] = [0x66, 0x0f, 0xda];
/// `PMINUD xmm1, xmm2/m128`: per-lane minimum of packed unsigned doubleword integers; the
/// result is stored in xmm1 (SSE4.1).
pub static PMINUD: [u8; 4] = [0x66, 0x0f, 0x38, 0x3b];
/// `PMINUW xmm1, xmm2/m128`: per-lane minimum of packed unsigned word integers; the result is
/// stored in xmm1 (SSE4.1).
pub static PMINUW: [u8; 4] = [0x66, 0x0f, 0x38, 0x3a];
/// Multiply the packed signed word integers in xmm1 and xmm2/m128, and store the low 16 bits of
/// the results in xmm1 (SSE2).
pub static PMULLW: [u8; 3] = [0x66, 0x0f, 0xd5];