Add x86 SIMD encoding for icmp sgt

This commit is contained in:
Andrew Brown
2019-10-24 17:07:42 -07:00
parent 6f35273055
commit c454c3c771
4 changed files with 82 additions and 15 deletions

View File

@@ -685,6 +685,7 @@ pub(crate) fn define(
let use_sse41 = settings.predicate_by_name("use_sse41");
let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd");
let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
let use_sse42_simd = settings.predicate_by_name("use_sse42_simd");
// Definitions.
let mut e = PerCpuModeEncodings::new();
@@ -2025,21 +2026,25 @@ pub(crate) fn define(
e.enc_32_64(x86_psra, rec_fa.opcodes(*opcodes));
}
// SIMD icmp using PCMPEQ*
for ty in ValueType::all_lane_types().filter(|t| t.is_int() && allowed_simd_type(t)) {
let (opcodes, isa_predicate): (&[_], _) = match ty.lane_bits() {
8 => (&PCMPEQB, None),
16 => (&PCMPEQW, None),
32 => (&PCMPEQD, None),
64 => (&PCMPEQQ, Some(use_sse41_simd)),
_ => panic!("invalid size for SIMD icmp"),
};
// SIMD integer comparisons
{
use IntCC::*;
for (ty, cc, opcodes, isa_predicate) in &[
(I8, Equal, &PCMPEQB[..], None),
(I16, Equal, &PCMPEQW[..], None),
(I32, Equal, &PCMPEQD[..], None),
(I64, Equal, &PCMPEQQ[..], Some(use_sse41_simd)),
(I8, SignedGreaterThan, &PCMPGTB[..], None),
(I16, SignedGreaterThan, &PCMPGTW[..], None),
(I32, SignedGreaterThan, &PCMPGTD[..], None),
(I64, SignedGreaterThan, &PCMPGTQ, Some(use_sse42_simd)),
] {
let instruction = icmp
.bind(Immediate::IntCC(IntCC::Equal))
.bind(vector(ty, sse_vector_size));
.bind(Immediate::IntCC(*cc))
.bind(vector(*ty, sse_vector_size));
let template = rec_icscc_fpr.nonrex().opcodes(opcodes);
e.enc_32_64_maybe_isap(instruction, template, isa_predicate);
e.enc_32_64_maybe_isap(instruction, template, *isa_predicate);
}
}
// Reference type instructions

View File

@@ -281,6 +281,18 @@ pub static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29];
/// Compare packed data for equal (SSE2).
pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75];
/// Compare packed signed byte integers for greater than (SSE2).
///
/// Opcode bytes used for the SIMD `icmp sgt` encoding on 8-bit lanes.
pub static PCMPGTB: [u8; 3] = [0x66, 0x0f, 0x64];
/// Compare packed signed doubleword integers for greater than (SSE2).
///
/// Opcode bytes used for the SIMD `icmp sgt` encoding on 32-bit lanes.
pub static PCMPGTD: [u8; 3] = [0x66, 0x0f, 0x66];
/// Compare packed signed quadword integers for greater than (SSE4.2).
///
/// Opcode bytes used for the SIMD `icmp sgt` encoding on 64-bit lanes. This is the only
/// `PCMPGT*` form that is not SSE2 and that carries a fourth opcode byte (`0x38`), so any
/// encoding built from it must be guarded by an SSE4.2 ISA predicate.
pub static PCMPGTQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x37];
/// Compare packed signed word integers for greater than (SSE2).
///
/// Opcode bytes used for the SIMD `icmp sgt` encoding on 16-bit lanes.
pub static PCMPGTW: [u8; 3] = [0x66, 0x0f, 0x65];
/// Extract doubleword or quadword, depending on REX.W (SSE4.1).
pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];

View File

@@ -0,0 +1,27 @@
test binemit
set enable_simd
target x86_64 skylake
; Binary-emission check for signed greater-than on i8x16.
; v0 (also the result location) is pinned to %xmm2 and v1 to %xmm1.
function %icmp_sgt_i8x16(i8x16, i8x16) -> b8x16 {
ebb0(v0: i8x16 [%xmm2], v1: i8x16 [%xmm1]):
; Expected bytes: the PCMPGTB opcode (66 0f 64) followed by one byte naming the register pair.
[-, %xmm2] v2 = icmp sgt v0, v1 ; bin: 66 0f 64 d1
return v2
}
; Binary-emission check for signed greater-than on i16x8.
; v0 (also the result location) is pinned to %xmm4 and v1 to %xmm3.
function %icmp_sgt_i16x8(i16x8, i16x8) -> b16x8 {
ebb0(v0: i16x8 [%xmm4], v1: i16x8 [%xmm3]):
; Expected bytes: the PCMPGTW opcode (66 0f 65) followed by one byte naming the register pair.
[-, %xmm4] v2 = icmp sgt v0, v1 ; bin: 66 0f 65 e3
return v2
}
; Binary-emission check for signed greater-than on i32x4.
; v0 (also the result location) is pinned to %xmm6 and v1 to %xmm5.
function %icmp_sgt_i32x4(i32x4, i32x4) -> b32x4 {
ebb0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm5]):
; Expected bytes: the PCMPGTD opcode (66 0f 66) followed by one byte naming the register pair.
[-, %xmm6] v2 = icmp sgt v0, v1 ; bin: 66 0f 66 f5
return v2
}
; Binary-emission check for signed greater-than on i64x2.
; v0 (also the result location) is pinned to %xmm0 and v1 to %xmm7.
; The 64-bit lane form uses the longer PCMPGTQ opcode, which is an SSE4.2 instruction.
function %icmp_sgt_i64x2(i64x2, i64x2) -> b64x2 {
ebb0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm7]):
; Expected bytes: the PCMPGTQ opcode (66 0f 38 37) followed by one byte naming the register pair.
[-, %xmm0] v2 = icmp sgt v0, v1 ; bin: 66 0f 38 37 c7
return v2
}

View File

@@ -43,3 +43,26 @@ ebb0:
return v5
}
; run
; Checks `icmp sgt` on i8x16 by comparing the produced lane mask against a constant mask.
function %icmp_sgt_i8x16() -> b1 {
ebb0:
v0 = vconst.i8x16 [0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
v1 = vconst.i8x16 [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
; Only lane 2 (2 > 1) and lane 15 (0 > 0xff, which is -1 when signed) compare greater.
; Lane 15 is the case that distinguishes a signed comparison from an unsigned one.
v2 = icmp sgt v0, v1
; Reinterpret the boolean vector as bytes so it can be compared with an integer constant.
v3 = raw_bitcast.i8x16 v2
; Expected mask: 0xff in the lanes where the comparison holds, zero elsewhere.
v4 = vconst.i8x16 [0 0 0xff 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
v7 = icmp eq v3, v4
; True only if every lane of the actual mask equals the expected mask.
v8 = vall_true v7
return v8
}
; run
; Checks `icmp sgt` on i64x2 with inputs chosen so that both lanes compare greater.
function %icmp_sgt_i64x2() -> b1 {
ebb0:
v0 = vconst.i64x2 [0 -42]
v1 = vconst.i64x2 [-1 -43]
; 0 > -1 and -42 > -43 under signed ordering, so both lanes are expected to be true.
v2 = icmp sgt v0, v1
; Collapse the lane results into a single boolean: true only if every lane is true.
v8 = vall_true v2
return v8
}
; run