Add x86 SIMD encoding for icmp sgt

This commit is contained in:
Andrew Brown
2019-10-24 17:07:42 -07:00
parent 6f35273055
commit c454c3c771
4 changed files with 82 additions and 15 deletions

View File

@@ -685,6 +685,7 @@ pub(crate) fn define(
let use_sse41 = settings.predicate_by_name("use_sse41"); let use_sse41 = settings.predicate_by_name("use_sse41");
let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd"); let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd");
let use_sse41_simd = settings.predicate_by_name("use_sse41_simd"); let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
let use_sse42_simd = settings.predicate_by_name("use_sse42_simd");
// Definitions. // Definitions.
let mut e = PerCpuModeEncodings::new(); let mut e = PerCpuModeEncodings::new();
@@ -2025,21 +2026,25 @@ pub(crate) fn define(
e.enc_32_64(x86_psra, rec_fa.opcodes(*opcodes)); e.enc_32_64(x86_psra, rec_fa.opcodes(*opcodes));
} }
// SIMD icmp using PCMPEQ* // SIMD integer comparisons
for ty in ValueType::all_lane_types().filter(|t| t.is_int() && allowed_simd_type(t)) { {
let (opcodes, isa_predicate): (&[_], _) = match ty.lane_bits() { use IntCC::*;
8 => (&PCMPEQB, None), for (ty, cc, opcodes, isa_predicate) in &[
16 => (&PCMPEQW, None), (I8, Equal, &PCMPEQB[..], None),
32 => (&PCMPEQD, None), (I16, Equal, &PCMPEQW[..], None),
64 => (&PCMPEQQ, Some(use_sse41_simd)), (I32, Equal, &PCMPEQD[..], None),
_ => panic!("invalid size for SIMD icmp"), (I64, Equal, &PCMPEQQ[..], Some(use_sse41_simd)),
}; (I8, SignedGreaterThan, &PCMPGTB[..], None),
(I16, SignedGreaterThan, &PCMPGTW[..], None),
(I32, SignedGreaterThan, &PCMPGTD[..], None),
(I64, SignedGreaterThan, &PCMPGTQ, Some(use_sse42_simd)),
] {
let instruction = icmp let instruction = icmp
.bind(Immediate::IntCC(IntCC::Equal)) .bind(Immediate::IntCC(*cc))
.bind(vector(ty, sse_vector_size)); .bind(vector(*ty, sse_vector_size));
let template = rec_icscc_fpr.nonrex().opcodes(opcodes); let template = rec_icscc_fpr.nonrex().opcodes(opcodes);
e.enc_32_64_maybe_isap(instruction, template, isa_predicate); e.enc_32_64_maybe_isap(instruction, template, *isa_predicate);
}
} }
// Reference type instructions // Reference type instructions

View File

@@ -281,6 +281,18 @@ pub static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29];
/// Compare packed data for equal (SSE2). /// Compare packed data for equal (SSE2).
pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75]; pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75];
/// Compare packed signed byte integers for greater than (SSE2).
/// Bytes: `66` prefix, then the two-byte opcode `0F 64`.
pub static PCMPGTB: [u8; 3] = [0x66, 0x0f, 0x64];
/// Compare packed signed doubleword integers for greater than (SSE2).
/// Bytes: `66` prefix, then the two-byte opcode `0F 66`.
pub static PCMPGTD: [u8; 3] = [0x66, 0x0f, 0x66];
/// Compare packed signed quadword integers for greater than (SSE4.2).
/// Bytes: `66` prefix, then the three-byte opcode `0F 38 37`; this is the only
/// `PCMPGT*` constant here that is four bytes long.
pub static PCMPGTQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x37];
/// Compare packed signed word integers for greater than (SSE2).
/// Bytes: `66` prefix, then the two-byte opcode `0F 65`.
pub static PCMPGTW: [u8; 3] = [0x66, 0x0f, 0x65];
/// Extract doubleword or quadword, depending on REX.W (SSE4.1). /// Extract doubleword or quadword, depending on REX.W (SSE4.1).
pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16]; pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];

View File

@@ -0,0 +1,27 @@
test binemit
set enable_simd
target x86_64 skylake
; Signed greater-than on i8x16 lanes. The expected bytes are the PCMPGTB opcode
; (66 0f 64) followed by ModRM d1 (mod = 11, reg = xmm2, r/m = xmm1).
function %icmp_sgt_i8x16(i8x16, i8x16) -> b8x16 {
ebb0(v0: i8x16 [%xmm2], v1: i8x16 [%xmm1]):
[-, %xmm2] v2 = icmp sgt v0, v1 ; bin: 66 0f 64 d1
return v2
}
; Signed greater-than on i16x8 lanes. The expected bytes are the PCMPGTW opcode
; (66 0f 65) followed by ModRM e3 (mod = 11, reg = xmm4, r/m = xmm3).
function %icmp_sgt_i16x8(i16x8, i16x8) -> b16x8 {
ebb0(v0: i16x8 [%xmm4], v1: i16x8 [%xmm3]):
[-, %xmm4] v2 = icmp sgt v0, v1 ; bin: 66 0f 65 e3
return v2
}
; Signed greater-than on i32x4 lanes. The expected bytes are the PCMPGTD opcode
; (66 0f 66) followed by ModRM f5 (mod = 11, reg = xmm6, r/m = xmm5).
function %icmp_sgt_i32x4(i32x4, i32x4) -> b32x4 {
ebb0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm5]):
[-, %xmm6] v2 = icmp sgt v0, v1 ; bin: 66 0f 66 f5
return v2
}
; Signed greater-than on i64x2 lanes. The expected bytes are the four-byte PCMPGTQ
; opcode (66 0f 38 37) followed by ModRM c7 (mod = 11, reg = xmm0, r/m = xmm7).
function %icmp_sgt_i64x2(i64x2, i64x2) -> b64x2 {
ebb0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm7]):
[-, %xmm0] v2 = icmp sgt v0, v1 ; bin: 66 0f 38 37 c7
return v2
}

View File

@@ -43,3 +43,26 @@ ebb0:
return v5 return v5
} }
; run ; run
; Checks icmp sgt on i8x16. Only lane 2 (2 > 1) and lane 15 (0 > 0xff, which is
; 0 > -1 when the byte is signed) should compare as greater; lane 0 is smaller
; and all remaining lanes are equal.
function %icmp_sgt_i8x16() -> b1 {
ebb0:
v0 = vconst.i8x16 [0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
v1 = vconst.i8x16 [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
v2 = icmp sgt v0, v1
; View the boolean lanes as i8 so they can be matched against an explicit byte mask.
v3 = raw_bitcast.i8x16 v2
; The test expects 0xff in each lane where the comparison holds and 0 elsewhere.
v4 = vconst.i8x16 [0 0 0xff 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
v7 = icmp eq v3, v4
; Passes only if every lane of the mask matched.
v8 = vall_true v7
return v8
}
; run
; Checks icmp sgt on i64x2 with both lanes expected to hold: 0 > -1 and -42 > -43.
; Negative operands make sure the comparison is treated as signed.
function %icmp_sgt_i64x2() -> b1 {
ebb0:
v0 = vconst.i64x2 [0 -42]
v1 = vconst.i64x2 [-1 -43]
v2 = icmp sgt v0, v1
; Passes only if both lanes compared as greater.
v8 = vall_true v2
return v8
}
; run