Add x86 SIMD encoding for icmp sgt
This commit is contained in:
@@ -685,6 +685,7 @@ pub(crate) fn define(
|
|||||||
let use_sse41 = settings.predicate_by_name("use_sse41");
|
let use_sse41 = settings.predicate_by_name("use_sse41");
|
||||||
let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd");
|
let use_ssse3_simd = settings.predicate_by_name("use_ssse3_simd");
|
||||||
let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
|
let use_sse41_simd = settings.predicate_by_name("use_sse41_simd");
|
||||||
|
let use_sse42_simd = settings.predicate_by_name("use_sse42_simd");
|
||||||
|
|
||||||
// Definitions.
|
// Definitions.
|
||||||
let mut e = PerCpuModeEncodings::new();
|
let mut e = PerCpuModeEncodings::new();
|
||||||
@@ -2025,21 +2026,25 @@ pub(crate) fn define(
|
|||||||
e.enc_32_64(x86_psra, rec_fa.opcodes(*opcodes));
|
e.enc_32_64(x86_psra, rec_fa.opcodes(*opcodes));
|
||||||
}
|
}
|
||||||
|
|
||||||
// SIMD icmp using PCMPEQ*
|
// SIMD integer comparisons
|
||||||
for ty in ValueType::all_lane_types().filter(|t| t.is_int() && allowed_simd_type(t)) {
|
{
|
||||||
let (opcodes, isa_predicate): (&[_], _) = match ty.lane_bits() {
|
use IntCC::*;
|
||||||
8 => (&PCMPEQB, None),
|
for (ty, cc, opcodes, isa_predicate) in &[
|
||||||
16 => (&PCMPEQW, None),
|
(I8, Equal, &PCMPEQB[..], None),
|
||||||
32 => (&PCMPEQD, None),
|
(I16, Equal, &PCMPEQW[..], None),
|
||||||
64 => (&PCMPEQQ, Some(use_sse41_simd)),
|
(I32, Equal, &PCMPEQD[..], None),
|
||||||
_ => panic!("invalid size for SIMD icmp"),
|
(I64, Equal, &PCMPEQQ[..], Some(use_sse41_simd)),
|
||||||
};
|
(I8, SignedGreaterThan, &PCMPGTB[..], None),
|
||||||
|
(I16, SignedGreaterThan, &PCMPGTW[..], None),
|
||||||
|
(I32, SignedGreaterThan, &PCMPGTD[..], None),
|
||||||
|
(I64, SignedGreaterThan, &PCMPGTQ, Some(use_sse42_simd)),
|
||||||
|
] {
|
||||||
let instruction = icmp
|
let instruction = icmp
|
||||||
.bind(Immediate::IntCC(IntCC::Equal))
|
.bind(Immediate::IntCC(*cc))
|
||||||
.bind(vector(ty, sse_vector_size));
|
.bind(vector(*ty, sse_vector_size));
|
||||||
let template = rec_icscc_fpr.nonrex().opcodes(opcodes);
|
let template = rec_icscc_fpr.nonrex().opcodes(opcodes);
|
||||||
e.enc_32_64_maybe_isap(instruction, template, isa_predicate);
|
e.enc_32_64_maybe_isap(instruction, template, *isa_predicate);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reference type instructions
|
// Reference type instructions
|
||||||
|
|||||||
@@ -281,6 +281,18 @@ pub static PCMPEQQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x29];
|
|||||||
/// Compare packed data for equal (SSE2).
|
/// Compare packed data for equal (SSE2).
|
||||||
pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75];
|
pub static PCMPEQW: [u8; 3] = [0x66, 0x0f, 0x75];
|
||||||
|
|
||||||
|
/// Compare packed signed byte integers for greater than (SSE2).
|
||||||
|
pub static PCMPGTB: [u8; 3] = [0x66, 0x0f, 0x64];
|
||||||
|
|
||||||
|
/// Compare packed signed doubleword integers for greater than (SSE2).
|
||||||
|
pub static PCMPGTD: [u8; 3] = [0x66, 0x0f, 0x66];
|
||||||
|
|
||||||
|
/// Compare packed signed quadword integers for greater than (SSE4.2).
|
||||||
|
pub static PCMPGTQ: [u8; 4] = [0x66, 0x0f, 0x38, 0x37];
|
||||||
|
|
||||||
|
/// Compare packed signed word integers for greater than (SSE2).
|
||||||
|
pub static PCMPGTW: [u8; 3] = [0x66, 0x0f, 0x65];
|
||||||
|
|
||||||
/// Extract doubleword or quadword, depending on REX.W (SSE4.1).
|
/// Extract doubleword or quadword, depending on REX.W (SSE4.1).
|
||||||
pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];
|
pub static PEXTR: [u8; 4] = [0x66, 0x0f, 0x3a, 0x16];
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,27 @@
|
|||||||
|
test binemit
|
||||||
|
set enable_simd
|
||||||
|
target x86_64 skylake
|
||||||
|
|
||||||
|
function %icmp_sgt_i8x16(i8x16, i8x16) -> b8x16 {
|
||||||
|
ebb0(v0: i8x16 [%xmm2], v1: i8x16 [%xmm1]):
|
||||||
|
[-, %xmm2] v2 = icmp sgt v0, v1 ; bin: 66 0f 64 d1
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
function %icmp_sgt_i16x8(i16x8, i16x8) -> b16x8 {
|
||||||
|
ebb0(v0: i16x8 [%xmm4], v1: i16x8 [%xmm3]):
|
||||||
|
[-, %xmm4] v2 = icmp sgt v0, v1 ; bin: 66 0f 65 e3
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
function %icmp_sgt_i32x4(i32x4, i32x4) -> b32x4 {
|
||||||
|
ebb0(v0: i32x4 [%xmm6], v1: i32x4 [%xmm5]):
|
||||||
|
[-, %xmm6] v2 = icmp sgt v0, v1 ; bin: 66 0f 66 f5
|
||||||
|
return v2
|
||||||
|
}
|
||||||
|
|
||||||
|
function %icmp_sgt_i64x2(i64x2, i64x2) -> b64x2 {
|
||||||
|
ebb0(v0: i64x2 [%xmm0], v1: i64x2 [%xmm7]):
|
||||||
|
[-, %xmm0] v2 = icmp sgt v0, v1 ; bin: 66 0f 38 37 c7
|
||||||
|
return v2
|
||||||
|
}
|
||||||
@@ -43,3 +43,26 @@ ebb0:
|
|||||||
return v5
|
return v5
|
||||||
}
|
}
|
||||||
; run
|
; run
|
||||||
|
|
||||||
|
function %icmp_sgt_i8x16() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.i8x16 [0 1 2 0 0 0 0 0 0 0 0 0 0 0 0 0]
|
||||||
|
v1 = vconst.i8x16 [1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
|
||||||
|
v2 = icmp sgt v0, v1
|
||||||
|
v3 = raw_bitcast.i8x16 v2
|
||||||
|
v4 = vconst.i8x16 [0 0 0xff 0 0 0 0 0 0 0 0 0 0 0 0 0xff]
|
||||||
|
v7 = icmp eq v3, v4
|
||||||
|
v8 = vall_true v7
|
||||||
|
return v8
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
|
function %icmp_sgt_i64x2() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.i64x2 [0 -42]
|
||||||
|
v1 = vconst.i64x2 [-1 -43]
|
||||||
|
v2 = icmp sgt v0, v1
|
||||||
|
v8 = vall_true v2
|
||||||
|
return v8
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|||||||
Reference in New Issue
Block a user