Fix SIMD float comparison encoding (#1285)

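The Intel manual uses `CMPNLT` and `CMPNLE` to denote not-less-than and not-less-than-or-equal. These were previously translated to `FloatCC::GreaterThanOrEqual` and `FloatCC::GreaterThan`, respectively, but the negated predicates also evaluate to true when either operand is NaN, so the correct translations are `FloatCC::UnorderedOrGreaterThanOrEqual` and `FloatCC::UnorderedOrGreaterThan`, respectively. This change adds the necessary legalizations to make use of these new encodings: SIMD `fcmp` with `gt`, `ge`, `ult`, or `ule` is rewritten to the mirrored condition (`lt`, `le`, `ugt`, `uge`) with its operands swapped.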
This commit is contained in:
Andrew Brown
2020-01-08 09:28:05 -08:00
committed by GitHub
parent 8ff6d640d6
commit 6fe86bcb61
4 changed files with 47 additions and 13 deletions

View File

@@ -552,6 +552,26 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
narrow.legalize(def!(c = icmp_(ule, a, b)), vec![def!(c = icmp(uge, b, a))]);
}
// SIMD fcmp greater-/less-than
let gt = Literal::enumerator_for(&imm.floatcc, "gt");
let lt = Literal::enumerator_for(&imm.floatcc, "lt");
let ge = Literal::enumerator_for(&imm.floatcc, "ge");
let le = Literal::enumerator_for(&imm.floatcc, "le");
let ugt = Literal::enumerator_for(&imm.floatcc, "ugt");
let ult = Literal::enumerator_for(&imm.floatcc, "ult");
let uge = Literal::enumerator_for(&imm.floatcc, "uge");
let ule = Literal::enumerator_for(&imm.floatcc, "ule");
for ty in &[F32, F64] {
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = fcmp_(gt, a, b)), vec![def!(c = fcmp(lt, b, a))]);
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = fcmp_(ge, a, b)), vec![def!(c = fcmp(le, b, a))]);
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = fcmp_(ult, a, b)), vec![def!(c = fcmp(ugt, b, a))]);
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
narrow.legalize(def!(c = fcmp_(ule, a, b)), vec![def!(c = fcmp(uge, b, a))]);
}
for ty in &[F32, F64] {
let fneg = fneg.bind(vector(*ty, sse_vector_size));
let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16);

View File

@@ -3169,7 +3169,7 @@ pub(crate) fn define<'shared>(
);
{
let supported_floatccs: Vec<Literal> = ["eq", "lt", "le", "uno", "ne", "gt", "ge", "ord"]
let supported_floatccs: Vec<Literal> = ["eq", "lt", "le", "uno", "ne", "uge", "ugt", "ord"]
.iter()
.map(|name| Literal::enumerator_for(floatcc, name))
.collect();
@@ -3189,14 +3189,14 @@ pub(crate) fn define<'shared>(
// Add immediate byte indicating what type of comparison.
use crate::ir::condcodes::FloatCC::*;
let imm = match cond {
Equal => 0x00,
LessThan => 0x01,
LessThanOrEqual => 0x02,
Unordered => 0x03,
NotEqual => 0x04,
GreaterThanOrEqual => 0x05,
GreaterThan => 0x06,
Ordered => 0x07,
Equal => 0x00,
LessThan => 0x01,
LessThanOrEqual => 0x02,
Unordered => 0x03,
NotEqual => 0x04,
UnorderedOrGreaterThanOrEqual => 0x05,
UnorderedOrGreaterThan => 0x06,
Ordered => 0x07,
_ => panic!("{} not supported by pfcmp", cond),
};
sink.put1(imm);

View File

@@ -60,8 +60,8 @@ ebb0(v0: f32x4 [%xmm2], v1: f32x4 [%xmm4]):
[-, %xmm2] v4 = fcmp le v0, v1 ; bin: 40 0f c2 d4 02
[-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 40 0f c2 d4 03
[-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 40 0f c2 d4 04
[-, %xmm2] v7 = fcmp ge v0, v1 ; bin: 40 0f c2 d4 05
[-, %xmm2] v8 = fcmp gt v0, v1 ; bin: 40 0f c2 d4 06
[-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 40 0f c2 d4 05
[-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 40 0f c2 d4 06
[-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 40 0f c2 d4 07
return
}
@@ -73,8 +73,8 @@ ebb0(v0: f64x2 [%xmm2], v1: f64x2 [%xmm0]):
[-, %xmm2] v4 = fcmp le v0, v1 ; bin: 66 40 0f c2 d0 02
[-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 66 40 0f c2 d0 03
[-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 66 40 0f c2 d0 04
[-, %xmm2] v7 = fcmp ge v0, v1 ; bin: 66 40 0f c2 d0 05
[-, %xmm2] v8 = fcmp gt v0, v1 ; bin: 66 40 0f c2 d0 06
[-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 66 40 0f c2 d0 05
[-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 66 40 0f c2 d0 06
[-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 66 40 0f c2 d0 07
return
}

View File

@@ -231,3 +231,17 @@ ebb0:
return v8
}
; run
function %fcmp_gt_nans_f32x4() -> b1 {
ebb0:
; Each lane pairs a NaN with another value (NaN, a finite number, or Inf), so at least
; one operand of every lane-wise comparison below is NaN.
v0 = vconst.f32x4 [NaN 0x42.0 -NaN NaN]
v1 = vconst.f32x4 [NaN NaN 0x42.0 Inf]
; The test expects `gt` to be false in every lane for these NaN-containing inputs.
v2 = fcmp gt v0, v1
; now check that the result v2 is all zeroes
v3 = vconst.i32x4 0x00
; reinterpret the comparison result as i32x4 so it can be compared with the zero vector v3
v4 = raw_bitcast.i32x4 v2
v5 = icmp eq v3, v4
; reduce the per-lane equality results to the single b1 value that is returned
v8 = vall_true v5
return v8
}
; run