Fix SIMD float comparison encoding (#1285)
The Intel manual uses `CMPNLT` and `CMPNLE` to denote not-less-than and not-less-than-or-equal. These were previously translated to `FloatCC::GreaterThanOrEqual` and `FloatCC::GreaterThan`, respectively, but the correct translations are `FloatCC::UnorderedOrGreaterThanOrEqual` and `FloatCC::UnorderedOrGreaterThan`, because the negated predicates also evaluate to true when either operand is NaN. This change adds the necessary legalizations to make use of these new encodings.
This commit is contained in:
@@ -552,6 +552,26 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
||||
narrow.legalize(def!(c = icmp_(ule, a, b)), vec![def!(c = icmp(uge, b, a))]);
|
||||
}
|
||||
|
||||
// SIMD fcmp greater-/less-than
|
||||
let gt = Literal::enumerator_for(&imm.floatcc, "gt");
|
||||
let lt = Literal::enumerator_for(&imm.floatcc, "lt");
|
||||
let ge = Literal::enumerator_for(&imm.floatcc, "ge");
|
||||
let le = Literal::enumerator_for(&imm.floatcc, "le");
|
||||
let ugt = Literal::enumerator_for(&imm.floatcc, "ugt");
|
||||
let ult = Literal::enumerator_for(&imm.floatcc, "ult");
|
||||
let uge = Literal::enumerator_for(&imm.floatcc, "uge");
|
||||
let ule = Literal::enumerator_for(&imm.floatcc, "ule");
|
||||
for ty in &[F32, F64] {
|
||||
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = fcmp_(gt, a, b)), vec![def!(c = fcmp(lt, b, a))]);
|
||||
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = fcmp_(ge, a, b)), vec![def!(c = fcmp(le, b, a))]);
|
||||
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = fcmp_(ult, a, b)), vec![def!(c = fcmp(ugt, b, a))]);
|
||||
let fcmp_ = fcmp.bind(vector(*ty, sse_vector_size));
|
||||
narrow.legalize(def!(c = fcmp_(ule, a, b)), vec![def!(c = fcmp(uge, b, a))]);
|
||||
}
|
||||
|
||||
for ty in &[F32, F64] {
|
||||
let fneg = fneg.bind(vector(*ty, sse_vector_size));
|
||||
let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16);
|
||||
|
||||
@@ -3169,7 +3169,7 @@ pub(crate) fn define<'shared>(
|
||||
);
|
||||
|
||||
{
|
||||
let supported_floatccs: Vec<Literal> = ["eq", "lt", "le", "uno", "ne", "gt", "ge", "ord"]
|
||||
let supported_floatccs: Vec<Literal> = ["eq", "lt", "le", "uno", "ne", "uge", "ugt", "ord"]
|
||||
.iter()
|
||||
.map(|name| Literal::enumerator_for(floatcc, name))
|
||||
.collect();
|
||||
@@ -3189,14 +3189,14 @@ pub(crate) fn define<'shared>(
|
||||
// Add immediate byte indicating what type of comparison.
|
||||
use crate::ir::condcodes::FloatCC::*;
|
||||
let imm = match cond {
|
||||
Equal => 0x00,
|
||||
LessThan => 0x01,
|
||||
LessThanOrEqual => 0x02,
|
||||
Unordered => 0x03,
|
||||
NotEqual => 0x04,
|
||||
GreaterThanOrEqual => 0x05,
|
||||
GreaterThan => 0x06,
|
||||
Ordered => 0x07,
|
||||
Equal => 0x00,
|
||||
LessThan => 0x01,
|
||||
LessThanOrEqual => 0x02,
|
||||
Unordered => 0x03,
|
||||
NotEqual => 0x04,
|
||||
UnorderedOrGreaterThanOrEqual => 0x05,
|
||||
UnorderedOrGreaterThan => 0x06,
|
||||
Ordered => 0x07,
|
||||
_ => panic!("{} not supported by pfcmp", cond),
|
||||
};
|
||||
sink.put1(imm);
|
||||
|
||||
@@ -60,8 +60,8 @@ ebb0(v0: f32x4 [%xmm2], v1: f32x4 [%xmm4]):
|
||||
[-, %xmm2] v4 = fcmp le v0, v1 ; bin: 40 0f c2 d4 02
|
||||
[-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 40 0f c2 d4 03
|
||||
[-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 40 0f c2 d4 04
|
||||
[-, %xmm2] v7 = fcmp ge v0, v1 ; bin: 40 0f c2 d4 05
|
||||
[-, %xmm2] v8 = fcmp gt v0, v1 ; bin: 40 0f c2 d4 06
|
||||
[-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 40 0f c2 d4 05
|
||||
[-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 40 0f c2 d4 06
|
||||
[-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 40 0f c2 d4 07
|
||||
return
|
||||
}
|
||||
@@ -73,8 +73,8 @@ ebb0(v0: f64x2 [%xmm2], v1: f64x2 [%xmm0]):
|
||||
[-, %xmm2] v4 = fcmp le v0, v1 ; bin: 66 40 0f c2 d0 02
|
||||
[-, %xmm2] v5 = fcmp uno v0, v1 ; bin: 66 40 0f c2 d0 03
|
||||
[-, %xmm2] v6 = fcmp ne v0, v1 ; bin: 66 40 0f c2 d0 04
|
||||
[-, %xmm2] v7 = fcmp ge v0, v1 ; bin: 66 40 0f c2 d0 05
|
||||
[-, %xmm2] v8 = fcmp gt v0, v1 ; bin: 66 40 0f c2 d0 06
|
||||
[-, %xmm2] v7 = fcmp uge v0, v1 ; bin: 66 40 0f c2 d0 05
|
||||
[-, %xmm2] v8 = fcmp ugt v0, v1 ; bin: 66 40 0f c2 d0 06
|
||||
[-, %xmm2] v9 = fcmp ord v0, v1 ; bin: 66 40 0f c2 d0 07
|
||||
return
|
||||
}
|
||||
|
||||
@@ -231,3 +231,17 @@ ebb0:
|
||||
return v8
|
||||
}
|
||||
; run
|
||||
|
||||
function %fcmp_gt_nans_f32x4() -> b1 {
|
||||
ebb0:
|
||||
v0 = vconst.f32x4 [NaN 0x42.0 -NaN NaN]
|
||||
v1 = vconst.f32x4 [NaN NaN 0x42.0 Inf]
|
||||
v2 = fcmp gt v0, v1
|
||||
; now check that the result v2 is all zeroes
|
||||
v3 = vconst.i32x4 0x00
|
||||
v4 = raw_bitcast.i32x4 v2
|
||||
v5 = icmp eq v3, v4
|
||||
v8 = vall_true v5
|
||||
return v8
|
||||
}
|
||||
; run
|
||||
|
||||
Reference in New Issue
Block a user