Add x86 SIMD floating-point absolute value
This commit is contained in:
@@ -36,6 +36,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
let fcvt_to_uint = insts.by_name("fcvt_to_uint");
|
let fcvt_to_uint = insts.by_name("fcvt_to_uint");
|
||||||
let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
|
let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
|
||||||
let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat");
|
let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat");
|
||||||
|
let fabs = insts.by_name("fabs");
|
||||||
let fmax = insts.by_name("fmax");
|
let fmax = insts.by_name("fmax");
|
||||||
let fmin = insts.by_name("fmin");
|
let fmin = insts.by_name("fmin");
|
||||||
let fneg = insts.by_name("fneg");
|
let fneg = insts.by_name("fneg");
|
||||||
@@ -337,6 +338,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
let b = var("b");
|
let b = var("b");
|
||||||
let c = var("c");
|
let c = var("c");
|
||||||
let d = var("d");
|
let d = var("d");
|
||||||
|
let e = var("e");
|
||||||
|
|
||||||
// SIMD vector size: eventually multiple vector sizes may be supported but for now only SSE-sized vectors are available
|
// SIMD vector size: eventually multiple vector sizes may be supported but for now only SSE-sized vectors are available
|
||||||
let sse_vector_size: u64 = 128;
|
let sse_vector_size: u64 = 128;
|
||||||
@@ -554,6 +556,23 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD fabs
|
||||||
|
for ty in &[F32, F64] {
|
||||||
|
let fabs = fabs.bind(vector(*ty, sse_vector_size));
|
||||||
|
let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16);
|
||||||
|
let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size));
|
||||||
|
let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size));
|
||||||
|
narrow.legalize(
|
||||||
|
def!(b = fabs(a)),
|
||||||
|
vec![
|
||||||
|
def!(c = vconst(ones)),
|
||||||
|
def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB.
|
||||||
|
def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type.
|
||||||
|
def!(b = band(a, e)), // Unset the MSB.
|
||||||
|
],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
narrow.custom_legalize(shuffle, "convert_shuffle");
|
narrow.custom_legalize(shuffle, "convert_shuffle");
|
||||||
narrow.custom_legalize(extractlane, "convert_extractlane");
|
narrow.custom_legalize(extractlane, "convert_extractlane");
|
||||||
narrow.custom_legalize(insertlane, "convert_insertlane");
|
narrow.custom_legalize(insertlane, "convert_insertlane");
|
||||||
|
|||||||
@@ -49,3 +49,14 @@ ebb0:
|
|||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function %fabs_legalized() {
; Legalization expectation for a vector fabs on f64x2.
; The body builds an f64x2 constant and applies fabs to it. The expectation
; comments further down pin the expanded sequence that must replace it, in order -
; an all-ones i64x2 constant, a logical right shift by 1, a raw_bitcast of that
; value to f64x2, and a band of the original input with the bitcast value that
; redefines v1.
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.f64x2 [0x1.0 -0x2.0]
|
||||||
|
v1 = fabs v0
|
||||||
|
; check: v2 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
|
||||||
|
; nextln: v3 = ushr_imm v2, 1
|
||||||
|
; nextln: v4 = raw_bitcast.f64x2 v3
|
||||||
|
; nextln: v1 = band v0, v4
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|||||||
@@ -239,3 +239,16 @@ ebb0:
|
|||||||
return v4
|
return v4
|
||||||
}
|
}
|
||||||
; run
|
; run
|
||||||
|
|
||||||
|
function %fabs_f32x4() -> b1 {
; Runtime test of fabs on an f32x4 vector.
; v0 holds lanes with mixed signs and v2 holds the same magnitudes with no
; negative lane. The function applies fabs to v0, compares the result against v2
; with fcmp eq, folds that comparison through vall_true, and returns the b1.
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.f32x4 [0x0.0 -0x1.0 0x2.0 -0x3.0]
|
||||||
|
v1 = fabs v0
|
||||||
|
|
||||||
|
v2 = vconst.f32x4 [0x0.0 0x1.0 0x2.0 0x3.0]
|
||||||
|
v3 = fcmp eq v1, v2
|
||||||
|
v4 = vall_true v3
|
||||||
|
|
||||||
|
return v4
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|||||||
Reference in New Issue
Block a user