Fix legalization of SIMD fneg (#1286)
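Previously, SIMD `fneg` was legalized as an `fsub` from a zero vector, but this fails when negating -0.0 and +0.0 in the SIMD spec tests, because subtracting from zero does not reliably flip the sign of a zero. At the cost of a few more instructions, this change instead shifts an all-ones constant left to build a mask with only the most significant bit of each lane set, then flips that sign bit with `bxor`.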
This commit is contained in:
@@ -40,7 +40,6 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
let fmax = insts.by_name("fmax");
|
let fmax = insts.by_name("fmax");
|
||||||
let fmin = insts.by_name("fmin");
|
let fmin = insts.by_name("fmin");
|
||||||
let fneg = insts.by_name("fneg");
|
let fneg = insts.by_name("fneg");
|
||||||
let fsub = insts.by_name("fsub");
|
|
||||||
let iadd = insts.by_name("iadd");
|
let iadd = insts.by_name("iadd");
|
||||||
let icmp = insts.by_name("icmp");
|
let icmp = insts.by_name("icmp");
|
||||||
let iconst = insts.by_name("iconst");
|
let iconst = insts.by_name("iconst");
|
||||||
@@ -48,6 +47,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
let ineg = insts.by_name("ineg");
|
let ineg = insts.by_name("ineg");
|
||||||
let insertlane = insts.by_name("insertlane");
|
let insertlane = insts.by_name("insertlane");
|
||||||
let ishl = insts.by_name("ishl");
|
let ishl = insts.by_name("ishl");
|
||||||
|
let ishl_imm = insts.by_name("ishl_imm");
|
||||||
let isub = insts.by_name("isub");
|
let isub = insts.by_name("isub");
|
||||||
let popcnt = insts.by_name("popcnt");
|
let popcnt = insts.by_name("popcnt");
|
||||||
let raw_bitcast = insts.by_name("raw_bitcast");
|
let raw_bitcast = insts.by_name("raw_bitcast");
|
||||||
@@ -550,9 +550,18 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
|
|||||||
|
|
||||||
for ty in &[F32, F64] {
|
for ty in &[F32, F64] {
|
||||||
let fneg = fneg.bind(vector(*ty, sse_vector_size));
|
let fneg = fneg.bind(vector(*ty, sse_vector_size));
|
||||||
|
let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16);
|
||||||
|
let uimm8_shift = Literal::constant(&imm.uimm8, lane_type_as_int.lane_bits() as i64 - 1);
|
||||||
|
let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size));
|
||||||
|
let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size));
|
||||||
narrow.legalize(
|
narrow.legalize(
|
||||||
def!(b = fneg(a)),
|
def!(b = fneg(a)),
|
||||||
vec![def!(c = vconst(u128_zeroes)), def!(b = fsub(c, a))],
|
vec![
|
||||||
|
def!(c = vconst(ones)),
|
||||||
|
def!(d = ishl_imm(c, uimm8_shift)), // Create a mask of all 0s except the MSB.
|
||||||
|
def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type.
|
||||||
|
def!(b = bxor(a, e)), // Flip the MSB.
|
||||||
|
],
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -39,13 +39,17 @@ function %fneg_legalized() {
|
|||||||
ebb0:
|
ebb0:
|
||||||
v0 = vconst.f32x4 [0x1.0 0x2.0 0x3.0 0x4.0]
|
v0 = vconst.f32x4 [0x1.0 0x2.0 0x3.0 0x4.0]
|
||||||
v1 = fneg v0
|
v1 = fneg v0
|
||||||
; check: v4 = vconst.f32x4 0x00
|
; check: v4 = vconst.i32x4 0xffffffffffffffffffffffffffffffff
|
||||||
; nextln: v1 = fsub v4, v0
|
; nextln: v5 = ishl_imm v4, 31
|
||||||
|
; nextln: v6 = raw_bitcast.f32x4 v5
|
||||||
|
; nextln: v1 = bxor v0, v6
|
||||||
|
|
||||||
v2 = vconst.f64x2 [0x1.0 0x2.0]
|
v2 = vconst.f64x2 [0x1.0 0x2.0]
|
||||||
v3 = fneg v2
|
v3 = fneg v2
|
||||||
; check: v5 = vconst.f64x2 0x00
|
; check: v7 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
|
||||||
; nextln: v3 = fsub v5, v2
|
; nextln: v8 = ishl_imm v7, 63
|
||||||
|
; nextln: v9 = raw_bitcast.f64x2 v8
|
||||||
|
; nextln: v3 = bxor v2, v9
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -240,6 +240,19 @@ ebb0:
|
|||||||
}
|
}
|
||||||
; run
|
; run
|
||||||
|
|
||||||
|
function %fneg_f32x4() -> b1 {
|
||||||
|
ebb0:
|
||||||
|
v0 = vconst.f32x4 [0x0.0 -0x0.0 -Inf Inf]
|
||||||
|
v1 = fneg v0
|
||||||
|
|
||||||
|
v2 = vconst.f32x4 [-0x0.0 0x0.0 Inf -Inf]
|
||||||
|
v3 = fcmp eq v1, v2
|
||||||
|
v4 = vall_true v3
|
||||||
|
|
||||||
|
return v4
|
||||||
|
}
|
||||||
|
; run
|
||||||
|
|
||||||
function %fabs_f32x4() -> b1 {
|
function %fabs_f32x4() -> b1 {
|
||||||
ebb0:
|
ebb0:
|
||||||
v0 = vconst.f32x4 [0x0.0 -0x1.0 0x2.0 -0x3.0]
|
v0 = vconst.f32x4 [0x0.0 -0x1.0 0x2.0 -0x3.0]
|
||||||
|
|||||||
Reference in New Issue
Block a user