diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs index 2728cad5cf..6644bae161 100644 --- a/cranelift/codegen/meta/src/isa/x86/legalize.rs +++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs @@ -40,7 +40,6 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct let fmax = insts.by_name("fmax"); let fmin = insts.by_name("fmin"); let fneg = insts.by_name("fneg"); - let fsub = insts.by_name("fsub"); let iadd = insts.by_name("iadd"); let icmp = insts.by_name("icmp"); let iconst = insts.by_name("iconst"); @@ -48,6 +47,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct let ineg = insts.by_name("ineg"); let insertlane = insts.by_name("insertlane"); let ishl = insts.by_name("ishl"); + let ishl_imm = insts.by_name("ishl_imm"); let isub = insts.by_name("isub"); let popcnt = insts.by_name("popcnt"); let raw_bitcast = insts.by_name("raw_bitcast"); @@ -550,9 +550,18 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct for ty in &[F32, F64] { let fneg = fneg.bind(vector(*ty, sse_vector_size)); + let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16); + let uimm8_shift = Literal::constant(&imm.uimm8, lane_type_as_int.lane_bits() as i64 - 1); + let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size)); + let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size)); narrow.legalize( def!(b = fneg(a)), - vec![def!(c = vconst(u128_zeroes)), def!(b = fsub(c, a))], + vec![ + def!(c = vconst(ones)), + def!(d = ishl_imm(c, uimm8_shift)), // Create a mask of all 0s except the MSB. + def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type. + def!(b = bxor(a, e)), // Flip the MSB. + ], ); } diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif index 324027741b..b6b033833e 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif @@ -39,13 +39,17 @@ function %fneg_legalized() { ebb0: v0 = vconst.f32x4 [0x1.0 0x2.0 0x3.0 0x4.0] v1 = fneg v0 - ; check: v4 = vconst.f32x4 0x00 - ; nextln: v1 = fsub v4, v0 + ; check: v4 = vconst.i32x4 0xffffffffffffffffffffffffffffffff + ; nextln: v5 = ishl_imm v4, 31 + ; nextln: v6 = raw_bitcast.f32x4 v5 + ; nextln: v1 = bxor v0, v6 v2 = vconst.f64x2 [0x1.0 0x2.0] v3 = fneg v2 - ; check: v5 = vconst.f64x2 0x00 - ; nextln: v3 = fsub v5, v2 + ; check: v7 = vconst.i64x2 0xffffffffffffffffffffffffffffffff + ; nextln: v8 = ishl_imm v7, 63 + ; nextln: v9 = raw_bitcast.f64x2 v8 + ; nextln: v3 = bxor v2, v9 return } diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif index 04facb0078..429928b213 100644 --- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif +++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif @@ -240,6 +240,19 @@ ebb0: } ; run +function %fneg_f32x4() -> b1 { +ebb0: + v0 = vconst.f32x4 [0x0.0 -0x0.0 -Inf Inf] + v1 = fneg v0 + + v2 = vconst.f32x4 [-0x0.0 0x0.0 Inf -Inf] + v3 = fcmp eq v1, v2 + v4 = vall_true v3 + + return v4 +} +; run + function %fabs_f32x4() -> b1 { ebb0: v0 = vconst.f32x4 [0x0.0 -0x1.0 0x2.0 -0x3.0]