From 91d29c09d06565849842ae193b6841b099a1d830 Mon Sep 17 00:00:00 2001
From: Andrew Brown
Date: Mon, 11 Nov 2019 12:47:01 -0800
Subject: [PATCH] Add x86 SIMD floating-point absolute value

---
 .../codegen/meta/src/isa/x86/legalize.rs      | 19 +++++++++++++++++++
 .../isa/x86/simd-arithmetic-legalize.clif     | 11 +++++++++++
 .../isa/x86/simd-arithmetic-run.clif          | 13 +++++++++++++
 3 files changed, 43 insertions(+)

diff --git a/cranelift/codegen/meta/src/isa/x86/legalize.rs b/cranelift/codegen/meta/src/isa/x86/legalize.rs
index 753ee10b21..2728cad5cf 100644
--- a/cranelift/codegen/meta/src/isa/x86/legalize.rs
+++ b/cranelift/codegen/meta/src/isa/x86/legalize.rs
@@ -36,6 +36,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
     let fcvt_to_uint = insts.by_name("fcvt_to_uint");
     let fcvt_to_sint_sat = insts.by_name("fcvt_to_sint_sat");
     let fcvt_to_uint_sat = insts.by_name("fcvt_to_uint_sat");
+    let fabs = insts.by_name("fabs");
     let fmax = insts.by_name("fmax");
     let fmin = insts.by_name("fmin");
     let fneg = insts.by_name("fneg");
@@ -337,6 +338,7 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
     let b = var("b");
     let c = var("c");
     let d = var("d");
+    let e = var("e");
 
     // SIMD vector size: eventually multiple vector sizes may be supported but for now only SSE-sized vectors are available
     let sse_vector_size: u64 = 128;
@@ -554,6 +556,23 @@ pub(crate) fn define(shared: &mut SharedDefinitions, x86_instructions: &Instruct
         );
     }
 
+    // SIMD fabs
+    for ty in &[F32, F64] {
+        let fabs = fabs.bind(vector(*ty, sse_vector_size));
+        let lane_type_as_int = LaneType::int_from_bits(LaneType::from(*ty).lane_bits() as u16);
+        let vconst = vconst.bind(vector(lane_type_as_int, sse_vector_size));
+        let bitcast_to_float = raw_bitcast.bind(vector(*ty, sse_vector_size));
+        narrow.legalize(
+            def!(b = fabs(a)),
+            vec![
+                def!(c = vconst(ones)),
+                def!(d = ushr_imm(c, uimm8_one)), // Create a mask of all 1s except the MSB.
+                def!(e = bitcast_to_float(d)), // Cast mask to the floating-point type.
+                def!(b = band(a, e)), // Unset the MSB.
+            ],
+        );
+    }
+
     narrow.custom_legalize(shuffle, "convert_shuffle");
     narrow.custom_legalize(extractlane, "convert_extractlane");
     narrow.custom_legalize(insertlane, "convert_insertlane");
diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif
index 3a2ae10ab9..324027741b 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-legalize.clif
@@ -49,3 +49,14 @@ ebb0:
 
     return
 }
+
+function %fabs_legalized() {
+ebb0:
+    v0 = vconst.f64x2 [0x1.0 -0x2.0]
+    v1 = fabs v0
+    ; check: v2 = vconst.i64x2 0xffffffffffffffffffffffffffffffff
+    ; nextln: v3 = ushr_imm v2, 1
+    ; nextln: v4 = raw_bitcast.f64x2 v3
+    ; nextln: v1 = band v0, v4
+    return
+}
diff --git a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif
index fd52a5a52b..04facb0078 100644
--- a/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif
+++ b/cranelift/filetests/filetests/isa/x86/simd-arithmetic-run.clif
@@ -239,3 +239,16 @@ ebb0:
     return v4
 }
 ; run
+
+function %fabs_f32x4() -> b1 {
+ebb0:
+    v0 = vconst.f32x4 [0x0.0 -0x1.0 0x2.0 -0x3.0]
+    v1 = fabs v0
+
+    v2 = vconst.f32x4 [0x0.0 0x1.0 0x2.0 0x3.0]
+    v3 = fcmp eq v1, v2
+    v4 = vall_true v3
+
+    return v4
+}
+; run