diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle
index 3824a4ec99..dc5a16fd4a 100644
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1464,6 +1464,13 @@
             (_ Unit (emit (MInst.VecRRR op dst src1 src2 size))))
         dst))
 
+;; Helper for emitting `MInst.FpuRR` instructions.
+(decl fpu_rr (FPUOp1 Reg ScalarSize) Reg)
+(rule (fpu_rr op src size)
+      (let ((dst WritableReg (temp_writable_reg $F64))
+            (_ Unit (emit (MInst.FpuRR op size dst src))))
+        dst))
+
 ;; Helper for emitting `MInst.FpuRRR` instructions.
 (decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg)
 (rule (fpu_rrr op src1 src2 size)
@@ -1644,6 +1651,12 @@
           (MInst.FpuCSel64 dst if_true if_false cond)
           dst)))
 
+;; Helper for emitting `MInst.FpuRound` instructions.
+(decl fpu_round (FpuRoundMode Reg) Reg)
+(rule (fpu_round op rn)
+      (let ((dst WritableReg (temp_writable_reg $F64))
+            (_ Unit (emit (MInst.FpuRound op dst rn))))
+        dst))
 
 ;; Helper for emitting `MInst.MovToFpu` instructions.
 (decl mov_to_fpu (Reg ScalarSize) Reg)
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
index 2629b7835a..f9e25b47bd 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -4426,6 +4426,17 @@ fn test_aarch64_binemit() {
         "abs v1.2d, v10.2d",
     ));
 
+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Fabs,
+            rd: writable_vreg(15),
+            rn: vreg(16),
+            size: VectorSize::Size32x2,
+        },
+        "0FFAA00E",
+        "fabs v15.2s, v16.2s",
+    ));
+
     insns.push((
         Inst::VecMisc {
             op: VecMisc2::Fabs,
@@ -4448,6 +4459,17 @@ fn test_aarch64_binemit() {
         "fabs v3.2d, v22.2d",
     ));
 
+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Fneg,
+            rd: writable_vreg(31),
+            rn: vreg(0),
+            size: VectorSize::Size32x2,
+        },
+        "1FF8A02E",
+        "fneg v31.2s, v0.2s",
+    ));
+
     insns.push((
         Inst::VecMisc {
             op: VecMisc2::Fneg,
@@ -4481,6 +4503,17 @@ fn test_aarch64_binemit() {
         "fsqrt v18.2s, v25.2s",
     ));
 
+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Fsqrt,
+            rd: writable_vreg(18),
+            rn: vreg(25),
+            size: VectorSize::Size32x4,
+        },
+        "32FBA16E",
+        "fsqrt v18.4s, v25.4s",
+    ));
+
     insns.push((
         Inst::VecMisc {
             op: VecMisc2::Fsqrt,
diff --git a/cranelift/codegen/src/isa/aarch64/lower.isle b/cranelift/codegen/src/isa/aarch64/lower.isle
index eada9d0bdf..ab8fc79052 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -230,6 +230,85 @@
       (with_flags (fpu_cmp (scalar_size ty) rn rm)
                   (fpu_csel ty (Cond.Gt) rn rm)))
 
+;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty @ (multi_lane _ _) (sqrt x)))
+      (vec_misc (VecMisc2.Fsqrt) x (vector_size ty)))
+
+(rule (lower (has_type (ty_scalar_float ty) (sqrt x)))
+      (fpu_rr (FPUOp1.Sqrt) x (scalar_size ty)))
+
+;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty @ (multi_lane _ _) (fneg x)))
+      (vec_misc (VecMisc2.Fneg) x (vector_size ty)))
+
+(rule (lower (has_type (ty_scalar_float ty) (fneg x)))
+      (fpu_rr (FPUOp1.Neg) x (scalar_size ty)))
+
+;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty @ (multi_lane _ _) (fabs x)))
+      (vec_misc (VecMisc2.Fabs) x (vector_size ty)))
+
+(rule (lower (has_type (ty_scalar_float ty) (fabs x)))
+      (fpu_rr (FPUOp1.Abs) x (scalar_size ty)))
+
+;;;; Rules for `fpromote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $F64 (fpromote x)))
+      (fpu_rr (FPUOp1.Cvt32To64) x (ScalarSize.Size32)))
+
+;;;; Rules for `fdemote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $F32 (fdemote x)))
+      (fpu_rr (FPUOp1.Cvt64To32) x (ScalarSize.Size64)))
+
+;;;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty @ (multi_lane _ _) (ceil x)))
+      (vec_misc (VecMisc2.Frintp) x (vector_size ty)))
+
+(rule (lower (has_type $F32 (ceil x)))
+      (fpu_round (FpuRoundMode.Plus32) x))
+
+(rule (lower (has_type $F64 (ceil x)))
+      (fpu_round (FpuRoundMode.Plus64) x))
+
+;;;; Rules for `floor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty @ (multi_lane _ _) (floor x)))
+      (vec_misc (VecMisc2.Frintm) x (vector_size ty)))
+
+(rule (lower (has_type $F32 (floor x)))
+      (fpu_round (FpuRoundMode.Minus32) x))
+
+(rule (lower (has_type $F64 (floor x)))
+      (fpu_round (FpuRoundMode.Minus64) x))
+
+;;;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty @ (multi_lane _ _) (trunc x)))
+      (vec_misc (VecMisc2.Frintz) x (vector_size ty)))
+
+(rule (lower (has_type $F32 (trunc x)))
+      (fpu_round (FpuRoundMode.Zero32) x))
+
+(rule (lower (has_type $F64 (trunc x)))
+      (fpu_round (FpuRoundMode.Zero64) x))
+
+;;;; Rules for `nearest` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty @ (multi_lane _ _) (nearest x)))
+      (vec_misc (VecMisc2.Frintn) x (vector_size ty)))
+
+(rule (lower (has_type $F32 (nearest x)))
+      (fpu_round (FpuRoundMode.Nearest32) x))
+
+(rule (lower (has_type $F64 (nearest x)))
+      (fpu_round (FpuRoundMode.Nearest64) x))
+
+
 ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 ;; `i64` and smaller
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index f01221cb45..9c166e1c53 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1250,107 +1250,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         Opcode::FminPseudo | Opcode::FmaxPseudo => implemented_in_isle(ctx),
 
         Opcode::Sqrt | Opcode::Fneg | Opcode::Fabs | Opcode::Fpromote | Opcode::Fdemote => {
-            let ty = ty.unwrap();
-            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            if !ty.is_vector() {
-                let fpu_op = match op {
-                    Opcode::Sqrt => FPUOp1::Sqrt,
-                    Opcode::Fneg => FPUOp1::Neg,
-                    Opcode::Fabs => FPUOp1::Abs,
-                    Opcode::Fpromote => {
-                        if ty != F64 {
-                            return Err(CodegenError::Unsupported(format!(
-                                "Fpromote: Unsupported type: {:?}",
-                                ty
-                            )));
-                        }
-                        FPUOp1::Cvt32To64
-                    }
-                    Opcode::Fdemote => {
-                        if ty != F32 {
-                            return Err(CodegenError::Unsupported(format!(
-                                "Fdemote: Unsupported type: {:?}",
-                                ty
-                            )));
-                        }
-                        FPUOp1::Cvt64To32
-                    }
-                    _ => unreachable!(),
-                };
-                ctx.emit(Inst::FpuRR {
-                    fpu_op,
-                    size: ScalarSize::from_ty(ctx.input_ty(insn, 0)),
-                    rd,
-                    rn,
-                });
-            } else {
-                let op = match op {
-                    Opcode::Fabs => VecMisc2::Fabs,
-                    Opcode::Fneg => VecMisc2::Fneg,
-                    Opcode::Sqrt => VecMisc2::Fsqrt,
-                    _ => {
-                        return Err(CodegenError::Unsupported(format!(
-                            "{}: Unsupported type: {:?}",
-                            op, ty
-                        )))
-                    }
-                };
-
-                ctx.emit(Inst::VecMisc {
-                    op,
-                    rd,
-                    rn,
-                    size: VectorSize::from_ty(ty),
-                });
-            }
+            implemented_in_isle(ctx)
         }
 
-        Opcode::Ceil | Opcode::Floor | Opcode::Trunc | Opcode::Nearest => {
-            let ty = ctx.output_ty(insn, 0);
-            if !ty.is_vector() {
-                let bits = ty_bits(ty);
-                let op = match (op, bits) {
-                    (Opcode::Ceil, 32) => FpuRoundMode::Plus32,
-                    (Opcode::Ceil, 64) => FpuRoundMode::Plus64,
-                    (Opcode::Floor, 32) => FpuRoundMode::Minus32,
-                    (Opcode::Floor, 64) => FpuRoundMode::Minus64,
-                    (Opcode::Trunc, 32) => FpuRoundMode::Zero32,
-                    (Opcode::Trunc, 64) => FpuRoundMode::Zero64,
-                    (Opcode::Nearest, 32) => FpuRoundMode::Nearest32,
-                    (Opcode::Nearest, 64) => FpuRoundMode::Nearest64,
-                    _ => {
-                        return Err(CodegenError::Unsupported(format!(
-                            "{}: Unsupported type: {:?}",
-                            op, ty
-                        )))
-                    }
-                };
-                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                ctx.emit(Inst::FpuRound { op, rd, rn });
-            } else {
-                let (op, size) = match (op, ty) {
-                    (Opcode::Ceil, F32X4) => (VecMisc2::Frintp, VectorSize::Size32x4),
-                    (Opcode::Ceil, F64X2) => (VecMisc2::Frintp, VectorSize::Size64x2),
-                    (Opcode::Floor, F32X4) => (VecMisc2::Frintm, VectorSize::Size32x4),
-                    (Opcode::Floor, F64X2) => (VecMisc2::Frintm, VectorSize::Size64x2),
-                    (Opcode::Trunc, F32X4) => (VecMisc2::Frintz, VectorSize::Size32x4),
-                    (Opcode::Trunc, F64X2) => (VecMisc2::Frintz, VectorSize::Size64x2),
-                    (Opcode::Nearest, F32X4) => (VecMisc2::Frintn, VectorSize::Size32x4),
-                    (Opcode::Nearest, F64X2) => (VecMisc2::Frintn, VectorSize::Size64x2),
-                    _ => {
-                        return Err(CodegenError::Unsupported(format!(
-                            "{}: Unsupported type: {:?}",
-                            op, ty
-                        )))
-                    }
-                };
-                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                ctx.emit(Inst::VecMisc { op, rd, rn, size });
-            }
-        }
+        Opcode::Ceil | Opcode::Floor | Opcode::Trunc | Opcode::Nearest => implemented_in_isle(ctx),
 
         Opcode::Fma => {
             let ty = ty.unwrap();
diff --git a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif
index a8c3cb8191..8a4412a851 100644
--- a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif
@@ -701,3 +701,212 @@ block0(v0: f64):
 ;   fcvtzs x0, d7
 ;   ret
 
+function %f57(f32x2) -> f32x2 {
+block0(v0: f32x2):
+    v1 = sqrt v0
+    return v1
+}
+
+; block0:
+;   fsqrt v0.2s, v0.2s
+;   ret
+
+function %f58(f32x4) -> f32x4 {
+block0(v0: f32x4):
+    v1 = sqrt v0
+    return v1
+}
+
+; block0:
+;   fsqrt v0.4s, v0.4s
+;   ret
+
+function %f59(f64x2) -> f64x2 {
+block0(v0: f64x2):
+    v1 = sqrt v0
+    return v1
+}
+
+; block0:
+;   fsqrt v0.2d, v0.2d
+;   ret
+
+function %f60(f32x2) -> f32x2 {
+block0(v0: f32x2):
+    v1 = fneg v0
+    return v1
+}
+
+; block0:
+;   fneg v0.2s, v0.2s
+;   ret
+
+function %f61(f32x4) -> f32x4 {
+block0(v0: f32x4):
+    v1 = fneg v0
+    return v1
+}
+
+; block0:
+;   fneg v0.4s, v0.4s
+;   ret
+
+function %f62(f64x2) -> f64x2 {
+block0(v0: f64x2):
+    v1 = fneg v0
+    return v1
+}
+
+; block0:
+;   fneg v0.2d, v0.2d
+;   ret
+
+function %f63(f32x2) -> f32x2 {
+block0(v0: f32x2):
+    v1 = fabs v0
+    return v1
+}
+
+; block0:
+;   fabs v0.2s, v0.2s
+;   ret
+
+function %f64(f32x4) -> f32x4 {
+block0(v0: f32x4):
+    v1 = fabs v0
+    return v1
+}
+
+; block0:
+;   fabs v0.4s, v0.4s
+;   ret
+
+function %f65(f64x2) -> f64x2 {
+block0(v0: f64x2):
+    v1 = fabs v0
+    return v1
+}
+
+; block0:
+;   fabs v0.2d, v0.2d
+;   ret
+
+function %f66(f32x2) -> f32x2 {
+block0(v0: f32x2):
+    v1 = ceil v0
+    return v1
+}
+
+; block0:
+;   frintp v0.2s, v0.2s
+;   ret
+
+function %f67(f32x4) -> f32x4 {
+block0(v0: f32x4):
+    v1 = ceil v0
+    return v1
+}
+
+; block0:
+;   frintp v0.4s, v0.4s
+;   ret
+
+function %f68(f64x2) -> f64x2 {
+block0(v0: f64x2):
+    v1 = ceil v0
+    return v1
+}
+
+; block0:
+;   frintp v0.2d, v0.2d
+;   ret
+
+function %f69(f32x2) -> f32x2 {
+block0(v0: f32x2):
+    v1 = floor v0
+    return v1
+}
+
+; block0:
+;   frintm v0.2s, v0.2s
+;   ret
+
+function %f70(f32x4) -> f32x4 {
+block0(v0: f32x4):
+    v1 = floor v0
+    return v1
+}
+
+; block0:
+;   frintm v0.4s, v0.4s
+;   ret
+
+function %f71(f64x2) -> f64x2 {
+block0(v0: f64x2):
+    v1 = floor v0
+    return v1
+}
+
+; block0:
+;   frintm v0.2d, v0.2d
+;   ret
+
+function %f72(f32x2) -> f32x2 {
+block0(v0: f32x2):
+    v1 = trunc v0
+    return v1
+}
+
+; block0:
+;   frintz v0.2s, v0.2s
+;   ret
+
+function %f73(f32x4) -> f32x4 {
+block0(v0: f32x4):
+    v1 = trunc v0
+    return v1
+}
+
+; block0:
+;   frintz v0.4s, v0.4s
+;   ret
+
+function %f74(f64x2) -> f64x2 {
+block0(v0: f64x2):
+    v1 = trunc v0
+    return v1
+}
+
+; block0:
+;   frintz v0.2d, v0.2d
+;   ret
+
+function %f75(f32x2) -> f32x2 {
+block0(v0: f32x2):
+    v1 = nearest v0
+    return v1
+}
+
+; block0:
+;   frintn v0.2s, v0.2s
+;   ret
+
+function %f76(f32x4) -> f32x4 {
+block0(v0: f32x4):
+    v1 = nearest v0
+    return v1
+}
+
+; block0:
+;   frintn v0.4s, v0.4s
+;   ret
+
+function %f77(f64x2) -> f64x2 {
+block0(v0: f64x2):
+    v1 = nearest v0
+    return v1
+}
+
+; block0:
+;   frintn v0.2d, v0.2d
+;   ret