Convert sqrt..nearest to ISLE (AArch64) (#4508)

Converted the existing implementations for the following opcodes to ISLE on AArch64:

- `sqrt`
- `fneg`
- `fabs`
- `fpromote`
- `fdemote`
- `ceil`
- `floor`
- `trunc`
- `nearest`

Copyright (c) 2022 Arm Limited
2022-07-22 22:48:07 +01:00
parent 4720d09651
commit f1a0c40a53
5 changed files with 336 additions and 99 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -1464,6 +1464,13 @@
            (_ Unit (emit (MInst.VecRRR op dst src1 src2 size))))
        dst))

+;; Helper for emitting `MInst.FpuRR` instructions.
+(decl fpu_rr (FPUOp1 Reg ScalarSize) Reg)
+(rule (fpu_rr op src size)
+      (let ((dst WritableReg (temp_writable_reg $F64))
+            (_ Unit (emit (MInst.FpuRR op size dst src))))
+        dst))
+
 ;; Helper for emitting `MInst.FpuRRR` instructions.
 (decl fpu_rrr (FPUOp2 Reg Reg ScalarSize) Reg)
 (rule (fpu_rrr op src1 src2 size)
@@ -1644,6 +1651,12 @@
         (MInst.FpuCSel64 dst if_true if_false cond)
         dst)))

+;; Helper for emitting `MInst.FpuRound` instructions.
+(decl fpu_round (FpuRoundMode Reg) Reg)
+(rule (fpu_round op rn)
+      (let ((dst WritableReg (temp_writable_reg $F64))
+            (_ Unit (emit (MInst.FpuRound op dst rn))))
+        dst))

 ;; Helper for emitting `MInst.MovToFpu` instructions.
 (decl mov_to_fpu (Reg ScalarSize) Reg)
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -4426,6 +4426,17 @@ fn test_aarch64_binemit() {
        "abs v1.2d, v10.2d",
    ));

+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Fabs,
+            rd: writable_vreg(15),
+            rn: vreg(16),
+            size: VectorSize::Size32x2,
+        },
+        "0FFAA00E",
+        "fabs v15.2s, v16.2s",
+    ));
+
    insns.push((
        Inst::VecMisc {
            op: VecMisc2::Fabs,
@@ -4448,6 +4459,17 @@ fn test_aarch64_binemit() {
        "fabs v3.2d, v22.2d",
    ));

+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Fneg,
+            rd: writable_vreg(31),
+            rn: vreg(0),
+            size: VectorSize::Size32x2,
+        },
+        "1FF8A02E",
+        "fneg v31.2s, v0.2s",
+    ));
+
    insns.push((
        Inst::VecMisc {
            op: VecMisc2::Fneg,
@@ -4481,6 +4503,17 @@ fn test_aarch64_binemit() {
        "fsqrt v18.2s, v25.2s",
    ));

+    insns.push((
+        Inst::VecMisc {
+            op: VecMisc2::Fsqrt,
+            rd: writable_vreg(18),
+            rn: vreg(25),
+            size: VectorSize::Size32x4,
+        },
+        "32FBA16E",
+        "fsqrt v18.4s, v25.4s",
+    ));
+
    insns.push((
        Inst::VecMisc {
            op: VecMisc2::Fsqrt,
--- a/cranelift/codegen/src/isa/aarch64/lower.isle
+++ b/cranelift/codegen/src/isa/aarch64/lower.isle
@@ -230,6 +230,85 @@
      (with_flags (fpu_cmp (scalar_size ty) rn rm)
                  (fpu_csel ty (Cond.Gt) rn rm)))

+;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty @ (multi_lane _ _) (sqrt x)))
+      (vec_misc (VecMisc2.Fsqrt) x (vector_size ty)))
+
+(rule (lower (has_type (ty_scalar_float ty) (sqrt x)))
+      (fpu_rr (FPUOp1.Sqrt) x (scalar_size ty)))
+
+;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty @ (multi_lane _ _) (fneg x)))
+      (vec_misc (VecMisc2.Fneg) x (vector_size ty)))
+
+(rule (lower (has_type (ty_scalar_float ty) (fneg x)))
+      (fpu_rr (FPUOp1.Neg) x (scalar_size ty)))
+
+;;;; Rules for `fabs` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty @ (multi_lane _ _) (fabs x)))
+      (vec_misc (VecMisc2.Fabs) x (vector_size ty)))
+
+(rule (lower (has_type (ty_scalar_float ty) (fabs x)))
+      (fpu_rr (FPUOp1.Abs) x (scalar_size ty)))
+
+;;;; Rules for `fpromote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $F64 (fpromote x)))
+      (fpu_rr (FPUOp1.Cvt32To64) x (ScalarSize.Size32)))
+
+;;;; Rules for `fdemote` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type $F32 (fdemote x)))
+      (fpu_rr (FPUOp1.Cvt64To32) x (ScalarSize.Size64)))
+
+;;;; Rules for `ceil` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty @ (multi_lane _ _) (ceil x)))
+      (vec_misc (VecMisc2.Frintp) x (vector_size ty)))
+
+(rule (lower (has_type $F32 (ceil x)))
+      (fpu_round (FpuRoundMode.Plus32) x))
+
+(rule (lower (has_type $F64 (ceil x)))
+      (fpu_round (FpuRoundMode.Plus64) x))
+
+;;;; Rules for `floor` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty @ (multi_lane _ _) (floor x)))
+      (vec_misc (VecMisc2.Frintm) x (vector_size ty)))
+
+(rule (lower (has_type $F32 (floor x)))
+      (fpu_round (FpuRoundMode.Minus32) x))
+
+(rule (lower (has_type $F64 (floor x)))
+      (fpu_round (FpuRoundMode.Minus64) x))
+
+;;;; Rules for `trunc` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty @ (multi_lane _ _) (trunc x)))
+      (vec_misc (VecMisc2.Frintz) x (vector_size ty)))
+
+(rule (lower (has_type $F32 (trunc x)))
+      (fpu_round (FpuRoundMode.Zero32) x))
+
+(rule (lower (has_type $F64 (trunc x)))
+      (fpu_round (FpuRoundMode.Zero64) x))
+
+;;;; Rules for `nearest` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(rule (lower (has_type ty @ (multi_lane _ _) (nearest x)))
+      (vec_misc (VecMisc2.Frintn) x (vector_size ty)))
+
+(rule (lower (has_type $F32 (nearest x)))
+      (fpu_round (FpuRoundMode.Nearest32) x))
+
+(rule (lower (has_type $F64 (nearest x)))
+      (fpu_round (FpuRoundMode.Nearest64) x))
+
+
 ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 ;; `i64` and smaller
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1250,107 +1250,10 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        Opcode::FminPseudo | Opcode::FmaxPseudo => implemented_in_isle(ctx),

        Opcode::Sqrt | Opcode::Fneg | Opcode::Fabs | Opcode::Fpromote | Opcode::Fdemote => {
-            let ty = ty.unwrap();
-            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            if !ty.is_vector() {
-                let fpu_op = match op {
-                    Opcode::Sqrt => FPUOp1::Sqrt,
-                    Opcode::Fneg => FPUOp1::Neg,
-                    Opcode::Fabs => FPUOp1::Abs,
-                    Opcode::Fpromote => {
-                        if ty != F64 {
-                            return Err(CodegenError::Unsupported(format!(
-                                "Fpromote: Unsupported type: {:?}",
-                                ty
-                            )));
-                        }
-                        FPUOp1::Cvt32To64
-                    }
-                    Opcode::Fdemote => {
-                        if ty != F32 {
-                            return Err(CodegenError::Unsupported(format!(
-                                "Fdemote: Unsupported type: {:?}",
-                                ty
-                            )));
-                        }
-                        FPUOp1::Cvt64To32
-                    }
-                    _ => unreachable!(),
-                };
-                ctx.emit(Inst::FpuRR {
-                    fpu_op,
-                    size: ScalarSize::from_ty(ctx.input_ty(insn, 0)),
-                    rd,
-                    rn,
-                });
-            } else {
-                let op = match op {
-                    Opcode::Fabs => VecMisc2::Fabs,
-                    Opcode::Fneg => VecMisc2::Fneg,
-                    Opcode::Sqrt => VecMisc2::Fsqrt,
-                    _ => {
-                        return Err(CodegenError::Unsupported(format!(
-                            "{}: Unsupported type: {:?}",
-                            op, ty
-                        )))
-                    }
-                };
-
-                ctx.emit(Inst::VecMisc {
-                    op,
-                    rd,
-                    rn,
-                    size: VectorSize::from_ty(ty),
-                });
-            }
+            implemented_in_isle(ctx)
        }

-        Opcode::Ceil | Opcode::Floor | Opcode::Trunc | Opcode::Nearest => {
-            let ty = ctx.output_ty(insn, 0);
-            if !ty.is_vector() {
-                let bits = ty_bits(ty);
-                let op = match (op, bits) {
-                    (Opcode::Ceil, 32) => FpuRoundMode::Plus32,
-                    (Opcode::Ceil, 64) => FpuRoundMode::Plus64,
-                    (Opcode::Floor, 32) => FpuRoundMode::Minus32,
-                    (Opcode::Floor, 64) => FpuRoundMode::Minus64,
-                    (Opcode::Trunc, 32) => FpuRoundMode::Zero32,
-                    (Opcode::Trunc, 64) => FpuRoundMode::Zero64,
-                    (Opcode::Nearest, 32) => FpuRoundMode::Nearest32,
-                    (Opcode::Nearest, 64) => FpuRoundMode::Nearest64,
-                    _ => {
-                        return Err(CodegenError::Unsupported(format!(
-                            "{}: Unsupported type: {:?}",
-                            op, ty
-                        )))
-                    }
-                };
-                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                ctx.emit(Inst::FpuRound { op, rd, rn });
-            } else {
-                let (op, size) = match (op, ty) {
-                    (Opcode::Ceil, F32X4) => (VecMisc2::Frintp, VectorSize::Size32x4),
-                    (Opcode::Ceil, F64X2) => (VecMisc2::Frintp, VectorSize::Size64x2),
-                    (Opcode::Floor, F32X4) => (VecMisc2::Frintm, VectorSize::Size32x4),
-                    (Opcode::Floor, F64X2) => (VecMisc2::Frintm, VectorSize::Size64x2),
-                    (Opcode::Trunc, F32X4) => (VecMisc2::Frintz, VectorSize::Size32x4),
-                    (Opcode::Trunc, F64X2) => (VecMisc2::Frintz, VectorSize::Size64x2),
-                    (Opcode::Nearest, F32X4) => (VecMisc2::Frintn, VectorSize::Size32x4),
-                    (Opcode::Nearest, F64X2) => (VecMisc2::Frintn, VectorSize::Size64x2),
-                    _ => {
-                        return Err(CodegenError::Unsupported(format!(
-                            "{}: Unsupported type: {:?}",
-                            op, ty
-                        )))
-                    }
-                };
-                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-                let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-                ctx.emit(Inst::VecMisc { op, rd, rn, size });
-            }
-        }
+        Opcode::Ceil | Opcode::Floor | Opcode::Trunc | Opcode::Nearest => implemented_in_isle(ctx),

        Opcode::Fma => {
            let ty = ty.unwrap();
--- a/cranelift/filetests/filetests/isa/aarch64/floating-point.clif
+++ b/cranelift/filetests/filetests/isa/aarch64/floating-point.clif
@@ -701,3 +701,212 @@ block0(v0: f64):
 ;   fcvtzs x0, d7
 ;   ret

+function %f57(f32x2) -> f32x2 {
+block0(v0: f32x2):
+  v1 = sqrt v0
+  return v1
+}
+
+; block0:
+;   fsqrt v0.2s, v0.2s
+;   ret
+
+function %f58(f32x4) -> f32x4 {
+block0(v0: f32x4):
+  v1 = sqrt v0
+  return v1
+}
+
+; block0:
+;   fsqrt v0.4s, v0.4s
+;   ret
+
+function %f59(f64x2) -> f64x2 {
+block0(v0: f64x2):
+  v1 = sqrt v0
+  return v1
+}
+
+; block0:
+;   fsqrt v0.2d, v0.2d
+;   ret
+
+function %f60(f32x2) -> f32x2 {
+block0(v0: f32x2):
+  v1 = fneg v0
+  return v1
+}
+
+; block0:
+;   fneg v0.2s, v0.2s
+;   ret
+
+function %f61(f32x4) -> f32x4 {
+block0(v0: f32x4):
+  v1 = fneg v0
+  return v1
+}
+
+; block0:
+;   fneg v0.4s, v0.4s
+;   ret
+
+function %f62(f64x2) -> f64x2 {
+block0(v0: f64x2):
+  v1 = fneg v0
+  return v1
+}
+
+; block0:
+;   fneg v0.2d, v0.2d
+;   ret
+
+function %f63(f32x2) -> f32x2 {
+block0(v0: f32x2):
+  v1 = fabs v0
+  return v1
+}
+
+; block0:
+;   fabs v0.2s, v0.2s
+;   ret
+
+function %f64(f32x4) -> f32x4 {
+block0(v0: f32x4):
+  v1 = fabs v0
+  return v1
+}
+
+; block0:
+;   fabs v0.4s, v0.4s
+;   ret
+
+function %f65(f64x2) -> f64x2 {
+block0(v0: f64x2):
+  v1 = fabs v0
+  return v1
+}
+
+; block0:
+;   fabs v0.2d, v0.2d
+;   ret
+
+function %f66(f32x2) -> f32x2 {
+block0(v0: f32x2):
+  v1 = ceil v0
+  return v1
+}
+
+; block0:
+;   frintp v0.2s, v0.2s
+;   ret
+
+function %f67(f32x4) -> f32x4 {
+block0(v0: f32x4):
+  v1 = ceil v0
+  return v1
+}
+
+; block0:
+;   frintp v0.4s, v0.4s
+;   ret
+
+function %f68(f64x2) -> f64x2 {
+block0(v0: f64x2):
+  v1 = ceil v0
+  return v1
+}
+
+; block0:
+;   frintp v0.2d, v0.2d
+;   ret
+
+function %f69(f32x2) -> f32x2 {
+block0(v0: f32x2):
+  v1 = floor v0
+  return v1
+}
+
+; block0:
+;   frintm v0.2s, v0.2s
+;   ret
+
+function %f70(f32x4) -> f32x4 {
+block0(v0: f32x4):
+  v1 = floor v0
+  return v1
+}
+
+; block0:
+;   frintm v0.4s, v0.4s
+;   ret
+
+function %f71(f64x2) -> f64x2 {
+block0(v0: f64x2):
+  v1 = floor v0
+  return v1
+}
+
+; block0:
+;   frintm v0.2d, v0.2d
+;   ret
+
+function %f72(f32x2) -> f32x2 {
+block0(v0: f32x2):
+  v1 = trunc v0
+  return v1
+}
+
+; block0:
+;   frintz v0.2s, v0.2s
+;   ret
+
+function %f73(f32x4) -> f32x4 {
+block0(v0: f32x4):
+  v1 = trunc v0
+  return v1
+}
+
+; block0:
+;   frintz v0.4s, v0.4s
+;   ret
+
+function %f74(f64x2) -> f64x2 {
+block0(v0: f64x2):
+  v1 = trunc v0
+  return v1
+}
+
+; block0:
+;   frintz v0.2d, v0.2d
+;   ret
+
+function %f75(f32x2) -> f32x2 {
+block0(v0: f32x2):
+  v1 = nearest v0
+  return v1
+}
+
+; block0:
+;   frintn v0.2s, v0.2s
+;   ret
+
+function %f76(f32x4) -> f32x4 {
+block0(v0: f32x4):
+  v1 = nearest v0
+  return v1
+}
+
+; block0:
+;   frintn v0.4s, v0.4s
+;   ret
+
+function %f77(f64x2) -> f64x2 {
+block0(v0: f64x2):
+  v1 = nearest v0
+  return v1
+}
+
+; block0:
+;   frintn v0.2d, v0.2d
+;   ret