riscv64: Implement fcmp in ISLE (#5512)

Rework the compilation of fcmp in the riscv64 backend to be in ISLE, removing the need for the dedicated Fcmp instruction. This change is motivated by #5500, which showed that the riscv64 backend was generating branch instructions in the middle of a basic block. We can't remove lower_br_fcmp quite yet as it's used in a few places in the emit module, but it's now no longer reachable from the ISLE lowerings. Fixes #5500
2023-01-04 11:52:00 -08:00
parent d1920f5a2d
commit b2d5afdf83
7 changed files with 202 additions and 113 deletions
--- a/cranelift/codegen/src/isa/riscv64/inst.isle
+++ b/cranelift/codegen/src/isa/riscv64/inst.isle
@@ -107,6 +107,7 @@
      (rs2 Reg)
      (cc IntCC)
      (trap_code TrapCode))
+
    (TrapFf
      (cc FloatCC)
      (x Reg)
@@ -204,14 +205,6 @@
      (x Reg)
      (t0 WritableReg))

-    ;; a float compare
-    (Fcmp
-      (cc FloatCC)
-      (rd WritableReg)
-      (rs1 Reg)
-      (rs2 Reg)
-      (ty Type))
-
    ;; select x or y base on condition
    (Select
      (dst VecWritableReg)
@@ -690,7 +683,6 @@
    (Tso)
 ))

-
 (type VecBranchTarget (primitive VecBranchTarget))
 (type BoxCallInfo (primitive BoxCallInfo))
 (type BoxCallIndInfo (primitive BoxCallIndInfo))
@@ -1880,9 +1872,6 @@
 (decl lower_br_icmp (IntCC ValueRegs ValueRegs VecMachLabel Type) Unit)
 (extern constructor lower_br_icmp lower_br_icmp)

-(decl lower_br_fcmp (FloatCC Reg Reg VecMachLabel Type) Unit)
-(extern constructor lower_br_fcmp lower_br_fcmp)
-
 ;; int scalar zero regs.
 (decl int_zero_reg (Type) ValueRegs)
 (extern constructor int_zero_reg int_zero_reg)
@@ -1935,7 +1924,9 @@

 (rule 1
  (lower_branch (brz (fcmp cc a @ (value_type ty) b) _ _) targets)
-  (lower_br_fcmp (floatcc_inverse cc) a b targets ty))
+  (let ((then BranchTarget (label_to_br_target (vec_label_get targets 0)))
+        (else BranchTarget (label_to_br_target (vec_label_get targets 1))))
+    (emit_side_effect (cond_br (emit_fcmp (floatcc_inverse cc) ty a b) then else))))

 ;;;;
 (rule
@@ -1955,7 +1946,9 @@

 (rule 1
  (lower_branch (brnz (fcmp cc a @ (value_type ty) b) _ _) targets)
-  (lower_br_fcmp cc a b targets ty))
+  (let ((then BranchTarget (label_to_br_target (vec_label_get targets 0)))
+        (else BranchTarget (label_to_br_target (vec_label_get targets 1))))
+    (emit_side_effect (cond_br (emit_fcmp cc ty a b) then else))))

 ;;;
 (decl lower_br_table (Reg VecMachLabel) Unit)
@@ -2194,3 +2187,159 @@

 (decl sp_reg () PReg)
 (extern constructor sp_reg sp_reg)
+
+
+;;;; Helpers for floating point comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(decl not (Reg) Reg)
+(rule (not x) (alu_rr_imm12 (AluOPRRI.Xori) x (imm_from_bits 1)))
+
+(decl emit_or (Reg Reg) Reg)
+(rule (emit_or x y) (alu_rrr (AluOPRRR.Or) x y))
+
+(decl emit_and (Reg Reg) Reg)
+(rule (emit_and x y) (alu_rrr (AluOPRRR.And) x y))
+
+(decl is_not_nan (Type Reg) Reg)
+(rule (is_not_nan ty a) (feq ty a a))
+
+(decl feq (Type Reg Reg) Reg)
+(rule (feq $F32 a b) (fpu_rrr (FpuOPRRR.FeqS) $I64 a b))
+(rule (feq $F64 a b) (fpu_rrr (FpuOPRRR.FeqD) $I64 a b))
+
+(decl flt (Type Reg Reg) Reg)
+(rule (flt $F32 a b) (fpu_rrr (FpuOPRRR.FltS) $I64 a b))
+(rule (flt $F64 a b) (fpu_rrr (FpuOPRRR.FltD) $I64 a b))
+
+(decl fle (Type Reg Reg) Reg)
+(rule (fle $F32 a b) (fpu_rrr (FpuOPRRR.FleS) $I64 a b))
+(rule (fle $F64 a b) (fpu_rrr (FpuOPRRR.FleD) $I64 a b))
+
+(decl fgt (Type Reg Reg) Reg)
+(rule (fgt ty a b) (flt ty b a))
+
+(decl fge (Type Reg Reg) Reg)
+(rule (fge ty a b) (fle ty b a))
+
+(decl ordered (Type Reg Reg) Reg)
+(rule (ordered ty a b) (emit_and (is_not_nan ty a) (is_not_nan ty b)))
+
+(type CmpResult (enum
+                  (Result
+                    (result Reg)
+                    (invert bool))))
+
+;; Wrapper for the common case when constructing comparison results. It assumes
+;; that the result isn't negated.
+(decl cmp_result (Reg) CmpResult)
+(rule (cmp_result result) (CmpResult.Result result $false))
+
+;; Wrapper for the case where it's more convenient to construct the negated
+;; version of the comparison.
+(decl cmp_result_invert (Reg) CmpResult)
+(rule (cmp_result_invert result) (CmpResult.Result result $true))
+
+;; Consume a CmpResult, producing a branch on its result.
+(decl cond_br (CmpResult BranchTarget BranchTarget) SideEffectNoResult)
+(rule (cond_br cmp then else)
+      (SideEffectNoResult.Inst
+        (MInst.CondBr then else (cmp_integer_compare cmp))))
+
+;; Construct an IntegerCompare value.
+(decl int_compare (IntCC Reg Reg) IntegerCompare)
+(extern constructor int_compare int_compare)
+
+;; Convert a comparison into a branch test.
+(decl cmp_integer_compare (CmpResult) IntegerCompare)
+
+(rule
+  (cmp_integer_compare (CmpResult.Result res $false))
+  (int_compare (IntCC.NotEqual) res (zero_reg)))
+
+(rule
+  (cmp_integer_compare (CmpResult.Result res $true))
+  (int_compare (IntCC.Equal) res (zero_reg)))
+
+;; Convert a comparison into a boolean value.
+(decl cmp_value (CmpResult) Reg)
+(rule (cmp_value (CmpResult.Result res $false)) res)
+(rule (cmp_value (CmpResult.Result res $true)) (not res))
+
+;; Compare two floating point numbers and return a zero/non-zero result.
+(decl emit_fcmp (FloatCC Type Reg Reg) CmpResult)
+
+;; a is not nan && b is not nan
+(rule
+  (emit_fcmp (FloatCC.Ordered) ty a b)
+  (cmp_result (ordered ty a b)))
+
+;; a is nan || b is nan
+;; == !(a is not nan && b is not nan)
+(rule
+  (emit_fcmp (FloatCC.Unordered) ty a b)
+  (cmp_result_invert (ordered ty a b)))
+
+;; a == b
+(rule
+  (emit_fcmp (FloatCC.Equal) ty a b)
+  (cmp_result (feq ty a b)))
+
+;; a != b
+;; == !(a == b)
+(rule
+  (emit_fcmp (FloatCC.NotEqual) ty a b)
+  (cmp_result_invert (feq ty a b)))
+
+;; a < b || a > b
+(rule
+  (emit_fcmp (FloatCC.OrderedNotEqual) ty a b)
+  (cmp_result (emit_or (flt ty a b) (fgt ty a b))))
+
+;; !(ordered a b) || a == b
+(rule
+  (emit_fcmp (FloatCC.UnorderedOrEqual) ty a b)
+  (cmp_result (emit_or (not (ordered ty a b)) (feq ty a b))))
+
+;; a < b
+(rule
+  (emit_fcmp (FloatCC.LessThan) ty a b)
+  (cmp_result (flt ty a b)))
+
+;; a <= b
+(rule
+  (emit_fcmp (FloatCC.LessThanOrEqual) ty a b)
+  (cmp_result (fle ty a b)))
+
+;; a > b
+(rule
+  (emit_fcmp (FloatCC.GreaterThan) ty a b)
+  (cmp_result (fgt ty a b)))
+
+;; a >= b
+(rule
+  (emit_fcmp (FloatCC.GreaterThanOrEqual) ty a b)
+  (cmp_result (fge ty a b)))
+
+;; !(ordered a b) || a < b
+;; == !(ordered a b && a >= b)
+(rule
+  (emit_fcmp (FloatCC.UnorderedOrLessThan) ty a b)
+  (cmp_result_invert (emit_and (ordered ty a b) (fge ty a b))))
+
+;; !(ordered a b) || a <= b
+;; == !(ordered a b && a > b)
+(rule
+  (emit_fcmp (FloatCC.UnorderedOrLessThanOrEqual) ty a b)
+  (cmp_result_invert (emit_and (ordered ty a b) (fgt ty a b))))
+
+;; !(ordered a b) || a > b
+;; == !(ordered a b && a <= b)
+(rule
+  (emit_fcmp (FloatCC.UnorderedOrGreaterThan) ty a b)
+  (cmp_result_invert (emit_and (ordered ty a b) (fle ty a b))))
+
+;; !(ordered a b) || a >= b
+;; == !(ordered a b && a < b)
+(rule
+  (emit_fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) ty a b)
+  (cmp_result_invert (emit_and (ordered ty a b) (flt ty a b))))
--- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs
@@ -1337,42 +1337,6 @@ impl MachInstEmit for Inst {
                }
            }

-            &Inst::Fcmp {
-                rd,
-                cc,
-                ty,
-                rs1,
-                rs2,
-            } => {
-                let rs1 = allocs.next(rs1);
-                let rs2 = allocs.next(rs2);
-                let rd = allocs.next_writable(rd);
-                let label_true = sink.get_label();
-                let label_jump_over = sink.get_label();
-                Inst::lower_br_fcmp(
-                    cc,
-                    rs1,
-                    rs2,
-                    BranchTarget::Label(label_true),
-                    BranchTarget::zero(),
-                    ty,
-                    rd,
-                )
-                .iter()
-                .for_each(|i| i.emit(&[], sink, emit_info, state));
-                // here is not taken.
-                Inst::load_imm12(rd, Imm12::FALSE).emit(&[], sink, emit_info, state);
-                // jump over.
-                Inst::Jal {
-                    dest: BranchTarget::Label(label_jump_over),
-                }
-                .emit(&[], sink, emit_info, state);
-                // here is true
-                sink.bind_label(label_true);
-                Inst::load_imm12(rd, Imm12::TRUE).emit(&[], sink, emit_info, state);
-                sink.bind_label(label_jump_over);
-            }
-
            &Inst::Select {
                ref dst,
                condition,
--- a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs
@@ -2177,14 +2177,6 @@ fn riscv64_worst_case_instruction_size() {
    //there are all candidates potential generate a lot of bytes.
    let mut candidates: Vec<MInst> = vec![];

-    candidates.push(Inst::Fcmp {
-        rd: writable_a0(),
-        cc: FloatCC::UnorderedOrLessThanOrEqual,
-        ty: F64,
-        rs1: fa1(),
-        rs2: fa0(),
-    });
-
    candidates.push(Inst::IntSelect {
        dst: vec![writable_a0(), writable_a0()],
        ty: I128,
--- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs
@@ -460,11 +460,6 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
            collector.reg_use(src);
            collector.reg_def(rd);
        }
-        &Inst::Fcmp { rd, rs1, rs2, .. } => {
-            collector.reg_use(rs1);
-            collector.reg_use(rs2);
-            collector.reg_early_def(rd);
-        }
        &Inst::Select {
            ref dst,
            condition,
@@ -1366,25 +1361,6 @@ impl Inst {
                let rd = format_reg(rd.to_reg(), allocs);
                format!("{} {},{}", op.op_name(), rd, base,)
            }
-            &Inst::Fcmp {
-                rd,
-                cc,
-                ty,
-                rs1,
-                rs2,
-            } => {
-                let rs1 = format_reg(rs1, allocs);
-                let rs2 = format_reg(rs2, allocs);
-                let rd = format_reg(rd.to_reg(), allocs);
-                format!(
-                    "f{}.{} {},{},{}",
-                    cc,
-                    if ty == F32 { "s" } else { "d" },
-                    rd,
-                    rs1,
-                    rs2,
-                )
-            }
            &Inst::Store {
                to,
                src,
--- a/cranelift/codegen/src/isa/riscv64/lower.isle
+++ b/cranelift/codegen/src/isa/riscv64/lower.isle
@@ -752,18 +752,10 @@
  (lower (icmp cc x @ (value_type ty) y))
  (lower_icmp cc x y ty))

-(decl gen_fcmp (FloatCC Value Value Type) Reg)
-(rule
-  (gen_fcmp cc x y ty)
-  (let
-    ((result WritableReg (temp_writable_reg $I64))
-      (_ Unit (emit (MInst.Fcmp cc result x y ty))))
-    (writable_reg_to_reg result)))
-
 ;;;;;  Rules for `fcmp`;;;;;;;;;
 (rule
  (lower (fcmp cc x @ (value_type ty) y))
-  (gen_fcmp cc x y ty))
+  (cmp_value (emit_fcmp cc ty x y)))

 ;;;;;  Rules for `func_addr`;;;;;;;;;
 (rule
--- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs
+++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs
@@ -61,28 +61,6 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
        }
    }

-    fn lower_br_fcmp(
-        &mut self,
-        cc: &FloatCC,
-        a: Reg,
-        b: Reg,
-        targets: &VecMachLabel,
-        ty: Type,
-    ) -> Unit {
-        let tmp = self.temp_writable_reg(I64);
-        MInst::lower_br_fcmp(
-            *cc,
-            a,
-            b,
-            BranchTarget::Label(targets[0]),
-            BranchTarget::Label(targets[1]),
-            ty,
-            tmp,
-        )
-        .iter()
-        .for_each(|i| self.emit(i));
-    }
-
    fn lower_brz_or_nz(
        &mut self,
        cc: &IntCC,
@@ -434,6 +412,15 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {

        tmp.to_reg()
    }
+
+    #[inline]
+    fn int_compare(&mut self, kind: &IntCC, rs1: Reg, rs2: Reg) -> IntegerCompare {
+        IntegerCompare {
+            kind: *kind,
+            rs1,
+            rs2,
+        }
+    }
 }

 impl IsleContext<'_, '_, MInst, Riscv64Backend> {
--- a/cranelift/filetests/filetests/isa/riscv64/fcmp.clif
+++ b/cranelift/filetests/filetests/isa/riscv64/fcmp.clif
@@ -0,0 +1,29 @@
+test compile precise-output
+target riscv64
+
+;; See #5500 for more details about this test case.
+function %f0() {
+block0:
+    v0 = f64const 0.0
+    v1 = fcmp ult v0, v0
+    brz v1, block1
+    jump block1
+
+block1:
+    return
+}
+
+; block0:
+;   li t1,0
+;   fmv.d.x ft1,t1
+;   li a2,0
+;   fmv.d.x ft5,a2
+;   fle.d a5,ft5,ft1
+;   bne a5,zero,taken(label1),not_taken(label2)
+; block1:
+;   j label3
+; block2:
+;   j label3
+; block3:
+;   ret
+