From b2d5afdf833c475ca5ae82c89e4b38b5e62b5177 Mon Sep 17 00:00:00 2001 From: Trevor Elliott Date: Wed, 4 Jan 2023 11:52:00 -0800 Subject: [PATCH] riscv64: Implement fcmp in ISLE (#5512) Rework the compilation of fcmp in the riscv64 backend to be in ISLE, removing the need for the dedicated Fcmp instruction. This change is motivated by #5500, which showed that the riscv64 backend was generating branch instructions in the middle of a basic block. We can't remove lower_br_fcmp quite yet as it's used in a few places in the emit module, but it's now no longer reachable from the ISLE lowerings. Fixes #5500 --- cranelift/codegen/src/isa/riscv64/inst.isle | 177 ++++++++++++++++-- .../codegen/src/isa/riscv64/inst/emit.rs | 36 ---- .../src/isa/riscv64/inst/emit_tests.rs | 8 - cranelift/codegen/src/isa/riscv64/inst/mod.rs | 24 --- cranelift/codegen/src/isa/riscv64/lower.isle | 10 +- .../codegen/src/isa/riscv64/lower/isle.rs | 31 +-- .../filetests/filetests/isa/riscv64/fcmp.clif | 29 +++ 7 files changed, 202 insertions(+), 113 deletions(-) create mode 100644 cranelift/filetests/filetests/isa/riscv64/fcmp.clif diff --git a/cranelift/codegen/src/isa/riscv64/inst.isle b/cranelift/codegen/src/isa/riscv64/inst.isle index 6fc3b254de..1d7e3beb57 100644 --- a/cranelift/codegen/src/isa/riscv64/inst.isle +++ b/cranelift/codegen/src/isa/riscv64/inst.isle @@ -107,6 +107,7 @@ (rs2 Reg) (cc IntCC) (trap_code TrapCode)) + (TrapFf (cc FloatCC) (x Reg) @@ -204,14 +205,6 @@ (x Reg) (t0 WritableReg)) - ;; a float compare - (Fcmp - (cc FloatCC) - (rd WritableReg) - (rs1 Reg) - (rs2 Reg) - (ty Type)) - ;; select x or y base on condition (Select (dst VecWritableReg) @@ -690,7 +683,6 @@ (Tso) )) - (type VecBranchTarget (primitive VecBranchTarget)) (type BoxCallInfo (primitive BoxCallInfo)) (type BoxCallIndInfo (primitive BoxCallIndInfo)) @@ -1880,9 +1872,6 @@ (decl lower_br_icmp (IntCC ValueRegs ValueRegs VecMachLabel Type) Unit) (extern constructor lower_br_icmp lower_br_icmp) -(decl lower_br_fcmp (FloatCC Reg Reg VecMachLabel Type) Unit) -(extern constructor lower_br_fcmp lower_br_fcmp) - ;; int scalar zero regs. (decl int_zero_reg (Type) ValueRegs) (extern constructor int_zero_reg int_zero_reg) @@ -1935,7 +1924,9 @@ (rule 1 (lower_branch (brz (fcmp cc a @ (value_type ty) b) _ _) targets) - (lower_br_fcmp (floatcc_inverse cc) a b targets ty)) + (let ((then BranchTarget (label_to_br_target (vec_label_get targets 0))) + (else BranchTarget (label_to_br_target (vec_label_get targets 1)))) + (emit_side_effect (cond_br (emit_fcmp (floatcc_inverse cc) ty a b) then else)))) ;;;; (rule @@ -1955,7 +1946,9 @@ (rule 1 (lower_branch (brnz (fcmp cc a @ (value_type ty) b) _ _) targets) - (lower_br_fcmp cc a b targets ty)) + (let ((then BranchTarget (label_to_br_target (vec_label_get targets 0))) + (else BranchTarget (label_to_br_target (vec_label_get targets 1)))) + (emit_side_effect (cond_br (emit_fcmp cc ty a b) then else)))) ;;; (decl lower_br_table (Reg VecMachLabel) Unit) @@ -2194,3 +2187,159 @@ (decl sp_reg () PReg) (extern constructor sp_reg sp_reg) + + +;;;; Helpers for floating point comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(decl not (Reg) Reg) +(rule (not x) (alu_rr_imm12 (AluOPRRI.Xori) x (imm_from_bits 1))) + +(decl emit_or (Reg Reg) Reg) +(rule (emit_or x y) (alu_rrr (AluOPRRR.Or) x y)) + +(decl emit_and (Reg Reg) Reg) +(rule (emit_and x y) (alu_rrr (AluOPRRR.And) x y)) + +(decl is_not_nan (Type Reg) Reg) +(rule (is_not_nan ty a) (feq ty a a)) + +(decl feq (Type Reg Reg) Reg) +(rule (feq $F32 a b) (fpu_rrr (FpuOPRRR.FeqS) $I64 a b)) +(rule (feq $F64 a b) (fpu_rrr (FpuOPRRR.FeqD) $I64 a b)) + +(decl flt (Type Reg Reg) Reg) +(rule (flt $F32 a b) (fpu_rrr (FpuOPRRR.FltS) $I64 a b)) +(rule (flt $F64 a b) (fpu_rrr (FpuOPRRR.FltD) $I64 a b)) + +(decl fle (Type Reg Reg) Reg) +(rule (fle $F32 a b) (fpu_rrr (FpuOPRRR.FleS) $I64 a b)) +(rule (fle $F64 a b) (fpu_rrr (FpuOPRRR.FleD) $I64 a b)) + +(decl fgt (Type Reg Reg) Reg) +(rule (fgt ty a b) (flt ty b a)) + +(decl fge (Type Reg Reg) Reg) +(rule (fge ty a b) (fle ty b a)) + +(decl ordered (Type Reg Reg) Reg) +(rule (ordered ty a b) (emit_and (is_not_nan ty a) (is_not_nan ty b))) + +(type CmpResult (enum + (Result + (result Reg) + (invert bool)))) + +;; Wrapper for the common case when constructing comparison results. It assumes +;; that the result isn't negated. +(decl cmp_result (Reg) CmpResult) +(rule (cmp_result result) (CmpResult.Result result $false)) + +;; Wrapper for the case where it's more convenient to construct the negated +;; version of the comparison. +(decl cmp_result_invert (Reg) CmpResult) +(rule (cmp_result_invert result) (CmpResult.Result result $true)) + +;; Consume a CmpResult, producing a branch on its result. +(decl cond_br (CmpResult BranchTarget BranchTarget) SideEffectNoResult) +(rule (cond_br cmp then else) + (SideEffectNoResult.Inst + (MInst.CondBr then else (cmp_integer_compare cmp)))) + +;; Construct an IntegerCompare value. +(decl int_compare (IntCC Reg Reg) IntegerCompare) +(extern constructor int_compare int_compare) + +;; Convert a comparison into a branch test. +(decl cmp_integer_compare (CmpResult) IntegerCompare) + +(rule + (cmp_integer_compare (CmpResult.Result res $false)) + (int_compare (IntCC.NotEqual) res (zero_reg))) + +(rule + (cmp_integer_compare (CmpResult.Result res $true)) + (int_compare (IntCC.Equal) res (zero_reg))) + +;; Convert a comparison into a boolean value. +(decl cmp_value (CmpResult) Reg) +(rule (cmp_value (CmpResult.Result res $false)) res) +(rule (cmp_value (CmpResult.Result res $true)) (not res)) + +;; Compare two floating point numbers and return a zero/non-zero result. +(decl emit_fcmp (FloatCC Type Reg Reg) CmpResult) + +;; a is not nan && b is not nan +(rule + (emit_fcmp (FloatCC.Ordered) ty a b) + (cmp_result (ordered ty a b))) + +;; a is nan || b is nan +;; == !(a is not nan && b is not nan) +(rule + (emit_fcmp (FloatCC.Unordered) ty a b) + (cmp_result_invert (ordered ty a b))) + +;; a == b +(rule + (emit_fcmp (FloatCC.Equal) ty a b) + (cmp_result (feq ty a b))) + +;; a != b +;; == !(a == b) +(rule + (emit_fcmp (FloatCC.NotEqual) ty a b) + (cmp_result_invert (feq ty a b))) + +;; a < b || a > b +(rule + (emit_fcmp (FloatCC.OrderedNotEqual) ty a b) + (cmp_result (emit_or (flt ty a b) (fgt ty a b)))) + +;; !(ordered a b) || a == b +(rule + (emit_fcmp (FloatCC.UnorderedOrEqual) ty a b) + (cmp_result (emit_or (not (ordered ty a b)) (feq ty a b)))) + +;; a < b +(rule + (emit_fcmp (FloatCC.LessThan) ty a b) + (cmp_result (flt ty a b))) + +;; a <= b +(rule + (emit_fcmp (FloatCC.LessThanOrEqual) ty a b) + (cmp_result (fle ty a b))) + +;; a > b +(rule + (emit_fcmp (FloatCC.GreaterThan) ty a b) + (cmp_result (fgt ty a b))) + +;; a >= b +(rule + (emit_fcmp (FloatCC.GreaterThanOrEqual) ty a b) + (cmp_result (fge ty a b))) + +;; !(ordered a b) || a < b +;; == !(ordered a b && a >= b) +(rule + (emit_fcmp (FloatCC.UnorderedOrLessThan) ty a b) + (cmp_result_invert (emit_and (ordered ty a b) (fge ty a b)))) + +;; !(ordered a b) || a <= b +;; == !(ordered a b && a > b) +(rule + (emit_fcmp (FloatCC.UnorderedOrLessThanOrEqual) ty a b) + (cmp_result_invert (emit_and (ordered ty a b) (fgt ty a b)))) + +;; !(ordered a b) || a > b +;; == !(ordered a b && a <= b) +(rule + (emit_fcmp (FloatCC.UnorderedOrGreaterThan) ty a b) + (cmp_result_invert (emit_and (ordered ty a b) (fle ty a b)))) + +;; !(ordered a b) || a >= b +;; == !(ordered a b && a < b) +(rule + (emit_fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) ty a b) + (cmp_result_invert (emit_and (ordered ty a b) (flt ty a b)))) diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit.rs b/cranelift/codegen/src/isa/riscv64/inst/emit.rs index 49cf20878a..2f80467f1c 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit.rs @@ -1337,42 +1337,6 @@ impl MachInstEmit for Inst { } } - &Inst::Fcmp { - rd, - cc, - ty, - rs1, - rs2, - } => { - let rs1 = allocs.next(rs1); - let rs2 = allocs.next(rs2); - let rd = allocs.next_writable(rd); - let label_true = sink.get_label(); - let label_jump_over = sink.get_label(); - Inst::lower_br_fcmp( - cc, - rs1, - rs2, - BranchTarget::Label(label_true), - BranchTarget::zero(), - ty, - rd, - ) - .iter() - .for_each(|i| i.emit(&[], sink, emit_info, state)); - // here is not taken. - Inst::load_imm12(rd, Imm12::FALSE).emit(&[], sink, emit_info, state); - // jump over. - Inst::Jal { - dest: BranchTarget::Label(label_jump_over), - } - .emit(&[], sink, emit_info, state); - // here is true - sink.bind_label(label_true); - Inst::load_imm12(rd, Imm12::TRUE).emit(&[], sink, emit_info, state); - sink.bind_label(label_jump_over); - } - &Inst::Select { ref dst, condition, diff --git a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs index b12eae9af8..247ee446b6 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/emit_tests.rs @@ -2177,14 +2177,6 @@ fn riscv64_worst_case_instruction_size() { //there are all candidates potential generate a lot of bytes. let mut candidates: Vec = vec![]; - candidates.push(Inst::Fcmp { - rd: writable_a0(), - cc: FloatCC::UnorderedOrLessThanOrEqual, - ty: F64, - rs1: fa1(), - rs2: fa0(), - }); - candidates.push(Inst::IntSelect { dst: vec![writable_a0(), writable_a0()], ty: I128, diff --git a/cranelift/codegen/src/isa/riscv64/inst/mod.rs b/cranelift/codegen/src/isa/riscv64/inst/mod.rs index 956909df3e..16044b3c0e 100644 --- a/cranelift/codegen/src/isa/riscv64/inst/mod.rs +++ b/cranelift/codegen/src/isa/riscv64/inst/mod.rs @@ -460,11 +460,6 @@ fn riscv64_get_operands VReg>(inst: &Inst, collector: &mut Operan collector.reg_use(src); collector.reg_def(rd); } - &Inst::Fcmp { rd, rs1, rs2, .. } => { - collector.reg_use(rs1); - collector.reg_use(rs2); - collector.reg_early_def(rd); - } &Inst::Select { ref dst, condition, @@ -1366,25 +1361,6 @@ impl Inst { let rd = format_reg(rd.to_reg(), allocs); format!("{} {},{}", op.op_name(), rd, base,) } - &Inst::Fcmp { - rd, - cc, - ty, - rs1, - rs2, - } => { - let rs1 = format_reg(rs1, allocs); - let rs2 = format_reg(rs2, allocs); - let rd = format_reg(rd.to_reg(), allocs); - format!( - "f{}.{} {},{},{}", - cc, - if ty == F32 { "s" } else { "d" }, - rd, - rs1, - rs2, - ) - } &Inst::Store { to, src, diff --git a/cranelift/codegen/src/isa/riscv64/lower.isle b/cranelift/codegen/src/isa/riscv64/lower.isle index 6ec81a3a5d..76fb91605b 100644 --- a/cranelift/codegen/src/isa/riscv64/lower.isle +++ b/cranelift/codegen/src/isa/riscv64/lower.isle @@ -752,18 +752,10 @@ (lower (icmp cc x @ (value_type ty) y)) (lower_icmp cc x y ty)) -(decl gen_fcmp (FloatCC Value Value Type) Reg) -(rule - (gen_fcmp cc x y ty) - (let - ((result WritableReg (temp_writable_reg $I64)) - (_ Unit (emit (MInst.Fcmp cc result x y ty)))) - (writable_reg_to_reg result))) - ;;;;; Rules for `fcmp`;;;;;;;;; (rule (lower (fcmp cc x @ (value_type ty) y)) - (gen_fcmp cc x y ty)) + (cmp_value (emit_fcmp cc ty x y))) ;;;;; Rules for `func_addr`;;;;;;;;; (rule diff --git a/cranelift/codegen/src/isa/riscv64/lower/isle.rs b/cranelift/codegen/src/isa/riscv64/lower/isle.rs index b414148b6a..c41469eff6 100644 --- a/cranelift/codegen/src/isa/riscv64/lower/isle.rs +++ b/cranelift/codegen/src/isa/riscv64/lower/isle.rs @@ -61,28 +61,6 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> { } } - fn lower_br_fcmp( - &mut self, - cc: &FloatCC, - a: Reg, - b: Reg, - targets: &VecMachLabel, - ty: Type, - ) -> Unit { - let tmp = self.temp_writable_reg(I64); - MInst::lower_br_fcmp( - *cc, - a, - b, - BranchTarget::Label(targets[0]), - BranchTarget::Label(targets[1]), - ty, - tmp, - ) - .iter() - .for_each(|i| self.emit(i)); - } - fn lower_brz_or_nz( &mut self, cc: &IntCC, @@ -434,6 +412,15 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> { tmp.to_reg() } + + #[inline] + fn int_compare(&mut self, kind: &IntCC, rs1: Reg, rs2: Reg) -> IntegerCompare { + IntegerCompare { + kind: *kind, + rs1, + rs2, + } + } } impl IsleContext<'_, '_, MInst, Riscv64Backend> { diff --git a/cranelift/filetests/filetests/isa/riscv64/fcmp.clif b/cranelift/filetests/filetests/isa/riscv64/fcmp.clif new file mode 100644 index 0000000000..7289e1f50d --- /dev/null +++ b/cranelift/filetests/filetests/isa/riscv64/fcmp.clif @@ -0,0 +1,29 @@ +test compile precise-output +target riscv64 + +;; See #5500 for more details about this test case. +function %f0() { +block0: + v0 = f64const 0.0 + v1 = fcmp ult v0, v0 + brz v1, block1 + jump block1 + +block1: + return +} + +; block0: +; li t1,0 +; fmv.d.x ft1,t1 +; li a2,0 +; fmv.d.x ft5,a2 +; fle.d a5,ft5,ft1 +; bne a5,zero,taken(label1),not_taken(label2) +; block1: +; j label3 +; block2: +; j label3 +; block3: +; ret +