riscv64: Implement fcmp in ISLE (#5512)
Rework the compilation of fcmp in the riscv64 backend to be in ISLE, removing the need for the dedicated Fcmp instruction. This change is motivated by #5500, which showed that the riscv64 backend was generating branch instructions in the middle of a basic block. We can't remove lower_br_fcmp quite yet as it's used in a few places in the emit module, but it's now no longer reachable from the ISLE lowerings. Fixes #5500
This commit is contained in:
@@ -107,6 +107,7 @@
|
||||
(rs2 Reg)
|
||||
(cc IntCC)
|
||||
(trap_code TrapCode))
|
||||
|
||||
(TrapFf
|
||||
(cc FloatCC)
|
||||
(x Reg)
|
||||
@@ -204,14 +205,6 @@
|
||||
(x Reg)
|
||||
(t0 WritableReg))
|
||||
|
||||
;; a float compare
|
||||
(Fcmp
|
||||
(cc FloatCC)
|
||||
(rd WritableReg)
|
||||
(rs1 Reg)
|
||||
(rs2 Reg)
|
||||
(ty Type))
|
||||
|
||||
;; select x or y base on condition
|
||||
(Select
|
||||
(dst VecWritableReg)
|
||||
@@ -690,7 +683,6 @@
|
||||
(Tso)
|
||||
))
|
||||
|
||||
|
||||
(type VecBranchTarget (primitive VecBranchTarget))
|
||||
(type BoxCallInfo (primitive BoxCallInfo))
|
||||
(type BoxCallIndInfo (primitive BoxCallIndInfo))
|
||||
@@ -1880,9 +1872,6 @@
|
||||
(decl lower_br_icmp (IntCC ValueRegs ValueRegs VecMachLabel Type) Unit)
|
||||
(extern constructor lower_br_icmp lower_br_icmp)
|
||||
|
||||
(decl lower_br_fcmp (FloatCC Reg Reg VecMachLabel Type) Unit)
|
||||
(extern constructor lower_br_fcmp lower_br_fcmp)
|
||||
|
||||
;; int scalar zero regs.
|
||||
(decl int_zero_reg (Type) ValueRegs)
|
||||
(extern constructor int_zero_reg int_zero_reg)
|
||||
@@ -1935,7 +1924,9 @@
|
||||
|
||||
(rule 1
|
||||
(lower_branch (brz (fcmp cc a @ (value_type ty) b) _ _) targets)
|
||||
(lower_br_fcmp (floatcc_inverse cc) a b targets ty))
|
||||
(let ((then BranchTarget (label_to_br_target (vec_label_get targets 0)))
|
||||
(else BranchTarget (label_to_br_target (vec_label_get targets 1))))
|
||||
(emit_side_effect (cond_br (emit_fcmp (floatcc_inverse cc) ty a b) then else))))
|
||||
|
||||
;;;;
|
||||
(rule
|
||||
@@ -1955,7 +1946,9 @@
|
||||
|
||||
(rule 1
|
||||
(lower_branch (brnz (fcmp cc a @ (value_type ty) b) _ _) targets)
|
||||
(lower_br_fcmp cc a b targets ty))
|
||||
(let ((then BranchTarget (label_to_br_target (vec_label_get targets 0)))
|
||||
(else BranchTarget (label_to_br_target (vec_label_get targets 1))))
|
||||
(emit_side_effect (cond_br (emit_fcmp cc ty a b) then else))))
|
||||
|
||||
;;;
|
||||
(decl lower_br_table (Reg VecMachLabel) Unit)
|
||||
@@ -2194,3 +2187,159 @@
|
||||
|
||||
(decl sp_reg () PReg)
|
||||
(extern constructor sp_reg sp_reg)
|
||||
|
||||
|
||||
;;;; Helpers for floating point comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(decl not (Reg) Reg)
|
||||
(rule (not x) (alu_rr_imm12 (AluOPRRI.Xori) x (imm_from_bits 1)))
|
||||
|
||||
(decl emit_or (Reg Reg) Reg)
|
||||
(rule (emit_or x y) (alu_rrr (AluOPRRR.Or) x y))
|
||||
|
||||
(decl emit_and (Reg Reg) Reg)
|
||||
(rule (emit_and x y) (alu_rrr (AluOPRRR.And) x y))
|
||||
|
||||
(decl is_not_nan (Type Reg) Reg)
|
||||
(rule (is_not_nan ty a) (feq ty a a))
|
||||
|
||||
(decl feq (Type Reg Reg) Reg)
|
||||
(rule (feq $F32 a b) (fpu_rrr (FpuOPRRR.FeqS) $I64 a b))
|
||||
(rule (feq $F64 a b) (fpu_rrr (FpuOPRRR.FeqD) $I64 a b))
|
||||
|
||||
(decl flt (Type Reg Reg) Reg)
|
||||
(rule (flt $F32 a b) (fpu_rrr (FpuOPRRR.FltS) $I64 a b))
|
||||
(rule (flt $F64 a b) (fpu_rrr (FpuOPRRR.FltD) $I64 a b))
|
||||
|
||||
(decl fle (Type Reg Reg) Reg)
|
||||
(rule (fle $F32 a b) (fpu_rrr (FpuOPRRR.FleS) $I64 a b))
|
||||
(rule (fle $F64 a b) (fpu_rrr (FpuOPRRR.FleD) $I64 a b))
|
||||
|
||||
(decl fgt (Type Reg Reg) Reg)
|
||||
(rule (fgt ty a b) (flt ty b a))
|
||||
|
||||
(decl fge (Type Reg Reg) Reg)
|
||||
(rule (fge ty a b) (fle ty b a))
|
||||
|
||||
(decl ordered (Type Reg Reg) Reg)
|
||||
(rule (ordered ty a b) (emit_and (is_not_nan ty a) (is_not_nan ty b)))
|
||||
|
||||
(type CmpResult (enum
|
||||
(Result
|
||||
(result Reg)
|
||||
(invert bool))))
|
||||
|
||||
;; Wrapper for the common case when constructing comparison results. It assumes
|
||||
;; that the result isn't negated.
|
||||
(decl cmp_result (Reg) CmpResult)
|
||||
(rule (cmp_result result) (CmpResult.Result result $false))
|
||||
|
||||
;; Wrapper for the case where it's more convenient to construct the negated
|
||||
;; version of the comparison.
|
||||
(decl cmp_result_invert (Reg) CmpResult)
|
||||
(rule (cmp_result_invert result) (CmpResult.Result result $true))
|
||||
|
||||
;; Consume a CmpResult, producing a branch on its result.
|
||||
(decl cond_br (CmpResult BranchTarget BranchTarget) SideEffectNoResult)
|
||||
(rule (cond_br cmp then else)
|
||||
(SideEffectNoResult.Inst
|
||||
(MInst.CondBr then else (cmp_integer_compare cmp))))
|
||||
|
||||
;; Construct an IntegerCompare value.
|
||||
(decl int_compare (IntCC Reg Reg) IntegerCompare)
|
||||
(extern constructor int_compare int_compare)
|
||||
|
||||
;; Convert a comparison into a branch test.
|
||||
(decl cmp_integer_compare (CmpResult) IntegerCompare)
|
||||
|
||||
(rule
|
||||
(cmp_integer_compare (CmpResult.Result res $false))
|
||||
(int_compare (IntCC.NotEqual) res (zero_reg)))
|
||||
|
||||
(rule
|
||||
(cmp_integer_compare (CmpResult.Result res $true))
|
||||
(int_compare (IntCC.Equal) res (zero_reg)))
|
||||
|
||||
;; Convert a comparison into a boolean value.
|
||||
(decl cmp_value (CmpResult) Reg)
|
||||
(rule (cmp_value (CmpResult.Result res $false)) res)
|
||||
(rule (cmp_value (CmpResult.Result res $true)) (not res))
|
||||
|
||||
;; Compare two floating point numbers and return a zero/non-zero result.
|
||||
(decl emit_fcmp (FloatCC Type Reg Reg) CmpResult)
|
||||
|
||||
;; a is not nan && b is not nan
|
||||
(rule
|
||||
(emit_fcmp (FloatCC.Ordered) ty a b)
|
||||
(cmp_result (ordered ty a b)))
|
||||
|
||||
;; a is nan || b is nan
|
||||
;; == !(a is not nan && b is not nan)
|
||||
(rule
|
||||
(emit_fcmp (FloatCC.Unordered) ty a b)
|
||||
(cmp_result_invert (ordered ty a b)))
|
||||
|
||||
;; a == b
|
||||
(rule
|
||||
(emit_fcmp (FloatCC.Equal) ty a b)
|
||||
(cmp_result (feq ty a b)))
|
||||
|
||||
;; a != b
|
||||
;; == !(a == b)
|
||||
(rule
|
||||
(emit_fcmp (FloatCC.NotEqual) ty a b)
|
||||
(cmp_result_invert (feq ty a b)))
|
||||
|
||||
;; a < b || a > b
|
||||
(rule
|
||||
(emit_fcmp (FloatCC.OrderedNotEqual) ty a b)
|
||||
(cmp_result (emit_or (flt ty a b) (fgt ty a b))))
|
||||
|
||||
;; !(ordered a b) || a == b
|
||||
(rule
|
||||
(emit_fcmp (FloatCC.UnorderedOrEqual) ty a b)
|
||||
(cmp_result (emit_or (not (ordered ty a b)) (feq ty a b))))
|
||||
|
||||
;; a < b
|
||||
(rule
|
||||
(emit_fcmp (FloatCC.LessThan) ty a b)
|
||||
(cmp_result (flt ty a b)))
|
||||
|
||||
;; a <= b
|
||||
(rule
|
||||
(emit_fcmp (FloatCC.LessThanOrEqual) ty a b)
|
||||
(cmp_result (fle ty a b)))
|
||||
|
||||
;; a > b
|
||||
(rule
|
||||
(emit_fcmp (FloatCC.GreaterThan) ty a b)
|
||||
(cmp_result (fgt ty a b)))
|
||||
|
||||
;; a >= b
|
||||
(rule
|
||||
(emit_fcmp (FloatCC.GreaterThanOrEqual) ty a b)
|
||||
(cmp_result (fge ty a b)))
|
||||
|
||||
;; !(ordered a b) || a < b
|
||||
;; == !(ordered a b && a >= b)
|
||||
(rule
|
||||
(emit_fcmp (FloatCC.UnorderedOrLessThan) ty a b)
|
||||
(cmp_result_invert (emit_and (ordered ty a b) (fge ty a b))))
|
||||
|
||||
;; !(ordered a b) || a <= b
|
||||
;; == !(ordered a b && a > b)
|
||||
(rule
|
||||
(emit_fcmp (FloatCC.UnorderedOrLessThanOrEqual) ty a b)
|
||||
(cmp_result_invert (emit_and (ordered ty a b) (fgt ty a b))))
|
||||
|
||||
;; !(ordered a b) || a > b
|
||||
;; == !(ordered a b && a <= b)
|
||||
(rule
|
||||
(emit_fcmp (FloatCC.UnorderedOrGreaterThan) ty a b)
|
||||
(cmp_result_invert (emit_and (ordered ty a b) (fle ty a b))))
|
||||
|
||||
;; !(ordered a b) || a >= b
|
||||
;; == !(ordered a b && a < b)
|
||||
(rule
|
||||
(emit_fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) ty a b)
|
||||
(cmp_result_invert (emit_and (ordered ty a b) (flt ty a b))))
|
||||
|
||||
@@ -1337,42 +1337,6 @@ impl MachInstEmit for Inst {
|
||||
}
|
||||
}
|
||||
|
||||
&Inst::Fcmp {
|
||||
rd,
|
||||
cc,
|
||||
ty,
|
||||
rs1,
|
||||
rs2,
|
||||
} => {
|
||||
let rs1 = allocs.next(rs1);
|
||||
let rs2 = allocs.next(rs2);
|
||||
let rd = allocs.next_writable(rd);
|
||||
let label_true = sink.get_label();
|
||||
let label_jump_over = sink.get_label();
|
||||
Inst::lower_br_fcmp(
|
||||
cc,
|
||||
rs1,
|
||||
rs2,
|
||||
BranchTarget::Label(label_true),
|
||||
BranchTarget::zero(),
|
||||
ty,
|
||||
rd,
|
||||
)
|
||||
.iter()
|
||||
.for_each(|i| i.emit(&[], sink, emit_info, state));
|
||||
// here is not taken.
|
||||
Inst::load_imm12(rd, Imm12::FALSE).emit(&[], sink, emit_info, state);
|
||||
// jump over.
|
||||
Inst::Jal {
|
||||
dest: BranchTarget::Label(label_jump_over),
|
||||
}
|
||||
.emit(&[], sink, emit_info, state);
|
||||
// here is true
|
||||
sink.bind_label(label_true);
|
||||
Inst::load_imm12(rd, Imm12::TRUE).emit(&[], sink, emit_info, state);
|
||||
sink.bind_label(label_jump_over);
|
||||
}
|
||||
|
||||
&Inst::Select {
|
||||
ref dst,
|
||||
condition,
|
||||
|
||||
@@ -2177,14 +2177,6 @@ fn riscv64_worst_case_instruction_size() {
|
||||
//there are all candidates potential generate a lot of bytes.
|
||||
let mut candidates: Vec<MInst> = vec![];
|
||||
|
||||
candidates.push(Inst::Fcmp {
|
||||
rd: writable_a0(),
|
||||
cc: FloatCC::UnorderedOrLessThanOrEqual,
|
||||
ty: F64,
|
||||
rs1: fa1(),
|
||||
rs2: fa0(),
|
||||
});
|
||||
|
||||
candidates.push(Inst::IntSelect {
|
||||
dst: vec![writable_a0(), writable_a0()],
|
||||
ty: I128,
|
||||
|
||||
@@ -460,11 +460,6 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
|
||||
collector.reg_use(src);
|
||||
collector.reg_def(rd);
|
||||
}
|
||||
&Inst::Fcmp { rd, rs1, rs2, .. } => {
|
||||
collector.reg_use(rs1);
|
||||
collector.reg_use(rs2);
|
||||
collector.reg_early_def(rd);
|
||||
}
|
||||
&Inst::Select {
|
||||
ref dst,
|
||||
condition,
|
||||
@@ -1366,25 +1361,6 @@ impl Inst {
|
||||
let rd = format_reg(rd.to_reg(), allocs);
|
||||
format!("{} {},{}", op.op_name(), rd, base,)
|
||||
}
|
||||
&Inst::Fcmp {
|
||||
rd,
|
||||
cc,
|
||||
ty,
|
||||
rs1,
|
||||
rs2,
|
||||
} => {
|
||||
let rs1 = format_reg(rs1, allocs);
|
||||
let rs2 = format_reg(rs2, allocs);
|
||||
let rd = format_reg(rd.to_reg(), allocs);
|
||||
format!(
|
||||
"f{}.{} {},{},{}",
|
||||
cc,
|
||||
if ty == F32 { "s" } else { "d" },
|
||||
rd,
|
||||
rs1,
|
||||
rs2,
|
||||
)
|
||||
}
|
||||
&Inst::Store {
|
||||
to,
|
||||
src,
|
||||
|
||||
@@ -752,18 +752,10 @@
|
||||
(lower (icmp cc x @ (value_type ty) y))
|
||||
(lower_icmp cc x y ty))
|
||||
|
||||
(decl gen_fcmp (FloatCC Value Value Type) Reg)
|
||||
(rule
|
||||
(gen_fcmp cc x y ty)
|
||||
(let
|
||||
((result WritableReg (temp_writable_reg $I64))
|
||||
(_ Unit (emit (MInst.Fcmp cc result x y ty))))
|
||||
(writable_reg_to_reg result)))
|
||||
|
||||
;;;;; Rules for `fcmp`;;;;;;;;;
|
||||
(rule
|
||||
(lower (fcmp cc x @ (value_type ty) y))
|
||||
(gen_fcmp cc x y ty))
|
||||
(cmp_value (emit_fcmp cc ty x y)))
|
||||
|
||||
;;;;; Rules for `func_addr`;;;;;;;;;
|
||||
(rule
|
||||
|
||||
@@ -61,28 +61,6 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
|
||||
}
|
||||
}
|
||||
|
||||
fn lower_br_fcmp(
|
||||
&mut self,
|
||||
cc: &FloatCC,
|
||||
a: Reg,
|
||||
b: Reg,
|
||||
targets: &VecMachLabel,
|
||||
ty: Type,
|
||||
) -> Unit {
|
||||
let tmp = self.temp_writable_reg(I64);
|
||||
MInst::lower_br_fcmp(
|
||||
*cc,
|
||||
a,
|
||||
b,
|
||||
BranchTarget::Label(targets[0]),
|
||||
BranchTarget::Label(targets[1]),
|
||||
ty,
|
||||
tmp,
|
||||
)
|
||||
.iter()
|
||||
.for_each(|i| self.emit(i));
|
||||
}
|
||||
|
||||
fn lower_brz_or_nz(
|
||||
&mut self,
|
||||
cc: &IntCC,
|
||||
@@ -434,6 +412,15 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
|
||||
|
||||
tmp.to_reg()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn int_compare(&mut self, kind: &IntCC, rs1: Reg, rs2: Reg) -> IntegerCompare {
|
||||
IntegerCompare {
|
||||
kind: *kind,
|
||||
rs1,
|
||||
rs2,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl IsleContext<'_, '_, MInst, Riscv64Backend> {
|
||||
|
||||
29
cranelift/filetests/filetests/isa/riscv64/fcmp.clif
Normal file
29
cranelift/filetests/filetests/isa/riscv64/fcmp.clif
Normal file
@@ -0,0 +1,29 @@
|
||||
test compile precise-output
|
||||
target riscv64
|
||||
|
||||
;; See #5500 for more details about this test case.
|
||||
function %f0() {
|
||||
block0:
|
||||
v0 = f64const 0.0
|
||||
v1 = fcmp ult v0, v0
|
||||
brz v1, block1
|
||||
jump block1
|
||||
|
||||
block1:
|
||||
return
|
||||
}
|
||||
|
||||
; block0:
|
||||
; li t1,0
|
||||
; fmv.d.x ft1,t1
|
||||
; li a2,0
|
||||
; fmv.d.x ft5,a2
|
||||
; fle.d a5,ft5,ft1
|
||||
; bne a5,zero,taken(label1),not_taken(label2)
|
||||
; block1:
|
||||
; j label3
|
||||
; block2:
|
||||
; j label3
|
||||
; block3:
|
||||
; ret
|
||||
|
||||
Reference in New Issue
Block a user