riscv64: Implement fcmp in ISLE (#5512)
Rework the compilation of fcmp in the riscv64 backend to be in ISLE, removing the need for the dedicated Fcmp instruction. This change is motivated by #5500, which showed that the riscv64 backend was generating branch instructions in the middle of a basic block. We can't remove lower_br_fcmp quite yet as it's used in a few places in the emit module, but it's now no longer reachable from the ISLE lowerings. Fixes #5500
This commit is contained in:
@@ -107,6 +107,7 @@
|
|||||||
(rs2 Reg)
|
(rs2 Reg)
|
||||||
(cc IntCC)
|
(cc IntCC)
|
||||||
(trap_code TrapCode))
|
(trap_code TrapCode))
|
||||||
|
|
||||||
(TrapFf
|
(TrapFf
|
||||||
(cc FloatCC)
|
(cc FloatCC)
|
||||||
(x Reg)
|
(x Reg)
|
||||||
@@ -204,14 +205,6 @@
|
|||||||
(x Reg)
|
(x Reg)
|
||||||
(t0 WritableReg))
|
(t0 WritableReg))
|
||||||
|
|
||||||
;; a float compare
|
|
||||||
(Fcmp
|
|
||||||
(cc FloatCC)
|
|
||||||
(rd WritableReg)
|
|
||||||
(rs1 Reg)
|
|
||||||
(rs2 Reg)
|
|
||||||
(ty Type))
|
|
||||||
|
|
||||||
;; select x or y base on condition
|
;; select x or y base on condition
|
||||||
(Select
|
(Select
|
||||||
(dst VecWritableReg)
|
(dst VecWritableReg)
|
||||||
@@ -690,7 +683,6 @@
|
|||||||
(Tso)
|
(Tso)
|
||||||
))
|
))
|
||||||
|
|
||||||
|
|
||||||
(type VecBranchTarget (primitive VecBranchTarget))
|
(type VecBranchTarget (primitive VecBranchTarget))
|
||||||
(type BoxCallInfo (primitive BoxCallInfo))
|
(type BoxCallInfo (primitive BoxCallInfo))
|
||||||
(type BoxCallIndInfo (primitive BoxCallIndInfo))
|
(type BoxCallIndInfo (primitive BoxCallIndInfo))
|
||||||
@@ -1880,9 +1872,6 @@
|
|||||||
(decl lower_br_icmp (IntCC ValueRegs ValueRegs VecMachLabel Type) Unit)
|
(decl lower_br_icmp (IntCC ValueRegs ValueRegs VecMachLabel Type) Unit)
|
||||||
(extern constructor lower_br_icmp lower_br_icmp)
|
(extern constructor lower_br_icmp lower_br_icmp)
|
||||||
|
|
||||||
(decl lower_br_fcmp (FloatCC Reg Reg VecMachLabel Type) Unit)
|
|
||||||
(extern constructor lower_br_fcmp lower_br_fcmp)
|
|
||||||
|
|
||||||
;; int scalar zero regs.
|
;; int scalar zero regs.
|
||||||
(decl int_zero_reg (Type) ValueRegs)
|
(decl int_zero_reg (Type) ValueRegs)
|
||||||
(extern constructor int_zero_reg int_zero_reg)
|
(extern constructor int_zero_reg int_zero_reg)
|
||||||
@@ -1935,7 +1924,9 @@
|
|||||||
|
|
||||||
(rule 1
|
(rule 1
|
||||||
(lower_branch (brz (fcmp cc a @ (value_type ty) b) _ _) targets)
|
(lower_branch (brz (fcmp cc a @ (value_type ty) b) _ _) targets)
|
||||||
(lower_br_fcmp (floatcc_inverse cc) a b targets ty))
|
(let ((then BranchTarget (label_to_br_target (vec_label_get targets 0)))
|
||||||
|
(else BranchTarget (label_to_br_target (vec_label_get targets 1))))
|
||||||
|
(emit_side_effect (cond_br (emit_fcmp (floatcc_inverse cc) ty a b) then else))))
|
||||||
|
|
||||||
;;;;
|
;;;;
|
||||||
(rule
|
(rule
|
||||||
@@ -1955,7 +1946,9 @@
|
|||||||
|
|
||||||
(rule 1
|
(rule 1
|
||||||
(lower_branch (brnz (fcmp cc a @ (value_type ty) b) _ _) targets)
|
(lower_branch (brnz (fcmp cc a @ (value_type ty) b) _ _) targets)
|
||||||
(lower_br_fcmp cc a b targets ty))
|
(let ((then BranchTarget (label_to_br_target (vec_label_get targets 0)))
|
||||||
|
(else BranchTarget (label_to_br_target (vec_label_get targets 1))))
|
||||||
|
(emit_side_effect (cond_br (emit_fcmp cc ty a b) then else))))
|
||||||
|
|
||||||
;;;
|
;;;
|
||||||
(decl lower_br_table (Reg VecMachLabel) Unit)
|
(decl lower_br_table (Reg VecMachLabel) Unit)
|
||||||
@@ -2194,3 +2187,159 @@
|
|||||||
|
|
||||||
(decl sp_reg () PReg)
|
(decl sp_reg () PReg)
|
||||||
(extern constructor sp_reg sp_reg)
|
(extern constructor sp_reg sp_reg)
|
||||||
|
|
||||||
|
|
||||||
|
;;;; Helpers for floating point comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(decl not (Reg) Reg)
|
||||||
|
(rule (not x) (alu_rr_imm12 (AluOPRRI.Xori) x (imm_from_bits 1)))
|
||||||
|
|
||||||
|
(decl emit_or (Reg Reg) Reg)
|
||||||
|
(rule (emit_or x y) (alu_rrr (AluOPRRR.Or) x y))
|
||||||
|
|
||||||
|
(decl emit_and (Reg Reg) Reg)
|
||||||
|
(rule (emit_and x y) (alu_rrr (AluOPRRR.And) x y))
|
||||||
|
|
||||||
|
(decl is_not_nan (Type Reg) Reg)
|
||||||
|
(rule (is_not_nan ty a) (feq ty a a))
|
||||||
|
|
||||||
|
(decl feq (Type Reg Reg) Reg)
|
||||||
|
(rule (feq $F32 a b) (fpu_rrr (FpuOPRRR.FeqS) $I64 a b))
|
||||||
|
(rule (feq $F64 a b) (fpu_rrr (FpuOPRRR.FeqD) $I64 a b))
|
||||||
|
|
||||||
|
(decl flt (Type Reg Reg) Reg)
|
||||||
|
(rule (flt $F32 a b) (fpu_rrr (FpuOPRRR.FltS) $I64 a b))
|
||||||
|
(rule (flt $F64 a b) (fpu_rrr (FpuOPRRR.FltD) $I64 a b))
|
||||||
|
|
||||||
|
(decl fle (Type Reg Reg) Reg)
|
||||||
|
(rule (fle $F32 a b) (fpu_rrr (FpuOPRRR.FleS) $I64 a b))
|
||||||
|
(rule (fle $F64 a b) (fpu_rrr (FpuOPRRR.FleD) $I64 a b))
|
||||||
|
|
||||||
|
(decl fgt (Type Reg Reg) Reg)
|
||||||
|
(rule (fgt ty a b) (flt ty b a))
|
||||||
|
|
||||||
|
(decl fge (Type Reg Reg) Reg)
|
||||||
|
(rule (fge ty a b) (fle ty b a))
|
||||||
|
|
||||||
|
(decl ordered (Type Reg Reg) Reg)
|
||||||
|
(rule (ordered ty a b) (emit_and (is_not_nan ty a) (is_not_nan ty b)))
|
||||||
|
|
||||||
|
(type CmpResult (enum
|
||||||
|
(Result
|
||||||
|
(result Reg)
|
||||||
|
(invert bool))))
|
||||||
|
|
||||||
|
;; Wrapper for the common case when constructing comparison results. It assumes
|
||||||
|
;; that the result isn't negated.
|
||||||
|
(decl cmp_result (Reg) CmpResult)
|
||||||
|
(rule (cmp_result result) (CmpResult.Result result $false))
|
||||||
|
|
||||||
|
;; Wrapper for the case where it's more convenient to construct the negated
|
||||||
|
;; version of the comparison.
|
||||||
|
(decl cmp_result_invert (Reg) CmpResult)
|
||||||
|
(rule (cmp_result_invert result) (CmpResult.Result result $true))
|
||||||
|
|
||||||
|
;; Consume a CmpResult, producing a branch on its result.
|
||||||
|
(decl cond_br (CmpResult BranchTarget BranchTarget) SideEffectNoResult)
|
||||||
|
(rule (cond_br cmp then else)
|
||||||
|
(SideEffectNoResult.Inst
|
||||||
|
(MInst.CondBr then else (cmp_integer_compare cmp))))
|
||||||
|
|
||||||
|
;; Construct an IntegerCompare value.
|
||||||
|
(decl int_compare (IntCC Reg Reg) IntegerCompare)
|
||||||
|
(extern constructor int_compare int_compare)
|
||||||
|
|
||||||
|
;; Convert a comparison into a branch test.
|
||||||
|
(decl cmp_integer_compare (CmpResult) IntegerCompare)
|
||||||
|
|
||||||
|
(rule
|
||||||
|
(cmp_integer_compare (CmpResult.Result res $false))
|
||||||
|
(int_compare (IntCC.NotEqual) res (zero_reg)))
|
||||||
|
|
||||||
|
(rule
|
||||||
|
(cmp_integer_compare (CmpResult.Result res $true))
|
||||||
|
(int_compare (IntCC.Equal) res (zero_reg)))
|
||||||
|
|
||||||
|
;; Convert a comparison into a boolean value.
|
||||||
|
(decl cmp_value (CmpResult) Reg)
|
||||||
|
(rule (cmp_value (CmpResult.Result res $false)) res)
|
||||||
|
(rule (cmp_value (CmpResult.Result res $true)) (not res))
|
||||||
|
|
||||||
|
;; Compare two floating point numbers and return a zero/non-zero result.
|
||||||
|
(decl emit_fcmp (FloatCC Type Reg Reg) CmpResult)
|
||||||
|
|
||||||
|
;; a is not nan && b is not nan
|
||||||
|
(rule
|
||||||
|
(emit_fcmp (FloatCC.Ordered) ty a b)
|
||||||
|
(cmp_result (ordered ty a b)))
|
||||||
|
|
||||||
|
;; a is nan || b is nan
|
||||||
|
;; == !(a is not nan && b is not nan)
|
||||||
|
(rule
|
||||||
|
(emit_fcmp (FloatCC.Unordered) ty a b)
|
||||||
|
(cmp_result_invert (ordered ty a b)))
|
||||||
|
|
||||||
|
;; a == b
|
||||||
|
(rule
|
||||||
|
(emit_fcmp (FloatCC.Equal) ty a b)
|
||||||
|
(cmp_result (feq ty a b)))
|
||||||
|
|
||||||
|
;; a != b
|
||||||
|
;; == !(a == b)
|
||||||
|
(rule
|
||||||
|
(emit_fcmp (FloatCC.NotEqual) ty a b)
|
||||||
|
(cmp_result_invert (feq ty a b)))
|
||||||
|
|
||||||
|
;; a < b || a > b
|
||||||
|
(rule
|
||||||
|
(emit_fcmp (FloatCC.OrderedNotEqual) ty a b)
|
||||||
|
(cmp_result (emit_or (flt ty a b) (fgt ty a b))))
|
||||||
|
|
||||||
|
;; !(ordered a b) || a == b
|
||||||
|
(rule
|
||||||
|
(emit_fcmp (FloatCC.UnorderedOrEqual) ty a b)
|
||||||
|
(cmp_result (emit_or (not (ordered ty a b)) (feq ty a b))))
|
||||||
|
|
||||||
|
;; a < b
|
||||||
|
(rule
|
||||||
|
(emit_fcmp (FloatCC.LessThan) ty a b)
|
||||||
|
(cmp_result (flt ty a b)))
|
||||||
|
|
||||||
|
;; a <= b
|
||||||
|
(rule
|
||||||
|
(emit_fcmp (FloatCC.LessThanOrEqual) ty a b)
|
||||||
|
(cmp_result (fle ty a b)))
|
||||||
|
|
||||||
|
;; a > b
|
||||||
|
(rule
|
||||||
|
(emit_fcmp (FloatCC.GreaterThan) ty a b)
|
||||||
|
(cmp_result (fgt ty a b)))
|
||||||
|
|
||||||
|
;; a >= b
|
||||||
|
(rule
|
||||||
|
(emit_fcmp (FloatCC.GreaterThanOrEqual) ty a b)
|
||||||
|
(cmp_result (fge ty a b)))
|
||||||
|
|
||||||
|
;; !(ordered a b) || a < b
|
||||||
|
;; == !(ordered a b && a >= b)
|
||||||
|
(rule
|
||||||
|
(emit_fcmp (FloatCC.UnorderedOrLessThan) ty a b)
|
||||||
|
(cmp_result_invert (emit_and (ordered ty a b) (fge ty a b))))
|
||||||
|
|
||||||
|
;; !(ordered a b) || a <= b
|
||||||
|
;; == !(ordered a b && a > b)
|
||||||
|
(rule
|
||||||
|
(emit_fcmp (FloatCC.UnorderedOrLessThanOrEqual) ty a b)
|
||||||
|
(cmp_result_invert (emit_and (ordered ty a b) (fgt ty a b))))
|
||||||
|
|
||||||
|
;; !(ordered a b) || a > b
|
||||||
|
;; == !(ordered a b && a <= b)
|
||||||
|
(rule
|
||||||
|
(emit_fcmp (FloatCC.UnorderedOrGreaterThan) ty a b)
|
||||||
|
(cmp_result_invert (emit_and (ordered ty a b) (fle ty a b))))
|
||||||
|
|
||||||
|
;; !(ordered a b) || a >= b
|
||||||
|
;; == !(ordered a b && a < b)
|
||||||
|
(rule
|
||||||
|
(emit_fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) ty a b)
|
||||||
|
(cmp_result_invert (emit_and (ordered ty a b) (flt ty a b))))
|
||||||
|
|||||||
@@ -1337,42 +1337,6 @@ impl MachInstEmit for Inst {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
&Inst::Fcmp {
|
|
||||||
rd,
|
|
||||||
cc,
|
|
||||||
ty,
|
|
||||||
rs1,
|
|
||||||
rs2,
|
|
||||||
} => {
|
|
||||||
let rs1 = allocs.next(rs1);
|
|
||||||
let rs2 = allocs.next(rs2);
|
|
||||||
let rd = allocs.next_writable(rd);
|
|
||||||
let label_true = sink.get_label();
|
|
||||||
let label_jump_over = sink.get_label();
|
|
||||||
Inst::lower_br_fcmp(
|
|
||||||
cc,
|
|
||||||
rs1,
|
|
||||||
rs2,
|
|
||||||
BranchTarget::Label(label_true),
|
|
||||||
BranchTarget::zero(),
|
|
||||||
ty,
|
|
||||||
rd,
|
|
||||||
)
|
|
||||||
.iter()
|
|
||||||
.for_each(|i| i.emit(&[], sink, emit_info, state));
|
|
||||||
// here is not taken.
|
|
||||||
Inst::load_imm12(rd, Imm12::FALSE).emit(&[], sink, emit_info, state);
|
|
||||||
// jump over.
|
|
||||||
Inst::Jal {
|
|
||||||
dest: BranchTarget::Label(label_jump_over),
|
|
||||||
}
|
|
||||||
.emit(&[], sink, emit_info, state);
|
|
||||||
// here is true
|
|
||||||
sink.bind_label(label_true);
|
|
||||||
Inst::load_imm12(rd, Imm12::TRUE).emit(&[], sink, emit_info, state);
|
|
||||||
sink.bind_label(label_jump_over);
|
|
||||||
}
|
|
||||||
|
|
||||||
&Inst::Select {
|
&Inst::Select {
|
||||||
ref dst,
|
ref dst,
|
||||||
condition,
|
condition,
|
||||||
|
|||||||
@@ -2177,14 +2177,6 @@ fn riscv64_worst_case_instruction_size() {
|
|||||||
//there are all candidates potential generate a lot of bytes.
|
//there are all candidates potential generate a lot of bytes.
|
||||||
let mut candidates: Vec<MInst> = vec![];
|
let mut candidates: Vec<MInst> = vec![];
|
||||||
|
|
||||||
candidates.push(Inst::Fcmp {
|
|
||||||
rd: writable_a0(),
|
|
||||||
cc: FloatCC::UnorderedOrLessThanOrEqual,
|
|
||||||
ty: F64,
|
|
||||||
rs1: fa1(),
|
|
||||||
rs2: fa0(),
|
|
||||||
});
|
|
||||||
|
|
||||||
candidates.push(Inst::IntSelect {
|
candidates.push(Inst::IntSelect {
|
||||||
dst: vec![writable_a0(), writable_a0()],
|
dst: vec![writable_a0(), writable_a0()],
|
||||||
ty: I128,
|
ty: I128,
|
||||||
|
|||||||
@@ -460,11 +460,6 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
|
|||||||
collector.reg_use(src);
|
collector.reg_use(src);
|
||||||
collector.reg_def(rd);
|
collector.reg_def(rd);
|
||||||
}
|
}
|
||||||
&Inst::Fcmp { rd, rs1, rs2, .. } => {
|
|
||||||
collector.reg_use(rs1);
|
|
||||||
collector.reg_use(rs2);
|
|
||||||
collector.reg_early_def(rd);
|
|
||||||
}
|
|
||||||
&Inst::Select {
|
&Inst::Select {
|
||||||
ref dst,
|
ref dst,
|
||||||
condition,
|
condition,
|
||||||
@@ -1366,25 +1361,6 @@ impl Inst {
|
|||||||
let rd = format_reg(rd.to_reg(), allocs);
|
let rd = format_reg(rd.to_reg(), allocs);
|
||||||
format!("{} {},{}", op.op_name(), rd, base,)
|
format!("{} {},{}", op.op_name(), rd, base,)
|
||||||
}
|
}
|
||||||
&Inst::Fcmp {
|
|
||||||
rd,
|
|
||||||
cc,
|
|
||||||
ty,
|
|
||||||
rs1,
|
|
||||||
rs2,
|
|
||||||
} => {
|
|
||||||
let rs1 = format_reg(rs1, allocs);
|
|
||||||
let rs2 = format_reg(rs2, allocs);
|
|
||||||
let rd = format_reg(rd.to_reg(), allocs);
|
|
||||||
format!(
|
|
||||||
"f{}.{} {},{},{}",
|
|
||||||
cc,
|
|
||||||
if ty == F32 { "s" } else { "d" },
|
|
||||||
rd,
|
|
||||||
rs1,
|
|
||||||
rs2,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
&Inst::Store {
|
&Inst::Store {
|
||||||
to,
|
to,
|
||||||
src,
|
src,
|
||||||
|
|||||||
@@ -752,18 +752,10 @@
|
|||||||
(lower (icmp cc x @ (value_type ty) y))
|
(lower (icmp cc x @ (value_type ty) y))
|
||||||
(lower_icmp cc x y ty))
|
(lower_icmp cc x y ty))
|
||||||
|
|
||||||
(decl gen_fcmp (FloatCC Value Value Type) Reg)
|
|
||||||
(rule
|
|
||||||
(gen_fcmp cc x y ty)
|
|
||||||
(let
|
|
||||||
((result WritableReg (temp_writable_reg $I64))
|
|
||||||
(_ Unit (emit (MInst.Fcmp cc result x y ty))))
|
|
||||||
(writable_reg_to_reg result)))
|
|
||||||
|
|
||||||
;;;;; Rules for `fcmp`;;;;;;;;;
|
;;;;; Rules for `fcmp`;;;;;;;;;
|
||||||
(rule
|
(rule
|
||||||
(lower (fcmp cc x @ (value_type ty) y))
|
(lower (fcmp cc x @ (value_type ty) y))
|
||||||
(gen_fcmp cc x y ty))
|
(cmp_value (emit_fcmp cc ty x y)))
|
||||||
|
|
||||||
;;;;; Rules for `func_addr`;;;;;;;;;
|
;;;;; Rules for `func_addr`;;;;;;;;;
|
||||||
(rule
|
(rule
|
||||||
|
|||||||
@@ -61,28 +61,6 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn lower_br_fcmp(
|
|
||||||
&mut self,
|
|
||||||
cc: &FloatCC,
|
|
||||||
a: Reg,
|
|
||||||
b: Reg,
|
|
||||||
targets: &VecMachLabel,
|
|
||||||
ty: Type,
|
|
||||||
) -> Unit {
|
|
||||||
let tmp = self.temp_writable_reg(I64);
|
|
||||||
MInst::lower_br_fcmp(
|
|
||||||
*cc,
|
|
||||||
a,
|
|
||||||
b,
|
|
||||||
BranchTarget::Label(targets[0]),
|
|
||||||
BranchTarget::Label(targets[1]),
|
|
||||||
ty,
|
|
||||||
tmp,
|
|
||||||
)
|
|
||||||
.iter()
|
|
||||||
.for_each(|i| self.emit(i));
|
|
||||||
}
|
|
||||||
|
|
||||||
fn lower_brz_or_nz(
|
fn lower_brz_or_nz(
|
||||||
&mut self,
|
&mut self,
|
||||||
cc: &IntCC,
|
cc: &IntCC,
|
||||||
@@ -434,6 +412,15 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
|
|||||||
|
|
||||||
tmp.to_reg()
|
tmp.to_reg()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn int_compare(&mut self, kind: &IntCC, rs1: Reg, rs2: Reg) -> IntegerCompare {
|
||||||
|
IntegerCompare {
|
||||||
|
kind: *kind,
|
||||||
|
rs1,
|
||||||
|
rs2,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl IsleContext<'_, '_, MInst, Riscv64Backend> {
|
impl IsleContext<'_, '_, MInst, Riscv64Backend> {
|
||||||
|
|||||||
29
cranelift/filetests/filetests/isa/riscv64/fcmp.clif
Normal file
29
cranelift/filetests/filetests/isa/riscv64/fcmp.clif
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
test compile precise-output
|
||||||
|
target riscv64
|
||||||
|
|
||||||
|
;; See #5500 for more details about this test case.
|
||||||
|
function %f0() {
|
||||||
|
block0:
|
||||||
|
v0 = f64const 0.0
|
||||||
|
v1 = fcmp ult v0, v0
|
||||||
|
brz v1, block1
|
||||||
|
jump block1
|
||||||
|
|
||||||
|
block1:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
; block0:
|
||||||
|
; li t1,0
|
||||||
|
; fmv.d.x ft1,t1
|
||||||
|
; li a2,0
|
||||||
|
; fmv.d.x ft5,a2
|
||||||
|
; fle.d a5,ft5,ft1
|
||||||
|
; bne a5,zero,taken(label1),not_taken(label2)
|
||||||
|
; block1:
|
||||||
|
; j label3
|
||||||
|
; block2:
|
||||||
|
; j label3
|
||||||
|
; block3:
|
||||||
|
; ret
|
||||||
|
|
||||||
Reference in New Issue
Block a user