riscv64: Implement fcmp in ISLE (#5512)

Rework the compilation of fcmp in the riscv64 backend to be in ISLE, removing the need for the dedicated Fcmp instruction. This change is motivated by #5500, which showed that the riscv64 backend was generating branch instructions in the middle of a basic block.

We can't remove lower_br_fcmp quite yet as it's used in a few places in the emit module, but it's now no longer reachable from the ISLE lowerings.

Fixes #5500
This commit is contained in:
Trevor Elliott
2023-01-04 11:52:00 -08:00
committed by GitHub
parent d1920f5a2d
commit b2d5afdf83
7 changed files with 202 additions and 113 deletions

View File

@@ -107,6 +107,7 @@
(rs2 Reg)
(cc IntCC)
(trap_code TrapCode))
(TrapFf
(cc FloatCC)
(x Reg)
@@ -204,14 +205,6 @@
(x Reg)
(t0 WritableReg))
;; a float compare
(Fcmp
(cc FloatCC)
(rd WritableReg)
(rs1 Reg)
(rs2 Reg)
(ty Type))
;; select x or y base on condition
(Select
(dst VecWritableReg)
@@ -690,7 +683,6 @@
(Tso)
))
(type VecBranchTarget (primitive VecBranchTarget))
(type BoxCallInfo (primitive BoxCallInfo))
(type BoxCallIndInfo (primitive BoxCallIndInfo))
@@ -1880,9 +1872,6 @@
(decl lower_br_icmp (IntCC ValueRegs ValueRegs VecMachLabel Type) Unit)
(extern constructor lower_br_icmp lower_br_icmp)
(decl lower_br_fcmp (FloatCC Reg Reg VecMachLabel Type) Unit)
(extern constructor lower_br_fcmp lower_br_fcmp)
;; int scalar zero regs.
(decl int_zero_reg (Type) ValueRegs)
(extern constructor int_zero_reg int_zero_reg)
@@ -1935,7 +1924,9 @@
(rule 1
(lower_branch (brz (fcmp cc a @ (value_type ty) b) _ _) targets)
(lower_br_fcmp (floatcc_inverse cc) a b targets ty))
(let ((then BranchTarget (label_to_br_target (vec_label_get targets 0)))
(else BranchTarget (label_to_br_target (vec_label_get targets 1))))
(emit_side_effect (cond_br (emit_fcmp (floatcc_inverse cc) ty a b) then else))))
;;;;
(rule
@@ -1955,7 +1946,9 @@
(rule 1
(lower_branch (brnz (fcmp cc a @ (value_type ty) b) _ _) targets)
(lower_br_fcmp cc a b targets ty))
(let ((then BranchTarget (label_to_br_target (vec_label_get targets 0)))
(else BranchTarget (label_to_br_target (vec_label_get targets 1))))
(emit_side_effect (cond_br (emit_fcmp cc ty a b) then else))))
;;;
(decl lower_br_table (Reg VecMachLabel) Unit)
@@ -2194,3 +2187,159 @@
(decl sp_reg () PReg)
(extern constructor sp_reg sp_reg)
;;;; Helpers for floating point comparisons ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(decl not (Reg) Reg)
(rule (not x) (alu_rr_imm12 (AluOPRRI.Xori) x (imm_from_bits 1)))
(decl emit_or (Reg Reg) Reg)
(rule (emit_or x y) (alu_rrr (AluOPRRR.Or) x y))
(decl emit_and (Reg Reg) Reg)
(rule (emit_and x y) (alu_rrr (AluOPRRR.And) x y))
(decl is_not_nan (Type Reg) Reg)
(rule (is_not_nan ty a) (feq ty a a))
(decl feq (Type Reg Reg) Reg)
(rule (feq $F32 a b) (fpu_rrr (FpuOPRRR.FeqS) $I64 a b))
(rule (feq $F64 a b) (fpu_rrr (FpuOPRRR.FeqD) $I64 a b))
(decl flt (Type Reg Reg) Reg)
(rule (flt $F32 a b) (fpu_rrr (FpuOPRRR.FltS) $I64 a b))
(rule (flt $F64 a b) (fpu_rrr (FpuOPRRR.FltD) $I64 a b))
(decl fle (Type Reg Reg) Reg)
(rule (fle $F32 a b) (fpu_rrr (FpuOPRRR.FleS) $I64 a b))
(rule (fle $F64 a b) (fpu_rrr (FpuOPRRR.FleD) $I64 a b))
(decl fgt (Type Reg Reg) Reg)
(rule (fgt ty a b) (flt ty b a))
(decl fge (Type Reg Reg) Reg)
(rule (fge ty a b) (fle ty b a))
(decl ordered (Type Reg Reg) Reg)
(rule (ordered ty a b) (emit_and (is_not_nan ty a) (is_not_nan ty b)))
(type CmpResult (enum
(Result
(result Reg)
(invert bool))))
;; Wrapper for the common case when constructing comparison results. It assumes
;; that the result isn't negated.
(decl cmp_result (Reg) CmpResult)
(rule (cmp_result result) (CmpResult.Result result $false))
;; Wrapper for the case where it's more convenient to construct the negated
;; version of the comparison.
(decl cmp_result_invert (Reg) CmpResult)
(rule (cmp_result_invert result) (CmpResult.Result result $true))
;; Consume a CmpResult, producing a branch on its result.
(decl cond_br (CmpResult BranchTarget BranchTarget) SideEffectNoResult)
(rule (cond_br cmp then else)
(SideEffectNoResult.Inst
(MInst.CondBr then else (cmp_integer_compare cmp))))
;; Construct an IntegerCompare value.
(decl int_compare (IntCC Reg Reg) IntegerCompare)
(extern constructor int_compare int_compare)
;; Convert a comparison into a branch test.
(decl cmp_integer_compare (CmpResult) IntegerCompare)
(rule
(cmp_integer_compare (CmpResult.Result res $false))
(int_compare (IntCC.NotEqual) res (zero_reg)))
(rule
(cmp_integer_compare (CmpResult.Result res $true))
(int_compare (IntCC.Equal) res (zero_reg)))
;; Convert a comparison into a boolean value.
(decl cmp_value (CmpResult) Reg)
(rule (cmp_value (CmpResult.Result res $false)) res)
(rule (cmp_value (CmpResult.Result res $true)) (not res))
;; Compare two floating point numbers and return a zero/non-zero result.
(decl emit_fcmp (FloatCC Type Reg Reg) CmpResult)
;; a is not nan && b is not nan
(rule
(emit_fcmp (FloatCC.Ordered) ty a b)
(cmp_result (ordered ty a b)))
;; a is nan || b is nan
;; == !(a is not nan && b is not nan)
(rule
(emit_fcmp (FloatCC.Unordered) ty a b)
(cmp_result_invert (ordered ty a b)))
;; a == b
(rule
(emit_fcmp (FloatCC.Equal) ty a b)
(cmp_result (feq ty a b)))
;; a != b
;; == !(a == b)
(rule
(emit_fcmp (FloatCC.NotEqual) ty a b)
(cmp_result_invert (feq ty a b)))
;; a < b || a > b
(rule
(emit_fcmp (FloatCC.OrderedNotEqual) ty a b)
(cmp_result (emit_or (flt ty a b) (fgt ty a b))))
;; !(ordered a b) || a == b
(rule
(emit_fcmp (FloatCC.UnorderedOrEqual) ty a b)
(cmp_result (emit_or (not (ordered ty a b)) (feq ty a b))))
;; a < b
(rule
(emit_fcmp (FloatCC.LessThan) ty a b)
(cmp_result (flt ty a b)))
;; a <= b
(rule
(emit_fcmp (FloatCC.LessThanOrEqual) ty a b)
(cmp_result (fle ty a b)))
;; a > b
(rule
(emit_fcmp (FloatCC.GreaterThan) ty a b)
(cmp_result (fgt ty a b)))
;; a >= b
(rule
(emit_fcmp (FloatCC.GreaterThanOrEqual) ty a b)
(cmp_result (fge ty a b)))
;; !(ordered a b) || a < b
;; == !(ordered a b && a >= b)
(rule
(emit_fcmp (FloatCC.UnorderedOrLessThan) ty a b)
(cmp_result_invert (emit_and (ordered ty a b) (fge ty a b))))
;; !(ordered a b) || a <= b
;; == !(ordered a b && a > b)
(rule
(emit_fcmp (FloatCC.UnorderedOrLessThanOrEqual) ty a b)
(cmp_result_invert (emit_and (ordered ty a b) (fgt ty a b))))
;; !(ordered a b) || a > b
;; == !(ordered a b && a <= b)
(rule
(emit_fcmp (FloatCC.UnorderedOrGreaterThan) ty a b)
(cmp_result_invert (emit_and (ordered ty a b) (fle ty a b))))
;; !(ordered a b) || a >= b
;; == !(ordered a b && a < b)
(rule
(emit_fcmp (FloatCC.UnorderedOrGreaterThanOrEqual) ty a b)
(cmp_result_invert (emit_and (ordered ty a b) (flt ty a b))))

View File

@@ -1337,42 +1337,6 @@ impl MachInstEmit for Inst {
}
}
&Inst::Fcmp {
rd,
cc,
ty,
rs1,
rs2,
} => {
let rs1 = allocs.next(rs1);
let rs2 = allocs.next(rs2);
let rd = allocs.next_writable(rd);
let label_true = sink.get_label();
let label_jump_over = sink.get_label();
Inst::lower_br_fcmp(
cc,
rs1,
rs2,
BranchTarget::Label(label_true),
BranchTarget::zero(),
ty,
rd,
)
.iter()
.for_each(|i| i.emit(&[], sink, emit_info, state));
// here is not taken.
Inst::load_imm12(rd, Imm12::FALSE).emit(&[], sink, emit_info, state);
// jump over.
Inst::Jal {
dest: BranchTarget::Label(label_jump_over),
}
.emit(&[], sink, emit_info, state);
// here is true
sink.bind_label(label_true);
Inst::load_imm12(rd, Imm12::TRUE).emit(&[], sink, emit_info, state);
sink.bind_label(label_jump_over);
}
&Inst::Select {
ref dst,
condition,

View File

@@ -2177,14 +2177,6 @@ fn riscv64_worst_case_instruction_size() {
//there are all candidates potential generate a lot of bytes.
let mut candidates: Vec<MInst> = vec![];
candidates.push(Inst::Fcmp {
rd: writable_a0(),
cc: FloatCC::UnorderedOrLessThanOrEqual,
ty: F64,
rs1: fa1(),
rs2: fa0(),
});
candidates.push(Inst::IntSelect {
dst: vec![writable_a0(), writable_a0()],
ty: I128,

View File

@@ -460,11 +460,6 @@ fn riscv64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
collector.reg_use(src);
collector.reg_def(rd);
}
&Inst::Fcmp { rd, rs1, rs2, .. } => {
collector.reg_use(rs1);
collector.reg_use(rs2);
collector.reg_early_def(rd);
}
&Inst::Select {
ref dst,
condition,
@@ -1366,25 +1361,6 @@ impl Inst {
let rd = format_reg(rd.to_reg(), allocs);
format!("{} {},{}", op.op_name(), rd, base,)
}
&Inst::Fcmp {
rd,
cc,
ty,
rs1,
rs2,
} => {
let rs1 = format_reg(rs1, allocs);
let rs2 = format_reg(rs2, allocs);
let rd = format_reg(rd.to_reg(), allocs);
format!(
"f{}.{} {},{},{}",
cc,
if ty == F32 { "s" } else { "d" },
rd,
rs1,
rs2,
)
}
&Inst::Store {
to,
src,

View File

@@ -752,18 +752,10 @@
(lower (icmp cc x @ (value_type ty) y))
(lower_icmp cc x y ty))
(decl gen_fcmp (FloatCC Value Value Type) Reg)
(rule
(gen_fcmp cc x y ty)
(let
((result WritableReg (temp_writable_reg $I64))
(_ Unit (emit (MInst.Fcmp cc result x y ty))))
(writable_reg_to_reg result)))
;;;;; Rules for `fcmp`;;;;;;;;;
(rule
(lower (fcmp cc x @ (value_type ty) y))
(gen_fcmp cc x y ty))
(cmp_value (emit_fcmp cc ty x y)))
;;;;; Rules for `func_addr`;;;;;;;;;
(rule

View File

@@ -61,28 +61,6 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
}
}
fn lower_br_fcmp(
&mut self,
cc: &FloatCC,
a: Reg,
b: Reg,
targets: &VecMachLabel,
ty: Type,
) -> Unit {
let tmp = self.temp_writable_reg(I64);
MInst::lower_br_fcmp(
*cc,
a,
b,
BranchTarget::Label(targets[0]),
BranchTarget::Label(targets[1]),
ty,
tmp,
)
.iter()
.for_each(|i| self.emit(i));
}
fn lower_brz_or_nz(
&mut self,
cc: &IntCC,
@@ -434,6 +412,15 @@ impl generated_code::Context for IsleContext<'_, '_, MInst, Riscv64Backend> {
tmp.to_reg()
}
#[inline]
fn int_compare(&mut self, kind: &IntCC, rs1: Reg, rs2: Reg) -> IntegerCompare {
IntegerCompare {
kind: *kind,
rs1,
rs2,
}
}
}
impl IsleContext<'_, '_, MInst, Riscv64Backend> {

View File

@@ -0,0 +1,29 @@
test compile precise-output
target riscv64
;; See #5500 for more details about this test case.
function %f0() {
block0:
v0 = f64const 0.0
v1 = fcmp ult v0, v0
brz v1, block1
jump block1
block1:
return
}
; block0:
; li t1,0
; fmv.d.x ft1,t1
; li a2,0
; fmv.d.x ft5,a2
; fle.d a5,ft5,ft1
; bne a5,zero,taken(label1),not_taken(label2)
; block1:
; j label3
; block2:
; j label3
; block3:
; ret