[AArch64] Merge Fcmp32 and Fcmp64 (#4032)

2022-04-14 23:39:43 +01:00
parent a40b5c3985
commit cf533a8041
8 changed files with 202 additions and 220 deletions
--- a/cranelift/codegen/src/isa/aarch64/inst.isle
+++ b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -327,13 +327,9 @@
        (rm Reg)
        (ra Reg))
-       ;; FPU comparison, single-precision (32 bit).
+       ;; FPU comparison.
-       (FpuCmp32
+       (FpuCmp
+        (size ScalarSize)
        (rn Reg)
        (rm Reg))
-       ;; FPU comparison, double-precision (64 bit).
-       (FpuCmp64
-        (rn Reg)
-        (rm Reg))
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -2008,15 +2008,10 @@ impl MachInstEmit for Inst {
                assert_eq!(machreg_to_vec(rn2), (machreg_to_vec(rn) + 1) % 32);
                sink.put4(enc_tbl(is_extension, 0b01, rd, rn, rm));
            }
-            &Inst::FpuCmp32 { rn, rm } => {
+            &Inst::FpuCmp { size, rn, rm } => {
                let rn = allocs.next(rn);
                let rm = allocs.next(rm);
-                sink.put4(enc_fcmp(ScalarSize::Size32, rn, rm));
+                sink.put4(enc_fcmp(size, rn, rm));
            }
-            &Inst::FpuCmp64 { rn, rm } => {
-                let rn = allocs.next(rn);
-                let rm = allocs.next(rm);
-                sink.put4(enc_fcmp(ScalarSize::Size64, rn, rm));
-            }
            &Inst::FpuToInt { op, rd, rn } => {
                let rd = allocs.next_writable(rd);
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -5821,7 +5821,8 @@ fn test_aarch64_binemit() {
    ));
    insns.push((
-        Inst::FpuCmp32 {
+        Inst::FpuCmp {
+            size: ScalarSize::Size32,
            rn: vreg(23),
            rm: vreg(24),
        },
@@ -5830,7 +5831,8 @@ fn test_aarch64_binemit() {
    ));
    insns.push((
-        Inst::FpuCmp64 {
+        Inst::FpuCmp {
+            size: ScalarSize::Size64,
            rn: vreg(23),
            rm: vreg(24),
        },
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -807,7 +807,7 @@ fn aarch64_get_operands<F: Fn(VReg) -> VReg>(inst: &Inst, collector: &mut Operan
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
-        &Inst::FpuCmp32 { rn, rm } | &Inst::FpuCmp64 { rn, rm } => {
+        &Inst::FpuCmp { rn, rm, .. } => {
            collector.reg_use(rn);
            collector.reg_use(rm);
        }
@@ -1765,14 +1765,9 @@ impl Inst {
                let ra = pretty_print_vreg_scalar(ra, size, allocs);
                format!("{} {}, {}, {}, {}", op, rd, rn, rm, ra)
            }
-            &Inst::FpuCmp32 { rn, rm } => {
+            &Inst::FpuCmp { size, rn, rm } => {
-                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size32, allocs);
+                let rn = pretty_print_vreg_scalar(rn, size, allocs);
-                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size32, allocs);
+                let rm = pretty_print_vreg_scalar(rm, size, allocs);
                format!("fcmp {}, {}", rn, rm)
            }
-            &Inst::FpuCmp64 { rn, rm } => {
-                let rn = pretty_print_vreg_scalar(rn, ScalarSize::Size64, allocs);
-                let rm = pretty_print_vreg_scalar(rm, ScalarSize::Size64, allocs);
-                format!("fcmp {}, {}", rn, rm)
-            }
            &Inst::FpuLoad32 { rd, ref mem, .. } => {
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -1448,19 +1448,14 @@ pub(crate) fn lower_icmp<C: LowerCtx<I = Inst>>(
 pub(crate) fn lower_fcmp_or_ffcmp_to_flags<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
    let ty = ctx.input_ty(insn, 0);
-    let bits = ty_bits(ty);
    let inputs = [InsnInput { insn, input: 0 }, InsnInput { insn, input: 1 }];
    let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
    let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-    match bits {
+    ctx.emit(Inst::FpuCmp {
-        32 => {
+        size: ScalarSize::from_ty(ty),
-            ctx.emit(Inst::FpuCmp32 { rn, rm });
+        rn,
-        }
+        rm,
-        64 => {
+    });
-            ctx.emit(Inst::FpuCmp64 { rn, rm });
-        }
-        _ => panic!("Unknown float size"),
-    }
 }
 /// Materialize a boolean value into a register from the flags
--- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
@@ -1,4 +1,4 @@
 src/clif.isle 443b34b797fc8ace
 src/prelude.isle afd037c4d91c875c
-src/isa/aarch64/inst.isle f7f03d5ea5411344
+src/isa/aarch64/inst.isle 77984cc33a05be7
 src/isa/aarch64/lower.isle 71c7e603b0e4bdef
--- a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.rs
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -735,20 +735,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            if !ty.is_vector() {
-                match ty_bits(ty) {
+                ctx.emit(Inst::FpuCmp {
-                    32 => {
+                    size: ScalarSize::from_ty(ty),
-                        ctx.emit(Inst::FpuCmp32 { rn, rm });
+                    rn,
-                    }
+                    rm,
-                    64 => {
+                });
-                        ctx.emit(Inst::FpuCmp64 { rn, rm });
-                    }
-                    _ => {
-                        return Err(CodegenError::Unsupported(format!(
-                            "Fcmp: Unsupported type: {:?}",
-                            ty
-                        )))
-                    }
-                }
                materialize_bool_result(ctx, insn, rd, cond);
            } else {
                lower_vector_compare(ctx, rd, rn, rm, ty, cond)?;
@@ -1076,7 +1067,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                rd: tmp,
                rn: tmp.to_reg(),
            });
-            ctx.emit(Inst::FpuCmp64 {
+            ctx.emit(Inst::FpuCmp {
+                size: ScalarSize::Size64,
                rn: tmp.to_reg(),
                rm: tmp.to_reg(),
            });
@@ -1672,8 +1664,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                    size,
                });
            } else {
+                ctx.emit(Inst::FpuCmp {
+                    size: ScalarSize::from_ty(lane_type),
+                    rn: ra,
+                    rm: rb,
+                });
                if lane_type == F32 {
-                    ctx.emit(Inst::FpuCmp32 { rn: ra, rm: rb });
                    ctx.emit(Inst::FpuCSel32 {
                        rd,
                        rn,
@@ -1681,7 +1677,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                        cond: Cond::Gt,
                    });
                } else {
-                    ctx.emit(Inst::FpuCmp64 { rn: ra, rm: rb });
                    ctx.emit(Inst::FpuCSel64 {
                        rd,
                        rn,
@@ -1897,11 +1892,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            // in-bounds conversion, per wasm semantics.
            // Check that the input is not a NaN.
-            if in_bits == 32 {
+            ctx.emit(Inst::FpuCmp {
-                ctx.emit(Inst::FpuCmp32 { rn, rm: rn });
+                size: ScalarSize::from_ty(input_ty),
-            } else {
+                rn,
-                ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
+                rm: rn,
-            }
+            });
            let trap_code = TrapCode::BadConversionToInteger;
            ctx.emit(Inst::TrapIf {
                trap_code,
@@ -1950,7 +1945,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                // >= low_bound
                lower_constant_f32(ctx, tmp, low_bound);
-                ctx.emit(Inst::FpuCmp32 {
+                ctx.emit(Inst::FpuCmp {
+                    size: ScalarSize::Size32,
                    rn,
                    rm: tmp.to_reg(),
                });
@@ -1962,7 +1958,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                // <= high_bound
                lower_constant_f32(ctx, tmp, high_bound);
-                ctx.emit(Inst::FpuCmp32 {
+                ctx.emit(Inst::FpuCmp {
+                    size: ScalarSize::Size32,
                    rn,
                    rm: tmp.to_reg(),
                });
@@ -2003,7 +2000,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                // >= low_bound
                lower_constant_f64(ctx, tmp, low_bound);
-                ctx.emit(Inst::FpuCmp64 {
+                ctx.emit(Inst::FpuCmp {
+                    size: ScalarSize::Size64,
                    rn,
                    rm: tmp.to_reg(),
                });
@@ -2015,7 +2013,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                // <= high_bound
                lower_constant_f64(ctx, tmp, high_bound);
-                ctx.emit(Inst::FpuCmp64 {
+                ctx.emit(Inst::FpuCmp {
+                    size: ScalarSize::Size64,
                    rn,
                    rm: tmp.to_reg(),
                });
@@ -2180,8 +2179,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                        lower_constant_f64(ctx, rtmp1, 0.0);
                    }
                }
+                ctx.emit(Inst::FpuCmp {
+                    size: ScalarSize::from_ty(in_ty),
+                    rn,
+                    rm: rn,
+                });
                if in_bits == 32 {
-                    ctx.emit(Inst::FpuCmp32 { rn, rm: rn });
                    ctx.emit(Inst::FpuCSel32 {
                        rd: rtmp2,
                        rn: rtmp1.to_reg(),
@@ -2189,7 +2192,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                        cond: Cond::Ne,
                    });
                } else {
-                    ctx.emit(Inst::FpuCmp64 { rn, rm: rn });
                    ctx.emit(Inst::FpuCSel64 {
                        rd: rtmp2,
                        rn: rtmp1.to_reg(),