Improve code generation for floating-point constants

Copyright (c) 2022, Arm Limited.
This commit is contained in:
Freddie Liardet
2021-12-02 15:38:15 +00:00
parent 06a7bfdcbd
commit b5531580e7
10 changed files with 490 additions and 309 deletions

View File

@@ -1983,6 +1983,19 @@ impl MachInstEmit for Inst {
};
sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
}
// FMOV (scalar, immediate): materialize a floating-point constant into a
// vector register from the 8-bit encoded immediate carried by the Inst.
&Inst::FpuMoveFPImm { rd, imm, size } => {
// `ftype` field (bits 23:22): 0b00 selects single precision,
// 0b01 selects double precision; other scalar sizes are not supported.
let size_code = match size {
ScalarSize::Size32 => 0b00,
ScalarSize::Size64 => 0b01,
_ => unimplemented!(),
};
sink.put4(
// Opcode template for FMOV (scalar, immediate); the imm8 payload is
// placed at bits 20:13 and the destination register at bits 4:0.
0b000_11110_00_1_00_000_000100_00000_00000
| size_code << 22
| ((imm.enc_bits() as u32) << 13)
| machreg_to_vec(rd.to_reg()),
);
}
&Inst::MovToVec { rd, rn, idx, size } => {
let (imm5, shift) = match size.lane_size() {
ScalarSize::Size8 => (0b00001, 1),

View File

@@ -2051,6 +2051,25 @@ fn test_aarch64_binemit() {
"8103271E",
"fmov s1, w28",
));
// Binary-emission test: `fmov d31, #1` — the expected machine code is the
// little-endian hex string, followed by the expected disassembly text.
insns.push((
Inst::FpuMoveFPImm {
rd: writable_vreg(31),
imm: ASIMDFPModImm::maybe_from_u64(f64::to_bits(1.0), ScalarSize::Size64).unwrap(),
size: ScalarSize::Size64,
},
"1F106E1E",
"fmov d31, #1",
));
// Binary-emission test: `fmov s1, #31` — 32-bit float immediate path; the
// f32 bit pattern is widened to u64 for the encoder helper.
insns.push((
Inst::FpuMoveFPImm {
rd: writable_vreg(1),
imm: ASIMDFPModImm::maybe_from_u64(f32::to_bits(31.0).into(), ScalarSize::Size32)
.unwrap(),
size: ScalarSize::Size32,
},
"01F0271E",
"fmov s1, #31",
));
insns.push((
Inst::MovToVec {
rd: writable_vreg(0),

View File

@@ -239,29 +239,35 @@ impl Inst {
/// Create instructions that load a 32-bit floating-point constant.
pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
value: u32,
const_data: u32,
mut alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
// Note that we must make sure that all bits outside the lowest 32 are set to 0
// because this function is also used to load wider constants (that have zeros
// in their most significant bits).
if value == 0 {
if const_data == 0 {
smallvec![Inst::VecDupImm {
rd,
imm: ASIMDMovModImm::zero(ScalarSize::Size32),
invert: false,
size: VectorSize::Size32x2
size: VectorSize::Size32x2,
}]
} else if let Some(imm) =
ASIMDFPModImm::maybe_from_u64(const_data.into(), ScalarSize::Size32)
{
smallvec![Inst::FpuMoveFPImm {
rd,
imm,
size: ScalarSize::Size32,
}]
} else {
// TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent
// bits.
let tmp = alloc_tmp(I32);
let mut insts = Inst::load_constant(tmp, value as u64);
let mut insts = Inst::load_constant(tmp, const_data as u64);
insts.push(Inst::MovToFpu {
rd,
rn: tmp.to_reg(),
size: ScalarSize::Size64,
size: ScalarSize::Size32,
});
insts
@@ -277,11 +283,23 @@ impl Inst {
// Note that we must make sure that all bits outside the lowest 64 are set to 0
// because this function is also used to load wider constants (that have zeros
// in their most significant bits).
if let Ok(const_data) = u32::try_from(const_data) {
// TODO: Treat as half of a 128 bit vector and consider replicated patterns.
// Scalar MOVI might also be an option.
if const_data == 0 {
smallvec![Inst::VecDupImm {
rd,
imm: ASIMDMovModImm::zero(ScalarSize::Size32),
invert: false,
size: VectorSize::Size32x2,
}]
} else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(const_data, ScalarSize::Size64) {
smallvec![Inst::FpuMoveFPImm {
rd,
imm,
size: ScalarSize::Size64,
}]
} else if let Ok(const_data) = u32::try_from(const_data) {
Inst::load_fp_constant32(rd, const_data, alloc_tmp)
// TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent
// bits. Also, treat it as half of a 128-bit vector and consider replicated
// patterns. Scalar MOVI might also be an option.
} else if const_data & (u32::MAX as u64) == 0 {
let tmp = alloc_tmp(I64);
let mut insts = Inst::load_constant(tmp, const_data);
@@ -879,6 +897,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::FpuMoveFPImm { rd, .. } => {
// Destination-only: the source is an immediate encoded in the
// instruction, so the sole register operand is a def of `rd`.
collector.add_def(rd);
}
&Inst::MovToVec { rd, rn, .. } => {
collector.add_mod(rd);
collector.add_use(rn);
@@ -1654,6 +1675,9 @@ pub fn aarch64_map_regs<RM: RegMapper>(inst: &mut Inst, mapper: &RM) {
mapper.map_def(rd);
mapper.map_use(rn);
}
&mut Inst::FpuMoveFPImm { ref mut rd, .. } => {
// Only `rd` needs remapping; the immediate carries no register.
mapper.map_def(rd);
}
&mut Inst::MovToVec {
ref mut rd,
ref mut rn,
@@ -2693,6 +2717,12 @@ impl Inst {
let rn = show_ireg_sized(rn, mb_rru, operand_size);
format!("fmov {}, {}", rd, rn)
}
&Inst::FpuMoveFPImm { rd, imm, size } => {
// Pretty-print as e.g. `fmov d31, #1`, with the register name
// sized according to the scalar size (s/d prefix).
let imm = imm.show_rru(mb_rru);
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
format!("fmov {}, {}", rd, imm)
}
&Inst::MovToVec { rd, rn, idx, size } => {
let rd = show_vreg_element(rd.to_reg(), mb_rru, idx, size);
let rn = show_ireg_sized(rn, mb_rru, size.operand_size());