Port Fcopysign..FcvtToSintSat to ISLE (AArch64) (#4753)

* Port `Fcopysign`..`FcvtToSintSat` to ISLE (AArch64) Ported the existing implementations of the following opcodes to ISLE on AArch64: - `Fcopysign` - Also introduced missing support for `fcopysign` on vector values, as per the docs. - This introduces the vector encoding for the `SLI` machine instruction. - `FcvtToUint` - `FcvtToSint` - `FcvtFromUint` - `FcvtFromSint` - `FcvtToUintSat` - `FcvtToSintSat` Copyright (c) 2022 Arm Limited * Document helpers and abstract conversion checks
2022-08-24 18:37:14 +01:00
parent 7e3c481f4e
commit 94bcbe8446
12 changed files with 863 additions and 548 deletions
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -2,10 +2,9 @@

 use super::lower::*;
 use crate::binemit::CodeOffset;
-use crate::ir::condcodes::FloatCC;
 use crate::ir::types::*;
 use crate::ir::Inst as IRInst;
-use crate::ir::{InstructionData, Opcode, TrapCode};
+use crate::ir::{InstructionData, Opcode};
 use crate::isa::aarch64::abi::*;
 use crate::isa::aarch64::inst::*;
 use crate::isa::aarch64::settings as aarch64_settings;
@@ -978,408 +977,13 @@ pub(crate) fn lower_insn_to_regs(

        Opcode::Fma => implemented_in_isle(ctx),

-        Opcode::Fcopysign => {
-            // Copy the sign bit from inputs[1] to inputs[0]. We use the following sequence:
-            //
-            // This is a scalar Fcopysign.
-            // This uses scalar NEON operations for 64-bit and vector operations (2S) for 32-bit.
-            // In the latter case it still sets all bits except the lowest 32 to 0.
-            //
-            //  mov vd, vn
-            //  ushr vtmp, vm, #63 / #31
-            //  sli vd, vtmp, #63 / #31
+        Opcode::Fcopysign => implemented_in_isle(ctx),

-            let ty = ctx.output_ty(insn, 0);
+        Opcode::FcvtToUint | Opcode::FcvtToSint => implemented_in_isle(ctx),

-            if ty != F32 && ty != F64 {
-                return Err(CodegenError::Unsupported(format!(
-                    "Fcopysign: Unsupported type: {:?}",
-                    ty
-                )));
-            }
+        Opcode::FcvtFromUint | Opcode::FcvtFromSint => implemented_in_isle(ctx),

-            let bits = ty_bits(ty) as u8;
-            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None);
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-            let tmp = ctx.alloc_tmp(F64).only_reg().unwrap();
-
-            // Copy LHS to rd.
-            ctx.emit(Inst::gen_move(rd, rn, ty));
-
-            // Copy the sign bit to the lowest bit in tmp.
-            let imm = FPURightShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
-            ctx.emit(Inst::FpuRRI {
-                fpu_op: choose_32_64(ty, FPUOpRI::UShr32(imm), FPUOpRI::UShr64(imm)),
-                rd: tmp,
-                rn: rm,
-            });
-
-            // Insert the bit from tmp into the sign bit of rd.
-            let imm = FPULeftShiftImm::maybe_from_u8(bits - 1, bits).unwrap();
-            ctx.emit(Inst::FpuRRI {
-                fpu_op: choose_32_64(ty, FPUOpRI::Sli32(imm), FPUOpRI::Sli64(imm)),
-                rd,
-                rn: tmp.to_reg(),
-            });
-        }
-
-        Opcode::FcvtToUint | Opcode::FcvtToSint => {
-            let input_ty = ctx.input_ty(insn, 0);
-            let in_bits = ty_bits(input_ty);
-            let output_ty = ty.unwrap();
-            let out_bits = ty_bits(output_ty);
-            let signed = op == Opcode::FcvtToSint;
-            let op = match (signed, in_bits, out_bits) {
-                (false, 32, 8) | (false, 32, 16) | (false, 32, 32) => FpuToIntOp::F32ToU32,
-                (true, 32, 8) | (true, 32, 16) | (true, 32, 32) => FpuToIntOp::F32ToI32,
-                (false, 32, 64) => FpuToIntOp::F32ToU64,
-                (true, 32, 64) => FpuToIntOp::F32ToI64,
-                (false, 64, 8) | (false, 64, 16) | (false, 64, 32) => FpuToIntOp::F64ToU32,
-                (true, 64, 8) | (true, 64, 16) | (true, 64, 32) => FpuToIntOp::F64ToI32,
-                (false, 64, 64) => FpuToIntOp::F64ToU64,
-                (true, 64, 64) => FpuToIntOp::F64ToI64,
-                _ => {
-                    return Err(CodegenError::Unsupported(format!(
-                        "{}: Unsupported types: {:?} -> {:?}",
-                        op, input_ty, output_ty
-                    )))
-                }
-            };
-
-            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-
-            // First, check the output: it's important to carry the NaN conversion before the
-            // in-bounds conversion, per wasm semantics.
-
-            // Check that the input is not a NaN.
-            ctx.emit(Inst::FpuCmp {
-                size: ScalarSize::from_ty(input_ty),
-                rn,
-                rm: rn,
-            });
-            let trap_code = TrapCode::BadConversionToInteger;
-            ctx.emit(Inst::TrapIf {
-                trap_code,
-                kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::Unordered)),
-            });
-
-            let tmp = ctx.alloc_tmp(I8X16).only_reg().unwrap();
-
-            // Check that the input is in range, with "truncate towards zero" semantics. This means
-            // we allow values that are slightly out of range:
-            // - for signed conversions, we allow values strictly greater than INT_MIN-1 (when this
-            // can be represented), and strictly less than INT_MAX+1 (when this can be
-            // represented).
-            // - for unsigned conversions, we allow values strictly greater than -1, and strictly
-            // less than UINT_MAX+1 (when this can be represented).
-
-            if in_bits == 32 {
-                // From float32.
-                let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
-                    (true, 8) => (
-                        i8::min_value() as f32 - 1.,
-                        FloatCC::GreaterThan,
-                        i8::max_value() as f32 + 1.,
-                    ),
-                    (true, 16) => (
-                        i16::min_value() as f32 - 1.,
-                        FloatCC::GreaterThan,
-                        i16::max_value() as f32 + 1.,
-                    ),
-                    (true, 32) => (
-                        i32::min_value() as f32, // I32_MIN - 1 isn't precisely representable as a f32.
-                        FloatCC::GreaterThanOrEqual,
-                        i32::max_value() as f32 + 1.,
-                    ),
-                    (true, 64) => (
-                        i64::min_value() as f32, // I64_MIN - 1 isn't precisely representable as a f32.
-                        FloatCC::GreaterThanOrEqual,
-                        i64::max_value() as f32 + 1.,
-                    ),
-                    (false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f32 + 1.),
-                    (false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f32 + 1.),
-                    (false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f32 + 1.),
-                    (false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f32 + 1.),
-                    _ => unreachable!(),
-                };
-
-                // >= low_bound
-                lower_constant_f32(ctx, tmp, low_bound);
-                ctx.emit(Inst::FpuCmp {
-                    size: ScalarSize::Size32,
-                    rn,
-                    rm: tmp.to_reg(),
-                });
-                let trap_code = TrapCode::IntegerOverflow;
-                ctx.emit(Inst::TrapIf {
-                    trap_code,
-                    kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
-                });
-
-                // <= high_bound
-                lower_constant_f32(ctx, tmp, high_bound);
-                ctx.emit(Inst::FpuCmp {
-                    size: ScalarSize::Size32,
-                    rn,
-                    rm: tmp.to_reg(),
-                });
-                let trap_code = TrapCode::IntegerOverflow;
-                ctx.emit(Inst::TrapIf {
-                    trap_code,
-                    kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
-                });
-            } else {
-                // From float64.
-                let (low_bound, low_cond, high_bound) = match (signed, out_bits) {
-                    (true, 8) => (
-                        i8::min_value() as f64 - 1.,
-                        FloatCC::GreaterThan,
-                        i8::max_value() as f64 + 1.,
-                    ),
-                    (true, 16) => (
-                        i16::min_value() as f64 - 1.,
-                        FloatCC::GreaterThan,
-                        i16::max_value() as f64 + 1.,
-                    ),
-                    (true, 32) => (
-                        i32::min_value() as f64 - 1.,
-                        FloatCC::GreaterThan,
-                        i32::max_value() as f64 + 1.,
-                    ),
-                    (true, 64) => (
-                        i64::min_value() as f64, // I64_MIN - 1 is not precisely representable as an i64.
-                        FloatCC::GreaterThanOrEqual,
-                        i64::max_value() as f64 + 1.,
-                    ),
-                    (false, 8) => (-1., FloatCC::GreaterThan, u8::max_value() as f64 + 1.),
-                    (false, 16) => (-1., FloatCC::GreaterThan, u16::max_value() as f64 + 1.),
-                    (false, 32) => (-1., FloatCC::GreaterThan, u32::max_value() as f64 + 1.),
-                    (false, 64) => (-1., FloatCC::GreaterThan, u64::max_value() as f64 + 1.),
-                    _ => unreachable!(),
-                };
-
-                // >= low_bound
-                lower_constant_f64(ctx, tmp, low_bound);
-                ctx.emit(Inst::FpuCmp {
-                    size: ScalarSize::Size64,
-                    rn,
-                    rm: tmp.to_reg(),
-                });
-                let trap_code = TrapCode::IntegerOverflow;
-                ctx.emit(Inst::TrapIf {
-                    trap_code,
-                    kind: CondBrKind::Cond(lower_fp_condcode(low_cond).invert()),
-                });
-
-                // <= high_bound
-                lower_constant_f64(ctx, tmp, high_bound);
-                ctx.emit(Inst::FpuCmp {
-                    size: ScalarSize::Size64,
-                    rn,
-                    rm: tmp.to_reg(),
-                });
-                let trap_code = TrapCode::IntegerOverflow;
-                ctx.emit(Inst::TrapIf {
-                    trap_code,
-                    kind: CondBrKind::Cond(lower_fp_condcode(FloatCC::LessThan).invert()),
-                });
-            };
-
-            // Do the conversion.
-            ctx.emit(Inst::FpuToInt { op, rd, rn });
-        }
-
-        Opcode::FcvtFromUint | Opcode::FcvtFromSint => {
-            let input_ty = ctx.input_ty(insn, 0);
-            let ty = ty.unwrap();
-            let signed = op == Opcode::FcvtFromSint;
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-
-            if ty.is_vector() {
-                if input_ty.lane_bits() != ty.lane_bits() {
-                    return Err(CodegenError::Unsupported(format!(
-                        "{}: Unsupported types: {:?} -> {:?}",
-                        op, input_ty, ty
-                    )));
-                }
-
-                let op = if signed {
-                    VecMisc2::Scvtf
-                } else {
-                    VecMisc2::Ucvtf
-                };
-                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-
-                ctx.emit(Inst::VecMisc {
-                    op,
-                    rd,
-                    rn,
-                    size: VectorSize::from_ty(ty),
-                });
-            } else {
-                let in_bits = ty_bits(input_ty);
-                let out_bits = ty_bits(ty);
-                let op = match (signed, in_bits, out_bits) {
-                    (false, 8, 32) | (false, 16, 32) | (false, 32, 32) => IntToFpuOp::U32ToF32,
-                    (true, 8, 32) | (true, 16, 32) | (true, 32, 32) => IntToFpuOp::I32ToF32,
-                    (false, 8, 64) | (false, 16, 64) | (false, 32, 64) => IntToFpuOp::U32ToF64,
-                    (true, 8, 64) | (true, 16, 64) | (true, 32, 64) => IntToFpuOp::I32ToF64,
-                    (false, 64, 32) => IntToFpuOp::U64ToF32,
-                    (true, 64, 32) => IntToFpuOp::I64ToF32,
-                    (false, 64, 64) => IntToFpuOp::U64ToF64,
-                    (true, 64, 64) => IntToFpuOp::I64ToF64,
-                    _ => {
-                        return Err(CodegenError::Unsupported(format!(
-                            "{}: Unsupported types: {:?} -> {:?}",
-                            op, input_ty, ty
-                        )))
-                    }
-                };
-                let narrow_mode = match (signed, in_bits) {
-                    (false, 8) | (false, 16) | (false, 32) => NarrowValueMode::ZeroExtend32,
-                    (true, 8) | (true, 16) | (true, 32) => NarrowValueMode::SignExtend32,
-                    (false, 64) => NarrowValueMode::ZeroExtend64,
-                    (true, 64) => NarrowValueMode::SignExtend64,
-                    _ => unreachable!(),
-                };
-                let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
-                ctx.emit(Inst::IntToFpu { op, rd, rn });
-            }
-        }
-
-        Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => {
-            let in_ty = ctx.input_ty(insn, 0);
-            let ty = ty.unwrap();
-            let out_signed = op == Opcode::FcvtToSintSat;
-            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
-
-            if ty.is_vector() {
-                if in_ty.lane_bits() != ty.lane_bits() {
-                    return Err(CodegenError::Unsupported(format!(
-                        "{}: Unsupported types: {:?} -> {:?}",
-                        op, in_ty, ty
-                    )));
-                }
-
-                let op = if out_signed {
-                    VecMisc2::Fcvtzs
-                } else {
-                    VecMisc2::Fcvtzu
-                };
-
-                ctx.emit(Inst::VecMisc {
-                    op,
-                    rd,
-                    rn,
-                    size: VectorSize::from_ty(ty),
-                });
-            } else {
-                let in_bits = ty_bits(in_ty);
-                let out_bits = ty_bits(ty);
-                // FIMM Vtmp1, u32::MAX or u64::MAX or i32::MAX or i64::MAX
-                // FMIN Vtmp2, Vin, Vtmp1
-                // FIMM Vtmp1, 0 or 0 or i32::MIN or i64::MIN
-                // FMAX Vtmp2, Vtmp2, Vtmp1
-                // (if signed) FIMM Vtmp1, 0
-                // FCMP Vin, Vin
-                // FCSEL Vtmp2, Vtmp1, Vtmp2, NE  // on NaN, select 0
-                // convert Rout, Vtmp2
-
-                assert!(in_ty.is_float() && (in_bits == 32 || in_bits == 64));
-                assert!(out_bits == 32 || out_bits == 64);
-
-                let min: f64 = match (out_bits, out_signed) {
-                    (32, true) => std::i32::MIN as f64,
-                    (32, false) => 0.0,
-                    (64, true) => std::i64::MIN as f64,
-                    (64, false) => 0.0,
-                    _ => unreachable!(),
-                };
-
-                let max = match (out_bits, out_signed) {
-                    (32, true) => std::i32::MAX as f64,
-                    (32, false) => std::u32::MAX as f64,
-                    (64, true) => std::i64::MAX as f64,
-                    (64, false) => std::u64::MAX as f64,
-                    _ => unreachable!(),
-                };
-
-                let rtmp1 = ctx.alloc_tmp(in_ty).only_reg().unwrap();
-                let rtmp2 = ctx.alloc_tmp(in_ty).only_reg().unwrap();
-
-                if in_bits == 32 {
-                    lower_constant_f32(ctx, rtmp1, max as f32);
-                } else {
-                    lower_constant_f64(ctx, rtmp1, max);
-                }
-                ctx.emit(Inst::FpuRRR {
-                    fpu_op: FPUOp2::Min,
-                    size: ScalarSize::from_ty(in_ty),
-                    rd: rtmp2,
-                    rn,
-                    rm: rtmp1.to_reg(),
-                });
-                if in_bits == 32 {
-                    lower_constant_f32(ctx, rtmp1, min as f32);
-                } else {
-                    lower_constant_f64(ctx, rtmp1, min);
-                }
-                ctx.emit(Inst::FpuRRR {
-                    fpu_op: FPUOp2::Max,
-                    size: ScalarSize::from_ty(in_ty),
-                    rd: rtmp2,
-                    rn: rtmp2.to_reg(),
-                    rm: rtmp1.to_reg(),
-                });
-                if out_signed {
-                    if in_bits == 32 {
-                        lower_constant_f32(ctx, rtmp1, 0.0);
-                    } else {
-                        lower_constant_f64(ctx, rtmp1, 0.0);
-                    }
-                }
-                ctx.emit(Inst::FpuCmp {
-                    size: ScalarSize::from_ty(in_ty),
-                    rn,
-                    rm: rn,
-                });
-                if in_bits == 32 {
-                    ctx.emit(Inst::FpuCSel32 {
-                        rd: rtmp2,
-                        rn: rtmp1.to_reg(),
-                        rm: rtmp2.to_reg(),
-                        cond: Cond::Ne,
-                    });
-                } else {
-                    ctx.emit(Inst::FpuCSel64 {
-                        rd: rtmp2,
-                        rn: rtmp1.to_reg(),
-                        rm: rtmp2.to_reg(),
-                        cond: Cond::Ne,
-                    });
-                }
-
-                let cvt = match (in_bits, out_bits, out_signed) {
-                    (32, 32, false) => FpuToIntOp::F32ToU32,
-                    (32, 32, true) => FpuToIntOp::F32ToI32,
-                    (32, 64, false) => FpuToIntOp::F32ToU64,
-                    (32, 64, true) => FpuToIntOp::F32ToI64,
-                    (64, 32, false) => FpuToIntOp::F64ToU32,
-                    (64, 32, true) => FpuToIntOp::F64ToI32,
-                    (64, 64, false) => FpuToIntOp::F64ToU64,
-                    (64, 64, true) => FpuToIntOp::F64ToI64,
-                    _ => unreachable!(),
-                };
-                ctx.emit(Inst::FpuToInt {
-                    op: cvt,
-                    rd,
-                    rn: rtmp2.to_reg(),
-                });
-            }
-        }
+        Opcode::FcvtToUintSat | Opcode::FcvtToSintSat => implemented_in_isle(ctx),

        Opcode::IaddIfcout => {
            // This is a two-output instruction that is needed for the