Enable more CLIF tests on AArch64
The tests for the SIMD floating-point maximum and minimum operations require particular care because the handling of NaN values is non-deterministic and may vary between platforms. There is no way to match several possible NaN values in a single test, so the solution is to extract the non-deterministic test cases into a separate file that is subsequently replicated for every backend under test, with the expected results adjusted for each backend.

Copyright (c) 2021, Arm Limited.
This commit is contained in:
@@ -13,7 +13,7 @@ use crate::ir::Inst as IRInst;
|
||||
use crate::ir::{Opcode, Type};
|
||||
use crate::machinst::lower::*;
|
||||
use crate::machinst::*;
|
||||
use crate::CodegenResult;
|
||||
use crate::{CodegenError, CodegenResult};
|
||||
|
||||
use crate::isa::aarch64::inst::*;
|
||||
use crate::isa::aarch64::AArch64Backend;
|
||||
@@ -1103,50 +1103,96 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
|
||||
_ => false,
|
||||
};
|
||||
let size = VectorSize::from_ty(ty);
|
||||
// 'Less than' operations are implemented by swapping
|
||||
// the order of operands and using the 'greater than'
|
||||
// instructions.
|
||||
// 'Not equal' is implemented with 'equal' and inverting
|
||||
// the result.
|
||||
let (alu_op, swap) = match (is_float, cond) {
|
||||
(false, Cond::Eq) => (VecALUOp::Cmeq, false),
|
||||
(false, Cond::Ne) => (VecALUOp::Cmeq, false),
|
||||
(false, Cond::Ge) => (VecALUOp::Cmge, false),
|
||||
(false, Cond::Gt) => (VecALUOp::Cmgt, false),
|
||||
(false, Cond::Le) => (VecALUOp::Cmge, true),
|
||||
(false, Cond::Lt) => (VecALUOp::Cmgt, true),
|
||||
(false, Cond::Hs) => (VecALUOp::Cmhs, false),
|
||||
(false, Cond::Hi) => (VecALUOp::Cmhi, false),
|
||||
(false, Cond::Ls) => (VecALUOp::Cmhs, true),
|
||||
(false, Cond::Lo) => (VecALUOp::Cmhi, true),
|
||||
(true, Cond::Eq) => (VecALUOp::Fcmeq, false),
|
||||
(true, Cond::Ne) => (VecALUOp::Fcmeq, false),
|
||||
(true, Cond::Mi) => (VecALUOp::Fcmgt, true),
|
||||
(true, Cond::Ls) => (VecALUOp::Fcmge, true),
|
||||
(true, Cond::Ge) => (VecALUOp::Fcmge, false),
|
||||
(true, Cond::Gt) => (VecALUOp::Fcmgt, false),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
if swap {
|
||||
std::mem::swap(&mut rn, &mut rm);
|
||||
}
|
||||
if is_float && (cond == Cond::Vc || cond == Cond::Vs) {
|
||||
let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
|
||||
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
size,
|
||||
});
|
||||
|
||||
if cond == Cond::Ne {
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op: VecMisc2::Not,
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::Fcmeq,
|
||||
rd,
|
||||
rn: rd.to_reg(),
|
||||
rn,
|
||||
rm: rn,
|
||||
size,
|
||||
});
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::Fcmeq,
|
||||
rd: tmp,
|
||||
rn: rm,
|
||||
rm,
|
||||
size,
|
||||
});
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op: VecALUOp::And,
|
||||
rd,
|
||||
rn: rd.to_reg(),
|
||||
rm: tmp.to_reg(),
|
||||
size,
|
||||
});
|
||||
|
||||
if cond == Cond::Vs {
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op: VecMisc2::Not,
|
||||
rd,
|
||||
rn: rd.to_reg(),
|
||||
size,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// 'Less than' operations are implemented by swapping
|
||||
// the order of operands and using the 'greater than'
|
||||
// instructions.
|
||||
// 'Not equal' is implemented with 'equal' and inverting
|
||||
// the result.
|
||||
let (alu_op, swap) = match (is_float, cond) {
|
||||
(false, Cond::Eq) => (VecALUOp::Cmeq, false),
|
||||
(false, Cond::Ne) => (VecALUOp::Cmeq, false),
|
||||
(false, Cond::Ge) => (VecALUOp::Cmge, false),
|
||||
(false, Cond::Gt) => (VecALUOp::Cmgt, false),
|
||||
(false, Cond::Le) => (VecALUOp::Cmge, true),
|
||||
(false, Cond::Lt) => (VecALUOp::Cmgt, true),
|
||||
(false, Cond::Hs) => (VecALUOp::Cmhs, false),
|
||||
(false, Cond::Hi) => (VecALUOp::Cmhi, false),
|
||||
(false, Cond::Ls) => (VecALUOp::Cmhs, true),
|
||||
(false, Cond::Lo) => (VecALUOp::Cmhi, true),
|
||||
(true, Cond::Eq) => (VecALUOp::Fcmeq, false),
|
||||
(true, Cond::Ne) => (VecALUOp::Fcmeq, false),
|
||||
(true, Cond::Mi) => (VecALUOp::Fcmgt, true),
|
||||
(true, Cond::Ls) => (VecALUOp::Fcmge, true),
|
||||
(true, Cond::Ge) => (VecALUOp::Fcmge, false),
|
||||
(true, Cond::Gt) => (VecALUOp::Fcmgt, false),
|
||||
_ => {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"Unsupported {} SIMD vector comparison: {:?}",
|
||||
if is_float {
|
||||
"floating-point"
|
||||
} else {
|
||||
"integer"
|
||||
},
|
||||
cond
|
||||
)))
|
||||
}
|
||||
};
|
||||
|
||||
if swap {
|
||||
std::mem::swap(&mut rn, &mut rm);
|
||||
}
|
||||
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn,
|
||||
rm,
|
||||
size,
|
||||
});
|
||||
|
||||
if cond == Cond::Ne {
|
||||
ctx.emit(Inst::VecMisc {
|
||||
op: VecMisc2::Not,
|
||||
rd,
|
||||
rn: rd.to_reg(),
|
||||
size,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
||||
@@ -1803,23 +1803,30 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
|
||||
Opcode::Bint => {
|
||||
let ty = ty.unwrap();
|
||||
|
||||
if ty.is_vector() {
|
||||
return Err(CodegenError::Unsupported(format!(
|
||||
"Bint: Unsupported type: {:?}",
|
||||
ty
|
||||
)));
|
||||
}
|
||||
|
||||
// Booleans are stored as all-zeroes (0) or all-ones (-1). We AND
|
||||
// out the LSB to give a 0 / 1-valued integer result.
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let output_bits = ty_bits(ctx.output_ty(insn, 0));
|
||||
let input = put_input_in_regs(ctx, inputs[0]);
|
||||
let output = get_output_reg(ctx, outputs[0]);
|
||||
|
||||
let (imm_ty, alu_op) = if output_bits > 32 {
|
||||
(I64, ALUOp::And64)
|
||||
} else {
|
||||
(I32, ALUOp::And32)
|
||||
};
|
||||
ctx.emit(Inst::AluRRImmLogic {
|
||||
alu_op,
|
||||
rd,
|
||||
rn,
|
||||
imml: ImmLogic::maybe_from_u64(1, imm_ty).unwrap(),
|
||||
alu_op: ALUOp::And32,
|
||||
rd: output.regs()[0],
|
||||
rn: input.regs()[0],
|
||||
imml: ImmLogic::maybe_from_u64(1, I32).unwrap(),
|
||||
});
|
||||
|
||||
if ty_bits(ty) > 64 {
|
||||
lower_constant_u64(ctx, output.regs()[1], 0);
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Bitcast => {
|
||||
@@ -2240,7 +2247,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::VallTrue if ctx.input_ty(insn, 0) == I64X2 => {
|
||||
Opcode::VallTrue if ty_bits(ctx.input_ty(insn, 0).lane_type()) == 64 => {
|
||||
debug_assert!(ctx.input_ty(insn, 0).is_vector());
|
||||
|
||||
let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
|
||||
let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
let tmp = ctx.alloc_tmp(I64X2).only_reg().unwrap();
|
||||
|
||||
Reference in New Issue
Block a user