Enable more CLIF tests on AArch64

The tests for the SIMD floating-point maximum and minimum operations require particular care because the handling of NaN values is non-deterministic and may vary between platforms. A test expectation cannot be written to accept any one of several NaN values, so the solution is to extract the non-deterministic test cases into a separate file that is subsequently replicated for every backend under test, with the expected results adjusted for each backend. Copyright (c) 2021, Arm Limited.
2021-07-06 13:22:11 +01:00
parent fb32e49ed7
commit a1b39276e1
78 changed files with 258 additions and 77 deletions
--- a/cranelift/codegen/meta/src/shared/instructions.rs
+++ b/cranelift/codegen/meta/src/shared/instructions.rs
@@ -3597,9 +3597,12 @@ pub(crate) fn define(
        Inst::new(
            "fmin",
            r#"
-        Floating point minimum, propagating NaNs.
+        Floating point minimum, propagating NaNs using the WebAssembly rules.
-        If either operand is NaN, this returns a NaN.
+        If either operand is NaN, this returns NaN with an unspecified sign. Furthermore, if
        each input NaN consists of a mantissa whose most significant bit is 1 and the rest is
        0, then the output has the same form. Otherwise, the output mantissa's most significant
        bit is 1 and the rest is unspecified.
        "#,
            &formats.binary,
        )
@@ -3629,9 +3632,12 @@ pub(crate) fn define(
        Inst::new(
            "fmax",
            r#"
-        Floating point maximum, propagating NaNs.
+        Floating point maximum, propagating NaNs using the WebAssembly rules.
-        If either operand is NaN, this returns a NaN.
+        If either operand is NaN, this returns NaN with an unspecified sign. Furthermore, if
        each input NaN consists of a mantissa whose most significant bit is 1 and the rest is
        0, then the output has the same form. Otherwise, the output mantissa's most significant
        bit is 1 and the rest is unspecified.
        "#,
            &formats.binary,
        )
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -13,7 +13,7 @@ use crate::ir::Inst as IRInst;
 use crate::ir::{Opcode, Type};
 use crate::machinst::lower::*;
 use crate::machinst::*;
-use crate::CodegenResult;
+use crate::{CodegenError, CodegenResult};
 use crate::isa::aarch64::inst::*;
 use crate::isa::aarch64::AArch64Backend;
@@ -1103,6 +1103,41 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
        _ => false,
    };
    let size = VectorSize::from_ty(ty);
    if is_float && (cond == Cond::Vc || cond == Cond::Vs) {
        let tmp = ctx.alloc_tmp(ty).only_reg().unwrap();
        ctx.emit(Inst::VecRRR {
            alu_op: VecALUOp::Fcmeq,
            rd,
            rn,
            rm: rn,
            size,
        });
        ctx.emit(Inst::VecRRR {
            alu_op: VecALUOp::Fcmeq,
            rd: tmp,
            rn: rm,
            rm,
            size,
        });
        ctx.emit(Inst::VecRRR {
            alu_op: VecALUOp::And,
            rd,
            rn: rd.to_reg(),
            rm: tmp.to_reg(),
            size,
        });
        if cond == Cond::Vs {
            ctx.emit(Inst::VecMisc {
                op: VecMisc2::Not,
                rd,
                rn: rd.to_reg(),
                size,
            });
        }
    } else {
        // 'Less than' operations are implemented by swapping
        // the order of operands and using the 'greater than'
        // instructions.
@@ -1125,7 +1160,17 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
            (true, Cond::Ls) => (VecALUOp::Fcmge, true),
            (true, Cond::Ge) => (VecALUOp::Fcmge, false),
            (true, Cond::Gt) => (VecALUOp::Fcmgt, false),
-        _ => unreachable!(),
+            _ => {
                return Err(CodegenError::Unsupported(format!(
                    "Unsupported {} SIMD vector comparison: {:?}",
                    if is_float {
                        "floating-point"
                    } else {
                        "integer"
                    },
                    cond
                )))
            }
        };
        if swap {
@@ -1148,6 +1193,7 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
                size,
            });
        }
    }
    Ok(())
 }
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -1803,23 +1803,30 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        }
        Opcode::Bint => {
            let ty = ty.unwrap();
            if ty.is_vector() {
                return Err(CodegenError::Unsupported(format!(
                    "Bint: Unsupported type: {:?}",
                    ty
                )));
            }
            // Booleans are stored as all-zeroes (0) or all-ones (-1). We AND
            // out the LSB to give a 0 / 1-valued integer result.
-            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+            let input = put_input_in_regs(ctx, inputs[0]);
-            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
+            let output = get_output_reg(ctx, outputs[0]);
            let output_bits = ty_bits(ctx.output_ty(insn, 0));
            let (imm_ty, alu_op) = if output_bits > 32 {
                (I64, ALUOp::And64)
            } else {
                (I32, ALUOp::And32)
            };
            ctx.emit(Inst::AluRRImmLogic {
-                alu_op,
+                alu_op: ALUOp::And32,
-                rd,
+                rd: output.regs()[0],
-                rn,
+                rn: input.regs()[0],
-                imml: ImmLogic::maybe_from_u64(1, imm_ty).unwrap(),
+                imml: ImmLogic::maybe_from_u64(1, I32).unwrap(),
            });
            if ty_bits(ty) > 64 {
                lower_constant_u64(ctx, output.regs()[1], 0);
            }
        }
        Opcode::Bitcast => {
@@ -2240,7 +2247,9 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            }
        }
-        Opcode::VallTrue if ctx.input_ty(insn, 0) == I64X2 => {
+        Opcode::VallTrue if ty_bits(ctx.input_ty(insn, 0).lane_type()) == 64 => {
            debug_assert!(ctx.input_ty(insn, 0).is_vector());
            let rd = get_output_reg(ctx, outputs[0]).only_reg().unwrap();
            let rm = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
            let tmp = ctx.alloc_tmp(I64X2).only_reg().unwrap();
--- a/cranelift/filetests/filetests/legalizer/bxor_imm.clif
+++ b/cranelift/filetests/filetests/legalizer/bxor_imm.clif
@@ -1,4 +1,5 @@
 test legalizer
 target aarch64
 target x86_64
 function %foo(i64, i64) -> i64 {
--- a/cranelift/filetests/filetests/legalizer/iconst-i64.clif
+++ b/cranelift/filetests/filetests/legalizer/iconst-i64.clif
@@ -1,4 +1,5 @@
 test legalizer
 target aarch64
 target i686
 function %foo() -> i64 {
--- a/cranelift/filetests/filetests/legalizer/isplit-bb.clif
+++ b/cranelift/filetests/filetests/legalizer/isplit-bb.clif
@@ -1,4 +1,5 @@
 test legalizer
 target aarch64
 target x86_64
 function u0:0(i128, i128, i64) -> i128 system_v {
--- a/cranelift/filetests/filetests/licm/br-table.clif
+++ b/cranelift/filetests/filetests/licm/br-table.clif
@@ -1,5 +1,6 @@
 test compile
 set opt_level=speed_and_size
 target aarch64
 target x86_64
 function %br_table_opt() {
--- a/cranelift/filetests/filetests/licm/jump-table-entry.clif
+++ b/cranelift/filetests/filetests/licm/jump-table-entry.clif
@@ -1,4 +1,5 @@
 test licm
 target aarch64
 target x86_64
 function %dont_hoist_jump_table_entry_during_licm() {
--- a/cranelift/filetests/filetests/licm/load_readonly_notrap.clif
+++ b/cranelift/filetests/filetests/licm/load_readonly_notrap.clif
@@ -1,5 +1,6 @@
 test licm
 target aarch64
 target x86_64
 ;; Nontrapping readonly load from address that is not loop-dependent
--- a/cranelift/filetests/filetests/licm/reject_load_notrap.clif
+++ b/cranelift/filetests/filetests/licm/reject_load_notrap.clif
@@ -1,5 +1,6 @@
 test licm
 target aarch64
 target x86_64
 ;; Nontrapping possibly-not-readonly load from address that is not
--- a/cranelift/filetests/filetests/licm/reject_load_readonly.clif
+++ b/cranelift/filetests/filetests/licm/reject_load_readonly.clif
@@ -1,5 +1,6 @@
 test licm
 target aarch64
 target x86_64
 ;; Maybe-trapping readonly load from address that is not
--- a/cranelift/filetests/filetests/peepmatic/branch.clif
+++ b/cranelift/filetests/filetests/peepmatic/branch.clif
@@ -1,4 +1,5 @@
 test peepmatic
 target aarch64
 target x86_64
 function %icmp_to_brz_fold(i32) -> i32 {
--- a/cranelift/filetests/filetests/peepmatic/div_by_const_indirect.clif
+++ b/cranelift/filetests/filetests/peepmatic/div_by_const_indirect.clif
@@ -1,4 +1,5 @@
 test peepmatic
 target aarch64
 target x86_64 baseline
 ; Cases where the denominator is created by an iconst
--- a/cranelift/filetests/filetests/peepmatic/div_by_const_non_power_of_2.clif
+++ b/cranelift/filetests/filetests/peepmatic/div_by_const_non_power_of_2.clif
@@ -1,4 +1,5 @@
 test peepmatic
 target aarch64
 target i686 baseline
 ; -------- U32 --------
--- a/cranelift/filetests/filetests/peepmatic/div_by_const_power_of_2.clif
+++ b/cranelift/filetests/filetests/peepmatic/div_by_const_power_of_2.clif
@@ -1,4 +1,5 @@
 test peepmatic
 target aarch64
 target i686 baseline
 ; -------- U32 --------
--- a/cranelift/filetests/filetests/peepmatic/do_not_keep_applying_optimizations_after_replacing_with_an_alias.clif
+++ b/cranelift/filetests/filetests/peepmatic/do_not_keep_applying_optimizations_after_replacing_with_an_alias.clif
@@ -1,4 +1,5 @@
 test peepmatic
 target aarch64
 target x86_64
 ;; This file used to trigger assertions where we would keep trying to
--- a/cranelift/filetests/filetests/peepmatic/do_not_reorder_instructions_when_transplanting.clif
+++ b/cranelift/filetests/filetests/peepmatic/do_not_reorder_instructions_when_transplanting.clif
@@ -1,4 +1,5 @@
 test peepmatic
 target aarch64
 target x86_64
 ;; Test that although v5 can be replaced with v1, we don't transplant `load.i32
--- a/cranelift/filetests/filetests/peepmatic/fold-extended-move-wraparound.clif
+++ b/cranelift/filetests/filetests/peepmatic/fold-extended-move-wraparound.clif
@@ -1,4 +1,5 @@
 test peepmatic
 target aarch64
 target x86_64
 function %wraparound(i64 vmctx) -> f32 system_v {
--- a/cranelift/filetests/filetests/peepmatic/rem_by_const_non_power_of_2.clif
+++ b/cranelift/filetests/filetests/peepmatic/rem_by_const_non_power_of_2.clif
@@ -1,4 +1,5 @@
 test peepmatic
 target aarch64
 target i686 baseline
 ; -------- U32 --------
--- a/cranelift/filetests/filetests/peepmatic/rem_by_const_power_of_2.clif
+++ b/cranelift/filetests/filetests/peepmatic/rem_by_const_power_of_2.clif
@@ -1,4 +1,5 @@
 test peepmatic
 target aarch64
 target i686 baseline
 ; -------- U32 --------
--- a/cranelift/filetests/filetests/peepmatic/replace_branching_instructions_and_cfg_predecessors.clif
+++ b/cranelift/filetests/filetests/peepmatic/replace_branching_instructions_and_cfg_predecessors.clif
@@ -1,4 +1,5 @@
 test peepmatic
 target aarch64
 target x86_64
 function u0:2(i64 , i64) {
--- a/cranelift/filetests/filetests/peepmatic/simplify32.clif
+++ b/cranelift/filetests/filetests/peepmatic/simplify32.clif
@@ -1,4 +1,5 @@
 test peepmatic
 target aarch64
 target i686
 ;; 32-bits platforms.
--- a/cranelift/filetests/filetests/peepmatic/simplify64.clif
+++ b/cranelift/filetests/filetests/peepmatic/simplify64.clif
@@ -1,4 +1,5 @@
 test peepmatic
 target aarch64
 target x86_64
 ;; 64-bits platforms.
--- a/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif
+++ b/cranelift/filetests/filetests/peepmatic/simplify_instruction_into_alias_of_value.clif
@@ -1,4 +1,5 @@
 test peepmatic
 target aarch64
 target x86_64
 ;; The `isub` is a no-op, but we can't replace the whole `isub` instruction with
--- a/cranelift/filetests/filetests/postopt/basic.clif
+++ b/cranelift/filetests/filetests/postopt/basic.clif
@@ -1,4 +1,5 @@
 test postopt
 target aarch64
 target i686 legacy
 ; Test that compare+branch sequences are folded effectively on x86.
--- a/cranelift/filetests/filetests/preopt/branch.clif
+++ b/cranelift/filetests/filetests/preopt/branch.clif
@@ -1,4 +1,5 @@
 test preopt
 target aarch64
 target x86_64
 function %brz_fold() -> i32 {
--- a/cranelift/filetests/filetests/preopt/constant_fold.clif
+++ b/cranelift/filetests/filetests/preopt/constant_fold.clif
@@ -1,4 +1,5 @@
 test preopt
 target aarch64
 target x86_64
 function %constant_fold(f64) -> f64 {
--- a/cranelift/filetests/filetests/preopt/numerical.clif
+++ b/cranelift/filetests/filetests/preopt/numerical.clif
@@ -1,4 +1,5 @@
 test preopt
 target aarch64
 target x86_64
 function %iadd_fold() -> i32 {
--- a/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif
+++ b/cranelift/filetests/filetests/regress/allow-relaxation-shrink.clif
@@ -1,4 +1,5 @@
 test compile
 target aarch64
 target x86_64 legacy
 ; This checks that code shrink is allowed while relaxing code, when code shrink
--- a/cranelift/filetests/filetests/runtests/i128-bint.clif
+++ b/cranelift/filetests/filetests/runtests/i128-bint.clif
@@ -1,4 +1,5 @@
 test run
 target aarch64
 target x86_64 machinst
 function %bint_b8_i128() -> i64, i64 {
--- a/cranelift/filetests/filetests/runtests/i128-bitrev.clif
+++ b/cranelift/filetests/filetests/runtests/i128-bitrev.clif
@@ -1,4 +1,5 @@
 test run
 target aarch64
 target x86_64 machinst
 target x86_64 legacy
--- a/cranelift/filetests/filetests/runtests/simd-arithmetic-nondeterministic-aarch64.clif
+++ b/cranelift/filetests/filetests/runtests/simd-arithmetic-nondeterministic-aarch64.clif
@@ -0,0 +1,23 @@
 ; Test the non-deterministic aspects of the SIMD arithmetic operations.
 ; If you change this file, you should most likely update the other
 ; simd-arithmetic-nondeterministic*.clif files as well.
 test run
 target aarch64
 function %fmax_f64x2(f64x2, f64x2) -> f64x2 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fmax v0, v1
    return v2
 }
 ; run: %fmax_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [NaN:0x42 0.0]
 function %fmin_f64x2(f64x2, f64x2) -> f64x2 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fmin v0, v1
    return v2
 }
 ; run: %fmin_f64x2([-NaN 0x100.0], [0.0 NaN]) == [-NaN NaN]
 ; run: %fmin_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0]
 ; run: %fmin_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [NaN:0x42 0.0]
--- a/cranelift/filetests/filetests/runtests/simd-arithmetic-nondeterministic-x86_64.clif
+++ b/cranelift/filetests/filetests/runtests/simd-arithmetic-nondeterministic-x86_64.clif
@@ -0,0 +1,28 @@
 ; Test the non-deterministic aspects of the SIMD arithmetic operations.
 ; If you change this file, you should most likely update the other
 ; simd-arithmetic-nondeterministic*.clif files as well.
 test run
 set enable_simd
 target x86_64 machinst skylake
 function %fmax_f64x2(f64x2, f64x2) -> f64x2 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fmax v0, v1
    return v2
 }
 ; Note below how NaNs are quieted but (unlike fmin) retain their sign: this discrepancy is allowed by non-determinism
 ; in the spec, see https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0.
 ; run: %fmax_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [NaN 0.0]
 function %fmin_f64x2(f64x2, f64x2) -> f64x2 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fmin v0, v1
    return v2
 }
 ; note below how NaNs are quieted and negative: this is due to non-determinism in the spec for NaNs, see
 ; https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0.
 ; run: %fmin_f64x2([-NaN 0x100.0], [0.0 NaN]) == [-NaN -NaN]
 ; run: %fmin_f64x2([NaN 0.0], [0.0 0.0]) == [-NaN 0.0]
 ; run: %fmin_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [-NaN 0.0]
--- a/cranelift/filetests/filetests/runtests/simd-arithmetic.clif
+++ b/cranelift/filetests/filetests/runtests/simd-arithmetic.clif
@@ -1,5 +1,5 @@
 test run
-; target aarch64 TODO: Not yet implemented on aarch64
+target aarch64
 ; target s390x TODO: Not yet implemented on s390x
 set enable_simd
 target x86_64 machinst skylake
@@ -136,26 +136,22 @@ block0(v0: f64x2, v1: f64x2):
    v2 = fmax v0, v1
    return v2
 }
-; note below how NaNs are quieted but (unlike fmin), retain their sign: this discrepancy is allowed by non-determinism
+; This operation exhibits non-deterministic behaviour for some input NaN values;
-; in the spec, see https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0.
+; refer to the simd-arithmetic-nondeterministic*.clif files for the respective tests.
 ; run: %fmax_f64x2([-0x0.0 -0x1.0], [+0x0.0 0x1.0]) == [+0x0.0 0x1.0]
 ; run: %fmax_f64x2([-NaN NaN], [0x0.0 0x100.0]) == [-NaN NaN]
 ; run: %fmax_f64x2([NaN 0.0], [0.0 0.0]) == [NaN 0.0]
 ; run: %fmax_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0]
-; run: %fmax_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [NaN 0.0]
 function %fmin_f64x2(f64x2, f64x2) -> f64x2 {
 block0(v0: f64x2, v1: f64x2):
    v2 = fmin v0, v1
    return v2
 }
-; note below how NaNs are quieted and negative: this is due to non-determinism in the spec for NaNs, see
+; This operation exhibits non-deterministic behaviour for some input NaN values;
-; https://webassembly.github.io/spec/core/bikeshed/index.html#nan-propagation%E2%91%A0.
+; refer to the simd-arithmetic-nondeterministic*.clif files for the respective tests.
 ; run: %fmin_f64x2([-0x0.0 -0x1.0], [+0x0.0 0x1.0]) == [-0x0.0 -0x1.0]
-; run: %fmin_f64x2([-NaN 0x100.0], [0.0 NaN]) == [-NaN -NaN]
-; run: %fmin_f64x2([NaN 0.0], [0.0 0.0]) == [-NaN 0.0]
 ; run: %fmin_f64x2([-NaN 0.0], [0x1.0 0.0]) == [-NaN 0.0]
-; run: %fmin_f64x2([NaN:0x42 0.0], [0x1.0 0.0]) == [-NaN 0.0]
 function %fneg_f64x2(f64x2) -> f64x2 {
 block0(v0: f64x2):
--- a/cranelift/filetests/filetests/runtests/simd-bitwise-run.clif
+++ b/cranelift/filetests/filetests/runtests/simd-bitwise-run.clif
@@ -1,5 +1,6 @@
 test run
 set enable_simd
 target aarch64
 target x86_64 legacy skylake
 ; TODO: once available, replace all lane extraction with `icmp + all_ones`
--- a/cranelift/filetests/filetests/runtests/simd-comparison.clif
+++ b/cranelift/filetests/filetests/runtests/simd-comparison.clif
@@ -1,5 +1,5 @@
 test run
-; target aarch64 TODO: Not yet implemented on aarch64
+target aarch64
 ; target s390x TODO: Not yet implemented on s390x
 set enable_simd
 target x86_64 machinst
--- a/cranelift/filetests/filetests/runtests/simd-lane-access.clif
+++ b/cranelift/filetests/filetests/runtests/simd-lane-access.clif
@@ -1,5 +1,5 @@
 test run
-; target aarch64 TODO: Not yet implemented on aarch64
+target aarch64
 ; target s390x TODO: Not yet implemented on s390x
 set enable_simd
 target x86_64 machinst
--- a/cranelift/filetests/filetests/runtests/simd-vconst.clif
+++ b/cranelift/filetests/filetests/runtests/simd-vconst.clif
@@ -1,6 +1,6 @@
 test run
 ; target s390x TODO: Not yet implemented on s390x
-; target aarch64 TODO: Not yet implemented on aarch64
+target aarch64
 set enable_simd
 target x86_64 machinst
 set enable_simd
--- a/cranelift/filetests/filetests/simple_gvn/readonly.clif
+++ b/cranelift/filetests/filetests/simple_gvn/readonly.clif
@@ -1,5 +1,6 @@
 test simple-gvn
 target aarch64
 target x86_64
 function %eliminate_redundant_global_loads(i32, i64 vmctx) {
--- a/cranelift/filetests/filetests/simple_preopt/bitselect.clif
+++ b/cranelift/filetests/filetests/simple_preopt/bitselect.clif
@@ -1,4 +1,5 @@
 test simple_preopt
 target aarch64
 target x86_64
 ;; Test replacement of bitselect with vselect for special masks
--- a/cranelift/filetests/filetests/simple_preopt/branch.clif
+++ b/cranelift/filetests/filetests/simple_preopt/branch.clif
@@ -1,4 +1,5 @@
 test simple_preopt
 target aarch64
 target x86_64
 function %icmp_to_brz_fold(i32) -> i32 {
--- a/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif
+++ b/cranelift/filetests/filetests/simple_preopt/div_by_const_indirect.clif
@@ -1,4 +1,5 @@
 test simple_preopt
 target aarch64
 target x86_64 baseline
 ; Cases where the denominator is created by an iconst
--- a/cranelift/filetests/filetests/simple_preopt/div_by_const_non_power_of_2.clif
+++ b/cranelift/filetests/filetests/simple_preopt/div_by_const_non_power_of_2.clif
@@ -1,4 +1,5 @@
 test simple_preopt
 target aarch64
 target i686 baseline
 ; -------- U32 --------
--- a/cranelift/filetests/filetests/simple_preopt/div_by_const_power_of_2.clif
+++ b/cranelift/filetests/filetests/simple_preopt/div_by_const_power_of_2.clif
@@ -1,4 +1,5 @@
 test simple_preopt
 target aarch64
 target i686 baseline
 ; -------- U32 --------
--- a/cranelift/filetests/filetests/simple_preopt/do_not_reorder_instructions_when_transplanting.clif
+++ b/cranelift/filetests/filetests/simple_preopt/do_not_reorder_instructions_when_transplanting.clif
@@ -1,4 +1,5 @@
 test simple_preopt
 target aarch64
 target x86_64
 ;; Test that although v5 can be replaced with v1, we don't transplant `load.i32
--- a/cranelift/filetests/filetests/simple_preopt/fold-extended-move-wraparound.clif
+++ b/cranelift/filetests/filetests/simple_preopt/fold-extended-move-wraparound.clif
@@ -1,4 +1,5 @@
 test simple_preopt
 target aarch64
 target x86_64
 function %wraparound(i64 vmctx) -> f32 system_v {
--- a/cranelift/filetests/filetests/simple_preopt/rem_by_const_non_power_of_2.clif
+++ b/cranelift/filetests/filetests/simple_preopt/rem_by_const_non_power_of_2.clif
@@ -1,4 +1,5 @@
 test simple_preopt
 target aarch64
 target i686 baseline
 ; -------- U32 --------
--- a/cranelift/filetests/filetests/simple_preopt/rem_by_const_power_of_2.clif
+++ b/cranelift/filetests/filetests/simple_preopt/rem_by_const_power_of_2.clif
@@ -1,4 +1,5 @@
 test simple_preopt
 target aarch64
 target i686 baseline
 ; -------- U32 --------
--- a/cranelift/filetests/filetests/simple_preopt/replace_branching_instructions_and_cfg_predecessors.clif
+++ b/cranelift/filetests/filetests/simple_preopt/replace_branching_instructions_and_cfg_predecessors.clif
@@ -1,4 +1,5 @@
 test simple_preopt
 target aarch64
 target x86_64
 function u0:2(i64 , i64) {
--- a/cranelift/filetests/filetests/simple_preopt/sign_extend.clif
+++ b/cranelift/filetests/filetests/simple_preopt/sign_extend.clif
@@ -1,4 +1,5 @@
 test simple_preopt
 target aarch64
 target x86_64
 ;; Tests for sign-extending immediates.
--- a/cranelift/filetests/filetests/simple_preopt/simplify32.clif
+++ b/cranelift/filetests/filetests/simple_preopt/simplify32.clif
@@ -1,4 +1,5 @@
 test simple_preopt
 target aarch64
 target i686
 ;; 32-bits platforms.
--- a/cranelift/filetests/filetests/simple_preopt/simplify64.clif
+++ b/cranelift/filetests/filetests/simple_preopt/simplify64.clif
@@ -1,4 +1,5 @@
 test simple_preopt
 target aarch64
 target x86_64
 ;; 64-bits platforms.
--- a/cranelift/filetests/filetests/simple_preopt/simplify_instruction_into_alias_of_value.clif
+++ b/cranelift/filetests/filetests/simple_preopt/simplify_instruction_into_alias_of_value.clif
@@ -1,4 +1,5 @@
 test simple_preopt
 target aarch64
 target x86_64
 ;; The `isub` is a no-op, but we can't replace the whole `isub` instruction with
--- a/cranelift/filetests/filetests/verifier/flags.clif
+++ b/cranelift/filetests/filetests/verifier/flags.clif
@@ -1,4 +1,5 @@
 test verifier
 target aarch64
 target i686
 ; Simple, correct use of CPU flags.
--- a/cranelift/filetests/filetests/verifier/scalar-to-vector.clif
+++ b/cranelift/filetests/filetests/verifier/scalar-to-vector.clif
@@ -1,5 +1,6 @@
 test verifier
 set enable_simd=true
 target aarch64
 target x86_64
 function %scalar_to_vector() {
--- a/cranelift/filetests/filetests/verifier/simd-lane-index.clif
+++ b/cranelift/filetests/filetests/verifier/simd-lane-index.clif
@@ -1,5 +1,6 @@
 test verifier
 set enable_simd
 target aarch64
 target x86_64
 function %insertlane_i32x4() {
--- a/cranelift/filetests/filetests/wasm/control.clif
+++ b/cranelift/filetests/filetests/wasm/control.clif
@@ -1,8 +1,8 @@
 ; Test basic code generation for control flow WebAssembly instructions.
 test compile
 target aarch64
 target i686 haswell
 target x86_64 haswell
 function %br_if(i32) -> i32 {
--- a/cranelift/filetests/filetests/wasm/conversions.clif
+++ b/cranelift/filetests/filetests/wasm/conversions.clif
@@ -1,6 +1,7 @@
 ; Test code generation for WebAssembly type conversion operators.
 test compile
 target aarch64
 target x86_64 haswell
 function %i32_wrap_i64(i64) -> i32 {
--- a/cranelift/filetests/filetests/wasm/f32-arith.clif
+++ b/cranelift/filetests/filetests/wasm/f32-arith.clif
@@ -1,6 +1,7 @@
 ; Test basic code generation for f32 arithmetic WebAssembly instructions.
 test compile
 target aarch64
 target i686 haswell
 target i686 baseline
 target x86_64 haswell
--- a/cranelift/filetests/filetests/wasm/f32-compares.clif
+++ b/cranelift/filetests/filetests/wasm/f32-compares.clif
@@ -1,8 +1,8 @@
 ; Test code generation for WebAssembly f32 comparison operators.
 test compile
 target aarch64
 target i686 haswell
 target x86_64 haswell
 function %f32_eq(f32, f32) -> i32 {
--- a/cranelift/filetests/filetests/wasm/f32-memory64.clif
+++ b/cranelift/filetests/filetests/wasm/f32-memory64.clif
@@ -3,6 +3,7 @@ test compile
 ; We only test on 64-bit since the heap_addr instructions and vmctx parameters
 ; explicitly mention the pointer width.
 target aarch64
 target x86_64 haswell
 function %f32_load(i32, i64 vmctx) -> f32 {
--- a/cranelift/filetests/filetests/wasm/f64-arith.clif
+++ b/cranelift/filetests/filetests/wasm/f64-arith.clif
@@ -1,6 +1,7 @@
 ; Test basic code generation for f64 arithmetic WebAssembly instructions.
 test compile
 target aarch64
 target x86_64 haswell
 target x86_64 baseline
--- a/cranelift/filetests/filetests/wasm/f64-compares.clif
+++ b/cranelift/filetests/filetests/wasm/f64-compares.clif
@@ -1,8 +1,8 @@
 ; Test code generation for WebAssembly f64 comparison operators.
 test compile
 target aarch64
 target i686 haswell
 target x86_64 haswell
 function %f64_eq(f64, f64) -> i32 {
--- a/cranelift/filetests/filetests/wasm/f64-memory64.clif
+++ b/cranelift/filetests/filetests/wasm/f64-memory64.clif
@@ -3,6 +3,7 @@ test compile
 ; We only test on 64-bit since the heap_addr instructions and vmctx parameters
 ; explicitly mention the pointer width.
 target aarch64
 target x86_64 haswell
 function %f64_load(i32, i64 vmctx) -> f64 {
--- a/cranelift/filetests/filetests/wasm/i32-arith.clif
+++ b/cranelift/filetests/filetests/wasm/i32-arith.clif
@@ -1,6 +1,7 @@
 ; Test basic code generation for i32 arithmetic WebAssembly instructions.
 test compile
 target aarch64
 target i686 haswell
 target i686 baseline
 target x86_64 haswell
--- a/cranelift/filetests/filetests/wasm/i32-compares.clif
+++ b/cranelift/filetests/filetests/wasm/i32-compares.clif
@@ -1,8 +1,8 @@
 ; Test code generation for WebAssembly i32 comparison operators.
 test compile
 target aarch64
 target i686 haswell
 target x86_64 haswell
 function %i32_eqz(i32) -> i32 {
--- a/cranelift/filetests/filetests/wasm/i32-memory64.clif
+++ b/cranelift/filetests/filetests/wasm/i32-memory64.clif
@@ -3,6 +3,7 @@ test compile
 ; We only test on 64-bit since the heap_addr instructions and vmctx parameters
 ; explicitly mention the pointer width.
 target aarch64
 target x86_64 haswell
 function %i32_load(i32, i64 vmctx) -> i32 {
--- a/cranelift/filetests/filetests/wasm/i64-arith.clif
+++ b/cranelift/filetests/filetests/wasm/i64-arith.clif
@@ -1,6 +1,7 @@
 ; Test basic code generation for i64 arithmetic WebAssembly instructions.
 test compile
 target aarch64
 target x86_64 haswell
 target x86_64 baseline
--- a/cranelift/filetests/filetests/wasm/i64-compares.clif
+++ b/cranelift/filetests/filetests/wasm/i64-compares.clif
@@ -1,6 +1,7 @@
 ; Test code generation for WebAssembly i64 comparison operators.
 test compile
 target aarch64
 target x86_64 haswell
 function %i64_eqz(i64) -> i32 {
--- a/cranelift/filetests/filetests/wasm/i64-memory64.clif
+++ b/cranelift/filetests/filetests/wasm/i64-memory64.clif
@@ -3,6 +3,7 @@ test compile
 ; We only test on 64-bit since the heap_addr instructions and vmctx parameters
 ; explicitly mention the pointer width.
 target aarch64
 target x86_64 haswell
 function %i64_load(i32, i64 vmctx) -> i64 {
--- a/cranelift/filetests/filetests/wasm/multi-val-mixed.clif
+++ b/cranelift/filetests/filetests/wasm/multi-val-mixed.clif
@@ -1,4 +1,5 @@
 test compile
 target aarch64
 target x86_64 haswell
 ;; Returning many mixed values.
--- a/cranelift/filetests/filetests/wasm/multi-val-take-many-and-return-many.clif
+++ b/cranelift/filetests/filetests/wasm/multi-val-take-many-and-return-many.clif
@@ -1,4 +1,5 @@
 test compile
 target aarch64
 target x86_64 haswell
 function %returner(i32, i64, f32, f64) -> i32, i64, f32, f64 {
--- a/cranelift/filetests/filetests/wasm/r32.clif
+++ b/cranelift/filetests/filetests/wasm/r32.clif
@@ -4,6 +4,7 @@
 test compile
 set enable_safepoints=true
 target aarch64
 target i686 haswell
 function %select_ref(i32, r32, r32) -> r32 {
--- a/cranelift/filetests/filetests/wasm/r64.clif
+++ b/cranelift/filetests/filetests/wasm/r64.clif
@@ -4,6 +4,7 @@
 test compile
 set enable_safepoints=true
 target aarch64
 target x86_64 haswell
 function %select_ref(i32, r64, r64) -> r64 {
--- a/cranelift/filetests/filetests/wasm/select.clif
+++ b/cranelift/filetests/filetests/wasm/select.clif
@@ -1,8 +1,8 @@
 ; Test basic code generation for the select WebAssembly instruction.
 test compile
 target aarch64
 target i686 haswell
 target x86_64 haswell
 function %select_i32(i32, i32, i32) -> i32 {
--- a/cranelift/filetests/src/test_licm.rs
+++ b/cranelift/filetests/src/test_licm.rs
@@ -26,6 +26,10 @@ impl SubTest for TestLICM {
        "licm"
    }
    fn needs_isa(&self) -> bool {
        true
    }
    fn is_mutating(&self) -> bool {
        true
    }
--- a/cranelift/filetests/src/test_postopt.rs
+++ b/cranelift/filetests/src/test_postopt.rs
@@ -23,6 +23,10 @@ impl SubTest for TestPostopt {
        "postopt"
    }
    fn needs_isa(&self) -> bool {
        true
    }
    fn is_mutating(&self) -> bool {
        true
    }
--- a/cranelift/filetests/src/test_simple_preopt.rs
+++ b/cranelift/filetests/src/test_simple_preopt.rs
@@ -23,6 +23,10 @@ impl SubTest for TestSimplePreopt {
        "simple_preopt"
    }
    fn needs_isa(&self) -> bool {
        true
    }
    fn is_mutating(&self) -> bool {
        true
    }