diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
index d8216972ab..19c2764e94 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -2338,7 +2338,11 @@ impl MachInstEmit for Inst {
                     VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
                     VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
                     VecALUOp::Bsl => (0b001_01110_01_1, 0b000111),
-                    VecALUOp::Umaxp => (0b001_01110_00_1 | enc_size << 1, 0b101001),
+                    VecALUOp::Umaxp => {
+                        debug_assert_ne!(size, VectorSize::Size64x2);
+
+                        (0b001_01110_00_1 | enc_size << 1, 0b101001)
+                    }
                     VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
                     VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
                     VecALUOp::Mul => {
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index 655ea8458c..6a4e18cbe3 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -2397,7 +2397,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
             //   cmp xm, #0
             //   cset xm, ne

-            let size = VectorSize::from_ty(ctx.input_ty(insn, 0));
+            let s = VectorSize::from_ty(src_ty);
+            let size = if s == VectorSize::Size64x2 {
+                // `vall_true` with 64-bit elements is handled elsewhere.
+                debug_assert_ne!(op, Opcode::VallTrue);
+
+                VectorSize::Size32x4
+            } else {
+                s
+            };

             if op == Opcode::VanyTrue {
                 ctx.emit(Inst::VecRRR {
diff --git a/cranelift/filetests/filetests/runtests/simd-logical.clif b/cranelift/filetests/filetests/runtests/simd-logical.clif
index 48470cdb35..6480dd98f7 100644
--- a/cranelift/filetests/filetests/runtests/simd-logical.clif
+++ b/cranelift/filetests/filetests/runtests/simd-logical.clif
@@ -3,8 +3,6 @@ target aarch64
 ; target s390x TODO: Not yet implemented on s390x
 set enable_simd
 target x86_64 machinst
-set enable_simd
-target x86_64 legacy skylake

 function %bnot() -> b32 {
 block0:
@@ -26,13 +24,77 @@ block0:
 }
 ; run

-function %vany_true_i16x8() -> b1 {
+function %vany_true_i8x16() -> b1, b1 {
 block0:
-    v0 = vconst.i16x8 [1 0 0 0 0 0 0 0]
+    v0 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
     v1 = vany_true v0
-    return v1
+
+    v2 = vconst.i8x16 [0 0 0 1 0 0 0 0 0 0 42 0 0 0 0 0]
+    v3 = vany_true v2
+
+    return v1, v3
 }
-; run
+; run: %vany_true_i8x16() == [false, true]
+
+function %vany_true_i16x8() -> b1, b1 {
+block0:
+    v0 = vconst.i16x8 [0 0 0 0 0 0 0 0]
+    v1 = vany_true v0
+
+    v2 = vconst.i16x8 [0 0 42 0 0 0 0 0]
+    v3 = vany_true v2
+
+    return v1, v3
+}
+; run: %vany_true_i16x8() == [false, true]
+
+function %vany_true_i32x4() -> b1, b1 {
+block0:
+    v0 = vconst.i32x4 [0 0 0 0]
+    v1 = vany_true v0
+
+    v2 = vconst.i32x4 [0 42 0 0]
+    v3 = vany_true v2
+
+    return v1, v3
+}
+; run: %vany_true_i32x4() == [false, true]
+
+function %vany_true_i64x2() -> b1, b1 {
+block0:
+    v0 = vconst.i64x2 [0 0]
+    v1 = vany_true v0
+
+    v2 = vconst.i64x2 [0 1]
+    v3 = vany_true v2
+
+    return v1, v3
+}
+; run: %vany_true_i64x2() == [false, true]
+
+function %vany_true_f32x4() -> b1, b1 {
+block0:
+    v0 = vconst.f32x4 [0.0 0.0 0.0 0.0]
+    v1 = vany_true v0
+
+    v2 = vconst.f32x4 [0.0 0x4.2 0.0 0.0]
+    v3 = vany_true v2
+
+    return v1, v3
+}
+; run: %vany_true_f32x4() == [false, true]
+
+function %vany_true_f64x2() -> b1, b1 {
+block0:
+    v0 = vconst.f64x2 [0.0 0.0]
+    v1 = vany_true v0
+
+    v2 = vconst.f64x2 [0.0 0x1.0]
+    v3 = vany_true v2
+
+    return v1, v3
+}
+; run: %vany_true_f64x2() == [false, true]

 function %vany_true_b32x4() -> b1 {
 block0:
diff --git a/cranelift/filetests/filetests/runtests/simd-vanytrue.clif b/cranelift/filetests/filetests/runtests/simd-vanytrue.clif
index fd206d54ef..cf407ea606 100644
--- a/cranelift/filetests/filetests/runtests/simd-vanytrue.clif
+++ b/cranelift/filetests/filetests/runtests/simd-vanytrue.clif
@@ -1,7 +1,7 @@
 test interpret
 test run
+target aarch64
 target x86_64 machinst
-; TODO: The AArch64 backend is producing an illegal instruction for b64x2. See: #3304

 function %vany_true_b8x16(b8x16) -> b1 {
 block0(v0: b8x16):