Cranelift AArch64: Fix the VanyTrue implementation for 64-bit elements

Copyright (c) 2021, Arm Limited.
This commit is contained in:
Anton Kirilov
2021-09-10 17:23:24 +01:00
parent 144a0bfd83
commit 0fb3acfb94
4 changed files with 83 additions and 9 deletions

View File

@@ -2338,7 +2338,11 @@ impl MachInstEmit for Inst {
VecALUOp::Orr => (0b000_01110_10_1, 0b000111), VecALUOp::Orr => (0b000_01110_10_1, 0b000111),
VecALUOp::Eor => (0b001_01110_00_1, 0b000111), VecALUOp::Eor => (0b001_01110_00_1, 0b000111),
VecALUOp::Bsl => (0b001_01110_01_1, 0b000111), VecALUOp::Bsl => (0b001_01110_01_1, 0b000111),
VecALUOp::Umaxp => (0b001_01110_00_1 | enc_size << 1, 0b101001), VecALUOp::Umaxp => {
debug_assert_ne!(size, VectorSize::Size64x2);
(0b001_01110_00_1 | enc_size << 1, 0b101001)
}
VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001), VecALUOp::Add => (0b000_01110_00_1 | enc_size << 1, 0b100001),
VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001), VecALUOp::Sub => (0b001_01110_00_1 | enc_size << 1, 0b100001),
VecALUOp::Mul => { VecALUOp::Mul => {

View File

@@ -2397,7 +2397,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
// cmp xm, #0 // cmp xm, #0
// cset xm, ne // cset xm, ne
let size = VectorSize::from_ty(ctx.input_ty(insn, 0)); let s = VectorSize::from_ty(src_ty);
let size = if s == VectorSize::Size64x2 {
// `vall_true` with 64-bit elements is handled elsewhere.
debug_assert_ne!(op, Opcode::VallTrue);
VectorSize::Size32x4
} else {
s
};
if op == Opcode::VanyTrue { if op == Opcode::VanyTrue {
ctx.emit(Inst::VecRRR { ctx.emit(Inst::VecRRR {

View File

@@ -3,8 +3,6 @@ target aarch64
; target s390x TODO: Not yet implemented on s390x ; target s390x TODO: Not yet implemented on s390x
set enable_simd set enable_simd
target x86_64 machinst target x86_64 machinst
set enable_simd
target x86_64 legacy skylake
function %bnot() -> b32 { function %bnot() -> b32 {
block0: block0:
@@ -26,13 +24,77 @@ block0:
} }
; run ; run
function %vany_true_i16x8() -> b1 { function %vany_true_i8x16() -> b1, b1 {
block0: block0:
v0 = vconst.i16x8 [1 0 0 0 0 0 0 0] v0 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
v1 = vany_true v0 v1 = vany_true v0
return v1
v2 = vconst.i8x16 [0 0 0 1 0 0 0 0 0 0 42 0 0 0 0 0]
v3 = vany_true v2
return v1, v3
} }
; run ; run: %vany_true_i8x16() == [false, true]
; Exercises `vany_true` on i16x8 vectors and returns two results:
;   first  - computed from a vector whose eight lanes are all zero;
;   second - computed from a vector with a single non-zero lane (42 in lane 2).
; The run directive after the function expects [false, true].
function %vany_true_i16x8() -> b1, b1 {
block0:
; All lanes zero.
v0 = vconst.i16x8 [0 0 0 0 0 0 0 0]
v1 = vany_true v0
; Exactly one lane is non-zero.
v2 = vconst.i16x8 [0 0 42 0 0 0 0 0]
v3 = vany_true v2
return v1, v3
}
; run: %vany_true_i16x8() == [false, true]
; Exercises `vany_true` on i32x4 vectors and returns two results:
;   first  - computed from a vector whose four lanes are all zero;
;   second - computed from a vector with a single non-zero lane (42 in lane 1).
; The run directive after the function expects [false, true].
function %vany_true_i32x4() -> b1, b1 {
block0:
; All lanes zero.
v0 = vconst.i32x4 [0 0 0 0]
v1 = vany_true v0
; Exactly one lane is non-zero.
v2 = vconst.i32x4 [0 42 0 0]
v3 = vany_true v2
return v1, v3
}
; run: %vany_true_i32x4() == [false, true]
; Exercises `vany_true` on i64x2 vectors and returns two results:
;   first  - computed from a vector whose two lanes are both zero;
;   second - computed from a vector whose lane 1 holds 1.
; The run directive after the function expects [false, true].
; NOTE(review): this looks like the 64-bit-element case named in the commit
; title; the lowering hunk swaps Size64x2 for Size32x4 - confirm it is the
; path taken here.
function %vany_true_i64x2() -> b1, b1 {
block0:
; Both lanes zero.
v0 = vconst.i64x2 [0 0]
v1 = vany_true v0
; Only lane 1 is non-zero.
v2 = vconst.i64x2 [0 1]
v3 = vany_true v2
return v1, v3
}
; run: %vany_true_i64x2() == [false, true]
; Exercises `vany_true` on f32x4 vectors and returns two results:
;   first  - computed from a vector whose four lanes are all 0.0;
;   second - computed from a vector whose lane 1 holds the literal 0x4.2.
; The run directive after the function expects [false, true].
function %vany_true_f32x4() -> b1, b1 {
block0:
; All lanes 0.0.
v0 = vconst.f32x4 [0.0 0.0 0.0 0.0]
v1 = vany_true v0
; Lane 1 uses a hexadecimal float literal (presumably 4.125 - TODO confirm).
v2 = vconst.f32x4 [0.0 0x4.2 0.0 0.0]
v3 = vany_true v2
return v1, v3
}
; run: %vany_true_f32x4() == [false, true]
; Exercises `vany_true` on f64x2 vectors and returns two results:
;   first  - computed from a vector whose two lanes are both 0.0;
;   second - computed from a vector whose lane 1 holds the literal 0x1.0.
; The run directive after the function expects [false, true].
; NOTE(review): the element size is 64 bits, so this presumably takes the same
; lowering path as the i64x2 case - confirm.
function %vany_true_f64x2() -> b1, b1 {
block0:
; Both lanes 0.0.
v0 = vconst.f64x2 [0.0 0.0]
v1 = vany_true v0
; Lane 1 uses a hexadecimal float literal (presumably 1.0 - TODO confirm).
v2 = vconst.f64x2 [0.0 0x1.0]
v3 = vany_true v2
return v1, v3
}
; run: %vany_true_f64x2() == [false, true]
function %vany_true_b32x4() -> b1 { function %vany_true_b32x4() -> b1 {
block0: block0:

View File

@@ -1,7 +1,7 @@
test interpret test interpret
test run test run
target aarch64
target x86_64 machinst target x86_64 machinst
; TODO: The AArch64 backend is producing an illegal instruction for b64x2. See: #3304
function %vany_true_b8x16(b8x16) -> b1 { function %vany_true_b8x16(b8x16) -> b1 {
block0(v0: b8x16): block0(v0: b8x16):