From 95b0b05af283ded8a6130856c4d385ea49833ef1 Mon Sep 17 00:00:00 2001
From: Anton Kirilov
Date: Fri, 19 Jun 2020 01:00:47 +0100
Subject: [PATCH] AArch64: Introduce an enum to specify vector instruction
 operand sizes

Replace the IR `Type` (and, in one case, `ScalarSize`) values carried by
the vector instruction variants with a dedicated `VectorSize` enum, so
that instruction emission and pretty-printing no longer need to
interpret full IR types. The scalar saturating arithmetic operations
(SQADD, UQADD, SQSUB and UQSUB on the D registers) move from `VecALUOp`
to `FPUOp2`, and the lowering of the corresponding IR opcodes is
updated to match.

Copyright (c) 2020, Arm Limited.
---
 .../codegen/src/isa/aarch64/inst/args.rs      |  53 ++++
 .../codegen/src/isa/aarch64/inst/emit.rs      | 133 ++++------
 .../src/isa/aarch64/inst/emit_tests.rs        | 250 +++++++++---------
 cranelift/codegen/src/isa/aarch64/inst/mod.rs | 188 ++++++-------
 .../codegen/src/isa/aarch64/inst/regs.rs      |  50 ++--
 cranelift/codegen/src/isa/aarch64/lower.rs    |  17 +-
 .../codegen/src/isa/aarch64/lower_inst.rs     |  71 +++--
 7 files changed, 374 insertions(+), 388 deletions(-)

diff --git a/cranelift/codegen/src/isa/aarch64/inst/args.rs b/cranelift/codegen/src/isa/aarch64/inst/args.rs
index 6bbd618685..43e8471ac7 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/args.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/args.rs
@@ -3,6 +3,7 @@
 // Some variants are never constructed, but we still want them as options in the future.
 #![allow(dead_code)]
 
+use crate::ir::types::{F32X2, F32X4, F64X2, I16X4, I16X8, I32X2, I32X4, I64X2, I8X16, I8X8};
 use crate::ir::Type;
 use crate::isa::aarch64::inst::*;
 use crate::isa::aarch64::lower::ty_bits;
@@ -587,3 +588,55 @@ impl ScalarSize {
         }
     }
 }
+
+/// Type used to communicate the size of a vector operand.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub enum VectorSize {
+    Size8x8,
+    Size8x16,
+    Size16x4,
+    Size16x8,
+    Size32x2,
+    Size32x4,
+    Size64x2,
+}
+
+impl VectorSize {
+    /// Convert from a type into a vector operand size.
+    pub fn from_ty(ty: Type) -> VectorSize {
+        match ty {
+            F32X2 => VectorSize::Size32x2,
+            F32X4 => VectorSize::Size32x4,
+            F64X2 => VectorSize::Size64x2,
+            I8X8 => VectorSize::Size8x8,
+            I8X16 => VectorSize::Size8x16,
+            I16X4 => VectorSize::Size16x4,
+            I16X8 => VectorSize::Size16x8,
+            I32X2 => VectorSize::Size32x2,
+            I32X4 => VectorSize::Size32x4,
+            I64X2 => VectorSize::Size64x2,
+            _ => unimplemented!(),
+        }
+    }
+
+    /// Get the integer operand size that corresponds to a lane of a vector with a certain size.
+    pub fn operand_size(&self) -> OperandSize {
+        match self {
+            VectorSize::Size64x2 => OperandSize::Size64,
+            _ => OperandSize::Size32,
+        }
+    }
+
+    /// Get the scalar operand size that corresponds to a lane of a vector with a certain size.
+ pub fn lane_size(&self) -> ScalarSize { + match self { + VectorSize::Size8x8 => ScalarSize::Size8, + VectorSize::Size8x16 => ScalarSize::Size8, + VectorSize::Size16x4 => ScalarSize::Size16, + VectorSize::Size16x8 => ScalarSize::Size16, + VectorSize::Size32x2 => ScalarSize::Size32, + VectorSize::Size32x4 => ScalarSize::Size32, + VectorSize::Size64x2 => ScalarSize::Size64, + } + } +} diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index 9fc952f644..f12205dbd4 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -1007,7 +1007,7 @@ impl MachInstEmit for Inst { sink.put4(enc_vecmov(/* 16b = */ true, rd, rn)); } &Inst::FpuMoveFromVec { rd, rn, idx, size } => { - let (imm5, shift, mask) = match size { + let (imm5, shift, mask) = match size.lane_size() { ScalarSize::Size32 => (0b00100, 3, 0b011), ScalarSize::Size64 => (0b01000, 4, 0b001), _ => unimplemented!(), @@ -1048,6 +1048,10 @@ impl MachInstEmit for Inst { FPUOp2::Max64 => 0b000_11110_01_1_00000_010010, FPUOp2::Min32 => 0b000_11110_00_1_00000_010110, FPUOp2::Min64 => 0b000_11110_01_1_00000_010110, + FPUOp2::Sqadd64 => 0b010_11110_11_1_00000_000011, + FPUOp2::Uqadd64 => 0b011_11110_11_1_00000_000011, + FPUOp2::Sqsub64 => 0b010_11110_11_1_00000_001011, + FPUOp2::Uqsub64 => 0b011_11110_11_1_00000_001011, }; sink.put4(enc_fpurrr(top22, rd, rn, rm)); } @@ -1102,31 +1106,25 @@ impl MachInstEmit for Inst { }; sink.put4(enc_fpurrrr(top17, rd, rn, rm, ra)); } - &Inst::VecMisc { op, rd, rn, ty } => { - let enc_size = match ty { - I8X16 => 0b00, - I16X8 => 0b01, - I32X4 => 0b10, - I64X2 => 0b11, - _ => 0, + &Inst::VecMisc { op, rd, rn, size } => { + let enc_size = match size { + VectorSize::Size8x16 => 0b00, + VectorSize::Size16x8 => 0b01, + VectorSize::Size32x4 => 0b10, + VectorSize::Size64x2 => 0b11, + _ => unimplemented!(), }; let (bits_12_16, size) = match op { - VecMisc2::Not => { - debug_assert_eq!(128, ty_bits(ty)); - (0b00101, 0b00) - } - VecMisc2::Neg => { - debug_assert_eq!(128, ty_bits(ty)); - (0b01011, enc_size) - } + VecMisc2::Not => (0b00101, 0b00), + VecMisc2::Neg => (0b01011, enc_size), }; sink.put4(enc_vec_rr_misc(size, bits_12_16, rd, rn)); } - &Inst::VecLanes { op, rd, rn, ty } => { - let (q, size) = match ty { - I8X16 => (0b1, 0b00), - I16X8 => (0b1, 0b01), - I32X4 => (0b1, 0b10), + &Inst::VecLanes { op, rd, rn, size } => { + let (q, size) = match size { + VectorSize::Size8x16 => (0b1, 0b00), + VectorSize::Size16x8 => (0b1, 0b01), + VectorSize::Size32x4 => (0b1, 0b10), _ => unreachable!(), }; let (u, opcode) = match op { @@ -1250,12 +1248,12 @@ impl MachInstEmit for Inst { | machreg_to_vec(rd.to_reg()), ); } - &Inst::MovFromVec { rd, rn, idx, ty } => { - let (q, imm5, shift, mask) = match ty { - I8 => (0b0, 0b00001, 1, 0b1111), - I16 => (0b0, 0b00010, 2, 0b0111), - I32 => (0b0, 0b00100, 3, 0b0011), - I64 => (0b1, 0b01000, 4, 0b0001), + &Inst::MovFromVec { rd, rn, idx, size } => { + let (q, imm5, shift, mask) = match size { + VectorSize::Size8x16 => (0b0, 0b00001, 1, 0b1111), + VectorSize::Size16x8 => (0b0, 0b00010, 2, 0b0111), + VectorSize::Size32x4 => (0b0, 0b00100, 3, 0b0011), + VectorSize::Size64x2 => (0b1, 0b01000, 4, 0b0001), _ => unreachable!(), }; debug_assert_eq!(idx & mask, idx); @@ -1268,12 +1266,12 @@ impl MachInstEmit for Inst { | machreg_to_gpr(rd.to_reg()), ); } - &Inst::VecDup { rd, rn, ty } => { - let imm5 = match ty { - I8 => 0b00001, - I16 => 0b00010, - I32 => 0b00100, - I64 => 0b01000, + 
&Inst::VecDup { rd, rn, size } => { + let imm5 = match size { + VectorSize::Size8x16 => 0b00001, + VectorSize::Size16x8 => 0b00010, + VectorSize::Size32x4 => 0b00100, + VectorSize::Size64x2 => 0b01000, _ => unimplemented!(), }; sink.put4( @@ -1283,10 +1281,10 @@ impl MachInstEmit for Inst { | machreg_to_vec(rd.to_reg()), ); } - &Inst::VecDupFromFpu { rd, rn, ty } => { - let imm5 = match ty { - F32 => 0b00100, - F64 => 0b01000, + &Inst::VecDupFromFpu { rd, rn, size } => { + let imm5 = match size { + VectorSize::Size32x4 => 0b00100, + VectorSize::Size64x2 => 0b01000, _ => unimplemented!(), }; sink.put4( @@ -1318,41 +1316,25 @@ impl MachInstEmit for Inst { rn, rm, alu_op, - ty, + size, } => { - let enc_size = match ty { - I8X16 => 0b00, - I16X8 => 0b01, - I32X4 => 0b10, - I64X2 => 0b11, + let enc_size = match size { + VectorSize::Size8x16 => 0b00, + VectorSize::Size16x8 => 0b01, + VectorSize::Size32x4 => 0b10, + VectorSize::Size64x2 => 0b11, _ => 0, }; - let enc_size_for_fcmp = match ty { - F32X4 => 0b0, - F64X2 => 0b1, + let enc_size_for_fcmp = match size { + VectorSize::Size32x4 => 0b0, + VectorSize::Size64x2 => 0b1, _ => 0, }; let (top11, bit15_10) = match alu_op { - VecALUOp::SQAddScalar => { - debug_assert_eq!(I64, ty); - (0b010_11110_11_1, 0b000011) - } VecALUOp::Sqadd => (0b010_01110_00_1 | enc_size << 1, 0b000011), - VecALUOp::SQSubScalar => { - debug_assert_eq!(I64, ty); - (0b010_11110_11_1, 0b001011) - } VecALUOp::Sqsub => (0b010_01110_00_1 | enc_size << 1, 0b001011), - VecALUOp::UQAddScalar => { - debug_assert_eq!(I64, ty); - (0b011_11110_11_1, 0b000011) - } VecALUOp::Uqadd => (0b011_01110_00_1 | enc_size << 1, 0b000011), - VecALUOp::UQSubScalar => { - debug_assert_eq!(I64, ty); - (0b011_11110_11_1, 0b001011) - } VecALUOp::Uqsub => (0b011_01110_00_1 | enc_size << 1, 0b001011), VecALUOp::Cmeq => (0b011_01110_00_1 | enc_size << 1, 0b100011), VecALUOp::Cmge => (0b010_01110_00_1 | enc_size << 1, 0b001111), @@ -1364,31 +1346,16 @@ impl MachInstEmit for Inst { VecALUOp::Fcmge => (0b011_01110_00_1 | enc_size_for_fcmp << 1, 0b111001), // The following logical instructions operate on bytes, so are not encoded differently // for the different vector types. 
- VecALUOp::And => { - debug_assert_eq!(128, ty_bits(ty)); - (0b010_01110_00_1, 0b000111) - } - VecALUOp::Bic => { - debug_assert_eq!(128, ty_bits(ty)); - (0b010_01110_01_1, 0b000111) - } - VecALUOp::Orr => { - debug_assert_eq!(128, ty_bits(ty)); - (0b010_01110_10_1, 0b000111) - } - VecALUOp::Eor => { - debug_assert_eq!(128, ty_bits(ty)); - (0b011_01110_00_1, 0b000111) - } - VecALUOp::Bsl => { - debug_assert_eq!(128, ty_bits(ty)); - (0b011_01110_01_1, 0b000111) - } + VecALUOp::And => (0b010_01110_00_1, 0b000111), + VecALUOp::Bic => (0b010_01110_01_1, 0b000111), + VecALUOp::Orr => (0b010_01110_10_1, 0b000111), + VecALUOp::Eor => (0b011_01110_00_1, 0b000111), + VecALUOp::Bsl => (0b011_01110_01_1, 0b000111), VecALUOp::Umaxp => (0b011_01110_00_1 | enc_size << 1, 0b101001), VecALUOp::Add => (0b010_01110_00_1 | enc_size << 1, 0b100001), VecALUOp::Sub => (0b011_01110_00_1 | enc_size << 1, 0b100001), VecALUOp::Mul => { - debug_assert_ne!(I64X2, ty); + debug_assert_ne!(size, VectorSize::Size64x2); (0b010_01110_00_1 | enc_size << 1, 0b100111) } VecALUOp::Sshl => (0b010_01110_00_1 | enc_size << 1, 0b010001), diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index e8148dbe41..29e3036e16 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -1841,7 +1841,7 @@ fn test_aarch64_binemit() { rd: writable_xreg(3), rn: vreg(27), idx: 14, - ty: I8, + size: VectorSize::Size8x16, }, "633F1D0E", "umov w3, v27.b[14]", @@ -1851,7 +1851,7 @@ fn test_aarch64_binemit() { rd: writable_xreg(24), rn: vreg(5), idx: 3, - ty: I16, + size: VectorSize::Size16x8, }, "B83C0E0E", "umov w24, v5.h[3]", @@ -1861,7 +1861,7 @@ fn test_aarch64_binemit() { rd: writable_xreg(12), rn: vreg(17), idx: 1, - ty: I32, + size: VectorSize::Size32x4, }, "2C3E0C0E", "mov w12, v17.s[1]", @@ -1871,7 +1871,7 @@ fn test_aarch64_binemit() { rd: writable_xreg(21), rn: vreg(20), idx: 0, - ty: I64, + size: VectorSize::Size64x2, }, "953E084E", "mov x21, v20.d[0]", @@ -1900,7 +1900,7 @@ fn test_aarch64_binemit() { Inst::VecDup { rd: writable_vreg(25), rn: xreg(7), - ty: I8, + size: VectorSize::Size8x16, }, "F90C014E", "dup v25.16b, w7", @@ -1909,7 +1909,7 @@ fn test_aarch64_binemit() { Inst::VecDup { rd: writable_vreg(2), rn: xreg(23), - ty: I16, + size: VectorSize::Size16x8, }, "E20E024E", "dup v2.8h, w23", @@ -1918,7 +1918,7 @@ fn test_aarch64_binemit() { Inst::VecDup { rd: writable_vreg(0), rn: xreg(28), - ty: I32, + size: VectorSize::Size32x4, }, "800F044E", "dup v0.4s, w28", @@ -1927,7 +1927,7 @@ fn test_aarch64_binemit() { Inst::VecDup { rd: writable_vreg(31), rn: xreg(5), - ty: I64, + size: VectorSize::Size64x2, }, "BF0C084E", "dup v31.2d, x5", @@ -1936,7 +1936,7 @@ fn test_aarch64_binemit() { Inst::VecDupFromFpu { rd: writable_vreg(14), rn: vreg(19), - ty: F32, + size: VectorSize::Size32x4, }, "6E06044E", "dup v14.4s, v19.s[0]", @@ -1945,7 +1945,7 @@ fn test_aarch64_binemit() { Inst::VecDupFromFpu { rd: writable_vreg(18), rn: vreg(10), - ty: F64, + size: VectorSize::Size64x2, }, "5205084E", "dup v18.2d, v10.d[0]", @@ -2004,50 +2004,6 @@ fn test_aarch64_binemit() { "5CA4202F", "uxtl v28.2d, v2.2s", )); - insns.push(( - Inst::VecRRR { - rd: writable_vreg(21), - rn: vreg(22), - rm: vreg(23), - alu_op: VecALUOp::UQAddScalar, - ty: I64, - }, - "D50EF77E", - "uqadd d21, d22, d23", - )); - insns.push(( - Inst::VecRRR { - rd: writable_vreg(21), - rn: vreg(22), - rm: vreg(23), - alu_op: VecALUOp::SQAddScalar, - ty: 
I64, - }, - "D50EF75E", - "sqadd d21, d22, d23", - )); - insns.push(( - Inst::VecRRR { - rd: writable_vreg(21), - rn: vreg(22), - rm: vreg(23), - alu_op: VecALUOp::UQSubScalar, - ty: I64, - }, - "D52EF77E", - "uqsub d21, d22, d23", - )); - insns.push(( - Inst::VecRRR { - rd: writable_vreg(21), - rn: vreg(22), - rm: vreg(23), - alu_op: VecALUOp::SQSubScalar, - ty: I64, - }, - "D52EF75E", - "sqsub d21, d22, d23", - )); insns.push(( Inst::VecRRR { @@ -2055,7 +2011,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(2), rm: vreg(8), - ty: I8X16, + size: VectorSize::Size8x16, }, "410C284E", "sqadd v1.16b, v2.16b, v8.16b", @@ -2067,7 +2023,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(12), rm: vreg(28), - ty: I16X8, + size: VectorSize::Size16x8, }, "810D7C4E", "sqadd v1.8h, v12.8h, v28.8h", @@ -2079,7 +2035,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(12), rn: vreg(2), rm: vreg(6), - ty: I32X4, + size: VectorSize::Size32x4, }, "4C0CA64E", "sqadd v12.4s, v2.4s, v6.4s", @@ -2091,7 +2047,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(20), rn: vreg(7), rm: vreg(13), - ty: I64X2, + size: VectorSize::Size64x2, }, "F40CED4E", "sqadd v20.2d, v7.2d, v13.2d", @@ -2103,7 +2059,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(2), rm: vreg(8), - ty: I8X16, + size: VectorSize::Size8x16, }, "412C284E", "sqsub v1.16b, v2.16b, v8.16b", @@ -2115,7 +2071,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(12), rm: vreg(28), - ty: I16X8, + size: VectorSize::Size16x8, }, "812D7C4E", "sqsub v1.8h, v12.8h, v28.8h", @@ -2127,7 +2083,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(12), rn: vreg(2), rm: vreg(6), - ty: I32X4, + size: VectorSize::Size32x4, }, "4C2CA64E", "sqsub v12.4s, v2.4s, v6.4s", @@ -2139,7 +2095,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(20), rn: vreg(7), rm: vreg(13), - ty: I64X2, + size: VectorSize::Size64x2, }, "F42CED4E", "sqsub v20.2d, v7.2d, v13.2d", @@ -2151,7 +2107,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(2), rm: vreg(8), - ty: I8X16, + size: VectorSize::Size8x16, }, "410C286E", "uqadd v1.16b, v2.16b, v8.16b", @@ -2163,7 +2119,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(12), rm: vreg(28), - ty: I16X8, + size: VectorSize::Size16x8, }, "810D7C6E", "uqadd v1.8h, v12.8h, v28.8h", @@ -2175,7 +2131,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(12), rn: vreg(2), rm: vreg(6), - ty: I32X4, + size: VectorSize::Size32x4, }, "4C0CA66E", "uqadd v12.4s, v2.4s, v6.4s", @@ -2187,7 +2143,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(20), rn: vreg(7), rm: vreg(13), - ty: I64X2, + size: VectorSize::Size64x2, }, "F40CED6E", "uqadd v20.2d, v7.2d, v13.2d", @@ -2199,7 +2155,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(2), rm: vreg(8), - ty: I8X16, + size: VectorSize::Size8x16, }, "412C286E", "uqsub v1.16b, v2.16b, v8.16b", @@ -2211,7 +2167,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(12), rm: vreg(28), - ty: I16X8, + size: VectorSize::Size16x8, }, "812D7C6E", "uqsub v1.8h, v12.8h, v28.8h", @@ -2223,7 +2179,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(12), rn: vreg(2), rm: vreg(6), - ty: I32X4, + size: VectorSize::Size32x4, }, "4C2CA66E", "uqsub v12.4s, v2.4s, v6.4s", @@ -2235,7 +2191,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(20), rn: vreg(7), rm: vreg(13), - ty: I64X2, + size: VectorSize::Size64x2, }, "F42CED6E", "uqsub v20.2d, v7.2d, v13.2d", @@ -2247,7 +2203,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(3), rn: vreg(23), rm: 
vreg(24), - ty: I8X16, + size: VectorSize::Size8x16, }, "E38E386E", "cmeq v3.16b, v23.16b, v24.16b", @@ -2259,7 +2215,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(3), rn: vreg(23), rm: vreg(24), - ty: I8X16, + size: VectorSize::Size8x16, }, "E336384E", "cmgt v3.16b, v23.16b, v24.16b", @@ -2271,7 +2227,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(23), rn: vreg(9), rm: vreg(12), - ty: I8X16, + size: VectorSize::Size8x16, }, "373D2C4E", "cmge v23.16b, v9.16b, v12.16b", @@ -2283,7 +2239,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(5), rn: vreg(1), rm: vreg(1), - ty: I8X16, + size: VectorSize::Size8x16, }, "2534216E", "cmhi v5.16b, v1.16b, v1.16b", @@ -2295,7 +2251,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(8), rn: vreg(2), rm: vreg(15), - ty: I8X16, + size: VectorSize::Size8x16, }, "483C2F6E", "cmhs v8.16b, v2.16b, v15.16b", @@ -2307,7 +2263,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(3), rn: vreg(23), rm: vreg(24), - ty: I16X8, + size: VectorSize::Size16x8, }, "E38E786E", "cmeq v3.8h, v23.8h, v24.8h", @@ -2319,7 +2275,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(3), rn: vreg(23), rm: vreg(24), - ty: I16X8, + size: VectorSize::Size16x8, }, "E336784E", "cmgt v3.8h, v23.8h, v24.8h", @@ -2331,7 +2287,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(23), rn: vreg(9), rm: vreg(12), - ty: I16X8, + size: VectorSize::Size16x8, }, "373D6C4E", "cmge v23.8h, v9.8h, v12.8h", @@ -2343,7 +2299,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(5), rn: vreg(1), rm: vreg(1), - ty: I16X8, + size: VectorSize::Size16x8, }, "2534616E", "cmhi v5.8h, v1.8h, v1.8h", @@ -2355,7 +2311,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(8), rn: vreg(2), rm: vreg(15), - ty: I16X8, + size: VectorSize::Size16x8, }, "483C6F6E", "cmhs v8.8h, v2.8h, v15.8h", @@ -2367,7 +2323,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(3), rn: vreg(23), rm: vreg(24), - ty: I32X4, + size: VectorSize::Size32x4, }, "E38EB86E", "cmeq v3.4s, v23.4s, v24.4s", @@ -2379,7 +2335,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(3), rn: vreg(23), rm: vreg(24), - ty: I32X4, + size: VectorSize::Size32x4, }, "E336B84E", "cmgt v3.4s, v23.4s, v24.4s", @@ -2391,7 +2347,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(23), rn: vreg(9), rm: vreg(12), - ty: I32X4, + size: VectorSize::Size32x4, }, "373DAC4E", "cmge v23.4s, v9.4s, v12.4s", @@ -2403,7 +2359,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(5), rn: vreg(1), rm: vreg(1), - ty: I32X4, + size: VectorSize::Size32x4, }, "2534A16E", "cmhi v5.4s, v1.4s, v1.4s", @@ -2415,7 +2371,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(8), rn: vreg(2), rm: vreg(15), - ty: I32X4, + size: VectorSize::Size32x4, }, "483CAF6E", "cmhs v8.4s, v2.4s, v15.4s", @@ -2427,7 +2383,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(28), rn: vreg(12), rm: vreg(4), - ty: F32X4, + size: VectorSize::Size32x4, }, "9CE5244E", "fcmeq v28.4s, v12.4s, v4.4s", @@ -2439,7 +2395,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(3), rn: vreg(16), rm: vreg(31), - ty: F64X2, + size: VectorSize::Size64x2, }, "03E6FF6E", "fcmgt v3.2d, v16.2d, v31.2d", @@ -2451,7 +2407,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(23), rm: vreg(0), - ty: F64X2, + size: VectorSize::Size64x2, }, "F2E6606E", "fcmge v18.2d, v23.2d, v0.2d", @@ -2463,7 +2419,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(20), rn: vreg(19), rm: vreg(18), - ty: I32X4, + size: VectorSize::Size32x4, }, "741E324E", "and v20.16b, v19.16b, v18.16b", @@ -2475,7 +2431,7 @@ fn test_aarch64_binemit() { rd: 
writable_vreg(8), rn: vreg(11), rm: vreg(1), - ty: I8X16, + size: VectorSize::Size8x16, }, "681D614E", "bic v8.16b, v11.16b, v1.16b", @@ -2487,7 +2443,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(15), rn: vreg(2), rm: vreg(12), - ty: I16X8, + size: VectorSize::Size16x8, }, "4F1CAC4E", "orr v15.16b, v2.16b, v12.16b", @@ -2499,7 +2455,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(3), rm: vreg(22), - ty: I8X16, + size: VectorSize::Size8x16, }, "721C366E", "eor v18.16b, v3.16b, v22.16b", @@ -2511,7 +2467,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(8), rn: vreg(9), rm: vreg(1), - ty: I8X16, + size: VectorSize::Size8x16, }, "281D616E", "bsl v8.16b, v9.16b, v1.16b", @@ -2523,7 +2479,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(8), rn: vreg(12), rm: vreg(1), - ty: I8X16, + size: VectorSize::Size8x16, }, "88A5216E", "umaxp v8.16b, v12.16b, v1.16b", @@ -2535,7 +2491,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(6), rm: vreg(1), - ty: I16X8, + size: VectorSize::Size16x8, }, "C1A4616E", "umaxp v1.8h, v6.8h, v1.8h", @@ -2547,7 +2503,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(20), rm: vreg(16), - ty: I32X4, + size: VectorSize::Size32x4, }, "81A6B06E", "umaxp v1.4s, v20.4s, v16.4s", @@ -2559,7 +2515,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(5), rn: vreg(1), rm: vreg(1), - ty: I8X16, + size: VectorSize::Size8x16, }, "2584214E", "add v5.16b, v1.16b, v1.16b", @@ -2571,7 +2527,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(7), rn: vreg(13), rm: vreg(2), - ty: I16X8, + size: VectorSize::Size16x8, }, "A785624E", "add v7.8h, v13.8h, v2.8h", @@ -2583,7 +2539,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(9), rm: vreg(6), - ty: I32X4, + size: VectorSize::Size32x4, }, "3285A64E", "add v18.4s, v9.4s, v6.4s", @@ -2595,7 +2551,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(3), rm: vreg(2), - ty: I64X2, + size: VectorSize::Size64x2, }, "6184E24E", "add v1.2d, v3.2d, v2.2d", @@ -2607,7 +2563,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(5), rn: vreg(1), rm: vreg(1), - ty: I8X16, + size: VectorSize::Size8x16, }, "2584216E", "sub v5.16b, v1.16b, v1.16b", @@ -2619,7 +2575,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(7), rn: vreg(13), rm: vreg(2), - ty: I16X8, + size: VectorSize::Size16x8, }, "A785626E", "sub v7.8h, v13.8h, v2.8h", @@ -2631,7 +2587,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(9), rm: vreg(6), - ty: I32X4, + size: VectorSize::Size32x4, }, "3285A66E", "sub v18.4s, v9.4s, v6.4s", @@ -2643,7 +2599,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(0), rm: vreg(8), - ty: I64X2, + size: VectorSize::Size64x2, }, "1284E86E", "sub v18.2d, v0.2d, v8.2d", @@ -2655,7 +2611,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(25), rn: vreg(9), rm: vreg(8), - ty: I8X16, + size: VectorSize::Size8x16, }, "399D284E", "mul v25.16b, v9.16b, v8.16b", @@ -2667,7 +2623,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(30), rn: vreg(30), rm: vreg(12), - ty: I16X8, + size: VectorSize::Size16x8, }, "DE9F6C4E", "mul v30.8h, v30.8h, v12.8h", @@ -2679,7 +2635,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(18), rm: vreg(18), - ty: I32X4, + size: VectorSize::Size32x4, }, "529EB24E", "mul v18.4s, v18.4s, v18.4s", @@ -2691,7 +2647,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(18), rm: vreg(18), - ty: I8X16, + size: VectorSize::Size8x16, }, "5246326E", "ushl v18.16b, v18.16b, v18.16b", @@ -2703,7 +2659,7 @@ fn test_aarch64_binemit() { 
rd: writable_vreg(18), rn: vreg(18), rm: vreg(18), - ty: I16X8, + size: VectorSize::Size16x8, }, "5246726E", "ushl v18.8h, v18.8h, v18.8h", @@ -2715,7 +2671,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(1), rm: vreg(21), - ty: I32X4, + size: VectorSize::Size32x4, }, "3244B56E", "ushl v18.4s, v1.4s, v21.4s", @@ -2727,7 +2683,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(5), rn: vreg(7), rm: vreg(19), - ty: I64X2, + size: VectorSize::Size64x2, }, "E544F36E", "ushl v5.2d, v7.2d, v19.2d", @@ -2739,7 +2695,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(18), rn: vreg(18), rm: vreg(18), - ty: I8X16, + size: VectorSize::Size8x16, }, "5246324E", "sshl v18.16b, v18.16b, v18.16b", @@ -2751,7 +2707,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(30), rn: vreg(1), rm: vreg(29), - ty: I16X8, + size: VectorSize::Size16x8, }, "3E447D4E", "sshl v30.8h, v1.8h, v29.8h", @@ -2763,7 +2719,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(8), rn: vreg(22), rm: vreg(21), - ty: I32X4, + size: VectorSize::Size32x4, }, "C846B54E", "sshl v8.4s, v22.4s, v21.4s", @@ -2775,7 +2731,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(8), rn: vreg(22), rm: vreg(2), - ty: I64X2, + size: VectorSize::Size64x2, }, "C846E24E", "sshl v8.2d, v22.2d, v2.2d", @@ -2786,7 +2742,7 @@ fn test_aarch64_binemit() { op: VecMisc2::Not, rd: writable_vreg(2), rn: vreg(1), - ty: I32X4, + size: VectorSize::Size32x4, }, "2258206E", "mvn v2.16b, v1.16b", @@ -2797,7 +2753,7 @@ fn test_aarch64_binemit() { op: VecMisc2::Neg, rd: writable_vreg(8), rn: vreg(12), - ty: I8X16, + size: VectorSize::Size8x16, }, "88B9206E", "neg v8.16b, v12.16b", @@ -2808,7 +2764,7 @@ fn test_aarch64_binemit() { op: VecMisc2::Neg, rd: writable_vreg(0), rn: vreg(31), - ty: I16X8, + size: VectorSize::Size16x8, }, "E0BB606E", "neg v0.8h, v31.8h", @@ -2819,7 +2775,7 @@ fn test_aarch64_binemit() { op: VecMisc2::Neg, rd: writable_vreg(2), rn: vreg(3), - ty: I32X4, + size: VectorSize::Size32x4, }, "62B8A06E", "neg v2.4s, v3.4s", @@ -2830,7 +2786,7 @@ fn test_aarch64_binemit() { op: VecMisc2::Neg, rd: writable_vreg(10), rn: vreg(8), - ty: I64X2, + size: VectorSize::Size64x2, }, "0AB9E06E", "neg v10.2d, v8.2d", @@ -2841,7 +2797,7 @@ fn test_aarch64_binemit() { op: VecLanesOp::Uminv, rd: writable_vreg(2), rn: vreg(1), - ty: I8X16, + size: VectorSize::Size8x16, }, "22A8316E", "uminv b2, v1.16b", @@ -2852,7 +2808,7 @@ fn test_aarch64_binemit() { op: VecLanesOp::Uminv, rd: writable_vreg(3), rn: vreg(11), - ty: I16X8, + size: VectorSize::Size16x8, }, "63A9716E", "uminv h3, v11.8h", @@ -2863,7 +2819,7 @@ fn test_aarch64_binemit() { op: VecLanesOp::Uminv, rd: writable_vreg(18), rn: vreg(4), - ty: I32X4, + size: VectorSize::Size32x4, }, "92A8B16E", "uminv s18, v4.4s", @@ -3214,7 +3170,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(1), rn: vreg(30), idx: 2, - size: ScalarSize::Size32, + size: VectorSize::Size32x4, }, "C107145E", "mov s1, v30.s[2]", @@ -3225,7 +3181,7 @@ fn test_aarch64_binemit() { rd: writable_vreg(23), rn: vreg(11), idx: 0, - size: ScalarSize::Size64, + size: VectorSize::Size64x2, }, "7705085E", "mov d23, v11.d[0]", @@ -3443,6 +3399,50 @@ fn test_aarch64_binemit() { "fmin d15, d30, d31", )); + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Uqadd64, + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + }, + "D50EF77E", + "uqadd d21, d22, d23", + )); + + insns.push(( + Inst::FpuRRR { + fpu_op: FPUOp2::Sqadd64, + rd: writable_vreg(21), + rn: vreg(22), + rm: vreg(23), + }, + "D50EF75E", + "sqadd d21, d22, d23", + )); + + insns.push(( + 
Inst::FpuRRR {
+            fpu_op: FPUOp2::Uqsub64,
+            rd: writable_vreg(21),
+            rn: vreg(22),
+            rm: vreg(23),
+        },
+        "D52EF77E",
+        "uqsub d21, d22, d23",
+    ));
+
+    insns.push((
+        Inst::FpuRRR {
+            fpu_op: FPUOp2::Sqsub64,
+            rd: writable_vreg(21),
+            rn: vreg(22),
+            rm: vreg(23),
+        },
+        "D52EF75E",
+        "sqsub d21, d22, d23",
+    ));
+
     insns.push((
         Inst::FpuRRRR {
             fpu_op: FPUOp3::MAdd32,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
index 79a72c245c..1c5c6f9a1c 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -5,8 +5,8 @@
 use crate::binemit::CodeOffset;
 use crate::ir::types::{
-    B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X2, F32X4, F64, F64X2, FFLAGS, I16,
-    I16X4, I16X8, I32, I32X2, I32X4, I64, I64X2, I8, I8X16, I8X8, IFLAGS, R32, R64,
+    B1, B16, B16X8, B32, B32X4, B64, B64X2, B8, B8X16, F32, F32X4, F64, F64X2, FFLAGS, I16, I16X8,
+    I32, I32X4, I64, I64X2, I8, I8X16, IFLAGS, R32, R64,
 };
 use crate::ir::{ExternalName, Opcode, SourceLoc, TrapCode, Type};
 use crate::machinst::*;
@@ -125,6 +125,14 @@ pub enum FPUOp2 {
     Max64,
     Min32,
     Min64,
+    /// Signed saturating add
+    Sqadd64,
+    /// Unsigned saturating add
+    Uqadd64,
+    /// Signed saturating subtract
+    Sqsub64,
+    /// Unsigned saturating subtract
+    Uqsub64,
 }
 
 /// A floating-point unit (FPU) operation with two args, a register and an immediate.
@@ -208,16 +216,12 @@ pub enum VecExtendOp {
 #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
 pub enum VecALUOp {
     /// Signed saturating add
-    SQAddScalar,
     Sqadd,
     /// Unsigned saturating add
-    UQAddScalar,
     Uqadd,
     /// Signed saturating subtract
-    SQSubScalar,
     Sqsub,
     /// Unsigned saturating subtract
-    UQSubScalar,
     Uqsub,
     /// Compare bitwise equal
     Cmeq,
@@ -590,7 +594,7 @@ pub enum Inst {
         rd: Writable<Reg>,
         rn: Reg,
         idx: u8,
-        size: ScalarSize,
+        size: VectorSize,
     },
 
     /// 1-op FPU instruction.
@@ -734,21 +738,21 @@ pub enum Inst {
         rd: Writable<Reg>,
         rn: Reg,
         idx: u8,
-        ty: Type,
+        size: VectorSize,
     },
 
     /// Duplicate general-purpose register to vector.
     VecDup {
         rd: Writable<Reg>,
         rn: Reg,
-        ty: Type,
+        size: VectorSize,
     },
 
     /// Duplicate scalar to vector.
     VecDupFromFpu {
         rd: Writable<Reg>,
         rn: Reg,
-        ty: Type,
+        size: VectorSize,
     },
 
     /// Vector extend.
@@ -764,7 +768,7 @@ pub enum Inst {
         rd: Writable<Reg>,
         rn: Reg,
         rm: Reg,
-        ty: Type,
+        size: VectorSize,
     },
 
     /// Vector two register miscellaneous instruction.
@@ -772,7 +776,7 @@ pub enum Inst {
         op: VecMisc2,
         rd: Writable<Reg>,
         rn: Reg,
-        ty: Type,
+        size: VectorSize,
     },
 
     /// Vector instruction across lanes.
@@ -780,7 +784,7 @@ pub enum Inst {
         op: VecLanesOp,
         rd: Writable<Reg>,
         rn: Reg,
-        ty: Type,
+        size: VectorSize,
     },
 
     /// Move to the NZCV flags (actually a `MSR NZCV, Xn` insn).
@@ -2504,13 +2508,8 @@ impl Inst { format!("mov {}.16b, {}.16b", rd, rn) } &Inst::FpuMoveFromVec { rd, rn, idx, size } => { - let vector_type = match size { - ScalarSize::Size32 => F32, - ScalarSize::Size64 => F64, - _ => unimplemented!(), - }; - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); - let rn = show_vreg_element(rn, mb_rru, idx, vector_type); + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size()); + let rn = show_vreg_element(rn, mb_rru, idx, size); format!("mov {}, {}", rd, rn) } &Inst::FpuRR { fpu_op, rd, rn } => { @@ -2542,6 +2541,10 @@ impl Inst { FPUOp2::Max64 => ("fmax", ScalarSize::Size64), FPUOp2::Min32 => ("fmin", ScalarSize::Size32), FPUOp2::Min64 => ("fmin", ScalarSize::Size64), + FPUOp2::Sqadd64 => ("sqadd", ScalarSize::Size64), + FPUOp2::Uqadd64 => ("uqadd", ScalarSize::Size64), + FPUOp2::Sqsub64 => ("sqsub", ScalarSize::Size64), + FPUOp2::Uqsub64 => ("uqsub", ScalarSize::Size64), }; let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); let rn = show_vreg_scalar(rn, mb_rru, size); @@ -2557,7 +2560,7 @@ impl Inst { }; let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>) -> String = if vector { - |reg, mb_rru| show_vreg_vector(reg, mb_rru, F32X2) + |reg, mb_rru| show_vreg_vector(reg, mb_rru, VectorSize::Size32x2) } else { |reg, mb_rru| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64) }; @@ -2706,45 +2709,36 @@ impl Inst { let rn = rn.show_rru(mb_rru); format!("mov {}.d[0], {}", rd, rn) } - &Inst::MovFromVec { rd, rn, idx, ty } => { - let op = match ty { - I32 | I64 => "mov", - _ => "umov", + &Inst::MovFromVec { rd, rn, idx, size } => { + let op = match size { + VectorSize::Size8x16 => "umov", + VectorSize::Size16x8 => "umov", + VectorSize::Size32x4 => "mov", + VectorSize::Size64x2 => "mov", + _ => unimplemented!(), }; - let rd = show_ireg_sized(rd.to_reg(), mb_rru, OperandSize::from_ty(ty)); - let rn = show_vreg_element(rn, mb_rru, idx, ty); + let rd = show_ireg_sized(rd.to_reg(), mb_rru, size.operand_size()); + let rn = show_vreg_element(rn, mb_rru, idx, size); format!("{} {}, {}", op, rd, rn) } - &Inst::VecDup { rd, rn, ty } => { - let vector_type = match ty { - I8 => I8X16, - I16 => I16X8, - I32 => I32X4, - I64 => I64X2, - _ => unimplemented!(), - }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, vector_type); - let rn = show_ireg_sized(rn, mb_rru, OperandSize::from_ty(ty)); + &Inst::VecDup { rd, rn, size } => { + let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let rn = show_ireg_sized(rn, mb_rru, size.operand_size()); format!("dup {}, {}", rd, rn) } - &Inst::VecDupFromFpu { rd, rn, ty } => { - let vector_type = match ty { - F32 => F32X4, - F64 => F64X2, - _ => unimplemented!(), - }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, vector_type); - let rn = show_vreg_element(rn, mb_rru, 0, ty); + &Inst::VecDupFromFpu { rd, rn, size } => { + let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let rn = show_vreg_element(rn, mb_rru, 0, size); format!("dup {}, {}", rd, rn) } &Inst::VecExtend { t, rd, rn } => { let (op, dest, src) = match t { - VecExtendOp::Sxtl8 => ("sxtl", I16X8, I8X8), - VecExtendOp::Sxtl16 => ("sxtl", I32X4, I16X4), - VecExtendOp::Sxtl32 => ("sxtl", I64X2, I32X2), - VecExtendOp::Uxtl8 => ("uxtl", I16X8, I8X8), - VecExtendOp::Uxtl16 => ("uxtl", I32X4, I16X4), - VecExtendOp::Uxtl32 => ("uxtl", I64X2, I32X2), + VecExtendOp::Sxtl8 => ("sxtl", VectorSize::Size16x8, VectorSize::Size8x8), + VecExtendOp::Sxtl16 => ("sxtl", VectorSize::Size32x4, VectorSize::Size16x4), + VecExtendOp::Sxtl32 => ("sxtl", VectorSize::Size64x2, 
VectorSize::Size32x2), + VecExtendOp::Uxtl8 => ("uxtl", VectorSize::Size16x8, VectorSize::Size8x8), + VecExtendOp::Uxtl16 => ("uxtl", VectorSize::Size32x4, VectorSize::Size16x4), + VecExtendOp::Uxtl32 => ("uxtl", VectorSize::Size64x2, VectorSize::Size32x2), }; let rd = show_vreg_vector(rd.to_reg(), mb_rru, dest); let rn = show_vreg_vector(rn, mb_rru, src); @@ -2755,72 +2749,54 @@ impl Inst { rn, rm, alu_op, - ty, + size, } => { - let (op, vector, ty) = match alu_op { - VecALUOp::SQAddScalar => ("sqadd", false, ty), - VecALUOp::Sqadd => ("sqadd", true, ty), - VecALUOp::UQAddScalar => ("uqadd", false, ty), - VecALUOp::Uqadd => ("uqadd", true, ty), - VecALUOp::SQSubScalar => ("sqsub", false, ty), - VecALUOp::Sqsub => ("sqsub", true, ty), - VecALUOp::UQSubScalar => ("uqsub", false, ty), - VecALUOp::Uqsub => ("uqsub", true, ty), - VecALUOp::Cmeq => ("cmeq", true, ty), - VecALUOp::Cmge => ("cmge", true, ty), - VecALUOp::Cmgt => ("cmgt", true, ty), - VecALUOp::Cmhs => ("cmhs", true, ty), - VecALUOp::Cmhi => ("cmhi", true, ty), - VecALUOp::Fcmeq => ("fcmeq", true, ty), - VecALUOp::Fcmgt => ("fcmgt", true, ty), - VecALUOp::Fcmge => ("fcmge", true, ty), - VecALUOp::And => ("and", true, I8X16), - VecALUOp::Bic => ("bic", true, I8X16), - VecALUOp::Orr => ("orr", true, I8X16), - VecALUOp::Eor => ("eor", true, I8X16), - VecALUOp::Bsl => ("bsl", true, I8X16), - VecALUOp::Umaxp => ("umaxp", true, ty), - VecALUOp::Add => ("add", true, ty), - VecALUOp::Sub => ("sub", true, ty), - VecALUOp::Mul => ("mul", true, ty), - VecALUOp::Sshl => ("sshl", true, ty), - VecALUOp::Ushl => ("ushl", true, ty), + let (op, size) = match alu_op { + VecALUOp::Sqadd => ("sqadd", size), + VecALUOp::Uqadd => ("uqadd", size), + VecALUOp::Sqsub => ("sqsub", size), + VecALUOp::Uqsub => ("uqsub", size), + VecALUOp::Cmeq => ("cmeq", size), + VecALUOp::Cmge => ("cmge", size), + VecALUOp::Cmgt => ("cmgt", size), + VecALUOp::Cmhs => ("cmhs", size), + VecALUOp::Cmhi => ("cmhi", size), + VecALUOp::Fcmeq => ("fcmeq", size), + VecALUOp::Fcmgt => ("fcmgt", size), + VecALUOp::Fcmge => ("fcmge", size), + VecALUOp::And => ("and", VectorSize::Size8x16), + VecALUOp::Bic => ("bic", VectorSize::Size8x16), + VecALUOp::Orr => ("orr", VectorSize::Size8x16), + VecALUOp::Eor => ("eor", VectorSize::Size8x16), + VecALUOp::Bsl => ("bsl", VectorSize::Size8x16), + VecALUOp::Umaxp => ("umaxp", size), + VecALUOp::Add => ("add", size), + VecALUOp::Sub => ("sub", size), + VecALUOp::Mul => ("mul", size), + VecALUOp::Sshl => ("sshl", size), + VecALUOp::Ushl => ("ushl", size), }; - - let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>, Type) -> String = if vector { - |reg, mb_rru, ty| show_vreg_vector(reg, mb_rru, ty) - } else { - |reg, mb_rru, _ty| show_vreg_scalar(reg, mb_rru, ScalarSize::Size64) - }; - - let rd = show_vreg_fn(rd.to_reg(), mb_rru, ty); - let rn = show_vreg_fn(rn, mb_rru, ty); - let rm = show_vreg_fn(rm, mb_rru, ty); + let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + let rn = show_vreg_vector(rn, mb_rru, size); + let rm = show_vreg_vector(rm, mb_rru, size); format!("{} {}, {}, {}", op, rd, rn, rm) } - &Inst::VecMisc { op, rd, rn, ty } => { - let (op, ty) = match op { - VecMisc2::Not => ("mvn", I8X16), - VecMisc2::Neg => ("neg", ty), + &Inst::VecMisc { op, rd, rn, size } => { + let (op, size) = match op { + VecMisc2::Not => ("mvn", VectorSize::Size8x16), + VecMisc2::Neg => ("neg", size), }; - let rd = show_vreg_vector(rd.to_reg(), mb_rru, ty); - let rn = show_vreg_vector(rn, mb_rru, ty); + let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); + 
let rn = show_vreg_vector(rn, mb_rru, size); format!("{} {}, {}", op, rd, rn) } - &Inst::VecLanes { op, rd, rn, ty } => { + &Inst::VecLanes { op, rd, rn, size } => { let op = match op { VecLanesOp::Uminv => "uminv", }; - let size = match ty { - I8X16 => ScalarSize::Size8, - I16X8 => ScalarSize::Size16, - I32X4 => ScalarSize::Size32, - _ => unimplemented!(), - }; - - let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size); - let rn = show_vreg_vector(rn, mb_rru, ty); + let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size.lane_size()); + let rn = show_vreg_vector(rn, mb_rru, size); format!("{} {}, {}", op, rd, rn) } &Inst::MovToNZCV { rn } => { diff --git a/cranelift/codegen/src/isa/aarch64/inst/regs.rs b/cranelift/codegen/src/isa/aarch64/inst/regs.rs index 88d67fb257..cbf1440927 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/regs.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/regs.rs @@ -1,8 +1,8 @@ //! AArch64 ISA definitions: registers. -use crate::ir::types::*; use crate::isa::aarch64::inst::OperandSize; use crate::isa::aarch64::inst::ScalarSize; +use crate::isa::aarch64::inst::VectorSize; use crate::machinst::*; use crate::settings; @@ -307,40 +307,42 @@ pub fn show_vreg_scalar(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: Scalar } /// Show a vector register. -pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, ty: Type) -> String { +pub fn show_vreg_vector(reg: Reg, mb_rru: Option<&RealRegUniverse>, size: VectorSize) -> String { assert_eq!(RegClass::V128, reg.get_class()); let mut s = reg.show_rru(mb_rru); - match ty { - F32X2 => s.push_str(".2s"), - F32X4 => s.push_str(".4s"), - F64X2 => s.push_str(".2d"), - I8X8 => s.push_str(".8b"), - I8X16 => s.push_str(".16b"), - I16X4 => s.push_str(".4h"), - I16X8 => s.push_str(".8h"), - I32X2 => s.push_str(".2s"), - I32X4 => s.push_str(".4s"), - I64X2 => s.push_str(".2d"), - _ => unimplemented!(), - } + let suffix = match size { + VectorSize::Size8x8 => ".8b", + VectorSize::Size8x16 => ".16b", + VectorSize::Size16x4 => ".4h", + VectorSize::Size16x8 => ".8h", + VectorSize::Size32x2 => ".2s", + VectorSize::Size32x4 => ".4s", + VectorSize::Size64x2 => ".2d", + }; + s.push_str(suffix); s } /// Show an indexed vector element. 
-pub fn show_vreg_element(reg: Reg, mb_rru: Option<&RealRegUniverse>, idx: u8, ty: Type) -> String {
+pub fn show_vreg_element(
+    reg: Reg,
+    mb_rru: Option<&RealRegUniverse>,
+    idx: u8,
+    size: VectorSize,
+) -> String {
     assert_eq!(RegClass::V128, reg.get_class());
     let mut s = reg.show_rru(mb_rru);
-    let suffix = match ty {
-        I8 => "b",
-        I16 => "h",
-        I32 => "s",
-        I64 => "d",
-        F32 => "s",
-        F64 => "d",
-        _ => unimplemented!(),
+    let suffix = match size {
+        VectorSize::Size8x8 => "b",
+        VectorSize::Size8x16 => "b",
+        VectorSize::Size16x4 => "h",
+        VectorSize::Size16x8 => "h",
+        VectorSize::Size32x2 => "s",
+        VectorSize::Size32x4 => "s",
+        VectorSize::Size64x2 => "d",
     };
     s.push_str(&format!(".{}[{}]", suffix, idx));
diff --git a/cranelift/codegen/src/isa/aarch64/lower.rs b/cranelift/codegen/src/isa/aarch64/lower.rs
index 03a464be9a..d60fdfe144 100644
--- a/cranelift/codegen/src/isa/aarch64/lower.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower.rs
@@ -14,7 +14,7 @@ use crate::ir::Inst as IRInst;
 use crate::ir::{InstructionData, Opcode, TrapCode, Type};
 use crate::machinst::lower::*;
 use crate::machinst::*;
-use crate::{CodegenError, CodegenResult};
+use crate::CodegenResult;
 
 use crate::isa::aarch64::inst::*;
 use crate::isa::aarch64::AArch64Backend;
@@ -736,20 +736,11 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
     ty: Type,
     cond: Cond,
 ) -> CodegenResult<()> {
-    match ty {
-        F32X4 | F64X2 | I8X16 | I16X8 | I32X4 => {}
-        _ => {
-            return Err(CodegenError::Unsupported(format!(
-                "unsupported SIMD type: {:?}",
-                ty
-            )));
-        }
-    };
-
     let is_float = match ty {
         F32X4 | F64X2 => true,
         _ => false,
     };
+    let size = VectorSize::from_ty(ty);
     // 'Less than' operations are implemented by swapping
     // the order of operands and using the 'greater than'
     // instructions.
@@ -784,7 +775,7 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
             rd,
             rn,
             rm,
-            ty,
+            size,
         });
 
         if cond == Cond::Ne {
@@ -792,7 +783,7 @@ pub(crate) fn lower_vector_compare<C: LowerCtx<I = Inst>>(
                 op: VecMisc2::Not,
                 rd,
                 rn: rd.to_reg(),
-                ty: I8X16,
+                size,
             });
         }
 
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index 004e59441d..80b4518f9f 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -70,7 +70,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     rn,
                     rm,
                     alu_op: VecALUOp::Add,
-                    ty,
+                    size: VectorSize::from_ty(ty),
                 });
             }
         }
@@ -89,13 +89,13 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     rn,
                     rm,
                     alu_op: VecALUOp::Sub,
-                    ty,
+                    size: VectorSize::from_ty(ty),
                 });
             }
         }
         Opcode::UaddSat | Opcode::SaddSat | Opcode::UsubSat | Opcode::SsubSat => {
-            // We use the vector instruction set's saturating adds (UQADD /
-            // SQADD), which require vector registers.
+            // We use the scalar SIMD & FP saturating additions and subtractions
+            // (SQADD / UQADD / SQSUB / UQSUB), which require scalar FP registers.
             let is_signed = op == Opcode::SaddSat || op == Opcode::SsubSat;
             let ty = ty.unwrap();
             let rd = get_output_reg(ctx, outputs[0]);
@@ -105,11 +105,11 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 } else {
                     NarrowValueMode::ZeroExtend64
                 };
-                let alu_op = match op {
-                    Opcode::UaddSat => VecALUOp::UQAddScalar,
-                    Opcode::SaddSat => VecALUOp::SQAddScalar,
-                    Opcode::UsubSat => VecALUOp::UQSubScalar,
-                    Opcode::SsubSat => VecALUOp::SQSubScalar,
+                let fpu_op = match op {
+                    Opcode::UaddSat => FPUOp2::Uqadd64,
+                    Opcode::SaddSat => FPUOp2::Sqadd64,
+                    Opcode::UsubSat => FPUOp2::Uqsub64,
+                    Opcode::SsubSat => FPUOp2::Sqsub64,
                     _ => unreachable!(),
                 };
                 let va = ctx.alloc_tmp(RegClass::V128, I128);
@@ -118,18 +118,17 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 let rb = put_input_in_reg(ctx, inputs[1], narrow_mode);
                 ctx.emit(Inst::MovToVec64 { rd: va, rn: ra });
                 ctx.emit(Inst::MovToVec64 { rd: vb, rn: rb });
-                ctx.emit(Inst::VecRRR {
+                ctx.emit(Inst::FpuRRR {
+                    fpu_op,
                     rd: va,
                     rn: va.to_reg(),
                     rm: vb.to_reg(),
-                    alu_op,
-                    ty: I64,
                 });
                 ctx.emit(Inst::MovFromVec {
                     rd,
                     rn: va.to_reg(),
                     idx: 0,
-                    ty: I64,
+                    size: VectorSize::Size64x2,
                 });
             } else {
                 let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
@@ -148,7 +147,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     rn,
                     rm,
                     alu_op,
-                    ty,
+                    size: VectorSize::from_ty(ty),
                 });
             }
         }
@@ -167,7 +166,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     op: VecMisc2::Neg,
                     rd,
                     rn,
-                    ty,
+                    size: VectorSize::from_ty(ty),
                 });
             }
         }
@@ -192,7 +191,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     rd,
                     rn,
                     rm,
-                    ty,
+                    size: VectorSize::from_ty(ty),
                 });
             }
         }
@@ -422,7 +421,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     op: VecMisc2::Not,
                     rd,
                     rn: rm,
-                    ty,
+                    size: VectorSize::from_ty(ty),
                 });
             }
         }
@@ -466,7 +465,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     rd,
                     rn,
                     rm,
-                    ty,
+                    size: VectorSize::from_ty(ty),
                 });
             }
         }
@@ -495,7 +494,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 ctx.emit(alu_inst_immshift(alu_op, rd, rn, rm));
             } else {
                 let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
-
+                let size = VectorSize::from_ty(ty);
                 let (alu_op, is_right_shift) = match op {
                     Opcode::Ishl => (VecALUOp::Sshl, false),
                     Opcode::Ushr => (VecALUOp::Ushl, true),
@@ -514,18 +513,14 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     put_input_in_reg(ctx, inputs[1], NarrowValueMode::None)
                 };
 
-                ctx.emit(Inst::VecDup {
-                    rd,
-                    rn: rm,
-                    ty: ty.lane_type(),
-                });
+                ctx.emit(Inst::VecDup { rd, rn: rm, size });
 
                 ctx.emit(Inst::VecRRR {
                     alu_op,
                     rd,
                     rn,
                     rm: rd.to_reg(),
-                    ty,
+                    size,
                 });
             }
         }
@@ -1167,7 +1162,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 rd,
                 rn,
                 rm,
-                ty,
+                size: VectorSize::from_ty(ty),
             });
         }
     }
@@ -1297,7 +1292,7 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                     rd,
                     rn,
                     idx: 0,
-                    ty: I64,
+                    size: VectorSize::Size64x2,
                 });
             }
         }
@@ -1557,15 +1552,15 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
                 let idx = *imm;
                 let rd = get_output_reg(ctx, outputs[0]);
                 let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+                let size = VectorSize::from_ty(ctx.input_ty(insn, 0));
                 let ty = ty.unwrap();
 
                 if ty_is_int(ty) {
-                    ctx.emit(Inst::MovFromVec { rd, rn, idx, ty });
+                    ctx.emit(Inst::MovFromVec { rd, rn, idx, size });
                 // Plain moves are faster on some processors.
                } else if idx == 0 {
                    ctx.emit(Inst::gen_move(rd, rn, ty));
                } else {
-                    let size = ScalarSize::from_ty(ty);
                    ctx.emit(Inst::FpuMoveFromVec { rd, rn, idx, size });
                }
            } else {
@@ -1576,11 +1571,12 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
        Opcode::Splat => {
            let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
            let rd = get_output_reg(ctx, outputs[0]);
-            let ty = ctx.input_ty(insn, 0);
-            let inst = if ty_is_int(ty) {
-                Inst::VecDup { rd, rn, ty }
+            let input_ty = ctx.input_ty(insn, 0);
+            let size = VectorSize::from_ty(ty.unwrap());
+            let inst = if ty_is_int(input_ty) {
+                Inst::VecDup { rd, rn, size }
            } else {
-                Inst::VecDupFromFpu { rd, rn, ty }
+                Inst::VecDupFromFpu { rd, rn, size }
            };
            ctx.emit(inst);
        }
@@ -1598,21 +1594,22 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
            // cmp xm, #0
            // cset xm, ne

-            let input_ty = ctx.input_ty(insn, 0);
+            let size = VectorSize::from_ty(ctx.input_ty(insn, 0));
+
            if op == Opcode::VanyTrue {
                ctx.emit(Inst::VecRRR {
                    alu_op: VecALUOp::Umaxp,
                    rd: tmp,
                    rn: rm,
                    rm: rm,
-                    ty: input_ty,
+                    size,
                });
            } else {
                ctx.emit(Inst::VecLanes {
                    op: VecLanesOp::Uminv,
                    rd: tmp,
                    rn: rm,
-                    ty: input_ty,
+                    size,
                });
            };

            ctx.emit(Inst::MovFromVec {
                rd,
                rn: tmp.to_reg(),
                idx: 0,
-                ty: I64,
+                size: VectorSize::Size64x2,
            });

            ctx.emit(Inst::AluRRImm12 {