diff --git a/build.rs b/build.rs index 95c4b03b63..b551b80ee6 100644 --- a/build.rs +++ b/build.rs @@ -188,12 +188,15 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool { ("simd", "simd_f32x4_cmp") => return false, ("simd", "simd_f64x2_cmp") => return false, ("simd", "simd_i8x16_arith") => return false, + ("simd", "simd_i8x16_arith2") => return false, ("simd", "simd_i8x16_cmp") => return false, ("simd", "simd_i8x16_sat_arith") => return false, ("simd", "simd_i16x8_arith") => return false, + ("simd", "simd_i16x8_arith2") => return false, ("simd", "simd_i16x8_cmp") => return false, ("simd", "simd_i16x8_sat_arith") => return false, ("simd", "simd_i32x4_arith") => return false, + ("simd", "simd_i32x4_arith2") => return false, ("simd", "simd_i32x4_cmp") => return false, ("simd", "simd_load_extend") => return false, ("simd", "simd_load_splat") => return false, diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs index f12205dbd4..0b93effada 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs @@ -352,11 +352,16 @@ fn enc_fround(top22: u32, rd: Writable, rn: Reg) -> u32 { (top22 << 10) | (machreg_to_vec(rn) << 5) | machreg_to_vec(rd.to_reg()) } -fn enc_vec_rr_misc(size: u32, bits_12_16: u32, rd: Writable, rn: Reg) -> u32 { +fn enc_vec_rr_misc(u: u32, size: u32, bits_12_16: u32, rd: Writable, rn: Reg) -> u32 { + debug_assert_eq!(u & 0b1, u); debug_assert_eq!(size & 0b11, size); debug_assert_eq!(bits_12_16 & 0b11111, bits_12_16); - let bits = 0b0_1_1_01110_00_10000_00000_10_00000_00000; - bits | size << 22 | bits_12_16 << 12 | machreg_to_vec(rn) << 5 | machreg_to_vec(rd.to_reg()) + let bits = 0b0_1_0_01110_00_10000_00000_10_00000_00000; + bits | u << 29 + | size << 22 + | bits_12_16 << 12 + | machreg_to_vec(rn) << 5 + | machreg_to_vec(rd.to_reg()) } fn enc_vec_lanes(q: u32, u: u32, size: u32, opcode: u32, rd: Writable, rn: Reg) -> u32 { @@ -1114,11 +1119,12 @@ impl MachInstEmit for Inst { VectorSize::Size64x2 => 0b11, _ => unimplemented!(), }; - let (bits_12_16, size) = match op { - VecMisc2::Not => (0b00101, 0b00), - VecMisc2::Neg => (0b01011, enc_size), + let (u, bits_12_16, size) = match op { + VecMisc2::Not => (0b1, 0b00101, 0b00), + VecMisc2::Neg => (0b1, 0b01011, enc_size), + VecMisc2::Abs => (0b0, 0b01011, enc_size), }; - sink.put4(enc_vec_rr_misc(size, bits_12_16, rd, rn)); + sink.put4(enc_vec_rr_misc(u, size, bits_12_16, rd, rn)); } &Inst::VecLanes { op, rd, rn, size } => { let (q, size) = match size { @@ -1360,6 +1366,11 @@ impl MachInstEmit for Inst { } VecALUOp::Sshl => (0b010_01110_00_1 | enc_size << 1, 0b010001), VecALUOp::Ushl => (0b011_01110_00_1 | enc_size << 1, 0b010001), + VecALUOp::Umin => (0b011_01110_00_1 | enc_size << 1, 0b011011), + VecALUOp::Smin => (0b010_01110_00_1 | enc_size << 1, 0b011011), + VecALUOp::Umax => (0b011_01110_00_1 | enc_size << 1, 0b011001), + VecALUOp::Smax => (0b010_01110_00_1 | enc_size << 1, 0b011001), + VecALUOp::Urhadd => (0b011_01110_00_1 | enc_size << 1, 0b000101), }; sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd)); } diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs index 29e3036e16..a06111e897 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs @@ -2737,6 +2737,186 @@ fn test_aarch64_binemit() { "sshl v8.2d, v22.2d, v2.2d", )); + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umin, + rd: writable_vreg(1), + rn: vreg(12), + rm: vreg(3), + size: VectorSize::Size8x16, + }, + "816D236E", + "umin v1.16b, v12.16b, v3.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umin, + rd: writable_vreg(30), + rn: vreg(20), + rm: vreg(10), + size: VectorSize::Size16x8, + }, + "9E6E6A6E", + "umin v30.8h, v20.8h, v10.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umin, + rd: writable_vreg(8), + rn: vreg(22), + rm: vreg(21), + size: VectorSize::Size32x4, + }, + "C86EB56E", + "umin v8.4s, v22.4s, v21.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smin, + rd: writable_vreg(1), + rn: vreg(12), + rm: vreg(3), + size: VectorSize::Size8x16, + }, + "816D234E", + "smin v1.16b, v12.16b, v3.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smin, + rd: writable_vreg(30), + rn: vreg(20), + rm: vreg(10), + size: VectorSize::Size16x8, + }, + "9E6E6A4E", + "smin v30.8h, v20.8h, v10.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smin, + rd: writable_vreg(8), + rn: vreg(22), + rm: vreg(21), + size: VectorSize::Size32x4, + }, + "C86EB54E", + "smin v8.4s, v22.4s, v21.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umax, + rd: writable_vreg(6), + rn: vreg(9), + rm: vreg(8), + size: VectorSize::Size8x16, + }, + "2665286E", + "umax v6.16b, v9.16b, v8.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umax, + rd: writable_vreg(11), + rn: vreg(13), + rm: vreg(2), + size: VectorSize::Size16x8, + }, + "AB65626E", + "umax v11.8h, v13.8h, v2.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Umax, + rd: writable_vreg(8), + rn: vreg(12), + rm: vreg(14), + size: VectorSize::Size32x4, + }, + "8865AE6E", + "umax v8.4s, v12.4s, v14.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smax, + rd: writable_vreg(6), + rn: vreg(9), + rm: vreg(8), + size: VectorSize::Size8x16, + }, + "2665284E", + "smax v6.16b, v9.16b, v8.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smax, + rd: writable_vreg(11), + rn: vreg(13), + rm: vreg(2), + size: VectorSize::Size16x8, + }, + "AB65624E", + "smax v11.8h, v13.8h, v2.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Smax, + rd: writable_vreg(8), + rn: vreg(12), + rm: vreg(14), + size: VectorSize::Size32x4, + }, + "8865AE4E", + "smax v8.4s, v12.4s, v14.4s", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Urhadd, + rd: writable_vreg(8), + rn: vreg(1), + rm: vreg(3), + size: VectorSize::Size8x16, + }, + "2814236E", + "urhadd v8.16b, v1.16b, v3.16b", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Urhadd, + rd: writable_vreg(2), + rn: vreg(13), + rm: vreg(6), + size: VectorSize::Size16x8, + }, + "A215666E", + "urhadd v2.8h, v13.8h, v6.8h", + )); + + insns.push(( + Inst::VecRRR { + alu_op: VecALUOp::Urhadd, + rd: writable_vreg(8), + rn: vreg(12), + rm: vreg(14), + size: VectorSize::Size32x4, + }, + "8815AE6E", + "urhadd v8.4s, v12.4s, v14.4s", + )); + insns.push(( Inst::VecMisc { op: VecMisc2::Not, @@ -2792,6 +2972,50 @@ fn test_aarch64_binemit() { "neg v10.2d, v8.2d", )); + insns.push(( + Inst::VecMisc { + op: VecMisc2::Abs, + rd: writable_vreg(1), + rn: vreg(1), + size: VectorSize::Size8x16, + }, + "21B8204E", + "abs v1.16b, v1.16b", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Abs, + rd: writable_vreg(29), + rn: vreg(28), + size: VectorSize::Size16x8, + }, + "9DBB604E", + "abs v29.8h, v28.8h", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Abs, + rd: writable_vreg(7), + rn: vreg(8), + size: VectorSize::Size32x4, + }, + "07B9A04E", + "abs v7.4s, v8.4s", + )); + + insns.push(( + Inst::VecMisc { + op: VecMisc2::Abs, + rd: writable_vreg(1), + rn: vreg(10), + size: VectorSize::Size64x2, + }, + "41B9E04E", + "abs v1.2d, v10.2d", + )); + insns.push(( Inst::VecLanes { op: VecLanesOp::Uminv, diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs index 814f846645..eb9c159670 100644 --- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs +++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs @@ -261,6 +261,16 @@ pub enum VecALUOp { Sshl, /// Unsigned shift left Ushl, + /// Unsigned minimum + Umin, + /// Signed minimum + Smin, + /// Unsigned maximum + Umax, + /// Signed maximum + Smax, + /// Unsigned rounding halving add + Urhadd, } /// A Vector miscellaneous operation with two registers. @@ -270,6 +280,8 @@ pub enum VecMisc2 { Not, /// Negate Neg, + /// Absolute value + Abs, } /// An operation across the lanes of vectors. @@ -2780,6 +2792,11 @@ impl Inst { VecALUOp::Mul => ("mul", size), VecALUOp::Sshl => ("sshl", size), VecALUOp::Ushl => ("ushl", size), + VecALUOp::Umin => ("umin", size), + VecALUOp::Smin => ("smin", size), + VecALUOp::Umax => ("umax", size), + VecALUOp::Smax => ("smax", size), + VecALUOp::Urhadd => ("urhadd", size), }; let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); let rn = show_vreg_vector(rn, mb_rru, size); @@ -2790,6 +2807,7 @@ impl Inst { let (op, size) = match op { VecMisc2::Not => ("mvn", VectorSize::Size8x16), VecMisc2::Neg => ("neg", size), + VecMisc2::Abs => ("abs", size), }; let rd = show_vreg_vector(rd.to_reg(), mb_rru, size); diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs index 1dfc4091fe..de0639a1be 100644 --- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs +++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs @@ -1648,8 +1648,26 @@ pub(crate) fn lower_insn_to_regs>( } Opcode::Isplit | Opcode::Iconcat => panic!("Vector ops not supported."), - Opcode::Imax | Opcode::Imin | Opcode::Umin | Opcode::Umax => { - panic!("Vector ops not supported.") + + Opcode::Imax | Opcode::Umax | Opcode::Umin | Opcode::Imin => { + let alu_op = match op { + Opcode::Umin => VecALUOp::Umin, + Opcode::Imin => VecALUOp::Smin, + Opcode::Umax => VecALUOp::Umax, + Opcode::Imax => VecALUOp::Smax, + _ => unreachable!(), + }; + let rd = get_output_reg(ctx, outputs[0]); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let ty = ty.unwrap(); + ctx.emit(Inst::VecRRR { + alu_op, + rd, + rn, + rm, + size: VectorSize::from_ty(ty), + }); } Opcode::Fadd | Opcode::Fsub | Opcode::Fmul | Opcode::Fdiv | Opcode::Fmin | Opcode::Fmax => { @@ -2154,8 +2172,31 @@ pub(crate) fn lower_insn_to_regs>( Opcode::DummySargT => unreachable!(), - Opcode::AvgRound => unimplemented!(), - Opcode::Iabs => unimplemented!(), + Opcode::Iabs => { + let rd = get_output_reg(ctx, outputs[0]); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let ty = ty.unwrap(); + ctx.emit(Inst::VecMisc { + op: VecMisc2::Abs, + rd, + rn, + size: VectorSize::from_ty(ty), + }); + } + Opcode::AvgRound => { + let rd = get_output_reg(ctx, outputs[0]); + let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None); + let rm = put_input_in_reg(ctx, inputs[1], NarrowValueMode::None); + let ty = ty.unwrap(); + ctx.emit(Inst::VecRRR { + alu_op: VecALUOp::Urhadd, + rd, + rn, + rm, + size: VectorSize::from_ty(ty), + }); + } + Opcode::Snarrow | Opcode::Unarrow | Opcode::SwidenLow