diff --git a/build.rs b/build.rs
index 2baea0ab4f..c331648114 100644
--- a/build.rs
+++ b/build.rs
@@ -183,6 +183,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
             ("simd", "simd_address") => return false,
             ("simd", "simd_align") => return false,
             ("simd", "simd_bitwise") => return false,
+            ("simd", "simd_bit_shift") => return false,
             ("simd", "simd_boolean") => return false,
             ("simd", "simd_f32x4_cmp") => return false,
             ("simd", "simd_f64x2_cmp") => return false,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
index bafe42abd0..3d08d524b4 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -1352,6 +1352,8 @@ impl MachInstEmit for Inst {
                         debug_assert_ne!(I64X2, ty);
                         (0b010_01110_00_1 | enc_size << 1, 0b100111)
                     }
+                    VecALUOp::Sshl => (0b010_01110_00_1 | enc_size << 1, 0b010001),
+                    VecALUOp::Ushl => (0b011_01110_00_1 | enc_size << 1, 0b010001),
                 };
                 sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
             }
diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
index 16bec07ac3..2656c0ccfe 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -2473,6 +2473,102 @@ fn test_aarch64_binemit() {
         "mul v18.4s, v18.4s, v18.4s",
     ));
 
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Ushl,
+            rd: writable_vreg(18),
+            rn: vreg(18),
+            rm: vreg(18),
+            ty: I8X16,
+        },
+        "5246326E",
+        "ushl v18.16b, v18.16b, v18.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Ushl,
+            rd: writable_vreg(18),
+            rn: vreg(18),
+            rm: vreg(18),
+            ty: I16X8,
+        },
+        "5246726E",
+        "ushl v18.8h, v18.8h, v18.8h",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Ushl,
+            rd: writable_vreg(18),
+            rn: vreg(1),
+            rm: vreg(21),
+            ty: I32X4,
+        },
+        "3244B56E",
+        "ushl v18.4s, v1.4s, v21.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Ushl,
+            rd: writable_vreg(5),
+            rn: vreg(7),
+            rm: vreg(19),
+            ty: I64X2,
+        },
+        "E544F36E",
+        "ushl v5.2d, v7.2d, v19.2d",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sshl,
+            rd: writable_vreg(18),
+            rn: vreg(18),
+            rm: vreg(18),
+            ty: I8X16,
+        },
+        "5246324E",
+        "sshl v18.16b, v18.16b, v18.16b",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sshl,
+            rd: writable_vreg(30),
+            rn: vreg(1),
+            rm: vreg(29),
+            ty: I16X8,
+        },
+        "3E447D4E",
+        "sshl v30.8h, v1.8h, v29.8h",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sshl,
+            rd: writable_vreg(8),
+            rn: vreg(22),
+            rm: vreg(21),
+            ty: I32X4,
+        },
+        "C846B54E",
+        "sshl v8.4s, v22.4s, v21.4s",
+    ));
+
+    insns.push((
+        Inst::VecRRR {
+            alu_op: VecALUOp::Sshl,
+            rd: writable_vreg(8),
+            rn: vreg(22),
+            rm: vreg(2),
+            ty: I64X2,
+        },
+        "C846E24E",
+        "sshl v8.2d, v22.2d, v2.2d",
+    ));
+
     insns.push((
         Inst::VecMisc {
             op: VecMisc2::Not,
diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
index 33f0c1604c..3f1f849336 100644
--- a/cranelift/codegen/src/isa/aarch64/inst/mod.rs
+++ b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -249,6 +249,10 @@ pub enum VecALUOp {
     Sub,
     /// Multiply
     Mul,
+    /// Signed shift left
+    Sshl,
+    /// Unsigned shift left
+    Ushl,
 }
 
 /// A Vector miscellaneous operation with two registers.
@@ -2750,6 +2754,8 @@ impl ShowWithRRU for Inst {
                 VecALUOp::Add => ("add", true, ty),
                 VecALUOp::Sub => ("sub", true, ty),
                 VecALUOp::Mul => ("mul", true, ty),
+                VecALUOp::Sshl => ("sshl", true, ty),
+                VecALUOp::Ushl => ("ushl", true, ty),
             };
             let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>, Type) -> String =
                 if vector {
diff --git a/cranelift/codegen/src/isa/aarch64/lower_inst.rs b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
index ba8210b875..664f2729a3 100644
--- a/cranelift/codegen/src/isa/aarch64/lower_inst.rs
+++ b/cranelift/codegen/src/isa/aarch64/lower_inst.rs
@@ -484,24 +484,60 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
         Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
             let ty = ty.unwrap();
             let size = InstSize::from_bits(ty_bits(ty));
-            let narrow_mode = match (op, size) {
-                (Opcode::Ishl, _) => NarrowValueMode::None,
-                (Opcode::Ushr, InstSize::Size64) => NarrowValueMode::ZeroExtend64,
-                (Opcode::Ushr, InstSize::Size32) => NarrowValueMode::ZeroExtend32,
-                (Opcode::Sshr, InstSize::Size64) => NarrowValueMode::SignExtend64,
-                (Opcode::Sshr, InstSize::Size32) => NarrowValueMode::SignExtend32,
-                _ => unreachable!(),
-            };
             let rd = get_output_reg(ctx, outputs[0]);
-            let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
-            let rm = put_input_in_reg_immshift(ctx, inputs[1], ty_bits(ty));
-            let alu_op = match op {
-                Opcode::Ishl => choose_32_64(ty, ALUOp::Lsl32, ALUOp::Lsl64),
-                Opcode::Ushr => choose_32_64(ty, ALUOp::Lsr32, ALUOp::Lsr64),
-                Opcode::Sshr => choose_32_64(ty, ALUOp::Asr32, ALUOp::Asr64),
-                _ => unreachable!(),
-            };
-            ctx.emit(alu_inst_immshift(alu_op, rd, rn, rm));
+            if ty_bits(ty) < 128 {
+                let narrow_mode = match (op, size) {
+                    (Opcode::Ishl, _) => NarrowValueMode::None,
+                    (Opcode::Ushr, InstSize::Size64) => NarrowValueMode::ZeroExtend64,
+                    (Opcode::Ushr, InstSize::Size32) => NarrowValueMode::ZeroExtend32,
+                    (Opcode::Sshr, InstSize::Size64) => NarrowValueMode::SignExtend64,
+                    (Opcode::Sshr, InstSize::Size32) => NarrowValueMode::SignExtend32,
+                    _ => unreachable!(),
+                };
+                let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
+                let rm = put_input_in_reg_immshift(ctx, inputs[1], ty_bits(ty));
+                let alu_op = match op {
+                    Opcode::Ishl => choose_32_64(ty, ALUOp::Lsl32, ALUOp::Lsl64),
+                    Opcode::Ushr => choose_32_64(ty, ALUOp::Lsr32, ALUOp::Lsr64),
+                    Opcode::Sshr => choose_32_64(ty, ALUOp::Asr32, ALUOp::Asr64),
+                    _ => unreachable!(),
+                };
+                ctx.emit(alu_inst_immshift(alu_op, rd, rn, rm));
+            } else {
+                let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
+
+                let (alu_op, is_right_shift) = match op {
+                    Opcode::Ishl => (VecALUOp::Sshl, false),
+                    Opcode::Ushr => (VecALUOp::Ushl, true),
+                    Opcode::Sshr => (VecALUOp::Sshl, true),
+                    _ => unreachable!(),
+                };
+
+                let rm = if is_right_shift {
+                    // Right shifts are implemented with a negative left shift.
+                    let tmp = ctx.alloc_tmp(RegClass::I64, I32);
+                    let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
+                    let rn = zero_reg();
+                    ctx.emit(alu_inst_imm12(ALUOp::Sub32, tmp, rn, rm));
+                    tmp.to_reg()
+                } else {
+                    put_input_in_reg(ctx, inputs[1], NarrowValueMode::None)
+                };
+
+                ctx.emit(Inst::VecDup {
+                    rd,
+                    rn: rm,
+                    ty: ty.lane_type(),
+                });
+
+                ctx.emit(Inst::VecRRR {
+                    alu_op,
+                    rd,
+                    rn,
+                    rm: rd.to_reg(),
+                    ty,
+                });
+            }
         }
 
         Opcode::Rotr | Opcode::Rotl => {
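Note on the lowering above: AArch64 has no vector right-shift-by-register instruction, so for 128-bit types `ushr`/`sshr` are lowered to USHL/SSHL with a negated shift amount (the `Sub32` from `zero_reg()`), which `VecDup` then broadcasts into every lane. The sketch below is a standalone model of that behaviour plus the bit layout the emitter appears to use (`top11 << 21 | rm << 16 | bits15_10 << 10 | rn << 5 | rd`, inferred from the `enc_vec_rrr` call site); `vec_shl_word`, `ushl_lane32`, and `sshl_lane32` are illustrative names, not Cranelift APIs, and the checked values come from the emit tests in this patch.

// A standalone sketch, not part of the patch: plain Rust, no Cranelift
// dependencies, modelling the assumptions described above.

/// Assemble a USHL/SSHL instruction word, assuming enc_vec_rrr composes
/// `top11 << 21 | rm << 16 | bits15_10 << 10 | rn << 5 | rd`. `unsigned`
/// sets bit 8 of top11 (instruction bit 29, the U bit) and `size` is the
/// two-bit lane-size field (00=B, 01=H, 10=S, 11=D), matching the
/// `enc_size << 1` in the emitter.
fn vec_shl_word(unsigned: bool, size: u32, rm: u32, rn: u32, rd: u32) -> u32 {
    let u = unsigned as u32;
    let top11 = 0b010_01110_00_1 | (u << 8) | (size << 1);
    (top11 << 21) | (rm << 16) | (0b010001 << 10) | (rn << 5) | rd
}

/// Per-lane model of USHL on a 32-bit lane: a non-negative amount shifts
/// left, a negative amount shifts right, and overshift yields zero.
fn ushl_lane32(value: u32, shift: i8) -> u32 {
    let s = shift as i32;
    if s >= 0 {
        value.checked_shl(s as u32).unwrap_or(0)
    } else {
        value.checked_shr(-s as u32).unwrap_or(0)
    }
}

/// Per-lane model of SSHL: the negative direction is an arithmetic shift,
/// so overshift saturates to the sign fill (0 or -1) rather than zero.
fn sshl_lane32(value: i32, shift: i8) -> i32 {
    let s = shift as i32;
    if s >= 0 {
        value.checked_shl(s as u32).unwrap_or(0)
    } else {
        value >> (-s as u32).min(31)
    }
}

fn main() {
    // Encoding words match the emit tests above; the hex strings there are
    // little-endian byte order, so "5246326E" is the word 0x6E324652.
    assert_eq!(vec_shl_word(true, 0b00, 18, 18, 18), 0x6E32_4652); // ushl v18.16b
    assert_eq!(vec_shl_word(false, 0b00, 18, 18, 18), 0x4E32_4652); // sshl v18.16b

    // The lowering turns `ushr x, 3` into USHL by -3 and `sshr x, 3` into
    // SSHL by -3; `ishl` passes the positive amount straight through.
    assert_eq!(ushl_lane32(0x8000_0000, -3), 0x1000_0000);
    assert_eq!(sshl_lane32(-64, -3), -8);
    assert_eq!(ushl_lane32(1, 3), 8);
    println!("ok");
}

This models only the pieces the patch touches. In particular, the real USHL/SSHL read the shift amount from the low byte of each lane of the second operand, which is why the lowering must `VecDup` the (possibly negated) amount into every lane before emitting the `VecRRR`.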