Merge pull request #1977 from jgouly/simd_shift
arm64: Implement SIMD shift instructions
This commit is contained in:
1
build.rs
1
build.rs
@@ -183,6 +183,7 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
|
||||
("simd", "simd_address") => return false,
|
||||
("simd", "simd_align") => return false,
|
||||
("simd", "simd_bitwise") => return false,
|
||||
("simd", "simd_bit_shift") => return false,
|
||||
("simd", "simd_boolean") => return false,
|
||||
("simd", "simd_f32x4_cmp") => return false,
|
||||
("simd", "simd_f64x2_cmp") => return false,
|
||||
|
||||
@@ -1352,6 +1352,8 @@ impl MachInstEmit for Inst {
|
||||
debug_assert_ne!(I64X2, ty);
|
||||
(0b010_01110_00_1 | enc_size << 1, 0b100111)
|
||||
}
|
||||
VecALUOp::Sshl => (0b010_01110_00_1 | enc_size << 1, 0b010001),
|
||||
VecALUOp::Ushl => (0b011_01110_00_1 | enc_size << 1, 0b010001),
|
||||
};
|
||||
sink.put4(enc_vec_rrr(top11, rm, bit15_10, rn, rd));
|
||||
}
|
||||
|
||||
@@ -2473,6 +2473,102 @@ fn test_aarch64_binemit() {
|
||||
"mul v18.4s, v18.4s, v18.4s",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Ushl,
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(18),
|
||||
rm: vreg(18),
|
||||
ty: I8X16,
|
||||
},
|
||||
"5246326E",
|
||||
"ushl v18.16b, v18.16b, v18.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Ushl,
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(18),
|
||||
rm: vreg(18),
|
||||
ty: I16X8,
|
||||
},
|
||||
"5246726E",
|
||||
"ushl v18.8h, v18.8h, v18.8h",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Ushl,
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(1),
|
||||
rm: vreg(21),
|
||||
ty: I32X4,
|
||||
},
|
||||
"3244B56E",
|
||||
"ushl v18.4s, v1.4s, v21.4s",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Ushl,
|
||||
rd: writable_vreg(5),
|
||||
rn: vreg(7),
|
||||
rm: vreg(19),
|
||||
ty: I64X2,
|
||||
},
|
||||
"E544F36E",
|
||||
"ushl v5.2d, v7.2d, v19.2d",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Sshl,
|
||||
rd: writable_vreg(18),
|
||||
rn: vreg(18),
|
||||
rm: vreg(18),
|
||||
ty: I8X16,
|
||||
},
|
||||
"5246324E",
|
||||
"sshl v18.16b, v18.16b, v18.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Sshl,
|
||||
rd: writable_vreg(30),
|
||||
rn: vreg(1),
|
||||
rm: vreg(29),
|
||||
ty: I16X8,
|
||||
},
|
||||
"3E447D4E",
|
||||
"sshl v30.8h, v1.8h, v29.8h",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Sshl,
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(22),
|
||||
rm: vreg(21),
|
||||
ty: I32X4,
|
||||
},
|
||||
"C846B54E",
|
||||
"sshl v8.4s, v22.4s, v21.4s",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecRRR {
|
||||
alu_op: VecALUOp::Sshl,
|
||||
rd: writable_vreg(8),
|
||||
rn: vreg(22),
|
||||
rm: vreg(2),
|
||||
ty: I64X2,
|
||||
},
|
||||
"C846E24E",
|
||||
"sshl v8.2d, v22.2d, v2.2d",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Not,
|
||||
|
||||
@@ -249,6 +249,10 @@ pub enum VecALUOp {
|
||||
Sub,
|
||||
/// Multiply
|
||||
Mul,
|
||||
/// Signed shift left
|
||||
Sshl,
|
||||
/// Unsigned shift left
|
||||
Ushl,
|
||||
}
|
||||
|
||||
/// A Vector miscellaneous operation with two registers.
|
||||
@@ -2750,6 +2754,8 @@ impl ShowWithRRU for Inst {
|
||||
VecALUOp::Add => ("add", true, ty),
|
||||
VecALUOp::Sub => ("sub", true, ty),
|
||||
VecALUOp::Mul => ("mul", true, ty),
|
||||
VecALUOp::Sshl => ("sshl", true, ty),
|
||||
VecALUOp::Ushl => ("ushl", true, ty),
|
||||
};
|
||||
|
||||
let show_vreg_fn: fn(Reg, Option<&RealRegUniverse>, Type) -> String = if vector {
|
||||
|
||||
@@ -484,6 +484,8 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
Opcode::Ishl | Opcode::Ushr | Opcode::Sshr => {
|
||||
let ty = ty.unwrap();
|
||||
let size = InstSize::from_bits(ty_bits(ty));
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
if ty_bits(ty) < 128 {
|
||||
let narrow_mode = match (op, size) {
|
||||
(Opcode::Ishl, _) => NarrowValueMode::None,
|
||||
(Opcode::Ushr, InstSize::Size64) => NarrowValueMode::ZeroExtend64,
|
||||
@@ -492,7 +494,6 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
(Opcode::Sshr, InstSize::Size32) => NarrowValueMode::SignExtend32,
|
||||
_ => unreachable!(),
|
||||
};
|
||||
let rd = get_output_reg(ctx, outputs[0]);
|
||||
let rn = put_input_in_reg(ctx, inputs[0], narrow_mode);
|
||||
let rm = put_input_in_reg_immshift(ctx, inputs[1], ty_bits(ty));
|
||||
let alu_op = match op {
|
||||
@@ -502,6 +503,41 @@ pub(crate) fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
|
||||
_ => unreachable!(),
|
||||
};
|
||||
ctx.emit(alu_inst_immshift(alu_op, rd, rn, rm));
|
||||
} else {
|
||||
let rn = put_input_in_reg(ctx, inputs[0], NarrowValueMode::None);
|
||||
|
||||
let (alu_op, is_right_shift) = match op {
|
||||
Opcode::Ishl => (VecALUOp::Sshl, false),
|
||||
Opcode::Ushr => (VecALUOp::Ushl, true),
|
||||
Opcode::Sshr => (VecALUOp::Sshl, true),
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
let rm = if is_right_shift {
|
||||
// Right shifts are implemented as left shifts (SSHL/USHL) by the negated shift amount.
|
||||
let tmp = ctx.alloc_tmp(RegClass::I64, I32);
|
||||
let rm = put_input_in_rse_imm12(ctx, inputs[1], NarrowValueMode::None);
|
||||
let rn = zero_reg();
|
||||
ctx.emit(alu_inst_imm12(ALUOp::Sub32, tmp, rn, rm));
|
||||
tmp.to_reg()
|
||||
} else {
|
||||
put_input_in_reg(ctx, inputs[1], NarrowValueMode::None)
|
||||
};
|
||||
|
||||
ctx.emit(Inst::VecDup {
|
||||
rd,
|
||||
rn: rm,
|
||||
ty: ty.lane_type(),
|
||||
});
|
||||
|
||||
ctx.emit(Inst::VecRRR {
|
||||
alu_op,
|
||||
rd,
|
||||
rn,
|
||||
rm: rd.to_reg(),
|
||||
ty,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Rotr | Opcode::Rotl => {
|
||||
|
||||
Reference in New Issue
Block a user