Cranelift AArch64: Improve the Popcnt implementation
Now the backend uses the CNT instruction, which results in a major simplification. Copyright (c) 2021, Arm Limited.
This commit is contained in:
@@ -601,6 +601,14 @@ impl ScalarSize {
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert from an integer operand size.
///
/// Returns the `ScalarSize` variant of the same width as `size`:
/// `OperandSize::Size32` maps to `ScalarSize::Size32` and
/// `OperandSize::Size64` maps to `ScalarSize::Size64`.
|
||||
pub fn from_operand_size(size: OperandSize) -> ScalarSize {
|
||||
match size {
|
||||
OperandSize::Size32 => ScalarSize::Size32,
|
||||
OperandSize::Size64 => ScalarSize::Size64,
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert from a type into the smallest size that fits.
|
||||
pub fn from_ty(ty: Type) -> ScalarSize {
|
||||
Self::from_bits(ty_bits(ty))
|
||||
|
||||
@@ -1463,12 +1463,18 @@ impl MachInstEmit for Inst {
|
||||
debug_assert!(size == VectorSize::Size32x4 || size == VectorSize::Size64x2);
|
||||
(0b0, 0b11000, enc_size | 0b10)
|
||||
}
|
||||
VecMisc2::Cnt => {
|
||||
debug_assert!(size == VectorSize::Size8x8 || size == VectorSize::Size8x16);
|
||||
(0b0, 0b00101, enc_size)
|
||||
}
|
||||
};
|
||||
sink.put4(enc_vec_rr_misc((q << 1) | u, size, bits_12_16, rd, rn));
|
||||
}
|
||||
&Inst::VecLanes { op, rd, rn, size } => {
|
||||
let (q, size) = match size {
|
||||
VectorSize::Size8x8 => (0b0, 0b00),
|
||||
VectorSize::Size8x16 => (0b1, 0b00),
|
||||
VectorSize::Size16x4 => (0b0, 0b01),
|
||||
VectorSize::Size16x8 => (0b1, 0b01),
|
||||
VectorSize::Size32x4 => (0b1, 0b10),
|
||||
_ => unreachable!(),
|
||||
|
||||
@@ -3792,6 +3792,28 @@ fn test_aarch64_binemit() {
|
||||
"frintp v12.2d, v17.2d",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecMisc {
|
||||
op: VecMisc2::Cnt,
|
||||
rd: writable_vreg(23),
|
||||
rn: vreg(5),
|
||||
size: VectorSize::Size8x8,
|
||||
},
|
||||
"B758200E",
|
||||
"cnt v23.8b, v5.8b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecLanes {
|
||||
op: VecLanesOp::Uminv,
|
||||
rd: writable_vreg(0),
|
||||
rn: vreg(31),
|
||||
size: VectorSize::Size8x8,
|
||||
},
|
||||
"E0AB312E",
|
||||
"uminv b0, v31.8b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecLanes {
|
||||
op: VecLanesOp::Uminv,
|
||||
@@ -3836,6 +3858,17 @@ fn test_aarch64_binemit() {
|
||||
"addv b2, v29.16b",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecLanes {
|
||||
op: VecLanesOp::Addv,
|
||||
rd: writable_vreg(15),
|
||||
rn: vreg(7),
|
||||
size: VectorSize::Size16x4,
|
||||
},
|
||||
"EFB8710E",
|
||||
"addv h15, v7.4h",
|
||||
));
|
||||
|
||||
insns.push((
|
||||
Inst::VecLanes {
|
||||
op: VecLanesOp::Addv,
|
||||
|
||||
@@ -331,6 +331,8 @@ pub enum VecMisc2 {
|
||||
Frintm,
|
||||
/// Floating point round to integral, rounding towards plus infinity
|
||||
Frintp,
|
||||
/// Population count per byte
|
||||
Cnt,
|
||||
}
|
||||
|
||||
/// A Vector narrowing operation with two registers.
|
||||
@@ -3752,6 +3754,7 @@ impl Inst {
|
||||
VecMisc2::Frintz => ("frintz", size),
|
||||
VecMisc2::Frintm => ("frintm", size),
|
||||
VecMisc2::Frintp => ("frintp", size),
|
||||
VecMisc2::Cnt => ("cnt", size),
|
||||
};
|
||||
|
||||
let rd_size = if is_shll { size.widen() } else { size };
|
||||
|
||||
Reference in New Issue
Block a user