Cranelift AArch64: Improve code generation for vector constants

In particular, introduce initial support for the MOVI and MVNI
instructions, with 8-bit elements. Also, treat vector constants
as 32- or 64-bit floating-point numbers, if their value allows
it, by relying on the architectural zero extension. Finally,
stop generating literal loads for 32-bit constants.

Copyright (c) 2020, Arm Limited.
This commit is contained in:
Anton Kirilov
2020-10-14 13:04:08 +01:00
parent 7b43bf76ed
commit 207779fe1d
12 changed files with 549 additions and 164 deletions

View File

@@ -437,6 +437,21 @@ fn enc_stxr(ty: Type, rs: Writable<Reg>, rt: Reg, rn: Reg) -> u32 {
| machreg_to_gpr(rt)
}
/// Build the 32-bit instruction word for an ASIMD "modified immediate"
/// instruction writing to vector register `rd`.
///
/// * `q_op` - two-bit field placed at bits 30:29 of the word.
/// * `cmode` - four-bit field placed at bits 15:12 of the word.
/// * `imm` - 8-bit immediate; its top three bits go to bits 18:16 and its
///   low five bits go to bits 9:5.
///
/// In debug builds, panics if `cmode` does not fit in four bits or `q_op`
/// does not fit in two bits.
fn enc_asimd_mod_imm(rd: Writable<Reg>, q_op: u32, cmode: u32, imm: u8) -> u32 {
    // Split the immediate into the two fields the encoding stores separately.
    let bits = u32::from(imm);
    let upper3 = bits >> 5;
    let lower5 = bits & 0b1_1111;

    debug_assert_eq!(cmode & 0b1111, cmode);
    debug_assert_eq!(q_op & 0b11, q_op);

    // Fixed opcode bits; every variable field below starts out as zero here.
    let base: u32 = 0b0_0_0_0111100000_000_0000_01_00000_00000;
    let fields = (q_op << 29) | (upper3 << 16) | (cmode << 12) | (lower5 << 5);

    base | fields | machreg_to_vec(rd.to_reg())
}
/// State carried between emissions of a sequence of instructions.
#[derive(Default, Clone, Debug)]
pub struct EmitState {
@@ -1588,19 +1603,6 @@ impl MachInstEmit for Inst {
};
sink.put4(enc_inttofpu(top16, rd, rn));
}
// Emit a 32-bit FP constant inline in the code stream as three pieces:
// a label-relative load into `rd`, a jump, and the raw constant bits.
&Inst::LoadFpuConst32 { rd, const_data } => {
// Load 32 bits from the label at PC-relative offset 8.
let inst = Inst::FpuLoad32 {
rd,
mem: AMode::Label(MemLabel::PCRel(8)),
srcloc: None,
};
inst.emit(sink, emit_info, state);
// Jump with a resolved offset of 8.
// NOTE(review): presumably this skips over the inline literal emitted
// below — confirm against the emitted instruction sizes.
let inst = Inst::Jump {
dest: BranchTarget::ResolvedOffset(8),
};
inst.emit(sink, emit_info, state);
// The constant itself, written as its 4-byte bit pattern.
sink.put4(const_data.to_bits());
}
&Inst::LoadFpuConst64 { rd, const_data } => {
let inst = Inst::FpuLoad64 {
rd,
@@ -1612,7 +1614,7 @@ impl MachInstEmit for Inst {
dest: BranchTarget::ResolvedOffset(12),
};
inst.emit(sink, emit_info, state);
sink.put8(const_data.to_bits());
sink.put8(const_data);
}
&Inst::LoadFpuConst128 { rd, const_data } => {
let inst = Inst::FpuLoad128 {
@@ -1751,6 +1753,53 @@ impl MachInstEmit for Inst {
| machreg_to_vec(rd.to_reg()),
);
}
// Emit a vector "duplicate immediate" instruction. The lane size, the shift
// carried by the immediate, and the `invert` flag together select the `op`
// bit and the 4-bit `cmode` field; the word itself is built by
// `enc_asimd_mod_imm`.
&Inst::VecDupImm {
rd,
imm,
invert,
size,
} => {
// `imm.value()` yields the 8-bit payload, a shift amount, and a flag.
// NOTE(review): `shift_ones` presumably means the shifted-in bits are
// ones rather than zeros — confirm against the immediate type.
let (imm, shift, shift_ones) = imm.value();
let (op, cmode) = match size.lane_size() {
// 8-bit lanes: neither inversion nor a shift is accepted.
ScalarSize::Size8 => {
assert!(!invert);
assert_eq!(shift, 0);
(0, 0b1110)
}
// 16-bit lanes: shift must be 0 or 8 (enforced via the mask check);
// a shift of 8 sets bit 1 of cmode. `op` carries the invert flag.
ScalarSize::Size16 => {
let s = shift & 8;
assert!(!shift_ones);
assert_eq!(s, shift);
(invert as u32, 0b1000 | (s >> 2))
}
ScalarSize::Size32 => {
if shift_ones {
// Only shifts of 8 and 16 are accepted here; `shift >> 4` is 0
// for 8 and 1 for 16, which becomes bit 0 of cmode.
assert!(shift == 8 || shift == 16);
(invert as u32, 0b1100 | (shift >> 4))
} else {
// Shift must be one of 0, 8, 16, 24; `s >> 2` places shift / 8
// into bits 2:1 of cmode.
let s = shift & 24;
assert_eq!(s, shift);
(invert as u32, 0b0000 | (s >> 2))
}
}
// 64-bit lanes: neither inversion nor a shift is accepted; `op` is
// fixed at 1.
ScalarSize::Size64 => {
assert!(!invert);
assert_eq!(shift, 0);
(1, 0b1110)
}
_ => unreachable!(),
};
// Combine into the 2-bit field expected by the encoder: bit 0 is `op`,
// bit 1 is set when the operand is 128 bits wide.
let q_op = op | ((size.is_128bits() as u32) << 1);
sink.put4(enc_asimd_mod_imm(rd, q_op, cmode, imm));
}
&Inst::VecExtend {
t,
rd,
@@ -1803,8 +1852,8 @@ impl MachInstEmit for Inst {
&Inst::VecMovElement {
rd,
rn,
idx1,
idx2,
dest_idx,
src_idx,
size,
} => {
let (imm5, shift) = match size.lane_size() {
@@ -1815,10 +1864,10 @@ impl MachInstEmit for Inst {
_ => unreachable!(),
};
let mask = 0b11111 >> shift;
debug_assert_eq!(idx1 & mask, idx1);
debug_assert_eq!(idx2 & mask, idx2);
let imm4 = (idx2 as u32) << (shift - 1);
let imm5 = imm5 | ((idx1 as u32) << shift);
debug_assert_eq!(dest_idx & mask, dest_idx);
debug_assert_eq!(src_idx & mask, src_idx);
let imm4 = (src_idx as u32) << (shift - 1);
let imm5 = imm5 | ((dest_idx as u32) << shift);
sink.put4(
0b011_01110000_00000_0_0000_1_00000_00000
| (imm5 << 16)