Cranelift AArch64: Improve code generation for vector constants

In particular, introduce initial support for the MOVI and MVNI
instructions, with 8-bit elements. Also, treat vector constants
as 32- or 64-bit floating-point numbers, if their value allows
it, by relying on the architectural zero extension. Finally,
stop generating literal loads for 32-bit constants.

Copyright (c) 2020, Arm Limited.
This commit is contained in:
Anton Kirilov
2020-10-14 13:04:08 +01:00
parent 7b43bf76ed
commit 207779fe1d
12 changed files with 549 additions and 164 deletions

View File

@@ -813,7 +813,11 @@ pub(crate) fn lower_constant_f32<C: LowerCtx<I = Inst>>(
rd: Writable<Reg>,
value: f32,
) {
ctx.emit(Inst::load_fp_constant32(rd, value));
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
for inst in Inst::load_fp_constant32(rd, value.to_bits(), alloc_tmp) {
ctx.emit(inst);
}
}
pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
@@ -821,7 +825,11 @@ pub(crate) fn lower_constant_f64<C: LowerCtx<I = Inst>>(
rd: Writable<Reg>,
value: f64,
) {
ctx.emit(Inst::load_fp_constant64(rd, value));
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
for inst in Inst::load_fp_constant64(rd, value.to_bits(), alloc_tmp) {
ctx.emit(inst);
}
}
pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
@@ -829,7 +837,38 @@ pub(crate) fn lower_constant_f128<C: LowerCtx<I = Inst>>(
rd: Writable<Reg>,
value: u128,
) {
ctx.emit(Inst::load_fp_constant128(rd, value));
let alloc_tmp = |class, ty| ctx.alloc_tmp(class, ty);
for inst in Inst::load_fp_constant128(rd, value, alloc_tmp) {
ctx.emit(inst);
}
}
/// Emits into `ctx` the instruction sequence that fills every lane of the
/// vector register `rd` with the constant `value`.
///
/// `size` describes the requested vector arrangement. Only the low bits of
/// `value` that fit in one lane of `size` are used. If
/// `Inst::get_replicated_vector_pattern` reports a pattern for the truncated
/// value, the constant is emitted with the lane size that helper returns
/// instead of the requested one; the 64- vs 128-bit vector width is kept.
///
/// Panics (via `unreachable!`) if the lane size of `size` is not 8, 16, 32
/// or 64 bits.
pub(crate) fn lower_splat_const<C: LowerCtx<I = Inst>>(
    ctx: &mut C,
    rd: Writable<Reg>,
    value: u64,
    size: VectorSize,
) {
    // Truncate the constant to the lane width and select the scalar size that
    // is handed to the replication check below.
    // NOTE(review): the 8-bit case passes `Size128`, presumably to signal that
    // there is no narrower lane to try — confirm against
    // `Inst::get_replicated_vector_pattern`.
    let (lane_bits, probe_size) = match size.lane_size() {
        ScalarSize::Size8 => (value as u8 as u64, ScalarSize::Size128),
        ScalarSize::Size16 => (value as u16 as u64, ScalarSize::Size8),
        ScalarSize::Size32 => (value as u32 as u64, ScalarSize::Size16),
        ScalarSize::Size64 => (value, ScalarSize::Size32),
        _ => unreachable!(),
    };

    // Use the pattern and lane size found by the helper when there is one;
    // otherwise keep the truncated constant and the caller's arrangement.
    let (pattern, pattern_size) = if let Some((narrowed, lane_size)) =
        Inst::get_replicated_vector_pattern(lane_bits as u128, probe_size)
    {
        (
            narrowed,
            VectorSize::from_lane_size(lane_size, size.is_128bits()),
        )
    } else {
        (lane_bits, size)
    };

    // Build the whole sequence first, so the temp-allocation closure's use of
    // `ctx` has ended before `ctx` is used again to emit.
    let insts = Inst::load_replicated_vector_pattern(rd, pattern, pattern_size, |class, ty| {
        ctx.alloc_tmp(class, ty)
    });
    for inst in insts {
        ctx.emit(inst);
    }
}
pub(crate) fn lower_condcode(cc: IntCC) -> Cond {